View | Details | Raw Unified | Return to bug 57031
Collapse All | Expand All

(-)src/ooxml/java/org/apache/poi/util/StAXHelper.java (+83 lines)
Line 0 Link Here
1
/* ====================================================================
2
   Licensed to the Apache Software Foundation (ASF) under one or more
3
   contributor license agreements.  See the NOTICE file distributed with
4
   this work for additional information regarding copyright ownership.
5
   The ASF licenses this file to You under the Apache License, Version 2.0
6
   (the "License"); you may not use this file except in compliance with
7
   the License.  You may obtain a copy of the License at
8
9
       http://www.apache.org/licenses/LICENSE-2.0
10
11
   Unless required by applicable law or agreed to in writing, software
12
   distributed under the License is distributed on an "AS IS" BASIS,
13
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
   See the License for the specific language governing permissions and
15
   limitations under the License.
16
==================================================================== */
17
18
package org.apache.poi.util;
19
20
import javax.xml.stream.XMLInputFactory;
21
import javax.xml.stream.XMLStreamException;
22
import javax.xml.stream.XMLStreamReader;
23
import java.io.InputStream;
24
import java.lang.reflect.Method;
25
26
27
/**
28
 * Provides handy methods for working with StAX readers
29
 */
30
public final class StAXHelper {
31
    private static POILogger logger = POILogFactory.getLogger(StAXHelper.class);
32
33
    private StAXHelper() {}
34
35
    /**
36
     * Creates a new StAX XMLReader, with sensible defaults
37
     */
38
    public static synchronized XMLStreamReader newXMLStreamReader(InputStream is) throws XMLStreamException {
39
        return staxFactory.createXMLStreamReader(is);
40
    }
41
42
    
43
    private static final XMLInputFactory staxFactory;
44
    static {
45
        staxFactory = XMLInputFactory.newInstance();
46
        trySetStAXProperty(staxFactory, XMLInputFactory.IS_NAMESPACE_AWARE, true);
47
        trySetStAXProperty(staxFactory, XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, false);
48
        trySetStAXProperty(staxFactory, XMLInputFactory.IS_VALIDATING, false);
49
        //TODO: do we need an ignoring xmlresolver?
50
        //staxFactory.setXMLResolver(IGNORING_ENTITY_RESOLVER);
51
        trySetXercesSecurityManager(staxFactory);
52
    }
53
54
    private static void trySetStAXProperty(XMLInputFactory xmlInputFactory, String property, boolean enabled) {
55
        try {
56
            xmlInputFactory.setProperty(property, enabled);
57
        } catch (Exception e) {
58
            logger.log(POILogger.WARN, "StAX Property unsupported", property, e);
59
        } catch (AbstractMethodError ame) {
60
            logger.log(POILogger.WARN, "Cannot set StAX feature because outdated XML parser in classpath",
61
                    property, ame);
62
        }
63
    }
64
    
65
    private static void trySetXercesSecurityManager(XMLInputFactory staxFactory) {
66
        // Try built-in JVM one first, standalone if not
67
        for (String securityManagerClassName : new String[] {
68
                "com.sun.org.apache.xerces.internal.util.SecurityManager",
69
                "org.apache.xerces.util.SecurityManager"
70
        }) {
71
            try {
72
                Object mgr = Class.forName(securityManagerClassName).newInstance();
73
                Method setLimit = mgr.getClass().getMethod("setEntityExpansionLimit", Integer.TYPE);
74
                setLimit.invoke(mgr, 4096);
75
                staxFactory.setProperty("http://apache.org/xml/properties/security-manager", mgr);
76
                // Stop once one can be setup without error
77
                return;
78
            } catch (Exception e) {
79
                logger.log(POILogger.WARN, "StAX Security Manager could not be setup", e);
80
            }
81
        }
82
    }
83
}
(-)src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFDocument.java (-3 / +9 lines)
Lines 18-23 Link Here
18
18
19
import static org.apache.poi.POIXMLTypeLoader.DEFAULT_XML_OPTIONS;
19
import static org.apache.poi.POIXMLTypeLoader.DEFAULT_XML_OPTIONS;
20
20
21
import javax.xml.namespace.QName;
22
import javax.xml.stream.XMLStreamException;
23
import javax.xml.stream.XMLStreamReader;
21
import java.io.ByteArrayOutputStream;
24
import java.io.ByteArrayOutputStream;
22
import java.io.IOException;
25
import java.io.IOException;
23
import java.io.InputStream;
26
import java.io.InputStream;
Lines 32-39 Link Here
32
import java.util.List;
35
import java.util.List;
33
import java.util.Map;
36
import java.util.Map;
34
37
35
import javax.xml.namespace.QName;
36
37
import org.apache.poi.POIXMLDocument;
38
import org.apache.poi.POIXMLDocument;
38
import org.apache.poi.POIXMLDocumentPart;
39
import org.apache.poi.POIXMLDocumentPart;
39
import org.apache.poi.POIXMLException;
40
import org.apache.poi.POIXMLException;
Lines 55-60 Link Here
55
import org.apache.poi.util.POILogFactory;
56
import org.apache.poi.util.POILogFactory;
56
import org.apache.poi.util.POILogger;
57
import org.apache.poi.util.POILogger;
57
import org.apache.poi.util.PackageHelper;
58
import org.apache.poi.util.PackageHelper;
59
import org.apache.poi.util.StAXHelper;
58
import org.apache.poi.xwpf.model.XWPFHeaderFooterPolicy;
60
import org.apache.poi.xwpf.model.XWPFHeaderFooterPolicy;
59
import org.apache.xmlbeans.XmlCursor;
61
import org.apache.xmlbeans.XmlCursor;
60
import org.apache.xmlbeans.XmlException;
62
import org.apache.xmlbeans.XmlException;
Lines 161-167 Link Here
161
    @Override
163
    @Override
162
    protected void onDocumentRead() throws IOException {
164
    protected void onDocumentRead() throws IOException {
163
        try {
165
        try {
164
            DocumentDocument doc = DocumentDocument.Factory.parse(getPackagePart().getInputStream(), DEFAULT_XML_OPTIONS);
166
            XMLStreamReader reader = StAXHelper.newXMLStreamReader(getPackagePart().getInputStream());
167
168
            DocumentDocument doc = DocumentDocument.Factory.parse(reader, DEFAULT_XML_OPTIONS);
165
            ctDocument = doc.getDocument();
169
            ctDocument = doc.getDocument();
166
170
167
            initFootnotes();
171
            initFootnotes();
Lines 238-243 Link Here
238
            }
242
            }
239
            initHyperlinks();
243
            initHyperlinks();
240
        } catch (XmlException e) {
244
        } catch (XmlException e) {
245
            throw new POIXMLException(e);
246
        } catch (XMLStreamException e) {
241
            throw new POIXMLException(e);
247
            throw new POIXMLException(e);
242
        }
248
        }
243
    }
249
    }
(-)src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java (-1 / +9 lines)
Lines 409-415 Link Here
409
409
410
    public void testCheckboxes() throws IOException {
410
    public void testCheckboxes() throws IOException {
411
        XWPFDocument doc = XWPFTestDataSamples.openSampleDocument("checkboxes.docx");
411
        XWPFDocument doc = XWPFTestDataSamples.openSampleDocument("checkboxes.docx");
412
        System.out.println(doc);
413
        XWPFWordExtractor extractor = new XWPFWordExtractor(doc);
412
        XWPFWordExtractor extractor = new XWPFWordExtractor(doc);
414
413
415
        assertEquals("This is a small test for checkboxes \nunchecked: |_| \n" +
414
        assertEquals("This is a small test for checkboxes \nunchecked: |_| \n" +
Lines 417-422 Link Here
417
                "Test a checkbox within a textbox: |_| -> |X|\n\n\n" +
416
                "Test a checkbox within a textbox: |_| -> |X|\n\n\n" +
418
                "In Table:\n|_|\t|X|\n\n\n" +
417
                "In Table:\n|_|\t|X|\n\n\n" +
419
                "In Sequence:\n|X||_||X|\n", extractor.getText());
418
                "In Sequence:\n|X||_||X|\n", extractor.getText());
419
        extractor.close();
420
    }
421
422
    public void testBug57031() throws Exception {
423
        XWPFDocument doc = XWPFTestDataSamples.openSampleDocument("57031.docx");
424
        XWPFWordExtractor extractor = new XWPFWordExtractor(doc);
425
426
        // Check it gives text without error
427
        extractor.getText();
420
        extractor.close();
428
        extractor.close();
421
    }
429
    }
422
}
430
}

Return to bug 57031