ASF Bugzilla – Attachment 33591 Details for
Bug 57031
Out of Memory when extracting text from attached files
Home
|
New
|
Browse
|
Search
|
[?]
|
Reports
|
Help
|
New Account
|
Log In
Remember
[x]
|
Forgot Password
Login:
[x]
[patch]
switch out piccolo parser for xerces
57031.patch (text/plain), 7.24 KB, created by
Tim Allison
on 2016-02-24 20:22:52 UTC
(
hide
)
Description:
switch out piccolo parser for xerces
Filename:
MIME Type:
Creator:
Tim Allison
Created:
2016-02-24 20:22:52 UTC
Size:
7.24 KB
patch
obsolete
>Index: src/ooxml/java/org/apache/poi/util/StAXHelper.java >IDEA additional info: >Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP ><+>UTF-8 >=================================================================== >--- src/ooxml/java/org/apache/poi/util/StAXHelper.java (revision ) >+++ src/ooxml/java/org/apache/poi/util/StAXHelper.java (revision ) >@@ -0,0 +1,83 @@ >+/* ==================================================================== >+ Licensed to the Apache Software Foundation (ASF) under one or more >+ contributor license agreements. See the NOTICE file distributed with >+ this work for additional information regarding copyright ownership. >+ The ASF licenses this file to You under the Apache License, Version 2.0 >+ (the "License"); you may not use this file except in compliance with >+ the License. You may obtain a copy of the License at >+ >+ http://www.apache.org/licenses/LICENSE-2.0 >+ >+ Unless required by applicable law or agreed to in writing, software >+ distributed under the License is distributed on an "AS IS" BASIS, >+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. >+ See the License for the specific language governing permissions and >+ limitations under the License. >+==================================================================== */ >+ >+package org.apache.poi.util; >+ >+import javax.xml.stream.XMLInputFactory; >+import javax.xml.stream.XMLStreamException; >+import javax.xml.stream.XMLStreamReader; >+import java.io.InputStream; >+import java.lang.reflect.Method; >+ >+ >+/** >+ * Provides handy methods for working with StAX readers >+ */ >+public final class StAXHelper { >+ private static POILogger logger = POILogFactory.getLogger(StAXHelper.class); >+ >+ private StAXHelper() {} >+ >+ /** >+ * Creates a new StAX XMLReader, with sensible defaults >+ */ >+ public static synchronized XMLStreamReader newXMLStreamReader(InputStream is) throws XMLStreamException { >+ return staxFactory.createXMLStreamReader(is); >+ } >+ >+ >+ private static final XMLInputFactory staxFactory; >+ static { >+ staxFactory = XMLInputFactory.newInstance(); >+ trySetStAXProperty(staxFactory, XMLInputFactory.IS_NAMESPACE_AWARE, true); >+ trySetStAXProperty(staxFactory, XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, false); >+ trySetStAXProperty(staxFactory, XMLInputFactory.IS_VALIDATING, false); >+ //TODO: do we need an ignoring xmlresolver? >+ //staxFactory.setXMLResolver(IGNORING_ENTITY_RESOLVER); >+ trySetXercesSecurityManager(staxFactory); >+ } >+ >+ private static void trySetStAXProperty(XMLInputFactory xmlInputFactory, String property, boolean enabled) { >+ try { >+ xmlInputFactory.setProperty(property, enabled); >+ } catch (Exception e) { >+ logger.log(POILogger.WARN, "StAX Property unsupported", property, e); >+ } catch (AbstractMethodError ame) { >+ logger.log(POILogger.WARN, "Cannot set StAX feature because outdated XML parser in classpath", >+ property, ame); >+ } >+ } >+ >+ private static void trySetXercesSecurityManager(XMLInputFactory staxFactory) { >+ // Try built-in JVM one first, standalone if not >+ for (String securityManagerClassName : new String[] { >+ "com.sun.org.apache.xerces.internal.util.SecurityManager", >+ "org.apache.xerces.util.SecurityManager" >+ }) { >+ try { >+ Object mgr = Class.forName(securityManagerClassName).newInstance(); >+ Method setLimit = mgr.getClass().getMethod("setEntityExpansionLimit", Integer.TYPE); >+ setLimit.invoke(mgr, 4096); >+ staxFactory.setProperty("http://apache.org/xml/properties/security-manager", mgr); >+ // Stop once one can be setup without error >+ return; >+ } catch (Exception e) { >+ logger.log(POILogger.WARN, "StAX Security Manager could not be setup", e); >+ } >+ } >+ } >+} >Index: src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFDocument.java >IDEA additional info: >Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP ><+>UTF-8 >=================================================================== >--- src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFDocument.java (date 1456341576000) >+++ src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFDocument.java (revision ) >@@ -18,6 +18,9 @@ > > import static org.apache.poi.POIXMLTypeLoader.DEFAULT_XML_OPTIONS; > >+import javax.xml.namespace.QName; >+import javax.xml.stream.XMLStreamException; >+import javax.xml.stream.XMLStreamReader; > import java.io.ByteArrayOutputStream; > import java.io.IOException; > import java.io.InputStream; >@@ -32,8 +35,6 @@ > import java.util.List; > import java.util.Map; > >-import javax.xml.namespace.QName; >- > import org.apache.poi.POIXMLDocument; > import org.apache.poi.POIXMLDocumentPart; > import org.apache.poi.POIXMLException; >@@ -55,6 +56,7 @@ > import org.apache.poi.util.POILogFactory; > import org.apache.poi.util.POILogger; > import org.apache.poi.util.PackageHelper; >+import org.apache.poi.util.StAXHelper; > import org.apache.poi.xwpf.model.XWPFHeaderFooterPolicy; > import org.apache.xmlbeans.XmlCursor; > import org.apache.xmlbeans.XmlException; >@@ -161,7 +163,9 @@ > @Override > protected void onDocumentRead() throws IOException { > try { >- DocumentDocument doc = DocumentDocument.Factory.parse(getPackagePart().getInputStream(), DEFAULT_XML_OPTIONS); >+ XMLStreamReader reader = StAXHelper.newXMLStreamReader(getPackagePart().getInputStream()); >+ >+ DocumentDocument doc = DocumentDocument.Factory.parse(reader, DEFAULT_XML_OPTIONS); > ctDocument = doc.getDocument(); > > initFootnotes(); >@@ -238,6 +242,8 @@ > } > initHyperlinks(); > } catch (XmlException e) { >+ throw new POIXMLException(e); >+ } catch (XMLStreamException e) { > throw new POIXMLException(e); > } > } >Index: src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java >IDEA additional info: >Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP ><+>UTF-8 >=================================================================== >--- src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java (date 1456341576000) >+++ src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java (revision ) >@@ -409,7 +409,6 @@ > > public void testCheckboxes() throws IOException { > XWPFDocument doc = XWPFTestDataSamples.openSampleDocument("checkboxes.docx"); >- System.out.println(doc); > XWPFWordExtractor extractor = new XWPFWordExtractor(doc); > > assertEquals("This is a small test for checkboxes \nunchecked: |_| \n" + >@@ -417,6 +416,15 @@ > "Test a checkbox within a textbox: |_| -> |X|\n\n\n" + > "In Table:\n|_|\t|X|\n\n\n" + > "In Sequence:\n|X||_||X|\n", extractor.getText()); >+ extractor.close(); >+ } >+ >+ public void testBug57031() throws Exception { >+ XWPFDocument doc = XWPFTestDataSamples.openSampleDocument("57031.docx"); >+ XWPFWordExtractor extractor = new XWPFWordExtractor(doc); >+ >+ // Check it gives text without error >+ extractor.getText(); > extractor.close(); > } > }
You cannot view the attachment while viewing its details because your browser does not support IFRAMEs.
View the attachment on a separate page
.
View Attachment As Diff
View Attachment As Raw
Actions:
View
|
Diff
Attachments on
bug 57031
:
32066
| 33591 |
33621