Adds a system property switch, POI.PAPBinTable.bypass.paragraphs.rebuild, that
lets PAPBinTable skip rebuilding the document paragraphs structure, and uses
the switch in the tests that load 007488.doc.

The switch is read with Boolean.getBoolean, so only the value "true" (in any
case) enables the bypass. Tests remove the property with System.clearProperty
when they finish instead of leaving it defined with an empty value.

NOTE(review): the indentation of the context lines below was reconstructed
from whitespace-collapsed text; verify it against revision 1147857 before
applying this patch.

Index: src/scratchpad/src/org/apache/poi/hwpf/model/PAPBinTable.java
===================================================================
--- src/scratchpad/src/org/apache/poi/hwpf/model/PAPBinTable.java	(revision 1147857)
+++ src/scratchpad/src/org/apache/poi/hwpf/model/PAPBinTable.java	(working copy)
@@ -147,6 +147,23 @@
             }
         }
 
+        if (!bypassRebuildDocumentParagraphsStructure()) {
+            rebuildDocumentParagraphsStructure(dataStream, tpt);
+        }
+
+        _dataStream = dataStream;
+    }
+
+    /**
+     * Reports whether the paragraphs structure rebuild must be skipped. The
+     * bypass is active only while the system property
+     * POI.PAPBinTable.bypass.paragraphs.rebuild is set to "true" (any case).
+     */
+    private boolean bypassRebuildDocumentParagraphsStructure() {
+        return Boolean.getBoolean("POI.PAPBinTable.bypass.paragraphs.rebuild");
+    }
+
+    private void rebuildDocumentParagraphsStructure(byte[] dataStream, TextPieceTable tpt) {
         // rebuild document paragraphs structure
         StringBuilder docText = new StringBuilder();
         for ( TextPiece textPiece : tpt.getTextPieces() )
@@ -250,8 +267,6 @@
             continue;
         }
         this._paragraphs = new ArrayList( newPapxs );
-
-        _dataStream = dataStream;
     }
 
     public void insert(int listIndex, int cpStart, SprmBuffer buf)
Index: src/scratchpad/testcases/org/apache/poi/hwpf/converter/TestWordToConverterSuite.java
===================================================================
--- src/scratchpad/testcases/org/apache/poi/hwpf/converter/TestWordToConverterSuite.java	(revision 1147857)
+++ src/scratchpad/testcases/org/apache/poi/hwpf/converter/TestWordToConverterSuite.java	(working copy)
@@ -42,6 +42,9 @@
      */
     private static List failingFiles = Arrays
             .asList( "ProblemExtracting.doc" );
+
+    private static final List<String> largeFiles = Arrays
+            .asList( "007488.doc" );
 
     public static Test suite()
     {
@@ -81,35 +84,41 @@
 
     protected static void test( File child, boolean html ) throws Exception
     {
-        HWPFDocumentCore hwpfDocument;
-        try
-        {
-            hwpfDocument = AbstractWordUtils.loadDoc( child );
-        }
-        catch ( Exception exc )
-        {
-            // unable to parse file -- not WordToFoConverter fault
-            return;
+        try {
+            if (largeFiles.contains(child.getName())) {
+                System.setProperty("POI.PAPBinTable.bypass.paragraphs.rebuild", "true");
+            }
+            HWPFDocumentCore hwpfDocument;
+            try
+            {
+                hwpfDocument = AbstractWordUtils.loadDoc( child );
+            }
+            catch ( Exception exc )
+            {
+                // unable to parse file -- not WordToFoConverter fault
+                return;
+            }
+
+            WordToFoConverter wordToFoConverter = new WordToFoConverter(
+                    DocumentBuilderFactory.newInstance().newDocumentBuilder()
+                            .newDocument() );
+            wordToFoConverter.processDocument( hwpfDocument );
+
+            StringWriter stringWriter = new StringWriter();
+
+            Transformer transformer = TransformerFactory.newInstance()
+                    .newTransformer();
+            transformer.setOutputProperty( OutputKeys.ENCODING, "utf-8" );
+            transformer.setOutputProperty( OutputKeys.INDENT, "yes" );
+            transformer.transform(
+                    new DOMSource( wordToFoConverter.getDocument() ),
+                    new StreamResult( stringWriter ) );
+
+            if ( html )
+                transformer.setOutputProperty( OutputKeys.METHOD, "html" );
+        } finally {
+            System.clearProperty("POI.PAPBinTable.bypass.paragraphs.rebuild");
         }
-
-        WordToFoConverter wordToFoConverter = new WordToFoConverter(
-                DocumentBuilderFactory.newInstance().newDocumentBuilder()
-                        .newDocument() );
-        wordToFoConverter.processDocument( hwpfDocument );
-
-        StringWriter stringWriter = new StringWriter();
-
-        Transformer transformer = TransformerFactory.newInstance()
-                .newTransformer();
-        transformer.setOutputProperty( OutputKeys.ENCODING, "utf-8" );
-        transformer.setOutputProperty( OutputKeys.INDENT, "yes" );
-        transformer.transform(
-                new DOMSource( wordToFoConverter.getDocument() ),
-                new StreamResult( stringWriter ) );
-
-        if ( html )
-            transformer.setOutputProperty( OutputKeys.METHOD, "html" );
-
         // no exceptions
     }
 }
Index: src/scratchpad/testcases/org/apache/poi/hwpf/TestGovdocs007488.java
===================================================================
--- src/scratchpad/testcases/org/apache/poi/hwpf/TestGovdocs007488.java	(revision 0)
+++ src/scratchpad/testcases/org/apache/poi/hwpf/TestGovdocs007488.java	(revision 0)
@@ -0,0 +1,41 @@
+package org.apache.poi.hwpf;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+import junit.framework.TestCase;
+
+import org.apache.poi.POIDataSamples;
+import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
+
+/**
+ * Checks that 007488.doc can be loaded while the paragraphs structure
+ * rebuild in PAPBinTable is bypassed through the system property.
+ */
+public class TestGovdocs007488 extends TestCase {
+
+    private static final String BYPASS_PROPERTY = "POI.PAPBinTable.bypass.paragraphs.rebuild";
+
+    @Override
+    public void setUp() {
+        System.setProperty(BYPASS_PROPERTY, "true");
+    }
+
+    @Override
+    public void tearDown() {
+        // remove the property so it does not stay defined for later tests
+        System.clearProperty(BYPASS_PROPERTY);
+    }
+
+    public void test007488() throws IOException {
+        InputStream is = POIDataSamples.getDocumentInstance().openResourceAsStream("007488.doc");
+        try {
+            NPOIFSFileSystem fs = new NPOIFSFileSystem(is);
+            // the test passes when loading completes without an exception
+            new HWPFDocument(fs.getRoot());
+        } finally {
+            is.close();
+        }
+    }
+
+}