Index: scratchpad/src/org/apache/poi/hdf/extractor/Utils.java =================================================================== RCS file: /home/cvspublic/jakarta-poi/src/scratchpad/src/org/apache/poi/hdf/extractor/Utils.java,v --- scratchpad/src/org/apache/poi/hdf/extractor/Utils.java 1.2 +++ scratchpad/src/org/apache/poi/hdf/extractor/Utils.java @@ -59,1 +59,1 @@ - * Comment me --- + * Byte to other signed/unsigned type static conversions methods. @@ -62,0 +62,2 @@ + * @author Serge Huber + * @author Guerin Thierry @@ -83,1 +85,9 @@ - return convertBytesToShort(array[offset + 1], array[offset]); --- + if (array == null) { + return 0; + } + if (offset + 1 < array.length) + return convertBytesToShort(array[offset + 1], array[offset]); + else if (offset < array.length) + return convertBytesToShort((byte) 0, array[offset]); + else + return (short) 0; Index: scratchpad/src/org/apache/poi/hdf/extractor/WordDocument.java =================================================================== RCS file: /home/cvspublic/jakarta-poi/src/scratchpad/src/org/apache/poi/hdf/extractor/WordDocument.java,v --- scratchpad/src/org/apache/poi/hdf/extractor/WordDocument.java 1.7 +++ scratchpad/src/org/apache/poi/hdf/extractor/WordDocument.java @@ -63,1 +63,0 @@ -import javax.swing.*; @@ -65,1 +64,0 @@ -import java.awt.*; @@ -68,1 +66,0 @@ -import org.apache.poi.poifs.filesystem.POIFSDocument; @@ -79,0 +76,2 @@ + * @author Serge Huber + * @author Guerin Thierry @@ -186,0 +185,7 @@ + + if (textPieces.size() == 0) { + // fall-back to full text extraction, hoping it won't be too messy... + TextPiece fullText = new TextPiece(textStart, textEnd - textStart, false); + textPieces.add(fullText); + } + @@ -208,1 +214,1 @@ - String sText = new String(_header, start, end-start); --- + String sText = new String(_header, start, end-start, "Cp1252"); @@ -267,1 +273,4 @@ - filesystem.createDocumentInputStream("WordDocument").read(_header); --- + int bytesRead = filesystem.createDocumentInputStream("WordDocument").read(_header); + + int fibVersion = LittleEndian.getShort(_header, 0x2) & 0xFFFF; + int productVersion = LittleEndian.getShort(_header, 0x4) & 0xFFFF; @@ -273,0 +282,1 @@ + int _fcMax = LittleEndian.getInt(_header, 0x1C); @@ -275,0 +285,2 @@ + int _ccpHdd = LittleEndian.getInt(_header, 0x54); + @@ -280,0 +292,3 @@ + + int lcbClx = LittleEndian.getInt(_header, 0x1A6); + @@ -281,0 +296,1 @@ + boolean isComplex = (info & 0x4) != 0; @@ -282,2 +298,25 @@ - //process the text and formatting properties - processComplexFile(useTable1, charPLC, charPlcSize, parPLC, parPlcSize); --- + if (isComplex) { + // now let's verify the existence of a table stream + String tablename=null; + DocumentEntry tableEntry = null; + if (useTable1) { + tablename="1Table"; + } else { + tablename="0Table"; + } + try { + tableEntry = (DocumentEntry) filesystem.getRoot().getEntry( + tablename); + } catch (FileNotFoundException fnfe) { + isComplex = false; + } + } + + if ((isComplex)) { + //process the text and formatting properties + processComplexFile(useTable1, charPLC, charPlcSize, parPLC, + parPlcSize); + } else { + TextPiece piece = new TextPiece(_fcMin, _fcMax - _fcMin, false); + _text.add(piece); + } @@ -1826,1 +1865,2 @@ - rowBuffer.append((String)cells.get(y)); --- + if (cells != null) + rowBuffer.append((String)cells.get(y)); Index: scratchpad/src/org/apache/poi/hdf/extractor/data/ListTables.java =================================================================== RCS file: /home/cvspublic/jakarta-poi/src/scratchpad/src/org/apache/poi/hdf/extractor/data/ListTables.java,v --- scratchpad/src/org/apache/poi/hdf/extractor/data/ListTables.java 1.2 +++ scratchpad/src/org/apache/poi/hdf/extractor/data/ListTables.java @@ -204,2 +204,3 @@ - int chpxSize = data[offset + 24]; - int papxSize = data[offset + 25]; --- + int chpxSize = Utils.convertUnsignedByteToInt( data[offset + 24] ); + int papxSize = Utils.convertUnsignedByteToInt( data[offset + 25] ); + @@ -211,1 +212,1 @@ - int xstSize = Utils.convertBytesToShort(data, offset); --- + int xstSize = Utils.convertBytesToShort(data, offset) & 0xFFFF