View | Details | Raw Unified | Return to bug 20060
Collapse All | Expand All

(-)src/scratchpad/src/org/apache/poi/hdf/extractor/Utils.java (-2 / +6 lines)
Lines 56-64 Link Here
56
package org.apache.poi.hdf.extractor;
56
package org.apache.poi.hdf.extractor;
57
57
58
/**
58
/**
59
 * Comment me
59
 * Byte to other signed/unsigned type static conversion methods.
60
 *
60
 *
61
 * @author Ryan Ackley 
61
 * @author Ryan Ackley
62
 * @author Serge Huber
62
 */
63
 */
63
64
64
public class Utils
65
public class Utils
Lines 80-85 Link Here
80
    }
81
    }
81
    public static short convertBytesToShort(byte[] array, int offset)
82
    public static short convertBytesToShort(byte[] array, int offset)
82
    {
83
    {
84
        if (array == null) {
85
            return 0;
86
        }
83
        return convertBytesToShort(array[offset + 1], array[offset]);
87
        return convertBytesToShort(array[offset + 1], array[offset]);
84
    }
88
    }
85
    public static int convertBytesToInt(byte[] array, int offset)
89
    public static int convertBytesToInt(byte[] array, int offset)
(-)src/scratchpad/src/org/apache/poi/hdf/extractor/WordDocument.java (-6 / +47 lines)
Lines 76-81 Link Here
76
 * works for non-complex files
76
 * works for non-complex files
77
 *
77
 *
78
 * @author Ryan Ackley
78
 * @author Ryan Ackley
79
 * @author Serge Huber
79
 */
80
 */
80
81
81
public class WordDocument
82
public class WordDocument
Lines 183-188 Link Here
183
    int textStart = Utils.convertBytesToInt(_header, 0x18);
184
    int textStart = Utils.convertBytesToInt(_header, 0x18);
184
    int textEnd = Utils.convertBytesToInt(_header, 0x1c);
185
    int textEnd = Utils.convertBytesToInt(_header, 0x1c);
185
    ArrayList textPieces = findProperties(textStart, textEnd, _text.root);
186
    ArrayList textPieces = findProperties(textStart, textEnd, _text.root);
187
188
    if (textPieces.size() == 0) {
189
        // fall-back to full text extraction, hoping it won't be too messy...
190
        TextPiece fullText = new TextPiece(textStart, textEnd - textStart, false);
191
        textPieces.add(fullText);
192
    }
193
186
    int size = textPieces.size();
194
    int size = textPieces.size();
187
195
188
    for(int x = 0; x < size; x++)
196
    for(int x = 0; x < size; x++)
Lines 205-211 Link Here
205
      }
213
      }
206
      else
214
      else
207
      {
215
      {
208
	String sText = new String(_header, start, end-start);
216
	// String sText = new String(_header, start, end-start);
217
	String sText = new String(_header, start, end-start, "Cp1252");
209
	out.write(sText);
218
	out.write(sText);
210
      }
219
      }
211
    }
220
    }
Lines 221-227 Link Here
221
  {
230
  {
222
  	this(new FileInputStream(fileName));
231
  	this(new FileInputStream(fileName));
223
  }
232
  }
224
  
233
225
  public WordDocument(InputStream inputStream) throws IOException
234
  public WordDocument(InputStream inputStream) throws IOException
226
  {
235
  {
227
        //do Ole stuff
236
        //do Ole stuff
Lines 264-286 Link Here
264
273
265
      //I call it the header but its also the main document stream
274
      //I call it the header but its also the main document stream
266
      _header = new byte[headerProps.getSize()];
275
      _header = new byte[headerProps.getSize()];
267
      filesystem.createDocumentInputStream("WordDocument").read(_header);
276
      int bytesRead = filesystem.createDocumentInputStream("WordDocument").read(_header);
277
278
      int fibVersion = LittleEndian.getShort(_header, 0x2) & 0xFFFF;
279
      int productVersion = LittleEndian.getShort(_header, 0x4) & 0xFFFF;
268
280
269
      //Get the information we need from the header
281
      //Get the information we need from the header
270
      int info = LittleEndian.getShort(_header, 0xa);
282
      int info = LittleEndian.getShort(_header, 0xa);
271
283
272
      _fcMin = LittleEndian.getInt(_header, 0x18);
284
      _fcMin = LittleEndian.getInt(_header, 0x18);
285
      int _fcMax = LittleEndian.getInt(_header, 0x1C);
273
      _ccpText = LittleEndian.getInt(_header, 0x4c);
286
      _ccpText = LittleEndian.getInt(_header, 0x4c);
274
      _ccpFtn = LittleEndian.getInt(_header, 0x50);
287
      _ccpFtn = LittleEndian.getInt(_header, 0x50);
288
      int _ccpHdd = LittleEndian.getInt(_header, 0x54);
275
289
276
      int charPLC = LittleEndian.getInt(_header, 0xfa);
290
      int charPLC = LittleEndian.getInt(_header, 0xfa);
277
      int charPlcSize = LittleEndian.getInt(_header, 0xfe);
291
      int charPlcSize = LittleEndian.getInt(_header, 0xfe);
278
      int parPLC = LittleEndian.getInt(_header, 0x102);
292
      int parPLC = LittleEndian.getInt(_header, 0x102);
279
      int parPlcSize = LittleEndian.getInt(_header, 0x106);
293
      int parPlcSize = LittleEndian.getInt(_header, 0x106);
294
295
      int lcbClx = LittleEndian.getInt(_header, 0x1A6);
296
280
      boolean useTable1 = (info & 0x200) != 0;
297
      boolean useTable1 = (info & 0x200) != 0;
298
      boolean isComplex = (info & 0x4) != 0;
281
299
282
      //process the text and formatting properties
300
      if (isComplex) {
283
      processComplexFile(useTable1, charPLC, charPlcSize, parPLC, parPlcSize);
301
          // now let's verify the existence of a table stream
302
          String tablename=null;
303
          DocumentEntry tableEntry = null;
304
          if (useTable1) {
305
              tablename="1Table";
306
          } else {
307
              tablename="0Table";
308
          }
309
          try {
310
              tableEntry = (DocumentEntry) filesystem.getRoot().getEntry(
311
                  tablename);
312
          } catch (FileNotFoundException fnfe) {
313
              isComplex = false;
314
          }
315
      }
316
317
      if ((isComplex)) {
318
          //process the text and formatting properties
319
          processComplexFile(useTable1, charPLC, charPlcSize, parPLC,
320
                             parPlcSize);
321
      } else {
322
          TextPiece piece = new TextPiece(_fcMin, _fcMax - _fcMin, false);
323
          _text.add(piece);
324
      }
284
  }
325
  }
285
326
286
  /**
327
  /**
Lines 328-334 Link Here
328
      //parse out the text locations
369
      //parse out the text locations
329
      findText(tableStream, complexOffset);
370
      findText(tableStream, complexOffset);
330
      //parse out text formatting
371
      //parse out text formatting
331
      findFormatting(tableStream, charTable, charPlcSize, parTable, parPlcSize);
372
      // findFormatting(tableStream, charTable, charPlcSize, parTable, parPlcSize);
332
373
333
  }
374
  }
334
  /**
375
  /**
(-)src/scratchpad/src/org/apache/poi/hdf/extractor/data/ListTables.java (-4 / +5 lines)
Lines 63-69 Link Here
63
/**
63
/**
64
 * Comment me
64
 * Comment me
65
 *
65
 *
66
 * @author Ryan Ackley 
66
 * @author Ryan Ackley
67
 * @author Serge Huber
67
 */
68
 */
68
69
69
public class ListTables
70
public class ListTables
Lines 201-214 Link Here
201
    lvl._fWord6 = StyleSheet.getFlag(code & 0x40);
202
    lvl._fWord6 = StyleSheet.getFlag(code & 0x40);
202
    System.arraycopy(data, offset + 6, lvl._rgbxchNums, 0, 9);
203
    System.arraycopy(data, offset + 6, lvl._rgbxchNums, 0, 9);
203
    lvl._ixchFollow = data[offset + 15];
204
    lvl._ixchFollow = data[offset + 15];
204
    int chpxSize = data[offset + 24];
205
    int chpxSize = Utils.convertUnsignedByteToInt( data[offset + 24] );
205
    int papxSize = data[offset + 25];
206
    int papxSize = Utils.convertUnsignedByteToInt( data[offset + 25] );
206
    lvl._chpx = new byte[chpxSize];
207
    lvl._chpx = new byte[chpxSize];
207
    lvl._papx = new byte[papxSize];
208
    lvl._papx = new byte[papxSize];
208
    System.arraycopy(data, offset + 28, lvl._papx, 0, papxSize);
209
    System.arraycopy(data, offset + 28, lvl._papx, 0, papxSize);
209
    System.arraycopy(data, offset + 28 + papxSize, lvl._chpx, 0, chpxSize);
210
    System.arraycopy(data, offset + 28 + papxSize, lvl._chpx, 0, chpxSize);
210
    offset += 28 + papxSize + chpxSize;//modify offset
211
    offset += 28 + papxSize + chpxSize;//modify offset
211
    int xstSize = Utils.convertBytesToShort(data, offset);
212
    int xstSize = Utils.convertBytesToShort(data, offset) & 0xFFFF;
212
    lvl._xst = new char[xstSize];
213
    lvl._xst = new char[xstSize];
213
214
214
    offset += 2;
215
    offset += 2;

Return to bug 20060