Attachment #6423 for bug #20060

View | Details | Raw Unified | Return to bug 20060
Collapse All | Expand All

Lines 56-64 Link Here

(-)src/scratchpad/src/org/apache/poi/hdf/extractor/Utils.java (-2 / +6 lines)
56	package org.apache.poi.hdf.extractor;	56	package org.apache.poi.hdf.extractor;
57		57
58	/**	58	/**
59	* Comment me	59	* Byte to other signed/unsigned type static conversions methods.
60	*	60	*
61	* @author Ryan Ackley	61	* @author Ryan Ackley
		62	* @author Serge Huber
62	*/	63	*/
63		64
64	public class Utils	65	public class Utils
Lines 80-85 Link Here
80	}	81	}
81	public static short convertBytesToShort(byte[] array, int offset)	82	public static short convertBytesToShort(byte[] array, int offset)
82	{	83	{
		84	if (array == null) {
		85	return 0;
		86	}
83	return convertBytesToShort(array[offset + 1], array[offset]);	87	return convertBytesToShort(array[offset + 1], array[offset]);
84	}	88	}
85	public static int convertBytesToInt(byte[] array, int offset)	89	public static int convertBytesToInt(byte[] array, int offset)

Lines 76-81 Link Here

(-)src/scratchpad/src/org/apache/poi/hdf/extractor/WordDocument.java (-6 / +47 lines)
76	* works for non-complex files	76	* works for non-complex files
77	*	77	*
78	* @author Ryan Ackley	78	* @author Ryan Ackley
		79	* @author Serge Huber
79	*/	80	*/
80		81
81	public class WordDocument	82	public class WordDocument
Lines 183-188 Link Here
183	int textStart = Utils.convertBytesToInt(_header, 0x18);	184	int textStart = Utils.convertBytesToInt(_header, 0x18);
184	int textEnd = Utils.convertBytesToInt(_header, 0x1c);	185	int textEnd = Utils.convertBytesToInt(_header, 0x1c);
185	ArrayList textPieces = findProperties(textStart, textEnd, _text.root);	186	ArrayList textPieces = findProperties(textStart, textEnd, _text.root);
		187
		188	if (textPieces.size() == 0) {
		189	// fall-back to full text extraction, hoping it won't be too messy...
		190	TextPiece fullText = new TextPiece(textStart, textEnd - textStart, false);
		191	textPieces.add(fullText);
		192	}
		193
186	int size = textPieces.size();	194	int size = textPieces.size();
187		195
188	for(int x = 0; x < size; x++)	196	for(int x = 0; x < size; x++)
Lines 205-211 Link Here
205	}	213	}
206	else	214	else
207	{	215	{
208	String sText = new String(_header, start, end-start);	216	// String sText = new String(_header, start, end-start);
		217	String sText = new String(_header, start, end-start, "Cp1252");
209	out.write(sText);	218	out.write(sText);
210	}	219	}
211	}	220	}
Lines 221-227 Link Here
221	{	230	{
222	this(new FileInputStream(fileName));	231	this(new FileInputStream(fileName));
223	}	232	}
224		233
225	public WordDocument(InputStream inputStream) throws IOException	234	public WordDocument(InputStream inputStream) throws IOException
226	{	235	{
227	//do Ole stuff	236	//do Ole stuff
Lines 264-286 Link Here
264		273
265	//I call it the header but its also the main document stream	274	//I call it the header but its also the main document stream
266	_header = new byte[headerProps.getSize()];	275	_header = new byte[headerProps.getSize()];
267	filesystem.createDocumentInputStream("WordDocument").read(_header);	276	int bytesRead = filesystem.createDocumentInputStream("WordDocument").read(_header);
		277
		278	int fibVersion = LittleEndian.getShort(_header, 0x2) & 0xFFFF;
		279	int productVersion = LittleEndian.getShort(_header, 0x4) & 0xFFFF;
268		280
269	//Get the information we need from the header	281	//Get the information we need from the header
270	int info = LittleEndian.getShort(_header, 0xa);	282	int info = LittleEndian.getShort(_header, 0xa);
271		283
272	_fcMin = LittleEndian.getInt(_header, 0x18);	284	_fcMin = LittleEndian.getInt(_header, 0x18);
		285	int _fcMax = LittleEndian.getInt(_header, 0x1C);
273	_ccpText = LittleEndian.getInt(_header, 0x4c);	286	_ccpText = LittleEndian.getInt(_header, 0x4c);
274	_ccpFtn = LittleEndian.getInt(_header, 0x50);	287	_ccpFtn = LittleEndian.getInt(_header, 0x50);
		288	int _ccpHdd = LittleEndian.getInt(_header, 0x54);
275		289
276	int charPLC = LittleEndian.getInt(_header, 0xfa);	290	int charPLC = LittleEndian.getInt(_header, 0xfa);
277	int charPlcSize = LittleEndian.getInt(_header, 0xfe);	291	int charPlcSize = LittleEndian.getInt(_header, 0xfe);
278	int parPLC = LittleEndian.getInt(_header, 0x102);	292	int parPLC = LittleEndian.getInt(_header, 0x102);
279	int parPlcSize = LittleEndian.getInt(_header, 0x106);	293	int parPlcSize = LittleEndian.getInt(_header, 0x106);
		294
		295	int lcbClx = LittleEndian.getInt(_header, 0x1A6);
		296
280	boolean useTable1 = (info & 0x200) != 0;	297	boolean useTable1 = (info & 0x200) != 0;
		298	boolean isComplex = (info & 0x4) != 0;
281		299
282	//process the text and formatting properties	300	if (isComplex) {
283	processComplexFile(useTable1, charPLC, charPlcSize, parPLC, parPlcSize);	301	// now let's verify the existence of a table stream
		302	String tablename=null;
		303	DocumentEntry tableEntry = null;
		304	if (useTable1) {
		305	tablename="1Table";
		306	} else {
		307	tablename="0Table";
		308	}
		309	try {
		310	tableEntry = (DocumentEntry) filesystem.getRoot().getEntry(
		311	tablename);
		312	} catch (FileNotFoundException fnfe) {
		313	isComplex = false;
		314	}
		315	}
		316
		317	if ((isComplex)) {
		318	//process the text and formatting properties
		319	processComplexFile(useTable1, charPLC, charPlcSize, parPLC,
		320	parPlcSize);
		321	} else {
		322	TextPiece piece = new TextPiece(_fcMin, _fcMax - _fcMin, false);
		323	_text.add(piece);
		324	}
284	}	325	}
285		326
286	/**	327	/**
Lines 328-334 Link Here
328	//parse out the text locations	369	//parse out the text locations
329	findText(tableStream, complexOffset);	370	findText(tableStream, complexOffset);
330	//parse out text formatting	371	//parse out text formatting
331	findFormatting(tableStream, charTable, charPlcSize, parTable, parPlcSize);	372	// findFormatting(tableStream, charTable, charPlcSize, parTable, parPlcSize);
332		373
333	}	374	}
334	/**	375	/**




/**
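 * Populates list level fields (for example {@code _chpx}, {@code _papx} and
 * {@code _xst}) from a raw byte array.
 * NOTE(review): summary inferred from a partial excerpt of this class; confirm
 * against the full source.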
 *
 * @author Ryan Ackley
 * @author Serge Huber
 */

public class ListTables

    // Derive the _fWord6 flag from the 0x40 bit of code.
    lvl._fWord6 = StyleSheet.getFlag(code & 0x40);
    // Copy the 9 bytes starting at offset + 6 into _rgbxchNums.
    System.arraycopy(data, offset + 6, lvl._rgbxchNums, 0, 9);
    lvl._ixchFollow = data[offset + 15];
    // The two size bytes go through Utils.convertUnsignedByteToInt (presumably
    // yielding 0-255 -- confirm in Utils) before being used as array lengths.
    int chpxSize = Utils.convertUnsignedByteToInt( data[offset + 24] );
    int papxSize = Utils.convertUnsignedByteToInt( data[offset + 25] );
    lvl._chpx = new byte[chpxSize];
    lvl._papx = new byte[papxSize];
    // Variable-length data begins at offset + 28: the papx bytes come first and
    // the chpx bytes follow immediately after them.
    System.arraycopy(data, offset + 28, lvl._papx, 0, papxSize);
    System.arraycopy(data, offset + 28 + papxSize, lvl._chpx, 0, chpxSize);
    // Advance past the 28 leading bytes plus both variable-length arrays.
    offset += 28 + papxSize + chpxSize;//modify offset
    // Read a 16-bit count at the new offset; the 0xFFFF mask keeps the value
    // non-negative so it is always a valid array length.
    int xstSize = Utils.convertBytesToShort(data, offset) & 0xFFFF;
    lvl._xst = new char[xstSize];

    // Step over the two bytes of the count that was just read.
    offset += 2;

Return to bug 20060