Attachment #23833 for bug #46610

View | Details | Raw Unified | Return to bug 46610
Collapse All | Expand All




public final class CHPX extends BytePropertyNode
{

  /**
   * Builds a CHPX from a raw grpprl byte array, wrapping the bytes in a
   * {@link SprmBuffer} before handing them on.
   *
   * @param fcStart    start of the text for this property, in bytes
   * @param fcEnd      end of the text for this property, in bytes
   * @param translator translator supplied to the superclass alongside the byte offsets
   * @param grpprl     raw property bytes to wrap in a SprmBuffer
   */
  public CHPX(int fcStart, int fcEnd, CharIndexTranslator translator, byte[] grpprl)
  {
    // Delegate to the SprmBuffer-based constructor, which forwards the same
    // arguments to the superclass.
    this(fcStart, fcEnd, translator, new SprmBuffer(grpprl));
  }

  /**
   * Builds a CHPX around an already constructed {@link SprmBuffer}.
   *
   * @param fcStart    start of the text for this property, in bytes
   * @param fcEnd      end of the text for this property, in bytes
   * @param translator translator supplied to the superclass alongside the byte offsets
   * @param buf        the sprm buffer passed to the superclass unchanged
   */
  public CHPX(int fcStart, int fcEnd, CharIndexTranslator translator, SprmBuffer buf)
  {
    super(fcStart, fcEnd, translator, buf);
  }






 *  conversion.
 * @author Ryan Ackley
 */
public final class TextPieceTable implements CharIndexTranslator
{
  protected ArrayList _textPieces = new ArrayList();
  //int _multiple;

	  // If they ask off the end, just go with the last one...
	  return lastWas;
  }

   * Is the text at the given byte offset
   *  unicode, or plain old ascii?
   * In a very evil fashion, you have to actually
   *  know this to make sense of character and
   *  paragraph properties :(
   * @param bytePos The byte offset to check about
   */
  public boolean isUnicodeAtByteOffset(int bytePos) {
	  boolean lastWas = false;
	 

	  Iterator it = _textPieces.iterator();
	  while(it.hasNext()) {
		  TextPiece tp = (TextPiece)it.next();
		  int curByte = tp.getPieceDescriptor().getFilePosition();
		  int pieceEnd = curByte + tp.bytesLength();

		  // If the text piece covers the character, all good
		  if(curByte <= bytePos && pieceEnd > bytePos) {
			  return tp.isUnicode();
		  }
		  // Otherwise keep track for the last one
		  lastWas = tp.isUnicode();
		  // Move along
		  curByte = pieceEnd;
	  }

	  // If they ask off the end, just go with the last one...

    }
    return false;
  }
  	/**
	 * Translates a byte offset into a character index by adding up the
	 * characters of the text pieces that start before that offset. A unicode
	 * piece contributes one character per two bytes, any other piece one
	 * character per byte.
	 *
	 * NOTE(review): the scan stops at the first piece whose file position is
	 * at or beyond bytePos, so it appears to assume _textPieces is iterated
	 * in ascending file position - confirm against the code filling the list.
	 *
	 * @param bytePos the byte offset to translate
	 * @return the number of characters counted before bytePos
	 * @see org.apache.poi.hwpf.model.CharIndexTranslator#getCharIndex(int)
	 */
	public int getCharIndex(int bytePos) {
		int total = 0;

		for (Iterator pieces = _textPieces.iterator(); pieces.hasNext();) {
			TextPiece piece = (TextPiece) pieces.next();
			int start = piece.getPieceDescriptor().getFilePosition();
			if (bytePos <= start) {
				break;
			}

			int length = piece.bytesLength();
			int end = start + length;

			// Bytes of this piece lying before bytePos: the whole piece when
			// bytePos is past its end, otherwise only the leading part.
			int covered;
			if (bytePos > end) {
				covered = length;
			} else {
				covered = length - (end - bytePos);
			}

			total += piece.isUnicode() ? covered / 2 : covered;
		}

		return total;
	}
	
}





  public void insert(int listIndex, int cpStart, SprmBuffer buf)
  {
	boolean needsToBeUnicode = tpt.isUnicodeAtCharOffset(cpStart);

    CHPX insertChpx = new CHPX(0, 0, tpt,buf);

    // Ensure character offsets are really characters
    insertChpx.setStart(cpStart);

    	//  Original, until insert at point
    	//  New one
    	//  Clone of original, on to the old end
        CHPX clone = new CHPX(0, 0, tpt,chpx.getSprmBuf());
        // Again ensure contains character based offsets no matter what
        clone.setStart(cpStart);
        clone.setEnd(chpx.getEnd());




    public PAPFormattedDiskPage(byte[] documentStream, byte[] dataStream, int offset, int fcMin, TextPieceTable tpt)
    {
      super(documentStream, offset);

      for (int x = 0; x < _crun; x++) {
         int startAt = getStart(x);
         int endAt = getEnd(x);
         _papxList.add(new PAPX(startAt, endAt, tpt, getGrpprl(x), getParagraphHeight(x), dataStream));
         //System.err.println(startAt + " -> " + endAt + " = " + isUnicode);

         _papxList.add(new PAPX(startAt, endAt, getGrpprl(x), getParagraphHeight(x), dataStream, isUnicode));
      }
      _fkp = null;
      _dataStream = dataStream;




  private ParagraphHeight _phe;
  private int _hugeGrpprlOffset = -1;

  /**
   * Builds a PAPX from a raw papx byte array.
   *
   * @param fcStart    start offset handed to the superclass together with translator
   * @param fcEnd      end offset handed to the superclass together with translator
   * @param translator translator supplied to the superclass alongside the offsets
   * @param papx       raw property bytes; wrapped in a fresh SprmBuffer
   * @param phe        paragraph height stored on this PAPX
   * @param dataStream bytes passed to findHuge
   */
  public PAPX(int fcStart, int fcEnd, CharIndexTranslator translator, byte[] papx, ParagraphHeight phe, byte[] dataStream)
  {
    super(fcStart, fcEnd, translator, new SprmBuffer(papx));
    _phe = phe;

    // findHuge gets its own SprmBuffer copy of papx; a non-null result
    // replaces the buffer installed by the superclass.
    // NOTE(review): presumably this is the "huge" grpprl stored in
    // dataStream - confirm in findHuge.
    SprmBuffer replacement = findHuge(new SprmBuffer(papx), dataStream);
    if (replacement != null)
    {
      _buf = replacement;
    }
  }

  public PAPX(int fcStart, int fcEnd, CharIndexTranslator translator, SprmBuffer buf, byte[] dataStream)
  {
    super(fcStart, fcEnd, translator, buf);
    _phe = new ParagraphHeight();
    buf = findHuge(buf, dataStream);
    if(buf != null)




      int startAt = CPtoFC(node.getStart());
      int endAt = CPtoFC(node.getEnd());

      boolean isUnicodeAtStart = tpt.isUnicodeAtByteOffset( startAt );
//      System.err.println(startAt + " -> " + endAt + " = " + isUnicodeAtStart);

      // check for the optimization
      if (fileOffset == 0xffffffff)
      {
        _sections.add(new SEPX(sed, startAt, endAt, tpt, new byte[0]));
      }
      else
      {

        byte[] buf = new byte[sepxSize];
        fileOffset += LittleEndian.SHORT_SIZE;
        System.arraycopy(documentStream, fileOffset, buf, 0, buf.length);
        _sections.add(new SEPX(sed, startAt, endAt, tpt, buf));
      }
    }


      }
      int FC = TP.getPieceDescriptor().getFilePosition();
      int offset = CP - TP.getCP();
      if (TP.isUnicode()) {
        offset = offset*2;
      }
      FC = FC+offset;
      return FC;
    }

    /**
     * Ryan's code: converts the offset fc into a cp value by walking the
     * pieces in _text in list order. Each piece whose end lies before fc
     * contributes its full span (getEnd() - getStart()); the first piece
     * whose end is at or after fc contributes (fc - getStart()) and ends
     * the walk.
     *
     * @param fc the offset to convert
     * @return the accumulated cp value; the sum of all piece spans when no
     *         piece ends at or after fc
     */
    private int FCtoCP(int fc)
    {
      int cp = 0;
      final int pieceCount = _text.size();
      for (int i = 0; i < pieceCount; i++)
      {
        TextPiece current = (TextPiece)_text.get(i);

        // Piece lies wholly before fc: count all of it and keep going.
        if (fc > current.getEnd())
        {
          cp += (current.getEnd() - current.getStart());
          continue;
        }

        // fc falls inside (or at the end of) this piece.
        return cp + (fc - current.getStart());
      }
      return cp;
    }


  public ArrayList getSections()
  {
    return _sections;


      // Line using Ryan's FCtoCP() conversion method -
      // unable to observe any effect on our testcases when using this code - piers
      GenericPropertyNode property = new GenericPropertyNode(tpt.getCharIndex(sepx.getStartBytes()), tpt.getCharIndex(sepx.getEndBytes()), sed.toByteArray());


      plex.addProperty(property);




 *  and characters.
 */
public abstract class BytePropertyNode extends PropertyNode {
	/** Start of the text for this property in bytes, exactly as given to the constructor. */
	private final int startBytes;
	/** End of the text for this property in bytes, exactly as given to the constructor. */
	private final int endBytes;

	/**
	 * Creates a node from byte offsets. The offsets are converted with
	 * {@code translator.getCharIndex(...)} before being passed to the
	 * superclass, and the original byte values are kept so that
	 * {@link #getStartBytes()} and {@link #getEndBytes()} can return them
	 * unchanged.
	 *
	 * @param fcStart The start of the text for this property, in _bytes_
	 * @param fcEnd The end of the text for this property, in _bytes_
	 * @param translator converts the byte offsets into the indexes handed to the superclass
	 * @param buf the property data, passed to the superclass unchanged
	 */
	public BytePropertyNode(int fcStart, int fcEnd, CharIndexTranslator translator, Object buf) {
		super(
				translator.getCharIndex(fcStart),
				translator.getCharIndex(fcEnd),
				buf
		);
		this.startBytes = fcStart;
		this.endBytes = fcEnd;
	}

	// The former isUnicode flag, generateCp() helper and the "*2" fallbacks
	// are gone: they referred to a field that no longer exists and left
	// unreachable statements after the returns below.

	/**
	 * @return the start of the text for this property in bytes, as supplied at construction
	 */
	public int getStartBytes() {
		return startBytes;
	}

	/**
	 * @return the end of the text for this property in bytes, as supplied at construction
	 */
	public int getEndBytes() {
		return endBytes;
	}
}

Lines 28-36 Link Here

(-)src/scratchpad/src/org/apache/poi/hwpf/model/SEPX.java (-2 / +2 lines)
28		28
29	SectionDescriptor _sed;	29	SectionDescriptor _sed;
30		30
31	public SEPX(SectionDescriptor sed, int start, int end, byte[] grpprl, boolean isUnicode)	31	public SEPX(SectionDescriptor sed, int start, int end, CharIndexTranslator translator, byte[] grpprl)
32	{	32	{
33	super(start, end, SectionSprmUncompressor.uncompressSEP(grpprl, 0), isUnicode);	33	super(start, end, translator, SectionSprmUncompressor.uncompressSEP(grpprl, 0));
34	_sed = sed;	34	_sed = sed;
35	}	35	}
36		36





  public void insert(int listIndex, int cpStart, SprmBuffer buf)
  {
    boolean needsToBeUnicode = tpt.isUnicodeAtCharOffset(cpStart);

    PAPX forInsert = new PAPX(0, 0, tpt, buf, _dataStream);

    // Ensure character offsets are really characters
    forInsert.setStart(cpStart);

    	//  Original, until insert at point
    	//  New one
    	//  Clone of original, on to the old end
        PAPX clone = new PAPX(0, 0, tpt, clonedBuf, _dataStream);
        // Again ensure contains character based offsets no matter what
        clone.setStart(cpStart);
        clone.setEnd(currentPap.getEnd());

Lines 60-67 Link Here

(-)src/scratchpad/src/org/apache/poi/hwpf/model/CHPFormattedDiskPage.java (-2 / +3 lines)
60		60
61	for (int x = 0; x < _crun; x++)	61	for (int x = 0; x < _crun; x++)
62	{	62	{
63	boolean isUnicode = tpt.isUnicodeAtByteOffset( getStart(x) );	63	int startAt = getStart(x);
64	_chpxList.add(new CHPX(getStart(x) - fcMin, getEnd(x) - fcMin, getGrpprl(x), isUnicode));	64	int endAt = getEnd(x);
		65	_chpxList.add(new CHPX(startAt, endAt, tpt, getGrpprl(x)));
65	}	66	}
66	}	67	}
67		68

Return to bug 46610

Lines 34-47 Link Here

(-)src/scratchpad/src/org/apache/poi/hwpf/model/CHPX.java (-4 / +4 lines)
34	public final class CHPX extends BytePropertyNode	34	public final class CHPX extends BytePropertyNode
35	{	35	{
36		36
37	public CHPX(int fcStart, int fcEnd, byte[] grpprl, boolean isUnicode)	37	public CHPX(int fcStart, int fcEnd, CharIndexTranslator translator, byte[] grpprl)
38	{	38	{
39	super(fcStart, fcEnd, new SprmBuffer(grpprl), isUnicode);	39	super(fcStart, fcEnd, translator, new SprmBuffer(grpprl));
40	}	40	}
41		41
42	public CHPX(int fcStart, int fcEnd, SprmBuffer buf, boolean isUnicode)	42	public CHPX(int fcStart, int fcEnd, CharIndexTranslator translator, SprmBuffer buf)
43	{	43	{
44	super(fcStart, fcEnd, buf, isUnicode);	44	super(fcStart, fcEnd, translator ,buf);
45	}	45	}
46		46
47		47

Lines 37-43 Link Here

(-)src/scratchpad/src/org/apache/poi/hwpf/model/TextPieceTable.java (-13 / +37 lines)
37	* convertion.	37	* convertion.
38	* @author Ryan Ackley	38	* @author Ryan Ackley
39	*/	39	*/
40	public final class TextPieceTable	40	public final class TextPieceTable implements CharIndexTranslator
41	{	41	{
42	protected ArrayList _textPieces = new ArrayList();	42	protected ArrayList _textPieces = new ArrayList();
43	//int _multiple;	43	//int _multiple;
Lines 150-180 Link Here
150	// If they ask off the end, just go with the last one...	150	// If they ask off the end, just go with the last one...
151	return lastWas;	151	return lastWas;
152	}	152	}
153	/**	153
154	* Is the text at the given byte offset
155	* unicode, or plain old ascii?
156	* In a very evil fashion, you have to actually
157	* know this to make sense of character and
158	* paragraph properties :(
159	* @param bytePos The character offset to check about
160	*/
161	public boolean isUnicodeAtByteOffset(int bytePos) {	154	public boolean isUnicodeAtByteOffset(int bytePos) {
162	boolean lastWas = false;	155	boolean lastWas = false;
163	int curByte = 0;	156
164		157
165	Iterator it = _textPieces.iterator();	158	Iterator it = _textPieces.iterator();
166	while(it.hasNext()) {	159	while(it.hasNext()) {
167	TextPiece tp = (TextPiece)it.next();	160	TextPiece tp = (TextPiece)it.next();
168	int nextByte = curByte + tp.bytesLength();	161	int curByte = tp.getPieceDescriptor().getFilePosition();
		162	int pieceEnd = curByte + tp.bytesLength();
169		163
170	// If the text piece covers the character, all good	164	// If the text piece covers the character, all good
171	if(curByte <= bytePos && nextByte >= bytePos) {	165	if(curByte <= bytePos && pieceEnd > bytePos) {
172	return tp.isUnicode();	166	return tp.isUnicode();
173	}	167	}
174	// Otherwise keep track for the last one	168	// Otherwise keep track for the last one
175	lastWas = tp.isUnicode();	169	lastWas = tp.isUnicode();
176	// Move along	170	// Move along
177	curByte = nextByte;	171	curByte = pieceEnd;
178	}	172	}
179		173
180	// If they ask off the end, just go with the last one...	174	// If they ask off the end, just go with the last one...
Lines 268-271 Link Here
268	}	262	}
269	return false;	263	return false;
270	}	264	}
		265	/* (non-Javadoc)
		266	* @see org.apache.poi.hwpf.model.CharIndexTranslator#getLengthInChars(int)
		267	*/
		268	public int getCharIndex(int bytePos) {
		269	int charCount = 0;
		270
		271	Iterator it = _textPieces.iterator();
		272	while (it.hasNext()) {
		273	TextPiece tp = (TextPiece) it.next();
		274	int pieceStart = tp.getPieceDescriptor().getFilePosition();
		275	if(pieceStart >= bytePos) {
		276	break;
		277	}
		278
		279	int bytesLength = tp.bytesLength();
		280	int pieceEnd = pieceStart + bytesLength;
		281
		282	int toAdd = bytePos > pieceEnd ? bytesLength : bytesLength
		283	- (pieceEnd - bytePos);
		284
		285	if (tp.isUnicode()) {
		286	charCount += toAdd / 2;
		287	} else {
		288	charCount += toAdd;
		289	}
		290	}
		291
		292	return charCount;
		293	}
		294
271	}	295	}

Lines 119-127 Link Here

(-)src/scratchpad/src/org/apache/poi/hwpf/model/CHPBinTable.java (-3 / +2 lines)
119		119
120	public void insert(int listIndex, int cpStart, SprmBuffer buf)	120	public void insert(int listIndex, int cpStart, SprmBuffer buf)
121	{	121	{
122	boolean needsToBeUnicode = tpt.isUnicodeAtCharOffset(cpStart);
123		122
124	CHPX insertChpx = new CHPX(0, 0, buf, needsToBeUnicode);	123	CHPX insertChpx = new CHPX(0, 0, tpt,buf);
125		124
126	// Ensure character offsets are really characters	125	// Ensure character offsets are really characters
127	insertChpx.setStart(cpStart);	126	insertChpx.setStart(cpStart);
Lines 141-147 Link Here
141	// Original, until insert at point	140	// Original, until insert at point
142	// New one	141	// New one
143	// Clone of original, on to the old end	142	// Clone of original, on to the old end
144	CHPX clone = new CHPX(0, 0, chpx.getSprmBuf(), needsToBeUnicode);	143	CHPX clone = new CHPX(0, 0, tpt,chpx.getSprmBuf());
145	// Again ensure contains character based offsets no matter what	144	// Again ensure contains character based offsets no matter what
146	clone.setStart(cpStart);	145	clone.setStart(cpStart);
147	clone.setEnd(chpx.getEnd());	146	clone.setEnd(chpx.getEnd());

Lines 40-57 Link Here

(-)src/scratchpad/src/org/apache/poi/hwpf/model/PAPX.java (-4 / +4 lines)
40	private ParagraphHeight _phe;	40	private ParagraphHeight _phe;
41	private int _hugeGrpprlOffset = -1;	41	private int _hugeGrpprlOffset = -1;
42		42
43	public PAPX(int fcStart, int fcEnd, byte[] papx, ParagraphHeight phe, byte[] dataStream, boolean isUnicode)	43	public PAPX(int fcStart, int fcEnd, CharIndexTranslator translator, byte[] papx, ParagraphHeight phe, byte[] dataStream)
44	{	44	{
45	super(fcStart, fcEnd, new SprmBuffer(papx), isUnicode);	45	super(fcStart, fcEnd, translator, new SprmBuffer(papx));
46	_phe = phe;	46	_phe = phe;
47	SprmBuffer buf = findHuge(new SprmBuffer(papx), dataStream);	47	SprmBuffer buf = findHuge(new SprmBuffer(papx), dataStream);
48	if(buf != null)	48	if(buf != null)
49	_buf = buf;	49	_buf = buf;
50	}	50	}
51		51
52	public PAPX(int fcStart, int fcEnd, SprmBuffer buf, byte[] dataStream, boolean isUnicode)	52	public PAPX(int fcStart, int fcEnd, CharIndexTranslator translator, SprmBuffer buf, byte[] dataStream)
53	{	53	{
54	super(fcStart, fcEnd, buf, isUnicode);	54	super(fcStart, fcEnd, translator, buf);
55	_phe = new ParagraphHeight();	55	_phe = new ParagraphHeight();
56	buf = findHuge(buf, dataStream);	56	buf = findHuge(buf, dataStream);
57	if(buf != null)	57	if(buf != null)

Lines 61-73 Link Here

(-)src/scratchpad/src/org/apache/poi/hwpf/model/SectionTable.java (-30 / +7 lines)
61	int startAt = CPtoFC(node.getStart());	61	int startAt = CPtoFC(node.getStart());
62	int endAt = CPtoFC(node.getEnd());	62	int endAt = CPtoFC(node.getEnd());
63		63
64	boolean isUnicodeAtStart = tpt.isUnicodeAtByteOffset( startAt );
65	// System.err.println(startAt + " -> " + endAt + " = " + isUnicodeAtStart);
66
67	// check for the optimization	64	// check for the optimization
68	if (fileOffset == 0xffffffff)	65	if (fileOffset == 0xffffffff)
69	{	66	{
70	_sections.add(new SEPX(sed, startAt, endAt, new byte[0], isUnicodeAtStart));	67	_sections.add(new SEPX(sed, startAt, endAt, tpt, new byte[0]));
71	}	68	}
72	else	69	else
73	{	70	{
Lines 76-82 Link Here
76	byte[] buf = new byte[sepxSize];	73	byte[] buf = new byte[sepxSize];
77	fileOffset += LittleEndian.SHORT_SIZE;	74	fileOffset += LittleEndian.SHORT_SIZE;
78	System.arraycopy(documentStream, fileOffset, buf, 0, buf.length);	75	System.arraycopy(documentStream, fileOffset, buf, 0, buf.length);
79	_sections.add(new SEPX(sed, startAt, endAt, buf, isUnicodeAtStart));	76	_sections.add(new SEPX(sed, startAt, endAt, tpt, buf));
80	}	77	}
81	}	78	}
82		79
Lines 138-170 Link Here
138	}	135	}
139	int FC = TP.getPieceDescriptor().getFilePosition();	136	int FC = TP.getPieceDescriptor().getFilePosition();
140	int offset = CP - TP.getCP();	137	int offset = CP - TP.getCP();
141	FC = FC+offset-((TextPiece)_text.get(0)).getPieceDescriptor().getFilePosition();	138	if (TP.isUnicode()) {
		139	offset = offset*2;
		140	}
		141	FC = FC+offset;
142	return FC;	142	return FC;
143	}	143	}
144		144
145	// Ryans code
146	private int FCtoCP(int fc)
147	{
148	int size = _text.size();
149	int cp = 0;
150	for (int x = 0; x < size; x++)
151	{
152	TextPiece piece = (TextPiece)_text.get(x);
153
154	if (fc <= piece.getEnd())
155	{
156	cp += (fc - piece.getStart());
157	break;
158	}
159	else
160	{
161	cp += (piece.getEnd() - piece.getStart());
162	}
163	}
164	return cp;
165	}
166
167
168	public ArrayList getSections()	145	public ArrayList getSections()
169	{	146	{
170	return _sections;	147	return _sections;
Lines 205-211 Link Here
205		182
206	// Line using Ryan's FCtoCP() conversion method -	183	// Line using Ryan's FCtoCP() conversion method -
207	// unable to observe any effect on our testcases when using this code - piers	184	// unable to observe any effect on our testcases when using this code - piers
208	GenericPropertyNode property = new GenericPropertyNode(FCtoCP(sepx.getStartBytes()), FCtoCP(sepx.getEndBytes()), sed.toByteArray());	185	GenericPropertyNode property = new GenericPropertyNode(tpt.getCharIndex(sepx.getStartBytes()), tpt.getCharIndex(sepx.getEndBytes()), sed.toByteArray());
209		186
210		187
211	plex.addProperty(property);	188	plex.addProperty(property);

Lines 62-75 Link Here

(-)src/scratchpad/src/org/apache/poi/hwpf/model/PAPFormattedDiskPage.java (-7 / +3 lines)
62	public PAPFormattedDiskPage(byte[] documentStream, byte[] dataStream, int offset, int fcMin, TextPieceTable tpt)	62	public PAPFormattedDiskPage(byte[] documentStream, byte[] dataStream, int offset, int fcMin, TextPieceTable tpt)
63	{	63	{
64	super(documentStream, offset);	64	super(documentStream, offset);
65
66	for (int x = 0; x < _crun; x++) {	65	for (int x = 0; x < _crun; x++) {
67	int startAt = getStart(x) - fcMin;	66	int startAt = getStart(x);
68	int endAt = getEnd(x) - fcMin;	67	int endAt = getEnd(x);
69	boolean isUnicode = tpt.isUnicodeAtByteOffset(startAt);	68	_papxList.add(new PAPX(startAt, endAt, tpt, getGrpprl(x), getParagraphHeight(x), dataStream));
70	//System.err.println(startAt + " -> " + endAt + " = " + isUnicode);
71
72	_papxList.add(new PAPX(startAt, endAt, getGrpprl(x), getParagraphHeight(x), dataStream, isUnicode));
73	}	69	}
74	_fkp = null;	70	_fkp = null;
75	_dataStream = dataStream;	71	_dataStream = dataStream;

Lines 25-61 Link Here

(-)src/scratchpad/src/org/apache/poi/hwpf/model/BytePropertyNode.java (-19 / +10 lines)
25	* and characters.	25	* and characters.
26	*/	26	*/
27	public abstract class BytePropertyNode extends PropertyNode {	27	public abstract class BytePropertyNode extends PropertyNode {
28	private boolean isUnicode;	28	private final int startBytes;
		29	private final int endBytes;
29		30
30	/**	31	/**
31	* @param fcStart The start of the text for this property, in _bytes_	32	* @param fcStart The start of the text for this property, in _bytes_
32	* @param fcEnd The end of the text for this property, in _bytes_	33	* @param fcEnd The end of the text for this property, in _bytes_
33	*/	34	*/
34	public BytePropertyNode(int fcStart, int fcEnd, Object buf, boolean isUnicode) {	35	public BytePropertyNode(int fcStart, int fcEnd, CharIndexTranslator translator, Object buf) {
35	super(	36	super(
36	generateCp(fcStart, isUnicode),	37	translator.getCharIndex(fcStart),
37	generateCp(fcEnd, isUnicode),	38	translator.getCharIndex(fcEnd),
38	buf	39	buf
39	);	40	);
40	this.isUnicode = isUnicode;	41	this.startBytes = fcStart;
		42	this.endBytes = fcEnd;
41	}	43	}
42	private static int generateCp(int val, boolean isUnicode) {
43	if(isUnicode)
44	return val/2;
45	return val;
46	}
47		44
48	public boolean isUnicode() {
49	return isUnicode;
50	}
51	public int getStartBytes() {	45	public int getStartBytes() {
52	if(isUnicode)	46	return startBytes;
53	return getStart()*2;
54	return getStart();
55	}	47	}
		48
56	public int getEndBytes() {	49	public int getEndBytes() {
57	if(isUnicode)	50	return endBytes;
58	return getEnd()*2;
59	return getEnd();
60	}	51	}
61	}	52	}