View | Details | Raw Unified | Return to bug 46610
Collapse All | Expand All

(-)src/scratchpad/src/org/apache/poi/hwpf/model/CHPX.java (-4 / +4 lines)
Lines 34-47 Link Here
34
public final class CHPX extends BytePropertyNode
34
public final class CHPX extends BytePropertyNode
35
{
35
{
36
36
37
  public CHPX(int fcStart, int fcEnd, byte[] grpprl, boolean isUnicode)
37
  public CHPX(int fcStart, int fcEnd, CharIndexTranslator translator, byte[] grpprl)
38
  {
38
  {
39
    super(fcStart, fcEnd, new SprmBuffer(grpprl), isUnicode);
39
    super(fcStart, fcEnd, translator, new SprmBuffer(grpprl));
40
  }
40
  }
41
41
42
  public CHPX(int fcStart, int fcEnd, SprmBuffer buf, boolean isUnicode)
42
  public CHPX(int fcStart, int fcEnd, CharIndexTranslator translator, SprmBuffer buf)
43
  {
43
  {
44
    super(fcStart, fcEnd, buf, isUnicode);
44
    super(fcStart, fcEnd, translator ,buf);
45
  }
45
  }
46
46
47
47
(-)src/scratchpad/src/org/apache/poi/hwpf/model/TextPieceTable.java (-13 / +37 lines)
Lines 37-43 Link Here
37
 *  conversion.
37
 *  conversion.
38
 * @author Ryan Ackley
38
 * @author Ryan Ackley
39
 */
39
 */
40
public final class TextPieceTable
40
public final class TextPieceTable implements CharIndexTranslator
41
{
41
{
42
  protected ArrayList _textPieces = new ArrayList();
42
  protected ArrayList _textPieces = new ArrayList();
43
  //int _multiple;
43
  //int _multiple;
Lines 150-180 Link Here
150
	  // If they ask off the end, just go with the last one...
150
	  // If they ask off the end, just go with the last one...
151
	  return lastWas;
151
	  return lastWas;
152
  }
152
  }
153
  /**
153
154
   * Is the text at the given byte offset
155
   *  unicode, or plain old ascii?
156
   * In a very evil fashion, you have to actually
157
   *  know this to make sense of character and
158
   *  paragraph properties :(
159
   * @param bytePos The character offset to check about
160
   */
161
  public boolean isUnicodeAtByteOffset(int bytePos) {
154
  public boolean isUnicodeAtByteOffset(int bytePos) {
162
	  boolean lastWas = false;
155
	  boolean lastWas = false;
163
	  int curByte = 0;
156
	 
164
157
165
	  Iterator it = _textPieces.iterator();
158
	  Iterator it = _textPieces.iterator();
166
	  while(it.hasNext()) {
159
	  while(it.hasNext()) {
167
		  TextPiece tp = (TextPiece)it.next();
160
		  TextPiece tp = (TextPiece)it.next();
168
		  int nextByte = curByte + tp.bytesLength();
161
		  int curByte = tp.getPieceDescriptor().getFilePosition();
162
		  int pieceEnd = curByte + tp.bytesLength();
169
163
170
		  // If the text piece covers the character, all good
164
		  // If the text piece covers the character, all good
171
		  if(curByte <= bytePos && nextByte >= bytePos) {
165
		  if(curByte <= bytePos && pieceEnd > bytePos) {
172
			  return tp.isUnicode();
166
			  return tp.isUnicode();
173
		  }
167
		  }
174
		  // Otherwise keep track for the last one
168
		  // Otherwise keep track for the last one
175
		  lastWas = tp.isUnicode();
169
		  lastWas = tp.isUnicode();
176
		  // Move along
170
		  // Move along
177
		  curByte = nextByte;
171
		  curByte = pieceEnd;
178
	  }
172
	  }
179
173
180
	  // If they ask off the end, just go with the last one...
174
	  // If they ask off the end, just go with the last one...
Lines 268-271 Link Here
268
    }
262
    }
269
    return false;
263
    return false;
270
  }
264
  }
265
  	/* (non-Javadoc)
266
	 * @see org.apache.poi.hwpf.model.CharIndexTranslator#getCharIndex(int)
267
	 */
268
	public int getCharIndex(int bytePos) {
269
		int charCount = 0;
270
271
		Iterator it = _textPieces.iterator();
272
		while (it.hasNext()) {
273
			TextPiece tp = (TextPiece) it.next();
274
			int pieceStart = tp.getPieceDescriptor().getFilePosition();
275
			if(pieceStart >= bytePos) {
276
				break;
277
			}
278
			
279
			int bytesLength = tp.bytesLength();
280
			int pieceEnd = pieceStart + bytesLength;
281
282
			int toAdd = bytePos > pieceEnd ? bytesLength : bytesLength
283
					- (pieceEnd - bytePos);
284
285
			if (tp.isUnicode()) {
286
				charCount += toAdd / 2;
287
			} else {
288
				charCount += toAdd;
289
			}
290
		}
291
292
		return charCount;
293
	}
294
	
271
}
295
}
(-)src/scratchpad/src/org/apache/poi/hwpf/model/CHPBinTable.java (-3 / +2 lines)
Lines 119-127 Link Here
119
119
120
  public void insert(int listIndex, int cpStart, SprmBuffer buf)
120
  public void insert(int listIndex, int cpStart, SprmBuffer buf)
121
  {
121
  {
122
	boolean needsToBeUnicode = tpt.isUnicodeAtCharOffset(cpStart);
123
122
124
    CHPX insertChpx = new CHPX(0, 0, buf, needsToBeUnicode);
123
    CHPX insertChpx = new CHPX(0, 0, tpt,buf);
125
124
126
    // Ensure character offsets are really characters
125
    // Ensure character offsets are really characters
127
    insertChpx.setStart(cpStart);
126
    insertChpx.setStart(cpStart);
Lines 141-147 Link Here
141
    	//  Original, until insert at point
140
    	//  Original, until insert at point
142
    	//  New one
141
    	//  New one
143
    	//  Clone of original, on to the old end
142
    	//  Clone of original, on to the old end
144
        CHPX clone = new CHPX(0, 0, chpx.getSprmBuf(), needsToBeUnicode);
143
        CHPX clone = new CHPX(0, 0, tpt,chpx.getSprmBuf());
145
        // Again ensure contains character based offsets no matter what
144
        // Again ensure contains character based offsets no matter what
146
        clone.setStart(cpStart);
145
        clone.setStart(cpStart);
147
        clone.setEnd(chpx.getEnd());
146
        clone.setEnd(chpx.getEnd());
(-)src/scratchpad/src/org/apache/poi/hwpf/model/PAPFormattedDiskPage.java (-7 / +3 lines)
Lines 62-75 Link Here
62
    public PAPFormattedDiskPage(byte[] documentStream, byte[] dataStream, int offset, int fcMin, TextPieceTable tpt)
62
    public PAPFormattedDiskPage(byte[] documentStream, byte[] dataStream, int offset, int fcMin, TextPieceTable tpt)
63
    {
63
    {
64
      super(documentStream, offset);
64
      super(documentStream, offset);
65
66
      for (int x = 0; x < _crun; x++) {
65
      for (int x = 0; x < _crun; x++) {
67
         int startAt = getStart(x) - fcMin;
66
         int startAt = getStart(x);
68
         int endAt = getEnd(x) - fcMin;
67
         int endAt = getEnd(x);
69
    	 boolean isUnicode = tpt.isUnicodeAtByteOffset(startAt);
68
         _papxList.add(new PAPX(startAt, endAt, tpt, getGrpprl(x), getParagraphHeight(x), dataStream));
70
         //System.err.println(startAt + " -> " + endAt + " = " + isUnicode);
71
72
         _papxList.add(new PAPX(startAt, endAt, getGrpprl(x), getParagraphHeight(x), dataStream, isUnicode));
73
      }
69
      }
74
      _fkp = null;
70
      _fkp = null;
75
      _dataStream = dataStream;
71
      _dataStream = dataStream;
(-)src/scratchpad/src/org/apache/poi/hwpf/model/PAPX.java (-4 / +4 lines)
Lines 40-57 Link Here
40
  private ParagraphHeight _phe;
40
  private ParagraphHeight _phe;
41
  private int _hugeGrpprlOffset = -1;
41
  private int _hugeGrpprlOffset = -1;
42
42
43
  public PAPX(int fcStart, int fcEnd, byte[] papx, ParagraphHeight phe, byte[] dataStream, boolean isUnicode)
43
  public PAPX(int fcStart, int fcEnd, CharIndexTranslator translator, byte[] papx, ParagraphHeight phe, byte[] dataStream)
44
  {
44
  {
45
    super(fcStart, fcEnd, new SprmBuffer(papx), isUnicode);
45
    super(fcStart, fcEnd, translator, new SprmBuffer(papx));
46
    _phe = phe;
46
    _phe = phe;
47
    SprmBuffer buf = findHuge(new SprmBuffer(papx), dataStream);
47
    SprmBuffer buf = findHuge(new SprmBuffer(papx), dataStream);
48
    if(buf != null)
48
    if(buf != null)
49
      _buf = buf;
49
      _buf = buf;
50
  }
50
  }
51
51
52
  public PAPX(int fcStart, int fcEnd, SprmBuffer buf, byte[] dataStream, boolean isUnicode)
52
  public PAPX(int fcStart, int fcEnd, CharIndexTranslator translator, SprmBuffer buf, byte[] dataStream)
53
  {
53
  {
54
    super(fcStart, fcEnd, buf, isUnicode);
54
    super(fcStart, fcEnd, translator, buf);
55
    _phe = new ParagraphHeight();
55
    _phe = new ParagraphHeight();
56
    buf = findHuge(buf, dataStream);
56
    buf = findHuge(buf, dataStream);
57
    if(buf != null)
57
    if(buf != null)
(-)src/scratchpad/src/org/apache/poi/hwpf/model/SectionTable.java (-30 / +7 lines)
Lines 61-73 Link Here
61
      int startAt = CPtoFC(node.getStart());
61
      int startAt = CPtoFC(node.getStart());
62
      int endAt = CPtoFC(node.getEnd());
62
      int endAt = CPtoFC(node.getEnd());
63
63
64
      boolean isUnicodeAtStart = tpt.isUnicodeAtByteOffset( startAt );
65
//      System.err.println(startAt + " -> " + endAt + " = " + isUnicodeAtStart);
66
67
      // check for the optimization
64
      // check for the optimization
68
      if (fileOffset == 0xffffffff)
65
      if (fileOffset == 0xffffffff)
69
      {
66
      {
70
        _sections.add(new SEPX(sed, startAt, endAt, new byte[0], isUnicodeAtStart));
67
        _sections.add(new SEPX(sed, startAt, endAt, tpt, new byte[0]));
71
      }
68
      }
72
      else
69
      else
73
      {
70
      {
Lines 76-82 Link Here
76
        byte[] buf = new byte[sepxSize];
73
        byte[] buf = new byte[sepxSize];
77
        fileOffset += LittleEndian.SHORT_SIZE;
74
        fileOffset += LittleEndian.SHORT_SIZE;
78
        System.arraycopy(documentStream, fileOffset, buf, 0, buf.length);
75
        System.arraycopy(documentStream, fileOffset, buf, 0, buf.length);
79
        _sections.add(new SEPX(sed, startAt, endAt, buf, isUnicodeAtStart));
76
        _sections.add(new SEPX(sed, startAt, endAt, tpt, buf));
80
      }
77
      }
81
    }
78
    }
82
79
Lines 138-170 Link Here
138
      }
135
      }
139
      int FC = TP.getPieceDescriptor().getFilePosition();
136
      int FC = TP.getPieceDescriptor().getFilePosition();
140
      int offset = CP - TP.getCP();
137
      int offset = CP - TP.getCP();
141
      FC = FC+offset-((TextPiece)_text.get(0)).getPieceDescriptor().getFilePosition();
138
      if (TP.isUnicode()) {
139
        offset = offset*2;
140
      }
141
      FC = FC+offset;
142
      return FC;
142
      return FC;
143
    }
143
    }
144
144
145
    // Ryans code
146
    private int FCtoCP(int fc)
147
   {
148
     int size = _text.size();
149
     int cp = 0;
150
     for (int x = 0; x < size; x++)
151
     {
152
       TextPiece piece = (TextPiece)_text.get(x);
153
154
       if (fc <= piece.getEnd())
155
       {
156
         cp += (fc - piece.getStart());
157
         break;
158
       }
159
       else
160
       {
161
         cp += (piece.getEnd() - piece.getStart());
162
       }
163
     }
164
     return cp;
165
   }
166
167
168
  public ArrayList getSections()
145
  public ArrayList getSections()
169
  {
146
  {
170
    return _sections;
147
    return _sections;
Lines 205-211 Link Here
205
182
206
      // Line using Ryan's FCtoCP() conversion method -
183
      // Line using Ryan's FCtoCP() conversion method -
207
      // unable to observe any effect on our testcases when using this code - piers
184
      // unable to observe any effect on our testcases when using this code - piers
208
      GenericPropertyNode property = new GenericPropertyNode(FCtoCP(sepx.getStartBytes()), FCtoCP(sepx.getEndBytes()), sed.toByteArray());
185
      GenericPropertyNode property = new GenericPropertyNode(tpt.getCharIndex(sepx.getStartBytes()), tpt.getCharIndex(sepx.getEndBytes()), sed.toByteArray());
209
186
210
187
211
      plex.addProperty(property);
188
      plex.addProperty(property);
(-)src/scratchpad/src/org/apache/poi/hwpf/model/BytePropertyNode.java (-19 / +10 lines)
Lines 25-61 Link Here
25
 *  and characters.
25
 *  and characters.
26
 */
26
 */
27
public abstract class BytePropertyNode extends PropertyNode {
27
public abstract class BytePropertyNode extends PropertyNode {
28
	private boolean isUnicode;
28
        private final int startBytes;
29
        private final int endBytes;
29
30
30
	/**
31
	/**
31
	 * @param fcStart The start of the text for this property, in _bytes_
32
	 * @param fcStart The start of the text for this property, in _bytes_
32
	 * @param fcEnd The end of the text for this property, in _bytes_
33
	 * @param fcEnd The end of the text for this property, in _bytes_
33
	 */
34
	 */
34
	public BytePropertyNode(int fcStart, int fcEnd, Object buf, boolean isUnicode) {
35
	public BytePropertyNode(int fcStart, int fcEnd, CharIndexTranslator translator, Object buf) {
35
		super(
36
		super(
36
				generateCp(fcStart, isUnicode),
37
				translator.getCharIndex(fcStart),
37
				generateCp(fcEnd, isUnicode),
38
				translator.getCharIndex(fcEnd),
38
				buf
39
				buf
39
		);
40
		);
40
		this.isUnicode = isUnicode;
41
                this.startBytes = fcStart;
42
                this.endBytes = fcEnd;
41
	}
43
	}
42
	private static int generateCp(int val, boolean isUnicode) {
43
		if(isUnicode)
44
			return val/2;
45
		return val;
46
	}
47
44
48
	public boolean isUnicode() {
49
		return isUnicode;
50
	}
51
	public int getStartBytes() {
45
	public int getStartBytes() {
52
		if(isUnicode)
46
                return startBytes;
53
			return getStart()*2;
54
		return getStart();
55
	}
47
	}
48
56
	public int getEndBytes() {
49
	public int getEndBytes() {
57
		if(isUnicode)
50
                return endBytes;
58
			return getEnd()*2;
59
		return getEnd();
60
	}
51
	}
61
}
52
}
(-)src/scratchpad/src/org/apache/poi/hwpf/model/SEPX.java (-2 / +2 lines)
Lines 28-36 Link Here
28
28
29
  SectionDescriptor _sed;
29
  SectionDescriptor _sed;
30
30
31
  public SEPX(SectionDescriptor sed, int start, int end, byte[] grpprl, boolean isUnicode)
31
  public SEPX(SectionDescriptor sed, int start, int end, CharIndexTranslator translator, byte[] grpprl)
32
  {
32
  {
33
    super(start, end, SectionSprmUncompressor.uncompressSEP(grpprl, 0), isUnicode);
33
    super(start, end, translator, SectionSprmUncompressor.uncompressSEP(grpprl, 0));
34
    _sed = sed;
34
    _sed = sed;
35
  }
35
  }
36
36
(-)src/scratchpad/src/org/apache/poi/hwpf/model/PAPBinTable.java (-3 / +2 lines)
Lines 76-84 Link Here
76
76
77
  public void insert(int listIndex, int cpStart, SprmBuffer buf)
77
  public void insert(int listIndex, int cpStart, SprmBuffer buf)
78
  {
78
  {
79
    boolean needsToBeUnicode = tpt.isUnicodeAtCharOffset(cpStart);
80
79
81
    PAPX forInsert = new PAPX(0, 0, buf, _dataStream, needsToBeUnicode);
80
    PAPX forInsert = new PAPX(0, 0, tpt, buf, _dataStream);
82
81
83
    // Ensure character offsets are really characters
82
    // Ensure character offsets are really characters
84
    forInsert.setStart(cpStart);
83
    forInsert.setStart(cpStart);
Lines 108-114 Link Here
108
    	//  Original, until insert at point
107
    	//  Original, until insert at point
109
    	//  New one
108
    	//  New one
110
    	//  Clone of original, on to the old end
109
    	//  Clone of original, on to the old end
111
        PAPX clone = new PAPX(0, 0, clonedBuf, _dataStream, needsToBeUnicode);
110
        PAPX clone = new PAPX(0, 0, tpt, clonedBuf, _dataStream);
112
        // Again ensure contains character based offsets no matter what
111
        // Again ensure contains character based offsets no matter what
113
        clone.setStart(cpStart);
112
        clone.setStart(cpStart);
114
        clone.setEnd(currentPap.getEnd());
113
        clone.setEnd(currentPap.getEnd());
(-)src/scratchpad/src/org/apache/poi/hwpf/model/CHPFormattedDiskPage.java (-2 / +3 lines)
Lines 60-67 Link Here
60
60
61
      for (int x = 0; x < _crun; x++)
61
      for (int x = 0; x < _crun; x++)
62
      {
62
      {
63
    	boolean isUnicode = tpt.isUnicodeAtByteOffset( getStart(x) );
63
    	int startAt = getStart(x);
64
        _chpxList.add(new CHPX(getStart(x) - fcMin, getEnd(x) - fcMin, getGrpprl(x), isUnicode));
64
		int endAt = getEnd(x);
65
		_chpxList.add(new CHPX(startAt, endAt, tpt, getGrpprl(x)));
65
      }
66
      }
66
    }
67
    }
67
68

Return to bug 46610