Attachment 23833 Details for Bug 46610 – Unicode patch v.2

[patch] Unicode patch v.2

unicode.patch (text/plain), 13.23 KB, created by Maxim Valyanskiy on 2009-06-19 04:58:24 UTC

(hide)

Description:

Filename:

MIME Type:

Creator: Maxim Valyanskiy

Created: 2009-06-19 04:58:24 UTC

Size: 13.23 KB

patch

obsolete

>Index: src/scratchpad/src/org/apache/poi/hwpf/model/CHPX.java
>===================================================================
>--- src/scratchpad/src/org/apache/poi/hwpf/model/CHPX.java	(revision 786463)
>+++ src/scratchpad/src/org/apache/poi/hwpf/model/CHPX.java	(working copy)
>@@ -34,14 +34,14 @@
> public final class CHPX extends BytePropertyNode
> {
> 
>-  public CHPX(int fcStart, int fcEnd, byte[] grpprl, boolean isUnicode)
>+  public CHPX(int fcStart, int fcEnd, CharIndexTranslator translator, byte[] grpprl)
>   {
>-    super(fcStart, fcEnd, new SprmBuffer(grpprl), isUnicode);
>+    super(fcStart, fcEnd, translator, new SprmBuffer(grpprl));
>   }
> 
>-  public CHPX(int fcStart, int fcEnd, SprmBuffer buf, boolean isUnicode)
>+  public CHPX(int fcStart, int fcEnd, CharIndexTranslator translator, SprmBuffer buf)
>   {
>-    super(fcStart, fcEnd, buf, isUnicode);
>+    super(fcStart, fcEnd, translator ,buf);
>   }
> 
> 
>Index: src/scratchpad/src/org/apache/poi/hwpf/model/TextPieceTable.java
>===================================================================
>--- src/scratchpad/src/org/apache/poi/hwpf/model/TextPieceTable.java	(revision 786463)
>+++ src/scratchpad/src/org/apache/poi/hwpf/model/TextPieceTable.java	(working copy)
>@@ -37,7 +37,7 @@
>  *  convertion.
>  * @author Ryan Ackley
>  */
>-public final class TextPieceTable
>+public final class TextPieceTable implements CharIndexTranslator
> {
>   protected ArrayList _textPieces = new ArrayList();
>   //int _multiple;
>@@ -150,31 +150,25 @@
> 	  // If they ask off the end, just go with the last one...
> 	  return lastWas;
>   }
>-  /**
>-   * Is the text at the given byte offset
>-   *  unicode, or plain old ascii?
>-   * In a very evil fashion, you have to actually
>-   *  know this to make sense of character and
>-   *  paragraph properties :(
>-   * @param bytePos The character offset to check about
>-   */
>+
>   public boolean isUnicodeAtByteOffset(int bytePos) {
> 	  boolean lastWas = false;
>-	  int curByte = 0;
>+	 
> 
> 	  Iterator it = _textPieces.iterator();
> 	  while(it.hasNext()) {
> 		  TextPiece tp = (TextPiece)it.next();
>-		  int nextByte = curByte + tp.bytesLength();
>+		  int curByte = tp.getPieceDescriptor().getFilePosition();
>+		  int pieceEnd = curByte + tp.bytesLength();
> 
> 		  // If the text piece covers the character, all good
>-		  if(curByte <= bytePos && nextByte >= bytePos) {
>+		  if(curByte <= bytePos && pieceEnd > bytePos) {
> 			  return tp.isUnicode();
> 		  }
> 		  // Otherwise keep track for the last one
> 		  lastWas = tp.isUnicode();
> 		  // Move along
>-		  curByte = nextByte;
>+		  curByte = pieceEnd;
> 	  }
> 
> 	  // If they ask off the end, just go with the last one...
>@@ -268,4 +262,34 @@
>     }
>     return false;
>   }
>+  	/* (non-Javadoc)
>+	 * @see org.apache.poi.hwpf.model.CharIndexTranslator#getCharIndex(int)
>+	 */
>+	public int getCharIndex(int bytePos) {
>+		int charCount = 0;
>+
>+		Iterator it = _textPieces.iterator();
>+		while (it.hasNext()) {
>+			TextPiece tp = (TextPiece) it.next();
>+			int pieceStart = tp.getPieceDescriptor().getFilePosition();
>+			if(pieceStart >= bytePos) {
>+				break;
>+			}
>+			
>+			int bytesLength = tp.bytesLength();
>+			int pieceEnd = pieceStart + bytesLength;
>+
>+			int toAdd = bytePos > pieceEnd ? bytesLength : bytesLength
>+					- (pieceEnd - bytePos);
>+
>+			if (tp.isUnicode()) {
>+				charCount += toAdd / 2;
>+			} else {
>+				charCount += toAdd;
>+			}
>+		}
>+
>+		return charCount;
>+	}
>+	
> }
>Index: src/scratchpad/src/org/apache/poi/hwpf/model/CHPBinTable.java
>===================================================================
>--- src/scratchpad/src/org/apache/poi/hwpf/model/CHPBinTable.java	(revision 786463)
>+++ src/scratchpad/src/org/apache/poi/hwpf/model/CHPBinTable.java	(working copy)
>@@ -119,9 +119,8 @@
> 
>   public void insert(int listIndex, int cpStart, SprmBuffer buf)
>   {
>-	boolean needsToBeUnicode = tpt.isUnicodeAtCharOffset(cpStart);
> 
>-    CHPX insertChpx = new CHPX(0, 0, buf, needsToBeUnicode);
>+    CHPX insertChpx = new CHPX(0, 0, tpt,buf);
> 
>     // Ensure character offsets are really characters
>     insertChpx.setStart(cpStart);
>@@ -141,7 +140,7 @@
>     	//  Original, until insert at point
>     	//  New one
>     	//  Clone of original, on to the old end
>-        CHPX clone = new CHPX(0, 0, chpx.getSprmBuf(), needsToBeUnicode);
>+        CHPX clone = new CHPX(0, 0, tpt,chpx.getSprmBuf());
>         // Again ensure contains character based offsets no matter what
>         clone.setStart(cpStart);
>         clone.setEnd(chpx.getEnd());
>Index: src/scratchpad/src/org/apache/poi/hwpf/model/PAPFormattedDiskPage.java
>===================================================================
>--- src/scratchpad/src/org/apache/poi/hwpf/model/PAPFormattedDiskPage.java	(revision 786463)
>+++ src/scratchpad/src/org/apache/poi/hwpf/model/PAPFormattedDiskPage.java	(working copy)
>@@ -62,14 +62,10 @@
>     public PAPFormattedDiskPage(byte[] documentStream, byte[] dataStream, int offset, int fcMin, TextPieceTable tpt)
>     {
>       super(documentStream, offset);
>-
>       for (int x = 0; x < _crun; x++) {
>-         int startAt = getStart(x) - fcMin;
>-         int endAt = getEnd(x) - fcMin;
>-    	 boolean isUnicode = tpt.isUnicodeAtByteOffset(startAt);
>-         //System.err.println(startAt + " -> " + endAt + " = " + isUnicode);
>-
>-         _papxList.add(new PAPX(startAt, endAt, getGrpprl(x), getParagraphHeight(x), dataStream, isUnicode));
>+         int startAt = getStart(x);
>+         int endAt = getEnd(x);
>+         _papxList.add(new PAPX(startAt, endAt, tpt, getGrpprl(x), getParagraphHeight(x), dataStream));
>       }
>       _fkp = null;
>       _dataStream = dataStream;
>Index: src/scratchpad/src/org/apache/poi/hwpf/model/PAPX.java
>===================================================================
>--- src/scratchpad/src/org/apache/poi/hwpf/model/PAPX.java	(revision 786463)
>+++ src/scratchpad/src/org/apache/poi/hwpf/model/PAPX.java	(working copy)
>@@ -40,18 +40,18 @@
>   private ParagraphHeight _phe;
>   private int _hugeGrpprlOffset = -1;
> 
>-  public PAPX(int fcStart, int fcEnd, byte[] papx, ParagraphHeight phe, byte[] dataStream, boolean isUnicode)
>+  public PAPX(int fcStart, int fcEnd, CharIndexTranslator translator, byte[] papx, ParagraphHeight phe, byte[] dataStream)
>   {
>-    super(fcStart, fcEnd, new SprmBuffer(papx), isUnicode);
>+    super(fcStart, fcEnd, translator, new SprmBuffer(papx));
>     _phe = phe;
>     SprmBuffer buf = findHuge(new SprmBuffer(papx), dataStream);
>     if(buf != null)
>       _buf = buf;
>   }
> 
>-  public PAPX(int fcStart, int fcEnd, SprmBuffer buf, byte[] dataStream, boolean isUnicode)
>+  public PAPX(int fcStart, int fcEnd, CharIndexTranslator translator, SprmBuffer buf, byte[] dataStream)
>   {
>-    super(fcStart, fcEnd, buf, isUnicode);
>+    super(fcStart, fcEnd, translator, buf);
>     _phe = new ParagraphHeight();
>     buf = findHuge(buf, dataStream);
>     if(buf != null)
>Index: src/scratchpad/src/org/apache/poi/hwpf/model/SectionTable.java
>===================================================================
>--- src/scratchpad/src/org/apache/poi/hwpf/model/SectionTable.java	(revision 786463)
>+++ src/scratchpad/src/org/apache/poi/hwpf/model/SectionTable.java	(working copy)
>@@ -61,13 +61,10 @@
>       int startAt = CPtoFC(node.getStart());
>       int endAt = CPtoFC(node.getEnd());
> 
>-      boolean isUnicodeAtStart = tpt.isUnicodeAtByteOffset( startAt );
>-//      System.err.println(startAt + " -> " + endAt + " = " + isUnicodeAtStart);
>-
>       // check for the optimization
>       if (fileOffset == 0xffffffff)
>       {
>-        _sections.add(new SEPX(sed, startAt, endAt, new byte[0], isUnicodeAtStart));
>+        _sections.add(new SEPX(sed, startAt, endAt, tpt, new byte[0]));
>       }
>       else
>       {
>@@ -76,7 +73,7 @@
>         byte[] buf = new byte[sepxSize];
>         fileOffset += LittleEndian.SHORT_SIZE;
>         System.arraycopy(documentStream, fileOffset, buf, 0, buf.length);
>-        _sections.add(new SEPX(sed, startAt, endAt, buf, isUnicodeAtStart));
>+        _sections.add(new SEPX(sed, startAt, endAt, tpt, buf));
>       }
>     }
> 
>@@ -138,33 +135,13 @@
>       }
>       int FC = TP.getPieceDescriptor().getFilePosition();
>       int offset = CP - TP.getCP();
>-      FC = FC+offset-((TextPiece)_text.get(0)).getPieceDescriptor().getFilePosition();
>+      if (TP.isUnicode()) {
>+        offset = offset*2;
>+      }
>+      FC = FC+offset;
>       return FC;
>     }
> 
>-    // Ryans code
>-    private int FCtoCP(int fc)
>-   {
>-     int size = _text.size();
>-     int cp = 0;
>-     for (int x = 0; x < size; x++)
>-     {
>-       TextPiece piece = (TextPiece)_text.get(x);
>-
>-       if (fc <= piece.getEnd())
>-       {
>-         cp += (fc - piece.getStart());
>-         break;
>-       }
>-       else
>-       {
>-         cp += (piece.getEnd() - piece.getStart());
>-       }
>-     }
>-     return cp;
>-   }
>-
>-
>   public ArrayList getSections()
>   {
>     return _sections;
>@@ -205,7 +182,7 @@
> 
>       // Line using Ryan's FCtoCP() conversion method -
>       // unable to observe any effect on our testcases when using this code - piers
>-      GenericPropertyNode property = new GenericPropertyNode(FCtoCP(sepx.getStartBytes()), FCtoCP(sepx.getEndBytes()), sed.toByteArray());
>+      GenericPropertyNode property = new GenericPropertyNode(tpt.getCharIndex(sepx.getStartBytes()), tpt.getCharIndex(sepx.getEndBytes()), sed.toByteArray());
> 
> 
>       plex.addProperty(property);
>Index: src/scratchpad/src/org/apache/poi/hwpf/model/BytePropertyNode.java
>===================================================================
>--- src/scratchpad/src/org/apache/poi/hwpf/model/BytePropertyNode.java	(revision 786463)
>+++ src/scratchpad/src/org/apache/poi/hwpf/model/BytePropertyNode.java	(working copy)
>@@ -25,37 +25,28 @@
>  *  and characters.
>  */
> public abstract class BytePropertyNode extends PropertyNode {
>-	private boolean isUnicode;
>+        private final int startBytes;
>+        private final int endBytes;
> 
> 	/**
> 	 * @param fcStart The start of the text for this property, in _bytes_
> 	 * @param fcEnd The end of the text for this property, in _bytes_
> 	 */
>-	public BytePropertyNode(int fcStart, int fcEnd, Object buf, boolean isUnicode) {
>+	public BytePropertyNode(int fcStart, int fcEnd, CharIndexTranslator translator, Object buf) {
> 		super(
>-				generateCp(fcStart, isUnicode),
>-				generateCp(fcEnd, isUnicode),
>+				translator.getCharIndex(fcStart),
>+				translator.getCharIndex(fcEnd),
> 				buf
> 		);
>-		this.isUnicode = isUnicode;
>+                this.startBytes = fcStart;
>+                this.endBytes = fcEnd;
> 	}
>-	private static int generateCp(int val, boolean isUnicode) {
>-		if(isUnicode)
>-			return val/2;
>-		return val;
>-	}
> 
>-	public boolean isUnicode() {
>-		return isUnicode;
>-	}
> 	public int getStartBytes() {
>-		if(isUnicode)
>-			return getStart()*2;
>-		return getStart();
>+                return startBytes;
> 	}
>+
> 	public int getEndBytes() {
>-		if(isUnicode)
>-			return getEnd()*2;
>-		return getEnd();
>+                return endBytes;
> 	}
> }
>Index: src/scratchpad/src/org/apache/poi/hwpf/model/SEPX.java
>===================================================================
>--- src/scratchpad/src/org/apache/poi/hwpf/model/SEPX.java	(revision 786463)
>+++ src/scratchpad/src/org/apache/poi/hwpf/model/SEPX.java	(working copy)
>@@ -28,9 +28,9 @@
> 
>   SectionDescriptor _sed;
> 
>-  public SEPX(SectionDescriptor sed, int start, int end, byte[] grpprl, boolean isUnicode)
>+  public SEPX(SectionDescriptor sed, int start, int end, CharIndexTranslator translator, byte[] grpprl)
>   {
>-    super(start, end, SectionSprmUncompressor.uncompressSEP(grpprl, 0), isUnicode);
>+    super(start, end, translator, SectionSprmUncompressor.uncompressSEP(grpprl, 0));
>     _sed = sed;
>   }
> 
>Index: src/scratchpad/src/org/apache/poi/hwpf/model/PAPBinTable.java
>===================================================================
>--- src/scratchpad/src/org/apache/poi/hwpf/model/PAPBinTable.java	(revision 786463)
>+++ src/scratchpad/src/org/apache/poi/hwpf/model/PAPBinTable.java	(working copy)
>@@ -76,9 +76,8 @@
> 
>   public void insert(int listIndex, int cpStart, SprmBuffer buf)
>   {
>-    boolean needsToBeUnicode = tpt.isUnicodeAtCharOffset(cpStart);
> 
>-    PAPX forInsert = new PAPX(0, 0, buf, _dataStream, needsToBeUnicode);
>+    PAPX forInsert = new PAPX(0, 0, tpt, buf, _dataStream);
> 
>     // Ensure character offsets are really characters
>     forInsert.setStart(cpStart);
>@@ -108,7 +107,7 @@
>     	//  Original, until insert at point
>     	//  New one
>     	//  Clone of original, on to the old end
>-        PAPX clone = new PAPX(0, 0, clonedBuf, _dataStream, needsToBeUnicode);
>+        PAPX clone = new PAPX(0, 0, tpt, clonedBuf, _dataStream);
>         // Again ensure contains character based offsets no matter what
>         clone.setStart(cpStart);
>         clone.setEnd(currentPap.getEnd());
>Index: src/scratchpad/src/org/apache/poi/hwpf/model/CHPFormattedDiskPage.java
>===================================================================
>--- src/scratchpad/src/org/apache/poi/hwpf/model/CHPFormattedDiskPage.java	(revision 786463)
>+++ src/scratchpad/src/org/apache/poi/hwpf/model/CHPFormattedDiskPage.java	(working copy)
>@@ -60,8 +60,9 @@
> 
>       for (int x = 0; x < _crun; x++)
>       {
>-    	boolean isUnicode = tpt.isUnicodeAtByteOffset( getStart(x) );
>-        _chpxList.add(new CHPX(getStart(x) - fcMin, getEnd(x) - fcMin, getGrpprl(x), isUnicode));
>+    	int startAt = getStart(x);
>+		int endAt = getEnd(x);
>+		_chpxList.add(new CHPX(startAt, endAt, tpt, getGrpprl(x)));
>       }
>     }
>

Actions: View | Diff

Attachments on bug 46610: 23178 | 23179 | 23180 | 23181 | 23184 | 23829 | 23833 | 23834 | 23835