ASF Bugzilla – Attachment 23181 Details for
Bug 46610
[PATCH] Problems accessing documents containing unicode
Home
|
New
|
Browse
|
Search
|
[?]
|
Reports
|
Help
|
New Account
|
Log In
Remember
[x]
|
Forgot Password
Login:
[x]
[patch]
Patch for Exception triggered by utf2.doc
utf2.patch (text/plain), 10.69 KB, created by
Benjamin Engele
on 2009-01-27 05:57:11 UTC
(
hide
)
Description:
Patch for Exception triggered by utf2.doc
Filename:
MIME Type:
Creator:
Benjamin Engele
Created:
2009-01-27 05:57:11 UTC
Size:
10.69 KB
patch
obsolete
>Index: src/scratchpad/src/org/apache/poi/hwpf/model/BytePropertyNode.java >=================================================================== >--- src/scratchpad/src/org/apache/poi/hwpf/model/BytePropertyNode.java (revision 738040) >+++ src/scratchpad/src/org/apache/poi/hwpf/model/BytePropertyNode.java (working copy) >@@ -30,10 +30,10 @@ > * @param fcStart The start of the text for this property, in _bytes_ > * @param fcEnd The end of the text for this property, in _bytes_ > */ >- public BytePropertyNode(int fcStart, int fcEnd, Object buf, boolean isUnicode) { >+ public BytePropertyNode(int fcStart, int fcEnd, CharIndexTranslator translator, Object buf, boolean isUnicode) { > super( >- generateCp(fcStart, isUnicode), >- generateCp(fcEnd, isUnicode), >+ translator.getCharIndex(fcStart), >+ translator.getCharIndex(fcEnd), > buf > ); > this.isUnicode = isUnicode; >@@ -45,14 +45,17 @@ > } > > public boolean isUnicode() { >+ //XXX Is this method useful? > return isUnicode; > } > public int getStartBytes() { >+ //XXX That is wrong in most cases! > if(isUnicode) > return getStart()*2; > return getStart(); > } > public int getEndBytes() { >+ //XXX That is wrong in most cases! > if(isUnicode) > return getEnd()*2; > return getEnd(); >Index: src/scratchpad/src/org/apache/poi/hwpf/model/CharIndexTranslator.java >=================================================================== >--- src/scratchpad/src/org/apache/poi/hwpf/model/CharIndexTranslator.java (revision 0) >+++ src/scratchpad/src/org/apache/poi/hwpf/model/CharIndexTranslator.java (revision 0) >@@ -0,0 +1,14 @@ >+package org.apache.poi.hwpf.model; >+ >+public interface CharIndexTranslator { >+ >+ /** >+ * Calculates the char index of the given byte index. 
>+ * >+ * @param byteStart >+ * @param bytePos >+ * @return >+ */ >+ int getCharIndex(int bytePos); >+ >+} >\ No newline at end of file >Index: src/scratchpad/src/org/apache/poi/hwpf/model/CHPBinTable.java >=================================================================== >--- src/scratchpad/src/org/apache/poi/hwpf/model/CHPBinTable.java (revision 738040) >+++ src/scratchpad/src/org/apache/poi/hwpf/model/CHPBinTable.java (working copy) >@@ -121,7 +121,7 @@ > { > boolean needsToBeUnicode = tpt.isUnicodeAtCharOffset(cpStart); > >- CHPX insertChpx = new CHPX(0, 0, buf, needsToBeUnicode); >+ CHPX insertChpx = new CHPX(0, 0, tpt,buf, needsToBeUnicode); > > // Ensure character offsets are really characters > insertChpx.setStart(cpStart); >@@ -141,7 +141,7 @@ > // Original, until insert at point > // New one > // Clone of original, on to the old end >- CHPX clone = new CHPX(0, 0, chpx.getSprmBuf(), needsToBeUnicode); >+ CHPX clone = new CHPX(0, 0, tpt,chpx.getSprmBuf(), needsToBeUnicode); > // Again ensure contains character based offsets no matter what > clone.setStart(cpStart); > clone.setEnd(chpx.getEnd()); >Index: src/scratchpad/src/org/apache/poi/hwpf/model/CHPFormattedDiskPage.java >=================================================================== >--- src/scratchpad/src/org/apache/poi/hwpf/model/CHPFormattedDiskPage.java (revision 738040) >+++ src/scratchpad/src/org/apache/poi/hwpf/model/CHPFormattedDiskPage.java (working copy) >@@ -61,8 +61,10 @@ > > for (int x = 0; x < _crun; x++) > { >- boolean isUnicode = tpt.isUnicodeAtByteOffset( getStart(x) ); >- _chpxList.add(new CHPX(getStart(x) - fcMin, getEnd(x) - fcMin, getGrpprl(x), isUnicode)); >+ int startAt = getStart(x) - fcMin; >+ boolean isUnicode = tpt.isUnicodeAtByteOffset( startAt ); >+ int endAt = getEnd(x) - fcMin; >+ _chpxList.add(new CHPX(startAt, endAt, tpt, getGrpprl(x), isUnicode)); > } > } > >Index: src/scratchpad/src/org/apache/poi/hwpf/model/CHPX.java 
>=================================================================== >--- src/scratchpad/src/org/apache/poi/hwpf/model/CHPX.java (revision 738040) >+++ src/scratchpad/src/org/apache/poi/hwpf/model/CHPX.java (working copy) >@@ -37,14 +37,14 @@ > public class CHPX extends BytePropertyNode > { > >- public CHPX(int fcStart, int fcEnd, byte[] grpprl, boolean isUnicode) >+ public CHPX(int fcStart, int fcEnd, CharIndexTranslator translator, byte[] grpprl, boolean isUnicode) > { >- super(fcStart, fcEnd, new SprmBuffer(grpprl), isUnicode); >+ super(fcStart, fcEnd, translator, new SprmBuffer(grpprl), isUnicode); > } > >- public CHPX(int fcStart, int fcEnd, SprmBuffer buf, boolean isUnicode) >+ public CHPX(int fcStart, int fcEnd, CharIndexTranslator translator, SprmBuffer buf, boolean isUnicode) > { >- super(fcStart, fcEnd, buf, isUnicode); >+ super(fcStart, fcEnd, translator ,buf, isUnicode); > } > > >Index: src/scratchpad/src/org/apache/poi/hwpf/model/PAPBinTable.java >=================================================================== >--- src/scratchpad/src/org/apache/poi/hwpf/model/PAPBinTable.java (revision 738040) >+++ src/scratchpad/src/org/apache/poi/hwpf/model/PAPBinTable.java (working copy) >@@ -78,7 +78,7 @@ > { > boolean needsToBeUnicode = tpt.isUnicodeAtCharOffset(cpStart); > >- PAPX forInsert = new PAPX(0, 0, buf, _dataStream, needsToBeUnicode); >+ PAPX forInsert = new PAPX(0, 0, tpt, buf, _dataStream, needsToBeUnicode); > > // Ensure character offsets are really characters > forInsert.setStart(cpStart); >@@ -108,7 +108,7 @@ > // Original, until insert at point > // New one > // Clone of original, on to the old end >- PAPX clone = new PAPX(0, 0, clonedBuf, _dataStream, needsToBeUnicode); >+ PAPX clone = new PAPX(0, 0, tpt, clonedBuf, _dataStream, needsToBeUnicode); > // Again ensure contains character based offsets no matter what > clone.setStart(cpStart); > clone.setEnd(currentPap.getEnd()); >Index: 
src/scratchpad/src/org/apache/poi/hwpf/model/PAPFormattedDiskPage.java >=================================================================== >--- src/scratchpad/src/org/apache/poi/hwpf/model/PAPFormattedDiskPage.java (revision 738040) >+++ src/scratchpad/src/org/apache/poi/hwpf/model/PAPFormattedDiskPage.java (working copy) >@@ -70,7 +70,7 @@ > boolean isUnicode = tpt.isUnicodeAtByteOffset(startAt); > //System.err.println(startAt + " -> " + endAt + " = " + isUnicode); > >- _papxList.add(new PAPX(startAt, endAt, getGrpprl(x), getParagraphHeight(x), dataStream, isUnicode)); >+ _papxList.add(new PAPX(startAt, endAt, tpt, getGrpprl(x), getParagraphHeight(x), dataStream, isUnicode)); > } > _fkp = null; > _dataStream = dataStream; >Index: src/scratchpad/src/org/apache/poi/hwpf/model/PAPX.java >=================================================================== >--- src/scratchpad/src/org/apache/poi/hwpf/model/PAPX.java (revision 738040) >+++ src/scratchpad/src/org/apache/poi/hwpf/model/PAPX.java (working copy) >@@ -43,18 +43,18 @@ > private ParagraphHeight _phe; > private int _hugeGrpprlOffset = -1; > >- public PAPX(int fcStart, int fcEnd, byte[] papx, ParagraphHeight phe, byte[] dataStream, boolean isUnicode) >+ public PAPX(int fcStart, int fcEnd, CharIndexTranslator translator, byte[] papx, ParagraphHeight phe, byte[] dataStream, boolean isUnicode) > { >- super(fcStart, fcEnd, new SprmBuffer(papx), isUnicode); >+ super(fcStart, fcEnd, translator, new SprmBuffer(papx), isUnicode); > _phe = phe; > SprmBuffer buf = findHuge(new SprmBuffer(papx), dataStream); > if(buf != null) > _buf = buf; > } > >- public PAPX(int fcStart, int fcEnd, SprmBuffer buf, byte[] dataStream, boolean isUnicode) >+ public PAPX(int fcStart, int fcEnd, CharIndexTranslator translator, SprmBuffer buf, byte[] dataStream, boolean isUnicode) > { >- super(fcStart, fcEnd, buf, isUnicode); >+ super(fcStart, fcEnd, translator, buf, isUnicode); > _phe = new ParagraphHeight(); > buf = findHuge(buf, dataStream); 
> if(buf != null) >Index: src/scratchpad/src/org/apache/poi/hwpf/model/SectionTable.java >=================================================================== >--- src/scratchpad/src/org/apache/poi/hwpf/model/SectionTable.java (revision 738040) >+++ src/scratchpad/src/org/apache/poi/hwpf/model/SectionTable.java (working copy) >@@ -67,7 +67,7 @@ > // check for the optimization > if (fileOffset == 0xffffffff) > { >- _sections.add(new SEPX(sed, startAt, endAt, new byte[0], isUnicodeAtStart)); >+ _sections.add(new SEPX(sed, startAt, endAt, tpt, new byte[0], isUnicodeAtStart)); > } > else > { >@@ -76,7 +76,7 @@ > byte[] buf = new byte[sepxSize]; > fileOffset += LittleEndian.SHORT_SIZE; > System.arraycopy(documentStream, fileOffset, buf, 0, buf.length); >- _sections.add(new SEPX(sed, startAt, endAt, buf, isUnicodeAtStart)); >+ _sections.add(new SEPX(sed, startAt, endAt, tpt, buf, isUnicodeAtStart)); > } > } > >Index: src/scratchpad/src/org/apache/poi/hwpf/model/SEPX.java >=================================================================== >--- src/scratchpad/src/org/apache/poi/hwpf/model/SEPX.java (revision 738040) >+++ src/scratchpad/src/org/apache/poi/hwpf/model/SEPX.java (working copy) >@@ -31,9 +31,9 @@ > > SectionDescriptor _sed; > >- public SEPX(SectionDescriptor sed, int start, int end, byte[] grpprl, boolean isUnicode) >+ public SEPX(SectionDescriptor sed, int start, int end, CharIndexTranslator translator, byte[] grpprl, boolean isUnicode) > { >- super(start, end, SectionSprmUncompressor.uncompressSEP(grpprl, 0), isUnicode); >+ super(start, end, translator, SectionSprmUncompressor.uncompressSEP(grpprl, 0), isUnicode); > _sed = sed; > } > >Index: src/scratchpad/src/org/apache/poi/hwpf/model/TextPieceTable.java >=================================================================== >--- src/scratchpad/src/org/apache/poi/hwpf/model/TextPieceTable.java (revision 738040) >+++ src/scratchpad/src/org/apache/poi/hwpf/model/TextPieceTable.java (working copy) >@@ -38,7 +38,7 
@@ > * convertion. > * @author Ryan Ackley > */ >-public class TextPieceTable >+public class TextPieceTable implements CharIndexTranslator > { > protected ArrayList _textPieces = new ArrayList(); > //int _multiple; >@@ -269,4 +269,32 @@ > } > return false; > } >+ /* (non-Javadoc) >+ * @see org.apache.poi.hwpf.model.CharIndexTranslator#getCharIndex(int) >+ */ >+ public int getCharIndex(int bytePos) { >+ int charCount = 0; >+ int curByte = 0; >+ >+ Iterator it = _textPieces.iterator(); >+ while (it.hasNext() && curByte < bytePos) { >+ TextPiece tp = (TextPiece) it.next(); >+ >+ int bytesLength = tp.bytesLength(); >+ int nextByte = curByte + bytesLength; >+ >+ int toAdd = bytePos > nextByte ? bytesLength : bytesLength >+ - (nextByte - bytePos); >+ >+ if (tp.isUnicode()) { >+ charCount += toAdd / 2; >+ } else { >+ charCount += toAdd; >+ } >+ >+ curByte = nextByte; >+ } >+ >+ return charCount; >+ } > }
You cannot view the attachment while viewing its details because your browser does not support IFRAMEs.
View the attachment on a separate page
.
View Attachment As Diff
View Attachment As Raw
Actions:
View
|
Diff
Attachments on
bug 46610
:
23178
|
23179
|
23180
|
23181
|
23184
|
23829
|
23833
|
23834
|
23835