ASF Bugzilla – Attachment 19798 Details for
Bug 41076
StringIndexOutOfBoundsException when extracting text from a Word document.
Home
|
New
|
Browse
|
Search
|
[?]
|
Reports
|
Help
|
New Account
|
Log In
Remember
[x]
|
Forgot Password
Login:
[x]
[patch]
A proposed fix which rewrites the loops
WordExtractor.patch (text/plain), 3.97 KB, created by
Eric Porter
on 2007-03-26 10:45:18 UTC
(
hide
)
Description:
A proposed fix which rewrites the loops
Filename: WordExtractor.patch
MIME Type: text/plain
Creator:
Eric Porter
Created:
2007-03-26 10:45:18 UTC
Size:
3.97 KB
patch
obsolete
>Index: WordExtractor.java
>===================================================================
>--- WordExtractor.java (revision 522571)
>+++ WordExtractor.java (working copy)
>@@ -135,73 +135,47 @@
> List textRuns = cbt.getTextRuns();
> Iterator runIt = textRuns.iterator();
> Iterator textIt = textPieces.iterator();
>-
>- TextPiece currentPiece = (TextPiece)textIt.next();
>- int currentTextStart = currentPiece.getStart();
>- int currentTextEnd = currentPiece.getEnd();
>-
>+
>+ if(!runIt.hasNext())
>+ return "";
>+
> WordTextBuffer finalTextBuf = new WordTextBuffer();
>-
>- // iterate through all text runs extract the text only if they haven't been
>- // deleted
>- while (runIt.hasNext())
>- {
>- CHPX chpx = (CHPX)runIt.next();
>- boolean deleted = isDeleted(chpx.getGrpprl());
>- if (deleted)
>- {
>- continue;
>- }
>-
>- int runStart = chpx.getStart();
>- int runEnd = chpx.getEnd();
>-
>- while (runStart >= currentTextEnd)
>- {
>- currentPiece = (TextPiece) textIt.next ();
>- currentTextStart = currentPiece.getStart ();
>- currentTextEnd = currentPiece.getEnd ();
>- }
>-
>- if (runEnd < currentTextEnd)
>- {
>- String str = currentPiece.substring(runStart - currentTextStart, runEnd - currentTextStart);
>- finalTextBuf.append(str);
>- }
>- else if (runEnd > currentTextEnd)
>- {
>- while (runEnd > currentTextEnd)
>- {
>- String str = currentPiece.substring(runStart - currentTextStart,
>- currentTextEnd - currentTextStart);
>- finalTextBuf.append(str);
>- if (textIt.hasNext())
>- {
>- currentPiece = (TextPiece) textIt.next ();
>- currentTextStart = currentPiece.getStart ();
>- runStart = currentTextStart;
>- currentTextEnd = currentPiece.getEnd ();
>- }
>- else
>- {
>- return finalTextBuf.toString();
>- }
>- }
>- String str = currentPiece.substring(0, runEnd - currentTextStart);
>- finalTextBuf.append(str);
>- }
>- else
>- {
>- String str = currentPiece.substring(runStart - currentTextStart, runEnd - currentTextStart);
>- if (textIt.hasNext())
>- {
>- currentPiece = (TextPiece) textIt.next();
>- currentTextStart = currentPiece.getStart();
>- currentTextEnd = currentPiece.getEnd();
>- }
>- finalTextBuf.append(str);
>- }
>+
>+ // This code is built to handle all 6 cases of pieces and runs:
>+ // The two cases where there is no overlap.
>+ // The two cases where one is completely contained in the other.
>+ // The two cases where there is partial overlap.
>+
>+ CHPX currRun = (CHPX) runIt.next();
>+ outer:
>+ while(textIt.hasNext()) {
>+ TextPiece currPiece = (TextPiece) textIt.next();
>+ do {
>+ // If all of the current run is after the current piece, go on to the next piece.
>+ if(currRun.getStart() >= currPiece.getEnd()) {
>+ continue outer;
>+ }
>+ // If the current text run isn't deleted and this piece starts before the
>+ // current run ends, there must be some overlap between these objects.
>+ if(!isDeleted(currRun.getGrpprl()) && currPiece.getStart() < currRun.getEnd()) {
>+ int startIndex = Math.max(currRun.getStart() - currPiece.getStart(), 0);
>+ int endIndex = Math.min(currRun.getEnd(), currPiece.getEnd()) - currPiece.getStart();
>+ String str = currPiece.substring(startIndex, endIndex);
>+ finalTextBuf.append(str);
>+
>+ // if this run ends after the current piece ends, go on to the next piece
>+ // while still using the current run.
>+ if(currRun.getEnd() >= currPiece.getEnd()) {
>+ continue outer;
>+ }
>+ }
>+
>+ if(runIt.hasNext()) {
>+ currRun = (CHPX) runIt.next();
>+ }
>+ }while(runIt.hasNext());
> }
>+
> return finalTextBuf.toString();
> }
>
You cannot view the attachment while viewing its details because your browser does not support IFRAMEs.
View the attachment on a separate page
.
View Attachment As Diff
View Attachment As Raw
Actions:
View
|
Diff
Attachments on
bug 41076
:
19200
|
19768
| 19798 |
22957