--- WordToFoExtractor.java (revision 1139204) +++ WordToFoExtractor.java (working copy) @@ -242,6 +242,15 @@ { CharacterRun characterRun = paragraph.getCharacterRun( c ); + if ( characterRun == null ) + { + logger.log( POILogger.WARN, + "Paragraph " + paragraph.getStartOffset() + "--" + + paragraph.getEndOffset() + + " contains null CharacterRun #" + c ); + continue; + } + if ( hwpfDocument.getPicturesTable().hasPicture( characterRun ) ) { Picture picture = hwpfDocument.getPicturesTable() @@ -345,28 +354,46 @@ if ( separatorMark - beginMark > 1 ) { - CharacterRun firstAfterBegin = paragraph - .getCharacterRun( beginMark + 1 ); - - final Matcher hyperlinkMatcher = hyperlinkPattern - .matcher( firstAfterBegin.text() ); - if ( hyperlinkMatcher.matches() ) + int index = beginMark + 1; + CharacterRun firstAfterBegin = null; + while ( index < separatorMark ) { - String hyperlink = hyperlinkMatcher.group( 1 ); - processHyperlink( hwpfDocument, currentBlock, paragraph, - currentTableLevel, hyperlink, separatorMark + 1, - endMark ); - return; + firstAfterBegin = paragraph.getCharacterRun( index ); + if ( firstAfterBegin == null ) + { + logger.log( POILogger.WARN, + "Paragraph " + paragraph.getStartOffset() + "--" + + paragraph.getEndOffset() + + " contains null CharacterRun #" + index ); + index++; + continue; + } + break; } - final Matcher pagerefMatcher = pagerefPattern - .matcher( firstAfterBegin.text() ); - if ( pagerefMatcher.matches() ) + if ( firstAfterBegin != null ) { - String pageref = pagerefMatcher.group( 1 ); - processPageref( hwpfDocument, currentBlock, paragraph, - currentTableLevel, pageref, separatorMark + 1, endMark ); - return; + final Matcher hyperlinkMatcher = hyperlinkPattern + .matcher( firstAfterBegin.text() ); + if ( hyperlinkMatcher.matches() ) + { + String hyperlink = hyperlinkMatcher.group( 1 ); + processHyperlink( hwpfDocument, currentBlock, paragraph, + currentTableLevel, hyperlink, separatorMark + 1, + endMark ); + return; + } + + final Matcher pagerefMatcher = pagerefPattern + .matcher( firstAfterBegin.text() ); + if ( pagerefMatcher.matches() ) + { + String pageref = pagerefMatcher.group( 1 ); + processPageref( hwpfDocument, currentBlock, paragraph, + currentTableLevel, pageref, separatorMark + 1, + endMark ); + return; + } } } @@ -461,10 +488,20 @@ final boolean pItalic; { CharacterRun characterRun = paragraph.getCharacterRun( 0 ); - pFontSize = characterRun.getFontSize() / 2; - pFontName = characterRun.getFontName(); - pBold = characterRun.isBold(); - pItalic = characterRun.isItalic(); + if (characterRun != null) + { + pFontSize = characterRun.getFontSize() / 2; + pFontName = characterRun.getFontName(); + pBold = characterRun.isBold(); + pItalic = characterRun.isItalic(); + } + else + { + pFontSize = -1; + pFontName = WordToFoUtils.EMPTY; + pBold = false; + pItalic = false; + } } WordToFoUtils.setFontFamily( block, pFontName ); WordToFoUtils.setFontSize( block, pFontSize ); @@ -723,6 +760,15 @@ { CharacterRun characterRun = paragraph.getCharacterRun( c ); + if ( characterRun == null ) + { + logger.log( POILogger.WARN, + "Paragraph " + paragraph.getStartOffset() + "--" + + paragraph.getEndOffset() + + " contains null CharacterRun #" + c ); + continue; + } + String text = characterRun.text(); if ( text.getBytes().length == 0 ) continue;