@@ -, +, @@ docx: text extraction from deleted/inserted blocks --- a/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFParagraph.java +++ a/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFParagraph.java @@ -18,6 +18,7 @@ package org.apache.poi.xwpf.usermodel; import java.math.BigInteger; import java.util.ArrayList; +import java.util.Arrays; import org.apache.xmlbeans.XmlCursor; import org.apache.xmlbeans.XmlObject; @@ -56,23 +57,24 @@ public class XWPFParagraph { // TODO - replace this with some sort of XPath expression // to directly find all the CTRs, in the right order ArrayList rs = new ArrayList(); - CTR[] tmp; // Get the main text runs - tmp = paragraph.getRArray(); - for (int i = 0; i < tmp.length; i++) { - rs.add(tmp[i]); - } + rs.addAll(Arrays.asList(paragraph.getRArray())); // Not sure quite what these are, but they hold // more text runs CTSdtRun[] sdts = paragraph.getSdtArray(); for (int i = 0; i < sdts.length; i++) { CTSdtContentRun run = sdts[i].getSdtContent(); - tmp = run.getRArray(); - for (int j = 0; j < tmp.length; j++) { - rs.add(tmp[j]); - } + rs.addAll(Arrays.asList(run.getRArray())); + } + + for (CTRunTrackChange c : paragraph.getDelArray()) { + rs.addAll(Arrays.asList(c.getRArray())); + } + + for (CTRunTrackChange c : paragraph.getInsArray()) { + rs.addAll(Arrays.asList(c.getRArray())); } // Get text of the paragraph --- a/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java +++ a/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java @@ -191,6 +191,13 @@ public class TestXWPFWordExtractor extends TestCase { assertTrue(extractor.getText().contains("XXX")); } + public void testInsertedDeletedText() throws Exception { + XWPFDocument doc = open("delins.docx"); + XWPFWordExtractor extractor = new XWPFWordExtractor(doc); + + assertTrue(extractor.getText().contains("pendant worn")); + assertTrue(extractor.getText().contains("extremely well")); + } //TODO use the same logic for opening test files as in HSSFTestDataSamples private XWPFDocument open(String sampleFileName) throws IOException {