Index: src/ooxml/java/org/apache/poi/xwpf/model/XWPFHeaderFooterPolicy.java =================================================================== --- src/ooxml/java/org/apache/poi/xwpf/model/XWPFHeaderFooterPolicy.java (revision 807220) +++ src/ooxml/java/org/apache/poi/xwpf/model/XWPFHeaderFooterPolicy.java (working copy) @@ -83,19 +83,26 @@ private XWPFHeader defaultHeader; private XWPFFooter defaultFooter; - + /** + * Figures out the policy for the given document, + * and creates any header and footer objects + * as required. + */ + public XWPFHeaderFooterPolicy(XWPFDocument doc) throws IOException, XmlException { + this(doc, doc.getDocument().getBody().getSectPr()); + } + /** * Figures out the policy for the given document, * and creates any header and footer objects * as required. */ - public XWPFHeaderFooterPolicy(XWPFDocument doc) throws IOException, XmlException { + public XWPFHeaderFooterPolicy(XWPFDocument doc, CTSectPr sectPr) throws IOException, XmlException { // Grab what headers and footers have been defined // For now, we don't care about different ranges, as it // doesn't seem that .docx properly supports that // feature of the file format yet this.doc = doc; - CTSectPr sectPr = doc.getDocument().getBody().getSectPr(); for(int i=0; i i = document.getParagraphsIterator(); while(i.hasNext()) { - XWPFParagraphDecorator decorator = new XWPFCommentsDecorator( - new XWPFHyperlinkDecorator(i.next(), null, fetchHyperlinks)); - text.append(decorator.getText()+"\n"); - } + XWPFParagraph paragraph = i.next(); + + try { + CTSectPr ctSectPr = null; + if (paragraph.getCTP().getPPr()!=null) { + ctSectPr = paragraph.getCTP().getPPr().getSectPr(); + } + + XWPFHeaderFooterPolicy headerFooterPolicy = null; + + if (ctSectPr!=null) { + headerFooterPolicy = new XWPFHeaderFooterPolicy(document, ctSectPr); + + extractHeaders(text, headerFooterPolicy); + } + + XWPFParagraphDecorator decorator = new XWPFCommentsDecorator( + new XWPFHyperlinkDecorator(paragraph, null, fetchHyperlinks)); + text.append(decorator.getText()).append('\n'); + + if (ctSectPr!=null) { + extractFooters(text, headerFooterPolicy); + } + } catch (IOException e) { + throw new POIXMLException(e); + } catch (XmlException e) { + throw new POIXMLException(e); + } + } + // Then our table based text Iterator j = document.getTablesIterator(); while(j.hasNext()) { - text.append(j.next().getText()+"\n"); + text.append(j.next().getText()).append('\n'); } // Finish up with all the footers - // TODO - put them in where they're needed - if(hfPolicy.getFirstPageFooter() != null) { - text.append( hfPolicy.getFirstPageFooter().getText() ); - } - if(hfPolicy.getEvenPageFooter() != null) { - text.append( hfPolicy.getEvenPageFooter().getText() ); - } - if(hfPolicy.getDefaultFooter() != null) { - text.append( hfPolicy.getDefaultFooter().getText() ); - } + extractFooters(text, hfPolicy); return text.toString(); } + + private void extractFooters(StringBuffer text, XWPFHeaderFooterPolicy hfPolicy) { + if(hfPolicy.getFirstPageFooter() != null) { + text.append( hfPolicy.getFirstPageFooter().getText() ); + } + if(hfPolicy.getEvenPageFooter() != null) { + text.append( hfPolicy.getEvenPageFooter().getText() ); + } + if(hfPolicy.getDefaultFooter() != null) { + text.append( hfPolicy.getDefaultFooter().getText() ); + } + } + + private void extractHeaders(StringBuffer text, XWPFHeaderFooterPolicy hfPolicy) { + if(hfPolicy.getFirstPageHeader() != null) { + text.append( hfPolicy.getFirstPageHeader().getText() ); + } + if(hfPolicy.getEvenPageHeader() != null) { + text.append( hfPolicy.getEvenPageHeader().getText() ); + } + if(hfPolicy.getDefaultHeader() != null) { + text.append( hfPolicy.getDefaultHeader().getText() ); + } + } } Index: src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java =================================================================== --- src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java (revision 807220) +++ src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java (working copy) @@ -198,4 +198,13 @@ assertTrue(extractor.getText().contains("extremely well")); } + public void testParagraphHeader() { + XWPFDocument doc = XWPFTestDataSamples.openSampleDocument("Headers.docx"); + XWPFWordExtractor extractor = new XWPFWordExtractor(doc); + + assertTrue(extractor.getText().contains("Section 1")); + assertTrue(extractor.getText().contains("Section 2")); + assertTrue(extractor.getText().contains("Section 3")); + } + }