View | Details | Raw Unified | Return to bug 47727
Collapse All | Expand All

(-)src/ooxml/java/org/apache/poi/xwpf/model/XWPFHeaderFooterPolicy.java (-3 / +10 lines)
Lines 83-101 Link Here
83
	private XWPFHeader defaultHeader;
83
	private XWPFHeader defaultHeader;
84
	private XWPFFooter defaultFooter;
84
	private XWPFFooter defaultFooter;
85
	
85
	
86
	
86
        /**
87
         * Figures out the policy for the given document,
88
         *  and creates any header and footer objects
89
         *  as required.
90
         */
91
        public XWPFHeaderFooterPolicy(XWPFDocument doc) throws IOException, XmlException {
92
                this(doc, doc.getDocument().getBody().getSectPr());
93
        }
94
87
	/**
95
	/**
88
	 * Figures out the policy for the given document,
96
	 * Figures out the policy for the given document,
89
	 *  and creates any header and footer objects
97
	 *  and creates any header and footer objects
90
	 *  as required.
98
	 *  as required.
91
	 */
99
	 */
92
	public XWPFHeaderFooterPolicy(XWPFDocument doc) throws IOException, XmlException {
100
	public XWPFHeaderFooterPolicy(XWPFDocument doc, CTSectPr sectPr) throws IOException, XmlException {
93
		// Grab what headers and footers have been defined
101
		// Grab what headers and footers have been defined
94
		// For now, we don't care about different ranges, as it
102
		// For now, we don't care about different ranges, as it
95
		//  doesn't seem that .docx properly supports that
103
		//  doesn't seem that .docx properly supports that
96
		//  feature of the file format yet
104
		//  feature of the file format yet
97
		this.doc = doc;
105
		this.doc = doc;
98
		CTSectPr sectPr = doc.getDocument().getBody().getSectPr();
99
		for(int i=0; i<sectPr.sizeOfHeaderReferenceArray(); i++) {
106
		for(int i=0; i<sectPr.sizeOfHeaderReferenceArray(); i++) {
100
			// Get the header
107
			// Get the header
101
			CTHdrFtrRef ref = sectPr.getHeaderReferenceArray(i);
108
			CTHdrFtrRef ref = sectPr.getHeaderReferenceArray(i);
(-)src/ooxml/java/org/apache/poi/xwpf/extractor/XWPFWordExtractor.java (-26 / +60 lines)
Lines 21-26 Link Here
21
21
22
import org.apache.poi.POIXMLDocument;
22
import org.apache.poi.POIXMLDocument;
23
import org.apache.poi.POIXMLTextExtractor;
23
import org.apache.poi.POIXMLTextExtractor;
24
import org.apache.poi.POIXMLException;
24
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
25
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
25
import org.apache.poi.openxml4j.opc.OPCPackage;
26
import org.apache.poi.openxml4j.opc.OPCPackage;
26
import org.apache.poi.xwpf.model.XWPFCommentsDecorator;
27
import org.apache.poi.xwpf.model.XWPFCommentsDecorator;
Lines 31-36 Link Here
31
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
32
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
32
import org.apache.poi.xwpf.usermodel.XWPFTable;
33
import org.apache.poi.xwpf.usermodel.XWPFTable;
33
import org.apache.xmlbeans.XmlException;
34
import org.apache.xmlbeans.XmlException;
35
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSectPr;
34
36
35
/**
37
/**
36
 * Helper class to extract text from an OOXML Word file
38
 * Helper class to extract text from an OOXML Word file
Lines 72-116 Link Here
72
	public String getText() {
74
	public String getText() {
73
		StringBuffer text = new StringBuffer();
75
		StringBuffer text = new StringBuffer();
74
		XWPFHeaderFooterPolicy hfPolicy = document.getHeaderFooterPolicy();
76
		XWPFHeaderFooterPolicy hfPolicy = document.getHeaderFooterPolicy();
75
		
77
76
		// Start out with all headers
78
		// Start out with all headers
77
		// TODO - put them in where they're needed
79
                extractHeaders(text, hfPolicy);
78
		if(hfPolicy.getFirstPageHeader() != null) {
79
			text.append( hfPolicy.getFirstPageHeader().getText() );
80
		}
81
		if(hfPolicy.getEvenPageHeader() != null) {
82
			text.append( hfPolicy.getEvenPageHeader().getText() );
83
		}
84
		if(hfPolicy.getDefaultHeader() != null) {
85
			text.append( hfPolicy.getDefaultHeader().getText() );
86
		}
87
		
80
		
88
		// First up, all our paragraph based text
81
		// First up, all our paragraph based text
89
		Iterator<XWPFParagraph> i = document.getParagraphsIterator();
82
		Iterator<XWPFParagraph> i = document.getParagraphsIterator();
90
		while(i.hasNext()) {
83
		while(i.hasNext()) {
91
			XWPFParagraphDecorator decorator = new XWPFCommentsDecorator(
84
                        XWPFParagraph paragraph = i.next();
92
					new XWPFHyperlinkDecorator(i.next(), null, fetchHyperlinks));
93
			text.append(decorator.getText()+"\n");
94
		}
95
85
86
87
                        try {
88
                                CTSectPr ctSectPr = null;
89
                                if (paragraph.getCTP().getPPr()!=null) {
90
                                        ctSectPr = paragraph.getCTP().getPPr().getSectPr();
91
                                }
92
93
                                XWPFHeaderFooterPolicy headerFooterPolicy = null;
94
95
                                if (ctSectPr!=null) {
96
                                        headerFooterPolicy = new XWPFHeaderFooterPolicy(document, ctSectPr);
97
98
                                        extractHeaders(text, headerFooterPolicy);
99
                                }
100
101
                                XWPFParagraphDecorator decorator = new XWPFCommentsDecorator(
102
                                                new XWPFHyperlinkDecorator(paragraph, null, fetchHyperlinks));
103
                                text.append(decorator.getText()).append('\n');
104
105
                                if (ctSectPr!=null) {
106
                                        extractFooters(text, headerFooterPolicy);
107
                                }
108
                        } catch (IOException e) {
109
                                throw new POIXMLException(e);
110
                        } catch (XmlException e) {
111
                                throw new POIXMLException(e);
112
                        }
113
                }
114
96
		// Then our table based text
115
		// Then our table based text
97
		Iterator<XWPFTable> j = document.getTablesIterator();
116
		Iterator<XWPFTable> j = document.getTablesIterator();
98
		while(j.hasNext()) {
117
		while(j.hasNext()) {
99
			text.append(j.next().getText()+"\n");
118
                        text.append(j.next().getText()).append('\n');
100
		}
119
		}
101
		
120
		
102
		// Finish up with all the footers
121
		// Finish up with all the footers
103
		// TODO - put them in where they're needed
122
                extractFooters(text, hfPolicy);
104
		if(hfPolicy.getFirstPageFooter() != null) {
105
			text.append( hfPolicy.getFirstPageFooter().getText() );
106
		}
107
		if(hfPolicy.getEvenPageFooter() != null) {
108
			text.append( hfPolicy.getEvenPageFooter().getText() );
109
		}
110
		if(hfPolicy.getDefaultFooter() != null) {
111
			text.append( hfPolicy.getDefaultFooter().getText() );
112
		}
113
		
123
		
114
		return text.toString();
124
		return text.toString();
115
	}
125
	}
126
127
        private void extractFooters(StringBuffer text, XWPFHeaderFooterPolicy hfPolicy) {
128
                if(hfPolicy.getFirstPageFooter() != null) {
129
                        text.append( hfPolicy.getFirstPageFooter().getText() );
130
                }
131
                if(hfPolicy.getEvenPageFooter() != null) {
132
                        text.append( hfPolicy.getEvenPageFooter().getText() );
133
                }
134
                if(hfPolicy.getDefaultFooter() != null) {
135
                        text.append( hfPolicy.getDefaultFooter().getText() );
136
                }
137
        }
138
139
        private void extractHeaders(StringBuffer text, XWPFHeaderFooterPolicy hfPolicy) {
140
                if(hfPolicy.getFirstPageHeader() != null) {
141
                        text.append( hfPolicy.getFirstPageHeader().getText() );
142
                }
143
                if(hfPolicy.getEvenPageHeader() != null) {
144
                        text.append( hfPolicy.getEvenPageHeader().getText() );
145
                }
146
                if(hfPolicy.getDefaultHeader() != null) {
147
                        text.append( hfPolicy.getDefaultHeader().getText() );
148
                }
149
        }
116
}
150
}
(-)src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java (+9 lines)
Lines 198-201 Link Here
198
        assertTrue(extractor.getText().contains("extremely well"));
198
        assertTrue(extractor.getText().contains("extremely well"));
199
    }
199
    }
200
200
201
    public void testParagraphHeader() {
202
        XWPFDocument doc = XWPFTestDataSamples.openSampleDocument("Headers.docx");
203
        XWPFWordExtractor extractor = new XWPFWordExtractor(doc);
204
205
        assertTrue(extractor.getText().contains("Section 1"));
206
        assertTrue(extractor.getText().contains("Section 2"));
207
        assertTrue(extractor.getText().contains("Section 3"));
208
    }
209
201
}
210
}

Return to bug 47727