Link Here
|
16 |
==================================================================== */ |
16 |
==================================================================== */ |
17 |
package org.apache.poi.xslf.extractor; |
17 |
package org.apache.poi.xslf.extractor; |
18 |
|
18 |
|
19 |
import java.io.IOException; |
|
|
20 |
|
21 |
import org.apache.poi.POIXMLTextExtractor; |
19 |
import org.apache.poi.POIXMLTextExtractor; |
22 |
import org.apache.poi.openxml4j.exceptions.OpenXML4JException; |
20 |
import org.apache.poi.openxml4j.exceptions.OpenXML4JException; |
23 |
import org.apache.poi.openxml4j.opc.OPCPackage; |
21 |
import org.apache.poi.openxml4j.opc.OPCPackage; |
24 |
import org.apache.poi.xslf.XSLFSlideShow; |
22 |
import org.apache.poi.xslf.XSLFSlideShow; |
|
|
23 |
import org.apache.poi.xslf.usermodel.DrawingParagraph; |
25 |
import org.apache.poi.xslf.usermodel.XMLSlideShow; |
24 |
import org.apache.poi.xslf.usermodel.XMLSlideShow; |
|
|
25 |
import org.apache.poi.xslf.usermodel.XSLFCommonSlideData; |
26 |
import org.apache.poi.xslf.usermodel.XSLFSlide; |
26 |
import org.apache.poi.xslf.usermodel.XSLFSlide; |
27 |
import org.apache.xmlbeans.XmlException; |
27 |
import org.apache.xmlbeans.XmlException; |
28 |
import org.apache.xmlbeans.XmlObject; |
28 |
import org.openxmlformats.schemas.presentationml.x2006.main.*; |
29 |
import org.apache.xmlbeans.XmlCursor; |
|
|
30 |
import org.openxmlformats.schemas.drawingml.x2006.main.CTRegularTextRun; |
31 |
import org.openxmlformats.schemas.drawingml.x2006.main.CTTextBody; |
32 |
import org.openxmlformats.schemas.drawingml.x2006.main.CTTextParagraph; |
33 |
import org.openxmlformats.schemas.drawingml.x2006.main.CTTextLineBreak; |
34 |
import org.openxmlformats.schemas.presentationml.x2006.main.CTComment; |
35 |
import org.openxmlformats.schemas.presentationml.x2006.main.CTCommentList; |
36 |
import org.openxmlformats.schemas.presentationml.x2006.main.CTGroupShape; |
37 |
import org.openxmlformats.schemas.presentationml.x2006.main.CTNotesSlide; |
38 |
import org.openxmlformats.schemas.presentationml.x2006.main.CTShape; |
39 |
import org.openxmlformats.schemas.presentationml.x2006.main.CTSlide; |
40 |
import org.openxmlformats.schemas.presentationml.x2006.main.CTSlideIdListEntry; |
41 |
|
29 |
|
|
|
30 |
import java.io.IOException; |
31 |
|
42 |
public class XSLFPowerPointExtractor extends POIXMLTextExtractor { |
32 |
public class XSLFPowerPointExtractor extends POIXMLTextExtractor { |
43 |
private XMLSlideShow slideshow; |
33 |
private XMLSlideShow slideshow; |
44 |
private boolean slidesByDefault = true; |
34 |
private boolean slidesByDefault = true; |
Link Here
|
110 |
slideshow._getXSLFSlideShow().getSlideComments(slideId); |
100 |
slideshow._getXSLFSlideShow().getSlideComments(slideId); |
111 |
|
101 |
|
112 |
if(slideText) { |
102 |
if(slideText) { |
113 |
extractText(rawSlide.getCSld().getSpTree(), text); |
103 |
extractText(slides[i].getCommonSlideData(), text); |
114 |
|
104 |
|
115 |
// Comments too for the slide |
105 |
// Comments too for the slide |
116 |
if(comments != null) { |
106 |
if(comments != null) { |
Link Here
|
123 |
} |
113 |
} |
124 |
} |
114 |
} |
125 |
} |
115 |
} |
|
|
116 |
|
126 |
if(notesText && notes != null) { |
117 |
if(notesText && notes != null) { |
127 |
extractText(notes.getCSld().getSpTree(), text); |
118 |
extractText(new XSLFCommonSlideData(notes.getCSld()), text); |
128 |
} |
119 |
} |
129 |
} catch(Exception e) { |
120 |
} catch(Exception e) { |
130 |
throw new RuntimeException(e); |
121 |
throw new RuntimeException(e); |
Link Here
|
134 |
return text.toString(); |
125 |
return text.toString(); |
135 |
} |
126 |
} |
136 |
|
127 |
|
137 |
private void extractText(CTGroupShape gs, StringBuffer text) { |
128 |
private void extractText(XSLFCommonSlideData data, StringBuffer text) { |
138 |
CTShape[] shapes = gs.getSpArray(); |
129 |
for (DrawingParagraph p : data.getText()) { |
139 |
for (int i = 0; i < shapes.length; i++) { |
130 |
text.append(p.getText()); |
140 |
CTTextBody textBody = |
131 |
text.append("\n"); |
141 |
shapes[i].getTxBody(); |
132 |
} |
142 |
if(textBody != null) { |
133 |
} |
143 |
CTTextParagraph[] paras = |
|
|
144 |
textBody.getPArray(); |
145 |
for (int j = 0; j < paras.length; j++) { |
146 |
XmlCursor c = paras[j].newCursor(); |
147 |
c.selectPath("./*"); |
148 |
while (c.toNextSelection()) { |
149 |
XmlObject o = c.getObject(); |
150 |
if(o instanceof CTRegularTextRun){ |
151 |
CTRegularTextRun txrun = (CTRegularTextRun)o; |
152 |
text.append( txrun.getT() ); |
153 |
} else if (o instanceof CTTextLineBreak){ |
154 |
text.append('\n'); |
155 |
} |
156 |
} |
157 |
|
158 |
// End each paragraph with a new line |
159 |
text.append("\n"); |
160 |
} |
161 |
} |
162 |
} |
163 |
} |
164 |
} |
134 |
} |