View | Details | Raw Unified | Return to bug 48426
Collapse All | Expand All

(-)src/ooxml/java/org/apache/poi/xslf/usermodel/XSLFCommonSlideData.java (+67 lines)
Line 0 Link Here
1
package org.apache.poi.xslf.usermodel;
2
3
import org.apache.xmlbeans.XmlCursor;
4
import org.apache.xmlbeans.XmlObject;
5
import org.openxmlformats.schemas.drawingml.x2006.main.CTGraphicalObjectData;
6
import org.openxmlformats.schemas.drawingml.x2006.main.CTTable;
7
import org.openxmlformats.schemas.drawingml.x2006.main.CTTextBody;
8
import org.openxmlformats.schemas.presentationml.x2006.main.CTCommonSlideData;
9
import org.openxmlformats.schemas.presentationml.x2006.main.CTGraphicalObjectFrame;
10
import org.openxmlformats.schemas.presentationml.x2006.main.CTGroupShape;
11
import org.openxmlformats.schemas.presentationml.x2006.main.CTShape;
12
13
import java.util.ArrayList;
14
import java.util.Arrays;
15
import java.util.List;
16
17
public class XSLFCommonSlideData {
18
    private final CTCommonSlideData data;
19
20
    public XSLFCommonSlideData(CTCommonSlideData data) {
21
        this.data = data;
22
    }
23
24
    public List<DrawingParagraph> getText() {
25
        CTGroupShape gs = data.getSpTree();
26
27
        List<DrawingParagraph> out = new ArrayList<DrawingParagraph>();
28
29
        CTShape[] shapes = gs.getSpArray();
30
        for (int i = 0; i < shapes.length; i++) {
31
            CTTextBody ctTextBody = shapes[i].getTxBody();
32
            if (ctTextBody==null) {
33
                continue;
34
            }
35
36
            DrawingTextBody textBody = new DrawingTextBody(ctTextBody);
37
38
            out.addAll(Arrays.asList(textBody.getParagraphs()));
39
        }
40
41
        CTGraphicalObjectFrame[] graphicFrames = gs.getGraphicFrameArray();
42
        for (CTGraphicalObjectFrame frame: graphicFrames) {
43
            CTGraphicalObjectData data = frame.getGraphic().getGraphicData();
44
            XmlCursor c = data.newCursor();
45
            c.selectPath("./*");
46
47
            while (c.toNextSelection()) {
48
                XmlObject o = c.getObject();
49
50
                if (o instanceof CTTable) {
51
                    DrawingTable table = new DrawingTable((CTTable) o);
52
53
                    for (DrawingTableRow row : table.getRows()) {
54
                        for (DrawingTableCell cell : row.getCells()) {
55
                            DrawingTextBody textBody = cell.getTextBody();
56
57
                            out.addAll(Arrays.asList(textBody.getParagraphs()));
58
                        }
59
                    }
60
                }
61
            }
62
        }
63
64
        return out;
65
    }
66
67
}
(-)src/ooxml/java/org/apache/poi/xslf/usermodel/DrawingParagraph.java (+33 lines)
Line 0 Link Here
1
package org.apache.poi.xslf.usermodel;
2
3
import org.openxmlformats.schemas.drawingml.x2006.main.CTTextParagraph;
4
import org.openxmlformats.schemas.drawingml.x2006.main.CTRegularTextRun;
5
import org.openxmlformats.schemas.drawingml.x2006.main.CTTextLineBreak;
6
import org.apache.xmlbeans.XmlCursor;
7
import org.apache.xmlbeans.XmlObject;
8
9
public class DrawingParagraph {
10
    private final CTTextParagraph p;
11
12
    public DrawingParagraph(CTTextParagraph p) {
13
        this.p = p;
14
    }
15
16
    public CharSequence getText() {
17
        StringBuilder text = new StringBuilder();
18
19
        XmlCursor c = p.newCursor();
20
        c.selectPath("./*");
21
        while (c.toNextSelection()) {
22
            XmlObject o = c.getObject();
23
            if (o instanceof CTRegularTextRun) {
24
                CTRegularTextRun txrun = (CTRegularTextRun) o;
25
                text.append(txrun.getT());
26
            } else if (o instanceof CTTextLineBreak) {
27
                text.append('\n');
28
            }
29
        }
30
        
31
        return text;
32
    }
33
}
(-)src/ooxml/java/org/apache/poi/xslf/usermodel/DrawingTable.java (+23 lines)
Line 0 Link Here
1
package org.apache.poi.xslf.usermodel;
2
3
import org.openxmlformats.schemas.drawingml.x2006.main.CTTable;
4
import org.openxmlformats.schemas.drawingml.x2006.main.CTTableRow;
5
6
public class DrawingTable {
7
    private final CTTable table;
8
9
    public DrawingTable(CTTable table) {
10
        this.table = table;
11
    }
12
13
    public DrawingTableRow[] getRows() {
14
        CTTableRow[] ctTableRows = table.getTrArray();
15
        DrawingTableRow[] o = new DrawingTableRow[ctTableRows.length];
16
17
        for (int i=0; i<o.length; i++) {
18
            o[i] = new DrawingTableRow(ctTableRows[i]);
19
        }
20
21
        return o;
22
    }
23
}
(-)src/ooxml/java/org/apache/poi/xslf/usermodel/DrawingTableCell.java (+17 lines)
Line 0 Link Here
1
package org.apache.poi.xslf.usermodel;
2
3
import org.openxmlformats.schemas.drawingml.x2006.main.CTTableCell;
4
5
public class DrawingTableCell {
6
    private final CTTableCell cell;
7
    private final DrawingTextBody drawingTextBody;
8
9
    public DrawingTableCell(CTTableCell cell) {
10
        this.cell = cell;
11
        drawingTextBody = new DrawingTextBody(this.cell.getTxBody());
12
    }
13
14
    public DrawingTextBody getTextBody() {
15
        return drawingTextBody;
16
    }
17
}
(-)src/ooxml/java/org/apache/poi/xslf/usermodel/DrawingTableRow.java (+23 lines)
Line 0 Link Here
1
package org.apache.poi.xslf.usermodel;
2
3
import org.openxmlformats.schemas.drawingml.x2006.main.CTTableRow;
4
import org.openxmlformats.schemas.drawingml.x2006.main.CTTableCell;
5
6
public class DrawingTableRow {
7
    private final CTTableRow row;
8
9
    public DrawingTableRow(CTTableRow row) {
10
        this.row = row;
11
    }
12
13
    public DrawingTableCell[] getCells() {
14
        CTTableCell[] ctTableCells = row.getTcArray();
15
        DrawingTableCell[] o = new DrawingTableCell[ctTableCells.length];
16
17
        for (int i=0; i<o.length; i++) {
18
            o[i] = new DrawingTableCell(ctTableCells[i]);
19
        }
20
21
        return o;
22
    }
23
}
(-)src/ooxml/java/org/apache/poi/xslf/usermodel/XSLFSlide.java (+6 lines)
Lines 26-36 Link Here
26
public class XSLFSlide extends XSLFSheet implements Slide {
26
public class XSLFSlide extends XSLFSheet implements Slide {
27
	private CTSlide slide;
27
	private CTSlide slide;
28
	private CTSlideIdListEntry slideId;
28
	private CTSlideIdListEntry slideId;
29
    private XSLFCommonSlideData data;
29
	
30
	
30
	public XSLFSlide(CTSlide slide, CTSlideIdListEntry slideId, SlideShow parent) {
31
	public XSLFSlide(CTSlide slide, CTSlideIdListEntry slideId, SlideShow parent) {
31
		super(parent);
32
		super(parent);
32
		this.slide = slide;
33
		this.slide = slide;
33
		this.slideId = slideId;
34
		this.slideId = slideId;
35
        this.data = new XSLFCommonSlideData(slide.getCSld());
34
	}
36
	}
35
	
37
	
36
	/**
38
	/**
Lines 88-91 Link Here
88
		// TODO Auto-generated method stub
90
		// TODO Auto-generated method stub
89
91
90
	}
92
	}
93
94
    /**
     * Returns the wrapper around this slide's common slide data, which the
     * constructor builds from {@code slide.getCSld()}.
     *
     * @return the common slide data wrapper held by this slide
     */
    public XSLFCommonSlideData getCommonSlideData() {
95
        return data;
96
    }
91
}
97
}
(-)src/ooxml/java/org/apache/poi/xslf/usermodel/DrawingTextBody.java (+23 lines)
Line 0 Link Here
1
package org.apache.poi.xslf.usermodel;
2
3
import org.openxmlformats.schemas.drawingml.x2006.main.CTTextBody;
4
import org.openxmlformats.schemas.drawingml.x2006.main.CTTextParagraph;
5
6
public class DrawingTextBody {
7
    private final CTTextBody textBody;
8
9
    public DrawingTextBody(CTTextBody textBody) {
10
        this.textBody = textBody;
11
    }
12
13
    public DrawingParagraph[] getParagraphs() {
14
        CTTextParagraph[] pArray = textBody.getPArray();
15
        DrawingParagraph[] o = new DrawingParagraph[pArray.length];
16
17
        for (int i=0; i<o.length; i++) {
18
            o[i] = new DrawingParagraph(pArray[i]);
19
        }
20
21
        return o;
22
    }
23
}
(-)src/ooxml/java/org/apache/poi/xslf/extractor/XSLFPowerPointExtractor.java (-44 / +14 lines)
Lines 16-44 Link Here
16
==================================================================== */
16
==================================================================== */
17
package org.apache.poi.xslf.extractor;
17
package org.apache.poi.xslf.extractor;
18
18
19
import java.io.IOException;
20
21
import org.apache.poi.POIXMLTextExtractor;
19
import org.apache.poi.POIXMLTextExtractor;
22
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
20
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
23
import org.apache.poi.openxml4j.opc.OPCPackage;
21
import org.apache.poi.openxml4j.opc.OPCPackage;
24
import org.apache.poi.xslf.XSLFSlideShow;
22
import org.apache.poi.xslf.XSLFSlideShow;
23
import org.apache.poi.xslf.usermodel.DrawingParagraph;
25
import org.apache.poi.xslf.usermodel.XMLSlideShow;
24
import org.apache.poi.xslf.usermodel.XMLSlideShow;
25
import org.apache.poi.xslf.usermodel.XSLFCommonSlideData;
26
import org.apache.poi.xslf.usermodel.XSLFSlide;
26
import org.apache.poi.xslf.usermodel.XSLFSlide;
27
import org.apache.xmlbeans.XmlException;
27
import org.apache.xmlbeans.XmlException;
28
import org.apache.xmlbeans.XmlObject;
28
import org.openxmlformats.schemas.presentationml.x2006.main.*;
29
import org.apache.xmlbeans.XmlCursor;
30
import org.openxmlformats.schemas.drawingml.x2006.main.CTRegularTextRun;
31
import org.openxmlformats.schemas.drawingml.x2006.main.CTTextBody;
32
import org.openxmlformats.schemas.drawingml.x2006.main.CTTextParagraph;
33
import org.openxmlformats.schemas.drawingml.x2006.main.CTTextLineBreak;
34
import org.openxmlformats.schemas.presentationml.x2006.main.CTComment;
35
import org.openxmlformats.schemas.presentationml.x2006.main.CTCommentList;
36
import org.openxmlformats.schemas.presentationml.x2006.main.CTGroupShape;
37
import org.openxmlformats.schemas.presentationml.x2006.main.CTNotesSlide;
38
import org.openxmlformats.schemas.presentationml.x2006.main.CTShape;
39
import org.openxmlformats.schemas.presentationml.x2006.main.CTSlide;
40
import org.openxmlformats.schemas.presentationml.x2006.main.CTSlideIdListEntry;
41
29
30
import java.io.IOException;
31
42
public class XSLFPowerPointExtractor extends POIXMLTextExtractor {
32
public class XSLFPowerPointExtractor extends POIXMLTextExtractor {
43
	private XMLSlideShow slideshow;
33
	private XMLSlideShow slideshow;
44
	private boolean slidesByDefault = true;
34
	private boolean slidesByDefault = true;
Lines 110-116 Link Here
110
					slideshow._getXSLFSlideShow().getSlideComments(slideId);
100
					slideshow._getXSLFSlideShow().getSlideComments(slideId);
111
				
101
				
112
				if(slideText) {
102
				if(slideText) {
113
					extractText(rawSlide.getCSld().getSpTree(), text);
103
					extractText(slides[i].getCommonSlideData(), text);
114
					
104
					
115
					// Comments too for the slide
105
					// Comments too for the slide
116
					if(comments != null) {
106
					if(comments != null) {
Lines 123-130 Link Here
123
						}
113
						}
124
					}
114
					}
125
				}
115
				}
116
126
				if(notesText && notes != null) {
117
				if(notesText && notes != null) {
127
					extractText(notes.getCSld().getSpTree(), text);
118
					extractText(new XSLFCommonSlideData(notes.getCSld()), text);
128
				}
119
				}
129
			} catch(Exception e) {
120
			} catch(Exception e) {
130
				throw new RuntimeException(e);
121
				throw new RuntimeException(e);
Lines 134-164 Link Here
134
		return text.toString();
125
		return text.toString();
135
	}
126
	}
136
	
127
	
137
	private void extractText(CTGroupShape gs, StringBuffer text) {
128
	private void extractText(XSLFCommonSlideData data, StringBuffer text) {
138
		CTShape[] shapes = gs.getSpArray();
129
        for (DrawingParagraph p : data.getText()) {
139
		for (int i = 0; i < shapes.length; i++) {
130
            text.append(p.getText());
140
			CTTextBody textBody =
131
            text.append("\n");
141
				shapes[i].getTxBody();
132
        }
142
			if(textBody != null) {
133
    }
143
				CTTextParagraph[] paras = 
144
					textBody.getPArray();
145
				for (int j = 0; j < paras.length; j++) {
146
                    XmlCursor c = paras[j].newCursor();
147
                    c.selectPath("./*");
148
                    while (c.toNextSelection()) {
149
                        XmlObject o = c.getObject();
150
                        if(o instanceof CTRegularTextRun){
151
                            CTRegularTextRun txrun = (CTRegularTextRun)o;
152
                            text.append( txrun.getT() );
153
                        } else if (o instanceof CTTextLineBreak){
154
                            text.append('\n');
155
                        }
156
                    }
157
                    
158
					// End each paragraph with a new line
159
					text.append("\n");
160
				}
161
			}
162
		}
163
	}
164
}
134
}
(-)src/ooxml/testcases/org/apache/poi/xslf/extractor/TestXSLFPowerPointExtractor.java (+13 lines)
Lines 113-116 Link Here
113
		// Check comments are there
113
		// Check comments are there
114
		assertTrue("Unable to find expected word in text\n" + text, text.contains("testdoc"));
114
		assertTrue("Unable to find expected word in text\n" + text, text.contains("testdoc"));
115
	}
115
	}
116
117
    /**
     * Opens the sample presentation "present1.pptx", runs the extractor over
     * it and checks that the extracted text is non-empty and contains the
     * word "TEST".
     */
    public void testTable() throws Exception {
118
        POIDataSamples slTests = POIDataSamples.getSlideShowInstance();
119
        xmlA = new XSLFSlideShow(OPCPackage.open(slTests.openResourceAsStream("present1.pptx")));
120
        XSLFPowerPointExtractor extractor =
121
            new XSLFPowerPointExtractor(xmlA);
122
123
        String text = extractor.getText();
124
        assertTrue(text.length() > 0);
125
126
        // Check the expected word was extracted (presumably it lives in a table of the sample file - confirm)
127
        assertTrue("Unable to find expected word in text\n" + text, text.contains("TEST"));
128
    }
116
}
129
}

Return to bug 48426