View | Details | Raw Unified | Return to bug 60519
Collapse All | Expand All

(-)src/ooxml/java/org/apache/poi/ss/extractor/EmbeddedData.java (+105 lines)
Line 0 Link Here
1
/* ====================================================================
2
   Licensed to the Apache Software Foundation (ASF) under one or more
3
   contributor license agreements.  See the NOTICE file distributed with
4
   this work for additional information regarding copyright ownership.
5
   The ASF licenses this file to You under the Apache License, Version 2.0
6
   (the "License"); you may not use this file except in compliance with
7
   the License.  You may obtain a copy of the License at
8
9
       http://www.apache.org/licenses/LICENSE-2.0
10
11
   Unless required by applicable law or agreed to in writing, software
12
   distributed under the License is distributed on an "AS IS" BASIS,
13
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
   See the License for the specific language governing permissions and
15
   limitations under the License.
16
==================================================================== */
17
18
package org.apache.poi.ss.extractor;
19
20
import org.apache.poi.ss.usermodel.Shape;
21
22
/**
23
 * A collection of embedded object informations and content
24
 */
25
public class EmbeddedData {
26
    private String filename;
27
    private byte[] embeddedData;
28
    private Shape shape;
29
    private String contentType = "binary/octet-stream";
30
31
    public EmbeddedData(String filename, byte[] embeddedData, String contentType) {
32
        setFilename(filename);
33
        setEmbeddedData(embeddedData);
34
        setContentType(contentType);
35
    }
36
    
37
    /**
38
     * @return the filename
39
     */
40
    public String getFilename() {
41
        return filename;
42
    }
43
    
44
    /**
45
     * Sets the filename 
46
     *
47
     * @param filename the filename
48
     */
49
    public void setFilename(String filename) {
50
        if (filename == null) {
51
            this.filename = "unknown.bin";
52
        } else {
53
            this.filename = filename.replaceAll("[^/\\\\]*[/\\\\]", "").trim();
54
        }
55
    }
56
    
57
    /**
58
     * @return the embedded object byte array
59
     */
60
    public byte[] getEmbeddedData() {
61
        return embeddedData;
62
    }
63
64
    /**
65
     * Sets the embedded object as byte array
66
     *
67
     * @param embeddedData the embedded object byte array
68
     */
69
    public void setEmbeddedData(byte[] embeddedData) {
70
        this.embeddedData = (embeddedData == null) ? null : embeddedData.clone();
71
    }
72
73
    /**
74
     * @return the shape which links to the embedded object
75
     */
76
    public Shape getShape() {
77
        return shape;
78
    }
79
80
    /**
81
     * Sets the shape which links to the embedded object
82
     *
83
     * @param shape the shape
84
     */
85
    public void setShape(Shape shape) {
86
        this.shape = shape;
87
    }
88
89
    /**
90
     * @return the content-/mime-type of the embedded object, the default (if unknown) is {@code binary/octet-stream} 
91
     */
92
    public String getContentType() {
93
        return contentType;
94
    }
95
96
    /**
97
     * Sets the content-/mime-type
98
     *
99
     * @param contentType the content-type
100
     */
101
    public void setContentType(String contentType) {
102
        this.contentType = contentType;
103
    }
104
}
105
native
(-)src/ooxml/java/org/apache/poi/ss/extractor/EmbeddedExtractor.java (+357 lines)
Line 0 Link Here
1
/* ====================================================================
2
   Licensed to the Apache Software Foundation (ASF) under one or more
3
   contributor license agreements.  See the NOTICE file distributed with
4
   this work for additional information regarding copyright ownership.
5
   The ASF licenses this file to You under the Apache License, Version 2.0
6
   (the "License"); you may not use this file except in compliance with
7
   the License.  You may obtain a copy of the License at
8
9
       http://www.apache.org/licenses/LICENSE-2.0
10
11
   Unless required by applicable law or agreed to in writing, software
12
   distributed under the License is distributed on an "AS IS" BASIS,
13
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
   See the License for the specific language governing permissions and
15
   limitations under the License.
16
==================================================================== */
17
18
package org.apache.poi.ss.extractor;
19
20
import java.io.ByteArrayOutputStream;
21
import java.io.File;
22
import java.io.FileInputStream;
23
import java.io.FileOutputStream;
24
import java.io.IOException;
25
import java.io.InputStream;
26
import java.util.ArrayList;
27
import java.util.Arrays;
28
import java.util.Collections;
29
import java.util.Iterator;
30
import java.util.List;
31
32
import org.apache.poi.hpsf.ClassID;
33
import org.apache.poi.poifs.filesystem.DirectoryNode;
34
import org.apache.poi.poifs.filesystem.Entry;
35
import org.apache.poi.poifs.filesystem.Ole10Native;
36
import org.apache.poi.poifs.filesystem.Ole10NativeException;
37
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
38
import org.apache.poi.ss.usermodel.Drawing;
39
import org.apache.poi.ss.usermodel.ObjectData;
40
import org.apache.poi.ss.usermodel.Picture;
41
import org.apache.poi.ss.usermodel.PictureData;
42
import org.apache.poi.ss.usermodel.Shape;
43
import org.apache.poi.ss.usermodel.ShapeContainer;
44
import org.apache.poi.ss.usermodel.Sheet;
45
import org.apache.poi.ss.usermodel.Workbook;
46
import org.apache.poi.ss.usermodel.WorkbookFactory;
47
import org.apache.poi.util.IOUtils;
48
import org.apache.poi.util.LocaleUtil;
49
import org.apache.poi.util.POILogFactory;
50
import org.apache.poi.util.POILogger;
51
52
public class EmbeddedExtractor implements Iterable<EmbeddedExtractor> {
53
    private static final POILogger LOG = POILogFactory.getLogger(EmbeddedExtractor.class);
54
    
55
    /**
56
     * @return the list of known extractors, if you provide custom extractors, override this method
57
     */
58
    @Override
59
    public Iterator<EmbeddedExtractor> iterator() {
60
        EmbeddedExtractor[] ee = {
61
            new Ole10Extractor(), new PdfExtractor(), new WordExtractor(), new ExcelExtractor(), new FsExtractor()
62
        };
63
        return Arrays.asList(ee).iterator();
64
    }
65
66
    public EmbeddedData extractOne(DirectoryNode src) throws IOException {
67
        for (EmbeddedExtractor ee : this) {
68
            if (ee.canExtract(src)) {
69
                return ee.extract(src);
70
            }
71
        }
72
        return null;
73
    }
74
75
    public EmbeddedData extractOne(Picture src) throws IOException {
76
        for (EmbeddedExtractor ee : this) {
77
            if (ee.canExtract(src)) {
78
                return ee.extract(src);
79
            }
80
        }
81
        return null;
82
    }
83
84
    public List<EmbeddedData> extractAll(Sheet sheet) throws IOException {
85
        Drawing<?> patriarch = sheet.getDrawingPatriarch();
86
        if (null == patriarch){
87
            return Collections.emptyList();
88
        }
89
        List<EmbeddedData> embeddings = new ArrayList<EmbeddedData>();
90
        extractAll(patriarch, embeddings);
91
        return embeddings;
92
    }
93
    
94
    protected void extractAll(ShapeContainer<?> parent, List<EmbeddedData> embeddings) throws IOException {
95
        for (Shape shape : parent) {
96
            EmbeddedData data = null;
97
            if (shape instanceof ObjectData) {
98
                ObjectData od = (ObjectData)shape;
99
                try {
100
                    if (od.hasDirectoryEntry()) {
101
                        data = extractOne((DirectoryNode)od.getDirectory());
102
                    } else {
103
                        data = new EmbeddedData(od.getFileName(), od.getObjectData(), "binary/octet-stream");
104
                    }
105
                } catch (Exception e) {
106
                    LOG.log(POILogger.WARN, "Entry not found / readable - ignoring OLE embedding", e);
107
                }
108
            } else if (shape instanceof Picture) {
109
                data = extractOne((Picture)shape);
110
            } else if (shape instanceof ShapeContainer) {
111
                extractAll((ShapeContainer<?>)shape, embeddings);
112
            }
113
            
114
            if (data == null) {
115
                continue;
116
            }
117
118
            data.setShape(shape);
119
            String filename = data.getFilename();
120
            String extension = (filename == null || filename.indexOf('.') == -1) ? ".bin" : filename.substring(filename.indexOf('.'));
121
            
122
            // try to find an alternative name
123
            if (filename == null || "".equals(filename) || filename.startsWith("MBD") || filename.startsWith("Root Entry")) {
124
                filename = shape.getShapeName();
125
                if (filename != null) {
126
                    filename += extension;
127
                }
128
            }
129
            // default to dummy name
130
            if (filename == null || "".equals(filename)) {
131
                filename = "picture_"+embeddings.size()+extension;
132
            }
133
            filename = filename.trim();
134
            data.setFilename(filename);
135
            
136
            embeddings.add(data);
137
        }
138
    }
139
    
140
141
    public boolean canExtract(DirectoryNode source) {
142
        return false;
143
    }
144
145
    public boolean canExtract(Picture source) {
146
        return false;
147
    }
148
149
    protected EmbeddedData extract(DirectoryNode dn) throws IOException {
150
        assert(canExtract(dn));
151
        POIFSFileSystem dest = new POIFSFileSystem();
152
        copyNodes(dn, dest.getRoot());
153
        // start with a reasonable big size
154
        ByteArrayOutputStream bos = new ByteArrayOutputStream(20000);
155
        dest.writeFilesystem(bos);
156
        dest.close();
157
158
        return new EmbeddedData(dn.getName(), bos.toByteArray(), "binary/octet-stream");
159
    }
160
161
    protected EmbeddedData extract(Picture source) throws IOException {
162
        return null;
163
    }
164
    
165
    public static class Ole10Extractor extends EmbeddedExtractor {
166
        @Override
167
        public boolean canExtract(DirectoryNode dn) {
168
            ClassID clsId = dn.getStorageClsid();
169
            return ClassID.OLE10_PACKAGE.equals(clsId);
170
        }
171
172
        @Override
173
        public EmbeddedData extract(DirectoryNode dn) throws IOException {
174
            try {
175
                Ole10Native ole10 = Ole10Native.createFromEmbeddedOleObject(dn);
176
                return new EmbeddedData(ole10.getFileName(), ole10.getDataBuffer(), "binary/octet-stream");
177
            } catch (Ole10NativeException e) {
178
                throw new IOException(e);
179
            }
180
        }
181
    }
182
183
    static class PdfExtractor extends EmbeddedExtractor {
184
        static ClassID PdfClassID = new ClassID("{B801CA65-A1FC-11D0-85AD-444553540000}");
185
        @Override
186
        public boolean canExtract(DirectoryNode dn) {
187
            ClassID clsId = dn.getStorageClsid();
188
            return (PdfClassID.equals(clsId)
189
            || dn.hasEntry("CONTENTS"));
190
        }
191
192
        @Override
193
        public EmbeddedData extract(DirectoryNode dn) throws IOException {
194
            ByteArrayOutputStream bos = new ByteArrayOutputStream();
195
            InputStream is = dn.createDocumentInputStream("CONTENTS");
196
            IOUtils.copy(is, bos);
197
            is.close();
198
            return new EmbeddedData(dn.getName()+".pdf", bos.toByteArray(), "application/pdf");
199
        }
200
        
201
        @Override
202
        public boolean canExtract(Picture source) {
203
            PictureData pd = source.getPictureData();
204
            return (pd.getPictureType() == Workbook.PICTURE_TYPE_EMF);
205
        }
206
207
        /**
208
         * Mac Office encodes embedded objects inside the picture, e.g. PDF is part of an EMF.
209
         * If an embedded stream is inside an EMF picture, this method extracts the payload.
210
         *
211
         * @return the embedded data in an EMF picture or null if none is found
212
         */
213
        @Override
214
        protected EmbeddedData extract(Picture source) throws IOException {
215
            // check for emf+ embedded pdf (poor mans style :( )
216
            // Mac Excel 2011 embeds pdf files with this method.
217
            PictureData pd = source.getPictureData();
218
            if (pd.getPictureType() != Workbook.PICTURE_TYPE_EMF) {
219
                return null;
220
            }
221
222
            // TODO: investigate if this is just an EMF-hack or if other formats are also embedded in EMF
223
            byte pictureBytes[] = pd.getData();
224
            int idxStart = indexOf(pictureBytes, 0, "%PDF-".getBytes(LocaleUtil.CHARSET_1252));
225
            if (idxStart == -1) {
226
                return null;
227
            }
228
            
229
            int idxEnd = indexOf(pictureBytes, idxStart, "%%EOF".getBytes(LocaleUtil.CHARSET_1252));
230
            if (idxEnd == -1) {
231
                return null;
232
            }
233
            
234
            int pictureBytesLen = idxEnd-idxStart+6;
235
            byte[] pdfBytes = new byte[pictureBytesLen];
236
            System.arraycopy(pictureBytes, idxStart, pdfBytes, 0, pictureBytesLen);
237
            String filename = source.getShapeName().trim();
238
            if (!filename.toLowerCase().endsWith(".pdf")) {
239
                filename += ".pdf";
240
            }
241
            return new EmbeddedData(filename, pdfBytes, "application/pdf");
242
        }
243
        
244
245
    }
246
247
    static class WordExtractor extends EmbeddedExtractor {
248
        @Override
249
        public boolean canExtract(DirectoryNode dn) {
250
            ClassID clsId = dn.getStorageClsid();
251
            return (ClassID.WORD95.equals(clsId)
252
            || ClassID.WORD97.equals(clsId)
253
            || dn.hasEntry("WordDocument"));
254
        }
255
256
        @Override
257
        public EmbeddedData extract(DirectoryNode dn) throws IOException {
258
            EmbeddedData ed = super.extract(dn);
259
            ed.setFilename(dn.getName()+".doc");
260
            return ed;
261
        }
262
    }
263
264
    static class ExcelExtractor extends EmbeddedExtractor {
265
        @Override
266
        public boolean canExtract(DirectoryNode dn) {
267
            ClassID clsId = dn.getStorageClsid();
268
            return (ClassID.EXCEL95.equals(clsId)
269
                    || ClassID.EXCEL97.equals(clsId)
270
                    || dn.hasEntry("Workbook") /*...*/);
271
        }
272
        
273
        @Override
274
        public EmbeddedData extract(DirectoryNode dn) throws IOException {
275
            EmbeddedData ed = super.extract(dn);
276
            ed.setFilename(dn.getName()+".xls");
277
            return ed;
278
        }
279
    }
280
281
    static class FsExtractor extends EmbeddedExtractor {
282
        @Override
283
        public boolean canExtract(DirectoryNode dn) {
284
            return true;
285
        }
286
        @Override
287
        public EmbeddedData extract(DirectoryNode dn) throws IOException {
288
            EmbeddedData ed = super.extract(dn);
289
            ed.setFilename(dn.getName()+".ole");
290
            // TODO: read the content type from CombObj stream
291
            return ed;
292
        }
293
    }
294
    
295
    protected static void copyNodes(DirectoryNode src, DirectoryNode dest) throws IOException {
296
        for (Entry e : src) {
297
            if (e instanceof DirectoryNode) {
298
                DirectoryNode srcDir = (DirectoryNode)e;
299
                DirectoryNode destDir = (DirectoryNode)dest.createDirectory(srcDir.getName());
300
                destDir.setStorageClsid(srcDir.getStorageClsid());
301
                copyNodes(srcDir, destDir);
302
            } else {
303
                InputStream is = src.createDocumentInputStream(e);
304
                dest.createDocument(e.getName(), is);
305
                is.close();
306
            }
307
        }
308
    }
309
    
310
    
311
312
    /**
313
     * Knuth-Morris-Pratt Algorithm for Pattern Matching
314
     * Finds the first occurrence of the pattern in the text.
315
     */
316
    private static int indexOf(byte[] data, int offset, byte[] pattern) {
317
        int[] failure = computeFailure(pattern);
318
319
        int j = 0;
320
        if (data.length == 0) return -1;
321
322
        for (int i = offset; i < data.length; i++) {
323
            while (j > 0 && pattern[j] != data[i]) {
324
                j = failure[j - 1];
325
            }
326
            if (pattern[j] == data[i]) { j++; }
327
            if (j == pattern.length) {
328
                return i - pattern.length + 1;
329
            }
330
        }
331
        return -1;
332
    }
333
334
    /**
335
     * Computes the failure function using a boot-strapping process,
336
     * where the pattern is matched against itself.
337
     */
338
    private static int[] computeFailure(byte[] pattern) {
339
        int[] failure = new int[pattern.length];
340
341
        int j = 0;
342
        for (int i = 1; i < pattern.length; i++) {
343
            while (j > 0 && pattern[j] != pattern[i]) {
344
                j = failure[j - 1];
345
            }
346
            if (pattern[j] == pattern[i]) {
347
                j++;
348
            }
349
            failure[i] = j;
350
        }
351
352
        return failure;
353
    }
354
355
    
356
}
357
native
(-)src/integrationtest/org/apache/poi/stress/SpreadsheetHandler.java (+16 lines)
Lines 23-28 Link Here
23
import java.io.IOException;
23
import java.io.IOException;
24
24
25
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
25
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
26
import org.apache.poi.ss.extractor.EmbeddedData;
27
import org.apache.poi.ss.extractor.EmbeddedExtractor;
26
import org.apache.poi.ss.usermodel.Cell;
28
import org.apache.poi.ss.usermodel.Cell;
27
import org.apache.poi.ss.usermodel.Row;
29
import org.apache.poi.ss.usermodel.Row;
28
import org.apache.poi.ss.usermodel.Sheet;
30
import org.apache.poi.ss.usermodel.Sheet;
Lines 55-60 Link Here
55
		
57
		
56
		readContent(read);
58
		readContent(read);
57
		
59
		
60
		extractEmbedded(read);
61
		
58
		modifyContent(read);
62
		modifyContent(read);
59
63
60
		read.close();
64
		read.close();
Lines 91-96 Link Here
91
			}
95
			}
92
		}
96
		}
93
	}
97
	}
98
99
	private void extractEmbedded(Workbook wb) throws IOException {
100
        EmbeddedExtractor ee = new EmbeddedExtractor();
101
102
        for (Sheet s : wb) {
103
            for (EmbeddedData ed : ee.extractAll(s)) {
104
                assertNotNull(ed.getFilename());
105
                assertNotNull(ed.getEmbeddedData());
106
                assertNotNull(ed.getShape());
107
            }
108
        }
109
	}
94
	
110
	
95
	private void modifyContent(Workbook wb) {
111
	private void modifyContent(Workbook wb) {
96
		/* a number of file fail because of various things: udf, unimplemented functions, ...
112
		/* a number of file fail because of various things: udf, unimplemented functions, ...
(-)src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFObjectData.java (+170 lines)
Line 0 Link Here
1
/* ====================================================================
2
   Licensed to the Apache Software Foundation (ASF) under one or more
3
   contributor license agreements.  See the NOTICE file distributed with
4
   this work for additional information regarding copyright ownership.
5
   The ASF licenses this file to You under the Apache License, Version 2.0
6
   (the "License"); you may not use this file except in compliance with
7
   the License.  You may obtain a copy of the License at
8
9
       http://www.apache.org/licenses/LICENSE-2.0
10
11
   Unless required by applicable law or agreed to in writing, software
12
   distributed under the License is distributed on an "AS IS" BASIS,
13
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
   See the License for the specific language governing permissions and
15
   limitations under the License.
16
==================================================================== */
17
18
package org.apache.poi.xssf.usermodel;
19
20
import java.io.ByteArrayOutputStream;
21
import java.io.IOException;
22
import java.io.InputStream;
23
import java.io.PushbackInputStream;
24
25
import javax.xml.namespace.QName;
26
27
import org.apache.poi.POIXMLDocumentPart;
28
import org.apache.poi.POIXMLException;
29
import org.apache.poi.openxml4j.opc.PackagePart;
30
import org.apache.poi.openxml4j.opc.PackageRelationshipTypes;
31
import org.apache.poi.poifs.filesystem.DirectoryEntry;
32
import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
33
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
34
import org.apache.poi.ss.usermodel.ObjectData;
35
import org.apache.poi.util.IOUtils;
36
import org.apache.poi.util.POILogFactory;
37
import org.apache.poi.util.POILogger;
38
import org.apache.xmlbeans.XmlCursor;
39
import org.openxmlformats.schemas.drawingml.x2006.spreadsheetDrawing.CTShape;
40
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTOleObject;
41
42
/**
43
 * Represents binary object (i.e. OLE) data stored in the file.  Eg. A GIF, JPEG etc...
44
 */
45
public class XSSFObjectData extends XSSFSimpleShape implements ObjectData {
46
    private static final POILogger LOG = POILogFactory.getLogger(XSSFObjectData.class);
47
    
48
    /**
49
     * A default instance of CTShape used for creating new shapes.
50
     */
51
    private static CTShape prototype = null;
52
53
    private CTOleObject oleObject;
54
55
    protected XSSFObjectData(XSSFDrawing drawing, CTShape ctShape) {
56
        super(drawing, ctShape);
57
    }
58
59
    /**
60
     * Prototype with the default structure of a new auto-shape.
61
     */
62
    protected static CTShape prototype() {
63
        if(prototype == null) {
64
            prototype = XSSFSimpleShape.prototype();
65
        }
66
        return prototype;
67
    }
68
69
    @Override
70
    public String getOLE2ClassName() {
71
        return getOleObject().getProgId();
72
    }
73
74
    /**
75
     * @return the CTOleObject associated with the shape 
76
     */
77
    public CTOleObject getOleObject() {
78
        if (oleObject == null) {
79
            long shapeId = getCTShape().getNvSpPr().getCNvPr().getId();
80
            oleObject = getSheet().readOleObject(shapeId);
81
            if (oleObject == null) {
82
                throw new POIXMLException("Ole object not found in sheet container - it's probably a control element");
83
            }
84
        }
85
        return oleObject;
86
    }
87
    
88
    @Override
89
    public byte[] getObjectData() throws IOException {
90
        InputStream is = getObjectPart().getInputStream();
91
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
92
        IOUtils.copy(is, bos);
93
        is.close();
94
        return bos.toByteArray();
95
    }
96
    
97
    /**
98
     * @return the package part of the object data
99
     */
100
    public PackagePart getObjectPart() {
101
        if (!getOleObject().isSetId()) {
102
            throw new POIXMLException("Invalid ole object found in sheet container");
103
        }
104
        POIXMLDocumentPart pdp = getSheet().getRelationById(getOleObject().getId());
105
        return (pdp == null) ? null : pdp.getPackagePart();
106
    }
107
108
    @Override
109
    public boolean hasDirectoryEntry() {
110
        InputStream is = null;
111
        try {
112
            is = getObjectPart().getInputStream();
113
114
            // If clearly doesn't do mark/reset, wrap up
115
            if (! is.markSupported()) {
116
                is = new PushbackInputStream(is, 8);
117
            }
118
119
            // Ensure that there is at least some data there
120
            byte[] header8 = IOUtils.peekFirst8Bytes(is);
121
122
            // Try to create
123
            return NPOIFSFileSystem.hasPOIFSHeader(header8);
124
        } catch (IOException e) {
125
            LOG.log(POILogger.WARN, "can't determine if directory entry exists", e);
126
            return false;
127
        } finally {
128
            IOUtils.closeQuietly(is);
129
        }
130
    }
131
132
    @Override
133
    @SuppressWarnings("resource")
134
    public DirectoryEntry getDirectory() throws IOException {
135
        InputStream is = null;
136
        try {
137
            is = getObjectPart().getInputStream();
138
            return new POIFSFileSystem(is).getRoot();
139
        } finally {
140
            IOUtils.closeQuietly(is);
141
        }
142
    }
143
144
    /**
145
     * The filename of the embedded image
146
     */
147
    @Override
148
    public String getFileName() {
149
        return getObjectPart().getPartName().getName();
150
    }
151
    
152
    protected XSSFSheet getSheet() {
153
        return (XSSFSheet)getDrawing().getParent();
154
    }
155
156
    @Override
157
    public XSSFPictureData getPictureData() {
158
        XmlCursor cur = getOleObject().newCursor();
159
        try {
160
            if (cur.toChild(XSSFRelation.NS_SPREADSHEETML, "objectPr")) {
161
                String blipId = cur.getAttributeText(new QName(PackageRelationshipTypes.CORE_PROPERTIES_ECMA376_NS, "id"));
162
                return (XSSFPictureData)getDrawing().getRelationById(blipId);
163
            }
164
            return null;
165
        } finally {
166
            cur.dispose();
167
        }
168
    }
169
}
170
native
(-)src/java/org/apache/poi/hssf/usermodel/HSSFObjectData.java (-21 / +6 lines)
Lines 25-30 Link Here
25
import org.apache.poi.hssf.record.*;
25
import org.apache.poi.hssf.record.*;
26
import org.apache.poi.poifs.filesystem.DirectoryEntry;
26
import org.apache.poi.poifs.filesystem.DirectoryEntry;
27
import org.apache.poi.poifs.filesystem.Entry;
27
import org.apache.poi.poifs.filesystem.Entry;
28
import org.apache.poi.ss.usermodel.ObjectData;
28
import org.apache.poi.util.HexDump;
29
import org.apache.poi.util.HexDump;
29
30
30
/**
31
/**
Lines 32-38 Link Here
32
 * <p/>
33
 * <p/>
33
 * Right now, 13, july, 2012 can not be created from scratch
34
 * Right now, 13, july, 2012 can not be created from scratch
34
 */
35
 */
35
public final class HSSFObjectData extends HSSFPicture {
36
public final class HSSFObjectData extends HSSFPicture implements ObjectData {
36
    /**
37
    /**
37
     * Reference to the filesystem root, required for retrieving the object data.
38
     * Reference to the filesystem root, required for retrieving the object data.
38
     */
39
     */
Lines 43-62 Link Here
43
        this._root = _root;
44
        this._root = _root;
44
    }
45
    }
45
46
46
    /**
47
    @Override
47
     * Returns the OLE2 Class Name of the object
48
     */
49
    public String getOLE2ClassName() {
48
    public String getOLE2ClassName() {
50
        return findObjectRecord().getOLEClassName();
49
        return findObjectRecord().getOLEClassName();
51
    }
50
    }
52
51
53
    /**
52
    @Override
54
     * Gets the object data. Only call for ones that have
55
     * data though. See {@link #hasDirectoryEntry()}
56
     *
57
     * @return the object data as an OLE2 directory.
58
     * @throws IOException if there was an error reading the data.
59
     */
60
    public DirectoryEntry getDirectory() throws IOException {
53
    public DirectoryEntry getDirectory() throws IOException {
61
        EmbeddedObjectRefSubRecord subRecord = findObjectRecord();
54
        EmbeddedObjectRefSubRecord subRecord = findObjectRecord();
62
55
Lines 70-89 Link Here
70
        throw new IOException("Stream " + streamName + " was not an OLE2 directory");
63
        throw new IOException("Stream " + streamName + " was not an OLE2 directory");
71
    }
64
    }
72
65
73
    /**
66
    @Override
74
     * Returns the data portion, for an ObjectData
75
     * that doesn't have an associated POIFS Directory
76
     * Entry
77
     */
78
    public byte[] getObjectData() {
67
    public byte[] getObjectData() {
79
        return findObjectRecord().getObjectData();
68
        return findObjectRecord().getObjectData();
80
    }
69
    }
81
70
82
    /**
71
    @Override
83
     * Does this ObjectData have an associated POIFS
84
     * Directory Entry?
85
     * (Not all do, those that don't have a data portion)
86
     */
87
    public boolean hasDirectoryEntry() {
72
    public boolean hasDirectoryEntry() {
88
        EmbeddedObjectRefSubRecord subRecord = findObjectRecord();
73
        EmbeddedObjectRefSubRecord subRecord = findObjectRecord();
89
74
(-)src/ooxml/java/org/apache/poi/ss/extractor/EmbeddedData.java (+105 lines)
Line 0 Link Here
1
/* ====================================================================
2
   Licensed to the Apache Software Foundation (ASF) under one or more
3
   contributor license agreements.  See the NOTICE file distributed with
4
   this work for additional information regarding copyright ownership.
5
   The ASF licenses this file to You under the Apache License, Version 2.0
6
   (the "License"); you may not use this file except in compliance with
7
   the License.  You may obtain a copy of the License at
8
9
       http://www.apache.org/licenses/LICENSE-2.0
10
11
   Unless required by applicable law or agreed to in writing, software
12
   distributed under the License is distributed on an "AS IS" BASIS,
13
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
   See the License for the specific language governing permissions and
15
   limitations under the License.
16
==================================================================== */
17
18
package org.apache.poi.ss.extractor;
19
20
import org.apache.poi.ss.usermodel.Shape;
21
22
/**
23
 * A collection of embedded object informations and content
24
 */
25
public class EmbeddedData {
26
    private String filename;
27
    private byte[] embeddedData;
28
    private Shape shape;
29
    private String contentType = "binary/octet-stream";
30
31
    public EmbeddedData(String filename, byte[] embeddedData, String contentType) {
32
        setFilename(filename);
33
        setEmbeddedData(embeddedData);
34
        setContentType(contentType);
35
    }
36
    
37
    /**
38
     * @return the filename
39
     */
40
    public String getFilename() {
41
        return filename;
42
    }
43
    
44
    /**
45
     * Sets the filename 
46
     *
47
     * @param filename the filename
48
     */
49
    public void setFilename(String filename) {
50
        if (filename == null) {
51
            this.filename = "unknown.bin";
52
        } else {
53
            this.filename = filename.replaceAll("[^/\\\\]*[/\\\\]", "").trim();
54
        }
55
    }
56
    
57
    /**
58
     * @return the embedded object byte array
59
     */
60
    public byte[] getEmbeddedData() {
61
        return embeddedData;
62
    }
63
64
    /**
65
     * Sets the embedded object as byte array
66
     *
67
     * @param embeddedData the embedded object byte array
68
     */
69
    public void setEmbeddedData(byte[] embeddedData) {
70
        this.embeddedData = (embeddedData == null) ? null : embeddedData.clone();
71
    }
72
73
    /**
74
     * @return the shape which links to the embedded object
75
     */
76
    public Shape getShape() {
77
        return shape;
78
    }
79
80
    /**
81
     * Sets the shape which links to the embedded object
82
     *
83
     * @param shape the shape
84
     */
85
    public void setShape(Shape shape) {
86
        this.shape = shape;
87
    }
88
89
    /**
90
     * @return the content-/mime-type of the embedded object, the default (if unknown) is {@code binary/octet-stream} 
91
     */
92
    public String getContentType() {
93
        return contentType;
94
    }
95
96
    /**
97
     * Sets the content-/mime-type
98
     *
99
     * @param contentType the content-type
100
     */
101
    public void setContentType(String contentType) {
102
        this.contentType = contentType;
103
    }
104
}
105
native
(-)src/ooxml/java/org/apache/poi/ss/extractor/EmbeddedExtractor.java (+357 lines)
Line 0 Link Here
1
/* ====================================================================
2
   Licensed to the Apache Software Foundation (ASF) under one or more
3
   contributor license agreements.  See the NOTICE file distributed with
4
   this work for additional information regarding copyright ownership.
5
   The ASF licenses this file to You under the Apache License, Version 2.0
6
   (the "License"); you may not use this file except in compliance with
7
   the License.  You may obtain a copy of the License at
8
9
       http://www.apache.org/licenses/LICENSE-2.0
10
11
   Unless required by applicable law or agreed to in writing, software
12
   distributed under the License is distributed on an "AS IS" BASIS,
13
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
   See the License for the specific language governing permissions and
15
   limitations under the License.
16
==================================================================== */
17
18
package org.apache.poi.ss.extractor;
19
20
import java.io.ByteArrayOutputStream;
21
import java.io.File;
22
import java.io.FileInputStream;
23
import java.io.FileOutputStream;
24
import java.io.IOException;
25
import java.io.InputStream;
26
import java.util.ArrayList;
27
import java.util.Arrays;
28
import java.util.Collections;
29
import java.util.Iterator;
30
import java.util.List;
31
32
import org.apache.poi.hpsf.ClassID;
33
import org.apache.poi.poifs.filesystem.DirectoryNode;
34
import org.apache.poi.poifs.filesystem.Entry;
35
import org.apache.poi.poifs.filesystem.Ole10Native;
36
import org.apache.poi.poifs.filesystem.Ole10NativeException;
37
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
38
import org.apache.poi.ss.usermodel.Drawing;
39
import org.apache.poi.ss.usermodel.ObjectData;
40
import org.apache.poi.ss.usermodel.Picture;
41
import org.apache.poi.ss.usermodel.PictureData;
42
import org.apache.poi.ss.usermodel.Shape;
43
import org.apache.poi.ss.usermodel.ShapeContainer;
44
import org.apache.poi.ss.usermodel.Sheet;
45
import org.apache.poi.ss.usermodel.Workbook;
46
import org.apache.poi.ss.usermodel.WorkbookFactory;
47
import org.apache.poi.util.IOUtils;
48
import org.apache.poi.util.LocaleUtil;
49
import org.apache.poi.util.POILogFactory;
50
import org.apache.poi.util.POILogger;
51
52
public class EmbeddedExtractor implements Iterable<EmbeddedExtractor> {
53
    private static final POILogger LOG = POILogFactory.getLogger(EmbeddedExtractor.class);
54
    
55
    /**
56
     * @return the list of known extractors, if you provide custom extractors, override this method
57
     */
58
    @Override
59
    public Iterator<EmbeddedExtractor> iterator() {
60
        EmbeddedExtractor[] ee = {
61
            new Ole10Extractor(), new PdfExtractor(), new WordExtractor(), new ExcelExtractor(), new FsExtractor()
62
        };
63
        return Arrays.asList(ee).iterator();
64
    }
65
66
    public EmbeddedData extractOne(DirectoryNode src) throws IOException {
67
        for (EmbeddedExtractor ee : this) {
68
            if (ee.canExtract(src)) {
69
                return ee.extract(src);
70
            }
71
        }
72
        return null;
73
    }
74
75
    public EmbeddedData extractOne(Picture src) throws IOException {
76
        for (EmbeddedExtractor ee : this) {
77
            if (ee.canExtract(src)) {
78
                return ee.extract(src);
79
            }
80
        }
81
        return null;
82
    }
83
84
    public List<EmbeddedData> extractAll(Sheet sheet) throws IOException {
85
        Drawing<?> patriarch = sheet.getDrawingPatriarch();
86
        if (null == patriarch){
87
            return Collections.emptyList();
88
        }
89
        List<EmbeddedData> embeddings = new ArrayList<EmbeddedData>();
90
        extractAll(patriarch, embeddings);
91
        return embeddings;
92
    }
93
    
94
    protected void extractAll(ShapeContainer<?> parent, List<EmbeddedData> embeddings) throws IOException {
95
        for (Shape shape : parent) {
96
            EmbeddedData data = null;
97
            if (shape instanceof ObjectData) {
98
                ObjectData od = (ObjectData)shape;
99
                try {
100
                    if (od.hasDirectoryEntry()) {
101
                        data = extractOne((DirectoryNode)od.getDirectory());
102
                    } else {
103
                        data = new EmbeddedData(od.getFileName(), od.getObjectData(), "binary/octet-stream");
104
                    }
105
                } catch (Exception e) {
106
                    LOG.log(POILogger.WARN, "Entry not found / readable - ignoring OLE embedding", e);
107
                }
108
            } else if (shape instanceof Picture) {
109
                data = extractOne((Picture)shape);
110
            } else if (shape instanceof ShapeContainer) {
111
                extractAll((ShapeContainer<?>)shape, embeddings);
112
            }
113
            
114
            if (data == null) {
115
                continue;
116
            }
117
118
            data.setShape(shape);
119
            String filename = data.getFilename();
120
            String extension = (filename == null || filename.indexOf('.') == -1) ? ".bin" : filename.substring(filename.indexOf('.'));
121
            
122
            // try to find an alternative name
123
            if (filename == null || "".equals(filename) || filename.startsWith("MBD") || filename.startsWith("Root Entry")) {
124
                filename = shape.getShapeName();
125
                if (filename != null) {
126
                    filename += extension;
127
                }
128
            }
129
            // default to dummy name
130
            if (filename == null || "".equals(filename)) {
131
                filename = "picture_"+embeddings.size()+extension;
132
            }
133
            filename = filename.trim();
134
            data.setFilename(filename);
135
            
136
            embeddings.add(data);
137
        }
138
    }
139
    
140
141
    public boolean canExtract(DirectoryNode source) {
142
        return false;
143
    }
144
145
    public boolean canExtract(Picture source) {
146
        return false;
147
    }
148
149
    protected EmbeddedData extract(DirectoryNode dn) throws IOException {
150
        assert(canExtract(dn));
151
        POIFSFileSystem dest = new POIFSFileSystem();
152
        copyNodes(dn, dest.getRoot());
153
        // start with a reasonable big size
154
        ByteArrayOutputStream bos = new ByteArrayOutputStream(20000);
155
        dest.writeFilesystem(bos);
156
        dest.close();
157
158
        return new EmbeddedData(dn.getName(), bos.toByteArray(), "binary/octet-stream");
159
    }
160
161
    protected EmbeddedData extract(Picture source) throws IOException {
162
        return null;
163
    }
164
    
165
    public static class Ole10Extractor extends EmbeddedExtractor {
166
        @Override
167
        public boolean canExtract(DirectoryNode dn) {
168
            ClassID clsId = dn.getStorageClsid();
169
            return ClassID.OLE10_PACKAGE.equals(clsId);
170
        }
171
172
        @Override
173
        public EmbeddedData extract(DirectoryNode dn) throws IOException {
174
            try {
175
                Ole10Native ole10 = Ole10Native.createFromEmbeddedOleObject(dn);
176
                return new EmbeddedData(ole10.getFileName(), ole10.getDataBuffer(), "binary/octet-stream");
177
            } catch (Ole10NativeException e) {
178
                throw new IOException(e);
179
            }
180
        }
181
    }
182
183
    static class PdfExtractor extends EmbeddedExtractor {
184
        static ClassID PdfClassID = new ClassID("{B801CA65-A1FC-11D0-85AD-444553540000}");
185
        @Override
186
        public boolean canExtract(DirectoryNode dn) {
187
            ClassID clsId = dn.getStorageClsid();
188
            return (PdfClassID.equals(clsId)
189
            || dn.hasEntry("CONTENTS"));
190
        }
191
192
        @Override
193
        public EmbeddedData extract(DirectoryNode dn) throws IOException {
194
            ByteArrayOutputStream bos = new ByteArrayOutputStream();
195
            InputStream is = dn.createDocumentInputStream("CONTENTS");
196
            IOUtils.copy(is, bos);
197
            is.close();
198
            return new EmbeddedData(dn.getName()+".pdf", bos.toByteArray(), "application/pdf");
199
        }
200
        
201
        @Override
202
        public boolean canExtract(Picture source) {
203
            PictureData pd = source.getPictureData();
204
            return (pd.getPictureType() == Workbook.PICTURE_TYPE_EMF);
205
        }
206
207
        /**
208
         * Mac Office encodes embedded objects inside the picture, e.g. PDF is part of an EMF.
209
         * If an embedded stream is inside an EMF picture, this method extracts the payload.
210
         *
211
         * @return the embedded data in an EMF picture or null if none is found
212
         */
213
        @Override
214
        protected EmbeddedData extract(Picture source) throws IOException {
215
            // check for emf+ embedded pdf (poor mans style :( )
216
            // Mac Excel 2011 embeds pdf files with this method.
217
            PictureData pd = source.getPictureData();
218
            if (pd.getPictureType() != Workbook.PICTURE_TYPE_EMF) {
219
                return null;
220
            }
221
222
            // TODO: investigate if this is just an EMF-hack or if other formats are also embedded in EMF
223
            byte pictureBytes[] = pd.getData();
224
            int idxStart = indexOf(pictureBytes, 0, "%PDF-".getBytes(LocaleUtil.CHARSET_1252));
225
            if (idxStart == -1) {
226
                return null;
227
            }
228
            
229
            int idxEnd = indexOf(pictureBytes, idxStart, "%%EOF".getBytes(LocaleUtil.CHARSET_1252));
230
            if (idxEnd == -1) {
231
                return null;
232
            }
233
            
234
            int pictureBytesLen = idxEnd-idxStart+6;
235
            byte[] pdfBytes = new byte[pictureBytesLen];
236
            System.arraycopy(pictureBytes, idxStart, pdfBytes, 0, pictureBytesLen);
237
            String filename = source.getShapeName().trim();
238
            if (!filename.toLowerCase().endsWith(".pdf")) {
239
                filename += ".pdf";
240
            }
241
            return new EmbeddedData(filename, pdfBytes, "application/pdf");
242
        }
243
        
244
245
    }
246
247
    static class WordExtractor extends EmbeddedExtractor {
248
        @Override
249
        public boolean canExtract(DirectoryNode dn) {
250
            ClassID clsId = dn.getStorageClsid();
251
            return (ClassID.WORD95.equals(clsId)
252
            || ClassID.WORD97.equals(clsId)
253
            || dn.hasEntry("WordDocument"));
254
        }
255
256
        @Override
257
        public EmbeddedData extract(DirectoryNode dn) throws IOException {
258
            EmbeddedData ed = super.extract(dn);
259
            ed.setFilename(dn.getName()+".doc");
260
            return ed;
261
        }
262
    }
263
264
    static class ExcelExtractor extends EmbeddedExtractor {
265
        @Override
266
        public boolean canExtract(DirectoryNode dn) {
267
            ClassID clsId = dn.getStorageClsid();
268
            return (ClassID.EXCEL95.equals(clsId)
269
                    || ClassID.EXCEL97.equals(clsId)
270
                    || dn.hasEntry("Workbook") /*...*/);
271
        }
272
        
273
        @Override
274
        public EmbeddedData extract(DirectoryNode dn) throws IOException {
275
            EmbeddedData ed = super.extract(dn);
276
            ed.setFilename(dn.getName()+".xls");
277
            return ed;
278
        }
279
    }
280
281
    static class FsExtractor extends EmbeddedExtractor {
282
        @Override
283
        public boolean canExtract(DirectoryNode dn) {
284
            return true;
285
        }
286
        @Override
287
        public EmbeddedData extract(DirectoryNode dn) throws IOException {
288
            EmbeddedData ed = super.extract(dn);
289
            ed.setFilename(dn.getName()+".ole");
290
            // TODO: read the content type from CombObj stream
291
            return ed;
292
        }
293
    }
294
    
295
    protected static void copyNodes(DirectoryNode src, DirectoryNode dest) throws IOException {
296
        for (Entry e : src) {
297
            if (e instanceof DirectoryNode) {
298
                DirectoryNode srcDir = (DirectoryNode)e;
299
                DirectoryNode destDir = (DirectoryNode)dest.createDirectory(srcDir.getName());
300
                destDir.setStorageClsid(srcDir.getStorageClsid());
301
                copyNodes(srcDir, destDir);
302
            } else {
303
                InputStream is = src.createDocumentInputStream(e);
304
                dest.createDocument(e.getName(), is);
305
                is.close();
306
            }
307
        }
308
    }
309
    
310
    
311
312
    /**
313
     * Knuth-Morris-Pratt Algorithm for Pattern Matching
314
     * Finds the first occurrence of the pattern in the text.
315
     */
316
    private static int indexOf(byte[] data, int offset, byte[] pattern) {
317
        int[] failure = computeFailure(pattern);
318
319
        int j = 0;
320
        if (data.length == 0) return -1;
321
322
        for (int i = offset; i < data.length; i++) {
323
            while (j > 0 && pattern[j] != data[i]) {
324
                j = failure[j - 1];
325
            }
326
            if (pattern[j] == data[i]) { j++; }
327
            if (j == pattern.length) {
328
                return i - pattern.length + 1;
329
            }
330
        }
331
        return -1;
332
    }
333
334
    /**
335
     * Computes the failure function using a boot-strapping process,
336
     * where the pattern is matched against itself.
337
     */
338
    private static int[] computeFailure(byte[] pattern) {
339
        int[] failure = new int[pattern.length];
340
341
        int j = 0;
342
        for (int i = 1; i < pattern.length; i++) {
343
            while (j > 0 && pattern[j] != pattern[i]) {
344
                j = failure[j - 1];
345
            }
346
            if (pattern[j] == pattern[i]) {
347
                j++;
348
            }
349
            failure[i] = j;
350
        }
351
352
        return failure;
353
    }
354
355
    
356
}
357
native
(-)src/ooxml/java/org/apache/poi/openxml4j/opc/PackageRelationshipTypes.java (+5 lines)
Lines 42-47 Link Here
42
    String CORE_PROPERTIES_ECMA376 = "http://schemas.openxmlformats.org/officedocument/2006/relationships/metadata/core-properties";
42
    String CORE_PROPERTIES_ECMA376 = "http://schemas.openxmlformats.org/officedocument/2006/relationships/metadata/core-properties";
43
43
44
    /**
44
    /**
45
     * Namespace of Core properties relationship type as defined in ECMA 376
46
     */
47
    String CORE_PROPERTIES_ECMA376_NS = "http://schemas.openxmlformats.org/officeDocument/2006/relationships";
48
    
49
    /**
45
     * Digital signature relationship type.
50
     * Digital signature relationship type.
46
     */
51
     */
47
    String DIGITAL_SIGNATURE = "http://schemas.openxmlformats.org/package/2006/relationships/digital-signature/signature";
52
    String DIGITAL_SIGNATURE = "http://schemas.openxmlformats.org/package/2006/relationships/digital-signature/signature";
(-)src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFDrawing.java (-16 / +167 lines)
Lines 20-27 Link Here
20
import static org.apache.poi.POIXMLTypeLoader.DEFAULT_XML_OPTIONS;
20
import static org.apache.poi.POIXMLTypeLoader.DEFAULT_XML_OPTIONS;
21
21
22
import java.io.IOException;
22
import java.io.IOException;
23
import java.io.InputStream;
23
import java.io.OutputStream;
24
import java.io.OutputStream;
24
import java.util.ArrayList;
25
import java.util.ArrayList;
26
import java.util.Iterator;
25
import java.util.List;
27
import java.util.List;
26
28
27
import javax.xml.namespace.QName;
29
import javax.xml.namespace.QName;
Lines 32-38 Link Here
32
import org.apache.poi.ss.usermodel.ClientAnchor;
34
import org.apache.poi.ss.usermodel.ClientAnchor;
33
import org.apache.poi.ss.usermodel.Drawing;
35
import org.apache.poi.ss.usermodel.Drawing;
34
import org.apache.poi.ss.util.CellAddress;
36
import org.apache.poi.ss.util.CellAddress;
37
import org.apache.poi.ss.util.ImageUtils;
35
import org.apache.poi.util.Internal;
38
import org.apache.poi.util.Internal;
39
import org.apache.poi.util.POILogFactory;
40
import org.apache.poi.util.POILogger;
36
import org.apache.poi.util.Units;
41
import org.apache.poi.util.Units;
37
import org.apache.poi.xssf.model.CommentsTable;
42
import org.apache.poi.xssf.model.CommentsTable;
38
import org.apache.xmlbeans.XmlCursor;
43
import org.apache.xmlbeans.XmlCursor;
Lines 39-44 Link Here
39
import org.apache.xmlbeans.XmlException;
44
import org.apache.xmlbeans.XmlException;
40
import org.apache.xmlbeans.XmlObject;
45
import org.apache.xmlbeans.XmlObject;
41
import org.apache.xmlbeans.XmlOptions;
46
import org.apache.xmlbeans.XmlOptions;
47
import org.apache.xmlbeans.impl.values.XmlAnyTypeImpl;
48
import org.openxmlformats.schemas.drawingml.x2006.main.CTGroupTransform2D;
49
import org.openxmlformats.schemas.drawingml.x2006.main.CTPoint2D;
50
import org.openxmlformats.schemas.drawingml.x2006.main.CTPositiveSize2D;
51
import org.openxmlformats.schemas.drawingml.x2006.main.CTTransform2D;
42
import org.openxmlformats.schemas.drawingml.x2006.spreadsheetDrawing.CTConnector;
52
import org.openxmlformats.schemas.drawingml.x2006.spreadsheetDrawing.CTConnector;
43
import org.openxmlformats.schemas.drawingml.x2006.spreadsheetDrawing.CTDrawing;
53
import org.openxmlformats.schemas.drawingml.x2006.spreadsheetDrawing.CTDrawing;
44
import org.openxmlformats.schemas.drawingml.x2006.spreadsheetDrawing.CTGraphicalObjectFrame;
54
import org.openxmlformats.schemas.drawingml.x2006.spreadsheetDrawing.CTGraphicalObjectFrame;
Lines 53-59 Link Here
53
/**
63
/**
54
 * Represents a SpreadsheetML drawing
64
 * Represents a SpreadsheetML drawing
55
 */
65
 */
56
public final class XSSFDrawing extends POIXMLDocumentPart implements Drawing {
66
public final class XSSFDrawing extends POIXMLDocumentPart implements Drawing<XSSFShape> {
67
    private static final POILogger LOG = POILogFactory.getLogger(XSSFDrawing.class);
68
    
57
    /**
69
    /**
58
     * Root element of the SpreadsheetML Drawing part
70
     * Root element of the SpreadsheetML Drawing part
59
     */
71
     */
Lines 86-92 Link Here
86
        XmlOptions options  = new XmlOptions(DEFAULT_XML_OPTIONS);
98
        XmlOptions options  = new XmlOptions(DEFAULT_XML_OPTIONS);
87
        //Removing root element
99
        //Removing root element
88
        options.setLoadReplaceDocumentElement(null);
100
        options.setLoadReplaceDocumentElement(null);
89
        drawing = CTDrawing.Factory.parse(part.getInputStream(),options);
101
        InputStream is = part.getInputStream();
102
        try {
103
            drawing = CTDrawing.Factory.parse(is,options);
104
        } finally {
105
            is.close();
106
        }
90
    }
107
    }
91
    
108
    
92
    /**
109
    /**
Lines 176-181 Link Here
176
        XSSFPicture shape = new XSSFPicture(this, ctShape);
193
        XSSFPicture shape = new XSSFPicture(this, ctShape);
177
        shape.anchor = anchor;
194
        shape.anchor = anchor;
178
        shape.setPictureReference(rel);
195
        shape.setPictureReference(rel);
196
        ctShape.getSpPr().setXfrm(createXfrm(anchor));
197
        
179
        return shape;
198
        return shape;
180
    }
199
    }
181
200
Lines 202-207 Link Here
202
221
203
        XSSFGraphicFrame frame = createGraphicFrame(anchor);
222
        XSSFGraphicFrame frame = createGraphicFrame(anchor);
204
        frame.setChart(chart, chartRelId);
223
        frame.setChart(chart, chartRelId);
224
        frame.getCTGraphicalObjectFrame().setXfrm(createXfrm(anchor));
205
225
206
        return chart;
226
        return chart;
207
    }
227
    }
Lines 241-246 Link Here
241
        CTShape ctShape = ctAnchor.addNewSp();
261
        CTShape ctShape = ctAnchor.addNewSp();
242
        ctShape.set(XSSFSimpleShape.prototype());
262
        ctShape.set(XSSFSimpleShape.prototype());
243
        ctShape.getNvSpPr().getCNvPr().setId(shapeId);
263
        ctShape.getNvSpPr().getCNvPr().setId(shapeId);
264
        ctShape.getSpPr().setXfrm(createXfrm(anchor));
244
        XSSFSimpleShape shape = new XSSFSimpleShape(this, ctShape);
265
        XSSFSimpleShape shape = new XSSFSimpleShape(this, ctShape);
245
        shape.anchor = anchor;
266
        shape.anchor = anchor;
246
        return shape;
267
        return shape;
Lines 278-283 Link Here
278
        CTTwoCellAnchor ctAnchor = createTwoCellAnchor(anchor);
299
        CTTwoCellAnchor ctAnchor = createTwoCellAnchor(anchor);
279
        CTGroupShape ctGroup = ctAnchor.addNewGrpSp();
300
        CTGroupShape ctGroup = ctAnchor.addNewGrpSp();
280
        ctGroup.set(XSSFShapeGroup.prototype());
301
        ctGroup.set(XSSFShapeGroup.prototype());
302
        CTTransform2D xfrm = createXfrm(anchor);
303
        CTGroupTransform2D grpXfrm =ctGroup.getGrpSpPr().getXfrm();
304
        grpXfrm.setOff(xfrm.getOff());
305
        grpXfrm.setExt(xfrm.getExt());
306
        grpXfrm.setChExt(xfrm.getExt());
281
307
282
        XSSFShapeGroup shape = new XSSFShapeGroup(this, ctGroup);
308
        XSSFShapeGroup shape = new XSSFShapeGroup(this, ctGroup);
283
        shape.anchor = anchor;
309
        shape.anchor = anchor;
Lines 333-338 Link Here
333
        CTTwoCellAnchor ctAnchor = createTwoCellAnchor(anchor);
359
        CTTwoCellAnchor ctAnchor = createTwoCellAnchor(anchor);
334
        CTGraphicalObjectFrame ctGraphicFrame = ctAnchor.addNewGraphicFrame();
360
        CTGraphicalObjectFrame ctGraphicFrame = ctAnchor.addNewGraphicFrame();
335
        ctGraphicFrame.set(XSSFGraphicFrame.prototype());
361
        ctGraphicFrame.set(XSSFGraphicFrame.prototype());
362
        ctGraphicFrame.setXfrm(createXfrm(anchor));
336
363
337
        long frameId = numOfGraphicFrames++;
364
        long frameId = numOfGraphicFrames++;
338
        XSSFGraphicFrame graphicFrame = new XSSFGraphicFrame(this, ctGraphicFrame);
365
        XSSFGraphicFrame graphicFrame = new XSSFGraphicFrame(this, ctGraphicFrame);
Lines 378-416 Link Here
378
        return ctAnchor;
405
        return ctAnchor;
379
    }
406
    }
380
407
408
    private CTTransform2D createXfrm(XSSFClientAnchor anchor) {
409
        CTTransform2D xfrm = CTTransform2D.Factory.newInstance();
410
        CTPoint2D off = xfrm.addNewOff();
411
        off.setX(anchor.getDx1());
412
        off.setY(anchor.getDy1());
413
        XSSFSheet sheet = (XSSFSheet)getParent();
414
        double widthPx = 0;
415
        for (int col=anchor.getCol1(); col<anchor.getCol2(); col++) {
416
            widthPx += sheet.getColumnWidthInPixels(col);
417
        }
418
        double heightPx = 0;
419
        for (int row=anchor.getRow1(); row<anchor.getRow2(); row++) {
420
            heightPx += ImageUtils.getRowHeightInPixels(sheet, row);
421
        }
422
        int width = Units.pixelToEMU((int)widthPx);
423
        int height = Units.pixelToEMU((int)heightPx);
424
        CTPositiveSize2D ext = xfrm.addNewExt();
425
        ext.setCx(width - anchor.getDx1() + anchor.getDx2());
426
        ext.setCy(height - anchor.getDy1() + anchor.getDy2());
427
        
428
        // TODO: handle vflip/hflip
429
        return xfrm;
430
    }
431
    
381
    private long newShapeId(){
432
    private long newShapeId(){
382
        return drawing.sizeOfTwoCellAnchorArray() + 1;
433
        return drawing.sizeOfTwoCellAnchorArray() + 1;
383
    }
434
    }
384
435
385
    /**
436
    /**
386
     *
387
     * @return list of shapes in this drawing
437
     * @return list of shapes in this drawing
388
     */
438
     */
389
    public List<XSSFShape>  getShapes(){
439
    public List<XSSFShape> getShapes(){
390
        List<XSSFShape> lst = new ArrayList<XSSFShape>();
440
        List<XSSFShape> lst = new ArrayList<XSSFShape>();
391
        for(XmlObject obj : drawing.selectPath("./*/*")) {
441
        XmlCursor cur = drawing.newCursor();
392
            XSSFShape shape = null;
442
        try {
393
            if(obj instanceof CTPicture) shape = new XSSFPicture(this, (CTPicture)obj) ;
443
            if (cur.toFirstChild()) {
394
            else if(obj instanceof CTConnector) shape = new XSSFConnector(this, (CTConnector)obj) ;
444
                addShapes(cur, lst);
395
            else if(obj instanceof CTShape) shape = new XSSFSimpleShape(this, (CTShape)obj) ;
396
            else if(obj instanceof CTGraphicalObjectFrame) shape = new XSSFGraphicFrame(this, (CTGraphicalObjectFrame)obj) ;
397
            else if(obj instanceof CTGroupShape) shape = new XSSFShapeGroup(this, (CTGroupShape)obj) ;
398
399
            if(shape != null){
400
                shape.anchor = getAnchorFromParent(obj);
401
                lst.add(shape);
402
            }
445
            }
446
        } finally {
447
            cur.dispose();
403
        }
448
        }
404
        return lst;
449
        return lst;
405
    }
450
    }
406
451
452
    /**
453
     * @return list of shapes in this shape group
454
     */
455
    public List<XSSFShape> getShapes(XSSFShapeGroup groupshape){
456
        List<XSSFShape> lst = new ArrayList<XSSFShape>();
457
        XmlCursor cur = groupshape.getCTGroupShape().newCursor();
458
        try {
459
            addShapes(cur, lst);
460
        } finally {
461
            cur.dispose();
462
        }
463
        return lst;
464
    }
465
    
466
    private void addShapes(XmlCursor cur, List<XSSFShape> lst) {
467
        try {
468
            do {
469
                cur.push();
470
                if (cur.toFirstChild()) {
471
                    do {
472
                        XmlObject obj = cur.getObject();
473
    
474
                        XSSFShape shape;
475
                        if (obj instanceof CTMarker) {
476
                            // ignore anchor elements
477
                            continue;
478
                        } else if (obj instanceof CTPicture) {
479
                            shape = new XSSFPicture(this, (CTPicture)obj) ;
480
                        } else if(obj instanceof CTConnector) {
481
                            shape = new XSSFConnector(this, (CTConnector)obj) ;
482
                        } else if(obj instanceof CTShape) {
483
                            shape = hasOleLink(obj) 
484
                                ? new XSSFObjectData(this, (CTShape)obj)
485
                                : new XSSFSimpleShape(this, (CTShape)obj) ;
486
                        } else if(obj instanceof CTGraphicalObjectFrame) {
487
                            shape = new XSSFGraphicFrame(this, (CTGraphicalObjectFrame)obj) ;
488
                        } else if(obj instanceof CTGroupShape) {
489
                            shape = new XSSFShapeGroup(this, (CTGroupShape)obj) ;
490
                        } else if(obj instanceof XmlAnyTypeImpl) {
491
                            LOG.log(POILogger.WARN, "trying to parse AlternateContent, "
492
                                    + "this unlinks the returned Shapes from the underlying xml content, "
493
                                    + "so those shapes can't be used to modify the drawing, "
494
                                    + "i.e. modifications will be ignored!");
495
                            
496
                            // XmlAnyTypeImpl is returned for AlternateContent parts, which might contain a CTDrawing
497
                            cur.push();
498
                            cur.toFirstChild();
499
                            XmlCursor cur2 = null;
500
                            try {
501
                                // need to parse AlternateContent again, otherwise the child elements aren't typed,
502
                                // but also XmlAnyTypes
503
                                CTDrawing alterWS = CTDrawing.Factory.parse(cur.newXMLStreamReader());
504
                                cur2 = alterWS.newCursor();
505
                                if (cur2.toFirstChild()) {
506
                                    addShapes(cur2, lst);
507
                                }
508
                            } catch (XmlException e) {
509
                                LOG.log(POILogger.WARN, "unable to parse CTDrawing in alternate content.", e);
510
                            } finally {
511
                                if (cur2 != null) {
512
                                    cur2.dispose();
513
                                }
514
                                cur.pop();
515
                            }
516
                            continue;
517
                        } else {
518
                            // ignore anything else
519
                            continue;
520
                        }
407
521
522
                        assert(shape != null);
523
                        shape.anchor = getAnchorFromParent(obj);
524
                        lst.add(shape);
525
                        
526
                    } while (cur.toNextSibling());
527
                }
528
                cur.pop();
529
            } while (cur.toNextSibling());
530
        } finally {
531
            cur.dispose();
532
        }
533
    }
534
535
    private boolean hasOleLink(XmlObject shape) {
536
        QName uriName = new QName(null, "uri");
537
        String xquery = "declare namespace a='"+XSSFRelation.NS_DRAWINGML+"' .//a:extLst/a:ext";
538
        XmlCursor cur = shape.newCursor();
539
        cur.selectPath(xquery);
540
        try {
541
            while (cur.toNextSelection()) {
542
                String uri = cur.getAttributeText(uriName);
543
                if ("{63B3BB69-23CF-44E3-9099-C40C66FF867C}".equals(uri)) {
544
                    return true;
545
                }
546
            }
547
        } finally {
548
            cur.dispose();
549
        }
550
        return false;
551
    }
552
408
    private XSSFAnchor getAnchorFromParent(XmlObject obj){
553
    private XSSFAnchor getAnchorFromParent(XmlObject obj){
409
        XSSFAnchor anchor = null;
554
        XSSFAnchor anchor = null;
410
555
411
        XmlObject parentXbean = null;
556
        XmlObject parentXbean = null;
412
        XmlCursor cursor = obj.newCursor();
557
        XmlCursor cursor = obj.newCursor();
413
        if(cursor.toParent()) parentXbean = cursor.getObject();
558
        if(cursor.toParent()) {
559
            parentXbean = cursor.getObject();
560
        }
414
        cursor.dispose();
561
        cursor.dispose();
415
        if(parentXbean != null){
562
        if(parentXbean != null){
416
            if (parentXbean instanceof CTTwoCellAnchor) {
563
            if (parentXbean instanceof CTTwoCellAnchor) {
Lines 424-427 Link Here
424
        return anchor;
571
        return anchor;
425
    }
572
    }
426
573
574
    @Override
575
    public Iterator<XSSFShape> iterator() {
576
        return getShapes().iterator();
577
    }
427
}
578
}
(-)src/ooxml/testcases/org/apache/poi/ss/extractor/TestEmbeddedExtractor.java (+116 lines)
Line 0 Link Here
1
/* ====================================================================
2
   Licensed to the Apache Software Foundation (ASF) under one or more
3
   contributor license agreements.  See the NOTICE file distributed with
4
   this work for additional information regarding copyright ownership.
5
   The ASF licenses this file to You under the Apache License, Version 2.0
6
   (the "License"); you may not use this file except in compliance with
7
   the License.  You may obtain a copy of the License at
8
9
       http://www.apache.org/licenses/LICENSE-2.0
10
11
   Unless required by applicable law or agreed to in writing, software
12
   distributed under the License is distributed on an "AS IS" BASIS,
13
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
   See the License for the specific language governing permissions and
15
   limitations under the License.
16
==================================================================== */
17
18
package org.apache.poi.ss.extractor;
19
20
import static org.junit.Assert.assertEquals;
21
22
import java.io.IOException;
23
import java.io.InputStream;
24
import java.security.MessageDigest;
25
import java.security.NoSuchAlgorithmException;
26
import java.util.ArrayList;
27
import java.util.List;
28
29
import javax.xml.bind.DatatypeConverter;
30
31
import org.apache.poi.EncryptedDocumentException;
32
import org.apache.poi.POIDataSamples;
33
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
34
import org.apache.poi.ss.usermodel.Sheet;
35
import org.apache.poi.ss.usermodel.Workbook;
36
import org.apache.poi.ss.usermodel.WorkbookFactory;
37
import org.junit.Test;
38
39
public class TestEmbeddedExtractor {
40
    private static final POIDataSamples samples = POIDataSamples.getSpreadSheetInstance();
41
42
    @Test
43
    public void extractPDFfromEMF() throws Exception {
44
        InputStream fis = samples.openResourceAsStream("Basic_Expense_Template_2011.xls");
45
        Workbook wb = WorkbookFactory.create(fis);
46
        fis.close();
47
48
        EmbeddedExtractor ee = new EmbeddedExtractor();
49
        List<EmbeddedData> edList = new ArrayList<EmbeddedData>();
50
        for (Sheet s : wb) {
51
            edList.addAll(ee.extractAll(s));
52
        }
53
        wb.close();
54
55
        assertEquals(2, edList.size());
56
57
        String filename1 = "Sample.pdf";
58
        EmbeddedData ed0 = edList.get(0);
59
        assertEquals(filename1, ed0.getFilename());
60
        assertEquals(filename1, ed0.getShape().getShapeName().trim());
61
        assertEquals("uNplB1QpYug+LWappiTh0w==", md5hash(ed0.getEmbeddedData()));
62
63
        String filename2 = "kalastuslupa_jiyjhnj_yuiyuiyuio_uyte_sldfsdfsdf_sfsdfsdf_sfsssfsf_sdfsdfsdfsdf_sdfsdfsdf.pdf";
64
        EmbeddedData ed1 = edList.get(1);
65
        assertEquals(filename2, ed1.getFilename());
66
        assertEquals(filename2, ed1.getShape().getShapeName().trim());
67
        assertEquals("QjLuAZ+cd7KbhVz4sj+QdA==", md5hash(ed1.getEmbeddedData()));
68
    }
69
70
    @Test
71
    public void extractFromXSSF() throws IOException, EncryptedDocumentException, InvalidFormatException {
72
        InputStream fis = samples.openResourceAsStream("58325_db.xlsx");
73
        Workbook wb = WorkbookFactory.create(fis);
74
        fis.close();
75
76
        EmbeddedExtractor ee = new EmbeddedExtractor();
77
        List<EmbeddedData> edList = new ArrayList<EmbeddedData>();
78
        for (Sheet s : wb) {
79
            edList.addAll(ee.extractAll(s));
80
        }
81
        wb.close();
82
83
        assertEquals(4, edList.size());
84
        EmbeddedData ed0 = edList.get(0);
85
        assertEquals("Object 1.pdf", ed0.getFilename());
86
        assertEquals("Object 1", ed0.getShape().getShapeName().trim());
87
        assertEquals("Oyys6UtQU1gbHYBYqA4NFA==", md5hash(ed0.getEmbeddedData()));
88
89
        EmbeddedData ed1 = edList.get(1);
90
        assertEquals("Object 2.pdf", ed1.getFilename());
91
        assertEquals("Object 2", ed1.getShape().getShapeName().trim());
92
        assertEquals("xLScPUS0XH+5CTZ2A3neNw==", md5hash(ed1.getEmbeddedData()));
93
94
        EmbeddedData ed2 = edList.get(2);
95
        assertEquals("Object 3.pdf", ed2.getFilename());
96
        assertEquals("Object 3", ed2.getShape().getShapeName().trim());
97
        assertEquals("rX4klZqJAeM5npb54Gi2+Q==", md5hash(ed2.getEmbeddedData()));
98
99
        EmbeddedData ed3 = edList.get(3);
100
        assertEquals("Microsoft_Excel_Worksheet1.xlsx", ed3.getFilename());
101
        assertEquals("Object 1", ed3.getShape().getShapeName().trim());
102
        assertEquals("4m4N8ji2tjpEGPQuw2YwGA==", md5hash(ed3.getEmbeddedData()));
103
    }
104
105
    public static String md5hash(byte[] input) {
106
        try {
107
            MessageDigest md = MessageDigest.getInstance("MD5");
108
            byte hash[] = md.digest(input);
109
            return DatatypeConverter.printBase64Binary(hash);
110
        } catch (NoSuchAlgorithmException e) {
111
            // doesn't happen
112
            return "";
113
        }
114
    }
115
}
116
native
(-)src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFSheet.java (+64 lines)
Lines 40-45 Link Here
40
import java.util.TreeMap;
40
import java.util.TreeMap;
41
41
42
import javax.xml.namespace.QName;
42
import javax.xml.namespace.QName;
43
import javax.xml.stream.XMLStreamException;
44
import javax.xml.stream.XMLStreamReader;
43
45
44
import org.apache.poi.POIXMLDocumentPart;
46
import org.apache.poi.POIXMLDocumentPart;
45
import org.apache.poi.POIXMLException;
47
import org.apache.poi.POIXMLException;
Lines 86-92 Link Here
86
import org.apache.poi.xssf.usermodel.helpers.ColumnHelper;
88
import org.apache.poi.xssf.usermodel.helpers.ColumnHelper;
87
import org.apache.poi.xssf.usermodel.helpers.XSSFIgnoredErrorHelper;
89
import org.apache.poi.xssf.usermodel.helpers.XSSFIgnoredErrorHelper;
88
import org.apache.poi.xssf.usermodel.helpers.XSSFRowShifter;
90
import org.apache.poi.xssf.usermodel.helpers.XSSFRowShifter;
91
import org.apache.xmlbeans.XmlCursor;
89
import org.apache.xmlbeans.XmlException;
92
import org.apache.xmlbeans.XmlException;
93
import org.apache.xmlbeans.XmlObject;
90
import org.apache.xmlbeans.XmlOptions;
94
import org.apache.xmlbeans.XmlOptions;
91
import org.openxmlformats.schemas.spreadsheetml.x2006.main.*;
95
import org.openxmlformats.schemas.spreadsheetml.x2006.main.*;
92
96
Lines 4371-4374 Link Here
4371
        CTIgnoredError ctIgnoredError = ctIgnoredErrors.addNewIgnoredError();
4375
        CTIgnoredError ctIgnoredError = ctIgnoredErrors.addNewIgnoredError();
4372
        XSSFIgnoredErrorHelper.addIgnoredErrors(ctIgnoredError, ref, ignoredErrorTypes);
4376
        XSSFIgnoredErrorHelper.addIgnoredErrors(ctIgnoredError, ref, ignoredErrorTypes);
4373
    }
4377
    }
4378
4379
    /**
4380
     * Determine the OleObject which links shapes with embedded resources
4381
     *
4382
     * @param shapeId the shape id
4383
     * @return the CTOleObject of the shape
4384
     */
4385
    protected CTOleObject readOleObject(long shapeId) {
4386
        if (!getCTWorksheet().isSetOleObjects()) {
4387
            return null;
4388
        }
4389
        
4390
        // we use a XmlCursor here to handle oleObject with-/out AlternateContent wrappers
4391
        String xquery = "declare namespace p='"+XSSFRelation.NS_SPREADSHEETML+"' .//p:oleObject";
4392
        XmlCursor cur = getCTWorksheet().getOleObjects().newCursor();
4393
        try {
4394
            cur.selectPath(xquery);
4395
            CTOleObject coo = null;
4396
            while (cur.toNextSelection()) {
4397
                String sId = cur.getAttributeText(new QName(null, "shapeId"));
4398
                if (sId == null || Long.parseLong(sId)  != shapeId) {
4399
                    continue;
4400
                }
4401
                
4402
                XmlObject xObj = cur.getObject();
4403
                if (xObj instanceof CTOleObject) {
4404
                    // the unusual case ...
4405
                    coo = (CTOleObject)xObj;
4406
                } else {
4407
                    XMLStreamReader reader = cur.newXMLStreamReader();
4408
                    try {
4409
                        CTOleObjects coos = CTOleObjects.Factory.parse(reader);
4410
                        if (coos.sizeOfOleObjectArray() == 0) {
4411
                            continue;
4412
                        }
4413
                        coo = coos.getOleObjectArray(0);
4414
                    } catch (XmlException e) {
4415
                        logger.log(POILogger.INFO, "can't parse CTOleObjects", e);
4416
                    } finally {
4417
                        try {
4418
                            reader.close();
4419
                        } catch (XMLStreamException e) {
4420
                            logger.log(POILogger.INFO, "can't close reader", e);
4421
                        }
4422
                    }
4423
                }
4424
                
4425
                // there are choice and fallback OleObject ... we prefer the one having the objectPr element,
4426
                // which is in the choice element
4427
                if (cur.toChild(XSSFRelation.NS_SPREADSHEETML, "objectPr")) {
4428
                    break;
4429
                }
4430
            }
4431
            return (coo == null) ? null : coo;
4432
        } finally {
4433
            cur.dispose();
4434
        }
4435
    }
4436
4437
4374
}
4438
}
(-)src/ooxml/testcases/org/apache/poi/ss/extractor/TestEmbeddedExtractor.java (+116 lines)
Line 0 Link Here
1
/* ====================================================================
2
   Licensed to the Apache Software Foundation (ASF) under one or more
3
   contributor license agreements.  See the NOTICE file distributed with
4
   this work for additional information regarding copyright ownership.
5
   The ASF licenses this file to You under the Apache License, Version 2.0
6
   (the "License"); you may not use this file except in compliance with
7
   the License.  You may obtain a copy of the License at
8
9
       http://www.apache.org/licenses/LICENSE-2.0
10
11
   Unless required by applicable law or agreed to in writing, software
12
   distributed under the License is distributed on an "AS IS" BASIS,
13
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
   See the License for the specific language governing permissions and
15
   limitations under the License.
16
==================================================================== */
17
18
package org.apache.poi.ss.extractor;
19
20
import static org.junit.Assert.assertEquals;
21
22
import java.io.IOException;
23
import java.io.InputStream;
24
import java.security.MessageDigest;
25
import java.security.NoSuchAlgorithmException;
26
import java.util.ArrayList;
27
import java.util.List;
28
29
import javax.xml.bind.DatatypeConverter;
30
31
import org.apache.poi.EncryptedDocumentException;
32
import org.apache.poi.POIDataSamples;
33
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
34
import org.apache.poi.ss.usermodel.Sheet;
35
import org.apache.poi.ss.usermodel.Workbook;
36
import org.apache.poi.ss.usermodel.WorkbookFactory;
37
import org.junit.Test;
38
39
public class TestEmbeddedExtractor {
40
    private static final POIDataSamples samples = POIDataSamples.getSpreadSheetInstance();
41
42
    @Test
43
    public void extractPDFfromEMF() throws Exception {
44
        InputStream fis = samples.openResourceAsStream("Basic_Expense_Template_2011.xls");
45
        Workbook wb = WorkbookFactory.create(fis);
46
        fis.close();
47
48
        EmbeddedExtractor ee = new EmbeddedExtractor();
49
        List<EmbeddedData> edList = new ArrayList<EmbeddedData>();
50
        for (Sheet s : wb) {
51
            edList.addAll(ee.extractAll(s));
52
        }
53
        wb.close();
54
55
        assertEquals(2, edList.size());
56
57
        String filename1 = "Sample.pdf";
58
        EmbeddedData ed0 = edList.get(0);
59
        assertEquals(filename1, ed0.getFilename());
60
        assertEquals(filename1, ed0.getShape().getShapeName().trim());
61
        assertEquals("uNplB1QpYug+LWappiTh0w==", md5hash(ed0.getEmbeddedData()));
62
63
        String filename2 = "kalastuslupa_jiyjhnj_yuiyuiyuio_uyte_sldfsdfsdf_sfsdfsdf_sfsssfsf_sdfsdfsdfsdf_sdfsdfsdf.pdf";
64
        EmbeddedData ed1 = edList.get(1);
65
        assertEquals(filename2, ed1.getFilename());
66
        assertEquals(filename2, ed1.getShape().getShapeName().trim());
67
        assertEquals("QjLuAZ+cd7KbhVz4sj+QdA==", md5hash(ed1.getEmbeddedData()));
68
    }
69
70
    @Test
71
    public void extractFromXSSF() throws IOException, EncryptedDocumentException, InvalidFormatException {
72
        InputStream fis = samples.openResourceAsStream("58325_db.xlsx");
73
        Workbook wb = WorkbookFactory.create(fis);
74
        fis.close();
75
76
        EmbeddedExtractor ee = new EmbeddedExtractor();
77
        List<EmbeddedData> edList = new ArrayList<EmbeddedData>();
78
        for (Sheet s : wb) {
79
            edList.addAll(ee.extractAll(s));
80
        }
81
        wb.close();
82
83
        assertEquals(4, edList.size());
84
        EmbeddedData ed0 = edList.get(0);
85
        assertEquals("Object 1.pdf", ed0.getFilename());
86
        assertEquals("Object 1", ed0.getShape().getShapeName().trim());
87
        assertEquals("Oyys6UtQU1gbHYBYqA4NFA==", md5hash(ed0.getEmbeddedData()));
88
89
        EmbeddedData ed1 = edList.get(1);
90
        assertEquals("Object 2.pdf", ed1.getFilename());
91
        assertEquals("Object 2", ed1.getShape().getShapeName().trim());
92
        assertEquals("xLScPUS0XH+5CTZ2A3neNw==", md5hash(ed1.getEmbeddedData()));
93
94
        EmbeddedData ed2 = edList.get(2);
95
        assertEquals("Object 3.pdf", ed2.getFilename());
96
        assertEquals("Object 3", ed2.getShape().getShapeName().trim());
97
        assertEquals("rX4klZqJAeM5npb54Gi2+Q==", md5hash(ed2.getEmbeddedData()));
98
99
        EmbeddedData ed3 = edList.get(3);
100
        assertEquals("Microsoft_Excel_Worksheet1.xlsx", ed3.getFilename());
101
        assertEquals("Object 1", ed3.getShape().getShapeName().trim());
102
        assertEquals("4m4N8ji2tjpEGPQuw2YwGA==", md5hash(ed3.getEmbeddedData()));
103
    }
104
105
    public static String md5hash(byte[] input) {
106
        try {
107
            MessageDigest md = MessageDigest.getInstance("MD5");
108
            byte hash[] = md.digest(input);
109
            return DatatypeConverter.printBase64Binary(hash);
110
        } catch (NoSuchAlgorithmException e) {
111
            // doesn't happen
112
            return "";
113
        }
114
    }
115
}
116
native
(-)src/java/org/apache/poi/ss/usermodel/ObjectData.java (+66 lines)
Line 0 Link Here
1
/* ====================================================================
2
   Licensed to the Apache Software Foundation (ASF) under one or more
3
   contributor license agreements.  See the NOTICE file distributed with
4
   this work for additional information regarding copyright ownership.
5
   The ASF licenses this file to You under the Apache License, Version 2.0
6
   (the "License"); you may not use this file except in compliance with
7
   the License.  You may obtain a copy of the License at
8
9
       http://www.apache.org/licenses/LICENSE-2.0
10
11
   Unless required by applicable law or agreed to in writing, software
12
   distributed under the License is distributed on an "AS IS" BASIS,
13
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
   See the License for the specific language governing permissions and
15
   limitations under the License.
16
==================================================================== */
17
18
package org.apache.poi.ss.usermodel;
19
20
import java.io.IOException;
21
22
import org.apache.poi.poifs.filesystem.DirectoryEntry;
23
24
/**
25
 * Common interface for OLE shapes, i.e. shapes linked to embedded documents
26
 * 
27
 * @since POI 3.16-beta2
28
 */
29
public interface ObjectData extends SimpleShape {
30
    /**
31
     * @return the data portion, for an ObjectData that doesn't have an associated POIFS Directory Entry
32
     */
33
    byte[] getObjectData() throws IOException;
34
35
    /**
36
     * @return does this ObjectData have an associated POIFS Directory Entry?
37
     * (Not all do, those that don't have a data portion)
38
     */
39
    boolean hasDirectoryEntry();
40
41
    /**
42
     * Gets the object data. Only call for ones that have
43
     * data though. See {@link #hasDirectoryEntry()}.
44
     * The caller has to close the corresponding POIFSFileSystem
45
     *
46
     * @return the object data as an OLE2 directory.
47
     * @throws IOException if there was an error reading the data.
48
     */
49
    DirectoryEntry getDirectory() throws IOException;
50
51
    /**
52
     * @return the OLE2 Class Name of the object
53
     */
54
    String getOLE2ClassName();
55
56
    /**
57
     * @return a filename suggestion - inspecting/interpreting the Directory object probably gives a better result
58
     */
59
    String getFileName();
60
61
    /**
62
     * @return the preview picture
63
     */
64
    PictureData getPictureData();
65
}
66
native

Return to bug 60519