View | Details | Raw Unified | Return to bug 54982
Collapse All | Expand All

(-)a/src/java/org/apache/poi/POITextExtractor.java (-1 / +15 lines)
Lines 16-21 Link Here
16
==================================================================== */
16
==================================================================== */
17
package org.apache.poi;
17
package org.apache.poi;
18
18
19
import java.io.Closeable;
20
import java.io.IOException;
21
19
/**
22
/**
20
 * Common Parent for Text Extractors
23
 * Common Parent for Text Extractors
21
 *  of POI Documents. 
24
 *  of POI Documents. 
Lines 27-33 package org.apache.poi; Link Here
27
 * @see org.apache.poi.hdgf.extractor.VisioTextExtractor
30
 * @see org.apache.poi.hdgf.extractor.VisioTextExtractor
28
 * @see org.apache.poi.hwpf.extractor.WordExtractor
31
 * @see org.apache.poi.hwpf.extractor.WordExtractor
29
 */
32
 */
30
public abstract class POITextExtractor {
33
public abstract class POITextExtractor implements Closeable {
31
	/** The POIDocument that's open */
34
	/** The POIDocument that's open */
32
	protected POIDocument document;
35
	protected POIDocument document;
33
36
Lines 61-64 public abstract class POITextExtractor { Link Here
61
	 *  metadata / properties, such as author and title.
64
	 *  metadata / properties, such as author and title.
62
	 */
65
	 */
63
	public abstract POITextExtractor getMetadataTextExtractor();
66
	public abstract POITextExtractor getMetadataTextExtractor();
67
	
68
	/**
69
	 * Frees the resources held by the Extractor as soon as
70
	 * it is no longer needed. This may include closing
71
	 * open file handles and freeing memory.
72
	 * 
73
	 * The Extractor cannot be used after close has been called.
74
	 */
75
	public void close() throws IOException {
76
		// nothing to do in abstract class, derived classes may perform actions.
77
	}
64
}
78
}
(-)a/src/ooxml/java/org/apache/poi/POIXMLTextExtractor.java (+14 lines)
Lines 17-22 Link Here
17
17
18
package org.apache.poi;
18
package org.apache.poi;
19
19
20
import java.io.IOException;
21
20
import org.apache.poi.POIXMLProperties.CoreProperties;
22
import org.apache.poi.POIXMLProperties.CoreProperties;
21
import org.apache.poi.POIXMLProperties.CustomProperties;
23
import org.apache.poi.POIXMLProperties.CustomProperties;
22
import org.apache.poi.POIXMLProperties.ExtendedProperties;
24
import org.apache.poi.POIXMLProperties.ExtendedProperties;
Lines 75-78 public abstract class POIXMLTextExtractor extends POITextExtractor { Link Here
75
	public POIXMLPropertiesTextExtractor getMetadataTextExtractor() {
77
	public POIXMLPropertiesTextExtractor getMetadataTextExtractor() {
76
		return new POIXMLPropertiesTextExtractor(_document);
78
		return new POIXMLPropertiesTextExtractor(_document);
77
	}
79
	}
80
81
	@Override
82
	public void close() throws IOException {
83
		// e.g. XSSFEventBasedExcelExtractor passes a null document
84
		if(_document != null) {
85
			OPCPackage pkg = _document.getPackage();
86
			if(pkg != null) {
87
				pkg.close();
88
			}
89
		}
90
		super.close();
91
	}
78
}
92
}
(-)a/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFEventBasedExcelExtractor.java (+9 lines)
Lines 191-196 public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor { Link Here
191
       }
191
       }
192
   }
192
   }
193
   
193
   
194
	@Override
195
	public void close() throws IOException {
196
		if (container != null) {
197
			container.close();
198
			container = null;
199
		}
200
		super.close();
201
	}
202
194
   protected class SheetTextExtractor implements SheetContentsHandler {
203
   protected class SheetTextExtractor implements SheetContentsHandler {
195
      private final StringBuffer output;
204
      private final StringBuffer output;
196
      private boolean firstCellOfRow = true;
205
      private boolean firstCellOfRow = true;
(-)a/src/ooxml/testcases/org/apache/poi/TestXMLPropertiesTextExtractor.java (+11 lines)
Lines 46-51 public final class TestXMLPropertiesTextExtractor extends TestCase { Link Here
46
46
47
		assertTrue(text.contains("LastModifiedBy = Yury Batrakov"));
47
		assertTrue(text.contains("LastModifiedBy = Yury Batrakov"));
48
		assertTrue(cText.contains("LastModifiedBy = Yury Batrakov"));
48
		assertTrue(cText.contains("LastModifiedBy = Yury Batrakov"));
49
		
50
		textExt.close();
51
		ext.close();
49
	}
52
	}
50
53
51
	public void testCore() throws Exception {
54
	public void testCore() throws Exception {
Lines 63-68 public final class TestXMLPropertiesTextExtractor extends TestCase { Link Here
63
66
64
		assertTrue(text.contains("LastModifiedBy = Yury Batrakov"));
67
		assertTrue(text.contains("LastModifiedBy = Yury Batrakov"));
65
		assertTrue(cText.contains("LastModifiedBy = Yury Batrakov"));
68
		assertTrue(cText.contains("LastModifiedBy = Yury Batrakov"));
69
		
70
		ext.close();
66
	}
71
	}
67
72
68
	public void testExtended() throws Exception {
73
	public void testExtended() throws Exception {
Lines 82-87 public final class TestXMLPropertiesTextExtractor extends TestCase { Link Here
82
		assertTrue(text.contains("Company = Mera"));
87
		assertTrue(text.contains("Company = Mera"));
83
		assertTrue(eText.contains("Application = Microsoft Excel"));
88
		assertTrue(eText.contains("Application = Microsoft Excel"));
84
		assertTrue(eText.contains("Company = Mera"));
89
		assertTrue(eText.contains("Company = Mera"));
90
91
		ext.close();
85
	}
92
	}
86
93
87
	public void testCustom() throws Exception {
94
	public void testCustom() throws Exception {
Lines 99-104 public final class TestXMLPropertiesTextExtractor extends TestCase { Link Here
99
      
106
      
100
      assertTrue(text.contains("description = another value"));
107
      assertTrue(text.contains("description = another value"));
101
      assertTrue(cText.contains("description = another value"));
108
      assertTrue(cText.contains("description = another value"));
109
110
      ext.close();
102
	}
111
	}
103
	
112
	
104
	/**
113
	/**
Lines 118-122 public final class TestXMLPropertiesTextExtractor extends TestCase { Link Here
118
      assertFalse(text.contains("Created =")); // With date is null
127
      assertFalse(text.contains("Created =")); // With date is null
119
      assertTrue(text.contains("CreatedString = ")); // Via string is blank
128
      assertTrue(text.contains("CreatedString = ")); // Via string is blank
120
      assertTrue(text.contains("LastModifiedBy = IT Client Services"));
129
      assertTrue(text.contains("LastModifiedBy = IT Client Services"));
130
		
131
      ext.close();
121
	}
132
	}
122
}
133
}
(-)a/src/ooxml/testcases/org/apache/poi/xslf/extractor/TestXSLFPowerPointExtractor.java (-2 / +12 lines)
Lines 43-50 public class TestXSLFPowerPointExtractor extends TestCase { Link Here
43
	 * Get text out of the simple file
43
	 * Get text out of the simple file
44
	 */
44
	 */
45
	public void testGetSimpleText() throws Exception {
45
	public void testGetSimpleText() throws Exception {
46
		new XSLFPowerPointExtractor(xmlA);
46
		new XSLFPowerPointExtractor(xmlA).close();
47
		new XSLFPowerPointExtractor(pkg);
47
		new XSLFPowerPointExtractor(pkg).close();
48
		
48
		
49
		XSLFPowerPointExtractor extractor = 
49
		XSLFPowerPointExtractor extractor = 
50
			new XSLFPowerPointExtractor(xmlA);
50
			new XSLFPowerPointExtractor(xmlA);
Lines 148-153 public class TestXSLFPowerPointExtractor extends TestCase { Link Here
148
		assertEquals(
148
		assertEquals(
149
				"\n\n\n\n", text
149
				"\n\n\n\n", text
150
		);
150
		);
151
		
152
		extractor.close();
151
	}
153
	}
152
	
154
	
153
   public void testGetComments() throws Exception {
155
   public void testGetComments() throws Exception {
Lines 165-170 public class TestXSLFPowerPointExtractor extends TestCase { Link Here
165
167
166
      // Check the authors came through too
168
      // Check the authors came through too
167
      assertTrue("Unable to find expected word in text\n" + text, text.contains("XPVMWARE01"));
169
      assertTrue("Unable to find expected word in text\n" + text, text.contains("XPVMWARE01"));
170
		
171
		extractor.close();
168
   }
172
   }
169
	
173
	
170
	public void testGetMasterText() throws Exception {
174
	public void testGetMasterText() throws Exception {
Lines 206-211 public class TestXSLFPowerPointExtractor extends TestCase { Link Here
206
            "This text comes from the Master Slide\n"
210
            "This text comes from the Master Slide\n"
207
            , text
211
            , text
208
      );
212
      );
213
		
214
		extractor.close();
209
	}
215
	}
210
216
211
    public void testTable() throws Exception {
217
    public void testTable() throws Exception {
Lines 219-224 public class TestXSLFPowerPointExtractor extends TestCase { Link Here
219
225
220
        // Check comments are there
226
        // Check comments are there
221
        assertTrue("Unable to find expected word in text\n" + text, text.contains("TEST"));
227
        assertTrue("Unable to find expected word in text\n" + text, text.contains("TEST"));
228
		
229
		extractor.close();
222
    }
230
    }
223
    
231
    
224
    /**
232
    /**
Lines 267-272 public class TestXSLFPowerPointExtractor extends TestCase { Link Here
267
               "Text missing for " + filename + "\n" + text, 
275
               "Text missing for " + filename + "\n" + text, 
268
               text.contains("Mystery")
276
               text.contains("Mystery")
269
         );
277
         );
278
         
279
 		 extractor.close();
270
       }
280
       }
271
    }
281
    }
272
}
282
}
(-)a/src/ooxml/testcases/org/apache/poi/xssf/extractor/ExtractorTestSuite.java (+63 lines)
Line 0 Link Here
1
/* ====================================================================
2
   Licensed to the Apache Software Foundation (ASF) under one or more
3
   contributor license agreements.  See the NOTICE file distributed with
4
   this work for additional information regarding copyright ownership.
5
   The ASF licenses this file to You under the Apache License, Version 2.0
6
   (the "License"); you may not use this file except in compliance with
7
   the License.  You may obtain a copy of the License at
8
9
       http://www.apache.org/licenses/LICENSE-2.0
10
11
   Unless required by applicable law or agreed to in writing, software
12
   distributed under the License is distributed on an "AS IS" BASIS,
13
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
   See the License for the specific language governing permissions and
15
   limitations under the License.
16
==================================================================== */
17
package org.apache.poi.xssf.extractor;
18
19
import org.apache.poi.TestXMLPropertiesTextExtractor;
20
import org.apache.poi.extractor.TestExtractorFactory;
21
import org.apache.poi.hdgf.extractor.TestVisioExtractor;
22
import org.apache.poi.hpbf.extractor.TestPublisherTextExtractor;
23
import org.apache.poi.hpsf.extractor.TestHPSFPropertiesExtractor;
24
import org.apache.poi.hslf.extractor.TestCruddyExtractor;
25
import org.apache.poi.hslf.extractor.TestExtractor;
26
import org.apache.poi.hsmf.extractor.TestOutlookTextExtractor;
27
import org.apache.poi.hssf.extractor.TestExcelExtractor;
28
import org.apache.poi.hwpf.extractor.TestWordExtractor;
29
import org.apache.poi.hwpf.extractor.TestWordExtractorBugs;
30
import org.apache.poi.xslf.extractor.TestXSLFPowerPointExtractor;
31
import org.apache.poi.xwpf.extractor.TestXWPFWordExtractor;
32
import org.junit.runner.RunWith;
33
import org.junit.runners.Suite;
34
import org.junit.runners.Suite.SuiteClasses;
35
36
/**
37
 * Simple test-suite to execute a number of related tests in one go.
38
 *
39
 * @author dominik.stadler
40
 */
41
@RunWith(Suite.class)
42
@SuiteClasses({
43
	TestCruddyExtractor.class,
44
	TestExcelExtractor.class,
45
	TestExtractor.class,
46
	TestExtractorFactory.class,
47
	TestHPSFPropertiesExtractor.class,
48
	TestOutlookTextExtractor.class,
49
	TestPublisherTextExtractor.class,
50
	TestVisioExtractor.class,
51
	TestWordExtractor.class,
52
	TestWordExtractorBugs.class,
53
	TestXMLPropertiesTextExtractor.class,
54
	TestXSLFPowerPointExtractor.class,
55
	TestXSSFEventBasedExcelExtractor.class,
56
	TestXSSFEventBasedExcelExtractorUsingFactory.class,
57
	TestXSSFExcelExtractor.class,
58
	TestXSSFExcelExtractorUsingFactory.class,
59
	TestXWPFWordExtractor.class
60
})
61
public class ExtractorTestSuite {
62
63
}
(-)a/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFEventBasedExcelExtractor.java (-6 / +13 lines)
Lines 30-41 import org.apache.poi.xssf.XSSFTestDataSamples; Link Here
30
/**
30
/**
31
 * Tests for {@link XSSFEventBasedExcelExtractor}
31
 * Tests for {@link XSSFEventBasedExcelExtractor}
32
 */
32
 */
33
public final class TestXSSFEventBasedExcelExtractor extends TestCase {
33
public class TestXSSFEventBasedExcelExtractor extends TestCase {
34
34
	protected XSSFEventBasedExcelExtractor getExtractor(String sampleName) throws Exception {
35
35
        return new XSSFEventBasedExcelExtractor(XSSFTestDataSamples.
36
	private static final XSSFEventBasedExcelExtractor getExtractor(String sampleName) throws Exception {
36
                openSamplePackage(sampleName));
37
		return new XSSFEventBasedExcelExtractor(XSSFTestDataSamples.
38
		      openSamplePackage(sampleName));
39
	}
37
	}
40
38
41
	/**
39
	/**
Lines 97-102 public final class TestXSSFEventBasedExcelExtractor extends TestCase { Link Here
97
				CHUNK2 +
95
				CHUNK2 +
98
				"Sheet3\n"
96
				"Sheet3\n"
99
				, text);
97
				, text);
98
		
99
		extractor.close();
100
	}
100
	}
101
	
101
	
102
	public void testGetComplexText() throws Exception {
102
	public void testGetComplexText() throws Exception {
Lines 112-117 public final class TestXSSFEventBasedExcelExtractor extends TestCase { Link Here
112
						"Avgtxfull\n" +
112
						"Avgtxfull\n" +
113
						"(iii) AVERAGE TAX RATES ON ANNUAL"	
113
						"(iii) AVERAGE TAX RATES ON ANNUAL"	
114
		));
114
		));
115
		
116
		extractor.close();
115
	}
117
	}
116
	
118
	
117
   public void testInlineStrings() throws Exception {
119
   public void testInlineStrings() throws Exception {
Lines 134-139 public final class TestXSSFEventBasedExcelExtractor extends TestCase { Link Here
134
      // Formulas
136
      // Formulas
135
      assertTrue("Unable to find expected word in text\n" + text, text.contains("A2"));
137
      assertTrue("Unable to find expected word in text\n" + text, text.contains("A2"));
136
      assertTrue("Unable to find expected word in text\n" + text, text.contains("A5-A$2"));
138
      assertTrue("Unable to find expected word in text\n" + text, text.contains("A5-A$2"));
139
		
140
      extractor.close();
137
   }
141
   }
138
   
142
   
139
	/**
143
	/**
Lines 159-163 public final class TestXSSFEventBasedExcelExtractor extends TestCase { Link Here
159
			Matcher m = pattern.matcher(text);
163
			Matcher m = pattern.matcher(text);
160
			assertTrue(m.matches());			
164
			assertTrue(m.matches());			
161
		}
165
		}
166
		
167
		ole2Extractor.close();
168
		ooxmlExtractor.close();
162
	}
169
	}
163
}
170
}
(-)a/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFEventBasedExcelExtractorUsingFactory.java (+29 lines)
Line 0 Link Here
1
/* ====================================================================
2
   Licensed to the Apache Software Foundation (ASF) under one or more
3
   contributor license agreements.  See the NOTICE file distributed with
4
   this work for additional information regarding copyright ownership.
5
   The ASF licenses this file to You under the Apache License, Version 2.0
6
   (the "License"); you may not use this file except in compliance with
7
   the License.  You may obtain a copy of the License at
8
9
       http://www.apache.org/licenses/LICENSE-2.0
10
11
   Unless required by applicable law or agreed to in writing, software
12
   distributed under the License is distributed on an "AS IS" BASIS,
13
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
   See the License for the specific language governing permissions and
15
   limitations under the License.
16
==================================================================== */
17
package org.apache.poi.xssf.extractor;
18
19
import org.apache.poi.extractor.ExtractorFactory;
20
import org.apache.poi.hssf.HSSFTestDataSamples;
21
22
23
public class TestXSSFEventBasedExcelExtractorUsingFactory extends TestXSSFEventBasedExcelExtractor {
24
	@Override
25
	protected final XSSFEventBasedExcelExtractor getExtractor(String sampleName) throws Exception {
26
		ExtractorFactory.setAllThreadsPreferEventExtractors(true);
27
		return (XSSFEventBasedExcelExtractor) ExtractorFactory.createExtractor(HSSFTestDataSamples.openSampleFileStream(sampleName));
28
	}
29
}
(-)a/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFExcelExtractor.java (-12 / +27 lines)
Lines 17-22 Link Here
17
17
18
package org.apache.poi.xssf.extractor;
18
package org.apache.poi.xssf.extractor;
19
19
20
import java.io.IOException;
20
import java.util.regex.Matcher;
21
import java.util.regex.Matcher;
21
import java.util.regex.Pattern;
22
import java.util.regex.Pattern;
22
23
Lines 30-46 import org.apache.poi.xssf.XSSFTestDataSamples; Link Here
30
/**
31
/**
31
 * Tests for {@link XSSFExcelExtractor}
32
 * Tests for {@link XSSFExcelExtractor}
32
 */
33
 */
33
public final class TestXSSFExcelExtractor extends TestCase {
34
public class TestXSSFExcelExtractor extends TestCase {
34
35
	protected XSSFExcelExtractor getExtractor(String sampleName) {
35
36
	private static final XSSFExcelExtractor getExtractor(String sampleName) {
37
		return new XSSFExcelExtractor(XSSFTestDataSamples.openSampleWorkbook(sampleName));
36
		return new XSSFExcelExtractor(XSSFTestDataSamples.openSampleWorkbook(sampleName));
38
	}
37
	}
39
38
40
	/**
39
	/**
41
	 * Get text out of the simple file
40
	 * Get text out of the simple file
41
	 * @throws IOException 
42
	 */
42
	 */
43
	public void testGetSimpleText() {
43
	public void testGetSimpleText() throws IOException {
44
		// a very simple file
44
		// a very simple file
45
		XSSFExcelExtractor extractor = getExtractor("sample.xlsx");
45
		XSSFExcelExtractor extractor = getExtractor("sample.xlsx");
46
		extractor.getText();
46
		extractor.getText();
Lines 96-104 public final class TestXSSFExcelExtractor extends TestCase { Link Here
96
				CHUNK2 +
96
				CHUNK2 +
97
				"Sheet3\n"
97
				"Sheet3\n"
98
				, text);
98
				, text);
99
		
100
		extractor.close();
99
	}
101
	}
100
	
102
	
101
	public void testGetComplexText() {
103
	public void testGetComplexText() throws IOException {
102
		// A fairly complex file
104
		// A fairly complex file
103
		XSSFExcelExtractor extractor = getExtractor("AverageTaxRates.xlsx");
105
		XSSFExcelExtractor extractor = getExtractor("AverageTaxRates.xlsx");
104
		extractor.getText();
106
		extractor.getText();
Lines 112-125 public final class TestXSSFExcelExtractor extends TestCase { Link Here
112
						"Avgtxfull\n" +
114
						"Avgtxfull\n" +
113
						"null\t(iii) AVERAGE TAX RATES ON ANNUAL"	
115
						"null\t(iii) AVERAGE TAX RATES ON ANNUAL"	
114
		));
116
		));
117
		
118
		extractor.close();
115
	}
119
	}
116
	
120
	
117
	/**
121
	/**
118
	 * Test that we return pretty much the same as
122
	 * Test that we return pretty much the same as
119
	 *  ExcelExtractor does, when we're both passed
123
	 *  ExcelExtractor does, when we're both passed
120
	 *  the same file, just saved as xls and xlsx
124
	 *  the same file, just saved as xls and xlsx
125
	 * @throws IOException 
121
	 */
126
	 */
122
	public void testComparedToOLE2() {
127
	public void testComparedToOLE2() throws IOException {
123
		// A fairly simple file - ooxml
128
		// A fairly simple file - ooxml
124
		XSSFExcelExtractor ooxmlExtractor = getExtractor("SampleSS.xlsx");
129
		XSSFExcelExtractor ooxmlExtractor = getExtractor("SampleSS.xlsx");
125
130
Lines 137-148 public final class TestXSSFExcelExtractor extends TestCase { Link Here
137
			Matcher m = pattern.matcher(text);
142
			Matcher m = pattern.matcher(text);
138
			assertTrue(m.matches());			
143
			assertTrue(m.matches());			
139
		}
144
		}
145
146
		ole2Extractor.close();
147
		ooxmlExtractor.close();
140
	}
148
	}
141
	
149
	
142
	/**
150
	/**
143
	 * From bug #45540
151
	 * From bug #45540
152
	 * @throws IOException 
144
	 */
153
	 */
145
	public void testHeaderFooter() {
154
	public void testHeaderFooter() throws IOException {
146
		String[] files = new String[] {
155
		String[] files = new String[] {
147
			"45540_classic_Header.xlsx", "45540_form_Header.xlsx",
156
			"45540_classic_Header.xlsx", "45540_form_Header.xlsx",
148
			"45540_classic_Footer.xlsx", "45540_form_Footer.xlsx",
157
			"45540_classic_Footer.xlsx", "45540_form_Footer.xlsx",
Lines 152-166 public final class TestXSSFExcelExtractor extends TestCase { Link Here
152
			String text = extractor.getText();
161
			String text = extractor.getText();
153
			
162
			
154
			assertTrue("Unable to find expected word in text from " + sampleName + "\n" + text, text.contains("testdoc"));
163
			assertTrue("Unable to find expected word in text from " + sampleName + "\n" + text, text.contains("testdoc"));
155
			assertTrue("Unable to find expected word in text\n" + text, text.contains("test phrase")); 
164
			assertTrue("Unable to find expected word in text\n" + text, text.contains("test phrase"));
165
			
166
			extractor.close();
156
		}
167
		}
157
	}
168
	}
158
169
159
	/**
170
	/**
160
	 * From bug #45544
171
	 * From bug #45544
172
	 * @throws IOException 
161
	 */
173
	 */
162
	public void testComments() {
174
	public void testComments() throws IOException {
163
		
164
		XSSFExcelExtractor extractor = getExtractor("45544.xlsx");
175
		XSSFExcelExtractor extractor = getExtractor("45544.xlsx");
165
		String text = extractor.getText();
176
		String text = extractor.getText();
166
177
Lines 173-181 public final class TestXSSFExcelExtractor extends TestCase { Link Here
173
		text = extractor.getText();
184
		text = extractor.getText();
174
		assertTrue("Unable to find expected word in text\n" + text, text.contains("testdoc"));
185
		assertTrue("Unable to find expected word in text\n" + text, text.contains("testdoc"));
175
		assertTrue("Unable to find expected word in text\n" + text, text.contains("test phrase"));
186
		assertTrue("Unable to find expected word in text\n" + text, text.contains("test phrase"));
187
		
188
		extractor.close();
176
	}
189
	}
177
	
190
	
178
	public void testInlineStrings() {
191
	public void testInlineStrings() throws IOException {
179
      XSSFExcelExtractor extractor = getExtractor("InlineStrings.xlsx");
192
      XSSFExcelExtractor extractor = getExtractor("InlineStrings.xlsx");
180
      extractor.setFormulasNotResults(true);
193
      extractor.setFormulasNotResults(true);
181
      String text = extractor.getText();
194
      String text = extractor.getText();
Lines 195-199 public final class TestXSSFExcelExtractor extends TestCase { Link Here
195
      // Formulas
208
      // Formulas
196
      assertTrue("Unable to find expected word in text\n" + text, text.contains("A2"));
209
      assertTrue("Unable to find expected word in text\n" + text, text.contains("A2"));
197
      assertTrue("Unable to find expected word in text\n" + text, text.contains("A5-A$2"));
210
      assertTrue("Unable to find expected word in text\n" + text, text.contains("A5-A$2"));
211
      
212
      extractor.close();
198
	}
213
	}
199
}
214
}
(-)a/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFExcelExtractorUsingFactory.java (+37 lines)
Line 0 Link Here
1
/* ====================================================================
2
   Licensed to the Apache Software Foundation (ASF) under one or more
3
   contributor license agreements.  See the NOTICE file distributed with
4
   this work for additional information regarding copyright ownership.
5
   The ASF licenses this file to You under the Apache License, Version 2.0
6
   (the "License"); you may not use this file except in compliance with
7
   the License.  You may obtain a copy of the License at
8
9
       http://www.apache.org/licenses/LICENSE-2.0
10
11
   Unless required by applicable law or agreed to in writing, software
12
   distributed under the License is distributed on an "AS IS" BASIS,
13
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
   See the License for the specific language governing permissions and
15
   limitations under the License.
16
==================================================================== */
17
18
package org.apache.poi.xssf.extractor;
19
20
import org.apache.poi.extractor.ExtractorFactory;
21
import org.apache.poi.hssf.HSSFTestDataSamples;
22
23
/**
24
 * Tests for {@link XSSFExcelExtractor} when it is created via {@link ExtractorFactory}
25
 */
26
public final class TestXSSFExcelExtractorUsingFactory extends TestXSSFExcelExtractor {
27
	@Override
28
	protected final XSSFExcelExtractor getExtractor(String sampleName) {
29
		ExtractorFactory.setAllThreadsPreferEventExtractors(false);
30
		ExtractorFactory.setThreadPrefersEventExtractors(false);
31
		try {
32
			return (XSSFExcelExtractor) ExtractorFactory.createExtractor(HSSFTestDataSamples.openSampleFileStream(sampleName));
33
		} catch (Exception e) {
34
			throw new RuntimeException(e);
35
		}
36
	}
37
}
(-)a/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java (+34 lines)
Lines 57-62 public class TestXWPFWordExtractor extends TestCase { Link Here
57
            }
57
            }
58
        }
58
        }
59
        assertEquals(3, ps);
59
        assertEquals(3, ps);
60
        
61
        extractor.close();
60
    }
62
    }
61
63
62
    /**
64
    /**
Lines 93-98 public class TestXWPFWordExtractor extends TestCase { Link Here
93
            }
95
            }
94
        }
96
        }
95
        assertEquals(134, ps);
97
        assertEquals(134, ps);
98
        
99
        extractor.close();
96
    }
100
    }
97
101
98
    public void testGetWithHyperlinks() throws IOException {
102
    public void testGetWithHyperlinks() throws IOException {
Lines 118-123 public class TestXWPFWordExtractor extends TestCase { Link Here
118
				"We have a hyperlink <http://poi.apache.org/> here, and another.\n",
122
				"We have a hyperlink <http://poi.apache.org/> here, and another.\n",
119
                extractor.getText()
123
                extractor.getText()
120
        );
124
        );
125
        
126
        extractor.close();
121
    }
127
    }
122
128
123
    public void testHeadersFooters() throws IOException {
129
    public void testHeadersFooters() throws IOException {
Lines 141-147 public class TestXWPFWordExtractor extends TestCase { Link Here
141
        // Now another file, expect multiple headers
147
        // Now another file, expect multiple headers
142
        //  and multiple footers
148
        //  and multiple footers
143
        doc = XWPFTestDataSamples.openSampleDocument("DiffFirstPageHeadFoot.docx");
149
        doc = XWPFTestDataSamples.openSampleDocument("DiffFirstPageHeadFoot.docx");
150
        extractor.close();
151
144
        extractor = new XWPFWordExtractor(doc);
152
        extractor = new XWPFWordExtractor(doc);
153
        extractor.close();
154
145
        extractor =
155
        extractor =
146
                new XWPFWordExtractor(doc);
156
                new XWPFWordExtractor(doc);
147
        extractor.getText();
157
        extractor.getText();
Lines 161-166 public class TestXWPFWordExtractor extends TestCase { Link Here
161
                        "Footer Left\tFooter Middle\tFooter Right\n",
171
                        "Footer Left\tFooter Middle\tFooter Right\n",
162
                extractor.getText()
172
                extractor.getText()
163
        );
173
        );
174
        
175
        extractor.close();
164
    }
176
    }
165
177
166
    public void testFootnotes() throws IOException {
178
    public void testFootnotes() throws IOException {
Lines 168-173 public class TestXWPFWordExtractor extends TestCase { Link Here
168
        XWPFWordExtractor extractor = new XWPFWordExtractor(doc);
180
        XWPFWordExtractor extractor = new XWPFWordExtractor(doc);
169
181
170
        assertTrue(extractor.getText().contains("snoska"));
182
        assertTrue(extractor.getText().contains("snoska"));
183
        
184
        extractor.close();
171
    }
185
    }
172
186
173
187
Lines 176-181 public class TestXWPFWordExtractor extends TestCase { Link Here
176
        XWPFWordExtractor extractor = new XWPFWordExtractor(doc);
190
        XWPFWordExtractor extractor = new XWPFWordExtractor(doc);
177
191
178
        assertTrue(extractor.getText().contains("snoska"));
192
        assertTrue(extractor.getText().contains("snoska"));
193
        
194
        extractor.close();
179
    }
195
    }
180
196
181
    public void testFormFootnotes() throws IOException {
197
    public void testFormFootnotes() throws IOException {
Lines 185-190 public class TestXWPFWordExtractor extends TestCase { Link Here
185
        String text = extractor.getText();
201
        String text = extractor.getText();
186
        assertTrue("Unable to find expected word in text\n" + text, text.contains("testdoc"));
202
        assertTrue("Unable to find expected word in text\n" + text, text.contains("testdoc"));
187
        assertTrue("Unable to find expected word in text\n" + text, text.contains("test phrase"));
203
        assertTrue("Unable to find expected word in text\n" + text, text.contains("test phrase"));
204
        
205
        extractor.close();
188
    }
206
    }
189
207
190
    public void testEndnotes() throws IOException {
208
    public void testEndnotes() throws IOException {
Lines 192-197 public class TestXWPFWordExtractor extends TestCase { Link Here
192
        XWPFWordExtractor extractor = new XWPFWordExtractor(doc);
210
        XWPFWordExtractor extractor = new XWPFWordExtractor(doc);
193
211
194
        assertTrue(extractor.getText().contains("XXX"));
212
        assertTrue(extractor.getText().contains("XXX"));
213
        
214
        extractor.close();
195
    }
215
    }
196
216
197
    public void testInsertedDeletedText() throws IOException {
217
    public void testInsertedDeletedText() throws IOException {
Lines 200-205 public class TestXWPFWordExtractor extends TestCase { Link Here
200
220
201
        assertTrue(extractor.getText().contains("pendant worn"));
221
        assertTrue(extractor.getText().contains("pendant worn"));
202
        assertTrue(extractor.getText().contains("extremely well"));
222
        assertTrue(extractor.getText().contains("extremely well"));
223
        
224
        extractor.close();
203
    }
225
    }
204
226
205
    public void testParagraphHeader() throws IOException {
227
    public void testParagraphHeader() throws IOException {
Lines 209-214 public class TestXWPFWordExtractor extends TestCase { Link Here
209
        assertTrue(extractor.getText().contains("Section 1"));
231
        assertTrue(extractor.getText().contains("Section 1"));
210
        assertTrue(extractor.getText().contains("Section 2"));
232
        assertTrue(extractor.getText().contains("Section 2"));
211
        assertTrue(extractor.getText().contains("Section 3"));
233
        assertTrue(extractor.getText().contains("Section 3"));
234
        
235
        extractor.close();
212
    }
236
    }
213
237
214
    /**
238
    /**
Lines 223-228 public class TestXWPFWordExtractor extends TestCase { Link Here
223
        assertTrue(extractor.getText().contains("2004"));
247
        assertTrue(extractor.getText().contains("2004"));
224
        assertTrue(extractor.getText().contains("2008"));
248
        assertTrue(extractor.getText().contains("2008"));
225
        assertTrue(extractor.getText().contains("(120 "));
249
        assertTrue(extractor.getText().contains("(120 "));
250
        
251
        extractor.close();
226
    }
252
    }
227
    
253
    
228
    /**
254
    /**
Lines 242-247 public class TestXWPFWordExtractor extends TestCase { Link Here
242
       
268
       
243
       // Now check the first paragraph in total
269
       // Now check the first paragraph in total
244
       assertTrue(extractor.getText().contains("a\tb\n"));
270
       assertTrue(extractor.getText().contains("a\tb\n"));
271
       
272
       extractor.close();
245
    }
273
    }
246
    
274
    
247
    /**
275
    /**
Lines 256-261 public class TestXWPFWordExtractor extends TestCase { Link Here
256
        assertTrue(text.length() > 0);
284
        assertTrue(text.length() > 0);
257
        assertFalse(text.contains("AUTHOR"));
285
        assertFalse(text.contains("AUTHOR"));
258
        assertFalse(text.contains("CREATEDATE"));
286
        assertFalse(text.contains("CREATEDATE"));
287
        
288
        extractor.close();
259
    }
289
    }
260
    
290
    
261
    /**
291
    /**
Lines 269-274 public class TestXWPFWordExtractor extends TestCase { Link Here
269
        String text = extractor.getText();
299
        String text = extractor.getText();
270
        assertTrue(text.length() > 0);
300
        assertTrue(text.length() > 0);
271
        assertTrue(text.contains("FldSimple.docx"));
301
        assertTrue(text.contains("FldSimple.docx"));
302
        
303
        extractor.close();
272
    }
304
    }
273
305
274
    /**
306
    /**
Lines 280-284 public class TestXWPFWordExtractor extends TestCase { Link Here
280
        XWPFWordExtractor extractor = new XWPFWordExtractor(doc);
312
        XWPFWordExtractor extractor = new XWPFWordExtractor(doc);
281
        String text = extractor.getText();
313
        String text = extractor.getText();
282
        assertTrue(text.length() > 0);
314
        assertTrue(text.length() > 0);
315
        
316
        extractor.close();
283
    }
317
    }
284
}
318
}

Return to bug 54982