View | Details | Raw Unified | Return to bug 51519
Collapse All | Expand All

(-)src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFExcelExtractor.java (-1 / +14 lines)
Lines 22-28 Link Here
22
import java.util.regex.Pattern;
22
import java.util.regex.Pattern;
23
23
24
import junit.framework.TestCase;
24
import junit.framework.TestCase;
25
26
import org.apache.poi.POITextExtractor;
25
import org.apache.poi.POITextExtractor;
27
import org.apache.poi.hssf.HSSFTestDataSamples;
26
import org.apache.poi.hssf.HSSFTestDataSamples;
28
import org.apache.poi.hssf.extractor.ExcelExtractor;
27
import org.apache.poi.hssf.extractor.ExcelExtractor;
Lines 226-229 Link Here
226
	        extractor.close();
225
	        extractor.close();
227
	    }
226
	    }
228
	}
227
	}
228
229
	/**
	 * Extracts the text of the sample workbook 51519.xlsx and checks that the
	 * expected base text is present while the string asserted absent below,
	 * which per the inline comment occurs only as a phonetic run, is not.
	 * The extractor is closed in a finally block whether or not the
	 * assertions pass.
	 */
	public void testPhoneticRuns() throws Exception {
230
		XSSFExcelExtractor extractor = getExtractor("51519.xlsx");
231
		try {
232
			String text = extractor.getText();
233
			assertTrue(text.contains("\u8C4A\u7530"));
234
			//this shows up only as a phonetic run and should not appear
235
			//in the extracted text
236
			assertFalse(text.contains("\u30CB\u30DB\u30F3"));
237
		} finally {
238
			extractor.close();
239
		}
240
241
	}
229
}
242
}
(-)src/ooxml/java/org/apache/poi/xssf/eventusermodel/ReadOnlySharedStringsTable.java (-8 / +35 lines)
Lines 18-31 Link Here
18
18
19
import static org.apache.poi.xssf.usermodel.XSSFRelation.NS_SPREADSHEETML;
19
import static org.apache.poi.xssf.usermodel.XSSFRelation.NS_SPREADSHEETML;
20
20
21
import javax.xml.parsers.ParserConfigurationException;
21
import java.io.IOException;
22
import java.io.IOException;
22
import java.io.InputStream;
23
import java.io.InputStream;
23
import java.io.PushbackInputStream;
24
import java.io.PushbackInputStream;
24
import java.util.ArrayList;
25
import java.util.ArrayList;
26
import java.util.Collections;
25
import java.util.List;
27
import java.util.List;
26
28
27
import javax.xml.parsers.ParserConfigurationException;
28
29
import org.apache.poi.openxml4j.opc.OPCPackage;
29
import org.apache.poi.openxml4j.opc.OPCPackage;
30
import org.apache.poi.openxml4j.opc.PackagePart;
30
import org.apache.poi.openxml4j.opc.PackagePart;
31
import org.apache.poi.util.SAXHelper;
31
import org.apache.poi.util.SAXHelper;
Lines 93-99 Link Here
93
    /**
93
    /**
94
     * The shared strings table.
94
     * The shared strings table.
95
     */
95
     */
96
    private List<String> strings;
96
    private List<XSSFSharedString> strings;
97
97
98
    /**
98
    /**
99
     * @param pkg The {@link OPCPackage} to use as basis for the shared-strings table.
99
     * @param pkg The {@link OPCPackage} to use as basis for the shared-strings table.
Lines 174-190 Link Here
174
     * @return the item at the specified position in this Shared String table.
174
     * @return the item at the specified position in this Shared String table.
175
     */
175
     */
176
    public String getEntryAt(int idx) {
176
    public String getEntryAt(int idx) {
177
        return strings.get(idx);
177
        return strings.get(idx).getString();
178
    }
178
    }
179
179
180
    /**
181
     * Returns the text only portion of XSSFSharedString
182
     *
183
     * @return
184
     */
180
    public List<String> getItems() {
185
    public List<String> getItems() {
186
        List<String> stringsOnly = new ArrayList<String>();
187
        for (XSSFSharedString s : strings) {
188
            stringsOnly.add(s.getString());
189
        }
190
        Collections.unmodifiableList(stringsOnly);
191
        return stringsOnly;
192
    }
193
194
    public List<XSSFSharedString> getSharedStrings() {
181
        return strings;
195
        return strings;
182
    }
196
    }
183
197
184
    //// ContentHandler methods ////
198
    //// ContentHandler methods ////
185
199
186
    private StringBuffer characters;
200
    private StringBuffer characters;
201
    private StringBuffer rphCharacters;
187
    private boolean tIsOpen;
202
    private boolean tIsOpen;
203
    private boolean inRPh;
188
204
189
    public void startElement(String uri, String localName, String name,
205
    public void startElement(String uri, String localName, String name,
190
                             Attributes attributes) throws SAXException {
206
                             Attributes attributes) throws SAXException {
Lines 198-210 Link Here
198
            String uniqueCount = attributes.getValue("uniqueCount");
214
            String uniqueCount = attributes.getValue("uniqueCount");
199
            if(uniqueCount != null) this.uniqueCount = Integer.parseInt(uniqueCount);
215
            if(uniqueCount != null) this.uniqueCount = Integer.parseInt(uniqueCount);
200
216
201
            this.strings = new ArrayList<String>(this.uniqueCount);
217
            this.strings = new ArrayList<XSSFSharedString>(this.uniqueCount);
202
218
203
            characters = new StringBuffer();
219
            characters = new StringBuffer();
220
            rphCharacters = new StringBuffer();
204
        } else if ("si".equals(localName)) {
221
        } else if ("si".equals(localName)) {
205
            characters.setLength(0);
222
            characters.setLength(0);
223
            rphCharacters.setLength(0);
206
        } else if ("t".equals(localName)) {
224
        } else if ("t".equals(localName)) {
207
            tIsOpen = true;
225
            tIsOpen = true;
226
        } else if ("rPh".equals(localName)) {
227
            inRPh = true;
208
        }
228
        }
209
    }
229
    }
210
230
Lines 215-223 Link Here
215
        }
235
        }
216
        
236
        
217
        if ("si".equals(localName)) {
237
        if ("si".equals(localName)) {
218
            strings.add(characters.toString());
238
            strings.add(new XSSFSharedString(characters.toString(), rphCharacters.toString()));
219
        } else if ("t".equals(localName)) {
239
        } else if ("t".equals(localName)) {
220
           tIsOpen = false;
240
           tIsOpen = false;
241
        } else if ("rPh".equals(localName)) {
242
            inRPh = false;
221
        }
243
        }
222
    }
244
    }
223
245
Lines 226-233 Link Here
226
     */
248
     */
227
    public void characters(char[] ch, int start, int length)
249
    public void characters(char[] ch, int start, int length)
228
            throws SAXException {
250
            throws SAXException {
229
        if (tIsOpen)
251
        if (tIsOpen) {
230
            characters.append(ch, start, length);
252
            if (inRPh) {
253
                rphCharacters.append(ch, start, length);
254
            } else {
255
                characters.append(ch, start, length);
256
            }
257
        }
231
    }
258
    }
232
259
233
}
260
}
(-)src/ooxml/java/org/apache/poi/xssf/eventusermodel/XSSFSharedString.java (+37 lines)
Line 0 Link Here
1
/* ====================================================================
2
   Licensed to the Apache Software Foundation (ASF) under one or more
3
   contributor license agreements.  See the NOTICE file distributed with
4
   this work for additional information regarding copyright ownership.
5
   The ASF licenses this file to You under the Apache License, Version 2.0
6
   (the "License"); you may not use this file except in compliance with
7
   the License.  You may obtain a copy of the License at
8
9
       http://www.apache.org/licenses/LICENSE-2.0
10
11
   Unless required by applicable law or agreed to in writing, software
12
   distributed under the License is distributed on an "AS IS" BASIS,
13
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
   See the License for the specific language governing permissions and
15
   limitations under the License.
16
==================================================================== */
17
package org.apache.poi.xssf.eventusermodel;
18
19
20
/**
 * Immutable holder for a single shared-string entry: the text of the entry
 * and, separately, the text collected from its phonetic runs.
 *
 * <p>Two instances are equal when both their text and their phonetic text
 * are equal. Either value may be {@code null}; no normalization is applied.</p>
 */
public class XSSFSharedString {
    private final String string;
    private final String phoneticString;

    /**
     * @param string the text of the entry
     * @param phoneticString the text of the entry's phonetic runs
     */
    public XSSFSharedString(String string, String phoneticString) {
        this.string = string;
        this.phoneticString = phoneticString;
    }

    /**
     * @return the text of the entry, exactly as passed to the constructor
     */
    public String getString() {
        return string;
    }

    /**
     * @return the phonetic-run text of the entry, exactly as passed to the
     *         constructor
     */
    public String getPhoneticString() {
        return phoneticString;
    }

    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (!(obj instanceof XSSFSharedString)) {
            return false;
        }
        XSSFSharedString other = (XSSFSharedString) obj;
        return sameText(string, other.string)
                && sameText(phoneticString, other.phoneticString);
    }

    @Override
    public int hashCode() {
        int result = (string == null) ? 0 : string.hashCode();
        result = 31 * result + ((phoneticString == null) ? 0 : phoneticString.hashCode());
        return result;
    }

    /** Null-tolerant string comparison used by {@link #equals(Object)}. */
    private static boolean sameText(String a, String b) {
        return (a == null) ? (b == null) : a.equals(b);
    }
}
(-)src/ooxml/testcases/org/apache/poi/xssf/eventusermodel/TestReadOnlySharedStringsTable.java (-6 / +20 lines)
Lines 19-26 Link Here
19
19
20
package org.apache.poi.xssf.eventusermodel;
20
package org.apache.poi.xssf.eventusermodel;
21
21
22
import java.io.IOException;
23
import java.util.List;
24
import java.util.regex.Pattern;
25
22
import junit.framework.TestCase;
26
import junit.framework.TestCase;
23
24
import org.apache.poi.POIDataSamples;
27
import org.apache.poi.POIDataSamples;
25
import org.apache.poi.openxml4j.opc.OPCPackage;
28
import org.apache.poi.openxml4j.opc.OPCPackage;
26
import org.apache.poi.openxml4j.opc.PackagePart;
29
import org.apache.poi.openxml4j.opc.PackagePart;
Lines 29-38 Link Here
29
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTRst;
32
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTRst;
30
import org.xml.sax.SAXException;
33
import org.xml.sax.SAXException;
31
34
32
import java.io.IOException;
33
import java.util.List;
34
import java.util.regex.Pattern;
35
36
/**
35
/**
37
 * Tests for {@link org.apache.poi.xssf.eventusermodel.XSSFReader}
36
 * Tests for {@link org.apache.poi.xssf.eventusermodel.XSSFReader}
38
 */
37
 */
Lines 59-65 Link Here
59
        }
58
        }
60
59
61
	}
60
	}
62
    
61
62
	/**
	 * Opens the sample workbook 51519.xlsx as a package, locates its single
	 * shared strings part, parses it with ReadOnlySharedStringsTable and
	 * checks that each entry reports its text and its phonetic-run text
	 * separately.
	 *
	 * NOTE(review): pkg is never closed in this method; confirm whether the
	 * other tests in this class close or revert their packages.
	 */
	public void testPhoneticRuns() throws Exception {
63
        OPCPackage pkg = OPCPackage.open(_ssTests.openResourceAsStream("51519.xlsx"));
64
        List<PackagePart> parts = pkg.getPartsByName(Pattern.compile("/xl/sharedStrings.xml"));
65
        assertEquals(1, parts.size());
66
67
        ReadOnlySharedStringsTable rtbl = new ReadOnlySharedStringsTable(parts.get(0));
68
        List<XSSFSharedString> sharedStrings = rtbl.getSharedStrings();
69
        assertEquals(49, sharedStrings.size());
70
71
        // Entry 0 is expected to have an empty phonetic string.
        assertEquals("\u30B3\u30E1\u30F3\u30C8", sharedStrings.get(0).getString());
72
        assertEquals("", sharedStrings.get(0).getPhoneticString());
73
        // Entry 3 is expected to carry phonetic text, kept out of getString().
        assertEquals("\u65E5\u672C\u30AA\u30E9\u30AF\u30EB", sharedStrings.get(3).getString());
74
        assertEquals("\u30CB\u30DB\u30F3", sharedStrings.get(3).getPhoneticString());
75
    }
76
63
    public void testEmptySSTOnPackageObtainedViaWorkbook() throws Exception {
77
    public void testEmptySSTOnPackageObtainedViaWorkbook() throws Exception {
64
        XSSFWorkbook wb = new XSSFWorkbook(_ssTests.openResourceAsStream("noSharedStringTable.xlsx"));
78
        XSSFWorkbook wb = new XSSFWorkbook(_ssTests.openResourceAsStream("noSharedStringTable.xlsx"));
65
        OPCPackage pkg = wb.getPackage();
79
        OPCPackage pkg = wb.getPackage();

Return to bug 51519