ASF Bugzilla – Attachment 34805 Details for
Bug 51519
[PATCH] XSSFEventBasedExcelExtractor's Japanese xlsx file processing shouldn't extract t elements within rPh elements.
Home
|
New
|
Browse
|
Search
|
[?]
|
Reports
|
Help
|
New Account
|
Log In
Remember
[x]
|
Forgot Password
Login:
[x]
[patch]
Initial patch
51519.patch (text/plain), 8.97 KB, created by
Tim Allison
on 2017-03-07 14:58:19 UTC
(
hide
)
Description:
Initial patch
Filename:
MIME Type:
Creator:
Tim Allison
Created:
2017-03-07 14:58:19 UTC
Size:
8.97 KB
patch
obsolete
>Index: src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFExcelExtractor.java >IDEA additional info: >Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP ><+>UTF-8 >=================================================================== >--- src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFExcelExtractor.java (revision 1785840) >+++ src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFExcelExtractor.java (revision ) >@@ -22,7 +22,6 @@ > import java.util.regex.Pattern; > > import junit.framework.TestCase; >- > import org.apache.poi.POITextExtractor; > import org.apache.poi.hssf.HSSFTestDataSamples; > import org.apache.poi.hssf.extractor.ExcelExtractor; >@@ -226,4 +225,18 @@ > extractor.close(); > } > } >+ >+ public void testPhoneticRuns() throws Exception { >+ XSSFExcelExtractor extractor = getExtractor("51519.xlsx"); >+ try { >+ String text = extractor.getText(); >+ assertTrue(text.contains("\u8C4A\u7530")); >+ //this shows up only as a phonetic run and should not appear >+ //in the extracted text >+ assertFalse(text.contains("\u30CB\u30DB\u30F3")); >+ } finally { >+ extractor.close(); >+ } >+ >+ } > } >Index: src/ooxml/java/org/apache/poi/xssf/eventusermodel/ReadOnlySharedStringsTable.java >IDEA additional info: >Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP ><+>UTF-8 >=================================================================== >--- src/ooxml/java/org/apache/poi/xssf/eventusermodel/ReadOnlySharedStringsTable.java (revision 1785840) >+++ src/ooxml/java/org/apache/poi/xssf/eventusermodel/ReadOnlySharedStringsTable.java (revision ) >@@ -18,14 +18,14 @@ > > import static org.apache.poi.xssf.usermodel.XSSFRelation.NS_SPREADSHEETML; > >+import javax.xml.parsers.ParserConfigurationException; > import java.io.IOException; > import java.io.InputStream; > import java.io.PushbackInputStream; > import java.util.ArrayList; >+import java.util.Collections; > import java.util.List; > >-import 
javax.xml.parsers.ParserConfigurationException; >- > import org.apache.poi.openxml4j.opc.OPCPackage; > import org.apache.poi.openxml4j.opc.PackagePart; > import org.apache.poi.util.SAXHelper; >@@ -93,7 +93,7 @@ > /** > * The shared strings table. > */ >- private List<String> strings; >+ private List<XSSFSharedString> strings; > > /** > * @param pkg The {@link OPCPackage} to use as basis for the shared-strings table. >@@ -174,17 +174,33 @@ > * @return the item at the specified position in this Shared String table. > */ > public String getEntryAt(int idx) { >- return strings.get(idx); >+ return strings.get(idx).getString(); > } > >+ /** >+ * Returns the text only portion of XSSFSharedString >+ * >+ * @return >+ */ > public List<String> getItems() { >+ List<String> stringsOnly = new ArrayList<String>(); >+ for (XSSFSharedString s : strings) { >+ stringsOnly.add(s.getString()); >+ } >+ Collections.unmodifiableList(stringsOnly); >+ return stringsOnly; >+ } >+ >+ public List<XSSFSharedString> getSharedStrings() { > return strings; > } > > //// ContentHandler methods //// > > private StringBuffer characters; >+ private StringBuffer rphCharacters; > private boolean tIsOpen; >+ private boolean inRPh; > > public void startElement(String uri, String localName, String name, > Attributes attributes) throws SAXException { >@@ -198,13 +214,17 @@ > String uniqueCount = attributes.getValue("uniqueCount"); > if(uniqueCount != null) this.uniqueCount = Integer.parseInt(uniqueCount); > >- this.strings = new ArrayList<String>(this.uniqueCount); >+ this.strings = new ArrayList<XSSFSharedString>(this.uniqueCount); > > characters = new StringBuffer(); >+ rphCharacters = new StringBuffer(); > } else if ("si".equals(localName)) { > characters.setLength(0); >+ rphCharacters.setLength(0); > } else if ("t".equals(localName)) { > tIsOpen = true; >+ } else if ("rPh".equals(localName)) { >+ inRPh = true; > } > } > >@@ -215,9 +235,11 @@ > } > > if ("si".equals(localName)) { >- 
strings.add(characters.toString()); >+ strings.add(new XSSFSharedString(characters.toString(), rphCharacters.toString())); > } else if ("t".equals(localName)) { > tIsOpen = false; >+ } else if ("rPh".equals(localName)) { >+ inRPh = false; > } > } > >@@ -226,8 +248,13 @@ > */ > public void characters(char[] ch, int start, int length) > throws SAXException { >- if (tIsOpen) >- characters.append(ch, start, length); >+ if (tIsOpen) { >+ if (inRPh) { >+ rphCharacters.append(ch, start, length); >+ } else { >+ characters.append(ch, start, length); >+ } >+ } > } > > } >Index: src/ooxml/java/org/apache/poi/xssf/eventusermodel/XSSFSharedString.java >IDEA additional info: >Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP ><+>UTF-8 >=================================================================== >--- src/ooxml/java/org/apache/poi/xssf/eventusermodel/XSSFSharedString.java (revision ) >+++ src/ooxml/java/org/apache/poi/xssf/eventusermodel/XSSFSharedString.java (revision ) >@@ -0,0 +1,37 @@ >+/* ==================================================================== >+ Licensed to the Apache Software Foundation (ASF) under one or more >+ contributor license agreements. See the NOTICE file distributed with >+ this work for additional information regarding copyright ownership. >+ The ASF licenses this file to You under the Apache License, Version 2.0 >+ (the "License"); you may not use this file except in compliance with >+ the License. You may obtain a copy of the License at >+ >+ http://www.apache.org/licenses/LICENSE-2.0 >+ >+ Unless required by applicable law or agreed to in writing, software >+ distributed under the License is distributed on an "AS IS" BASIS, >+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. >+ See the License for the specific language governing permissions and >+ limitations under the License. 
>+==================================================================== */ >+package org.apache.poi.xssf.eventusermodel; >+ >+ >+public class XSSFSharedString { >+ private final String string; >+ private final String phoneticString; >+ >+ >+ public XSSFSharedString(String string, String phoneticString) { >+ this.string = string; >+ this.phoneticString = phoneticString; >+ } >+ >+ public String getString() { >+ return string; >+ } >+ >+ public String getPhoneticString() { >+ return phoneticString; >+ } >+} >Index: src/ooxml/testcases/org/apache/poi/xssf/eventusermodel/TestReadOnlySharedStringsTable.java >IDEA additional info: >Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP ><+>UTF-8 >=================================================================== >--- src/ooxml/testcases/org/apache/poi/xssf/eventusermodel/TestReadOnlySharedStringsTable.java (revision 1785840) >+++ src/ooxml/testcases/org/apache/poi/xssf/eventusermodel/TestReadOnlySharedStringsTable.java (revision ) >@@ -19,8 +19,11 @@ > > package org.apache.poi.xssf.eventusermodel; > >+import java.io.IOException; >+import java.util.List; >+import java.util.regex.Pattern; >+ > import junit.framework.TestCase; >- > import org.apache.poi.POIDataSamples; > import org.apache.poi.openxml4j.opc.OPCPackage; > import org.apache.poi.openxml4j.opc.PackagePart; >@@ -29,10 +32,6 @@ > import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTRst; > import org.xml.sax.SAXException; > >-import java.io.IOException; >-import java.util.List; >-import java.util.regex.Pattern; >- > /** > * Tests for {@link org.apache.poi.xssf.eventusermodel.XSSFReader} > */ >@@ -59,7 +58,22 @@ > } > > } >- >+ >+ public void testPhoneticRuns() throws Exception { >+ OPCPackage pkg = OPCPackage.open(_ssTests.openResourceAsStream("51519.xlsx")); >+ List<PackagePart> parts = pkg.getPartsByName(Pattern.compile("/xl/sharedStrings.xml")); >+ assertEquals(1, parts.size()); >+ >+ ReadOnlySharedStringsTable rtbl = new 
ReadOnlySharedStringsTable(parts.get(0)); >+ List<XSSFSharedString> sharedStrings = rtbl.getSharedStrings(); >+ assertEquals(49, sharedStrings.size()); >+ >+ assertEquals("\u30B3\u30E1\u30F3\u30C8", sharedStrings.get(0).getString()); >+ assertEquals("", sharedStrings.get(0).getPhoneticString()); >+ assertEquals("\u65E5\u672C\u30AA\u30E9\u30AF\u30EB", sharedStrings.get(3).getString()); >+ assertEquals("\u30CB\u30DB\u30F3", sharedStrings.get(3).getPhoneticString()); >+ } >+ > public void testEmptySSTOnPackageObtainedViaWorkbook() throws Exception { > XSSFWorkbook wb = new XSSFWorkbook(_ssTests.openResourceAsStream("noSharedStringTable.xlsx")); > OPCPackage pkg = wb.getPackage();
You cannot view the attachment while viewing its details because your browser does not support IFRAMEs.
View the attachment on a separate page
.
View Attachment As Diff
View Attachment As Raw
Actions:
View
|
Diff
Attachments on
bug 51519
:
28092
|
31165
|
31295
| 34805