Line 0
Link Here
|
|
|
1 |
/* ==================================================================== |
2 |
Licensed to the Apache Software Foundation (ASF) under one or more |
3 |
contributor license agreements. See the NOTICE file distributed with |
4 |
this work for additional information regarding copyright ownership. |
5 |
The ASF licenses this file to You under the Apache License, Version 2.0 |
6 |
(the "License"); you may not use this file except in compliance with |
7 |
the License. You may obtain a copy of the License at |
8 |
|
9 |
http://www.apache.org/licenses/LICENSE-2.0 |
10 |
|
11 |
Unless required by applicable law or agreed to in writing, software |
12 |
distributed under the License is distributed on an "AS IS" BASIS, |
13 |
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
14 |
See the License for the specific language governing permissions and |
15 |
limitations under the License. |
16 |
==================================================================== */ |
17 |
|
18 |
package org.apache.poi.xssf.model; |
19 |
|
20 |
import org.apache.poi.openxml4j.opc.PackagePart; |
21 |
import org.apache.poi.openxml4j.opc.PackageRelationship; |
22 |
import org.apache.poi.util.TempFile; |
23 |
import org.apache.xmlbeans.XmlException; |
24 |
import org.apache.xmlbeans.XmlOptions; |
25 |
import org.mapdb.DB; |
26 |
import org.mapdb.DBMaker; |
27 |
import org.mapdb.HTreeMap; |
28 |
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTRst; |
29 |
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTSst; |
30 |
import org.openxmlformats.schemas.spreadsheetml.x2006.main.SstDocument; |
31 |
|
32 |
import javax.xml.stream.XMLStreamException; |
33 |
import java.io.*; |
34 |
import java.math.BigInteger; |
35 |
import java.security.SecureRandom; |
36 |
import java.util.ArrayList; |
37 |
import java.util.Collections; |
38 |
import java.util.List; |
39 |
|
40 |
/** |
41 |
* SharedStringsTable With Map DB implementation |
42 |
* </p> |
43 |
* |
44 |
*/ |
45 |
public class DBMappedSharedStringsTable extends SharedStringsTable implements AutoCloseable{ |
46 |
|
47 |
/** |
48 |
* Maps strings and their indexes in the <code>recordVsIndexBasedSTMap</code> map db |
49 |
*/ |
50 |
private DB recordVsIndexMapDB; |
51 |
private HTreeMap<String, Integer> recordVsIndexBasedSTMap; //string vs index map to lookup existing record in stTable |
52 |
/** |
53 |
* Maps strings and their indexes in the <code>recordVsIndexBasedSTMap</code> map db |
54 |
*/ |
55 |
private DB indexVsRecordMapDB; |
56 |
private HTreeMap<Integer, String> indexVsRecordBasedSTMap; //index vs string map to retrieve record with index |
57 |
|
58 |
private final File temp_shared_string_file; |
59 |
|
60 |
/** |
61 |
* An integer representing the total count of strings in the workbook. This count does not |
62 |
* include any numbers, it counts only the total of text strings in the workbook. |
63 |
*/ |
64 |
private int count; |
65 |
|
66 |
/** |
67 |
* An integer representing the total count of unique strings in the Shared String Table. |
68 |
* A string is unique even if it is a copy of another string, but has different formatting applied |
69 |
* at the character level. |
70 |
*/ |
71 |
private int uniqueCount; |
72 |
|
73 |
private SstDocument _sstDoc; |
74 |
|
75 |
private final static XmlOptions options = new XmlOptions(); |
76 |
private final static XmlOptions out_options = new XmlOptions(); |
77 |
|
78 |
|
79 |
static { |
80 |
options.put(XmlOptions.SAVE_INNER); |
81 |
options.put(XmlOptions.SAVE_AGGRESSIVE_NAMESPACES); |
82 |
options.put(XmlOptions.SAVE_USE_DEFAULT_NAMESPACE); |
83 |
options.setSaveImplicitNamespaces(Collections.singletonMap("", "http://schemas.openxmlformats.org/spreadsheetml/2006/main")); |
84 |
|
85 |
out_options.setLoadSubstituteNamespaces(Collections.singletonMap("", "http://schemas.openxmlformats.org/spreadsheetml/2006/main")); //TODO add options if required |
86 |
} |
87 |
|
88 |
public DBMappedSharedStringsTable() { |
89 |
super(); |
90 |
temp_shared_string_file = createTempFile("poi-shared-string-table", ".xml"); |
91 |
initMapDbBasedSharedStringTableMap(); |
92 |
} |
93 |
|
94 |
private File createTempFile(String prefix, String suffix) { |
95 |
try { |
96 |
return TempFile.createTempFile(prefix, suffix); |
97 |
} catch (IOException e) { |
98 |
throw new RuntimeException("Couldn't create required temp file", e); |
99 |
} |
100 |
} |
101 |
|
102 |
public DBMappedSharedStringsTable(PackagePart part, PackageRelationship rel) throws IOException { |
103 |
super(part, rel);//TODO needs to be commented out whiler reading |
104 |
temp_shared_string_file = createTempFile("poi-shared-string-table", ".xml"); |
105 |
initMapDbBasedSharedStringTableMap(); |
106 |
readFrom(part.getInputStream()); |
107 |
} |
108 |
|
109 |
public FileInputStream getSharedStringInputStream() throws IOException { |
110 |
return new FileInputStream(temp_shared_string_file); |
111 |
} |
112 |
|
113 |
public FileOutputStream getSharedStringsTableOutputStream() throws IOException { |
114 |
return new FileOutputStream(temp_shared_string_file); |
115 |
} |
116 |
|
117 |
public File getTemp_shared_string_file() { |
118 |
return temp_shared_string_file; |
119 |
} |
120 |
|
121 |
private void initMapDbBasedSharedStringTableMap() { |
122 |
initRecordVsIndexBasedMapDB(); |
123 |
initIndexVsRecordBasedMapDB(); |
124 |
} |
125 |
|
126 |
private void initRecordVsIndexBasedMapDB() { |
127 |
File mapDbFile = createTempFile(new BigInteger(130, new SecureRandom()).toString(32), "");//creating random name file to store map db |
128 |
recordVsIndexMapDB = DBMaker.newFileDB(mapDbFile) |
129 |
.transactionDisable() |
130 |
.cacheHardRefEnable() |
131 |
.cacheSize(65536) |
132 |
.deleteFilesAfterClose() |
133 |
.mmapFileEnablePartial() |
134 |
.closeOnJvmShutdown().make(); |
135 |
recordVsIndexBasedSTMap = recordVsIndexMapDB.createHashMap(new BigInteger(130, new SecureRandom()).toString(32)).make(); |
136 |
} |
137 |
|
138 |
private void initIndexVsRecordBasedMapDB() { |
139 |
File mapDb2File = createTempFile(new BigInteger(130, new SecureRandom()).toString(32), "");//creating random name file to store map db |
140 |
indexVsRecordMapDB = DBMaker.newFileDB(mapDb2File) |
141 |
.transactionDisable() |
142 |
.cacheDisable() //caching not required indexVsRecordBasedSTMap will be used to write all existing values |
143 |
.deleteFilesAfterClose() |
144 |
.mmapFileEnablePartial() |
145 |
.closeOnJvmShutdown().make(); |
146 |
indexVsRecordBasedSTMap = indexVsRecordMapDB.createHashMap(new BigInteger(130, new SecureRandom()).toString(32)).make(); |
147 |
} |
148 |
|
149 |
/** |
150 |
* Read this shared strings table from an XML file. |
151 |
* |
152 |
* @param is The input stream containing the XML document. |
153 |
* @throws java.io.IOException if an error occurs while reading. |
154 |
*/ |
155 |
@SuppressWarnings("deprecation") //YK: getXYZArray() array accessors are deprecated in xmlbeans with JDK 1.5 support |
156 |
public void readFrom(InputStream is) throws IOException { |
157 |
try { |
158 |
int cnt = 0; |
159 |
_sstDoc = SstDocument.Factory.parse(is); |
160 |
CTSst sst = _sstDoc.getSst(); |
161 |
count = (int) sst.getCount(); |
162 |
uniqueCount = (int) sst.getUniqueCount(); |
163 |
for (CTRst st : sst.getSiArray()) { |
164 |
String key = getKey(st); |
165 |
recordVsIndexBasedSTMap.put(key, cnt); |
166 |
indexVsRecordBasedSTMap.put(cnt, key); |
167 |
cnt++; |
168 |
} |
169 |
} catch (XmlException e) { |
170 |
throw new IOException(e.getLocalizedMessage()); |
171 |
} |
172 |
} |
173 |
|
174 |
private String getKey(CTRst st) { |
175 |
return st.xmlText(options); |
176 |
} |
177 |
|
178 |
/** |
179 |
* Return a string item by index |
180 |
* |
181 |
* @param idx index of item to return. |
182 |
* @return the item at the specified position in this Shared String table. |
183 |
*/ |
184 |
public CTRst getEntryAt(int idx) { |
185 |
try { |
186 |
return CTRst.Factory.parse(indexVsRecordBasedSTMap.get(idx), out_options); |
187 |
} catch (XmlException e) { |
188 |
throw new RuntimeException("Error Parsing xmlText from SSTable"); |
189 |
} |
190 |
} |
191 |
|
192 |
/** |
193 |
* Return an integer representing the total count of strings in the workbook. This count does not |
194 |
* include any numbers, it counts only the total of text strings in the workbook. |
195 |
* |
196 |
* @return the total count of strings in the workbook |
197 |
*/ |
198 |
public int getCount() { |
199 |
return count; |
200 |
} |
201 |
|
202 |
/** |
203 |
* Returns an integer representing the total count of unique strings in the Shared String Table. |
204 |
* A string is unique even if it is a copy of another string, but has different formatting applied |
205 |
* at the character level. |
206 |
* |
207 |
* @return the total count of unique strings in the workbook |
208 |
*/ |
209 |
public int getUniqueCount() { |
210 |
return uniqueCount; |
211 |
} |
212 |
|
213 |
/** |
214 |
* Add an entry to this Shared String table (a new value is appened to the end). |
215 |
* <p/> |
216 |
* <p> |
217 |
* If the Shared String table already contains this <code>CTRst</code> bean, its index is returned. |
218 |
* Otherwise a new entry is aded. |
219 |
* </p> |
220 |
* |
221 |
* @param st the entry to add |
222 |
* @return index the index of added entry |
223 |
*/ |
224 |
public int addEntry(CTRst st) { |
225 |
String s = getKey(st); |
226 |
count++; |
227 |
if (recordVsIndexBasedSTMap.containsKey(s)) { |
228 |
return recordVsIndexBasedSTMap.get(s); |
229 |
} |
230 |
//new unique record |
231 |
recordVsIndexBasedSTMap.put(s, uniqueCount); |
232 |
indexVsRecordBasedSTMap.put(uniqueCount, s); |
233 |
return uniqueCount++; |
234 |
} |
235 |
/** |
236 |
* Provide low-level access to the underlying array of CTRst beans |
237 |
* |
238 |
* @return array of CTRst beans |
239 |
*/ |
240 |
public List<CTRst> getItems() { |
241 |
List<CTRst> beans = new ArrayList<CTRst>(); |
242 |
for (int i = 0; i < uniqueCount; i++) { |
243 |
beans.add(getEntryAt(i)); |
244 |
} |
245 |
return beans; |
246 |
} |
247 |
|
248 |
/** |
249 |
* Write this table out as XML. |
250 |
* |
251 |
* @param out The stream to write to. |
252 |
* @throws java.io.IOException if an error occurs while writing. |
253 |
*/ |
254 |
public void writeTo(OutputStream out) throws IOException { |
255 |
//re-create the sst table every time saving a workbook at the end after adding all record using map DB |
256 |
try { |
257 |
Writer writer = new BufferedWriter(new OutputStreamWriter(out, "UTF-8")); |
258 |
addDefaultXmlOptions(writer); |
259 |
if (uniqueCount != 0) { |
260 |
addStringItems(writer); |
261 |
addEndDocument(writer); |
262 |
} |
263 |
writer.flush(); |
264 |
} catch (XMLStreamException e) { |
265 |
throw new RuntimeException("Couldn't write to SharedStringsTable", e); |
266 |
} |
267 |
} |
268 |
|
269 |
private void addDefaultXmlOptions(Writer writer) throws XMLStreamException, IOException { |
270 |
writer.write("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\r\n"); |
271 |
String isNoSIElements = uniqueCount == 0 ? "/" : ""; |
272 |
writer.write("<sst count=\"" + count + "\" uniqueCount=\"" + uniqueCount + "\" xmlns=\"http://schemas.openxmlformats.org/spreadsheetml/2006/main\"" + isNoSIElements + ">"); |
273 |
} |
274 |
|
275 |
private void addStringItems(Writer writer) throws XMLStreamException, IOException { |
276 |
for (int i = 0; i < uniqueCount; i++) { |
277 |
String s = indexVsRecordBasedSTMap.get(i); |
278 |
writer.write("<si>"); |
279 |
writer.write(s); |
280 |
writer.write("</si>"); |
281 |
} |
282 |
} |
283 |
|
284 |
private void addEndDocument(Writer writer) throws XMLStreamException, IOException { |
285 |
writer.write("</sst>"); |
286 |
} |
287 |
|
288 |
@Override |
289 |
protected void commit() throws IOException { |
290 |
// createDefaultSSTTableXml(); |
291 |
FileOutputStream sharedStringOutputStream = getSharedStringsTableOutputStream(); |
292 |
writeTo(sharedStringOutputStream); |
293 |
sharedStringOutputStream.close(); |
294 |
} |
295 |
|
296 |
private void createDefaultSSTTableXml() throws IOException { //Todo, check if needed to create default one |
297 |
_sstDoc = SstDocument.Factory.newInstance(); |
298 |
PackagePart part = getPackagePart(); |
299 |
OutputStream out = part.getOutputStream(); |
300 |
_sstDoc.save(out, options); |
301 |
out.close(); |
302 |
} |
303 |
|
304 |
@Override |
305 |
public void close() throws Exception { |
306 |
recordVsIndexBasedSTMap.clear(); |
307 |
indexVsRecordBasedSTMap.clear(); |
308 |
recordVsIndexMapDB.close(); |
309 |
indexVsRecordMapDB.close(); |
310 |
} |
311 |
} |