--- src/ooxml/java/org/apache/poi/xssf/streaming/SXSSFWorkbook.java (revision 8bf3ebfcee2e2799486de4ed8a5e063616b4083a) +++ src/ooxml/java/org/apache/poi/xssf/streaming/SXSSFWorkbook.java (revision ) @@ -17,37 +17,26 @@ package org.apache.poi.xssf.streaming; -import java.io.File; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.io.OutputStream; -import java.io.OutputStreamWriter; -import java.util.Enumeration; -import java.util.HashMap; -import java.util.List; -import java.util.zip.ZipEntry; -import java.util.zip.ZipFile; -import java.util.zip.ZipOutputStream; - import org.apache.poi.openxml4j.opc.OPCPackage; import org.apache.poi.ss.formula.udf.UDFFinder; -import org.apache.poi.ss.usermodel.CellStyle; -import org.apache.poi.ss.usermodel.CreationHelper; -import org.apache.poi.ss.usermodel.DataFormat; -import org.apache.poi.ss.usermodel.Font; -import org.apache.poi.ss.usermodel.Name; -import org.apache.poi.ss.usermodel.PictureData; +import org.apache.poi.ss.usermodel.*; import org.apache.poi.ss.usermodel.Row.MissingCellPolicy; -import org.apache.poi.ss.usermodel.Sheet; -import org.apache.poi.ss.usermodel.Workbook; import org.apache.poi.ss.util.CellRangeAddress; import org.apache.poi.util.TempFile; +import org.apache.poi.xssf.model.DBMappedSharedStringsTable; import org.apache.poi.xssf.model.SharedStringsTable; +import org.apache.poi.xssf.model.SharedStringsTableType; import org.apache.poi.xssf.usermodel.XSSFSheet; import org.apache.poi.xssf.usermodel.XSSFWorkbook; +import java.io.*; +import java.util.Enumeration; +import java.util.HashMap; +import java.util.List; +import java.util.zip.ZipEntry; +import java.util.zip.ZipFile; +import java.util.zip.ZipOutputStream; + /** * Streaming version of XSSFWorkbook implementing the "BigGridDemo" strategy. * @@ -231,7 +220,38 @@ } } } + /** + * Constructs an workbook from an existing workbook. + *
+ * When a new node is created via createRow() and the total number + * of unflushed records would exceed the specified value, then the + * row with the lowest index value is flushed and cannot be accessed + * via getRow() anymore. + *
+ *+ * A value of -1 indicates unlimited access. In this case all + * records that have not been flushed by a call to flush() are available + * for random access. + *
+ *
+ * A value of 0 is not allowed because it would flush any newly created row + * without having a chance to specify any cells. + * + * + * @param workbook the template workbook + * @param rowAccessWindowSize + * @param compressTmpFiles whether to use gzip compression for temporary files + *@param sharedStringsTableType whether to use default shared string or custom + */ + public SXSSFWorkbook(XSSFWorkbook workbook, int rowAccessWindowSize, boolean compressTmpFiles, SharedStringsTableType sharedStringsTableType) { + this(workbook, rowAccessWindowSize, compressTmpFiles, true); + if (sharedStringsTableType == SharedStringsTableType.LOW_FOOTPRINT_MAP_DB_SST) { + this._sharedStringSource = new DBMappedSharedStringsTable(); + } + } + + /** * Construct an empty workbook and specify the window for row access. *
* When a new node is created via createRow() and the total number
@@ -350,11 +370,13 @@
{
xis.close();
}
- }
- else
- {
+ } else {
+ if (isDBMappedSharedStringsTableEntry(ze)) {
+ injectSharedStringFromTempFile(zos);
+ } else {
- copyStream(is, zos);
- }
+ copyStream(is, zos);
+ }
+ }
is.close();
}
}
@@ -368,6 +390,22 @@
zip.close();
}
}
+
+ private boolean isDBMappedSharedStringsTableEntry(ZipEntry ze) {
+ return ze.getName().equals("xl/sharedStrings.xml") && DBMappedSharedStringsTable.class.isInstance(_sharedStringSource);
+ }
+
+ private void injectSharedStringFromTempFile(ZipOutputStream zos) throws IOException {
+ DBMappedSharedStringsTable _sst = (DBMappedSharedStringsTable) _sharedStringSource;
+ _sst.commit();//creating DBMapped SharedStringsTable.xml file
+ InputStream is = _sst.getSharedStringInputStream(); //injecting DBMapped SharedStringsTable.xml file in target output
+ copyStream(is, zos);
+ is.close();
+ if (!_sst.getTemp_shared_string_file().delete()) {
+ throw new IOException("Could not delete temporary file after processing: " + _sst.getTemp_shared_string_file().getName());
+ }
+ }
+
private static void copyStream(InputStream in, OutputStream out) throws IOException {
byte[] chunk = new byte[1024];
int count;
--- src/ooxml/java/org/apache/poi/xssf/model/SharedStringsTableType.java (revision )
+++ src/ooxml/java/org/apache/poi/xssf/model/SharedStringsTableType.java (revision )
@@ -0,0 +1,37 @@
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.xssf.model;
+
+/**
+ * enum to specify shared strings table to use
+ */
+public enum SharedStringsTableType {
+ DEFAULT_SST(SharedStringsTable.class),//in memory shared strings string table
+ LOW_FOOTPRINT_MAP_DB_SST(DBMappedSharedStringsTable.class); //streaming version low foot print shared strings table
+ /**
+ * Defines what object is used to construct instances of this relationship
+ */
+ private Class extends SharedStringsTable> instance;
+
+ private SharedStringsTableType(Class extends SharedStringsTable> sharedStringsTableInstance) {
+ instance = sharedStringsTableInstance;
+ }
+
+ public Class extends SharedStringsTable> getInstance() {
+ return instance;
+ }
+}
--- src/ooxml/testcases/org/apache/poi/xssf/streaming/TestDBMappedSharedStringsTableOption.java (revision )
+++ src/ooxml/testcases/org/apache/poi/xssf/streaming/TestDBMappedSharedStringsTableOption.java (revision )
@@ -0,0 +1,216 @@
+package org.apache.poi.xssf.streaming;
+
+import org.apache.poi.ss.usermodel.Cell;
+import org.apache.poi.ss.usermodel.Row;
+import org.apache.poi.ss.usermodel.Workbook;
+import org.apache.poi.xssf.model.SharedStringsTableType;
+import org.apache.poi.xssf.usermodel.XSSFWorkbook;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+import java.io.*;
+import java.util.Random;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+public class TestDBMappedSharedStringsTableOption {
+ //Streaming version of workbook
+ private SXSSFWorkbook workbook;
+
+ private SXSSFSheet sheet;
+
+ private File outputFile;
+ public static final String TEST_OUTPUT_DIR = "poi.test.xssf.output.dir";
+
+ @Before
+ public void setUp() {
+ outputFile = new File(System.getProperty(TEST_OUTPUT_DIR), "output.xlsx");
+ setupWorkBook();
+ setupBlankSheet();
+ }
+
+ private void setupWorkBook() {
+ XSSFWorkbook wb = new XSSFWorkbook();
+ workbook = new SXSSFWorkbook(wb, 2, false, SharedStringsTableType.LOW_FOOTPRINT_MAP_DB_SST);
+ }
+
+ private void setupBlankSheet() {
+ sheet = (SXSSFSheet) workbook.createSheet("Employee Data");
+ }
+
+ @After
+ public void cleanup() {
+ outputFile.delete();
+ }
+
+ @Test
+ public void testWrite100UniqueRecordsOf10Char() throws IOException {
+ int recordCount = 100;
+ addUniqueRecordsToSheet(0, 100, 10);
+ writeAndAssertRecord(recordCount);
+ }
+
+ @Test
+ public void testWrite1MUniqueRecordsOf100Char() {
+ int recordCount = 1000000;
+ addUniqueRecordsToSheet(0, recordCount, 100);
+ writeAndAssertRecord(recordCount);
+ }
+
+ @Test
+ public void testWriteFromTextFile() {
+ int recordCount = 3;
+ File textInputFile = new File(System.getProperty(TEST_OUTPUT_DIR), "temp.txt");
+ try {
+ FileWriter w = new FileWriter(textInputFile);
+ for (int i = 1; i <= recordCount; i++) {
+ w.write("Line" + i + ",FirstColumn,SecondColumn,ThirdColumn\r\n");
+ }
+ w.close();
+ } catch (IOException e) {
+ }
+ addRecordsFromFile("temp.txt");
+ writeAndAssertRecord(recordCount);
+ textInputFile.delete();
+ }
+
+ @Test
+ public void testWrite1MRandomRecordsOf10Char() {
+ int recordCount = 100000;
+ addRandomRecordsToSheet(0, recordCount, 200000, 10);
+ writeAndAssertRecord(recordCount);
+ }
+
+ @Test
+ public void test1MRecordHavingRepetitiveRecordsOf10Char() {
+ int recordCount = 1000000;
+ addUniqueRecordsToSheet(0, 200000, 10);
+ addUniqueRecordsToSheet(200000, 200000, 10);
+ addUniqueRecordsToSheet(400000, 200000, 10);
+ addUniqueRecordsToSheet(600000, 200000, 10);
+ addUniqueRecordsToSheet(800000, 200000, 10);
+ writeAndAssertRecord(recordCount);
+ }
+
+ @Test
+ public void testWriteAllDuplicateRecord() {
+ int recordCount = 100000;
+ addRepeatingRecordsToSheet(recordCount);
+ writeAndAssertRecord(recordCount);
+ }
+
+ private void writeAndAssertRecord(int recordCount) {
+ System.out.print("Started writing.....");
+ try {
+ FileOutputStream out = new FileOutputStream(outputFile);
+ workbook.write(out);
+ out.close();
+ } catch (Exception e) {
+ assertTrue("Excel File Creation Failed: " + e.getMessage(), false);
+ }
+ System.out.println("File creation done...Asserting");
+
+ //NOTE: all tests can be executed within -Xmx100M by commenting out assertion code below
+
+ try {
+ assertRows(new XSSFWorkbook(outputFile), recordCount);
+ } catch (Exception e) {
+ assertTrue("Invalid Output File: " + e.getMessage(), false);
+ }
+ //----
+ }
+
+ private void addUniqueRecordsToSheet(int fromRowNum, int numberOfRecords, int constantStringLength) {
+ System.out.print("adding records to sheet.....");
+ int i = 0;
+ String constantString = getStringOf(constantStringLength);
+ while (i++ < numberOfRecords) {
+ if (i % 10000 == 0) System.out.print(i + ",");
+ Row row = sheet.createRow(fromRowNum++);
+ Object[] objArr = new Object[]{constantString + i};
+ int cellNum = 0;
+ for (Object obj : objArr) {
+ Cell cell = row.createCell(cellNum++);
+ if (obj instanceof String) {
+ cell.setCellValue((String) obj);
+ } else if (obj instanceof Integer)
+ cell.setCellValue((Integer) obj);
+ }
+ }
+ }
+
+ private String getStringOf(int length) {
+ StringBuilder str = new StringBuilder();
+ for (int j = 0; j < length; j++) {
+ str.append("a");
+ }
+ return str.toString();
+ }
+
+ private void addRandomRecordsToSheet(int fromRowNum, int numberOfRecords, int recordLength, int constantStringLength) {
+ int i = 0;
+ String constantString = getStringOf(constantStringLength);
+ while (i++ < numberOfRecords) {
+ if (i % 1000 == 0) System.out.print(i + ",");
+ Row row = sheet.createRow(fromRowNum++);
+ Object[] objArr = new Object[]{constantString + new Random().nextInt(recordLength)};
+ int cellNum = 0;
+ for (Object obj : objArr) {
+ Cell cell = row.createCell(cellNum++);
+ if (obj instanceof String)
+ cell.setCellValue((String) obj);
+ else if (obj instanceof Integer)
+ cell.setCellValue((Integer) obj);
+ }
+ }
+ }
+
+ private void addRecordsFromFile(String fileName) {
+ System.out.print("adding records to sheet.....");
+ try {
+ int fromRowNum = 0;
+ BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(fileName)));
+ String line = null;
+ while ((line = br.readLine()) != null) {
+ Row row = sheet.createRow(fromRowNum++);
+ Object[] objArr = line.split(",");
+ int cellNum = 0;
+ for (Object obj : objArr) {
+ Cell cell = row.createCell(cellNum++);
+ if (obj instanceof String)
+ cell.setCellValue((String) obj);
+ else if (obj instanceof Integer)
+ cell.setCellValue((Integer) obj);
+ }
+ }
+ br.close();
+ } catch (Exception e) {
+ e.printStackTrace();
+ }
+ }
+
+ private void addRepeatingRecordsToSheet(int count) {
+ int rownum = 0;
+ int i = 0;
+ String constantString = getStringOf(10);
+ while (i++ < count) {
+ Row row = sheet.createRow(rownum++);
+ Object[] objArr = new Object[]{constantString};
+ int cellnum = 0;
+ for (Object obj : objArr) {
+ Cell cell = row.createCell(cellnum++);
+ if (obj instanceof String)
+ cell.setCellValue((String) obj);
+ else if (obj instanceof Integer)
+ cell.setCellValue((Integer) obj);
+ }
+ }
+ }
+
+ public void assertRows(Workbook wb, int expectedRecordCount) {
+ assertEquals(expectedRecordCount, wb.getSheetAt(0).getLastRowNum() + 1);
+ }
+
+}
--- src/ooxml/java/org/apache/poi/xssf/model/DBMappedSharedStringsTable.java (revision )
+++ src/ooxml/java/org/apache/poi/xssf/model/DBMappedSharedStringsTable.java (revision )
@@ -0,0 +1,236 @@
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+package org.apache.poi.xssf.model;
+
+import org.apache.poi.util.TempFile;
+import org.apache.xmlbeans.XmlOptions;
+import org.mapdb.DB;
+import org.mapdb.DBMaker;
+import org.mapdb.HTreeMap;
+import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTRst;
+
+import javax.xml.stream.XMLStreamException;
+import java.io.*;
+import java.math.BigInteger;
+import java.security.SecureRandom;
+import java.util.Collections;
+
+/**
+ * SharedStringsTable With Map DB implementation
+ * To reduce memory footprint of POI’s shared strings table
+ * It flows data to disk as per availability of memory (Reference)
+ */
+public class DBMappedSharedStringsTable extends SharedStringsTable implements AutoCloseable {
+
+ /**
+ * Maps strings and their indexes in the
+ * If the Shared String table already contains this stringVsIndexSTMap
map db
+ */
+ private DB stringVsIndexMapDB;
+ //string vs index map to lookup existing record in stTable look at add entry method
+ private HTreeMapstringVsIndexSTMap
map db
+ */
+ private DB indexVsStringMapDB;
+ //index vs string map to retrieve record with index
+ private HTreeMapCTRst
bean, its index is returned.
+ * Otherwise a new entry is aded.
+ *