--- src/ooxml/java/org/apache/poi/xssf/streaming/GZIPSheetDataWriter.java (revision 1516092) +++ src/ooxml/java/org/apache/poi/xssf/streaming/GZIPSheetDataWriter.java (working copy) @@ -22,14 +22,15 @@ import java.io.*; import java.util.zip.GZIPInputStream; import java.util.zip.GZIPOutputStream; +import org.apache.poi.xssf.model.SharedStringsTable; /** * Sheet writer that supports gzip compression of the temp files. */ public class GZIPSheetDataWriter extends SheetDataWriter { - public GZIPSheetDataWriter() throws IOException { - super(); + public GZIPSheetDataWriter(SharedStringsTable sharedStringsTable) throws IOException { + super(sharedStringsTable); } /** --- src/ooxml/java/org/apache/poi/xssf/streaming/SheetDataWriter.java (revision 1516092) +++ src/ooxml/java/org/apache/poi/xssf/streaming/SheetDataWriter.java (working copy) @@ -23,12 +23,11 @@ import org.apache.poi.ss.usermodel.CellStyle; import org.apache.poi.ss.usermodel.FormulaError; import org.apache.poi.ss.util.CellReference; -import org.apache.xmlbeans.XmlCursor; -import org.openxmlformats.schemas.spreadsheetml.x2006.main.STXstring; - -import javax.xml.namespace.QName; import java.io.*; import java.util.Iterator; +import org.apache.poi.xssf.model.SharedStringsTable; +import org.apache.poi.xssf.usermodel.XSSFRichTextString; +import org.openxmlformats.schemas.spreadsheetml.x2006.main.STCellType; /** * Initially copied from BigGridDemo "SpreadsheetWriter". @@ -46,11 +45,21 @@ int _numberOfCellsOfLastFlushedRow; // meaningful only of _numberOfFlushedRows>0 int _numberLastFlushedRow = -1; // meaningful only of _numberOfFlushedRows>0 + /** + * Table of strings shared across this workbook. 
+ * If two cells contain the same string, then the cell value is the same index into SharedStringsTable + */ + private SharedStringsTable _sharedStringSource; + public SheetDataWriter() throws IOException { _fd = createTempFile(); _out = createWriter(_fd); } + public SheetDataWriter(SharedStringsTable sharedStringsTable) throws IOException{ + this(); + this._sharedStringSource = sharedStringsTable; + } /** * Create a temp file to write sheet data. * By default, temp files are created in the default temporary-file directory @@ -188,14 +197,24 @@ break; } case Cell.CELL_TYPE_STRING: { - _out.write(" t=\"inlineStr\">"); - _out.write(""); + _out.write(""); + _out.write(String.valueOf(sRef)); + _out.write(""); + } else { + _out.write(" t=\"inlineStr\">"); + _out.write(""); + outputQuotedString(cell.getStringCellValue()); + _out.write(""); } - _out.write(">"); - outputQuotedString(cell.getStringCellValue()); - _out.write(""); break; } case Cell.CELL_TYPE_NUMERIC: { @@ -237,7 +256,7 @@ } //Taken from jdk1.3/src/javax/swing/text/html/HTMLWriter.java - protected void outputQuotedString(String s) throws IOException { + protected void outputQuotedString(String s) throws IOException { if (s == null || s.length() == 0) { return; } --- src/ooxml/java/org/apache/poi/xssf/streaming/SXSSFWorkbook.java (revision 1516092) +++ src/ooxml/java/org/apache/poi/xssf/streaming/SXSSFWorkbook.java (working copy) @@ -43,6 +43,7 @@ import org.apache.poi.ss.formula.udf.UDFFinder; import org.apache.poi.ss.usermodel.Row.MissingCellPolicy; import org.apache.poi.ss.util.CellRangeAddress; +import org.apache.poi.xssf.model.SharedStringsTable; /** * Streaming version of XSSFWorkbook implementing the "BigGridDemo" strategy. 
@@ -73,6 +74,11 @@ private boolean _compressTmpFiles = false; /** + * shared string table - a cache of strings in this workbook + */ + private SharedStringsTable _sharedStringSource = null; + + /** * Construct a new workbook */ public SXSSFWorkbook(){ @@ -80,6 +86,17 @@ } /** + * Construct a new workbook + *

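+ * {@code useSahredStringTable}: whether string cell values are stored in the workbook's shared strings table rather than written as inline strings + *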

+ * @param useSahredStringTable + */ + public SXSSFWorkbook(boolean useSahredStringTable){ + this(null /*workbook*/, useSahredStringTable); + } + + /** * Construct a workbook from a template. *

* There are three use-cases to use SXSSFWorkbook(XSSFWorkbook) : @@ -115,11 +132,53 @@ * @param workbook the template workbook */ public SXSSFWorkbook(XSSFWorkbook workbook){ - this(workbook, DEFAULT_WINDOW_SIZE); + this(workbook, DEFAULT_WINDOW_SIZE, false); } - /** + * Construct a workbook from a template. + *

+ * There are three use-cases to use SXSSFWorkbook(XSSFWorkbook) : + *

    + *
  1. + * Append new sheets to existing workbooks. You can open existing + * workbook from a file or create on the fly with XSSF. + *
  2. + *
  3. + * Append rows to existing sheets. The row number MUST be greater + * than max(rownum) in the template sheet. + *
  4. + *
  5. + * Use existing workbook as a template and re-use global objects such + * as cell styles, formats, images, etc. + *
  6. + *
+ * All three use cases can work in a combination. + *

+ * What is not supported: + * + * + * @param workbook the template workbook + *

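+ * {@code useSahredStringTable}: whether string cell values are stored in the workbook's shared strings table rather than written as inline strings + *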

+ * @param useSahredStringTable + */ + public SXSSFWorkbook(XSSFWorkbook workbook, boolean useSahredStringTable){ + this(workbook, DEFAULT_WINDOW_SIZE, useSahredStringTable); + } + + /** * Constructs an workbook from an existing workbook. *

* When a new node is created via createRow() and the total number @@ -138,9 +197,13 @@ *

* * @param rowAccessWindowSize + *

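+ * {@code useSahredStringTable}: whether string cell values are stored in the workbook's shared strings table rather than written as inline strings + *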

+ * @param useSahredStringTable */ - public SXSSFWorkbook(XSSFWorkbook workbook, int rowAccessWindowSize){ - this(workbook,rowAccessWindowSize, false); + public SXSSFWorkbook(XSSFWorkbook workbook, int rowAccessWindowSize, boolean useSahredStringTable){ + this(workbook,rowAccessWindowSize, false, useSahredStringTable); } /** @@ -164,16 +227,22 @@ * @param rowAccessWindowSize * @param compressTmpFiles whether to use gzip compression for temporary files */ - public SXSSFWorkbook(XSSFWorkbook workbook, int rowAccessWindowSize, boolean compressTmpFiles){ + public SXSSFWorkbook(XSSFWorkbook workbook, int rowAccessWindowSize, boolean compressTmpFiles, boolean useSahredStringTable){ setRandomAccessWindowSize(rowAccessWindowSize); setCompressTempFiles(compressTmpFiles); if (workbook == null) { _wb=new XSSFWorkbook(); + if(useSahredStringTable){ + _sharedStringSource = _wb.getSharedStringSource(); + } } else { _wb=workbook; + if(useSahredStringTable){ + _sharedStringSource = _wb.getSharedStringSource(); + } for ( int i = 0; i < _wb.getNumberOfSheets(); i++ ) { XSSFSheet sheet = _wb.getSheetAt( i ); @@ -202,8 +271,38 @@ * @param rowAccessWindowSize */ public SXSSFWorkbook(int rowAccessWindowSize){ - this(null /*workbook*/, rowAccessWindowSize); + this(null /*workbook*/, rowAccessWindowSize, false); } + + /** + * Construct an empty workbook and specify the window for row access. + *

+ * When a new node is created via createRow() and the total number + * of unflushed records would exceed the specified value, then the + * row with the lowest index value is flushed and cannot be accessed + * via getRow() anymore. + *

+ *

+ * A value of -1 indicates unlimited access. In this case all + * records that have not been flushed by a call to flush() are available + * for random access. + *

+ *

+ * A value of 0 is not allowed because it would flush any newly created row + * without having a chance to specify any cells. + *

+ * + * @param rowAccessWindowSize + *

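+ * {@code useSahredStringTable}: whether string cell values are stored in the workbook's shared strings table rather than written as inline strings + *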

+ * @param useSahredStringTable + * + * + */ + public SXSSFWorkbook(int rowAccessWindowSize, boolean useSahredStringTable){ + this(null /*workbook*/, rowAccessWindowSize, useSahredStringTable); + } public int getRandomAccessWindowSize() { @@ -236,9 +335,9 @@ SheetDataWriter createSheetDataWriter() throws IOException { if(_compressTmpFiles) { - return new GZIPSheetDataWriter(); + return new GZIPSheetDataWriter(_sharedStringSource); } else { - return new SheetDataWriter(); + return new SheetDataWriter(_sharedStringSource); } }