--- src/ooxml/java/org/apache/poi/xssf/eventusermodel/XSSFSheetXMLHandler.java (revision 1575994) +++ src/ooxml/java/org/apache/poi/xssf/eventusermodel/XSSFSheetXMLHandler.java (working copy) @@ -16,17 +16,26 @@ ==================================================================== */ package org.apache.poi.xssf.eventusermodel; +import java.util.Comparator; +import java.util.LinkedList; +import java.util.List; +import java.util.Queue; + +import org.apache.poi.hssf.util.CellReference; import org.apache.poi.ss.usermodel.BuiltinFormats; import org.apache.poi.ss.usermodel.DataFormatter; +import org.apache.poi.xssf.model.CommentsTable; import org.apache.poi.xssf.model.StylesTable; import org.apache.poi.xssf.usermodel.XSSFCellStyle; +import org.apache.poi.xssf.usermodel.XSSFComment; import org.apache.poi.xssf.usermodel.XSSFRichTextString; +import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTComment; import org.xml.sax.Attributes; import org.xml.sax.SAXException; import org.xml.sax.helpers.DefaultHandler; /** - * This class handles the processing of a sheet#.xml + * This class handles the processing of a sheet#.xml * sheet part of a XSSF .xlsx file, and generates * row and cell events for it. */ @@ -44,12 +53,17 @@ SST_STRING, NUMBER, } - + /** * Table with the styles used for formatting */ private StylesTable stylesTable; + /** + * Table with cell comments + */ + private CommentsTable commentsTable; + private ReadOnlySharedStringsTable sharedStringsTable; /** @@ -74,6 +88,7 @@ private short formatIndex; private String formatString; private final DataFormatter formatter; + private int rowNum; private String cellRef; private boolean formulasNotResults; @@ -82,6 +97,23 @@ private StringBuffer formula = new StringBuffer(); private StringBuffer headerFooter = new StringBuffer(); + private Queue commentCellRefs; + + private static final Comparator cellRefComparator = new Comparator() { + @Override + public int compare(CellReference o1, CellReference o2) { + int result = compare(o1.getRow(), o2.getRow()); + if (result == 0) { + result = compare(o1.getCol(), o2.getCol()); + } + return result; + } + + public int compare(int x, int y) { + return (x < y) ? -1 : ((x == y) ? 0 : 1); + } + }; + /** * Accepts objects needed while parsing. * @@ -90,16 +122,26 @@ */ public XSSFSheetXMLHandler( StylesTable styles, + CommentsTable comments, ReadOnlySharedStringsTable strings, SheetContentsHandler sheetContentsHandler, DataFormatter dataFormatter, boolean formulasNotResults) { this.stylesTable = styles; + this.commentsTable = comments; this.sharedStringsTable = strings; this.output = sheetContentsHandler; this.formulasNotResults = formulasNotResults; this.nextDataType = xssfDataType.NUMBER; this.formatter = dataFormatter; + + if (commentsTable != null) { + List commentList = commentsTable.getCTComments().getCommentList().getCommentList(); + commentCellRefs = new LinkedList(); + for (CTComment comment : commentList) { + commentCellRefs.add(new CellReference(comment.getRef())); + } + } } /** * Accepts objects needed while parsing. @@ -109,10 +151,11 @@ */ public XSSFSheetXMLHandler( StylesTable styles, + CommentsTable comments, ReadOnlySharedStringsTable strings, SheetContentsHandler sheetContentsHandler, boolean formulasNotResults) { - this(styles, strings, sheetContentsHandler, new DataFormatter(), formulasNotResults); + this(styles, comments, strings, sheetContentsHandler, new DataFormatter(), formulasNotResults); } private boolean isTextTag(String name) { @@ -131,7 +174,7 @@ // It isn't a text tag return false; } - + @Override public void startElement(String uri, String localName, String name, Attributes attributes) throws SAXException { @@ -146,26 +189,26 @@ } else if ("f".equals(name)) { // Clear contents cache formula.setLength(0); - + // Mark us as being a formula if not already if(nextDataType == xssfDataType.NUMBER) { nextDataType = xssfDataType.FORMULA; } - + // Decide where to get the formula string from String type = attributes.getValue("t"); if(type != null && type.equals("shared")) { // Is it the one that defines the shared, or uses it? String ref = attributes.getValue("ref"); String si = attributes.getValue("si"); - + if(ref != null) { // This one defines it // TODO Save it somewhere fIsOpen = true; } else { // This one uses a shared formula - // TODO Retrieve the shared formula and tweak it to + // TODO Retrieve the shared formula and tweak it to // match the current cell if(formulasNotResults) { System.err.println("Warning - shared formulas not yet supported!"); @@ -186,7 +229,7 @@ headerFooter.setLength(0); } else if("row".equals(name)) { - int rowNum = Integer.parseInt(attributes.getValue("r")) - 1; + rowNum = Integer.parseInt(attributes.getValue("r")) - 1; output.startRow(rowNum); } // c => cell @@ -235,7 +278,7 @@ // v => contents of a cell if (isTextTag(name)) { vIsOpen = false; - + // Process the value contents as required, now we have it all switch (nextDataType) { case BOOLEAN: @@ -252,7 +295,7 @@ thisStr = formula.toString(); } else { String fv = value.toString(); - + if (this.formatString != null) { try { // Try to use the value as a formattable number @@ -299,15 +342,20 @@ thisStr = "(TODO: Unexpected type: " + nextDataType + ")"; break; } - + // Output - output.cell(cellRef, thisStr); + checkForEmptyCellComments(EmptyCellCommentsCheckType.CELL); + XSSFComment comment = commentsTable != null ? commentsTable.findCellComment(cellRef) : null; + output.cell(cellRef, thisStr, comment); } else if ("f".equals(name)) { fIsOpen = false; } else if ("is".equals(name)) { isIsOpen = false; } else if ("row".equals(name)) { - output.endRow(); + checkForEmptyCellComments(EmptyCellCommentsCheckType.END_OF_ROW); + output.endRow(rowNum); + } else if ("sheetData".equals(name)) { + checkForEmptyCellComments(EmptyCellCommentsCheckType.END_OF_SHEET_DATA); } else if("oddHeader".equals(name) || "evenHeader".equals(name) || "firstHeader".equals(name)) { @@ -321,6 +369,75 @@ } } + private enum EmptyCellCommentsCheckType { + CELL, + END_OF_ROW, + END_OF_SHEET_DATA + } + + /** + * Do a check for, and output, comments in otherwise empty cells. + */ + private void checkForEmptyCellComments(EmptyCellCommentsCheckType type) { + if (commentCellRefs != null && !commentCellRefs.isEmpty()) { + // we've reached the end of the sheet data; output any comments we haven't yet outputted + if (type == EmptyCellCommentsCheckType.END_OF_SHEET_DATA) { + while (!commentCellRefs.isEmpty()) { + outputEmptyCellComment(commentCellRefs.remove()); + } + return; + } + + // handle any empty cells with comments in rows prior to any rows with content in them + if (this.cellRef == null) { + if (type == EmptyCellCommentsCheckType.END_OF_ROW) { + while (!commentCellRefs.isEmpty()) { + if (commentCellRefs.peek().getRow() == rowNum) { + outputEmptyCellComment(commentCellRefs.remove()); + } else { + return; + } + } + return; + } else { + throw new IllegalStateException("Cell ref should be null only if there are only empty cells in the row; rowNum: " + rowNum); + } + } + + CellReference nextCommentCellRef; + do { + CellReference cellRef = new CellReference(this.cellRef); + CellReference peekCellRef = commentCellRefs.peek(); + if (type == EmptyCellCommentsCheckType.CELL && cellRef.equals(peekCellRef)) { + // remove the comment cell ref from the list if we're about to handle it alongside the cell content + commentCellRefs.remove(); + return; + } else { + // fill in any gaps if there are empty cells with comment mixed in with non-empty cells + int comparison = cellRefComparator.compare(peekCellRef, cellRef); + if (comparison > 0 && type == EmptyCellCommentsCheckType.END_OF_ROW && peekCellRef.getRow() <= rowNum) { + nextCommentCellRef = commentCellRefs.remove(); + outputEmptyCellComment(nextCommentCellRef); + } else if (comparison < 0 && type == EmptyCellCommentsCheckType.CELL && peekCellRef.getRow() <= rowNum) { + nextCommentCellRef = commentCellRefs.remove(); + outputEmptyCellComment(nextCommentCellRef); + } else { + nextCommentCellRef = null; + } + } + } while (nextCommentCellRef != null && !commentCellRefs.isEmpty()); + } + } + + /** + * Output an empty-cell comment. + */ + private void outputEmptyCellComment(CellReference cellRef) { + String cellRefString = cellRef.formatAsString(); + XSSFComment comment = commentsTable.findCellComment(cellRefString); + output.emptyCellComment(cellRefString, comment); + } + /** * Captures characters only if a suitable element is open. * Originally was just "v"; extended for inlineStr also. @@ -347,9 +464,11 @@ /** A row with the (zero based) row number has started */ public void startRow(int rowNum); /** A row with the (zero based) row number has ended */ - public void endRow(); - /** A cell, with the given formatted value, was encountered */ - public void cell(String cellReference, String formattedValue); + public void endRow(int rowNum); + /** A cell, with the given formatted value and, possibly-null, comment, was encountered */ + public void cell(String cellReference, String formattedValue, XSSFComment comment); + /** A comment for an otherwise-empty cell was encountered */ + public void emptyCellComment(String cellReference, XSSFComment comment); /** A header or footer has been encountered */ public void headerFooter(String text, boolean isHeader, String tagName); } --- src/ooxml/java/org/apache/poi/xssf/extractor/XSSFEventBasedExcelExtractor.java (revision 1575994) +++ src/ooxml/java/org/apache/poi/xssf/extractor/XSSFEventBasedExcelExtractor.java (working copy) @@ -39,7 +39,9 @@ import org.apache.poi.xssf.eventusermodel.XSSFReader; import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler; import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler.SheetContentsHandler; +import org.apache.poi.xssf.model.CommentsTable; import org.apache.poi.xssf.model.StylesTable; +import org.apache.poi.xssf.usermodel.XSSFComment; import org.apache.poi.xssf.usermodel.XSSFShape; import org.apache.poi.xssf.usermodel.XSSFSimpleShape; import org.apache.xmlbeans.XmlException; @@ -60,6 +62,7 @@ private Locale locale; private boolean includeTextBoxes = true; private boolean includeSheetNames = true; + private boolean includeCellComments = false; private boolean includeHeadersFooters = true; private boolean formulasNotResults = false; @@ -112,11 +115,10 @@ } /** - * Would control the inclusion of cell comments from the document, - * if we supported it + * Should cell comments be included? Default is false */ public void setIncludeCellComments(boolean includeCellComments) { - throw new IllegalStateException("Comment extraction not supported in streaming mode, please use XSSFExcelExtractor"); + this.includeCellComments = includeCellComments; } public void setLocale(Locale locale) { @@ -159,6 +161,7 @@ public void processSheet( SheetContentsHandler sheetContentsExtractor, StylesTable styles, + CommentsTable comments, ReadOnlySharedStringsTable strings, InputStream sheetInputStream) throws IOException, SAXException { @@ -176,7 +179,7 @@ SAXParser saxParser = saxFactory.newSAXParser(); XMLReader sheetParser = saxParser.getXMLReader(); ContentHandler handler = new XSSFSheetXMLHandler( - styles, strings, sheetContentsExtractor, formatter, formulasNotResults); + styles, comments, strings, sheetContentsExtractor, formatter, formulasNotResults); sheetParser.setContentHandler(handler); sheetParser.parse(sheetSource); } catch(ParserConfigurationException e) { @@ -203,7 +206,8 @@ text.append(iter.getSheetName()); text.append('\n'); } - processSheet(sheetExtractor, styles, strings, stream); + CommentsTable comments = includeCellComments ? iter.getSheetComments() : null; + processSheet(sheetExtractor, styles, comments, strings, stream); if (includeHeadersFooters) { sheetExtractor.appendHeaderText(text); } @@ -268,26 +272,40 @@ firstCellOfRow = true; } - public void endRow() { + public void endRow(int rowNum) { output.append('\n'); } - public void cell(String cellRef, String formattedValue) { + public void cell(String cellRef, String formattedValue, XSSFComment comment) { if(firstCellOfRow) { firstCellOfRow = false; } else { output.append('\t'); } - output.append(formattedValue); + if (formattedValue != null) { + output.append(formattedValue); + } + if (includeCellComments && comment != null) { + String commentText = comment.getString().getString().replace('\n', ' '); + output.append(formattedValue != null ? " Comment by " : "Comment by "); + if (commentText.startsWith(comment.getAuthor() + ": ")) { + output.append(commentText); + } else { + output.append(comment.getAuthor()).append(": ").append(commentText); + } + } } + public void emptyCellComment(String cellRef, XSSFComment comment) { + cell(cellRef, null, comment); + } + public void headerFooter(String text, boolean isHeader, String tagName) { if (headerFooterMap != null) { headerFooterMap.put(tagName, text); } } - /** * Append the text for the named header or footer if found. */ --- src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFEventBasedExcelExtractor.java (revision 1575994) +++ src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFEventBasedExcelExtractor.java (working copy) @@ -20,13 +20,13 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; -import junit.framework.TestCase; - import org.apache.poi.POITextExtractor; import org.apache.poi.hssf.HSSFTestDataSamples; import org.apache.poi.hssf.extractor.ExcelExtractor; import org.apache.poi.xssf.XSSFTestDataSamples; +import junit.framework.TestCase; + /** * Tests for {@link XSSFEventBasedExcelExtractor} */ @@ -240,4 +240,68 @@ fixture.setIncludeHeadersFooters(false); assertEquals(expectedOutputWithoutHeadersAndFooters, fixture.getText()); } + + /** + * Test that XSSFEventBasedExcelExtractor outputs comments when specified. + * The output will contain two improvements over the output from + * XSSFExcelExtractor in that (1) comments from empty cells will be + * outputted, and (2) the author will not be outputted twice. + *

+ * This test will need to be modified if these improvements are ported to + * XSSFExcelExtractor. + */ + public void testCommentsComparedToNonEventBasedExtractor() + throws Exception { + + String expectedOutputWithoutComments = + "Sheet1\n" + + "\n" + + "abc\n" + + "\n" + + "123\n" + + "\n" + + "\n" + + "\n"; + + String nonEventBasedExtractorOutputWithComments = + "Sheet1\n" + + "\n" + + "abc Comment by Shaun Kalley: Shaun Kalley: Comment A2\n" + + "\n" + + "123 Comment by Shaun Kalley: Shaun Kalley: Comment B4\n" + + "\n" + + "\n" + + "\n"; + + String eventBasedExtractorOutputWithComments = + "Sheet1\n" + + "Comment by Shaun Kalley: Comment A1\tComment by Shaun Kalley: Comment B1\n" + + "abc Comment by Shaun Kalley: Comment A2\tComment by Shaun Kalley: Comment B2\n" + + "Comment by Shaun Kalley: Comment A3\tComment by Shaun Kalley: Comment B3\n" + + "Comment by Shaun Kalley: Comment A4\t123 Comment by Shaun Kalley: Comment B4\n" + + "Comment by Shaun Kalley: Comment A5\tComment by Shaun Kalley: Comment B5\n" + + "Comment by Shaun Kalley: Comment A7\tComment by Shaun Kalley: Comment B7\n" + + "Comment by Shaun Kalley: Comment A8\tComment by Shaun Kalley: Comment B8\n"; + + XSSFExcelExtractor extractor = new XSSFExcelExtractor( + XSSFTestDataSamples.openSampleWorkbook("commentTest.xlsx")); + try { + assertEquals(expectedOutputWithoutComments, extractor.getText()); + extractor.setIncludeCellComments(true); + assertEquals(nonEventBasedExtractorOutputWithComments, extractor.getText()); + } finally { + extractor.close(); + } + + XSSFEventBasedExcelExtractor fixture = + new XSSFEventBasedExcelExtractor( + XSSFTestDataSamples.openSamplePackage("commentTest.xlsx")); + try { + assertEquals(expectedOutputWithoutComments, fixture.getText()); + fixture.setIncludeCellComments(true); + assertEquals(eventBasedExtractorOutputWithComments, fixture.getText()); + } finally { + fixture.close(); + } + } }