ASF Bugzilla – Attachment 34731 Details for
Bug 60707
[PATCH] Reading very large excel files using StAX made easier - StreamingWorkbook
Home
|
New
|
Browse
|
Search
|
[?]
|
Reports
|
Help
|
New Account
|
Log In
Remember
[x]
|
Forgot Password
Login:
[x]
[patch]
Patch that contains the new classes anf files introduced for enabling this functionality.
streamed.patch (text/plain), 42.20 KB, created by
Renjith R
on 2017-02-08 04:41:43 UTC
(
hide
)
Description:
Patch that contains the new classes anf files introduced for enabling this functionality.
Filename:
MIME Type:
Creator:
Renjith R
Created:
2017-02-08 04:41:43 UTC
Size:
42.20 KB
patch
obsolete
>diff --git a/src/ooxml/java/org/apache/poi/xssf/streaming/reader/StreamedCell.java b/src/ooxml/java/org/apache/poi/xssf/streaming/reader/StreamedCell.java >new file mode 100644 >index 0000000..47cc296 >--- /dev/null >+++ b/src/ooxml/java/org/apache/poi/xssf/streaming/reader/StreamedCell.java >@@ -0,0 +1,67 @@ >+/* ==================================================================== >+ Licensed to the Apache Software Foundation (ASF) under one or more >+ contributor license agreements. See the NOTICE file distributed with >+ this work for additional information regarding copyright ownership. >+ The ASF licenses this file to You under the Apache License, Version 2.0 >+ (the "License"); you may not use this file except in compliance with >+ the License. You may obtain a copy of the License at >+ >+ http://www.apache.org/licenses/LICENSE-2.0 >+ >+ Unless required by applicable law or agreed to in writing, software >+ distributed under the License is distributed on an "AS IS" BASIS, >+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. >+ See the License for the specific language governing permissions and >+ limitations under the License. >+==================================================================== */ >+package org.apache.poi.xssf.streaming.reader; >+/** >+ * Represents cell in a row >+ * Value of cell is represented as a string. >+ * >+ */ >+public class StreamedCell { >+ private String value; >+ private int cellNumber; >+ >+ public StreamedCell(){ >+ >+ } >+ >+ /** >+ * <pre> >+ * Return cell value >+ * </pre> >+ * Return the value of a cell in String format. >+ * Value will be same as how it is represented in excel. >+ * @return String >+ */ >+ public String getValue() { >+ return value; >+ } >+ >+ public void setValue(String value) { >+ this.value = value; >+ } >+ >+ public String toString(){ >+ return value; >+ } >+ >+ public int getCellNumber() { >+ return cellNumber; >+ } >+ >+ public void setCellNumber(int cellNumber) { >+ this.cellNumber = cellNumber; >+ } >+ >+ /*@Override >+ protected void finalize() throws Throwable { >+ super.finalize(); >+ value = null; >+ }*/ >+ >+ >+ >+} >diff --git a/src/ooxml/java/org/apache/poi/xssf/streaming/reader/StreamedRow.java b/src/ooxml/java/org/apache/poi/xssf/streaming/reader/StreamedRow.java >new file mode 100644 >index 0000000..024479d >--- /dev/null >+++ b/src/ooxml/java/org/apache/poi/xssf/streaming/reader/StreamedRow.java >@@ -0,0 +1,94 @@ >+/* ==================================================================== >+ Licensed to the Apache Software Foundation (ASF) under one or more >+ contributor license agreements. See the NOTICE file distributed with >+ this work for additional information regarding copyright ownership. >+ The ASF licenses this file to You under the Apache License, Version 2.0 >+ (the "License"); you may not use this file except in compliance with >+ the License. You may obtain a copy of the License at >+ >+ http://www.apache.org/licenses/LICENSE-2.0 >+ >+ Unless required by applicable law or agreed to in writing, software >+ distributed under the License is distributed on an "AS IS" BASIS, >+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. >+ See the License for the specific language governing permissions and >+ limitations under the License. >+==================================================================== */ >+package org.apache.poi.xssf.streaming.reader; >+ >+import java.util.ArrayList; >+import java.util.Iterator; >+import java.util.List; >+ >+ >+/** >+ * Represents an excel row >+ * >+ */ >+public class StreamedRow { >+ private List<StreamedCell> cells; >+ private int rowNumber; >+ >+ public StreamedRow(int rowNumber) { >+ this.rowNumber = rowNumber; >+ } >+ >+ /** >+ * <pre> >+ * Used to get cells of a Row >+ * </pre> >+ * @return Iterator<Cell> >+ */ >+ public Iterator<StreamedCell> getCellIterator() { >+ if (cells == null) { >+ cells = new ArrayList<StreamedCell>(); >+ } >+ return cells.iterator(); >+ } >+ >+ /** >+ * <pre> >+ * Returns the row number >+ * </pre> >+ * @return int >+ */ >+ public int getRowNumber() { >+ return rowNumber; >+ } >+ >+ public void setRowNumber(int rowNumber) { >+ this.rowNumber = rowNumber; >+ } >+ >+ public String toString() { >+ StringBuffer sb = new StringBuffer(250); >+ sb.append("Row Number:").append(rowNumber); >+ sb.append(" --> "); >+ if (cells != null) { >+ for (StreamedCell cell : cells) { >+ sb.append(cell.toString()); >+ sb.append(" | "); >+ } >+ } >+ >+ return sb.toString(); >+ } >+ >+/* @Override >+ protected void finalize() throws Throwable { >+ super.finalize(); >+ if (cells != null) { >+ cells.clear(); >+ cells = null; >+ } >+ }*/ >+ >+ public List<StreamedCell> getCells() { >+ if (cells == null) { >+ cells = new ArrayList<StreamedCell>(); >+ } >+ return cells; >+ } >+ >+ >+} >diff --git a/src/ooxml/java/org/apache/poi/xssf/streaming/reader/StreamedSheet.java b/src/ooxml/java/org/apache/poi/xssf/streaming/reader/StreamedSheet.java >new file mode 100644 >index 0000000..7125868 >--- /dev/null >+++ b/src/ooxml/java/org/apache/poi/xssf/streaming/reader/StreamedSheet.java >@@ -0,0 +1,209 @@ >+/* ==================================================================== >+ Licensed to the Apache Software Foundation (ASF) under one or more >+ contributor license agreements. See the NOTICE file distributed with >+ this work for additional information regarding copyright ownership. >+ The ASF licenses this file to You under the Apache License, Version 2.0 >+ (the "License"); you may not use this file except in compliance with >+ the License. You may obtain a copy of the License at >+ >+ http://www.apache.org/licenses/LICENSE-2.0 >+ >+ Unless required by applicable law or agreed to in writing, software >+ distributed under the License is distributed on an "AS IS" BASIS, >+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. >+ See the License for the specific language governing permissions and >+ limitations under the License. >+==================================================================== */ >+package org.apache.poi.xssf.streaming.reader; >+ >+import java.util.ArrayList; >+import java.util.Iterator; >+import java.util.List; >+ >+import javax.xml.stream.XMLEventReader; >+import javax.xml.stream.XMLStreamException; >+import javax.xml.stream.events.XMLEvent; >+ >+import org.apache.poi.xssf.model.SharedStringsTable; >+import org.apache.poi.xssf.model.StylesTable; >+ >+/** >+ * Represents an excel sheet. >+ * >+ */ >+public class StreamedSheet { >+ private XMLEventReader xmlParser; >+ private SharedStringsTable sharedStringsTable; >+ private StylesTable stylesTable; >+ private int numberOfColumns; >+ private int sheetNumber; >+ private StreamedSheetEventHandler eventHandler = null; >+ >+ /** >+ * <pre> >+ * Fetch all rows from the excel. >+ * </pre> >+ * >+ * This method consumes only less memory, but it is >+ * advisable to use it only for small excel files, since >+ * it will fetch all rows in a single call. >+ * >+ * @return Iterator<Row> >+ * @throws XMLStreamException >+ */ >+ public Iterator<StreamedRow> getAllRows() throws XMLStreamException{ >+ return getAllRows(this, sharedStringsTable, stylesTable); >+ } >+ >+ /** >+ * <pre> >+ * Used to fetch N number of rows. >+ * </pre> >+ * >+ * Recommended method to reduce memory utilization. >+ * It allows to read big excel files in batch. >+ * This gives control to the user to process the records already fetched, >+ * before fetching next set of records. >+ * After reading N records, invoke the same method with number of rows to be >+ * fetched to get the next set of rows. >+ * <br> >+ * >+ * *********************Usage**************************** >+ * Iterator<StreamedRow> rows = sheet.getNRows(1); >+ * <br> >+ * while(rows.hasNext()){ //read the first 1 row >+ * <br> >+ * StreamedRow row = rows.next(); >+ * <br> >+ * } >+ * <br> >+ * rows = sheet.getNRows(10); >+ * <br> >+ * while(rows.hasNext()){ //read the next 10 rows >+ * <br> >+ * StreamedRow row = rows.next(); >+ * <br> >+ * } >+ * >+ * @param numberOfRows >+ * @return Iterator<Row> >+ * @throws XMLStreamException >+ */ >+ public Iterator<StreamedRow> getNRows(int numberOfRows) throws XMLStreamException{ >+ return getNRows(this, eventHandler, numberOfRows); >+ } >+ >+ public boolean hasMoreRows(){ >+ return xmlParser.hasNext(); >+ } >+ >+ >+ public XMLEventReader getXmlParser() { >+ return xmlParser; >+ } >+ >+ public void setXmlParser(XMLEventReader xmlParser) { >+ this.xmlParser = xmlParser; >+ } >+ >+ public void setSharedStringsTable(SharedStringsTable sharedStringsTable) { >+ this.sharedStringsTable = sharedStringsTable; >+ } >+ >+ public void setStylesTable(StylesTable stylesTable) { >+ this.stylesTable = stylesTable; >+ } >+ >+ public void createEventHandler(){ >+ eventHandler = new StreamedSheetEventHandler(sharedStringsTable, stylesTable); >+ } >+ >+ >+ public int getNumberOfColumns() { >+ return numberOfColumns; >+ } >+ >+ >+ public void setNumberOfColumns(int numberOfColumns) { >+ this.numberOfColumns = numberOfColumns; >+ } >+ >+ >+ public int getSheetNumber() { >+ return sheetNumber; >+ } >+ >+ >+ public void setSheetNumber(int sheetNumber) { >+ this.sheetNumber = sheetNumber; >+ } >+ >+/* @Override >+ protected void finalize() throws Throwable { >+ super.finalize(); >+ >+ xmlParser = null; >+ sharedStringsTable = null; >+ stylesTable = null; >+ eventHandler = null; >+ >+ }*/ >+ >+ /** >+ * reads all data from sheet >+ * @param xmlParser >+ * @param sharedStringsTable >+ * @param stylesTable >+ * @return >+ * @throws XMLStreamException >+ */ >+ private Iterator<StreamedRow> getAllRows(StreamedSheet sheet, >+ SharedStringsTable sharedStringsTable, StylesTable stylesTable) throws XMLStreamException { >+ List<StreamedRow> dataList = new ArrayList<StreamedRow>(); >+ StreamedSheetEventHandler eventHandler = new StreamedSheetEventHandler(sharedStringsTable, stylesTable); >+ while(sheet.getXmlParser().hasNext()){ >+ XMLEvent event = sheet.getXmlParser().nextEvent(); >+ eventHandler.handleEvent(event); >+ if(eventHandler.isEndOfRow()){ >+ dataList.add(eventHandler.getRow()); >+ eventHandler.setEndOfRow(false); >+ } >+ sheet.setNumberOfColumns(eventHandler.getNumberOfColumns()); >+ } >+ >+ return dataList.iterator(); >+ } >+ >+ /** >+ * Reads N Rows from excel >+ * @param sheet >+ * @param sharedStringsTable >+ * @param stylesTable >+ * @param numberOFRows >+ * @return >+ * @throws XMLStreamException >+ */ >+ private Iterator<StreamedRow> getNRows(StreamedSheet sheet, StreamedSheetEventHandler eventHandler, int numberOfRows) throws XMLStreamException { >+ List<StreamedRow> dataList = new ArrayList<StreamedRow>(); >+ while(sheet.getXmlParser().hasNext()){ >+ XMLEvent event = sheet.getXmlParser().nextEvent(); >+ eventHandler.handleEvent(event); >+ if(eventHandler.isEndOfRow()){ >+ dataList.add(eventHandler.getRow()); >+ eventHandler.setEndOfRow(false); >+ } >+ sheet.setNumberOfColumns(eventHandler.getNumberOfColumns()); >+ if(dataList.size() == numberOfRows){ >+ break; >+ } >+ } >+ >+ return dataList.iterator(); >+ } >+ >+ >+ >+ >+ >+ >+} >diff --git a/src/ooxml/java/org/apache/poi/xssf/streaming/reader/StreamedSheetEventHandler.java b/src/ooxml/java/org/apache/poi/xssf/streaming/reader/StreamedSheetEventHandler.java >new file mode 100644 >index 0000000..0b491e7 >--- /dev/null >+++ b/src/ooxml/java/org/apache/poi/xssf/streaming/reader/StreamedSheetEventHandler.java >@@ -0,0 +1,419 @@ >+/* ==================================================================== >+ Licensed to the Apache Software Foundation (ASF) under one or more >+ contributor license agreements. See the NOTICE file distributed with >+ this work for additional information regarding copyright ownership. >+ The ASF licenses this file to You under the Apache License, Version 2.0 >+ (the "License"); you may not use this file except in compliance with >+ the License. You may obtain a copy of the License at >+ >+ http://www.apache.org/licenses/LICENSE-2.0 >+ >+ Unless required by applicable law or agreed to in writing, software >+ distributed under the License is distributed on an "AS IS" BASIS, >+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. >+ See the License for the specific language governing permissions and >+ limitations under the License. >+==================================================================== */ >+package org.apache.poi.xssf.streaming.reader; >+ >+import java.util.ArrayList; >+import java.util.Arrays; >+import java.util.HashMap; >+import java.util.Iterator; >+import java.util.List; >+import java.util.Map; >+import java.util.Properties; >+ >+import javax.xml.namespace.QName; >+import javax.xml.stream.events.Attribute; >+import javax.xml.stream.events.Characters; >+import javax.xml.stream.events.EndElement; >+import javax.xml.stream.events.StartElement; >+import javax.xml.stream.events.XMLEvent; >+ >+import org.apache.poi.ss.usermodel.DataFormatter; >+import org.apache.poi.xssf.model.SharedStringsTable; >+import org.apache.poi.xssf.model.StylesTable; >+import org.apache.poi.xssf.usermodel.XSSFCellStyle; >+import org.apache.poi.xssf.usermodel.XSSFRichTextString; >+ >+/** >+ * Handler class that handles the event generated by StAX parser >+ * >+ */ >+public class StreamedSheetEventHandler { >+ >+ private boolean isEndOfRow = false; >+ private boolean isRow = false; >+ //private boolean mergeCell = false; >+ >+ private String formatString = null; >+ private int formatIndex = 0; >+ private String cellContentType; >+ private int rowNumber = 0; >+ private int numberOfColumns = 0; >+ private List<String> cellPositions = null; >+ private String currentCellPosition = null; >+ private int previousCellIndex = -1; >+ //private List<String> mergeList = null; //merge cell not supported right now. Complexity involved if multiple rows are merged. >+ >+ private SharedStringsTable sharedStringsTable; >+ private StylesTable stylesTable; >+ >+ private DataFormatter formatter = new DataFormatter(); >+ private Properties properties = new Properties(); >+ private Map<String, String> attributes = new HashMap<String, String>(); >+ private StringBuffer sb = new StringBuffer(250); >+ private List<String> dateFormatsAry = new ArrayList<String>(); >+ >+ private StreamedRow row; >+ private StreamedCell cell; >+ >+ private final String dateFormat = "m/d/yy|DD/MM/YY|[$-F800]dddd\\,\\ mmmm\\ dd\\,\\ yyyy|m/d;@|m/d/yy;@|mm/dd/yy;@|" >+ + "[$-409]d\\-mmm;@|[$-409]d\\-mmm\\-yy;@|[$-409]dd\\-mmm\\-yy;@|[$-409]mmm\\-yy;@|[$-409]mmmm\\-yy;@|" >+ + "[$-409]mmmm\\ d\\,\\ yyyy;@|[$-409]m/d/yy\\ h:mm\\ AM/PM;@|m/d/yy\\ h:mm;@|[$-409]mmmmm;@|" >+ + "[$-409]mmmmm\\-yy;@|m/d/yyyy;@|[$-409]d\\-mmm\\-yyyy;@|[$-F400]h:mm:ss\\ AM/PM|" >+ + "h:mm;@|[$-409]h:mm\\ AM/PM;@|h:mm:ss;@|[$-409]h:mm:ss\\ AM/PM;@|mm:ss.0;@|[h]:mm:ss;@|" >+ + "[$-409]m/d/yy\\ h:mm\\ AM/PM;@|DD/MM/YYYY"; >+ >+ public static final String DEFAULT_CELL_TYPE = "DEFAULT"; >+ public static final String STRING_CELL_TYPE = "STRING"; >+ public static final String BOOLEAN_CELL_TYPE = "BOOLEAN"; >+ public static final String DATE_CELL_TYPE = "DATE"; >+ public static final String FORMULA_CELL_TYPE = "FORMULA"; >+ public static final String NUMERIC_CELL_TYPE = "NUMERIC"; >+ >+ >+ /** >+ * Constructor >+ * @param sharedStringsTable >+ * @param stylesTable >+ */ >+ public StreamedSheetEventHandler(SharedStringsTable sharedStringsTable, StylesTable stylesTable){ >+ this.sharedStringsTable = sharedStringsTable; >+ this.stylesTable = stylesTable; >+ /*try{ >+ properties.load(getClass().getClassLoader().getResourceAsStream("excelreader.properties")); >+ }catch(Exception e){ >+ e.printStackTrace(); >+ }*/ >+ >+ /*if(properties.get("date.formatStrings") != null){ >+ dateFormat = properties.get("date.formatStrings").toString(); >+ }*/ >+ >+ dateFormatsAry.addAll(Arrays.asList(dateFormat.split("\\|"))); >+ >+ } >+ >+ /** >+ * Handler method for handling the event generated for StAX parser. >+ * For each XML event(Start Tag, End Tag & Characters), StAX parser will >+ * generate event. This event will be handled by this method. >+ * Attributes of each element are put in a map with key as attribute >+ * and value as attribute value. >+ * @param event >+ */ >+ public void handleEvent(XMLEvent event){ >+ switch(event.getEventType()){ >+ case XMLEvent.START_ELEMENT: >+ StartElement startElemet = event.asStartElement(); >+ QName startName = startElemet.getName(); >+ String name = startName.getLocalPart(); >+ Iterator<Attribute> iterator = startElemet.getAttributes(); >+ while(iterator.hasNext()){ >+ Attribute attribute = iterator.next(); >+ attributes.put(attribute.getName().getLocalPart(), attribute.getValue()); >+ } >+ if((name.trim().equals("row")) && (attributes.size() > 0)){ >+ isRow = true; >+ } >+ >+ /*else if(name.trim().equals("mergeCell")){ >+ mergeCell = true; >+ } >+ >+ if(isRow || mergeCell){ >+ startElement(name); >+ }*/ >+ >+ if(isRow){ >+ startElement(name); >+ } >+ break; >+ case XMLEvent.CHARACTERS: >+ Characters chars = event.asCharacters(); >+ if(isRow){ >+ characters(chars.getData()); >+ } >+ break; >+ case XMLEvent.END_ELEMENT: >+ EndElement endElement = event.asEndElement(); >+ QName endName = endElement.getName(); >+ String endTageName = endName.getLocalPart(); >+ endElement(endTageName); >+ attributes.clear(); >+ break; >+ } >+ } >+ >+ /** >+ * All the start element events are handled here. >+ * It identifies the things like rowNumber, cell content type etc >+ * @param name >+ */ >+ private void startElement(String name) { >+ if(name.equals("row")) { >+ previousCellIndex = -1; >+ isEndOfRow = false; >+ rowNumber = (Integer.parseInt(attributes.get("r")) - 1); >+ row = new StreamedRow(rowNumber); >+ if(rowNumber == 0){ >+ cellPositions = new ArrayList<String>(); >+ } >+ }else if(name.equals("c")){ >+ String cellPosition = attributes.get("r").toUpperCase(); >+ cellPosition = cellPosition.replaceAll(String.valueOf(rowNumber+1), "").trim(); >+ currentCellPosition = cellPosition; >+ if(rowNumber == 0){ >+ numberOfColumns ++; >+ cellPositions.add(cellPosition); >+ } >+ >+ cellContentType = getCellContentType(); >+ }else if(name.equals("f")){ >+ cellContentType = FORMULA_CELL_TYPE; >+ } >+ >+ /*else if(name.equals("mergeCell")){ >+ if(mergeList == null){ >+ mergeList = >+ } >+ }*/ >+ } >+ >+ >+ /** >+ * This method is invoked on end element event. >+ * Mostly used for setting the default values before handling >+ * next start element. >+ * All data captured in characters event are stored here. >+ * Data will be stored only on reaching the end element. >+ * @param name >+ */ >+ private void endElement(String name) { >+ if(name.equals("c")){ >+ if(isRow){ >+ storeData(sb.toString()); >+ } >+ cellContentType = DEFAULT_CELL_TYPE; >+ formatString = null; >+ formatIndex = 0; >+ currentCellPosition = null; >+ previousCellIndex++; >+ sb.delete(0, sb.length()); >+ }else if(name.equals("row")){ >+ isEndOfRow = true; >+ isRow = false; >+ }else if(name.equals("f")){ >+ cellContentType = DEFAULT_CELL_TYPE; >+ } >+ >+ /*else if(name.equals("mergeCell")){ >+ mergeCell = false; >+ }*/ >+ } >+ >+ /** >+ * Captures the data >+ * @param data >+ */ >+ private void characters(String data) { >+ if(!(cellContentType.equals(FORMULA_CELL_TYPE))){ >+ sb.append(data); >+ } >+ } >+ >+ /** >+ * Creates cells and store data >+ * @param data >+ */ >+ private void storeData(String data){ >+ if (!(cellContentType.equals(FORMULA_CELL_TYPE))) { >+ if(cellContentType.equals(STRING_CELL_TYPE)){ >+ data = getStringData(data); >+ }else if(cellContentType.equals(DATE_CELL_TYPE)){ >+ data = getDateAsString(data); >+ }else if(cellContentType.equals(BOOLEAN_CELL_TYPE)){ >+ if(data.trim().equals("1")){ >+ data = "TRUE"; >+ }else if(data.trim().equals("0")){ >+ data = "FALSE"; >+ }else if(data.trim().equals("")){ >+ data = null; >+ }else{ >+ try{ >+ int value = Integer.parseInt(data); >+ if(value > 0){ >+ data = "TRUE"; >+ } >+ }catch(Exception e){ >+ data = null; >+ } >+ } >+ } >+ >+ if(cellPositions != null){ >+ fillEmptyCells(); >+ } >+ >+ cell = new StreamedCell(); >+ cell.setValue(data); >+ cell.setCellNumber(previousCellIndex+1); >+ row.getCells().add(cell); >+ >+ } >+ } >+ >+ >+ /** >+ * Identifies the cell content type >+ * @return >+ */ >+ private String getCellContentType(){ >+ String cellConentType = DEFAULT_CELL_TYPE; >+ String cellType = attributes.get("t"); >+ if((null != cellType) && (cellType.equals("s"))){ >+ cellConentType = STRING_CELL_TYPE; >+ }else if((null != cellType) && (equals("b"))){ >+ cellConentType = BOOLEAN_CELL_TYPE; >+ }else{ >+ >+ if(checkForDate()){ >+ cellConentType = DATE_CELL_TYPE; >+ }else if(checkForBoolean()){ >+ cellConentType = BOOLEAN_CELL_TYPE; >+ } >+ } >+ return cellConentType; >+ } >+ >+ >+ >+ /** >+ * Identifies if the given cell is a boolean or not >+ * @return >+ */ >+ private boolean checkForBoolean(){ >+ String cellStyleType = attributes.get("s"); >+ if(cellStyleType != null){ >+ int styleIndex = Integer.parseInt(cellStyleType); >+ XSSFCellStyle style = stylesTable.getStyleAt(styleIndex); >+ formatIndex = style.getDataFormat(); >+ formatString = style.getDataFormatString(); >+ if(formatString.contains("TRUE")){ >+ return true; >+ }else{ >+ return false; >+ } >+ }else{ >+ return false; >+ } >+ } >+ >+ /** >+ * identifies if the given cell is a Date cell or not >+ * @return >+ */ >+ private boolean checkForDate(){ >+ boolean isDate = false; >+ String cellStyleType = attributes.get("s"); >+ if(cellStyleType != null){ >+ int styleIndex = Integer.parseInt(cellStyleType); >+ XSSFCellStyle style = stylesTable.getStyleAt(styleIndex); >+ formatIndex = style.getDataFormat(); >+ formatString = style.getDataFormatString(); >+ if(dateFormatsAry.contains(formatString.trim())){ >+ isDate = true; >+ } >+ } >+ >+ return isDate; >+ } >+ >+ >+ /** >+ * fetches the String data from SharedStringsTable.xml >+ * @param data >+ * @return >+ */ >+ private String getStringData(String data){ >+ int index = Integer.parseInt(data); >+ return new XSSFRichTextString(sharedStringsTable.getEntryAt(index)).toString().trim(); >+ } >+ >+ /** >+ * check the styles applied to the cell and fetches the exact content of date cell >+ * @param data >+ * @return >+ */ >+ private String getDateAsString(String data) { >+ if ((data != null) && (!(data.trim().equals("")))) { >+ /*return formatter.formatRawCellContents(Double.parseDouble(data), >+ formatIndex, formatString);*/ >+ >+ /* >+ * All dates are formatted with 'DD/MM/YYYY', this is to avoid confusion between >+ * 10/12/2017 & 10/12/1917. Otherwise both will be read as 10/12/17 if the format >+ * is DD/MM/YY >+ */ >+ return formatter.formatRawCellContents(Double.parseDouble(data), >+ formatIndex, "DD/MM/YYYY"); >+ } else { >+ return null; >+ } >+ } >+ >+ >+ /** >+ * Used to fill empty cells as null >+ */ >+ private void fillEmptyCells() { >+ int currentCellIndex = cellPositions.indexOf(currentCellPosition); >+ if ((currentCellIndex - previousCellIndex) > 1) { >+ for (int i = (previousCellIndex + 1); i < currentCellIndex; i++) { >+ cell = new StreamedCell(); >+ cell.setValue(null); >+ row.getCells().add(cell); >+ } >+ >+ previousCellIndex = currentCellIndex; >+ } >+ } >+ >+ public boolean isEndOfRow() { >+ return isEndOfRow; >+ } >+ >+ public void setEndOfRow(boolean isEndOfRow) { >+ this.isEndOfRow = isEndOfRow; >+ } >+ >+ public int getRowNumber() { >+ return rowNumber; >+ } >+ >+ public int getNumberOfColumns() { >+ return numberOfColumns; >+ } >+ >+ public List<String> getCellPositions() { >+ return cellPositions; >+ } >+ >+ public StreamedRow getRow() { >+ return row; >+ } >+ >+ >+} >diff --git a/src/ooxml/java/org/apache/poi/xssf/streaming/reader/StreamedWorkbook.java b/src/ooxml/java/org/apache/poi/xssf/streaming/reader/StreamedWorkbook.java >new file mode 100644 >index 0000000..6216c8e >--- /dev/null >+++ b/src/ooxml/java/org/apache/poi/xssf/streaming/reader/StreamedWorkbook.java >@@ -0,0 +1,134 @@ >+/* ==================================================================== >+ Licensed to the Apache Software Foundation (ASF) under one or more >+ contributor license agreements. See the NOTICE file distributed with >+ this work for additional information regarding copyright ownership. >+ The ASF licenses this file to You under the Apache License, Version 2.0 >+ (the "License"); you may not use this file except in compliance with >+ the License. You may obtain a copy of the License at >+ >+ http://www.apache.org/licenses/LICENSE-2.0 >+ >+ Unless required by applicable law or agreed to in writing, software >+ distributed under the License is distributed on an "AS IS" BASIS, >+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. >+ See the License for the specific language governing permissions and >+ limitations under the License. >+==================================================================== */ >+package org.apache.poi.xssf.streaming.reader; >+ >+import java.io.File; >+import java.io.InputStream; >+import java.util.ArrayList; >+import java.util.Iterator; >+import java.util.List; >+ >+import javax.xml.stream.XMLInputFactory; >+ >+import org.apache.poi.openxml4j.opc.OPCPackage; >+import org.apache.poi.xssf.eventusermodel.XSSFReader; >+import org.apache.poi.xssf.model.SharedStringsTable; >+import org.apache.poi.xssf.model.StylesTable; >+ >+/** >+ * Represents the excel workbook >+ * >+ */ >+public class StreamedWorkbook { >+ >+ private File inputFile; >+ >+ /** >+ * <pre> >+ * Accepts the file path and return an instance of StreamedWorkBook >+ *</pre> >+ * @param filePath >+ */ >+ public StreamedWorkbook(String filePath){ >+ if((filePath != null) && !(filePath.trim().isEmpty())){ >+ inputFile = new File(filePath); >+ } >+ } >+ >+ /** >+ * <pre> >+ * Fetch all sheets from given excel file >+ * </pre> >+ * @return >+ * @throws Exception >+ */ >+ public Iterator<StreamedSheet> getSheetIterator() throws Exception{ >+ if((inputFile != null) && (inputFile.exists())){ >+ return getAllSheets(inputFile); >+ }else{ >+ throw new Exception("No sheets found"); >+ } >+ } >+ >+ >+ /** >+ * Returns the list of sheets for the given excel file >+ * @param file >+ * @return >+ * @throws Exception >+ */ >+ private Iterator<StreamedSheet> getAllSheets(File file) throws Exception{ >+ XSSFReader reader = null; >+ Iterator<InputStream> sheetIterator = null; >+ >+ SharedStringsTable sharedStringsTable = null; >+ StylesTable stylesTable = null; >+ >+ if(file != null){ >+ reader = getExcelReader(file); >+ if(reader != null){ >+ sheetIterator = reader.getSheetsData(); >+ sharedStringsTable = reader.getSharedStringsTable(); >+ stylesTable = reader.getStylesTable(); >+ } >+ } >+ >+ return getStreamedSheetIterator(sheetIterator, sharedStringsTable, stylesTable); >+ } >+ >+ private Iterator<StreamedSheet> getStreamedSheetIterator(Iterator<InputStream> sheetIterator, SharedStringsTable sharedStringsTable, >+ StylesTable stylesTable) throws Exception{ >+ List<StreamedSheet> sheetList = null; >+ XMLInputFactory factory = null; >+ >+ if(sheetIterator != null){ >+ factory = XMLInputFactory.newInstance(); >+ sheetList = new ArrayList<StreamedSheet>(); >+ int sheetNumber = 0; >+ while(sheetIterator.hasNext()){ >+ sheetNumber++; >+ InputStream sheetInputStream = sheetIterator.next(); >+ StreamedSheet sheet = new StreamedSheet(); >+ sheet.setXmlParser(factory.createXMLEventReader(sheetInputStream)); >+ sheet.setSharedStringsTable(sharedStringsTable); >+ sheet.setStylesTable(stylesTable); >+ sheet.setSheetNumber(sheetNumber); >+ sheet.createEventHandler(); >+ sheetList.add(sheet); >+ } >+ } >+ >+ return sheetList.iterator(); >+ >+ } >+ >+ >+ >+ /** >+ * Receives the excel file and returns the file excel file reader >+ * @param inputStream >+ * @return >+ * @throws Exception >+ */ >+ private XSSFReader getExcelReader(File file) throws Exception{ >+ XSSFReader reader = null; >+ OPCPackage pkg = null; >+ pkg = OPCPackage.open(file); >+ reader = new XSSFReader(pkg); >+ return reader; >+ } >+} >diff --git a/src/ooxml/testcases/org/apache/poi/xssf/streaming/reader/TestStreamedWorkbook.java b/src/ooxml/testcases/org/apache/poi/xssf/streaming/reader/TestStreamedWorkbook.java >new file mode 100644 >index 0000000..d56a253 >--- /dev/null >+++ b/src/ooxml/testcases/org/apache/poi/xssf/streaming/reader/TestStreamedWorkbook.java >@@ -0,0 +1,334 @@ >+/* >+ * ==================================================================== >+ * Licensed to the Apache Software Foundation (ASF) under one or more >+ * contributor license agreements. See the NOTICE file distributed with >+ * this work for additional information regarding copyright ownership. >+ * The ASF licenses this file to You under the Apache License, Version 2.0 >+ * (the "License"); you may not use this file except in compliance with >+ * the License. You may obtain a copy of the License at >+ * >+ * http://www.apache.org/licenses/LICENSE-2.0 >+ * >+ * Unless required by applicable law or agreed to in writing, software >+ * distributed under the License is distributed on an "AS IS" BASIS, >+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. >+ * See the License for the specific language governing permissions and >+ * limitations under the License. >+ * ==================================================================== >+ */ >+package org.apache.poi.xssf.streaming.reader; >+ >+import static org.junit.Assert.assertEquals; >+import static org.junit.Assert.fail; >+ >+import java.io.File; >+import java.util.Iterator; >+ >+import org.apache.poi.POIDataSamples; >+import org.junit.Test; >+ >+ >+public class TestStreamedWorkbook{ >+ >+ @Test >+ public void testInvalidFilePath(){ >+ StreamedWorkbook workbook = new StreamedWorkbook(null); >+ try { >+ workbook.getSheetIterator(); >+ fail("expected exception"); >+ } catch (Exception e) { >+ assertEquals("No sheets found", e.getMessage()); >+ } >+ >+ } >+ >+ @Test >+ public void testInvalidFile(){ >+ POIDataSamples files = POIDataSamples.getSpreadSheetInstance(); >+ File f= files.getFile("InvalidFile.txt"); >+ StreamedWorkbook workbook = new StreamedWorkbook(f.getAbsolutePath()); >+ >+ try { >+ workbook.getSheetIterator(); >+ } catch (Exception e) { >+ assertEquals("No valid entries or contents found, this is not a valid OOXML (Office Open XML) file", e.getMessage()); >+ } >+ } >+ >+ >+ @Test >+ public void testSheetCount(){ >+ POIDataSamples files = POIDataSamples.getSpreadSheetInstance(); >+ File f= files.getFile("SpreadSheetSample04022017.xlsx"); >+ StreamedWorkbook workbook = new StreamedWorkbook(f.getAbsolutePath()); >+ int count = 0; >+ >+ try { >+ Iterator<StreamedSheet> sheetIterator = workbook.getSheetIterator(); >+ >+ while(sheetIterator.hasNext()){ >+ sheetIterator.next(); >+ count++; >+ } >+ >+ assertEquals(2, count); >+ } catch (Exception e) { >+ } >+ >+ } >+ >+ @Test >+ public void testTotalRowCount() throws Exception{ >+ POIDataSamples files = POIDataSamples.getSpreadSheetInstance(); >+ File f= files.getFile("SpreadSheetSample04022017.xlsx"); >+ StreamedWorkbook workbook = new StreamedWorkbook(f.getAbsolutePath()); >+ int sheetCount = 0; >+ >+ long count = 0; >+ >+ Iterator<StreamedSheet> sheetIterator = workbook.getSheetIterator(); >+ >+ while((sheetIterator.hasNext()) && (sheetCount == 0)){ >+ StreamedSheet sheet = sheetIterator.next(); >+ >+ Iterator<StreamedRow> rows = sheet.getAllRows(); >+ >+ while(rows.hasNext()){ >+ rows.next(); >+ count++; >+ } >+ >+ assertEquals(6, count); >+ sheetCount++; >+ } >+ } >+ >+ @Test >+ public void testNRowCount() throws Exception{ >+ POIDataSamples files = POIDataSamples.getSpreadSheetInstance(); >+ File f= files.getFile("SpreadSheetSample04022017.xlsx"); >+ StreamedWorkbook workbook = new StreamedWorkbook(f.getAbsolutePath()); >+ >+ long count = 0; >+ int sheetCount = 0; >+ >+ Iterator<StreamedSheet> sheetIterator = workbook.getSheetIterator(); >+ >+ while((sheetIterator.hasNext()) && (sheetCount == 0)){ >+ StreamedSheet sheet = sheetIterator.next(); >+ >+ Iterator<StreamedRow> rows = sheet.getNRows(4); >+ >+ while(rows.hasNext()){ >+ rows.next(); >+ count++; >+ } >+ >+ assertEquals(4, count); >+ sheetCount++; >+ } >+ } >+ >+ @Test >+ public void testCellCount() throws Exception{ >+ POIDataSamples files = POIDataSamples.getSpreadSheetInstance(); >+ File f= files.getFile("SpreadSheetSample04022017.xlsx"); >+ StreamedWorkbook workbook = new StreamedWorkbook(f.getAbsolutePath()); >+ int sheetCount = 0; >+ >+ Iterator<StreamedSheet> sheetIterator = workbook.getSheetIterator(); >+ >+ while((sheetIterator.hasNext()) && (sheetCount == 0)){ >+ StreamedSheet sheet = sheetIterator.next(); >+ >+ Iterator<StreamedRow> rows = sheet.getAllRows(); >+ >+ while(rows.hasNext()){ >+ long count = 0; >+ StreamedRow row = rows.next(); >+ >+ Iterator<StreamedCell> cellIterator = row.getCellIterator(); >+ >+ while(cellIterator.hasNext()){ >+ cellIterator.next(); >+ count++; >+ } >+ >+ assertEquals(7, count); >+ } >+ >+ sheetCount++; >+ } >+ } >+ >+ >+ @Test >+ public void testStartingRowAndCellNumber() throws Exception{ >+ POIDataSamples files = POIDataSamples.getSpreadSheetInstance(); >+ File f= files.getFile("SpreadSheetSample04022017.xlsx"); >+ StreamedWorkbook workbook = new StreamedWorkbook(f.getAbsolutePath()); >+ int sheetCount = 0; >+ >+ >+ Iterator<StreamedSheet> sheetIterator = workbook.getSheetIterator(); >+ >+ while((sheetIterator.hasNext()) && (sheetCount == 0)){ >+ int rowCount = 0; >+ int cellCount = 0; >+ StreamedSheet sheet = sheetIterator.next(); >+ >+ Iterator<StreamedRow> rows = sheet.getAllRows(); >+ >+ while(rows.hasNext()){ >+ StreamedRow row = rows.next(); >+ if(rowCount == 0){ >+ assertEquals(rowCount, row.getRowNumber()); >+ } >+ rowCount++; >+ >+ >+ Iterator<StreamedCell> cellIterator = row.getCellIterator(); >+ >+ while(cellIterator.hasNext()){ >+ StreamedCell cell = cellIterator.next(); >+ if(cellCount == 0){ >+ assertEquals(cellCount, cell.getCellNumber()); >+ } >+ cellCount++; >+ >+ } >+ >+ } >+ >+ sheetCount++; >+ } >+ } >+ >+ @Test >+ public void testSheetData() throws Exception{ >+ POIDataSamples files = POIDataSamples.getSpreadSheetInstance(); >+ File f= files.getFile("SpreadSheetSample04022017.xlsx"); >+ StreamedWorkbook workbook = new StreamedWorkbook(f.getAbsolutePath()); >+ int sheetCount = 0; >+ >+ >+ Iterator<StreamedSheet> sheetIterator = workbook.getSheetIterator(); >+ >+ while((sheetIterator.hasNext()) && (sheetCount == 0)){ >+ int rowCount = 0; >+ >+ StreamedSheet sheet = sheetIterator.next(); >+ >+ Iterator<StreamedRow> rows = sheet.getAllRows(); >+ >+ while(rows.hasNext()){ >+ >+ StreamedRow row = rows.next(); >+ >+ int cellCount = 0; >+ >+ Iterator<StreamedCell> cellIterator = row.getCellIterator(); >+ >+ while(cellIterator.hasNext()){ >+ StreamedCell cell = cellIterator.next(); >+ if(rowCount == 1){ >+ >+ if(cellCount == 0){ >+ assertEquals("1", cell.getValue()); >+ }else if(cellCount == 1){ >+ assertEquals("Item1", cell.getValue()); >+ }else if(cellCount == 2){ >+ assertEquals("201", cell.getValue()); >+ }else if(cellCount == 3){ >+ assertEquals("100.11", cell.getValue()); >+ }else if(cellCount == 4){ >+ assertEquals("TRUE", cell.getValue()); >+ }else if(cellCount == 5){ >+ assertEquals("04/02/1917", cell.getValue()); >+ }else if(cellCount == 6){ >+ assertEquals("90.11", cell.getValue()); >+ } >+ }else if(rowCount == 3){ >+ if(cellCount == 4){ >+ assertEquals(null, cell.getValue()); >+ } >+ } >+ >+ cellCount++; >+ >+ } >+ >+ >+ rowCount++; >+ >+ } >+ >+ sheetCount++; >+ } >+ } >+ >+ @Test >+ public void testBatchData() throws Exception{ >+ >+ POIDataSamples files = POIDataSamples.getSpreadSheetInstance(); >+ File f= files.getFile("SpreadSheetSample04022017.xlsx"); >+ StreamedWorkbook workbook = new StreamedWorkbook(f.getAbsolutePath()); >+ int sheetCount = 0; >+ >+ >+ Iterator<StreamedSheet> sheetIterator = workbook.getSheetIterator(); >+ >+ while(sheetIterator.hasNext()){ >+ >+ StreamedSheet sheet = sheetIterator.next(); >+ >+ if(sheetCount == 1){ >+ Iterator<StreamedRow> rows = sheet.getNRows(1); >+ >+ while(rows.hasNext()){ >+ StreamedRow row = rows.next(); >+ assertEquals("Row Number:0 --> Item | item description | Strore | Price | Promotion applied | MFD | Discount rate |", row.toString().trim()); >+ } >+ >+ >+ rows = sheet.getNRows(4); >+ >+ while(rows.hasNext()){ >+ StreamedRow row = rows.next(); >+ assertEquals("Row Number:1 --> 1 | Item1 | 201 | 100.11 | TRUE | 04/02/1917 | 90.11 |", row.toString().trim()); >+ row = rows.next(); >+ assertEquals("Row Number:2 --> 2 | Item2 | 202 | 101.11 | TRUE | 05/02/1917 | 91.11 |", row.toString().trim()); >+ row = rows.next(); >+ assertEquals("Row Number:3 --> 3 | Item3 | 203 | 102.11 | TRUE | 06/02/1917 | 92.11 |", row.toString().trim()); >+ row = rows.next(); >+ assertEquals("Row Number:4 --> 4 | Item4 | 204 | 103.11 | TRUE | 07/02/1917 | 93.11 |", row.toString().trim()); >+ } >+ >+ >+ >+ rows = sheet.getNRows(4); >+ >+ while(rows.hasNext()){ >+ StreamedRow row = rows.next(); >+ assertEquals("Row Number:5 --> 5 | Item5 | 205 | 104.11 | TRUE | 08/02/1917 | 94.11 |", row.toString().trim()); >+ row = rows.next(); >+ assertEquals("Row Number:6 --> 6 | Item6 | 206 | 105.11 | TRUE | 09/02/1917 | 95.11 |", row.toString().trim()); >+ row = rows.next(); >+ assertEquals("Row Number:7 --> 7 | Item7 | 207 | 106.11 | TRUE | 10/02/1917 | 96.11 |", row.toString().trim()); >+ row = rows.next(); >+ assertEquals("Row Number:8 --> 8 | Item8 | 208 | 107.11 | FALSE | 11/02/1917 | 97.11 |", row.toString().trim()); >+ } >+ >+ } >+ >+ >+ sheetCount++; >+ >+ } >+ >+ } >+ >+ >+ >+ >+} >diff --git a/test-data/spreadsheet/InvalidFile.txt b/test-data/spreadsheet/InvalidFile.txt >new file mode 100644 >index 0000000..e69de29 >diff --git a/test-data/spreadsheet/SpreadSheetSample04022017.xlsx b/test-data/spreadsheet/SpreadSheetSample04022017.xlsx >new file mode 100644 >index 0000000..e14788d >Binary files /dev/null and b/test-data/spreadsheet/SpreadSheetSample04022017.xlsx differ
You cannot view the attachment while viewing its details because your browser does not support IFRAMEs.
View the attachment on a separate page
.
View Attachment As Diff
View Attachment As Raw
Actions:
View
|
Diff
Attachments on
bug 60707
: 34731 |
34757
|
34943
|
34945