Index: src/java/org/apache/poi/hssf/eventusermodel/HSSFEventFactory.java =================================================================== --- src/java/org/apache/poi/hssf/eventusermodel/HSSFEventFactory.java (revision 789239) +++ src/java/org/apache/poi/hssf/eventusermodel/HSSFEventFactory.java (working copy) @@ -134,7 +134,7 @@ Record r = null; // Create a new RecordStream and use that - HSSFRecordStream recordStream = new HSSFRecordStream(in); + RecordFactoryInputStream recordStream = new RecordFactoryInputStream(in); // Process each record as they come in while(going) { Index: src/java/org/apache/poi/hssf/eventusermodel/HSSFRecordStream.java =================================================================== --- src/java/org/apache/poi/hssf/eventusermodel/HSSFRecordStream.java (revision 789239) +++ src/java/org/apache/poi/hssf/eventusermodel/HSSFRecordStream.java (working copy) @@ -1,234 +0,0 @@ -/* ==================================================================== - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-==================================================================== */ -package org.apache.poi.hssf.eventusermodel; - -import java.util.Vector; - -import org.apache.poi.hssf.record.ContinueRecord; -import org.apache.poi.hssf.record.DrawingGroupRecord; -import org.apache.poi.hssf.record.DrawingRecord; -import org.apache.poi.hssf.record.ObjRecord; -import org.apache.poi.hssf.record.Record; -import org.apache.poi.hssf.record.RecordFactory; -import org.apache.poi.hssf.record.RecordFormatException; -import org.apache.poi.hssf.record.RecordInputStream; -import org.apache.poi.hssf.record.TextObjectRecord; -import org.apache.poi.hssf.record.UnknownRecord; - -/** - * A stream based way to get at complete records, with - * as low a memory footprint as possible. - * This handles reading from a RecordInputStream, turning - * the data into full records, processing continue records - * etc. - * Most users should use {@link HSSFEventFactory} / - * {@link HSSFListener} and have new records pushed to - * them, but this does allow for a "pull" style of coding. - */ -public class HSSFRecordStream { - private RecordInputStream in; - - /** Have we run out of records on the stream? */ - private boolean hitEOS = false; - /** Have we returned all the records there are? */ - private boolean complete = false; - - /** - * Sometimes we end up with a bunch of - * records. When we do, these should - * be returned before the next normal - * record processing occurs (i.e. 
before - * we check for continue records and - * return rec) - */ - private Vector bonusRecords = null; - - /** - * The next record to return, which may need to have its - * continue records passed to it before we do - */ - private Record rec = null; - /** - * The most recent record that we gave to the user - */ - private Record lastRec = null; - /** - * The most recent DrawingRecord seen - */ - private DrawingRecord lastDrawingRecord = new DrawingRecord(); - - public HSSFRecordStream(RecordInputStream inp) { - this.in = inp; - } - - /** - * Returns the next (complete) record from the - * stream, or null if there are no more. - */ - public Record nextRecord() { - Record r = null; - - // Loop until we get something - while(r == null && !complete) { - // Are there any bonus records that we need to - // return? - r = getBonusRecord(); - - // If not, ask for the next real record - if(r == null) { - r = getNextRecord(); - } - } - - // All done - return r; - } - - /** - * If there are any "bonus" records, that should - * be returned before processing new ones, - * grabs the next and returns it. - * If not, returns null; - */ - private Record getBonusRecord() { - if(bonusRecords != null) { - Record r = (Record)bonusRecords.remove(0); - if(bonusRecords.size() == 0) { - bonusRecords = null; - } - return r; - } - return null; - } - - /** - * Returns the next available record, or null if - * this pass didn't return a record that's - * suitable for returning (eg was a continue record). - */ - private Record getNextRecord() { - Record toReturn = null; - - if(in.hasNextRecord()) { - // Grab our next record - in.nextRecord(); - short sid = in.getSid(); - - // - // for some reasons we have to make the workbook to be at least 4096 bytes - // but if we have such workbook we fill the end of it with zeros (many zeros) - // - // it is not good: - // if the length( all zero records ) % 4 = 1 - // e.g.: any zero record would be readed as 4 bytes at once ( 2 - id and 2 - size ). 
- // And the last 1 byte will be readed WRONG ( the id must be 2 bytes ) - // - // So we should better to check if the sid is zero and not to read more data - // The zero sid shows us that rest of the stream data is a fake to make workbook - // certain size - // - if ( sid == 0 ) - return null; - - - // If we had a last record, and this one - // isn't a continue record, then pass - // it on to the listener - if ((rec != null) && (sid != ContinueRecord.sid)) - { - // This last record ought to be returned - toReturn = rec; - } - - // If this record isn't a continue record, - // then build it up - if (sid != ContinueRecord.sid) - { - //System.out.println("creating "+sid); - Record[] recs = RecordFactory.createRecord(in); - - // We know that the multiple record situations - // don't contain continue records, so just - // pass those on to the listener now - if (recs.length > 1) { - bonusRecords = new Vector(recs.length-1); - for (int k = 0; k < (recs.length - 1); k++) { - bonusRecords.add(recs[k]); - } - } - - // Regardless of the number we created, always hold - // onto the last record to be processed on the next - // loop, in case it has any continue records - rec = recs[ recs.length - 1 ]; - // Don't return it just yet though, as we probably have - // a record from the last round to return - } - else { - // Normally, ContinueRecords are handled internally - // However, in a few cases, there is a gap between a record at - // its Continue, so we have to handle them specially - // This logic is much like in RecordFactory.createRecords() - Record[] recs = RecordFactory.createRecord(in); - ContinueRecord crec = (ContinueRecord)recs[0]; - if((lastRec instanceof ObjRecord) || (lastRec instanceof TextObjectRecord)) { - // You can have Obj records between a DrawingRecord - // and its continue! 
- lastDrawingRecord.processContinueRecord( crec.getData() ); - // Trigger them on the drawing record, now it's complete - rec = lastDrawingRecord; - } - else if((lastRec instanceof DrawingGroupRecord)) { - ((DrawingGroupRecord)lastRec).processContinueRecord(crec.getData()); - // Trigger them on the drawing record, now it's complete - rec = lastRec; - } - else { - if (rec instanceof UnknownRecord) { - ;//silently skip records we don't know about - } else { - throw new RecordFormatException("Records should handle ContinueRecord internally. Should not see this exception"); - } - } - } - - // Update our tracking of the last record - lastRec = rec; - if(rec instanceof DrawingRecord) { - lastDrawingRecord = (DrawingRecord)rec; - } - } else { - // No more records - hitEOS = true; - } - - // If we've hit the end-of-stream, then - // finish off the last record and be done - if(hitEOS) { - complete = true; - - // Return the last record if there was - // one, otherwise null - if(rec != null) { - toReturn = rec; - rec = null; - } - } - - return toReturn; - } -} \ No newline at end of file Index: src/java/org/apache/poi/hssf/record/RecordFactoryInputStream.java =================================================================== --- src/java/org/apache/poi/hssf/record/RecordFactoryInputStream.java (revision 789239) +++ src/java/org/apache/poi/hssf/record/RecordFactoryInputStream.java (working copy) @@ -14,20 +14,14 @@ See the License for the specific language governing permissions and limitations under the License. 
==================================================================== */ -package org.apache.poi.hssf.eventusermodel; +package org.apache.poi.hssf.record; -import java.util.Vector; +import org.apache.poi.hssf.eventusermodel.HSSFEventFactory; +import org.apache.poi.hssf.eventusermodel.HSSFListener; -import org.apache.poi.hssf.record.ContinueRecord; -import org.apache.poi.hssf.record.DrawingGroupRecord; -import org.apache.poi.hssf.record.DrawingRecord; -import org.apache.poi.hssf.record.ObjRecord; -import org.apache.poi.hssf.record.Record; -import org.apache.poi.hssf.record.RecordFactory; -import org.apache.poi.hssf.record.RecordFormatException; -import org.apache.poi.hssf.record.RecordInputStream; -import org.apache.poi.hssf.record.TextObjectRecord; -import org.apache.poi.hssf.record.UnknownRecord; +import java.util.Arrays; +import java.util.LinkedList; +import java.util.List; /** * A stream based way to get at complete records, with @@ -39,11 +33,9 @@ * {@link HSSFListener} and have new records pushed to * them, but this does allow for a "pull" style of coding. */ -public class HSSFRecordStream { - private RecordInputStream in; +public class RecordFactoryInputStream { + private final RecordInputStream recStream; - /** Have we run out of records on the stream? */ - private boolean hitEOS = false; /** Have we returned all the records there are? 
*/ private boolean complete = false; @@ -55,27 +47,28 @@ * we check for continue records and * return rec) */ - private Vector bonusRecords = null; - - /** - * The next record to return, which may need to have its - * continue records passed to it before we do - */ - private Record rec = null; - /** + private final LinkedList bonusRecords = new LinkedList(); + + /** * The most recent record that we gave to the user */ - private Record lastRec = null; + private Record lastRecord = null; /** * The most recent DrawingRecord seen */ private DrawingRecord lastDrawingRecord = new DrawingRecord(); - - public HSSFRecordStream(RecordInputStream inp) { - this.in = inp; + + private int bofDepth=0; + + private boolean lastRecordWasEOFLevelZero = false; + + private boolean includeContinueRecords = false; + + public RecordFactoryInputStream(RecordInputStream inp) { + recStream = inp; } - /** + /** * Returns the next (complete) record from the * stream, or null if there are no more. */ @@ -105,12 +98,8 @@ * If not, returns null; */ private Record getBonusRecord() { - if(bonusRecords != null) { - Record r = (Record)bonusRecords.remove(0); - if(bonusRecords.size() == 0) { - bonusRecords = null; - } - return r; + if(!bonusRecords.isEmpty()) { + return (Record) bonusRecords.removeFirst(); } return null; } @@ -120,115 +109,123 @@ * this pass didn't return a record that's * suitable for returning (eg was a continue record). */ - private Record getNextRecord() { - Record toReturn = null; - - if(in.hasNextRecord()) { - // Grab our next record - in.nextRecord(); - short sid = in.getSid(); - - // - // for some reasons we have to make the workbook to be at least 4096 bytes - // but if we have such workbook we fill the end of it with zeros (many zeros) - // - // it is not good: - // if the length( all zero records ) % 4 = 1 - // e.g.: any zero record would be readed as 4 bytes at once ( 2 - id and 2 - size ). 
- // And the last 1 byte will be readed WRONG ( the id must be 2 bytes ) - // - // So we should better to check if the sid is zero and not to read more data - // The zero sid shows us that rest of the stream data is a fake to make workbook - // certain size - // - if ( sid == 0 ) - return null; + private Record getNextRecord() { + /* + * How to recognise end of stream? + * In the best case, the underlying input stream (in) ends just after the last EOF record + * Usually however, the stream is padded with an arbitrary byte count. Excel and most apps + * reliably use zeros for padding and if this were always the case, this code could just + * skip all the (zero sized) records with sid==0. However, bug 46987 shows a file with + * non-zero padding that is read OK by Excel (Excel also fixes the padding). + * + * So to properly detect the workbook end of stream, this code has to identify the last + * EOF record. This is not so easy because the workbook bof+eof pair does not bracket the + * whole stream. The worksheets follow the workbook, but it is not easy to tell how many + * sheet sub-streams should be present. Hence we are looking for an EOF record that is not + * immediately followed by a BOF record. One extra complication is that bof+eof sub- + * streams can be nested within worksheet streams and it's not clear in these cases what + * record might follow any EOF record. So we also need to keep track of the bof/eof + * nesting level.
+ */ + + if (recStream.hasNextRecord()) { + // Grab our next record + recStream.nextRecord(); + if (lastRecordWasEOFLevelZero && recStream.getSid() != BOFRecord.sid) { + // Normally InputStream (in) contains only zero padding after this point + complete = true; + return null; + } - // If we had a last record, and this one - // isn't a continue record, then pass - // it on to the listener - if ((rec != null) && (sid != ContinueRecord.sid)) - { - // This last record ought to be returned - toReturn = rec; - } - - // If this record isn't a continue record, - // then build it up - if (sid != ContinueRecord.sid) - { - //System.out.println("creating "+sid); - Record[] recs = RecordFactory.createRecord(in); + Record record = RecordFactory.createSingleRecord(recStream); + lastRecordWasEOFLevelZero = false; - // We know that the multiple record situations - // don't contain continue records, so just - // pass those on to the listener now - if (recs.length > 1) { - bonusRecords = new Vector(recs.length-1); - for (int k = 0; k < (recs.length - 1); k++) { - bonusRecords.add(recs[k]); - } - } - - // Regardless of the number we created, always hold - // onto the last record to be processed on the next - // loop, in case it has any continue records - rec = recs[ recs.length - 1 ]; - // Don't return it just yet though, as we probably have - // a record from the last round to return - } - else { - // Normally, ContinueRecords are handled internally - // However, in a few cases, there is a gap between a record at - // its Continue, so we have to handle them specially - // This logic is much like in RecordFactory.createRecords() - Record[] recs = RecordFactory.createRecord(in); - ContinueRecord crec = (ContinueRecord)recs[0]; - if((lastRec instanceof ObjRecord) || (lastRec instanceof TextObjectRecord)) { - // You can have Obj records between a DrawingRecord - // and its continue! 
- lastDrawingRecord.processContinueRecord( crec.getData() ); - // Trigger them on the drawing record, now it's complete - rec = lastDrawingRecord; - } - else if((lastRec instanceof DrawingGroupRecord)) { - ((DrawingGroupRecord)lastRec).processContinueRecord(crec.getData()); - // Trigger them on the drawing record, now it's complete - rec = lastRec; - } - else { - if (rec instanceof UnknownRecord) { - ;//silently skip records we don't know about - } else { - throw new RecordFormatException("Records should handle ContinueRecord internally. Should not see this exception"); - } - } - } + if (record instanceof BOFRecord) { + bofDepth++; + return record; + } - // Update our tracking of the last record - lastRec = rec; - if(rec instanceof DrawingRecord) { - lastDrawingRecord = (DrawingRecord)rec; - } - } else { - // No more records - hitEOS = true; - } - - // If we've hit the end-of-stream, then - // finish off the last record and be done - if(hitEOS) { - complete = true; - - // Return the last record if there was - // one, otherwise null - if(rec != null) { - toReturn = rec; - rec = null; - } - } - - return toReturn; - } + if (record instanceof EOFRecord) { + bofDepth--; + if (bofDepth<1) { + lastRecordWasEOFLevelZero = true; + } + + return record; + } + + if (record instanceof DBCellRecord) { + // Not needed by POI. 
Regenerated from scratch by POI when spreadsheet is written + return null; + } + + if (record instanceof RKRecord) { + return RecordFactory.convertToNumberRecord((RKRecord) record); + } + + if (record instanceof MulRKRecord) { + NumberRecord[] records = RecordFactory.convertRKRecords((MulRKRecord) record); + + List list = Arrays.asList(records); + bonusRecords.addAll(list.subList(1, list.size())); + + return records[0]; + } + + if (record.getSid() == DrawingGroupRecord.sid + && lastRecord instanceof DrawingGroupRecord) { + DrawingGroupRecord lastDGRecord = (DrawingGroupRecord) lastRecord; + lastDGRecord.join((AbstractEscherHolderRecord) record); + return null; + } else if (record.getSid() == ContinueRecord.sid) { + ContinueRecord contRec = (ContinueRecord) record; + + if (lastRecord instanceof ObjRecord || lastRecord instanceof TextObjectRecord) { + // Drawing records have a very strange continue behaviour. + //There can actually be OBJ records mixed between the continues. + lastDrawingRecord.processContinueRecord(contRec.getData() ); + //we must remember the position of the continue record. 
+ //in the serialization procedure the original structure of records must be preserved + if (includeContinueRecords) { + return record; + } else { + return null; + } + } else if (lastRecord instanceof DrawingGroupRecord) { + ((DrawingGroupRecord)lastRecord).processContinueRecord(contRec.getData()); + return null; + } else if (lastRecord instanceof UnknownRecord) { + //Gracefully handle records that we don't know about, + //that happen to be continued + return record; + } else if (lastRecord instanceof EOFRecord) { + // This is really odd, but excel still sometimes + // outputs a file like this all the same + return record; + } else { + throw new RecordFormatException("Unhandled Continue Record"); + } + } else { + lastRecord = record; + if (record instanceof DrawingRecord) { + lastDrawingRecord = (DrawingRecord) record; + } + + return record; + } + + } else { + // No more records + complete = true; + return null; + } + } + + /** + * Controls whether nextRecord() returns (true) or drops (false) a ContinueRecord that follows an ObjRecord or TextObjectRecord + */ + public void setIncludeContinueRecords(boolean includeContinueRecords) { + this.includeContinueRecords = includeContinueRecords; + } } \ No newline at end of file Property changes on: src/java/org/apache/poi/hssf/record/RecordFactoryInputStream.java ___________________________________________________________________ Added: svn:eol-style + native Index: src/java/org/apache/poi/hssf/record/RecordFactory.java =================================================================== --- src/java/org/apache/poi/hssf/record/RecordFactory.java (revision 789239) +++ src/java/org/apache/poi/hssf/record/RecordFactory.java (working copy) @@ -17,22 +17,15 @@ package org.apache.poi.hssf.record; +import org.apache.poi.hssf.record.chart.*; +import org.apache.poi.hssf.record.pivottable.*; + import java.io.InputStream; import java.lang.reflect.Constructor; import java.lang.reflect.InvocationTargetException; import java.lang.reflect.Modifier; -import java.util.ArrayList; -import java.util.Arrays; -import
java.util.HashMap; -import java.util.HashSet; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.Set; +import java.util.*; -import org.apache.poi.hssf.record.chart.*; -import org.apache.poi.hssf.record.pivottable.*; - /** * Title: Record Factory

* Description: Takes a stream and outputs an array of Record objects.

@@ -259,7 +252,7 @@ return new Record[] { record, }; } - static Record createSingleRecord(RecordInputStream in) { + public static Record createSingleRecord(RecordInputStream in) { I_RecordCreator constructor = _recordCreatorsById.get(new Integer(in.getSid())); if (constructor == null) { @@ -273,7 +266,7 @@ * RK record is a slightly smaller alternative to NumberRecord * POI likes NumberRecord better */ - private static NumberRecord convertToNumberRecord(RKRecord rk) { + public static NumberRecord convertToNumberRecord(RKRecord rk) { NumberRecord num = new NumberRecord(); num.setColumn(rk.getColumn()); @@ -286,7 +279,7 @@ /** * Converts a {@link MulRKRecord} into an equivalent array of {@link NumberRecord}s */ - private static NumberRecord[] convertRKRecords(MulRKRecord mrk) { + public static NumberRecord[] convertRKRecords(MulRKRecord mrk) { NumberRecord[] mulRecs = new NumberRecord[mrk.getNumColumns()]; for (int k = 0; k < mrk.getNumColumns(); k++) { @@ -374,109 +367,17 @@ * @exception RecordFormatException on error processing the InputStream */ public static List createRecords(InputStream in) throws RecordFormatException { - List records = new ArrayList(NUM_RECORDS); - RecordInputStream recStream = new RecordInputStream(in); - DrawingRecord lastDrawingRecord = new DrawingRecord( ); - Record lastRecord = null; - /* - * How to recognise end of stream? - * In the best case, the underlying input stream (in) ends just after the last EOF record - * Usually however, the stream is padded with an arbitrary byte count. Excel and most apps - * reliably use zeros for padding and if this were always the case, this code could just - * skip all the (zero sized) records with sid==0. However, bug 46987 shows a file with - * non-zero padding that is read OK by Excel (Excel also fixes the padding). - * - * So to properly detect the workbook end of stream, this code has to identify the last - * EOF record. 
This is not so easy because the worbook bof+eof pair do not bracket the - * whole stream. The worksheets follow the workbook, but it is not easy to tell how many - * sheet sub-streams should be present. Hence we are looking for an EOF record that is not - * immediately followed by a BOF record. One extra complication is that bof+eof sub- - * streams can be nested within worksheet streams and it's not clear in these cases what - * record might follow any EOF record. So we also need to keep track of the bof/eof - * nesting level. - */ + RecordFactoryInputStream recStream = new RecordFactoryInputStream(new RecordInputStream(in)); + recStream.setIncludeContinueRecords(true); - int bofDepth=0; - boolean lastRecordWasEOFLevelZero = false; - while (recStream.hasNextRecord()) { - recStream.nextRecord(); - if (lastRecordWasEOFLevelZero && recStream.getSid() != BOFRecord.sid) { - // Normally InputStream (in) contains only zero padding after this point - break; - } - Record record = createSingleRecord(recStream); - lastRecordWasEOFLevelZero = false; - if (record instanceof BOFRecord) { - bofDepth++; - records.add(record); - continue; - } - if (record instanceof EOFRecord) { - bofDepth--; - records.add(record); - if (bofDepth<1) { - lastRecordWasEOFLevelZero = true; - } - continue; - } + Record record; - if (record instanceof DBCellRecord) { - // Not needed by POI. 
Regenerated from scratch by POI when spreadsheet is written - continue; - } - - if (record instanceof RKRecord) { - records.add(convertToNumberRecord((RKRecord) record)); - continue; - } - if (record instanceof MulRKRecord) { - addAll(records, convertRKRecords((MulRKRecord)record)); - continue; - } - - if (record.getSid() == DrawingGroupRecord.sid - && lastRecord instanceof DrawingGroupRecord) { - DrawingGroupRecord lastDGRecord = (DrawingGroupRecord) lastRecord; - lastDGRecord.join((AbstractEscherHolderRecord) record); - } else if (record.getSid() == ContinueRecord.sid) { - ContinueRecord contRec = (ContinueRecord)record; - - if (lastRecord instanceof ObjRecord || lastRecord instanceof TextObjectRecord) { - // Drawing records have a very strange continue behaviour. - //There can actually be OBJ records mixed between the continues. - lastDrawingRecord.processContinueRecord(contRec.getData() ); - //we must remember the position of the continue record. - //in the serialization procedure the original structure of records must be preserved - records.add(record); - } else if (lastRecord instanceof DrawingGroupRecord) { - ((DrawingGroupRecord)lastRecord).processContinueRecord(contRec.getData()); - } else if (lastRecord instanceof UnknownRecord) { - //Gracefully handle records that we don't know about, - //that happen to be continued - records.add(record); - } else if (lastRecord instanceof EOFRecord) { - // This is really odd, but excel still sometimes - // outputs a file like this all the same - records.add(record); - } else { - throw new RecordFormatException("Unhandled Continue Record"); - } - } else { - lastRecord = record; - if (record instanceof DrawingRecord) { - lastDrawingRecord = (DrawingRecord) record; - } - records.add(record); - } + while ((record = recStream.nextRecord())!=null) { + records.add(record); } + return records; } - - private static void addAll(List destList, Record[] srcRecs) { - for (int i = 0; i < srcRecs.length; i++) { - 
destList.add(srcRecs[i]); - } - } }