--- a/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocument.java +++ a/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocument.java @@ -32,12 +32,11 @@ import org.apache.poi.hwpf.model.ComplexFileTable; import org.apache.poi.hwpf.model.DocumentProperties; import org.apache.poi.hwpf.model.EscherRecordHolder; import org.apache.poi.hwpf.model.FSPATable; +import org.apache.poi.hwpf.model.FieldsTables; import org.apache.poi.hwpf.model.FontTable; -import org.apache.poi.hwpf.model.GenericPropertyNode; import org.apache.poi.hwpf.model.ListTables; import org.apache.poi.hwpf.model.PAPBinTable; import org.apache.poi.hwpf.model.PicturesTable; -import org.apache.poi.hwpf.model.PlexOfCps; import org.apache.poi.hwpf.model.PropertyNode; import org.apache.poi.hwpf.model.RevisionMarkAuthorTable; import org.apache.poi.hwpf.model.SavedByTable; @@ -100,6 +99,9 @@ public final class HWPFDocument extends HWPFDocumentCore /** Holds Office Art objects */ protected ShapesTable _officeArts; + + /** Holds the fields PLCFs */ + protected FieldsTables _fieldsTables; protected HWPFDocument() { @@ -250,13 +252,7 @@ public final class HWPFDocument extends HWPFDocumentCore _rmat = new RevisionMarkAuthorTable(_tableStream, rmarkOffset, rmarkLength); } - PlexOfCps plc = new PlexOfCps(_tableStream, _fib.getFcPlcffldMom(), _fib.getLcbPlcffldMom(), 2); - for (int x = 0; x < plc.length(); x++) - { - GenericPropertyNode node = plc.getProperty(x); - byte[] fld = node.getBytes(); - int breakpoint = 0; - } + _fieldsTables = new FieldsTables(_tableStream, _fib); } public TextPieceTable getTextTable() @@ -402,6 +398,13 @@ public final class HWPFDocument extends HWPFDocumentCore } /** + * @return FieldsTables object, that is able to extract fields descriptors from this document + */ + public FieldsTables getFieldsTables() { + return _fieldsTables; + } + + /** * Writes out the word file that is represented by an instance of this class. * * @param out The OutputStream to write to. 
--- a/src/scratchpad/src/org/apache/poi/hwpf/model/FieldDescriptor.java
+++ a/src/scratchpad/src/org/apache/poi/hwpf/model/FieldDescriptor.java
@@ -17,23 +17,196 @@
 
 package org.apache.poi.hwpf.model;
 
-import org.apache.poi.util.BitField;
-import org.apache.poi.util.BitFieldFactory;
+import java.text.MessageFormat;
 
+import org.apache.poi.util.HexDump;
+
+/**
+ * Class for the FLD structure.
+ * <p>
+ * Built from the two raw bytes of one FLD entry: the low five bits of the
+ * first byte give the boundary mark, the second byte holds the field type
+ * (begin marks) or the flag bits (end marks).
+ *
+ * @author Cedric Bosdonnat
+ *
+ */
 public final class FieldDescriptor
 {
+    public static final int FIELD_BEGIN_MARK = 0x13;
+    public static final int FIELD_SEPARATOR_MARK = 0x14;
+    public static final int FIELD_END_MARK = 0x15;
+
+    private static final short BOUNDARY_MASK = 0x1F;
+    private static final short TYPE_MASK = 0xFF;
+    private static final short ZOMBIE_EMBED_MASK = 0x02;
+    private static final short RESULT_DIRTY_MASK = 0x04;
+    private static final short RESULT_EDITED_MASK = 0x08;
+    private static final short LOCKED_MASK = 0x10;
+    private static final short PRIVATE_RESULT_MASK = 0x20;
+    private static final short NESTED_MASK = 0x40;
+    private static final short HAS_SEP_MASK = 0x80;
+
     byte _fieldBoundaryType;
     byte _info;
 
-    private final static BitField fZombieEmbed = BitFieldFactory.getInstance(0x02);
-    private final static BitField fResultDiry = BitFieldFactory.getInstance(0x04);
-    private final static BitField fResultEdited = BitFieldFactory.getInstance(0x08);
-    private final static BitField fLocked = BitFieldFactory.getInstance(0x10);
-    private final static BitField fPrivateResult = BitFieldFactory.getInstance(0x20);
-    private final static BitField fNested = BitFieldFactory.getInstance(0x40);
-    private final static BitField fHasSep = BitFieldFactory.getInstance(0x80);
+    private int fieldType;
+    private boolean zombieEmbed;
+    private boolean resultDirty;
+    private boolean resultEdited;
+    private boolean locked;
+    private boolean privateResult;
+    private boolean nested;
+    private boolean hasSep;
+
+    /**
+     * Decodes one FLD entry.
+     *
+     * @param data raw FLD bytes; {@code data[0]} carries the boundary mark,
+     *            {@code data[1]} the field type or the flags byte
+     */
+    public FieldDescriptor(byte[] data)
+    {
+        _fieldBoundaryType = (byte) (data[0] & BOUNDARY_MASK);
+        _info = data[1];
+
+        if (_fieldBoundaryType == FIELD_BEGIN_MARK)
+        {
+            fieldType = _info & TYPE_MASK;
+        } else if (_fieldBoundaryType == FIELD_END_MARK)
+        {
+            // A flag is set when its masked bit is non-zero. The masked value
+            // is the mask itself (0x02..0x80), so it must not be compared to 1.
+            zombieEmbed = ((_info & ZOMBIE_EMBED_MASK) != 0);
+            resultDirty = ((_info & RESULT_DIRTY_MASK) != 0);
+            resultEdited = ((_info & RESULT_EDITED_MASK) != 0);
+            locked = ((_info & LOCKED_MASK) != 0);
+            privateResult = ((_info & PRIVATE_RESULT_MASK) != 0);
+            nested = ((_info & NESTED_MASK) != 0);
+            hasSep = ((_info & HAS_SEP_MASK) != 0);
+        }
+    }
+
+    /**
+     * @return the boundary mark taken from the low five bits of the first byte
+     */
+    public int getBoundaryType()
+    {
+        return _fieldBoundaryType;
+    }
+
+    /**
+     * @return the field type stored in the second byte
+     * @throws UnsupportedOperationException if this is not a begin mark
+     */
+    public int getFieldType()
+    {
+        if (_fieldBoundaryType != FIELD_BEGIN_MARK)
+            throw new UnsupportedOperationException(
+                    "This field is only defined for begin marks.");
+        return fieldType;
+    }
 
-    public FieldDescriptor()
+    /**
+     * @return whether the zombie-embed bit (0x02) is set in the flags byte
+     * @throws UnsupportedOperationException if this is not an end mark
+     */
+    public boolean isZombieEmbed()
     {
+        if (_fieldBoundaryType != FIELD_END_MARK)
+            throw new UnsupportedOperationException(
+                    "This field is only defined for end marks.");
+        return zombieEmbed;
+    }
+
+    /**
+     * @return whether the result-dirty bit (0x04) is set in the flags byte
+     * @throws UnsupportedOperationException if this is not an end mark
+     */
+    public boolean isResultDirty()
+    {
+        if (_fieldBoundaryType != FIELD_END_MARK)
+            throw new UnsupportedOperationException(
+                    "This field is only defined for end marks.");
+        return resultDirty;
+    }
+
+    /**
+     * @return whether the result-edited bit (0x08) is set in the flags byte
+     * @throws UnsupportedOperationException if this is not an end mark
+     */
+    public boolean isResultEdited()
+    {
+        if (_fieldBoundaryType != FIELD_END_MARK)
+            throw new UnsupportedOperationException(
+                    "This field is only defined for end marks.");
+        return resultEdited;
+    }
+
+    /**
+     * @return whether the locked bit (0x10) is set in the flags byte
+     * @throws UnsupportedOperationException if this is not an end mark
+     */
+    public boolean isLocked()
+    {
+        if (_fieldBoundaryType != FIELD_END_MARK)
+            throw new UnsupportedOperationException(
+                    "This field is only defined for end marks.");
+        return locked;
+    }
+
+    /**
+     * @return whether the private-result bit (0x20) is set in the flags byte
+     * @throws UnsupportedOperationException if this is not an end mark
+     */
+    public boolean isPrivateResult()
+    {
+        if (_fieldBoundaryType != FIELD_END_MARK)
+            throw new UnsupportedOperationException(
+                    "This field is only defined for end marks.");
+        return privateResult;
+    }
+
+    /**
+     * @return whether the nested bit (0x40) is set in the flags byte
+     * @throws UnsupportedOperationException if this is not an end mark
+     */
+    public boolean isNested()
+    {
+        if (_fieldBoundaryType != FIELD_END_MARK)
+            throw new UnsupportedOperationException(
+                    "This field is only defined for end marks.");
+        return nested;
+    }
+
+    /**
+     * @return whether the has-separator bit (0x80) is set in the flags byte
+     * @throws UnsupportedOperationException if this is not an end mark
+     */
+    public boolean isHasSep()
+    {
+        if (_fieldBoundaryType != FIELD_END_MARK)
+            throw new UnsupportedOperationException(
+                    "This field is only defined for end marks.");
+        return hasSep;
+    }
+
+    /**
+     * @return a dump of the boundary mark in hex plus the field type or flags
+     */
+    public String toString()
+    {
+        String details = "";
+        if (_fieldBoundaryType == FIELD_BEGIN_MARK)
+        {
+            details = " type: " + fieldType;
+        }
+        else if (_fieldBoundaryType == FIELD_END_MARK)
+        {
+            details = " flags: 0x" + HexDump.toHex(_info);
+        }
+
+        return MessageFormat.format("FLD - 0x{0}{1}", HexDump
+                .toHex((byte) _fieldBoundaryType), details);
     }
 }
--- a/src/scratchpad/src/org/apache/poi/hwpf/model/FieldsTables.java
+++ a/src/scratchpad/src/org/apache/poi/hwpf/model/FieldsTables.java
@@ -0,0 +1,130 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements. See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License. You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+
+package org.apache.poi.hwpf.model;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+
+
+/**
+ * This class provides access to all the fields Plex.
+ *
+ * @author Cedric Bosdonnat
+ *
+ */
+public class FieldsTables
+{
+    public static final int PLCFFLDATN = 0;
+    public static final int PLCFFLDEDN = 1;
+    public static final int PLCFFLDFTN = 2;
+    public static final int PLCFFLDHDR = 3;
+    public static final int PLCFFLDHDRTXBX = 4;
+    public static final int PLCFFLDMOM = 5;
+    public static final int PLCFFLDTXBX = 6;
+
+    // The size in bytes of the FLD data structure
+    private static final int FLD_SIZE = 2;
+
+    private HashMap<Integer, ArrayList<PlexOfField>> _tables;
+
+    /**
+     * Reads every fields PLCF referenced by the FIB.
+     *
+     * @param tableStream bytes of the table stream the PLCFs are read from
+     * @param fib file information block giving each PLCF's offset and size
+     */
+    public FieldsTables(byte[] tableStream, FileInformationBlock fib)
+    {
+        _tables = new HashMap<Integer, ArrayList<PlexOfField>>();
+
+        for (int i = PLCFFLDATN; i <= PLCFFLDTXBX; i++ )
+        {
+            _tables.put(i, readPLCF(tableStream, fib, i));
+        }
+    }
+
+    /**
+     * @param type one of the {@code PLCFFLD*} constants of this class
+     * @return the fields read for that PLCF (empty if it holds none), or
+     *         {@code null} if {@code type} is not one of the constants
+     */
+    public ArrayList<PlexOfField> getFieldsPLCF( int type )
+    {
+        return _tables.get(type);
+    }
+
+    /**
+     * Reads the PLCF selected by {@code type} into a list of PlexOfField.
+     * The list stays empty unless both offset and length from the FIB are positive.
+     */
+    private ArrayList<PlexOfField> readPLCF(byte[] tableStream, FileInformationBlock fib, int type)
+    {
+        int start = 0;
+        int length = 0;
+
+        switch (type)
+        {
+        case PLCFFLDATN:
+            start = fib.getFcPlcffldAtn();
+            length = fib.getLcbPlcffldAtn();
+            break;
+        case PLCFFLDEDN:
+            start = fib.getFcPlcffldEdn();
+            length = fib.getLcbPlcffldEdn();
+            break;
+        case PLCFFLDFTN:
+            start = fib.getFcPlcffldFtn();
+            length = fib.getLcbPlcffldFtn();
+            break;
+        case PLCFFLDHDR:
+            start = fib.getFcPlcffldHdr();
+            length = fib.getLcbPlcffldHdr();
+            break;
+        case PLCFFLDHDRTXBX:
+            start = fib.getFcPlcffldHdrtxbx();
+            length = fib.getLcbPlcffldHdrtxbx();
+            break;
+        case PLCFFLDMOM:
+            start = fib.getFcPlcffldMom();
+            length = fib.getLcbPlcffldMom();
+            break;
+        case PLCFFLDTXBX:
+            start = fib.getFcPlcffldTxbx();
+            length = fib.getLcbPlcffldTxbx();
+            break;
+        default:
+            break;
+        }
+
+        ArrayList<PlexOfField> fields = new ArrayList<PlexOfField>();
+
+        if (start > 0 && length > 0)
+        {
+            PlexOfCps plcf = new PlexOfCps(tableStream, start, length, FLD_SIZE);
+            fields.ensureCapacity(plcf.length());
+
+            for ( int i = 0; i < plcf.length(); i ++ ) {
+                GenericPropertyNode propNode = plcf.getProperty( i );
+                PlexOfField plex = new PlexOfField( propNode.getStart(), propNode.getEnd(), propNode.getBytes() );
+                fields.add( plex );
+            }
+        }
+
+        return fields;
+    }
+}
--- a/src/scratchpad/src/org/apache/poi/hwpf/model/FileInformationBlock.java
+++ a/src/scratchpad/src/org/apache/poi/hwpf/model/FileInformationBlock.java
@@ -66,7 +66,13 @@ public final class FileInformationBlock extends FIBAbstractType
       fieldSet.add(Integer.valueOf(FIBFieldHandler.PLCFSED));
       fieldSet.add(Integer.valueOf(FIBFieldHandler.PLCFLST));
       fieldSet.add(Integer.valueOf(FIBFieldHandler.PLFLFO));
+      fieldSet.add(Integer.valueOf(FIBFieldHandler.PLCFFLDATN));
+      fieldSet.add(Integer.valueOf(FIBFieldHandler.PLCFFLDEDN));
+      fieldSet.add(Integer.valueOf(FIBFieldHandler.PLCFFLDFTN));
+      fieldSet.add(Integer.valueOf(FIBFieldHandler.PLCFFLDHDR));
+      fieldSet.add(Integer.valueOf(FIBFieldHandler.PLCFFLDHDRTXBX));
       fieldSet.add(Integer.valueOf(FIBFieldHandler.PLCFFLDMOM));
+      fieldSet.add(Integer.valueOf(FIBFieldHandler.PLCFFLDTXBX));
       fieldSet.add(Integer.valueOf(FIBFieldHandler.STTBFFFN));
       fieldSet.add(Integer.valueOf(FIBFieldHandler.STTBFRMARK));
       fieldSet.add(Integer.valueOf(FIBFieldHandler.STTBSAVEDBY));
@@ -456,6 +462,56 @@ public final class FileInformationBlock extends FIBAbstractType
       _fieldHandler.clearFields();
     }
 
+    public int getFcPlcffldAtn()
+    {
+      return _fieldHandler.getFieldOffset(FIBFieldHandler.PLCFFLDATN);
+    }
+
+    public int getLcbPlcffldAtn()
+    {
+      return _fieldHandler.getFieldSize(FIBFieldHandler.PLCFFLDATN);
+    }
+
+    public int getFcPlcffldEdn()
+    {
+      return _fieldHandler.getFieldOffset(FIBFieldHandler.PLCFFLDEDN);
+    }
+
+    public int getLcbPlcffldEdn()
+    {
+      return _fieldHandler.getFieldSize(FIBFieldHandler.PLCFFLDEDN);
+    }
+
+    public int getFcPlcffldFtn()
+    {
+      return _fieldHandler.getFieldOffset(FIBFieldHandler.PLCFFLDFTN);
+    }
+
+    public int getLcbPlcffldFtn()
+    {
+      return _fieldHandler.getFieldSize(FIBFieldHandler.PLCFFLDFTN);
+    }
+
+    public int getFcPlcffldHdr()
+    {
+      return _fieldHandler.getFieldOffset(FIBFieldHandler.PLCFFLDHDR);
+    }
+
+    public int 
getLcbPlcffldHdr()
+    {
+      return _fieldHandler.getFieldSize(FIBFieldHandler.PLCFFLDHDR);
+    }
+
+    public int getFcPlcffldHdrtxbx()
+    {
+      return _fieldHandler.getFieldOffset(FIBFieldHandler.PLCFFLDHDRTXBX);
+    }
+
+    public int getLcbPlcffldHdrtxbx()
+    {
+      return _fieldHandler.getFieldSize(FIBFieldHandler.PLCFFLDHDRTXBX);
+    }
+
     public int getFcPlcffldMom()
     {
       return _fieldHandler.getFieldOffset(FIBFieldHandler.PLCFFLDMOM);
@@ -465,6 +521,16 @@ public final class FileInformationBlock extends FIBAbstractType
     {
       return _fieldHandler.getFieldSize(FIBFieldHandler.PLCFFLDMOM);
     }
+
+    public int getFcPlcffldTxbx()
+    {
+      return _fieldHandler.getFieldOffset(FIBFieldHandler.PLCFFLDTXBX);
+    }
+
+    public int getLcbPlcffldTxbx()
+    {
+      return _fieldHandler.getFieldSize(FIBFieldHandler.PLCFFLDTXBX);
+    }
 
     public int getFcPlcspaMom()
     {
--- a/src/scratchpad/src/org/apache/poi/hwpf/model/PlexOfField.java
+++ a/src/scratchpad/src/org/apache/poi/hwpf/model/PlexOfField.java
@@ -0,0 +1,68 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements. See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License. You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+
+package org.apache.poi.hwpf.model;
+
+/**
+ * Structure describing the Plex for fields (contained plcffld* in the spec).
+ * <p>
+ * Pairs the start and end positions of one PLCF entry with the
+ * {@link FieldDescriptor} decoded from that entry's bytes.
+ *
+ * @author Cedric Bosdonnat
+ *
+ */
+public class PlexOfField
+{
+    private int fcStart;
+    private int fcEnd;
+    private FieldDescriptor fld;
+
+    /**
+     * @param fcStart start position of the entry
+     * @param fcEnd end position of the entry
+     * @param data raw FLD bytes, decoded into a {@link FieldDescriptor}
+     */
+    public PlexOfField( int fcStart, int fcEnd, byte[] data ) {
+        this.fcStart = fcStart;
+        this.fcEnd = fcEnd;
+
+        fld = new FieldDescriptor( data );
+    }
+
+    public int getFcStart() {
+        return fcStart;
+    }
+
+    public int getFcEnd() {
+        return fcEnd;
+    }
+
+    public FieldDescriptor getFld() {
+        return fld;
+    }
+
+    /**
+     * Formats the entry as {@code [start, end) - descriptor}. Plain string
+     * concatenation is used so the positions are printed as bare digits;
+     * MessageFormat would insert locale-specific grouping separators
+     * (for example "1,000") once a position reaches 1000.
+     */
+    public String toString() {
+        return "[" + fcStart + ", " + fcEnd + ") - " + fld.toString();
+    }
+}
--- a/src/scratchpad/testcases/org/apache/poi/hwpf/AllHWPFTests.java
+++ a/src/scratchpad/testcases/org/apache/poi/hwpf/AllHWPFTests.java
@@ -69,6 +69,7 @@ public final class AllHWPFTests {
 		suite.addTestSuite(TestSectionTable.class);
 		suite.addTestSuite(TestStyleSheet.class);
 		suite.addTestSuite(TestTextPieceTable.class);
+		suite.addTestSuite(TestFieldsTables.class);
 
 		suite.addTestSuite(TestBug46610.class);
 		suite.addTestSuite(TestHeaderStories.class);
--- a/src/scratchpad/testcases/org/apache/poi/hwpf/HWPFDocFixture.java
+++ a/src/scratchpad/testcases/org/apache/poi/hwpf/HWPFDocFixture.java
@@ -25,13 +25,16 @@ import org.apache.poi.POIDataSamples;
 
 public final class HWPFDocFixture
 {
+  public static final String DEFAULT_TEST_FILE = "test.doc";
+
   public byte[] _tableStream;
   public byte[] _mainStream;
   public FileInformationBlock _fib;
+  private String _testFile;
 
-  public HWPFDocFixture(Object obj)
+  public HWPFDocFixture(Object obj, String testFile)
   {
-
+    _testFile = testFile;
   }
 
   public void setUp()
@@ -39,7 +42,7 @@ public final class HWPFDocFixture
     try
     {
       POIFSFileSystem filesystem = new POIFSFileSystem(
- 
POIDataSamples.getDocumentInstance().openResourceAsStream("test.doc")); + POIDataSamples.getDocumentInstance().openResourceAsStream(_testFile)); DocumentEntry documentProps = (DocumentEntry) filesystem.getRoot().getEntry("WordDocument"); --- a/src/scratchpad/testcases/org/apache/poi/hwpf/HWPFTestCase.java +++ a/src/scratchpad/testcases/org/apache/poi/hwpf/HWPFTestCase.java @@ -32,11 +32,16 @@ public abstract class HWPFTestCase extends TestCase { protected void setUp() throws Exception { super.setUp(); /** @todo verify the constructors */ - _hWPFDocFixture = new HWPFDocFixture(this); + _hWPFDocFixture = new HWPFDocFixture(this, getTestFile()); _hWPFDocFixture.setUp(); } + protected String getTestFile() + { + return HWPFDocFixture.DEFAULT_TEST_FILE; + } + protected void tearDown() throws Exception { if (_hWPFDocFixture != null) { _hWPFDocFixture.tearDown(); --- a/src/scratchpad/testcases/org/apache/poi/hwpf/TestFieldsTables.java +++ a/src/scratchpad/testcases/org/apache/poi/hwpf/TestFieldsTables.java @@ -0,0 +1,89 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+==================================================================== */ + +package org.apache.poi.hwpf; + +import java.util.ArrayList; + +import org.apache.poi.hwpf.model.FieldsTables; +import org.apache.poi.hwpf.model.FileInformationBlock; +import org.apache.poi.hwpf.model.PlexOfField; + +/** + * Test case for the fields tables, this test is based on the test-fields.doc file + * instead of the usual test.doc. + * + * @author Cedric Bosdonnat + * + */ +public class TestFieldsTables extends HWPFTestCase +{ + private static final int ALL_TYPES[] = { + FieldsTables.PLCFFLDATN, + FieldsTables.PLCFFLDEDN, + FieldsTables.PLCFFLDFTN, + FieldsTables.PLCFFLDHDR, + FieldsTables.PLCFFLDHDRTXBX, + FieldsTables.PLCFFLDMOM, + FieldsTables.PLCFFLDTXBX + }; + + private static final String EXPECTED[] = { + "[19, 43) - FLD - 0x13 type: 31\n[43, 54) - FLD - 0x14\n[54, 59) - FLD - 0x15 flags: 0x81\n", + "[31, 59) - FLD - 0x13 type: 69\n[59, 61) - FLD - 0x14\n[61, 66) - FLD - 0x15 flags: 0x80\n", + "[23, 49) - FLD - 0x13 type: 17\n[49, 64) - FLD - 0x14\n[64, 69) - FLD - 0x15 flags: 0x80\n", + "[18, 42) - FLD - 0x13 type: 33\n[42, 44) - FLD - 0x14\n[44, 47) - FLD - 0x15 flags: 0x81\n" + + "[47, 75) - FLD - 0x13 type: 29\n[75, 85) - FLD - 0x14\n[85, 91) - FLD - 0x15 flags: 0x81\n", + "[30, 54) - FLD - 0x13 type: 32\n[54, 62) - FLD - 0x14\n[62, 68) - FLD - 0x15 flags: 0x81\n", + "[1, 31) - FLD - 0x13 type: 21\n[31, 51) - FLD - 0x14\n[51, 541) - FLD - 0x15 flags: 0x81\n", + "[19, 47) - FLD - 0x13 type: 25\n[47, 49) - FLD - 0x14\n[49, 55) - FLD - 0x15 flags: 0x81\n" + }; + + public TestFieldsTables() + { + } + + protected String getTestFile() + { + return "test-fields.doc"; + } + + public void testReadFields() + { + FileInformationBlock fib = _hWPFDocFixture._fib; + byte[] tableStream = _hWPFDocFixture._tableStream; + + FieldsTables fieldsTables = new FieldsTables(tableStream, fib); + + for (int i = 0; i < ALL_TYPES.length; i++) + { + ArrayList fieldsPlexes = fieldsTables.getFieldsPLCF( 
ALL_TYPES[i] ); + String result = dumpPlexes(fieldsPlexes); + assertEquals(EXPECTED[i], result); + } + } + + private String dumpPlexes(ArrayList fieldsPlexes) + { + String dump = new String(); + for ( int i=0; i