View | Details | Raw Unified | Return to bug 51351
Collapse All | Expand All

(-)src/org/apache/poi/hwpf/extractor/AbstractToFoExtractor.java (+206 lines)
Line 0 Link Here
1
/*
2
 *  ====================================================================
3
 *    Licensed to the Apache Software Foundation (ASF) under one or more
4
 *    contributor license agreements.  See the NOTICE file distributed with
5
 *    this work for additional information regarding copyright ownership.
6
 *    The ASF licenses this file to You under the Apache License, Version 2.0
7
 *    (the "License"); you may not use this file except in compliance with
8
 *    the License.  You may obtain a copy of the License at
9
 *
10
 *        http://www.apache.org/licenses/LICENSE-2.0
11
 *
12
 *    Unless required by applicable law or agreed to in writing, software
13
 *    distributed under the License is distributed on an "AS IS" BASIS,
14
 *    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
 *    See the License for the specific language governing permissions and
16
 *    limitations under the License.
17
 * ====================================================================
18
 */
19
package org.apache.poi.hwpf.extractor;
20
21
import org.w3c.dom.Document;
22
import org.w3c.dom.Element;
23
import org.w3c.dom.Text;
24
25
/**
 * Base class holding the DOM plumbing for building an XSL-FO tree: it owns the
 * target {@link Document}, creates the <tt>fo:root</tt> and
 * <tt>fo:layout-master-set</tt> elements, and offers small factory methods for
 * the individual <tt>fo:*</tt> elements.
 * <p>
 * Methods named <tt>add*</tt> create an element and attach it to a parent;
 * methods named <tt>create*</tt> only create the node and leave attaching it to
 * the caller.
 * </p>
 */
public abstract class AbstractToFoExtractor
{

    /** Namespace URI used for every element created by this class. */
    private static final String NS_XSLFO = "http://www.w3.org/1999/XSL/Format";

    /** DOM document that receives the generated XSL-FO tree. */
    protected final Document document;

    /** The <tt>fo:layout-master-set</tt> element, child of {@link #root}. */
    protected final Element layoutMasterSet;

    /** The <tt>fo:root</tt> element, appended directly to {@link #document}. */
    protected final Element root;

    /**
     * Appends a new <tt>fo:root</tt> element to the given document and a new
     * <tt>fo:layout-master-set</tt> element to that root.
     * 
     * @param document
     *            DOM document to populate
     */
    public AbstractToFoExtractor( Document document )
    {
        this.document = document;

        this.root = createFoElement( "fo:root" );
        this.document.appendChild( this.root );

        this.layoutMasterSet = createFoElement( "fo:layout-master-set" );
        this.root.appendChild( this.layoutMasterSet );
    }

    /**
     * Creates a detached element in the XSL-FO namespace.
     * 
     * @param qualifiedName
     *            prefixed element name, e.g. <tt>fo:block</tt>
     * @return the new element, not yet attached to any parent
     */
    private Element createFoElement( String qualifiedName )
    {
        return document.createElementNS( NS_XSLFO, qualifiedName );
    }

    /**
     * Creates a <tt>fo:flow</tt> with the given <tt>flow-name</tt> and appends
     * it to the page sequence.
     * 
     * @param pageSequence
     *            parent element for the new flow
     * @param flowName
     *            value of the <tt>flow-name</tt> attribute
     * @return the appended <tt>fo:flow</tt> element
     */
    protected Element addFlowToPageSequence( final Element pageSequence,
            String flowName )
    {
        Element newFlow = createFoElement( "fo:flow" );
        newFlow.setAttribute( "flow-name", flowName );
        pageSequence.appendChild( newFlow );
        return newFlow;
    }

    /**
     * Appends the element returned by {@link #createListItem()} to the list
     * block.
     * 
     * @param listBlock
     *            parent element
     * @return the appended list item
     */
    protected Element addListItem( Element listBlock )
    {
        Element item = createListItem();
        listBlock.appendChild( item );
        return item;
    }

    /**
     * Appends the element returned by {@link #createListItemBody()} to the
     * list item.
     * 
     * @param listItem
     *            parent element
     * @return the appended list item body
     */
    protected Element addListItemBody( Element listItem )
    {
        Element body = createListItemBody();
        listItem.appendChild( body );
        return body;
    }

    /**
     * Appends the element returned by {@link #createListItemLabel(String)} to
     * the list item.
     * 
     * @param listItem
     *            parent element
     * @param text
     *            label text
     * @return the appended list item label
     */
    protected Element addListItemLabel( Element listItem, String text )
    {
        Element label = createListItemLabel( text );
        listItem.appendChild( label );
        return label;
    }

    /**
     * Creates a <tt>fo:page-sequence</tt> referring to the given page master
     * and appends it to {@link #root}.
     * 
     * @param pageMaster
     *            value of the <tt>master-reference</tt> attribute
     * @return the appended <tt>fo:page-sequence</tt> element
     */
    protected Element addPageSequence( String pageMaster )
    {
        Element sequence = createFoElement( "fo:page-sequence" );
        sequence.setAttribute( "master-reference", pageMaster );
        root.appendChild( sequence );
        return sequence;
    }

    /**
     * Creates a <tt>fo:region-body</tt> and appends it to the page master
     * element.
     * 
     * @param pageMaster
     *            parent element
     * @return the appended <tt>fo:region-body</tt> element
     */
    protected Element addRegionBody( Element pageMaster )
    {
        Element body = createFoElement( "fo:region-body" );
        pageMaster.appendChild( body );
        return body;
    }

    /**
     * Creates a <tt>fo:simple-page-master</tt> with the given name and appends
     * it to {@link #layoutMasterSet}.
     * 
     * @param masterName
     *            value of the <tt>master-name</tt> attribute
     * @return the appended <tt>fo:simple-page-master</tt> element
     */
    protected Element addSimplePageMaster( String masterName )
    {
        Element master = createFoElement( "fo:simple-page-master" );
        master.setAttribute( "master-name", masterName );
        layoutMasterSet.appendChild( master );
        return master;
    }

    /**
     * Creates a <tt>fo:table</tt> and appends it to the given element.
     * 
     * @param flow
     *            parent element
     * @return the appended <tt>fo:table</tt> element
     */
    protected Element addTable( Element flow )
    {
        Element newTable = createFoElement( "fo:table" );
        flow.appendChild( newTable );
        return newTable;
    }

    /**
     * Creates a detached <tt>fo:basic-link</tt> with an
     * <tt>external-destination</tt> attribute.
     * 
     * @param externalDestination
     *            attribute value, stored as given
     * @return the new link element
     */
    protected Element createBasicLinkExternal( String externalDestination )
    {
        Element link = createFoElement( "fo:basic-link" );
        link.setAttribute( "external-destination", externalDestination );
        return link;
    }

    /**
     * Creates a detached <tt>fo:basic-link</tt> with an
     * <tt>internal-destination</tt> attribute.
     * 
     * @param internalDestination
     *            attribute value, stored as given
     * @return the new link element
     */
    protected Element createBasicLinkInternal( String internalDestination )
    {
        Element link = createFoElement( "fo:basic-link" );
        link.setAttribute( "internal-destination", internalDestination );
        return link;
    }

    /** @return a new detached <tt>fo:block</tt> element */
    protected Element createBlock()
    {
        return createFoElement( "fo:block" );
    }

    /**
     * Creates a detached <tt>fo:external-graphic</tt> whose <tt>src</tt>
     * attribute is the source wrapped as <tt>url('...')</tt>. The source is
     * inserted without any escaping.
     * 
     * @param source
     *            location of the graphic
     * @return the new graphic element
     */
    protected Element createExternalGraphic( String source )
    {
        Element graphic = createFoElement( "fo:external-graphic" );
        String srcValue = "url('" + source + "')";
        graphic.setAttribute( "src", srcValue );
        return graphic;
    }

    /** @return a new detached <tt>fo:inline</tt> element */
    protected Element createInline()
    {
        return createFoElement( "fo:inline" );
    }

    /** @return a new detached <tt>fo:leader</tt> element */
    protected Element createLeader()
    {
        return createFoElement( "fo:leader" );
    }

    /** @return a new detached <tt>fo:list-block</tt> element */
    protected Element createListBlock()
    {
        return createFoElement( "fo:list-block" );
    }

    /** @return a new detached <tt>fo:list-item</tt> element */
    protected Element createListItem()
    {
        return createFoElement( "fo:list-item" );
    }

    /** @return a new detached <tt>fo:list-item-body</tt> element */
    protected Element createListItemBody()
    {
        return createFoElement( "fo:list-item-body" );
    }

    /**
     * Creates a detached <tt>fo:list-item-label</tt> containing one block
     * (obtained from {@link #createBlock()}) that holds the label text.
     * 
     * @param text
     *            label text
     * @return the new label element
     */
    protected Element createListItemLabel( String text )
    {
        Element label = createFoElement( "fo:list-item-label" );
        Element labelBlock = createBlock();
        Text labelText = document.createTextNode( text );
        labelBlock.appendChild( labelText );
        label.appendChild( labelBlock );
        return label;
    }

    /** @return a new detached <tt>fo:table-body</tt> element */
    protected Element createTableBody()
    {
        return createFoElement( "fo:table-body" );
    }

    /** @return a new detached <tt>fo:table-cell</tt> element */
    protected Element createTableCell()
    {
        return createFoElement( "fo:table-cell" );
    }

    /** @return a new detached <tt>fo:table-header</tt> element */
    protected Element createTableHeader()
    {
        return createFoElement( "fo:table-header" );
    }

    /** @return a new detached <tt>fo:table-row</tt> element */
    protected Element createTableRow()
    {
        return createFoElement( "fo:table-row" );
    }

    /**
     * Creates a detached text node owned by {@link #document}.
     * 
     * @param data
     *            character data of the node
     * @return the new text node
     */
    protected Text createText( String data )
    {
        return document.createTextNode( data );
    }

    /** @return the DOM document passed to the constructor */
    public Document getDocument()
    {
        return document;
    }

}
(-)src/org/apache/poi/hwpf/extractor/WordToFoExtractor.java (-459 / +532 lines)
Lines 16-22 Link Here
16
 *    limitations under the License.
16
 *    limitations under the License.
17
 * ====================================================================
17
 * ====================================================================
18
 */
18
 */
19
20
package org.apache.poi.hwpf.extractor;
19
package org.apache.poi.hwpf.extractor;
21
20
22
import java.io.File;
21
import java.io.File;
Lines 25-30 Link Here
25
import java.io.IOException;
24
import java.io.IOException;
26
import java.util.HashMap;
25
import java.util.HashMap;
27
import java.util.Map;
26
import java.util.Map;
27
import java.util.Stack;
28
import java.util.regex.Matcher;
29
import java.util.regex.Pattern;
28
30
29
import javax.xml.parsers.DocumentBuilderFactory;
31
import javax.xml.parsers.DocumentBuilderFactory;
30
import javax.xml.transform.OutputKeys;
32
import javax.xml.transform.OutputKeys;
Lines 46-51 Link Here
46
import org.apache.poi.hwpf.usermodel.TableCell;
48
import org.apache.poi.hwpf.usermodel.TableCell;
47
import org.apache.poi.hwpf.usermodel.TableIterator;
49
import org.apache.poi.hwpf.usermodel.TableIterator;
48
import org.apache.poi.hwpf.usermodel.TableRow;
50
import org.apache.poi.hwpf.usermodel.TableRow;
51
import org.apache.poi.util.POILogFactory;
52
import org.apache.poi.util.POILogger;
49
import org.w3c.dom.Document;
53
import org.w3c.dom.Document;
50
import org.w3c.dom.Element;
54
import org.w3c.dom.Element;
51
import org.w3c.dom.Text;
55
import org.w3c.dom.Text;
Lines 55-61 Link Here
55
/**
59
/**
56
 * @author Sergey Vladimirov (vlsergey {at} gmail {dot} com)
60
 * @author Sergey Vladimirov (vlsergey {at} gmail {dot} com)
57
 */
61
 */
58
public class WordToFoExtractor {
62
public class WordToFoExtractor extends AbstractToFoExtractor
63
{
64
65
    /**
66
     * Holds property values applied to the current <tt>fo:block</tt> element.
67
     * Those properties shall not be duplicated in child <tt>fo:inline</tt>
68
     * elements.
69
     */
70
    private static class BlockProperies
71
    {
72
        final boolean pBold;
73
        final String pFontName;
74
        final int pFontSize;
75
        final boolean pItalic;
76
77
        public BlockProperies( String pFontName, int pFontSize, boolean pBold,
78
                boolean pItalic )
79
        {
80
            this.pFontName = pFontName;
81
            this.pFontSize = pFontSize;
82
            this.pBold = pBold;
83
            this.pItalic = pItalic;
84
        }
85
    }
59
86
60
    private static final byte BEL_MARK = 7;
87
    private static final byte BEL_MARK = 7;
61
88
Lines 65-282 Link Here
65
92
66
    private static final byte FIELD_SEPARATOR_MARK = 20;
93
    private static final byte FIELD_SEPARATOR_MARK = 20;
67
94
68
    private static final String NS_XSLFO = "http://www.w3.org/1999/XSL/Format";
95
    private static final POILogger logger = POILogFactory
96
            .getLogger( WordToFoExtractor.class );
69
97
70
    private static HWPFDocument loadDoc(File docFile) throws IOException {
98
    private static HWPFDocument loadDoc( File docFile ) throws IOException
71
	final FileInputStream istream = new FileInputStream(docFile);
99
    {
72
	try {
100
        final FileInputStream istream = new FileInputStream( docFile );
73
	    return new HWPFDocument(istream);
101
        try
74
	} finally {
102
        {
75
	    try {
103
            return new HWPFDocument( istream );
76
		istream.close();
104
        }
77
	    } catch (Exception exc) {
105
        finally
78
		// no op
106
        {
79
	    }
107
            try
80
	}
108
            {
81
    }
109
                istream.close();
82
110
            }
83
    static Document process(File docFile) throws Exception {
111
            catch ( Exception exc )
84
	final HWPFDocument hwpfDocument = loadDoc(docFile);
112
            {
85
	WordToFoExtractor wordToFoExtractor = new WordToFoExtractor(
113
                logger.log( POILogger.ERROR,
86
		DocumentBuilderFactory.newInstance().newDocumentBuilder()
114
                        "Unable to close FileInputStream: " + exc, exc );
87
			.newDocument());
115
            }
88
	wordToFoExtractor.processDocument(hwpfDocument);
116
        }
89
	return wordToFoExtractor.getDocument();
90
    }
117
    }
91
118
92
    private final Document document;
119
    /**
93
120
     * Java main() interface to interact with WordToFoExtractor
94
    private final Element layoutMasterSet;
121
     * 
95
122
     * <p>
96
    private final Element root;
123
     * Usage: WordToFoExtractor infile outfile
97
124
     * </p>
98
    public WordToFoExtractor(Document document) throws Exception {
125
     * Where infile is an input .doc file (Word 97-2007) which will be rendered
99
	this.document = document;
126
     * as XSL-FO into outfile
100
127
     * 
101
	root = document.createElementNS(NS_XSLFO, "fo:root");
128
     */
102
	document.appendChild(root);
129
    public static void main( String[] args )
103
130
    {
104
	layoutMasterSet = document.createElementNS(NS_XSLFO,
131
        if ( args.length < 2 )
105
		"fo:layout-master-set");
132
        {
106
	root.appendChild(layoutMasterSet);
133
            System.err
107
    }
134
                    .println( "Usage: WordToFoExtractor <inputFile.doc> <saveTo.fo>" );
135
            return;
136
        }
108
137
109
    protected Element addFlowToPageSequence(final Element pageSequence,
138
        System.out.println( "Converting " + args[0] );
110
	    String flowName) {
139
        System.out.println( "Saving output to " + args[1] );
111
	final Element flow = document.createElementNS(NS_XSLFO, "fo:flow");
140
        try
112
	flow.setAttribute("flow-name", flowName);
141
        {
113
	pageSequence.appendChild(flow);
142
            Document doc = WordToFoExtractor.process( new File( args[0] ) );
114
143
115
	return flow;
144
            FileWriter out = new FileWriter( args[1] );
145
            DOMSource domSource = new DOMSource( doc );
146
            StreamResult streamResult = new StreamResult( out );
147
            TransformerFactory tf = TransformerFactory.newInstance();
148
            Transformer serializer = tf.newTransformer();
149
            // TODO set encoding from a command argument
150
            serializer.setOutputProperty( OutputKeys.ENCODING, "UTF-8" );
151
            serializer.setOutputProperty( OutputKeys.INDENT, "yes" );
152
            serializer.transform( domSource, streamResult );
153
            out.close();
154
        }
155
        catch ( Exception e )
156
        {
157
            e.printStackTrace();
158
        }
116
    }
159
    }
117
160
118
    protected Element addListItem(Element listBlock) {
161
    static Document process( File docFile ) throws Exception
119
	Element result = createListItem();
162
    {
120
	listBlock.appendChild(result);
163
        final HWPFDocument hwpfDocument = loadDoc( docFile );
121
	return result;
164
        WordToFoExtractor wordToFoExtractor = new WordToFoExtractor(
165
                DocumentBuilderFactory.newInstance().newDocumentBuilder()
166
                        .newDocument() );
167
        wordToFoExtractor.processDocument( hwpfDocument );
168
        return wordToFoExtractor.getDocument();
122
    }
169
    }
123
170
124
    protected Element addListItemBody(Element listItem) {
171
    private final Stack<BlockProperies> blocksProperies = new Stack<BlockProperies>();
125
	Element result = createListItemBody();
126
	listItem.appendChild(result);
127
	return result;
128
    }
129
172
130
    protected Element addListItemLabel(Element listItem, String text) {
173
    /**
131
	Element result = createListItemLabel(text);
174
     * Creates a new instance of {@link WordToFoExtractor}. Can be used to output
132
	listItem.appendChild(result);
175
     * several {@link HWPFDocument}s into a single FO document.
133
	return result;
176
     * 
177
     * @param document
178
     *            XML DOM Document used as XSL FO document. Shall support
179
     *            namespaces
180
     */
181
    public WordToFoExtractor( Document document )
182
    {
183
        super( document );
134
    }
184
    }
135
185
136
    protected Element addPageSequence(String pageMaster) {
186
    protected String createPageMaster( SectionProperties sep, String type,
137
	final Element pageSequence = document.createElementNS(NS_XSLFO,
187
            int section )
138
		"fo:page-sequence");
188
    {
139
	pageSequence.setAttribute("master-reference", pageMaster);
189
        float height = sep.getYaPage() / TWIPS_PER_INCH;
140
	root.appendChild(pageSequence);
190
        float width = sep.getXaPage() / TWIPS_PER_INCH;
141
	return pageSequence;
191
        float leftMargin = sep.getDxaLeft() / TWIPS_PER_INCH;
142
    }
192
        float rightMargin = sep.getDxaRight() / TWIPS_PER_INCH;
193
        float topMargin = sep.getDyaTop() / TWIPS_PER_INCH;
194
        float bottomMargin = sep.getDyaBottom() / TWIPS_PER_INCH;
143
195
144
    protected Element addRegionBody(Element pageMaster) {
196
        // add these to the header
145
	final Element regionBody = document.createElementNS(NS_XSLFO,
197
        String pageMasterName = type + "-page" + section;
146
		"fo:region-body");
147
	pageMaster.appendChild(regionBody);
148
198
149
	return regionBody;
199
        Element pageMaster = addSimplePageMaster( pageMasterName );
150
    }
200
        pageMaster.setAttribute( "page-height", height + "in" );
201
        pageMaster.setAttribute( "page-width", width + "in" );
151
202
152
    protected Element addSimplePageMaster(String masterName) {
203
        Element regionBody = addRegionBody( pageMaster );
153
	final Element simplePageMaster = document.createElementNS(NS_XSLFO,
204
        regionBody.setAttribute( "margin", topMargin + "in " + rightMargin
154
		"fo:simple-page-master");
205
                + "in " + bottomMargin + "in " + leftMargin + "in" );
155
	simplePageMaster.setAttribute("master-name", masterName);
156
	layoutMasterSet.appendChild(simplePageMaster);
157
206
158
	return simplePageMaster;
207
        /*
159
    }
208
         * 6.4.14 fo:region-body
209
         * 
210
         * The values of the padding and border-width traits must be "0".
211
         */
212
        // WordToFoUtils.setBorder(regionBody, sep.getBrcTop(), "top");
213
        // WordToFoUtils.setBorder(regionBody, sep.getBrcBottom(), "bottom");
214
        // WordToFoUtils.setBorder(regionBody, sep.getBrcLeft(), "left");
215
        // WordToFoUtils.setBorder(regionBody, sep.getBrcRight(), "right");
160
216
161
    protected Element addTable(Element flow) {
217
        if ( sep.getCcolM1() > 0 )
162
	final Element table = document.createElementNS(NS_XSLFO, "fo:table");
218
        {
163
	flow.appendChild(table);
219
            regionBody
164
	return table;
220
                    .setAttribute( "column-count", "" + (sep.getCcolM1() + 1) );
165
    }
221
            if ( sep.getFEvenlySpaced() )
222
            {
223
                regionBody.setAttribute( "column-gap",
224
                        (sep.getDxaColumns() / TWIPS_PER_INCH) + "in" );
225
            }
226
            else
227
            {
228
                regionBody.setAttribute( "column-gap", "0.25in" );
229
            }
230
        }
166
231
167
    protected Element createBlock() {
232
        return pageMasterName;
168
	return document.createElementNS(NS_XSLFO, "fo:block");
169
    }
233
    }
170
234
171
    protected Element createExternalGraphic(String source) {
235
    protected boolean processCharacters( HWPFDocument hwpfDocument,
172
	Element result = document.createElementNS(NS_XSLFO,
236
            int currentTableLevel, Paragraph paragraph, final Element block,
173
		"fo:external-graphic");
237
            final int start, final int end )
174
	result.setAttribute("src", "url('" + source + "')");
238
    {
175
	return result;
239
        boolean haveAnyText = false;
176
    }
177
240
178
    protected Element createInline() {
241
        for ( int c = start; c < end; c++ )
179
	return document.createElementNS(NS_XSLFO, "fo:inline");
242
        {
180
    }
243
            CharacterRun characterRun = paragraph.getCharacterRun( c );
181
244
182
    protected Element createLeader() {
245
            if ( hwpfDocument.getPicturesTable().hasPicture( characterRun ) )
183
	return document.createElementNS(NS_XSLFO, "fo:leader");
246
            {
184
    }
247
                Picture picture = hwpfDocument.getPicturesTable()
248
                        .extractPicture( characterRun, true );
185
249
186
    protected Element createListBlock() {
250
                processImage( block, characterRun.text().charAt( 0 ) == 0x01,
187
	return document.createElementNS(NS_XSLFO, "fo:list-block");
251
                        picture );
188
    }
252
                continue;
253
            }
189
254
190
    protected Element createListItem() {
255
            String text = characterRun.text();
191
	return document.createElementNS(NS_XSLFO, "fo:list-item");
256
            if ( text.getBytes().length == 0 )
192
    }
257
                continue;
193
258
194
    protected Element createListItemBody() {
259
            if ( text.getBytes()[0] == FIELD_BEGIN_MARK )
195
	return document.createElementNS(NS_XSLFO, "fo:list-item-body");
260
            {
196
    }
261
                int skipTo = tryField( hwpfDocument, paragraph,
262
                        currentTableLevel, c, block );
197
263
198
    protected Element createListItemLabel(String text) {
264
                if ( skipTo != c )
199
	Element result = document.createElementNS(NS_XSLFO,
265
                {
200
		"fo:list-item-label");
266
                    c = skipTo;
201
	Element block = createBlock();
267
                    continue;
202
	block.appendChild(document.createTextNode(text));
268
                }
203
	result.appendChild(block);
204
	return result;
205
    }
206
269
207
    protected String createPageMaster(SectionProperties sep, String type,
270
                continue;
208
	    int section) {
271
            }
209
	float height = sep.getYaPage() / TWIPS_PER_INCH;
272
            if ( text.getBytes()[0] == FIELD_SEPARATOR_MARK )
210
	float width = sep.getXaPage() / TWIPS_PER_INCH;
273
            {
211
	float leftMargin = sep.getDxaLeft() / TWIPS_PER_INCH;
274
                // shall not appear without FIELD_BEGIN_MARK
212
	float rightMargin = sep.getDxaRight() / TWIPS_PER_INCH;
275
                continue;
213
	float topMargin = sep.getDyaTop() / TWIPS_PER_INCH;
276
            }
214
	float bottomMargin = sep.getDyaBottom() / TWIPS_PER_INCH;
277
            if ( text.getBytes()[0] == FIELD_END_MARK )
278
            {
279
                // shall not appear without FIELD_BEGIN_MARK
280
                continue;
281
            }
215
282
216
	// add these to the header
283
            if ( characterRun.isSpecialCharacter() || characterRun.isObj()
217
	String pageMasterName = type + "-page" + section;
284
                    || characterRun.isOle2() )
285
            {
286
                continue;
287
            }
218
288
219
	Element pageMaster = addSimplePageMaster(pageMasterName);
289
            BlockProperies blockProperies = this.blocksProperies.peek();
220
	pageMaster.setAttribute("page-height", height + "in");
290
            Element inline = createInline();
221
	pageMaster.setAttribute("page-width", width + "in");
291
            if ( characterRun.isBold() != blockProperies.pBold )
292
            {
293
                WordToFoUtils.setBold( inline, characterRun.isBold() );
294
            }
295
            if ( characterRun.isItalic() != blockProperies.pItalic )
296
            {
297
                WordToFoUtils.setItalic( inline, characterRun.isItalic() );
298
            }
299
            if ( !WordToFoUtils.equals( characterRun.getFontName(),
300
                    blockProperies.pFontName ) )
301
            {
302
                WordToFoUtils
303
                        .setFontFamily( inline, characterRun.getFontName() );
304
            }
305
            if ( characterRun.getFontSize() / 2 != blockProperies.pFontSize )
306
            {
307
                WordToFoUtils.setFontSize( inline,
308
                        characterRun.getFontSize() / 2 );
309
            }
310
            WordToFoUtils.setCharactersProperties( characterRun, inline );
311
            block.appendChild( inline );
222
312
223
	Element regionBody = addRegionBody(pageMaster);
313
            if ( text.endsWith( "\r" )
224
	regionBody.setAttribute("margin", topMargin + "in " + rightMargin
314
                    || (text.charAt( text.length() - 1 ) == BEL_MARK && currentTableLevel != 0) )
225
		+ "in " + bottomMargin + "in " + leftMargin + "in");
315
                text = text.substring( 0, text.length() - 1 );
226
316
227
	/*
317
            Text textNode = createText( text );
228
	 * 6.4.14 fo:region-body
318
            inline.appendChild( textNode );
229
	 *
230
	 * The values of the padding and border-width traits must be "0".
231
	 */
232
	// WordToFoUtils.setBorder(regionBody, sep.getBrcTop(), "top");
233
	// WordToFoUtils.setBorder(regionBody, sep.getBrcBottom(), "bottom");
234
	// WordToFoUtils.setBorder(regionBody, sep.getBrcLeft(), "left");
235
	// WordToFoUtils.setBorder(regionBody, sep.getBrcRight(), "right");
236
319
237
	if (sep.getCcolM1() > 0) {
320
            haveAnyText |= text.trim().length() != 0;
238
	    regionBody.setAttribute("column-count", "" + (sep.getCcolM1() + 1));
321
        }
239
	    if (sep.getFEvenlySpaced()) {
240
		regionBody.setAttribute("column-gap",
241
			(sep.getDxaColumns() / TWIPS_PER_INCH) + "in");
242
	    } else {
243
		regionBody.setAttribute("column-gap", "0.25in");
244
	    }
245
	}
246
322
247
	return pageMasterName;
323
        return haveAnyText;
248
    }
324
    }
249
325
250
    protected Element createTableBody() {
326
    public void processDocument( HWPFDocument hwpfDocument )
251
	return document.createElementNS(NS_XSLFO, "fo:table-body");
327
    {
252
    }
328
        final Range range = hwpfDocument.getRange();
253
329
254
    protected Element createTableCell() {
330
        for ( int s = 0; s < range.numSections(); s++ )
255
	return document.createElementNS(NS_XSLFO, "fo:table-cell");
331
        {
332
            processSection( hwpfDocument, range.getSection( s ), s );
333
        }
256
    }
334
    }
257
335
258
    protected Element createTableHeader() {
336
    protected void processField( HWPFDocument hwpfDocument,
259
	return document.createElementNS(NS_XSLFO, "fo:table-header");
337
            Element currentBlock, Paragraph paragraph, int currentTableLevel,
260
    }
338
            int beginMark, int separatorMark, int endMark )
339
    {
261
340
262
    protected Element createTableRow() {
341
        Pattern hyperlinkPattern = Pattern
263
	return document.createElementNS(NS_XSLFO, "fo:table-row");
342
                .compile( "[ \\t\\r\\n]*HYPERLINK \"(.*)\"[ \\t\\r\\n]*" );
264
    }
343
        Pattern pagerefPattern = Pattern
344
                .compile( "[ \\t\\r\\n]*PAGEREF ([^ ]*)[ \\t\\r\\n]*\\\\h[ \\t\\r\\n]*" );
265
345
266
    protected Text createText(String data) {
346
        if ( separatorMark - beginMark > 1 )
267
	return document.createTextNode(data);
347
        {
268
    }
348
            CharacterRun firstAfterBegin = paragraph
349
                    .getCharacterRun( beginMark + 1 );
269
350
270
    public Document getDocument() {
351
            final Matcher hyperlinkMatcher = hyperlinkPattern
271
	return document;
352
                    .matcher( firstAfterBegin.text() );
353
            if ( hyperlinkMatcher.matches() )
354
            {
355
                String hyperlink = hyperlinkMatcher.group( 1 );
356
                processHyperlink( hwpfDocument, currentBlock, paragraph,
357
                        currentTableLevel, hyperlink, separatorMark + 1,
358
                        endMark );
359
                return;
360
            }
361
362
            final Matcher pagerefMatcher = pagerefPattern
363
                    .matcher( firstAfterBegin.text() );
364
            if ( pagerefMatcher.matches() )
365
            {
366
                String pageref = pagerefMatcher.group( 1 );
367
                processPageref( hwpfDocument, currentBlock, paragraph,
368
                        currentTableLevel, pageref, separatorMark + 1, endMark );
369
                return;
370
            }
371
        }
372
373
        StringBuilder debug = new StringBuilder( "Unsupported field type: \n" );
374
        for ( int i = beginMark; i <= endMark; i++ )
375
        {
376
            debug.append( "\t" );
377
            debug.append( paragraph.getCharacterRun( i ) );
378
            debug.append( "\n" );
379
        }
380
        logger.log( POILogger.WARN, debug );
381
382
        // just output field value
383
        if ( separatorMark + 1 < endMark )
384
            processCharacters( hwpfDocument, currentTableLevel, paragraph,
385
                    currentBlock, separatorMark + 1, endMark );
386
387
        return;
272
    }
388
    }
273
389
274
    public void processDocument(HWPFDocument hwpfDocument) {
390
    protected void processHyperlink( HWPFDocument hwpfDocument,
275
	final Range range = hwpfDocument.getRange();
391
            Element currentBlock, Paragraph paragraph, int currentTableLevel,
392
            String hyperlink, int beginTextInclusive, int endTextExclusive )
393
    {
394
        Element basicLink = createBasicLinkExternal( hyperlink );
395
        currentBlock.appendChild( basicLink );
276
396
277
	for (int s = 0; s < range.numSections(); s++) {
397
        if ( beginTextInclusive < endTextExclusive )
278
	    processSection(hwpfDocument, range.getSection(s), s);
398
            processCharacters( hwpfDocument, currentTableLevel, paragraph,
279
	}
399
                    basicLink, beginTextInclusive, endTextExclusive );
280
    }
400
    }
281
401
282
    /**
402
    /**
Lines 298-601 Link Here
298
     * @param picture
418
     * @param picture
299
     *            HWPF object, contained picture data and properties
419
     *            HWPF object, contained picture data and properties
300
     */
420
     */
301
    protected void processImage(Element currentBlock, boolean inlined,
421
    protected void processImage( Element currentBlock, boolean inlined,
302
            Picture picture) {
422
            Picture picture )
423
    {
303
        // no default implementation -- skip
424
        // no default implementation -- skip
425
        currentBlock.appendChild( document.createComment( "Image link to '"
426
                + picture.suggestFullFileName() + "' can be here" ) );
304
    }
427
    }
305
428
306
    protected void processParagraph(HWPFDocument hwpfDocument,
429
    protected void processPageref( HWPFDocument hwpfDocument,
430
            Element currentBlock, Paragraph paragraph, int currentTableLevel,
431
            String pageref, int beginTextInclusive, int endTextExclusive )
432
    {
433
        Element basicLink = createBasicLinkInternal( pageref );
434
        currentBlock.appendChild( basicLink );
435
436
        if ( beginTextInclusive < endTextExclusive )
437
            processCharacters( hwpfDocument, currentTableLevel, paragraph,
438
                    basicLink, beginTextInclusive, endTextExclusive );
439
    }
440
441
    protected void processParagraph( HWPFDocument hwpfDocument,
307
            Element parentFopElement, int currentTableLevel,
442
            Element parentFopElement, int currentTableLevel,
308
            Paragraph paragraph, String bulletText) {
443
            Paragraph paragraph, String bulletText )
444
    {
309
        final Element block = createBlock();
445
        final Element block = createBlock();
310
        parentFopElement.appendChild(block);
446
        parentFopElement.appendChild( block );
311
447
312
        WordToFoUtils.setParagraphProperties(paragraph, block);
448
        WordToFoUtils.setParagraphProperties( paragraph, block );
313
449
314
        final int charRuns = paragraph.numCharacterRuns();
450
        final int charRuns = paragraph.numCharacterRuns();
315
451
316
        if (charRuns == 0) {
452
        if ( charRuns == 0 )
453
        {
317
            return;
454
            return;
318
        }
455
        }
319
456
320
        final String pFontName;
321
        final int pFontSize;
322
        final boolean pBold;
323
        final boolean pItalic;
324
        {
457
        {
325
            CharacterRun characterRun = paragraph.getCharacterRun(0);
458
            final String pFontName;
326
            pFontSize = characterRun.getFontSize() / 2;
459
            final int pFontSize;
327
            pFontName = characterRun.getFontName();
460
            final boolean pBold;
328
            pBold = characterRun.isBold();
461
            final boolean pItalic;
329
            pItalic = characterRun.isItalic();
462
            {
330
        }
463
                CharacterRun characterRun = paragraph.getCharacterRun( 0 );
331
        WordToFoUtils.setFontFamily(block, pFontName);
464
                pFontSize = characterRun.getFontSize() / 2;
332
        WordToFoUtils.setFontSize(block, pFontSize);
465
                pFontName = characterRun.getFontName();
333
        WordToFoUtils.setBold(block, pBold);
466
                pBold = characterRun.isBold();
334
        WordToFoUtils.setItalic(block, pItalic);
467
                pItalic = characterRun.isItalic();
335
468
            }
336
        StringBuilder lineText = new StringBuilder();
469
            WordToFoUtils.setFontFamily( block, pFontName );
337
470
            WordToFoUtils.setFontSize( block, pFontSize );
338
        if (WordToFoUtils.isNotEmpty(bulletText)) {
471
            WordToFoUtils.setBold( block, pBold );
339
            Element inline = createInline();
472
            WordToFoUtils.setItalic( block, pItalic );
340
            block.appendChild(inline);
341
342
            Text textNode = createText(bulletText);
343
            inline.appendChild(textNode);
344
473
345
            lineText.append(bulletText);
474
            blocksProperies.push( new BlockProperies( pFontName, pFontSize,
475
                    pBold, pItalic ) );
346
        }
476
        }
477
        try
478
        {
479
            boolean haveAnyText = false;
347
480
348
        for (int c = 0; c < charRuns; c++) {
481
            if ( WordToFoUtils.isNotEmpty( bulletText ) )
349
            CharacterRun characterRun = paragraph.getCharacterRun(c);
482
            {
483
                Element inline = createInline();
484
                block.appendChild( inline );
350
485
351
            if (hwpfDocument.getPicturesTable().hasPicture(characterRun)) {
486
                Text textNode = createText( bulletText );
352
                Picture picture = hwpfDocument.getPicturesTable()
487
                inline.appendChild( textNode );
353
                        .extractPicture(characterRun, true);
354
488
355
                processImage(block, characterRun.text().charAt(0) == 0x01,
489
                haveAnyText |= bulletText.trim().length() != 0;
356
                        picture);
357
                continue;
358
            }
490
            }
359
491
360
	    String text = characterRun.text();
492
            haveAnyText = processCharacters( hwpfDocument, currentTableLevel,
361
	    if (text.getBytes().length == 0)
493
                    paragraph, block, 0, charRuns );
362
		continue;
363
364
            if (text.getBytes()[0] == FIELD_BEGIN_MARK) {
365
                /*
366
                 * check if we have a field with calculated image as a result.
367
                 * MathType equation, for example.
368
                 */
369
                int skipTo = tryImageWithinField(hwpfDocument, paragraph, c,
370
                        block);
371
494
372
		if (skipTo != c) {
495
            if ( !haveAnyText )
373
		    c = skipTo;
496
            {
374
		    continue;
497
                Element leader = createLeader();
375
		}
498
                block.appendChild( leader );
376
		continue;
499
            }
377
	    }
500
        }
378
	    if (text.getBytes()[0] == FIELD_SEPARATOR_MARK) {
501
        finally
379
		continue;
502
        {
380
	    }
503
            blocksProperies.pop();
381
	    if (text.getBytes()[0] == FIELD_END_MARK) {
504
        }
382
		continue;
383
	    }
384
385
	    if (characterRun.isSpecialCharacter() || characterRun.isObj()
386
		    || characterRun.isOle2()) {
387
		continue;
388
	    }
389
390
	    Element inline = createInline();
391
	    if (characterRun.isBold() != pBold) {
392
		WordToFoUtils.setBold(inline, characterRun.isBold());
393
	    }
394
	    if (characterRun.isItalic() != pItalic) {
395
		WordToFoUtils.setItalic(inline, characterRun.isItalic());
396
	    }
397
	    if (!WordToFoUtils.equals(characterRun.getFontName(), pFontName)) {
398
		WordToFoUtils.setFontFamily(inline, characterRun.getFontName());
399
	    }
400
	    if (characterRun.getFontSize() / 2 != pFontSize) {
401
		WordToFoUtils.setFontSize(inline,
402
			characterRun.getFontSize() / 2);
403
	    }
404
	    WordToFoUtils.setCharactersProperties(characterRun, inline);
405
	    block.appendChild(inline);
406
407
	    if (text.endsWith("\r")
408
		    || (text.charAt(text.length() - 1) == BEL_MARK && currentTableLevel != 0))
409
		text = text.substring(0, text.length() - 1);
410
411
	    Text textNode = createText(text);
412
	    inline.appendChild(textNode);
413
414
	    lineText.append(text);
415
	}
416
417
	if (lineText.toString().trim().length() == 0) {
418
	    Element leader = createLeader();
419
	    block.appendChild(leader);
420
	}
421
505
422
	return;
506
        return;
423
    }
507
    }
424
508
425
    protected void processSection(HWPFDocument hwpfDocument, Section section,
509
    protected void processSection( HWPFDocument hwpfDocument, Section section,
426
	    int sectionCounter) {
510
            int sectionCounter )
427
	String regularPage = createPageMaster(
511
    {
428
		WordToFoUtils.getSectionProperties(section), "page",
512
        String regularPage = createPageMaster(
429
		sectionCounter);
513
                WordToFoUtils.getSectionProperties( section ), "page",
514
                sectionCounter );
430
515
431
	Element pageSequence = addPageSequence(regularPage);
516
        Element pageSequence = addPageSequence( regularPage );
432
	Element flow = addFlowToPageSequence(pageSequence, "xsl-region-body");
517
        Element flow = addFlowToPageSequence( pageSequence, "xsl-region-body" );
433
518
434
	processSectionParagraphes(hwpfDocument, flow, section, 0);
519
        processSectionParagraphes( hwpfDocument, flow, section, 0 );
435
    }
520
    }
436
521
437
    protected void processSectionParagraphes(HWPFDocument hwpfDocument,
522
    protected void processSectionParagraphes( HWPFDocument hwpfDocument,
438
	    Element flow, Range range, int currentTableLevel) {
523
            Element flow, Range range, int currentTableLevel )
439
	final Map<Integer, Table> allTables = new HashMap<Integer, Table>();
524
    {
440
	for (TableIterator tableIterator = WordToFoUtils.newTableIterator(
525
        final Map<Integer, Table> allTables = new HashMap<Integer, Table>();
441
		range, currentTableLevel + 1); tableIterator.hasNext();) {
526
        for ( TableIterator tableIterator = WordToFoUtils.newTableIterator(
442
	    Table next = tableIterator.next();
527
                range, currentTableLevel + 1 ); tableIterator.hasNext(); )
443
	    allTables.put(Integer.valueOf(next.getStartOffset()), next);
528
        {
444
	}
529
            Table next = tableIterator.next();
530
            allTables.put( Integer.valueOf( next.getStartOffset() ), next );
531
        }
445
532
446
	final ListTables listTables = hwpfDocument.getListTables();
533
        final ListTables listTables = hwpfDocument.getListTables();
447
	int currentListInfo = 0;
534
        int currentListInfo = 0;
448
535
449
	final int paragraphs = range.numParagraphs();
536
        final int paragraphs = range.numParagraphs();
450
	for (int p = 0; p < paragraphs; p++) {
537
        for ( int p = 0; p < paragraphs; p++ )
451
	    Paragraph paragraph = range.getParagraph(p);
538
        {
539
            Paragraph paragraph = range.getParagraph( p );
452
540
453
	    if (allTables.containsKey(Integer.valueOf(paragraph
541
            if ( allTables.containsKey( Integer.valueOf( paragraph
454
		    .getStartOffset()))) {
542
                    .getStartOffset() ) ) )
455
		Table table = allTables.get(Integer.valueOf(paragraph
543
            {
456
			.getStartOffset()));
544
                Table table = allTables.get( Integer.valueOf( paragraph
457
		processTable(hwpfDocument, flow, table, currentTableLevel + 1);
545
                        .getStartOffset() ) );
458
		continue;
546
                processTable( hwpfDocument, flow, table, currentTableLevel + 1 );
459
	    }
547
                continue;
548
            }
460
549
461
	    if (paragraph.isInTable()
550
            if ( paragraph.isInTable()
462
		    && paragraph.getTableLevel() != currentTableLevel) {
551
                    && paragraph.getTableLevel() != currentTableLevel )
463
		continue;
552
            {
464
	    }
553
                continue;
554
            }
465
555
466
	    if (paragraph.getIlfo() != currentListInfo) {
556
            if ( paragraph.getIlfo() != currentListInfo )
467
		currentListInfo = paragraph.getIlfo();
557
            {
468
	    }
558
                currentListInfo = paragraph.getIlfo();
559
            }
469
560
470
	    if (currentListInfo != 0) {
561
            if ( currentListInfo != 0 )
471
		final ListFormatOverride listFormatOverride = listTables
562
            {
472
			.getOverride(paragraph.getIlfo());
563
                final ListFormatOverride listFormatOverride = listTables
564
                        .getOverride( paragraph.getIlfo() );
473
565
474
		String label = WordToFoUtils.getBulletText(listTables,
566
                String label = WordToFoUtils.getBulletText( listTables,
475
			paragraph, listFormatOverride.getLsid());
567
                        paragraph, listFormatOverride.getLsid() );
476
568
477
		processParagraph(hwpfDocument, flow, currentTableLevel,
569
                processParagraph( hwpfDocument, flow, currentTableLevel,
478
			paragraph, label);
570
                        paragraph, label );
479
	    } else {
571
            }
480
		processParagraph(hwpfDocument, flow, currentTableLevel,
572
            else
481
			paragraph, WordToFoUtils.EMPTY);
573
            {
482
	    }
574
                processParagraph( hwpfDocument, flow, currentTableLevel,
483
	}
575
                        paragraph, WordToFoUtils.EMPTY );
576
            }
577
        }
484
578
485
    }
579
    }
486
580
487
    protected void processTable(HWPFDocument hwpfDocument, Element flow,
581
    protected void processTable( HWPFDocument hwpfDocument, Element flow,
488
	    Table table, int thisTableLevel) {
582
            Table table, int thisTableLevel )
489
	Element tableElement = addTable(flow);
583
    {
584
        Element tableElement = addTable( flow );
490
585
491
	Element tableHeader = createTableHeader();
586
        Element tableHeader = createTableHeader();
492
	Element tableBody = createTableBody();
587
        Element tableBody = createTableBody();
493
588
494
	final int tableRows = table.numRows();
589
        final int tableRows = table.numRows();
495
590
496
	int maxColumns = Integer.MIN_VALUE;
591
        int maxColumns = Integer.MIN_VALUE;
497
	for (int r = 0; r < tableRows; r++) {
592
        for ( int r = 0; r < tableRows; r++ )
498
	    maxColumns = Math.max(maxColumns, table.getRow(r).numCells());
593
        {
499
	}
594
            maxColumns = Math.max( maxColumns, table.getRow( r ).numCells() );
595
        }
500
596
501
	for (int r = 0; r < tableRows; r++) {
597
        for ( int r = 0; r < tableRows; r++ )
502
	    TableRow tableRow = table.getRow(r);
598
        {
599
            TableRow tableRow = table.getRow( r );
503
600
504
	    Element tableRowElement = createTableRow();
601
            Element tableRowElement = createTableRow();
505
	    WordToFoUtils.setTableRowProperties(tableRow, tableRowElement);
602
            WordToFoUtils.setTableRowProperties( tableRow, tableRowElement );
506
603
507
	    final int rowCells = tableRow.numCells();
604
            final int rowCells = tableRow.numCells();
508
	    for (int c = 0; c < rowCells; c++) {
605
            for ( int c = 0; c < rowCells; c++ )
509
		TableCell tableCell = tableRow.getCell(c);
606
            {
607
                TableCell tableCell = tableRow.getCell( c );
510
608
511
		if (tableCell.isMerged() && !tableCell.isFirstMerged())
609
                if ( tableCell.isMerged() && !tableCell.isFirstMerged() )
512
		    continue;
610
                    continue;
513
611
514
		if (tableCell.isVerticallyMerged()
612
                if ( tableCell.isVerticallyMerged()
515
			&& !tableCell.isFirstVerticallyMerged())
613
                        && !tableCell.isFirstVerticallyMerged() )
516
		    continue;
614
                    continue;
517
615
518
		Element tableCellElement = createTableCell();
616
                Element tableCellElement = createTableCell();
519
		WordToFoUtils.setTableCellProperties(tableRow, tableCell,
617
                WordToFoUtils.setTableCellProperties( tableRow, tableCell,
520
			tableCellElement, r == 0, r == tableRows - 1, c == 0,
618
                        tableCellElement, r == 0, r == tableRows - 1, c == 0,
521
			c == rowCells - 1);
619
                        c == rowCells - 1 );
522
620
523
		if (tableCell.isFirstMerged()) {
621
                if ( tableCell.isFirstMerged() )
524
		    int count = 0;
622
                {
525
		    for (int c1 = c; c1 < rowCells; c1++) {
623
                    int count = 0;
526
			TableCell nextCell = tableRow.getCell(c1);
624
                    for ( int c1 = c; c1 < rowCells; c1++ )
527
			if (nextCell.isMerged())
625
                    {
528
			    count++;
626
                        TableCell nextCell = tableRow.getCell( c1 );
529
			if (!nextCell.isMerged())
627
                        if ( nextCell.isMerged() )
530
			    break;
628
                            count++;
531
		    }
629
                        if ( !nextCell.isMerged() )
532
		    tableCellElement.setAttribute("number-columns-spanned", ""
630
                            break;
533
			    + count);
631
                    }
534
		} else {
632
                    tableCellElement.setAttribute( "number-columns-spanned", ""
535
		    if (c == rowCells - 1 && c != maxColumns - 1) {
633
                            + count );
536
			tableCellElement.setAttribute("number-columns-spanned",
634
                }
537
				"" + (maxColumns - c));
635
                else
538
		    }
636
                {
539
		}
637
                    if ( c == rowCells - 1 && c != maxColumns - 1 )
638
                    {
639
                        tableCellElement
640
                                .setAttribute( "number-columns-spanned", ""
641
                                        + (maxColumns - c) );
642
                    }
643
                }
540
644
541
		if (tableCell.isFirstVerticallyMerged()) {
645
                if ( tableCell.isFirstVerticallyMerged() )
542
		    int count = 0;
646
                {
543
		    for (int r1 = r; r1 < tableRows; r1++) {
647
                    int count = 0;
544
			TableRow nextRow = table.getRow(r1);
648
                    for ( int r1 = r; r1 < tableRows; r1++ )
545
			if (nextRow.numCells() < c)
649
                    {
546
			    break;
650
                        TableRow nextRow = table.getRow( r1 );
547
			TableCell nextCell = nextRow.getCell(c);
651
                        if ( nextRow.numCells() < c )
548
			if (nextCell.isVerticallyMerged())
652
                            break;
549
			    count++;
653
                        TableCell nextCell = nextRow.getCell( c );
550
			if (!nextCell.isVerticallyMerged())
654
                        if ( nextCell.isVerticallyMerged() )
551
			    break;
655
                            count++;
552
		    }
656
                        if ( !nextCell.isVerticallyMerged() )
553
		    tableCellElement.setAttribute("number-rows-spanned", ""
657
                            break;
554
			    + count);
658
                    }
555
		}
659
                    tableCellElement.setAttribute( "number-rows-spanned", ""
660
                            + count );
661
                }
556
662
557
		processSectionParagraphes(hwpfDocument, tableCellElement,
663
                processSectionParagraphes( hwpfDocument, tableCellElement,
558
			tableCell, thisTableLevel);
664
                        tableCell, thisTableLevel );
559
665
560
		if (!tableCellElement.hasChildNodes()) {
666
                if ( !tableCellElement.hasChildNodes() )
561
		    tableCellElement.appendChild(createBlock());
667
                {
562
		}
668
                    tableCellElement.appendChild( createBlock() );
669
                }
563
670
564
		tableRowElement.appendChild(tableCellElement);
671
                tableRowElement.appendChild( tableCellElement );
565
	    }
672
            }
566
673
567
	    if (tableRow.isTableHeader()) {
674
            if ( tableRow.isTableHeader() )
568
		tableHeader.appendChild(tableRowElement);
675
            {
569
	    } else {
676
                tableHeader.appendChild( tableRowElement );
570
		tableBody.appendChild(tableRowElement);
677
            }
571
	    }
678
            else
572
	}
679
            {
680
                tableBody.appendChild( tableRowElement );
681
            }
682
        }
573
683
574
	if (tableHeader.hasChildNodes()) {
684
        if ( tableHeader.hasChildNodes() )
575
	    tableElement.appendChild(tableHeader);
685
        {
576
	}
686
            tableElement.appendChild( tableHeader );
577
	if (tableBody.hasChildNodes()) {
687
        }
578
	    tableElement.appendChild(tableBody);
688
        if ( tableBody.hasChildNodes() )
579
	} else {
689
        {
580
	    System.err.println("Table without body");
690
            tableElement.appendChild( tableBody );
581
	}
691
        }
692
        else
693
        {
694
            logger.log(
695
                    POILogger.WARN,
696
                    "Table without body starting on offset "
697
                            + table.getStartOffset() + " -- "
698
                            + table.getEndOffset() );
699
        }
582
    }
700
    }
583
701
584
    protected int tryImageWithinField(HWPFDocument hwpfDocument,
702
    protected int tryField( HWPFDocument hwpfDocument, Paragraph paragraph,
585
            Paragraph paragraph, int beginMark, Element currentBlock) {
703
            int currentTableLevel, int beginMark, Element currentBlock )
704
    {
586
        int separatorMark = -1;
705
        int separatorMark = -1;
587
        int pictureMark = -1;
588
        int pictureChar = Integer.MIN_VALUE;
589
        int endMark = -1;
706
        int endMark = -1;
590
        for (int c = beginMark + 1; c < paragraph.numCharacterRuns(); c++) {
707
        for ( int c = beginMark + 1; c < paragraph.numCharacterRuns(); c++ )
591
            CharacterRun characterRun = paragraph.getCharacterRun(c);
708
        {
709
            CharacterRun characterRun = paragraph.getCharacterRun( c );
592
710
593
            String text = characterRun.text();
711
            String text = characterRun.text();
594
            if (text.getBytes().length == 0)
712
            if ( text.getBytes().length == 0 )
595
                continue;
713
                continue;
596
714
597
            if (text.getBytes()[0] == FIELD_SEPARATOR_MARK) {
715
            if ( text.getBytes()[0] == FIELD_SEPARATOR_MARK )
598
                if (separatorMark != -1) {
716
            {
717
                if ( separatorMark != -1 )
718
                {
599
                    // double;
719
                    // double;
600
                    return beginMark;
720
                    return beginMark;
601
                }
721
                }
Lines 604-611 Link Here
604
                continue;
724
                continue;
605
            }
725
            }
606
726
607
            if (text.getBytes()[0] == FIELD_END_MARK) {
727
            if ( text.getBytes()[0] == FIELD_END_MARK )
608
                if (endMark != -1) {
728
            {
729
                if ( endMark != -1 )
730
                {
609
                    // double;
731
                    // double;
610
                    return beginMark;
732
                    return beginMark;
611
                }
733
                }
Lines 614-676 Link Here
614
                break;
736
                break;
615
            }
737
            }
616
738
617
            if (hwpfDocument.getPicturesTable().hasPicture(characterRun)) {
618
                if (c != -1) {
619
                    // double;
620
                    return beginMark;
621
                }
622
623
                pictureMark = c;
624
                pictureChar = characterRun.text().charAt(0);
625
                continue;
626
            }
627
        }
739
        }
628
740
629
        if (separatorMark == -1 || pictureMark == -1 || endMark == -1)
741
        if ( separatorMark == -1 || endMark == -1 )
630
            return beginMark;
742
            return beginMark;
631
743
632
        final CharacterRun pictureRun = paragraph.getCharacterRun(pictureMark);
744
        processField( hwpfDocument, currentBlock, paragraph, currentTableLevel,
633
        final Picture picture = hwpfDocument.getPicturesTable().extractPicture(
745
                beginMark, separatorMark, endMark );
634
                pictureRun, true);
635
636
        processImage(currentBlock, pictureChar == 0x01, picture);
637
746
638
        return endMark;
747
        return endMark;
639
    }
748
    }
640
641
    /**
642
     * Java main() interface to interact with WordToFoExtractor
643
     *
644
     * <p>
645
     *     Usage: WordToFoExtractor infile outfile
646
     * </p>
647
     * Where infile is an input .doc file ( Word 97-2007)
648
     * which will be rendered as XSL-FO into outfile
649
     *
650
     */
651
    public static void main(String[] args) {
652
        if (args.length < 2) {
653
            System.err.println("Usage: WordToFoExtractor <inputFile.doc> <saveTo.fo>");
654
            return;
655
        }
656
657
        System.out.println("Converting " + args[0]);
658
        System.out.println("Saving output to " + args[1]);
659
        try {
660
            Document doc = WordToFoExtractor.process(new File(args[0]));
661
662
            FileWriter out = new FileWriter(args[1]);
663
            DOMSource domSource = new DOMSource(doc);
664
            StreamResult streamResult = new StreamResult(out);
665
            TransformerFactory tf = TransformerFactory.newInstance();
666
            Transformer serializer = tf.newTransformer();
667
            serializer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");  // TODO set encoding from a command argument
668
            serializer.setOutputProperty(OutputKeys.INDENT, "yes");
669
            serializer.transform(domSource, streamResult);
670
            out.close();
671
        } catch (Exception e) {
672
            e.printStackTrace();
673
        }
674
    }
675
676
}
749
}
(-)testcases/org/apache/poi/hwpf/extractor/TestWordToFoExtractor.java (+95 lines)
Line 0 Link Here
1
/*
2
 *  ====================================================================
3
 *    Licensed to the Apache Software Foundation (ASF) under one or more
4
 *    contributor license agreements.  See the NOTICE file distributed with
5
 *    this work for additional information regarding copyright ownership.
6
 *    The ASF licenses this file to You under the Apache License, Version 2.0
7
 *    (the "License"); you may not use this file except in compliance with
8
 *    the License.  You may obtain a copy of the License at
9
 *
10
 *        http://www.apache.org/licenses/LICENSE-2.0
11
 *
12
 *    Unless required by applicable law or agreed to in writing, software
13
 *    distributed under the License is distributed on an "AS IS" BASIS,
14
 *    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
 *    See the License for the specific language governing permissions and
16
 *    limitations under the License.
17
 * ====================================================================
18
 */
19
package org.apache.poi.hwpf.extractor;
20
21
import java.io.StringWriter;
22
23
import javax.xml.parsers.DocumentBuilderFactory;
24
import javax.xml.transform.OutputKeys;
25
import javax.xml.transform.Transformer;
26
import javax.xml.transform.TransformerFactory;
27
import javax.xml.transform.dom.DOMSource;
28
import javax.xml.transform.stream.StreamResult;
29
30
import junit.framework.TestCase;
31
import org.apache.poi.POIDataSamples;
32
import org.apache.poi.hwpf.HWPFDocument;
33
34
/**
35
 * Test cases for {@link WordToFoExtractor}
36
 * 
37
 * @author Sergey Vladimirov (vlsergey {at} gmail {dot} com)
38
 */
39
public class TestWordToFoExtractor extends TestCase
40
{
41
    private static String getFoText( final String sampleFileName )
42
            throws Exception
43
    {
44
        HWPFDocument hwpfDocument = new HWPFDocument( POIDataSamples
45
                .getDocumentInstance().openResourceAsStream( sampleFileName ) );
46
47
        WordToFoExtractor wordToFoExtractor = new WordToFoExtractor(
48
                DocumentBuilderFactory.newInstance().newDocumentBuilder()
49
                        .newDocument() );
50
        wordToFoExtractor.processDocument( hwpfDocument );
51
52
        StringWriter stringWriter = new StringWriter();
53
54
        Transformer transformer = TransformerFactory.newInstance()
55
                .newTransformer();
56
        transformer.setOutputProperty( OutputKeys.INDENT, "yes" );
57
        transformer.transform(
58
                new DOMSource( wordToFoExtractor.getDocument() ),
59
                new StreamResult( stringWriter ) );
60
61
        String result = stringWriter.toString();
62
        return result;
63
    }
64
65
    public void testHyperlink() throws Exception
66
    {
67
        final String sampleFileName = "hyperlink.doc";
68
        String result = getFoText( sampleFileName );
69
70
        assertTrue( result
71
                .contains( "<fo:basic-link external-destination=\"http://testuri.org/\">" ) );
72
        assertTrue( result.contains( "Hyperlink text" ) );
73
    }
74
75
    public void testEquation() throws Exception
76
    {
77
        final String sampleFileName = "equation.doc";
78
        String result = getFoText( sampleFileName );
79
80
        assertTrue( result
81
                .contains( "<!--Image link to '0.emf' can be here-->" ) );
82
    }
83
84
    public void testPageref() throws Exception
85
    {
86
        final String sampleFileName = "pageref.doc";
87
        String result = getFoText( sampleFileName );
88
89
        System.out.println( result );
90
91
        assertTrue( result
92
                .contains( "<fo:basic-link internal-destination=\"userref\">" ) );
93
        assertTrue( result.contains( "1" ) );
94
    }
95
}

Return to bug 51351