View | Details | Raw Unified | Return to bug 51351
Collapse All | Expand All

(-)src/org/apache/poi/hwpf/sprm/CharacterSprmUncompressor.java (-1 / +12 lines)
Lines 17-30 Link Here
17
17
18
package org.apache.poi.hwpf.sprm;
18
package org.apache.poi.hwpf.sprm;
19
19
20
import org.apache.poi.hwpf.usermodel.BorderCode;
20
import org.apache.poi.hwpf.usermodel.CharacterProperties;
21
import org.apache.poi.hwpf.usermodel.CharacterProperties;
21
import org.apache.poi.hwpf.usermodel.DateAndTime;
22
import org.apache.poi.hwpf.usermodel.DateAndTime;
22
import org.apache.poi.hwpf.usermodel.BorderCode;
23
import org.apache.poi.hwpf.usermodel.ShadingDescriptor;
23
import org.apache.poi.hwpf.usermodel.ShadingDescriptor;
24
import org.apache.poi.util.LittleEndian;
24
import org.apache.poi.util.LittleEndian;
25
import org.apache.poi.util.POILogFactory;
26
import org.apache.poi.util.POILogger;
25
27
26
public final class CharacterSprmUncompressor
28
public final class CharacterSprmUncompressor
27
{
29
{
30
    private static final POILogger logger = POILogFactory
31
            .getLogger( CharacterSprmUncompressor.class );
32
28
  public CharacterSprmUncompressor()
33
  public CharacterSprmUncompressor()
29
  {
34
  {
30
  }
35
  }
Lines 47-52 Link Here
47
    while (sprmIt.hasNext())
52
    while (sprmIt.hasNext())
48
    {
53
    {
49
      SprmOperation sprm = sprmIt.next();
54
      SprmOperation sprm = sprmIt.next();
55
56
      if (sprm.getType() != 2) {
57
        logger.log( POILogger.WARN, "Non-CHP SPRM returned by SprmIterator" );
58
        continue;
59
      }
60
50
      unCompressCHPOperation(parent, newProperties, sprm);
61
      unCompressCHPOperation(parent, newProperties, sprm);
51
    }
62
    }
52
63
(-)src/org/apache/poi/hwpf/model/CHPX.java (-2 / +8 lines)
Lines 57-67 Link Here
57
57
58
  public CharacterProperties getCharacterProperties(StyleSheet ss, short istd)
58
  public CharacterProperties getCharacterProperties(StyleSheet ss, short istd)
59
  {
59
  {
60
    CharacterProperties baseStyle = ss.getCharacterStyle(istd);
60
    CharacterProperties baseStyle;
61
    if (ss == null) {
62
      // old Word document format: no style sheet available, start from default properties
63
      baseStyle = new CharacterProperties();
64
    } else {
65
      baseStyle = ss.getCharacterStyle(istd);
66
    }
61
    CharacterProperties props = CharacterSprmUncompressor.uncompressCHP(baseStyle, getGrpprl(), 0);
67
    CharacterProperties props = CharacterSprmUncompressor.uncompressCHP(baseStyle, getGrpprl(), 0);
62
    return props;
68
    return props;
63
  }
69
  }
64
  
70
65
  public String toString() {
71
  public String toString() {
66
      return "CHPX from " + getStart() + " to " + getEnd() + 
72
      return "CHPX from " + getStart() + " to " + getEnd() + 
67
         " (in bytes " + getStartBytes() + " to " + getEndBytes() + ")";
73
         " (in bytes " + getStartBytes() + " to " + getEndBytes() + ")";
(-)src/org/apache/poi/hwpf/usermodel/CharacterRun.java (+4 lines)
Lines 426-431 Link Here
426
426
427
  public String getFontName()
427
  public String getFontName()
428
  {
428
  {
429
    if (_doc.getFontTable() == null)
430
      // old Word format: the document has no font table
431
      return null;
432
429
    return _doc.getFontTable().getMainFont(_props.getFtcAscii());
433
    return _doc.getFontTable().getMainFont(_props.getFtcAscii());
430
  }
434
  }
431
435
(-)src/org/apache/poi/hwpf/usermodel/BorderCode.java (+28 lines)
Lines 194-197 Link Here
194
    _fFrame.setValue(_info2, frame ? 1 : 0);
194
    _fFrame.setValue(_info2, frame ? 1 : 0);
195
  }
195
  }
196
196
197
    @Override
198
    public String toString()
199
    {
200
        StringBuffer buffer = new StringBuffer();
201
202
        buffer.append( "[BRC]\n" );
203
204
        buffer.append( "        .dptLineWidth         = " );
205
        buffer.append( " (" ).append( getLineWidth() ).append( " )\n" );
206
207
        buffer.append( "        .brcType              = " );
208
        buffer.append( " (" ).append( getBorderType() ).append( " )\n" );
209
210
        buffer.append( "        .ico                  = " );
211
        buffer.append( " (" ).append( getColor() ).append( " )\n" );
212
213
        buffer.append( "        .dptSpace             = " );
214
        buffer.append( " (" ).append( getSpace() ).append( " )\n" );
215
216
        buffer.append( "        .fShadow              = " );
217
        buffer.append( " (" ).append( isShadow() ).append( " )\n" );
218
219
        buffer.append( "        .fFrame               = " );
220
        buffer.append( " (" ).append( isFrame() ).append( " )\n" );
221
222
        return buffer.toString();
223
    }
224
197
}
225
}
(-)src/org/apache/poi/hwpf/usermodel/Range.java (-13 / +27 lines)
Lines 781-808 Link Here
781
	 *            The index of the character run to get.
781
	 *            The index of the character run to get.
782
	 * @return The character run at the specified index in this range.
782
	 * @return The character run at the specified index in this range.
783
	 */
783
	 */
784
	public CharacterRun getCharacterRun(int index) {
784
    public CharacterRun getCharacterRun( int index )
785
		initCharacterRuns();
785
    {
786
		CHPX chpx = _characters.get(index + _charStart);
786
        initCharacterRuns();
787
        
787
        CHPX chpx = _characters.get( index + _charStart );
788
        if (chpx == null) {
788
        return getCharacterRun( chpx );
789
    }
790
791
    private CharacterRun getCharacterRun( CHPX chpx )
792
    {
793
        if ( chpx == null )
794
        {
789
            return null;
795
            return null;
790
        }
796
        }
791
797
792
		int[] point = findRange(_paragraphs, _parStart, Math.max(chpx.getStart(), _start), chpx
798
        int[] point = findRange( _paragraphs, _parStart,
793
				.getEnd());
799
                Math.max( chpx.getStart(), _start ), chpx.getEnd() );
794
800
795
        if (point[0] >= _paragraphs.size()) {
801
        if ( point[0] >= _paragraphs.size() )
802
        {
796
            return null;
803
            return null;
797
        }
804
        }
798
805
799
		PAPX papx = _paragraphs.get(point[0]);
806
        PAPX papx = _paragraphs.get( point[0] );
800
		short istd = papx.getIstd();
807
        short istd = papx.getIstd();
801
808
802
		CharacterRun chp = new CharacterRun(chpx, _doc.getStyleSheet(), istd, this);
809
        CharacterRun chp = new CharacterRun( chpx, _doc.getStyleSheet(), istd,
810
                this );
803
811
804
		return chp;
812
        return chp;
805
	}
813
    }
806
814
807
	/**
815
	/**
808
	 * Gets the section at index. The index is relative to this range.
816
	 * Gets the section at index. The index is relative to this range.
Lines 1077-1086 Link Here
1077
		}
1085
		}
1078
	}
1086
	}
1079
1087
1088
	/**
1089
	 * @return The starting character offset of this range
1090
	 */
1080
	public int getStartOffset() {
1091
	public int getStartOffset() {
1081
		return _start;
1092
		return _start;
1082
	}
1093
	}
1083
1094
1095
	/**
1096
	 * @return The ending character offset of this range
1097
	 */
1084
	public int getEndOffset() {
1098
	public int getEndOffset() {
1085
		return _end;
1099
		return _end;
1086
	}
1100
	}
(-)src/org/apache/poi/hwpf/extractor/AbstractWordUtils.java (+406 lines)
Line 0 Link Here
1
/*
2
 *  ====================================================================
3
 *    Licensed to the Apache Software Foundation (ASF) under one or more
4
 *    contributor license agreements.  See the NOTICE file distributed with
5
 *    this work for additional information regarding copyright ownership.
6
 *    The ASF licenses this file to You under the Apache License, Version 2.0
7
 *    (the "License"); you may not use this file except in compliance with
8
 *    the License.  You may obtain a copy of the License at
9
 *
10
 *        http://www.apache.org/licenses/LICENSE-2.0
11
 *
12
 *    Unless required by applicable law or agreed to in writing, software
13
 *    distributed under the License is distributed on an "AS IS" BASIS,
14
 *    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
 *    See the License for the specific language governing permissions and
16
 *    limitations under the License.
17
 * ====================================================================
18
 */
19
package org.apache.poi.hwpf.extractor;
20
21
import java.io.Closeable;
22
import java.io.File;
23
import java.io.FileInputStream;
24
import java.io.IOException;
25
import java.io.InputStream;
26
import java.lang.reflect.Constructor;
27
import java.lang.reflect.Field;
28
import java.lang.reflect.Method;
29
import java.util.ArrayList;
30
import java.util.List;
31
32
import org.apache.poi.hwpf.HWPFDocument;
33
import org.apache.poi.hwpf.HWPFDocumentCore;
34
import org.apache.poi.hwpf.HWPFOldDocument;
35
import org.apache.poi.hwpf.OldWordFileFormatException;
36
import org.apache.poi.hwpf.model.CHPX;
37
import org.apache.poi.hwpf.model.ListLevel;
38
import org.apache.poi.hwpf.model.ListTables;
39
import org.apache.poi.hwpf.usermodel.BorderCode;
40
import org.apache.poi.hwpf.usermodel.CharacterRun;
41
import org.apache.poi.hwpf.usermodel.Paragraph;
42
import org.apache.poi.hwpf.usermodel.Range;
43
import org.apache.poi.hwpf.usermodel.Section;
44
import org.apache.poi.hwpf.usermodel.SectionProperties;
45
import org.apache.poi.hwpf.usermodel.TableIterator;
46
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
47
import org.apache.poi.util.POILogFactory;
48
import org.apache.poi.util.POILogger;
49
50
public class AbstractWordUtils
51
{
52
    static final String EMPTY = "";
53
54
    private static final POILogger logger = POILogFactory
55
            .getLogger( AbstractWordUtils.class );
56
57
    public static final float TWIPS_PER_INCH = 1440.0f;
58
    public static final int TWIPS_PER_PT = 20;
59
60
    static void closeQuietly( final Closeable closeable )
61
    {
62
        try
63
        {
64
            closeable.close();
65
        }
66
        catch ( Exception exc )
67
        {
68
            logger.log( POILogger.ERROR, "Unable to close resource: " + exc,
69
                    exc );
70
        }
71
    }
72
73
    static boolean equals( String str1, String str2 )
74
    {
75
        return str1 == null ? str2 == null : str1.equals( str2 );
76
    }
77
78
    // XXX: should be incorporated into Range (currently reaches into Range via reflection)
79
    static List<CharacterRun> findCharacterRuns( Range range )
80
    {
81
        final int min = range.getStartOffset();
82
        final int max = range.getEndOffset();
83
84
        List<CharacterRun> result = new ArrayList<CharacterRun>();
85
        List<CHPX> chpxs = getCharacters( range );
86
        for ( int i = 0; i < chpxs.size(); i++ )
87
        {
88
            CHPX chpx = chpxs.get( i );
89
            if ( chpx == null )
90
                continue;
91
92
            if ( min <= chpx.getStart() && chpx.getEnd() <= max )
93
            {
94
                final CharacterRun characterRun = getCharacterRun( range, chpx );
95
96
                if ( characterRun == null )
97
                    continue;
98
99
                result.add( characterRun );
100
            }
101
        }
102
103
        return result;
104
    }
105
106
    public static String getBorderType( BorderCode borderCode )
107
    {
108
        if ( borderCode == null )
109
            throw new IllegalArgumentException( "borderCode is null" );
110
111
        switch ( borderCode.getBorderType() )
112
        {
113
        case 1:
114
        case 2:
115
            return "solid";
116
        case 3:
117
            return "double";
118
        case 5:
119
            return "solid";
120
        case 6:
121
            return "dotted";
122
        case 7:
123
        case 8:
124
            return "dashed";
125
        case 9:
126
            return "dotted";
127
        case 10:
128
        case 11:
129
        case 12:
130
        case 13:
131
        case 14:
132
        case 15:
133
        case 16:
134
        case 17:
135
        case 18:
136
        case 19:
137
            return "double";
138
        case 20:
139
            return "solid";
140
        case 21:
141
            return "double";
142
        case 22:
143
            return "dashed";
144
        case 23:
145
            return "dashed";
146
        case 24:
147
            return "ridge";
148
        case 25:
149
            return "grooved";
150
        default:
151
            return "solid";
152
        }
153
    }
154
155
    public static String getBorderWidth( BorderCode borderCode )
156
    {
157
        int lineWidth = borderCode.getLineWidth();
158
        int pt = lineWidth / 8;
159
        int pte = lineWidth - pt * 8;
160
161
        StringBuilder stringBuilder = new StringBuilder();
162
        stringBuilder.append( pt );
163
        stringBuilder.append( "." );
164
        stringBuilder.append( 1000 / 8 * pte );
165
        stringBuilder.append( "pt" );
166
        return stringBuilder.toString();
167
    }
168
169
    public static String getBulletText( ListTables listTables,
170
            Paragraph paragraph, int listId )
171
    {
172
        final ListLevel listLevel = listTables.getLevel( listId,
173
                paragraph.getIlvl() );
174
175
        if ( listLevel.getNumberText() == null )
176
            return EMPTY;
177
178
        StringBuffer bulletBuffer = new StringBuffer();
179
        char[] xst = listLevel.getNumberText().toCharArray();
180
        for ( char element : xst )
181
        {
182
            if ( element < 9 )
183
            {
184
                ListLevel numLevel = listTables.getLevel( listId, element );
185
186
                int num = numLevel.getStartAt();
187
                bulletBuffer.append( NumberFormatter.getNumber( num,
188
                        listLevel.getNumberFormat() ) );
189
190
                if ( numLevel == listLevel )
191
                {
192
                    numLevel.setStartAt( numLevel.getStartAt() + 1 );
193
                }
194
195
            }
196
            else
197
            {
198
                bulletBuffer.append( element );
199
            }
200
        }
201
202
        byte follow = getIxchFollow( listLevel );
203
        switch ( follow )
204
        {
205
        case 0:
206
            bulletBuffer.append( "\t" );
207
            break;
208
        case 1:
209
            bulletBuffer.append( " " );
210
            break;
211
        default:
212
            break;
213
        }
214
215
        return bulletBuffer.toString();
216
    }
217
218
    private static CharacterRun getCharacterRun( Range range, CHPX chpx )
219
    {
220
        try
221
        {
222
            Method method = Range.class.getDeclaredMethod( "getCharacterRun",
223
                    CHPX.class );
224
            method.setAccessible( true );
225
            return (CharacterRun) method.invoke( range, chpx );
226
        }
227
        catch ( Exception exc )
228
        {
229
            throw new Error( exc );
230
        }
231
    }
232
233
    @SuppressWarnings( "unchecked" )
234
    private static List<CHPX> getCharacters( Range range )
235
    {
236
        try
237
        {
238
            Field field = Range.class.getDeclaredField( "_characters" );
239
            field.setAccessible( true );
240
            return (List<CHPX>) field.get( range );
241
        }
242
        catch ( Exception exc )
243
        {
244
            throw new Error( exc );
245
        }
246
    }
247
248
    public static String getColor( int ico )
249
    {
250
        switch ( ico )
251
        {
252
        case 1:
253
            return "black";
254
        case 2:
255
            return "blue";
256
        case 3:
257
            return "cyan";
258
        case 4:
259
            return "green";
260
        case 5:
261
            return "magenta";
262
        case 6:
263
            return "red";
264
        case 7:
265
            return "yellow";
266
        case 8:
267
            return "white";
268
        case 9:
269
            return "darkblue";
270
        case 10:
271
            return "darkcyan";
272
        case 11:
273
            return "darkgreen";
274
        case 12:
275
            return "darkmagenta";
276
        case 13:
277
            return "darkred";
278
        case 14:
279
            return "darkyellow";
280
        case 15:
281
            return "darkgray";
282
        case 16:
283
            return "lightgray";
284
        default:
285
            return "black";
286
        }
287
    }
288
289
    public static byte getIxchFollow( ListLevel listLevel )
290
    {
291
        try
292
        {
293
            Field field = ListLevel.class.getDeclaredField( "_ixchFollow" );
294
            field.setAccessible( true );
295
            return ((Byte) field.get( listLevel )).byteValue();
296
        }
297
        catch ( Exception exc )
298
        {
299
            throw new Error( exc );
300
        }
301
    }
302
303
    public static String getJustification( int js )
304
    {
305
        switch ( js )
306
        {
307
        case 0:
308
            return "start";
309
        case 1:
310
            return "center";
311
        case 2:
312
            return "end";
313
        case 3:
314
        case 4:
315
            return "justify";
316
        case 5:
317
            return "center";
318
        case 6:
319
            return "left";
320
        case 7:
321
            return "start";
322
        case 8:
323
            return "end";
324
        case 9:
325
            return "justify";
326
        }
327
        return "";
328
    }
329
330
    public static String getListItemNumberLabel( int number, int format )
331
    {
332
333
        if ( format != 0 )
334
            System.err.println( "NYI: toListItemNumberLabel(): " + format );
335
336
        return String.valueOf( number );
337
    }
338
339
    public static SectionProperties getSectionProperties( Section section )
340
    {
341
        try
342
        {
343
            Field field = Section.class.getDeclaredField( "_props" );
344
            field.setAccessible( true );
345
            return (SectionProperties) field.get( section );
346
        }
347
        catch ( Exception exc )
348
        {
349
            throw new Error( exc );
350
        }
351
    }
352
353
    static boolean isEmpty( String str )
354
    {
355
        return str == null || str.length() == 0;
356
    }
357
358
    static boolean isNotEmpty( String str )
359
    {
360
        return !isEmpty( str );
361
    }
362
363
    public static HWPFDocumentCore loadDoc( File docFile ) throws IOException
364
    {
365
        final FileInputStream istream = new FileInputStream( docFile );
366
        try
367
        {
368
            return loadDoc( istream );
369
        }
370
        finally
371
        {
372
            closeQuietly( istream );
373
        }
374
    }
375
376
    public static HWPFDocumentCore loadDoc( InputStream inputStream )
377
            throws IOException
378
    {
379
        final POIFSFileSystem poifsFileSystem = HWPFDocumentCore
380
                .verifyAndBuildPOIFS( inputStream );
381
        try
382
        {
383
            return new HWPFDocument( poifsFileSystem );
384
        }
385
        catch ( OldWordFileFormatException exc )
386
        {
387
            return new HWPFOldDocument( poifsFileSystem );
388
        }
389
    }
390
391
    public static TableIterator newTableIterator( Range range, int level )
392
    {
393
        try
394
        {
395
            Constructor<TableIterator> constructor = TableIterator.class
396
                    .getDeclaredConstructor( Range.class, int.class );
397
            constructor.setAccessible( true );
398
            return constructor.newInstance( range, Integer.valueOf( level ) );
399
        }
400
        catch ( Exception exc )
401
        {
402
            throw new Error( exc );
403
        }
404
    }
405
406
}
0
  + text/plain
407
  + text/plain
(-)src/org/apache/poi/hwpf/extractor/WordToHtmlUtils.java (+294 lines)
Line 0 Link Here
1
/*
2
 *  ====================================================================
3
 *    Licensed to the Apache Software Foundation (ASF) under one or more
4
 *    contributor license agreements.  See the NOTICE file distributed with
5
 *    this work for additional information regarding copyright ownership.
6
 *    The ASF licenses this file to You under the Apache License, Version 2.0
7
 *    (the "License"); you may not use this file except in compliance with
8
 *    the License.  You may obtain a copy of the License at
9
 *
10
 *        http://www.apache.org/licenses/LICENSE-2.0
11
 *
12
 *    Unless required by applicable law or agreed to in writing, software
13
 *    distributed under the License is distributed on an "AS IS" BASIS,
14
 *    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
 *    See the License for the specific language governing permissions and
16
 *    limitations under the License.
17
 * ====================================================================
18
 */
19
package org.apache.poi.hwpf.extractor;
20
21
import org.apache.poi.hwpf.usermodel.BorderCode;
22
import org.apache.poi.hwpf.usermodel.CharacterProperties;
23
import org.apache.poi.hwpf.usermodel.CharacterRun;
24
import org.apache.poi.hwpf.usermodel.Paragraph;
25
import org.apache.poi.hwpf.usermodel.Picture;
26
import org.apache.poi.hwpf.usermodel.TableCell;
27
import org.apache.poi.hwpf.usermodel.TableRow;
28
import org.w3c.dom.Element;
29
30
public class WordToHtmlUtils extends AbstractWordUtils
31
{
32
    public static void addBold( final boolean bold, StringBuilder style )
33
    {
34
        style.append( "font-weight: " + (bold ? "bold" : "normal") + ";" );
35
    }
36
37
    public static void addBorder( BorderCode borderCode, String where,
38
            StringBuilder style )
39
    {
40
        if ( borderCode == null || borderCode.getBorderType() == 0 )
41
            return;
42
43
        if ( isEmpty( where ) )
44
        {
45
            style.append( "border-style: " + getBorderType( borderCode ) + "; " );
46
            style.append( "border-color: " + getColor( borderCode.getColor() )
47
                    + "; " );
48
            style.append( "border-width: " + getBorderWidth( borderCode )
49
                    + "; " );
50
        }
51
        else
52
        {
53
            style.append( "border-" + where + "-style: "
54
                    + getBorderType( borderCode ) + "; " );
55
            style.append( "border-" + where + "-color: "
56
                    + getColor( borderCode.getColor() ) + "; " );
57
            style.append( "border-" + where + "-width: "
58
                    + getBorderWidth( borderCode ) + "; " );
59
        }
60
    }
61
62
    public static void addCharactersProperties(
63
            final CharacterRun characterRun, StringBuilder style )
64
    {
65
        final CharacterProperties clonedProperties = characterRun
66
                .cloneProperties();
67
68
        if ( characterRun.isBold() )
69
        {
70
            style.append( "font-weight: bold; " );
71
        }
72
        if ( characterRun.isItalic() )
73
        {
74
            style.append( "font-style: italic; " );
75
        }
76
77
        addBorder( clonedProperties.getBrc(), EMPTY, style );
78
79
        if ( characterRun.isCapitalized() )
80
        {
81
            style.append( "text-transform: uppercase; " );
82
        }
83
        if ( characterRun.isHighlighted() )
84
        {
85
            style.append( "background-color: "
86
                    + getColor( clonedProperties.getIcoHighlight() ) + "; " );
87
        }
88
        if ( characterRun.isStrikeThrough() )
89
        {
90
            style.append( "text-decoration: line-through; " );
91
        }
92
        if ( characterRun.isShadowed() )
93
        {
94
            style.append( "text-shadow: " + characterRun.getFontSize() / 24
95
                    + "pt; " );
96
        }
97
        if ( characterRun.isSmallCaps() )
98
        {
99
            style.append( "font-variant: small-caps; " );
100
        }
101
        if ( characterRun.getSubSuperScriptIndex() == 1 )
102
        {
103
            style.append( "baseline-shift: super; " );
104
            style.append( "font-size: smaller; " );
105
        }
106
        if ( characterRun.getSubSuperScriptIndex() == 2 )
107
        {
108
            style.append( "baseline-shift: sub; " );
109
            style.append( "font-size: smaller; " );
110
        }
111
        if ( characterRun.getUnderlineCode() > 0 )
112
        {
113
            style.append( "text-decoration: underline; " );
114
        }
115
        if ( characterRun.isVanished() )
116
        {
117
            style.append( "visibility: hidden; " );
118
        }
119
    }
120
121
    public static void addFontFamily( final String fontFamily,
122
            StringBuilder style )
123
    {
124
        if ( isEmpty( fontFamily ) )
125
            return;
126
127
        style.append( "font-family: " + fontFamily );
128
    }
129
130
    public static void addFontSize( final int fontSize, StringBuilder style )
131
    {
132
        style.append( "font-size: " + fontSize );
133
    }
134
135
    public static void addIndent( Paragraph paragraph, StringBuilder style )
136
    {
137
        addIndent( style, "text-indent", paragraph.getFirstLineIndent() );
138
        addIndent( style, "start-indent", paragraph.getIndentFromLeft() );
139
        addIndent( style, "end-indent", paragraph.getIndentFromRight() );
140
        addIndent( style, "space-before", paragraph.getSpacingBefore() );
141
        addIndent( style, "space-after", paragraph.getSpacingAfter() );
142
    }
143
144
    private static void addIndent( StringBuilder style, final String cssName,
145
            final int twipsValue )
146
    {
147
        if ( twipsValue == 0 )
148
            return;
149
150
        style.append( cssName + ": " + (twipsValue / TWIPS_PER_PT) + "pt; " );
151
    }
152
153
    public static void addJustification( Paragraph paragraph,
154
            final StringBuilder style )
155
    {
156
        String justification = getJustification( paragraph.getJustification() );
157
        if ( isNotEmpty( justification ) )
158
            style.append( "text-align: " + justification + "; " );
159
    }
160
161
    public static void addParagraphProperties( Paragraph paragraph,
162
            StringBuilder style )
163
    {
164
        addIndent( paragraph, style );
165
        addJustification( paragraph, style );
166
167
        addBorder( paragraph.getBottomBorder(), "bottom", style );
168
        addBorder( paragraph.getLeftBorder(), "left", style );
169
        addBorder( paragraph.getRightBorder(), "right", style );
170
        addBorder( paragraph.getTopBorder(), "top", style );
171
172
        if ( paragraph.pageBreakBefore() )
173
        {
174
            style.append( "break-before: page; " );
175
        }
176
177
        style.append( "hyphenate: " + paragraph.isAutoHyphenated() + "; " );
178
179
        if ( paragraph.keepOnPage() )
180
        {
181
            style.append( "keep-together.within-page: always; " );
182
        }
183
184
        if ( paragraph.keepWithNext() )
185
        {
186
            style.append( "keep-with-next.within-page: always; " );
187
        }
188
189
        style.append( "linefeed-treatment: preserve; " );
190
        style.append( "white-space-collapse: false; " );
191
    }
192
193
    public static void addTableCellProperties( TableRow tableRow,
194
            TableCell tableCell, boolean toppest, boolean bottomest,
195
            boolean leftest, boolean rightest, StringBuilder style )
196
    {
197
        style.append( "width: " + (tableCell.getWidth() / TWIPS_PER_INCH)
198
                + "in; " );
199
        style.append( "padding-start: "
200
                + (tableRow.getGapHalf() / TWIPS_PER_INCH) + "in; " );
201
        style.append( "padding-end: "
202
                + (tableRow.getGapHalf() / TWIPS_PER_INCH) + "in; " );
203
204
        BorderCode top = tableCell.getBrcTop() != null
205
                && tableCell.getBrcTop().getBorderType() != 0 ? tableCell
206
                .getBrcTop() : toppest ? tableRow.getTopBorder() : tableRow
207
                .getHorizontalBorder();
208
        BorderCode bottom = tableCell.getBrcBottom() != null
209
                && tableCell.getBrcBottom().getBorderType() != 0 ? tableCell
210
                .getBrcBottom() : bottomest ? tableRow.getBottomBorder()
211
                : tableRow.getHorizontalBorder();
212
213
        BorderCode left = tableCell.getBrcLeft() != null
214
                && tableCell.getBrcLeft().getBorderType() != 0 ? tableCell
215
                .getBrcLeft() : leftest ? tableRow.getLeftBorder() : tableRow
216
                .getVerticalBorder();
217
        BorderCode right = tableCell.getBrcRight() != null
218
                && tableCell.getBrcRight().getBorderType() != 0 ? tableCell
219
                .getBrcRight() : rightest ? tableRow.getRightBorder()
220
                : tableRow.getVerticalBorder();
221
222
        addBorder( bottom, "bottom", style );
223
        addBorder( left, "left", style );
224
        addBorder( right, "right", style );
225
        addBorder( top, "top", style );
226
    }
227
228
    public static void addTableRowProperties( TableRow tableRow,
229
            StringBuilder style )
230
    {
231
        if ( tableRow.getRowHeight() > 0 )
232
        {
233
            style.append( "height: "
234
                    + (tableRow.getRowHeight() / TWIPS_PER_INCH) + "in; " );
235
        }
236
        if ( !tableRow.cantSplit() )
237
        {
238
            style.append( "keep-together: always; " );
239
        }
240
    }
241
242
    public static void setPictureProperties( Picture picture,
243
            Element graphicElement )
244
    {
245
        final int aspectRatioX = picture.getAspectRatioX();
246
        final int aspectRatioY = picture.getAspectRatioY();
247
248
        if ( aspectRatioX > 0 )
249
        {
250
            graphicElement
251
                    .setAttribute( "content-width", ((picture.getDxaGoal()
252
                            * aspectRatioX / 100) / TWIPS_PER_PT)
253
                            + "pt" );
254
        }
255
        else
256
            graphicElement.setAttribute( "content-width",
257
                    (picture.getDxaGoal() / TWIPS_PER_PT) + "pt" );
258
259
        if ( aspectRatioY > 0 )
260
            graphicElement
261
                    .setAttribute( "content-height", ((picture.getDyaGoal()
262
                            * aspectRatioY / 100) / TWIPS_PER_PT)
263
                            + "pt" );
264
        else
265
            graphicElement.setAttribute( "content-height",
266
                    (picture.getDyaGoal() / TWIPS_PER_PT) + "pt" );
267
268
        if ( aspectRatioX <= 0 || aspectRatioY <= 0 )
269
        {
270
            graphicElement.setAttribute( "scaling", "uniform" );
271
        }
272
        else
273
        {
274
            graphicElement.setAttribute( "scaling", "non-uniform" );
275
        }
276
277
        graphicElement.setAttribute( "vertical-align", "text-bottom" );
278
279
        if ( picture.getDyaCropTop() != 0 || picture.getDxaCropRight() != 0
280
                || picture.getDyaCropBottom() != 0
281
                || picture.getDxaCropLeft() != 0 )
282
        {
283
            int rectTop = picture.getDyaCropTop() / TWIPS_PER_PT;
284
            int rectRight = picture.getDxaCropRight() / TWIPS_PER_PT;
285
            int rectBottom = picture.getDyaCropBottom() / TWIPS_PER_PT;
286
            int rectLeft = picture.getDxaCropLeft() / TWIPS_PER_PT;
287
            graphicElement.setAttribute( "clip", "rect(" + rectTop + "pt, "
288
                    + rectRight + "pt, " + rectBottom + "pt, " + rectLeft
289
                    + "pt)" );
290
            graphicElement.setAttribute( "oveerflow", "hidden" );
291
        }
292
    }
293
294
}
0
  + text/plain
295
  + text/plain
(-)src/org/apache/poi/hwpf/extractor/WordToHtmlExtractor.java (+477 lines)
Line 0 Link Here
1
/*
2
 *  ====================================================================
3
 *    Licensed to the Apache Software Foundation (ASF) under one or more
4
 *    contributor license agreements.  See the NOTICE file distributed with
5
 *    this work for additional information regarding copyright ownership.
6
 *    The ASF licenses this file to You under the Apache License, Version 2.0
7
 *    (the "License"); you may not use this file except in compliance with
8
 *    the License.  You may obtain a copy of the License at
9
 *
10
 *        http://www.apache.org/licenses/LICENSE-2.0
11
 *
12
 *    Unless required by applicable law or agreed to in writing, software
13
 *    distributed under the License is distributed on an "AS IS" BASIS,
14
 *    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
 *    See the License for the specific language governing permissions and
16
 *    limitations under the License.
17
 * ====================================================================
18
 */
19
package org.apache.poi.hwpf.extractor;
20
21
import java.io.File;
22
import java.io.FileWriter;
23
import java.util.List;
24
import java.util.Stack;
25
26
import javax.xml.parsers.DocumentBuilderFactory;
27
import javax.xml.transform.OutputKeys;
28
import javax.xml.transform.Transformer;
29
import javax.xml.transform.TransformerFactory;
30
import javax.xml.transform.dom.DOMSource;
31
import javax.xml.transform.stream.StreamResult;
32
33
import org.apache.poi.hwpf.HWPFDocument;
34
import org.apache.poi.hwpf.HWPFDocumentCore;
35
import org.apache.poi.hwpf.usermodel.CharacterRun;
36
import org.apache.poi.hwpf.usermodel.Paragraph;
37
import org.apache.poi.hwpf.usermodel.Picture;
38
import org.apache.poi.hwpf.usermodel.Section;
39
import org.apache.poi.hwpf.usermodel.SectionProperties;
40
import org.apache.poi.hwpf.usermodel.Table;
41
import org.apache.poi.hwpf.usermodel.TableCell;
42
import org.apache.poi.hwpf.usermodel.TableRow;
43
import org.apache.poi.util.POILogFactory;
44
import org.apache.poi.util.POILogger;
45
import org.w3c.dom.Document;
46
import org.w3c.dom.Element;
47
import org.w3c.dom.Text;
48
49
import static org.apache.poi.hwpf.extractor.AbstractWordUtils.TWIPS_PER_INCH;
50
51
/**
52
 * @author Sergey Vladimirov (vlsergey {at} gmail {dot} com)
53
 */
54
public class WordToHtmlExtractor extends AbstractWordExtractor
55
{
56
57
    /**
58
     * Holds properties values, applied to current <tt>p</tt> element. Those
59
     * properties shall not be doubled in children <tt>span</tt> elements.
60
     */
61
    private static class BlockProperies
62
    {
63
        final String pFontName;
64
        final int pFontSize;
65
66
        public BlockProperies( String pFontName, int pFontSize )
67
        {
68
            this.pFontName = pFontName;
69
            this.pFontSize = pFontSize;
70
        }
71
    }
72
73
    // Class-wide logger; processTable uses it to warn about tables that end up without body rows.
    private static final POILogger logger = POILogFactory
74
            .getLogger( WordToHtmlExtractor.class );
75
76
    private static String getSectionStyle( Section section )
77
    {
78
        SectionProperties sep = WordToHtmlUtils.getSectionProperties( section );
79
80
        float leftMargin = sep.getDxaLeft() / TWIPS_PER_INCH;
81
        float rightMargin = sep.getDxaRight() / TWIPS_PER_INCH;
82
        float topMargin = sep.getDyaTop() / TWIPS_PER_INCH;
83
        float bottomMargin = sep.getDyaBottom() / TWIPS_PER_INCH;
84
85
        String style = "margin: " + topMargin + "in " + rightMargin + "in "
86
                + bottomMargin + "in " + leftMargin + "in; ";
87
88
        if ( sep.getCcolM1() > 0 )
89
        {
90
            style += "column-count: " + (sep.getCcolM1() + 1) + "; ";
91
            if ( sep.getFEvenlySpaced() )
92
            {
93
                style += "column-gap: "
94
                        + (sep.getDxaColumns() / TWIPS_PER_INCH) + "in; ";
95
            }
96
            else
97
            {
98
                style += "column-gap: 0.25in; ";
99
            }
100
        }
101
        return style;
102
    }
103
104
    /**
105
     * Java main() interface to interact with WordToHtmlExtractor
106
     * 
107
     * <p>
108
     * Usage: WordToHtmlExtractor infile outfile
109
     * </p>
110
     * Where infile is an input .doc file ( Word 95-2007) which will be rendered
111
     * as HTML into outfile
112
     */
113
    public static void main( String[] args )
114
    {
115
        if ( args.length < 2 )
116
        {
117
            System.err
118
                    .println( "Usage: WordToHtmlExtractor <inputFile.doc> <saveTo.html>" );
119
            return;
120
        }
121
122
        System.out.println( "Converting " + args[0] );
123
        System.out.println( "Saving output to " + args[1] );
124
        try
125
        {
126
            Document doc = WordToHtmlExtractor.process( new File( args[0] ) );
127
128
            FileWriter out = new FileWriter( args[1] );
129
            DOMSource domSource = new DOMSource( doc );
130
            StreamResult streamResult = new StreamResult( out );
131
132
            TransformerFactory tf = TransformerFactory.newInstance();
133
            Transformer serializer = tf.newTransformer();
134
            // TODO set encoding from a command argument
135
            serializer.setOutputProperty( OutputKeys.ENCODING, "UTF-8" );
136
            serializer.setOutputProperty( OutputKeys.INDENT, "yes" );
137
            serializer.setOutputProperty( OutputKeys.METHOD, "html" );
138
            serializer.transform( domSource, streamResult );
139
            out.close();
140
        }
141
        catch ( Exception e )
142
        {
143
            e.printStackTrace();
144
        }
145
    }
146
147
    static Document process( File docFile ) throws Exception
148
    {
149
        final HWPFDocumentCore wordDocument = WordToHtmlUtils.loadDoc( docFile );
150
        WordToHtmlExtractor wordToHtmlExtractor = new WordToHtmlExtractor(
151
                DocumentBuilderFactory.newInstance().newDocumentBuilder()
152
                        .newDocument() );
153
        wordToHtmlExtractor.processDocument( wordDocument );
154
        return wordToHtmlExtractor.getDocument();
155
    }
156
157
    // Stack of per-paragraph font settings: processParagraph pushes an entry
    // before rendering the runs and pops it afterwards; outputCharacters
    // peeks at the top entry.
    private final Stack<BlockProperies> blocksProperies = new Stack<BlockProperies>();
158
159
    // Facade over the target DOM document; every HTML element is created through it.
    private final HtmlDocumentFacade htmlDocumentFacade;
160
161
    /**
     * Creates a new {@link WordToHtmlExtractor} that writes into the given
     * DOM document. One instance can be used to output several
     * {@link HWPFDocument}s into a single HTML document.
     * 
     * @param document
     *            XML DOM Document used as HTML document
     */
    public WordToHtmlExtractor( Document document )
    {
        htmlDocumentFacade = new HtmlDocumentFacade( document );
    }
172
173
    public Document getDocument()
174
    {
175
        return htmlDocumentFacade.getDocument();
176
    }
177
178
    @Override
179
    protected void outputCharacters( Element pElement,
180
            CharacterRun characterRun, String text )
181
    {
182
        Element span = htmlDocumentFacade.document.createElement( "span" );
183
        pElement.appendChild( span );
184
185
        StringBuilder style = new StringBuilder();
186
        BlockProperies blockProperies = this.blocksProperies.peek();
187
        if ( characterRun.getFontName() != null
188
                && !WordToHtmlUtils.equals( characterRun.getFontName(),
189
                        blockProperies.pFontName ) )
190
        {
191
            style.append( "font-family: " + characterRun.getFontName() + "; " );
192
        }
193
        if ( characterRun.getFontSize() / 2 != blockProperies.pFontSize )
194
        {
195
            style.append( "font-size: " + characterRun.getFontSize() / 2 + "; " );
196
        }
197
198
        WordToHtmlUtils.addCharactersProperties( characterRun, style );
199
        if ( style.length() != 0 )
200
            span.setAttribute( "style", style.toString() );
201
202
        Text textNode = htmlDocumentFacade.createText( text );
203
        span.appendChild( textNode );
204
    }
205
206
    protected void processHyperlink( HWPFDocumentCore wordDocument,
207
            Element currentBlock, Paragraph paragraph,
208
            List<CharacterRun> characterRuns, int currentTableLevel,
209
            String hyperlink, int beginTextInclusive, int endTextExclusive )
210
    {
211
        Element basicLink = htmlDocumentFacade.createHyperlink( hyperlink );
212
        currentBlock.appendChild( basicLink );
213
214
        if ( beginTextInclusive < endTextExclusive )
215
            processCharacters( wordDocument, currentTableLevel, paragraph,
216
                    basicLink, characterRuns, beginTextInclusive,
217
                    endTextExclusive );
218
    }
219
220
    /**
221
     * This method shall store image bytes in external file and convert it if
222
     * necessary. Images shall be stored using PNG format. Other formats may be
223
     * not supported by user browser.
224
     * <p>
225
     * Please note the
226
     * {@link WordToHtmlUtils#setPictureProperties(Picture, Element)} method.
227
     * 
228
     * @param currentBlock
229
     *            currently processed HTML element, like <tt>p</tt>. Shall be
230
     *            used as parent of newly created <tt>img</tt>
231
     * @param inlined
232
     *            if image is inlined
233
     * @param picture
234
     *            HWPF object, contained picture data and properties
235
     */
236
    protected void processImage( Element currentBlock, boolean inlined,
237
            Picture picture )
238
    {
239
        // no default implementation -- skip
240
        currentBlock.appendChild( htmlDocumentFacade.document
241
                .createComment( "Image link to '"
242
                        + picture.suggestFullFileName() + "' can be here" ) );
243
    }
244
245
    protected void processPageref( HWPFDocumentCore hwpfDocument,
246
            Element currentBlock, Paragraph paragraph,
247
            List<CharacterRun> characterRuns, int currentTableLevel,
248
            String pageref, int beginTextInclusive, int endTextExclusive )
249
    {
250
        Element basicLink = htmlDocumentFacade.createHyperlink( "#" + pageref );
251
        currentBlock.appendChild( basicLink );
252
253
        if ( beginTextInclusive < endTextExclusive )
254
            processCharacters( hwpfDocument, currentTableLevel, paragraph,
255
                    basicLink, characterRuns, beginTextInclusive,
256
                    endTextExclusive );
257
    }
258
259
    protected void processParagraph( HWPFDocumentCore hwpfDocument,
260
            Element parentFopElement, int currentTableLevel,
261
            Paragraph paragraph, String bulletText )
262
    {
263
        final Element pElement = htmlDocumentFacade.createParagraph();
264
        parentFopElement.appendChild( pElement );
265
266
        StringBuilder style = new StringBuilder();
267
        WordToHtmlUtils.addParagraphProperties( paragraph, style );
268
269
        final int charRuns = paragraph.numCharacterRuns();
270
271
        if ( charRuns == 0 )
272
        {
273
            return;
274
        }
275
276
        {
277
            final String pFontName;
278
            final int pFontSize;
279
            final CharacterRun characterRun = paragraph.getCharacterRun( 0 );
280
            if ( characterRun != null )
281
            {
282
                pFontSize = characterRun.getFontSize() / 2;
283
                pFontName = characterRun.getFontName();
284
                WordToHtmlUtils.addFontFamily( pFontName, style );
285
                WordToHtmlUtils.addFontSize( pFontSize, style );
286
            }
287
            else
288
            {
289
                pFontSize = -1;
290
                pFontName = WordToHtmlUtils.EMPTY;
291
            }
292
            blocksProperies.push( new BlockProperies( pFontName, pFontSize ) );
293
        }
294
        try
295
        {
296
            if ( WordToHtmlUtils.isNotEmpty( bulletText ) )
297
            {
298
                Text textNode = htmlDocumentFacade.createText( bulletText );
299
                pElement.appendChild( textNode );
300
            }
301
302
            List<CharacterRun> characterRuns = WordToHtmlUtils
303
                    .findCharacterRuns( paragraph );
304
            processCharacters( hwpfDocument, currentTableLevel, paragraph,
305
                    pElement, characterRuns, 0, characterRuns.size() );
306
        }
307
        finally
308
        {
309
            blocksProperies.pop();
310
        }
311
312
        if ( style.length() > 0 )
313
            pElement.setAttribute( "style", style.toString() );
314
315
        return;
316
    }
317
318
    protected void processSection( HWPFDocumentCore wordDocument,
319
            Section section, int sectionCounter )
320
    {
321
        Element div = htmlDocumentFacade.document.createElement( "div" );
322
        div.setAttribute( "style", getSectionStyle( section ) );
323
        htmlDocumentFacade.body.appendChild( div );
324
325
        processSectionParagraphes( wordDocument, div, section, 0 );
326
    }
327
328
    @Override
329
    protected void processSingleSection( HWPFDocumentCore wordDocument,
330
            Section section )
331
    {
332
        htmlDocumentFacade.body.setAttribute( "style",
333
                getSectionStyle( section ) );
334
335
        processSectionParagraphes( wordDocument, htmlDocumentFacade.body,
336
                section, 0 );
337
    }
338
339
    protected void processTable( HWPFDocumentCore hwpfDocument, Element flow,
340
            Table table, int thisTableLevel )
341
    {
342
        Element tableHeader = htmlDocumentFacade.createTableHeader();
343
        Element tableBody = htmlDocumentFacade.createTableBody();
344
345
        final int tableRows = table.numRows();
346
347
        int maxColumns = Integer.MIN_VALUE;
348
        for ( int r = 0; r < tableRows; r++ )
349
        {
350
            maxColumns = Math.max( maxColumns, table.getRow( r ).numCells() );
351
        }
352
353
        for ( int r = 0; r < tableRows; r++ )
354
        {
355
            TableRow tableRow = table.getRow( r );
356
357
            Element tableRowElement = htmlDocumentFacade.createTableRow();
358
            StringBuilder tableRowStyle = new StringBuilder();
359
            WordToHtmlUtils.addTableRowProperties( tableRow, tableRowStyle );
360
361
            final int rowCells = tableRow.numCells();
362
            for ( int c = 0; c < rowCells; c++ )
363
            {
364
                TableCell tableCell = tableRow.getCell( c );
365
366
                if ( tableCell.isMerged() && !tableCell.isFirstMerged() )
367
                    continue;
368
369
                if ( tableCell.isVerticallyMerged()
370
                        && !tableCell.isFirstVerticallyMerged() )
371
                    continue;
372
373
                Element tableCellElement;
374
                if ( tableRow.isTableHeader() )
375
                {
376
                    tableCellElement = htmlDocumentFacade
377
                            .createTableHeaderCell();
378
                }
379
                else
380
                {
381
                    tableCellElement = htmlDocumentFacade.createTableCell();
382
                }
383
                StringBuilder tableCellStyle = new StringBuilder();
384
                WordToHtmlUtils.addTableCellProperties( tableRow, tableCell,
385
                        r == 0, r == tableRows - 1, c == 0, c == rowCells - 1,
386
                        tableCellStyle );
387
388
                if ( tableCell.isFirstMerged() )
389
                {
390
                    int count = 0;
391
                    for ( int c1 = c; c1 < rowCells; c1++ )
392
                    {
393
                        TableCell nextCell = tableRow.getCell( c1 );
394
                        if ( nextCell.isMerged() )
395
                            count++;
396
                        if ( !nextCell.isMerged() )
397
                            break;
398
                    }
399
                    tableCellElement.setAttribute( "colspan", "" + count );
400
                }
401
                else
402
                {
403
                    if ( c == rowCells - 1 && c != maxColumns - 1 )
404
                    {
405
                        tableCellElement.setAttribute( "colspan", ""
406
                                + (maxColumns - c) );
407
                    }
408
                }
409
410
                if ( tableCell.isFirstVerticallyMerged() )
411
                {
412
                    int count = 0;
413
                    for ( int r1 = r; r1 < tableRows; r1++ )
414
                    {
415
                        TableRow nextRow = table.getRow( r1 );
416
                        if ( nextRow.numCells() < c )
417
                            break;
418
                        TableCell nextCell = nextRow.getCell( c );
419
                        if ( nextCell.isVerticallyMerged() )
420
                            count++;
421
                        if ( !nextCell.isVerticallyMerged() )
422
                            break;
423
                    }
424
                    tableCellElement.setAttribute( "rowspan", "" + count );
425
                }
426
427
                processSectionParagraphes( hwpfDocument, tableCellElement,
428
                        tableCell, thisTableLevel );
429
430
                if ( !tableCellElement.hasChildNodes() )
431
                {
432
                    tableCellElement.appendChild( htmlDocumentFacade
433
                            .createParagraph() );
434
                }
435
                if ( tableCellStyle.length() > 0 )
436
                    tableCellElement.setAttribute( "style",
437
                            tableCellStyle.toString() );
438
439
                tableRowElement.appendChild( tableCellElement );
440
            }
441
442
            if ( tableRowStyle.length() > 0 )
443
                tableRowElement
444
                        .setAttribute( "style", tableRowStyle.toString() );
445
446
            if ( tableRow.isTableHeader() )
447
            {
448
                tableHeader.appendChild( tableRowElement );
449
            }
450
            else
451
            {
452
                tableBody.appendChild( tableRowElement );
453
            }
454
455
        }
456
457
        final Element tableElement = htmlDocumentFacade.createTable();
458
        if ( tableHeader.hasChildNodes() )
459
        {
460
            tableElement.appendChild( tableHeader );
461
        }
462
        if ( tableBody.hasChildNodes() )
463
        {
464
            tableElement.appendChild( tableBody );
465
            flow.appendChild( tableElement );
466
        }
467
        else
468
        {
469
            logger.log(
470
                    POILogger.WARN,
471
                    "Table without body starting on offset "
472
                            + table.getStartOffset() + " -- "
473
                            + table.getEndOffset() );
474
        }
475
    }
476
477
}
0
  + text/plain
478
  + text/plain
(-)src/org/apache/poi/hwpf/extractor/AbstractWordExtractor.java (+369 lines)
Line 0 Link Here
1
/*
2
 *  ====================================================================
3
 *    Licensed to the Apache Software Foundation (ASF) under one or more
4
 *    contributor license agreements.  See the NOTICE file distributed with
5
 *    this work for additional information regarding copyright ownership.
6
 *    The ASF licenses this file to You under the Apache License, Version 2.0
7
 *    (the "License"); you may not use this file except in compliance with
8
 *    the License.  You may obtain a copy of the License at
9
 *
10
 *        http://www.apache.org/licenses/LICENSE-2.0
11
 *
12
 *    Unless required by applicable law or agreed to in writing, software
13
 *    distributed under the License is distributed on an "AS IS" BASIS,
14
 *    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
 *    See the License for the specific language governing permissions and
16
 *    limitations under the License.
17
 * ====================================================================
18
 */
19
package org.apache.poi.hwpf.extractor;
20
21
import java.util.HashMap;
22
import java.util.List;
23
import java.util.Map;
24
import java.util.regex.Matcher;
25
import java.util.regex.Pattern;
26
27
import org.apache.poi.hwpf.HWPFDocument;
28
import org.apache.poi.hwpf.HWPFDocumentCore;
29
import org.apache.poi.hwpf.model.ListFormatOverride;
30
import org.apache.poi.hwpf.model.ListTables;
31
import org.apache.poi.hwpf.usermodel.CharacterRun;
32
import org.apache.poi.hwpf.usermodel.Paragraph;
33
import org.apache.poi.hwpf.usermodel.Picture;
34
import org.apache.poi.hwpf.usermodel.Range;
35
import org.apache.poi.hwpf.usermodel.Section;
36
import org.apache.poi.hwpf.usermodel.Table;
37
import org.apache.poi.hwpf.usermodel.TableIterator;
38
import org.apache.poi.util.POILogFactory;
39
import org.apache.poi.util.POILogger;
40
import org.w3c.dom.Document;
41
import org.w3c.dom.Element;
42
43
public abstract class AbstractWordExtractor
44
{
45
    // Character code 7; processCharacters strips it from the end of a run's text when inside a table.
    private static final byte BEL_MARK = 7;
46
47
    // Character code 19; a run starting with it makes processCharacters try to parse a field.
    private static final byte FIELD_BEGIN_MARK = 19;
48
49
    // Character code 21; tryField stops scanning at the first run starting with it.
    private static final byte FIELD_END_MARK = 21;
50
51
    // Character code 20; tryField records the run starting with it as the field separator.
    private static final byte FIELD_SEPARATOR_MARK = 20;
52
53
    // Class-wide logger used for warnings about unsupported fields and missing list data.
    private static final POILogger logger = POILogFactory
54
            .getLogger( AbstractWordExtractor.class );
55
56
    /**
     * @return the DOM document the extractor writes into
     */
    public abstract Document getDocument();
57
58
    /**
     * Outputs the text of one character run into <tt>block</tt>. Called by
     * {@link #processCharacters} for every run that is not a picture, a
     * field mark or a special/embedded-object run, after a trailing
     * <tt>\r</tt> (or, inside tables, a trailing BEL character) has been
     * removed from <tt>text</tt>.
     */
    protected abstract void outputCharacters( Element block,
59
            CharacterRun characterRun, String text );
60
61
    protected boolean processCharacters( HWPFDocumentCore hwpfDocument,
62
            int currentTableLevel, Paragraph paragraph, final Element block,
63
            List<CharacterRun> characterRuns, final int start, final int end )
64
    {
65
        boolean haveAnyText = false;
66
67
        for ( int c = start; c < end; c++ )
68
        {
69
            CharacterRun characterRun = characterRuns.get( c );
70
71
            if ( characterRun == null )
72
                throw new AssertionError();
73
74
            if ( hwpfDocument instanceof HWPFDocument
75
                    && ((HWPFDocument) hwpfDocument).getPicturesTable()
76
                            .hasPicture( characterRun ) )
77
            {
78
                HWPFDocument newFormat = (HWPFDocument) hwpfDocument;
79
                Picture picture = newFormat.getPicturesTable().extractPicture(
80
                        characterRun, true );
81
82
                processImage( block, characterRun.text().charAt( 0 ) == 0x01,
83
                        picture );
84
                continue;
85
            }
86
87
            String text = characterRun.text();
88
            if ( text.getBytes().length == 0 )
89
                continue;
90
91
            if ( text.getBytes()[0] == FIELD_BEGIN_MARK )
92
            {
93
                int skipTo = tryField( hwpfDocument, paragraph,
94
                        currentTableLevel, characterRuns, c, block );
95
96
                if ( skipTo != c )
97
                {
98
                    c = skipTo;
99
                    continue;
100
                }
101
102
                continue;
103
            }
104
            if ( text.getBytes()[0] == FIELD_SEPARATOR_MARK )
105
            {
106
                // shall not appear without FIELD_BEGIN_MARK
107
                continue;
108
            }
109
            if ( text.getBytes()[0] == FIELD_END_MARK )
110
            {
111
                // shall not appear without FIELD_BEGIN_MARK
112
                continue;
113
            }
114
115
            if ( characterRun.isSpecialCharacter() || characterRun.isObj()
116
                    || characterRun.isOle2() )
117
            {
118
                continue;
119
            }
120
121
            if ( text.endsWith( "\r" )
122
                    || (text.charAt( text.length() - 1 ) == BEL_MARK && currentTableLevel != 0) )
123
                text = text.substring( 0, text.length() - 1 );
124
125
            outputCharacters( block, characterRun, text );
126
127
            haveAnyText |= text.trim().length() != 0;
128
        }
129
130
        return haveAnyText;
131
    }
132
133
    public void processDocument( HWPFDocumentCore wordDocument )
134
    {
135
        final Range range = wordDocument.getRange();
136
        for ( int s = 0; s < range.numSections(); s++ )
137
        {
138
            processSection( wordDocument, range.getSection( s ), s );
139
        }
140
    }
141
142
    protected void processField( HWPFDocumentCore wordDocument,
143
            Element currentBlock, Paragraph paragraph, int currentTableLevel,
144
            List<CharacterRun> characterRuns, int beginMark, int separatorMark,
145
            int endMark )
146
    {
147
148
        Pattern hyperlinkPattern = Pattern
149
                .compile( "[ \\t\\r\\n]*HYPERLINK \"(.*)\"[ \\t\\r\\n]*" );
150
        Pattern pagerefPattern = Pattern
151
                .compile( "[ \\t\\r\\n]*PAGEREF ([^ ]*)[ \\t\\r\\n]*\\\\h[ \\t\\r\\n]*" );
152
153
        if ( separatorMark - beginMark > 1 )
154
        {
155
            int index = beginMark + 1;
156
            CharacterRun firstAfterBegin = null;
157
            while ( index < separatorMark )
158
            {
159
                firstAfterBegin = paragraph.getCharacterRun( index );
160
                if ( firstAfterBegin == null )
161
                {
162
                    logger.log( POILogger.WARN,
163
                            "Paragraph " + paragraph.getStartOffset() + "--"
164
                                    + paragraph.getEndOffset()
165
                                    + " contains null CharacterRun #" + index );
166
                    index++;
167
                    continue;
168
                }
169
                break;
170
            }
171
172
            if ( firstAfterBegin != null )
173
            {
174
                final Matcher hyperlinkMatcher = hyperlinkPattern
175
                        .matcher( firstAfterBegin.text() );
176
                if ( hyperlinkMatcher.matches() )
177
                {
178
                    String hyperlink = hyperlinkMatcher.group( 1 );
179
                    processHyperlink( wordDocument, currentBlock, paragraph,
180
                            characterRuns, currentTableLevel, hyperlink,
181
                            separatorMark + 1, endMark );
182
                    return;
183
                }
184
185
                final Matcher pagerefMatcher = pagerefPattern
186
                        .matcher( firstAfterBegin.text() );
187
                if ( pagerefMatcher.matches() )
188
                {
189
                    String pageref = pagerefMatcher.group( 1 );
190
                    processPageref( wordDocument, currentBlock, paragraph,
191
                            characterRuns, currentTableLevel, pageref,
192
                            separatorMark + 1, endMark );
193
                    return;
194
                }
195
            }
196
        }
197
198
        StringBuilder debug = new StringBuilder( "Unsupported field type: \n" );
199
        for ( int i = beginMark; i <= endMark; i++ )
200
        {
201
            debug.append( "\t" );
202
            debug.append( paragraph.getCharacterRun( i ) );
203
            debug.append( "\n" );
204
        }
205
        logger.log( POILogger.WARN, debug );
206
207
        // just output field value
208
        if ( separatorMark + 1 < endMark )
209
            processCharacters( wordDocument, currentTableLevel, paragraph,
210
                    currentBlock, characterRuns, separatorMark + 1, endMark );
211
212
        return;
213
    }
214
215
    /**
     * Outputs a HYPERLINK field. Called by {@link #processField} with the
     * link target taken from the field instruction; <tt>i</tt> is the index
     * of the first run after the separator mark and <tt>endMark</tt> the
     * index of the end mark (exclusive bound of the content runs).
     */
    protected abstract void processHyperlink( HWPFDocumentCore wordDocument,
216
            Element currentBlock, Paragraph paragraph,
217
            List<CharacterRun> characterRuns, int currentTableLevel,
218
            String hyperlink, int i, int endMark );
219
220
    /**
     * Outputs a picture found by {@link #processCharacters}.
     * <tt>inlined</tt> is <tt>true</tt> when the run's text starts with
     * character code 1.
     */
    protected abstract void processImage( Element currentBlock,
221
            boolean inlined, Picture picture );
222
223
    /**
     * Outputs a PAGEREF field. Called by {@link #processField} with the name
     * taken from the field instruction and the run range between the
     * separator mark (exclusive) and the end mark (exclusive).
     */
    protected abstract void processPageref( HWPFDocumentCore wordDocument,
224
            Element currentBlock, Paragraph paragraph,
225
            List<CharacterRun> characterRuns, int currentTableLevel,
226
            String pageref, int beginTextInclusive, int endTextExclusive );
227
228
    /**
     * Outputs one paragraph. <tt>bulletText</tt> is the list label computed
     * by {@link #processSectionParagraphes}, or an empty string when the
     * paragraph has no list label.
     */
    protected abstract void processParagraph( HWPFDocumentCore wordDocument,
229
            Element parentFopElement, int currentTableLevel,
230
            Paragraph paragraph, String bulletText );
231
232
    /**
     * Outputs one section; <tt>s</tt> is the section index passed by
     * {@link #processDocument}.
     */
    protected abstract void processSection( HWPFDocumentCore wordDocument,
233
            Section section, int s );
234
235
    protected void processSectionParagraphes( HWPFDocumentCore wordDocument,
236
            Element flow, Range range, int currentTableLevel )
237
    {
238
        final Map<Integer, Table> allTables = new HashMap<Integer, Table>();
239
        for ( TableIterator tableIterator = AbstractWordUtils
240
                .newTableIterator( range, currentTableLevel + 1 ); tableIterator
241
                .hasNext(); )
242
        {
243
            Table next = tableIterator.next();
244
            allTables.put( Integer.valueOf( next.getStartOffset() ), next );
245
        }
246
247
        final ListTables listTables = wordDocument.getListTables();
248
        int currentListInfo = 0;
249
250
        final int paragraphs = range.numParagraphs();
251
        for ( int p = 0; p < paragraphs; p++ )
252
        {
253
            Paragraph paragraph = range.getParagraph( p );
254
255
            if ( allTables.containsKey( Integer.valueOf( paragraph
256
                    .getStartOffset() ) ) )
257
            {
258
                Table table = allTables.get( Integer.valueOf( paragraph
259
                        .getStartOffset() ) );
260
                processTable( wordDocument, flow, table, currentTableLevel + 1 );
261
                continue;
262
            }
263
264
            if ( paragraph.isInTable()
265
                    && paragraph.getTableLevel() != currentTableLevel )
266
            {
267
                continue;
268
            }
269
270
            if ( paragraph.getIlfo() != currentListInfo )
271
            {
272
                currentListInfo = paragraph.getIlfo();
273
            }
274
275
            if ( currentListInfo != 0 )
276
            {
277
                if ( listTables != null )
278
                {
279
                    final ListFormatOverride listFormatOverride = listTables
280
                            .getOverride( paragraph.getIlfo() );
281
282
                    String label = AbstractWordUtils
283
                            .getBulletText( listTables, paragraph,
284
                                    listFormatOverride.getLsid() );
285
286
                    processParagraph( wordDocument, flow, currentTableLevel,
287
                            paragraph, label );
288
                }
289
                else
290
                {
291
                    logger.log( POILogger.WARN,
292
                            "Paragraph #" + paragraph.getStartOffset() + "-"
293
                                    + paragraph.getEndOffset()
294
                                    + " has reference to list structure #"
295
                                    + currentListInfo
296
                                    + ", but listTables not defined in file" );
297
298
                    processParagraph( wordDocument, flow, currentTableLevel,
299
                            paragraph, AbstractWordUtils.EMPTY );
300
                }
301
            }
302
            else
303
            {
304
                processParagraph( wordDocument, flow, currentTableLevel,
305
                        paragraph, AbstractWordUtils.EMPTY );
306
            }
307
        }
308
309
    }
310
311
    protected void processSingleSection( HWPFDocumentCore wordDocument,
312
            Section section )
313
    {
314
        processSection( wordDocument, section, 0 );
315
    }
316
317
    /**
     * Outputs one table into <tt>flow</tt>. Called by
     * {@link #processSectionParagraphes} for a paragraph that starts a
     * table; <tt>newTableLevel</tt> is the caller's table level plus one.
     */
    protected abstract void processTable( HWPFDocumentCore wordDocument,
318
            Element flow, Table table, int newTableLevel );
319
320
    protected int tryField( HWPFDocumentCore wordDocument, Paragraph paragraph,
321
            int currentTableLevel, List<CharacterRun> characterRuns,
322
            int beginMark, Element currentBlock )
323
    {
324
        int separatorMark = -1;
325
        int endMark = -1;
326
        for ( int c = beginMark + 1; c < paragraph.numCharacterRuns(); c++ )
327
        {
328
            CharacterRun characterRun = paragraph.getCharacterRun( c );
329
330
            String text = characterRun.text();
331
            if ( text.getBytes().length == 0 )
332
                continue;
333
334
            if ( text.getBytes()[0] == FIELD_SEPARATOR_MARK )
335
            {
336
                if ( separatorMark != -1 )
337
                {
338
                    // double;
339
                    return beginMark;
340
                }
341
342
                separatorMark = c;
343
                continue;
344
            }
345
346
            if ( text.getBytes()[0] == FIELD_END_MARK )
347
            {
348
                if ( endMark != -1 )
349
                {
350
                    // double;
351
                    return beginMark;
352
                }
353
354
                endMark = c;
355
                break;
356
            }
357
358
        }
359
360
        if ( separatorMark == -1 || endMark == -1 )
361
            return beginMark;
362
363
        processField( wordDocument, currentBlock, paragraph, currentTableLevel,
364
                characterRuns, beginMark, separatorMark, endMark );
365
366
        return endMark;
367
    }
368
369
}
0
  + text/plain
370
  + text/plain
(-)src/org/apache/poi/hwpf/extractor/FoDocumentFacade.java (-32 / +29 lines)
Lines 22-37 Link Here
22
import org.w3c.dom.Element;
22
import org.w3c.dom.Element;
23
import org.w3c.dom.Text;
23
import org.w3c.dom.Text;
24
24
25
public abstract class AbstractToFoExtractor
25
public class FoDocumentFacade
26
{
26
{
27
28
    private static final String NS_XSLFO = "http://www.w3.org/1999/XSL/Format";
27
    private static final String NS_XSLFO = "http://www.w3.org/1999/XSL/Format";
29
28
30
    protected final Document document;
29
    protected final Document document;
31
    protected final Element layoutMasterSet;
30
    protected final Element layoutMasterSet;
32
    protected final Element root;
31
    protected final Element root;
33
32
34
    public AbstractToFoExtractor( Document document )
33
    public FoDocumentFacade( Document document )
35
    {
34
    {
36
        this.document = document;
35
        this.document = document;
37
36
Lines 43-49 Link Here
43
        root.appendChild( layoutMasterSet );
42
        root.appendChild( layoutMasterSet );
44
    }
43
    }
45
44
46
    protected Element addFlowToPageSequence( final Element pageSequence,
45
    public Element addFlowToPageSequence( final Element pageSequence,
47
            String flowName )
46
            String flowName )
48
    {
47
    {
49
        final Element flow = document.createElementNS( NS_XSLFO, "fo:flow" );
48
        final Element flow = document.createElementNS( NS_XSLFO, "fo:flow" );
Lines 53-80 Link Here
53
        return flow;
52
        return flow;
54
    }
53
    }
55
54
56
    protected Element addListItem( Element listBlock )
55
    public Element addListItem( Element listBlock )
57
    {
56
    {
58
        Element result = createListItem();
57
        Element result = createListItem();
59
        listBlock.appendChild( result );
58
        listBlock.appendChild( result );
60
        return result;
59
        return result;
61
    }
60
    }
62
61
63
    protected Element addListItemBody( Element listItem )
62
    public Element addListItemBody( Element listItem )
64
    {
63
    {
65
        Element result = createListItemBody();
64
        Element result = createListItemBody();
66
        listItem.appendChild( result );
65
        listItem.appendChild( result );
67
        return result;
66
        return result;
68
    }
67
    }
69
68
70
    protected Element addListItemLabel( Element listItem, String text )
69
    public Element addListItemLabel( Element listItem, String text )
71
    {
70
    {
72
        Element result = createListItemLabel( text );
71
        Element result = createListItemLabel( text );
73
        listItem.appendChild( result );
72
        listItem.appendChild( result );
74
        return result;
73
        return result;
75
    }
74
    }
76
75
77
    protected Element addPageSequence( String pageMaster )
76
    public Element addPageSequence( String pageMaster )
78
    {
77
    {
79
        final Element pageSequence = document.createElementNS( NS_XSLFO,
78
        final Element pageSequence = document.createElementNS( NS_XSLFO,
80
                "fo:page-sequence" );
79
                "fo:page-sequence" );
Lines 83-89 Link Here
83
        return pageSequence;
82
        return pageSequence;
84
    }
83
    }
85
84
86
    protected Element addRegionBody( Element pageMaster )
85
    public Element addRegionBody( Element pageMaster )
87
    {
86
    {
88
        final Element regionBody = document.createElementNS( NS_XSLFO,
87
        final Element regionBody = document.createElementNS( NS_XSLFO,
89
                "fo:region-body" );
88
                "fo:region-body" );
Lines 92-98 Link Here
92
        return regionBody;
91
        return regionBody;
93
    }
92
    }
94
93
95
    protected Element addSimplePageMaster( String masterName )
94
    public Element addSimplePageMaster( String masterName )
96
    {
95
    {
97
        final Element simplePageMaster = document.createElementNS( NS_XSLFO,
96
        final Element simplePageMaster = document.createElementNS( NS_XSLFO,
98
                "fo:simple-page-master" );
97
                "fo:simple-page-master" );
Lines 102-115 Link Here
102
        return simplePageMaster;
101
        return simplePageMaster;
103
    }
102
    }
104
103
105
    protected Element addTable( Element flow )
104
    public Element createBasicLinkExternal( String externalDestination )
106
    {
107
        final Element table = document.createElementNS( NS_XSLFO, "fo:table" );
108
        flow.appendChild( table );
109
        return table;
110
    }
111
112
    protected Element createBasicLinkExternal( String externalDestination )
113
    {
105
    {
114
        final Element basicLink = document.createElementNS( NS_XSLFO,
106
        final Element basicLink = document.createElementNS( NS_XSLFO,
115
                "fo:basic-link" );
107
                "fo:basic-link" );
Lines 117-123 Link Here
117
        return basicLink;
109
        return basicLink;
118
    }
110
    }
119
111
120
    protected Element createBasicLinkInternal( String internalDestination )
112
    public Element createBasicLinkInternal( String internalDestination )
121
    {
113
    {
122
        final Element basicLink = document.createElementNS( NS_XSLFO,
114
        final Element basicLink = document.createElementNS( NS_XSLFO,
123
                "fo:basic-link" );
115
                "fo:basic-link" );
Lines 125-136 Link Here
125
        return basicLink;
117
        return basicLink;
126
    }
118
    }
127
119
128
    protected Element createBlock()
120
    public Element createBlock()
129
    {
121
    {
130
        return document.createElementNS( NS_XSLFO, "fo:block" );
122
        return document.createElementNS( NS_XSLFO, "fo:block" );
131
    }
123
    }
132
124
133
    protected Element createExternalGraphic( String source )
125
    public Element createExternalGraphic( String source )
134
    {
126
    {
135
        Element result = document.createElementNS( NS_XSLFO,
127
        Element result = document.createElementNS( NS_XSLFO,
136
                "fo:external-graphic" );
128
                "fo:external-graphic" );
Lines 138-169 Link Here
138
        return result;
130
        return result;
139
    }
131
    }
140
132
141
    protected Element createInline()
133
    public Element createInline()
142
    {
134
    {
143
        return document.createElementNS( NS_XSLFO, "fo:inline" );
135
        return document.createElementNS( NS_XSLFO, "fo:inline" );
144
    }
136
    }
145
137
146
    protected Element createLeader()
138
    public Element createLeader()
147
    {
139
    {
148
        return document.createElementNS( NS_XSLFO, "fo:leader" );
140
        return document.createElementNS( NS_XSLFO, "fo:leader" );
149
    }
141
    }
150
142
151
    protected Element createListBlock()
143
    public Element createListBlock()
152
    {
144
    {
153
        return document.createElementNS( NS_XSLFO, "fo:list-block" );
145
        return document.createElementNS( NS_XSLFO, "fo:list-block" );
154
    }
146
    }
155
147
156
    protected Element createListItem()
148
    public Element createListItem()
157
    {
149
    {
158
        return document.createElementNS( NS_XSLFO, "fo:list-item" );
150
        return document.createElementNS( NS_XSLFO, "fo:list-item" );
159
    }
151
    }
160
152
161
    protected Element createListItemBody()
153
    public Element createListItemBody()
162
    {
154
    {
163
        return document.createElementNS( NS_XSLFO, "fo:list-item-body" );
155
        return document.createElementNS( NS_XSLFO, "fo:list-item-body" );
164
    }
156
    }
165
157
166
    protected Element createListItemLabel( String text )
158
    public Element createListItemLabel( String text )
167
    {
159
    {
168
        Element result = document.createElementNS( NS_XSLFO,
160
        Element result = document.createElementNS( NS_XSLFO,
169
                "fo:list-item-label" );
161
                "fo:list-item-label" );
Lines 173-199 Link Here
173
        return result;
165
        return result;
174
    }
166
    }
175
167
176
    protected Element createTableBody()
168
    public Element createTable()
169
    {
170
        return document.createElementNS( NS_XSLFO, "fo:table" );
171
    }
172
173
    public Element createTableBody()
177
    {
174
    {
178
        return document.createElementNS( NS_XSLFO, "fo:table-body" );
175
        return document.createElementNS( NS_XSLFO, "fo:table-body" );
179
    }
176
    }
180
177
181
    protected Element createTableCell()
178
    public Element createTableCell()
182
    {
179
    {
183
        return document.createElementNS( NS_XSLFO, "fo:table-cell" );
180
        return document.createElementNS( NS_XSLFO, "fo:table-cell" );
184
    }
181
    }
185
182
186
    protected Element createTableHeader()
183
    public Element createTableHeader()
187
    {
184
    {
188
        return document.createElementNS( NS_XSLFO, "fo:table-header" );
185
        return document.createElementNS( NS_XSLFO, "fo:table-header" );
189
    }
186
    }
190
187
191
    protected Element createTableRow()
188
    public Element createTableRow()
192
    {
189
    {
193
        return document.createElementNS( NS_XSLFO, "fo:table-row" );
190
        return document.createElementNS( NS_XSLFO, "fo:table-row" );
194
    }
191
    }
195
192
196
    protected Text createText( String data )
193
    public Text createText( String data )
197
    {
194
    {
198
        return document.createTextNode( data );
195
        return document.createTextNode( data );
199
    }
196
    }
(-)src/org/apache/poi/hwpf/extractor/WordToFoUtils.java (-413 / +231 lines)
Lines 1-489 Link Here
1
package org.apache.poi.hwpf.extractor;
1
package org.apache.poi.hwpf.extractor;
2
2
3
import java.lang.reflect.Constructor;
4
import java.lang.reflect.Field;
5
6
import org.apache.poi.hwpf.model.ListLevel;
7
import org.apache.poi.hwpf.model.ListTables;
8
import org.apache.poi.hwpf.usermodel.BorderCode;
3
import org.apache.poi.hwpf.usermodel.BorderCode;
9
import org.apache.poi.hwpf.usermodel.CharacterProperties;
4
import org.apache.poi.hwpf.usermodel.CharacterProperties;
10
import org.apache.poi.hwpf.usermodel.CharacterRun;
5
import org.apache.poi.hwpf.usermodel.CharacterRun;
11
import org.apache.poi.hwpf.usermodel.Paragraph;
6
import org.apache.poi.hwpf.usermodel.Paragraph;
12
import org.apache.poi.hwpf.usermodel.Picture;
7
import org.apache.poi.hwpf.usermodel.Picture;
13
import org.apache.poi.hwpf.usermodel.Range;
14
import org.apache.poi.hwpf.usermodel.Section;
15
import org.apache.poi.hwpf.usermodel.SectionProperties;
16
import org.apache.poi.hwpf.usermodel.TableCell;
8
import org.apache.poi.hwpf.usermodel.TableCell;
17
import org.apache.poi.hwpf.usermodel.TableIterator;
18
import org.apache.poi.hwpf.usermodel.TableRow;
9
import org.apache.poi.hwpf.usermodel.TableRow;
19
import org.w3c.dom.Element;
10
import org.w3c.dom.Element;
20
11
21
public class WordToFoUtils {
12
public class WordToFoUtils extends AbstractWordUtils
22
    static final String EMPTY = "";
13
{
23
14
    public static void setBold( final Element element, final boolean bold )
24
    public static final float TWIPS_PER_INCH = 1440.0f;
15
    {
25
16
        element.setAttribute( "font-weight", bold ? "bold" : "normal" );
26
    public static final int TWIPS_PER_PT = 20;
27
28
    static boolean equals(String str1, String str2) {
29
	return str1 == null ? str2 == null : str1.equals(str2);
30
    }
31
32
    public static String getBorderType(BorderCode borderCode) {
33
	if (borderCode == null)
34
	    throw new IllegalArgumentException("borderCode is null");
35
36
	switch (borderCode.getBorderType()) {
37
	case 1:
38
	case 2:
39
	    return "solid";
40
	case 3:
41
	    return "double";
42
	case 5:
43
	    return "solid";
44
	case 6:
45
	    return "dotted";
46
	case 7:
47
	case 8:
48
	    return "dashed";
49
	case 9:
50
	    return "dotted";
51
	case 10:
52
	case 11:
53
	case 12:
54
	case 13:
55
	case 14:
56
	case 15:
57
	case 16:
58
	case 17:
59
	case 18:
60
	case 19:
61
	    return "double";
62
	case 20:
63
	    return "solid";
64
	case 21:
65
	    return "double";
66
	case 22:
67
	    return "dashed";
68
	case 23:
69
	    return "dashed";
70
	case 24:
71
	    return "ridge";
72
	case 25:
73
	    return "grooved";
74
	default:
75
	    return "solid";
76
	}
77
    }
17
    }
78
18
79
    public static String getBorderWidth(BorderCode borderCode) {
19
    public static void setBorder( Element element, BorderCode borderCode,
80
	int lineWidth = borderCode.getLineWidth();
20
            String where )
81
	int pt = lineWidth / 8;
21
    {
82
	int pte = lineWidth - pt * 8;
22
        if ( element == null )
23
            throw new IllegalArgumentException( "element is null" );
83
24
84
	StringBuilder stringBuilder = new StringBuilder();
25
        if ( borderCode == null || borderCode.getBorderType() == 0 )
85
	stringBuilder.append(pt);
26
            return;
86
	stringBuilder.append(".");
87
	stringBuilder.append(1000 / 8 * pte);
88
	stringBuilder.append("pt");
89
	return stringBuilder.toString();
90
    }
91
27
92
    public static String getBulletText(ListTables listTables,
28
        if ( isEmpty( where ) )
93
	    Paragraph paragraph, int listId) {
29
        {
94
	final ListLevel listLevel = listTables.getLevel(listId,
30
            element.setAttribute( "border-style", getBorderType( borderCode ) );
95
		paragraph.getIlvl());
31
            element.setAttribute( "border-color",
96
32
                    getColor( borderCode.getColor() ) );
97
	if (listLevel.getNumberText() == null)
33
            element.setAttribute( "border-width", getBorderWidth( borderCode ) );
98
	    return EMPTY;
99
100
	StringBuffer bulletBuffer = new StringBuffer();
101
	char[] xst = listLevel.getNumberText().toCharArray();
102
	for (char element : xst) {
103
	    if (element < 9) {
104
		ListLevel numLevel = listTables.getLevel(listId, element);
105
106
		int num = numLevel.getStartAt();
107
		bulletBuffer.append(NumberFormatter.getNumber(num,
108
			listLevel.getNumberFormat()));
109
110
		if (numLevel == listLevel) {
111
		    numLevel.setStartAt(numLevel.getStartAt() + 1);
112
		}
113
114
	    } else {
115
		bulletBuffer.append(element);
116
	    }
117
	}
118
119
	byte follow = getIxchFollow(listLevel);
120
	switch (follow) {
121
	case 0:
122
	    bulletBuffer.append("\t");
123
	    break;
124
	case 1:
125
	    bulletBuffer.append(" ");
126
	    break;
127
	default:
128
	    break;
129
	}
130
131
	return bulletBuffer.toString();
132
    }
133
134
    public static String getColor(int ico) {
135
	switch (ico) {
136
	case 1:
137
	    return "black";
138
	case 2:
139
	    return "blue";
140
	case 3:
141
	    return "cyan";
142
	case 4:
143
	    return "green";
144
	case 5:
145
	    return "magenta";
146
	case 6:
147
	    return "red";
148
	case 7:
149
	    return "yellow";
150
	case 8:
151
	    return "white";
152
	case 9:
153
	    return "darkblue";
154
	case 10:
155
	    return "darkcyan";
156
	case 11:
157
	    return "darkgreen";
158
	case 12:
159
	    return "darkmagenta";
160
	case 13:
161
	    return "darkred";
162
	case 14:
163
	    return "darkyellow";
164
	case 15:
165
	    return "darkgray";
166
	case 16:
167
	    return "lightgray";
168
	default:
169
	    return "black";
170
	}
171
    }
172
173
    public static byte getIxchFollow(ListLevel listLevel) {
174
	try {
175
	    Field field = ListLevel.class.getDeclaredField("_ixchFollow");
176
	    field.setAccessible(true);
177
	    return ((Byte) field.get(listLevel)).byteValue();
178
	} catch (Exception exc) {
179
	    throw new Error(exc);
180
	}
181
    }
182
183
    public static String getJustification(int js) {
184
        switch (js) {
185
        case 0:
186
            return "start";
187
        case 1:
188
            return "center";
189
        case 2:
190
            return "end";
191
        case 3:
192
        case 4:
193
            return "justify";
194
        case 5:
195
            return "center";
196
        case 6:
197
            return "left";
198
        case 7:
199
            return "start";
200
        case 8:
201
            return "end";
202
        case 9:
203
            return "justify";
204
        }
34
        }
205
        return "";
35
        else
206
    }
36
        {
207
37
            element.setAttribute( "border-" + where + "-style",
208
    public static String getListItemNumberLabel(int number, int format) {
38
                    getBorderType( borderCode ) );
209
39
            element.setAttribute( "border-" + where + "-color",
210
	if (format != 0)
40
                    getColor( borderCode.getColor() ) );
211
	    System.err.println("NYI: toListItemNumberLabel(): " + format);
41
            element.setAttribute( "border-" + where + "-width",
212
42
                    getBorderWidth( borderCode ) );
213
	return String.valueOf(number);
43
        }
214
    }
215
216
    public static SectionProperties getSectionProperties(Section section) {
217
	try {
218
	    Field field = Section.class.getDeclaredField("_props");
219
	    field.setAccessible(true);
220
	    return (SectionProperties) field.get(section);
221
	} catch (Exception exc) {
222
	    throw new Error(exc);
223
	}
224
    }
225
226
    static boolean isEmpty(String str) {
227
	return str == null || str.length() == 0;
228
    }
229
230
    static boolean isNotEmpty(String str) {
231
	return !isEmpty(str);
232
    }
233
234
    public static TableIterator newTableIterator(Range range, int level) {
235
	try {
236
	    Constructor<TableIterator> constructor = TableIterator.class
237
		    .getDeclaredConstructor(Range.class, int.class);
238
	    constructor.setAccessible(true);
239
	    return constructor.newInstance(range, Integer.valueOf(level));
240
	} catch (Exception exc) {
241
	    throw new Error(exc);
242
	}
243
    }
244
245
    public static void setBold(final Element element, final boolean bold) {
246
	element.setAttribute("font-weight", bold ? "bold" : "normal");
247
    }
248
249
    public static void setBorder(Element element, BorderCode borderCode,
250
	    String where) {
251
	if (element == null)
252
	    throw new IllegalArgumentException("element is null");
253
254
	if (borderCode == null)
255
	    return;
256
257
	if (isEmpty(where)) {
258
	    element.setAttribute("border-style", getBorderType(borderCode));
259
	    element.setAttribute("border-color",
260
		    getColor(borderCode.getColor()));
261
	    element.setAttribute("border-width", getBorderWidth(borderCode));
262
	} else {
263
	    element.setAttribute("border-" + where + "-style",
264
		    getBorderType(borderCode));
265
	    element.setAttribute("border-" + where + "-color",
266
		    getColor(borderCode.getColor()));
267
	    element.setAttribute("border-" + where + "-width",
268
		    getBorderWidth(borderCode));
269
	}
270
    }
44
    }
271
45
272
    public static void setCharactersProperties(final CharacterRun characterRun,
46
    public static void setCharactersProperties(
273
            final Element inline) {
47
            final CharacterRun characterRun, final Element inline )
48
    {
274
        final CharacterProperties clonedProperties = characterRun
49
        final CharacterProperties clonedProperties = characterRun
275
                .cloneProperties();
50
                .cloneProperties();
276
        StringBuilder textDecorations = new StringBuilder();
51
        StringBuilder textDecorations = new StringBuilder();
277
52
278
        setBorder(inline, clonedProperties.getBrc(), EMPTY);
53
        setBorder( inline, clonedProperties.getBrc(), EMPTY );
279
54
280
        if (characterRun.isCapitalized()) {
55
        if ( characterRun.isCapitalized() )
281
            inline.setAttribute("text-transform", "uppercase");
56
        {
57
            inline.setAttribute( "text-transform", "uppercase" );
282
        }
58
        }
283
        if (characterRun.isHighlighted()) {
59
        if ( characterRun.isHighlighted() )
284
            inline.setAttribute("background-color",
60
        {
285
                    getColor(clonedProperties.getIcoHighlight()));
61
            inline.setAttribute( "background-color",
62
                    getColor( clonedProperties.getIcoHighlight() ) );
286
        }
63
        }
287
        if (characterRun.isStrikeThrough()) {
64
        if ( characterRun.isStrikeThrough() )
288
            if (textDecorations.length() > 0)
65
        {
289
                textDecorations.append(" ");
66
            if ( textDecorations.length() > 0 )
290
            textDecorations.append("line-through");
67
                textDecorations.append( " " );
68
            textDecorations.append( "line-through" );
291
        }
69
        }
292
        if (characterRun.isShadowed()) {
70
        if ( characterRun.isShadowed() )
293
            inline.setAttribute("text-shadow", characterRun.getFontSize() / 24
71
        {
294
                    + "pt");
72
            inline.setAttribute( "text-shadow", characterRun.getFontSize() / 24
73
                    + "pt" );
295
        }
74
        }
296
        if (characterRun.isSmallCaps()) {
75
        if ( characterRun.isSmallCaps() )
297
            inline.setAttribute("font-variant", "small-caps");
76
        {
77
            inline.setAttribute( "font-variant", "small-caps" );
298
        }
78
        }
299
        if (characterRun.getSubSuperScriptIndex() == 1) {
79
        if ( characterRun.getSubSuperScriptIndex() == 1 )
300
            inline.setAttribute("baseline-shift", "super");
80
        {
301
            inline.setAttribute("font-size", "smaller");
81
            inline.setAttribute( "baseline-shift", "super" );
82
            inline.setAttribute( "font-size", "smaller" );
302
        }
83
        }
303
        if (characterRun.getSubSuperScriptIndex() == 2) {
84
        if ( characterRun.getSubSuperScriptIndex() == 2 )
304
            inline.setAttribute("baseline-shift", "sub");
85
        {
305
            inline.setAttribute("font-size", "smaller");
86
            inline.setAttribute( "baseline-shift", "sub" );
87
            inline.setAttribute( "font-size", "smaller" );
306
        }
88
        }
307
        if (characterRun.getUnderlineCode() > 0) {
89
        if ( characterRun.getUnderlineCode() > 0 )
308
            if (textDecorations.length() > 0)
90
        {
309
                textDecorations.append(" ");
91
            if ( textDecorations.length() > 0 )
310
            textDecorations.append("underline");
92
                textDecorations.append( " " );
93
            textDecorations.append( "underline" );
311
        }
94
        }
312
        if (characterRun.isVanished()) {
95
        if ( characterRun.isVanished() )
313
            inline.setAttribute("visibility", "hidden");
96
        {
97
            inline.setAttribute( "visibility", "hidden" );
314
        }
98
        }
315
        if (textDecorations.length() > 0) {
99
        if ( textDecorations.length() > 0 )
316
            inline.setAttribute("text-decoration", textDecorations.toString());
100
        {
101
            inline.setAttribute( "text-decoration", textDecorations.toString() );
317
        }
102
        }
318
    }
103
    }
319
104
320
    public static void setFontFamily(final Element element,
105
    public static void setFontFamily( final Element element,
321
	    final String fontFamily) {
106
            final String fontFamily )
322
	element.setAttribute("font-family", fontFamily);
107
    {
108
        if ( isEmpty( fontFamily ) )
109
            return;
110
111
        element.setAttribute( "font-family", fontFamily );
323
    }
112
    }
324
113
325
    public static void setFontSize(final Element element, final int fontSize) {
114
    public static void setFontSize( final Element element, final int fontSize )
326
	element.setAttribute("font-size", String.valueOf(fontSize));
115
    {
116
        element.setAttribute( "font-size", String.valueOf( fontSize ) );
327
    }
117
    }
328
118
329
    public static void setIndent(Paragraph paragraph, Element block) {
119
    public static void setIndent( Paragraph paragraph, Element block )
330
	if (paragraph.getFirstLineIndent() != 0) {
120
    {
331
	    block.setAttribute(
121
        if ( paragraph.getFirstLineIndent() != 0 )
332
		    "text-indent",
122
        {
333
		    String.valueOf(paragraph.getFirstLineIndent()
123
            block.setAttribute(
334
			    / TWIPS_PER_PT)
124
                    "text-indent",
335
			    + "pt");
125
                    String.valueOf( paragraph.getFirstLineIndent()
336
	}
126
                            / TWIPS_PER_PT )
337
	if (paragraph.getIndentFromLeft() != 0) {
127
                            + "pt" );
338
	    block.setAttribute(
128
        }
339
		    "start-indent",
129
        if ( paragraph.getIndentFromLeft() != 0 )
340
		    String.valueOf(paragraph.getIndentFromLeft() / TWIPS_PER_PT)
130
        {
341
			    + "pt");
131
            block.setAttribute(
342
	}
132
                    "start-indent",
343
	if (paragraph.getIndentFromRight() != 0) {
133
                    String.valueOf( paragraph.getIndentFromLeft()
344
	    block.setAttribute(
134
                            / TWIPS_PER_PT )
345
		    "end-indent",
135
                            + "pt" );
346
		    String.valueOf(paragraph.getIndentFromRight()
136
        }
347
			    / TWIPS_PER_PT)
137
        if ( paragraph.getIndentFromRight() != 0 )
348
			    + "pt");
138
        {
349
	}
139
            block.setAttribute(
350
	if (paragraph.getSpacingBefore() != 0) {
140
                    "end-indent",
351
	    block.setAttribute("space-before",
141
                    String.valueOf( paragraph.getIndentFromRight()
352
		    String.valueOf(paragraph.getSpacingBefore() / TWIPS_PER_PT)
142
                            / TWIPS_PER_PT )
353
			    + "pt");
143
                            + "pt" );
354
	}
144
        }
355
	if (paragraph.getSpacingAfter() != 0) {
145
        if ( paragraph.getSpacingBefore() != 0 )
356
	    block.setAttribute("space-after",
146
        {
357
		    String.valueOf(paragraph.getSpacingAfter() / TWIPS_PER_PT)
147
            block.setAttribute(
358
			    + "pt");
148
                    "space-before",
359
	}
149
                    String.valueOf( paragraph.getSpacingBefore() / TWIPS_PER_PT )
150
                            + "pt" );
151
        }
152
        if ( paragraph.getSpacingAfter() != 0 )
153
        {
154
            block.setAttribute( "space-after",
155
                    String.valueOf( paragraph.getSpacingAfter() / TWIPS_PER_PT )
156
                            + "pt" );
157
        }
360
    }
158
    }
361
159
362
    public static void setItalic(final Element element, final boolean italic) {
160
    public static void setItalic( final Element element, final boolean italic )
363
	element.setAttribute("font-style", italic ? "italic" : "normal");
161
    {
162
        element.setAttribute( "font-style", italic ? "italic" : "normal" );
364
    }
163
    }
365
164
366
    public static void setJustification(Paragraph paragraph,
165
    public static void setJustification( Paragraph paragraph,
367
            final Element element) {
166
            final Element element )
368
        String justification = getJustification(paragraph.getJustification());
167
    {
369
        if (isNotEmpty(justification))
168
        String justification = getJustification( paragraph.getJustification() );
370
            element.setAttribute("text-align", justification);
169
        if ( isNotEmpty( justification ) )
170
            element.setAttribute( "text-align", justification );
371
    }
171
    }
372
172
373
    public static void setParagraphProperties(Paragraph paragraph, Element block) {
173
    public static void setParagraphProperties( Paragraph paragraph,
374
	setIndent(paragraph, block);
174
            Element block )
375
	setJustification(paragraph, block);
175
    {
176
        setIndent( paragraph, block );
177
        setJustification( paragraph, block );
376
178
377
	setBorder(block, paragraph.getBottomBorder(), "bottom");
179
        setBorder( block, paragraph.getBottomBorder(), "bottom" );
378
	setBorder(block, paragraph.getLeftBorder(), "left");
180
        setBorder( block, paragraph.getLeftBorder(), "left" );
379
	setBorder(block, paragraph.getRightBorder(), "right");
181
        setBorder( block, paragraph.getRightBorder(), "right" );
380
	setBorder(block, paragraph.getTopBorder(), "top");
182
        setBorder( block, paragraph.getTopBorder(), "top" );
381
183
382
	if (paragraph.pageBreakBefore()) {
184
        if ( paragraph.pageBreakBefore() )
383
	    block.setAttribute("break-before", "page");
185
        {
384
	}
186
            block.setAttribute( "break-before", "page" );
187
        }
385
188
386
	block.setAttribute("hyphenate",
189
        block.setAttribute( "hyphenate",
387
		String.valueOf(paragraph.isAutoHyphenated()));
190
                String.valueOf( paragraph.isAutoHyphenated() ) );
388
191
389
	if (paragraph.keepOnPage()) {
192
        if ( paragraph.keepOnPage() )
390
	    block.setAttribute("keep-together.within-page", "always");
193
        {
391
	}
194
            block.setAttribute( "keep-together.within-page", "always" );
195
        }
392
196
393
	if (paragraph.keepWithNext()) {
197
        if ( paragraph.keepWithNext() )
394
	    block.setAttribute("keep-with-next.within-page", "always");
198
        {
395
	}
199
            block.setAttribute( "keep-with-next.within-page", "always" );
200
        }
396
201
397
	block.setAttribute("linefeed-treatment", "preserve");
202
        block.setAttribute( "linefeed-treatment", "preserve" );
398
	block.setAttribute("white-space-collapse", "false");
203
        block.setAttribute( "white-space-collapse", "false" );
399
    }
204
    }
400
205
401
    public static void setPictureProperties(Picture picture,
206
    public static void setPictureProperties( Picture picture,
402
            Element graphicElement) {
207
            Element graphicElement )
208
    {
403
        final int aspectRatioX = picture.getAspectRatioX();
209
        final int aspectRatioX = picture.getAspectRatioX();
404
        final int aspectRatioY = picture.getAspectRatioY();
210
        final int aspectRatioY = picture.getAspectRatioY();
405
211
406
        if (aspectRatioX > 0) {
212
        if ( aspectRatioX > 0 )
407
            graphicElement.setAttribute("content-width", ((picture.getDxaGoal()
213
        {
408
                    * aspectRatioX / 100) / WordToFoUtils.TWIPS_PER_PT)
214
            graphicElement
409
                    + "pt");
215
                    .setAttribute( "content-width", ((picture.getDxaGoal()
410
        } else
216
                            * aspectRatioX / 100) / TWIPS_PER_PT)
411
            graphicElement.setAttribute("content-width",
217
                            + "pt" );
412
                    (picture.getDxaGoal() / WordToFoUtils.TWIPS_PER_PT) + "pt");
218
        }
219
        else
220
            graphicElement.setAttribute( "content-width",
221
                    (picture.getDxaGoal() / TWIPS_PER_PT) + "pt" );
413
222
414
        if (aspectRatioY > 0)
223
        if ( aspectRatioY > 0 )
415
            graphicElement
224
            graphicElement
416
                    .setAttribute("content-height", ((picture.getDyaGoal()
225
                    .setAttribute( "content-height", ((picture.getDyaGoal()
417
                            * aspectRatioY / 100) / WordToFoUtils.TWIPS_PER_PT)
226
                            * aspectRatioY / 100) / TWIPS_PER_PT)
418
                            + "pt");
227
                            + "pt" );
419
        else
228
        else
420
            graphicElement.setAttribute("content-height",
229
            graphicElement.setAttribute( "content-height",
421
                    (picture.getDyaGoal() / WordToFoUtils.TWIPS_PER_PT) + "pt");
230
                    (picture.getDyaGoal() / TWIPS_PER_PT) + "pt" );
422
231
423
        if (aspectRatioX <= 0 || aspectRatioY <= 0) {
232
        if ( aspectRatioX <= 0 || aspectRatioY <= 0 )
424
            graphicElement.setAttribute("scaling", "uniform");
233
        {
425
        } else {
234
            graphicElement.setAttribute( "scaling", "uniform" );
426
            graphicElement.setAttribute("scaling", "non-uniform");
235
        }
236
        else
237
        {
238
            graphicElement.setAttribute( "scaling", "non-uniform" );
427
        }
239
        }
428
240
429
        graphicElement.setAttribute("vertical-align", "text-bottom");
241
        graphicElement.setAttribute( "vertical-align", "text-bottom" );
430
242
431
        if (picture.getDyaCropTop() != 0 || picture.getDxaCropRight() != 0
243
        if ( picture.getDyaCropTop() != 0 || picture.getDxaCropRight() != 0
432
                || picture.getDyaCropBottom() != 0
244
                || picture.getDyaCropBottom() != 0
433
                || picture.getDxaCropLeft() != 0) {
245
                || picture.getDxaCropLeft() != 0 )
434
            int rectTop = picture.getDyaCropTop() / WordToFoUtils.TWIPS_PER_PT;
246
        {
435
            int rectRight = picture.getDxaCropRight()
247
            int rectTop = picture.getDyaCropTop() / TWIPS_PER_PT;
436
                    / WordToFoUtils.TWIPS_PER_PT;
248
            int rectRight = picture.getDxaCropRight() / TWIPS_PER_PT;
437
            int rectBottom = picture.getDyaCropBottom()
249
            int rectBottom = picture.getDyaCropBottom() / TWIPS_PER_PT;
438
                    / WordToFoUtils.TWIPS_PER_PT;
250
            int rectLeft = picture.getDxaCropLeft() / TWIPS_PER_PT;
439
            int rectLeft = picture.getDxaCropLeft()
251
            graphicElement.setAttribute( "clip", "rect(" + rectTop + "pt, "
440
                    / WordToFoUtils.TWIPS_PER_PT;
441
            graphicElement.setAttribute("clip", "rect(" + rectTop + "pt, "
442
                    + rectRight + "pt, " + rectBottom + "pt, " + rectLeft
252
                    + rectRight + "pt, " + rectBottom + "pt, " + rectLeft
443
                    + "pt)");
253
                    + "pt)" );
444
            graphicElement.setAttribute("oveerflow", "hidden");
254
            graphicElement.setAttribute( "oveerflow", "hidden" );
445
        }
255
        }
446
    }
256
    }
447
257
448
    public static void setTableCellProperties(TableRow tableRow,
258
    public static void setTableCellProperties( TableRow tableRow,
449
	    TableCell tableCell, Element element, boolean toppest,
259
            TableCell tableCell, Element element, boolean toppest,
450
	    boolean bottomest, boolean leftest, boolean rightest) {
260
            boolean bottomest, boolean leftest, boolean rightest )
451
	element.setAttribute("width", (tableCell.getWidth() / TWIPS_PER_INCH)
261
    {
452
		+ "in");
262
        element.setAttribute( "width", (tableCell.getWidth() / TWIPS_PER_INCH)
453
	element.setAttribute("padding-start",
263
                + "in" );
454
		(tableRow.getGapHalf() / TWIPS_PER_INCH) + "in");
264
        element.setAttribute( "padding-start",
455
	element.setAttribute("padding-end",
265
                (tableRow.getGapHalf() / TWIPS_PER_INCH) + "in" );
456
		(tableRow.getGapHalf() / TWIPS_PER_INCH) + "in");
266
        element.setAttribute( "padding-end",
267
                (tableRow.getGapHalf() / TWIPS_PER_INCH) + "in" );
457
268
458
	BorderCode top = tableCell.getBrcTop() != null ? tableCell.getBrcTop()
269
        BorderCode top = tableCell.getBrcTop() != null
459
		: toppest ? tableRow.getTopBorder() : tableRow
270
                && tableCell.getBrcTop().getBorderType() != 0 ? tableCell
460
			.getHorizontalBorder();
271
                .getBrcTop() : toppest ? tableRow.getTopBorder() : tableRow
461
	BorderCode bottom = tableCell.getBrcBottom() != null ? tableCell
272
                .getHorizontalBorder();
462
		.getBrcBottom() : bottomest ? tableRow.getBottomBorder()
273
        BorderCode bottom = tableCell.getBrcBottom() != null
463
		: tableRow.getHorizontalBorder();
274
                && tableCell.getBrcBottom().getBorderType() != 0 ? tableCell
275
                .getBrcBottom() : bottomest ? tableRow.getBottomBorder()
276
                : tableRow.getHorizontalBorder();
464
277
465
	BorderCode left = tableCell.getBrcLeft() != null ? tableCell
278
        BorderCode left = tableCell.getBrcLeft() != null
466
		.getBrcLeft() : leftest ? tableRow.getLeftBorder() : tableRow
279
                && tableCell.getBrcLeft().getBorderType() != 0 ? tableCell
467
		.getVerticalBorder();
280
                .getBrcLeft() : leftest ? tableRow.getLeftBorder() : tableRow
468
	BorderCode right = tableCell.getBrcRight() != null ? tableCell
281
                .getVerticalBorder();
469
		.getBrcRight() : rightest ? tableRow.getRightBorder()
282
        BorderCode right = tableCell.getBrcRight() != null
470
		: tableRow.getVerticalBorder();
283
                && tableCell.getBrcRight().getBorderType() != 0 ? tableCell
284
                .getBrcRight() : rightest ? tableRow.getRightBorder()
285
                : tableRow.getVerticalBorder();
471
286
472
	setBorder(element, bottom, "bottom");
287
        setBorder( element, bottom, "bottom" );
473
	setBorder(element, left, "left");
288
        setBorder( element, left, "left" );
474
	setBorder(element, right, "right");
289
        setBorder( element, right, "right" );
475
	setBorder(element, top, "top");
290
        setBorder( element, top, "top" );
476
    }
291
    }
477
292
478
    public static void setTableRowProperties(TableRow tableRow,
293
    public static void setTableRowProperties( TableRow tableRow,
479
	    Element tableRowElement) {
294
            Element tableRowElement )
480
	if (tableRow.getRowHeight() > 0) {
295
    {
481
	    tableRowElement.setAttribute("height",
296
        if ( tableRow.getRowHeight() > 0 )
482
		    (tableRow.getRowHeight() / TWIPS_PER_INCH) + "in");
297
        {
483
	}
298
            tableRowElement.setAttribute( "height",
484
	if (!tableRow.cantSplit()) {
299
                    (tableRow.getRowHeight() / TWIPS_PER_INCH) + "in" );
485
	    tableRowElement.setAttribute("keep-together", "always");
300
        }
486
	}
301
        if ( !tableRow.cantSplit() )
302
        {
303
            tableRowElement.setAttribute( "keep-together", "always" );
304
        }
487
    }
305
    }
488
306
489
}
307
}
(-)src/org/apache/poi/hwpf/extractor/AbstractToFoExtractor.java (-204 lines)
Lines 1-204 Link Here
1
/*
2
 *  ====================================================================
3
 *    Licensed to the Apache Software Foundation (ASF) under one or more
4
 *    contributor license agreements.  See the NOTICE file distributed with
5
 *    this work for additional information regarding copyright ownership.
6
 *    The ASF licenses this file to You under the Apache License, Version 2.0
7
 *    (the "License"); you may not use this file except in compliance with
8
 *    the License.  You may obtain a copy of the License at
9
 *
10
 *        http://www.apache.org/licenses/LICENSE-2.0
11
 *
12
 *    Unless required by applicable law or agreed to in writing, software
13
 *    distributed under the License is distributed on an "AS IS" BASIS,
14
 *    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
 *    See the License for the specific language governing permissions and
16
 *    limitations under the License.
17
 * ====================================================================
18
 */
19
package org.apache.poi.hwpf.extractor;
20
21
import org.w3c.dom.Document;
22
import org.w3c.dom.Element;
23
import org.w3c.dom.Text;
24
25
public abstract class AbstractToFoExtractor
26
{
27
28
    private static final String NS_XSLFO = "http://www.w3.org/1999/XSL/Format";
29
30
    protected final Document document;
31
    protected final Element layoutMasterSet;
32
    protected final Element root;
33
34
    public AbstractToFoExtractor( Document document )
35
    {
36
        this.document = document;
37
38
        root = document.createElementNS( NS_XSLFO, "fo:root" );
39
        document.appendChild( root );
40
41
        layoutMasterSet = document.createElementNS( NS_XSLFO,
42
                "fo:layout-master-set" );
43
        root.appendChild( layoutMasterSet );
44
    }
45
46
    protected Element addFlowToPageSequence( final Element pageSequence,
47
            String flowName )
48
    {
49
        final Element flow = document.createElementNS( NS_XSLFO, "fo:flow" );
50
        flow.setAttribute( "flow-name", flowName );
51
        pageSequence.appendChild( flow );
52
53
        return flow;
54
    }
55
56
    protected Element addListItem( Element listBlock )
57
    {
58
        Element result = createListItem();
59
        listBlock.appendChild( result );
60
        return result;
61
    }
62
63
    protected Element addListItemBody( Element listItem )
64
    {
65
        Element result = createListItemBody();
66
        listItem.appendChild( result );
67
        return result;
68
    }
69
70
    protected Element addListItemLabel( Element listItem, String text )
71
    {
72
        Element result = createListItemLabel( text );
73
        listItem.appendChild( result );
74
        return result;
75
    }
76
77
    protected Element addPageSequence( String pageMaster )
78
    {
79
        final Element pageSequence = document.createElementNS( NS_XSLFO,
80
                "fo:page-sequence" );
81
        pageSequence.setAttribute( "master-reference", pageMaster );
82
        root.appendChild( pageSequence );
83
        return pageSequence;
84
    }
85
86
    protected Element addRegionBody( Element pageMaster )
87
    {
88
        final Element regionBody = document.createElementNS( NS_XSLFO,
89
                "fo:region-body" );
90
        pageMaster.appendChild( regionBody );
91
92
        return regionBody;
93
    }
94
95
    protected Element addSimplePageMaster( String masterName )
96
    {
97
        final Element simplePageMaster = document.createElementNS( NS_XSLFO,
98
                "fo:simple-page-master" );
99
        simplePageMaster.setAttribute( "master-name", masterName );
100
        layoutMasterSet.appendChild( simplePageMaster );
101
102
        return simplePageMaster;
103
    }
104
105
    protected Element createBasicLinkExternal( String externalDestination )
106
    {
107
        final Element basicLink = document.createElementNS( NS_XSLFO,
108
                "fo:basic-link" );
109
        basicLink.setAttribute( "external-destination", externalDestination );
110
        return basicLink;
111
    }
112
113
    protected Element createBasicLinkInternal( String internalDestination )
114
    {
115
        final Element basicLink = document.createElementNS( NS_XSLFO,
116
                "fo:basic-link" );
117
        basicLink.setAttribute( "internal-destination", internalDestination );
118
        return basicLink;
119
    }
120
121
    protected Element createBlock()
122
    {
123
        return document.createElementNS( NS_XSLFO, "fo:block" );
124
    }
125
126
    protected Element createExternalGraphic( String source )
127
    {
128
        Element result = document.createElementNS( NS_XSLFO,
129
                "fo:external-graphic" );
130
        result.setAttribute( "src", "url('" + source + "')" );
131
        return result;
132
    }
133
134
    protected Element createInline()
135
    {
136
        return document.createElementNS( NS_XSLFO, "fo:inline" );
137
    }
138
139
    protected Element createLeader()
140
    {
141
        return document.createElementNS( NS_XSLFO, "fo:leader" );
142
    }
143
144
    protected Element createListBlock()
145
    {
146
        return document.createElementNS( NS_XSLFO, "fo:list-block" );
147
    }
148
149
    protected Element createListItem()
150
    {
151
        return document.createElementNS( NS_XSLFO, "fo:list-item" );
152
    }
153
154
    protected Element createListItemBody()
155
    {
156
        return document.createElementNS( NS_XSLFO, "fo:list-item-body" );
157
    }
158
159
    protected Element createListItemLabel( String text )
160
    {
161
        Element result = document.createElementNS( NS_XSLFO,
162
                "fo:list-item-label" );
163
        Element block = createBlock();
164
        block.appendChild( document.createTextNode( text ) );
165
        result.appendChild( block );
166
        return result;
167
    }
168
169
    protected Element createTable()
170
    {
171
        return document.createElementNS( NS_XSLFO, "fo:table" );
172
    }
173
174
    protected Element createTableBody()
175
    {
176
        return document.createElementNS( NS_XSLFO, "fo:table-body" );
177
    }
178
179
    protected Element createTableCell()
180
    {
181
        return document.createElementNS( NS_XSLFO, "fo:table-cell" );
182
    }
183
184
    protected Element createTableHeader()
185
    {
186
        return document.createElementNS( NS_XSLFO, "fo:table-header" );
187
    }
188
189
    protected Element createTableRow()
190
    {
191
        return document.createElementNS( NS_XSLFO, "fo:table-row" );
192
    }
193
194
    protected Text createText( String data )
195
    {
196
        return document.createTextNode( data );
197
    }
198
199
    public Document getDocument()
200
    {
201
        return document;
202
    }
203
204
}
(-)src/org/apache/poi/hwpf/extractor/HtmlDocumentFacade.java (+109 lines)
Line 0 Link Here
1
/*
2
 *  ====================================================================
3
 *    Licensed to the Apache Software Foundation (ASF) under one or more
4
 *    contributor license agreements.  See the NOTICE file distributed with
5
 *    this work for additional information regarding copyright ownership.
6
 *    The ASF licenses this file to You under the Apache License, Version 2.0
7
 *    (the "License"); you may not use this file except in compliance with
8
 *    the License.  You may obtain a copy of the License at
9
 *
10
 *        http://www.apache.org/licenses/LICENSE-2.0
11
 *
12
 *    Unless required by applicable law or agreed to in writing, software
13
 *    distributed under the License is distributed on an "AS IS" BASIS,
14
 *    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
 *    See the License for the specific language governing permissions and
16
 *    limitations under the License.
17
 * ====================================================================
18
 */
19
package org.apache.poi.hwpf.extractor;
20
21
import org.w3c.dom.Document;
22
import org.w3c.dom.Element;
23
import org.w3c.dom.Text;
24
25
public class HtmlDocumentFacade
26
{
27
28
    protected final Element body;
29
    protected final Document document;
30
    protected final Element head;
31
    protected final Element html;
32
33
    public HtmlDocumentFacade( Document document )
34
    {
35
        this.document = document;
36
37
        html = document.createElement( "html" );
38
        document.appendChild( html );
39
40
        body = document.createElement( "body" );
41
        head = document.createElement( "head" );
42
43
        html.appendChild( head );
44
        html.appendChild( body );
45
    }
46
47
    public Element createHyperlink( String internalDestination )
48
    {
49
        final Element basicLink = document.createElement( "a" );
50
        basicLink.setAttribute( "href", internalDestination );
51
        return basicLink;
52
    }
53
54
    public Element createListItem()
55
    {
56
        return document.createElement( "li" );
57
    }
58
59
    public Element createParagraph()
60
    {
61
        return document.createElement( "p" );
62
    }
63
64
    public Element createTable()
65
    {
66
        return document.createElement( "table" );
67
    }
68
69
    public Element createTableBody()
70
    {
71
        return document.createElement( "tbody" );
72
    }
73
74
    public Element createTableCell()
75
    {
76
        return document.createElement( "td" );
77
    }
78
79
    public Element createTableHeader()
80
    {
81
        return document.createElement( "thead" );
82
    }
83
84
    public Element createTableHeaderCell()
85
    {
86
        return document.createElement( "th" );
87
    }
88
89
    public Element createTableRow()
90
    {
91
        return document.createElement( "tr" );
92
    }
93
94
    public Text createText( String data )
95
    {
96
        return document.createTextNode( data );
97
    }
98
99
    public Element createUnorderedList()
100
    {
101
        return document.createElement( "ul" );
102
    }
103
104
    public Document getDocument()
105
    {
106
        return document;
107
    }
108
109
}
0
  + text/plain
110
  + text/plain
(-)src/org/apache/poi/hwpf/extractor/WordToFoExtractor.java (-266 / +131 lines)
Lines 19-32 Link Here
19
package org.apache.poi.hwpf.extractor;
19
package org.apache.poi.hwpf.extractor;
20
20
21
import java.io.File;
21
import java.io.File;
22
import java.io.FileInputStream;
23
import java.io.FileWriter;
22
import java.io.FileWriter;
24
import java.io.IOException;
25
import java.util.HashMap;
23
import java.util.HashMap;
24
import java.util.List;
26
import java.util.Map;
25
import java.util.Map;
27
import java.util.Stack;
26
import java.util.Stack;
28
import java.util.regex.Matcher;
29
import java.util.regex.Pattern;
30
27
31
import javax.xml.parsers.DocumentBuilderFactory;
28
import javax.xml.parsers.DocumentBuilderFactory;
32
import javax.xml.transform.OutputKeys;
29
import javax.xml.transform.OutputKeys;
Lines 36-43 Link Here
36
import javax.xml.transform.stream.StreamResult;
33
import javax.xml.transform.stream.StreamResult;
37
34
38
import org.apache.poi.hwpf.HWPFDocument;
35
import org.apache.poi.hwpf.HWPFDocument;
36
import org.apache.poi.hwpf.HWPFDocumentCore;
39
import org.apache.poi.hwpf.model.ListFormatOverride;
37
import org.apache.poi.hwpf.model.ListFormatOverride;
40
import org.apache.poi.hwpf.model.ListTables;
38
import org.apache.poi.hwpf.model.ListTables;
39
import org.apache.poi.hwpf.usermodel.BorderCode;
41
import org.apache.poi.hwpf.usermodel.CharacterRun;
40
import org.apache.poi.hwpf.usermodel.CharacterRun;
42
import org.apache.poi.hwpf.usermodel.Paragraph;
41
import org.apache.poi.hwpf.usermodel.Paragraph;
43
import org.apache.poi.hwpf.usermodel.Picture;
42
import org.apache.poi.hwpf.usermodel.Picture;
Lines 54-65 Link Here
54
import org.w3c.dom.Element;
53
import org.w3c.dom.Element;
55
import org.w3c.dom.Text;
54
import org.w3c.dom.Text;
56
55
57
import static org.apache.poi.hwpf.extractor.WordToFoUtils.TWIPS_PER_INCH;
58
59
/**
56
/**
60
 * @author Sergey Vladimirov (vlsergey {at} gmail {dot} com)
57
 * @author Sergey Vladimirov (vlsergey {at} gmail {dot} com)
61
 */
58
 */
62
public class WordToFoExtractor extends AbstractToFoExtractor
59
public class WordToFoExtractor extends AbstractWordExtractor
63
{
60
{
64
61
65
    /**
62
    /**
Lines 84-118 Link Here
84
        }
81
        }
85
    }
82
    }
86
83
87
    private static final byte BEL_MARK = 7;
88
89
    private static final byte FIELD_BEGIN_MARK = 19;
90
91
    private static final byte FIELD_END_MARK = 21;
92
93
    private static final byte FIELD_SEPARATOR_MARK = 20;
94
95
    private static final POILogger logger = POILogFactory
84
    private static final POILogger logger = POILogFactory
96
            .getLogger( WordToFoExtractor.class );
85
            .getLogger( WordToFoExtractor.class );
97
86
98
    private static HWPFDocument loadDoc( File docFile ) throws IOException
87
    public static String getBorderType( BorderCode borderCode )
99
    {
88
    {
100
        final FileInputStream istream = new FileInputStream( docFile );
89
        if ( borderCode == null )
101
        try
90
            throw new IllegalArgumentException( "borderCode is null" );
91
92
        switch ( borderCode.getBorderType() )
102
        {
93
        {
103
            return new HWPFDocument( istream );
94
        case 1:
104
        }
95
        case 2:
105
        finally
96
            return "solid";
106
        {
97
        case 3:
107
            try
98
            return "double";
108
            {
99
        case 5:
109
                istream.close();
100
            return "solid";
110
            }
101
        case 6:
111
            catch ( Exception exc )
102
            return "dotted";
112
            {
103
        case 7:
113
                logger.log( POILogger.ERROR,
104
        case 8:
114
                        "Unable to close FileInputStream: " + exc, exc );
105
            return "dashed";
115
            }
106
        case 9:
107
            return "dotted";
108
        case 10:
109
        case 11:
110
        case 12:
111
        case 13:
112
        case 14:
113
        case 15:
114
        case 16:
115
        case 17:
116
        case 18:
117
        case 19:
118
            return "double";
119
        case 20:
120
            return "solid";
121
        case 21:
122
            return "double";
123
        case 22:
124
            return "dashed";
125
        case 23:
126
            return "dashed";
127
        case 24:
128
            return "ridge";
129
        case 25:
130
            return "grooved";
131
        default:
132
            return "solid";
116
        }
133
        }
117
    }
134
    }
118
135
Lines 160-166 Link Here
160
177
161
    static Document process( File docFile ) throws Exception
178
    static Document process( File docFile ) throws Exception
162
    {
179
    {
163
        final HWPFDocument hwpfDocument = loadDoc( docFile );
180
        final HWPFDocumentCore hwpfDocument = WordToFoUtils.loadDoc( docFile );
164
        WordToFoExtractor wordToFoExtractor = new WordToFoExtractor(
181
        WordToFoExtractor wordToFoExtractor = new WordToFoExtractor(
165
                DocumentBuilderFactory.newInstance().newDocumentBuilder()
182
                DocumentBuilderFactory.newInstance().newDocumentBuilder()
166
                        .newDocument() );
183
                        .newDocument() );
Lines 170-175 Link Here
170
187
171
    private final Stack<BlockProperies> blocksProperies = new Stack<BlockProperies>();
188
    private final Stack<BlockProperies> blocksProperies = new Stack<BlockProperies>();
172
189
190
    protected final FoDocumentFacade foDocumentFacade;
191
173
    /**
192
    /**
174
     * Creates new instance of {@link WordToFoExtractor}. Can be used for output
193
     * Creates new instance of {@link WordToFoExtractor}. Can be used for output
175
     * several {@link HWPFDocument}s into single FO document.
194
     * several {@link HWPFDocument}s into single FO document.
Lines 180-206 Link Here
180
     */
199
     */
181
    public WordToFoExtractor( Document document )
200
    public WordToFoExtractor( Document document )
182
    {
201
    {
183
        super( document );
202
        this.foDocumentFacade = new FoDocumentFacade( document );
184
    }
203
    }
185
204
186
    protected String createPageMaster( SectionProperties sep, String type,
205
    protected String createPageMaster( SectionProperties sep, String type,
187
            int section )
206
            int section )
188
    {
207
    {
189
        float height = sep.getYaPage() / TWIPS_PER_INCH;
208
        float height = sep.getYaPage() / WordToFoUtils.TWIPS_PER_INCH;
190
        float width = sep.getXaPage() / TWIPS_PER_INCH;
209
        float width = sep.getXaPage() / WordToFoUtils.TWIPS_PER_INCH;
191
        float leftMargin = sep.getDxaLeft() / TWIPS_PER_INCH;
210
        float leftMargin = sep.getDxaLeft() / WordToFoUtils.TWIPS_PER_INCH;
192
        float rightMargin = sep.getDxaRight() / TWIPS_PER_INCH;
211
        float rightMargin = sep.getDxaRight() / WordToFoUtils.TWIPS_PER_INCH;
193
        float topMargin = sep.getDyaTop() / TWIPS_PER_INCH;
212
        float topMargin = sep.getDyaTop() / WordToFoUtils.TWIPS_PER_INCH;
194
        float bottomMargin = sep.getDyaBottom() / TWIPS_PER_INCH;
213
        float bottomMargin = sep.getDyaBottom() / WordToFoUtils.TWIPS_PER_INCH;
195
214
196
        // add these to the header
215
        // add these to the header
197
        String pageMasterName = type + "-page" + section;
216
        String pageMasterName = type + "-page" + section;
198
217
199
        Element pageMaster = addSimplePageMaster( pageMasterName );
218
        Element pageMaster = foDocumentFacade
219
                .addSimplePageMaster( pageMasterName );
200
        pageMaster.setAttribute( "page-height", height + "in" );
220
        pageMaster.setAttribute( "page-height", height + "in" );
201
        pageMaster.setAttribute( "page-width", width + "in" );
221
        pageMaster.setAttribute( "page-width", width + "in" );
202
222
203
        Element regionBody = addRegionBody( pageMaster );
223
        Element regionBody = foDocumentFacade.addRegionBody( pageMaster );
204
        regionBody.setAttribute( "margin", topMargin + "in " + rightMargin
224
        regionBody.setAttribute( "margin", topMargin + "in " + rightMargin
205
                + "in " + bottomMargin + "in " + leftMargin + "in" );
225
                + "in " + bottomMargin + "in " + leftMargin + "in" );
206
226
Lines 221-227 Link Here
221
            if ( sep.getFEvenlySpaced() )
241
            if ( sep.getFEvenlySpaced() )
222
            {
242
            {
223
                regionBody.setAttribute( "column-gap",
243
                regionBody.setAttribute( "column-gap",
224
                        (sep.getDxaColumns() / TWIPS_PER_INCH) + "in" );
244
                        (sep.getDxaColumns() / WordToFoUtils.TWIPS_PER_INCH)
245
                                + "in" );
225
            }
246
            }
226
            else
247
            else
227
            {
248
            {
Lines 232-402 Link Here
232
        return pageMasterName;
253
        return pageMasterName;
233
    }
254
    }
234
255
235
    protected boolean processCharacters( HWPFDocument hwpfDocument,
256
    public Document getDocument()
236
            int currentTableLevel, Paragraph paragraph, final Element block,
237
            final int start, final int end )
238
    {
257
    {
239
        boolean haveAnyText = false;
258
        return foDocumentFacade.getDocument();
240
241
        for ( int c = start; c < end; c++ )
242
        {
243
            CharacterRun characterRun = paragraph.getCharacterRun( c );
244
245
            if ( hwpfDocument.getPicturesTable().hasPicture( characterRun ) )
246
            {
247
                Picture picture = hwpfDocument.getPicturesTable()
248
                        .extractPicture( characterRun, true );
249
250
                processImage( block, characterRun.text().charAt( 0 ) == 0x01,
251
                        picture );
252
                continue;
253
            }
254
255
            String text = characterRun.text();
256
            if ( text.getBytes().length == 0 )
257
                continue;
258
259
            if ( text.getBytes()[0] == FIELD_BEGIN_MARK )
260
            {
261
                int skipTo = tryField( hwpfDocument, paragraph,
262
                        currentTableLevel, c, block );
263
264
                if ( skipTo != c )
265
                {
266
                    c = skipTo;
267
                    continue;
268
                }
269
270
                continue;
271
            }
272
            if ( text.getBytes()[0] == FIELD_SEPARATOR_MARK )
273
            {
274
                // shall not appear without FIELD_BEGIN_MARK
275
                continue;
276
            }
277
            if ( text.getBytes()[0] == FIELD_END_MARK )
278
            {
279
                // shall not appear without FIELD_BEGIN_MARK
280
                continue;
281
            }
282
283
            if ( characterRun.isSpecialCharacter() || characterRun.isObj()
284
                    || characterRun.isOle2() )
285
            {
286
                continue;
287
            }
288
289
            BlockProperies blockProperies = this.blocksProperies.peek();
290
            Element inline = createInline();
291
            if ( characterRun.isBold() != blockProperies.pBold )
292
            {
293
                WordToFoUtils.setBold( inline, characterRun.isBold() );
294
            }
295
            if ( characterRun.isItalic() != blockProperies.pItalic )
296
            {
297
                WordToFoUtils.setItalic( inline, characterRun.isItalic() );
298
            }
299
            if ( !WordToFoUtils.equals( characterRun.getFontName(),
300
                    blockProperies.pFontName ) )
301
            {
302
                WordToFoUtils
303
                        .setFontFamily( inline, characterRun.getFontName() );
304
            }
305
            if ( characterRun.getFontSize() / 2 != blockProperies.pFontSize )
306
            {
307
                WordToFoUtils.setFontSize( inline,
308
                        characterRun.getFontSize() / 2 );
309
            }
310
            WordToFoUtils.setCharactersProperties( characterRun, inline );
311
            block.appendChild( inline );
312
313
            if ( text.endsWith( "\r" )
314
                    || (text.charAt( text.length() - 1 ) == BEL_MARK && currentTableLevel != 0) )
315
                text = text.substring( 0, text.length() - 1 );
316
317
            Text textNode = createText( text );
318
            inline.appendChild( textNode );
319
320
            haveAnyText |= text.trim().length() != 0;
321
        }
322
323
        return haveAnyText;
324
    }
259
    }
325
260
326
    public void processDocument( HWPFDocument hwpfDocument )
261
    @Override
262
    protected void outputCharacters( Element block, CharacterRun characterRun,
263
            String text )
327
    {
264
    {
328
        final Range range = hwpfDocument.getRange();
265
        BlockProperies blockProperies = this.blocksProperies.peek();
329
266
        Element inline = foDocumentFacade.createInline();
330
        for ( int s = 0; s < range.numSections(); s++ )
267
        if ( characterRun.isBold() != blockProperies.pBold )
331
        {
268
        {
332
            processSection( hwpfDocument, range.getSection( s ), s );
269
            WordToFoUtils.setBold( inline, characterRun.isBold() );
333
        }
270
        }
334
    }
271
        if ( characterRun.isItalic() != blockProperies.pItalic )
335
336
    protected void processField( HWPFDocument hwpfDocument,
337
            Element currentBlock, Paragraph paragraph, int currentTableLevel,
338
            int beginMark, int separatorMark, int endMark )
339
    {
340
341
        Pattern hyperlinkPattern = Pattern
342
                .compile( "[ \\t\\r\\n]*HYPERLINK \"(.*)\"[ \\t\\r\\n]*" );
343
        Pattern pagerefPattern = Pattern
344
                .compile( "[ \\t\\r\\n]*PAGEREF ([^ ]*)[ \\t\\r\\n]*\\\\h[ \\t\\r\\n]*" );
345
346
        if ( separatorMark - beginMark > 1 )
347
        {
272
        {
348
            CharacterRun firstAfterBegin = paragraph
273
            WordToFoUtils.setItalic( inline, characterRun.isItalic() );
349
                    .getCharacterRun( beginMark + 1 );
350
351
            final Matcher hyperlinkMatcher = hyperlinkPattern
352
                    .matcher( firstAfterBegin.text() );
353
            if ( hyperlinkMatcher.matches() )
354
            {
355
                String hyperlink = hyperlinkMatcher.group( 1 );
356
                processHyperlink( hwpfDocument, currentBlock, paragraph,
357
                        currentTableLevel, hyperlink, separatorMark + 1,
358
                        endMark );
359
                return;
360
            }
361
362
            final Matcher pagerefMatcher = pagerefPattern
363
                    .matcher( firstAfterBegin.text() );
364
            if ( pagerefMatcher.matches() )
365
            {
366
                String pageref = pagerefMatcher.group( 1 );
367
                processPageref( hwpfDocument, currentBlock, paragraph,
368
                        currentTableLevel, pageref, separatorMark + 1, endMark );
369
                return;
370
            }
371
        }
274
        }
372
275
        if ( characterRun.getFontName() != null
373
        StringBuilder debug = new StringBuilder( "Unsupported field type: \n" );
276
                && !AbstractWordUtils.equals(
374
        for ( int i = beginMark; i <= endMark; i++ )
277
                        characterRun.getFontName(), blockProperies.pFontName ) )
375
        {
278
        {
376
            debug.append( "\t" );
279
            WordToFoUtils.setFontFamily( inline, characterRun.getFontName() );
377
            debug.append( paragraph.getCharacterRun( i ) );
378
            debug.append( "\n" );
379
        }
280
        }
380
        logger.log( POILogger.WARN, debug );
281
        if ( characterRun.getFontSize() / 2 != blockProperies.pFontSize )
381
282
        {
382
        // just output field value
283
            WordToFoUtils.setFontSize( inline, characterRun.getFontSize() / 2 );
383
        if ( separatorMark + 1 < endMark )
284
        }
384
            processCharacters( hwpfDocument, currentTableLevel, paragraph,
285
        WordToFoUtils.setCharactersProperties( characterRun, inline );
385
                    currentBlock, separatorMark + 1, endMark );
286
        block.appendChild( inline );
386
287
387
        return;
288
        Text textNode = foDocumentFacade.createText( text );
289
        inline.appendChild( textNode );
388
    }
290
    }
389
291
390
    protected void processHyperlink( HWPFDocument hwpfDocument,
292
    protected void processHyperlink( HWPFDocumentCore hwpfDocument,
391
            Element currentBlock, Paragraph paragraph, int currentTableLevel,
293
            Element currentBlock, Paragraph paragraph,
294
            List<CharacterRun> characterRuns, int currentTableLevel,
392
            String hyperlink, int beginTextInclusive, int endTextExclusive )
295
            String hyperlink, int beginTextInclusive, int endTextExclusive )
393
    {
296
    {
394
        Element basicLink = createBasicLinkExternal( hyperlink );
297
        Element basicLink = foDocumentFacade
298
                .createBasicLinkExternal( hyperlink );
395
        currentBlock.appendChild( basicLink );
299
        currentBlock.appendChild( basicLink );
396
300
397
        if ( beginTextInclusive < endTextExclusive )
301
        if ( beginTextInclusive < endTextExclusive )
398
            processCharacters( hwpfDocument, currentTableLevel, paragraph,
302
            processCharacters( hwpfDocument, currentTableLevel, paragraph,
399
                    basicLink, beginTextInclusive, endTextExclusive );
303
                    basicLink, characterRuns, beginTextInclusive,
304
                    endTextExclusive );
400
    }
305
    }
401
306
402
    /**
307
    /**
Lines 422-448 Link Here
422
            Picture picture )
327
            Picture picture )
423
    {
328
    {
424
        // no default implementation -- skip
329
        // no default implementation -- skip
425
        currentBlock.appendChild( document.createComment( "Image link to '"
330
        currentBlock.appendChild( foDocumentFacade.getDocument().createComment(
426
                + picture.suggestFullFileName() + "' can be here" ) );
331
                "Image link to '" + picture.suggestFullFileName()
332
                        + "' can be here" ) );
427
    }
333
    }
428
334
429
    protected void processPageref( HWPFDocument hwpfDocument,
335
    protected void processPageref( HWPFDocumentCore hwpfDocument,
430
            Element currentBlock, Paragraph paragraph, int currentTableLevel,
336
            Element currentBlock, Paragraph paragraph,
337
            List<CharacterRun> characterRuns, int currentTableLevel,
431
            String pageref, int beginTextInclusive, int endTextExclusive )
338
            String pageref, int beginTextInclusive, int endTextExclusive )
432
    {
339
    {
433
        Element basicLink = createBasicLinkInternal( pageref );
340
        Element basicLink = foDocumentFacade.createBasicLinkInternal( pageref );
434
        currentBlock.appendChild( basicLink );
341
        currentBlock.appendChild( basicLink );
435
342
436
        if ( beginTextInclusive < endTextExclusive )
343
        if ( beginTextInclusive < endTextExclusive )
437
            processCharacters( hwpfDocument, currentTableLevel, paragraph,
344
            processCharacters( hwpfDocument, currentTableLevel, paragraph,
438
                    basicLink, beginTextInclusive, endTextExclusive );
345
                    basicLink, characterRuns, beginTextInclusive,
346
                    endTextExclusive );
439
    }
347
    }
440
348
441
    protected void processParagraph( HWPFDocument hwpfDocument,
349
    protected void processParagraph( HWPFDocumentCore hwpfDocument,
442
            Element parentFopElement, int currentTableLevel,
350
            Element parentFopElement, int currentTableLevel,
443
            Paragraph paragraph, String bulletText )
351
            Paragraph paragraph, String bulletText )
444
    {
352
    {
445
        final Element block = createBlock();
353
        final Element block = foDocumentFacade.createBlock();
446
        parentFopElement.appendChild( block );
354
        parentFopElement.appendChild( block );
447
355
448
        WordToFoUtils.setParagraphProperties( paragraph, block );
356
        WordToFoUtils.setParagraphProperties( paragraph, block );
Lines 480-500 Link Here
480
388
481
            if ( WordToFoUtils.isNotEmpty( bulletText ) )
389
            if ( WordToFoUtils.isNotEmpty( bulletText ) )
482
            {
390
            {
483
                Element inline = createInline();
391
                Element inline = foDocumentFacade.createInline();
484
                block.appendChild( inline );
392
                block.appendChild( inline );
485
393
486
                Text textNode = createText( bulletText );
394
                Text textNode = foDocumentFacade.createText( bulletText );
487
                inline.appendChild( textNode );
395
                inline.appendChild( textNode );
488
396
489
                haveAnyText |= bulletText.trim().length() != 0;
397
                haveAnyText |= bulletText.trim().length() != 0;
490
            }
398
            }
491
399
400
            List<CharacterRun> characterRuns = WordToFoUtils
401
                    .findCharacterRuns( paragraph );
492
            haveAnyText = processCharacters( hwpfDocument, currentTableLevel,
402
            haveAnyText = processCharacters( hwpfDocument, currentTableLevel,
493
                    paragraph, block, 0, charRuns );
403
                    paragraph, block, characterRuns, 0, characterRuns.size() );
494
404
495
            if ( !haveAnyText )
405
            if ( !haveAnyText )
496
            {
406
            {
497
                Element leader = createLeader();
407
                Element leader = foDocumentFacade.createLeader();
498
                block.appendChild( leader );
408
                block.appendChild( leader );
499
            }
409
            }
500
        }
410
        }
Lines 506-525 Link Here
506
        return;
416
        return;
507
    }
417
    }
508
418
509
    protected void processSection( HWPFDocument hwpfDocument, Section section,
419
    protected void processSection( HWPFDocumentCore wordDocument,
510
            int sectionCounter )
420
            Section section, int sectionCounter )
511
    {
421
    {
512
        String regularPage = createPageMaster(
422
        String regularPage = createPageMaster(
513
                WordToFoUtils.getSectionProperties( section ), "page",
423
                WordToFoUtils.getSectionProperties( section ), "page",
514
                sectionCounter );
424
                sectionCounter );
515
425
516
        Element pageSequence = addPageSequence( regularPage );
426
        Element pageSequence = foDocumentFacade.addPageSequence( regularPage );
517
        Element flow = addFlowToPageSequence( pageSequence, "xsl-region-body" );
427
        Element flow = foDocumentFacade.addFlowToPageSequence( pageSequence,
428
                "xsl-region-body" );
518
429
519
        processSectionParagraphes( hwpfDocument, flow, section, 0 );
430
        processSectionParagraphes( wordDocument, flow, section, 0 );
520
    }
431
    }
521
432
522
    protected void processSectionParagraphes( HWPFDocument hwpfDocument,
433
    protected void processSectionParagraphes( HWPFDocument wordDocument,
523
            Element flow, Range range, int currentTableLevel )
434
            Element flow, Range range, int currentTableLevel )
524
    {
435
    {
525
        final Map<Integer, Table> allTables = new HashMap<Integer, Table>();
436
        final Map<Integer, Table> allTables = new HashMap<Integer, Table>();
Lines 530-536 Link Here
530
            allTables.put( Integer.valueOf( next.getStartOffset() ), next );
441
            allTables.put( Integer.valueOf( next.getStartOffset() ), next );
531
        }
442
        }
532
443
533
        final ListTables listTables = hwpfDocument.getListTables();
444
        final ListTables listTables = wordDocument.getListTables();
534
        int currentListInfo = 0;
445
        int currentListInfo = 0;
535
446
536
        final int paragraphs = range.numParagraphs();
447
        final int paragraphs = range.numParagraphs();
Lines 543-549 Link Here
543
            {
454
            {
544
                Table table = allTables.get( Integer.valueOf( paragraph
455
                Table table = allTables.get( Integer.valueOf( paragraph
545
                        .getStartOffset() ) );
456
                        .getStartOffset() ) );
546
                processTable( hwpfDocument, flow, table, currentTableLevel + 1 );
457
                processTable( wordDocument, flow, table, currentTableLevel + 1 );
547
                continue;
458
                continue;
548
            }
459
            }
549
460
Lines 568-574 Link Here
568
                    String label = WordToFoUtils.getBulletText( listTables,
479
                    String label = WordToFoUtils.getBulletText( listTables,
569
                            paragraph, listFormatOverride.getLsid() );
480
                            paragraph, listFormatOverride.getLsid() );
570
481
571
                    processParagraph( hwpfDocument, flow, currentTableLevel,
482
                    processParagraph( wordDocument, flow, currentTableLevel,
572
                            paragraph, label );
483
                            paragraph, label );
573
                }
484
                }
574
                else
485
                else
Lines 580-603 Link Here
580
                                    + currentListInfo
491
                                    + currentListInfo
581
                                    + ", but listTables not defined in file" );
492
                                    + ", but listTables not defined in file" );
582
493
583
                    processParagraph( hwpfDocument, flow, currentTableLevel,
494
                    processParagraph( wordDocument, flow, currentTableLevel,
584
                            paragraph, WordToFoUtils.EMPTY );
495
                            paragraph, WordToFoUtils.EMPTY );
585
                }
496
                }
586
            }
497
            }
587
            else
498
            else
588
            {
499
            {
589
                processParagraph( hwpfDocument, flow, currentTableLevel,
500
                processParagraph( wordDocument, flow, currentTableLevel,
590
                        paragraph, WordToFoUtils.EMPTY );
501
                        paragraph, WordToFoUtils.EMPTY );
591
            }
502
            }
592
        }
503
        }
593
504
594
    }
505
    }
595
506
596
    protected void processTable( HWPFDocument hwpfDocument, Element flow,
507
    protected void processTable( HWPFDocumentCore wordDocument, Element flow,
597
            Table table, int thisTableLevel )
508
            Table table, int thisTableLevel )
598
    {
509
    {
599
        Element tableHeader = createTableHeader();
510
        Element tableHeader = foDocumentFacade.createTableHeader();
600
        Element tableBody = createTableBody();
511
        Element tableBody = foDocumentFacade.createTableBody();
601
512
602
        final int tableRows = table.numRows();
513
        final int tableRows = table.numRows();
603
514
Lines 611-617 Link Here
611
        {
522
        {
612
            TableRow tableRow = table.getRow( r );
523
            TableRow tableRow = table.getRow( r );
613
524
614
            Element tableRowElement = createTableRow();
525
            Element tableRowElement = foDocumentFacade.createTableRow();
615
            WordToFoUtils.setTableRowProperties( tableRow, tableRowElement );
526
            WordToFoUtils.setTableRowProperties( tableRow, tableRowElement );
616
527
617
            final int rowCells = tableRow.numCells();
528
            final int rowCells = tableRow.numCells();
Lines 626-632 Link Here
626
                        && !tableCell.isFirstVerticallyMerged() )
537
                        && !tableCell.isFirstVerticallyMerged() )
627
                    continue;
538
                    continue;
628
539
629
                Element tableCellElement = createTableCell();
540
                Element tableCellElement = foDocumentFacade.createTableCell();
630
                WordToFoUtils.setTableCellProperties( tableRow, tableCell,
541
                WordToFoUtils.setTableCellProperties( tableRow, tableCell,
631
                        tableCellElement, r == 0, r == tableRows - 1, c == 0,
542
                        tableCellElement, r == 0, r == tableRows - 1, c == 0,
632
                        c == rowCells - 1 );
543
                        c == rowCells - 1 );
Lines 673-684 Link Here
673
                            + count );
584
                            + count );
674
                }
585
                }
675
586
676
                processSectionParagraphes( hwpfDocument, tableCellElement,
587
                processSectionParagraphes( wordDocument, tableCellElement,
677
                        tableCell, thisTableLevel );
588
                        tableCell, thisTableLevel );
678
589
679
                if ( !tableCellElement.hasChildNodes() )
590
                if ( !tableCellElement.hasChildNodes() )
680
                {
591
                {
681
                    tableCellElement.appendChild( createBlock() );
592
                    tableCellElement.appendChild( foDocumentFacade
593
                            .createBlock() );
682
                }
594
                }
683
595
684
                tableRowElement.appendChild( tableCellElement );
596
                tableRowElement.appendChild( tableCellElement );
Lines 694-700 Link Here
694
            }
606
            }
695
        }
607
        }
696
608
697
        final Element tableElement = createTable();
609
        final Element tableElement = foDocumentFacade.createTable();
698
        if ( tableHeader.hasChildNodes() )
610
        if ( tableHeader.hasChildNodes() )
699
        {
611
        {
700
            tableElement.appendChild( tableHeader );
612
            tableElement.appendChild( tableHeader );
Lines 714-764 Link Here
714
        }
626
        }
715
    }
627
    }
716
628
717
    protected int tryField( HWPFDocument hwpfDocument, Paragraph paragraph,
718
            int currentTableLevel, int beginMark, Element currentBlock )
719
    {
720
        int separatorMark = -1;
721
        int endMark = -1;
722
        for ( int c = beginMark + 1; c < paragraph.numCharacterRuns(); c++ )
723
        {
724
            CharacterRun characterRun = paragraph.getCharacterRun( c );
725
726
            String text = characterRun.text();
727
            if ( text.getBytes().length == 0 )
728
                continue;
729
730
            if ( text.getBytes()[0] == FIELD_SEPARATOR_MARK )
731
            {
732
                if ( separatorMark != -1 )
733
                {
734
                    // double;
735
                    return beginMark;
736
                }
737
738
                separatorMark = c;
739
                continue;
740
            }
741
742
            if ( text.getBytes()[0] == FIELD_END_MARK )
743
            {
744
                if ( endMark != -1 )
745
                {
746
                    // double;
747
                    return beginMark;
748
                }
749
750
                endMark = c;
751
                break;
752
            }
753
754
        }
755
756
        if ( separatorMark == -1 || endMark == -1 )
757
            return beginMark;
758
759
        processField( hwpfDocument, currentBlock, paragraph, currentTableLevel,
760
                beginMark, separatorMark, endMark );
761
762
        return endMark;
763
    }
764
}
629
}
(-)testcases/org/apache/poi/hwpf/extractor/TestWordToFoExtractorSuite.java (-92 lines)
Lines 1-92 Link Here
1
package org.apache.poi.hwpf.extractor;
2
3
import java.io.File;
4
import java.io.FileInputStream;
5
import java.io.FilenameFilter;
6
import java.io.StringWriter;
7
import java.util.Arrays;
8
import java.util.Collections;
9
import java.util.List;
10
import java.util.Set;
11
12
import javax.xml.parsers.DocumentBuilderFactory;
13
import javax.xml.transform.OutputKeys;
14
import javax.xml.transform.Transformer;
15
import javax.xml.transform.TransformerFactory;
16
import javax.xml.transform.dom.DOMSource;
17
import javax.xml.transform.stream.StreamResult;
18
19
import org.apache.poi.EncryptedDocumentException;
20
21
import org.apache.poi.hwpf.OldWordFileFormatException;
22
23
import junit.framework.Test;
24
import junit.framework.TestCase;
25
import junit.framework.TestSuite;
26
import org.apache.poi.POIDataSamples;
27
import org.apache.poi.hwpf.HWPFDocument;
28
29
public class TestWordToFoExtractorSuite
30
{
31
    /**
32
     * YK: a quick hack to exclude failing documents from the suite.
33
     *
34
     * WordToFoExtractor stumbles on Bug33519.doc with a NPE
35
     */
36
    private static List<String> failingFiles = Arrays.asList("Bug33519.doc");
37
38
    public static Test suite() {
39
        TestSuite suite = new TestSuite();
40
41
        File directory = POIDataSamples.getDocumentInstance().getFile(
42
                "../document");
43
        for (final File child : directory.listFiles(new FilenameFilter() {
44
            public boolean accept(File dir, String name) {
45
                return name.endsWith(".doc") && !failingFiles.contains(name);
46
            }
47
        })) {
48
            final String name = child.getName();
49
            suite.addTest(new TestCase(name) {
50
                public void runTest() throws Exception {
51
                    test(child);
52
                }
53
            });
54
        }
55
56
        return suite;
57
    }
58
59
    protected static void test( File child ) throws Exception
60
    {
61
        HWPFDocument hwpfDocument;
62
        FileInputStream fileInputStream = new FileInputStream( child );
63
        try
64
        {
65
            hwpfDocument = new HWPFDocument( fileInputStream );
66
        }
67
        catch ( Exception exc )
68
        {
69
            // unable to parse file -- not WordToFoExtractor fault
70
            return;
71
        }
72
        finally
73
        {
74
            fileInputStream.close();
75
        }
76
77
        WordToFoExtractor wordToFoExtractor = new WordToFoExtractor(
78
                DocumentBuilderFactory.newInstance().newDocumentBuilder()
79
                        .newDocument() );
80
        wordToFoExtractor.processDocument( hwpfDocument );
81
82
        StringWriter stringWriter = new StringWriter();
83
84
        Transformer transformer = TransformerFactory.newInstance()
85
                .newTransformer();
86
        transformer.setOutputProperty( OutputKeys.INDENT, "yes" );
87
        transformer.transform(
88
                new DOMSource( wordToFoExtractor.getDocument() ),
89
                new StreamResult( stringWriter ) );
90
        // no exceptions
91
    }
92
}
(-)testcases/org/apache/poi/hwpf/extractor/TestWordToExtractorsSuite.java (-37 / +50 lines)
Lines 1-13 Link Here
1
package org.apache.poi.hwpf.extractor;
1
package org.apache.poi.hwpf.extractor;
2
2
3
import java.io.File;
3
import java.io.File;
4
import java.io.FileInputStream;
5
import java.io.FilenameFilter;
4
import java.io.FilenameFilter;
6
import java.io.StringWriter;
5
import java.io.StringWriter;
7
import java.util.Arrays;
6
import java.util.Arrays;
8
import java.util.Collections;
9
import java.util.List;
7
import java.util.List;
10
import java.util.Set;
11
8
12
import javax.xml.parsers.DocumentBuilderFactory;
9
import javax.xml.parsers.DocumentBuilderFactory;
13
import javax.xml.transform.OutputKeys;
10
import javax.xml.transform.OutputKeys;
Lines 16-91 Link Here
16
import javax.xml.transform.dom.DOMSource;
13
import javax.xml.transform.dom.DOMSource;
17
import javax.xml.transform.stream.StreamResult;
14
import javax.xml.transform.stream.StreamResult;
18
15
19
import org.apache.poi.EncryptedDocumentException;
16
import org.w3c.dom.Document;
20
21
import org.apache.poi.hwpf.OldWordFileFormatException;
22
17
23
import junit.framework.Test;
18
import junit.framework.Test;
24
import junit.framework.TestCase;
19
import junit.framework.TestCase;
25
import junit.framework.TestSuite;
20
import junit.framework.TestSuite;
26
import org.apache.poi.POIDataSamples;
21
import org.apache.poi.POIDataSamples;
27
import org.apache.poi.hwpf.HWPFDocument;
22
import org.apache.poi.hwpf.HWPFDocumentCore;
28
23
29
public class TestWordToFoExtractorSuite
24
public class TestWordToExtractorsSuite
30
{
25
{
31
    /**
26
    /**
32
     * YK: a quick hack to exclude failing documents from the suite.
27
     * YK: a quick hack to exclude failing documents from the suite.
33
     *
34
     * WordToFoExtractor stumbles on Bug33519.doc with a NPE
35
     */
28
     */
36
    private static List<String> failingFiles = Arrays.asList("Bug33519.doc");
29
    private static List<String> failingFiles = Arrays.asList();
37
30
38
    public static Test suite() {
31
    public static Test suite()
39
        TestSuite suite = new TestSuite();
32
    {
33
        TestSuite suite = new TestSuite(
34
                TestWordToExtractorsSuite.class.getName() );
40
35
41
        File directory = POIDataSamples.getDocumentInstance().getFile(
36
        File directory = POIDataSamples.getDocumentInstance().getFile(
42
                "../document");
37
                "../document" );
43
        for (final File child : directory.listFiles(new FilenameFilter() {
38
        for ( final File child : directory.listFiles( new FilenameFilter()
44
            public boolean accept(File dir, String name) {
39
        {
45
                return name.endsWith(".doc") && !failingFiles.contains(name);
40
            public boolean accept( File dir, String name )
41
            {
42
                return name.endsWith( ".doc" ) && !failingFiles.contains( name );
46
            }
43
            }
47
        })) {
44
        } ) )
45
        {
48
            final String name = child.getName();
46
            final String name = child.getName();
49
            suite.addTest(new TestCase(name) {
47
            suite.addTest( new TestCase( name + " [FO]" )
50
                public void runTest() throws Exception {
48
            {
51
                    test(child);
49
                public void runTest() throws Exception
50
                {
51
                    test( child, false );
52
                }
52
                }
53
            });
53
            } );
54
            suite.addTest( new TestCase( name + " [HTML]" )
55
            {
56
                public void runTest() throws Exception
57
                {
58
                    test( child, true );
59
                }
60
            } );
54
        }
61
        }
55
62
56
        return suite;
63
        return suite;
57
    }
64
    }
58
65
59
    protected static void test( File child ) throws Exception
66
    protected static void test( File child, boolean html ) throws Exception
60
    {
67
    {
61
        HWPFDocument hwpfDocument;
68
        HWPFDocumentCore hwpfDocument;
62
        FileInputStream fileInputStream = new FileInputStream( child );
63
        try
69
        try
64
        {
70
        {
65
            hwpfDocument = new HWPFDocument( fileInputStream );
71
            hwpfDocument = AbstractWordUtils.loadDoc( child );
66
        }
72
        }
67
        catch ( Exception exc )
73
        catch ( Exception exc )
68
        {
74
        {
69
            // unable to parse file -- not WordToFoExtractor fault
75
            // unable to parse file -- not the extractor's fault
70
            return;
76
            return;
71
        }
77
        }
72
        finally
73
        {
74
            fileInputStream.close();
75
        }
76
78
77
        WordToFoExtractor wordToFoExtractor = new WordToFoExtractor(
79
        final Document newDocument = DocumentBuilderFactory.newInstance()
78
                DocumentBuilderFactory.newInstance().newDocumentBuilder()
80
                .newDocumentBuilder().newDocument();
79
                        .newDocument() );
80
        wordToFoExtractor.processDocument( hwpfDocument );
81
81
82
        StringWriter stringWriter = new StringWriter();
82
        AbstractWordExtractor extractor;
83
        if ( html )
84
        {
85
            extractor = new WordToHtmlExtractor( newDocument );
86
        }
87
        else
88
        {
89
            extractor = new WordToFoExtractor( newDocument );
90
        }
91
        extractor.processDocument( hwpfDocument );
83
92
84
        Transformer transformer = TransformerFactory.newInstance()
93
        Transformer transformer = TransformerFactory.newInstance()
85
                .newTransformer();
94
                .newTransformer();
95
        transformer.setOutputProperty( OutputKeys.ENCODING, "utf-8" );
86
        transformer.setOutputProperty( OutputKeys.INDENT, "yes" );
96
        transformer.setOutputProperty( OutputKeys.INDENT, "yes" );
87
        transformer.transform(
97
        if ( html )
88
                new DOMSource( wordToFoExtractor.getDocument() ),
98
            transformer.setOutputProperty( OutputKeys.METHOD, "html" );
99
100
        StringWriter stringWriter = new StringWriter();
101
        transformer.transform( new DOMSource( extractor.getDocument() ),
89
                new StreamResult( stringWriter ) );
102
                new StreamResult( stringWriter ) );
90
        // no exceptions
103
        // no exceptions
91
    }
104
    }
(-)testcases/org/apache/poi/hwpf/extractor/TestWordToHtmlExtractor.java (+93 lines)
Line 0 Link Here
1
/*
2
 *  ====================================================================
3
 *    Licensed to the Apache Software Foundation (ASF) under one or more
4
 *    contributor license agreements.  See the NOTICE file distributed with
5
 *    this work for additional information regarding copyright ownership.
6
 *    The ASF licenses this file to You under the Apache License, Version 2.0
7
 *    (the "License"); you may not use this file except in compliance with
8
 *    the License.  You may obtain a copy of the License at
9
 *
10
 *        http://www.apache.org/licenses/LICENSE-2.0
11
 *
12
 *    Unless required by applicable law or agreed to in writing, software
13
 *    distributed under the License is distributed on an "AS IS" BASIS,
14
 *    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
 *    See the License for the specific language governing permissions and
16
 *    limitations under the License.
17
 * ====================================================================
18
 */
19
package org.apache.poi.hwpf.extractor;
20
21
import java.io.StringWriter;
22
23
import javax.xml.parsers.DocumentBuilderFactory;
24
import javax.xml.transform.OutputKeys;
25
import javax.xml.transform.Transformer;
26
import javax.xml.transform.TransformerFactory;
27
import javax.xml.transform.dom.DOMSource;
28
import javax.xml.transform.stream.StreamResult;
29
30
import junit.framework.TestCase;
31
import org.apache.poi.POIDataSamples;
32
import org.apache.poi.hwpf.HWPFDocument;
33
34
/**
35
 * Test cases for {@link WordToHtmlExtractor}
36
 * 
37
 * @author Sergey Vladimirov (vlsergey {at} gmail {dot} com)
38
 */
39
public class TestWordToHtmlExtractor extends TestCase
40
{
41
    private static String getHtmlText( final String sampleFileName )
42
            throws Exception
43
    {
44
        HWPFDocument hwpfDocument = new HWPFDocument( POIDataSamples
45
                .getDocumentInstance().openResourceAsStream( sampleFileName ) );
46
47
        WordToHtmlExtractor wordToHtmlExtractor = new WordToHtmlExtractor(
48
                DocumentBuilderFactory.newInstance().newDocumentBuilder()
49
                        .newDocument() );
50
        wordToHtmlExtractor.processDocument( hwpfDocument );
51
52
        StringWriter stringWriter = new StringWriter();
53
54
        Transformer transformer = TransformerFactory.newInstance()
55
                .newTransformer();
56
        transformer.setOutputProperty( OutputKeys.INDENT, "yes" );
57
        transformer.setOutputProperty( OutputKeys.ENCODING, "utf-8" );
58
        transformer.setOutputProperty( OutputKeys.METHOD, "html" );
59
        transformer.transform(
60
                new DOMSource( wordToHtmlExtractor.getDocument() ),
61
                new StreamResult( stringWriter ) );
62
63
        String result = stringWriter.toString();
64
        return result;
65
    }
66
67
    public void testHyperlink() throws Exception
68
    {
69
        final String sampleFileName = "hyperlink.doc";
70
        String result = getHtmlText( sampleFileName );
71
72
        assertTrue( result.contains( "<a href=\"http://testuri.org/\">" ) );
73
        assertTrue( result.contains( "Hyperlink text" ) );
74
    }
75
76
    public void testEquation() throws Exception
77
    {
78
        final String sampleFileName = "equation.doc";
79
        String result = getHtmlText( sampleFileName );
80
81
        assertTrue( result
82
                .contains( "<!--Image link to '0.emf' can be here-->" ) );
83
    }
84
85
    public void testPageref() throws Exception
86
    {
87
        final String sampleFileName = "pageref.doc";
88
        String result = getHtmlText( sampleFileName );
89
90
        assertTrue( result.contains( "<a href=\"#userref\">" ) );
91
        assertTrue( result.contains( "1" ) );
92
    }
93
}

Return to bug 51351