View | Details | Raw Unified | Return to bug 51351
Collapse All | Expand All

(-)src/org/apache/poi/hwpf/sprm/CharacterSprmUncompressor.java (-1 / +12 lines)
Lines 17-30 Link Here
17
17
18
package org.apache.poi.hwpf.sprm;
18
package org.apache.poi.hwpf.sprm;
19
19
20
import org.apache.poi.hwpf.usermodel.BorderCode;
20
import org.apache.poi.hwpf.usermodel.CharacterProperties;
21
import org.apache.poi.hwpf.usermodel.CharacterProperties;
21
import org.apache.poi.hwpf.usermodel.DateAndTime;
22
import org.apache.poi.hwpf.usermodel.DateAndTime;
22
import org.apache.poi.hwpf.usermodel.BorderCode;
23
import org.apache.poi.hwpf.usermodel.ShadingDescriptor;
23
import org.apache.poi.hwpf.usermodel.ShadingDescriptor;
24
import org.apache.poi.util.LittleEndian;
24
import org.apache.poi.util.LittleEndian;
25
import org.apache.poi.util.POILogFactory;
26
import org.apache.poi.util.POILogger;
25
27
26
public final class CharacterSprmUncompressor
28
public final class CharacterSprmUncompressor
27
{
29
{
30
    private static final POILogger logger = POILogFactory
31
            .getLogger( CharacterSprmUncompressor.class );
32
28
  public CharacterSprmUncompressor()
33
  public CharacterSprmUncompressor()
29
  {
34
  {
30
  }
35
  }
Lines 47-52 Link Here
47
    while (sprmIt.hasNext())
52
    while (sprmIt.hasNext())
48
    {
53
    {
49
      SprmOperation sprm = sprmIt.next();
54
      SprmOperation sprm = sprmIt.next();
55
56
      if (sprm.getType() != 2) {
57
        logger.log( POILogger.WARN, "Non-CHP SPRM returned by SprmIterator" );
58
        continue;
59
      }
60
50
      unCompressCHPOperation(parent, newProperties, sprm);
61
      unCompressCHPOperation(parent, newProperties, sprm);
51
    }
62
    }
52
63
(-)src/org/apache/poi/hwpf/model/CHPX.java (-2 / +8 lines)
Lines 57-67 Link Here
57
57
58
  public CharacterProperties getCharacterProperties(StyleSheet ss, short istd)
58
  public CharacterProperties getCharacterProperties(StyleSheet ss, short istd)
59
  {
59
  {
60
    CharacterProperties baseStyle = ss.getCharacterStyle(istd);
60
    CharacterProperties baseStyle;
61
    if (ss == null) {
62
      //old document format
63
      baseStyle = new CharacterProperties();
64
    } else {
65
      baseStyle = ss.getCharacterStyle(istd);
66
    }
61
    CharacterProperties props = CharacterSprmUncompressor.uncompressCHP(baseStyle, getGrpprl(), 0);
67
    CharacterProperties props = CharacterSprmUncompressor.uncompressCHP(baseStyle, getGrpprl(), 0);
62
    return props;
68
    return props;
63
  }
69
  }
64
  
70
65
  public String toString() {
71
  public String toString() {
66
      return "CHPX from " + getStart() + " to " + getEnd() + 
72
      return "CHPX from " + getStart() + " to " + getEnd() + 
67
         " (in bytes " + getStartBytes() + " to " + getEndBytes() + ")";
73
         " (in bytes " + getStartBytes() + " to " + getEndBytes() + ")";
(-)src/org/apache/poi/hwpf/usermodel/CharacterRun.java (+4 lines)
Lines 426-431 Link Here
426
426
427
  public String getFontName()
427
  public String getFontName()
428
  {
428
  {
429
    if (_doc.getFontTable() == null)
430
      // old word format
431
      return null;
432
429
    return _doc.getFontTable().getMainFont(_props.getFtcAscii());
433
    return _doc.getFontTable().getMainFont(_props.getFtcAscii());
430
  }
434
  }
431
435
(-)src/org/apache/poi/hwpf/usermodel/BorderCode.java (+28 lines)
Lines 194-197 Link Here
194
    _fFrame.setValue(_info2, frame ? 1 : 0);
194
    _fFrame.setValue(_info2, frame ? 1 : 0);
195
  }
195
  }
196
196
197
    @Override
198
    public String toString()
199
    {
200
        StringBuffer buffer = new StringBuffer();
201
202
        buffer.append( "[BRC]\n" );
203
204
        buffer.append( "        .dptLineWidth         = " );
205
        buffer.append( " (" ).append( getLineWidth() ).append( " )\n" );
206
207
        buffer.append( "        .brcType              = " );
208
        buffer.append( " (" ).append( getBorderType() ).append( " )\n" );
209
210
        buffer.append( "        .ico                  = " );
211
        buffer.append( " (" ).append( getColor() ).append( " )\n" );
212
213
        buffer.append( "        .dptSpace             = " );
214
        buffer.append( " (" ).append( getSpace() ).append( " )\n" );
215
216
        buffer.append( "        .fShadow              = " );
217
        buffer.append( " (" ).append( isShadow() ).append( " )\n" );
218
219
        buffer.append( "        .fFrame               = " );
220
        buffer.append( " (" ).append( isFrame() ).append( " )\n" );
221
222
        return buffer.toString();
223
    }
224
197
}
225
}
(-)src/org/apache/poi/hwpf/usermodel/Range.java (-13 / +27 lines)
Lines 781-808 Link Here
781
	 *            The index of the character run to get.
781
	 *            The index of the character run to get.
782
	 * @return The character run at the specified index in this range.
782
	 * @return The character run at the specified index in this range.
783
	 */
783
	 */
784
	public CharacterRun getCharacterRun(int index) {
784
    public CharacterRun getCharacterRun( int index )
785
		initCharacterRuns();
785
    {
786
		CHPX chpx = _characters.get(index + _charStart);
786
        initCharacterRuns();
787
        
787
        CHPX chpx = _characters.get( index + _charStart );
788
        if (chpx == null) {
788
        return getCharacterRun( chpx );
789
    }
790
791
    private CharacterRun getCharacterRun( CHPX chpx )
792
    {
793
        if ( chpx == null )
794
        {
789
            return null;
795
            return null;
790
        }
796
        }
791
797
792
		int[] point = findRange(_paragraphs, _parStart, Math.max(chpx.getStart(), _start), chpx
798
        int[] point = findRange( _paragraphs, _parStart,
793
				.getEnd());
799
                Math.max( chpx.getStart(), _start ), chpx.getEnd() );
794
800
795
        if (point[0] >= _paragraphs.size()) {
801
        if ( point[0] >= _paragraphs.size() )
802
        {
796
            return null;
803
            return null;
797
        }
804
        }
798
805
799
		PAPX papx = _paragraphs.get(point[0]);
806
        PAPX papx = _paragraphs.get( point[0] );
800
		short istd = papx.getIstd();
807
        short istd = papx.getIstd();
801
808
802
		CharacterRun chp = new CharacterRun(chpx, _doc.getStyleSheet(), istd, this);
809
        CharacterRun chp = new CharacterRun( chpx, _doc.getStyleSheet(), istd,
810
                this );
803
811
804
		return chp;
812
        return chp;
805
	}
813
    }
806
814
807
	/**
815
	/**
808
	 * Gets the section at index. The index is relative to this range.
816
	 * Gets the section at index. The index is relative to this range.
Lines 1077-1086 Link Here
1077
		}
1085
		}
1078
	}
1086
	}
1079
1087
1088
	/**
1089
	 * @return Starting character offset of the range
1090
	 */
1080
	public int getStartOffset() {
1091
	public int getStartOffset() {
1081
		return _start;
1092
		return _start;
1082
	}
1093
	}
1083
1094
1095
	/**
1096
	 * @return The ending character offset of this range
1097
	 */
1084
	public int getEndOffset() {
1098
	public int getEndOffset() {
1085
		return _end;
1099
		return _end;
1086
	}
1100
	}
(-)src/org/apache/poi/hwpf/extractor/AbstractWordUtils.java (+407 lines)
Line 0 Link Here
1
/*
2
 *  ====================================================================
3
 *    Licensed to the Apache Software Foundation (ASF) under one or more
4
 *    contributor license agreements.  See the NOTICE file distributed with
5
 *    this work for additional information regarding copyright ownership.
6
 *    The ASF licenses this file to You under the Apache License, Version 2.0
7
 *    (the "License"); you may not use this file except in compliance with
8
 *    the License.  You may obtain a copy of the License at
9
 *
10
 *        http://www.apache.org/licenses/LICENSE-2.0
11
 *
12
 *    Unless required by applicable law or agreed to in writing, software
13
 *    distributed under the License is distributed on an "AS IS" BASIS,
14
 *    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
 *    See the License for the specific language governing permissions and
16
 *    limitations under the License.
17
 * ====================================================================
18
 */
19
package org.apache.poi.hwpf.extractor;
20
21
import java.io.Closeable;
22
import java.io.File;
23
import java.io.FileInputStream;
24
import java.io.IOException;
25
import java.io.InputStream;
26
import java.lang.reflect.Constructor;
27
import java.lang.reflect.Field;
28
import java.lang.reflect.Method;
29
import java.util.ArrayList;
30
import java.util.List;
31
32
import org.apache.poi.hwpf.HWPFDocument;
33
import org.apache.poi.hwpf.HWPFDocumentCore;
34
import org.apache.poi.hwpf.HWPFOldDocument;
35
import org.apache.poi.hwpf.OldWordFileFormatException;
36
import org.apache.poi.hwpf.model.CHPX;
37
import org.apache.poi.hwpf.model.ListLevel;
38
import org.apache.poi.hwpf.model.ListTables;
39
import org.apache.poi.hwpf.usermodel.BorderCode;
40
import org.apache.poi.hwpf.usermodel.CharacterRun;
41
import org.apache.poi.hwpf.usermodel.Paragraph;
42
import org.apache.poi.hwpf.usermodel.Range;
43
import org.apache.poi.hwpf.usermodel.Section;
44
import org.apache.poi.hwpf.usermodel.SectionProperties;
45
import org.apache.poi.hwpf.usermodel.TableIterator;
46
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
47
import org.apache.poi.util.POILogFactory;
48
import org.apache.poi.util.POILogger;
49
50
public class AbstractWordUtils
51
{
52
    static final String EMPTY = "";
53
54
    private static final POILogger logger = POILogFactory
55
            .getLogger( AbstractWordUtils.class );
56
57
    public static final float TWIPS_PER_INCH = 1440.0f;
58
    public static final int TWIPS_PER_PT = 20;
59
60
    static void closeQuietly( final Closeable closeable )
61
    {
62
        try
63
        {
64
            closeable.close();
65
        }
66
        catch ( Exception exc )
67
        {
68
            logger.log( POILogger.ERROR, "Unable to close resource: " + exc,
69
                    exc );
70
        }
71
    }
72
73
    static boolean equals( String str1, String str2 )
74
    {
75
        return str1 == null ? str2 == null : str1.equals( str2 );
76
    }
77
78
    // XXX incorporate into Range
79
    static List<CharacterRun> findCharacterRuns( Range range )
80
    {
81
        final int min = range.getStartOffset();
82
        final int max = range.getEndOffset();
83
84
        List<CharacterRun> result = new ArrayList<CharacterRun>();
85
        List<CHPX> chpxs = getCharacters( range );
86
        for ( int i = 0; i < chpxs.size(); i++ )
87
        {
88
            CHPX chpx = chpxs.get( i );
89
            if ( chpx == null )
90
                continue;
91
92
            if ( Math.max( min, chpx.getStart() ) <= Math.min( max,
93
                    chpx.getEnd() ) )
94
            {
95
                final CharacterRun characterRun = getCharacterRun( range, chpx );
96
97
                if ( characterRun == null )
98
                    continue;
99
100
                result.add( characterRun );
101
            }
102
        }
103
104
        return result;
105
    }
106
107
    public static String getBorderType( BorderCode borderCode )
108
    {
109
        if ( borderCode == null )
110
            throw new IllegalArgumentException( "borderCode is null" );
111
112
        switch ( borderCode.getBorderType() )
113
        {
114
        case 1:
115
        case 2:
116
            return "solid";
117
        case 3:
118
            return "double";
119
        case 5:
120
            return "solid";
121
        case 6:
122
            return "dotted";
123
        case 7:
124
        case 8:
125
            return "dashed";
126
        case 9:
127
            return "dotted";
128
        case 10:
129
        case 11:
130
        case 12:
131
        case 13:
132
        case 14:
133
        case 15:
134
        case 16:
135
        case 17:
136
        case 18:
137
        case 19:
138
            return "double";
139
        case 20:
140
            return "solid";
141
        case 21:
142
            return "double";
143
        case 22:
144
            return "dashed";
145
        case 23:
146
            return "dashed";
147
        case 24:
148
            return "ridge";
149
        case 25:
150
            return "grooved";
151
        default:
152
            return "solid";
153
        }
154
    }
155
156
    public static String getBorderWidth( BorderCode borderCode )
157
    {
158
        int lineWidth = borderCode.getLineWidth();
159
        int pt = lineWidth / 8;
160
        int pte = lineWidth - pt * 8;
161
162
        StringBuilder stringBuilder = new StringBuilder();
163
        stringBuilder.append( pt );
164
        stringBuilder.append( "." );
165
        stringBuilder.append( 1000 / 8 * pte );
166
        stringBuilder.append( "pt" );
167
        return stringBuilder.toString();
168
    }
169
170
    public static String getBulletText( ListTables listTables,
171
            Paragraph paragraph, int listId )
172
    {
173
        final ListLevel listLevel = listTables.getLevel( listId,
174
                paragraph.getIlvl() );
175
176
        if ( listLevel.getNumberText() == null )
177
            return EMPTY;
178
179
        StringBuffer bulletBuffer = new StringBuffer();
180
        char[] xst = listLevel.getNumberText().toCharArray();
181
        for ( char element : xst )
182
        {
183
            if ( element < 9 )
184
            {
185
                ListLevel numLevel = listTables.getLevel( listId, element );
186
187
                int num = numLevel.getStartAt();
188
                bulletBuffer.append( NumberFormatter.getNumber( num,
189
                        listLevel.getNumberFormat() ) );
190
191
                if ( numLevel == listLevel )
192
                {
193
                    numLevel.setStartAt( numLevel.getStartAt() + 1 );
194
                }
195
196
            }
197
            else
198
            {
199
                bulletBuffer.append( element );
200
            }
201
        }
202
203
        byte follow = getIxchFollow( listLevel );
204
        switch ( follow )
205
        {
206
        case 0:
207
            bulletBuffer.append( "\t" );
208
            break;
209
        case 1:
210
            bulletBuffer.append( " " );
211
            break;
212
        default:
213
            break;
214
        }
215
216
        return bulletBuffer.toString();
217
    }
218
219
    private static CharacterRun getCharacterRun( Range range, CHPX chpx )
220
    {
221
        try
222
        {
223
            Method method = Range.class.getDeclaredMethod( "getCharacterRun",
224
                    CHPX.class );
225
            method.setAccessible( true );
226
            return (CharacterRun) method.invoke( range, chpx );
227
        }
228
        catch ( Exception exc )
229
        {
230
            throw new Error( exc );
231
        }
232
    }
233
234
    @SuppressWarnings( "unchecked" )
235
    private static List<CHPX> getCharacters( Range range )
236
    {
237
        try
238
        {
239
            Field field = Range.class.getDeclaredField( "_characters" );
240
            field.setAccessible( true );
241
            return (List<CHPX>) field.get( range );
242
        }
243
        catch ( Exception exc )
244
        {
245
            throw new Error( exc );
246
        }
247
    }
248
249
    public static String getColor( int ico )
250
    {
251
        switch ( ico )
252
        {
253
        case 1:
254
            return "black";
255
        case 2:
256
            return "blue";
257
        case 3:
258
            return "cyan";
259
        case 4:
260
            return "green";
261
        case 5:
262
            return "magenta";
263
        case 6:
264
            return "red";
265
        case 7:
266
            return "yellow";
267
        case 8:
268
            return "white";
269
        case 9:
270
            return "darkblue";
271
        case 10:
272
            return "darkcyan";
273
        case 11:
274
            return "darkgreen";
275
        case 12:
276
            return "darkmagenta";
277
        case 13:
278
            return "darkred";
279
        case 14:
280
            return "darkyellow";
281
        case 15:
282
            return "darkgray";
283
        case 16:
284
            return "lightgray";
285
        default:
286
            return "black";
287
        }
288
    }
289
290
    public static byte getIxchFollow( ListLevel listLevel )
291
    {
292
        try
293
        {
294
            Field field = ListLevel.class.getDeclaredField( "_ixchFollow" );
295
            field.setAccessible( true );
296
            return ((Byte) field.get( listLevel )).byteValue();
297
        }
298
        catch ( Exception exc )
299
        {
300
            throw new Error( exc );
301
        }
302
    }
303
304
    public static String getJustification( int js )
305
    {
306
        switch ( js )
307
        {
308
        case 0:
309
            return "start";
310
        case 1:
311
            return "center";
312
        case 2:
313
            return "end";
314
        case 3:
315
        case 4:
316
            return "justify";
317
        case 5:
318
            return "center";
319
        case 6:
320
            return "left";
321
        case 7:
322
            return "start";
323
        case 8:
324
            return "end";
325
        case 9:
326
            return "justify";
327
        }
328
        return "";
329
    }
330
331
    public static String getListItemNumberLabel( int number, int format )
332
    {
333
334
        if ( format != 0 )
335
            System.err.println( "NYI: toListItemNumberLabel(): " + format );
336
337
        return String.valueOf( number );
338
    }
339
340
    public static SectionProperties getSectionProperties( Section section )
341
    {
342
        try
343
        {
344
            Field field = Section.class.getDeclaredField( "_props" );
345
            field.setAccessible( true );
346
            return (SectionProperties) field.get( section );
347
        }
348
        catch ( Exception exc )
349
        {
350
            throw new Error( exc );
351
        }
352
    }
353
354
    static boolean isEmpty( String str )
355
    {
356
        return str == null || str.length() == 0;
357
    }
358
359
    static boolean isNotEmpty( String str )
360
    {
361
        return !isEmpty( str );
362
    }
363
364
    public static HWPFDocumentCore loadDoc( File docFile ) throws IOException
365
    {
366
        final FileInputStream istream = new FileInputStream( docFile );
367
        try
368
        {
369
            return loadDoc( istream );
370
        }
371
        finally
372
        {
373
            closeQuietly( istream );
374
        }
375
    }
376
377
    public static HWPFDocumentCore loadDoc( InputStream inputStream )
378
            throws IOException
379
    {
380
        final POIFSFileSystem poifsFileSystem = HWPFDocumentCore
381
                .verifyAndBuildPOIFS( inputStream );
382
        try
383
        {
384
            return new HWPFDocument( poifsFileSystem );
385
        }
386
        catch ( OldWordFileFormatException exc )
387
        {
388
            return new HWPFOldDocument( poifsFileSystem );
389
        }
390
    }
391
392
    public static TableIterator newTableIterator( Range range, int level )
393
    {
394
        try
395
        {
396
            Constructor<TableIterator> constructor = TableIterator.class
397
                    .getDeclaredConstructor( Range.class, int.class );
398
            constructor.setAccessible( true );
399
            return constructor.newInstance( range, Integer.valueOf( level ) );
400
        }
401
        catch ( Exception exc )
402
        {
403
            throw new Error( exc );
404
        }
405
    }
406
407
}
0
  + text/plain
408
  + text/plain
(-)src/org/apache/poi/hwpf/extractor/WordToHtmlUtils.java (+294 lines)
Line 0 Link Here
1
/*
2
 *  ====================================================================
3
 *    Licensed to the Apache Software Foundation (ASF) under one or more
4
 *    contributor license agreements.  See the NOTICE file distributed with
5
 *    this work for additional information regarding copyright ownership.
6
 *    The ASF licenses this file to You under the Apache License, Version 2.0
7
 *    (the "License"); you may not use this file except in compliance with
8
 *    the License.  You may obtain a copy of the License at
9
 *
10
 *        http://www.apache.org/licenses/LICENSE-2.0
11
 *
12
 *    Unless required by applicable law or agreed to in writing, software
13
 *    distributed under the License is distributed on an "AS IS" BASIS,
14
 *    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
 *    See the License for the specific language governing permissions and
16
 *    limitations under the License.
17
 * ====================================================================
18
 */
19
package org.apache.poi.hwpf.extractor;
20
21
import org.apache.poi.hwpf.usermodel.BorderCode;
22
import org.apache.poi.hwpf.usermodel.CharacterProperties;
23
import org.apache.poi.hwpf.usermodel.CharacterRun;
24
import org.apache.poi.hwpf.usermodel.Paragraph;
25
import org.apache.poi.hwpf.usermodel.Picture;
26
import org.apache.poi.hwpf.usermodel.TableCell;
27
import org.apache.poi.hwpf.usermodel.TableRow;
28
import org.w3c.dom.Element;
29
30
public class WordToHtmlUtils extends AbstractWordUtils
31
{
32
    public static void addBold( final boolean bold, StringBuilder style )
33
    {
34
        style.append( "font-weight: " + (bold ? "bold" : "normal") + ";" );
35
    }
36
37
    public static void addBorder( BorderCode borderCode, String where,
38
            StringBuilder style )
39
    {
40
        if ( borderCode == null || borderCode.getBorderType() == 0 )
41
            return;
42
43
        if ( isEmpty( where ) )
44
        {
45
            style.append( "border-style: " + getBorderType( borderCode ) + "; " );
46
            style.append( "border-color: " + getColor( borderCode.getColor() )
47
                    + "; " );
48
            style.append( "border-width: " + getBorderWidth( borderCode )
49
                    + "; " );
50
        }
51
        else
52
        {
53
            style.append( "border-" + where + "-style: "
54
                    + getBorderType( borderCode ) + "; " );
55
            style.append( "border-" + where + "-color: "
56
                    + getColor( borderCode.getColor() ) + "; " );
57
            style.append( "border-" + where + "-width: "
58
                    + getBorderWidth( borderCode ) + "; " );
59
        }
60
    }
61
62
    public static void addCharactersProperties(
63
            final CharacterRun characterRun, StringBuilder style )
64
    {
65
        final CharacterProperties clonedProperties = characterRun
66
                .cloneProperties();
67
68
        if ( characterRun.isBold() )
69
        {
70
            style.append( "font-weight: bold; " );
71
        }
72
        if ( characterRun.isItalic() )
73
        {
74
            style.append( "font-style: italic; " );
75
        }
76
77
        addBorder( clonedProperties.getBrc(), EMPTY, style );
78
79
        if ( characterRun.isCapitalized() )
80
        {
81
            style.append( "text-transform: uppercase; " );
82
        }
83
        if ( characterRun.isHighlighted() )
84
        {
85
            style.append( "background-color: "
86
                    + getColor( clonedProperties.getIcoHighlight() ) + "; " );
87
        }
88
        if ( characterRun.isStrikeThrough() )
89
        {
90
            style.append( "text-decoration: line-through; " );
91
        }
92
        if ( characterRun.isShadowed() )
93
        {
94
            style.append( "text-shadow: " + characterRun.getFontSize() / 24
95
                    + "pt; " );
96
        }
97
        if ( characterRun.isSmallCaps() )
98
        {
99
            style.append( "font-variant: small-caps; " );
100
        }
101
        if ( characterRun.getSubSuperScriptIndex() == 1 )
102
        {
103
            style.append( "baseline-shift: super; " );
104
            style.append( "font-size: smaller; " );
105
        }
106
        if ( characterRun.getSubSuperScriptIndex() == 2 )
107
        {
108
            style.append( "baseline-shift: sub; " );
109
            style.append( "font-size: smaller; " );
110
        }
111
        if ( characterRun.getUnderlineCode() > 0 )
112
        {
113
            style.append( "text-decoration: underline; " );
114
        }
115
        if ( characterRun.isVanished() )
116
        {
117
            style.append( "visibility: hidden; " );
118
        }
119
    }
120
121
    public static void addFontFamily( final String fontFamily,
122
            StringBuilder style )
123
    {
124
        if ( isEmpty( fontFamily ) )
125
            return;
126
127
        style.append( "font-family: " + fontFamily );
128
    }
129
130
    public static void addFontSize( final int fontSize, StringBuilder style )
131
    {
132
        style.append( "font-size: " + fontSize );
133
    }
134
135
    public static void addIndent( Paragraph paragraph, StringBuilder style )
136
    {
137
        addIndent( style, "text-indent", paragraph.getFirstLineIndent() );
138
        addIndent( style, "start-indent", paragraph.getIndentFromLeft() );
139
        addIndent( style, "end-indent", paragraph.getIndentFromRight() );
140
        addIndent( style, "space-before", paragraph.getSpacingBefore() );
141
        addIndent( style, "space-after", paragraph.getSpacingAfter() );
142
    }
143
144
    private static void addIndent( StringBuilder style, final String cssName,
145
            final int twipsValue )
146
    {
147
        if ( twipsValue == 0 )
148
            return;
149
150
        style.append( cssName + ": " + (twipsValue / TWIPS_PER_PT) + "pt; " );
151
    }
152
153
    public static void addJustification( Paragraph paragraph,
154
            final StringBuilder style )
155
    {
156
        String justification = getJustification( paragraph.getJustification() );
157
        if ( isNotEmpty( justification ) )
158
            style.append( "text-align: " + justification + "; " );
159
    }
160
161
    public static void addParagraphProperties( Paragraph paragraph,
162
            StringBuilder style )
163
    {
164
        addIndent( paragraph, style );
165
        addJustification( paragraph, style );
166
167
        addBorder( paragraph.getBottomBorder(), "bottom", style );
168
        addBorder( paragraph.getLeftBorder(), "left", style );
169
        addBorder( paragraph.getRightBorder(), "right", style );
170
        addBorder( paragraph.getTopBorder(), "top", style );
171
172
        if ( paragraph.pageBreakBefore() )
173
        {
174
            style.append( "break-before: page; " );
175
        }
176
177
        style.append( "hyphenate: " + paragraph.isAutoHyphenated() + "; " );
178
179
        if ( paragraph.keepOnPage() )
180
        {
181
            style.append( "keep-together.within-page: always; " );
182
        }
183
184
        if ( paragraph.keepWithNext() )
185
        {
186
            style.append( "keep-with-next.within-page: always; " );
187
        }
188
189
        style.append( "linefeed-treatment: preserve; " );
190
        style.append( "white-space-collapse: false; " );
191
    }
192
193
    public static void addTableCellProperties( TableRow tableRow,
194
            TableCell tableCell, boolean toppest, boolean bottomest,
195
            boolean leftest, boolean rightest, StringBuilder style )
196
    {
197
        style.append( "width: " + (tableCell.getWidth() / TWIPS_PER_INCH)
198
                + "in; " );
199
        style.append( "padding-start: "
200
                + (tableRow.getGapHalf() / TWIPS_PER_INCH) + "in; " );
201
        style.append( "padding-end: "
202
                + (tableRow.getGapHalf() / TWIPS_PER_INCH) + "in; " );
203
204
        BorderCode top = tableCell.getBrcTop() != null
205
                && tableCell.getBrcTop().getBorderType() != 0 ? tableCell
206
                .getBrcTop() : toppest ? tableRow.getTopBorder() : tableRow
207
                .getHorizontalBorder();
208
        BorderCode bottom = tableCell.getBrcBottom() != null
209
                && tableCell.getBrcBottom().getBorderType() != 0 ? tableCell
210
                .getBrcBottom() : bottomest ? tableRow.getBottomBorder()
211
                : tableRow.getHorizontalBorder();
212
213
        BorderCode left = tableCell.getBrcLeft() != null
214
                && tableCell.getBrcLeft().getBorderType() != 0 ? tableCell
215
                .getBrcLeft() : leftest ? tableRow.getLeftBorder() : tableRow
216
                .getVerticalBorder();
217
        BorderCode right = tableCell.getBrcRight() != null
218
                && tableCell.getBrcRight().getBorderType() != 0 ? tableCell
219
                .getBrcRight() : rightest ? tableRow.getRightBorder()
220
                : tableRow.getVerticalBorder();
221
222
        addBorder( bottom, "bottom", style );
223
        addBorder( left, "left", style );
224
        addBorder( right, "right", style );
225
        addBorder( top, "top", style );
226
    }
227
228
    public static void addTableRowProperties( TableRow tableRow,
229
            StringBuilder style )
230
    {
231
        if ( tableRow.getRowHeight() > 0 )
232
        {
233
            style.append( "height: "
234
                    + (tableRow.getRowHeight() / TWIPS_PER_INCH) + "in; " );
235
        }
236
        if ( !tableRow.cantSplit() )
237
        {
238
            style.append( "keep-together: always; " );
239
        }
240
    }
241
242
    public static void setPictureProperties( Picture picture,
243
            Element graphicElement )
244
    {
245
        final int aspectRatioX = picture.getAspectRatioX();
246
        final int aspectRatioY = picture.getAspectRatioY();
247
248
        if ( aspectRatioX > 0 )
249
        {
250
            graphicElement
251
                    .setAttribute( "content-width", ((picture.getDxaGoal()
252
                            * aspectRatioX / 100) / TWIPS_PER_PT)
253
                            + "pt" );
254
        }
255
        else
256
            graphicElement.setAttribute( "content-width",
257
                    (picture.getDxaGoal() / TWIPS_PER_PT) + "pt" );
258
259
        if ( aspectRatioY > 0 )
260
            graphicElement
261
                    .setAttribute( "content-height", ((picture.getDyaGoal()
262
                            * aspectRatioY / 100) / TWIPS_PER_PT)
263
                            + "pt" );
264
        else
265
            graphicElement.setAttribute( "content-height",
266
                    (picture.getDyaGoal() / TWIPS_PER_PT) + "pt" );
267
268
        if ( aspectRatioX <= 0 || aspectRatioY <= 0 )
269
        {
270
            graphicElement.setAttribute( "scaling", "uniform" );
271
        }
272
        else
273
        {
274
            graphicElement.setAttribute( "scaling", "non-uniform" );
275
        }
276
277
        graphicElement.setAttribute( "vertical-align", "text-bottom" );
278
279
        if ( picture.getDyaCropTop() != 0 || picture.getDxaCropRight() != 0
280
                || picture.getDyaCropBottom() != 0
281
                || picture.getDxaCropLeft() != 0 )
282
        {
283
            int rectTop = picture.getDyaCropTop() / TWIPS_PER_PT;
284
            int rectRight = picture.getDxaCropRight() / TWIPS_PER_PT;
285
            int rectBottom = picture.getDyaCropBottom() / TWIPS_PER_PT;
286
            int rectLeft = picture.getDxaCropLeft() / TWIPS_PER_PT;
287
            graphicElement.setAttribute( "clip", "rect(" + rectTop + "pt, "
288
                    + rectRight + "pt, " + rectBottom + "pt, " + rectLeft
289
                    + "pt)" );
290
            graphicElement.setAttribute( "oveerflow", "hidden" );
291
        }
292
    }
293
294
}
0
  + text/plain
295
  + text/plain
(-)src/org/apache/poi/hwpf/extractor/WordToHtmlExtractor.java (+477 lines)
Line 0 Link Here
1
/*
2
 *  ====================================================================
3
 *    Licensed to the Apache Software Foundation (ASF) under one or more
4
 *    contributor license agreements.  See the NOTICE file distributed with
5
 *    this work for additional information regarding copyright ownership.
6
 *    The ASF licenses this file to You under the Apache License, Version 2.0
7
 *    (the "License"); you may not use this file except in compliance with
8
 *    the License.  You may obtain a copy of the License at
9
 *
10
 *        http://www.apache.org/licenses/LICENSE-2.0
11
 *
12
 *    Unless required by applicable law or agreed to in writing, software
13
 *    distributed under the License is distributed on an "AS IS" BASIS,
14
 *    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
 *    See the License for the specific language governing permissions and
16
 *    limitations under the License.
17
 * ====================================================================
18
 */
19
package org.apache.poi.hwpf.extractor;
20
21
import java.io.File;
22
import java.io.FileWriter;
23
import java.util.List;
24
import java.util.Stack;
25
26
import javax.xml.parsers.DocumentBuilderFactory;
27
import javax.xml.transform.OutputKeys;
28
import javax.xml.transform.Transformer;
29
import javax.xml.transform.TransformerFactory;
30
import javax.xml.transform.dom.DOMSource;
31
import javax.xml.transform.stream.StreamResult;
32
33
import org.apache.poi.hwpf.HWPFDocument;
34
import org.apache.poi.hwpf.HWPFDocumentCore;
35
import org.apache.poi.hwpf.usermodel.CharacterRun;
36
import org.apache.poi.hwpf.usermodel.Paragraph;
37
import org.apache.poi.hwpf.usermodel.Picture;
38
import org.apache.poi.hwpf.usermodel.Section;
39
import org.apache.poi.hwpf.usermodel.SectionProperties;
40
import org.apache.poi.hwpf.usermodel.Table;
41
import org.apache.poi.hwpf.usermodel.TableCell;
42
import org.apache.poi.hwpf.usermodel.TableRow;
43
import org.apache.poi.util.POILogFactory;
44
import org.apache.poi.util.POILogger;
45
import org.w3c.dom.Document;
46
import org.w3c.dom.Element;
47
import org.w3c.dom.Text;
48
49
import static org.apache.poi.hwpf.extractor.AbstractWordUtils.TWIPS_PER_INCH;
50
51
/**
52
 * @author Sergey Vladimirov (vlsergey {at} gmail {dot} com)
53
 */
54
public class WordToHtmlExtractor extends AbstractWordExtractor
55
{
56
57
    /**
58
     * Holds properties values, applied to current <tt>p</tt> element. Those
59
     * properties shall not be doubled in children <tt>span</tt> elements.
60
     */
61
    private static class BlockProperies
62
    {
63
        final String pFontName;
64
        final int pFontSize;
65
66
        public BlockProperies( String pFontName, int pFontSize )
67
        {
68
            this.pFontName = pFontName;
69
            this.pFontSize = pFontSize;
70
        }
71
    }
72
73
    private static final POILogger logger = POILogFactory
74
            .getLogger( WordToHtmlExtractor.class );
75
76
    private static String getSectionStyle( Section section )
77
    {
78
        SectionProperties sep = WordToHtmlUtils.getSectionProperties( section );
79
80
        float leftMargin = sep.getDxaLeft() / TWIPS_PER_INCH;
81
        float rightMargin = sep.getDxaRight() / TWIPS_PER_INCH;
82
        float topMargin = sep.getDyaTop() / TWIPS_PER_INCH;
83
        float bottomMargin = sep.getDyaBottom() / TWIPS_PER_INCH;
84
85
        String style = "margin: " + topMargin + "in " + rightMargin + "in "
86
                + bottomMargin + "in " + leftMargin + "in; ";
87
88
        if ( sep.getCcolM1() > 0 )
89
        {
90
            style += "column-count: " + (sep.getCcolM1() + 1) + "; ";
91
            if ( sep.getFEvenlySpaced() )
92
            {
93
                style += "column-gap: "
94
                        + (sep.getDxaColumns() / TWIPS_PER_INCH) + "in; ";
95
            }
96
            else
97
            {
98
                style += "column-gap: 0.25in; ";
99
            }
100
        }
101
        return style;
102
    }
103
104
    /**
105
     * Java main() interface to interact with WordToHtmlExtractor
106
     * 
107
     * <p>
108
     * Usage: WordToHtmlExtractor infile outfile
109
     * </p>
110
     * Where infile is an input .doc file ( Word 95-2007) which will be rendered
111
     * as HTML into outfile
112
     */
113
    public static void main( String[] args )
114
    {
115
        if ( args.length < 2 )
116
        {
117
            System.err
118
                    .println( "Usage: WordToHtmlExtractor <inputFile.doc> <saveTo.html>" );
119
            return;
120
        }
121
122
        System.out.println( "Converting " + args[0] );
123
        System.out.println( "Saving output to " + args[1] );
124
        try
125
        {
126
            Document doc = WordToHtmlExtractor.process( new File( args[0] ) );
127
128
            FileWriter out = new FileWriter( args[1] );
129
            DOMSource domSource = new DOMSource( doc );
130
            StreamResult streamResult = new StreamResult( out );
131
132
            TransformerFactory tf = TransformerFactory.newInstance();
133
            Transformer serializer = tf.newTransformer();
134
            // TODO set encoding from a command argument
135
            serializer.setOutputProperty( OutputKeys.ENCODING, "UTF-8" );
136
            serializer.setOutputProperty( OutputKeys.INDENT, "yes" );
137
            serializer.setOutputProperty( OutputKeys.METHOD, "html" );
138
            serializer.transform( domSource, streamResult );
139
            out.close();
140
        }
141
        catch ( Exception e )
142
        {
143
            e.printStackTrace();
144
        }
145
    }
146
147
    static Document process( File docFile ) throws Exception
148
    {
149
        final HWPFDocumentCore wordDocument = WordToHtmlUtils.loadDoc( docFile );
150
        WordToHtmlExtractor wordToHtmlExtractor = new WordToHtmlExtractor(
151
                DocumentBuilderFactory.newInstance().newDocumentBuilder()
152
                        .newDocument() );
153
        wordToHtmlExtractor.processDocument( wordDocument );
154
        return wordToHtmlExtractor.getDocument();
155
    }
156
157
    private final Stack<BlockProperies> blocksProperies = new Stack<BlockProperies>();
158
159
    private final HtmlDocumentFacade htmlDocumentFacade;
160
161
    /**
     * Creates a new {@link WordToHtmlExtractor} writing into the given DOM
     * document. One instance can be used to output several
     * {@link HWPFDocument}s into a single HTML document.
     *
     * @param document
     *            XML DOM Document used as HTML document
     */
    public WordToHtmlExtractor( Document document )
    {
        htmlDocumentFacade = new HtmlDocumentFacade( document );
    }
172
173
    public Document getDocument()
174
    {
175
        return htmlDocumentFacade.getDocument();
176
    }
177
178
    @Override
179
    protected void outputCharacters( Element pElement,
180
            CharacterRun characterRun, String text )
181
    {
182
        Element span = htmlDocumentFacade.document.createElement( "span" );
183
        pElement.appendChild( span );
184
185
        StringBuilder style = new StringBuilder();
186
        BlockProperies blockProperies = this.blocksProperies.peek();
187
        if ( characterRun.getFontName() != null
188
                && !WordToHtmlUtils.equals( characterRun.getFontName(),
189
                        blockProperies.pFontName ) )
190
        {
191
            style.append( "font-family: " + characterRun.getFontName() + "; " );
192
        }
193
        if ( characterRun.getFontSize() / 2 != blockProperies.pFontSize )
194
        {
195
            style.append( "font-size: " + characterRun.getFontSize() / 2 + "; " );
196
        }
197
198
        WordToHtmlUtils.addCharactersProperties( characterRun, style );
199
        if ( style.length() != 0 )
200
            span.setAttribute( "style", style.toString() );
201
202
        Text textNode = htmlDocumentFacade.createText( text );
203
        span.appendChild( textNode );
204
    }
205
206
    protected void processHyperlink( HWPFDocumentCore wordDocument,
207
            Element currentBlock, Paragraph paragraph,
208
            List<CharacterRun> characterRuns, int currentTableLevel,
209
            String hyperlink, int beginTextInclusive, int endTextExclusive )
210
    {
211
        Element basicLink = htmlDocumentFacade.createHyperlink( hyperlink );
212
        currentBlock.appendChild( basicLink );
213
214
        if ( beginTextInclusive < endTextExclusive )
215
            processCharacters( wordDocument, currentTableLevel, paragraph,
216
                    basicLink, characterRuns, beginTextInclusive,
217
                    endTextExclusive );
218
    }
219
220
    /**
221
     * This method shall store image bytes in external file and convert it if
222
     * necessary. Images shall be stored using PNG format. Other formats may be
223
     * not supported by user browser.
224
     * <p>
225
     * Please note the
226
     * {@link WordToHtmlUtils#setPictureProperties(Picture, Element)} method.
227
     * 
228
     * @param currentBlock
229
     *            currently processed HTML element, like <tt>p</tt>. Shall be
230
     *            used as parent of newly created <tt>img</tt>
231
     * @param inlined
232
     *            if image is inlined
233
     * @param picture
234
     *            HWPF object, contained picture data and properties
235
     */
236
    protected void processImage( Element currentBlock, boolean inlined,
237
            Picture picture )
238
    {
239
        // no default implementation -- skip
240
        currentBlock.appendChild( htmlDocumentFacade.document
241
                .createComment( "Image link to '"
242
                        + picture.suggestFullFileName() + "' can be here" ) );
243
    }
244
245
    protected void processPageref( HWPFDocumentCore hwpfDocument,
246
            Element currentBlock, Paragraph paragraph,
247
            List<CharacterRun> characterRuns, int currentTableLevel,
248
            String pageref, int beginTextInclusive, int endTextExclusive )
249
    {
250
        Element basicLink = htmlDocumentFacade.createHyperlink( "#" + pageref );
251
        currentBlock.appendChild( basicLink );
252
253
        if ( beginTextInclusive < endTextExclusive )
254
            processCharacters( hwpfDocument, currentTableLevel, paragraph,
255
                    basicLink, characterRuns, beginTextInclusive,
256
                    endTextExclusive );
257
    }
258
259
    protected void processParagraph( HWPFDocumentCore hwpfDocument,
260
            Element parentFopElement, int currentTableLevel,
261
            Paragraph paragraph, String bulletText )
262
    {
263
        final Element pElement = htmlDocumentFacade.createParagraph();
264
        parentFopElement.appendChild( pElement );
265
266
        StringBuilder style = new StringBuilder();
267
        WordToHtmlUtils.addParagraphProperties( paragraph, style );
268
269
        final int charRuns = paragraph.numCharacterRuns();
270
271
        if ( charRuns == 0 )
272
        {
273
            return;
274
        }
275
276
        {
277
            final String pFontName;
278
            final int pFontSize;
279
            final CharacterRun characterRun = paragraph.getCharacterRun( 0 );
280
            if ( characterRun != null )
281
            {
282
                pFontSize = characterRun.getFontSize() / 2;
283
                pFontName = characterRun.getFontName();
284
                WordToHtmlUtils.addFontFamily( pFontName, style );
285
                WordToHtmlUtils.addFontSize( pFontSize, style );
286
            }
287
            else
288
            {
289
                pFontSize = -1;
290
                pFontName = WordToHtmlUtils.EMPTY;
291
            }
292
            blocksProperies.push( new BlockProperies( pFontName, pFontSize ) );
293
        }
294
        try
295
        {
296
            if ( WordToHtmlUtils.isNotEmpty( bulletText ) )
297
            {
298
                Text textNode = htmlDocumentFacade.createText( bulletText );
299
                pElement.appendChild( textNode );
300
            }
301
302
            List<CharacterRun> characterRuns = WordToHtmlUtils
303
                    .findCharacterRuns( paragraph );
304
            processCharacters( hwpfDocument, currentTableLevel, paragraph,
305
                    pElement, characterRuns, 0, characterRuns.size() );
306
        }
307
        finally
308
        {
309
            blocksProperies.pop();
310
        }
311
312
        if ( style.length() > 0 )
313
            pElement.setAttribute( "style", style.toString() );
314
315
        return;
316
    }
317
318
    protected void processSection( HWPFDocumentCore wordDocument,
319
            Section section, int sectionCounter )
320
    {
321
        Element div = htmlDocumentFacade.document.createElement( "div" );
322
        div.setAttribute( "style", getSectionStyle( section ) );
323
        htmlDocumentFacade.body.appendChild( div );
324
325
        processSectionParagraphes( wordDocument, div, section, 0 );
326
    }
327
328
    @Override
329
    protected void processSingleSection( HWPFDocumentCore wordDocument,
330
            Section section )
331
    {
332
        htmlDocumentFacade.body.setAttribute( "style",
333
                getSectionStyle( section ) );
334
335
        processSectionParagraphes( wordDocument, htmlDocumentFacade.body,
336
                section, 0 );
337
    }
338
339
    protected void processTable( HWPFDocumentCore hwpfDocument, Element flow,
340
            Table table, int thisTableLevel )
341
    {
342
        Element tableHeader = htmlDocumentFacade.createTableHeader();
343
        Element tableBody = htmlDocumentFacade.createTableBody();
344
345
        final int tableRows = table.numRows();
346
347
        int maxColumns = Integer.MIN_VALUE;
348
        for ( int r = 0; r < tableRows; r++ )
349
        {
350
            maxColumns = Math.max( maxColumns, table.getRow( r ).numCells() );
351
        }
352
353
        for ( int r = 0; r < tableRows; r++ )
354
        {
355
            TableRow tableRow = table.getRow( r );
356
357
            Element tableRowElement = htmlDocumentFacade.createTableRow();
358
            StringBuilder tableRowStyle = new StringBuilder();
359
            WordToHtmlUtils.addTableRowProperties( tableRow, tableRowStyle );
360
361
            final int rowCells = tableRow.numCells();
362
            for ( int c = 0; c < rowCells; c++ )
363
            {
364
                TableCell tableCell = tableRow.getCell( c );
365
366
                if ( tableCell.isMerged() && !tableCell.isFirstMerged() )
367
                    continue;
368
369
                if ( tableCell.isVerticallyMerged()
370
                        && !tableCell.isFirstVerticallyMerged() )
371
                    continue;
372
373
                Element tableCellElement;
374
                if ( tableRow.isTableHeader() )
375
                {
376
                    tableCellElement = htmlDocumentFacade
377
                            .createTableHeaderCell();
378
                }
379
                else
380
                {
381
                    tableCellElement = htmlDocumentFacade.createTableCell();
382
                }
383
                StringBuilder tableCellStyle = new StringBuilder();
384
                WordToHtmlUtils.addTableCellProperties( tableRow, tableCell,
385
                        r == 0, r == tableRows - 1, c == 0, c == rowCells - 1,
386
                        tableCellStyle );
387
388
                if ( tableCell.isFirstMerged() )
389
                {
390
                    int count = 0;
391
                    for ( int c1 = c; c1 < rowCells; c1++ )
392
                    {
393
                        TableCell nextCell = tableRow.getCell( c1 );
394
                        if ( nextCell.isMerged() )
395
                            count++;
396
                        if ( !nextCell.isMerged() )
397
                            break;
398
                    }
399
                    tableCellElement.setAttribute( "colspan", "" + count );
400
                }
401
                else
402
                {
403
                    if ( c == rowCells - 1 && c != maxColumns - 1 )
404
                    {
405
                        tableCellElement.setAttribute( "colspan", ""
406
                                + (maxColumns - c) );
407
                    }
408
                }
409
410
                if ( tableCell.isFirstVerticallyMerged() )
411
                {
412
                    int count = 0;
413
                    for ( int r1 = r; r1 < tableRows; r1++ )
414
                    {
415
                        TableRow nextRow = table.getRow( r1 );
416
                        if ( nextRow.numCells() < c )
417
                            break;
418
                        TableCell nextCell = nextRow.getCell( c );
419
                        if ( nextCell.isVerticallyMerged() )
420
                            count++;
421
                        if ( !nextCell.isVerticallyMerged() )
422
                            break;
423
                    }
424
                    tableCellElement.setAttribute( "rowspan", "" + count );
425
                }
426
427
                processSectionParagraphes( hwpfDocument, tableCellElement,
428
                        tableCell, thisTableLevel );
429
430
                if ( !tableCellElement.hasChildNodes() )
431
                {
432
                    tableCellElement.appendChild( htmlDocumentFacade
433
                            .createParagraph() );
434
                }
435
                if ( tableCellStyle.length() > 0 )
436
                    tableCellElement.setAttribute( "style",
437
                            tableCellStyle.toString() );
438
439
                tableRowElement.appendChild( tableCellElement );
440
            }
441
442
            if ( tableRowStyle.length() > 0 )
443
                tableRowElement
444
                        .setAttribute( "style", tableRowStyle.toString() );
445
446
            if ( tableRow.isTableHeader() )
447
            {
448
                tableHeader.appendChild( tableRowElement );
449
            }
450
            else
451
            {
452
                tableBody.appendChild( tableRowElement );
453
            }
454
455
        }
456
457
        final Element tableElement = htmlDocumentFacade.createTable();
458
        if ( tableHeader.hasChildNodes() )
459
        {
460
            tableElement.appendChild( tableHeader );
461
        }
462
        if ( tableBody.hasChildNodes() )
463
        {
464
            tableElement.appendChild( tableBody );
465
            flow.appendChild( tableElement );
466
        }
467
        else
468
        {
469
            logger.log(
470
                    POILogger.WARN,
471
                    "Table without body starting on offset "
472
                            + table.getStartOffset() + " -- "
473
                            + table.getEndOffset() );
474
        }
475
    }
476
477
}
0
  + text/plain
478
  + text/plain
(-)src/org/apache/poi/hwpf/extractor/AbstractWordExtractor.java (+369 lines)
Line 0 Link Here
1
/*
2
 *  ====================================================================
3
 *    Licensed to the Apache Software Foundation (ASF) under one or more
4
 *    contributor license agreements.  See the NOTICE file distributed with
5
 *    this work for additional information regarding copyright ownership.
6
 *    The ASF licenses this file to You under the Apache License, Version 2.0
7
 *    (the "License"); you may not use this file except in compliance with
8
 *    the License.  You may obtain a copy of the License at
9
 *
10
 *        http://www.apache.org/licenses/LICENSE-2.0
11
 *
12
 *    Unless required by applicable law or agreed to in writing, software
13
 *    distributed under the License is distributed on an "AS IS" BASIS,
14
 *    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
 *    See the License for the specific language governing permissions and
16
 *    limitations under the License.
17
 * ====================================================================
18
 */
19
package org.apache.poi.hwpf.extractor;
20
21
import java.util.HashMap;
22
import java.util.List;
23
import java.util.Map;
24
import java.util.regex.Matcher;
25
import java.util.regex.Pattern;
26
27
import org.apache.poi.hwpf.HWPFDocument;
28
import org.apache.poi.hwpf.HWPFDocumentCore;
29
import org.apache.poi.hwpf.model.ListFormatOverride;
30
import org.apache.poi.hwpf.model.ListTables;
31
import org.apache.poi.hwpf.usermodel.CharacterRun;
32
import org.apache.poi.hwpf.usermodel.Paragraph;
33
import org.apache.poi.hwpf.usermodel.Picture;
34
import org.apache.poi.hwpf.usermodel.Range;
35
import org.apache.poi.hwpf.usermodel.Section;
36
import org.apache.poi.hwpf.usermodel.Table;
37
import org.apache.poi.hwpf.usermodel.TableIterator;
38
import org.apache.poi.util.POILogFactory;
39
import org.apache.poi.util.POILogger;
40
import org.w3c.dom.Document;
41
import org.w3c.dom.Element;
42
43
public abstract class AbstractWordExtractor
44
{
45
    private static final byte BEL_MARK = 7;
46
47
    private static final byte FIELD_BEGIN_MARK = 19;
48
49
    private static final byte FIELD_END_MARK = 21;
50
51
    private static final byte FIELD_SEPARATOR_MARK = 20;
52
53
    private static final POILogger logger = POILogFactory
54
            .getLogger( AbstractWordExtractor.class );
55
56
    /**
     * Returns the DOM document associated with this extractor. What the
     * document contains is decided by the concrete subclass.
     */
    public abstract Document getDocument();
57
58
    protected abstract void outputCharacters( Element block,
59
            CharacterRun characterRun, String text );
60
61
    protected boolean processCharacters( HWPFDocumentCore hwpfDocument,
62
            int currentTableLevel, Paragraph paragraph, final Element block,
63
            List<CharacterRun> characterRuns, final int start, final int end )
64
    {
65
        boolean haveAnyText = false;
66
67
        for ( int c = start; c < end; c++ )
68
        {
69
            CharacterRun characterRun = characterRuns.get( c );
70
71
            if ( characterRun == null )
72
                throw new AssertionError();
73
74
            if ( hwpfDocument instanceof HWPFDocument
75
                    && ((HWPFDocument) hwpfDocument).getPicturesTable()
76
                            .hasPicture( characterRun ) )
77
            {
78
                HWPFDocument newFormat = (HWPFDocument) hwpfDocument;
79
                Picture picture = newFormat.getPicturesTable().extractPicture(
80
                        characterRun, true );
81
82
                processImage( block, characterRun.text().charAt( 0 ) == 0x01,
83
                        picture );
84
                continue;
85
            }
86
87
            String text = characterRun.text();
88
            if ( text.getBytes().length == 0 )
89
                continue;
90
91
            if ( text.getBytes()[0] == FIELD_BEGIN_MARK )
92
            {
93
                int skipTo = tryField( hwpfDocument, paragraph,
94
                        currentTableLevel, characterRuns, c, block );
95
96
                if ( skipTo != c )
97
                {
98
                    c = skipTo;
99
                    continue;
100
                }
101
102
                continue;
103
            }
104
            if ( text.getBytes()[0] == FIELD_SEPARATOR_MARK )
105
            {
106
                // shall not appear without FIELD_BEGIN_MARK
107
                continue;
108
            }
109
            if ( text.getBytes()[0] == FIELD_END_MARK )
110
            {
111
                // shall not appear without FIELD_BEGIN_MARK
112
                continue;
113
            }
114
115
            if ( characterRun.isSpecialCharacter() || characterRun.isObj()
116
                    || characterRun.isOle2() )
117
            {
118
                continue;
119
            }
120
121
            if ( text.endsWith( "\r" )
122
                    || (text.charAt( text.length() - 1 ) == BEL_MARK && currentTableLevel != 0) )
123
                text = text.substring( 0, text.length() - 1 );
124
125
            outputCharacters( block, characterRun, text );
126
127
            haveAnyText |= text.trim().length() != 0;
128
        }
129
130
        return haveAnyText;
131
    }
132
133
    public void processDocument( HWPFDocumentCore wordDocument )
134
    {
135
        final Range range = wordDocument.getRange();
136
        for ( int s = 0; s < range.numSections(); s++ )
137
        {
138
            processSection( wordDocument, range.getSection( s ), s );
139
        }
140
    }
141
142
    protected void processField( HWPFDocumentCore wordDocument,
143
            Element currentBlock, Paragraph paragraph, int currentTableLevel,
144
            List<CharacterRun> characterRuns, int beginMark, int separatorMark,
145
            int endMark )
146
    {
147
148
        Pattern hyperlinkPattern = Pattern
149
                .compile( "[ \\t\\r\\n]*HYPERLINK \"(.*)\"[ \\t\\r\\n]*" );
150
        Pattern pagerefPattern = Pattern
151
                .compile( "[ \\t\\r\\n]*PAGEREF ([^ ]*)[ \\t\\r\\n]*\\\\h[ \\t\\r\\n]*" );
152
153
        if ( separatorMark - beginMark > 1 )
154
        {
155
            int index = beginMark + 1;
156
            CharacterRun firstAfterBegin = null;
157
            while ( index < separatorMark )
158
            {
159
                firstAfterBegin = paragraph.getCharacterRun( index );
160
                if ( firstAfterBegin == null )
161
                {
162
                    logger.log( POILogger.WARN,
163
                            "Paragraph " + paragraph.getStartOffset() + "--"
164
                                    + paragraph.getEndOffset()
165
                                    + " contains null CharacterRun #" + index );
166
                    index++;
167
                    continue;
168
                }
169
                break;
170
            }
171
172
            if ( firstAfterBegin != null )
173
            {
174
                final Matcher hyperlinkMatcher = hyperlinkPattern
175
                        .matcher( firstAfterBegin.text() );
176
                if ( hyperlinkMatcher.matches() )
177
                {
178
                    String hyperlink = hyperlinkMatcher.group( 1 );
179
                    processHyperlink( wordDocument, currentBlock, paragraph,
180
                            characterRuns, currentTableLevel, hyperlink,
181
                            separatorMark + 1, endMark );
182
                    return;
183
                }
184
185
                final Matcher pagerefMatcher = pagerefPattern
186
                        .matcher( firstAfterBegin.text() );
187
                if ( pagerefMatcher.matches() )
188
                {
189
                    String pageref = pagerefMatcher.group( 1 );
190
                    processPageref( wordDocument, currentBlock, paragraph,
191
                            characterRuns, currentTableLevel, pageref,
192
                            separatorMark + 1, endMark );
193
                    return;
194
                }
195
            }
196
        }
197
198
        StringBuilder debug = new StringBuilder( "Unsupported field type: \n" );
199
        for ( int i = beginMark; i <= endMark; i++ )
200
        {
201
            debug.append( "\t" );
202
            debug.append( paragraph.getCharacterRun( i ) );
203
            debug.append( "\n" );
204
        }
205
        logger.log( POILogger.WARN, debug );
206
207
        // just output field value
208
        if ( separatorMark + 1 < endMark )
209
            processCharacters( wordDocument, currentTableLevel, paragraph,
210
                    currentBlock, characterRuns, separatorMark + 1, endMark );
211
212
        return;
213
    }
214
215
    protected abstract void processHyperlink( HWPFDocumentCore wordDocument,
216
            Element currentBlock, Paragraph paragraph,
217
            List<CharacterRun> characterRuns, int currentTableLevel,
218
            String hyperlink, int i, int endMark );
219
220
    protected abstract void processImage( Element currentBlock,
221
            boolean inlined, Picture picture );
222
223
    protected abstract void processPageref( HWPFDocumentCore wordDocument,
224
            Element currentBlock, Paragraph paragraph,
225
            List<CharacterRun> characterRuns, int currentTableLevel,
226
            String pageref, int beginTextInclusive, int endTextExclusive );
227
228
    protected abstract void processParagraph( HWPFDocumentCore wordDocument,
229
            Element parentFopElement, int currentTableLevel,
230
            Paragraph paragraph, String bulletText );
231
232
    protected abstract void processSection( HWPFDocumentCore wordDocument,
233
            Section section, int s );
234
235
    protected void processSectionParagraphes( HWPFDocumentCore wordDocument,
236
            Element flow, Range range, int currentTableLevel )
237
    {
238
        final Map<Integer, Table> allTables = new HashMap<Integer, Table>();
239
        for ( TableIterator tableIterator = AbstractWordUtils
240
                .newTableIterator( range, currentTableLevel + 1 ); tableIterator
241
                .hasNext(); )
242
        {
243
            Table next = tableIterator.next();
244
            allTables.put( Integer.valueOf( next.getStartOffset() ), next );
245
        }
246
247
        final ListTables listTables = wordDocument.getListTables();
248
        int currentListInfo = 0;
249
250
        final int paragraphs = range.numParagraphs();
251
        for ( int p = 0; p < paragraphs; p++ )
252
        {
253
            Paragraph paragraph = range.getParagraph( p );
254
255
            if ( allTables.containsKey( Integer.valueOf( paragraph
256
                    .getStartOffset() ) ) )
257
            {
258
                Table table = allTables.get( Integer.valueOf( paragraph
259
                        .getStartOffset() ) );
260
                processTable( wordDocument, flow, table, currentTableLevel + 1 );
261
                continue;
262
            }
263
264
            if ( paragraph.isInTable()
265
                    && paragraph.getTableLevel() != currentTableLevel )
266
            {
267
                continue;
268
            }
269
270
            if ( paragraph.getIlfo() != currentListInfo )
271
            {
272
                currentListInfo = paragraph.getIlfo();
273
            }
274
275
            if ( currentListInfo != 0 )
276
            {
277
                if ( listTables != null )
278
                {
279
                    final ListFormatOverride listFormatOverride = listTables
280
                            .getOverride( paragraph.getIlfo() );
281
282
                    String label = AbstractWordUtils
283
                            .getBulletText( listTables, paragraph,
284
                                    listFormatOverride.getLsid() );
285
286
                    processParagraph( wordDocument, flow, currentTableLevel,
287
                            paragraph, label );
288
                }
289
                else
290
                {
291
                    logger.log( POILogger.WARN,
292
                            "Paragraph #" + paragraph.getStartOffset() + "-"
293
                                    + paragraph.getEndOffset()
294
                                    + " has reference to list structure #"
295
                                    + currentListInfo
296
                                    + ", but listTables not defined in file" );
297
298
                    processParagraph( wordDocument, flow, currentTableLevel,
299
                            paragraph, AbstractWordUtils.EMPTY );
300
                }
301
            }
302
            else
303
            {
304
                processParagraph( wordDocument, flow, currentTableLevel,
305
                        paragraph, AbstractWordUtils.EMPTY );
306
            }
307
        }
308
309
    }
310
311
    protected void processSingleSection( HWPFDocumentCore wordDocument,
312
            Section section )
313
    {
314
        processSection( wordDocument, section, 0 );
315
    }
316
317
    protected abstract void processTable( HWPFDocumentCore wordDocument,
318
            Element flow, Table table, int newTableLevel );
319
320
    protected int tryField( HWPFDocumentCore wordDocument, Paragraph paragraph,
321
            int currentTableLevel, List<CharacterRun> characterRuns,
322
            int beginMark, Element currentBlock )
323
    {
324
        int separatorMark = -1;
325
        int endMark = -1;
326
        for ( int c = beginMark + 1; c < paragraph.numCharacterRuns(); c++ )
327
        {
328
            CharacterRun characterRun = paragraph.getCharacterRun( c );
329
330
            String text = characterRun.text();
331
            if ( text.getBytes().length == 0 )
332
                continue;
333
334
            if ( text.getBytes()[0] == FIELD_SEPARATOR_MARK )
335
            {
336
                if ( separatorMark != -1 )
337
                {
338
                    // double;
339
                    return beginMark;
340
                }
341
342
                separatorMark = c;
343
                continue;
344
            }
345
346
            if ( text.getBytes()[0] == FIELD_END_MARK )
347
            {
348
                if ( endMark != -1 )
349
                {
350
                    // double;
351
                    return beginMark;
352
                }
353
354
                endMark = c;
355
                break;
356
            }
357
358
        }
359
360
        if ( separatorMark == -1 || endMark == -1 )
361
            return beginMark;
362
363
        processField( wordDocument, currentBlock, paragraph, currentTableLevel,
364
                characterRuns, beginMark, separatorMark, endMark );
365
366
        return endMark;
367
    }
368
369
}
0
  + text/plain
370
  + text/plain
(-)src/org/apache/poi/hwpf/extractor/FoDocumentFacade.java (-32 / +29 lines)
Lines 22-37 Link Here
22
import org.w3c.dom.Element;
22
import org.w3c.dom.Element;
23
import org.w3c.dom.Text;
23
import org.w3c.dom.Text;
24
24
25
public abstract class AbstractToFoExtractor
25
public class FoDocumentFacade
26
{
26
{
27
28
    private static final String NS_XSLFO = "http://www.w3.org/1999/XSL/Format";
27
    private static final String NS_XSLFO = "http://www.w3.org/1999/XSL/Format";
29
28
30
    protected final Document document;
29
    protected final Document document;
31
    protected final Element layoutMasterSet;
30
    protected final Element layoutMasterSet;
32
    protected final Element root;
31
    protected final Element root;
33
32
34
    public AbstractToFoExtractor( Document document )
33
    public FoDocumentFacade( Document document )
35
    {
34
    {
36
        this.document = document;
35
        this.document = document;
37
36
Lines 43-49 Link Here
43
        root.appendChild( layoutMasterSet );
42
        root.appendChild( layoutMasterSet );
44
    }
43
    }
45
44
46
    protected Element addFlowToPageSequence( final Element pageSequence,
45
    public Element addFlowToPageSequence( final Element pageSequence,
47
            String flowName )
46
            String flowName )
48
    {
47
    {
49
        final Element flow = document.createElementNS( NS_XSLFO, "fo:flow" );
48
        final Element flow = document.createElementNS( NS_XSLFO, "fo:flow" );
Lines 53-80 Link Here
53
        return flow;
52
        return flow;
54
    }
53
    }
55
54
56
    protected Element addListItem( Element listBlock )
55
    public Element addListItem( Element listBlock )
57
    {
56
    {
58
        Element result = createListItem();
57
        Element result = createListItem();
59
        listBlock.appendChild( result );
58
        listBlock.appendChild( result );
60
        return result;
59
        return result;
61
    }
60
    }
62
61
63
    protected Element addListItemBody( Element listItem )
62
    public Element addListItemBody( Element listItem )
64
    {
63
    {
65
        Element result = createListItemBody();
64
        Element result = createListItemBody();
66
        listItem.appendChild( result );
65
        listItem.appendChild( result );
67
        return result;
66
        return result;
68
    }
67
    }
69
68
70
    protected Element addListItemLabel( Element listItem, String text )
69
    public Element addListItemLabel( Element listItem, String text )
71
    {
70
    {
72
        Element result = createListItemLabel( text );
71
        Element result = createListItemLabel( text );
73
        listItem.appendChild( result );
72
        listItem.appendChild( result );
74
        return result;
73
        return result;
75
    }
74
    }
76
75
77
    protected Element addPageSequence( String pageMaster )
76
    public Element addPageSequence( String pageMaster )
78
    {
77
    {
79
        final Element pageSequence = document.createElementNS( NS_XSLFO,
78
        final Element pageSequence = document.createElementNS( NS_XSLFO,
80
                "fo:page-sequence" );
79
                "fo:page-sequence" );
Lines 83-89 Link Here
83
        return pageSequence;
82
        return pageSequence;
84
    }
83
    }
85
84
86
    protected Element addRegionBody( Element pageMaster )
85
    public Element addRegionBody( Element pageMaster )
87
    {
86
    {
88
        final Element regionBody = document.createElementNS( NS_XSLFO,
87
        final Element regionBody = document.createElementNS( NS_XSLFO,
89
                "fo:region-body" );
88
                "fo:region-body" );
Lines 92-98 Link Here
92
        return regionBody;
91
        return regionBody;
93
    }
92
    }
94
93
95
    protected Element addSimplePageMaster( String masterName )
94
    public Element addSimplePageMaster( String masterName )
96
    {
95
    {
97
        final Element simplePageMaster = document.createElementNS( NS_XSLFO,
96
        final Element simplePageMaster = document.createElementNS( NS_XSLFO,
98
                "fo:simple-page-master" );
97
                "fo:simple-page-master" );
Lines 102-115 Link Here
102
        return simplePageMaster;
101
        return simplePageMaster;
103
    }
102
    }
104
103
105
    protected Element addTable( Element flow )
104
    public Element createBasicLinkExternal( String externalDestination )
106
    {
107
        final Element table = document.createElementNS( NS_XSLFO, "fo:table" );
108
        flow.appendChild( table );
109
        return table;
110
    }
111
112
    protected Element createBasicLinkExternal( String externalDestination )
113
    {
105
    {
114
        final Element basicLink = document.createElementNS( NS_XSLFO,
106
        final Element basicLink = document.createElementNS( NS_XSLFO,
115
                "fo:basic-link" );
107
                "fo:basic-link" );
Lines 117-123 Link Here
117
        return basicLink;
109
        return basicLink;
118
    }
110
    }
119
111
120
    protected Element createBasicLinkInternal( String internalDestination )
112
    public Element createBasicLinkInternal( String internalDestination )
121
    {
113
    {
122
        final Element basicLink = document.createElementNS( NS_XSLFO,
114
        final Element basicLink = document.createElementNS( NS_XSLFO,
123
                "fo:basic-link" );
115
                "fo:basic-link" );
Lines 125-136 Link Here
125
        return basicLink;
117
        return basicLink;
126
    }
118
    }
127
119
128
    protected Element createBlock()
120
    public Element createBlock()
129
    {
121
    {
130
        return document.createElementNS( NS_XSLFO, "fo:block" );
122
        return document.createElementNS( NS_XSLFO, "fo:block" );
131
    }
123
    }
132
124
133
    protected Element createExternalGraphic( String source )
125
    public Element createExternalGraphic( String source )
134
    {
126
    {
135
        Element result = document.createElementNS( NS_XSLFO,
127
        Element result = document.createElementNS( NS_XSLFO,
136
                "fo:external-graphic" );
128
                "fo:external-graphic" );
Lines 138-169 Link Here
138
        return result;
130
        return result;
139
    }
131
    }
140
132
141
    protected Element createInline()
133
    public Element createInline()
142
    {
134
    {
143
        return document.createElementNS( NS_XSLFO, "fo:inline" );
135
        return document.createElementNS( NS_XSLFO, "fo:inline" );
144
    }
136
    }
145
137
146
    protected Element createLeader()
138
    public Element createLeader()
147
    {
139
    {
148
        return document.createElementNS( NS_XSLFO, "fo:leader" );
140
        return document.createElementNS( NS_XSLFO, "fo:leader" );
149
    }
141
    }
150
142
151
    protected Element createListBlock()
143
    public Element createListBlock()
152
    {
144
    {
153
        return document.createElementNS( NS_XSLFO, "fo:list-block" );
145
        return document.createElementNS( NS_XSLFO, "fo:list-block" );
154
    }
146
    }
155
147
156
    protected Element createListItem()
148
    public Element createListItem()
157
    {
149
    {
158
        return document.createElementNS( NS_XSLFO, "fo:list-item" );
150
        return document.createElementNS( NS_XSLFO, "fo:list-item" );
159
    }
151
    }
160
152
161
    protected Element createListItemBody()
153
    public Element createListItemBody()
162
    {
154
    {
163
        return document.createElementNS( NS_XSLFO, "fo:list-item-body" );
155
        return document.createElementNS( NS_XSLFO, "fo:list-item-body" );
164
    }
156
    }
165
157
166
    protected Element createListItemLabel( String text )
158
    public Element createListItemLabel( String text )
167
    {
159
    {
168
        Element result = document.createElementNS( NS_XSLFO,
160
        Element result = document.createElementNS( NS_XSLFO,
169
                "fo:list-item-label" );
161
                "fo:list-item-label" );
Lines 173-199 Link Here
173
        return result;
165
        return result;
174
    }
166
    }
175
167
176
    protected Element createTableBody()
168
    public Element createTable()
169
    {
170
        return document.createElementNS( NS_XSLFO, "fo:table" );
171
    }
172
173
    public Element createTableBody()
177
    {
174
    {
178
        return document.createElementNS( NS_XSLFO, "fo:table-body" );
175
        return document.createElementNS( NS_XSLFO, "fo:table-body" );
179
    }
176
    }
180
177
181
    protected Element createTableCell()
178
    public Element createTableCell()
182
    {
179
    {
183
        return document.createElementNS( NS_XSLFO, "fo:table-cell" );
180
        return document.createElementNS( NS_XSLFO, "fo:table-cell" );
184
    }
181
    }
185
182
186
    protected Element createTableHeader()
183
    public Element createTableHeader()
187
    {
184
    {
188
        return document.createElementNS( NS_XSLFO, "fo:table-header" );
185
        return document.createElementNS( NS_XSLFO, "fo:table-header" );
189
    }
186
    }
190
187
191
    protected Element createTableRow()
188
    public Element createTableRow()
192
    {
189
    {
193
        return document.createElementNS( NS_XSLFO, "fo:table-row" );
190
        return document.createElementNS( NS_XSLFO, "fo:table-row" );
194
    }
191
    }
195
192
196
    protected Text createText( String data )
193
    public Text createText( String data )
197
    {
194
    {
198
        return document.createTextNode( data );
195
        return document.createTextNode( data );
199
    }
196
    }
(-)src/org/apache/poi/hwpf/extractor/WordToFoUtils.java (-413 / +231 lines)
Lines 1-489 Link Here
1
package org.apache.poi.hwpf.extractor;
1
package org.apache.poi.hwpf.extractor;
2
2
3
import java.lang.reflect.Constructor;
4
import java.lang.reflect.Field;
5
6
import org.apache.poi.hwpf.model.ListLevel;
7
import org.apache.poi.hwpf.model.ListTables;
8
import org.apache.poi.hwpf.usermodel.BorderCode;
3
import org.apache.poi.hwpf.usermodel.BorderCode;
9
import org.apache.poi.hwpf.usermodel.CharacterProperties;
4
import org.apache.poi.hwpf.usermodel.CharacterProperties;
10
import org.apache.poi.hwpf.usermodel.CharacterRun;
5
import org.apache.poi.hwpf.usermodel.CharacterRun;
11
import org.apache.poi.hwpf.usermodel.Paragraph;
6
import org.apache.poi.hwpf.usermodel.Paragraph;
12
import org.apache.poi.hwpf.usermodel.Picture;
7
import org.apache.poi.hwpf.usermodel.Picture;
13
import org.apache.poi.hwpf.usermodel.Range;
14
import org.apache.poi.hwpf.usermodel.Section;
15
import org.apache.poi.hwpf.usermodel.SectionProperties;
16
import org.apache.poi.hwpf.usermodel.TableCell;
8
import org.apache.poi.hwpf.usermodel.TableCell;
17
import org.apache.poi.hwpf.usermodel.TableIterator;
18
import org.apache.poi.hwpf.usermodel.TableRow;
9
import org.apache.poi.hwpf.usermodel.TableRow;
19
import org.w3c.dom.Element;
10
import org.w3c.dom.Element;
20
11
21
public class WordToFoUtils {
12
public class WordToFoUtils extends AbstractWordUtils
22
    static final String EMPTY = "";
13
{
23
14
    public static void setBold( final Element element, final boolean bold )
24
    public static final float TWIPS_PER_INCH = 1440.0f;
15
    {
25
16
        element.setAttribute( "font-weight", bold ? "bold" : "normal" );
26
    public static final int TWIPS_PER_PT = 20;
27
28
    static boolean equals(String str1, String str2) {
29
	return str1 == null ? str2 == null : str1.equals(str2);
30
    }
31
32
    public static String getBorderType(BorderCode borderCode) {
33
	if (borderCode == null)
34
	    throw new IllegalArgumentException("borderCode is null");
35
36
	switch (borderCode.getBorderType()) {
37
	case 1:
38
	case 2:
39
	    return "solid";
40
	case 3:
41
	    return "double";
42
	case 5:
43
	    return "solid";
44
	case 6:
45
	    return "dotted";
46
	case 7:
47
	case 8:
48
	    return "dashed";
49
	case 9:
50
	    return "dotted";
51
	case 10:
52
	case 11:
53
	case 12:
54
	case 13:
55
	case 14:
56
	case 15:
57
	case 16:
58
	case 17:
59
	case 18:
60
	case 19:
61
	    return "double";
62
	case 20:
63
	    return "solid";
64
	case 21:
65
	    return "double";
66
	case 22:
67
	    return "dashed";
68
	case 23:
69
	    return "dashed";
70
	case 24:
71
	    return "ridge";
72
	case 25:
73
	    return "grooved";
74
	default:
75
	    return "solid";
76
	}
77
    }
17
    }
78
18
79
    public static String getBorderWidth(BorderCode borderCode) {
19
    public static void setBorder( Element element, BorderCode borderCode,
80
	int lineWidth = borderCode.getLineWidth();
20
            String where )
81
	int pt = lineWidth / 8;
21
    {
82
	int pte = lineWidth - pt * 8;
22
        if ( element == null )
23
            throw new IllegalArgumentException( "element is null" );
83
24
84
	StringBuilder stringBuilder = new StringBuilder();
25
        if ( borderCode == null || borderCode.getBorderType() == 0 )
85
	stringBuilder.append(pt);
26
            return;
86
	stringBuilder.append(".");
87
	stringBuilder.append(1000 / 8 * pte);
88
	stringBuilder.append("pt");
89
	return stringBuilder.toString();
90
    }
91
27
92
    public static String getBulletText(ListTables listTables,
28
        if ( isEmpty( where ) )
93
	    Paragraph paragraph, int listId) {
29
        {
94
	final ListLevel listLevel = listTables.getLevel(listId,
30
            element.setAttribute( "border-style", getBorderType( borderCode ) );
95
		paragraph.getIlvl());
31
            element.setAttribute( "border-color",
96
32
                    getColor( borderCode.getColor() ) );
97
	if (listLevel.getNumberText() == null)
33
            element.setAttribute( "border-width", getBorderWidth( borderCode ) );
98
	    return EMPTY;
99
100
	StringBuffer bulletBuffer = new StringBuffer();
101
	char[] xst = listLevel.getNumberText().toCharArray();
102
	for (char element : xst) {
103
	    if (element < 9) {
104
		ListLevel numLevel = listTables.getLevel(listId, element);
105
106
		int num = numLevel.getStartAt();
107
		bulletBuffer.append(NumberFormatter.getNumber(num,
108
			listLevel.getNumberFormat()));
109
110
		if (numLevel == listLevel) {
111
		    numLevel.setStartAt(numLevel.getStartAt() + 1);
112
		}
113
114
	    } else {
115
		bulletBuffer.append(element);
116
	    }
117
	}
118
119
	byte follow = getIxchFollow(listLevel);
120
	switch (follow) {
121
	case 0:
122
	    bulletBuffer.append("\t");
123
	    break;
124
	case 1:
125
	    bulletBuffer.append(" ");
126
	    break;
127
	default:
128
	    break;
129
	}
130
131
	return bulletBuffer.toString();
132
    }
133
134
    public static String getColor(int ico) {
135
	switch (ico) {
136
	case 1:
137
	    return "black";
138
	case 2:
139
	    return "blue";
140
	case 3:
141
	    return "cyan";
142
	case 4:
143
	    return "green";
144
	case 5:
145
	    return "magenta";
146
	case 6:
147
	    return "red";
148
	case 7:
149
	    return "yellow";
150
	case 8:
151
	    return "white";
152
	case 9:
153
	    return "darkblue";
154
	case 10:
155
	    return "darkcyan";
156
	case 11:
157
	    return "darkgreen";
158
	case 12:
159
	    return "darkmagenta";
160
	case 13:
161
	    return "darkred";
162
	case 14:
163
	    return "darkyellow";
164
	case 15:
165
	    return "darkgray";
166
	case 16:
167
	    return "lightgray";
168
	default:
169
	    return "black";
170
	}
171
    }
172
173
    public static byte getIxchFollow(ListLevel listLevel) {
174
	try {
175
	    Field field = ListLevel.class.getDeclaredField("_ixchFollow");
176
	    field.setAccessible(true);
177
	    return ((Byte) field.get(listLevel)).byteValue();
178
	} catch (Exception exc) {
179
	    throw new Error(exc);
180
	}
181
    }
182
183
    public static String getJustification(int js) {
184
        switch (js) {
185
        case 0:
186
            return "start";
187
        case 1:
188
            return "center";
189
        case 2:
190
            return "end";
191
        case 3:
192
        case 4:
193
            return "justify";
194
        case 5:
195
            return "center";
196
        case 6:
197
            return "left";
198
        case 7:
199
            return "start";
200
        case 8:
201
            return "end";
202
        case 9:
203
            return "justify";
204
        }
34
        }
205
        return "";
35
        else
206
    }
36
        {
207
37
            element.setAttribute( "border-" + where + "-style",
208
    public static String getListItemNumberLabel(int number, int format) {
38
                    getBorderType( borderCode ) );
209
39
            element.setAttribute( "border-" + where + "-color",
210
	if (format != 0)
40
                    getColor( borderCode.getColor() ) );
211
	    System.err.println("NYI: toListItemNumberLabel(): " + format);
41
            element.setAttribute( "border-" + where + "-width",
212
42
                    getBorderWidth( borderCode ) );
213
	return String.valueOf(number);
43
        }
214
    }
215
216
    public static SectionProperties getSectionProperties(Section section) {
217
	try {
218
	    Field field = Section.class.getDeclaredField("_props");
219
	    field.setAccessible(true);
220
	    return (SectionProperties) field.get(section);
221
	} catch (Exception exc) {
222
	    throw new Error(exc);
223
	}
224
    }
225
226
    static boolean isEmpty(String str) {
227
	return str == null || str.length() == 0;
228
    }
229
230
    static boolean isNotEmpty(String str) {
231
	return !isEmpty(str);
232
    }
233
234
    public static TableIterator newTableIterator(Range range, int level) {
235
	try {
236
	    Constructor<TableIterator> constructor = TableIterator.class
237
		    .getDeclaredConstructor(Range.class, int.class);
238
	    constructor.setAccessible(true);
239
	    return constructor.newInstance(range, Integer.valueOf(level));
240
	} catch (Exception exc) {
241
	    throw new Error(exc);
242
	}
243
    }
244
245
    public static void setBold(final Element element, final boolean bold) {
246
	element.setAttribute("font-weight", bold ? "bold" : "normal");
247
    }
248
249
    public static void setBorder(Element element, BorderCode borderCode,
250
	    String where) {
251
	if (element == null)
252
	    throw new IllegalArgumentException("element is null");
253
254
	if (borderCode == null)
255
	    return;
256
257
	if (isEmpty(where)) {
258
	    element.setAttribute("border-style", getBorderType(borderCode));
259
	    element.setAttribute("border-color",
260
		    getColor(borderCode.getColor()));
261
	    element.setAttribute("border-width", getBorderWidth(borderCode));
262
	} else {
263
	    element.setAttribute("border-" + where + "-style",
264
		    getBorderType(borderCode));
265
	    element.setAttribute("border-" + where + "-color",
266
		    getColor(borderCode.getColor()));
267
	    element.setAttribute("border-" + where + "-width",
268
		    getBorderWidth(borderCode));
269
	}
270
    }
44
    }
271
45
272
    public static void setCharactersProperties(final CharacterRun characterRun,
46
    public static void setCharactersProperties(
273
            final Element inline) {
47
            final CharacterRun characterRun, final Element inline )
48
    {
274
        final CharacterProperties clonedProperties = characterRun
49
        final CharacterProperties clonedProperties = characterRun
275
                .cloneProperties();
50
                .cloneProperties();
276
        StringBuilder textDecorations = new StringBuilder();
51
        StringBuilder textDecorations = new StringBuilder();
277
52
278
        setBorder(inline, clonedProperties.getBrc(), EMPTY);
53
        setBorder( inline, clonedProperties.getBrc(), EMPTY );
279
54
280
        if (characterRun.isCapitalized()) {
55
        if ( characterRun.isCapitalized() )
281
            inline.setAttribute("text-transform", "uppercase");
56
        {
57
            inline.setAttribute( "text-transform", "uppercase" );
282
        }
58
        }
283
        if (characterRun.isHighlighted()) {
59
        if ( characterRun.isHighlighted() )
284
            inline.setAttribute("background-color",
60
        {
285
                    getColor(clonedProperties.getIcoHighlight()));
61
            inline.setAttribute( "background-color",
62
                    getColor( clonedProperties.getIcoHighlight() ) );
286
        }
63
        }
287
        if (characterRun.isStrikeThrough()) {
64
        if ( characterRun.isStrikeThrough() )
288
            if (textDecorations.length() > 0)
65
        {
289
                textDecorations.append(" ");
66
            if ( textDecorations.length() > 0 )
290
            textDecorations.append("line-through");
67
                textDecorations.append( " " );
68
            textDecorations.append( "line-through" );
291
        }
69
        }
292
        if (characterRun.isShadowed()) {
70
        if ( characterRun.isShadowed() )
293
            inline.setAttribute("text-shadow", characterRun.getFontSize() / 24
71
        {
294
                    + "pt");
72
            inline.setAttribute( "text-shadow", characterRun.getFontSize() / 24
73
                    + "pt" );
295
        }
74
        }
296
        if (characterRun.isSmallCaps()) {
75
        if ( characterRun.isSmallCaps() )
297
            inline.setAttribute("font-variant", "small-caps");
76
        {
77
            inline.setAttribute( "font-variant", "small-caps" );
298
        }
78
        }
299
        if (characterRun.getSubSuperScriptIndex() == 1) {
79
        if ( characterRun.getSubSuperScriptIndex() == 1 )
300
            inline.setAttribute("baseline-shift", "super");
80
        {
301
            inline.setAttribute("font-size", "smaller");
81
            inline.setAttribute( "baseline-shift", "super" );
82
            inline.setAttribute( "font-size", "smaller" );
302
        }
83
        }
303
        if (characterRun.getSubSuperScriptIndex() == 2) {
84
        if ( characterRun.getSubSuperScriptIndex() == 2 )
304
            inline.setAttribute("baseline-shift", "sub");
85
        {
305
            inline.setAttribute("font-size", "smaller");
86
            inline.setAttribute( "baseline-shift", "sub" );
87
            inline.setAttribute( "font-size", "smaller" );
306
        }
88
        }
307
        if (characterRun.getUnderlineCode() > 0) {
89
        if ( characterRun.getUnderlineCode() > 0 )
308
            if (textDecorations.length() > 0)
90
        {
309
                textDecorations.append(" ");
91
            if ( textDecorations.length() > 0 )
310
            textDecorations.append("underline");
92
                textDecorations.append( " " );
93
            textDecorations.append( "underline" );
311
        }
94
        }
312
        if (characterRun.isVanished()) {
95
        if ( characterRun.isVanished() )
313
            inline.setAttribute("visibility", "hidden");
96
        {
97
            inline.setAttribute( "visibility", "hidden" );
314
        }
98
        }
315
        if (textDecorations.length() > 0) {
99
        if ( textDecorations.length() > 0 )
316
            inline.setAttribute("text-decoration", textDecorations.toString());
100
        {
101
            inline.setAttribute( "text-decoration", textDecorations.toString() );
317
        }
102
        }
318
    }
103
    }
319
104
320
    public static void setFontFamily(final Element element,
105
    public static void setFontFamily( final Element element,
321
	    final String fontFamily) {
106
            final String fontFamily )
322
	element.setAttribute("font-family", fontFamily);
107
    {
108
        if ( isEmpty( fontFamily ) )
109
            return;
110
111
        element.setAttribute( "font-family", fontFamily );
323
    }
112
    }
324
113
325
    public static void setFontSize(final Element element, final int fontSize) {
114
    public static void setFontSize( final Element element, final int fontSize )
326
	element.setAttribute("font-size", String.valueOf(fontSize));
115
    {
116
        element.setAttribute( "font-size", String.valueOf( fontSize ) );
327
    }
117
    }
328
118
329
    public static void setIndent(Paragraph paragraph, Element block) {
119
    public static void setIndent( Paragraph paragraph, Element block )
330
	if (paragraph.getFirstLineIndent() != 0) {
120
    {
331
	    block.setAttribute(
121
        if ( paragraph.getFirstLineIndent() != 0 )
332
		    "text-indent",
122
        {
333
		    String.valueOf(paragraph.getFirstLineIndent()
123
            block.setAttribute(
334
			    / TWIPS_PER_PT)
124
                    "text-indent",
335
			    + "pt");
125
                    String.valueOf( paragraph.getFirstLineIndent()
336
	}
126
                            / TWIPS_PER_PT )
337
	if (paragraph.getIndentFromLeft() != 0) {
127
                            + "pt" );
338
	    block.setAttribute(
128
        }
339
		    "start-indent",
129
        if ( paragraph.getIndentFromLeft() != 0 )
340
		    String.valueOf(paragraph.getIndentFromLeft() / TWIPS_PER_PT)
130
        {
341
			    + "pt");
131
            block.setAttribute(
342
	}
132
                    "start-indent",
343
	if (paragraph.getIndentFromRight() != 0) {
133
                    String.valueOf( paragraph.getIndentFromLeft()
344
	    block.setAttribute(
134
                            / TWIPS_PER_PT )
345
		    "end-indent",
135
                            + "pt" );
346
		    String.valueOf(paragraph.getIndentFromRight()
136
        }
347
			    / TWIPS_PER_PT)
137
        if ( paragraph.getIndentFromRight() != 0 )
348
			    + "pt");
138
        {
349
	}
139
            block.setAttribute(
350
	if (paragraph.getSpacingBefore() != 0) {
140
                    "end-indent",
351
	    block.setAttribute("space-before",
141
                    String.valueOf( paragraph.getIndentFromRight()
352
		    String.valueOf(paragraph.getSpacingBefore() / TWIPS_PER_PT)
142
                            / TWIPS_PER_PT )
353
			    + "pt");
143
                            + "pt" );
354
	}
144
        }
355
	if (paragraph.getSpacingAfter() != 0) {
145
        if ( paragraph.getSpacingBefore() != 0 )
356
	    block.setAttribute("space-after",
146
        {
357
		    String.valueOf(paragraph.getSpacingAfter() / TWIPS_PER_PT)
147
            block.setAttribute(
358
			    + "pt");
148
                    "space-before",
359
	}
149
                    String.valueOf( paragraph.getSpacingBefore() / TWIPS_PER_PT )
150
                            + "pt" );
151
        }
152
        if ( paragraph.getSpacingAfter() != 0 )
153
        {
154
            block.setAttribute( "space-after",
155
                    String.valueOf( paragraph.getSpacingAfter() / TWIPS_PER_PT )
156
                            + "pt" );
157
        }
360
    }
158
    }
361
159
362
    public static void setItalic(final Element element, final boolean italic) {
160
    public static void setItalic( final Element element, final boolean italic )
363
	element.setAttribute("font-style", italic ? "italic" : "normal");
161
    {
162
        element.setAttribute( "font-style", italic ? "italic" : "normal" );
364
    }
163
    }
365
164
366
    public static void setJustification(Paragraph paragraph,
165
    public static void setJustification( Paragraph paragraph,
367
            final Element element) {
166
            final Element element )
368
        String justification = getJustification(paragraph.getJustification());
167
    {
369
        if (isNotEmpty(justification))
168
        String justification = getJustification( paragraph.getJustification() );
370
            element.setAttribute("text-align", justification);
169
        if ( isNotEmpty( justification ) )
170
            element.setAttribute( "text-align", justification );
371
    }
171
    }
372
172
373
    public static void setParagraphProperties(Paragraph paragraph, Element block) {
173
    public static void setParagraphProperties( Paragraph paragraph,
374
	setIndent(paragraph, block);
174
            Element block )
375
	setJustification(paragraph, block);
175
    {
176
        setIndent( paragraph, block );
177
        setJustification( paragraph, block );
376
178
377
	setBorder(block, paragraph.getBottomBorder(), "bottom");
179
        setBorder( block, paragraph.getBottomBorder(), "bottom" );
378
	setBorder(block, paragraph.getLeftBorder(), "left");
180
        setBorder( block, paragraph.getLeftBorder(), "left" );
379
	setBorder(block, paragraph.getRightBorder(), "right");
181
        setBorder( block, paragraph.getRightBorder(), "right" );
380
	setBorder(block, paragraph.getTopBorder(), "top");
182
        setBorder( block, paragraph.getTopBorder(), "top" );
381
183
382
	if (paragraph.pageBreakBefore()) {
184
        if ( paragraph.pageBreakBefore() )
383
	    block.setAttribute("break-before", "page");
185
        {
384
	}
186
            block.setAttribute( "break-before", "page" );
187
        }
385
188
386
	block.setAttribute("hyphenate",
189
        block.setAttribute( "hyphenate",
387
		String.valueOf(paragraph.isAutoHyphenated()));
190
                String.valueOf( paragraph.isAutoHyphenated() ) );
388
191
389
	if (paragraph.keepOnPage()) {
192
        if ( paragraph.keepOnPage() )
390
	    block.setAttribute("keep-together.within-page", "always");
193
        {
391
	}
194
            block.setAttribute( "keep-together.within-page", "always" );
195
        }
392
196
393
	if (paragraph.keepWithNext()) {
197
        if ( paragraph.keepWithNext() )
394
	    block.setAttribute("keep-with-next.within-page", "always");
198
        {
395
	}
199
            block.setAttribute( "keep-with-next.within-page", "always" );
200
        }
396
201
397
	block.setAttribute("linefeed-treatment", "preserve");
202
        block.setAttribute( "linefeed-treatment", "preserve" );
398
	block.setAttribute("white-space-collapse", "false");
203
        block.setAttribute( "white-space-collapse", "false" );
399
    }
204
    }
400
205
401
    public static void setPictureProperties(Picture picture,
206
    public static void setPictureProperties( Picture picture,
402
            Element graphicElement) {
207
            Element graphicElement )
208
    {
403
        final int aspectRatioX = picture.getAspectRatioX();
209
        final int aspectRatioX = picture.getAspectRatioX();
404
        final int aspectRatioY = picture.getAspectRatioY();
210
        final int aspectRatioY = picture.getAspectRatioY();
405
211
406
        if (aspectRatioX > 0) {
212
        if ( aspectRatioX > 0 )
407
            graphicElement.setAttribute("content-width", ((picture.getDxaGoal()
213
        {
408
                    * aspectRatioX / 100) / WordToFoUtils.TWIPS_PER_PT)
214
            graphicElement
409
                    + "pt");
215
                    .setAttribute( "content-width", ((picture.getDxaGoal()
410
        } else
216
                            * aspectRatioX / 100) / TWIPS_PER_PT)
411
            graphicElement.setAttribute("content-width",
217
                            + "pt" );
412
                    (picture.getDxaGoal() / WordToFoUtils.TWIPS_PER_PT) + "pt");
218
        }
219
        else
220
            graphicElement.setAttribute( "content-width",
221
                    (picture.getDxaGoal() / TWIPS_PER_PT) + "pt" );
413
222
414
        if (aspectRatioY > 0)
223
        if ( aspectRatioY > 0 )
415
            graphicElement
224
            graphicElement
416
                    .setAttribute("content-height", ((picture.getDyaGoal()
225
                    .setAttribute( "content-height", ((picture.getDyaGoal()
417
                            * aspectRatioY / 100) / WordToFoUtils.TWIPS_PER_PT)
226
                            * aspectRatioY / 100) / TWIPS_PER_PT)
418
                            + "pt");
227
                            + "pt" );
419
        else
228
        else
420
            graphicElement.setAttribute("content-height",
229
            graphicElement.setAttribute( "content-height",
421
                    (picture.getDyaGoal() / WordToFoUtils.TWIPS_PER_PT) + "pt");
230
                    (picture.getDyaGoal() / TWIPS_PER_PT) + "pt" );
422
231
423
        if (aspectRatioX <= 0 || aspectRatioY <= 0) {
232
        if ( aspectRatioX <= 0 || aspectRatioY <= 0 )
424
            graphicElement.setAttribute("scaling", "uniform");
233
        {
425
        } else {
234
            graphicElement.setAttribute( "scaling", "uniform" );
426
            graphicElement.setAttribute("scaling", "non-uniform");
235
        }
236
        else
237
        {
238
            graphicElement.setAttribute( "scaling", "non-uniform" );
427
        }
239
        }
428
240
429
        graphicElement.setAttribute("vertical-align", "text-bottom");
241
        graphicElement.setAttribute( "vertical-align", "text-bottom" );
430
242
431
        if (picture.getDyaCropTop() != 0 || picture.getDxaCropRight() != 0
243
        if ( picture.getDyaCropTop() != 0 || picture.getDxaCropRight() != 0
432
                || picture.getDyaCropBottom() != 0
244
                || picture.getDyaCropBottom() != 0
433
                || picture.getDxaCropLeft() != 0) {
245
                || picture.getDxaCropLeft() != 0 )
434
            int rectTop = picture.getDyaCropTop() / WordToFoUtils.TWIPS_PER_PT;
246
        {
435
            int rectRight = picture.getDxaCropRight()
247
            int rectTop = picture.getDyaCropTop() / TWIPS_PER_PT;
436
                    / WordToFoUtils.TWIPS_PER_PT;
248
            int rectRight = picture.getDxaCropRight() / TWIPS_PER_PT;
437
            int rectBottom = picture.getDyaCropBottom()
249
            int rectBottom = picture.getDyaCropBottom() / TWIPS_PER_PT;
438
                    / WordToFoUtils.TWIPS_PER_PT;
250
            int rectLeft = picture.getDxaCropLeft() / TWIPS_PER_PT;
439
            int rectLeft = picture.getDxaCropLeft()
251
            graphicElement.setAttribute( "clip", "rect(" + rectTop + "pt, "
440
                    / WordToFoUtils.TWIPS_PER_PT;
441
            graphicElement.setAttribute("clip", "rect(" + rectTop + "pt, "
442
                    + rectRight + "pt, " + rectBottom + "pt, " + rectLeft
252
                    + rectRight + "pt, " + rectBottom + "pt, " + rectLeft
443
                    + "pt)");
253
                    + "pt)" );
444
            graphicElement.setAttribute("oveerflow", "hidden");
254
            graphicElement.setAttribute( "oveerflow", "hidden" );
445
        }
255
        }
446
    }
256
    }
447
257
448
    public static void setTableCellProperties(TableRow tableRow,
258
    public static void setTableCellProperties( TableRow tableRow,
449
	    TableCell tableCell, Element element, boolean toppest,
259
            TableCell tableCell, Element element, boolean toppest,
450
	    boolean bottomest, boolean leftest, boolean rightest) {
260
            boolean bottomest, boolean leftest, boolean rightest )
451
	element.setAttribute("width", (tableCell.getWidth() / TWIPS_PER_INCH)
261
    {
452
		+ "in");
262
        element.setAttribute( "width", (tableCell.getWidth() / TWIPS_PER_INCH)
453
	element.setAttribute("padding-start",
263
                + "in" );
454
		(tableRow.getGapHalf() / TWIPS_PER_INCH) + "in");
264
        element.setAttribute( "padding-start",
455
	element.setAttribute("padding-end",
265
                (tableRow.getGapHalf() / TWIPS_PER_INCH) + "in" );
456
		(tableRow.getGapHalf() / TWIPS_PER_INCH) + "in");
266
        element.setAttribute( "padding-end",
267
                (tableRow.getGapHalf() / TWIPS_PER_INCH) + "in" );
457
268
458
	BorderCode top = tableCell.getBrcTop() != null ? tableCell.getBrcTop()
269
        BorderCode top = tableCell.getBrcTop() != null
459
		: toppest ? tableRow.getTopBorder() : tableRow
270
                && tableCell.getBrcTop().getBorderType() != 0 ? tableCell
460
			.getHorizontalBorder();
271
                .getBrcTop() : toppest ? tableRow.getTopBorder() : tableRow
461
	BorderCode bottom = tableCell.getBrcBottom() != null ? tableCell
272
                .getHorizontalBorder();
462
		.getBrcBottom() : bottomest ? tableRow.getBottomBorder()
273
        BorderCode bottom = tableCell.getBrcBottom() != null
463
		: tableRow.getHorizontalBorder();
274
                && tableCell.getBrcBottom().getBorderType() != 0 ? tableCell
275
                .getBrcBottom() : bottomest ? tableRow.getBottomBorder()
276
                : tableRow.getHorizontalBorder();
464
277
465
	BorderCode left = tableCell.getBrcLeft() != null ? tableCell
278
        BorderCode left = tableCell.getBrcLeft() != null
466
		.getBrcLeft() : leftest ? tableRow.getLeftBorder() : tableRow
279
                && tableCell.getBrcLeft().getBorderType() != 0 ? tableCell
467
		.getVerticalBorder();
280
                .getBrcLeft() : leftest ? tableRow.getLeftBorder() : tableRow
468
	BorderCode right = tableCell.getBrcRight() != null ? tableCell
281
                .getVerticalBorder();
469
		.getBrcRight() : rightest ? tableRow.getRightBorder()
282
        BorderCode right = tableCell.getBrcRight() != null
470
		: tableRow.getVerticalBorder();
283
                && tableCell.getBrcRight().getBorderType() != 0 ? tableCell
284
                .getBrcRight() : rightest ? tableRow.getRightBorder()
285
                : tableRow.getVerticalBorder();
471
286
472
	setBorder(element, bottom, "bottom");
287
        setBorder( element, bottom, "bottom" );
473
	setBorder(element, left, "left");
288
        setBorder( element, left, "left" );
474
	setBorder(element, right, "right");
289
        setBorder( element, right, "right" );
475
	setBorder(element, top, "top");
290
        setBorder( element, top, "top" );
476
    }
291
    }
477
292
478
    public static void setTableRowProperties(TableRow tableRow,
293
    public static void setTableRowProperties( TableRow tableRow,
479
	    Element tableRowElement) {
294
            Element tableRowElement )
480
	if (tableRow.getRowHeight() > 0) {
295
    {
481
	    tableRowElement.setAttribute("height",
296
        if ( tableRow.getRowHeight() > 0 )
482
		    (tableRow.getRowHeight() / TWIPS_PER_INCH) + "in");
297
        {
483
	}
298
            tableRowElement.setAttribute( "height",
484
	if (!tableRow.cantSplit()) {
299
                    (tableRow.getRowHeight() / TWIPS_PER_INCH) + "in" );
485
	    tableRowElement.setAttribute("keep-together", "always");
300
        }
486
	}
301
        if ( !tableRow.cantSplit() )
302
        {
303
            tableRowElement.setAttribute( "keep-together", "always" );
304
        }
487
    }
305
    }
488
306
489
}
307
}
(-)src/org/apache/poi/hwpf/extractor/AbstractToFoExtractor.java (-204 lines)
Lines 1-204 Link Here
1
/*
2
 *  ====================================================================
3
 *    Licensed to the Apache Software Foundation (ASF) under one or more
4
 *    contributor license agreements.  See the NOTICE file distributed with
5
 *    this work for additional information regarding copyright ownership.
6
 *    The ASF licenses this file to You under the Apache License, Version 2.0
7
 *    (the "License"); you may not use this file except in compliance with
8
 *    the License.  You may obtain a copy of the License at
9
 *
10
 *        http://www.apache.org/licenses/LICENSE-2.0
11
 *
12
 *    Unless required by applicable law or agreed to in writing, software
13
 *    distributed under the License is distributed on an "AS IS" BASIS,
14
 *    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
 *    See the License for the specific language governing permissions and
16
 *    limitations under the License.
17
 * ====================================================================
18
 */
19
package org.apache.poi.hwpf.extractor;
20
21
import org.w3c.dom.Document;
22
import org.w3c.dom.Element;
23
import org.w3c.dom.Text;
24
25
public abstract class AbstractToFoExtractor
26
{
27
28
    private static final String NS_XSLFO = "http://www.w3.org/1999/XSL/Format";
29
30
    protected final Document document;
31
    protected final Element layoutMasterSet;
32
    protected final Element root;
33
34
    public AbstractToFoExtractor( Document document )
35
    {
36
        this.document = document;
37
38
        root = document.createElementNS( NS_XSLFO, "fo:root" );
39
        document.appendChild( root );
40
41
        layoutMasterSet = document.createElementNS( NS_XSLFO,
42
                "fo:layout-master-set" );
43
        root.appendChild( layoutMasterSet );
44
    }
45
46
    protected Element addFlowToPageSequence( final Element pageSequence,
47
            String flowName )
48
    {
49
        final Element flow = document.createElementNS( NS_XSLFO, "fo:flow" );
50
        flow.setAttribute( "flow-name", flowName );
51
        pageSequence.appendChild( flow );
52
53
        return flow;
54
    }
55
56
    protected Element addListItem( Element listBlock )
57
    {
58
        Element result = createListItem();
59
        listBlock.appendChild( result );
60
        return result;
61
    }
62
63
    protected Element addListItemBody( Element listItem )
64
    {
65
        Element result = createListItemBody();
66
        listItem.appendChild( result );
67
        return result;
68
    }
69
70
    protected Element addListItemLabel( Element listItem, String text )
71
    {
72
        Element result = createListItemLabel( text );
73
        listItem.appendChild( result );
74
        return result;
75
    }
76
77
    protected Element addPageSequence( String pageMaster )
78
    {
79
        final Element pageSequence = document.createElementNS( NS_XSLFO,
80
                "fo:page-sequence" );
81
        pageSequence.setAttribute( "master-reference", pageMaster );
82
        root.appendChild( pageSequence );
83
        return pageSequence;
84
    }
85
86
    protected Element addRegionBody( Element pageMaster )
87
    {
88
        final Element regionBody = document.createElementNS( NS_XSLFO,
89
                "fo:region-body" );
90
        pageMaster.appendChild( regionBody );
91
92
        return regionBody;
93
    }
94
95
    protected Element addSimplePageMaster( String masterName )
96
    {
97
        final Element simplePageMaster = document.createElementNS( NS_XSLFO,
98
                "fo:simple-page-master" );
99
        simplePageMaster.setAttribute( "master-name", masterName );
100
        layoutMasterSet.appendChild( simplePageMaster );
101
102
        return simplePageMaster;
103
    }
104
105
    protected Element createBasicLinkExternal( String externalDestination )
106
    {
107
        final Element basicLink = document.createElementNS( NS_XSLFO,
108
                "fo:basic-link" );
109
        basicLink.setAttribute( "external-destination", externalDestination );
110
        return basicLink;
111
    }
112
113
    protected Element createBasicLinkInternal( String internalDestination )
114
    {
115
        final Element basicLink = document.createElementNS( NS_XSLFO,
116
                "fo:basic-link" );
117
        basicLink.setAttribute( "internal-destination", internalDestination );
118
        return basicLink;
119
    }
120
121
    protected Element createBlock()
122
    {
123
        return document.createElementNS( NS_XSLFO, "fo:block" );
124
    }
125
126
    protected Element createExternalGraphic( String source )
127
    {
128
        Element result = document.createElementNS( NS_XSLFO,
129
                "fo:external-graphic" );
130
        result.setAttribute( "src", "url('" + source + "')" );
131
        return result;
132
    }
133
134
    protected Element createInline()
135
    {
136
        return document.createElementNS( NS_XSLFO, "fo:inline" );
137
    }
138
139
    protected Element createLeader()
140
    {
141
        return document.createElementNS( NS_XSLFO, "fo:leader" );
142
    }
143
144
    protected Element createListBlock()
145
    {
146
        return document.createElementNS( NS_XSLFO, "fo:list-block" );
147
    }
148
149
    protected Element createListItem()
150
    {
151
        return document.createElementNS( NS_XSLFO, "fo:list-item" );
152
    }
153
154
    protected Element createListItemBody()
155
    {
156
        return document.createElementNS( NS_XSLFO, "fo:list-item-body" );
157
    }
158
159
    protected Element createListItemLabel( String text )
160
    {
161
        Element result = document.createElementNS( NS_XSLFO,
162
                "fo:list-item-label" );
163
        Element block = createBlock();
164
        block.appendChild( document.createTextNode( text ) );
165
        result.appendChild( block );
166
        return result;
167
    }
168
169
    protected Element createTable()
170
    {
171
        return document.createElementNS( NS_XSLFO, "fo:table" );
172
    }
173
174
    protected Element createTableBody()
175
    {
176
        return document.createElementNS( NS_XSLFO, "fo:table-body" );
177
    }
178
179
    protected Element createTableCell()
180
    {
181
        return document.createElementNS( NS_XSLFO, "fo:table-cell" );
182
    }
183
184
    protected Element createTableHeader()
185
    {
186
        return document.createElementNS( NS_XSLFO, "fo:table-header" );
187
    }
188
189
    protected Element createTableRow()
190
    {
191
        return document.createElementNS( NS_XSLFO, "fo:table-row" );
192
    }
193
194
    protected Text createText( String data )
195
    {
196
        return document.createTextNode( data );
197
    }
198
199
    public Document getDocument()
200
    {
201
        return document;
202
    }
203
204
}
(-)src/org/apache/poi/hwpf/extractor/HtmlDocumentFacade.java (+109 lines)
Line 0 Link Here
1
/*
2
 *  ====================================================================
3
 *    Licensed to the Apache Software Foundation (ASF) under one or more
4
 *    contributor license agreements.  See the NOTICE file distributed with
5
 *    this work for additional information regarding copyright ownership.
6
 *    The ASF licenses this file to You under the Apache License, Version 2.0
7
 *    (the "License"); you may not use this file except in compliance with
8
 *    the License.  You may obtain a copy of the License at
9
 *
10
 *        http://www.apache.org/licenses/LICENSE-2.0
11
 *
12
 *    Unless required by applicable law or agreed to in writing, software
13
 *    distributed under the License is distributed on an "AS IS" BASIS,
14
 *    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
 *    See the License for the specific language governing permissions and
16
 *    limitations under the License.
17
 * ====================================================================
18
 */
19
package org.apache.poi.hwpf.extractor;
20
21
import org.w3c.dom.Document;
22
import org.w3c.dom.Element;
23
import org.w3c.dom.Text;
24
25
public class HtmlDocumentFacade
26
{
27
28
    protected final Element body;
29
    protected final Document document;
30
    protected final Element head;
31
    protected final Element html;
32
33
    public HtmlDocumentFacade( Document document )
34
    {
35
        this.document = document;
36
37
        html = document.createElement( "html" );
38
        document.appendChild( html );
39
40
        body = document.createElement( "body" );
41
        head = document.createElement( "head" );
42
43
        html.appendChild( head );
44
        html.appendChild( body );
45
    }
46
47
    public Element createHyperlink( String internalDestination )
48
    {
49
        final Element basicLink = document.createElement( "a" );
50
        basicLink.setAttribute( "href", internalDestination );
51
        return basicLink;
52
    }
53
54
    public Element createListItem()
55
    {
56
        return document.createElement( "li" );
57
    }
58
59
    public Element createParagraph()
60
    {
61
        return document.createElement( "p" );
62
    }
63
64
    public Element createTable()
65
    {
66
        return document.createElement( "table" );
67
    }
68
69
    public Element createTableBody()
70
    {
71
        return document.createElement( "tbody" );
72
    }
73
74
    public Element createTableCell()
75
    {
76
        return document.createElement( "td" );
77
    }
78
79
    public Element createTableHeader()
80
    {
81
        return document.createElement( "thead" );
82
    }
83
84
    public Element createTableHeaderCell()
85
    {
86
        return document.createElement( "th" );
87
    }
88
89
    public Element createTableRow()
90
    {
91
        return document.createElement( "tr" );
92
    }
93
94
    public Text createText( String data )
95
    {
96
        return document.createTextNode( data );
97
    }
98
99
    public Element createUnorderedList()
100
    {
101
        return document.createElement( "ul" );
102
    }
103
104
    public Document getDocument()
105
    {
106
        return document;
107
    }
108
109
}
0
  + text/plain
110
  + text/plain
(-)src/org/apache/poi/hwpf/extractor/WordToFoExtractor.java (-266 / +131 lines)
Lines 19-32 Link Here
19
package org.apache.poi.hwpf.extractor;
19
package org.apache.poi.hwpf.extractor;
20
20
21
import java.io.File;
21
import java.io.File;
22
import java.io.FileInputStream;
23
import java.io.FileWriter;
22
import java.io.FileWriter;
24
import java.io.IOException;
25
import java.util.HashMap;
23
import java.util.HashMap;
24
import java.util.List;
26
import java.util.Map;
25
import java.util.Map;
27
import java.util.Stack;
26
import java.util.Stack;
28
import java.util.regex.Matcher;
29
import java.util.regex.Pattern;
30
27
31
import javax.xml.parsers.DocumentBuilderFactory;
28
import javax.xml.parsers.DocumentBuilderFactory;
32
import javax.xml.transform.OutputKeys;
29
import javax.xml.transform.OutputKeys;
Lines 36-43 Link Here
36
import javax.xml.transform.stream.StreamResult;
33
import javax.xml.transform.stream.StreamResult;
37
34
38
import org.apache.poi.hwpf.HWPFDocument;
35
import org.apache.poi.hwpf.HWPFDocument;
36
import org.apache.poi.hwpf.HWPFDocumentCore;
39
import org.apache.poi.hwpf.model.ListFormatOverride;
37
import org.apache.poi.hwpf.model.ListFormatOverride;
40
import org.apache.poi.hwpf.model.ListTables;
38
import org.apache.poi.hwpf.model.ListTables;
39
import org.apache.poi.hwpf.usermodel.BorderCode;
41
import org.apache.poi.hwpf.usermodel.CharacterRun;
40
import org.apache.poi.hwpf.usermodel.CharacterRun;
42
import org.apache.poi.hwpf.usermodel.Paragraph;
41
import org.apache.poi.hwpf.usermodel.Paragraph;
43
import org.apache.poi.hwpf.usermodel.Picture;
42
import org.apache.poi.hwpf.usermodel.Picture;
Lines 54-65 Link Here
54
import org.w3c.dom.Element;
53
import org.w3c.dom.Element;
55
import org.w3c.dom.Text;
54
import org.w3c.dom.Text;
56
55
57
import static org.apache.poi.hwpf.extractor.WordToFoUtils.TWIPS_PER_INCH;
58
59
/**
56
/**
60
 * @author Sergey Vladimirov (vlsergey {at} gmail {dot} com)
57
 * @author Sergey Vladimirov (vlsergey {at} gmail {dot} com)
61
 */
58
 */
62
public class WordToFoExtractor extends AbstractToFoExtractor
59
public class WordToFoExtractor extends AbstractWordExtractor
63
{
60
{
64
61
65
    /**
62
    /**
Lines 84-118 Link Here
84
        }
81
        }
85
    }
82
    }
86
83
87
    private static final byte BEL_MARK = 7;
88
89
    private static final byte FIELD_BEGIN_MARK = 19;
90
91
    private static final byte FIELD_END_MARK = 21;
92
93
    private static final byte FIELD_SEPARATOR_MARK = 20;
94
95
    private static final POILogger logger = POILogFactory
84
    private static final POILogger logger = POILogFactory
96
            .getLogger( WordToFoExtractor.class );
85
            .getLogger( WordToFoExtractor.class );
97
86
98
    private static HWPFDocument loadDoc( File docFile ) throws IOException
87
    public static String getBorderType( BorderCode borderCode )
99
    {
88
    {
100
        final FileInputStream istream = new FileInputStream( docFile );
89
        if ( borderCode == null )
101
        try
90
            throw new IllegalArgumentException( "borderCode is null" );
91
92
        switch ( borderCode.getBorderType() )
102
        {
93
        {
103
            return new HWPFDocument( istream );
94
        case 1:
104
        }
95
        case 2:
105
        finally
96
            return "solid";
106
        {
97
        case 3:
107
            try
98
            return "double";
108
            {
99
        case 5:
109
                istream.close();
100
            return "solid";
110
            }
101
        case 6:
111
            catch ( Exception exc )
102
            return "dotted";
112
            {
103
        case 7:
113
                logger.log( POILogger.ERROR,
104
        case 8:
114
                        "Unable to close FileInputStream: " + exc, exc );
105
            return "dashed";
115
            }
106
        case 9:
107
            return "dotted";
108
        case 10:
109
        case 11:
110
        case 12:
111
        case 13:
112
        case 14:
113
        case 15:
114
        case 16:
115
        case 17:
116
        case 18:
117
        case 19:
118
            return "double";
119
        case 20:
120
            return "solid";
121
        case 21:
122
            return "double";
123
        case 22:
124
            return "dashed";
125
        case 23:
126
            return "dashed";
127
        case 24:
128
            return "ridge";
129
        case 25:
130
            return "grooved";
131
        default:
132
            return "solid";
116
        }
133
        }
117
    }
134
    }
118
135
Lines 160-166 Link Here
160
177
161
    static Document process( File docFile ) throws Exception
178
    static Document process( File docFile ) throws Exception
162
    {
179
    {
163
        final HWPFDocument hwpfDocument = loadDoc( docFile );
180
        final HWPFDocumentCore hwpfDocument = WordToFoUtils.loadDoc( docFile );
164
        WordToFoExtractor wordToFoExtractor = new WordToFoExtractor(
181
        WordToFoExtractor wordToFoExtractor = new WordToFoExtractor(
165
                DocumentBuilderFactory.newInstance().newDocumentBuilder()
182
                DocumentBuilderFactory.newInstance().newDocumentBuilder()
166
                        .newDocument() );
183
                        .newDocument() );
Lines 170-175 Link Here
170
187
171
    private final Stack<BlockProperies> blocksProperies = new Stack<BlockProperies>();
188
    private final Stack<BlockProperies> blocksProperies = new Stack<BlockProperies>();
172
189
190
    protected final FoDocumentFacade foDocumentFacade;
191
173
    /**
192
    /**
174
     * Creates new instance of {@link WordToFoExtractor}. Can be used for output
193
     * Creates new instance of {@link WordToFoExtractor}. Can be used for output
175
     * several {@link HWPFDocument}s into single FO document.
194
     * several {@link HWPFDocument}s into single FO document.
Lines 180-206 Link Here
180
     */
199
     */
181
    public WordToFoExtractor( Document document )
200
    public WordToFoExtractor( Document document )
182
    {
201
    {
183
        super( document );
202
        this.foDocumentFacade = new FoDocumentFacade( document );
184
    }
203
    }
185
204
186
    protected String createPageMaster( SectionProperties sep, String type,
205
    protected String createPageMaster( SectionProperties sep, String type,
187
            int section )
206
            int section )
188
    {
207
    {
189
        float height = sep.getYaPage() / TWIPS_PER_INCH;
208
        float height = sep.getYaPage() / WordToFoUtils.TWIPS_PER_INCH;
190
        float width = sep.getXaPage() / TWIPS_PER_INCH;
209
        float width = sep.getXaPage() / WordToFoUtils.TWIPS_PER_INCH;
191
        float leftMargin = sep.getDxaLeft() / TWIPS_PER_INCH;
210
        float leftMargin = sep.getDxaLeft() / WordToFoUtils.TWIPS_PER_INCH;
192
        float rightMargin = sep.getDxaRight() / TWIPS_PER_INCH;
211
        float rightMargin = sep.getDxaRight() / WordToFoUtils.TWIPS_PER_INCH;
193
        float topMargin = sep.getDyaTop() / TWIPS_PER_INCH;
212
        float topMargin = sep.getDyaTop() / WordToFoUtils.TWIPS_PER_INCH;
194
        float bottomMargin = sep.getDyaBottom() / TWIPS_PER_INCH;
213
        float bottomMargin = sep.getDyaBottom() / WordToFoUtils.TWIPS_PER_INCH;
195
214
196
        // add these to the header
215
        // add these to the header
197
        String pageMasterName = type + "-page" + section;
216
        String pageMasterName = type + "-page" + section;
198
217
199
        Element pageMaster = addSimplePageMaster( pageMasterName );
218
        Element pageMaster = foDocumentFacade
219
                .addSimplePageMaster( pageMasterName );
200
        pageMaster.setAttribute( "page-height", height + "in" );
220
        pageMaster.setAttribute( "page-height", height + "in" );
201
        pageMaster.setAttribute( "page-width", width + "in" );
221
        pageMaster.setAttribute( "page-width", width + "in" );
202
222
203
        Element regionBody = addRegionBody( pageMaster );
223
        Element regionBody = foDocumentFacade.addRegionBody( pageMaster );
204
        regionBody.setAttribute( "margin", topMargin + "in " + rightMargin
224
        regionBody.setAttribute( "margin", topMargin + "in " + rightMargin
205
                + "in " + bottomMargin + "in " + leftMargin + "in" );
225
                + "in " + bottomMargin + "in " + leftMargin + "in" );
206
226
Lines 221-227 Link Here
221
            if ( sep.getFEvenlySpaced() )
241
            if ( sep.getFEvenlySpaced() )
222
            {
242
            {
223
                regionBody.setAttribute( "column-gap",
243
                regionBody.setAttribute( "column-gap",
224
                        (sep.getDxaColumns() / TWIPS_PER_INCH) + "in" );
244
                        (sep.getDxaColumns() / WordToFoUtils.TWIPS_PER_INCH)
245
                                + "in" );
225
            }
246
            }
226
            else
247
            else
227
            {
248
            {
Lines 232-402 Link Here
232
        return pageMasterName;
253
        return pageMasterName;
233
    }
254
    }
234
255
235
    protected boolean processCharacters( HWPFDocument hwpfDocument,
256
    public Document getDocument()
236
            int currentTableLevel, Paragraph paragraph, final Element block,
237
            final int start, final int end )
238
    {
257
    {
239
        boolean haveAnyText = false;
258
        return foDocumentFacade.getDocument();
240
241
        for ( int c = start; c < end; c++ )
242
        {
243
            CharacterRun characterRun = paragraph.getCharacterRun( c );
244
245
            if ( hwpfDocument.getPicturesTable().hasPicture( characterRun ) )
246
            {
247
                Picture picture = hwpfDocument.getPicturesTable()
248
                        .extractPicture( characterRun, true );
249
250
                processImage( block, characterRun.text().charAt( 0 ) == 0x01,
251
                        picture );
252
                continue;
253
            }
254
255
            String text = characterRun.text();
256
            if ( text.getBytes().length == 0 )
257
                continue;
258
259
            if ( text.getBytes()[0] == FIELD_BEGIN_MARK )
260
            {
261
                int skipTo = tryField( hwpfDocument, paragraph,
262
                        currentTableLevel, c, block );
263
264
                if ( skipTo != c )
265
                {
266
                    c = skipTo;
267
                    continue;
268
                }
269
270
                continue;
271
            }
272
            if ( text.getBytes()[0] == FIELD_SEPARATOR_MARK )
273
            {
274
                // shall not appear without FIELD_BEGIN_MARK
275
                continue;
276
            }
277
            if ( text.getBytes()[0] == FIELD_END_MARK )
278
            {
279
                // shall not appear without FIELD_BEGIN_MARK
280
                continue;
281
            }
282
283
            if ( characterRun.isSpecialCharacter() || characterRun.isObj()
284
                    || characterRun.isOle2() )
285
            {
286
                continue;
287
            }
288
289
            BlockProperies blockProperies = this.blocksProperies.peek();
290
            Element inline = createInline();
291
            if ( characterRun.isBold() != blockProperies.pBold )
292
            {
293
                WordToFoUtils.setBold( inline, characterRun.isBold() );
294
            }
295
            if ( characterRun.isItalic() != blockProperies.pItalic )
296
            {
297
                WordToFoUtils.setItalic( inline, characterRun.isItalic() );
298
            }
299
            if ( !WordToFoUtils.equals( characterRun.getFontName(),
300
                    blockProperies.pFontName ) )
301
            {
302
                WordToFoUtils
303
                        .setFontFamily( inline, characterRun.getFontName() );
304
            }
305
            if ( characterRun.getFontSize() / 2 != blockProperies.pFontSize )
306
            {
307
                WordToFoUtils.setFontSize( inline,
308
                        characterRun.getFontSize() / 2 );
309
            }
310
            WordToFoUtils.setCharactersProperties( characterRun, inline );
311
            block.appendChild( inline );
312
313
            if ( text.endsWith( "\r" )
314
                    || (text.charAt( text.length() - 1 ) == BEL_MARK && currentTableLevel != 0) )
315
                text = text.substring( 0, text.length() - 1 );
316
317
            Text textNode = createText( text );
318
            inline.appendChild( textNode );
319
320
            haveAnyText |= text.trim().length() != 0;
321
        }
322
323
        return haveAnyText;
324
    }
259
    }
325
260
326
    public void processDocument( HWPFDocument hwpfDocument )
261
    @Override
262
    protected void outputCharacters( Element block, CharacterRun characterRun,
263
            String text )
327
    {
264
    {
328
        final Range range = hwpfDocument.getRange();
265
        BlockProperies blockProperies = this.blocksProperies.peek();
329
266
        Element inline = foDocumentFacade.createInline();
330
        for ( int s = 0; s < range.numSections(); s++ )
267
        if ( characterRun.isBold() != blockProperies.pBold )
331
        {
268
        {
332
            processSection( hwpfDocument, range.getSection( s ), s );
269
            WordToFoUtils.setBold( inline, characterRun.isBold() );
333
        }
270
        }
334
    }
271
        if ( characterRun.isItalic() != blockProperies.pItalic )
335
336
    protected void processField( HWPFDocument hwpfDocument,
337
            Element currentBlock, Paragraph paragraph, int currentTableLevel,
338
            int beginMark, int separatorMark, int endMark )
339
    {
340
341
        Pattern hyperlinkPattern = Pattern
342
                .compile( "[ \\t\\r\\n]*HYPERLINK \"(.*)\"[ \\t\\r\\n]*" );
343
        Pattern pagerefPattern = Pattern
344
                .compile( "[ \\t\\r\\n]*PAGEREF ([^ ]*)[ \\t\\r\\n]*\\\\h[ \\t\\r\\n]*" );
345
346
        if ( separatorMark - beginMark > 1 )
347
        {
272
        {
348
            CharacterRun firstAfterBegin = paragraph
273
            WordToFoUtils.setItalic( inline, characterRun.isItalic() );
349
                    .getCharacterRun( beginMark + 1 );
350
351
            final Matcher hyperlinkMatcher = hyperlinkPattern
352
                    .matcher( firstAfterBegin.text() );
353
            if ( hyperlinkMatcher.matches() )
354
            {
355
                String hyperlink = hyperlinkMatcher.group( 1 );
356
                processHyperlink( hwpfDocument, currentBlock, paragraph,
357
                        currentTableLevel, hyperlink, separatorMark + 1,
358
                        endMark );
359
                return;
360
            }
361
362
            final Matcher pagerefMatcher = pagerefPattern
363
                    .matcher( firstAfterBegin.text() );
364
            if ( pagerefMatcher.matches() )
365
            {
366
                String pageref = pagerefMatcher.group( 1 );
367
                processPageref( hwpfDocument, currentBlock, paragraph,
368
                        currentTableLevel, pageref, separatorMark + 1, endMark );
369
                return;
370
            }
371
        }
274
        }
372
275
        if ( characterRun.getFontName() != null
373
        StringBuilder debug = new StringBuilder( "Unsupported field type: \n" );
276
                && !AbstractWordUtils.equals(
374
        for ( int i = beginMark; i <= endMark; i++ )
277
                        characterRun.getFontName(), blockProperies.pFontName ) )
375
        {
278
        {
376
            debug.append( "\t" );
279
            WordToFoUtils.setFontFamily( inline, characterRun.getFontName() );
377
            debug.append( paragraph.getCharacterRun( i ) );
378
            debug.append( "\n" );
379
        }
280
        }
380
        logger.log( POILogger.WARN, debug );
281
        if ( characterRun.getFontSize() / 2 != blockProperies.pFontSize )
381
282
        {
382
        // just output field value
283
            WordToFoUtils.setFontSize( inline, characterRun.getFontSize() / 2 );
383
        if ( separatorMark + 1 < endMark )
284
        }
384
            processCharacters( hwpfDocument, currentTableLevel, paragraph,
285
        WordToFoUtils.setCharactersProperties( characterRun, inline );
385
                    currentBlock, separatorMark + 1, endMark );
286
        block.appendChild( inline );
386
287
387
        return;
288
        Text textNode = foDocumentFacade.createText( text );
289
        inline.appendChild( textNode );
388
    }
290
    }
389
291
390
    protected void processHyperlink( HWPFDocument hwpfDocument,
292
    protected void processHyperlink( HWPFDocumentCore hwpfDocument,
391
            Element currentBlock, Paragraph paragraph, int currentTableLevel,
293
            Element currentBlock, Paragraph paragraph,
294
            List<CharacterRun> characterRuns, int currentTableLevel,
392
            String hyperlink, int beginTextInclusive, int endTextExclusive )
295
            String hyperlink, int beginTextInclusive, int endTextExclusive )
393
    {
296
    {
394
        Element basicLink = createBasicLinkExternal( hyperlink );
297
        Element basicLink = foDocumentFacade
298
                .createBasicLinkExternal( hyperlink );
395
        currentBlock.appendChild( basicLink );
299
        currentBlock.appendChild( basicLink );
396
300
397
        if ( beginTextInclusive < endTextExclusive )
301
        if ( beginTextInclusive < endTextExclusive )
398
            processCharacters( hwpfDocument, currentTableLevel, paragraph,
302
            processCharacters( hwpfDocument, currentTableLevel, paragraph,
399
                    basicLink, beginTextInclusive, endTextExclusive );
303
                    basicLink, characterRuns, beginTextInclusive,
304
                    endTextExclusive );
400
    }
305
    }
401
306
402
    /**
307
    /**
Lines 422-448 Link Here
422
            Picture picture )
327
            Picture picture )
423
    {
328
    {
424
        // no default implementation -- skip
329
        // no default implementation -- skip
425
        currentBlock.appendChild( document.createComment( "Image link to '"
330
        currentBlock.appendChild( foDocumentFacade.getDocument().createComment(
426
                + picture.suggestFullFileName() + "' can be here" ) );
331
                "Image link to '" + picture.suggestFullFileName()
332
                        + "' can be here" ) );
427
    }
333
    }
428
334
429
    protected void processPageref( HWPFDocument hwpfDocument,
335
    protected void processPageref( HWPFDocumentCore hwpfDocument,
430
            Element currentBlock, Paragraph paragraph, int currentTableLevel,
336
            Element currentBlock, Paragraph paragraph,
337
            List<CharacterRun> characterRuns, int currentTableLevel,
431
            String pageref, int beginTextInclusive, int endTextExclusive )
338
            String pageref, int beginTextInclusive, int endTextExclusive )
432
    {
339
    {
433
        Element basicLink = createBasicLinkInternal( pageref );
340
        Element basicLink = foDocumentFacade.createBasicLinkInternal( pageref );
434
        currentBlock.appendChild( basicLink );
341
        currentBlock.appendChild( basicLink );
435
342
436
        if ( beginTextInclusive < endTextExclusive )
343
        if ( beginTextInclusive < endTextExclusive )
437
            processCharacters( hwpfDocument, currentTableLevel, paragraph,
344
            processCharacters( hwpfDocument, currentTableLevel, paragraph,
438
                    basicLink, beginTextInclusive, endTextExclusive );
345
                    basicLink, characterRuns, beginTextInclusive,
346
                    endTextExclusive );
439
    }
347
    }
440
348
441
    protected void processParagraph( HWPFDocument hwpfDocument,
349
    protected void processParagraph( HWPFDocumentCore hwpfDocument,
442
            Element parentFopElement, int currentTableLevel,
350
            Element parentFopElement, int currentTableLevel,
443
            Paragraph paragraph, String bulletText )
351
            Paragraph paragraph, String bulletText )
444
    {
352
    {
445
        final Element block = createBlock();
353
        final Element block = foDocumentFacade.createBlock();
446
        parentFopElement.appendChild( block );
354
        parentFopElement.appendChild( block );
447
355
448
        WordToFoUtils.setParagraphProperties( paragraph, block );
356
        WordToFoUtils.setParagraphProperties( paragraph, block );
Lines 480-500 Link Here
480
388
481
            if ( WordToFoUtils.isNotEmpty( bulletText ) )
389
            if ( WordToFoUtils.isNotEmpty( bulletText ) )
482
            {
390
            {
483
                Element inline = createInline();
391
                Element inline = foDocumentFacade.createInline();
484
                block.appendChild( inline );
392
                block.appendChild( inline );
485
393
486
                Text textNode = createText( bulletText );
394
                Text textNode = foDocumentFacade.createText( bulletText );
487
                inline.appendChild( textNode );
395
                inline.appendChild( textNode );
488
396
489
                haveAnyText |= bulletText.trim().length() != 0;
397
                haveAnyText |= bulletText.trim().length() != 0;
490
            }
398
            }
491
399
400
            List<CharacterRun> characterRuns = WordToFoUtils
401
                    .findCharacterRuns( paragraph );
492
            haveAnyText = processCharacters( hwpfDocument, currentTableLevel,
402
            haveAnyText = processCharacters( hwpfDocument, currentTableLevel,
493
                    paragraph, block, 0, charRuns );
403
                    paragraph, block, characterRuns, 0, characterRuns.size() );
494
404
495
            if ( !haveAnyText )
405
            if ( !haveAnyText )
496
            {
406
            {
497
                Element leader = createLeader();
407
                Element leader = foDocumentFacade.createLeader();
498
                block.appendChild( leader );
408
                block.appendChild( leader );
499
            }
409
            }
500
        }
410
        }
Lines 506-525 Link Here
506
        return;
416
        return;
507
    }
417
    }
508
418
509
    protected void processSection( HWPFDocument hwpfDocument, Section section,
419
    protected void processSection( HWPFDocumentCore wordDocument,
510
            int sectionCounter )
420
            Section section, int sectionCounter )
511
    {
421
    {
512
        String regularPage = createPageMaster(
422
        String regularPage = createPageMaster(
513
                WordToFoUtils.getSectionProperties( section ), "page",
423
                WordToFoUtils.getSectionProperties( section ), "page",
514
                sectionCounter );
424
                sectionCounter );
515
425
516
        Element pageSequence = addPageSequence( regularPage );
426
        Element pageSequence = foDocumentFacade.addPageSequence( regularPage );
517
        Element flow = addFlowToPageSequence( pageSequence, "xsl-region-body" );
427
        Element flow = foDocumentFacade.addFlowToPageSequence( pageSequence,
428
                "xsl-region-body" );
518
429
519
        processSectionParagraphes( hwpfDocument, flow, section, 0 );
430
        processSectionParagraphes( wordDocument, flow, section, 0 );
520
    }
431
    }
521
432
522
    protected void processSectionParagraphes( HWPFDocument hwpfDocument,
433
    protected void processSectionParagraphes( HWPFDocument wordDocument,
523
            Element flow, Range range, int currentTableLevel )
434
            Element flow, Range range, int currentTableLevel )
524
    {
435
    {
525
        final Map<Integer, Table> allTables = new HashMap<Integer, Table>();
436
        final Map<Integer, Table> allTables = new HashMap<Integer, Table>();
Lines 530-536 Link Here
530
            allTables.put( Integer.valueOf( next.getStartOffset() ), next );
441
            allTables.put( Integer.valueOf( next.getStartOffset() ), next );
531
        }
442
        }
532
443
533
        final ListTables listTables = hwpfDocument.getListTables();
444
        final ListTables listTables = wordDocument.getListTables();
534
        int currentListInfo = 0;
445
        int currentListInfo = 0;
535
446
536
        final int paragraphs = range.numParagraphs();
447
        final int paragraphs = range.numParagraphs();
Lines 543-549 Link Here
543
            {
454
            {
544
                Table table = allTables.get( Integer.valueOf( paragraph
455
                Table table = allTables.get( Integer.valueOf( paragraph
545
                        .getStartOffset() ) );
456
                        .getStartOffset() ) );
546
                processTable( hwpfDocument, flow, table, currentTableLevel + 1 );
457
                processTable( wordDocument, flow, table, currentTableLevel + 1 );
547
                continue;
458
                continue;
548
            }
459
            }
549
460
Lines 568-574 Link Here
568
                    String label = WordToFoUtils.getBulletText( listTables,
479
                    String label = WordToFoUtils.getBulletText( listTables,
569
                            paragraph, listFormatOverride.getLsid() );
480
                            paragraph, listFormatOverride.getLsid() );
570
481
571
                    processParagraph( hwpfDocument, flow, currentTableLevel,
482
                    processParagraph( wordDocument, flow, currentTableLevel,
572
                            paragraph, label );
483
                            paragraph, label );
573
                }
484
                }
574
                else
485
                else
Lines 580-603 Link Here
580
                                    + currentListInfo
491
                                    + currentListInfo
581
                                    + ", but listTables not defined in file" );
492
                                    + ", but listTables not defined in file" );
582
493
583
                    processParagraph( hwpfDocument, flow, currentTableLevel,
494
                    processParagraph( wordDocument, flow, currentTableLevel,
584
                            paragraph, WordToFoUtils.EMPTY );
495
                            paragraph, WordToFoUtils.EMPTY );
585
                }
496
                }
586
            }
497
            }
587
            else
498
            else
588
            {
499
            {
589
                processParagraph( hwpfDocument, flow, currentTableLevel,
500
                processParagraph( wordDocument, flow, currentTableLevel,
590
                        paragraph, WordToFoUtils.EMPTY );
501
                        paragraph, WordToFoUtils.EMPTY );
591
            }
502
            }
592
        }
503
        }
593
504
594
    }
505
    }
595
506
596
    protected void processTable( HWPFDocument hwpfDocument, Element flow,
507
    protected void processTable( HWPFDocumentCore wordDocument, Element flow,
597
            Table table, int thisTableLevel )
508
            Table table, int thisTableLevel )
598
    {
509
    {
599
        Element tableHeader = createTableHeader();
510
        Element tableHeader = foDocumentFacade.createTableHeader();
600
        Element tableBody = createTableBody();
511
        Element tableBody = foDocumentFacade.createTableBody();
601
512
602
        final int tableRows = table.numRows();
513
        final int tableRows = table.numRows();
603
514
Lines 611-617 Link Here
611
        {
522
        {
612
            TableRow tableRow = table.getRow( r );
523
            TableRow tableRow = table.getRow( r );
613
524
614
            Element tableRowElement = createTableRow();
525
            Element tableRowElement = foDocumentFacade.createTableRow();
615
            WordToFoUtils.setTableRowProperties( tableRow, tableRowElement );
526
            WordToFoUtils.setTableRowProperties( tableRow, tableRowElement );
616
527
617
            final int rowCells = tableRow.numCells();
528
            final int rowCells = tableRow.numCells();
Lines 626-632 Link Here
626
                        && !tableCell.isFirstVerticallyMerged() )
537
                        && !tableCell.isFirstVerticallyMerged() )
627
                    continue;
538
                    continue;
628
539
629
                Element tableCellElement = createTableCell();
540
                Element tableCellElement = foDocumentFacade.createTableCell();
630
                WordToFoUtils.setTableCellProperties( tableRow, tableCell,
541
                WordToFoUtils.setTableCellProperties( tableRow, tableCell,
631
                        tableCellElement, r == 0, r == tableRows - 1, c == 0,
542
                        tableCellElement, r == 0, r == tableRows - 1, c == 0,
632
                        c == rowCells - 1 );
543
                        c == rowCells - 1 );
Lines 673-684 Link Here
673
                            + count );
584
                            + count );
674
                }
585
                }
675
586
676
                processSectionParagraphes( hwpfDocument, tableCellElement,
587
                processSectionParagraphes( wordDocument, tableCellElement,
677
                        tableCell, thisTableLevel );
588
                        tableCell, thisTableLevel );
678
589
679
                if ( !tableCellElement.hasChildNodes() )
590
                if ( !tableCellElement.hasChildNodes() )
680
                {
591
                {
681
                    tableCellElement.appendChild( createBlock() );
592
                    tableCellElement.appendChild( foDocumentFacade
593
                            .createBlock() );
682
                }
594
                }
683
595
684
                tableRowElement.appendChild( tableCellElement );
596
                tableRowElement.appendChild( tableCellElement );
Lines 694-700 Link Here
694
            }
606
            }
695
        }
607
        }
696
608
697
        final Element tableElement = createTable();
609
        final Element tableElement = foDocumentFacade.createTable();
698
        if ( tableHeader.hasChildNodes() )
610
        if ( tableHeader.hasChildNodes() )
699
        {
611
        {
700
            tableElement.appendChild( tableHeader );
612
            tableElement.appendChild( tableHeader );
Lines 714-764 Link Here
714
        }
626
        }
715
    }
627
    }
716
628
717
    protected int tryField( HWPFDocument hwpfDocument, Paragraph paragraph,
718
            int currentTableLevel, int beginMark, Element currentBlock )
719
    {
720
        int separatorMark = -1;
721
        int endMark = -1;
722
        for ( int c = beginMark + 1; c < paragraph.numCharacterRuns(); c++ )
723
        {
724
            CharacterRun characterRun = paragraph.getCharacterRun( c );
725
726
            String text = characterRun.text();
727
            if ( text.getBytes().length == 0 )
728
                continue;
729
730
            if ( text.getBytes()[0] == FIELD_SEPARATOR_MARK )
731
            {
732
                if ( separatorMark != -1 )
733
                {
734
                    // double;
735
                    return beginMark;
736
                }
737
738
                separatorMark = c;
739
                continue;
740
            }
741
742
            if ( text.getBytes()[0] == FIELD_END_MARK )
743
            {
744
                if ( endMark != -1 )
745
                {
746
                    // double;
747
                    return beginMark;
748
                }
749
750
                endMark = c;
751
                break;
752
            }
753
754
        }
755
756
        if ( separatorMark == -1 || endMark == -1 )
757
            return beginMark;
758
759
        processField( hwpfDocument, currentBlock, paragraph, currentTableLevel,
760
                beginMark, separatorMark, endMark );
761
762
        return endMark;
763
    }
764
}
629
}
(-)src/org/apache/poi/generator/FieldIterator.java (+156 lines)
Line 0 Link Here
1
/* ====================================================================
2
   Licensed to the Apache Software Foundation (ASF) under one or more
3
   contributor license agreements.  See the NOTICE file distributed with
4
   this work for additional information regarding copyright ownership.
5
   The ASF licenses this file to You under the Apache License, Version 2.0
6
   (the "License"); you may not use this file except in compliance with
7
   the License.  You may obtain a copy of the License at
8
9
       http://www.apache.org/licenses/LICENSE-2.0
10
11
   Unless required by applicable law or agreed to in writing, software
12
   distributed under the License is distributed on an "AS IS" BASIS,
13
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
   See the License for the specific language governing permissions and
15
   limitations under the License.
16
==================================================================== */
17
        
18
19
20
package org.apache.poi.generator;
21
22
/**
23
 * <p>For iterating through our fields.</p>
24
 *
25
 * @author Glen Stampoultzis (glens at apache.org)
26
 */
27
public class FieldIterator
28
{
29
    protected int offset;
30
31
    public FieldIterator()
32
    {
33
    }
34
35
    /**
36
     * This utility function returns a fill method entry for a given field
37
     *
38
     * @param size - how big of an "int" or the name of the size field for a string
39
     * @param type - int or string
40
     */
41
    public String fillDecoder(String size, String type)
42
    {
43
        String javaType = RecordUtil.getType(size, type, 0);
44
45
        String result = "";
46
        if (javaType.equals("short"))
47
            result = "LittleEndian.getShort(data, pos + 0x" + Integer.toHexString(offset) + " + offset)";
48
        else if (javaType.equals("short[]"))
49
            result = "LittleEndian.getShortArray(data, pos + 0x" + Integer.toHexString(offset) + " + offset)";
50
        else if (javaType.equals("int"))
51
            result = "LittleEndian.getInt(data, pos + 0x" + Integer.toHexString(offset) + " + offset)";
52
        else if (javaType.equals("byte"))
53
            result = "data[ pos + 0x" + Integer.toHexString(offset) + " + offset ]";
54
        else if (javaType.equals("double"))
55
            result = "LittleEndian.getDouble(data, pos + 0x" + Integer.toHexString(offset) + " + offset)";
56
        else if (javaType.equals("String") && !type.equals("hbstring"))
57
            result = "StringUtil.getFromUnicode(data, pos + 0x" + Integer.toHexString(offset) + " + offset,("+ size + "-1)/2)";
58
        else if (javaType.equals("String") && type.equals("hbstring"))
59
            result = "StringUtil.getFromUnicodeHigh(data, pos + 0x" + Integer.toHexString(offset) + " + offset, ("+ size+"/2))";
60
61
        try
62
        {
63
            offset += Integer.parseInt(size);
64
        }
65
        catch (NumberFormatException ignore)
66
        {
67
        }
68
        return result;
69
    }
70
71
    public String fillDecoder2(int position, String name, String size, String type)
72
    {
73
        if (type.startsWith("custom:"))
74
        {
75
            StringBuffer result = new StringBuffer();
76
            result.append( RecordUtil.getFieldName( position, name, 0 ) );
77
            result.append( " = new " );
78
            String javaType = type.substring( 7 );
79
            result.append(javaType);
80
            result.append( "();\n");
81
            result.append( "        pos += " );
82
            result.append(RecordUtil.getFieldName(position, name, 0))
83
                    .append(".fillField(data,size,pos + offset + ")
84
                    .append(offset)
85
                    .append(")");
86
            return result.toString();
87
        }
88
        else
89
        {
90
            return RecordUtil.getFieldName(position, name, 30) +
91
                    " = " + fillDecoder(size, type);
92
        }
93
    }
94
95
96
    //position(),@name,@size,@type
97
    public String serialiseEncoder( int fieldNumber, String fieldName, String size, String type)
98
    {
99
        String javaType = RecordUtil.getType(size, type, 0);
100
        String javaFieldName = RecordUtil.getFieldName(fieldNumber,fieldName,0);
101
102
        String result = "";
103
        if (type.startsWith("custom:"))
104
            result = "pos += " + javaFieldName + ".serializeField( pos + " + (offset+4) + " + offset, data );";
105
        else if (javaType.equals("short"))
106
            result = "LittleEndian.putShort(data, " + (offset+4) + " + offset + pos, " + javaFieldName + ");";
107
        else if (javaType.equals("short[]"))
108
            result = "LittleEndian.putShortArray(data, " + (offset+4) + " + offset + pos, " + javaFieldName + ");";
109
        else if (javaType.equals("int"))
110
            result = "LittleEndian.putInt(data, " + (offset+4) + " + offset + pos, " + javaFieldName + ");";
111
        else if (javaType.equals("byte"))
112
            result = "data[ " + (offset+4) + " + offset + pos ] = " + javaFieldName + ";";
113
        else if (javaType.equals("double"))
114
            result = "LittleEndian.putDouble(data, " + (offset+4) + " + offset + pos, " + javaFieldName + ");";
115
        else if (javaType.equals("String") && !type.equals("hbstring"))
116
            result = "StringUtil.putUncompressedUnicode("+ javaFieldName +", data, offset + pos + 4);";
117
        else if (javaType.equals("String") && type.equals("hbstring"))
118
            result = "StringUtil.putUncompressedUnicodeHigh("+ javaFieldName +", data, "+(offset+4)+" + offset + pos);";
119
120
121
        try
122
        {
123
            offset += Integer.parseInt(size);
124
        }
125
        catch (NumberFormatException ignore)
126
        {
127
        }
128
        return result;
129
130
    }
131
132
    public String calcSize( int fieldNumber, String fieldName, String size, String type)
133
    {
134
        String result = " + ";
135
        if (type.startsWith("custom:"))
136
        {
137
            String javaFieldName = RecordUtil.getFieldName(fieldNumber, fieldName, 0);
138
            return result + javaFieldName + ".getSize()";
139
        }
140
        else if ("var".equals(size))
141
        {
142
            String javaFieldName = RecordUtil.getFieldName(fieldNumber,fieldName,0);
143
            return result + " ("+javaFieldName + ".length() *2)";
144
        }
145
        else if ("varword".equals(size))
146
        {
147
            String javaFieldName = RecordUtil.getFieldName(fieldNumber,fieldName,0);
148
            return result + javaFieldName + ".length * 2 + 2";
149
        } else
150
        {
151
            return result + size;
152
        }
153
    }
154
155
156
}
(-)src/org/apache/poi/generator/RecordUtil.java (+241 lines)
Line 0 Link Here
1
2
/* ====================================================================
3
   Licensed to the Apache Software Foundation (ASF) under one or more
4
   contributor license agreements.  See the NOTICE file distributed with
5
   this work for additional information regarding copyright ownership.
6
   The ASF licenses this file to You under the Apache License, Version 2.0
7
   (the "License"); you may not use this file except in compliance with
8
   the License.  You may obtain a copy of the License at
9
10
       http://www.apache.org/licenses/LICENSE-2.0
11
12
   Unless required by applicable law or agreed to in writing, software
13
   distributed under the License is distributed on an "AS IS" BASIS,
14
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
   See the License for the specific language governing permissions and
16
   limitations under the License.
17
==================================================================== */
18
        
19
20
package org.apache.poi.generator;
21
22
import java.util.StringTokenizer;
23
24
/**
 * Helper functions for the record transformations: they turn the names,
 * sizes and types of a record definition into fragments of Java source.
 *
 * @author Glen Stampoultzis (glens at apache.org)
 * @author Andrew C. Oliver (acoliver at apache dot org)
 */
public class RecordUtil
{
    private static final String CR = "\n";

    /**
     * Builds the positional field name {@code field_<position>_<identifier>}.
     *
     * @param position number placed after the {@code field_} prefix
     * @param name     field name; each space is dropped and the character
     *                 after it is upper-cased
     * @param padTo    minimum length of the result, right-padded with spaces
     */
    public static String getFieldName(int position, String name, int padTo)
    {
        StringBuffer fieldName = new StringBuffer("field_" + position + "_");
        toIdentifier(name, fieldName);
        pad(fieldName, padTo);

        return fieldName.toString();
    }

    /**
     * Right-pads the buffer with spaces until it is at least {@code padTo}
     * characters long.
     *
     * @return the same buffer, for chaining
     */
    protected static StringBuffer pad(StringBuffer fieldName, int padTo)
    {
        for (int i = fieldName.length(); i < padTo; i++)
            fieldName.append(' ');
        return fieldName;
    }

    /**
     * Converts a field name into an identifier, without positional prefix.
     *
     * @param name  field name; each space is dropped and the character after
     *              it is upper-cased
     * @param padTo minimum length of the result, right-padded with spaces
     */
    public static String getFieldName(String name, int padTo)
    {
        StringBuffer fieldName = new StringBuffer();
        toIdentifier(name, fieldName);
        pad(fieldName, padTo);

        return fieldName.toString();
    }

    /**
     * Same as {@link #getFieldName(String, int)} but with the first
     * character upper-cased. An empty name yields only the padding.
     */
    public static String getFieldName1stCap(String name, int padTo)
    {
        StringBuffer fieldName = new StringBuffer();
        toIdentifier(name, fieldName);
        // An empty name leaves nothing to capitalise; setCharAt(0, ...)
        // would throw on an empty buffer.
        if (fieldName.length() > 0)
            fieldName.setCharAt(0, Character.toUpperCase(fieldName.charAt(0)));
        pad(fieldName, padTo);

        return fieldName.toString();
    }

    /**
     * Returns the assignment that copies one field into the clone
     * {@code rec}. Custom types are copied through their own
     * {@code clone()}; everything else is assigned directly. Only the
     * custom-type form ends with a semicolon.
     */
    public static String clone(String name, String type, int pos) {
        String javaFieldName = getFieldName(pos, name, 0);

        if (type.startsWith("custom:"))
        {
            String javaType = type.substring(7);
            return "rec." + javaFieldName + " = ((" + javaType + ")" + javaFieldName + ".clone());";
        }
        else
        {
            return "rec." + javaFieldName + " = " + javaFieldName;
        }
    }

    /**
     * Returns the initialiser text for a field declaration. Always empty:
     * initialising custom types at the declaration was removed because of
     * the unusual initialisation sequence in the generated constructors.
     */
    public static String initializeText(String size, String type)
    {
        return "";
    }

    /**
     * Appends {@code name} to {@code fieldName}, dropping each space and
     * upper-casing the character that follows it. A space that ends the
     * name is dropped.
     */
    private static void toIdentifier(String name, StringBuffer fieldName)
    {
        final int length = name.length();
        for (int i = 0; i < length; i++)
        {
            char c = name.charAt(i);
            if (c != ' ')
            {
                fieldName.append(c);
            }
            else if (i + 1 < length)
            {
                // consume the space and capitalise the next character
                fieldName.append(Character.toUpperCase(name.charAt(++i)));
            }
            // else: trailing space -- nothing follows it, so it is dropped
            // instead of reading past the end of the name
        }
    }

    /**
     * Appends {@code name} to {@code fieldName} in upper case, with each
     * space replaced by an underscore.
     */
    private static void toConstIdentifier(String name, StringBuffer fieldName)
    {
        for (int i = 0; i < name.length(); i++)
        {
            if (name.charAt(i) == ' ')
                fieldName.append('_');
            else
                fieldName.append(Character.toUpperCase(name.charAt(i)));
        }
    }

    /**
     * Resolves the Java type for a field of the given size and type.
     *
     * @param size  field size: "1", "2", "4", "8" or "varword"
     * @param type  field type: "bits", "int", "float", "string", "hbstring"
     *              or "custom:" followed by a class name
     * @param padTo minimum length of the result, right-padded with spaces
     * @return the Java type name; for custom types the part after the last
     *         '.'; {@code "short"} (unpadded) when nothing matches
     */
    public static String getType(String size, String type, int padTo)
    {

        boolean wholeNumber = type.equals("bits") || type.equals("int");
        if (wholeNumber && "1".equals(size))
            return pad(new StringBuffer("byte"), padTo).toString();
        else if (wholeNumber && "2".equals(size))
            return pad(new StringBuffer("short"), padTo).toString();
        else if (type.equals("int") && "varword".equals(size))
            return pad(new StringBuffer("short[]"), padTo).toString();
        else if (wholeNumber && "4".equals(size))
            return pad(new StringBuffer("int"), padTo).toString();
        else if (type.equals("float") && "8".equals(size))
            return pad(new StringBuffer("double"), padTo).toString();
        else if (type.equals("string"))
            return pad(new StringBuffer("String"), padTo).toString();
        else if (type.equals("hbstring"))
            return pad(new StringBuffer("String"), padTo).toString();
        else if (type.startsWith("custom:"))
        {
            int pos = type.lastIndexOf('.');
            return pad(new StringBuffer(type.substring(pos+1)), padTo)
                    .toString();
        }

        return "short";   // if we don't know, default to short
    }

    /**
     * Resolves a capitalised type name for a field of the given size and
     * type.
     *
     * @return "Byte", "Short[]", "Short", "String" or "HighByteString",
     *         padded to {@code padTo}; an empty string for every other
     *         combination
     */
    public static String getType1stCap(String size, String type, int padTo)
    {
        StringBuffer result;
        boolean numeric = type.equals("bits") || type.equals("int");
        if (numeric && "1".equals(size))
            result = pad(new StringBuffer("byte"), padTo);
        else if (type.equals("int") && "varword".equals(size))
            result = pad(new StringBuffer("short[]"), padTo);
        else if (numeric && "2".equals(size))
            result = pad(new StringBuffer("short"), padTo);
        else if (type.equals("string"))
            result = pad(new StringBuffer("String"), padTo);
        else if (type.equals("hbstring"))
            result = pad(new StringBuffer("HighByteString"), padTo);
        else
            return "";

        result.setCharAt(0, Character.toUpperCase(result.charAt(0)));

        return result.toString();
    }

    /**
     * Returns the hexadecimal literal of the mask with only bit
     * {@code bit} set, e.g. {@code "0x10"} for bit 4.
     *
     * @param bit zero-based bit index
     */
    public static String getMask(int bit)
    {
        int mask;
        if (bit >= 0 && bit < Integer.SIZE)
        {
            // Exact for every bit of an int. The floating point route
            // saturates at Integer.MAX_VALUE for bit 31 and would emit
            // 0x7fffffff instead of 0x80000000.
            mask = 1 << bit;
        }
        else
        {
            // Out-of-range index: keep the historical result.
            mask = (int)Math.pow(2, bit);
        }

        return "0x" + Integer.toHexString(mask);
    }

    /**
     * Builds a constant name of the form {@code PARENT_NAME_CONST_NAME}.
     *
     * @param padTo minimum length of the result, right-padded with spaces
     */
    public static String getConstName(String parentName, String constName, int padTo)
    {
        StringBuffer fieldName = new StringBuffer();
        toConstIdentifier(parentName, fieldName);
        fieldName.append('_');
        toConstIdentifier(constName, fieldName);
        pad(fieldName, padTo);
        return fieldName.toString();
    }

    /**
     * @return a byte array formatted string from a HexDump formatted string
     *  for example (byte)0x00,(byte)0x01 instead of 00 01
     */
    public static String getByteArrayString(String data) {
        StringTokenizer tokenizer = new StringTokenizer(data);
        StringBuffer retval = new StringBuffer();

        while (tokenizer.hasMoreTokens()) {
            retval.append("(byte)0x").append(tokenizer.nextToken());
            if (tokenizer.hasMoreTokens()) {
                retval.append(",");
            }
        }
        return retval.toString();
    }

    /**
     * Returns the source lines that append one field to the
     * {@code buffer} of a generated {@code toString()}. A hex dump of the
     * getter value is included unless the type is string, hbstring, float
     * or custom, or the size is varword.
     */
    public static String getToString(String fieldName, String type, String size) {
        StringBuffer result = new StringBuffer();
        result.append("        buffer.append(\"    .");
        result.append(getFieldName(fieldName, 20));
        result.append(" = \")" + CR);

        boolean hexDumpable = !type.equals("string")
                && !type.equals("hbstring")
                && !type.equals("float")
                && !size.equals("varword")
                && !type.startsWith("custom:");
        if (hexDumpable)
        {
            result.append("            .append(\"0x\")");
            result.append(".append(HexDump.toHex( ");
            result.append(" get");
            result.append(getFieldName1stCap(fieldName, 0));
            result.append(" ()))" + CR);
        }
        result.append("            .append(\" (\").append( get");
        result.append(getFieldName1stCap(fieldName,0));
        result.append("() ).append(\" )\");");
        return result.toString();
    }

    /**
     * Returns {@code excelName} when it is non-null and non-empty,
     * otherwise {@code recordName}.
     */
    public static String getRecordId(String recordName, String excelName)
    {
        if (excelName == null || excelName.equals(""))
            return recordName;
        else
            return excelName;
    }
}
(-)testcases/org/apache/poi/hwpf/extractor/TestWordToFoExtractorSuite.java (-92 lines)
Lines 1-92 Link Here
1
package org.apache.poi.hwpf.extractor;
2
3
import java.io.File;
4
import java.io.FileInputStream;
5
import java.io.FilenameFilter;
6
import java.io.StringWriter;
7
import java.util.Arrays;
8
import java.util.Collections;
9
import java.util.List;
10
import java.util.Set;
11
12
import javax.xml.parsers.DocumentBuilderFactory;
13
import javax.xml.transform.OutputKeys;
14
import javax.xml.transform.Transformer;
15
import javax.xml.transform.TransformerFactory;
16
import javax.xml.transform.dom.DOMSource;
17
import javax.xml.transform.stream.StreamResult;
18
19
import org.apache.poi.EncryptedDocumentException;
20
21
import org.apache.poi.hwpf.OldWordFileFormatException;
22
23
import junit.framework.Test;
24
import junit.framework.TestCase;
25
import junit.framework.TestSuite;
26
import org.apache.poi.POIDataSamples;
27
import org.apache.poi.hwpf.HWPFDocument;
28
29
public class TestWordToFoExtractorSuite
30
{
31
    /**
32
     * YK: a quick hack to exclude failing documents from the suite.
33
     *
34
     * WordToFoExtractor stumbles on Bug33519.doc with a NPE
35
     */
36
    private static List<String> failingFiles = Arrays.asList("Bug33519.doc");
37
38
    public static Test suite() {
39
        TestSuite suite = new TestSuite();
40
41
        File directory = POIDataSamples.getDocumentInstance().getFile(
42
                "../document");
43
        for (final File child : directory.listFiles(new FilenameFilter() {
44
            public boolean accept(File dir, String name) {
45
                return name.endsWith(".doc") && !failingFiles.contains(name);
46
            }
47
        })) {
48
            final String name = child.getName();
49
            suite.addTest(new TestCase(name) {
50
                public void runTest() throws Exception {
51
                    test(child);
52
                }
53
            });
54
        }
55
56
        return suite;
57
    }
58
59
    protected static void test( File child ) throws Exception
60
    {
61
        HWPFDocument hwpfDocument;
62
        FileInputStream fileInputStream = new FileInputStream( child );
63
        try
64
        {
65
            hwpfDocument = new HWPFDocument( fileInputStream );
66
        }
67
        catch ( Exception exc )
68
        {
69
            // unable to parse file -- not WordToFoExtractor fault
70
            return;
71
        }
72
        finally
73
        {
74
            fileInputStream.close();
75
        }
76
77
        WordToFoExtractor wordToFoExtractor = new WordToFoExtractor(
78
                DocumentBuilderFactory.newInstance().newDocumentBuilder()
79
                        .newDocument() );
80
        wordToFoExtractor.processDocument( hwpfDocument );
81
82
        StringWriter stringWriter = new StringWriter();
83
84
        Transformer transformer = TransformerFactory.newInstance()
85
                .newTransformer();
86
        transformer.setOutputProperty( OutputKeys.INDENT, "yes" );
87
        transformer.transform(
88
                new DOMSource( wordToFoExtractor.getDocument() ),
89
                new StreamResult( stringWriter ) );
90
        // no exceptions
91
    }
92
}
(-)testcases/org/apache/poi/hwpf/extractor/TestWordToExtractorsSuite.java (-37 / +52 lines)
Lines 1-13 Link Here
1
package org.apache.poi.hwpf.extractor;
1
package org.apache.poi.hwpf.extractor;
2
2
3
import java.io.File;
3
import java.io.File;
4
import java.io.FileInputStream;
5
import java.io.FilenameFilter;
4
import java.io.FilenameFilter;
6
import java.io.StringWriter;
5
import java.io.StringWriter;
7
import java.util.Arrays;
6
import java.util.Arrays;
8
import java.util.Collections;
9
import java.util.List;
7
import java.util.List;
10
import java.util.Set;
11
8
12
import javax.xml.parsers.DocumentBuilderFactory;
9
import javax.xml.parsers.DocumentBuilderFactory;
13
import javax.xml.transform.OutputKeys;
10
import javax.xml.transform.OutputKeys;
Lines 16-92 Link Here
16
import javax.xml.transform.dom.DOMSource;
13
import javax.xml.transform.dom.DOMSource;
17
import javax.xml.transform.stream.StreamResult;
14
import javax.xml.transform.stream.StreamResult;
18
15
19
import org.apache.poi.EncryptedDocumentException;
16
import org.w3c.dom.Document;
20
21
import org.apache.poi.hwpf.OldWordFileFormatException;
22
17
23
import junit.framework.Test;
18
import junit.framework.Test;
24
import junit.framework.TestCase;
19
import junit.framework.TestCase;
25
import junit.framework.TestSuite;
20
import junit.framework.TestSuite;
26
import org.apache.poi.POIDataSamples;
21
import org.apache.poi.POIDataSamples;
27
import org.apache.poi.hwpf.HWPFDocument;
22
import org.apache.poi.hwpf.HWPFDocumentCore;
28
23
29
public class TestWordToFoExtractorSuite
24
public class TestWordToExtractorsSuite
30
{
25
{
31
    /**
26
    /**
32
     * YK: a quick hack to exclude failing documents from the suite.
27
     * YK: a quick hack to exclude failing documents from the suite.
33
     *
34
     * WordToFoExtractor stumbles on Bug33519.doc with a NPE
35
     */
28
     */
36
    private static List<String> failingFiles = Arrays.asList("Bug33519.doc");
29
    private static List<String> failingFiles = Arrays.asList();
37
30
38
    public static Test suite() {
31
    public static Test suite()
39
        TestSuite suite = new TestSuite();
32
    {
33
        TestSuite suite = new TestSuite(
34
                TestWordToExtractorsSuite.class.getName() );
40
35
41
        File directory = POIDataSamples.getDocumentInstance().getFile(
36
        File directory = POIDataSamples.getDocumentInstance().getFile(
42
                "../document");
37
                "../document" );
43
        for (final File child : directory.listFiles(new FilenameFilter() {
38
        for ( final File child : directory.listFiles( new FilenameFilter()
44
            public boolean accept(File dir, String name) {
39
        {
45
                return name.endsWith(".doc") && !failingFiles.contains(name);
40
            public boolean accept( File dir, String name )
41
            {
42
                return name.endsWith( ".doc" ) && !failingFiles.contains( name );
46
            }
43
            }
47
        })) {
44
        } ) )
45
        {
48
            final String name = child.getName();
46
            final String name = child.getName();
49
            suite.addTest(new TestCase(name) {
47
            suite.addTest( new TestCase( name + " [FO]" )
50
                public void runTest() throws Exception {
48
            {
51
                    test(child);
49
                public void runTest() throws Exception
50
                {
51
                    test( child, false );
52
                }
53
            } );
54
            suite.addTest( new TestCase( name + " [HTML]" )
55
            {
56
                public void runTest() throws Exception
57
                {
58
                    test( child, true );
52
                }
59
                }
53
            });
60
            } );
54
        }
61
        }
55
62
56
        return suite;
63
        return suite;
57
    }
64
    }
58
65
59
    protected static void test( File child ) throws Exception
66
    protected static void test( File child, boolean html ) throws Exception
60
    {
67
    {
61
        HWPFDocument hwpfDocument;
68
        HWPFDocumentCore hwpfDocument;
62
        FileInputStream fileInputStream = new FileInputStream( child );
63
        try
69
        try
64
        {
70
        {
65
            hwpfDocument = new HWPFDocument( fileInputStream );
71
            hwpfDocument = AbstractWordUtils.loadDoc( child );
66
        }
72
        }
67
        catch ( Exception exc )
73
        catch ( Exception exc )
68
        {
74
        {
69
            // unable to parse file -- not WordToFoExtractor fault
75
            // unable to parse file -- not WordToFoExtractor fault
70
            return;
76
            return;
71
        }
77
        }
72
        finally
73
        {
74
            fileInputStream.close();
75
        }
76
78
77
        WordToFoExtractor wordToFoExtractor = new WordToFoExtractor(
79
        final Document newDocument = DocumentBuilderFactory.newInstance()
78
                DocumentBuilderFactory.newInstance().newDocumentBuilder()
80
                .newDocumentBuilder().newDocument();
79
                        .newDocument() );
80
        wordToFoExtractor.processDocument( hwpfDocument );
81
81
82
        StringWriter stringWriter = new StringWriter();
82
        AbstractWordExtractor extractor;
83
        if ( html )
84
        {
85
            extractor = new WordToHtmlExtractor( newDocument );
86
        }
87
        else
88
        {
89
            extractor = new WordToFoExtractor( newDocument );
90
        }
91
        extractor.processDocument( hwpfDocument );
83
92
84
        Transformer transformer = TransformerFactory.newInstance()
93
        Transformer transformer = TransformerFactory.newInstance()
85
                .newTransformer();
94
                .newTransformer();
95
        transformer.setOutputProperty( OutputKeys.ENCODING, "utf-8" );
86
        transformer.setOutputProperty( OutputKeys.INDENT, "yes" );
96
        transformer.setOutputProperty( OutputKeys.INDENT, "yes" );
87
        transformer.transform(
97
        if ( html )
88
                new DOMSource( wordToFoExtractor.getDocument() ),
98
            transformer.setOutputProperty( OutputKeys.METHOD, "html" );
99
100
        StringWriter stringWriter = new StringWriter();
101
        transformer.transform( new DOMSource( extractor.getDocument() ),
89
                new StreamResult( stringWriter ) );
102
                new StreamResult( stringWriter ) );
90
        // no exceptions
103
        // no exceptions
104
105
        System.out.println( stringWriter );
91
    }
106
    }
92
}
107
}
(-)testcases/org/apache/poi/hwpf/extractor/TestWordToHtmlExtractor.java (+97 lines)
Line 0 Link Here
1
/*
2
 *  ====================================================================
3
 *    Licensed to the Apache Software Foundation (ASF) under one or more
4
 *    contributor license agreements.  See the NOTICE file distributed with
5
 *    this work for additional information regarding copyright ownership.
6
 *    The ASF licenses this file to You under the Apache License, Version 2.0
7
 *    (the "License"); you may not use this file except in compliance with
8
 *    the License.  You may obtain a copy of the License at
9
 *
10
 *        http://www.apache.org/licenses/LICENSE-2.0
11
 *
12
 *    Unless required by applicable law or agreed to in writing, software
13
 *    distributed under the License is distributed on an "AS IS" BASIS,
14
 *    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
 *    See the License for the specific language governing permissions and
16
 *    limitations under the License.
17
 * ====================================================================
18
 */
19
package org.apache.poi.hwpf.extractor;
20
21
import java.io.StringWriter;
22
23
import javax.xml.parsers.DocumentBuilderFactory;
24
import javax.xml.transform.OutputKeys;
25
import javax.xml.transform.Transformer;
26
import javax.xml.transform.TransformerFactory;
27
import javax.xml.transform.dom.DOMSource;
28
import javax.xml.transform.stream.StreamResult;
29
30
import junit.framework.TestCase;
31
import org.apache.poi.POIDataSamples;
32
import org.apache.poi.hwpf.HWPFDocument;
33
34
/**
35
 * Test cases for {@link WordToHtmlExtractor}
36
 * 
37
 * @author Sergey Vladimirov (vlsergey {at} gmail {dot} com)
38
 */
39
public class TestWordToHtmlExtractor extends TestCase
40
{
41
    private static String getHtmlText( final String sampleFileName )
42
            throws Exception
43
    {
44
        HWPFDocument hwpfDocument = new HWPFDocument( POIDataSamples
45
                .getDocumentInstance().openResourceAsStream( sampleFileName ) );
46
47
        WordToHtmlExtractor wordToHtmlExtractor = new WordToHtmlExtractor(
48
                DocumentBuilderFactory.newInstance().newDocumentBuilder()
49
                        .newDocument() );
50
        wordToHtmlExtractor.processDocument( hwpfDocument );
51
52
        StringWriter stringWriter = new StringWriter();
53
54
        Transformer transformer = TransformerFactory.newInstance()
55
                .newTransformer();
56
        transformer.setOutputProperty( OutputKeys.INDENT, "yes" );
57
        transformer.setOutputProperty( OutputKeys.ENCODING, "utf-8" );
58
        transformer.setOutputProperty( OutputKeys.METHOD, "html" );
59
        transformer.transform(
60
                new DOMSource( wordToHtmlExtractor.getDocument() ),
61
                new StreamResult( stringWriter ) );
62
63
        String result = stringWriter.toString();
64
        return result;
65
    }
66
67
    public void testHyperlink() throws Exception
68
    {
69
        String result = getHtmlText( "hyperlink.doc" );
70
71
        assertTrue( result.contains( "<a href=\"http://testuri.org/\">" ) );
72
        assertTrue( result.contains( "Hyperlink text" ) );
73
    }
74
75
    public void testEquation() throws Exception
76
    {
77
        String result = getHtmlText( "equation.doc" );
78
79
        assertTrue( result
80
                .contains( "<!--Image link to '0.emf' can be here-->" ) );
81
    }
82
83
    public void testPageref() throws Exception
84
    {
85
        String result = getHtmlText( "pageref.doc" );
86
87
        assertTrue( result.contains( "<a href=\"#userref\">" ) );
88
        assertTrue( result.contains( "1" ) );
89
    }
90
91
    public void testBug46610_2() throws Exception
92
    {
93
        String result = getHtmlText( "Bug46610_2.doc" );
94
        assertTrue( result
95
                .contains( "012345678911234567892123456789312345678941234567890123456789112345678921234567893123456789412345678" ) );
96
    }
97
}

Return to bug 51351