# Picture support for the Word-to-FO converter.
#
# WordToFoUtils.java
#   * adds getJustification(int); setJustification() now delegates to it
#   * adds setPictureProperties(Picture, Element)
#   * setCharactersProperties() maps vanished runs to visibility="hidden"
# WordToFoExtractor.java
#   * processImage() gains an "inlined" flag
#   * picture runs are dispatched from processParagraph() and from
#     tryImageWithinField()
#
# NOTE(review): several -/+ pairs below are textually identical; they
# presumably differed only in leading whitespace (tabs vs. spaces) that this
# copy no longer preserves. Apply with whitespace-insensitive matching
# (e.g. "patch -l") or regenerate against the target revision -- TODO confirm.
# NOTE(review): the header of the last hunk (-550,59 +582,61) is kept as
# received, although one context line seems to be missing from this copy --
# TODO confirm.
#
--- src/org/apache/poi/hwpf/extractor/WordToFoUtils.java	(revision 1135432)
+++ src/org/apache/poi/hwpf/extractor/WordToFoUtils.java	(working copy)
@@ -9,6 +9,7 @@
 import org.apache.poi.hwpf.usermodel.CharacterProperties;
 import org.apache.poi.hwpf.usermodel.CharacterRun;
 import org.apache.poi.hwpf.usermodel.Paragraph;
+import org.apache.poi.hwpf.usermodel.Picture;
 import org.apache.poi.hwpf.usermodel.Range;
 import org.apache.poi.hwpf.usermodel.Section;
 import org.apache.poi.hwpf.usermodel.SectionProperties;
@@ -179,6 +180,41 @@
         }
     }
 
+    /**
+     * Maps a paragraph justification code to the value used for the
+     * <tt>text-align</tt> attribute.
+     *
+     * @param js
+     *            justification code, as returned by
+     *            {@link Paragraph#getJustification()}
+     * @return the <tt>text-align</tt> value, or an empty string if the code
+     *         is not in the range 0-9
+     */
+    public static String getJustification(int js) {
+        switch (js) {
+        case 0:
+            return "start";
+        case 1:
+            return "center";
+        case 2:
+            return "end";
+        case 3:
+        case 4:
+            return "justify";
+        case 5:
+            return "center";
+        case 6:
+            return "left";
+        case 7:
+            return "start";
+        case 8:
+            return "end";
+        case 9:
+            return "justify";
+        }
+        return "";
+    }
+
     public static String getListItemNumberLabel(int number, int format) {
 
         if (format != 0)
@@ -244,48 +280,51 @@
     }
 
     public static void setCharactersProperties(final CharacterRun characterRun,
-            final Element inline) {
-        final CharacterProperties clonedProperties = characterRun
-                .cloneProperties();
-        StringBuilder textDecorations = new StringBuilder();
+            final Element inline) {
+        final CharacterProperties clonedProperties = characterRun
+                .cloneProperties();
+        StringBuilder textDecorations = new StringBuilder();
 
-        setBorder(inline, clonedProperties.getBrc(), EMPTY);
+        setBorder(inline, clonedProperties.getBrc(), EMPTY);
 
-        if (characterRun.isCapitalized()) {
-            inline.setAttribute("text-transform", "uppercase");
-        }
-        if (characterRun.isHighlighted()) {
-            inline.setAttribute("background-color",
-                    getColor(clonedProperties.getIcoHighlight()));
-        }
-        if (characterRun.isStrikeThrough()) {
-            if (textDecorations.length() > 0)
-                textDecorations.append(" ");
-            textDecorations.append("line-through");
-        }
-        if (characterRun.isShadowed()) {
-            inline.setAttribute("text-shadow", characterRun.getFontSize() / 24
-                    + "pt");
-        }
-        if (characterRun.isSmallCaps()) {
-            inline.setAttribute("font-variant", "small-caps");
-        }
-        if (characterRun.getSubSuperScriptIndex() == 1) {
-            inline.setAttribute("baseline-shift", "super");
-            inline.setAttribute("font-size", "smaller");
-        }
-        if (characterRun.getSubSuperScriptIndex() == 2) {
-            inline.setAttribute("baseline-shift", "sub");
-            inline.setAttribute("font-size", "smaller");
-        }
-        if (characterRun.getUnderlineCode() > 0) {
-            if (textDecorations.length() > 0)
-                textDecorations.append(" ");
-            textDecorations.append("underline");
-        }
-        if (textDecorations.length() > 0) {
-            inline.setAttribute("text-decoration", textDecorations.toString());
-        }
+        if (characterRun.isCapitalized()) {
+            inline.setAttribute("text-transform", "uppercase");
+        }
+        if (characterRun.isHighlighted()) {
+            inline.setAttribute("background-color",
+                    getColor(clonedProperties.getIcoHighlight()));
+        }
+        if (characterRun.isStrikeThrough()) {
+            if (textDecorations.length() > 0)
+                textDecorations.append(" ");
+            textDecorations.append("line-through");
+        }
+        if (characterRun.isShadowed()) {
+            inline.setAttribute("text-shadow", characterRun.getFontSize() / 24
+                    + "pt");
+        }
+        if (characterRun.isSmallCaps()) {
+            inline.setAttribute("font-variant", "small-caps");
+        }
+        if (characterRun.getSubSuperScriptIndex() == 1) {
+            inline.setAttribute("baseline-shift", "super");
+            inline.setAttribute("font-size", "smaller");
+        }
+        if (characterRun.getSubSuperScriptIndex() == 2) {
+            inline.setAttribute("baseline-shift", "sub");
+            inline.setAttribute("font-size", "smaller");
+        }
+        if (characterRun.getUnderlineCode() > 0) {
+            if (textDecorations.length() > 0)
+                textDecorations.append(" ");
+            textDecorations.append("underline");
+        }
+        if (characterRun.isVanished()) {
+            inline.setAttribute("visibility", "hidden");
+        }
+        if (textDecorations.length() > 0) {
+            inline.setAttribute("text-decoration", textDecorations.toString());
+        }
     }
 
     public static void setFontFamily(final Element element,
@@ -335,40 +374,10 @@
     }
 
     public static void setJustification(Paragraph paragraph,
-            final Element element) {
-        final int justification = paragraph.getJustification();
-        switch (justification) {
-        case 0:
-            element.setAttribute("text-align", "start");
-            break;
-        case 1:
-            element.setAttribute("text-align", "center");
-            break;
-        case 2:
-            element.setAttribute("text-align", "end");
-            break;
-        case 3:
-            element.setAttribute("text-align", "justify");
-            break;
-        case 4:
-            element.setAttribute("text-align", "justify");
-            break;
-        case 5:
-            element.setAttribute("text-align", "center");
-            break;
-        case 6:
-            element.setAttribute("text-align", "left");
-            break;
-        case 7:
-            element.setAttribute("text-align", "start");
-            break;
-        case 8:
-            element.setAttribute("text-align", "end");
-            break;
-        case 9:
-            element.setAttribute("text-align", "justify");
-            break;
-        }
+            final Element element) {
+        String justification = getJustification(paragraph.getJustification());
+        if (isNotEmpty(justification))
+            element.setAttribute("text-align", justification);
     }
 
     public static void setParagraphProperties(Paragraph paragraph, Element block) {
@@ -399,6 +408,65 @@
         block.setAttribute("white-space-collapse", "false");
     }
 
+    /**
+     * Copies size, scaling and cropping information of a picture to the
+     * attributes of the element that will display it.
+     *
+     * @param picture
+     *            source of the goal size, aspect ratio and crop values
+     * @param graphicElement
+     *            element that receives the <tt>content-width</tt>,
+     *            <tt>content-height</tt>, <tt>scaling</tt>,
+     *            <tt>vertical-align</tt> and, for cropped pictures,
+     *            <tt>clip</tt> and <tt>overflow</tt> attributes
+     */
+    public static void setPictureProperties(Picture picture,
+            Element graphicElement) {
+        final int aspectRatioX = picture.getAspectRatioX();
+        final int aspectRatioY = picture.getAspectRatioY();
+
+        if (aspectRatioX > 0) {
+            graphicElement.setAttribute("content-width", ((picture.getDxaGoal()
+                    * aspectRatioX / 100) / WordToFoUtils.TWIPS_PER_PT)
+                    + "pt");
+        } else
+            graphicElement.setAttribute("content-width",
+                    (picture.getDxaGoal() / WordToFoUtils.TWIPS_PER_PT) + "pt");
+
+        if (aspectRatioY > 0)
+            graphicElement
+                    .setAttribute("content-height", ((picture.getDyaGoal()
+                            * aspectRatioY / 100) / WordToFoUtils.TWIPS_PER_PT)
+                            + "pt");
+        else
+            graphicElement.setAttribute("content-height",
+                    (picture.getDyaGoal() / WordToFoUtils.TWIPS_PER_PT) + "pt");
+
+        if (aspectRatioX <= 0 || aspectRatioY <= 0) {
+            graphicElement.setAttribute("scaling", "uniform");
+        } else {
+            graphicElement.setAttribute("scaling", "non-uniform");
+        }
+
+        graphicElement.setAttribute("vertical-align", "text-bottom");
+
+        if (picture.getDyaCropTop() != 0 || picture.getDxaCropRight() != 0
+                || picture.getDyaCropBottom() != 0
+                || picture.getDxaCropLeft() != 0) {
+            int rectTop = picture.getDyaCropTop() / WordToFoUtils.TWIPS_PER_PT;
+            int rectRight = picture.getDxaCropRight()
+                    / WordToFoUtils.TWIPS_PER_PT;
+            int rectBottom = picture.getDyaCropBottom()
+                    / WordToFoUtils.TWIPS_PER_PT;
+            int rectLeft = picture.getDxaCropLeft()
+                    / WordToFoUtils.TWIPS_PER_PT;
+            graphicElement.setAttribute("clip", "rect(" + rectTop + "pt, "
+                    + rectRight + "pt, " + rectBottom + "pt, " + rectLeft
+                    + "pt)");
+            graphicElement.setAttribute("overflow", "hidden");
+        }
+    }
+
     public static void setTableCellProperties(TableRow tableRow,
             TableCell tableCell, Element element, boolean toppest,
             boolean bottomest, boolean leftest, boolean rightest) {
--- src/org/apache/poi/hwpf/extractor/WordToFoExtractor.java	(revision 1135432)
+++ src/org/apache/poi/hwpf/extractor/WordToFoExtractor.java	(working copy)
@@ -279,63 +279,95 @@
         }
     }
 
-    @SuppressWarnings("unused")
-    protected void processImage(Element currentBlock, Picture picture) {
-        // no default implementation -- skip
+    /**
+     * This method shall store image bytes in external file and convert it if
+     * necessary. Images shall be stored using PNG format (for bitmap) or SVG
+     * (for vector). Other formats may be not supported by your XSL FO
+     * processor.
+     * <p>
+     * Please note the
+     * {@link WordToFoUtils#setPictureProperties(Picture, Element)} method.
+     *
+     * @param currentBlock
+     *            currently processed FO element, like <tt>fo:block</tt>. Shall
+     *            be used as parent of newly created
+     *            <tt>fo:external-graphic</tt> or
+     *            <tt>fo:instream-foreign-object</tt>
+     * @param inlined
+     *            if image is inlined
+     * @param picture
+     *            HWPF object, contained picture data and properties
+     */
+    protected void processImage(Element currentBlock, boolean inlined,
+            Picture picture) {
+        // no default implementation -- skip
     }
 
     protected void processParagraph(HWPFDocument hwpfDocument,
-            Element parentFopElement, int currentTableLevel,
-            Paragraph paragraph, String bulletText) {
-        final Element block = createBlock();
-        parentFopElement.appendChild(block);
+            Element parentFopElement, int currentTableLevel,
+            Paragraph paragraph, String bulletText) {
+        final Element block = createBlock();
+        parentFopElement.appendChild(block);
 
-        WordToFoUtils.setParagraphProperties(paragraph, block);
+        WordToFoUtils.setParagraphProperties(paragraph, block);
 
-        final int charRuns = paragraph.numCharacterRuns();
+        final int charRuns = paragraph.numCharacterRuns();
 
-        if (charRuns == 0) {
-            return;
-        }
+        if (charRuns == 0) {
+            return;
+        }
 
-        final String pFontName;
-        final int pFontSize;
-        final boolean pBold;
-        final boolean pItalic;
-        {
-            CharacterRun characterRun = paragraph.getCharacterRun(0);
-            pFontSize = characterRun.getFontSize() / 2;
-            pFontName = characterRun.getFontName();
-            pBold = characterRun.isBold();
-            pItalic = characterRun.isItalic();
-        }
-        WordToFoUtils.setFontFamily(block, pFontName);
-        WordToFoUtils.setFontSize(block, pFontSize);
-        WordToFoUtils.setBold(block, pBold);
-        WordToFoUtils.setItalic(block, pItalic);
+        final String pFontName;
+        final int pFontSize;
+        final boolean pBold;
+        final boolean pItalic;
+        {
+            CharacterRun characterRun = paragraph.getCharacterRun(0);
+            pFontSize = characterRun.getFontSize() / 2;
+            pFontName = characterRun.getFontName();
+            pBold = characterRun.isBold();
+            pItalic = characterRun.isItalic();
+        }
+        WordToFoUtils.setFontFamily(block, pFontName);
+        WordToFoUtils.setFontSize(block, pFontSize);
+        WordToFoUtils.setBold(block, pBold);
+        WordToFoUtils.setItalic(block, pItalic);
 
-        StringBuilder lineText = new StringBuilder();
+        StringBuilder lineText = new StringBuilder();
 
-        if (WordToFoUtils.isNotEmpty(bulletText)) {
-            Element inline = createInline();
-            block.appendChild(inline);
+        if (WordToFoUtils.isNotEmpty(bulletText)) {
+            Element inline = createInline();
+            block.appendChild(inline);
 
-            Text textNode = createText(bulletText);
-            inline.appendChild(textNode);
+            Text textNode = createText(bulletText);
+            inline.appendChild(textNode);
 
-            lineText.append(bulletText);
-        }
+            lineText.append(bulletText);
+        }
+
+        for (int c = 0; c < charRuns; c++) {
+            CharacterRun characterRun = paragraph.getCharacterRun(c);
+
+            if (hwpfDocument.getPicturesTable().hasPicture(characterRun)) {
+                Picture picture = hwpfDocument.getPicturesTable()
+                        .extractPicture(characterRun, true);
 
-        for (int c = 0; c < charRuns; c++) {
-            CharacterRun characterRun = paragraph.getCharacterRun(c);
+                processImage(block, characterRun.text().charAt(0) == 0x01,
+                        picture);
+                continue;
+            }
 
             String text = characterRun.text();
             if (text.getBytes().length == 0)
                 continue;
 
-            if (text.getBytes()[0] == FIELD_BEGIN_MARK) {
-                int skipTo = tryImageWithinField(hwpfDocument, paragraph, c,
-                        block);
+            if (text.getBytes()[0] == FIELD_BEGIN_MARK) {
+                /*
+                 * check if we have a field with calculated image as a result.
+                 * MathType equation, for example.
+                 */
+                int skipTo = tryImageWithinField(hwpfDocument, paragraph, c,
+                        block);
 
                 if (skipTo != c) {
                     c = skipTo;
@@ -550,59 +582,61 @@
     }
 
     protected int tryImageWithinField(HWPFDocument hwpfDocument,
-            Paragraph paragraph, int beginMark, Element currentBlock) {
-        int separatorMark = -1;
-        int pictureMark = -1;
-        int endMark = -1;
-        for (int c = beginMark + 1; c < paragraph.numCharacterRuns(); c++) {
-            CharacterRun characterRun = paragraph.getCharacterRun(c);
+            Paragraph paragraph, int beginMark, Element currentBlock) {
+        int separatorMark = -1;
+        int pictureMark = -1;
+        int pictureChar = Integer.MIN_VALUE;
+        int endMark = -1;
+        for (int c = beginMark + 1; c < paragraph.numCharacterRuns(); c++) {
+            CharacterRun characterRun = paragraph.getCharacterRun(c);
 
-            String text = characterRun.text();
-            if (text.getBytes().length == 0)
-                continue;
+            String text = characterRun.text();
+            if (text.getBytes().length == 0)
+                continue;
 
-            if (text.getBytes()[0] == FIELD_SEPARATOR_MARK) {
-                if (separatorMark != -1) {
-                    // double;
-                    return beginMark;
-                }
+            if (text.getBytes()[0] == FIELD_SEPARATOR_MARK) {
+                if (separatorMark != -1) {
+                    // duplicate separator mark; give up
+                    return beginMark;
+                }
 
-                separatorMark = c;
-                continue;
-            }
+                separatorMark = c;
+                continue;
+            }
 
-            if (text.getBytes()[0] == FIELD_END_MARK) {
-                if (endMark != -1) {
-                    // double;
-                    return beginMark;
-                }
+            if (text.getBytes()[0] == FIELD_END_MARK) {
+                if (endMark != -1) {
+                    // duplicate end mark; give up
+                    return beginMark;
+                }
 
-                endMark = c;
-                break;
-            }
+                endMark = c;
+                break;
+            }
 
-            if (hwpfDocument.getPicturesTable().hasPicture(characterRun)) {
-                if (pictureMark != -1) {
-                    // double;
-                    return beginMark;
-                }
+            if (hwpfDocument.getPicturesTable().hasPicture(characterRun)) {
+                if (pictureMark != -1) {
+                    // more than one picture in the field; give up
+                    return beginMark;
+                }
 
-                pictureMark = c;
-                continue;
-            }
-        }
+                pictureMark = c;
+                pictureChar = characterRun.text().charAt(0);
+                continue;
+            }
+        }
 
-        if (separatorMark == -1 || pictureMark == -1 || endMark == -1)
-            return beginMark;
+        if (separatorMark == -1 || pictureMark == -1 || endMark == -1)
+            return beginMark;
 
-        final CharacterRun pictureRun = paragraph.getCharacterRun(pictureMark);
-        final Picture picture = hwpfDocument.getPicturesTable().extractPicture(
-                pictureRun, true);
-        processImage(currentBlock, picture);
+        final CharacterRun pictureRun = paragraph.getCharacterRun(pictureMark);
+        final Picture picture = hwpfDocument.getPicturesTable().extractPicture(
+                pictureRun, true);
 
-        return endMark;
-    }
+        processImage(currentBlock, pictureChar == 0x01, picture);
+        return endMark;
+    }
 
     /**
      * Java main() interface to interact with WordToFoExtractor