// Originally added as src/org/apache/poi/hwpf/extractor/NumberFormatter.java
// (package org.apache.poi.hwpf.extractor).

/**
 * Converts a list item number into its textual label for a given numbering
 * style (Arabic, Roman or alphabetic, in lower or upper case).
 *
 * @author Ryan Ackley
 */
public final class NumberFormatter {

    /** Lower-case Latin alphabet; index {@code n - 1} holds the label for number {@code n}. */
    private static final String[] C_LETTERS = { "a", "b", "c", "d", "e", "f", "g", "h", "i",
            "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y",
            "z" };

    /** Values of the Roman symbols in {@link #ROMAN_SYMBOLS}, in descending order. */
    private static final int[] ROMAN_VALUES = { 1000, 900, 500, 400, 100, 90, 50, 40, 10, 9,
            5, 4, 1 };

    /** Lower-case Roman symbols (including subtractive pairs) matching {@link #ROMAN_VALUES}. */
    private static final String[] ROMAN_SYMBOLS = { "m", "cm", "d", "cd", "c", "xc", "l",
            "xl", "x", "ix", "v", "iv", "i" };

    /** Largest number that can be written with the standard Roman symbols. */
    private static final int MAX_ROMAN = 3999;

    private static final int T_ARABIC = 0;
    private static final int T_LOWER_LETTER = 4;
    private static final int T_LOWER_ROMAN = 2;
    private static final int T_ORDINAL = 5;
    private static final int T_UPPER_LETTER = 3;
    private static final int T_UPPER_ROMAN = 1;

    /**
     * Formats {@code num} according to the numbering {@code style}.
     *
     * @param num
     *            the number to format; Roman styles support 1..3999 and letter
     *            styles support 1..26
     * @param style
     *            numbering style code: 0 Arabic, 1 upper Roman, 2 lower Roman,
     *            3 upper letter, 4 lower letter, 5 ordinal; any other value is
     *            treated as Arabic
     * @return the formatted label; when {@code num} is outside the range
     *         supported by the requested style, its decimal representation
     */
    public static String getNumber(int num, int style) {
        switch (style) {
        case T_UPPER_ROMAN:
            // Fixed locale: the default-locale overload would turn "i" into a
            // dotted capital under a Turkish locale.
            return toRoman(num).toUpperCase(java.util.Locale.ENGLISH);
        case T_LOWER_ROMAN:
            return toRoman(num);
        case T_UPPER_LETTER:
            return toLetter(num).toUpperCase(java.util.Locale.ENGLISH);
        case T_LOWER_LETTER:
            return toLetter(num);
        case T_ARABIC:
        case T_ORDINAL:
        default:
            return String.valueOf(num);
        }
    }

    /** Returns the lower-case letter for 1..26, or the decimal number when out of range. */
    private static String toLetter(int num) {
        if (num < 1 || num > C_LETTERS.length) {
            return String.valueOf(num);
        }
        return C_LETTERS[num - 1];
    }

    /** Returns the lower-case Roman numeral for 1..3999, or the decimal number when out of range. */
    private static String toRoman(int num) {
        if (num < 1 || num > MAX_ROMAN) {
            return String.valueOf(num);
        }
        StringBuilder roman = new StringBuilder();
        int remaining = num;
        for (int i = 0; i < ROMAN_VALUES.length; i++) {
            // Greedily consume the largest symbol that still fits.
            while (remaining >= ROMAN_VALUES[i]) {
                roman.append(ROMAN_SYMBOLS[i]);
                remaining -= ROMAN_VALUES[i];
            }
        }
        return roman.toString();
    }
}
org.apache.poi.hwpf.usermodel.BorderCode; +import org.apache.poi.hwpf.usermodel.CharacterProperties; +import org.apache.poi.hwpf.usermodel.CharacterRun; +import org.apache.poi.hwpf.usermodel.Paragraph; +import org.apache.poi.hwpf.usermodel.Range; +import org.apache.poi.hwpf.usermodel.Section; +import org.apache.poi.hwpf.usermodel.SectionProperties; +import org.apache.poi.hwpf.usermodel.TableCell; +import org.apache.poi.hwpf.usermodel.TableIterator; +import org.apache.poi.hwpf.usermodel.TableRow; +import org.w3c.dom.Element; + +public class WordToFoUtils { + static final String EMPTY = ""; + + public static final float TWIPS_PER_INCH = 1440.0f; + + public static final int TWIPS_PER_PT = 20; + + static boolean equals(String str1, String str2) { + return str1 == null ? str2 == null : str1.equals(str2); + } + + public static String getBorderType(BorderCode borderCode) { + if (borderCode == null) + throw new IllegalArgumentException("borderCode is null"); + + switch (borderCode.getBorderType()) { + case 1: + case 2: + return "solid"; + case 3: + return "double"; + case 5: + return "solid"; + case 6: + return "dotted"; + case 7: + case 8: + return "dashed"; + case 9: + return "dotted"; + case 10: + case 11: + case 12: + case 13: + case 14: + case 15: + case 16: + case 17: + case 18: + case 19: + return "double"; + case 20: + return "solid"; + case 21: + return "double"; + case 22: + return "dashed"; + case 23: + return "dashed"; + case 24: + return "ridge"; + case 25: + return "grooved"; + default: + return "solid"; + } + } + + public static String getBorderWidth(BorderCode borderCode) { + int lineWidth = borderCode.getLineWidth(); + int pt = lineWidth / 8; + int pte = lineWidth - pt * 8; + + StringBuilder stringBuilder = new StringBuilder(); + stringBuilder.append(pt); + stringBuilder.append("."); + stringBuilder.append(1000 / 8 * pte); + stringBuilder.append("pt"); + return stringBuilder.toString(); + } + + public static String getBulletText(ListTables listTables, + 
Paragraph paragraph, int listId) { + final ListLevel listLevel = listTables.getLevel(listId, + paragraph.getIlvl()); + + if (listLevel.getNumberText() == null) + return EMPTY; + + StringBuffer bulletBuffer = new StringBuffer(); + char[] xst = listLevel.getNumberText().toCharArray(); + for (char element : xst) { + if (element < 9) { + ListLevel numLevel = listTables.getLevel(listId, element); + + int num = numLevel.getStartAt(); + bulletBuffer.append(NumberFormatter.getNumber(num, + listLevel.getNumberFormat())); + + if (numLevel == listLevel) { + numLevel.setStartAt(numLevel.getStartAt() + 1); + } + + } else { + bulletBuffer.append(element); + } + } + + byte follow = getIxchFollow(listLevel); + switch (follow) { + case 0: + bulletBuffer.append("\t"); + break; + case 1: + bulletBuffer.append(" "); + break; + default: + break; + } + + return bulletBuffer.toString(); + } + + public static String getColor(int ico) { + switch (ico) { + case 1: + return "black"; + case 2: + return "blue"; + case 3: + return "cyan"; + case 4: + return "green"; + case 5: + return "magenta"; + case 6: + return "red"; + case 7: + return "yellow"; + case 8: + return "white"; + case 9: + return "darkblue"; + case 10: + return "darkcyan"; + case 11: + return "darkgreen"; + case 12: + return "darkmagenta"; + case 13: + return "darkred"; + case 14: + return "darkyellow"; + case 15: + return "darkgray"; + case 16: + return "lightgray"; + default: + return "black"; + } + } + + public static byte getIxchFollow(ListLevel listLevel) { + try { + Field field = ListLevel.class.getDeclaredField("_ixchFollow"); + field.setAccessible(true); + return ((Byte) field.get(listLevel)).byteValue(); + } catch (Exception exc) { + throw new Error(exc); + } + } + + public static String getListItemNumberLabel(int number, int format) { + + if (format != 0) + System.err.println("NYI: toListItemNumberLabel(): " + format); + + return String.valueOf(number); + } + + public static SectionProperties 
getSectionProperties(Section section) { + try { + Field field = Section.class.getDeclaredField("_props"); + field.setAccessible(true); + return (SectionProperties) field.get(section); + } catch (Exception exc) { + throw new Error(exc); + } + } + + static boolean isEmpty(String str) { + return str == null || str.length() == 0; + } + + static boolean isNotEmpty(String str) { + return !isEmpty(str); + } + + public static TableIterator newTableIterator(Range range, int level) { + try { + Constructor constructor = TableIterator.class + .getDeclaredConstructor(Range.class, int.class); + constructor.setAccessible(true); + return constructor.newInstance(range, Integer.valueOf(level)); + } catch (Exception exc) { + throw new Error(exc); + } + } + + public static void setBold(final Element element, final boolean bold) { + element.setAttribute("font-weight", bold ? "bold" : "normal"); + } + + public static void setBorder(Element element, BorderCode borderCode, + String where) { + if (element == null) + throw new IllegalArgumentException("element is null"); + + if (borderCode == null) + return; + + if (isEmpty(where)) { + element.setAttribute("border-style", getBorderType(borderCode)); + element.setAttribute("border-color", + getColor(borderCode.getColor())); + element.setAttribute("border-width", getBorderWidth(borderCode)); + } else { + element.setAttribute("border-" + where + "-style", + getBorderType(borderCode)); + element.setAttribute("border-" + where + "-color", + getColor(borderCode.getColor())); + element.setAttribute("border-" + where + "-width", + getBorderWidth(borderCode)); + } + } + + public static void setCharactersProperties(final CharacterRun characterRun, + final Element inline) { + final CharacterProperties clonedProperties = characterRun + .cloneProperties(); + StringBuilder textDecorations = new StringBuilder(); + + setBorder(inline, clonedProperties.getBrc(), EMPTY); + + if (characterRun.isCapitalized()) { + inline.setAttribute("text-transform", 
"uppercase"); + } + if (characterRun.isHighlighted()) { + inline.setAttribute("background-color", + getColor(clonedProperties.getIcoHighlight())); + } + if (characterRun.isStrikeThrough()) { + if (textDecorations.length() > 0) + textDecorations.append(" "); + textDecorations.append("line-through"); + } + if (characterRun.isShadowed()) { + inline.setAttribute("text-shadow", characterRun.getFontSize() / 24 + + "pt"); + } + if (characterRun.isSmallCaps()) { + inline.setAttribute("font-variant", "small-caps"); + } + if (characterRun.getSubSuperScriptIndex() == 1) { + inline.setAttribute("baseline-shift", "super"); + inline.setAttribute("font-size", "smaller"); + } + if (characterRun.getSubSuperScriptIndex() == 2) { + inline.setAttribute("baseline-shift", "sub"); + inline.setAttribute("font-size", "smaller"); + } + if (characterRun.getUnderlineCode() > 0) { + if (textDecorations.length() > 0) + textDecorations.append(" "); + textDecorations.append("underline"); + } + if (textDecorations.length() > 0) { + inline.setAttribute("text-decoration", textDecorations.toString()); + } + } + + public static void setFontFamily(final Element element, + final String fontFamily) { + element.setAttribute("font-family", fontFamily); + } + + public static void setFontSize(final Element element, final int fontSize) { + element.setAttribute("font-size", String.valueOf(fontSize)); + } + + public static void setIndent(Paragraph paragraph, Element block) { + if (paragraph.getFirstLineIndent() != 0) { + block.setAttribute( + "text-indent", + String.valueOf(paragraph.getFirstLineIndent() + / TWIPS_PER_PT) + + "pt"); + } + if (paragraph.getIndentFromLeft() != 0) { + block.setAttribute( + "start-indent", + String.valueOf(paragraph.getIndentFromLeft() / TWIPS_PER_PT) + + "pt"); + } + if (paragraph.getIndentFromRight() != 0) { + block.setAttribute( + "end-indent", + String.valueOf(paragraph.getIndentFromRight() + / TWIPS_PER_PT) + + "pt"); + } + if (paragraph.getSpacingBefore() != 0) { + 
block.setAttribute("space-before", + String.valueOf(paragraph.getSpacingBefore() / TWIPS_PER_PT) + + "pt"); + } + if (paragraph.getSpacingAfter() != 0) { + block.setAttribute("space-after", + String.valueOf(paragraph.getSpacingAfter() / TWIPS_PER_PT) + + "pt"); + } + } + + public static void setItalic(final Element element, final boolean italic) { + element.setAttribute("font-style", italic ? "italic" : "normal"); + } + + public static void setJustification(Paragraph paragraph, + final Element element) { + final int justification = paragraph.getJustification(); + switch (justification) { + case 0: + element.setAttribute("text-align", "start"); + break; + case 1: + element.setAttribute("text-align", "center"); + break; + case 2: + element.setAttribute("text-align", "end"); + break; + case 3: + element.setAttribute("text-align", "justify"); + break; + case 4: + element.setAttribute("text-align", "justify"); + break; + case 5: + element.setAttribute("text-align", "center"); + break; + case 6: + element.setAttribute("text-align", "left"); + break; + case 7: + element.setAttribute("text-align", "start"); + break; + case 8: + element.setAttribute("text-align", "end"); + break; + case 9: + element.setAttribute("text-align", "justify"); + break; + } + } + + public static void setParagraphProperties(Paragraph paragraph, Element block) { + setIndent(paragraph, block); + setJustification(paragraph, block); + + setBorder(block, paragraph.getBottomBorder(), "bottom"); + setBorder(block, paragraph.getLeftBorder(), "left"); + setBorder(block, paragraph.getRightBorder(), "right"); + setBorder(block, paragraph.getTopBorder(), "top"); + + if (paragraph.pageBreakBefore()) { + block.setAttribute("break-before", "page"); + } + + block.setAttribute("hyphenate", + String.valueOf(paragraph.isAutoHyphenated())); + + if (paragraph.keepOnPage()) { + block.setAttribute("keep-together.within-page", "always"); + } + + if (paragraph.keepWithNext()) { + 
block.setAttribute("keep-with-next.within-page", "always"); + } + + block.setAttribute("linefeed-treatment", "preserve"); + block.setAttribute("white-space-collapse", "false"); + } + + public static void setTableCellProperties(TableRow tableRow, + TableCell tableCell, Element element, boolean toppest, + boolean bottomest, boolean leftest, boolean rightest) { + element.setAttribute("width", (tableCell.getWidth() / TWIPS_PER_INCH) + + "in"); + element.setAttribute("padding-start", + (tableRow.getGapHalf() / TWIPS_PER_INCH) + "in"); + element.setAttribute("padding-end", + (tableRow.getGapHalf() / TWIPS_PER_INCH) + "in"); + + BorderCode top = tableCell.getBrcTop() != null ? tableCell.getBrcTop() + : toppest ? tableRow.getTopBorder() : tableRow + .getHorizontalBorder(); + BorderCode bottom = tableCell.getBrcBottom() != null ? tableCell + .getBrcBottom() : bottomest ? tableRow.getBottomBorder() + : tableRow.getHorizontalBorder(); + + BorderCode left = tableCell.getBrcLeft() != null ? tableCell + .getBrcLeft() : leftest ? tableRow.getLeftBorder() : tableRow + .getVerticalBorder(); + BorderCode right = tableCell.getBrcRight() != null ? tableCell + .getBrcRight() : rightest ? 
tableRow.getRightBorder() + : tableRow.getVerticalBorder(); + + setBorder(element, bottom, "bottom"); + setBorder(element, left, "left"); + setBorder(element, right, "right"); + setBorder(element, top, "top"); + } + + public static void setTableRowProperties(TableRow tableRow, + Element tableRowElement) { + if (tableRow.getRowHeight() > 0) { + tableRowElement.setAttribute("height", + (tableRow.getRowHeight() / TWIPS_PER_INCH) + "in"); + } + if (!tableRow.cantSplit()) { + tableRowElement.setAttribute("keep-together", "always"); + } + } + +} --- src/org/apache/poi/hwpf/extractor/WordToFoExtractor.java (revision 0) +++ src/org/apache/poi/hwpf/extractor/WordToFoExtractor.java (revision 0) @@ -0,0 +1,581 @@ +package org.apache.poi.hwpf.extractor; + +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; + +import javax.xml.parsers.DocumentBuilderFactory; + +import org.apache.poi.hwpf.HWPFDocument; +import org.apache.poi.hwpf.model.ListFormatOverride; +import org.apache.poi.hwpf.model.ListTables; +import org.apache.poi.hwpf.usermodel.CharacterRun; +import org.apache.poi.hwpf.usermodel.Paragraph; +import org.apache.poi.hwpf.usermodel.Picture; +import org.apache.poi.hwpf.usermodel.Range; +import org.apache.poi.hwpf.usermodel.Section; +import org.apache.poi.hwpf.usermodel.SectionProperties; +import org.apache.poi.hwpf.usermodel.Table; +import org.apache.poi.hwpf.usermodel.TableCell; +import org.apache.poi.hwpf.usermodel.TableIterator; +import org.apache.poi.hwpf.usermodel.TableRow; +import org.w3c.dom.Document; +import org.w3c.dom.Element; +import org.w3c.dom.Text; + +import static org.apache.poi.hwpf.extractor.WordToFoUtils.TWIPS_PER_INCH; + +/** + * @author Sergey Vladimirov (vlsergey {at} gmail {dot} com) + */ +public class WordToFoExtractor { + + private static final byte BEL_MARK = 7; + + private static final byte FIELD_BEGIN_MARK = 19; + + private static final byte FIELD_END_MARK = 21; + + 
private static final byte FIELD_SEPARATOR_MARK = 20; + + private static final String NS_XSLFO = "http://www.w3.org/1999/XSL/Format"; + + private static HWPFDocument loadDoc(File docFile) throws IOException { + final FileInputStream istream = new FileInputStream(docFile); + try { + return new HWPFDocument(istream); + } finally { + try { + istream.close(); + } catch (Exception exc) { + // no op + } + } + } + + static Document process(File docFile) throws Exception { + final HWPFDocument hwpfDocument = loadDoc(docFile); + WordToFoExtractor wordToFoExtractor = new WordToFoExtractor( + DocumentBuilderFactory.newInstance().newDocumentBuilder() + .newDocument()); + wordToFoExtractor.processDocument(hwpfDocument); + return wordToFoExtractor.getDocument(); + } + + private final Document document; + + private final Element layoutMasterSet; + + private final Element root; + + public WordToFoExtractor(Document document) throws Exception { + this.document = document; + + root = document.createElementNS(NS_XSLFO, "fo:root"); + document.appendChild(root); + + layoutMasterSet = document.createElementNS(NS_XSLFO, + "fo:layout-master-set"); + root.appendChild(layoutMasterSet); + } + + protected Element addFlowToPageSequence(final Element pageSequence, + String flowName) { + final Element flow = document.createElementNS(NS_XSLFO, "fo:flow"); + flow.setAttribute("flow-name", flowName); + pageSequence.appendChild(flow); + + return flow; + } + + protected Element addListItem(Element listBlock) { + Element result = createListItem(); + listBlock.appendChild(result); + return result; + } + + protected Element addListItemBody(Element listItem) { + Element result = createListItemBody(); + listItem.appendChild(result); + return result; + } + + protected Element addListItemLabel(Element listItem, String text) { + Element result = createListItemLabel(text); + listItem.appendChild(result); + return result; + } + + protected Element addPageSequence(String pageMaster) { + final Element 
pageSequence = document.createElementNS(NS_XSLFO, + "fo:page-sequence"); + pageSequence.setAttribute("master-reference", pageMaster); + root.appendChild(pageSequence); + return pageSequence; + } + + protected Element addRegionBody(Element pageMaster) { + final Element regionBody = document.createElementNS(NS_XSLFO, + "fo:region-body"); + pageMaster.appendChild(regionBody); + + return regionBody; + } + + protected Element addSimplePageMaster(String masterName) { + final Element simplePageMaster = document.createElementNS(NS_XSLFO, + "fo:simple-page-master"); + simplePageMaster.setAttribute("master-name", masterName); + layoutMasterSet.appendChild(simplePageMaster); + + return simplePageMaster; + } + + protected Element addTable(Element flow) { + final Element table = document.createElementNS(NS_XSLFO, "fo:table"); + flow.appendChild(table); + return table; + } + + protected Element createBlock() { + return document.createElementNS(NS_XSLFO, "fo:block"); + } + + protected Element createExternalGraphic(String source) { + Element result = document.createElementNS(NS_XSLFO, + "fo:external-graphic"); + result.setAttribute("src", "url('" + source + "')"); + return result; + } + + protected Element createInline() { + return document.createElementNS(NS_XSLFO, "fo:inline"); + } + + protected Element createLeader() { + return document.createElementNS(NS_XSLFO, "fo:leader"); + } + + protected Element createListBlock() { + return document.createElementNS(NS_XSLFO, "fo:list-block"); + } + + protected Element createListItem() { + return document.createElementNS(NS_XSLFO, "fo:list-item"); + } + + protected Element createListItemBody() { + return document.createElementNS(NS_XSLFO, "fo:list-item-body"); + } + + protected Element createListItemLabel(String text) { + Element result = document.createElementNS(NS_XSLFO, + "fo:list-item-label"); + Element block = createBlock(); + block.appendChild(document.createTextNode(text)); + result.appendChild(block); + return result; + } + + 
protected String createPageMaster(SectionProperties sep, String type, + int section) { + float height = sep.getYaPage() / TWIPS_PER_INCH; + float width = sep.getXaPage() / TWIPS_PER_INCH; + float leftMargin = sep.getDxaLeft() / TWIPS_PER_INCH; + float rightMargin = sep.getDxaRight() / TWIPS_PER_INCH; + float topMargin = sep.getDyaTop() / TWIPS_PER_INCH; + float bottomMargin = sep.getDyaBottom() / TWIPS_PER_INCH; + + // add these to the header + String pageMasterName = type + "-page" + section; + + Element pageMaster = addSimplePageMaster(pageMasterName); + pageMaster.setAttribute("page-height", height + "in"); + pageMaster.setAttribute("page-width", width + "in"); + + Element regionBody = addRegionBody(pageMaster); + regionBody.setAttribute("margin", topMargin + "in " + rightMargin + + "in " + bottomMargin + "in " + leftMargin + "in"); + + /* + * 6.4.14 fo:region-body + * + * The values of the padding and border-width traits must be "0". + */ + // WordToFoUtils.setBorder(regionBody, sep.getBrcTop(), "top"); + // WordToFoUtils.setBorder(regionBody, sep.getBrcBottom(), "bottom"); + // WordToFoUtils.setBorder(regionBody, sep.getBrcLeft(), "left"); + // WordToFoUtils.setBorder(regionBody, sep.getBrcRight(), "right"); + + if (sep.getCcolM1() > 0) { + regionBody.setAttribute("column-count", "" + (sep.getCcolM1() + 1)); + if (sep.getFEvenlySpaced()) { + regionBody.setAttribute("column-gap", + (sep.getDxaColumns() / TWIPS_PER_INCH) + "in"); + } else { + regionBody.setAttribute("column-gap", "0.25in"); + } + } + + return pageMasterName; + } + + protected Element createTableBody() { + return document.createElementNS(NS_XSLFO, "fo:table-body"); + } + + protected Element createTableCell() { + return document.createElementNS(NS_XSLFO, "fo:table-cell"); + } + + protected Element createTableHeader() { + return document.createElementNS(NS_XSLFO, "fo:table-header"); + } + + protected Element createTableRow() { + return document.createElementNS(NS_XSLFO, "fo:table-row"); + } + + 
protected Text createText(String data) { + return document.createTextNode(data); + } + + public Document getDocument() { + return document; + } + + public void processDocument(HWPFDocument hwpfDocument) { + final Range range = hwpfDocument.getRange(); + + for (int s = 0; s < range.numSections(); s++) { + processSection(hwpfDocument, range.getSection(s), s); + } + } + + @SuppressWarnings("unused") + protected void processImage(Element currentBlock, Picture picture) { + // no default implementation -- skip + } + + protected void processParagraph(HWPFDocument hwpfDocument, + Element parentFopElement, int currentTableLevel, + Paragraph paragraph, String bulletText) { + final Element block = createBlock(); + parentFopElement.appendChild(block); + + WordToFoUtils.setParagraphProperties(paragraph, block); + + final int charRuns = paragraph.numCharacterRuns(); + + if (charRuns == 0) { + return; + } + + final String pFontName; + final int pFontSize; + final boolean pBold; + final boolean pItalic; + { + CharacterRun characterRun = paragraph.getCharacterRun(0); + pFontSize = characterRun.getFontSize() / 2; + pFontName = characterRun.getFontName(); + pBold = characterRun.isBold(); + pItalic = characterRun.isItalic(); + } + WordToFoUtils.setFontFamily(block, pFontName); + WordToFoUtils.setFontSize(block, pFontSize); + WordToFoUtils.setBold(block, pBold); + WordToFoUtils.setItalic(block, pItalic); + + StringBuilder lineText = new StringBuilder(); + + if (WordToFoUtils.isNotEmpty(bulletText)) { + Element inline = createInline(); + block.appendChild(inline); + + Text textNode = createText(bulletText); + inline.appendChild(textNode); + + lineText.append(bulletText); + } + + for (int c = 0; c < charRuns; c++) { + CharacterRun characterRun = paragraph.getCharacterRun(c); + + String text = characterRun.text(); + if (text.getBytes().length == 0) + continue; + + if (text.getBytes()[0] == FIELD_BEGIN_MARK) { + int skipTo = tryImageWithinField(hwpfDocument, paragraph, c, + block); + + if 
(skipTo != c) { + c = skipTo; + continue; + } + continue; + } + if (text.getBytes()[0] == FIELD_SEPARATOR_MARK) { + continue; + } + if (text.getBytes()[0] == FIELD_END_MARK) { + continue; + } + + if (characterRun.isSpecialCharacter() || characterRun.isObj() + || characterRun.isOle2()) { + continue; + } + + Element inline = createInline(); + if (characterRun.isBold() != pBold) { + WordToFoUtils.setBold(inline, characterRun.isBold()); + } + if (characterRun.isItalic() != pItalic) { + WordToFoUtils.setItalic(inline, characterRun.isItalic()); + } + if (!WordToFoUtils.equals(characterRun.getFontName(), pFontName)) { + WordToFoUtils.setFontFamily(inline, characterRun.getFontName()); + } + if (characterRun.getFontSize() / 2 != pFontSize) { + WordToFoUtils.setFontSize(inline, + characterRun.getFontSize() / 2); + } + WordToFoUtils.setCharactersProperties(characterRun, inline); + block.appendChild(inline); + + if (text.endsWith("\r") + || (text.charAt(text.length() - 1) == BEL_MARK && currentTableLevel != 0)) + text = text.substring(0, text.length() - 1); + + Text textNode = createText(text); + inline.appendChild(textNode); + + lineText.append(text); + } + + if (lineText.toString().trim().length() == 0) { + Element leader = createLeader(); + block.appendChild(leader); + } + + return; + } + + protected void processSection(HWPFDocument hwpfDocument, Section section, + int sectionCounter) { + String regularPage = createPageMaster( + WordToFoUtils.getSectionProperties(section), "page", + sectionCounter); + + Element pageSequence = addPageSequence(regularPage); + Element flow = addFlowToPageSequence(pageSequence, "xsl-region-body"); + + processSectionParagraphes(hwpfDocument, flow, section, 0); + } + + protected void processSectionParagraphes(HWPFDocument hwpfDocument, + Element flow, Range range, int currentTableLevel) { + final Map allTables = new HashMap(); + for (TableIterator tableIterator = WordToFoUtils.newTableIterator( + range, currentTableLevel + 1); 
tableIterator.hasNext();) { + Table next = tableIterator.next(); + allTables.put(Integer.valueOf(next.getStartOffset()), next); + } + + final ListTables listTables = hwpfDocument.getListTables(); + int currentListInfo = 0; + + final int paragraphs = range.numParagraphs(); + for (int p = 0; p < paragraphs; p++) { + Paragraph paragraph = range.getParagraph(p); + + if (allTables.containsKey(Integer.valueOf(paragraph + .getStartOffset()))) { + Table table = allTables.get(Integer.valueOf(paragraph + .getStartOffset())); + processTable(hwpfDocument, flow, table, currentTableLevel + 1); + continue; + } + + if (paragraph.isInTable() + && paragraph.getTableLevel() != currentTableLevel) { + continue; + } + + if (paragraph.getIlfo() != currentListInfo) { + currentListInfo = paragraph.getIlfo(); + } + + if (currentListInfo != 0) { + final ListFormatOverride listFormatOverride = listTables + .getOverride(paragraph.getIlfo()); + + String label = WordToFoUtils.getBulletText(listTables, + paragraph, listFormatOverride.getLsid()); + + processParagraph(hwpfDocument, flow, currentTableLevel, + paragraph, label); + } else { + processParagraph(hwpfDocument, flow, currentTableLevel, + paragraph, WordToFoUtils.EMPTY); + } + } + + } + + protected void processTable(HWPFDocument hwpfDocument, Element flow, + Table table, int thisTableLevel) { + Element tableElement = addTable(flow); + + Element tableHeader = createTableHeader(); + Element tableBody = createTableBody(); + + final int tableRows = table.numRows(); + + int maxColumns = Integer.MIN_VALUE; + for (int r = 0; r < tableRows; r++) { + maxColumns = Math.max(maxColumns, table.getRow(r).numCells()); + } + + for (int r = 0; r < tableRows; r++) { + TableRow tableRow = table.getRow(r); + + Element tableRowElement = createTableRow(); + WordToFoUtils.setTableRowProperties(tableRow, tableRowElement); + + final int rowCells = tableRow.numCells(); + for (int c = 0; c < rowCells; c++) { + TableCell tableCell = tableRow.getCell(c); + + if 
(tableCell.isMerged() && !tableCell.isFirstMerged()) + continue; + + if (tableCell.isVerticallyMerged() + && !tableCell.isFirstVerticallyMerged()) + continue; + + Element tableCellElement = createTableCell(); + WordToFoUtils.setTableCellProperties(tableRow, tableCell, + tableCellElement, r == 0, r == tableRows - 1, c == 0, + c == rowCells - 1); + + if (tableCell.isFirstMerged()) { + int count = 0; + for (int c1 = c; c1 < rowCells; c1++) { + TableCell nextCell = tableRow.getCell(c1); + if (nextCell.isMerged()) + count++; + if (!nextCell.isMerged()) + break; + } + tableCellElement.setAttribute("number-columns-spanned", "" + + count); + } else { + if (c == rowCells - 1 && c != maxColumns - 1) { + tableCellElement.setAttribute("number-columns-spanned", + "" + (maxColumns - c)); + } + } + + if (tableCell.isFirstVerticallyMerged()) { + int count = 0; + for (int r1 = r; r1 < tableRows; r1++) { + TableRow nextRow = table.getRow(r1); + if (nextRow.numCells() < c) + break; + TableCell nextCell = nextRow.getCell(c); + if (nextCell.isVerticallyMerged()) + count++; + if (!nextCell.isVerticallyMerged()) + break; + } + tableCellElement.setAttribute("number-rows-spanned", "" + + count); + } + + processSectionParagraphes(hwpfDocument, tableCellElement, + tableCell, thisTableLevel); + + if (!tableCellElement.hasChildNodes()) { + tableCellElement.appendChild(createBlock()); + } + + tableRowElement.appendChild(tableCellElement); + } + + if (tableRow.isTableHeader()) { + tableHeader.appendChild(tableRowElement); + } else { + tableBody.appendChild(tableRowElement); + } + } + + if (tableHeader.hasChildNodes()) { + tableElement.appendChild(tableHeader); + } + if (tableBody.hasChildNodes()) { + tableElement.appendChild(tableBody); + } else { + System.err.println("Table without body"); + } + } + + protected int tryImageWithinField(HWPFDocument hwpfDocument, + Paragraph paragraph, int beginMark, Element currentBlock) { + int separatorMark = -1; + int pictureMark = -1; + int endMark = -1; + 
for (int c = beginMark + 1; c < paragraph.numCharacterRuns(); c++) { + CharacterRun characterRun = paragraph.getCharacterRun(c); + + String text = characterRun.text(); + if (text.getBytes().length == 0) + continue; + + if (text.getBytes()[0] == FIELD_SEPARATOR_MARK) { + if (separatorMark != -1) { + // double; + return beginMark; + } + + separatorMark = c; + continue; + } + + if (text.getBytes()[0] == FIELD_END_MARK) { + if (endMark != -1) { + // double; + return beginMark; + } + + endMark = c; + break; + } + + if (hwpfDocument.getPicturesTable().hasPicture(characterRun)) { + if (pictureMark != -1) { + // double; + return beginMark; + } + + pictureMark = c; + continue; + } + } + + if (separatorMark == -1 || pictureMark == -1 || endMark == -1) + return beginMark; + + final CharacterRun pictureRun = paragraph.getCharacterRun(pictureMark); + final Picture picture = hwpfDocument.getPicturesTable().extractPicture( + pictureRun, true); + processImage(currentBlock, picture); + + return endMark; + } + +}