ASF Bugzilla – Attachment 25597 Details for
Bug 49446
[patch] please don't insert field codes in the XWPFWordExtractor output
Home
|
New
|
Browse
|
Search
|
[?]
|
Reports
|
Help
|
New Account
|
Log In
Remember
[x]
|
Forgot Password
Login:
[x]
[patch]
a patch
poi-fieldcodes.patch (text/plain), 2.32 KB, created by
Antoni Mylka
on 2010-06-16 08:56:02 UTC
(
hide
)
Description:
a patch
Filename:
poi-fieldcodes.patch
MIME Type:
text/plain
Creator:
Antoni Mylka
Created:
2010-06-16 08:56:02 UTC
Size:
2.32 KB
patch
obsolete
>Index: src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFParagraph.java >=================================================================== >--- src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFParagraph.java (revision 955183) >+++ src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFParagraph.java (working copy) >@@ -54,6 +54,7 @@ > import org.openxmlformats.schemas.wordprocessingml.x2006.main.STLineSpacingRule; > import org.openxmlformats.schemas.wordprocessingml.x2006.main.STOnOff; > import org.openxmlformats.schemas.wordprocessingml.x2006.main.STTextAlignment; >+import org.w3c.dom.Node; > import org.w3c.dom.NodeList; > import org.w3c.dom.Text; > >@@ -133,7 +134,13 @@ > while (c.toNextSelection()) { > XmlObject o = c.getObject(); > if (o instanceof CTText) { >- text.append(((CTText) o).getStringValue()); >+ String tagName = o.getDomNode().getNodeName(); >+ // field codes (w:instrText, defined in spec sec. 17.16.23) >+ // come up as instances of CTText, but they only >+ // pollute the output >+ if (!tagName.equals("w:instrText")) { >+ text.append(((CTText) o).getStringValue()); >+ } > } > if (o instanceof CTPTab) { > text.append("\t"); >Index: src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java >=================================================================== >--- src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java (revision 955183) >+++ src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java (working copy) >@@ -237,4 +237,17 @@ > // Now check the first paragraph in total > assertTrue(extractor.getText().contains("a\tb\n")); > } >+ >+ /** >+ * The output should not contain field codes, e.g. those specified in the >+ * w:instrText tag (spec sec. 
17.16.23) >+ */ >+ public void testNoFieldCodes() { >+ XWPFDocument doc = XWPFTestDataSamples.openSampleDocument("FieldCodes.docx"); >+ XWPFWordExtractor extractor = new XWPFWordExtractor(doc); >+ String text = extractor.getText(); >+ assertTrue(text.length() > 0); >+ assertFalse(text.contains("AUTHOR")); >+ assertFalse(text.contains("CREATEDATE")); >+ } > }
You cannot view the attachment while viewing its details because your browser does not support IFRAMEs.
View the attachment on a separate page
.
View Attachment As Diff
View Attachment As Raw
Actions:
View
|
Diff
Attachments on
bug 49446
: 25597 |
25598