Lines 23-28
import java.io.UnsupportedEncodingException;
Link Here
|
23 |
import java.net.MalformedURLException; |
23 |
import java.net.MalformedURLException; |
24 |
import java.net.URL; |
24 |
import java.net.URL; |
25 |
import java.net.URLDecoder; |
25 |
import java.net.URLDecoder; |
|
|
26 |
import java.nio.charset.StandardCharsets; |
26 |
import java.util.LinkedList; |
27 |
import java.util.LinkedList; |
27 |
import java.util.List; |
28 |
import java.util.List; |
28 |
|
29 |
|
Lines 74-80
public final class HtmlParsingUtils {
Link Here
|
74 |
{ |
75 |
{ |
75 |
String query = null; |
76 |
String query = null; |
76 |
try { |
77 |
try { |
77 |
query = URLDecoder.decode(newLink.getQueryString(), "UTF-8"); // $NON-NLS-1$ |
78 |
query = URLDecoder.decode(newLink.getQueryString(), StandardCharsets.UTF_8.name()); |
78 |
} catch (UnsupportedEncodingException e) { |
79 |
} catch (UnsupportedEncodingException e) { |
79 |
// UTF-8 unsupported? You must be joking! |
80 |
// UTF-8 unsupported? You must be joking! |
80 |
log.error("UTF-8 encoding not supported!"); |
81 |
log.error("UTF-8 encoding not supported!"); |
Lines 204-211
public final class HtmlParsingUtils {
Link Here
|
204 |
public static Tidy getParser() { |
205 |
public static Tidy getParser() { |
205 |
log.debug("Start : getParser1"); |
206 |
log.debug("Start : getParser1"); |
206 |
Tidy tidy = new Tidy(); |
207 |
Tidy tidy = new Tidy(); |
207 |
tidy.setInputEncoding("UTF8"); |
208 |
tidy.setInputEncoding(StandardCharsets.UTF_8.name()); |
208 |
tidy.setOutputEncoding("UTF8"); |
209 |
tidy.setOutputEncoding(StandardCharsets.UTF_8.name()); |
209 |
tidy.setQuiet(true); |
210 |
tidy.setQuiet(true); |
210 |
tidy.setShowWarnings(false); |
211 |
tidy.setShowWarnings(false); |
211 |
|
212 |
|
Lines 228-248
public final class HtmlParsingUtils {
Link Here
|
228 |
public static Node getDOM(String text) { |
229 |
public static Node getDOM(String text) { |
229 |
log.debug("Start : getDOM1"); |
230 |
log.debug("Start : getDOM1"); |
230 |
|
231 |
|
231 |
try { |
232 |
Node node = getParser() |
232 |
Node node = getParser().parseDOM(new ByteArrayInputStream(text.getBytes("UTF-8")), null);// $NON-NLS-1$ |
233 |
.parseDOM( |
|
|
234 |
new ByteArrayInputStream( |
235 |
text.getBytes(StandardCharsets.UTF_8)), null); |
233 |
|
236 |
|
234 |
if (log.isDebugEnabled()) { |
237 |
if (log.isDebugEnabled()) { |
235 |
log.debug("node : " + node); |
238 |
log.debug("node : " + node); |
236 |
} |
239 |
} |
237 |
|
240 |
|
238 |
log.debug("End : getDOM1"); |
241 |
log.debug("End : getDOM1"); |
|
|
242 |
|
243 |
return node; |
239 |
|
244 |
|
240 |
return node; |
|
|
241 |
} catch (UnsupportedEncodingException e) { |
242 |
log.error("getDOM1 : Unsupported encoding exception - " + e); |
243 |
log.debug("End : getDOM1"); |
244 |
throw new RuntimeException("UTF-8 encoding failed", e); |
245 |
} |
246 |
} |
245 |
} |
247 |
|
246 |
|
248 |
public static Document createEmptyDoc() { |
247 |
public static Document createEmptyDoc() { |