Index: bin/jmeter.properties =================================================================== --- bin/jmeter.properties (revision 1730969) +++ bin/jmeter.properties (working copy) @@ -713,7 +713,8 @@ # Default parser: # This new parser (since 2.10) should perform better than all others # see https://bz.apache.org/bugzilla/show_bug.cgi?id=55632 -#htmlParser.className=org.apache.jmeter.protocol.http.parser.LagartoBasedHtmlParser +# Do not comment this property +htmlParser.className=org.apache.jmeter.protocol.http.parser.LagartoBasedHtmlParser # Other parsers: # Default parser before 2.10 Index: src/protocol/http/org/apache/jmeter/protocol/http/parser/BaseParser.java =================================================================== --- src/protocol/http/org/apache/jmeter/protocol/http/parser/BaseParser.java (revision 0) +++ src/protocol/http/org/apache/jmeter/protocol/http/parser/BaseParser.java (revision 0) @@ -0,0 +1,89 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ + +package org.apache.jmeter.protocol.http.parser; + +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; + +import org.apache.jorphan.logging.LoggingManager; +import org.apache.log.Logger; + +/** + * BaseParser is the base class for {@link LinkExtractorParser} implementations. + * It is advised to make subclasses reusable across parsing operations, so {@link #isReusable()} returns true by default + * @since 3.0 + */ +public abstract class BaseParser implements LinkExtractorParser { + private static final Logger log = LoggingManager.getLoggerForClass(); + // Cache of parsers keyed by class name - only parsers whose isReusable() returns true are stored + private static final Map parsers = new ConcurrentHashMap<>(5); + + /** + * Default constructor, nothing to initialise. + */ + public BaseParser() { + } + + /** + * Factory method of parsers: returns the cached instance for the class name if there is one, otherwise creates a new instance + * @param parserClassName name of the parser class to load, as accepted by Class.forName + * @return {@link LinkExtractorParser} + * @throws LinkExtractorParseException if the class cannot be found or instantiated, or is not a {@link LinkExtractorParser} + */ + public static LinkExtractorParser getParser(String parserClassName) + throws LinkExtractorParseException { + + // Is there a cached parser? + LinkExtractorParser parser = parsers.get(parserClassName); + if (parser != null) { + log.debug("Fetched " + parserClassName); + return parser; + } + + try { + Object clazz = Class.forName(parserClassName).newInstance(); + if (clazz instanceof LinkExtractorParser) { + parser = (LinkExtractorParser) clazz; + } else { + throw new LinkExtractorParseException(new ClassCastException(parserClassName)); + } + } catch (InstantiationException | ClassNotFoundException + | IllegalAccessException e) { + throw new LinkExtractorParseException(e); + } + log.info("Created " + parserClassName); + if (parser.isReusable()) { + parsers.put(parserClassName, parser);// cache the parser + } + + return parser; + } + + /** + * Parsers should over-ride this method to return false if the parser class is not re-usable; + * re-usable parsers are cached for the next getParser() call. 
+ * + * @return true if the Parser is reusable + */ + @Override + public boolean isReusable() { + return true; + } + +} Index: src/protocol/http/org/apache/jmeter/protocol/http/parser/CssParser.java =================================================================== --- src/protocol/http/org/apache/jmeter/protocol/http/parser/CssParser.java (revision 0) +++ src/protocol/http/org/apache/jmeter/protocol/http/parser/CssParser.java (revision 0) @@ -0,0 +1,44 @@ +/** + * NOTE(review): ASF license header is missing here, the other files added by this patch carry it. + */ +package org.apache.jmeter.protocol.http.parser; + +import java.net.URL; +import java.util.ArrayList; +import java.util.Iterator; + +/** + * Placeholder {@link LinkExtractorParser} for CSS content. NOTE(review): extraction is not implemented yet, the decoded content is never inspected and the returned iterator is built from a newly created, empty list. + */ +public class CssParser implements LinkExtractorParser { + //private static final Pattern PATTERN = Pattern.compile("background(-image)?: url[\\s]*\\([\\s]*(?[^\)]*)\)"); + /** + * Default constructor, nothing to initialise. + */ + public CssParser() { + } + + /* (non-Javadoc) + * @see org.apache.jmeter.protocol.http.parser.LinkExtractorParser#getEmbeddedResourceURLs(java.lang.String, byte[], java.net.URL, java.lang.String) + */ + @Override + public Iterator getEmbeddedResourceURLs(String userAgent, byte[] data, + URL baseUrl, String encoding) + throws LinkExtractorParseException { + try { + String contents = new String(data,encoding); + + return new URLCollection(new ArrayList()).iterator(); + } + catch (Exception e) { + throw new HTMLParseException(e); + } + } + + @Override + public boolean isReusable() { + return true; + } + + +} Index: src/protocol/http/org/apache/jmeter/protocol/http/parser/HTMLParseError.java =================================================================== --- src/protocol/http/org/apache/jmeter/protocol/http/parser/HTMLParseError.java (revision 1720679) +++ src/protocol/http/org/apache/jmeter/protocol/http/parser/HTMLParseError.java (working copy) @@ -24,7 +24,9 @@ * however it is now used in its own right. 
* * @version $Revision$ + * @deprecated */ +@Deprecated public class HTMLParseError extends Error { private static final long serialVersionUID = 240L; Index: src/protocol/http/org/apache/jmeter/protocol/http/parser/HTMLParseException.java =================================================================== --- src/protocol/http/org/apache/jmeter/protocol/http/parser/HTMLParseException.java (revision 1720679) +++ src/protocol/http/org/apache/jmeter/protocol/http/parser/HTMLParseException.java (working copy) @@ -18,15 +18,10 @@ package org.apache.jmeter.protocol.http.parser; /** - * Exception class for use with HTMLParser classes. - * The main rationale for the class - * was to support chained Exceptions in JDK 1.3, - * however it is now used in its own right. - * - * @version $Revision$ + * Exception class for use with {@link HTMLParser} classes. */ -public class HTMLParseException extends Exception { - private static final long serialVersionUID = 240L; +public class HTMLParseException extends LinkExtractorParseException { + private static final long serialVersionUID = 241L; public HTMLParseException() { super(); @@ -43,4 +38,9 @@ public HTMLParseException(String message, Throwable cause) { super(message, cause); } + + public HTMLParseException(String message, Throwable cause, + boolean enableSuppression, boolean writableStackTrace) { + super(message, cause, enableSuppression, writableStackTrace); + } } Index: src/protocol/http/org/apache/jmeter/protocol/http/parser/HTMLParser.java =================================================================== --- src/protocol/http/org/apache/jmeter/protocol/http/parser/HTMLParser.java (revision 1731074) +++ src/protocol/http/org/apache/jmeter/protocol/http/parser/HTMLParser.java (working copy) @@ -23,21 +23,18 @@ import java.util.Collection; import java.util.Iterator; import java.util.LinkedHashSet; -import java.util.Map; -import java.util.concurrent.ConcurrentHashMap; import java.util.regex.Matcher; import 
java.util.regex.Pattern; import org.apache.commons.lang3.StringUtils; -import org.apache.jmeter.util.JMeterUtils; import org.apache.jorphan.logging.LoggingManager; import org.apache.log.Logger; /** - * HtmlParsers can parse HTML content to obtain URLs. + * {@link HTMLParser} subclasses can parse HTML content to obtain URLs. * */ -public abstract class HTMLParser { +public abstract class HTMLParser extends BaseParser { private static final Logger log = LoggingManager.getLoggerForClass(); @@ -69,9 +66,6 @@ protected static final Pattern IE_UA_PATTERN = Pattern.compile(IE_UA); private static final float IE_10 = 10.0f; - // Cache of parsers - parsers must be re-usable - private static final Map parsers = new ConcurrentHashMap<>(4); - public static final String PARSER_CLASSNAME = "htmlParser.className"; // $NON-NLS-1$ public static final String DEFAULT_PARSER = @@ -84,38 +78,6 @@ protected HTMLParser() { } - public static HTMLParser getParser() { - return getParser(JMeterUtils.getPropDefault(PARSER_CLASSNAME, DEFAULT_PARSER)); - } - - public static HTMLParser getParser(String htmlParserClassName) { - - // Is there a cached parser? 
- HTMLParser pars = parsers.get(htmlParserClassName); - if (pars != null) { - log.debug("Fetched " + htmlParserClassName); - return pars; - } - - try { - Object clazz = Class.forName(htmlParserClassName).newInstance(); - if (clazz instanceof HTMLParser) { - pars = (HTMLParser) clazz; - } else { - throw new HTMLParseError(new ClassCastException(htmlParserClassName)); - } - } catch (InstantiationException | ClassNotFoundException - | IllegalAccessException e) { - throw new HTMLParseError(e); - } - log.info("Created " + htmlParserClassName); - if (pars.isReusable()) { - parsers.put(htmlParserClassName, pars);// cache the parser - } - - return pars; - } - /** * Get the URLs for all the resources that a browser would automatically * download following the download of the HTML content, that is: images, @@ -137,6 +99,7 @@ * @return an Iterator for the resource URLs * @throws HTMLParseException when parsing the html fails */ + @Override public Iterator getEmbeddedResourceURLs( String userAgent, byte[] html, URL baseUrl, String encoding) throws HTMLParseException { // The Set is used to ignore duplicated binary files. @@ -216,16 +179,6 @@ String userAgent, byte[] html, URL baseUrl, Collection coll, String encoding) throws HTMLParseException { return getEmbeddedResourceURLs(userAgent, html, baseUrl, new URLCollection(coll), encoding); } - - /** - * Parsers should over-ride this method if the parser class is re-usable, in - * which case the class will be cached for the next getParser() call. 
- * - * @return true if the Parser is reusable - */ - protected boolean isReusable() { - return false; - } /** * Index: src/protocol/http/org/apache/jmeter/protocol/http/parser/HtmlParserHTMLParser.java =================================================================== --- src/protocol/http/org/apache/jmeter/protocol/http/parser/HtmlParserHTMLParser.java (revision 1730204) +++ src/protocol/http/org/apache/jmeter/protocol/http/parser/HtmlParserHTMLParser.java (working copy) @@ -57,11 +57,6 @@ log.info("Using htmlparser version: "+Parser.getVersion()); } - @Override - protected boolean isReusable() { - return true; - } - /** * {@inheritDoc} */ Index: src/protocol/http/org/apache/jmeter/protocol/http/parser/JTidyHTMLParser.java =================================================================== --- src/protocol/http/org/apache/jmeter/protocol/http/parser/JTidyHTMLParser.java (revision 1720679) +++ src/protocol/http/org/apache/jmeter/protocol/http/parser/JTidyHTMLParser.java (working copy) @@ -45,11 +45,6 @@ super(); } - @Override - protected boolean isReusable() { - return true; - } - /** * {@inheritDoc} */ Index: src/protocol/http/org/apache/jmeter/protocol/http/parser/JsoupBasedHtmlParser.java =================================================================== --- src/protocol/http/org/apache/jmeter/protocol/http/parser/JsoupBasedHtmlParser.java (revision 1730126) +++ src/protocol/http/org/apache/jmeter/protocol/http/parser/JsoupBasedHtmlParser.java (working copy) @@ -152,12 +152,4 @@ throw new HTMLParseException(e); } } - - /* (non-Javadoc) - * @see org.apache.jmeter.protocol.http.parser.HTMLParser#isReusable() - */ - @Override - protected boolean isReusable() { - return true; - } } Index: src/protocol/http/org/apache/jmeter/protocol/http/parser/LagartoBasedHtmlParser.java =================================================================== --- src/protocol/http/org/apache/jmeter/protocol/http/parser/LagartoBasedHtmlParser.java (revision 1731074) +++ 
src/protocol/http/org/apache/jmeter/protocol/http/parser/LagartoBasedHtmlParser.java (working copy) @@ -229,16 +229,4 @@ throw new HTMLParseException(e); } } - - - - - - /* (non-Javadoc) - * @see org.apache.jmeter.protocol.http.parser.HTMLParser#isReusable() - */ - @Override - protected boolean isReusable() { - return true; - } } Index: src/protocol/http/org/apache/jmeter/protocol/http/parser/LinkExtractorParseException.java =================================================================== --- src/protocol/http/org/apache/jmeter/protocol/http/parser/LinkExtractorParseException.java (revision 0) +++ src/protocol/http/org/apache/jmeter/protocol/http/parser/LinkExtractorParseException.java (revision 0) @@ -0,0 +1,70 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ + +package org.apache.jmeter.protocol.http.parser; + +/** + * Exception used with {@link LinkExtractorParser} + * @since 3.0 + */ +public class LinkExtractorParseException extends Exception { + + /** + * + */ + private static final long serialVersionUID = 6816968619973437826L; + + /** + * + */ + public LinkExtractorParseException() { + } + + /** + * @param message + */ + public LinkExtractorParseException(String message) { + super(message); + } + + /** + * @param cause + */ + public LinkExtractorParseException(Throwable cause) { + super(cause); + } + + /** + * @param message + * @param cause + */ + public LinkExtractorParseException(String message, Throwable cause) { + super(message, cause); + } + + /** + * @param message + * @param cause + * @param enableSuppression + * @param writableStackTrace + */ + public LinkExtractorParseException(String message, Throwable cause, + boolean enableSuppression, boolean writableStackTrace) { + super(message, cause, enableSuppression, writableStackTrace); + } +} Index: src/protocol/http/org/apache/jmeter/protocol/http/parser/LinkExtractorParser.java =================================================================== --- src/protocol/http/org/apache/jmeter/protocol/http/parser/LinkExtractorParser.java (revision 0) +++ src/protocol/http/org/apache/jmeter/protocol/http/parser/LinkExtractorParser.java (revision 0) @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.jmeter.protocol.http.parser; + +import java.net.URL; +import java.util.Iterator; + +/** + * Interface specifying contract of content parser that aims to extract links + * @since 3.0 + */ +public interface LinkExtractorParser { + + /** + * Get the URLs for all the resources that a browser would automatically + * download following the download of the content, that is: images, + * stylesheets, javascript files, applets, etc... + *
<p>
+ * URLs should not appear twice in the returned iterator. + *
<p>
+ * Malformed URLs can be reported to the caller by having the Iterator + * return the corresponding URL String. Overall problems parsing the content + * should be reported by throwing a {@link LinkExtractorParseException}. + * @param userAgent + * User Agent + * @param responseData Response data + * @param baseUrl + * Base URL from which the content was obtained + * @param encoding Charset + * @return an Iterator for the resource URLs + */ + Iterator getEmbeddedResourceURLs( + String userAgent, byte[] responseData, URL baseUrl, String encoding) + throws LinkExtractorParseException; + + boolean isReusable(); +} Index: src/protocol/http/org/apache/jmeter/protocol/http/parser/RegexpHTMLParser.java =================================================================== --- src/protocol/http/org/apache/jmeter/protocol/http/parser/RegexpHTMLParser.java (revision 1729736) +++ src/protocol/http/org/apache/jmeter/protocol/http/parser/RegexpHTMLParser.java (working copy) @@ -125,14 +125,6 @@ }; /** - * {@inheritDoc} - */ - @Override - protected boolean isReusable() { - return true; - } - - /** * Make sure to compile the regular expression upon instantiation: */ protected RegexpHTMLParser() { Index: src/protocol/http/org/apache/jmeter/protocol/http/sampler/HTTPSamplerBase.java =================================================================== --- src/protocol/http/org/apache/jmeter/protocol/http/sampler/HTTPSamplerBase.java (revision 1731242) +++ src/protocol/http/org/apache/jmeter/protocol/http/sampler/HTTPSamplerBase.java (working copy) @@ -56,8 +56,9 @@ import org.apache.jmeter.protocol.http.control.CookieManager; import org.apache.jmeter.protocol.http.control.DNSCacheManager; import org.apache.jmeter.protocol.http.control.HeaderManager; -import org.apache.jmeter.protocol.http.parser.HTMLParseException; -import org.apache.jmeter.protocol.http.parser.HTMLParser; +import org.apache.jmeter.protocol.http.parser.BaseParser; +import 
org.apache.jmeter.protocol.http.parser.LinkExtractorParseException; +import org.apache.jmeter.protocol.http.parser.LinkExtractorParser; import org.apache.jmeter.protocol.http.util.ConversionUtils; import org.apache.jmeter.protocol.http.util.EncoderCache; import org.apache.jmeter.protocol.http.util.HTTPArgument; @@ -303,36 +304,32 @@ // Derive the mapping of content types to parsers - private static final Map parsersForType = new HashMap<>(); + private static final Map PARSERS_FOR_CONTENT_TYPE = new HashMap<>(); // Not synch, but it is not modified after creation private static final String RESPONSE_PARSERS= // list of parsers JMeterUtils.getProperty("HTTPResponse.parsers");//$NON-NLS-1$ static{ - String []parsers = JOrphanUtils.split(RESPONSE_PARSERS, " " , true);// returns empty array for null + String[] parsers = JOrphanUtils.split(RESPONSE_PARSERS, " " , true);// returns empty array for null for (final String parser : parsers) { String classname = JMeterUtils.getProperty(parser + ".className");//$NON-NLS-1$ if (classname == null) { - log.info("Cannot find .className property for " + parser + ", using default"); - classname = ""; + log.error("Cannot find .className property for " + parser+", ensure you set property:'"+parser+".className'"); + continue; } String typelist = JMeterUtils.getProperty(parser + ".types");//$NON-NLS-1$ if (typelist != null) { String[] types = JOrphanUtils.split(typelist, " ", true); for (final String type : types) { log.info("Parser for " + type + " is " + classname); - parsersForType.put(type, classname); + PARSERS_FOR_CONTENT_TYPE.put(type, classname); } } else { - log.warn("Cannot find .types property for " + parser); + log.warn("Cannot find .types property for " + parser + + ", as a consequence parser will not be used, to make it usable, define property:'"+parser+".types'"); } } - if (parsers.length==0){ // revert to previous behaviour - parsersForType.put("text/html", ""); //$NON-NLS-1$ //$NON-NLS-2$ - log.info("No response parsers 
defined: text/html only will be scanned for embedded resources"); - } - } // Bug 49083 @@ -1194,19 +1191,13 @@ try { final byte[] responseData = res.getResponseData(); if (responseData.length > 0){ // Bug 39205 - String parserName = getParserClass(res); - if(parserName != null) - { - final HTMLParser parser = - parserName.length() > 0 ? // we have a name - HTMLParser.getParser(parserName) - : - HTMLParser.getParser(); // we don't; use the default parser + final LinkExtractorParser parser = getParser(res); + if(parser != null) { String userAgent = getUserAgent(res); urls = parser.getEmbeddedResourceURLs(userAgent, responseData, res.getURL(), res.getDataEncodingWithDefault()); } } - } catch (HTMLParseException e) { + } catch (LinkExtractorParseException e) { // Don't break the world just because this failed: res.addSubResult(errorResult(e, new HTTPSampleResult(res))); setParentSampleSuccess(res, false); @@ -1358,6 +1349,23 @@ } /** + * Gets parser from {@link HTTPSampleResult#getMediaType()}. + * Returns null if no parser defined for it + * @param res {@link HTTPSampleResult} + * @return {@link LinkExtractorParser} + * @throws LinkExtractorParseException + */ + private LinkExtractorParser getParser(HTTPSampleResult res) + throws LinkExtractorParseException { + String parserClassName = + PARSERS_FOR_CONTENT_TYPE.get(res.getMediaType()); + if( !StringUtils.isEmpty(parserClassName) ) { + return BaseParser.getParser(parserClassName); + } + return null; + } + + /** * @param url URL to escape * @return escaped url */ @@ -1434,15 +1442,6 @@ } } - /* - * @param res HTTPSampleResult to check - * @return parser class name (may be "") or null if entry does not exist - */ - private String getParserClass(HTTPSampleResult res) { - final String ct = res.getMediaType(); - return parsersForType.get(ct); - } - // TODO: make static? 
protected String encodeSpaces(String path) { return JOrphanUtils.replaceAllChars(path, ' ', "%20"); // $NON-NLS-1$ Index: test/src/org/apache/jmeter/protocol/http/parser/TestHTMLParser.java =================================================================== --- test/src/org/apache/jmeter/protocol/http/parser/TestHTMLParser.java (revision 1728418) +++ test/src/org/apache/jmeter/protocol/http/parser/TestHTMLParser.java (working copy) @@ -285,11 +285,11 @@ p = JMeterUtils.getProperties("jmeter.properties"); } p.setProperty(HTMLParser.PARSER_CLASSNAME, parserName); - HTMLParser.getParser(); + HTMLParser.getParser(p.getProperty(HTMLParser.PARSER_CLASSNAME)); } public void testDefaultParser() throws Exception { - HTMLParser.getParser(); + HTMLParser.getParser(JMeterUtils.getPropDefault(HTMLParser.PARSER_CLASSNAME, HTMLParser.DEFAULT_PARSER)); } public void testParserDefault() throws Exception { @@ -300,7 +300,7 @@ try { HTMLParser.getParser("no.such.parser"); fail("Should not have been able to create the parser"); - } catch (HTMLParseError e) { + } catch (LinkExtractorParseException e) { if (e.getCause() instanceof ClassNotFoundException) { // This is OK } else { @@ -313,7 +313,7 @@ try { HTMLParser.getParser("java.lang.String"); fail("Should not have been able to create the parser"); - } catch (HTMLParseError e) { + } catch (LinkExtractorParseException e) { if (e.getCause() instanceof ClassCastException) { return; } @@ -325,7 +325,7 @@ try { HTMLParser.getParser(TestClass.class.getName()); fail("Should not have been able to create the parser"); - } catch (HTMLParseError e) { + } catch (LinkExtractorParseException e) { if (e.getCause() instanceof InstantiationException) { return; } @@ -337,7 +337,7 @@ try { HTMLParser.getParser(StaticTestClass.class.getName()); fail("Should not have been able to create the parser"); - } catch (HTMLParseError e) { + } catch (LinkExtractorParseException e) { if (e.getCause() instanceof ClassCastException) { return; } @@ -349,19 +349,19 @@ 
} public void testParserSet() throws Exception { - HTMLParser p = HTMLParser.getParser(parserName); + HTMLParser p = (HTMLParser) BaseParser.getParser(parserName); filetest(p, TESTS[testNumber].fileName, TESTS[testNumber].baseURL, TESTS[testNumber].expectedSet, null, false, TESTS[testNumber].userAgent); } public void testParserList() throws Exception { - HTMLParser p = HTMLParser.getParser(parserName); + HTMLParser p = (HTMLParser) BaseParser.getParser(parserName); filetest(p, TESTS[testNumber].fileName, TESTS[testNumber].baseURL, TESTS[testNumber].expectedList, new Vector(), true, TESTS[testNumber].userAgent); } public void testSpecificParserList() throws Exception { - HTMLParser p = HTMLParser.getParser(parserName); + HTMLParser p = (HTMLParser) BaseParser.getParser(parserName); filetest(p, SPECIFIC_PARSER_TESTS[testNumber].fileName, SPECIFIC_PARSER_TESTS[testNumber].baseURL, SPECIFIC_PARSER_TESTS[testNumber].expectedList, new ArrayList(), true, SPECIFIC_PARSER_TESTS[testNumber].userAgent); }