Attachment #33573 for bug #59033

View | Details | Raw Unified | Return to bug 59033
Collapse All | Expand All

Lines 713-719 Link Here

(-)bin/jmeter.properties (-1 / +2 lines)
713	# Default parser:	713	# Default parser:
714	# This new parser (since 2.10) should perform better than all others	714	# This new parser (since 2.10) should perform better than all others
715	# see https://bz.apache.org/bugzilla/show_bug.cgi?id=55632	715	# see https://bz.apache.org/bugzilla/show_bug.cgi?id=55632
716	#htmlParser.className=org.apache.jmeter.protocol.http.parser.LagartoBasedHtmlParser	716	# Do not comment this property
		717	htmlParser.className=org.apache.jmeter.protocol.http.parser.LagartoBasedHtmlParser
717		718
718	# Other parsers:	719	# Other parsers:
719	# Default parser before 2.10	720	# Default parser before 2.10




/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */

package org.apache.jmeter.protocol.http.parser;

import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

import org.apache.jorphan.logging.LoggingManager;
import org.apache.log.Logger;

/**
 * BaseParser is the base class for {@link LinkExtractorParser} implementations.
 * <p>
 * It is advised to make subclasses reusable across parsing, which is why
 * {@link #isReusable()} returns {@code true} by default.
 *
 * @since 3.0
 */
public abstract class BaseParser implements LinkExtractorParser {
    private static final Logger log = LoggingManager.getLoggerForClass();

    // Cache of reusable parsers keyed by class name. Typed as ConcurrentHashMap
    // (rather than Map) so that the atomic putIfAbsent() can be used below.
    private static final ConcurrentHashMap<String, LinkExtractorParser> parsers = new ConcurrentHashMap<>(5);

    /**
     * Default constructor. {@link #getParser(String)} creates parsers reflectively,
     * so concrete subclasses need an accessible no-argument constructor.
     */
    public BaseParser() {
    }

    /**
     * Factory method of parsers.
     * <p>
     * Returns the cached instance for the class name when there is one; otherwise
     * instantiates the class reflectively and caches the new instance if it
     * reports itself as reusable.
     *
     * @param parserClassName fully qualified name of a class implementing
     *        {@link LinkExtractorParser}
     * @return {@link LinkExtractorParser}
     * @throws LinkExtractorParseException if the name is {@code null}, the class
     *         cannot be found or instantiated, or it does not implement
     *         {@link LinkExtractorParser} (cause is then a {@link ClassCastException})
     */
    public static LinkExtractorParser getParser(String parserClassName)
            throws LinkExtractorParseException {
        if (parserClassName == null) {
            // ConcurrentHashMap rejects null keys with a NullPointerException;
            // report the problem through the declared exception instead.
            throw new LinkExtractorParseException("Parser class name must not be null");
        }

        // Is there a cached parser?
        LinkExtractorParser parser = parsers.get(parserClassName);
        if (parser != null) {
            log.debug("Fetched " + parserClassName);
            return parser;
        }

        try {
            Object instance = Class.forName(parserClassName).newInstance();
            if (instance instanceof LinkExtractorParser) {
                parser = (LinkExtractorParser) instance;
            } else {
                throw new LinkExtractorParseException(new ClassCastException(parserClassName));
            }
        } catch (InstantiationException | ClassNotFoundException
                | IllegalAccessException e) {
            throw new LinkExtractorParseException(e);
        }
        log.info("Created " + parserClassName);
        if (parser.isReusable()) {
            // Atomic insert: if another thread cached an instance between the
            // lookup above and this point, hand out that one so every caller
            // shares the same reusable parser.
            LinkExtractorParser existing = parsers.putIfAbsent(parserClassName, parser);
            if (existing != null) {
                return existing;
            }
        }

        return parser;
    }

    /**
     * Tells whether a single instance of this parser may be shared between calls.
     * <p>
     * Returns {@code true} by default, in which case the instance is cached for
     * the next {@link #getParser(String)} call. Parsers that must not be shared
     * should override this method to return {@code false}; a new instance is then
     * created on every {@link #getParser(String)} call.
     *
     * @return true if the Parser is reusable
     */
    @Override
    public boolean isReusable() {
        return true;
    }

}




/**
 * 
 */
package org.apache.jmeter.protocol.http.parser;

import java.net.URL;
import java.util.ArrayList;
import java.util.Iterator;

/**
 * {@link LinkExtractorParser} for CSS content.
 * <p>
 * Placeholder implementation: the response data is decoded with the supplied
 * encoding, but no URLs are extracted yet, so the returned iterator is always empty.
 */
public class CssParser implements LinkExtractorParser {
    // TODO: draft pattern for extracting url(...) references, currently unused
    //private static final Pattern PATTERN = Pattern.compile("background(-image)?: url[\\s]*\\([\\s]*(?<url>[^\)]*)\)");

    /**
     * Creates the parser; it holds no state.
     */
    public CssParser() {
    }

    /**
     * Decodes the CSS content and returns the embedded resource URLs found in it.
     * No extraction is implemented yet, so the result is currently always empty.
     *
     * @param userAgent user agent (not used)
     * @param data raw response data
     * @param baseUrl base URL of the response (not used)
     * @param encoding charset name used to decode {@code data}
     * @return an empty iterator
     * @throws LinkExtractorParseException if decoding {@code data} with
     *         {@code encoding} fails for any reason
     */
    @Override
    public Iterator<URL> getEmbeddedResourceURLs(String userAgent, byte[] data,
            URL baseUrl, String encoding)
            throws LinkExtractorParseException {
        try {
            // Decoded only so that a bad data/encoding pair is still reported;
            // the text itself is not inspected until extraction is implemented.
            new String(data, encoding);

            return new URLCollection(new ArrayList<URLString>()).iterator();
        } catch (Exception e) {
            throw new HTMLParseException(e);
        }
    }

    /**
     * @return {@code true}: the parser keeps no per-call state
     */
    @Override
    public boolean isReusable() {
        return true;
    }

}

Lines 24-30 Link Here

(-)src/protocol/http/org/apache/jmeter/protocol/http/parser/HTMLParseError.java (+2 lines)
24	* however it is now used in its own right.	24	* however it is now used in its own right.
25	*	25	*
26	* @version $Revision$	26	* @version $Revision$
		27	* @deprecated
27	*/	28	*/
		29	@Deprecated
28	public class HTMLParseError extends Error {	30	public class HTMLParseError extends Error {
29	private static final long serialVersionUID = 240L;	31	private static final long serialVersionUID = 240L;
30		32




package org.apache.jmeter.protocol.http.parser;

/**
 * Exception class for use with {@link HTMLParser} classes. 
 * The main rationale for the class
 * was to support chained Exceptions in JDK 1.3,
 * however it is now used in its own right.
 *
 * @version $Revision$
 */
public class HTMLParseException extends LinkExtractorParseException {
    private static final long serialVersionUID = 241L;

    public HTMLParseException() {
        super();

    public HTMLParseException(String message, Throwable cause) {
        super(message, cause);
    }

    public HTMLParseException(String message, Throwable cause,
            boolean enableSuppression, boolean writableStackTrace) {
        super(message, cause, enableSuppression, writableStackTrace);
    }
}




import java.util.Collection;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.lang3.StringUtils;
import org.apache.jmeter.util.JMeterUtils;
import org.apache.jorphan.logging.LoggingManager;
import org.apache.log.Logger;

/**
 * {@link HTMLParser} subclasses can parse HTML content to obtain URLs.
 *
 */
public abstract class HTMLParser extends BaseParser {

    private static final Logger log = LoggingManager.getLoggerForClass();


    protected static final Pattern IE_UA_PATTERN    = Pattern.compile(IE_UA);
    private   static final float IE_10                = 10.0f;

    // Cache of parsers - parsers must be re-usable
    private static final Map<String, HTMLParser> parsers = new ConcurrentHashMap<>(4);

    public static final String PARSER_CLASSNAME = "htmlParser.className"; // $NON-NLS-1$

    public static final String DEFAULT_PARSER =

    protected HTMLParser() {
    }

    /**
     * Returns the parser whose class name is configured through the
     * {@link #PARSER_CLASSNAME} property, falling back to {@link #DEFAULT_PARSER}.
     *
     * @return the configured {@link HTMLParser}
     */
    public static HTMLParser getParser() {
        final String configuredClassName = JMeterUtils.getPropDefault(PARSER_CLASSNAME, DEFAULT_PARSER);
        return getParser(configuredClassName);
    }

    /**
     * Returns a parser instance for the given class name, using the cache when
     * an instance is already stored there.
     *
     * @param htmlParserClassName fully qualified name of an {@link HTMLParser} subclass
     * @return the cached or newly created parser
     * @throws HTMLParseError if the class cannot be found or instantiated, or is
     *         not an {@link HTMLParser} (cause is then a {@link ClassCastException})
     */
    public static HTMLParser getParser(String htmlParserClassName) {
        // Fast path: hand out the cached instance when there is one
        final HTMLParser cached = parsers.get(htmlParserClassName);
        if (cached != null) {
            log.debug("Fetched " + htmlParserClassName);
            return cached;
        }

        // Slow path: instantiate reflectively through the no-arg constructor
        final Object instance;
        try {
            instance = Class.forName(htmlParserClassName).newInstance();
        } catch (InstantiationException | ClassNotFoundException
                | IllegalAccessException e) {
            throw new HTMLParseError(e);
        }
        if (!(instance instanceof HTMLParser)) {
            throw new HTMLParseError(new ClassCastException(htmlParserClassName));
        }

        final HTMLParser created = (HTMLParser) instance;
        log.info("Created " + htmlParserClassName);
        if (created.isReusable()) {
            // only reusable parsers are kept for later calls
            parsers.put(htmlParserClassName, created);
        }
        return created;
    }

    /**
     * Get the URLs for all the resources that a browser would automatically
     * download following the download of the HTML content, that is: images,

     * @return an Iterator for the resource URLs
     * @throws HTMLParseException when parsing the <code>html</code> fails
     */
    @Override
    public Iterator<URL> getEmbeddedResourceURLs(
            String userAgent, byte[] html, URL baseUrl, String encoding) throws HTMLParseException {
        // The Set is used to ignore duplicated binary files.

            String userAgent, byte[] html, URL baseUrl, Collection<URLString> coll, String encoding) throws HTMLParseException {
        return getEmbeddedResourceURLs(userAgent, html, baseUrl, new URLCollection(coll), encoding);
    }

    /**
     * Tells whether an instance of this parser may be cached and handed out again.
     * <p>
     * The default is {@code false}. Parsers should override this method to return
     * {@code true} if the parser class is re-usable, in which case the instance
     * will be cached for the next getParser() call.
     *
     * @return true if the Parser is reusable
     */
    protected boolean isReusable() {
        return false;
    }
    
    /**
     * 

Lines 57-67 Link Here

(-)src/protocol/http/org/apache/jmeter/protocol/http/parser/HtmlParserHTMLParser.java (-5 lines)
57	log.info("Using htmlparser version: "+Parser.getVersion());	57	log.info("Using htmlparser version: "+Parser.getVersion());
58	}	58	}
59		59
60	@Override
61	protected boolean isReusable() {
62	return true;
63	}
64
65	/**	60	/**
66	* {@inheritDoc}	61	* {@inheritDoc}
67	*/	62	*/

Lines 45-55 Link Here

(-)src/protocol/http/org/apache/jmeter/protocol/http/parser/JTidyHTMLParser.java (-5 lines)
45	super();	45	super();
46	}	46	}
47		47
48	@Override
49	protected boolean isReusable() {
50	return true;
51	}
52
53	/**	48	/**
54	* {@inheritDoc}	49	* {@inheritDoc}
55	*/	50	*/




            throw new HTMLParseException(e);
        }
    }

    /* (non-Javadoc)
     * Always reports this parser as reusable, so HTMLParser.getParser(String)
     * caches the instance it creates.
     *
     * @see org.apache.jmeter.protocol.http.parser.HTMLParser#isReusable()
     */
    @Override
    protected boolean isReusable() {
        return true;
    }
}




            throw new HTMLParseException(e);
        }
    }

    



    /* (non-Javadoc)
     * Always reports this parser as reusable, so HTMLParser.getParser(String)
     * caches the instance it creates.
     *
     * @see org.apache.jmeter.protocol.http.parser.HTMLParser#isReusable()
     */
    @Override
    protected boolean isReusable() {
        return true;
    }
}




/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */

package org.apache.jmeter.protocol.http.parser;

/**
 * Checked exception raised by {@link LinkExtractorParser} implementations and
 * by the parser factory when links cannot be extracted from a response.
 *
 * @since 3.0
 */
public class LinkExtractorParseException extends Exception {

    /** Serialization version identifier. */
    private static final long serialVersionUID = 6816968619973437826L;

    /**
     * Creates an exception with neither detail message nor cause.
     */
    public LinkExtractorParseException() {
        super();
    }

    /**
     * Creates an exception carrying only a detail message.
     *
     * @param message description of the failure
     */
    public LinkExtractorParseException(final String message) {
        super(message);
    }

    /**
     * Creates an exception wrapping an underlying failure.
     *
     * @param cause the underlying failure
     */
    public LinkExtractorParseException(final Throwable cause) {
        super(cause);
    }

    /**
     * Creates an exception with both a detail message and an underlying failure.
     *
     * @param message description of the failure
     * @param cause the underlying failure
     */
    public LinkExtractorParseException(final String message, final Throwable cause) {
        super(message, cause);
    }

    /**
     * Creates an exception with full control over suppression and stack trace capture.
     *
     * @param message description of the failure
     * @param cause the underlying failure
     * @param enableSuppression whether suppressed exceptions are recorded
     * @param writableStackTrace whether the stack trace is writable
     */
    public LinkExtractorParseException(final String message, final Throwable cause,
            final boolean enableSuppression, final boolean writableStackTrace) {
        super(message, cause, enableSuppression, writableStackTrace);
    }
}




/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */

package org.apache.jmeter.protocol.http.parser;

import java.net.URL;
import java.util.Iterator;

/**
 * Interface specifying the contract of a content parser that aims to extract links.
 *
 * @since 3.0
 */
public interface LinkExtractorParser {

    /**
     * Get the URLs for all the resources that a browser would automatically
     * download following the download of the content, that is: images,
     * stylesheets, javascript files, applets, etc...
     * <p>
     * URLs should not appear twice in the returned iterator.
     * <p>
     * Malformed URLs can be reported to the caller by having the Iterator
     * return the corresponding URL String. Overall problems parsing the content
     * should be reported by throwing a {@link LinkExtractorParseException}.
     * @param userAgent
     *            User Agent
     * @param responseData Response data
     * @param baseUrl
     *            Base URL from which the content was obtained
     * @param encoding Charset
     * @return an Iterator for the resource URLs
     * @throws LinkExtractorParseException when the content cannot be parsed
     */
    Iterator<URL> getEmbeddedResourceURLs(
            String userAgent, byte[] responseData, URL baseUrl, String encoding) 
                    throws LinkExtractorParseException;
    
    /**
     * Tells whether one instance of this parser can be used for several parsing
     * calls. {@link BaseParser#getParser(String)} caches an instance only when
     * this method returns {@code true}.
     *
     * @return true if the parser is reusable
     */
    boolean isReusable();
}




    };

    /**
     * {@inheritDoc}
     * <p>
     * This parser always reports itself as reusable.
     */
    @Override
    protected boolean isReusable() {
        return true;
    }

    /**
     * Make sure to compile the regular expression upon instantiation:
     */
    protected RegexpHTMLParser() {




import org.apache.jmeter.protocol.http.control.CookieManager;
import org.apache.jmeter.protocol.http.control.DNSCacheManager;
import org.apache.jmeter.protocol.http.control.HeaderManager;
import org.apache.jmeter.protocol.http.parser.BaseParser;
import org.apache.jmeter.protocol.http.parser.LinkExtractorParseException;
import org.apache.jmeter.protocol.http.parser.LinkExtractorParser;
import org.apache.jmeter.protocol.http.util.ConversionUtils;
import org.apache.jmeter.protocol.http.util.EncoderCache;
import org.apache.jmeter.protocol.http.util.HTTPArgument;



    // Derive the mapping of content types to parsers
    private static final Map<String, String> PARSERS_FOR_CONTENT_TYPE = new HashMap<>();
    // Not synch, but it is not modified after creation

    private static final String RESPONSE_PARSERS= // list of parsers
        JMeterUtils.getProperty("HTTPResponse.parsers");//$NON-NLS-1$

    // Fills the content-type -> parser class name map from JMeter properties:
    // RESPONSE_PARSERS holds a space separated list of parser names; for each
    // name, "<name>.className" gives the parser class and "<name>.types" the
    // space separated content types it handles.
    static{
        String[] parsers = JOrphanUtils.split(RESPONSE_PARSERS, " " , true);// returns empty array for null
        for (final String parser : parsers) {
            String classname = JMeterUtils.getProperty(parser + ".className");//$NON-NLS-1$
            if (classname == null) {
                // Without a class name the entry is unusable: report it and move on
                log.error("Cannot find .className property for " + parser+", ensure you set property:'"+parser+".className'");
                continue;
            }
            String typelist = JMeterUtils.getProperty(parser + ".types");//$NON-NLS-1$
            if (typelist != null) {
                String[] types = JOrphanUtils.split(typelist, " ", true);
                for (final String type : types) {
                    // A later parser listing the same type overwrites the earlier mapping
                    log.info("Parser for " + type + " is " + classname);
                    PARSERS_FOR_CONTENT_TYPE.put(type, classname);
                }
            } else {
                log.warn("Cannot find .types property for " + parser 
                        + ", as a consequence parser will not be used, to make it usable, define property:'"+parser+".types'");
            }
        }
        // NOTE(review): 'parsersForType' is not declared in the visible source (the map
        // declared above is PARSERS_FOR_CONTENT_TYPE); this fallback looks like a leftover
        // from before the rename - confirm whether it should be removed or ported.
        if (parsers.length==0){ // revert to previous behaviour
            parsersForType.put("text/html", ""); //$NON-NLS-1$ //$NON-NLS-2$
            log.info("No response parsers defined: text/html only will be scanned for embedded resources");
        }
        
    }

    // Bug 49083

        try {
            final byte[] responseData = res.getResponseData();
            if (responseData.length > 0){  // Bug 39205
                final LinkExtractorParser parser = getParser(res);
                if(parser != null) {
                {
                    final HTMLParser parser =
                        parserName.length() > 0 ? // we have a name
                        HTMLParser.getParser(parserName)
                        :
                        HTMLParser.getParser(); // we don't; use the default parser
                    String userAgent = getUserAgent(res);
                    urls = parser.getEmbeddedResourceURLs(userAgent, responseData, res.getURL(), res.getDataEncodingWithDefault());
                }
            }
        } catch (LinkExtractorParseException e) {
            // Don't break the world just because this failed:
            res.addSubResult(errorResult(e, new HTTPSampleResult(res)));
            setParentSampleSuccess(res, false);

    }
    
    /**
     * Looks up the link extractor registered for the media type of a response,
     * as given by {@link HTTPSampleResult#getMediaType()}.
     *
     * @param res {@link HTTPSampleResult} whose media type selects the parser
     * @return the {@link LinkExtractorParser} for that media type, or
     *         {@code null} when no parser class name is mapped to it
     * @throws LinkExtractorParseException if the mapped parser cannot be obtained
     */
    private LinkExtractorParser getParser(HTTPSampleResult res)
            throws LinkExtractorParseException {
        final String mediaType = res.getMediaType();
        final String mappedClassName = PARSERS_FOR_CONTENT_TYPE.get(mediaType);
        if (StringUtils.isEmpty(mappedClassName)) {
            // nothing registered for this media type
            return null;
        }
        return BaseParser.getParser(mappedClassName);
    }

    /**
     * @param url URL to escape
     * @return escaped url
     */

        }
    }

    /*
     * Returns the parser class name mapped to the media type of the given result.
     *
     * @param res HTTPSampleResult to check
     * @return parser class name (may be "") or null if entry does not exist
     */
    private String getParserClass(HTTPSampleResult res) {
        return parsersForType.get(res.getMediaType());
    }

    // TODO: make static?
    protected String encodeSpaces(String path) {
        return JOrphanUtils.replaceAllChars(path, ' ', "%20"); // $NON-NLS-1$




                p = JMeterUtils.getProperties("jmeter.properties");
            }
            p.setProperty(HTMLParser.PARSER_CLASSNAME, parserName);
            HTMLParser.getParser(p.getProperty(HTMLParser.PARSER_CLASSNAME));
        }

        /**
         * Obtains the parser whose class name is read from the
         * {@code HTMLParser.PARSER_CLASSNAME} property, falling back to
         * {@code HTMLParser.DEFAULT_PARSER}. There are no assertions: the test
         * only requires that the call completes without throwing.
         */
        public void testDefaultParser() throws Exception {
            HTMLParser.getParser(JMeterUtils.getPropDefault(HTMLParser.PARSER_CLASSNAME, HTMLParser.DEFAULT_PARSER));
        }

        public void testParserDefault() throws Exception {

            try {
                HTMLParser.getParser("no.such.parser");
                fail("Should not have been able to create the parser");
            } catch (LinkExtractorParseException e) {
                if (e.getCause() instanceof ClassNotFoundException) {
                    // This is OK
                } else {

            try {
                HTMLParser.getParser("java.lang.String");
                fail("Should not have been able to create the parser");
            } catch (LinkExtractorParseException e) {
                if (e.getCause() instanceof ClassCastException) {
                    return;
                }

            try {
                HTMLParser.getParser(TestClass.class.getName());
                fail("Should not have been able to create the parser");
            } catch (LinkExtractorParseException e) {
                if (e.getCause() instanceof InstantiationException) {
                    return;
                }

            try {
                HTMLParser.getParser(StaticTestClass.class.getName());
                fail("Should not have been able to create the parser");
            } catch (LinkExtractorParseException e) {
                if (e.getCause() instanceof ClassCastException) {
                    return;
                }

        }

        /**
         * Runs {@code filetest} for the test case {@code TESTS[testNumber]} with the
         * parser named by {@code parserName}, checking against the case's
         * {@code expectedSet} and passing no URL collection ({@code null}).
         */
        public void testParserSet() throws Exception {
            // The factory returns the interface type; filetest is handed an HTMLParser
            HTMLParser p = (HTMLParser) BaseParser.getParser(parserName);
            filetest(p, TESTS[testNumber].fileName, TESTS[testNumber].baseURL, TESTS[testNumber].expectedSet, null,
                    false, TESTS[testNumber].userAgent);
        }

        /**
         * Runs {@code filetest} for the test case {@code TESTS[testNumber]} with the
         * parser named by {@code parserName}, checking against the case's
         * {@code expectedList} and collecting URLs into a fresh {@link Vector}.
         */
        public void testParserList() throws Exception {
            // The factory returns the interface type; filetest is handed an HTMLParser
            HTMLParser p = (HTMLParser) BaseParser.getParser(parserName);
            filetest(p, TESTS[testNumber].fileName, TESTS[testNumber].baseURL, TESTS[testNumber].expectedList,
                    new Vector<URLString>(), true, TESTS[testNumber].userAgent);
        }
        
        /**
         * Runs {@code filetest} for the test case
         * {@code SPECIFIC_PARSER_TESTS[testNumber]} with the parser named by
         * {@code parserName}, checking against the case's {@code expectedList} and
         * collecting URLs into a fresh {@link ArrayList}.
         */
        public void testSpecificParserList() throws Exception {
            // The factory returns the interface type; filetest is handed an HTMLParser
            HTMLParser p = (HTMLParser) BaseParser.getParser(parserName);
            filetest(p, SPECIFIC_PARSER_TESTS[testNumber].fileName, SPECIFIC_PARSER_TESTS[testNumber].baseURL, SPECIFIC_PARSER_TESTS[testNumber].expectedList,
                    new ArrayList<URLString>(), true, SPECIFIC_PARSER_TESTS[testNumber].userAgent);
        }

Return to bug 59033

Lines 152-163 Link Here

(-)src/protocol/http/org/apache/jmeter/protocol/http/parser/JsoupBasedHtmlParser.java (-8 lines)
152	throw new HTMLParseException(e);	152	throw new HTMLParseException(e);
153	}	153	}
154	}	154	}
155
156	/* (non-Javadoc)
157	* @see org.apache.jmeter.protocol.http.parser.HTMLParser#isReusable()
158	*/
159	@Override
160	protected boolean isReusable() {
161	return true;
162	}
163	}	155	}

Lines 229-244 Link Here

(-)src/protocol/http/org/apache/jmeter/protocol/http/parser/LagartoBasedHtmlParser.java (-12 lines)
229	throw new HTMLParseException(e);	229	throw new HTMLParseException(e);
230	}	230	}
231	}	231	}
232
233
234
235
236
237	/* (non-Javadoc)
238	* @see org.apache.jmeter.protocol.http.parser.HTMLParser#isReusable()
239	*/
240	@Override
241	protected boolean isReusable() {
242	return true;
243	}
244	}	232	}

Lines 125-138 Link Here

(-)src/protocol/http/org/apache/jmeter/protocol/http/parser/RegexpHTMLParser.java (-8 lines)
125	};	125	};
126		126
127	/**	127	/**
128	* {@inheritDoc}
129	*/
130	@Override
131	protected boolean isReusable() {
132	return true;
133	}
134
135	/**
136	* Make sure to compile the regular expression upon instantiation:	128	* Make sure to compile the regular expression upon instantiation:
137	*/	129	*/
138	protected RegexpHTMLParser() {	130	protected RegexpHTMLParser() {

Lines 56-63 Link Here

(-)src/protocol/http/org/apache/jmeter/protocol/http/sampler/HTTPSamplerBase.java (-31 / +30 lines)
56	import org.apache.jmeter.protocol.http.control.CookieManager;	56	import org.apache.jmeter.protocol.http.control.CookieManager;
57	import org.apache.jmeter.protocol.http.control.DNSCacheManager;	57	import org.apache.jmeter.protocol.http.control.DNSCacheManager;
58	import org.apache.jmeter.protocol.http.control.HeaderManager;	58	import org.apache.jmeter.protocol.http.control.HeaderManager;
59	import org.apache.jmeter.protocol.http.parser.HTMLParseException;	59	import org.apache.jmeter.protocol.http.parser.BaseParser;
60	import org.apache.jmeter.protocol.http.parser.HTMLParser;	60	import org.apache.jmeter.protocol.http.parser.LinkExtractorParseException;
		61	import org.apache.jmeter.protocol.http.parser.LinkExtractorParser;
61	import org.apache.jmeter.protocol.http.util.ConversionUtils;	62	import org.apache.jmeter.protocol.http.util.ConversionUtils;
62	import org.apache.jmeter.protocol.http.util.EncoderCache;	63	import org.apache.jmeter.protocol.http.util.EncoderCache;
63	import org.apache.jmeter.protocol.http.util.HTTPArgument;	64	import org.apache.jmeter.protocol.http.util.HTTPArgument;
Lines 303-338 Link Here
303		304
304		305
305	// Derive the mapping of content types to parsers	306	// Derive the mapping of content types to parsers
306	private static final Map<String, String> parsersForType = new HashMap<>();	307	private static final Map<String, String> PARSERS_FOR_CONTENT_TYPE = new HashMap<>();
307	// Not synch, but it is not modified after creation	308	// Not synch, but it is not modified after creation
308		309
309	private static final String RESPONSE_PARSERS= // list of parsers	310	private static final String RESPONSE_PARSERS= // list of parsers
310	JMeterUtils.getProperty("HTTPResponse.parsers");//$NON-NLS-1$	311	JMeterUtils.getProperty("HTTPResponse.parsers");//$NON-NLS-1$
311		312
312	static{	313	static{
313	String []parsers = JOrphanUtils.split(RESPONSE_PARSERS, " " , true);// returns empty array for null	314	String[] parsers = JOrphanUtils.split(RESPONSE_PARSERS, " " , true);// returns empty array for null
314	for (final String parser : parsers) {	315	for (final String parser : parsers) {
315	String classname = JMeterUtils.getProperty(parser + ".className");//$NON-NLS-1$	316	String classname = JMeterUtils.getProperty(parser + ".className");//$NON-NLS-1$
316	if (classname == null) {	317	if (classname == null) {
317	log.info("Cannot find .className property for " + parser + ", using default");	318	log.error("Cannot find .className property for " + parser+", ensure you set property:'"+parser+".className'");
318	classname = "";	319	continue;
319	}	320	}
320	String typelist = JMeterUtils.getProperty(parser + ".types");//$NON-NLS-1$	321	String typelist = JMeterUtils.getProperty(parser + ".types");//$NON-NLS-1$
321	if (typelist != null) {	322	if (typelist != null) {
322	String[] types = JOrphanUtils.split(typelist, " ", true);	323	String[] types = JOrphanUtils.split(typelist, " ", true);
323	for (final String type : types) {	324	for (final String type : types) {
324	log.info("Parser for " + type + " is " + classname);	325	log.info("Parser for " + type + " is " + classname);
325	parsersForType.put(type, classname);	326	PARSERS_FOR_CONTENT_TYPE.put(type, classname);
326	}	327	}
327	} else {	328	} else {
328	log.warn("Cannot find .types property for " + parser);	329	log.warn("Cannot find .types property for " + parser
		330	+ ", as a consequence parser will not be used, to make it usable, define property:'"+parser+".types'");
329	}	331	}
330	}	332	}
331	if (parsers.length==0){ // revert to previous behaviour
332	parsersForType.put("text/html", ""); //$NON-NLS-1$ //$NON-NLS-2$
333	log.info("No response parsers defined: text/html only will be scanned for embedded resources");
334	}
335
336	}	333	}
337		334
338	// Bug 49083	335	// Bug 49083
Lines 1194-1212 Link Here
1194	try {	1191	try {
1195	final byte[] responseData = res.getResponseData();	1192	final byte[] responseData = res.getResponseData();
1196	if (responseData.length > 0){ // Bug 39205	1193	if (responseData.length > 0){ // Bug 39205
1197	String parserName = getParserClass(res);	1194	final LinkExtractorParser parser = getParser(res);
1198	if(parserName != null)	1195	if(parser != null) {
1199	{
1200	final HTMLParser parser =
1201	parserName.length() > 0 ? // we have a name
1202	HTMLParser.getParser(parserName)
1203	:
1204	HTMLParser.getParser(); // we don't; use the default parser
1205	String userAgent = getUserAgent(res);	1196	String userAgent = getUserAgent(res);
1206	urls = parser.getEmbeddedResourceURLs(userAgent, responseData, res.getURL(), res.getDataEncodingWithDefault());	1197	urls = parser.getEmbeddedResourceURLs(userAgent, responseData, res.getURL(), res.getDataEncodingWithDefault());
1207	}	1198	}
1208	}	1199	}
1209	} catch (HTMLParseException e) {	1200	} catch (LinkExtractorParseException e) {
1210	// Don't break the world just because this failed:	1201	// Don't break the world just because this failed:
1211	res.addSubResult(errorResult(e, new HTTPSampleResult(res)));	1202	res.addSubResult(errorResult(e, new HTTPSampleResult(res)));
1212	setParentSampleSuccess(res, false);	1203	setParentSampleSuccess(res, false);
Lines 1358-1363 Link Here
1358	}	1349	}
1359		1350
1360	/**	1351	/**
		1352	* Gets parser from {@link HTTPSampleResult#getMediaType()}.
		1353	* Returns null if no parser defined for it
		1354	* @param res {@link HTTPSampleResult}
		1355	* @return {@link LinkExtractorParser}
		1356	* @throws LinkExtractorParseException
		1357	*/
		1358	private LinkExtractorParser getParser(HTTPSampleResult res)
		1359	throws LinkExtractorParseException {
		1360	String parserClassName =
		1361	PARSERS_FOR_CONTENT_TYPE.get(res.getMediaType());
		1362	if( !StringUtils.isEmpty(parserClassName) ) {
		1363	return BaseParser.getParser(parserClassName);
		1364	}
		1365	return null;
		1366	}
		1367
		1368	/**
1361	* @param url URL to escape	1369	* @param url URL to escape
1362	* @return escaped url	1370	* @return escaped url
1363	*/	1371	*/
Lines 1434-1448 Link Here
1434	}	1442	}
1435	}	1443	}
1436		1444
1437	/*
1438	* @param res HTTPSampleResult to check
1439	* @return parser class name (may be "") or null if entry does not exist
1440	*/
1441	private String getParserClass(HTTPSampleResult res) {
1442	final String ct = res.getMediaType();
1443	return parsersForType.get(ct);
1444	}
1445
1446	// TODO: make static?	1445	// TODO: make static?
1447	protected String encodeSpaces(String path) {	1446	protected String encodeSpaces(String path) {
1448	return JOrphanUtils.replaceAllChars(path, ' ', "%20"); // $NON-NLS-1$	1447	return JOrphanUtils.replaceAllChars(path, ' ', "%20"); // $NON-NLS-1$

Lines 285-295 Link Here

(-)test/src/org/apache/jmeter/protocol/http/parser/TestHTMLParser.java (-9 / +9 lines)
285	p = JMeterUtils.getProperties("jmeter.properties");	285	p = JMeterUtils.getProperties("jmeter.properties");
286	}	286	}
287	p.setProperty(HTMLParser.PARSER_CLASSNAME, parserName);	287	p.setProperty(HTMLParser.PARSER_CLASSNAME, parserName);
288	HTMLParser.getParser();	288	HTMLParser.getParser(p.getProperty(HTMLParser.PARSER_CLASSNAME));
289	}	289	}
290		290
291	public void testDefaultParser() throws Exception {	291	public void testDefaultParser() throws Exception {
292	HTMLParser.getParser();	292	HTMLParser.getParser(JMeterUtils.getPropDefault(HTMLParser.PARSER_CLASSNAME, HTMLParser.DEFAULT_PARSER));
293	}	293	}
294		294
295	public void testParserDefault() throws Exception {	295	public void testParserDefault() throws Exception {
Lines 300-306 Link Here
300	try {	300	try {
301	HTMLParser.getParser("no.such.parser");	301	HTMLParser.getParser("no.such.parser");
302	fail("Should not have been able to create the parser");	302	fail("Should not have been able to create the parser");
303	} catch (HTMLParseError e) {	303	} catch (LinkExtractorParseException e) {
304	if (e.getCause() instanceof ClassNotFoundException) {	304	if (e.getCause() instanceof ClassNotFoundException) {
305	// This is OK	305	// This is OK
306	} else {	306	} else {
Lines 313-319 Link Here
313	try {	313	try {
314	HTMLParser.getParser("java.lang.String");	314	HTMLParser.getParser("java.lang.String");
315	fail("Should not have been able to create the parser");	315	fail("Should not have been able to create the parser");
316	} catch (HTMLParseError e) {	316	} catch (LinkExtractorParseException e) {
317	if (e.getCause() instanceof ClassCastException) {	317	if (e.getCause() instanceof ClassCastException) {
318	return;	318	return;
319	}	319	}
Lines 325-331 Link Here
325	try {	325	try {
326	HTMLParser.getParser(TestClass.class.getName());	326	HTMLParser.getParser(TestClass.class.getName());
327	fail("Should not have been able to create the parser");	327	fail("Should not have been able to create the parser");
328	} catch (HTMLParseError e) {	328	} catch (LinkExtractorParseException e) {
329	if (e.getCause() instanceof InstantiationException) {	329	if (e.getCause() instanceof InstantiationException) {
330	return;	330	return;
331	}	331	}
Lines 337-343 Link Here
337	try {	337	try {
338	HTMLParser.getParser(StaticTestClass.class.getName());	338	HTMLParser.getParser(StaticTestClass.class.getName());
339	fail("Should not have been able to create the parser");	339	fail("Should not have been able to create the parser");
340	} catch (HTMLParseError e) {	340	} catch (LinkExtractorParseException e) {
341	if (e.getCause() instanceof ClassCastException) {	341	if (e.getCause() instanceof ClassCastException) {
342	return;	342	return;
343	}	343	}
Lines 349-367 Link Here
349	}	349	}
350		350
351	public void testParserSet() throws Exception {	351	public void testParserSet() throws Exception {
352	HTMLParser p = HTMLParser.getParser(parserName);	352	HTMLParser p = (HTMLParser) BaseParser.getParser(parserName);
353	filetest(p, TESTS[testNumber].fileName, TESTS[testNumber].baseURL, TESTS[testNumber].expectedSet, null,	353	filetest(p, TESTS[testNumber].fileName, TESTS[testNumber].baseURL, TESTS[testNumber].expectedSet, null,
354	false, TESTS[testNumber].userAgent);	354	false, TESTS[testNumber].userAgent);
355	}	355	}
356		356
357	public void testParserList() throws Exception {	357	public void testParserList() throws Exception {
358	HTMLParser p = HTMLParser.getParser(parserName);	358	HTMLParser p = (HTMLParser) BaseParser.getParser(parserName);
359	filetest(p, TESTS[testNumber].fileName, TESTS[testNumber].baseURL, TESTS[testNumber].expectedList,	359	filetest(p, TESTS[testNumber].fileName, TESTS[testNumber].baseURL, TESTS[testNumber].expectedList,
360	new Vector<URLString>(), true, TESTS[testNumber].userAgent);	360	new Vector<URLString>(), true, TESTS[testNumber].userAgent);
361	}	361	}
362		362
363	public void testSpecificParserList() throws Exception {	363	public void testSpecificParserList() throws Exception {
364	HTMLParser p = HTMLParser.getParser(parserName);	364	HTMLParser p = (HTMLParser) BaseParser.getParser(parserName);
365	filetest(p, SPECIFIC_PARSER_TESTS[testNumber].fileName, SPECIFIC_PARSER_TESTS[testNumber].baseURL, SPECIFIC_PARSER_TESTS[testNumber].expectedList,	365	filetest(p, SPECIFIC_PARSER_TESTS[testNumber].fileName, SPECIFIC_PARSER_TESTS[testNumber].baseURL, SPECIFIC_PARSER_TESTS[testNumber].expectedList,
366	new ArrayList<URLString>(), true, SPECIFIC_PARSER_TESTS[testNumber].userAgent);	366	new ArrayList<URLString>(), true, SPECIFIC_PARSER_TESTS[testNumber].userAgent);
367	}	367	}

Line 0 Link Here

(-)src/protocol/http/org/apache/jmeter/protocol/http/parser/BaseParser.java (+89 lines)
		1	/*
		2	* Licensed to the Apache Software Foundation (ASF) under one or more
		3	* contributor license agreements. See the NOTICE file distributed with
		4	* this work for additional information regarding copyright ownership.
		5	* The ASF licenses this file to You under the Apache License, Version 2.0
		6	* (the "License"); you may not use this file except in compliance with
		7	* the License. You may obtain a copy of the License at
		8	*
		9	* http://www.apache.org/licenses/LICENSE-2.0
		10	*
		11	* Unless required by applicable law or agreed to in writing, software
		12	* distributed under the License is distributed on an "AS IS" BASIS,
		13	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
		14	* See the License for the specific language governing permissions and
		15	* limitations under the License.
		16	*
		17	*/
		18
		19	package org.apache.jmeter.protocol.http.parser;
		20
		21	import java.util.Map;
		22	import java.util.concurrent.ConcurrentHashMap;
		23
		24	import org.apache.jorphan.logging.LoggingManager;
		25	import org.apache.log.Logger;
		26
		27	/**
		28	* BaseParser is the base class for {@link LinkExtractorParser} implementations.
		29	* It is advised to make subclasses reusable across parsing calls, so {@link BaseParser#isReusable()} returns true by default.
		30	* @since 3.0
		31	*/
		32	public abstract class BaseParser implements LinkExtractorParser {
		33	private static final Logger log = LoggingManager.getLoggerForClass();
		34	// Cache of parsers - parsers must be re-usable
		35	private static final Map<String, LinkExtractorParser> parsers = new ConcurrentHashMap<>(5);
		36
		37	/**
		38	*
		39	*/
		40	public BaseParser() {
		41	}
		42
		43	/**
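		44	* Factory method for parsers; reusable parsers are cached per class name.
		45	* @param parserClassName fully qualified class name of the parser to create
		46	* @return {@link LinkExtractorParser}
		47	* @throws LinkExtractorParseException if the class cannot be found, instantiated or accessed, or is not a {@link LinkExtractorParser}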
		48	*/
		49	public static LinkExtractorParser getParser(String parserClassName)
		50	throws LinkExtractorParseException {
		51
		52	// Is there a cached parser?
		53	LinkExtractorParser parser = parsers.get(parserClassName);
		54	if (parser != null) {
		55	log.debug("Fetched " + parserClassName);
		56	return parser;
		57	}
		58
		59	try {
		60	Object clazz = Class.forName(parserClassName).newInstance();
		61	if (clazz instanceof LinkExtractorParser) {
		62	parser = (LinkExtractorParser) clazz;
		63	} else {
		64	throw new LinkExtractorParseException(new ClassCastException(parserClassName));
65	}
66	} catch (InstantiationException \| ClassNotFoundException
67	\| IllegalAccessException e) {
68	throw new LinkExtractorParseException(e);
69	}
70	log.info("Created " + parserClassName);
71	if (parser.isReusable()) {
72	parsers.put(parserClassName, parser);// cache the parser
73	}
74
75	return parser;
76	}
77
78	/**
79	* Parsers should over-ride this method if the parser class is not re-usable.
80	* By default it returns true, in which case the instance will be cached for the next getParser() call.
81	*
82	* @return true if the Parser is reusable
83	*/
84	@Override
85	public boolean isReusable() {
86	return true;
87	}
88
89	}

Line 0 Link Here

(-)src/protocol/http/org/apache/jmeter/protocol/http/parser/CssParser.java (+44 lines)
	1	/**
	2	*
	3	*/
	4	package org.apache.jmeter.protocol.http.parser;
	5
	6	import java.net.URL;
	7	import java.util.ArrayList;
	8	import java.util.Iterator;
	9
	10	/**
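	11	* Stub {@link LinkExtractorParser}: decodes the data but currently always returns an empty URL iterator.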
	12	*/
	13	public class CssParser implements LinkExtractorParser {
	14	//private static final Pattern PATTERN = Pattern.compile("background(-image)?: url[\\s]\\([\\s](?<url>[^\)]*)\)");
	15	/**
	16	*
	17	*/
	18	public CssParser() {
	19	}
	20
	21	/* (non-Javadoc)
	22	* @see org.apache.jmeter.protocol.http.parser.LinkExtractorParser#getEmbeddedResourceURLs(java.lang.String, byte[], java.net.URL, java.lang.String)
	23	*/
	24	@Override
	25	public Iterator<URL> getEmbeddedResourceURLs(String userAgent, byte[] data,
	26	URL baseUrl, String encoding)
	27	throws LinkExtractorParseException {
	28	try {
	29	String contents = new String(data,encoding);
	30
	31	return new URLCollection(new ArrayList<URLString>()).iterator();
	32	}
	33	catch (Exception e) {
	34	throw new HTMLParseException(e);
	35	}
	36	}
	37
	38	@Override
	39	public boolean isReusable() {
	40	return true;
	41	}
	42
	43
	44	}

Lines 23-43 Link Here

(-)src/protocol/http/org/apache/jmeter/protocol/http/parser/HTMLParser.java (-50 / +3 lines)
23	import java.util.Collection;	23	import java.util.Collection;
24	import java.util.Iterator;	24	import java.util.Iterator;
25	import java.util.LinkedHashSet;	25	import java.util.LinkedHashSet;
26	import java.util.Map;
27	import java.util.concurrent.ConcurrentHashMap;
28	import java.util.regex.Matcher;	26	import java.util.regex.Matcher;
29	import java.util.regex.Pattern;	27	import java.util.regex.Pattern;
30		28
31	import org.apache.commons.lang3.StringUtils;	29	import org.apache.commons.lang3.StringUtils;
32	import org.apache.jmeter.util.JMeterUtils;
33	import org.apache.jorphan.logging.LoggingManager;	30	import org.apache.jorphan.logging.LoggingManager;
34	import org.apache.log.Logger;	31	import org.apache.log.Logger;
35		32
36	/**	33	/**
37	* HtmlParsers can parse HTML content to obtain URLs.	34	* {@link HTMLParser} subclasses can parse HTML content to obtain URLs.
38	*	35	*
39	*/	36	*/
40	public abstract class HTMLParser {	37	public abstract class HTMLParser extends BaseParser {
41		38
42	private static final Logger log = LoggingManager.getLoggerForClass();	39	private static final Logger log = LoggingManager.getLoggerForClass();
43		40
Lines 69-77 Link Here
69	protected static final Pattern IE_UA_PATTERN = Pattern.compile(IE_UA);	66	protected static final Pattern IE_UA_PATTERN = Pattern.compile(IE_UA);
70	private static final float IE_10 = 10.0f;	67	private static final float IE_10 = 10.0f;
71		68
72	// Cache of parsers - parsers must be re-usable
73	private static final Map<String, HTMLParser> parsers = new ConcurrentHashMap<>(4);
74
75	public static final String PARSER_CLASSNAME = "htmlParser.className"; // $NON-NLS-1$	69	public static final String PARSER_CLASSNAME = "htmlParser.className"; // $NON-NLS-1$
76		70
77	public static final String DEFAULT_PARSER =	71	public static final String DEFAULT_PARSER =
Lines 84-121 Link Here
84	protected HTMLParser() {	78	protected HTMLParser() {
85	}	79	}
86		80
87	public static HTMLParser getParser() {
88	return getParser(JMeterUtils.getPropDefault(PARSER_CLASSNAME, DEFAULT_PARSER));
89	}
90
91	public static HTMLParser getParser(String htmlParserClassName) {
92
93	// Is there a cached parser?
94	HTMLParser pars = parsers.get(htmlParserClassName);
95	if (pars != null) {
96	log.debug("Fetched " + htmlParserClassName);
97	return pars;
98	}
99
100	try {
101	Object clazz = Class.forName(htmlParserClassName).newInstance();
102	if (clazz instanceof HTMLParser) {
103	pars = (HTMLParser) clazz;
104	} else {
105	throw new HTMLParseError(new ClassCastException(htmlParserClassName));
106	}
107	} catch (InstantiationException \| ClassNotFoundException
108	\| IllegalAccessException e) {
109	throw new HTMLParseError(e);
110	}
111	log.info("Created " + htmlParserClassName);
112	if (pars.isReusable()) {
113	parsers.put(htmlParserClassName, pars);// cache the parser
114	}
115
116	return pars;
117	}
118
119	/**	81	/**
120	* Get the URLs for all the resources that a browser would automatically	82	* Get the URLs for all the resources that a browser would automatically
121	* download following the download of the HTML content, that is: images,	83	* download following the download of the HTML content, that is: images,
Lines 137-142 Link Here
137	* @return an Iterator for the resource URLs	99	* @return an Iterator for the resource URLs
138	* @throws HTMLParseException when parsing the <code>html</code> fails	100	* @throws HTMLParseException when parsing the <code>html</code> fails
139	*/	101	*/
		102	@Override
140	public Iterator<URL> getEmbeddedResourceURLs(	103	public Iterator<URL> getEmbeddedResourceURLs(
141	String userAgent, byte[] html, URL baseUrl, String encoding) throws HTMLParseException {	104	String userAgent, byte[] html, URL baseUrl, String encoding) throws HTMLParseException {
142	// The Set is used to ignore duplicated binary files.	105	// The Set is used to ignore duplicated binary files.
Lines 216-231 Link Here
216	String userAgent, byte[] html, URL baseUrl, Collection<URLString> coll, String encoding) throws HTMLParseException {	179	String userAgent, byte[] html, URL baseUrl, Collection<URLString> coll, String encoding) throws HTMLParseException {
217	return getEmbeddedResourceURLs(userAgent, html, baseUrl, new URLCollection(coll), encoding);	180	return getEmbeddedResourceURLs(userAgent, html, baseUrl, new URLCollection(coll), encoding);
218	}	181	}
219
220	/**
221	* Parsers should over-ride this method if the parser class is re-usable, in
222	* which case the class will be cached for the next getParser() call.
223	*
224	* @return true if the Parser is reusable
225	*/
226	protected boolean isReusable() {
227	return false;
228	}
229		182
230	/**	183	/**
231	*	184	*