View | Details | Raw Unified | Return to bug 59033
Collapse All | Expand All

(-)bin/jmeter.properties (-1 / +2 lines)
Lines 713-719 Link Here
713
# Default parser:
713
# Default parser:
714
# This new parser (since 2.10) should perform better than all others
714
# This new parser (since 2.10) should perform better than all others
715
# see https://bz.apache.org/bugzilla/show_bug.cgi?id=55632
715
# see https://bz.apache.org/bugzilla/show_bug.cgi?id=55632
716
#htmlParser.className=org.apache.jmeter.protocol.http.parser.LagartoBasedHtmlParser
716
# Do not comment out this property
717
htmlParser.className=org.apache.jmeter.protocol.http.parser.LagartoBasedHtmlParser
717
718
718
# Other parsers:
719
# Other parsers:
719
# Default parser before 2.10
720
# Default parser before 2.10
(-)src/protocol/http/org/apache/jmeter/protocol/http/parser/BaseParser.java (+89 lines)
Line 0 Link Here
1
/*
2
 * Licensed to the Apache Software Foundation (ASF) under one or more
3
 * contributor license agreements.  See the NOTICE file distributed with
4
 * this work for additional information regarding copyright ownership.
5
 * The ASF licenses this file to You under the Apache License, Version 2.0
6
 * (the "License"); you may not use this file except in compliance with
7
 * the License.  You may obtain a copy of the License at
8
 *
9
 *   http://www.apache.org/licenses/LICENSE-2.0
10
 *
11
 * Unless required by applicable law or agreed to in writing, software
12
 * distributed under the License is distributed on an "AS IS" BASIS,
13
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 * See the License for the specific language governing permissions and
15
 * limitations under the License.
16
 *
17
 */
18
19
package org.apache.jmeter.protocol.http.parser;
20
21
import java.util.Map;
22
import java.util.concurrent.ConcurrentHashMap;
23
24
import org.apache.jorphan.logging.LoggingManager;
25
import org.apache.log.Logger;
26
27
/**
28
 * BaseParser is the base class for {@link LinkExtractorParser}
29
 * It is advised to make subclasses reusable across parsing, so {@link BaseParser#isReusable()} returns true by default
30
 * @since 3.0
31
 */
32
public abstract class BaseParser implements LinkExtractorParser {
33
    private static final Logger log = LoggingManager.getLoggerForClass();
34
    // Cache of parsers - parsers must be re-usable
35
    private static final Map<String, LinkExtractorParser> parsers = new ConcurrentHashMap<>(5);
36
37
    /**
38
     * 
39
     */
40
    public BaseParser() {
41
    }
42
43
    /**
44
     * Factory method of parsers
45
     * @param parserClassName
46
     * @return {@link LinkExtractorParser}
47
     * @throws LinkExtractorParseException
48
     */
49
    public static LinkExtractorParser getParser(String parserClassName) 
50
            throws LinkExtractorParseException {
51
52
        // Is there a cached parser?
53
        LinkExtractorParser parser = parsers.get(parserClassName);
54
        if (parser != null) {
55
            log.debug("Fetched " + parserClassName);
56
            return parser;
57
        }
58
59
        try {
60
            Object clazz = Class.forName(parserClassName).newInstance();
61
            if (clazz instanceof LinkExtractorParser) {
62
                parser = (LinkExtractorParser) clazz;
63
            } else {
64
                throw new LinkExtractorParseException(new ClassCastException(parserClassName));
65
            }
66
        } catch (InstantiationException | ClassNotFoundException
67
                | IllegalAccessException e) {
68
            throw new LinkExtractorParseException(e);
69
        }
70
        log.info("Created " + parserClassName);
71
        if (parser.isReusable()) {
72
            parsers.put(parserClassName, parser);// cache the parser
73
        }
74
75
        return parser;
76
    }
77
    
78
    /**
79
     * Parsers should override this method to return false if the parser class is not
80
     * re-usable; a re-usable parser instance is cached for the next getParser() call.
81
     *
82
     * @return true if the Parser is reusable
83
     */
84
    @Override
85
    public boolean isReusable() {
86
        return true;
87
    }
88
89
}
(-)src/protocol/http/org/apache/jmeter/protocol/http/parser/CssParser.java (+44 lines)
Line 0 Link Here
1
/**
2
 * 
3
 */
4
package org.apache.jmeter.protocol.http.parser;
5
6
import java.net.URL;
7
import java.util.ArrayList;
8
import java.util.Iterator;
9
10
/**
11
 * 
12
 */
13
public class CssParser implements LinkExtractorParser {
14
    //private static final Pattern PATTERN = Pattern.compile("background(-image)?: url[\\s]*\\([\\s]*(?<url>[^\)]*)\)");
15
    /**
16
     * 
17
     */
18
    public CssParser() {
19
    }
20
21
    /* (non-Javadoc)
22
     * @see org.apache.jmeter.protocol.http.parser.LinkExtractorParser#getEmbeddedResourceURLs(java.lang.String, byte[], java.net.URL, java.lang.String)
23
     */
24
    @Override
25
    public Iterator<URL> getEmbeddedResourceURLs(String userAgent, byte[] data,
26
            URL baseUrl, String encoding)
27
            throws LinkExtractorParseException {
28
        try {
29
            String contents = new String(data,encoding);
30
            
31
            return new URLCollection(new ArrayList<URLString>()).iterator();
32
        }
33
        catch (Exception e) {
34
            throw new HTMLParseException(e);
35
        }
36
    }
37
38
    @Override
39
    public boolean isReusable() {
40
        return true;
41
    }
42
43
44
}
(-)src/protocol/http/org/apache/jmeter/protocol/http/parser/HTMLParseError.java (+2 lines)
Lines 24-30 Link Here
24
 * however it is now used in its own right.
24
 * however it is now used in its own right.
25
 *
25
 *
26
 * @version $Revision$
26
 * @version $Revision$
27
 * @deprecated
27
 */
28
 */
29
@Deprecated
28
public class HTMLParseError extends Error {
30
public class HTMLParseError extends Error {
29
    private static final long serialVersionUID = 240L;
31
    private static final long serialVersionUID = 240L;
30
32
(-)src/protocol/http/org/apache/jmeter/protocol/http/parser/HTMLParseException.java (-8 / +8 lines)
Lines 18-32 Link Here
18
package org.apache.jmeter.protocol.http.parser;
18
package org.apache.jmeter.protocol.http.parser;
19
19
20
/**
20
/**
21
 * Exception class for use with HTMLParser classes. 
21
 * Exception class for use with {@link HTMLParser} classes. 
22
 * The main rationale for the class
23
 * was to support chained Exceptions in JDK 1.3,
24
 * however it is now used in its own right.
25
 *
26
 * @version $Revision$
27
 */
22
 */
28
public class HTMLParseException extends Exception {
23
public class HTMLParseException extends LinkExtractorParseException {
29
    private static final long serialVersionUID = 240L;
24
    private static final long serialVersionUID = 241L;
30
25
31
    public HTMLParseException() {
26
    public HTMLParseException() {
32
        super();
27
        super();
Lines 43-46 Link Here
43
    public HTMLParseException(String message, Throwable cause) {
38
    public HTMLParseException(String message, Throwable cause) {
44
        super(message, cause);
39
        super(message, cause);
45
    }
40
    }
41
42
    public HTMLParseException(String message, Throwable cause,
43
            boolean enableSuppression, boolean writableStackTrace) {
44
        super(message, cause, enableSuppression, writableStackTrace);
45
    }
46
}
46
}
(-)src/protocol/http/org/apache/jmeter/protocol/http/parser/HTMLParser.java (-50 / +3 lines)
Lines 23-43 Link Here
23
import java.util.Collection;
23
import java.util.Collection;
24
import java.util.Iterator;
24
import java.util.Iterator;
25
import java.util.LinkedHashSet;
25
import java.util.LinkedHashSet;
26
import java.util.Map;
27
import java.util.concurrent.ConcurrentHashMap;
28
import java.util.regex.Matcher;
26
import java.util.regex.Matcher;
29
import java.util.regex.Pattern;
27
import java.util.regex.Pattern;
30
28
31
import org.apache.commons.lang3.StringUtils;
29
import org.apache.commons.lang3.StringUtils;
32
import org.apache.jmeter.util.JMeterUtils;
33
import org.apache.jorphan.logging.LoggingManager;
30
import org.apache.jorphan.logging.LoggingManager;
34
import org.apache.log.Logger;
31
import org.apache.log.Logger;
35
32
36
/**
33
/**
37
 * HtmlParsers can parse HTML content to obtain URLs.
34
 * {@link HTMLParser} subclasses can parse HTML content to obtain URLs.
38
 *
35
 *
39
 */
36
 */
40
public abstract class HTMLParser {
37
public abstract class HTMLParser extends BaseParser {
41
38
42
    private static final Logger log = LoggingManager.getLoggerForClass();
39
    private static final Logger log = LoggingManager.getLoggerForClass();
43
40
Lines 69-77 Link Here
69
    protected static final Pattern IE_UA_PATTERN    = Pattern.compile(IE_UA);
66
    protected static final Pattern IE_UA_PATTERN    = Pattern.compile(IE_UA);
70
    private   static final float IE_10                = 10.0f;
67
    private   static final float IE_10                = 10.0f;
71
68
72
    // Cache of parsers - parsers must be re-usable
73
    private static final Map<String, HTMLParser> parsers = new ConcurrentHashMap<>(4);
74
75
    public static final String PARSER_CLASSNAME = "htmlParser.className"; // $NON-NLS-1$
69
    public static final String PARSER_CLASSNAME = "htmlParser.className"; // $NON-NLS-1$
76
70
77
    public static final String DEFAULT_PARSER =
71
    public static final String DEFAULT_PARSER =
Lines 84-121 Link Here
84
    protected HTMLParser() {
78
    protected HTMLParser() {
85
    }
79
    }
86
80
87
    public static HTMLParser getParser() {
88
        return getParser(JMeterUtils.getPropDefault(PARSER_CLASSNAME, DEFAULT_PARSER));
89
    }
90
91
    public static HTMLParser getParser(String htmlParserClassName) {
92
93
        // Is there a cached parser?
94
        HTMLParser pars = parsers.get(htmlParserClassName);
95
        if (pars != null) {
96
            log.debug("Fetched " + htmlParserClassName);
97
            return pars;
98
        }
99
100
        try {
101
            Object clazz = Class.forName(htmlParserClassName).newInstance();
102
            if (clazz instanceof HTMLParser) {
103
                pars = (HTMLParser) clazz;
104
            } else {
105
                throw new HTMLParseError(new ClassCastException(htmlParserClassName));
106
            }
107
        } catch (InstantiationException | ClassNotFoundException
108
                | IllegalAccessException e) {
109
            throw new HTMLParseError(e);
110
        }
111
        log.info("Created " + htmlParserClassName);
112
        if (pars.isReusable()) {
113
            parsers.put(htmlParserClassName, pars);// cache the parser
114
        }
115
116
        return pars;
117
    }
118
119
    /**
81
    /**
120
     * Get the URLs for all the resources that a browser would automatically
82
     * Get the URLs for all the resources that a browser would automatically
121
     * download following the download of the HTML content, that is: images,
83
     * download following the download of the HTML content, that is: images,
Lines 137-142 Link Here
137
     * @return an Iterator for the resource URLs
99
     * @return an Iterator for the resource URLs
138
     * @throws HTMLParseException when parsing the <code>html</code> fails
100
     * @throws HTMLParseException when parsing the <code>html</code> fails
139
     */
101
     */
102
    @Override
140
    public Iterator<URL> getEmbeddedResourceURLs(
103
    public Iterator<URL> getEmbeddedResourceURLs(
141
            String userAgent, byte[] html, URL baseUrl, String encoding) throws HTMLParseException {
104
            String userAgent, byte[] html, URL baseUrl, String encoding) throws HTMLParseException {
142
        // The Set is used to ignore duplicated binary files.
105
        // The Set is used to ignore duplicated binary files.
Lines 216-231 Link Here
216
            String userAgent, byte[] html, URL baseUrl, Collection<URLString> coll, String encoding) throws HTMLParseException {
179
            String userAgent, byte[] html, URL baseUrl, Collection<URLString> coll, String encoding) throws HTMLParseException {
217
        return getEmbeddedResourceURLs(userAgent, html, baseUrl, new URLCollection(coll), encoding);
180
        return getEmbeddedResourceURLs(userAgent, html, baseUrl, new URLCollection(coll), encoding);
218
    }
181
    }
219
220
    /**
221
     * Parsers should over-ride this method if the parser class is re-usable, in
222
     * which case the class will be cached for the next getParser() call.
223
     *
224
     * @return true if the Parser is reusable
225
     */
226
    protected boolean isReusable() {
227
        return false;
228
    }
229
    
182
    
230
    /**
183
    /**
231
     * 
184
     * 
(-)src/protocol/http/org/apache/jmeter/protocol/http/parser/HtmlParserHTMLParser.java (-5 lines)
Lines 57-67 Link Here
57
        log.info("Using htmlparser version: "+Parser.getVersion());
57
        log.info("Using htmlparser version: "+Parser.getVersion());
58
    }
58
    }
59
59
60
    @Override
61
    protected boolean isReusable() {
62
        return true;
63
    }
64
65
    /**
60
    /**
66
     * {@inheritDoc}
61
     * {@inheritDoc}
67
     */
62
     */
(-)src/protocol/http/org/apache/jmeter/protocol/http/parser/JTidyHTMLParser.java (-5 lines)
Lines 45-55 Link Here
45
        super();
45
        super();
46
    }
46
    }
47
47
48
    @Override
49
    protected boolean isReusable() {
50
        return true;
51
    }
52
53
    /**
48
    /**
54
     * {@inheritDoc}
49
     * {@inheritDoc}
55
     */
50
     */
(-)src/protocol/http/org/apache/jmeter/protocol/http/parser/JsoupBasedHtmlParser.java (-8 lines)
Lines 152-163 Link Here
152
            throw new HTMLParseException(e);
152
            throw new HTMLParseException(e);
153
        }
153
        }
154
    }
154
    }
155
156
    /* (non-Javadoc)
157
     * @see org.apache.jmeter.protocol.http.parser.HTMLParser#isReusable()
158
     */
159
    @Override
160
    protected boolean isReusable() {
161
        return true;
162
    }
163
}
155
}
(-)src/protocol/http/org/apache/jmeter/protocol/http/parser/LagartoBasedHtmlParser.java (-12 lines)
Lines 229-244 Link Here
229
            throw new HTMLParseException(e);
229
            throw new HTMLParseException(e);
230
        }
230
        }
231
    }
231
    }
232
233
    
234
235
236
237
    /* (non-Javadoc)
238
     * @see org.apache.jmeter.protocol.http.parser.HTMLParser#isReusable()
239
     */
240
    @Override
241
    protected boolean isReusable() {
242
        return true;
243
    }
244
}
232
}
(-)src/protocol/http/org/apache/jmeter/protocol/http/parser/LinkExtractorParseException.java (+70 lines)
Line 0 Link Here
1
/*
2
 * Licensed to the Apache Software Foundation (ASF) under one or more
3
 * contributor license agreements.  See the NOTICE file distributed with
4
 * this work for additional information regarding copyright ownership.
5
 * The ASF licenses this file to You under the Apache License, Version 2.0
6
 * (the "License"); you may not use this file except in compliance with
7
 * the License.  You may obtain a copy of the License at
8
 *
9
 *   http://www.apache.org/licenses/LICENSE-2.0
10
 *
11
 * Unless required by applicable law or agreed to in writing, software
12
 * distributed under the License is distributed on an "AS IS" BASIS,
13
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 * See the License for the specific language governing permissions and
15
 * limitations under the License.
16
 *
17
 */
18
19
package org.apache.jmeter.protocol.http.parser;
20
21
/**
22
 * Exception used with {@link LinkExtractorParser}
23
 * @since 3.0
24
 */
25
public class LinkExtractorParseException extends Exception {
26
27
    /**
28
     * 
29
     */
30
    private static final long serialVersionUID = 6816968619973437826L;
31
32
    /**
33
     * 
34
     */
35
    public LinkExtractorParseException() {
36
    }
37
38
    /**
39
     * @param message
40
     */
41
    public LinkExtractorParseException(String message) {
42
        super(message);
43
    }
44
45
    /**
46
     * @param cause
47
     */
48
    public LinkExtractorParseException(Throwable cause) {
49
        super(cause);
50
    }
51
52
    /**
53
     * @param message
54
     * @param cause
55
     */
56
    public LinkExtractorParseException(String message, Throwable cause) {
57
        super(message, cause);
58
    }
59
60
    /**
61
     * @param message
62
     * @param cause
63
     * @param enableSuppression
64
     * @param writableStackTrace
65
     */
66
    public LinkExtractorParseException(String message, Throwable cause,
67
            boolean enableSuppression, boolean writableStackTrace) {
68
        super(message, cause, enableSuppression, writableStackTrace);
69
    }
70
}
(-)src/protocol/http/org/apache/jmeter/protocol/http/parser/LinkExtractorParser.java (+53 lines)
Line 0 Link Here
1
/*
2
 * Licensed to the Apache Software Foundation (ASF) under one or more
3
 * contributor license agreements.  See the NOTICE file distributed with
4
 * this work for additional information regarding copyright ownership.
5
 * The ASF licenses this file to You under the Apache License, Version 2.0
6
 * (the "License"); you may not use this file except in compliance with
7
 * the License.  You may obtain a copy of the License at
8
 *
9
 *   http://www.apache.org/licenses/LICENSE-2.0
10
 *
11
 * Unless required by applicable law or agreed to in writing, software
12
 * distributed under the License is distributed on an "AS IS" BASIS,
13
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 * See the License for the specific language governing permissions and
15
 * limitations under the License.
16
 *
17
 */
18
19
package org.apache.jmeter.protocol.http.parser;
20
21
import java.net.URL;
22
import java.util.Iterator;
23
24
/**
25
 * Interface specifying contract of content parser that aims to extract links 
26
 * @since 3.0
27
 */
28
public interface LinkExtractorParser {
29
30
    /**
31
     * Get the URLs for all the resources that a browser would automatically
32
     * download following the download of the content, that is: images,
33
     * stylesheets, javascript files, applets, etc...
34
     * <p>
35
     * URLs should not appear twice in the returned iterator.
36
     * <p>
37
     * Malformed URLs can be reported to the caller by having the Iterator
38
     * return the corresponding URL String. Overall problems parsing the content
39
     * should be reported by throwing a LinkExtractorParseException.
40
     * @param userAgent
41
     *            User Agent
42
     * @param responseData Response data
43
     * @param baseUrl
44
     *            Base URL from which the HTML code was obtained
45
     * @param encoding Charset
46
     * @return an Iterator for the resource URLs
47
     */
48
    Iterator<URL> getEmbeddedResourceURLs(
49
            String userAgent, byte[] responseData, URL baseUrl, String encoding) 
50
                    throws LinkExtractorParseException;
51
    
52
    boolean isReusable();
53
}
(-)src/protocol/http/org/apache/jmeter/protocol/http/parser/RegexpHTMLParser.java (-8 lines)
Lines 125-138 Link Here
125
    };
125
    };
126
126
127
    /**
127
    /**
128
     * {@inheritDoc}
129
     */
130
    @Override
131
    protected boolean isReusable() {
132
        return true;
133
    }
134
135
    /**
136
     * Make sure to compile the regular expression upon instantiation:
128
     * Make sure to compile the regular expression upon instantiation:
137
     */
129
     */
138
    protected RegexpHTMLParser() {
130
    protected RegexpHTMLParser() {
(-)src/protocol/http/org/apache/jmeter/protocol/http/sampler/HTTPSamplerBase.java (-31 / +30 lines)
Lines 56-63 Link Here
56
import org.apache.jmeter.protocol.http.control.CookieManager;
56
import org.apache.jmeter.protocol.http.control.CookieManager;
57
import org.apache.jmeter.protocol.http.control.DNSCacheManager;
57
import org.apache.jmeter.protocol.http.control.DNSCacheManager;
58
import org.apache.jmeter.protocol.http.control.HeaderManager;
58
import org.apache.jmeter.protocol.http.control.HeaderManager;
59
import org.apache.jmeter.protocol.http.parser.HTMLParseException;
59
import org.apache.jmeter.protocol.http.parser.BaseParser;
60
import org.apache.jmeter.protocol.http.parser.HTMLParser;
60
import org.apache.jmeter.protocol.http.parser.LinkExtractorParseException;
61
import org.apache.jmeter.protocol.http.parser.LinkExtractorParser;
61
import org.apache.jmeter.protocol.http.util.ConversionUtils;
62
import org.apache.jmeter.protocol.http.util.ConversionUtils;
62
import org.apache.jmeter.protocol.http.util.EncoderCache;
63
import org.apache.jmeter.protocol.http.util.EncoderCache;
63
import org.apache.jmeter.protocol.http.util.HTTPArgument;
64
import org.apache.jmeter.protocol.http.util.HTTPArgument;
Lines 303-338 Link Here
303
304
304
305
305
    // Derive the mapping of content types to parsers
306
    // Derive the mapping of content types to parsers
306
    private static final Map<String, String> parsersForType = new HashMap<>();
307
    private static final Map<String, String> PARSERS_FOR_CONTENT_TYPE = new HashMap<>();
307
    // Not synch, but it is not modified after creation
308
    // Not synch, but it is not modified after creation
308
309
309
    private static final String RESPONSE_PARSERS= // list of parsers
310
    private static final String RESPONSE_PARSERS= // list of parsers
310
        JMeterUtils.getProperty("HTTPResponse.parsers");//$NON-NLS-1$
311
        JMeterUtils.getProperty("HTTPResponse.parsers");//$NON-NLS-1$
311
312
312
    static{
313
    static{
313
        String []parsers = JOrphanUtils.split(RESPONSE_PARSERS, " " , true);// returns empty array for null
314
        String[] parsers = JOrphanUtils.split(RESPONSE_PARSERS, " " , true);// returns empty array for null
314
        for (final String parser : parsers) {
315
        for (final String parser : parsers) {
315
            String classname = JMeterUtils.getProperty(parser + ".className");//$NON-NLS-1$
316
            String classname = JMeterUtils.getProperty(parser + ".className");//$NON-NLS-1$
316
            if (classname == null) {
317
            if (classname == null) {
317
                log.info("Cannot find .className property for " + parser + ", using default");
318
                log.error("Cannot find .className property for " + parser+", ensure you set property:'"+parser+".className'");
318
                classname = "";
319
                continue;
319
            }
320
            }
320
            String typelist = JMeterUtils.getProperty(parser + ".types");//$NON-NLS-1$
321
            String typelist = JMeterUtils.getProperty(parser + ".types");//$NON-NLS-1$
321
            if (typelist != null) {
322
            if (typelist != null) {
322
                String[] types = JOrphanUtils.split(typelist, " ", true);
323
                String[] types = JOrphanUtils.split(typelist, " ", true);
323
                for (final String type : types) {
324
                for (final String type : types) {
324
                    log.info("Parser for " + type + " is " + classname);
325
                    log.info("Parser for " + type + " is " + classname);
325
                    parsersForType.put(type, classname);
326
                    PARSERS_FOR_CONTENT_TYPE.put(type, classname);
326
                }
327
                }
327
            } else {
328
            } else {
328
                log.warn("Cannot find .types property for " + parser);
329
                log.warn("Cannot find .types property for " + parser 
330
                        + ", as a consequence parser will not be used, to make it usable, define property:'"+parser+".types'");
329
            }
331
            }
330
        }
332
        }
331
        if (parsers.length==0){ // revert to previous behaviour
332
            parsersForType.put("text/html", ""); //$NON-NLS-1$ //$NON-NLS-2$
333
            log.info("No response parsers defined: text/html only will be scanned for embedded resources");
334
        }
335
        
336
    }
333
    }
337
334
338
    // Bug 49083
335
    // Bug 49083
Lines 1194-1212 Link Here
1194
        try {
1191
        try {
1195
            final byte[] responseData = res.getResponseData();
1192
            final byte[] responseData = res.getResponseData();
1196
            if (responseData.length > 0){  // Bug 39205
1193
            if (responseData.length > 0){  // Bug 39205
1197
                String parserName = getParserClass(res);
1194
                final LinkExtractorParser parser = getParser(res);
1198
                if(parserName != null)
1195
                if(parser != null) {
1199
                {
1200
                    final HTMLParser parser =
1201
                        parserName.length() > 0 ? // we have a name
1202
                        HTMLParser.getParser(parserName)
1203
                        :
1204
                        HTMLParser.getParser(); // we don't; use the default parser
1205
                    String userAgent = getUserAgent(res);
1196
                    String userAgent = getUserAgent(res);
1206
                    urls = parser.getEmbeddedResourceURLs(userAgent, responseData, res.getURL(), res.getDataEncodingWithDefault());
1197
                    urls = parser.getEmbeddedResourceURLs(userAgent, responseData, res.getURL(), res.getDataEncodingWithDefault());
1207
                }
1198
                }
1208
            }
1199
            }
1209
        } catch (HTMLParseException e) {
1200
        } catch (LinkExtractorParseException e) {
1210
            // Don't break the world just because this failed:
1201
            // Don't break the world just because this failed:
1211
            res.addSubResult(errorResult(e, new HTTPSampleResult(res)));
1202
            res.addSubResult(errorResult(e, new HTTPSampleResult(res)));
1212
            setParentSampleSuccess(res, false);
1203
            setParentSampleSuccess(res, false);
Lines 1358-1363 Link Here
1358
    }
1349
    }
1359
    
1350
    
1360
    /**
1351
    /**
1352
     * Gets parser from {@link HTTPSampleResult#getMediaType()}.
1353
     * Returns null if no parser defined for it
1354
     * @param res {@link HTTPSampleResult}
1355
     * @return {@link LinkExtractorParser}
1356
     * @throws LinkExtractorParseException
1357
     */
1358
    private LinkExtractorParser getParser(HTTPSampleResult res) 
1359
            throws LinkExtractorParseException {
1360
        String parserClassName = 
1361
                PARSERS_FOR_CONTENT_TYPE.get(res.getMediaType());
1362
        if( !StringUtils.isEmpty(parserClassName) ) {
1363
            return BaseParser.getParser(parserClassName);
1364
        }
1365
        return null;
1366
    }
1367
1368
    /**
1361
     * @param url URL to escape
1369
     * @param url URL to escape
1362
     * @return escaped url
1370
     * @return escaped url
1363
     */
1371
     */
Lines 1434-1448 Link Here
1434
        }
1442
        }
1435
    }
1443
    }
1436
1444
1437
    /*
1438
     * @param res HTTPSampleResult to check
1439
     * @return parser class name (may be "") or null if entry does not exist
1440
     */
1441
    private String getParserClass(HTTPSampleResult res) {
1442
        final String ct = res.getMediaType();
1443
        return parsersForType.get(ct);
1444
    }
1445
1446
    // TODO: make static?
1445
    // TODO: make static?
1447
    protected String encodeSpaces(String path) {
1446
    protected String encodeSpaces(String path) {
1448
        return JOrphanUtils.replaceAllChars(path, ' ', "%20"); // $NON-NLS-1$
1447
        return JOrphanUtils.replaceAllChars(path, ' ', "%20"); // $NON-NLS-1$
(-)test/src/org/apache/jmeter/protocol/http/parser/TestHTMLParser.java (-9 / +9 lines)
Lines 285-295 Link Here
285
                p = JMeterUtils.getProperties("jmeter.properties");
285
                p = JMeterUtils.getProperties("jmeter.properties");
286
            }
286
            }
287
            p.setProperty(HTMLParser.PARSER_CLASSNAME, parserName);
287
            p.setProperty(HTMLParser.PARSER_CLASSNAME, parserName);
288
            HTMLParser.getParser();
288
            HTMLParser.getParser(p.getProperty(HTMLParser.PARSER_CLASSNAME));
289
        }
289
        }
290
290
291
        public void testDefaultParser() throws Exception {
291
        public void testDefaultParser() throws Exception {
292
            HTMLParser.getParser();
292
            HTMLParser.getParser(JMeterUtils.getPropDefault(HTMLParser.PARSER_CLASSNAME, HTMLParser.DEFAULT_PARSER));
293
        }
293
        }
294
294
295
        public void testParserDefault() throws Exception {
295
        public void testParserDefault() throws Exception {
Lines 300-306 Link Here
300
            try {
300
            try {
301
                HTMLParser.getParser("no.such.parser");
301
                HTMLParser.getParser("no.such.parser");
302
                fail("Should not have been able to create the parser");
302
                fail("Should not have been able to create the parser");
303
            } catch (HTMLParseError e) {
303
            } catch (LinkExtractorParseException e) {
304
                if (e.getCause() instanceof ClassNotFoundException) {
304
                if (e.getCause() instanceof ClassNotFoundException) {
305
                    // This is OK
305
                    // This is OK
306
                } else {
306
                } else {
Lines 313-319 Link Here
313
            try {
313
            try {
314
                HTMLParser.getParser("java.lang.String");
314
                HTMLParser.getParser("java.lang.String");
315
                fail("Should not have been able to create the parser");
315
                fail("Should not have been able to create the parser");
316
            } catch (HTMLParseError e) {
316
            } catch (LinkExtractorParseException e) {
317
                if (e.getCause() instanceof ClassCastException) {
317
                if (e.getCause() instanceof ClassCastException) {
318
                    return;
318
                    return;
319
                }
319
                }
Lines 325-331 Link Here
325
            try {
325
            try {
326
                HTMLParser.getParser(TestClass.class.getName());
326
                HTMLParser.getParser(TestClass.class.getName());
327
                fail("Should not have been able to create the parser");
327
                fail("Should not have been able to create the parser");
328
            } catch (HTMLParseError e) {
328
            } catch (LinkExtractorParseException e) {
329
                if (e.getCause() instanceof InstantiationException) {
329
                if (e.getCause() instanceof InstantiationException) {
330
                    return;
330
                    return;
331
                }
331
                }
Lines 337-343 Link Here
337
            try {
337
            try {
338
                HTMLParser.getParser(StaticTestClass.class.getName());
338
                HTMLParser.getParser(StaticTestClass.class.getName());
339
                fail("Should not have been able to create the parser");
339
                fail("Should not have been able to create the parser");
340
            } catch (HTMLParseError e) {
340
            } catch (LinkExtractorParseException e) {
341
                if (e.getCause() instanceof ClassCastException) {
341
                if (e.getCause() instanceof ClassCastException) {
342
                    return;
342
                    return;
343
                }
343
                }
Lines 349-367 Link Here
349
        }
349
        }
350
350
351
        public void testParserSet() throws Exception {
351
        public void testParserSet() throws Exception {
352
            HTMLParser p = HTMLParser.getParser(parserName);
352
            HTMLParser p = (HTMLParser) BaseParser.getParser(parserName);
353
            filetest(p, TESTS[testNumber].fileName, TESTS[testNumber].baseURL, TESTS[testNumber].expectedSet, null,
353
            filetest(p, TESTS[testNumber].fileName, TESTS[testNumber].baseURL, TESTS[testNumber].expectedSet, null,
354
                    false, TESTS[testNumber].userAgent);
354
                    false, TESTS[testNumber].userAgent);
355
        }
355
        }
356
356
357
        public void testParserList() throws Exception {
357
        public void testParserList() throws Exception {
358
            HTMLParser p = HTMLParser.getParser(parserName);
358
            HTMLParser p = (HTMLParser) BaseParser.getParser(parserName);
359
            filetest(p, TESTS[testNumber].fileName, TESTS[testNumber].baseURL, TESTS[testNumber].expectedList,
359
            filetest(p, TESTS[testNumber].fileName, TESTS[testNumber].baseURL, TESTS[testNumber].expectedList,
360
                    new Vector<URLString>(), true, TESTS[testNumber].userAgent);
360
                    new Vector<URLString>(), true, TESTS[testNumber].userAgent);
361
        }
361
        }
362
        
362
        
363
        public void testSpecificParserList() throws Exception {
363
        public void testSpecificParserList() throws Exception {
364
            HTMLParser p = HTMLParser.getParser(parserName);
364
            HTMLParser p = (HTMLParser) BaseParser.getParser(parserName);
365
            filetest(p, SPECIFIC_PARSER_TESTS[testNumber].fileName, SPECIFIC_PARSER_TESTS[testNumber].baseURL, SPECIFIC_PARSER_TESTS[testNumber].expectedList,
365
            filetest(p, SPECIFIC_PARSER_TESTS[testNumber].fileName, SPECIFIC_PARSER_TESTS[testNumber].baseURL, SPECIFIC_PARSER_TESTS[testNumber].expectedList,
366
                    new ArrayList<URLString>(), true, SPECIFIC_PARSER_TESTS[testNumber].userAgent);
366
                    new ArrayList<URLString>(), true, SPECIFIC_PARSER_TESTS[testNumber].userAgent);
367
        }
367
        }

Return to bug 59033