View | Details | Raw Unified | Return to bug 62297
Collapse All | Expand All

(-)a/java/org/apache/catalina/valves/CrawlerSessionManagerValve.java (-10 / +49 lines)
Lines 27-32 import javax.servlet.http.HttpSession; Link Here
27
import javax.servlet.http.HttpSessionBindingEvent;
27
import javax.servlet.http.HttpSessionBindingEvent;
28
import javax.servlet.http.HttpSessionBindingListener;
28
import javax.servlet.http.HttpSessionBindingListener;
29
29
30
import org.apache.catalina.Context;
31
import org.apache.catalina.Host;
30
import org.apache.catalina.LifecycleException;
32
import org.apache.catalina.LifecycleException;
31
import org.apache.catalina.connector.Request;
33
import org.apache.catalina.connector.Request;
32
import org.apache.catalina.connector.Response;
34
import org.apache.catalina.connector.Response;
Lines 44-51 public class CrawlerSessionManagerValve extends ValveBase implements HttpSession Link Here
44
46
45
    private static final Log log = LogFactory.getLog(CrawlerSessionManagerValve.class);
47
    private static final Log log = LogFactory.getLog(CrawlerSessionManagerValve.class);
46
48
47
    private final Map<String, String> clientIpSessionId = new ConcurrentHashMap<>();
49
    private final Map<String, String> clientIdSessionId = new ConcurrentHashMap<>();
48
    private final Map<String, String> sessionIdClientIp = new ConcurrentHashMap<>();
50
    private final Map<String, String> sessionIdClientId = new ConcurrentHashMap<>();
49
51
50
    private String crawlerUserAgents = ".*[bB]ot.*|.*Yahoo! Slurp.*|.*Feedfetcher-Google.*";
52
    private String crawlerUserAgents = ".*[bB]ot.*|.*Yahoo! Slurp.*|.*Feedfetcher-Google.*";
51
    private Pattern uaPattern = null;
53
    private Pattern uaPattern = null;
Lines 55-60 public class CrawlerSessionManagerValve extends ValveBase implements HttpSession Link Here
55
57
56
    private int sessionInactiveInterval = 60;
58
    private int sessionInactiveInterval = 60;
57
59
60
    private boolean isHostAware = true;
61
62
    private boolean isContextAware = true;
63
58
64
59
    /**
65
    /**
60
     * Specifies a default constructor so async support can be configured.
66
     * Specifies a default constructor so async support can be configured.
Lines 134-140 public class CrawlerSessionManagerValve extends ValveBase implements HttpSession Link Here
134
140
135
141
136
    public Map<String, String> getClientIpSessionId() {
142
    public Map<String, String> getClientIpSessionId() {
137
        return clientIpSessionId;
143
        return clientIdSessionId;
144
    }
145
146
147
    public boolean isHostAware() {
148
        return isHostAware;
149
    }
150
151
152
    public void setHostAware(boolean isHostAware) {
153
        this.isHostAware = isHostAware;
154
    }
155
156
157
    public boolean isContextAware() {
158
        return isContextAware;
159
    }
160
161
162
    public void setContextAware(boolean isContextAware) {
163
        this.isContextAware = isContextAware;
138
    }
164
    }
139
165
140
166
Lines 152-160 public class CrawlerSessionManagerValve extends ValveBase implements HttpSession Link Here
152
        boolean isBot = false;
178
        boolean isBot = false;
153
        String sessionId = null;
179
        String sessionId = null;
154
        String clientIp = request.getRemoteAddr();
180
        String clientIp = request.getRemoteAddr();
181
        String clientIdentifier = getClientIdentifier(request.getHost(), request.getContext(), clientIp);
155
182
156
        if (log.isDebugEnabled()) {
183
        if (log.isDebugEnabled()) {
157
            log.debug(request.hashCode() + ": ClientIp=" + clientIp + ", RequestedSessionId="
184
            log.debug(request.hashCode() + ": ClientIdentifier=" + clientIdentifier + ", RequestedSessionId="
158
                    + request.getRequestedSessionId());
185
                    + request.getRequestedSessionId());
159
        }
186
        }
160
187
Lines 194-200 public class CrawlerSessionManagerValve extends ValveBase implements HttpSession Link Here
194
221
195
            // If this is a bot, is the session ID known?
222
            // If this is a bot, is the session ID known?
196
            if (isBot) {
223
            if (isBot) {
197
                sessionId = clientIpSessionId.get(clientIp);
224
                sessionId = clientIdSessionId.get(clientIdentifier);
198
                if (sessionId != null) {
225
                if (sessionId != null) {
199
                    request.setRequestedSessionId(sessionId);
226
                    request.setRequestedSessionId(sessionId);
200
                    if (log.isDebugEnabled()) {
227
                    if (log.isDebugEnabled()) {
Lines 211-218 public class CrawlerSessionManagerValve extends ValveBase implements HttpSession Link Here
211
                // Has bot just created a session, if so make a note of it
238
                // Has bot just created a session, if so make a note of it
212
                HttpSession s = request.getSession(false);
239
                HttpSession s = request.getSession(false);
213
                if (s != null) {
240
                if (s != null) {
214
                    clientIpSessionId.put(clientIp, s.getId());
241
                    clientIdSessionId.put(clientIdentifier, s.getId());
215
                    sessionIdClientIp.put(s.getId(), clientIp);
242
                    sessionIdClientId.put(s.getId(), clientIdentifier);
216
                    // #valueUnbound() will be called on session expiration
243
                    // #valueUnbound() will be called on session expiration
217
                    s.setAttribute(this.getClass().getName(), this);
244
                    s.setAttribute(this.getClass().getName(), this);
218
                    s.setMaxInactiveInterval(sessionInactiveInterval);
245
                    s.setMaxInactiveInterval(sessionInactiveInterval);
Lines 231-241 public class CrawlerSessionManagerValve extends ValveBase implements HttpSession Link Here
231
    }
258
    }
232
259
233
260
261
    private String getClientIdentifier(Host host, Context context, String clientIp) {
262
        StringBuilder result = new StringBuilder(clientIp);
263
        if (isHostAware) {
264
            result.append('-').append(host.getName());
265
        }
266
        if (isContextAware) {
267
            result.append(context.getName());
268
        }
269
        return result.toString();
270
    }
271
272
234
    @Override
273
    @Override
235
    public void valueUnbound(HttpSessionBindingEvent event) {
274
    public void valueUnbound(HttpSessionBindingEvent event) {
236
        String clientIp = sessionIdClientIp.remove(event.getSession().getId());
275
        String clientIdentifier = sessionIdClientId.remove(event.getSession().getId());
237
        if (clientIp != null) {
276
        if (clientIdentifier != null) {
238
            clientIpSessionId.remove(clientIp);
277
            clientIdSessionId.remove(clientIdentifier);
239
        }
278
        }
240
    }
279
    }
241
}
280
}
(-)a/test/org/apache/catalina/valves/TestCrawlerSessionManagerValve.java (-2 / +55 lines)
Lines 16-27 Link Here
16
 */
16
 */
17
package org.apache.catalina.valves;
17
package org.apache.catalina.valves;
18
18
19
import java.io.IOException;
20
import java.util.Arrays;
19
import java.util.Collections;
21
import java.util.Collections;
20
22
23
import javax.servlet.ServletException;
21
import javax.servlet.http.HttpSession;
24
import javax.servlet.http.HttpSession;
22
25
23
import org.junit.Test;
26
import org.junit.Test;
24
27
import org.apache.catalina.Context;
28
import org.apache.catalina.Host;
25
import org.apache.catalina.Valve;
29
import org.apache.catalina.Valve;
26
import org.apache.catalina.connector.Request;
30
import org.apache.catalina.connector.Request;
27
import org.apache.catalina.connector.Response;
31
import org.apache.catalina.connector.Response;
Lines 34-39 public class TestCrawlerSessionManagerValve { Link Here
34
    public void testCrawlerIpsPositive() throws Exception {
38
    public void testCrawlerIpsPositive() throws Exception {
35
        CrawlerSessionManagerValve valve = new CrawlerSessionManagerValve();
39
        CrawlerSessionManagerValve valve = new CrawlerSessionManagerValve();
36
        valve.setCrawlerIps("216\\.58\\.206\\.174");
40
        valve.setCrawlerIps("216\\.58\\.206\\.174");
41
        valve.setCrawlerUserAgents(valve.getCrawlerUserAgents());
37
        valve.setNext(EasyMock.createMock(Valve.class));
42
        valve.setNext(EasyMock.createMock(Valve.class));
38
        HttpSession session = createSessionExpectations(valve, true);
43
        HttpSession session = createSessionExpectations(valve, true);
39
        Request request = createRequestExpectations("216.58.206.174", session, true);
44
        Request request = createRequestExpectations("216.58.206.174", session, true);
Lines 49-54 public class TestCrawlerSessionManagerValve { Link Here
49
    public void testCrawlerIpsNegative() throws Exception {
54
    public void testCrawlerIpsNegative() throws Exception {
50
        CrawlerSessionManagerValve valve = new CrawlerSessionManagerValve();
55
        CrawlerSessionManagerValve valve = new CrawlerSessionManagerValve();
51
        valve.setCrawlerIps("216\\.58\\.206\\.174");
56
        valve.setCrawlerIps("216\\.58\\.206\\.174");
57
        valve.setCrawlerUserAgents(valve.getCrawlerUserAgents());
52
        valve.setNext(EasyMock.createMock(Valve.class));
58
        valve.setNext(EasyMock.createMock(Valve.class));
53
        HttpSession session = createSessionExpectations(valve, false);
59
        HttpSession session = createSessionExpectations(valve, false);
54
        Request request = createRequestExpectations("127.0.0.1", session, false);
60
        Request request = createRequestExpectations("127.0.0.1", session, false);
Lines 60-65 public class TestCrawlerSessionManagerValve { Link Here
60
        EasyMock.verify(request, session);
66
        EasyMock.verify(request, session);
61
    }
67
    }
62
68
69
    @Test
70
    public void testCrawlerMultipleHostsHostAware() throws Exception {
71
        CrawlerSessionManagerValve valve = new CrawlerSessionManagerValve();
72
        valve.setCrawlerUserAgents(valve.getCrawlerUserAgents());
73
        valve.setHostAware(true);
74
        valve.setContextAware(true);
75
        valve.setNext(EasyMock.createMock(Valve.class));
76
77
        verifyCrawlingLocalhost(valve, "localhost");
78
        verifyCrawlingLocalhost(valve, "example.invalid");
79
    }
80
81
82
    private void verifyCrawlingLocalhost(CrawlerSessionManagerValve valve, String hostname)
83
            throws IOException, ServletException {
84
        HttpSession session = createSessionExpectations(valve, true);
85
        Request request = createRequestExpectations("127.0.0.1", session, true, hostname, "tomcatBot 1.0");
86
87
        EasyMock.replay(request, session);
88
89
        valve.invoke(request, EasyMock.createMock(Response.class));
90
91
        EasyMock.verify(request, session);
92
    }
93
94
63
    private HttpSession createSessionExpectations(CrawlerSessionManagerValve valve, boolean isBot) {
95
    private HttpSession createSessionExpectations(CrawlerSessionManagerValve valve, boolean isBot) {
64
        HttpSession session = EasyMock.createMock(HttpSession.class);
96
        HttpSession session = EasyMock.createMock(HttpSession.class);
65
        if (isBot) {
97
        if (isBot) {
Lines 72-86 public class TestCrawlerSessionManagerValve { Link Here
72
        return session;
104
        return session;
73
    }
105
    }
74
106
107
75
    private Request createRequestExpectations(String ip, HttpSession session, boolean isBot) {
108
    private Request createRequestExpectations(String ip, HttpSession session, boolean isBot) {
109
        return createRequestExpectations(ip, session, isBot, "localhost", "something 1.0");
110
    }
111
112
    private Request createRequestExpectations(String ip, HttpSession session, boolean isBot, String hostname, String userAgent) {
76
        Request request = EasyMock.createMock(Request.class);
113
        Request request = EasyMock.createMock(Request.class);
77
        EasyMock.expect(request.getRemoteAddr()).andReturn(ip);
114
        EasyMock.expect(request.getRemoteAddr()).andReturn(ip);
115
        EasyMock.expect(request.getHost()).andReturn(simpleHostWithName(hostname));
116
        EasyMock.expect(request.getContext()).andReturn(simpleContextWithName());
78
        IExpectationSetters<HttpSession> setter = EasyMock.expect(request.getSession(false))
117
        IExpectationSetters<HttpSession> setter = EasyMock.expect(request.getSession(false))
79
                .andReturn(null);
118
                .andReturn(null);
80
        if (isBot) {
119
        if (isBot) {
81
            setter.andReturn(session);
120
            setter.andReturn(session);
82
        }
121
        }
83
        EasyMock.expect(request.getHeaders("user-agent")).andReturn(Collections.emptyEnumeration());
122
        EasyMock.expect(request.getHeaders("user-agent")).andAnswer(() -> Collections.enumeration(Arrays.asList(userAgent)));
84
        return request;
123
        return request;
85
    }
124
    }
125
126
    private Host simpleHostWithName(String hostname) {
127
        Host host = EasyMock.createMock(Host.class);
128
        EasyMock.expect(host.getName()).andReturn(hostname);
129
        EasyMock.replay(host);
130
        return host;
131
    }
132
133
    private Context simpleContextWithName() {
134
        Context context = EasyMock.createMock(Context.class);
135
        EasyMock.expect(context.getName()).andReturn("/examples");
136
        EasyMock.replay(context);
137
        return context;
138
    }
86
}
139
}
(-)a/webapps/docs/config/valve.xml (+14 lines)
Lines 1820-1825 Link Here
1820
        </p>
1820
        </p>
1821
      </attribute>
1821
      </attribute>
1822
1822
1823
      <attribute name="contextAware" required="false">
1824
        <p>Flag to use the context name together with the client IP to
1825
        identify the session to re-use. Can be combined with <code>hostAware</code>.
1826
        If not specified, the default value of <code>true</code> is used.
1827
        </p>
1828
      </attribute>
1829
1823
      <attribute name="crawlerIps" required="false">
1830
      <attribute name="crawlerIps" required="false">
1824
        <p>Regular expression (using <code>java.util.regex</code>) that client
1831
        <p>Regular expression (using <code>java.util.regex</code>) that client
1825
        IP is matched against to determine if a request is from a web crawler.
1832
        IP is matched against to determine if a request is from a web crawler.
Lines 1833-1838 Link Here
1833
        <code>.*[bB]ot.*|.*Yahoo! Slurp.*|.*Feedfetcher-Google.*</code> is used.</p>
1840
        <code>.*[bB]ot.*|.*Yahoo! Slurp.*|.*Feedfetcher-Google.*</code> is used.</p>
1834
      </attribute>
1841
      </attribute>
1835
1842
1843
      <attribute name="hostAware" required="false">
1844
        <p>Flag to use the configured host together with the client IP to
1845
        identify the session to re-use. Can be combined with <code>contextAware</code>.
1846
        If not specified, the default value of <code>true</code> is used.
1847
        </p>
1848
      </attribute>
1849
1836
      <attribute name="sessionInactiveInterval" required="false">
1850
      <attribute name="sessionInactiveInterval" required="false">
1837
        <p>The minimum time in seconds that the Crawler Session Manager Valve
1851
        <p>The minimum time in seconds that the Crawler Session Manager Valve
1838
        should keep the mapping of client IP to session ID in memory without any
1852
        should keep the mapping of client IP to session ID in memory without any

Return to bug 62297