diff --git a/java/org/apache/catalina/valves/CrawlerSessionManagerValve.java b/java/org/apache/catalina/valves/CrawlerSessionManagerValve.java
index e3f12ff..a268d4b 100644
--- a/java/org/apache/catalina/valves/CrawlerSessionManagerValve.java
+++ b/java/org/apache/catalina/valves/CrawlerSessionManagerValve.java
@@ -27,6 +27,8 @@ import javax.servlet.http.HttpSession;
import javax.servlet.http.HttpSessionBindingEvent;
import javax.servlet.http.HttpSessionBindingListener;
+import org.apache.catalina.Context;
+import org.apache.catalina.Host;
import org.apache.catalina.LifecycleException;
import org.apache.catalina.connector.Request;
import org.apache.catalina.connector.Response;
@@ -44,8 +46,8 @@ public class CrawlerSessionManagerValve extends ValveBase implements HttpSession
private static final Log log = LogFactory.getLog(CrawlerSessionManagerValve.class);
- private final Map clientIpSessionId = new ConcurrentHashMap<>();
- private final Map sessionIdClientIp = new ConcurrentHashMap<>();
+ private final Map clientIdSessionId = new ConcurrentHashMap<>();
+ private final Map sessionIdClientId = new ConcurrentHashMap<>();
private String crawlerUserAgents = ".*[bB]ot.*|.*Yahoo! Slurp.*|.*Feedfetcher-Google.*";
private Pattern uaPattern = null;
@@ -55,6 +57,10 @@ public class CrawlerSessionManagerValve extends ValveBase implements HttpSession
private int sessionInactiveInterval = 60;
+ private boolean isHostAware = true;
+
+ private boolean isContextAware = true;
+
/**
* Specifies a default constructor so async support can be configured.
@@ -134,7 +140,27 @@ public class CrawlerSessionManagerValve extends ValveBase implements HttpSession
public Map getClientIpSessionId() {
- return clientIpSessionId;
+ return clientIdSessionId;
+ }
+
+
+ /**
+  * @return {@code true} if the host name is appended to the client IP when
+  *         building the identifier under which a crawler's session ID is
+  *         stored and looked up
+  */
+ public boolean isHostAware() {
+ return isHostAware;
+ }
+
+
+ /**
+  * Controls whether the host name becomes part of the client identifier.
+  *
+  * @param isHostAware {@code true} to append the host name to the client IP
+  *                    when building the identifier, {@code false} to leave
+  *                    it out
+  */
+ public void setHostAware(boolean isHostAware) {
+ this.isHostAware = isHostAware;
+ }
+
+
+ /**
+  * @return {@code true} if the context name is appended to the client IP
+  *         (after the host name, when that is enabled too) when building
+  *         the identifier under which a crawler's session ID is stored and
+  *         looked up
+  */
+ public boolean isContextAware() {
+ return isContextAware;
+ }
+
+
+ /**
+  * Controls whether the context name becomes part of the client identifier.
+  *
+  * @param isContextAware {@code true} to append the context name when
+  *                       building the identifier, {@code false} to leave it
+  *                       out
+  */
+ public void setContextAware(boolean isContextAware) {
+ this.isContextAware = isContextAware;
}
@@ -152,9 +178,10 @@ public class CrawlerSessionManagerValve extends ValveBase implements HttpSession
boolean isBot = false;
String sessionId = null;
String clientIp = request.getRemoteAddr();
+ String clientIdentifier = getClientIdentifier(request.getHost(), request.getContext(), clientIp);
if (log.isDebugEnabled()) {
- log.debug(request.hashCode() + ": ClientIp=" + clientIp + ", RequestedSessionId="
+ log.debug(request.hashCode() + ": ClientIdentifier=" + clientIdentifier + ", RequestedSessionId="
+ request.getRequestedSessionId());
}
@@ -194,7 +221,7 @@ public class CrawlerSessionManagerValve extends ValveBase implements HttpSession
// If this is a bot, is the session ID known?
if (isBot) {
- sessionId = clientIpSessionId.get(clientIp);
+ sessionId = clientIdSessionId.get(clientIdentifier);
if (sessionId != null) {
request.setRequestedSessionId(sessionId);
if (log.isDebugEnabled()) {
@@ -211,8 +238,8 @@ public class CrawlerSessionManagerValve extends ValveBase implements HttpSession
// Has bot just created a session, if so make a note of it
HttpSession s = request.getSession(false);
if (s != null) {
- clientIpSessionId.put(clientIp, s.getId());
- sessionIdClientIp.put(s.getId(), clientIp);
+ clientIdSessionId.put(clientIdentifier, s.getId());
+ sessionIdClientId.put(s.getId(), clientIdentifier);
// #valueUnbound() will be called on session expiration
s.setAttribute(this.getClass().getName(), this);
s.setMaxInactiveInterval(sessionInactiveInterval);
@@ -231,11 +258,23 @@ public class CrawlerSessionManagerValve extends ValveBase implements HttpSession
}
+    /**
+     * Builds the key under which a crawler's session ID is stored and looked
+     * up. The key always starts with the client IP; {@code '-'} plus the host
+     * name is appended when host awareness is enabled, and the context name is
+     * appended when context awareness is enabled.
+     *
+     * @param host     the host the request was mapped to; only read when host
+     *                 awareness is enabled
+     * @param context  the context the request was mapped to; may be
+     *                 {@code null}, in which case no context name is appended
+     * @param clientIp the remote address of the client
+     *
+     * @return the identifier for this client
+     */
+    private String getClientIdentifier(Host host, Context context, String clientIp) {
+        StringBuilder result = new StringBuilder(clientIp);
+        if (isHostAware) {
+            result.append('-').append(host.getName());
+        }
+        // A request that could not be mapped to a web application carries no
+        // Context; skip the context part instead of failing with an NPE.
+        if (isContextAware && context != null) {
+            result.append(context.getName());
+        }
+        return result.toString();
+    }
+
+
+ /**
+  * Forgets the crawler session that is being unbound. The valve registers
+  * itself as an attribute on every crawler session it tracks, so this is
+  * invoked when such a session expires.
+  *
+  * @param event the binding event identifying the session being unbound
+  */
@Override
public void valueUnbound(HttpSessionBindingEvent event) {
- String clientIp = sessionIdClientIp.remove(event.getSession().getId());
- if (clientIp != null) {
- clientIpSessionId.remove(clientIp);
+ // Remove both directions of the mapping so the stale session ID is no
+ // longer handed to later requests from the same client identifier.
+ String clientIdentifier = sessionIdClientId.remove(event.getSession().getId());
+ if (clientIdentifier != null) {
+ clientIdSessionId.remove(clientIdentifier);
}
}
}
diff --git a/test/org/apache/catalina/valves/TestCrawlerSessionManagerValve.java b/test/org/apache/catalina/valves/TestCrawlerSessionManagerValve.java
index edc562f..3a3e883 100644
--- a/test/org/apache/catalina/valves/TestCrawlerSessionManagerValve.java
+++ b/test/org/apache/catalina/valves/TestCrawlerSessionManagerValve.java
@@ -16,12 +16,16 @@
*/
package org.apache.catalina.valves;
+import java.io.IOException;
+import java.util.Arrays;
import java.util.Collections;
+import javax.servlet.ServletException;
import javax.servlet.http.HttpSession;
import org.junit.Test;
-
+import org.apache.catalina.Context;
+import org.apache.catalina.Host;
import org.apache.catalina.Valve;
import org.apache.catalina.connector.Request;
import org.apache.catalina.connector.Response;
@@ -34,6 +38,7 @@ public class TestCrawlerSessionManagerValve {
public void testCrawlerIpsPositive() throws Exception {
CrawlerSessionManagerValve valve = new CrawlerSessionManagerValve();
valve.setCrawlerIps("216\\.58\\.206\\.174");
+ valve.setCrawlerUserAgents(valve.getCrawlerUserAgents());
valve.setNext(EasyMock.createMock(Valve.class));
HttpSession session = createSessionExpectations(valve, true);
Request request = createRequestExpectations("216.58.206.174", session, true);
@@ -49,6 +54,7 @@ public class TestCrawlerSessionManagerValve {
public void testCrawlerIpsNegative() throws Exception {
CrawlerSessionManagerValve valve = new CrawlerSessionManagerValve();
valve.setCrawlerIps("216\\.58\\.206\\.174");
+ valve.setCrawlerUserAgents(valve.getCrawlerUserAgents());
valve.setNext(EasyMock.createMock(Valve.class));
HttpSession session = createSessionExpectations(valve, false);
Request request = createRequestExpectations("127.0.0.1", session, false);
@@ -60,6 +66,32 @@ public class TestCrawlerSessionManagerValve {
EasyMock.verify(request, session);
}
+ /**
+  * With host and context awareness enabled, the same crawler IP visiting
+  * two different host names must go through the "new session" path for each
+  * host rather than having the first host's session ID re-used for the
+  * second.
+  */
+ @Test
+ public void testCrawlerMultipleHostsHostAware() throws Exception {
+ CrawlerSessionManagerValve valve = new CrawlerSessionManagerValve();
+ // NOTE(review): presumably re-applying the default forces the
+ // user-agent pattern to be compiled - confirm against the setter.
+ valve.setCrawlerUserAgents(valve.getCrawlerUserAgents());
+ valve.setHostAware(true);
+ valve.setContextAware(true);
+ valve.setNext(EasyMock.createMock(Valve.class));
+
+ // Same client IP and context, different host names.
+ verifyCrawlingLocalhost(valve, "localhost");
+ verifyCrawlingLocalhost(valve, "example.invalid");
+ }
+
+
+ /**
+  * Sends one mocked bot request from 127.0.0.1 for the given host name
+  * through the valve and verifies that every expectation recorded on the
+  * request and session mocks was met.
+  *
+  * @param valve    the valve under test
+  * @param hostname the name reported by the mocked host of the request
+  */
+ private void verifyCrawlingLocalhost(CrawlerSessionManagerValve valve, String hostname)
+ throws IOException, ServletException {
+ HttpSession session = createSessionExpectations(valve, true);
+ // "tomcatBot 1.0" matches the ".*[bB]ot.*" part of the default pattern.
+ Request request = createRequestExpectations("127.0.0.1", session, true, hostname, "tomcatBot 1.0");
+
+ EasyMock.replay(request, session);
+
+ valve.invoke(request, EasyMock.createMock(Response.class));
+
+ EasyMock.verify(request, session);
+ }
+
+
private HttpSession createSessionExpectations(CrawlerSessionManagerValve valve, boolean isBot) {
HttpSession session = EasyMock.createMock(HttpSession.class);
if (isBot) {
@@ -72,15 +104,36 @@ public class TestCrawlerSessionManagerValve {
return session;
}
+
+ /**
+  * Convenience overload that uses host name "localhost" and the user agent
+  * "something 1.0".
+  */
private Request createRequestExpectations(String ip, HttpSession session, boolean isBot) {
+ return createRequestExpectations(ip, session, isBot, "localhost", "something 1.0");
+ }
+
+ /**
+  * Creates a mocked request (not yet replayed) with the expectations for a
+  * single pass through the valve.
+  *
+  * @param ip        value returned by {@code getRemoteAddr()}
+  * @param session   session returned by the second {@code getSession(false)}
+  *                  call when {@code isBot} is {@code true}
+  * @param isBot     whether a second {@code getSession(false)} call returning
+  *                  {@code session} is expected
+  * @param hostname  name reported by the mocked host
+  * @param userAgent single value returned for the "user-agent" header
+  *
+  * @return the request mock in record state
+  */
+ private Request createRequestExpectations(String ip, HttpSession session, boolean isBot, String hostname, String userAgent) {
Request request = EasyMock.createMock(Request.class);
EasyMock.expect(request.getRemoteAddr()).andReturn(ip);
+ EasyMock.expect(request.getHost()).andReturn(simpleHostWithName(hostname));
+ EasyMock.expect(request.getContext()).andReturn(simpleContextWithName());
// The first lookup finds no session; for bots a second lookup returns it.
IExpectationSetters setter = EasyMock.expect(request.getSession(false))
.andReturn(null);
if (isBot) {
setter.andReturn(session);
}
- EasyMock.expect(request.getHeaders("user-agent")).andReturn(Collections.emptyEnumeration());
+ // andAnswer builds a fresh Enumeration when the mock is invoked.
+ EasyMock.expect(request.getHeaders("user-agent")).andAnswer(() -> Collections.enumeration(Arrays.asList(userAgent)));
return request;
}
+
+ /**
+  * Creates an already-replayed Host mock that expects a single
+  * {@code getName()} call and answers it with the given name.
+  *
+  * @param hostname the name the mock returns
+  *
+  * @return the replayed mock
+  */
+ private Host simpleHostWithName(String hostname) {
+ Host host = EasyMock.createMock(Host.class);
+ EasyMock.expect(host.getName()).andReturn(hostname);
+ EasyMock.replay(host);
+ return host;
+ }
+
+ /**
+  * Creates an already-replayed Context mock that expects a single
+  * {@code getName()} call and answers it with "/examples".
+  *
+  * @return the replayed mock
+  */
+ private Context simpleContextWithName() {
+ Context context = EasyMock.createMock(Context.class);
+ EasyMock.expect(context.getName()).andReturn("/examples");
+ EasyMock.replay(context);
+ return context;
+ }
}
diff --git a/webapps/docs/config/valve.xml b/webapps/docs/config/valve.xml
index 97b0679..bde7d7d 100644
--- a/webapps/docs/config/valve.xml
+++ b/webapps/docs/config/valve.xml
@@ -1820,6 +1820,13 @@
+      <attribute name="contextAware" required="false">
+        <p>Flag to use the context name together with the client IP to
+        identify the session to re-use. Can be combined with <code>hostAware</code>.
+        Default value: <code>true</code>
+        </p>
+      </attribute>
+
Regular expression (using java.util.regex
) that client
IP is matched against to determine if a request is from a web crawler.
@@ -1833,6 +1840,13 @@
.*[bB]ot.*|.*Yahoo! Slurp.*|.*Feedfetcher-Google.*
is used.
+      <attribute name="hostAware" required="false">
+        <p>Flag to use the configured host together with the client IP to
+        identify the session to re-use. Can be combined with <code>contextAware</code>.
+        Default value: <code>true</code>
+        </p>
+      </attribute>
+
The minimum time in seconds that the Crawler Session Manager Valve
should keep the mapping of client IP to session ID in memory without any