ASF Bugzilla – Attachment 35867 Details for
Bug 62297
Add support for multiple hosts/contexts in CrawlerSessionManagerValve
Home
|
New
|
Browse
|
Search
|
[?]
|
Reports
|
Help
|
New Account
|
Log In
Remember
[x]
|
Forgot Password
Login:
[x]
[patch]
Support multiple hosts in one CrawlerSessionManagerValve
crawler.diff (text/plain), 11.11 KB, created by
Felix Schumacher
on 2018-04-12 21:12:30 UTC
(hide)
Description:
Support multiple hosts in one CrawlerSessionManagerValve
Filename:
MIME Type:
Creator:
Felix Schumacher
Created:
2018-04-12 21:12:30 UTC
Size:
11.11 KB
patch
obsolete
>diff --git a/java/org/apache/catalina/valves/CrawlerSessionManagerValve.java b/java/org/apache/catalina/valves/CrawlerSessionManagerValve.java >index e3f12ff..a268d4b 100644 >--- a/java/org/apache/catalina/valves/CrawlerSessionManagerValve.java >+++ b/java/org/apache/catalina/valves/CrawlerSessionManagerValve.java >@@ -27,6 +27,8 @@ import javax.servlet.http.HttpSession; > import javax.servlet.http.HttpSessionBindingEvent; > import javax.servlet.http.HttpSessionBindingListener; > >+import org.apache.catalina.Context; >+import org.apache.catalina.Host; > import org.apache.catalina.LifecycleException; > import org.apache.catalina.connector.Request; > import org.apache.catalina.connector.Response; >@@ -44,8 +46,8 @@ public class CrawlerSessionManagerValve extends ValveBase implements HttpSession > > private static final Log log = LogFactory.getLog(CrawlerSessionManagerValve.class); > >- private final Map<String, String> clientIpSessionId = new ConcurrentHashMap<>(); >- private final Map<String, String> sessionIdClientIp = new ConcurrentHashMap<>(); >+ private final Map<String, String> clientIdSessionId = new ConcurrentHashMap<>(); >+ private final Map<String, String> sessionIdClientId = new ConcurrentHashMap<>(); > > private String crawlerUserAgents = ".*[bB]ot.*|.*Yahoo! Slurp.*|.*Feedfetcher-Google.*"; > private Pattern uaPattern = null; >@@ -55,6 +57,10 @@ public class CrawlerSessionManagerValve extends ValveBase implements HttpSession > > private int sessionInactiveInterval = 60; > >+ private boolean isHostAware = true; >+ >+ private boolean isContextAware = true; >+ > > /** > * Specifies a default constructor so async support can be configured. 
>@@ -134,7 +140,27 @@ public class CrawlerSessionManagerValve extends ValveBase implements HttpSession > > > public Map<String, String> getClientIpSessionId() { >- return clientIpSessionId; >+ return clientIdSessionId; >+ } >+ >+ >+ public boolean isHostAware() { >+ return isHostAware; >+ } >+ >+ >+ public void setHostAware(boolean isHostAware) { >+ this.isHostAware = isHostAware; >+ } >+ >+ >+ public boolean isContextAware() { >+ return isContextAware; >+ } >+ >+ >+ public void setContextAware(boolean isContextAware) { >+ this.isContextAware = isContextAware; > } > > >@@ -152,9 +178,10 @@ public class CrawlerSessionManagerValve extends ValveBase implements HttpSession > boolean isBot = false; > String sessionId = null; > String clientIp = request.getRemoteAddr(); >+ String clientIdentifier = getClientIdentifier(request.getHost(), request.getContext(), clientIp); > > if (log.isDebugEnabled()) { >- log.debug(request.hashCode() + ": ClientIp=" + clientIp + ", RequestedSessionId=" >+ log.debug(request.hashCode() + ": ClientIdentifier=" + clientIdentifier + ", RequestedSessionId=" > + request.getRequestedSessionId()); > } > >@@ -194,7 +221,7 @@ public class CrawlerSessionManagerValve extends ValveBase implements HttpSession > > // If this is a bot, is the session ID known? 
> if (isBot) { >- sessionId = clientIpSessionId.get(clientIp); >+ sessionId = clientIdSessionId.get(clientIdentifier); > if (sessionId != null) { > request.setRequestedSessionId(sessionId); > if (log.isDebugEnabled()) { >@@ -211,8 +238,8 @@ public class CrawlerSessionManagerValve extends ValveBase implements HttpSession > // Has bot just created a session, if so make a note of it > HttpSession s = request.getSession(false); > if (s != null) { >- clientIpSessionId.put(clientIp, s.getId()); >- sessionIdClientIp.put(s.getId(), clientIp); >+ clientIdSessionId.put(clientIdentifier, s.getId()); >+ sessionIdClientId.put(s.getId(), clientIdentifier); > // #valueUnbound() will be called on session expiration > s.setAttribute(this.getClass().getName(), this); > s.setMaxInactiveInterval(sessionInactiveInterval); >@@ -231,11 +258,23 @@ public class CrawlerSessionManagerValve extends ValveBase implements HttpSession > } > > >+ private String getClientIdentifier(Host host, Context context, String clientIp) { >+ StringBuilder result = new StringBuilder(clientIp); >+ if (isHostAware) { >+ result.append('-').append(host.getName()); >+ } >+ if (isContextAware) { >+ result.append(context.getName()); >+ } >+ return result.toString(); >+ } >+ >+ > @Override > public void valueUnbound(HttpSessionBindingEvent event) { >- String clientIp = sessionIdClientIp.remove(event.getSession().getId()); >- if (clientIp != null) { >- clientIpSessionId.remove(clientIp); >+ String clientIdentifier = sessionIdClientId.remove(event.getSession().getId()); >+ if (clientIdentifier != null) { >+ clientIdSessionId.remove(clientIdentifier); > } > } > } >diff --git a/test/org/apache/catalina/valves/TestCrawlerSessionManagerValve.java b/test/org/apache/catalina/valves/TestCrawlerSessionManagerValve.java >index edc562f..3a3e883 100644 >--- a/test/org/apache/catalina/valves/TestCrawlerSessionManagerValve.java >+++ b/test/org/apache/catalina/valves/TestCrawlerSessionManagerValve.java >@@ -16,12 +16,16 @@ > */ > 
package org.apache.catalina.valves; > >+import java.io.IOException; >+import java.util.Arrays; > import java.util.Collections; > >+import javax.servlet.ServletException; > import javax.servlet.http.HttpSession; > > import org.junit.Test; >- >+import org.apache.catalina.Context; >+import org.apache.catalina.Host; > import org.apache.catalina.Valve; > import org.apache.catalina.connector.Request; > import org.apache.catalina.connector.Response; >@@ -34,6 +38,7 @@ public class TestCrawlerSessionManagerValve { > public void testCrawlerIpsPositive() throws Exception { > CrawlerSessionManagerValve valve = new CrawlerSessionManagerValve(); > valve.setCrawlerIps("216\\.58\\.206\\.174"); >+ valve.setCrawlerUserAgents(valve.getCrawlerUserAgents()); > valve.setNext(EasyMock.createMock(Valve.class)); > HttpSession session = createSessionExpectations(valve, true); > Request request = createRequestExpectations("216.58.206.174", session, true); >@@ -49,6 +54,7 @@ public class TestCrawlerSessionManagerValve { > public void testCrawlerIpsNegative() throws Exception { > CrawlerSessionManagerValve valve = new CrawlerSessionManagerValve(); > valve.setCrawlerIps("216\\.58\\.206\\.174"); >+ valve.setCrawlerUserAgents(valve.getCrawlerUserAgents()); > valve.setNext(EasyMock.createMock(Valve.class)); > HttpSession session = createSessionExpectations(valve, false); > Request request = createRequestExpectations("127.0.0.1", session, false); >@@ -60,6 +66,32 @@ public class TestCrawlerSessionManagerValve { > EasyMock.verify(request, session); > } > >+ @Test >+ public void testCrawlerMultipleHostsHostAware() throws Exception { >+ CrawlerSessionManagerValve valve = new CrawlerSessionManagerValve(); >+ valve.setCrawlerUserAgents(valve.getCrawlerUserAgents()); >+ valve.setHostAware(true); >+ valve.setContextAware(true); >+ valve.setNext(EasyMock.createMock(Valve.class)); >+ >+ verifyCrawlingLocalhost(valve, "localhost"); >+ verifyCrawlingLocalhost(valve, "example.invalid"); >+ } >+ >+ >+ private 
void verifyCrawlingLocalhost(CrawlerSessionManagerValve valve, String hostname) >+ throws IOException, ServletException { >+ HttpSession session = createSessionExpectations(valve, true); >+ Request request = createRequestExpectations("127.0.0.1", session, true, hostname, "tomcatBot 1.0"); >+ >+ EasyMock.replay(request, session); >+ >+ valve.invoke(request, EasyMock.createMock(Response.class)); >+ >+ EasyMock.verify(request, session); >+ } >+ >+ > private HttpSession createSessionExpectations(CrawlerSessionManagerValve valve, boolean isBot) { > HttpSession session = EasyMock.createMock(HttpSession.class); > if (isBot) { >@@ -72,15 +104,36 @@ public class TestCrawlerSessionManagerValve { > return session; > } > >+ > private Request createRequestExpectations(String ip, HttpSession session, boolean isBot) { >+ return createRequestExpectations(ip, session, isBot, "localhost", "something 1.0"); >+ } >+ >+ private Request createRequestExpectations(String ip, HttpSession session, boolean isBot, String hostname, String userAgent) { > Request request = EasyMock.createMock(Request.class); > EasyMock.expect(request.getRemoteAddr()).andReturn(ip); >+ EasyMock.expect(request.getHost()).andReturn(simpleHostWithName(hostname)); >+ EasyMock.expect(request.getContext()).andReturn(simpleContextWithName()); > IExpectationSetters<HttpSession> setter = EasyMock.expect(request.getSession(false)) > .andReturn(null); > if (isBot) { > setter.andReturn(session); > } >- EasyMock.expect(request.getHeaders("user-agent")).andReturn(Collections.emptyEnumeration()); >+ EasyMock.expect(request.getHeaders("user-agent")).andAnswer(() -> Collections.enumeration(Arrays.asList(userAgent))); > return request; > } >+ >+ private Host simpleHostWithName(String hostname) { >+ Host host = EasyMock.createMock(Host.class); >+ EasyMock.expect(host.getName()).andReturn(hostname); >+ EasyMock.replay(host); >+ return host; >+ } >+ >+ private Context simpleContextWithName() { >+ Context context = 
EasyMock.createMock(Context.class); >+ EasyMock.expect(context.getName()).andReturn("/examples"); >+ EasyMock.replay(context); >+ return context; >+ } > } >diff --git a/webapps/docs/config/valve.xml b/webapps/docs/config/valve.xml >index 97b0679..bde7d7d 100644 >--- a/webapps/docs/config/valve.xml >+++ b/webapps/docs/config/valve.xml >@@ -1820,6 +1820,13 @@ > </p> > </attribute> > >+ <attribute name="contextAware" required="false"> >+ <p>Flag to use the context name together with the client IP to >+ identify the session to re-use. Can be combined with <code>hostAware</code>. >+ Default value: <code>true</code> >+ </p> >+ </attribute> >+ > <attribute name="crawlerIps" required="false"> > <p>Regular expression (using <code>java.util.regex</code>) that client > IP is matched against to determine if a request is from a web crawler. >@@ -1833,6 +1840,13 @@ > <code>.*[bB]ot.*|.*Yahoo! Slurp.*|.*Feedfetcher-Google.*</code> is used.</p> > </attribute> > >+ <attribute name="hostAware" required="false"> >+ <p>Flag to use the configured host together with the client IP to >+ identify the session to re-use. Can be combined with <code>contextAware</code>. >+ Default value: <code>true</code> >+ </p> >+ </attribute> >+ > <attribute name="sessionInactiveInterval" required="false"> > <p>The minimum time in seconds that the Crawler Session Manager Valve > should keep the mapping of client IP to session ID in memory without any
You cannot view the attachment while viewing its details because your browser does not support IFRAMEs.
View the attachment on a separate page.
View Attachment As Diff
View Attachment As Raw
Actions:
View
|
Diff
Attachments on bug 62297: 35867