--- java/org/apache/catalina/valves/LocalStrings.properties (revision 1351601) +++ java/org/apache/catalina/valves/LocalStrings.properties (working copy) @@ -56,6 +56,10 @@ sslValve.invalidProvider=The SSL provider specified on the connector associated with this request of [{0}] is invalid. The certificate data could not be processed. +#Stuck thread detection Valve +stuckThreadDetectionValve.notifyStuckThreadDetected=Thread "{0}" (id={6}) has been active for {1} milliseconds (since {2}) to serve the same request for {4} and may be stuck (configured threshold for this StuckThreadDetectionValve is {5} seconds). There is/are {3} thread(s) in total that are monitored by this Valve and may be stuck. +stuckThreadDetectionValve.notifyStuckThreadCompleted=Thread "{0}" (id={3}) was previously reported to be stuck but has completed. It was active for approximately {1} milliseconds.{2,choice,0#|0< There is/are still {2} thread(s) that are monitored by this Valve and may be stuck.} + # HTTP status reports http.100=The client may continue ({0}). http.101=The server is switching protocols according to the "Upgrade" header ({0}). --- java/org/apache/catalina/valves/mbeans-descriptors.xml (revision 1351601) +++ java/org/apache/catalina/valves/mbeans-descriptors.xml (working copy) @@ -459,4 +459,31 @@ writeable="false" /> + + + + + + + + + + + + --- java/org/apache/catalina/valves/StuckThreadDetectionValve.java (revision 0) +++ java/org/apache/catalina/valves/StuckThreadDetectionValve.java (working copy) @@ -0,0 +1,297 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.catalina.valves; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Date; +import java.util.List; +import java.util.Queue; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentLinkedQueue; +import java.util.concurrent.atomic.AtomicInteger; + +import javax.servlet.ServletException; + +import org.apache.catalina.connector.Request; +import org.apache.catalina.connector.Response; +import org.apache.juli.logging.Log; +import org.apache.juli.logging.LogFactory; +import org.apache.tomcat.util.res.StringManager; + +/** + * This valve allows to detect requests that take a long time to process, which + * might indicate that the thread that is processing it is stuck. + */ +public class StuckThreadDetectionValve extends ValveBase { + + /** + * Logger + */ + private static final Log log = LogFactory.getLog(StuckThreadDetectionValve.class); + + /** + * The string manager for this package. + */ + private static final StringManager sm = + StringManager.getManager(Constants.Package); + + /** + * Keeps count of the number of stuck threads detected + */ + private final AtomicInteger stuckCount = new AtomicInteger(0); + + /** + * In seconds. Default 600 (10 minutes). + */ + private int threshold = 600; + + /** + * The only references we keep to actual running Thread objects are in + * this Map (which is automatically cleaned in invoke()s finally clause). + * That way, Threads can be GC'ed, eventhough the Valve still thinks they + * are stuck (caused by a long monitor interval) + */ + private final ConcurrentHashMap activeThreads = + new ConcurrentHashMap(); + /** + * + */ + private final Queue completedStuckThreadsQueue = + new ConcurrentLinkedQueue(); + + /** + * Specify the threshold (in seconds) used when checking for stuck threads. + * If <=0, the detection is disabled. The default is 600 seconds. + * + * @param threshold + * The new threshold in seconds + */ + public void setThreshold(int threshold) { + this.threshold = threshold; + } + + /** + * @see #setThreshold(int) + * @return The current threshold in seconds + */ + public int getThreshold() { + return threshold; + } + + + private void notifyStuckThreadDetected(MonitoredThread monitoredThread, + long activeTime, int numStuckThreads) { + if (log.isWarnEnabled()) { + String msg = sm.getString( + "stuckThreadDetectionValve.notifyStuckThreadDetected", + monitoredThread.getThread().getName(), + Long.valueOf(activeTime), + monitoredThread.getStartTime(), + Integer.valueOf(numStuckThreads), + monitoredThread.getRequestUri(), + Integer.valueOf(threshold), + String.valueOf(monitoredThread.getThread().getId()) + ); + // msg += "\n" + getStackTraceAsString(trace); + Throwable th = new Throwable(); + th.setStackTrace(monitoredThread.getThread().getStackTrace()); + log.warn(msg, th); + } + } + + private void notifyStuckThreadCompleted(CompletedStuckThread thread, + int numStuckThreads) { + if (log.isWarnEnabled()) { + String msg = sm.getString( + "stuckThreadDetectionValve.notifyStuckThreadCompleted", + thread.getName(), + Long.valueOf(thread.getTotalActiveTime()), + Integer.valueOf(numStuckThreads), + String.valueOf(thread.getId())); + // Since the "stuck thread notification" is warn, this should also + // be warn + log.warn(msg); + } + } + + /** + * {@inheritDoc} + */ + @Override + public void invoke(Request request, Response response) + throws IOException, ServletException { + + if (threshold <= 0) { + // short-circuit if not monitoring stuck threads + getNext().invoke(request, response); + return; + } + + // Save the thread/runnable + // Keeping a reference to the thread object here does not prevent + // GC'ing, as the reference is removed from the Map in the finally clause + + Long key = Long.valueOf(Thread.currentThread().getId()); + StringBuffer requestUrl = request.getRequestURL(); + if(request.getQueryString()!=null) { + requestUrl.append("?"); + requestUrl.append(request.getQueryString()); + } + MonitoredThread monitoredThread = new MonitoredThread(Thread.currentThread(), + requestUrl.toString()); + activeThreads.put(key, monitoredThread); + + try { + getNext().invoke(request, response); + } finally { + activeThreads.remove(key); + if (monitoredThread.markAsDone() == MonitoredThreadState.STUCK) { + completedStuckThreadsQueue.add( + new CompletedStuckThread(monitoredThread.getThread(), + monitoredThread.getActiveTimeInMillis())); + } + } + } + + @Override + public void backgroundProcess() { + super.backgroundProcess(); + + long thresholdInMillis = threshold * 1000; + + // Check monitored threads, being careful that the request might have + // completed by the time we examine it + for (MonitoredThread monitoredThread : activeThreads.values()) { + long activeTime = monitoredThread.getActiveTimeInMillis(); + + if (activeTime >= thresholdInMillis && monitoredThread.markAsStuckIfStillRunning()) { + int numStuckThreads = stuckCount.incrementAndGet(); + notifyStuckThreadDetected(monitoredThread, activeTime, numStuckThreads); + } + } + // Check if any threads previously reported as stuck, have finished. + for (CompletedStuckThread completedStuckThread = completedStuckThreadsQueue.poll(); + completedStuckThread != null; completedStuckThread = completedStuckThreadsQueue.poll()) { + + int numStuckThreads = stuckCount.decrementAndGet(); + notifyStuckThreadCompleted(completedStuckThread, numStuckThreads); + } + } + + public long[] getStuckThreadIds() { + List idList = new ArrayList(); + for (MonitoredThread monitoredThread : activeThreads.values()) { + if (monitoredThread.isMarkedAsStuck()) { + idList.add(Long.valueOf(monitoredThread.getThread().getId())); + } + } + + long[] result = new long[idList.size()]; + for (int i = 0; i < result.length; i++) { + result[i] = idList.get(i).longValue(); + } + return result; + } + + public String[] getStuckThreadNames() { + List nameList = new ArrayList(); + for (MonitoredThread monitoredThread : activeThreads.values()) { + if (monitoredThread.isMarkedAsStuck()) { + nameList.add(monitoredThread.getThread().getName()); + } + } + return nameList.toArray(new String[nameList.size()]); + } + + private static class MonitoredThread { + + /** + * Reference to the thread to get a stack trace from background task + */ + private final Thread thread; + private final String requestUri; + private final long start; + private final AtomicInteger state = new AtomicInteger( + MonitoredThreadState.RUNNING.ordinal()); + + public MonitoredThread(Thread thread, String requestUri) { + this.thread = thread; + this.requestUri = requestUri; + this.start = System.currentTimeMillis(); + } + + public Thread getThread() { + return this.thread; + } + + public String getRequestUri() { + return requestUri; + } + + public long getActiveTimeInMillis() { + return System.currentTimeMillis() - start; + } + + public Date getStartTime() { + return new Date(start); + } + + public boolean markAsStuckIfStillRunning() { + return this.state.compareAndSet(MonitoredThreadState.RUNNING.ordinal(), + MonitoredThreadState.STUCK.ordinal()); + } + + public MonitoredThreadState markAsDone() { + int val = this.state.getAndSet(MonitoredThreadState.DONE.ordinal()); + return MonitoredThreadState.values()[val]; + } + + boolean isMarkedAsStuck() { + return this.state.get() == MonitoredThreadState.STUCK.ordinal(); + } + } + + private static class CompletedStuckThread { + + private final String threadName; + private final long threadId; + private final long totalActiveTime; + + public CompletedStuckThread(Thread thread, long totalActiveTime) { + this.threadName = thread.getName(); + this.threadId = thread.getId(); + this.totalActiveTime = totalActiveTime; + } + + public String getName() { + return this.threadName; + } + + public long getId() { + return this.threadId; + } + + public long getTotalActiveTime() { + return this.totalActiveTime; + } + } + + private enum MonitoredThreadState { + RUNNING, STUCK, DONE; + } +} --- webapps/docs/config/valve.xml (revision 1351601) +++ webapps/docs/config/valve.xml (working copy) @@ -873,6 +873,50 @@ +
+ + + +

This valve allows to detect requests that take a long time to process, which might + indicate that the thread that is processing it is stuck.

+

When such a request is detected, the current stack trace of its thread is written + to Tomcat log with a WARN level.

+

The IDs and names of the stuck threads are available through JMX in the + stuckThreadIds and stuckThreadNames attributes. + The IDs can be used with the standard Threading JVM MBean + (java.lang:type=Threading) to retrieve other information + about each stuck thread.

+ +
+ + + +

The Stuck Thread Detection Valve supports the + following configuration attributes:

+ + + + +

Java class name of the implementation to use. This MUST be set to + org.apache.catalina.valves.StuckThreadDetectionValve. +

+
+ + +

Minimum duration in seconds after which a thread is considered stuck. + Default is 600 seconds. If set to 0, the detection is disabled.

+

Note: since the detection is done in the background thread of the Container + (Engine, Host or Context) declaring this Valve, the threshold should be higher + than the backgroundProcessorDelay of this Container.

+
+ +
+ +
+ +
+ +