ASF Bugzilla – Attachment 34202 Details for
Bug 53555
Scoreboard full error with event/ssl
Home
|
New
|
Browse
|
Search
|
[?]
|
Reports
|
Help
|
New Account
|
Log In
Remember
[x]
|
Forgot Password
Login:
[x]
[patch]
Use all scoreboard entries up to ServerLimit, for 2.4
use-all-scoreboard-2.4.diff (text/plain), 43.09 KB, created by
Stefan Fritsch
on 2016-09-05 21:50:31 UTC
(
hide
)
Description:
Use all scoreboard entries up to ServerLimit, for 2.4
Filename:
MIME Type:
Creator:
Stefan Fritsch
Created:
2016-09-05 21:50:31 UTC
Size:
43.09 KB
patch
obsolete
>diff --git a/modules/generators/mod_status.c b/modules/generators/mod_status.c >index 4ff9df1..b35172e 100644 >--- a/modules/generators/mod_status.c >+++ b/modules/generators/mod_status.c >@@ -529,7 +529,7 @@ static int status_handler(request_rec *r) > > if (is_async) { > int write_completion = 0, lingering_close = 0, keep_alive = 0, >- connections = 0; >+ connections = 0, stopping = 0, procs = 0; > /* > * These differ from 'busy' and 'ready' in how gracefully finishing > * threads are counted. XXX: How to make this clear in the html? >@@ -537,13 +537,15 @@ static int status_handler(request_rec *r) > int busy_workers = 0, idle_workers = 0; > if (!short_report) > ap_rputs("\n\n<table rules=\"all\" cellpadding=\"1%\">\n" >- "<tr><th rowspan=\"2\">PID</th>" >+ "<tr><th rowspan=\"2\">Slot</th>" >+ "<th rowspan=\"2\">PID</th>" >+ "<th rowspan=\"2\">Stopping</th>" > "<th colspan=\"2\">Connections</th>\n" > "<th colspan=\"2\">Threads</th>" >- "<th colspan=\"4\">Async connections</th></tr>\n" >+ "<th colspan=\"3\">Async connections</th></tr>\n" > "<tr><th>total</th><th>accepting</th>" >- "<th>busy</th><th>idle</th><th>writing</th>" >- "<th>keep-alive</th><th>closing</th></tr>\n", r); >+ "<th>busy</th><th>idle</th>" >+ "<th>writing</th><th>keep-alive</th><th>closing</th></tr>\n", r); > for (i = 0; i < server_limit; ++i) { > ps_record = ap_get_scoreboard_process(i); > if (ps_record->pid) { >@@ -553,26 +555,45 @@ static int status_handler(request_rec *r) > lingering_close += ps_record->lingering_close; > busy_workers += thread_busy_buffer[i]; > idle_workers += thread_idle_buffer[i]; >- if (!short_report) >- ap_rprintf(r, "<tr><td>%" APR_PID_T_FMT "</td><td>%u</td>" >- "<td>%s</td><td>%u</td><td>%u</td>" >+ if (!short_report) { >+ const char *dying = "no"; >+ const char *old = ""; >+ if (ps_record->quiescing) { >+ dying = "yes"; >+ stopping++; >+ } >+ if (ps_record->generation != mpm_generation) >+ old = " (old gen)"; >+ procs++; >+ ap_rprintf(r, "<tr><td>%u</td><td>%" 
APR_PID_T_FMT "</td>" >+ "<td>%s%s</td>" >+ "<td>%u</td><td>%s</td>" >+ "<td>%u</td><td>%u</td>" > "<td>%u</td><td>%u</td><td>%u</td>" > "</tr>\n", >- ps_record->pid, ps_record->connections, >+ i, ps_record->pid, >+ dying, old, >+ ps_record->connections, > ps_record->not_accepting ? "no" : "yes", >- thread_busy_buffer[i], thread_idle_buffer[i], >+ thread_busy_buffer[i], >+ thread_idle_buffer[i], > ps_record->write_completion, > ps_record->keep_alive, > ps_record->lingering_close); >+ } > } > } > if (!short_report) { >- ap_rprintf(r, "<tr><td>Sum</td><td>%d</td><td> </td><td>%d</td>" >- "<td>%d</td><td>%d</td><td>%d</td><td>%d</td>" >+ ap_rprintf(r, "<tr><td>Sum</td>" >+ "<td>%d</td><td>%d</td>" >+ "<td>%d</td><td> </td>" >+ "<td>%d</td><td>%d</td>" >+ "<td>%d</td><td>%d</td><td>%d</td>" > "</tr>\n</table>\n", >- connections, busy_workers, idle_workers, >+ procs, stopping, >+ connections, >+ busy_workers, idle_workers, > write_completion, keep_alive, lingering_close); >- > } > else { > ap_rprintf(r, "ConnsTotal: %d\n" >@@ -621,7 +642,7 @@ static int status_handler(request_rec *r) > "\"<b><code>G</code></b>\" Gracefully finishing,<br /> \n" > "\"<b><code>I</code></b>\" Idle cleanup of worker, \n" > "\"<b><code>.</code></b>\" Open slot with no current process<br />\n" >- "<p />\n", r); >+ "</p>\n", r); > if (!ap_extended_status) { > int j; > int k = 0; >diff --git a/server/mpm/event/event.c b/server/mpm/event/event.c >index e53ea0f..d777ecd 100644 >--- a/server/mpm/event/event.c >+++ b/server/mpm/event/event.c >@@ -160,15 +160,18 @@ > #endif > #define WORKER_FACTOR_SCALE 16 /* scale factor to allow fractional values */ > static unsigned int worker_factor = DEFAULT_WORKER_FACTOR * WORKER_FACTOR_SCALE; >- >-static int threads_per_child = 0; /* Worker threads per child */ >-static int ap_daemons_to_start = 0; >-static int min_spare_threads = 0; >-static int max_spare_threads = 0; >-static int ap_daemons_limit = 0; >-static int max_workers = 0; >-static int server_limit = 
0; >-static int thread_limit = 0; >+ /* AsyncRequestWorkerFactor * 16 */ >+ >+static int threads_per_child = 0; /* ThreadsPerChild */ >+static int ap_daemons_to_start = 0; /* StartServers */ >+static int min_spare_threads = 0; /* MinSpareThreads */ >+static int max_spare_threads = 0; /* MaxSpareThreads */ >+static int active_daemons_limit = 0; /* MaxRequestWorkers / ThreadsPerChild */ >+static int active_daemons = 0; /* workers that still active, i.e. are >+ not shutting down gracefully */ >+static int max_workers = 0; /* MaxRequestWorkers */ >+static int server_limit = 0; /* ServerLimit */ >+static int thread_limit = 0; /* ThreadLimit */ > static int had_healthy_child = 0; > static int dying = 0; > static int workers_may_exit = 0; >@@ -181,6 +184,8 @@ static apr_uint32_t connection_count = 0; /* Number of open connections */ > static apr_uint32_t lingering_count = 0; /* Number of connections in lingering close */ > static apr_uint32_t suspended_count = 0; /* Number of suspended connections */ > static apr_uint32_t clogged_count = 0; /* Number of threads processing ssl conns */ >+static apr_uint32_t threads_shutdown = 0; /* Number of threads that have shutdown >+ early during graceful termination */ > static int resource_shortage = 0; > static fd_queue_t *worker_queue; > static fd_queue_info_t *worker_queue_info; >@@ -288,9 +293,8 @@ static apr_pollset_t *event_pollset; > /* The structure used to pass unique initialization info to each thread */ > typedef struct > { >- int pid; >- int tid; >- int sd; >+ int pslot; /* process slot */ >+ int tslot; /* worker slot of the thread */ > } proc_info; > > /* Structure used to pass information to the thread responsible for >@@ -335,6 +339,14 @@ typedef struct event_retained_data { > * scoreboard. > */ > int max_daemons_limit; >+ >+ /* >+ * All running workers, active and shutting down, including those that >+ * may be left from before a graceful restart. >+ * Not kept up-to-date when shutdown is pending. 
>+ */ >+ int total_daemons; >+ > /* > * idle_spawn_rate is the number of children that will be spawned on the > * next maintenance cycle if there aren't enough idle servers. It is >@@ -548,7 +560,7 @@ static int event_query(int query_code, int *result, apr_status_t *rv) > *result = ap_max_requests_per_child; > break; > case AP_MPMQ_MAX_DAEMONS: >- *result = ap_daemons_limit; >+ *result = active_daemons_limit; > break; > case AP_MPMQ_MPM_STATE: > *result = mpm_state; >@@ -585,27 +597,6 @@ static void event_note_child_started(int slot, pid_t pid) > retained->my_generation, slot, MPM_CHILD_STARTED); > } > >-static void event_note_child_lost_slot(int slot, pid_t newpid) >-{ >- ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf, APLOGNO(00458) >- "pid %" APR_PID_T_FMT " taking over scoreboard slot from " >- "%" APR_PID_T_FMT "%s", >- newpid, >- ap_scoreboard_image->parent[slot].pid, >- ap_scoreboard_image->parent[slot].quiescing ? >- " (quiescing)" : ""); >- ap_run_child_status(ap_server_conf, >- ap_scoreboard_image->parent[slot].pid, >- ap_scoreboard_image->parent[slot].generation, >- slot, MPM_CHILD_LOST_SLOT); >- /* Don't forget about this exiting child process, or we >- * won't be able to kill it if it doesn't exit by the >- * time the server is shut down. 
>- */ >- ap_register_extra_mpm_process(ap_scoreboard_image->parent[slot].pid, >- ap_scoreboard_image->parent[slot].generation); >-} >- > static const char *event_get_name(void) > { > return "event"; >@@ -911,6 +902,8 @@ static int start_lingering_close_nonblocking(event_conn_state_t *cs) > || apr_socket_shutdown(csd, APR_SHUTDOWN_WRITE) != APR_SUCCESS) { > apr_socket_close(csd); > ap_push_pool(worker_queue_info, cs->p); >+ if (dying) >+ ap_queue_interrupt_one(worker_queue); > return 0; > } > return start_lingering_close_common(cs, 0); >@@ -934,6 +927,8 @@ static int stop_lingering_close(event_conn_state_t *cs) > AP_DEBUG_ASSERT(0); > } > ap_push_pool(worker_queue_info, cs->p); >+ if (dying) >+ ap_queue_interrupt_one(worker_queue); > return 0; > } > >@@ -1219,6 +1214,9 @@ static void close_listeners(int process_slot, int *closed) > } > /* wake up the main thread */ > kill(ap_my_pid, SIGTERM); >+ >+ ap_free_idle_pools(worker_queue_info); >+ ap_queue_interrupt_all(worker_queue); > } > } > >@@ -1439,6 +1437,8 @@ static void process_lingering_close(event_conn_state_t *cs, const apr_pollfd_t * > TO_QUEUE_ELEM_INIT(cs); > > ap_push_pool(worker_queue_info, cs->p); >+ if (dying) >+ ap_queue_interrupt_one(worker_queue); > } > > /* call 'func' for all elements of 'q' with timeout less than 'timeout_time'. 
>@@ -1518,7 +1518,7 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy) > timer_event_t *te; > apr_status_t rc; > proc_info *ti = dummy; >- int process_slot = ti->pid; >+ int process_slot = ti->pslot; > apr_pool_t *tpool = apr_thread_pool_get(thd); > void *csd = NULL; > apr_pool_t *ptrans; /* Pool for per-transaction stuff */ >@@ -1584,6 +1584,12 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy) > *keepalive_q->total, > apr_atomic_read32(&lingering_count), > apr_atomic_read32(&suspended_count)); >+ if (dying) { >+ ap_log_error(APLOG_MARK, APLOG_TRACE6, 0, ap_server_conf, >+ "%u/%u workers shutdown", >+ apr_atomic_read32(&threads_shutdown), >+ threads_per_child); >+ } > apr_thread_mutex_unlock(timeout_mutex); > } > } >@@ -1818,11 +1824,12 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy) > /* If all workers are busy, we kill older keep-alive connections so that they > * may connect to another process. > */ >- if (workers_were_busy && *keepalive_q->total) { >- ap_log_error(APLOG_MARK, APLOG_TRACE1, 0, ap_server_conf, >- "All workers are busy, will close %d keep-alive " >- "connections", >- *keepalive_q->total); >+ if ((workers_were_busy || dying) && *keepalive_q->total) { >+ if (!dying) >+ ap_log_error(APLOG_MARK, APLOG_TRACE1, 0, ap_server_conf, >+ "All workers are busy, will close %d keep-alive " >+ "connections", >+ *keepalive_q->total); > process_timeout_queue(keepalive_q, 0, > start_lingering_close_nonblocking); > } >@@ -1869,6 +1876,34 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy) > return NULL; > } > >+/* >+ * During graceful shutdown, if there are more running worker threads than >+ * open connections, exit one worker thread. >+ * >+ * return 1 if thread should exit, 0 if it should continue running. 
>+ */ >+static int worker_thread_should_exit_early(void) >+{ >+ for (;;) { >+ apr_uint32_t conns = apr_atomic_read32(&connection_count); >+ apr_uint32_t dead = apr_atomic_read32(&threads_shutdown); >+ apr_uint32_t newdead; >+ >+ AP_DEBUG_ASSERT(dead <= threads_per_child); >+ if (conns >= threads_per_child - dead) >+ return 0; >+ >+ newdead = dead + 1; >+ if (apr_atomic_cas32(&threads_shutdown, newdead, dead) == dead) { >+ /* >+ * No other thread has exited in the mean time, safe to exit >+ * this one. >+ */ >+ return 1; >+ } >+ } >+} >+ > /* XXX For ungraceful termination/restart, we definitely don't want to > * wait for active connections to finish but we may want to wait > * for idle workers to get out of the queue code and release mutexes, >@@ -1879,8 +1914,8 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy) > static void *APR_THREAD_FUNC worker_thread(apr_thread_t * thd, void *dummy) > { > proc_info *ti = dummy; >- int process_slot = ti->pid; >- int thread_slot = ti->tid; >+ int process_slot = ti->pslot; >+ int thread_slot = ti->tslot; > apr_socket_t *csd = NULL; > event_conn_state_t *cs; > apr_pool_t *ptrans; /* Pool for per-transaction stuff */ >@@ -1916,6 +1951,9 @@ static void *APR_THREAD_FUNC worker_thread(apr_thread_t * thd, void *dummy) > if (workers_may_exit) { > break; > } >+ if (dying && worker_thread_should_exit_early()) { >+ break; >+ } > > te = NULL; > rv = ap_queue_pop_something(worker_queue, &csd, &cs, &ptrans, &te); >@@ -1993,9 +2031,8 @@ static void create_listener_thread(thread_starter * ts) > apr_status_t rv; > > my_info = (proc_info *) ap_malloc(sizeof(proc_info)); >- my_info->pid = my_child_num; >- my_info->tid = -1; /* listener thread doesn't have a thread slot */ >- my_info->sd = 0; >+ my_info->pslot = my_child_num; >+ my_info->tslot = -1; /* listener thread doesn't have a thread slot */ > rv = apr_thread_create(&ts->listener, thread_attr, listener_thread, > my_info, pchild); > if (rv != APR_SUCCESS) { >@@ 
-2103,14 +2140,13 @@ static void *APR_THREAD_FUNC start_threads(apr_thread_t * thd, void *dummy) > int status = > ap_scoreboard_image->servers[my_child_num][i].status; > >- if (status != SERVER_GRACEFUL && status != SERVER_DEAD) { >+ if (status != SERVER_DEAD) { > continue; > } > > my_info = (proc_info *) ap_malloc(sizeof(proc_info)); >- my_info->pid = my_child_num; >- my_info->tid = i; >- my_info->sd = 0; >+ my_info->pslot = my_child_num; >+ my_info->tslot = i; > > /* We are creating threads right now */ > ap_update_child_status_from_indexes(my_child_num, i, >@@ -2411,6 +2447,15 @@ static int make_child(server_rec * s, int slot, int bucket) > retained->max_daemons_limit = slot + 1; > } > >+ if (ap_scoreboard_image->parent[slot].pid != 0) { >+ /* XXX replace with assert or remove ? */ >+ ap_log_error(APLOG_MARK, APLOG_ERR, 0, ap_server_conf, APLOGNO(03455) >+ "BUG: Scoreboard slot %d should be empty but is " >+ "in use by pid %" APR_PID_T_FMT, >+ slot, ap_scoreboard_image->parent[slot].pid); >+ return -1; >+ } >+ > if (one_process) { > my_bucket = &all_buckets[0]; > >@@ -2464,17 +2509,12 @@ static int make_child(server_rec * s, int slot, int bucket) > return -1; > } > >- if (ap_scoreboard_image->parent[slot].pid != 0) { >- /* This new child process is squatting on the scoreboard >- * entry owned by an exiting child process, which cannot >- * exit until all active requests complete. 
>- */ >- event_note_child_lost_slot(slot, pid); >- } > ap_scoreboard_image->parent[slot].quiescing = 0; > ap_scoreboard_image->parent[slot].not_accepting = 0; > ap_scoreboard_image->parent[slot].bucket = bucket; > event_note_child_started(slot, pid); >+ active_daemons++; >+ retained->total_daemons++; > return 0; > } > >@@ -2483,7 +2523,7 @@ static void startup_children(int number_to_start) > { > int i; > >- for (i = 0; number_to_start && i < ap_daemons_limit; ++i) { >+ for (i = 0; number_to_start && i < server_limit; ++i) { > if (ap_scoreboard_image->parent[i].pid != 0) { > continue; > } >@@ -2497,34 +2537,22 @@ static void startup_children(int number_to_start) > static void perform_idle_server_maintenance(int child_bucket, int num_buckets) > { > int i, j; >- int idle_thread_count; >+ int idle_thread_count = 0; > worker_score *ws; > process_score *ps; >- int free_length; >- int totally_free_length = 0; >+ int free_length = 0; > int free_slots[MAX_SPAWN_RATE]; >- int last_non_dead; >- int total_non_dead; >+ int last_non_dead = -1; > int active_thread_count = 0; > >- /* initialize the free_list */ >- free_length = 0; >- >- idle_thread_count = 0; >- last_non_dead = -1; >- total_non_dead = 0; >- >- for (i = 0; i < ap_daemons_limit; ++i) { >+ for (i = 0; i < server_limit; ++i) { > /* Initialization to satisfy the compiler. 
It doesn't know > * that threads_per_child is always > 0 */ > int status = SERVER_DEAD; >- int any_dying_threads = 0; >- int any_dead_threads = 0; >- int all_dead_threads = 1; > int child_threads_active = 0; > > if (i >= retained->max_daemons_limit && >- totally_free_length == retained->idle_spawn_rate[child_bucket]) { >+ free_length == retained->idle_spawn_rate[child_bucket]) { > /* short cut if all active processes have been examined and > * enough empty scoreboard slots have been found > */ >@@ -2532,25 +2560,17 @@ static void perform_idle_server_maintenance(int child_bucket, int num_buckets) > break; > } > ps = &ap_scoreboard_image->parent[i]; >- for (j = 0; j < threads_per_child; j++) { >- ws = &ap_scoreboard_image->servers[i][j]; >- status = ws->status; >- >- /* XXX any_dying_threads is probably no longer needed GLA */ >- any_dying_threads = any_dying_threads || >- (status == SERVER_GRACEFUL); >- any_dead_threads = any_dead_threads || (status == SERVER_DEAD); >- all_dead_threads = all_dead_threads && >- (status == SERVER_DEAD || status == SERVER_GRACEFUL); >- >- /* We consider a starting server as idle because we started it >- * at least a cycle ago, and if it still hasn't finished starting >- * then we're just going to swamp things worse by forking more. >- * So we hopefully won't need to fork more if we count it. >- * This depends on the ordering of SERVER_READY and SERVER_STARTING. >- */ >- if (ps->pid != 0) { /* XXX just set all_dead_threads in outer >- for loop if no pid? not much else matters */ >+ if (ps->pid != 0) { >+ for (j = 0; j < threads_per_child; j++) { >+ ws = &ap_scoreboard_image->servers[i][j]; >+ status = ws->status; >+ >+ /* We consider a starting server as idle because we started it >+ * at least a cycle ago, and if it still hasn't finished starting >+ * then we're just going to swamp things worse by forking more. >+ * So we hopefully won't need to fork more if we count it. 
>+ * This depends on the ordering of SERVER_READY and SERVER_STARTING. >+ */ > if (status <= SERVER_READY && !ps->quiescing && !ps->not_accepting > && ps->generation == retained->my_generation > && ps->bucket == child_bucket) >@@ -2561,39 +2581,13 @@ static void perform_idle_server_maintenance(int child_bucket, int num_buckets) > ++child_threads_active; > } > } >+ last_non_dead = i; > } > active_thread_count += child_threads_active; >- if (any_dead_threads >- && totally_free_length < retained->idle_spawn_rate[child_bucket] >- && free_length < MAX_SPAWN_RATE / num_buckets >- && (!ps->pid /* no process in the slot */ >- || ps->quiescing)) { /* or at least one is going away */ >- if (all_dead_threads) { >- /* great! we prefer these, because the new process can >- * start more threads sooner. So prioritize this slot >- * by putting it ahead of any slots with active threads. >- * >- * first, make room by moving a slot that's potentially still >- * in use to the end of the array >- */ >- free_slots[free_length] = free_slots[totally_free_length]; >- free_slots[totally_free_length++] = i; >- } >- else { >- /* slot is still in use - back of the bus >- */ >- free_slots[free_length] = i; >- } >- ++free_length; >- } >- else if (child_threads_active == threads_per_child) { >+ if (!ps->pid && free_length < retained->idle_spawn_rate[child_bucket]) >+ free_slots[free_length++] = i; >+ else if (child_threads_active == threads_per_child) > had_healthy_child = 1; >- } >- /* XXX if (!ps->quiescing) is probably more reliable GLA */ >- if (!any_dying_threads) { >- last_non_dead = i; >- ++total_non_dead; >- } > } > > if (retained->sick_child_detected) { >@@ -2621,32 +2615,56 @@ static void perform_idle_server_maintenance(int child_bucket, int num_buckets) > > retained->max_daemons_limit = last_non_dead + 1; > >- if (idle_thread_count > max_spare_threads / num_buckets) { >- /* Kill off one child */ >- ap_mpm_podx_signal(all_buckets[child_bucket].pod, >- AP_MPM_PODX_GRACEFUL); >- 
retained->idle_spawn_rate[child_bucket] = 1; >+ if (idle_thread_count > max_spare_threads / num_buckets) >+ { >+ /* >+ * Child processes that we ask to shut down won't die immediately >+ * but may stay around for a long time when they finish their >+ * requests. If the server load changes many times, many such >+ * gracefully finishing processes may accumulate, filling up the >+ * scoreboard. To avoid running out of scoreboard entries, we >+ * don't shut down more processes when the total number of processes >+ * is high. >+ * >+ * XXX It would be nice if we could >+ * XXX - kill processes without keepalive connections first >+ * XXX - tell children to stop accepting new connections, and >+ * XXX depending on server load, later be able to resurrect them >+ * or kill them >+ */ >+ if (retained->total_daemons <= active_daemons_limit && >+ retained->total_daemons < server_limit) { >+ /* Kill off one child */ >+ ap_mpm_podx_signal(all_buckets[child_bucket].pod, >+ AP_MPM_PODX_GRACEFUL); >+ retained->idle_spawn_rate[child_bucket] = 1; >+ active_daemons--; >+ } else { >+ ap_log_error(APLOG_MARK, APLOG_TRACE5, 0, ap_server_conf, >+ "Not shutting down child: total daemons %d / " >+ "active limit %d / ServerLimit %d", >+ retained->total_daemons, active_daemons_limit, >+ server_limit); >+ } > } > else if (idle_thread_count < min_spare_threads / num_buckets) { >- /* terminate the free list */ >- if (free_length == 0) { /* scoreboard is full, can't fork */ >- >- if (active_thread_count >= ap_daemons_limit * threads_per_child) { >- if (!retained->maxclients_reported) { >- /* only report this condition once */ >- ap_log_error(APLOG_MARK, APLOG_ERR, 0, ap_server_conf, APLOGNO(00484) >- "server reached MaxRequestWorkers setting, " >- "consider raising the MaxRequestWorkers " >- "setting"); >- retained->maxclients_reported = 1; >- } >- } >- else { >- ap_log_error(APLOG_MARK, APLOG_ERR, 0, ap_server_conf, APLOGNO(00485) >- "scoreboard is full, not at MaxRequestWorkers"); >+ if 
(active_thread_count >= max_workers) { >+ if (!retained->maxclients_reported) { >+ /* only report this condition once */ >+ ap_log_error(APLOG_MARK, APLOG_ERR, 0, ap_server_conf, APLOGNO(00484) >+ "server reached MaxRequestWorkers setting, " >+ "consider raising the MaxRequestWorkers " >+ "setting"); >+ retained->maxclients_reported = 1; > } > retained->idle_spawn_rate[child_bucket] = 1; > } >+ else if (free_length == 0) { /* scoreboard is full, can't fork */ >+ ap_log_error(APLOG_MARK, APLOG_ERR, 0, ap_server_conf, APLOGNO() >+ "scoreboard is full, not at MaxRequestWorkers." >+ "Increase ServerLimit."); >+ retained->idle_spawn_rate[child_bucket] = 1; >+ } > else { > if (free_length > retained->idle_spawn_rate[child_bucket]) { > free_length = retained->idle_spawn_rate[child_bucket]; >@@ -2657,10 +2675,17 @@ static void perform_idle_server_maintenance(int child_bucket, int num_buckets) > "to increase StartServers, ThreadsPerChild " > "or Min/MaxSpareThreads), " > "spawning %d children, there are around %d idle " >- "threads, and %d total children", free_length, >- idle_thread_count, total_non_dead); >+ "threads, %d active children, and %d children " >+ "that are shutting down", free_length, >+ idle_thread_count, active_daemons, >+ retained->total_daemons); > } > for (i = 0; i < free_length; ++i) { >+ ap_log_error(APLOG_MARK, APLOG_TRACE5, 0, ap_server_conf, >+ "Spawning new child: slot %d active / " >+ "total daemons: %d/%d", >+ free_slots[i], active_daemons, >+ retained->total_daemons); > make_child(ap_server_conf, free_slots[i], child_bucket); > } > /* the next time around we want to spawn twice as many if this >@@ -2682,7 +2707,6 @@ static void perform_idle_server_maintenance(int child_bucket, int num_buckets) > > static void server_main_loop(int remaining_children_to_start, int num_buckets) > { >- ap_generation_t old_gen; > int child_slot; > apr_exit_why_e exitwhy; > int status, processed_status; >@@ -2732,13 +2756,15 @@ static void server_main_loop(int 
remaining_children_to_start, int num_buckets) > > event_note_child_killed(child_slot, 0, 0); > ps = &ap_scoreboard_image->parent[child_slot]; >+ if (!ps->quiescing) >+ active_daemons--; > ps->quiescing = 0; >+ retained->total_daemons--; > if (processed_status == APEXIT_CHILDSICK) { > /* resource shortage, minimize the fork rate */ > retained->idle_spawn_rate[ps->bucket] = 1; > } >- else if (remaining_children_to_start >- && child_slot < ap_daemons_limit) { >+ else if (remaining_children_to_start) { > /* we're still doing a 1-for-1 replacement of dead > * children with new children > */ >@@ -2746,24 +2772,12 @@ static void server_main_loop(int remaining_children_to_start, int num_buckets) > --remaining_children_to_start; > } > } >- else if (ap_unregister_extra_mpm_process(pid.pid, &old_gen) == 1) { >- >- event_note_child_killed(-1, /* already out of the scoreboard */ >- pid.pid, old_gen); >- if (processed_status == APEXIT_CHILDSICK >- && old_gen == retained->my_generation) { >- /* resource shortage, minimize the fork rate */ >- for (i = 0; i < num_buckets; i++) { >- retained->idle_spawn_rate[i] = 1; >- } >- } > #if APR_HAS_OTHER_CHILD >- } > else if (apr_proc_other_child_alert(&pid, APR_OC_REASON_DEATH, > status) == 0) { > /* handled */ >-#endif > } >+#endif > else if (retained->is_graceful) { > /* Great, we've probably just lost a slot in the > * scoreboard. Somehow we don't know about this child. >@@ -2825,8 +2839,8 @@ static int event_run(apr_pool_t * _pconf, apr_pool_t * plog, server_rec * s) > /* Don't thrash since num_buckets depends on the > * system and the number of online CPU cores... 
> */ >- if (ap_daemons_limit < num_buckets) >- ap_daemons_limit = num_buckets; >+ if (active_daemons_limit < num_buckets) >+ active_daemons_limit = num_buckets; > if (ap_daemons_to_start < num_buckets) > ap_daemons_to_start = num_buckets; > /* We want to create as much children at a time as the number of buckets, >@@ -2850,8 +2864,8 @@ static int event_run(apr_pool_t * _pconf, apr_pool_t * plog, server_rec * s) > * supposed to start up without the 1 second penalty between each fork. > */ > remaining_children_to_start = ap_daemons_to_start; >- if (remaining_children_to_start > ap_daemons_limit) { >- remaining_children_to_start = ap_daemons_limit; >+ if (remaining_children_to_start > active_daemons_limit) { >+ remaining_children_to_start = active_daemons_limit; > } > if (!retained->is_graceful) { > startup_children(remaining_children_to_start); >@@ -2881,7 +2895,7 @@ static int event_run(apr_pool_t * _pconf, apr_pool_t * plog, server_rec * s) > * Kill child processes, tell them to call child_exit, etc... 
> */ > for (i = 0; i < num_buckets; i++) { >- ap_mpm_podx_killpg(all_buckets[i].pod, ap_daemons_limit, >+ ap_mpm_podx_killpg(all_buckets[i].pod, active_daemons_limit, > AP_MPM_PODX_RESTART); > } > ap_reclaim_child_processes(1, /* Start with SIGTERM */ >@@ -2905,7 +2919,7 @@ static int event_run(apr_pool_t * _pconf, apr_pool_t * plog, server_rec * s) > /* Close our listeners, and then ask our children to do same */ > ap_close_listeners(); > for (i = 0; i < num_buckets; i++) { >- ap_mpm_podx_killpg(all_buckets[i].pod, ap_daemons_limit, >+ ap_mpm_podx_killpg(all_buckets[i].pod, active_daemons_limit, > AP_MPM_PODX_GRACEFUL); > } > ap_relieve_child_processes(event_note_child_killed); >@@ -2933,7 +2947,7 @@ static int event_run(apr_pool_t * _pconf, apr_pool_t * plog, server_rec * s) > ap_relieve_child_processes(event_note_child_killed); > > active_children = 0; >- for (index = 0; index < ap_daemons_limit; ++index) { >+ for (index = 0; index < retained->max_daemons_limit; ++index) { > if (ap_mpm_safe_kill(MPM_CHILD_PID(index), 0) == APR_SUCCESS) { > active_children = 1; > /* Having just one child is enough to stay around */ >@@ -2948,7 +2962,7 @@ static int event_run(apr_pool_t * _pconf, apr_pool_t * plog, server_rec * s) > * really dead. > */ > for (i = 0; i < num_buckets; i++) { >- ap_mpm_podx_killpg(all_buckets[i].pod, ap_daemons_limit, >+ ap_mpm_podx_killpg(all_buckets[i].pod, active_daemons_limit, > AP_MPM_PODX_RESTART); > } > ap_reclaim_child_processes(1, event_note_child_killed); >@@ -2977,7 +2991,7 @@ static int event_run(apr_pool_t * _pconf, apr_pool_t * plog, server_rec * s) > " received. Doing graceful restart"); > /* wake up the children...time to die. 
But we'll have more soon */ > for (i = 0; i < num_buckets; i++) { >- ap_mpm_podx_killpg(all_buckets[i].pod, ap_daemons_limit, >+ ap_mpm_podx_killpg(all_buckets[i].pod, active_daemons_limit, > AP_MPM_PODX_GRACEFUL); > } > >@@ -2992,7 +3006,7 @@ static int event_run(apr_pool_t * _pconf, apr_pool_t * plog, server_rec * s) > * pthreads are stealing signals from us left and right. > */ > for (i = 0; i < num_buckets; i++) { >- ap_mpm_podx_killpg(all_buckets[i].pod, ap_daemons_limit, >+ ap_mpm_podx_killpg(all_buckets[i].pod, active_daemons_limit, > AP_MPM_PODX_RESTART); > } > >@@ -3002,6 +3016,8 @@ static int event_run(apr_pool_t * _pconf, apr_pool_t * plog, server_rec * s) > "SIGHUP received. Attempting to restart"); > } > >+ active_daemons = 0; >+ > return OK; > } > >@@ -3215,9 +3231,9 @@ static int event_pre_config(apr_pool_t * pconf, apr_pool_t * plog, > max_spare_threads = DEFAULT_MAX_FREE_DAEMON * DEFAULT_THREADS_PER_CHILD; > server_limit = DEFAULT_SERVER_LIMIT; > thread_limit = DEFAULT_THREAD_LIMIT; >- ap_daemons_limit = server_limit; >+ active_daemons_limit = server_limit; > threads_per_child = DEFAULT_THREADS_PER_CHILD; >- max_workers = ap_daemons_limit * threads_per_child; >+ max_workers = active_daemons_limit * threads_per_child; > had_healthy_child = 0; > ap_extended_status = 0; > >@@ -3426,10 +3442,10 @@ static int event_check_config(apr_pool_t *p, apr_pool_t *plog, > max_workers = threads_per_child; > } > >- ap_daemons_limit = max_workers / threads_per_child; >+ active_daemons_limit = max_workers / threads_per_child; > > if (max_workers % threads_per_child) { >- int tmp_max_workers = ap_daemons_limit * threads_per_child; >+ int tmp_max_workers = active_daemons_limit * threads_per_child; > > if (startup) { > ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL, APLOGNO(00513) >@@ -3437,7 +3453,7 @@ static int event_check_config(apr_pool_t *p, apr_pool_t *plog, > "multiple of ThreadsPerChild of %d, decreasing to nearest " > "multiple %d, for a 
maximum of %d servers.", > max_workers, threads_per_child, tmp_max_workers, >- ap_daemons_limit); >+ active_daemons_limit); > } else { > ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s, APLOGNO(00514) > "MaxRequestWorkers of %d is not an integer multiple " >@@ -3448,25 +3464,25 @@ static int event_check_config(apr_pool_t *p, apr_pool_t *plog, > max_workers = tmp_max_workers; > } > >- if (ap_daemons_limit > server_limit) { >+ if (active_daemons_limit > server_limit) { > if (startup) { > ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL, APLOGNO(00515) > "WARNING: MaxRequestWorkers of %d would require %d servers " > "and would exceed ServerLimit of %d, decreasing to %d. " > "To increase, please see the ServerLimit directive.", >- max_workers, ap_daemons_limit, server_limit, >+ max_workers, active_daemons_limit, server_limit, > server_limit * threads_per_child); > } else { > ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s, APLOGNO(00516) > "MaxRequestWorkers of %d would require %d servers and " > "exceed ServerLimit of %d, decreasing to %d", >- max_workers, ap_daemons_limit, server_limit, >+ max_workers, active_daemons_limit, server_limit, > server_limit * threads_per_child); > } >- ap_daemons_limit = server_limit; >+ active_daemons_limit = server_limit; > } > >- /* ap_daemons_to_start > ap_daemons_limit checked in ap_mpm_run() */ >+ /* ap_daemons_to_start > active_daemons_limit checked in ap_mpm_run() */ > if (ap_daemons_to_start < 1) { > if (startup) { > ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL, APLOGNO(00517) >diff --git a/server/mpm/event/fdqueue.c b/server/mpm/event/fdqueue.c >index 343146f..64b318d 100644 >--- a/server/mpm/event/fdqueue.c >+++ b/server/mpm/event/fdqueue.c >@@ -280,6 +280,19 @@ void ap_pop_pool(apr_pool_t ** recycled_pool, fd_queue_info_t * queue_info) > } > } > >+void ap_free_idle_pools(fd_queue_info_t *queue_info) >+{ >+ apr_pool_t *p; >+ >+ queue_info->max_recycled_pools = 0; >+ do { >+ ap_pop_pool(&p, 
queue_info); >+ if (p != NULL) >+ apr_pool_destroy(p); >+ } while (p != NULL); >+} >+ >+ > apr_status_t ap_queue_info_term(fd_queue_info_t * queue_info) > { > apr_status_t rv; >@@ -477,17 +490,30 @@ apr_status_t ap_queue_pop_something(fd_queue_t * queue, apr_socket_t ** sd, > return rv; > } > >-apr_status_t ap_queue_interrupt_all(fd_queue_t * queue) >+static apr_status_t queue_interrupt(fd_queue_t * queue, int all) > { > apr_status_t rv; > > if ((rv = apr_thread_mutex_lock(queue->one_big_mutex)) != APR_SUCCESS) { > return rv; > } >- apr_thread_cond_broadcast(queue->not_empty); >+ if (all) >+ apr_thread_cond_broadcast(queue->not_empty); >+ else >+ apr_thread_cond_signal(queue->not_empty); > return apr_thread_mutex_unlock(queue->one_big_mutex); > } > >+apr_status_t ap_queue_interrupt_all(fd_queue_t * queue) >+{ >+ return queue_interrupt(queue, 1); >+} >+ >+apr_status_t ap_queue_interrupt_one(fd_queue_t * queue) >+{ >+ return queue_interrupt(queue, 0); >+} >+ > apr_status_t ap_queue_term(fd_queue_t * queue) > { > apr_status_t rv; >diff --git a/server/mpm/event/fdqueue.h b/server/mpm/event/fdqueue.h >index 955816b..37be684 100644 >--- a/server/mpm/event/fdqueue.h >+++ b/server/mpm/event/fdqueue.h >@@ -52,6 +52,7 @@ apr_status_t ap_queue_info_wait_for_idler(fd_queue_info_t * queue_info, > int *had_to_block); > apr_status_t ap_queue_info_term(fd_queue_info_t * queue_info); > apr_uint32_t ap_queue_info_get_idlers(fd_queue_info_t * queue_info); >+void ap_free_idle_pools(fd_queue_info_t *queue_info); > > struct fd_queue_elem_t > { >@@ -98,6 +99,7 @@ apr_status_t ap_queue_pop_something(fd_queue_t * queue, apr_socket_t ** sd, > event_conn_state_t ** ecs, apr_pool_t ** p, > timer_event_t ** te); > apr_status_t ap_queue_interrupt_all(fd_queue_t * queue); >+apr_status_t ap_queue_interrupt_one(fd_queue_t * queue); > apr_status_t ap_queue_term(fd_queue_t * queue); > > #endif /* FDQUEUE_H */ >diff --git a/server/mpm_unix.c b/server/mpm_unix.c >index 1a7f935..926eff4 100644 >--- 
a/server/mpm_unix.c >+++ b/server/mpm_unix.c >@@ -63,7 +63,13 @@ > #undef APLOG_MODULE_INDEX > #define APLOG_MODULE_INDEX AP_CORE_MODULE_INDEX > >-typedef enum {DO_NOTHING, SEND_SIGTERM, SEND_SIGKILL, GIVEUP} action_t; >+typedef enum { >+ DO_NOTHING, >+ SEND_SIGTERM, >+ SEND_SIGTERM_NOLOG, >+ SEND_SIGKILL, >+ GIVEUP >+} action_t; > > typedef struct extra_process_t { > struct extra_process_t *next; >@@ -142,6 +148,8 @@ static int reclaim_one_pid(pid_t pid, action_t action) > " still did not exit, " > "sending a SIGTERM", > pid); >+ /* FALLTHROUGH */ >+ case SEND_SIGTERM_NOLOG: > kill(pid, SIGTERM); > break; > >@@ -193,6 +201,7 @@ AP_DECLARE(void) ap_reclaim_child_processes(int terminate, > * children but take no action against > * stragglers > */ >+ {SEND_SIGTERM_NOLOG, 0}, /* skipped if terminate == 0 */ > {SEND_SIGTERM, apr_time_from_sec(3)}, > {SEND_SIGTERM, apr_time_from_sec(5)}, > {SEND_SIGTERM, apr_time_from_sec(7)}, >@@ -202,19 +211,21 @@ AP_DECLARE(void) ap_reclaim_child_processes(int terminate, > int cur_action; /* index of action we decided to take this > * iteration > */ >- int next_action = 1; /* index of first real action */ >+ int next_action = terminate ? 
1 : 2; /* index of first real action */ > > ap_mpm_query(AP_MPMQ_MAX_DAEMON_USED, &max_daemons); > > do { >- apr_sleep(waittime); >- /* don't let waittime get longer than 1 second; otherwise, we don't >- * react quickly to the last child exiting, and taking action can >- * be delayed >- */ >- waittime = waittime * 4; >- if (waittime > apr_time_from_sec(1)) { >- waittime = apr_time_from_sec(1); >+ if (action_table[next_action].action_time > 0) { >+ apr_sleep(waittime); >+ /* don't let waittime get longer than 1 second; otherwise, we don't >+ * react quickly to the last child exiting, and taking action can >+ * be delayed >+ */ >+ waittime = waittime * 4; >+ if (waittime > apr_time_from_sec(1)) { >+ waittime = apr_time_from_sec(1); >+ } > } > > /* see what action to take, if any */ >diff --git a/server/scoreboard.c b/server/scoreboard.c >index 8e3403f..d83a849 100644 >--- a/server/scoreboard.c >+++ b/server/scoreboard.c >@@ -399,7 +399,7 @@ AP_DECLARE(int) ap_find_child_by_pid(apr_proc_t *pid) > int i; > int max_daemons_limit = 0; > >- ap_mpm_query(AP_MPMQ_MAX_DAEMONS, &max_daemons_limit); >+ ap_mpm_query(AP_MPMQ_MAX_DAEMON_USED, &max_daemons_limit); > > for (i = 0; i < max_daemons_limit; ++i) { > if (ap_scoreboard_image->parent[i].pid == pid->pid) {
You cannot view the attachment while viewing its details because your browser does not support IFRAMEs.
View the attachment on a separate page.
View Attachment As Diff
View Attachment As Raw
Actions:
View
|
Diff
Attachments on
bug 53555
:
33154
|
33158
|
33749
|
33750
|
34201
| 34202