--- a/modules/generators/mod_status.c +++ a/modules/generators/mod_status.c @@ -529,7 +529,7 @@ static int status_handler(request_rec *r) if (is_async) { int write_completion = 0, lingering_close = 0, keep_alive = 0, - connections = 0; + connections = 0, stopping = 0, procs = 0; /* * These differ from 'busy' and 'ready' in how gracefully finishing * threads are counted. XXX: How to make this clear in the html? @@ -537,13 +537,15 @@ static int status_handler(request_rec *r) int busy_workers = 0, idle_workers = 0; if (!short_report) ap_rputs("\n\n\n" - "" + "" + "" + "" "\n" "" - "\n" + "\n" "" - "" - "\n", r); + "" + "\n", r); for (i = 0; i < server_limit; ++i) { ps_record = ap_get_scoreboard_process(i); if (ps_record->pid) { @@ -553,26 +555,45 @@ static int status_handler(request_rec *r) lingering_close += ps_record->lingering_close; busy_workers += thread_busy_buffer[i]; idle_workers += thread_idle_buffer[i]; - if (!short_report) - ap_rprintf(r, "" - "" + if (!short_report) { + const char *dying = "no"; + const char *old = ""; + if (ps_record->quiescing) { + dying = "yes"; + stopping++; + } + if (ps_record->generation != mpm_generation) + old = " (old gen)"; + procs++; + ap_rprintf(r, "" + "" + "" + "" "" "\n", - ps_record->pid, ps_record->connections, + i, ps_record->pid, + dying, old, + ps_record->connections, ps_record->not_accepting ? "no" : "yes", - thread_busy_buffer[i], thread_idle_buffer[i], + thread_busy_buffer[i], + thread_idle_buffer[i], ps_record->write_completion, ps_record->keep_alive, ps_record->lingering_close); + } } } if (!short_report) { - ap_rprintf(r, "" - "" + ap_rprintf(r, "" + "" + "" + "" + "" "\n
PID
SlotPIDStoppingConnectionsThreadsAsync connections
Async connections
totalacceptingbusyidlewritingkeep-aliveclosing
busyidlewritingkeep-aliveclosing
%" APR_PID_T_FMT "%u%s%u%u
%u%" APR_PID_T_FMT "%s%s%u%s%u%u%u%u%u
Sum%d %d%d%d%d%d
Sum%d%d%d %d%d%d%d%d
\n", - connections, busy_workers, idle_workers, + procs, stopping, + connections, + busy_workers, idle_workers, write_completion, keep_alive, lingering_close); - } else { ap_rprintf(r, "ConnsTotal: %d\n" @@ -621,7 +642,7 @@ static int status_handler(request_rec *r) "\"G\" Gracefully finishing,
\n" "\"I\" Idle cleanup of worker, \n" "\".\" Open slot with no current process
\n" - "

\n", r); + "

\n", r); if (!ap_extended_status) { int j; int k = 0; --- a/server/mpm/event/event.c +++ a/server/mpm/event/event.c @@ -160,15 +160,18 @@ #endif #define WORKER_FACTOR_SCALE 16 /* scale factor to allow fractional values */ static unsigned int worker_factor = DEFAULT_WORKER_FACTOR * WORKER_FACTOR_SCALE; - -static int threads_per_child = 0; /* Worker threads per child */ -static int ap_daemons_to_start = 0; -static int min_spare_threads = 0; -static int max_spare_threads = 0; -static int ap_daemons_limit = 0; -static int max_workers = 0; -static int server_limit = 0; -static int thread_limit = 0; + /* AsyncRequestWorkerFactor * 16 */ + +static int threads_per_child = 0; /* ThreadsPerChild */ +static int ap_daemons_to_start = 0; /* StartServers */ +static int min_spare_threads = 0; /* MinSpareThreads */ +static int max_spare_threads = 0; /* MaxSpareThreads */ +static int active_daemons_limit = 0; /* MaxRequestWorkers / ThreadsPerChild */ +static int active_daemons = 0; /* workers that still active, i.e. are + not shutting down gracefully */ +static int max_workers = 0; /* MaxRequestWorkers */ +static int server_limit = 0; /* ServerLimit */ +static int thread_limit = 0; /* ThreadLimit */ static int had_healthy_child = 0; static int dying = 0; static int workers_may_exit = 0; @@ -181,6 +184,8 @@ static apr_uint32_t connection_count = 0; /* Number of open connections */ static apr_uint32_t lingering_count = 0; /* Number of connections in lingering close */ static apr_uint32_t suspended_count = 0; /* Number of suspended connections */ static apr_uint32_t clogged_count = 0; /* Number of threads processing ssl conns */ +static apr_uint32_t threads_shutdown = 0; /* Number of threads that have shutdown + early during graceful termination */ static int resource_shortage = 0; static fd_queue_t *worker_queue; static fd_queue_info_t *worker_queue_info; @@ -288,9 +293,8 @@ static apr_pollset_t *event_pollset; /* The structure used to pass unique initialization info to each thread */ typedef struct { - int pid; - int tid; - int sd; + int pslot; /* process slot */ + int tslot; /* worker slot of the thread */ } proc_info; /* Structure used to pass information to the thread responsible for @@ -335,6 +339,14 @@ typedef struct event_retained_data { * scoreboard. */ int max_daemons_limit; + + /* + * All running workers, active and shutting down, including those that + * may be left from before a graceful restart. + * Not kept up-to-date when shutdown is pending. + */ + int total_daemons; + /* * idle_spawn_rate is the number of children that will be spawned on the * next maintenance cycle if there aren't enough idle servers. It is @@ -548,7 +560,7 @@ static int event_query(int query_code, int *result, apr_status_t *rv) *result = ap_max_requests_per_child; break; case AP_MPMQ_MAX_DAEMONS: - *result = ap_daemons_limit; + *result = active_daemons_limit; break; case AP_MPMQ_MPM_STATE: *result = mpm_state; @@ -585,27 +597,6 @@ static void event_note_child_started(int slot, pid_t pid) retained->my_generation, slot, MPM_CHILD_STARTED); } -static void event_note_child_lost_slot(int slot, pid_t newpid) -{ - ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf, APLOGNO(00458) - "pid %" APR_PID_T_FMT " taking over scoreboard slot from " - "%" APR_PID_T_FMT "%s", - newpid, - ap_scoreboard_image->parent[slot].pid, - ap_scoreboard_image->parent[slot].quiescing ? - " (quiescing)" : ""); - ap_run_child_status(ap_server_conf, - ap_scoreboard_image->parent[slot].pid, - ap_scoreboard_image->parent[slot].generation, - slot, MPM_CHILD_LOST_SLOT); - /* Don't forget about this exiting child process, or we - * won't be able to kill it if it doesn't exit by the - * time the server is shut down. - */ - ap_register_extra_mpm_process(ap_scoreboard_image->parent[slot].pid, - ap_scoreboard_image->parent[slot].generation); -} - static const char *event_get_name(void) { return "event"; @@ -911,6 +902,8 @@ static int start_lingering_close_nonblocking(event_conn_state_t *cs) || apr_socket_shutdown(csd, APR_SHUTDOWN_WRITE) != APR_SUCCESS) { apr_socket_close(csd); ap_push_pool(worker_queue_info, cs->p); + if (dying) + ap_queue_interrupt_one(worker_queue); return 0; } return start_lingering_close_common(cs, 0); @@ -934,6 +927,8 @@ static int stop_lingering_close(event_conn_state_t *cs) AP_DEBUG_ASSERT(0); } ap_push_pool(worker_queue_info, cs->p); + if (dying) + ap_queue_interrupt_one(worker_queue); return 0; } @@ -1219,6 +1214,9 @@ static void close_listeners(int process_slot, int *closed) } /* wake up the main thread */ kill(ap_my_pid, SIGTERM); + + ap_free_idle_pools(worker_queue_info); + ap_queue_interrupt_all(worker_queue); } } @@ -1439,6 +1437,8 @@ static void process_lingering_close(event_conn_state_t *cs, const apr_pollfd_t * TO_QUEUE_ELEM_INIT(cs); ap_push_pool(worker_queue_info, cs->p); + if (dying) + ap_queue_interrupt_one(worker_queue); } /* call 'func' for all elements of 'q' with timeout less than 'timeout_time'. @@ -1518,7 +1518,7 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy) timer_event_t *te; apr_status_t rc; proc_info *ti = dummy; - int process_slot = ti->pid; + int process_slot = ti->pslot; apr_pool_t *tpool = apr_thread_pool_get(thd); void *csd = NULL; apr_pool_t *ptrans; /* Pool for per-transaction stuff */ @@ -1584,6 +1584,12 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy) *keepalive_q->total, apr_atomic_read32(&lingering_count), apr_atomic_read32(&suspended_count)); + if (dying) { + ap_log_error(APLOG_MARK, APLOG_TRACE6, 0, ap_server_conf, + "%u/%u workers shutdown", + apr_atomic_read32(&threads_shutdown), + threads_per_child); + } apr_thread_mutex_unlock(timeout_mutex); } } @@ -1818,11 +1824,12 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy) /* If all workers are busy, we kill older keep-alive connections so that they * may connect to another process. */ - if (workers_were_busy && *keepalive_q->total) { - ap_log_error(APLOG_MARK, APLOG_TRACE1, 0, ap_server_conf, - "All workers are busy, will close %d keep-alive " - "connections", - *keepalive_q->total); + if ((workers_were_busy || dying) && *keepalive_q->total) { + if (!dying) + ap_log_error(APLOG_MARK, APLOG_TRACE1, 0, ap_server_conf, + "All workers are busy, will close %d keep-alive " + "connections", + *keepalive_q->total); process_timeout_queue(keepalive_q, 0, start_lingering_close_nonblocking); } @@ -1869,6 +1876,34 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy) return NULL; } +/* + * During graceful shutdown, if there are more running worker threads than + * open connections, exit one worker thread. + * + * return 1 if thread should exit, 0 if it should continue running. + */ +static int worker_thread_should_exit_early(void) +{ + for (;;) { + apr_uint32_t conns = apr_atomic_read32(&connection_count); + apr_uint32_t dead = apr_atomic_read32(&threads_shutdown); + apr_uint32_t newdead; + + AP_DEBUG_ASSERT(dead <= threads_per_child); + if (conns >= threads_per_child - dead) + return 0; + + newdead = dead + 1; + if (apr_atomic_cas32(&threads_shutdown, newdead, dead) == dead) { + /* + * No other thread has exited in the mean time, safe to exit + * this one. + */ + return 1; + } + } +} + /* XXX For ungraceful termination/restart, we definitely don't want to * wait for active connections to finish but we may want to wait * for idle workers to get out of the queue code and release mutexes, @@ -1879,8 +1914,8 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy) static void *APR_THREAD_FUNC worker_thread(apr_thread_t * thd, void *dummy) { proc_info *ti = dummy; - int process_slot = ti->pid; - int thread_slot = ti->tid; + int process_slot = ti->pslot; + int thread_slot = ti->tslot; apr_socket_t *csd = NULL; event_conn_state_t *cs; apr_pool_t *ptrans; /* Pool for per-transaction stuff */ @@ -1916,6 +1951,9 @@ static void *APR_THREAD_FUNC worker_thread(apr_thread_t * thd, void *dummy) if (workers_may_exit) { break; } + if (dying && worker_thread_should_exit_early()) { + break; + } te = NULL; rv = ap_queue_pop_something(worker_queue, &csd, &cs, &ptrans, &te); @@ -1993,9 +2031,8 @@ static void create_listener_thread(thread_starter * ts) apr_status_t rv; my_info = (proc_info *) ap_malloc(sizeof(proc_info)); - my_info->pid = my_child_num; - my_info->tid = -1; /* listener thread doesn't have a thread slot */ - my_info->sd = 0; + my_info->pslot = my_child_num; + my_info->tslot = -1; /* listener thread doesn't have a thread slot */ rv = apr_thread_create(&ts->listener, thread_attr, listener_thread, my_info, pchild); if (rv != APR_SUCCESS) { @@ -2103,14 +2140,13 @@ static void *APR_THREAD_FUNC start_threads(apr_thread_t * thd, void *dummy) int status = ap_scoreboard_image->servers[my_child_num][i].status; - if (status != SERVER_GRACEFUL && status != SERVER_DEAD) { + if (status != SERVER_DEAD) { continue; } my_info = (proc_info *) ap_malloc(sizeof(proc_info)); - my_info->pid = my_child_num; - my_info->tid = i; - my_info->sd = 0; + my_info->pslot = my_child_num; + my_info->tslot = i; /* We are creating threads right now */ ap_update_child_status_from_indexes(my_child_num, i, @@ -2411,6 +2447,15 @@ static int make_child(server_rec * s, int slot, int bucket) retained->max_daemons_limit = slot + 1; } + if (ap_scoreboard_image->parent[slot].pid != 0) { + /* XXX replace with assert or remove ? */ + ap_log_error(APLOG_MARK, APLOG_ERR, 0, ap_server_conf, APLOGNO(03455) + "BUG: Scoreboard slot %d should be empty but is " + "in use by pid %" APR_PID_T_FMT, + slot, ap_scoreboard_image->parent[slot].pid); + return -1; + } + if (one_process) { my_bucket = &all_buckets[0]; @@ -2464,17 +2509,12 @@ static int make_child(server_rec * s, int slot, int bucket) return -1; } - if (ap_scoreboard_image->parent[slot].pid != 0) { - /* This new child process is squatting on the scoreboard - * entry owned by an exiting child process, which cannot - * exit until all active requests complete. - */ - event_note_child_lost_slot(slot, pid); - } ap_scoreboard_image->parent[slot].quiescing = 0; ap_scoreboard_image->parent[slot].not_accepting = 0; ap_scoreboard_image->parent[slot].bucket = bucket; event_note_child_started(slot, pid); + active_daemons++; + retained->total_daemons++; return 0; } @@ -2483,7 +2523,7 @@ static void startup_children(int number_to_start) { int i; - for (i = 0; number_to_start && i < ap_daemons_limit; ++i) { + for (i = 0; number_to_start && i < server_limit; ++i) { if (ap_scoreboard_image->parent[i].pid != 0) { continue; } @@ -2497,34 +2537,22 @@ static void startup_children(int number_to_start) static void perform_idle_server_maintenance(int child_bucket, int num_buckets) { int i, j; - int idle_thread_count; + int idle_thread_count = 0; worker_score *ws; process_score *ps; - int free_length; - int totally_free_length = 0; + int free_length = 0; int free_slots[MAX_SPAWN_RATE]; - int last_non_dead; - int total_non_dead; + int last_non_dead = -1; int active_thread_count = 0; - /* initialize the free_list */ - free_length = 0; - - idle_thread_count = 0; - last_non_dead = -1; - total_non_dead = 0; - - for (i = 0; i < ap_daemons_limit; ++i) { + for (i = 0; i < server_limit; ++i) { /* Initialization to satisfy the compiler. It doesn't know * that threads_per_child is always > 0 */ int status = SERVER_DEAD; - int any_dying_threads = 0; - int any_dead_threads = 0; - int all_dead_threads = 1; int child_threads_active = 0; if (i >= retained->max_daemons_limit && - totally_free_length == retained->idle_spawn_rate[child_bucket]) { + free_length == retained->idle_spawn_rate[child_bucket]) { /* short cut if all active processes have been examined and * enough empty scoreboard slots have been found */ @@ -2532,25 +2560,17 @@ static void perform_idle_server_maintenance(int child_bucket, int num_buckets) break; } ps = &ap_scoreboard_image->parent[i]; - for (j = 0; j < threads_per_child; j++) { - ws = &ap_scoreboard_image->servers[i][j]; - status = ws->status; - - /* XXX any_dying_threads is probably no longer needed GLA */ - any_dying_threads = any_dying_threads || - (status == SERVER_GRACEFUL); - any_dead_threads = any_dead_threads || (status == SERVER_DEAD); - all_dead_threads = all_dead_threads && - (status == SERVER_DEAD || status == SERVER_GRACEFUL); - - /* We consider a starting server as idle because we started it - * at least a cycle ago, and if it still hasn't finished starting - * then we're just going to swamp things worse by forking more. - * So we hopefully won't need to fork more if we count it. - * This depends on the ordering of SERVER_READY and SERVER_STARTING. - */ - if (ps->pid != 0) { /* XXX just set all_dead_threads in outer - for loop if no pid? not much else matters */ + if (ps->pid != 0) { + for (j = 0; j < threads_per_child; j++) { + ws = &ap_scoreboard_image->servers[i][j]; + status = ws->status; + + /* We consider a starting server as idle because we started it + * at least a cycle ago, and if it still hasn't finished starting + * then we're just going to swamp things worse by forking more. + * So we hopefully won't need to fork more if we count it. + * This depends on the ordering of SERVER_READY and SERVER_STARTING. + */ if (status <= SERVER_READY && !ps->quiescing && !ps->not_accepting && ps->generation == retained->my_generation && ps->bucket == child_bucket) @@ -2561,39 +2581,13 @@ static void perform_idle_server_maintenance(int child_bucket, int num_buckets) ++child_threads_active; } } + last_non_dead = i; } active_thread_count += child_threads_active; - if (any_dead_threads - && totally_free_length < retained->idle_spawn_rate[child_bucket] - && free_length < MAX_SPAWN_RATE / num_buckets - && (!ps->pid /* no process in the slot */ - || ps->quiescing)) { /* or at least one is going away */ - if (all_dead_threads) { - /* great! we prefer these, because the new process can - * start more threads sooner. So prioritize this slot - * by putting it ahead of any slots with active threads. - * - * first, make room by moving a slot that's potentially still - * in use to the end of the array - */ - free_slots[free_length] = free_slots[totally_free_length]; - free_slots[totally_free_length++] = i; - } - else { - /* slot is still in use - back of the bus - */ - free_slots[free_length] = i; - } - ++free_length; - } - else if (child_threads_active == threads_per_child) { + if (!ps->pid && free_length < retained->idle_spawn_rate[child_bucket]) + free_slots[free_length++] = i; + else if (child_threads_active == threads_per_child) had_healthy_child = 1; - } - /* XXX if (!ps->quiescing) is probably more reliable GLA */ - if (!any_dying_threads) { - last_non_dead = i; - ++total_non_dead; - } } if (retained->sick_child_detected) { @@ -2621,32 +2615,56 @@ static void perform_idle_server_maintenance(int child_bucket, int num_buckets) retained->max_daemons_limit = last_non_dead + 1; - if (idle_thread_count > max_spare_threads / num_buckets) { - /* Kill off one child */ - ap_mpm_podx_signal(all_buckets[child_bucket].pod, - AP_MPM_PODX_GRACEFUL); - retained->idle_spawn_rate[child_bucket] = 1; + if (idle_thread_count > max_spare_threads / num_buckets) + { + /* + * Child processes that we ask to shut down won't die immediately + * but may stay around for a long time when they finish their + * requests. If the server load changes many times, many such + * gracefully finishing processes may accumulate, filling up the + * scoreboard. To avoid running out of scoreboard entries, we + * don't shut down more processes when the total number of processes + * is high. + * + * XXX It would be nice if we could + * XXX - kill processes without keepalive connections first + * XXX - tell children to stop accepting new connections, and + * XXX depending on server load, later be able to resurrect them + * or kill them + */ + if (retained->total_daemons <= active_daemons_limit && + retained->total_daemons < server_limit) { + /* Kill off one child */ + ap_mpm_podx_signal(all_buckets[child_bucket].pod, + AP_MPM_PODX_GRACEFUL); + retained->idle_spawn_rate[child_bucket] = 1; + active_daemons--; + } else { + ap_log_error(APLOG_MARK, APLOG_TRACE5, 0, ap_server_conf, + "Not shutting down child: total daemons %d / " + "active limit %d / ServerLimit %d", + retained->total_daemons, active_daemons_limit, + server_limit); + } } else if (idle_thread_count < min_spare_threads / num_buckets) { - /* terminate the free list */ - if (free_length == 0) { /* scoreboard is full, can't fork */ - - if (active_thread_count >= ap_daemons_limit * threads_per_child) { - if (!retained->maxclients_reported) { - /* only report this condition once */ - ap_log_error(APLOG_MARK, APLOG_ERR, 0, ap_server_conf, APLOGNO(00484) - "server reached MaxRequestWorkers setting, " - "consider raising the MaxRequestWorkers " - "setting"); - retained->maxclients_reported = 1; - } - } - else { - ap_log_error(APLOG_MARK, APLOG_ERR, 0, ap_server_conf, APLOGNO(00485) - "scoreboard is full, not at MaxRequestWorkers"); + if (active_thread_count >= max_workers) { + if (!retained->maxclients_reported) { + /* only report this condition once */ + ap_log_error(APLOG_MARK, APLOG_ERR, 0, ap_server_conf, APLOGNO(00484) + "server reached MaxRequestWorkers setting, " + "consider raising the MaxRequestWorkers " + "setting"); + retained->maxclients_reported = 1; } retained->idle_spawn_rate[child_bucket] = 1; } + else if (free_length == 0) { /* scoreboard is full, can't fork */ + ap_log_error(APLOG_MARK, APLOG_ERR, 0, ap_server_conf, APLOGNO() + "scoreboard is full, not at MaxRequestWorkers." + "Increase ServerLimit."); + retained->idle_spawn_rate[child_bucket] = 1; + } else { if (free_length > retained->idle_spawn_rate[child_bucket]) { free_length = retained->idle_spawn_rate[child_bucket]; @@ -2657,10 +2675,17 @@ static void perform_idle_server_maintenance(int child_bucket, int num_buckets) "to increase StartServers, ThreadsPerChild " "or Min/MaxSpareThreads), " "spawning %d children, there are around %d idle " - "threads, and %d total children", free_length, - idle_thread_count, total_non_dead); + "threads, %d active children, and %d children " + "that are shutting down", free_length, + idle_thread_count, active_daemons, + retained->total_daemons); } for (i = 0; i < free_length; ++i) { + ap_log_error(APLOG_MARK, APLOG_TRACE5, 0, ap_server_conf, + "Spawning new child: slot %d active / " + "total daemons: %d/%d", + free_slots[i], active_daemons, + retained->total_daemons); make_child(ap_server_conf, free_slots[i], child_bucket); } /* the next time around we want to spawn twice as many if this @@ -2682,7 +2707,6 @@ static void perform_idle_server_maintenance(int child_bucket, int num_buckets) static void server_main_loop(int remaining_children_to_start, int num_buckets) { - ap_generation_t old_gen; int child_slot; apr_exit_why_e exitwhy; int status, processed_status; @@ -2732,13 +2756,15 @@ static void server_main_loop(int remaining_children_to_start, int num_buckets) event_note_child_killed(child_slot, 0, 0); ps = &ap_scoreboard_image->parent[child_slot]; + if (!ps->quiescing) + active_daemons--; ps->quiescing = 0; + retained->total_daemons--; if (processed_status == APEXIT_CHILDSICK) { /* resource shortage, minimize the fork rate */ retained->idle_spawn_rate[ps->bucket] = 1; } - else if (remaining_children_to_start - && child_slot < ap_daemons_limit) { + else if (remaining_children_to_start) { /* we're still doing a 1-for-1 replacement of dead * children with new children */ @@ -2746,24 +2772,12 @@ static void server_main_loop(int remaining_children_to_start, int num_buckets) --remaining_children_to_start; } } - else if (ap_unregister_extra_mpm_process(pid.pid, &old_gen) == 1) { - - event_note_child_killed(-1, /* already out of the scoreboard */ - pid.pid, old_gen); - if (processed_status == APEXIT_CHILDSICK - && old_gen == retained->my_generation) { - /* resource shortage, minimize the fork rate */ - for (i = 0; i < num_buckets; i++) { - retained->idle_spawn_rate[i] = 1; - } - } #if APR_HAS_OTHER_CHILD - } else if (apr_proc_other_child_alert(&pid, APR_OC_REASON_DEATH, status) == 0) { /* handled */ -#endif } +#endif else if (retained->is_graceful) { /* Great, we've probably just lost a slot in the * scoreboard. Somehow we don't know about this child. @@ -2825,8 +2839,8 @@ static int event_run(apr_pool_t * _pconf, apr_pool_t * plog, server_rec * s) /* Don't thrash since num_buckets depends on the * system and the number of online CPU cores... */ - if (ap_daemons_limit < num_buckets) - ap_daemons_limit = num_buckets; + if (active_daemons_limit < num_buckets) + active_daemons_limit = num_buckets; if (ap_daemons_to_start < num_buckets) ap_daemons_to_start = num_buckets; /* We want to create as much children at a time as the number of buckets, @@ -2850,8 +2864,8 @@ static int event_run(apr_pool_t * _pconf, apr_pool_t * plog, server_rec * s) * supposed to start up without the 1 second penalty between each fork. */ remaining_children_to_start = ap_daemons_to_start; - if (remaining_children_to_start > ap_daemons_limit) { - remaining_children_to_start = ap_daemons_limit; + if (remaining_children_to_start > active_daemons_limit) { + remaining_children_to_start = active_daemons_limit; } if (!retained->is_graceful) { startup_children(remaining_children_to_start); @@ -2881,7 +2895,7 @@ static int event_run(apr_pool_t * _pconf, apr_pool_t * plog, server_rec * s) * Kill child processes, tell them to call child_exit, etc... */ for (i = 0; i < num_buckets; i++) { - ap_mpm_podx_killpg(all_buckets[i].pod, ap_daemons_limit, + ap_mpm_podx_killpg(all_buckets[i].pod, active_daemons_limit, AP_MPM_PODX_RESTART); } ap_reclaim_child_processes(1, /* Start with SIGTERM */ @@ -2905,7 +2919,7 @@ static int event_run(apr_pool_t * _pconf, apr_pool_t * plog, server_rec * s) /* Close our listeners, and then ask our children to do same */ ap_close_listeners(); for (i = 0; i < num_buckets; i++) { - ap_mpm_podx_killpg(all_buckets[i].pod, ap_daemons_limit, + ap_mpm_podx_killpg(all_buckets[i].pod, active_daemons_limit, AP_MPM_PODX_GRACEFUL); } ap_relieve_child_processes(event_note_child_killed); @@ -2933,7 +2947,7 @@ static int event_run(apr_pool_t * _pconf, apr_pool_t * plog, server_rec * s) ap_relieve_child_processes(event_note_child_killed); active_children = 0; - for (index = 0; index < ap_daemons_limit; ++index) { + for (index = 0; index < retained->max_daemons_limit; ++index) { if (ap_mpm_safe_kill(MPM_CHILD_PID(index), 0) == APR_SUCCESS) { active_children = 1; /* Having just one child is enough to stay around */ @@ -2948,7 +2962,7 @@ static int event_run(apr_pool_t * _pconf, apr_pool_t * plog, server_rec * s) * really dead. */ for (i = 0; i < num_buckets; i++) { - ap_mpm_podx_killpg(all_buckets[i].pod, ap_daemons_limit, + ap_mpm_podx_killpg(all_buckets[i].pod, active_daemons_limit, AP_MPM_PODX_RESTART); } ap_reclaim_child_processes(1, event_note_child_killed); @@ -2977,7 +2991,7 @@ static int event_run(apr_pool_t * _pconf, apr_pool_t * plog, server_rec * s) " received. Doing graceful restart"); /* wake up the children...time to die. But we'll have more soon */ for (i = 0; i < num_buckets; i++) { - ap_mpm_podx_killpg(all_buckets[i].pod, ap_daemons_limit, + ap_mpm_podx_killpg(all_buckets[i].pod, active_daemons_limit, AP_MPM_PODX_GRACEFUL); } @@ -2992,7 +3006,7 @@ static int event_run(apr_pool_t * _pconf, apr_pool_t * plog, server_rec * s) * pthreads are stealing signals from us left and right. */ for (i = 0; i < num_buckets; i++) { - ap_mpm_podx_killpg(all_buckets[i].pod, ap_daemons_limit, + ap_mpm_podx_killpg(all_buckets[i].pod, active_daemons_limit, AP_MPM_PODX_RESTART); } @@ -3002,6 +3016,8 @@ static int event_run(apr_pool_t * _pconf, apr_pool_t * plog, server_rec * s) "SIGHUP received. Attempting to restart"); } + active_daemons = 0; + return OK; } @@ -3215,9 +3231,9 @@ static int event_pre_config(apr_pool_t * pconf, apr_pool_t * plog, max_spare_threads = DEFAULT_MAX_FREE_DAEMON * DEFAULT_THREADS_PER_CHILD; server_limit = DEFAULT_SERVER_LIMIT; thread_limit = DEFAULT_THREAD_LIMIT; - ap_daemons_limit = server_limit; + active_daemons_limit = server_limit; threads_per_child = DEFAULT_THREADS_PER_CHILD; - max_workers = ap_daemons_limit * threads_per_child; + max_workers = active_daemons_limit * threads_per_child; had_healthy_child = 0; ap_extended_status = 0; @@ -3426,10 +3442,10 @@ static int event_check_config(apr_pool_t *p, apr_pool_t *plog, max_workers = threads_per_child; } - ap_daemons_limit = max_workers / threads_per_child; + active_daemons_limit = max_workers / threads_per_child; if (max_workers % threads_per_child) { - int tmp_max_workers = ap_daemons_limit * threads_per_child; + int tmp_max_workers = active_daemons_limit * threads_per_child; if (startup) { ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL, APLOGNO(00513) @@ -3437,7 +3453,7 @@ static int event_check_config(apr_pool_t *p, apr_pool_t *plog, "multiple of ThreadsPerChild of %d, decreasing to nearest " "multiple %d, for a maximum of %d servers.", max_workers, threads_per_child, tmp_max_workers, - ap_daemons_limit); + active_daemons_limit); } else { ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s, APLOGNO(00514) "MaxRequestWorkers of %d is not an integer multiple " @@ -3448,25 +3464,25 @@ static int event_check_config(apr_pool_t *p, apr_pool_t *plog, max_workers = tmp_max_workers; } - if (ap_daemons_limit > server_limit) { + if (active_daemons_limit > server_limit) { if (startup) { ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL, APLOGNO(00515) "WARNING: MaxRequestWorkers of %d would require %d servers " "and would exceed ServerLimit of %d, decreasing to %d. " "To increase, please see the ServerLimit directive.", - max_workers, ap_daemons_limit, server_limit, + max_workers, active_daemons_limit, server_limit, server_limit * threads_per_child); } else { ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s, APLOGNO(00516) "MaxRequestWorkers of %d would require %d servers and " "exceed ServerLimit of %d, decreasing to %d", - max_workers, ap_daemons_limit, server_limit, + max_workers, active_daemons_limit, server_limit, server_limit * threads_per_child); } - ap_daemons_limit = server_limit; + active_daemons_limit = server_limit; } - /* ap_daemons_to_start > ap_daemons_limit checked in ap_mpm_run() */ + /* ap_daemons_to_start > active_daemons_limit checked in ap_mpm_run() */ if (ap_daemons_to_start < 1) { if (startup) { ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL, APLOGNO(00517) --- a/server/mpm/event/fdqueue.c +++ a/server/mpm/event/fdqueue.c @@ -280,6 +280,19 @@ void ap_pop_pool(apr_pool_t ** recycled_pool, fd_queue_info_t * queue_info) } } +void ap_free_idle_pools(fd_queue_info_t *queue_info) +{ + apr_pool_t *p; + + queue_info->max_recycled_pools = 0; + do { + ap_pop_pool(&p, queue_info); + if (p != NULL) + apr_pool_destroy(p); + } while (p != NULL); +} + + apr_status_t ap_queue_info_term(fd_queue_info_t * queue_info) { apr_status_t rv; @@ -477,17 +490,30 @@ apr_status_t ap_queue_pop_something(fd_queue_t * queue, apr_socket_t ** sd, return rv; } -apr_status_t ap_queue_interrupt_all(fd_queue_t * queue) +static apr_status_t queue_interrupt(fd_queue_t * queue, int all) { apr_status_t rv; if ((rv = apr_thread_mutex_lock(queue->one_big_mutex)) != APR_SUCCESS) { return rv; } - apr_thread_cond_broadcast(queue->not_empty); + if (all) + apr_thread_cond_broadcast(queue->not_empty); + else + apr_thread_cond_signal(queue->not_empty); return apr_thread_mutex_unlock(queue->one_big_mutex); } +apr_status_t ap_queue_interrupt_all(fd_queue_t * queue) +{ + return queue_interrupt(queue, 1); +} + +apr_status_t ap_queue_interrupt_one(fd_queue_t * queue) +{ + return queue_interrupt(queue, 0); +} + apr_status_t ap_queue_term(fd_queue_t * queue) { apr_status_t rv; --- a/server/mpm/event/fdqueue.h +++ a/server/mpm/event/fdqueue.h @@ -52,6 +52,7 @@ apr_status_t ap_queue_info_wait_for_idler(fd_queue_info_t * queue_info, int *had_to_block); apr_status_t ap_queue_info_term(fd_queue_info_t * queue_info); apr_uint32_t ap_queue_info_get_idlers(fd_queue_info_t * queue_info); +void ap_free_idle_pools(fd_queue_info_t *queue_info); struct fd_queue_elem_t { @@ -98,6 +99,7 @@ apr_status_t ap_queue_pop_something(fd_queue_t * queue, apr_socket_t ** sd, event_conn_state_t ** ecs, apr_pool_t ** p, timer_event_t ** te); apr_status_t ap_queue_interrupt_all(fd_queue_t * queue); +apr_status_t ap_queue_interrupt_one(fd_queue_t * queue); apr_status_t ap_queue_term(fd_queue_t * queue); #endif /* FDQUEUE_H */ --- a/server/mpm_unix.c +++ a/server/mpm_unix.c @@ -63,7 +63,13 @@ #undef APLOG_MODULE_INDEX #define APLOG_MODULE_INDEX AP_CORE_MODULE_INDEX -typedef enum {DO_NOTHING, SEND_SIGTERM, SEND_SIGKILL, GIVEUP} action_t; +typedef enum { + DO_NOTHING, + SEND_SIGTERM, + SEND_SIGTERM_NOLOG, + SEND_SIGKILL, + GIVEUP +} action_t; typedef struct extra_process_t { struct extra_process_t *next; @@ -142,6 +148,8 @@ static int reclaim_one_pid(pid_t pid, action_t action) " still did not exit, " "sending a SIGTERM", pid); + /* FALLTHROUGH */ + case SEND_SIGTERM_NOLOG: kill(pid, SIGTERM); break; @@ -193,6 +201,7 @@ AP_DECLARE(void) ap_reclaim_child_processes(int terminate, * children but take no action against * stragglers */ + {SEND_SIGTERM_NOLOG, 0}, /* skipped if terminate == 0 */ {SEND_SIGTERM, apr_time_from_sec(3)}, {SEND_SIGTERM, apr_time_from_sec(5)}, {SEND_SIGTERM, apr_time_from_sec(7)}, @@ -202,19 +211,21 @@ AP_DECLARE(void) ap_reclaim_child_processes(int terminate, int cur_action; /* index of action we decided to take this * iteration */ - int next_action = 1; /* index of first real action */ + int next_action = terminate ? 1 : 2; /* index of first real action */ ap_mpm_query(AP_MPMQ_MAX_DAEMON_USED, &max_daemons); do { - apr_sleep(waittime); - /* don't let waittime get longer than 1 second; otherwise, we don't - * react quickly to the last child exiting, and taking action can - * be delayed - */ - waittime = waittime * 4; - if (waittime > apr_time_from_sec(1)) { - waittime = apr_time_from_sec(1); + if (action_table[next_action].action_time > 0) { + apr_sleep(waittime); + /* don't let waittime get longer than 1 second; otherwise, we don't + * react quickly to the last child exiting, and taking action can + * be delayed + */ + waittime = waittime * 4; + if (waittime > apr_time_from_sec(1)) { + waittime = apr_time_from_sec(1); + } } /* see what action to take, if any */ --- a/server/scoreboard.c +++ a/server/scoreboard.c @@ -399,7 +399,7 @@ AP_DECLARE(int) ap_find_child_by_pid(apr_proc_t *pid) int i; int max_daemons_limit = 0; - ap_mpm_query(AP_MPMQ_MAX_DAEMONS, &max_daemons_limit); + ap_mpm_query(AP_MPMQ_MAX_DAEMON_USED, &max_daemons_limit); for (i = 0; i < max_daemons_limit; ++i) { if (ap_scoreboard_image->parent[i].pid == pid->pid) {