--- server/mpm/event/event.c (revision 1738637) +++ server/mpm/event/event.c (working copy) @@ -160,15 +160,17 @@ #endif #define WORKER_FACTOR_SCALE 16 /* scale factor to allow fractional values */ static unsigned int worker_factor = DEFAULT_WORKER_FACTOR * WORKER_FACTOR_SCALE; + /* AsyncRequestWorkerFactor * 16 */ -static int threads_per_child = 0; /* Worker threads per child */ -static int ap_daemons_to_start = 0; -static int min_spare_threads = 0; -static int max_spare_threads = 0; -static int ap_daemons_limit = 0; -static int max_workers = 0; -static int server_limit = 0; -static int thread_limit = 0; +static int threads_per_child = 0; /* ThreadsPerChild */ +static int ap_daemons_to_start = 0; /* StartServers */ +static int min_spare_threads = 0; /* MinSpareThreads */ +static int max_spare_threads = 0; /* MaxSpareThreads */ +static int ap_daemons_limit = 0; /* MaxRequestWorkers / ThreadsPerChild */ +static int max_workers = 0; /* MaxRequestWorkers */ +static int server_limit = 0; /* ServerLimit */ +static int thread_limit = 0; /* ThreadLimit */ +static int sb_max_daemon_idx = 0; static int had_healthy_child = 0; static int dying = 0; static int workers_may_exit = 0; @@ -181,6 +183,8 @@ static apr_uint32_t lingering_count = 0; /* Number of connections in lingering close */ static apr_uint32_t suspended_count = 0; /* Number of suspended connections */ static apr_uint32_t clogged_count = 0; /* Number of threads processing ssl conns */ +static apr_uint32_t threads_shutdown = 0; /* Number of threads that have shut down + early during graceful termination */ static int resource_shortage = 0; static fd_queue_t *worker_queue; static fd_queue_info_t *worker_queue_info; @@ -288,9 +292,8 @@ /* The structure used to pass unique initialization info to each thread */ typedef struct { - int pid; - int tid; - int sd; + int pslot; /* process slot */ + int tslot; /* worker slot of the thread */ } proc_info; /* Structure used to pass information to the thread responsible for @@ 
-911,6 +914,8 @@ || apr_socket_shutdown(csd, APR_SHUTDOWN_WRITE) != APR_SUCCESS) { apr_socket_close(csd); ap_push_pool(worker_queue_info, cs->p); + if (dying) + ap_queue_interrupt_one(worker_queue); return 0; } return start_lingering_close_common(cs, 0); @@ -934,6 +939,8 @@ AP_DEBUG_ASSERT(0); } ap_push_pool(worker_queue_info, cs->p); + if (dying) + ap_queue_interrupt_one(worker_queue); return 0; } @@ -1208,6 +1215,7 @@ { if (!*closed) { int i; + worker_score *ws; disable_listensocks(process_slot); ap_close_listeners_ex(my_bucket->listeners); *closed = 1; @@ -1214,11 +1222,19 @@ dying = 1; ap_scoreboard_image->parent[process_slot].quiescing = 1; for (i = 0; i < threads_per_child; ++i) { + ws = ap_get_scoreboard_worker_from_indexes(process_slot, i); + if (ws->pid != ap_my_pid) { + /* scoreboard slot still in use by previous generation */ + continue; + } ap_update_child_status_from_indexes(process_slot, i, SERVER_GRACEFUL, NULL); } /* wake up the main thread */ kill(ap_my_pid, SIGTERM); + + ap_free_idle_pools(worker_queue_info); + ap_queue_interrupt_all(worker_queue); } } @@ -1439,6 +1455,8 @@ TO_QUEUE_ELEM_INIT(cs); ap_push_pool(worker_queue_info, cs->p); + if (dying) + ap_queue_interrupt_one(worker_queue); } /* call 'func' for all elements of 'q' with timeout less than 'timeout_time'. 
@@ -1518,7 +1536,7 @@ timer_event_t *te; apr_status_t rc; proc_info *ti = dummy; - int process_slot = ti->pid; + int process_slot = ti->pslot; apr_pool_t *tpool = apr_thread_pool_get(thd); void *csd = NULL; apr_pool_t *ptrans; /* Pool for per-transaction stuff */ @@ -1584,6 +1602,12 @@ *keepalive_q->total, apr_atomic_read32(&lingering_count), apr_atomic_read32(&suspended_count)); + if (dying) { + ap_log_error(APLOG_MARK, APLOG_TRACE6, 0, ap_server_conf, + "%u/%u workers shutdown", + apr_atomic_read32(&threads_shutdown), + threads_per_child); + } apr_thread_mutex_unlock(timeout_mutex); } } @@ -1818,11 +1842,12 @@ /* If all workers are busy, we kill older keep-alive connections so that they * may connect to another process. */ - if (workers_were_busy && *keepalive_q->total) { - ap_log_error(APLOG_MARK, APLOG_TRACE1, 0, ap_server_conf, - "All workers are busy, will close %d keep-alive " - "connections", - *keepalive_q->total); + if ((workers_were_busy || dying) && *keepalive_q->total) { + if (!dying) + ap_log_error(APLOG_MARK, APLOG_TRACE1, 0, ap_server_conf, + "All workers are busy, will close %d keep-alive " + "connections", + *keepalive_q->total); process_timeout_queue(keepalive_q, 0, start_lingering_close_nonblocking); } @@ -1869,6 +1894,34 @@ return NULL; } +/* + * During graceful shutdown, if there are more running worker threads than + * open connections, exit one worker thread. + * + * return 1 if thread should exit, 0 if it should continue running. + */ +static int worker_thread_should_exit_early(void) +{ + for (;;) { + apr_uint32_t conns = apr_atomic_read32(&connection_count); + apr_uint32_t dead = apr_atomic_read32(&threads_shutdown); + apr_uint32_t newdead; + + AP_DEBUG_ASSERT(dead <= threads_per_child); + if (conns >= threads_per_child - dead) + return 0; + + newdead = dead + 1; + if (apr_atomic_cas32(&threads_shutdown, newdead, dead) == dead) { + /* + * No other thread has exited in the mean time, safe to exit + * this one. 
+ */ + return 1; + } + } +} + /* XXX For ungraceful termination/restart, we definitely don't want to * wait for active connections to finish but we may want to wait * for idle workers to get out of the queue code and release mutexes, @@ -1879,8 +1932,8 @@ static void *APR_THREAD_FUNC worker_thread(apr_thread_t * thd, void *dummy) { proc_info *ti = dummy; - int process_slot = ti->pid; - int thread_slot = ti->tid; + int process_slot = ti->pslot; + int thread_slot = ti->tslot; apr_socket_t *csd = NULL; event_conn_state_t *cs; apr_pool_t *ptrans; /* Pool for per-transaction stuff */ @@ -1915,6 +1968,9 @@ if (workers_may_exit) { break; } + if (dying && worker_thread_should_exit_early()) { + break; + } te = NULL; rv = ap_queue_pop_something(worker_queue, &csd, &cs, &ptrans, &te); @@ -1993,9 +2049,8 @@ apr_status_t rv; my_info = (proc_info *) ap_malloc(sizeof(proc_info)); - my_info->pid = my_child_num; - my_info->tid = -1; /* listener thread doesn't have a thread slot */ - my_info->sd = 0; + my_info->pslot = my_child_num; + my_info->tslot = -1; /* listener thread doesn't have a thread slot */ rv = apr_thread_create(&ts->listener, thread_attr, listener_thread, my_info, pchild); if (rv != APR_SUCCESS) { @@ -2104,14 +2159,13 @@ int status = ap_scoreboard_image->servers[child_num_arg][i].status; - if (status != SERVER_GRACEFUL && status != SERVER_DEAD) { + if (status != SERVER_DEAD) { continue; } my_info = (proc_info *) ap_malloc(sizeof(proc_info)); - my_info->pid = my_child_num; - my_info->tid = i; - my_info->sd = 0; + my_info->pslot = my_child_num; + my_info->tslot = i; /* We are creating threads right now */ ap_update_child_status_from_indexes(my_child_num, i, @@ -2484,7 +2538,7 @@ { int i; - for (i = 0; number_to_start && i < ap_daemons_limit; ++i) { + for (i = 0; number_to_start && i < sb_max_daemon_idx; ++i) { if (ap_scoreboard_image->parent[i].pid != 0) { continue; } @@ -2515,7 +2569,7 @@ last_non_dead = -1; total_non_dead = 0; - for (i = 0; i < ap_daemons_limit; ++i) 
{ + for (i = 0; i < sb_max_daemon_idx; ++i) { /* Initialization to satisfy the compiler. It doesn't know * that threads_per_child is always > 0 */ int status = SERVER_DEAD; @@ -2739,7 +2793,7 @@ retained->idle_spawn_rate[ps->bucket] = 1; } else if (remaining_children_to_start - && child_slot < ap_daemons_limit) { + && child_slot < sb_max_daemon_idx) { /* we're still doing a 1-for-1 replacement of dead * children with new children */ @@ -2835,6 +2889,10 @@ if (max_spare_threads < min_spare_threads + threads_per_child * num_buckets) max_spare_threads = min_spare_threads + threads_per_child * num_buckets; + sb_max_daemon_idx = 10 * ap_daemons_limit; + if (sb_max_daemon_idx > server_limit) { + sb_max_daemon_idx = server_limit; + } /* If we're doing a graceful_restart then we're going to see a lot * of children exiting immediately when we get into the main loop * below (because we just sent them AP_SIG_GRACEFUL). This happens pretty @@ -2876,7 +2934,7 @@ * Kill child processes, tell them to call child_exit, etc... */ for (i = 0; i < num_buckets; i++) { - ap_mpm_podx_killpg(all_buckets[i].pod, ap_daemons_limit, + ap_mpm_podx_killpg(all_buckets[i].pod, sb_max_daemon_idx, AP_MPM_PODX_RESTART); } ap_reclaim_child_processes(1, /* Start with SIGTERM */ @@ -2900,7 +2958,7 @@ /* Close our listeners, and then ask our children to do same */ ap_close_listeners(); for (i = 0; i < num_buckets; i++) { - ap_mpm_podx_killpg(all_buckets[i].pod, ap_daemons_limit, + ap_mpm_podx_killpg(all_buckets[i].pod, sb_max_daemon_idx, AP_MPM_PODX_GRACEFUL); } ap_relieve_child_processes(event_note_child_killed); @@ -2928,7 +2986,7 @@ ap_relieve_child_processes(event_note_child_killed); active_children = 0; - for (index = 0; index < ap_daemons_limit; ++index) { + for (index = 0; index < sb_max_daemon_idx; ++index) { if (ap_mpm_safe_kill(MPM_CHILD_PID(index), 0) == APR_SUCCESS) { active_children = 1; /* Having just one child is enough to stay around */ @@ -2943,7 +3001,7 @@ * really dead. 
*/ for (i = 0; i < num_buckets; i++) { - ap_mpm_podx_killpg(all_buckets[i].pod, ap_daemons_limit, + ap_mpm_podx_killpg(all_buckets[i].pod, sb_max_daemon_idx, AP_MPM_PODX_RESTART); } ap_reclaim_child_processes(1, event_note_child_killed); @@ -2972,7 +3030,7 @@ " received. Doing graceful restart"); /* wake up the children...time to die. But we'll have more soon */ for (i = 0; i < num_buckets; i++) { - ap_mpm_podx_killpg(all_buckets[i].pod, ap_daemons_limit, + ap_mpm_podx_killpg(all_buckets[i].pod, sb_max_daemon_idx, AP_MPM_PODX_GRACEFUL); } @@ -2987,7 +3045,7 @@ * pthreads are stealing signals from us left and right. */ for (i = 0; i < num_buckets; i++) { - ap_mpm_podx_killpg(all_buckets[i].pod, ap_daemons_limit, + ap_mpm_podx_killpg(all_buckets[i].pod, sb_max_daemon_idx, AP_MPM_PODX_RESTART); } @@ -3211,6 +3269,7 @@ server_limit = DEFAULT_SERVER_LIMIT; thread_limit = DEFAULT_THREAD_LIMIT; ap_daemons_limit = server_limit; + sb_max_daemon_idx = server_limit; threads_per_child = DEFAULT_THREADS_PER_CHILD; max_workers = ap_daemons_limit * threads_per_child; had_healthy_child = 0; --- server/mpm/event/fdqueue.c (revision 1738637) +++ server/mpm/event/fdqueue.c (working copy) @@ -280,6 +280,19 @@ } } +void ap_free_idle_pools(fd_queue_info_t *queue_info) +{ + apr_pool_t *p; + + queue_info->max_recycled_pools = 0; + do { + ap_pop_pool(&p, queue_info); + if (p != NULL) + apr_pool_destroy(p); + } while (p != NULL); +} + + apr_status_t ap_queue_info_term(fd_queue_info_t * queue_info) { apr_status_t rv; @@ -477,7 +490,7 @@ return rv; } -apr_status_t ap_queue_interrupt_all(fd_queue_t * queue) +static apr_status_t queue_interrupt(fd_queue_t *queue, int all, int term) { apr_status_t rv; @@ -484,24 +497,31 @@ if ((rv = apr_thread_mutex_lock(queue->one_big_mutex)) != APR_SUCCESS) { return rv; } - apr_thread_cond_broadcast(queue->not_empty); - return apr_thread_mutex_unlock(queue->one_big_mutex); -} - -apr_status_t ap_queue_term(fd_queue_t * queue) -{ - apr_status_t rv; - - if ((rv = 
apr_thread_mutex_lock(queue->one_big_mutex)) != APR_SUCCESS) { - return rv; - } /* we must hold one_big_mutex when setting this... otherwise, * we could end up setting it and waking everybody up just after a * would-be popper checks it but right before they block */ - queue->terminated = 1; - if ((rv = apr_thread_mutex_unlock(queue->one_big_mutex)) != APR_SUCCESS) { - return rv; + if (term) { + queue->terminated = 1; } - return ap_queue_interrupt_all(queue); + if (all) + apr_thread_cond_broadcast(queue->not_empty); + else + apr_thread_cond_signal(queue->not_empty); + return apr_thread_mutex_unlock(queue->one_big_mutex); } + +apr_status_t ap_queue_interrupt_all(fd_queue_t * queue) +{ + return queue_interrupt(queue, 1, 0); +} + +apr_status_t ap_queue_interrupt_one(fd_queue_t * queue) +{ + return queue_interrupt(queue, 0, 0); +} + +apr_status_t ap_queue_term(fd_queue_t * queue) +{ + return queue_interrupt(queue, 1, 1); +} --- server/mpm/event/fdqueue.h (revision 1738637) +++ server/mpm/event/fdqueue.h (working copy) @@ -52,6 +52,7 @@ int *had_to_block); apr_status_t ap_queue_info_term(fd_queue_info_t * queue_info); apr_uint32_t ap_queue_info_get_idlers(fd_queue_info_t * queue_info); +void ap_free_idle_pools(fd_queue_info_t *queue_info); struct fd_queue_elem_t { @@ -98,6 +99,7 @@ event_conn_state_t ** ecs, apr_pool_t ** p, timer_event_t ** te); apr_status_t ap_queue_interrupt_all(fd_queue_t * queue); +apr_status_t ap_queue_interrupt_one(fd_queue_t * queue); apr_status_t ap_queue_term(fd_queue_t * queue); #endif /* FDQUEUE_H */ --- server/mpm/worker/fdqueue.c (revision 1738637) +++ server/mpm/worker/fdqueue.c (working copy) @@ -382,7 +382,7 @@ return rv; } -apr_status_t ap_queue_interrupt_all(fd_queue_t *queue) +static apr_status_t queue_interrupt_all(fd_queue_t *queue, int term) { apr_status_t rv; @@ -389,24 +389,23 @@ if ((rv = apr_thread_mutex_lock(queue->one_big_mutex)) != APR_SUCCESS) { return rv; } + /* we must hold one_big_mutex when setting this... 
otherwise, + * we could end up setting it and waking everybody up just after a + * would-be popper checks it but right before they block + */ + if (term) { + queue->terminated = 1; + } apr_thread_cond_broadcast(queue->not_empty); return apr_thread_mutex_unlock(queue->one_big_mutex); } +apr_status_t ap_queue_interrupt_all(fd_queue_t *queue) +{ + return queue_interrupt_all(queue, 0); +} + apr_status_t ap_queue_term(fd_queue_t *queue) { - apr_status_t rv; - - if ((rv = apr_thread_mutex_lock(queue->one_big_mutex)) != APR_SUCCESS) { - return rv; - } - /* we must hold one_big_mutex when setting this... otherwise, - * we could end up setting it and waking everybody up just after a - * would-be popper checks it but right before they block - */ - queue->terminated = 1; - if ((rv = apr_thread_mutex_unlock(queue->one_big_mutex)) != APR_SUCCESS) { - return rv; - } - return ap_queue_interrupt_all(queue); + return queue_interrupt_all(queue, 1); } --- server/scoreboard.c (revision 1738637) +++ server/scoreboard.c (working copy) @@ -399,7 +399,7 @@ int i; int max_daemons_limit = 0; - ap_mpm_query(AP_MPMQ_MAX_DAEMONS, &max_daemons_limit); + ap_mpm_query(AP_MPMQ_MAX_DAEMON_USED, &max_daemons_limit); for (i = 0; i < max_daemons_limit; ++i) { if (ap_scoreboard_image->parent[i].pid == pid->pid) {