Attachment 34202 Details for Bug 53555 – Use all scoreboard entries up to ServerLimit, for 2.4

[patch] Use all scoreboard entries up to ServerLimit, for 2.4

use-all-scoreboard-2.4.diff (text/plain), 43.09 KB, created by Stefan Fritsch on 2016-09-05 21:50:31 UTC

(hide)

Description:

Filename:

MIME Type:

Creator: Stefan Fritsch

Created: 2016-09-05 21:50:31 UTC

Size: 43.09 KB

patch

obsolete

>diff --git a/modules/generators/mod_status.c b/modules/generators/mod_status.c
>index 4ff9df1..b35172e 100644
>--- a/modules/generators/mod_status.c
>+++ b/modules/generators/mod_status.c
>@@ -529,7 +529,7 @@ static int status_handler(request_rec *r)
> 
> if (is_async) {
> int write_completion = 0, lingering_close = 0, keep_alive = 0,
>- connections = 0;
>+ connections = 0, stopping = 0, procs = 0;
> /*
> * These differ from 'busy' and 'ready' in how gracefully finishing
> * threads are counted. XXX: How to make this clear in the html?
>@@ -537,13 +537,15 @@ static int status_handler(request_rec *r)
> int busy_workers = 0, idle_workers = 0;
> if (!short_report)
> ap_rputs("\n\n<table rules=\"all\" cellpadding=\"1%\">\n"
>- "<tr><th rowspan=\"2\">PID</th>"
>+ "<tr><th rowspan=\"2\">Slot</th>"
>+ "<th rowspan=\"2\">PID</th>"
>+ "<th rowspan=\"2\">Stopping</th>"
> "<th colspan=\"2\">Connections</th>\n"
> "<th colspan=\"2\">Threads</th>"
>- "<th colspan=\"4\">Async connections</th></tr>\n"
>+ "<th colspan=\"3\">Async connections</th></tr>\n"
> "<tr><th>total</th><th>accepting</th>"
>- "<th>busy</th><th>idle</th><th>writing</th>"
>- "<th>keep-alive</th><th>closing</th></tr>\n", r);
>+ "<th>busy</th><th>idle</th>"
>+ "<th>writing</th><th>keep-alive</th><th>closing</th></tr>\n", r);
> for (i = 0; i < server_limit; ++i) {
> ps_record = ap_get_scoreboard_process(i);
> if (ps_record->pid) {
>@@ -553,26 +555,45 @@ static int status_handler(request_rec *r)
> lingering_close += ps_record->lingering_close;
> busy_workers += thread_busy_buffer[i];
> idle_workers += thread_idle_buffer[i];
>- if (!short_report)
>- ap_rprintf(r, "<tr><td>%" APR_PID_T_FMT "</td><td>%u</td>"
>- "<td>%s</td><td>%u</td><td>%u</td>"
>+ if (!short_report) {
>+ const char *dying = "no";
>+ const char *old = "";
>+ if (ps_record->quiescing) {
>+ dying = "yes";
>+ stopping++;
>+ }
>+ if (ps_record->generation != mpm_generation)
>+ old = " (old gen)";
>+ procs++;
>+ ap_rprintf(r, "<tr><td>%u</td><td>%" APR_PID_T_FMT "</td>"
>+ "<td>%s%s</td>"
>+ "<td>%u</td><td>%s</td>"
>+ "<td>%u</td><td>%u</td>"
> "<td>%u</td><td>%u</td><td>%u</td>"
> "</tr>\n",
>- ps_record->pid, ps_record->connections,
>+ i, ps_record->pid,
>+ dying, old,
>+ ps_record->connections,
> ps_record->not_accepting ? "no" : "yes",
>- thread_busy_buffer[i], thread_idle_buffer[i],
>+ thread_busy_buffer[i],
>+ thread_idle_buffer[i],
> ps_record->write_completion,
> ps_record->keep_alive,
> ps_record->lingering_close);
>+ }
> }
> }
> if (!short_report) {
>- ap_rprintf(r, "<tr><td>Sum</td><td>%d</td><td>&nbsp;</td><td>%d</td>"
>- "<td>%d</td><td>%d</td><td>%d</td><td>%d</td>"
>+ ap_rprintf(r, "<tr><td>Sum</td>"
>+ "<td>%d</td><td>%d</td>"
>+ "<td>%d</td><td>&nbsp;</td>"
>+ "<td>%d</td><td>%d</td>"
>+ "<td>%d</td><td>%d</td><td>%d</td>"
> "</tr>\n</table>\n",
>- connections, busy_workers, idle_workers,
>+ procs, stopping,
>+ connections,
>+ busy_workers, idle_workers,
> write_completion, keep_alive, lingering_close);
>-
> }
> else {
> ap_rprintf(r, "ConnsTotal: %d\n"
>@@ -621,7 +642,7 @@ static int status_handler(request_rec *r)
> "\"<code>G</code>\" Gracefully finishing, \n"
> "\"<code>I</code>\" Idle cleanup of worker, \n"
> "\"<code>.</code>\" Open slot with no current process \n"
>- "\n", r);
>+ "\n", r);
> if (!ap_extended_status) {
> int j;
> int k = 0;
>diff --git a/server/mpm/event/event.c b/server/mpm/event/event.c
>index e53ea0f..d777ecd 100644
>--- a/server/mpm/event/event.c
>+++ b/server/mpm/event/event.c
>@@ -160,15 +160,18 @@
> #endif
> #define WORKER_FACTOR_SCALE 16 /* scale factor to allow fractional values */
> static unsigned int worker_factor = DEFAULT_WORKER_FACTOR * WORKER_FACTOR_SCALE;
>-
>-static int threads_per_child = 0; /* Worker threads per child */
>-static int ap_daemons_to_start = 0;
>-static int min_spare_threads = 0;
>-static int max_spare_threads = 0;
>-static int ap_daemons_limit = 0;
>-static int max_workers = 0;
>-static int server_limit = 0;
>-static int thread_limit = 0;
>+ /* AsyncRequestWorkerFactor * 16 */
>+
>+static int threads_per_child = 0; /* ThreadsPerChild */
>+static int ap_daemons_to_start = 0; /* StartServers */
>+static int min_spare_threads = 0; /* MinSpareThreads */
>+static int max_spare_threads = 0; /* MaxSpareThreads */
>+static int active_daemons_limit = 0; /* MaxRequestWorkers / ThreadsPerChild */
>+static int active_daemons = 0; /* child processes that are still active, i.e.
>+ are not shutting down gracefully */
>+static int max_workers = 0; /* MaxRequestWorkers */
>+static int server_limit = 0; /* ServerLimit */
>+static int thread_limit = 0; /* ThreadLimit */
> static int had_healthy_child = 0;
> static int dying = 0;
> static int workers_may_exit = 0;
>@@ -181,6 +184,8 @@ static apr_uint32_t connection_count = 0; /* Number of open connections */
> static apr_uint32_t lingering_count = 0; /* Number of connections in lingering close */
> static apr_uint32_t suspended_count = 0; /* Number of suspended connections */
> static apr_uint32_t clogged_count = 0; /* Number of threads processing ssl conns */
>+static apr_uint32_t threads_shutdown = 0; /* Number of threads that have shut down
>+ early during graceful termination */
> static int resource_shortage = 0;
> static fd_queue_t *worker_queue;
> static fd_queue_info_t *worker_queue_info;
>@@ -288,9 +293,8 @@ static apr_pollset_t *event_pollset;
> /* The structure used to pass unique initialization info to each thread */
> typedef struct
> {
>- int pid;
>- int tid;
>- int sd;
>+ int pslot; /* process slot */
>+ int tslot; /* worker slot of the thread */
> } proc_info;
> 
> /* Structure used to pass information to the thread responsible for
>@@ -335,6 +339,14 @@ typedef struct event_retained_data {
> * scoreboard.
> */
> int max_daemons_limit;
>+
>+ /*
>+ * All running workers, active and shutting down, including those that
>+ * may be left from before a graceful restart.
>+ * Not kept up-to-date when shutdown is pending.
>+ */
>+ int total_daemons;
>+
> /*
> * idle_spawn_rate is the number of children that will be spawned on the
> * next maintenance cycle if there aren't enough idle servers. It is
>@@ -548,7 +560,7 @@ static int event_query(int query_code, int *result, apr_status_t *rv)
> *result = ap_max_requests_per_child;
> break;
> case AP_MPMQ_MAX_DAEMONS:
>- *result = ap_daemons_limit;
>+ *result = active_daemons_limit;
> break;
> case AP_MPMQ_MPM_STATE:
> *result = mpm_state;
>@@ -585,27 +597,6 @@ static void event_note_child_started(int slot, pid_t pid)
> retained->my_generation, slot, MPM_CHILD_STARTED);
> }
> 
>-static void event_note_child_lost_slot(int slot, pid_t newpid)
>-{
>- ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf, APLOGNO(00458)
>- "pid %" APR_PID_T_FMT " taking over scoreboard slot from "
>- "%" APR_PID_T_FMT "%s",
>- newpid,
>- ap_scoreboard_image->parent[slot].pid,
>- ap_scoreboard_image->parent[slot].quiescing ?
>- " (quiescing)" : "");
>- ap_run_child_status(ap_server_conf,
>- ap_scoreboard_image->parent[slot].pid,
>- ap_scoreboard_image->parent[slot].generation,
>- slot, MPM_CHILD_LOST_SLOT);
>- /* Don't forget about this exiting child process, or we
>- * won't be able to kill it if it doesn't exit by the
>- * time the server is shut down.
>- */
>- ap_register_extra_mpm_process(ap_scoreboard_image->parent[slot].pid,
>- ap_scoreboard_image->parent[slot].generation);
>-}
>-
> static const char *event_get_name(void)
> {
> return "event";
>@@ -911,6 +902,8 @@ static int start_lingering_close_nonblocking(event_conn_state_t *cs)
> || apr_socket_shutdown(csd, APR_SHUTDOWN_WRITE) != APR_SUCCESS) {
> apr_socket_close(csd);
> ap_push_pool(worker_queue_info, cs->p);
>+ if (dying)
>+ ap_queue_interrupt_one(worker_queue);
> return 0;
> }
> return start_lingering_close_common(cs, 0);
>@@ -934,6 +927,8 @@ static int stop_lingering_close(event_conn_state_t *cs)
> AP_DEBUG_ASSERT(0);
> }
> ap_push_pool(worker_queue_info, cs->p);
>+ if (dying)
>+ ap_queue_interrupt_one(worker_queue);
> return 0;
> }
> 
>@@ -1219,6 +1214,9 @@ static void close_listeners(int process_slot, int *closed)
> }
> /* wake up the main thread */
> kill(ap_my_pid, SIGTERM);
>+
>+ ap_free_idle_pools(worker_queue_info);
>+ ap_queue_interrupt_all(worker_queue);
> }
> }
> 
>@@ -1439,6 +1437,8 @@ static void process_lingering_close(event_conn_state_t *cs, const apr_pollfd_t *
> TO_QUEUE_ELEM_INIT(cs);
> 
> ap_push_pool(worker_queue_info, cs->p);
>+ if (dying)
>+ ap_queue_interrupt_one(worker_queue);
> }
> 
> /* call 'func' for all elements of 'q' with timeout less than 'timeout_time'.
>@@ -1518,7 +1518,7 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy)
> timer_event_t *te;
> apr_status_t rc;
> proc_info *ti = dummy;
>- int process_slot = ti->pid;
>+ int process_slot = ti->pslot;
> apr_pool_t *tpool = apr_thread_pool_get(thd);
> void *csd = NULL;
> apr_pool_t *ptrans; /* Pool for per-transaction stuff */
>@@ -1584,6 +1584,12 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy)
> *keepalive_q->total,
> apr_atomic_read32(&lingering_count),
> apr_atomic_read32(&suspended_count));
>+ if (dying) {
>+ ap_log_error(APLOG_MARK, APLOG_TRACE6, 0, ap_server_conf,
>+ "%u/%u workers shutdown",
>+ apr_atomic_read32(&threads_shutdown),
>+ threads_per_child);
>+ }
> apr_thread_mutex_unlock(timeout_mutex);
> }
> }
>@@ -1818,11 +1824,12 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy)
> /* If all workers are busy, we kill older keep-alive connections so that they
> * may connect to another process.
> */
>- if (workers_were_busy && *keepalive_q->total) {
>- ap_log_error(APLOG_MARK, APLOG_TRACE1, 0, ap_server_conf,
>- "All workers are busy, will close %d keep-alive "
>- "connections",
>- *keepalive_q->total);
>+ if ((workers_were_busy || dying) && *keepalive_q->total) {
>+ if (!dying)
>+ ap_log_error(APLOG_MARK, APLOG_TRACE1, 0, ap_server_conf,
>+ "All workers are busy, will close %d keep-alive "
>+ "connections",
>+ *keepalive_q->total);
> process_timeout_queue(keepalive_q, 0,
> start_lingering_close_nonblocking);
> }
>@@ -1869,6 +1876,34 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy)
> return NULL;
> }
> 
>+/*
>+ * During graceful shutdown, if there are more running worker threads than
>+ * open connections, exit one worker thread.
>+ *
>+ * return 1 if thread should exit, 0 if it should continue running.
>+ */
>+static int worker_thread_should_exit_early(void)
>+{
>+ for (;;) {
>+ apr_uint32_t conns = apr_atomic_read32(&connection_count);
>+ apr_uint32_t dead = apr_atomic_read32(&threads_shutdown);
>+ apr_uint32_t newdead;
>+
>+ AP_DEBUG_ASSERT(dead <= threads_per_child);
>+ if (conns >= threads_per_child - dead)
>+ return 0;
>+
>+ newdead = dead + 1;
>+ if (apr_atomic_cas32(&threads_shutdown, newdead, dead) == dead) {
>+ /*
>+ * No other thread has exited in the mean time, safe to exit
>+ * this one.
>+ */
>+ return 1;
>+ }
>+ }
>+}
>+
> /* XXX For ungraceful termination/restart, we definitely don't want to
> * wait for active connections to finish but we may want to wait
> * for idle workers to get out of the queue code and release mutexes,
>@@ -1879,8 +1914,8 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy)
> static void *APR_THREAD_FUNC worker_thread(apr_thread_t * thd, void *dummy)
> {
> proc_info *ti = dummy;
>- int process_slot = ti->pid;
>- int thread_slot = ti->tid;
>+ int process_slot = ti->pslot;
>+ int thread_slot = ti->tslot;
> apr_socket_t *csd = NULL;
> event_conn_state_t *cs;
> apr_pool_t *ptrans; /* Pool for per-transaction stuff */
>@@ -1916,6 +1951,9 @@ static void *APR_THREAD_FUNC worker_thread(apr_thread_t * thd, void *dummy)
> if (workers_may_exit) {
> break;
> }
>+ if (dying && worker_thread_should_exit_early()) {
>+ break;
>+ }
> 
> te = NULL;
> rv = ap_queue_pop_something(worker_queue, &csd, &cs, &ptrans, &te);
>@@ -1993,9 +2031,8 @@ static void create_listener_thread(thread_starter * ts)
> apr_status_t rv;
> 
> my_info = (proc_info *) ap_malloc(sizeof(proc_info));
>- my_info->pid = my_child_num;
>- my_info->tid = -1; /* listener thread doesn't have a thread slot */
>- my_info->sd = 0;
>+ my_info->pslot = my_child_num;
>+ my_info->tslot = -1; /* listener thread doesn't have a thread slot */
> rv = apr_thread_create(&ts->listener, thread_attr, listener_thread,
> my_info, pchild);
> if (rv != APR_SUCCESS) {
>@@ -2103,14 +2140,13 @@ static void *APR_THREAD_FUNC start_threads(apr_thread_t * thd, void *dummy)
> int status =
> ap_scoreboard_image->servers[my_child_num][i].status;
> 
>- if (status != SERVER_GRACEFUL && status != SERVER_DEAD) {
>+ if (status != SERVER_DEAD) {
> continue;
> }
> 
> my_info = (proc_info *) ap_malloc(sizeof(proc_info));
>- my_info->pid = my_child_num;
>- my_info->tid = i;
>- my_info->sd = 0;
>+ my_info->pslot = my_child_num;
>+ my_info->tslot = i;
> 
> /* We are creating threads right now */
> ap_update_child_status_from_indexes(my_child_num, i,
>@@ -2411,6 +2447,15 @@ static int make_child(server_rec * s, int slot, int bucket)
> retained->max_daemons_limit = slot + 1;
> }
> 
>+ if (ap_scoreboard_image->parent[slot].pid != 0) {
>+ /* XXX replace with assert or remove ? */
>+ ap_log_error(APLOG_MARK, APLOG_ERR, 0, ap_server_conf, APLOGNO(03455)
>+ "BUG: Scoreboard slot %d should be empty but is "
>+ "in use by pid %" APR_PID_T_FMT,
>+ slot, ap_scoreboard_image->parent[slot].pid);
>+ return -1;
>+ }
>+
> if (one_process) {
> my_bucket = &all_buckets[0];
> 
>@@ -2464,17 +2509,12 @@ static int make_child(server_rec * s, int slot, int bucket)
> return -1;
> }
> 
>- if (ap_scoreboard_image->parent[slot].pid != 0) {
>- /* This new child process is squatting on the scoreboard
>- * entry owned by an exiting child process, which cannot
>- * exit until all active requests complete.
>- */
>- event_note_child_lost_slot(slot, pid);
>- }
> ap_scoreboard_image->parent[slot].quiescing = 0;
> ap_scoreboard_image->parent[slot].not_accepting = 0;
> ap_scoreboard_image->parent[slot].bucket = bucket;
> event_note_child_started(slot, pid);
>+ active_daemons++;
>+ retained->total_daemons++;
> return 0;
> }
> 
>@@ -2483,7 +2523,7 @@ static void startup_children(int number_to_start)
> {
> int i;
> 
>- for (i = 0; number_to_start && i < ap_daemons_limit; ++i) {
>+ for (i = 0; number_to_start && i < server_limit; ++i) {
> if (ap_scoreboard_image->parent[i].pid != 0) {
> continue;
> }
>@@ -2497,34 +2537,22 @@ static void startup_children(int number_to_start)
> static void perform_idle_server_maintenance(int child_bucket, int num_buckets)
> {
> int i, j;
>- int idle_thread_count;
>+ int idle_thread_count = 0;
> worker_score *ws;
> process_score *ps;
>- int free_length;
>- int totally_free_length = 0;
>+ int free_length = 0;
> int free_slots[MAX_SPAWN_RATE];
>- int last_non_dead;
>- int total_non_dead;
>+ int last_non_dead = -1;
> int active_thread_count = 0;
> 
>- /* initialize the free_list */
>- free_length = 0;
>-
>- idle_thread_count = 0;
>- last_non_dead = -1;
>- total_non_dead = 0;
>-
>- for (i = 0; i < ap_daemons_limit; ++i) {
>+ for (i = 0; i < server_limit; ++i) {
> /* Initialization to satisfy the compiler. It doesn't know
> * that threads_per_child is always > 0 */
> int status = SERVER_DEAD;
>- int any_dying_threads = 0;
>- int any_dead_threads = 0;
>- int all_dead_threads = 1;
> int child_threads_active = 0;
> 
> if (i >= retained->max_daemons_limit &&
>- totally_free_length == retained->idle_spawn_rate[child_bucket]) {
>+ free_length == retained->idle_spawn_rate[child_bucket]) {
> /* short cut if all active processes have been examined and
> * enough empty scoreboard slots have been found
> */
>@@ -2532,25 +2560,17 @@ static void perform_idle_server_maintenance(int child_bucket, int num_buckets)
> break;
> }
> ps = &ap_scoreboard_image->parent[i];
>- for (j = 0; j < threads_per_child; j++) {
>- ws = &ap_scoreboard_image->servers[i][j];
>- status = ws->status;
>-
>- /* XXX any_dying_threads is probably no longer needed GLA */
>- any_dying_threads = any_dying_threads ||
>- (status == SERVER_GRACEFUL);
>- any_dead_threads = any_dead_threads || (status == SERVER_DEAD);
>- all_dead_threads = all_dead_threads &&
>- (status == SERVER_DEAD || status == SERVER_GRACEFUL);
>-
>- /* We consider a starting server as idle because we started it
>- * at least a cycle ago, and if it still hasn't finished starting
>- * then we're just going to swamp things worse by forking more.
>- * So we hopefully won't need to fork more if we count it.
>- * This depends on the ordering of SERVER_READY and SERVER_STARTING.
>- */
>- if (ps->pid != 0) { /* XXX just set all_dead_threads in outer
>- for loop if no pid? not much else matters */
>+ if (ps->pid != 0) {
>+ for (j = 0; j < threads_per_child; j++) {
>+ ws = &ap_scoreboard_image->servers[i][j];
>+ status = ws->status;
>+
>+ /* We consider a starting server as idle because we started it
>+ * at least a cycle ago, and if it still hasn't finished starting
>+ * then we're just going to swamp things worse by forking more.
>+ * So we hopefully won't need to fork more if we count it.
>+ * This depends on the ordering of SERVER_READY and SERVER_STARTING.
>+ */
> if (status <= SERVER_READY && !ps->quiescing && !ps->not_accepting
> && ps->generation == retained->my_generation
> && ps->bucket == child_bucket)
>@@ -2561,39 +2581,13 @@ static void perform_idle_server_maintenance(int child_bucket, int num_buckets)
> ++child_threads_active;
> }
> }
>+ last_non_dead = i;
> }
> active_thread_count += child_threads_active;
>- if (any_dead_threads
>- && totally_free_length < retained->idle_spawn_rate[child_bucket]
>- && free_length < MAX_SPAWN_RATE / num_buckets
>- && (!ps->pid /* no process in the slot */
>- || ps->quiescing)) { /* or at least one is going away */
>- if (all_dead_threads) {
>- /* great! we prefer these, because the new process can
>- * start more threads sooner. So prioritize this slot
>- * by putting it ahead of any slots with active threads.
>- *
>- * first, make room by moving a slot that's potentially still
>- * in use to the end of the array
>- */
>- free_slots[free_length] = free_slots[totally_free_length];
>- free_slots[totally_free_length++] = i;
>- }
>- else {
>- /* slot is still in use - back of the bus
>- */
>- free_slots[free_length] = i;
>- }
>- ++free_length;
>- }
>- else if (child_threads_active == threads_per_child) {
>+ if (!ps->pid && free_length < retained->idle_spawn_rate[child_bucket])
>+ free_slots[free_length++] = i;
>+ else if (child_threads_active == threads_per_child)
> had_healthy_child = 1;
>- }
>- /* XXX if (!ps->quiescing) is probably more reliable GLA */
>- if (!any_dying_threads) {
>- last_non_dead = i;
>- ++total_non_dead;
>- }
> }
> 
> if (retained->sick_child_detected) {
>@@ -2621,32 +2615,56 @@ static void perform_idle_server_maintenance(int child_bucket, int num_buckets)
> 
> retained->max_daemons_limit = last_non_dead + 1;
> 
>- if (idle_thread_count > max_spare_threads / num_buckets) {
>- /* Kill off one child */
>- ap_mpm_podx_signal(all_buckets[child_bucket].pod,
>- AP_MPM_PODX_GRACEFUL);
>- retained->idle_spawn_rate[child_bucket] = 1;
>+ if (idle_thread_count > max_spare_threads / num_buckets)
>+ {
>+ /*
>+ * Child processes that we ask to shut down won't die immediately
>+ * but may stay around for a long time while they finish their
>+ * requests. If the server load changes many times, many such
>+ * gracefully finishing processes may accumulate, filling up the
>+ * scoreboard. To avoid running out of scoreboard entries, we
>+ * don't shut down more processes when the total number of processes
>+ * is high.
>+ *
>+ * XXX It would be nice if we could
>+ * XXX - kill processes without keepalive connections first
>+ * XXX - tell children to stop accepting new connections, and
>+ * XXX depending on server load, later be able to resurrect them
>+ * or kill them
>+ */
>+ if (retained->total_daemons <= active_daemons_limit &&
>+ retained->total_daemons < server_limit) {
>+ /* Kill off one child */
>+ ap_mpm_podx_signal(all_buckets[child_bucket].pod,
>+ AP_MPM_PODX_GRACEFUL);
>+ retained->idle_spawn_rate[child_bucket] = 1;
>+ active_daemons--;
>+ } else {
>+ ap_log_error(APLOG_MARK, APLOG_TRACE5, 0, ap_server_conf,
>+ "Not shutting down child: total daemons %d / "
>+ "active limit %d / ServerLimit %d",
>+ retained->total_daemons, active_daemons_limit,
>+ server_limit);
>+ }
> }
> else if (idle_thread_count < min_spare_threads / num_buckets) {
>- /* terminate the free list */
>- if (free_length == 0) { /* scoreboard is full, can't fork */
>-
>- if (active_thread_count >= ap_daemons_limit * threads_per_child) {
>- if (!retained->maxclients_reported) {
>- /* only report this condition once */
>- ap_log_error(APLOG_MARK, APLOG_ERR, 0, ap_server_conf, APLOGNO(00484)
>- "server reached MaxRequestWorkers setting, "
>- "consider raising the MaxRequestWorkers "
>- "setting");
>- retained->maxclients_reported = 1;
>- }
>- }
>- else {
>- ap_log_error(APLOG_MARK, APLOG_ERR, 0, ap_server_conf, APLOGNO(00485)
>- "scoreboard is full, not at MaxRequestWorkers");
>+ if (active_thread_count >= max_workers) {
>+ if (!retained->maxclients_reported) {
>+ /* only report this condition once */
>+ ap_log_error(APLOG_MARK, APLOG_ERR, 0, ap_server_conf, APLOGNO(00484)
>+ "server reached MaxRequestWorkers setting, "
>+ "consider raising the MaxRequestWorkers "
>+ "setting");
>+ retained->maxclients_reported = 1;
> }
> retained->idle_spawn_rate[child_bucket] = 1;
> }
>+ else if (free_length == 0) { /* scoreboard is full, can't fork */
>+ ap_log_error(APLOG_MARK, APLOG_ERR, 0, ap_server_conf, APLOGNO()
>+ "scoreboard is full, not at MaxRequestWorkers. "
>+ "Increase ServerLimit.");
>+ retained->idle_spawn_rate[child_bucket] = 1;
>+ }
> else {
> if (free_length > retained->idle_spawn_rate[child_bucket]) {
> free_length = retained->idle_spawn_rate[child_bucket];
>@@ -2657,10 +2675,17 @@ static void perform_idle_server_maintenance(int child_bucket, int num_buckets)
> "to increase StartServers, ThreadsPerChild "
> "or Min/MaxSpareThreads), "
> "spawning %d children, there are around %d idle "
>- "threads, and %d total children", free_length,
>- idle_thread_count, total_non_dead);
>+ "threads, %d active children, and %d children "
>+ "that are shutting down", free_length,
>+ idle_thread_count, active_daemons,
>+ retained->total_daemons);
> }
> for (i = 0; i < free_length; ++i) {
>+ ap_log_error(APLOG_MARK, APLOG_TRACE5, 0, ap_server_conf,
>+ "Spawning new child: slot %d active / "
>+ "total daemons: %d/%d",
>+ free_slots[i], active_daemons,
>+ retained->total_daemons);
> make_child(ap_server_conf, free_slots[i], child_bucket);
> }
> /* the next time around we want to spawn twice as many if this
>@@ -2682,7 +2707,6 @@ static void perform_idle_server_maintenance(int child_bucket, int num_buckets)
> 
> static void server_main_loop(int remaining_children_to_start, int num_buckets)
> {
>- ap_generation_t old_gen;
> int child_slot;
> apr_exit_why_e exitwhy;
> int status, processed_status;
>@@ -2732,13 +2756,15 @@ static void server_main_loop(int remaining_children_to_start, int num_buckets)
> 
> event_note_child_killed(child_slot, 0, 0);
> ps = &ap_scoreboard_image->parent[child_slot];
>+ if (!ps->quiescing)
>+ active_daemons--;
> ps->quiescing = 0;
>+ retained->total_daemons--;
> if (processed_status == APEXIT_CHILDSICK) {
> /* resource shortage, minimize the fork rate */
> retained->idle_spawn_rate[ps->bucket] = 1;
> }
>- else if (remaining_children_to_start
>- && child_slot < ap_daemons_limit) {
>+ else if (remaining_children_to_start) {
> /* we're still doing a 1-for-1 replacement of dead
> * children with new children
> */
>@@ -2746,24 +2772,12 @@ static void server_main_loop(int remaining_children_to_start, int num_buckets)
> --remaining_children_to_start;
> }
> }
>- else if (ap_unregister_extra_mpm_process(pid.pid, &old_gen) == 1) {
>-
>- event_note_child_killed(-1, /* already out of the scoreboard */
>- pid.pid, old_gen);
>- if (processed_status == APEXIT_CHILDSICK
>- && old_gen == retained->my_generation) {
>- /* resource shortage, minimize the fork rate */
>- for (i = 0; i < num_buckets; i++) {
>- retained->idle_spawn_rate[i] = 1;
>- }
>- }
> #if APR_HAS_OTHER_CHILD
>- }
> else if (apr_proc_other_child_alert(&pid, APR_OC_REASON_DEATH,
> status) == 0) {
> /* handled */
>-#endif
> }
>+#endif
> else if (retained->is_graceful) {
> /* Great, we've probably just lost a slot in the
> * scoreboard. Somehow we don't know about this child.
>@@ -2825,8 +2839,8 @@ static int event_run(apr_pool_t * _pconf, apr_pool_t * plog, server_rec * s)
> /* Don't thrash since num_buckets depends on the
> * system and the number of online CPU cores...
> */
>- if (ap_daemons_limit < num_buckets)
>- ap_daemons_limit = num_buckets;
>+ if (active_daemons_limit < num_buckets)
>+ active_daemons_limit = num_buckets;
> if (ap_daemons_to_start < num_buckets)
> ap_daemons_to_start = num_buckets;
> /* We want to create as much children at a time as the number of buckets,
>@@ -2850,8 +2864,8 @@ static int event_run(apr_pool_t * _pconf, apr_pool_t * plog, server_rec * s)
> * supposed to start up without the 1 second penalty between each fork.
> */
> remaining_children_to_start = ap_daemons_to_start;
>- if (remaining_children_to_start > ap_daemons_limit) {
>- remaining_children_to_start = ap_daemons_limit;
>+ if (remaining_children_to_start > active_daemons_limit) {
>+ remaining_children_to_start = active_daemons_limit;
> }
> if (!retained->is_graceful) {
> startup_children(remaining_children_to_start);
>@@ -2881,7 +2895,7 @@ static int event_run(apr_pool_t * _pconf, apr_pool_t * plog, server_rec * s)
> * Kill child processes, tell them to call child_exit, etc...
> */
> for (i = 0; i < num_buckets; i++) {
>- ap_mpm_podx_killpg(all_buckets[i].pod, ap_daemons_limit,
>+ ap_mpm_podx_killpg(all_buckets[i].pod, active_daemons_limit,
> AP_MPM_PODX_RESTART);
> }
> ap_reclaim_child_processes(1, /* Start with SIGTERM */
>@@ -2905,7 +2919,7 @@ static int event_run(apr_pool_t * _pconf, apr_pool_t * plog, server_rec * s)
> /* Close our listeners, and then ask our children to do same */
> ap_close_listeners();
> for (i = 0; i < num_buckets; i++) {
>- ap_mpm_podx_killpg(all_buckets[i].pod, ap_daemons_limit,
>+ ap_mpm_podx_killpg(all_buckets[i].pod, active_daemons_limit,
> AP_MPM_PODX_GRACEFUL);
> }
> ap_relieve_child_processes(event_note_child_killed);
>@@ -2933,7 +2947,7 @@ static int event_run(apr_pool_t * _pconf, apr_pool_t * plog, server_rec * s)
> ap_relieve_child_processes(event_note_child_killed);
> 
> active_children = 0;
>- for (index = 0; index < ap_daemons_limit; ++index) {
>+ for (index = 0; index < retained->max_daemons_limit; ++index) {
> if (ap_mpm_safe_kill(MPM_CHILD_PID(index), 0) == APR_SUCCESS) {
> active_children = 1;
> /* Having just one child is enough to stay around */
>@@ -2948,7 +2962,7 @@ static int event_run(apr_pool_t * _pconf, apr_pool_t * plog, server_rec * s)
> * really dead.
> */
> for (i = 0; i < num_buckets; i++) {
>- ap_mpm_podx_killpg(all_buckets[i].pod, ap_daemons_limit,
>+ ap_mpm_podx_killpg(all_buckets[i].pod, active_daemons_limit,
> AP_MPM_PODX_RESTART);
> }
> ap_reclaim_child_processes(1, event_note_child_killed);
>@@ -2977,7 +2991,7 @@ static int event_run(apr_pool_t * _pconf, apr_pool_t * plog, server_rec * s)
> " received. Doing graceful restart");
> /* wake up the children...time to die. But we'll have more soon */
> for (i = 0; i < num_buckets; i++) {
>- ap_mpm_podx_killpg(all_buckets[i].pod, ap_daemons_limit,
>+ ap_mpm_podx_killpg(all_buckets[i].pod, active_daemons_limit,
> AP_MPM_PODX_GRACEFUL);
> }
> 
>@@ -2992,7 +3006,7 @@ static int event_run(apr_pool_t * _pconf, apr_pool_t * plog, server_rec * s)
> * pthreads are stealing signals from us left and right.
> */
> for (i = 0; i < num_buckets; i++) {
>- ap_mpm_podx_killpg(all_buckets[i].pod, ap_daemons_limit,
>+ ap_mpm_podx_killpg(all_buckets[i].pod, active_daemons_limit,
> AP_MPM_PODX_RESTART);
> }
> 
>@@ -3002,6 +3016,8 @@ static int event_run(apr_pool_t * _pconf, apr_pool_t * plog, server_rec * s)
> "SIGHUP received. Attempting to restart");
> }
> 
>+ active_daemons = 0;
>+
> return OK;
> }
> 
>@@ -3215,9 +3231,9 @@ static int event_pre_config(apr_pool_t * pconf, apr_pool_t * plog,
> max_spare_threads = DEFAULT_MAX_FREE_DAEMON * DEFAULT_THREADS_PER_CHILD;
> server_limit = DEFAULT_SERVER_LIMIT;
> thread_limit = DEFAULT_THREAD_LIMIT;
>- ap_daemons_limit = server_limit;
>+ active_daemons_limit = server_limit;
> threads_per_child = DEFAULT_THREADS_PER_CHILD;
>- max_workers = ap_daemons_limit * threads_per_child;
>+ max_workers = active_daemons_limit * threads_per_child;
> had_healthy_child = 0;
> ap_extended_status = 0;
> 
>@@ -3426,10 +3442,10 @@ static int event_check_config(apr_pool_t *p, apr_pool_t *plog,
> max_workers = threads_per_child;
> }
> 
>- ap_daemons_limit = max_workers / threads_per_child;
>+ active_daemons_limit = max_workers / threads_per_child;
> 
> if (max_workers % threads_per_child) {
>- int tmp_max_workers = ap_daemons_limit * threads_per_child;
>+ int tmp_max_workers = active_daemons_limit * threads_per_child;
> 
> if (startup) {
> ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL, APLOGNO(00513)
>@@ -3437,7 +3453,7 @@ static int event_check_config(apr_pool_t *p, apr_pool_t *plog,
> "multiple of ThreadsPerChild of %d, decreasing to nearest "
> "multiple %d, for a maximum of %d servers.",
> max_workers, threads_per_child, tmp_max_workers,
>- ap_daemons_limit);
>+ active_daemons_limit);
> } else {
> ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s, APLOGNO(00514)
> "MaxRequestWorkers of %d is not an integer multiple "
>@@ -3448,25 +3464,25 @@ static int event_check_config(apr_pool_t *p, apr_pool_t *plog,
> max_workers = tmp_max_workers;
> }
> 
>- if (ap_daemons_limit > server_limit) {
>+ if (active_daemons_limit > server_limit) {
> if (startup) {
> ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL, APLOGNO(00515)
> "WARNING: MaxRequestWorkers of %d would require %d servers "
> "and would exceed ServerLimit of %d, decreasing to %d. "
> "To increase, please see the ServerLimit directive.",
>- max_workers, ap_daemons_limit, server_limit,
>+ max_workers, active_daemons_limit, server_limit,
> server_limit * threads_per_child);
> } else {
> ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s, APLOGNO(00516)
> "MaxRequestWorkers of %d would require %d servers and "
> "exceed ServerLimit of %d, decreasing to %d",
>- max_workers, ap_daemons_limit, server_limit,
>+ max_workers, active_daemons_limit, server_limit,
> server_limit * threads_per_child);
> }
>- ap_daemons_limit = server_limit;
>+ active_daemons_limit = server_limit;
> }
> 
>- /* ap_daemons_to_start > ap_daemons_limit checked in ap_mpm_run() */
>+ /* ap_daemons_to_start > active_daemons_limit checked in ap_mpm_run() */
> if (ap_daemons_to_start < 1) {
> if (startup) {
> ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL, APLOGNO(00517)
>diff --git a/server/mpm/event/fdqueue.c b/server/mpm/event/fdqueue.c
>index 343146f..64b318d 100644
>--- a/server/mpm/event/fdqueue.c
>+++ b/server/mpm/event/fdqueue.c
>@@ -280,6 +280,19 @@ void ap_pop_pool(apr_pool_t ** recycled_pool, fd_queue_info_t * queue_info)
> }
> }
> 
>+void ap_free_idle_pools(fd_queue_info_t *queue_info)
>+{
>+ apr_pool_t *p;
>+
>+ queue_info->max_recycled_pools = 0;
>+ do {
>+ ap_pop_pool(&p, queue_info);
>+ if (p != NULL)
>+ apr_pool_destroy(p);
>+ } while (p != NULL);
>+}
>+
>+
> apr_status_t ap_queue_info_term(fd_queue_info_t * queue_info)
> {
> apr_status_t rv;
>@@ -477,17 +490,30 @@ apr_status_t ap_queue_pop_something(fd_queue_t * queue, apr_socket_t ** sd,
> return rv;
> }
> 
>-apr_status_t ap_queue_interrupt_all(fd_queue_t * queue)
>+static apr_status_t queue_interrupt(fd_queue_t * queue, int all) /* shared body of ap_queue_interrupt_all()/_one() */
> {
> apr_status_t rv;
> 
> if ((rv = apr_thread_mutex_lock(queue->one_big_mutex)) != APR_SUCCESS) {
> return rv;
> }
>- apr_thread_cond_broadcast(queue->not_empty);
>+ if (all) /* nonzero: broadcast on not_empty; zero: signal it once */
>+ apr_thread_cond_broadcast(queue->not_empty); /* status of the cond call is not checked */
>+ else
>+ apr_thread_cond_signal(queue->not_empty); /* only the lock/unlock status reaches the caller */
> return apr_thread_mutex_unlock(queue->one_big_mutex);
> }
> 
>+apr_status_t ap_queue_interrupt_all(fd_queue_t * queue)
>+{
>+ return queue_interrupt(queue, 1); /* all = 1: broadcast on queue->not_empty */
>+}
>+
>+apr_status_t ap_queue_interrupt_one(fd_queue_t * queue)
>+{
>+ return queue_interrupt(queue, 0); /* all = 0: single signal on queue->not_empty */
>+}
>+
> apr_status_t ap_queue_term(fd_queue_t * queue)
> {
> apr_status_t rv;
>diff --git a/server/mpm/event/fdqueue.h b/server/mpm/event/fdqueue.h
>index 955816b..37be684 100644
>--- a/server/mpm/event/fdqueue.h
>+++ b/server/mpm/event/fdqueue.h
>@@ -52,6 +52,7 @@ apr_status_t ap_queue_info_wait_for_idler(fd_queue_info_t * queue_info,
> int *had_to_block);
> apr_status_t ap_queue_info_term(fd_queue_info_t * queue_info);
> apr_uint32_t ap_queue_info_get_idlers(fd_queue_info_t * queue_info);
>+void ap_free_idle_pools(fd_queue_info_t *queue_info);
> 
> struct fd_queue_elem_t
> {
>@@ -98,6 +99,7 @@ apr_status_t ap_queue_pop_something(fd_queue_t * queue, apr_socket_t ** sd,
> event_conn_state_t ** ecs, apr_pool_t ** p,
> timer_event_t ** te);
> apr_status_t ap_queue_interrupt_all(fd_queue_t * queue);
>+apr_status_t ap_queue_interrupt_one(fd_queue_t * queue);
> apr_status_t ap_queue_term(fd_queue_t * queue);
> 
> #endif /* FDQUEUE_H */
>diff --git a/server/mpm_unix.c b/server/mpm_unix.c
>index 1a7f935..926eff4 100644
>--- a/server/mpm_unix.c
>+++ b/server/mpm_unix.c
>@@ -63,7 +63,13 @@
> #undef APLOG_MODULE_INDEX
> #define APLOG_MODULE_INDEX AP_CORE_MODULE_INDEX
> 
>-typedef enum {DO_NOTHING, SEND_SIGTERM, SEND_SIGKILL, GIVEUP} action_t;
>+typedef enum { /* actions passed to reclaim_one_pid() */
>+ DO_NOTHING,
>+ SEND_SIGTERM,
>+ SEND_SIGTERM_NOLOG, /* reaches the same kill(pid, SIGTERM) as SEND_SIGTERM, entered after its log call */
>+ SEND_SIGKILL,
>+ GIVEUP
>+} action_t;
> 
> typedef struct extra_process_t {
> struct extra_process_t *next;
>@@ -142,6 +148,8 @@ static int reclaim_one_pid(pid_t pid, action_t action)
> " still did not exit, "
> "sending a SIGTERM",
> pid);
>+ /* FALLTHROUGH */
>+ case SEND_SIGTERM_NOLOG:
> kill(pid, SIGTERM);
> break;
> 
>@@ -193,6 +201,7 @@ AP_DECLARE(void) ap_reclaim_child_processes(int terminate,
> * children but take no action against
> * stragglers
> */
>+ {SEND_SIGTERM_NOLOG, 0}, /* skipped if terminate == 0 */
> {SEND_SIGTERM, apr_time_from_sec(3)},
> {SEND_SIGTERM, apr_time_from_sec(5)},
> {SEND_SIGTERM, apr_time_from_sec(7)},
>@@ -202,19 +211,21 @@ AP_DECLARE(void) ap_reclaim_child_processes(int terminate,
> int cur_action; /* index of action we decided to take this
> * iteration
> */
>- int next_action = 1; /* index of first real action */
>+ int next_action = terminate ? 1 : 2; /* index of first real action */
> 
> ap_mpm_query(AP_MPMQ_MAX_DAEMON_USED, &max_daemons);
> 
> do {
>- apr_sleep(waittime);
>- /* don't let waittime get longer than 1 second; otherwise, we don't
>- * react quickly to the last child exiting, and taking action can
>- * be delayed
>- */
>- waittime = waittime * 4;
>- if (waittime > apr_time_from_sec(1)) {
>- waittime = apr_time_from_sec(1);
>+ if (action_table[next_action].action_time > 0) {
>+ apr_sleep(waittime);
>+ /* don't let waittime get longer than 1 second; otherwise, we don't
>+ * react quickly to the last child exiting, and taking action can
>+ * be delayed
>+ */
>+ waittime = waittime * 4;
>+ if (waittime > apr_time_from_sec(1)) {
>+ waittime = apr_time_from_sec(1);
>+ }
> }
> 
> /* see what action to take, if any */
>diff --git a/server/scoreboard.c b/server/scoreboard.c
>index 8e3403f..d83a849 100644
>--- a/server/scoreboard.c
>+++ b/server/scoreboard.c
>@@ -399,7 +399,7 @@ AP_DECLARE(int) ap_find_child_by_pid(apr_proc_t *pid)
> int i;
> int max_daemons_limit = 0;
> 
>- ap_mpm_query(AP_MPMQ_MAX_DAEMONS, &max_daemons_limit);
>+ ap_mpm_query(AP_MPMQ_MAX_DAEMON_USED, &max_daemons_limit);
> 
> for (i = 0; i < max_daemons_limit; ++i) {
> if (ap_scoreboard_image->parent[i].pid == pid->pid) {

Actions: View | Diff

Attachments on bug 53555: 33154 | 33158 | 33749 | 33750 | 34201 | 34202