ASF Bugzilla – Attachment 34201 Details for
Bug 53555
Scoreboard full error with event/ssl
Home
|
New
|
Browse
|
Search
|
[?]
|
Reports
|
Help
|
New Account
|
Log In
Remember
[x]
|
Forgot Password
Login:
[x]
[patch]
Use all scoreboard entries up to ServerLimit, for trunk
use-all-scoreboard-trunk.diff (text/plain), 22.86 KB, created by
Stefan Fritsch
on 2016-09-05 21:45:56 UTC
(
hide
)
Description:
Use all scoreboard entries up to ServerLimit, for trunk
Filename:
MIME Type:
Creator:
Stefan Fritsch
Created:
2016-09-05 21:45:56 UTC
Size:
22.86 KB
patch
obsolete
>diff --git a/include/mpm_common.h b/include/mpm_common.h >index 30927ed..b366a08 100644 >--- a/include/mpm_common.h >+++ b/include/mpm_common.h >@@ -94,8 +94,7 @@ typedef void ap_reclaim_callback_fn_t(int childnum, pid_t pid, > * Make sure all child processes that have been spawned by the parent process > * have died. This includes process registered as "other_children". > * >- * @param terminate Not Implemented, value is ignored !!! >- * Either 1 or 0. If 1, send the child processes SIGTERM >+ * @param terminate Either 1 or 0. If 1, send the child processes SIGTERM > * each time through the loop. If 0, give the process time to die > * on its own before signalling it. > * @param mpm_callback Callback invoked for each dead child process >diff --git a/server/mpm/event/event.c b/server/mpm/event/event.c >index 54bfa9a..f7b3da7 100644 >--- a/server/mpm/event/event.c >+++ b/server/mpm/event/event.c >@@ -173,7 +173,9 @@ static int threads_per_child = 0; /* ThreadsPerChild */ > static int ap_daemons_to_start = 0; /* StartServers */ > static int min_spare_threads = 0; /* MinSpareThreads */ > static int max_spare_threads = 0; /* MaxSpareThreads */ >-static int ap_daemons_limit = 0; >+static int active_daemons_limit = 0; /* MaxRequestWorkers / ThreadsPerChild */ >+static int active_daemons = 0; /* workers that are still active, i.e. are >+ not shutting down gracefully */ > static int max_workers = 0; /* MaxRequestWorkers */ > static int server_limit = 0; /* ServerLimit */ > static int thread_limit = 0; /* ThreadLimit */ >@@ -366,6 +368,14 @@ typedef struct event_retained_data { > * scoreboard. > */ > int max_daemons_limit; >+ >+ /* >+ * All running workers, active and shutting down, including those that >+ * may be left from before a graceful restart. >+ * Not kept up-to-date when shutdown is pending. >+ */ >+ int total_daemons; >+ > /* > * idle_spawn_rate is the number of children that will be spawned on the > * next maintenance cycle if there aren't enough idle servers. 
It is >@@ -582,7 +592,7 @@ static int event_query(int query_code, int *result, apr_status_t *rv) > *result = ap_max_requests_per_child; > break; > case AP_MPMQ_MAX_DAEMONS: >- *result = ap_daemons_limit; >+ *result = active_daemons_limit; > break; > case AP_MPMQ_MPM_STATE: > *result = mpm_state; >@@ -2766,6 +2776,8 @@ static int make_child(server_rec * s, int slot, int bucket) > ap_scoreboard_image->parent[slot].not_accepting = 0; > ap_scoreboard_image->parent[slot].bucket = bucket; > event_note_child_started(slot, pid); >+ active_daemons++; >+ retained->total_daemons++; > return 0; > } > >@@ -2774,7 +2786,7 @@ static void startup_children(int number_to_start) > { > int i; > >- for (i = 0; number_to_start && i < ap_daemons_limit; ++i) { >+ for (i = 0; number_to_start && i < server_limit; ++i) { > if (ap_scoreboard_image->parent[i].pid != 0) { > continue; > } >@@ -2794,16 +2806,12 @@ static void perform_idle_server_maintenance(int child_bucket, int num_buckets) > int free_length = 0; > int free_slots[MAX_SPAWN_RATE]; > int last_non_dead = -1; >- int total_non_dead = 0; > int active_thread_count = 0; > >- for (i = 0; i < ap_daemons_limit; ++i) { >+ for (i = 0; i < server_limit; ++i) { > /* Initialization to satisfy the compiler. 
It doesn't know > * that threads_per_child is always > 0 */ > int status = SERVER_DEAD; >- int any_dying_threads = 0; >- int any_dead_threads = 0; >- int all_dead_threads = 1; > int child_threads_active = 0; > > if (i >= retained->max_daemons_limit && >@@ -2815,25 +2823,17 @@ static void perform_idle_server_maintenance(int child_bucket, int num_buckets) > break; > } > ps = &ap_scoreboard_image->parent[i]; >- for (j = 0; j < threads_per_child; j++) { >- ws = &ap_scoreboard_image->servers[i][j]; >- status = ws->status; >- >- /* XXX any_dying_threads is probably no longer needed GLA */ >- any_dying_threads = any_dying_threads || >- (status == SERVER_GRACEFUL); >- any_dead_threads = any_dead_threads || (status == SERVER_DEAD); >- all_dead_threads = all_dead_threads && >- (status == SERVER_DEAD || status == SERVER_GRACEFUL); >- >- /* We consider a starting server as idle because we started it >- * at least a cycle ago, and if it still hasn't finished starting >- * then we're just going to swamp things worse by forking more. >- * So we hopefully won't need to fork more if we count it. >- * This depends on the ordering of SERVER_READY and SERVER_STARTING. >- */ >- if (ps->pid != 0) { /* XXX just set all_dead_threads in outer >- for loop if no pid? not much else matters */ >+ if (ps->pid != 0) { >+ for (j = 0; j < threads_per_child; j++) { >+ ws = &ap_scoreboard_image->servers[i][j]; >+ status = ws->status; >+ >+ /* We consider a starting server as idle because we started it >+ * at least a cycle ago, and if it still hasn't finished starting >+ * then we're just going to swamp things worse by forking more. >+ * So we hopefully won't need to fork more if we count it. >+ * This depends on the ordering of SERVER_READY and SERVER_STARTING. 
>+ */ > if (status <= SERVER_READY && !ps->quiescing && !ps->not_accepting > && ps->generation == retained->my_generation > && ps->bucket == child_bucket) >@@ -2844,20 +2844,13 @@ static void perform_idle_server_maintenance(int child_bucket, int num_buckets) > ++child_threads_active; > } > } >+ last_non_dead = i; > } > active_thread_count += child_threads_active; > if (!ps->pid && free_length < retained->idle_spawn_rate[child_bucket]) >- { > free_slots[free_length++] = i; >- } >- else if (child_threads_active == threads_per_child) { >+ else if (child_threads_active == threads_per_child) > had_healthy_child = 1; >- } >- /* XXX if (!ps->quiescing) is probably more reliable GLA */ >- if (!any_dying_threads) { >- last_non_dead = i; >- ++total_non_dead; >- } > } > > if (retained->sick_child_detected) { >@@ -2885,32 +2878,56 @@ static void perform_idle_server_maintenance(int child_bucket, int num_buckets) > > retained->max_daemons_limit = last_non_dead + 1; > >- if (idle_thread_count > max_spare_threads / num_buckets) { >- /* Kill off one child */ >- ap_mpm_podx_signal(all_buckets[child_bucket].pod, >- AP_MPM_PODX_GRACEFUL); >- retained->idle_spawn_rate[child_bucket] = 1; >+ if (idle_thread_count > max_spare_threads / num_buckets) >+ { >+ /* >+ * Child processes that we ask to shut down won't die immediately >+ * but may stay around for a long time when they finish their >+ * requests. If the server load changes many times, many such >+ * gracefully finishing processes may accumulate, filling up the >+ * scoreboard. To avoid running out of scoreboard entries, we >+ * don't shut down more processes when the total number of processes >+ * is high. 
>+ * >+ * XXX It would be nice if we could >+ * XXX - kill processes without keepalive connections first >+ * XXX - tell children to stop accepting new connections, and >+ * XXX depending on server load, later be able to resurrect them >+ * or kill them >+ */ >+ if (retained->total_daemons <= active_daemons_limit && >+ retained->total_daemons < server_limit) { >+ /* Kill off one child */ >+ ap_mpm_podx_signal(all_buckets[child_bucket].pod, >+ AP_MPM_PODX_GRACEFUL); >+ retained->idle_spawn_rate[child_bucket] = 1; >+ active_daemons--; >+ } else { >+ ap_log_error(APLOG_MARK, APLOG_TRACE5, 0, ap_server_conf, >+ "Not shutting down child: total daemons %d / " >+ "active limit %d / ServerLimit %d", >+ retained->total_daemons, active_daemons_limit, >+ server_limit); >+ } > } > else if (idle_thread_count < min_spare_threads / num_buckets) { >- /* terminate the free list */ >- if (free_length == 0) { /* scoreboard is full, can't fork */ >- >- if (active_thread_count >= max_workers) { >- if (!retained->maxclients_reported) { >- /* only report this condition once */ >- ap_log_error(APLOG_MARK, APLOG_ERR, 0, ap_server_conf, APLOGNO(00484) >- "server reached MaxRequestWorkers setting, " >- "consider raising the MaxRequestWorkers " >- "setting"); >- retained->maxclients_reported = 1; >- } >- } >- else { >- ap_log_error(APLOG_MARK, APLOG_ERR, 0, ap_server_conf, APLOGNO(00485) >- "scoreboard is full, not at MaxRequestWorkers"); >+ if (active_thread_count >= max_workers) { >+ if (!retained->maxclients_reported) { >+ /* only report this condition once */ >+ ap_log_error(APLOG_MARK, APLOG_ERR, 0, ap_server_conf, APLOGNO(00484) >+ "server reached MaxRequestWorkers setting, " >+ "consider raising the MaxRequestWorkers " >+ "setting"); >+ retained->maxclients_reported = 1; > } > retained->idle_spawn_rate[child_bucket] = 1; > } >+ else if (free_length == 0) { /* scoreboard is full, can't fork */ >+ ap_log_error(APLOG_MARK, APLOG_ERR, 0, ap_server_conf, APLOGNO() >+ "scoreboard is full, 
not at MaxRequestWorkers. " >+ "Increase ServerLimit."); >+ retained->idle_spawn_rate[child_bucket] = 1; >+ } > else { > if (free_length > retained->idle_spawn_rate[child_bucket]) { > free_length = retained->idle_spawn_rate[child_bucket]; >@@ -2921,10 +2938,17 @@ static void perform_idle_server_maintenance(int child_bucket, int num_buckets) > "to increase StartServers, ThreadsPerChild " > "or Min/MaxSpareThreads), " > "spawning %d children, there are around %d idle " >- "threads, and %d total children", free_length, >- idle_thread_count, total_non_dead); >+ "threads, %d active children, and %d children " >+ "that are shutting down", free_length, >+ idle_thread_count, active_daemons, >+ retained->total_daemons); > } > for (i = 0; i < free_length; ++i) { >+ ap_log_error(APLOG_MARK, APLOG_TRACE5, 0, ap_server_conf, >+ "Spawning new child: slot %d active / " >+ "total daemons: %d/%d", >+ free_slots[i], active_daemons, >+ retained->total_daemons); > make_child(ap_server_conf, free_slots[i], child_bucket); > } > /* the next time around we want to spawn twice as many if this >@@ -2995,13 +3019,15 @@ static void server_main_loop(int remaining_children_to_start, int num_buckets) > > event_note_child_killed(child_slot, 0, 0); > ps = &ap_scoreboard_image->parent[child_slot]; >+ if (!ps->quiescing) >+ active_daemons--; > ps->quiescing = 0; >+ retained->total_daemons--; > if (processed_status == APEXIT_CHILDSICK) { > /* resource shortage, minimize the fork rate */ > retained->idle_spawn_rate[ps->bucket] = 1; > } >- else if (remaining_children_to_start >- && child_slot < ap_daemons_limit) { >+ else if (remaining_children_to_start) { > /* we're still doing a 1-for-1 replacement of dead > * children with new children > */ >@@ -3076,8 +3102,8 @@ static int event_run(apr_pool_t * _pconf, apr_pool_t * plog, server_rec * s) > /* Don't thrash since num_buckets depends on the > * system and the number of online CPU cores... 
> */ >- if (ap_daemons_limit < num_buckets) >- ap_daemons_limit = num_buckets; >+ if (active_daemons_limit < num_buckets) >+ active_daemons_limit = num_buckets; > if (ap_daemons_to_start < num_buckets) > ap_daemons_to_start = num_buckets; > /* We want to create as much children at a time as the number of buckets, >@@ -3101,8 +3127,8 @@ static int event_run(apr_pool_t * _pconf, apr_pool_t * plog, server_rec * s) > * supposed to start up without the 1 second penalty between each fork. > */ > remaining_children_to_start = ap_daemons_to_start; >- if (remaining_children_to_start > ap_daemons_limit) { >- remaining_children_to_start = ap_daemons_limit; >+ if (remaining_children_to_start > active_daemons_limit) { >+ remaining_children_to_start = active_daemons_limit; > } > if (!retained->is_graceful) { > startup_children(remaining_children_to_start); >@@ -3132,7 +3158,7 @@ static int event_run(apr_pool_t * _pconf, apr_pool_t * plog, server_rec * s) > * Kill child processes, tell them to call child_exit, etc... 
> */ > for (i = 0; i < num_buckets; i++) { >- ap_mpm_podx_killpg(all_buckets[i].pod, ap_daemons_limit, >+ ap_mpm_podx_killpg(all_buckets[i].pod, active_daemons_limit, > AP_MPM_PODX_RESTART); > } > ap_reclaim_child_processes(1, /* Start with SIGTERM */ >@@ -3156,7 +3182,7 @@ static int event_run(apr_pool_t * _pconf, apr_pool_t * plog, server_rec * s) > /* Close our listeners, and then ask our children to do same */ > ap_close_listeners(); > for (i = 0; i < num_buckets; i++) { >- ap_mpm_podx_killpg(all_buckets[i].pod, ap_daemons_limit, >+ ap_mpm_podx_killpg(all_buckets[i].pod, active_daemons_limit, > AP_MPM_PODX_GRACEFUL); > } > ap_relieve_child_processes(event_note_child_killed); >@@ -3184,7 +3210,7 @@ static int event_run(apr_pool_t * _pconf, apr_pool_t * plog, server_rec * s) > ap_relieve_child_processes(event_note_child_killed); > > active_children = 0; >- for (index = 0; index < ap_daemons_limit; ++index) { >+ for (index = 0; index < retained->max_daemons_limit; ++index) { > if (ap_mpm_safe_kill(MPM_CHILD_PID(index), 0) == APR_SUCCESS) { > active_children = 1; > /* Having just one child is enough to stay around */ >@@ -3199,7 +3225,7 @@ static int event_run(apr_pool_t * _pconf, apr_pool_t * plog, server_rec * s) > * really dead. > */ > for (i = 0; i < num_buckets; i++) { >- ap_mpm_podx_killpg(all_buckets[i].pod, ap_daemons_limit, >+ ap_mpm_podx_killpg(all_buckets[i].pod, active_daemons_limit, > AP_MPM_PODX_RESTART); > } > ap_reclaim_child_processes(1, event_note_child_killed); >@@ -3228,7 +3254,7 @@ static int event_run(apr_pool_t * _pconf, apr_pool_t * plog, server_rec * s) > " received. Doing graceful restart"); > /* wake up the children...time to die. 
But we'll have more soon */ > for (i = 0; i < num_buckets; i++) { >- ap_mpm_podx_killpg(all_buckets[i].pod, ap_daemons_limit, >+ ap_mpm_podx_killpg(all_buckets[i].pod, active_daemons_limit, > AP_MPM_PODX_GRACEFUL); > } > >@@ -3243,7 +3269,7 @@ static int event_run(apr_pool_t * _pconf, apr_pool_t * plog, server_rec * s) > * pthreads are stealing signals from us left and right. > */ > for (i = 0; i < num_buckets; i++) { >- ap_mpm_podx_killpg(all_buckets[i].pod, ap_daemons_limit, >+ ap_mpm_podx_killpg(all_buckets[i].pod, active_daemons_limit, > AP_MPM_PODX_RESTART); > } > >@@ -3253,6 +3279,8 @@ static int event_run(apr_pool_t * _pconf, apr_pool_t * plog, server_rec * s) > "SIGHUP received. Attempting to restart"); > } > >+ active_daemons = 0; >+ > return OK; > } > >@@ -3466,9 +3494,9 @@ static int event_pre_config(apr_pool_t * pconf, apr_pool_t * plog, > max_spare_threads = DEFAULT_MAX_FREE_DAEMON * DEFAULT_THREADS_PER_CHILD; > server_limit = DEFAULT_SERVER_LIMIT; > thread_limit = DEFAULT_THREAD_LIMIT; >- ap_daemons_limit = server_limit; >+ active_daemons_limit = server_limit; > threads_per_child = DEFAULT_THREADS_PER_CHILD; >- max_workers = ap_daemons_limit * threads_per_child; >+ max_workers = active_daemons_limit * threads_per_child; > had_healthy_child = 0; > ap_extended_status = 0; > >@@ -3677,10 +3705,10 @@ static int event_check_config(apr_pool_t *p, apr_pool_t *plog, > max_workers = threads_per_child; > } > >- ap_daemons_limit = max_workers / threads_per_child; >+ active_daemons_limit = max_workers / threads_per_child; > > if (max_workers % threads_per_child) { >- int tmp_max_workers = ap_daemons_limit * threads_per_child; >+ int tmp_max_workers = active_daemons_limit * threads_per_child; > > if (startup) { > ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL, APLOGNO(00513) >@@ -3688,7 +3716,7 @@ static int event_check_config(apr_pool_t *p, apr_pool_t *plog, > "multiple of ThreadsPerChild of %d, decreasing to nearest " > "multiple %d, for a 
maximum of %d servers.", > max_workers, threads_per_child, tmp_max_workers, >- ap_daemons_limit); >+ active_daemons_limit); > } else { > ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s, APLOGNO(00514) > "MaxRequestWorkers of %d is not an integer multiple " >@@ -3699,25 +3727,25 @@ static int event_check_config(apr_pool_t *p, apr_pool_t *plog, > max_workers = tmp_max_workers; > } > >- if (ap_daemons_limit > server_limit) { >+ if (active_daemons_limit > server_limit) { > if (startup) { > ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL, APLOGNO(00515) > "WARNING: MaxRequestWorkers of %d would require %d servers " > "and would exceed ServerLimit of %d, decreasing to %d. " > "To increase, please see the ServerLimit directive.", >- max_workers, ap_daemons_limit, server_limit, >+ max_workers, active_daemons_limit, server_limit, > server_limit * threads_per_child); > } else { > ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s, APLOGNO(00516) > "MaxRequestWorkers of %d would require %d servers and " > "exceed ServerLimit of %d, decreasing to %d", >- max_workers, ap_daemons_limit, server_limit, >+ max_workers, active_daemons_limit, server_limit, > server_limit * threads_per_child); > } >- ap_daemons_limit = server_limit; >+ active_daemons_limit = server_limit; > } > >- /* ap_daemons_to_start > ap_daemons_limit checked in ap_mpm_run() */ >+ /* ap_daemons_to_start > active_daemons_limit checked in ap_mpm_run() */ > if (ap_daemons_to_start < 1) { > if (startup) { > ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL, APLOGNO(00517) >diff --git a/server/mpm_unix.c b/server/mpm_unix.c >index 06318fe..8ac36b2 100644 >--- a/server/mpm_unix.c >+++ b/server/mpm_unix.c >@@ -63,7 +63,13 @@ > #undef APLOG_MODULE_INDEX > #define APLOG_MODULE_INDEX AP_CORE_MODULE_INDEX > >-typedef enum {DO_NOTHING, SEND_SIGTERM, SEND_SIGKILL, GIVEUP} action_t; >+typedef enum { >+ DO_NOTHING, >+ SEND_SIGTERM, >+ SEND_SIGTERM_NOLOG, >+ SEND_SIGKILL, >+ GIVEUP >+} action_t; > > typedef 
struct extra_process_t { > struct extra_process_t *next; >@@ -142,6 +148,8 @@ static int reclaim_one_pid(pid_t pid, action_t action) > " still did not exit, " > "sending a SIGTERM", > pid); >+ /* FALLTHROUGH */ >+ case SEND_SIGTERM_NOLOG: > kill(pid, SIGTERM); > break; > >@@ -173,7 +181,6 @@ static int reclaim_one_pid(pid_t pid, action_t action) > return 0; > } > >-/* XXX The terminate argument is ignored. Implement or remove? */ > AP_DECLARE(void) ap_reclaim_child_processes(int terminate, > ap_reclaim_callback_fn_t *mpm_callback) > { >@@ -194,6 +201,7 @@ AP_DECLARE(void) ap_reclaim_child_processes(int terminate, > * children but take no action against > * stragglers > */ >+ {SEND_SIGTERM_NOLOG, 0}, /* skipped if terminate == 0 */ > {SEND_SIGTERM, apr_time_from_sec(3)}, > {SEND_SIGTERM, apr_time_from_sec(5)}, > {SEND_SIGTERM, apr_time_from_sec(7)}, >@@ -203,19 +211,21 @@ AP_DECLARE(void) ap_reclaim_child_processes(int terminate, > int cur_action; /* index of action we decided to take this > * iteration > */ >- int next_action = 1; /* index of first real action */ >+ int next_action = terminate ? 1 : 2; /* index of first real action */ > > ap_mpm_query(AP_MPMQ_MAX_DAEMON_USED, &max_daemons); > > do { >- apr_sleep(waittime); >- /* don't let waittime get longer than 1 second; otherwise, we don't >- * react quickly to the last child exiting, and taking action can >- * be delayed >- */ >- waittime = waittime * 4; >- if (waittime > apr_time_from_sec(1)) { >- waittime = apr_time_from_sec(1); >+ if (action_table[next_action].action_time > 0) { >+ apr_sleep(waittime); >+ /* don't let waittime get longer than 1 second; otherwise, we don't >+ * react quickly to the last child exiting, and taking action can >+ * be delayed >+ */ >+ waittime = waittime * 4; >+ if (waittime > apr_time_from_sec(1)) { >+ waittime = apr_time_from_sec(1); >+ } > } > > /* see what action to take, if any */
You cannot view the attachment while viewing its details because your browser does not support IFRAMEs.
View the attachment on a separate page
.
View Attachment As Diff
View Attachment As Raw
Actions:
View
|
Diff
Attachments on
bug 53555
:
33154
|
33158
|
33749
|
33750
| 34201 |
34202