ASF Bugzilla – Attachment 28262 Details for
Bug 48769
[PATCH] Processes in the busy list should not be killed during graceful restarts
Home
|
New
|
Browse
|
Search
|
[?]
|
Reports
|
Help
|
New Account
|
Log In
Remember
[x]
|
Forgot Password
Login:
[x]
[patch]
Separated graceful restart patch
graceful_restart2.patch (text/plain), 20.95 KB, created by
Michał Grzędzicki
on 2012-02-03 20:43:38 UTC
(
hide
)
Description:
Separated graceful restart patch
Filename:
MIME Type:
Creator:
Michał Grzędzicki
Created:
2012-02-03 20:43:38 UTC
Size:
20.95 KB
patch
obsolete
>Index: modules/fcgid/fcgid_mutex_unix.c >=================================================================== >--- modules/fcgid/fcgid_mutex_unix.c (revision 1240335) >+++ modules/fcgid/fcgid_mutex_unix.c (working copy) >@@ -114,7 +114,12 @@ > * to a better place would require a directive to override it. This > * is resolved for httpd 2.3+ by hooking into the Mutex support. > */ >- lockfile = apr_palloc(pconf, L_tmpnam); >+ if (*lockfilep == NULL) { >+ lockfile = apr_palloc(pool, L_tmpnam); >+ } >+ else { >+ lockfile = *lockfilep; >+ } > tmpnam(lockfile); > rv = apr_global_mutex_create(mutex, lockfile, mechanism, pconf); > if (rv != APR_SUCCESS) { >Index: modules/fcgid/fcgid_proctbl_unix.c >=================================================================== >--- modules/fcgid/fcgid_proctbl_unix.c (revision 1240335) >+++ modules/fcgid/fcgid_proctbl_unix.c (working copy) >@@ -129,36 +129,81 @@ > apr_status_t > proctable_post_config(server_rec * main_server, apr_pool_t * configpool) > { >+ const char *userdata_mutex_key = "fcgid_proctbl_mutex"; >+ const char *userdata_filename_key = "fcgid_proctbl_mutex_file"; > size_t shmem_size = sizeof(fcgid_share); > fcgid_procnode *ptmpnode = NULL; > int i; >+ int reused_shm = 0; > apr_status_t rv; > fcgid_server_conf *sconf = ap_get_module_config(main_server->module_config, > &fcgid_module); > >- /* Remove share memory first */ >- apr_shm_remove(sconf->shmname_path, main_server->process->pconf); >+ g_sharelock_name = NULL; >+ g_sharemem = NULL; >+ g_sharelock = NULL; > >- /* Create share memory */ >- if ((rv = apr_shm_create(&g_sharemem, shmem_size, sconf->shmname_path, >- main_server->process->pconf)) != APR_SUCCESS) >- { >- ap_log_error(APLOG_MARK, APLOG_EMERG, rv, main_server, >- "mod_fcgid: Can't create shared memory for size %" APR_SIZE_T_FMT " bytes", >- shmem_size); >- exit(1); >+ /* Check to see if we can reclaim the mutex first >+ All mutexes are going to persist across restarts */ >+ apr_pool_userdata_get((void *) &g_sharelock, userdata_mutex_key, >+ main_server->process->pool); >+ apr_pool_userdata_get((void *) &g_sharelock_name, userdata_filename_key, >+ main_server->process->pool); >+ if (g_sharelock == NULL || g_sharelock_name == NULL ) { >+ rv = fcgid_mutex_create(&g_sharelock, &g_sharelock_name, >+ g_sharelock_mutex_type, >+ main_server->process->pool, main_server); >+ if (rv != APR_SUCCESS) { >+ exit(1); >+ } >+ apr_pool_userdata_set(g_sharelock, userdata_mutex_key, >+ apr_pool_cleanup_null, >+ main_server->process->pool); >+ apr_pool_userdata_set(g_sharelock_name, userdata_filename_key, >+ apr_pool_cleanup_null, >+ main_server->process->pool); >+ >+ /* Since we couldn't use the old mutex we're going to assume >+ the old shared memory is also invalid */ >+ apr_shm_remove(sconf->shmname_path, main_server->process->pool); > } >- _global_memory = apr_shm_baseaddr_get(g_sharemem); >+ else { >+ /* We reclaimed the old mutex so we'll try to also reclaim the old shared memory */ >+ if ((rv = apr_shm_attach(&g_sharemem, sconf->shmname_path, main_server->process->pool)) == APR_SUCCESS) { >+ if ( apr_shm_size_get(g_sharemem) == shmem_size ) { >+ /* TODO: Additional checks here that shm comes from the current server */ >+ _global_memory = apr_shm_baseaddr_get(g_sharemem); >+ reused_shm = 1; >+ } >+ else { >+ apr_shm_detach(g_sharemem); >+ } >+ } >+ if (rv != APR_SUCCESS || g_sharemem == NULL) { >+ /* Couldn't reuse the old shared memory, make sure the file is removed */ >+ apr_shm_remove(sconf->shmname_path, main_server->process->pool); >+ } >+ } > >- /* Create global mutex */ >- rv = fcgid_mutex_create(&g_sharelock, &g_sharelock_name, >- g_sharelock_mutex_type, >- main_server->process->pconf, main_server); >+ if (!reused_shm) { >+ if ((rv = apr_shm_create(&g_sharemem, shmem_size, sconf->shmname_path, >+ main_server->process->pool)) != APR_SUCCESS) { >+ ap_log_error(APLOG_MARK, APLOG_EMERG, rv, main_server, >+ "mod_fcgid: Can't create shared memory for size %" APR_SIZE_T_FMT " bytes", >+ shmem_size); >+ exit(1); >+ } >+ _global_memory = apr_shm_baseaddr_get(g_sharemem); >+ memset(_global_memory, 0, shmem_size); >+ } >+ >+ rv = proctable_lock_safe(); > if (rv != APR_SUCCESS) { >+ ap_log_error(APLOG_MARK, APLOG_EMERG, rv, main_server, >+ "mod_fcgid: Can't lock process table for initialization"); > exit(1); > } >- >- memset(_global_memory, 0, shmem_size); >+ > g_proc_array = _global_memory->procnode_array; > g_global_share = &_global_memory->global; > >@@ -169,12 +214,17 @@ > g_busy_list_header = g_idle_list_header + 1; > g_error_list_header = g_busy_list_header + 1; > g_free_list_header = g_error_list_header + 1; >- ptmpnode = g_free_list_header; >- for (i = 0; i < FCGID_MAX_APPLICATION; i++) { >- ptmpnode->next_index = ptmpnode - g_proc_array + 1; >- ptmpnode++; >+ if (!reused_shm) { >+ /* This initially adds all nodes to the free list */ >+ ptmpnode = g_free_list_header; >+ for (i = 0; i < FCGID_MAX_APPLICATION; i++) { >+ ptmpnode->next_index = ptmpnode - g_proc_array + 1; >+ ptmpnode++; >+ } > } >+ proctable_unlock_safe(); > >+ > return APR_SUCCESS; > } > >@@ -200,11 +250,21 @@ > return apr_global_mutex_lock(g_sharelock); > } > >+apr_status_t proctable_lock_safe(void) >+{ >+ return proctable_lock_internal(); >+} >+ > static apr_status_t proctable_unlock_internal(void) > { > return apr_global_mutex_unlock(g_sharelock); > } > >+apr_status_t proctable_unlock_safe(void) >+{ >+ return proctable_unlock_internal(); >+} >+ > fcgid_procnode *proctable_get_free_list(void) > { > return g_free_list_header; >@@ -272,14 +332,6 @@ > { > apr_status_t rv; > >- if (g_global_share->must_exit) { >- ap_log_error(APLOG_MARK, APLOG_EMERG, 0, s, >- "mod_fcgid: server is restarted, pid %" APR_PID_T_FMT >- " must exit", >- getpid()); >- kill(getpid(), SIGTERM); >- } >- > /* Lock error is a fatal error */ > if ((rv = proctable_lock_internal()) != APR_SUCCESS) { > ap_log_error(APLOG_MARK, APLOG_EMERG, rv, s, >Index: modules/fcgid/fcgid_pm_main.c >=================================================================== >--- modules/fcgid/fcgid_pm_main.c (revision 1240335) >+++ modules/fcgid/fcgid_pm_main.c (working copy) >@@ -401,64 +401,93 @@ > return 0; > } > >-static void kill_all_subprocess(server_rec *main_server) >+static void kill_all_subprocess(server_rec * main_server, int graceful) > { >- apr_time_t waittime = 1024 * 16; >- size_t i, table_size = proctable_get_table_size(); >- int not_dead_yet; >- int cur_action, next_action; >- apr_time_t starttime = apr_time_now(); >- struct { >- action_t action; >- apr_time_t action_time; >- } action_table[] = { >- {DO_NOTHING, 0}, /* dummy entry for iterations where >- * we reap children but take no action >- * against stragglers >- */ >- {KILL_GRACEFULLY, apr_time_from_sec(0)}, >- {KILL_GRACEFULLY, apr_time_from_sec(1)}, >- {KILL_FORCEFULLY, apr_time_from_sec(8)}, >- {HARD_WAIT, apr_time_from_sec(8)} >- }; >+ int wait_attempts = 0; > fcgid_procnode *proc_table = proctable_get_table_array(); >+ fcgid_procnode *free_list_header = proctable_get_free_list(); >+ fcgid_procnode *busy_list_header = proctable_get_busy_list(); >+ fcgid_procnode *idle_list_header = proctable_get_idle_list(); >+ fcgid_procnode *error_list_header = proctable_get_error_list(); >+ fcgid_procnode *previous_node, *current_node, *next_node; > >- next_action = 1; >- do { >- apr_sleep(waittime); >- /* don't let waittime get longer than 1 second; otherwise, we don't >- * react quickly to the last child exiting, and taking action can >- * be delayed >- */ >- waittime = waittime * 4; >- if (waittime > apr_time_from_sec(1)) { >- waittime = apr_time_from_sec(1); >- } >+ /* We'll scan the idle and busy lists signaling each process to shut down >+ The processes in the idle list will be moved to the error list >+ Processes in the busy list will be moved to the error list only during a hard shutdown/restart. >+ After we've moved everything to the error list that needs to die, we walk it until everything is cleaned up */ > >- /* see what action to take, if any */ >- if (action_table[next_action].action_time <= apr_time_now() - starttime) { >- cur_action = next_action; >- ++next_action; >+ proctable_pm_lock(main_server); >+ >+ /* signal all processes in the error list once */ >+ previous_node = error_list_header; >+ current_node = &proc_table[previous_node->next_index]; >+ while (current_node != proc_table) { >+ proc_kill_gracefully(current_node, main_server); >+ previous_node = current_node; >+ current_node = &proc_table[current_node->next_index]; >+ } >+ >+ /* Previous node is going to be the tail end of the error list. Attach all the idle list to it and signal those processes to exit */ >+ current_node = &proc_table[idle_list_header->next_index]; >+ if (current_node != proc_table) { >+ previous_node->next_index = (current_node - proc_table); >+ idle_list_header->next_index = 0; >+ while ( current_node != proc_table ) { >+ proc_kill_gracefully( current_node, main_server ); >+ current_node->diewhy = FCGID_DIE_SHUTDOWN; >+ previous_node = current_node; >+ current_node = &proc_table[ current_node->next_index ]; > } >- else { >- cur_action = 0; /* index of DO_NOTHING entry */ >- } >+ } > >- /* now see who is done */ >- not_dead_yet = 0; >- for (i = 0; i < table_size; i++) { >- if (!proc_table[i].proc_pool) { >- continue; /* unused */ >- } >+ /* If this is a forced shutdown, link the busy list to the error list */ >+ current_node = &proc_table[busy_list_header->next_index]; >+ if (!graceful && current_node != proc_table) { >+ previous_node->next_index = (current_node - proc_table); >+ busy_list_header->next_index = 0; >+ } >+ >+ /* Signal processes in the busy list */ >+ while (current_node != proc_table) { >+ proc_kill_gracefully( current_node, main_server ); >+ current_node->diewhy = FCGID_DIE_SHUTDOWN; >+ current_node = &proc_table[ current_node->next_index ]; >+ } > >- if (!reclaim_one_pid(main_server, &proc_table[i], >- action_table[cur_action].action)) { >- ++not_dead_yet; >+ /* Now clear the error list into the free list... >+ We are blocking all the bridged processes from completing during this timeframe >+ since they can't aquire the mutex to check the process tables. This should be >+ acceptable for both graceful and hard restarts. This loop should end rapidly >+ with graceful restarts and connection interruptions are going to happen regardless >+ with hard restarts. */ >+ while (error_list_header->next_index != 0) { >+ apr_sleep(apr_time_from_sec(1)); >+ wait_attempts++; >+ previous_node = error_list_header; >+ current_node = &proc_table[previous_node->next_index]; >+ while (current_node != proc_table) { >+ next_node = &proc_table[current_node->next_index]; >+ if (proc_wait_process(main_server, current_node) == APR_CHILD_NOTDONE >+ && wait_attempts < FCGID_PM_MAX_SHUTDOWN_WAIT_ATTEMPTS ) { >+ /* kill it hard and check again in 1 second */ >+ proc_kill_force( current_node, main_server ); >+ previous_node = current_node; >+ }else { >+ /* Unlink from error list */ >+ previous_node->next_index = current_node->next_index; >+ >+ /* Link to free list */ >+ current_node->next_index = free_list_header->next_index; >+ free_list_header->next_index = current_node - proc_table; > } >+ current_node = next_node; > } >+ } >+ >+ /* At this point in a graceful restart, the busy and free lists have all the process nodes */ >+ /* In a forced restart, all nodes are in the free list */ > >- } while (not_dead_yet && >- action_table[cur_action].action != HARD_WAIT); >+ proctable_pm_unlock(main_server); > } > > /* This should be proposed as a stand-alone improvement to the httpd module, >@@ -578,7 +607,7 @@ > procinfo.gid = command->gid; > procinfo.userdir = command->userdir; > if ((rv = >- apr_pool_create(&procnode->proc_pool, configpool)) != APR_SUCCESS >+ apr_pool_create(&procnode->proc_pool, main_server->process->pool)) != APR_SUCCESS > || (procinfo.proc_environ = > apr_table_make(procnode->proc_pool, INITENV_CNT)) == NULL) { > /* Link the node back to free list in this case */ >@@ -591,6 +620,7 @@ > "mod_fcgid: can't create pool for process"); > return; > } >+ > /* Set up longer, system defaults before falling into parsing fixed-limit > * request-by-request variables, so if any are overriden, they preempt > * any system default assumptions >@@ -633,6 +663,7 @@ > apr_status_t pm_main(server_rec * main_server, apr_pool_t * configpool) > { > fcgid_command command; >+ apr_status_t rv; > > while (1) { > if (procmgr_must_exit()) >@@ -656,7 +687,7 @@ > } > > /* Stop all processes */ >- kill_all_subprocess(main_server); >+ kill_all_subprocess(main_server, procmgr_graceful_restart()); > > return APR_SUCCESS; > } >Index: modules/fcgid/fcgid_proctbl.h >=================================================================== >--- modules/fcgid/fcgid_proctbl.h (revision 1240335) >+++ modules/fcgid/fcgid_proctbl.h (working copy) >@@ -96,6 +96,8 @@ > void proctable_pm_unlock(server_rec *s); > void proctable_lock(request_rec *r); > void proctable_unlock(request_rec *r); >+apr_status_t proctable_lock_safe(void); >+apr_status_t proctable_unlock_safe(void); > > /* Just for debug */ > void proctable_print_debug_info(server_rec * main_server); >Index: modules/fcgid/fcgid_pm_unix.c >=================================================================== >--- modules/fcgid/fcgid_pm_unix.c (revision 1240335) >+++ modules/fcgid/fcgid_pm_unix.c (working copy) >@@ -66,6 +66,7 @@ > static const char *g_pipelock_mutex_type = "fcgid-pipe"; > > static int volatile g_caughtSigTerm = 0; >+static int volatile g_caughtSigUsr1 = 0; > static pid_t g_pm_pid; > static void signal_handler(int signo) > { >@@ -76,11 +77,16 @@ > return; > } > >- if ((signo == SIGTERM) || (signo == SIGUSR1) || (signo == SIGHUP)) { >+ if ((signo == SIGTERM) || (signo == SIGHUP)) { > g_caughtSigTerm = 1; >+ g_caughtSigUsr1 = 0; > /* Tell the world it's time to die */ > proctable_get_globalshare()->must_exit = 1; > } >+ if (signo == SIGUSR1 && !g_caughtSigTerm) { >+ g_caughtSigUsr1 = 1; >+ proctable_get_globalshare()->must_exit = 1; >+ } > } > > static apr_status_t init_signal(server_rec * main_server) >@@ -164,6 +170,7 @@ > } > break; > case APR_OC_REASON_RESTART: >+ kill(proc->pid, SIGTERM); > apr_proc_other_child_unregister(data); > break; > case APR_OC_REASON_LOST: >@@ -181,7 +188,7 @@ > break; > case APR_OC_REASON_UNREGISTER: > /* I don't think it's going to happen */ >- kill(proc->pid, SIGHUP); >+ kill(proc->pid, SIGUSR1); > break; > } > } >@@ -316,7 +323,7 @@ > /* I am the parent > I will send the stop signal in procmgr_stop_procmgr() */ > apr_pool_note_subprocess(configpool, g_process_manager, >- APR_KILL_ONLY_ONCE); >+ APR_JUST_WAIT); > apr_proc_other_child_register(g_process_manager, fcgid_maint, > g_process_manager, NULL, configpool); > >@@ -349,8 +356,14 @@ > apr_status_t > procmgr_post_config(server_rec * main_server, apr_pool_t * configpool) > { >+ const char *userdata_mutex_key = "fcgid_procmgr_mutex"; >+ const char *userdata_filename_key = "fcgid_procmgr_mutex_file"; >+ const char *userdata_pipe_key[] = { "fcgid_pipe_pm_read", "fcgid_pipe_pm_write", "fcgid_pipe_ap_read", "fcgid_pipe_ap_write" }; >+ apr_file_t **userdata_pipe_handle[] = { &g_pm_read_pipe, &g_pm_write_pipe, &g_ap_read_pipe, &g_ap_write_pipe }; > apr_status_t rv; > apr_finfo_t finfo; >+ size_t i; >+ size_t j; > fcgid_server_conf *sconf = ap_get_module_config(main_server->module_config, > &fcgid_module); > >@@ -398,22 +411,50 @@ > } > } > >- /* Create pipes to communicate between process manager and apache */ >- if ((rv = apr_file_pipe_create(&g_pm_read_pipe, &g_ap_write_pipe, >- configpool)) != APR_SUCCESS >- || (rv = apr_file_pipe_create(&g_ap_read_pipe, &g_pm_write_pipe, >- configpool))) { >- ap_log_error(APLOG_MARK, APLOG_ERR, rv, main_server, >- "mod_fcgid: Can't create pipe between PM and stub"); >- return rv; >+ /* Create pipes to communicate between process manager and apache >+ The pipes will last across restarts */ >+ for (i = 0; i < 4; i++) { >+ apr_pool_userdata_get((void *) userdata_pipe_handle[i], userdata_pipe_key[i], >+ main_server->process->pool); >+ if (*userdata_pipe_handle[i] == NULL) { >+ if ((rv = apr_file_pipe_create(&g_pm_read_pipe, &g_ap_write_pipe, >+ main_server->process->pool)) != APR_SUCCESS >+ || (rv = apr_file_pipe_create(&g_ap_read_pipe, &g_pm_write_pipe, >+ main_server->process->pool))) { >+ ap_log_error(APLOG_MARK, APLOG_ERR, rv, main_server, >+ "mod_fcgid: Can't create pipe between PM and stub"); >+ return rv; >+ } >+ for (j = 0; j < 4; j++) { >+ apr_pool_userdata_set(*(userdata_pipe_handle[j]), userdata_pipe_key[j], >+ apr_pool_cleanup_null, >+ main_server->process->pool); >+ } >+ break; >+ } > } > >- /* Create mutex for pipe reading and writing */ >- rv = fcgid_mutex_create(&g_pipelock, &g_pipelock_name, >- g_pipelock_mutex_type, >- main_server->process->pconf, main_server); >- if (rv != APR_SUCCESS) { >- exit(1); >+ /* Create mutex for pipe reading and writing >+ All mutexex are going to persist across restarts */ >+ g_pipelock = NULL; >+ g_pipelock_name = NULL; >+ apr_pool_userdata_get((void *) &g_pipelock, userdata_mutex_key, >+ main_server->process->pool); >+ apr_pool_userdata_get((void *) &g_pipelock_name, userdata_filename_key, >+ main_server->process->pool); >+ if (g_pipelock == NULL || g_pipelock_name == NULL) { >+ rv = fcgid_mutex_create(&g_pipelock, &g_pipelock_name, >+ g_pipelock_mutex_type, >+ main_server->process->pool, main_server); >+ if (rv != APR_SUCCESS) { >+ exit(1); >+ } >+ apr_pool_userdata_set(g_pipelock, userdata_mutex_key, >+ apr_pool_cleanup_null, >+ main_server->process->pool); >+ apr_pool_userdata_set(g_pipelock_name, userdata_filename_key, >+ apr_pool_cleanup_null, >+ main_server->process->pool); > } > > /* Create process manager process */ >@@ -548,9 +589,14 @@ > > int procmgr_must_exit() > { >- return g_caughtSigTerm; >+ return g_caughtSigTerm || g_caughtSigUsr1; > } > >+int procmgr_graceful_restart() >+{ >+ return g_caughtSigUsr1; >+} >+ > apr_status_t procmgr_stop_procmgr(void *server) > { > return APR_SUCCESS; >Index: modules/fcgid/fcgid_pm.h >=================================================================== >--- modules/fcgid/fcgid_pm.h (revision 1240335) >+++ modules/fcgid/fcgid_pm.h (working copy) >@@ -56,5 +56,6 @@ > > apr_status_t procmgr_stop_procmgr(void *dummy); > int procmgr_must_exit(void); >+int procmgr_graceful_restart(void); > > #endif
You cannot view the attachment while viewing its details because your browser does not support IFRAMEs.
View the attachment on a separate page
.
View Attachment As Diff
View Attachment As Raw
Actions:
View
|
Diff
Attachments on
bug 48769
:
25019
|
28262
|
34001
|
35944