Index: modules/fcgid/fcgid_mutex_unix.c =================================================================== --- modules/fcgid/fcgid_mutex_unix.c (revision 1240335) +++ modules/fcgid/fcgid_mutex_unix.c (working copy) @@ -114,7 +114,12 @@ * to a better place would require a directive to override it. This * is resolved for httpd 2.3+ by hooking into the Mutex support. */ - lockfile = apr_palloc(pconf, L_tmpnam); + if (*lockfilep == NULL) { + lockfile = apr_palloc(pool, L_tmpnam); + } + else { + lockfile = *lockfilep; + } tmpnam(lockfile); rv = apr_global_mutex_create(mutex, lockfile, mechanism, pconf); if (rv != APR_SUCCESS) { Index: modules/fcgid/fcgid_proctbl_unix.c =================================================================== --- modules/fcgid/fcgid_proctbl_unix.c (revision 1240335) +++ modules/fcgid/fcgid_proctbl_unix.c (working copy) @@ -129,36 +129,81 @@ apr_status_t proctable_post_config(server_rec * main_server, apr_pool_t * configpool) { + const char *userdata_mutex_key = "fcgid_proctbl_mutex"; + const char *userdata_filename_key = "fcgid_proctbl_mutex_file"; size_t shmem_size = sizeof(fcgid_share); fcgid_procnode *ptmpnode = NULL; int i; + int reused_shm = 0; apr_status_t rv; fcgid_server_conf *sconf = ap_get_module_config(main_server->module_config, &fcgid_module); - /* Remove share memory first */ - apr_shm_remove(sconf->shmname_path, main_server->process->pconf); + g_sharelock_name = NULL; + g_sharemem = NULL; + g_sharelock = NULL; - /* Create share memory */ - if ((rv = apr_shm_create(&g_sharemem, shmem_size, sconf->shmname_path, - main_server->process->pconf)) != APR_SUCCESS) - { - ap_log_error(APLOG_MARK, APLOG_EMERG, rv, main_server, - "mod_fcgid: Can't create shared memory for size %" APR_SIZE_T_FMT " bytes", - shmem_size); - exit(1); + /* Check to see if we can reclaim the mutex first + All mutexes are going to persist across restarts */ + apr_pool_userdata_get((void *) &g_sharelock, userdata_mutex_key, + main_server->process->pool); + apr_pool_userdata_get((void *) &g_sharelock_name, userdata_filename_key, + main_server->process->pool); + if (g_sharelock == NULL || g_sharelock_name == NULL ) { + rv = fcgid_mutex_create(&g_sharelock, &g_sharelock_name, + g_sharelock_mutex_type, + main_server->process->pool, main_server); + if (rv != APR_SUCCESS) { + exit(1); + } + apr_pool_userdata_set(g_sharelock, userdata_mutex_key, + apr_pool_cleanup_null, + main_server->process->pool); + apr_pool_userdata_set(g_sharelock_name, userdata_filename_key, + apr_pool_cleanup_null, + main_server->process->pool); + + /* Since we couldn't use the old mutex we're going to assume + the old shared memory is also invalid */ + apr_shm_remove(sconf->shmname_path, main_server->process->pool); } - _global_memory = apr_shm_baseaddr_get(g_sharemem); + else { + /* We reclaimed the old mutex so we'll try to also reclaim the old shared memory */ + if ((rv = apr_shm_attach(&g_sharemem, sconf->shmname_path, main_server->process->pool)) == APR_SUCCESS) { + if ( apr_shm_size_get(g_sharemem) == shmem_size ) { + /* TODO: Additional checks here that shm comes from the current server */ + _global_memory = apr_shm_baseaddr_get(g_sharemem); + reused_shm = 1; + } + else { + apr_shm_detach(g_sharemem); + } + } + if (rv != APR_SUCCESS || g_sharemem == NULL) { + /* Couldn't reuse the old shared memory, make sure the file is removed */ + apr_shm_remove(sconf->shmname_path, main_server->process->pool); + } + } - /* Create global mutex */ - rv = fcgid_mutex_create(&g_sharelock, &g_sharelock_name, - g_sharelock_mutex_type, - main_server->process->pconf, main_server); + if (!reused_shm) { + if ((rv = apr_shm_create(&g_sharemem, shmem_size, sconf->shmname_path, + main_server->process->pool)) != APR_SUCCESS) { + ap_log_error(APLOG_MARK, APLOG_EMERG, rv, main_server, + "mod_fcgid: Can't create shared memory for size %" APR_SIZE_T_FMT " bytes", + shmem_size); + exit(1); + } + _global_memory = apr_shm_baseaddr_get(g_sharemem); + memset(_global_memory, 0, shmem_size); + } + + rv = proctable_lock_safe(); if (rv != APR_SUCCESS) { + ap_log_error(APLOG_MARK, APLOG_EMERG, rv, main_server, + "mod_fcgid: Can't lock process table for initialization"); exit(1); } - - memset(_global_memory, 0, shmem_size); + g_proc_array = _global_memory->procnode_array; g_global_share = &_global_memory->global; @@ -169,12 +214,17 @@ g_busy_list_header = g_idle_list_header + 1; g_error_list_header = g_busy_list_header + 1; g_free_list_header = g_error_list_header + 1; - ptmpnode = g_free_list_header; - for (i = 0; i < FCGID_MAX_APPLICATION; i++) { - ptmpnode->next_index = ptmpnode - g_proc_array + 1; - ptmpnode++; + if (!reused_shm) { + /* This initially adds all nodes to the free list */ + ptmpnode = g_free_list_header; + for (i = 0; i < FCGID_MAX_APPLICATION; i++) { + ptmpnode->next_index = ptmpnode - g_proc_array + 1; + ptmpnode++; + } } + proctable_unlock_safe(); + return APR_SUCCESS; } @@ -200,11 +250,21 @@ return apr_global_mutex_lock(g_sharelock); } +apr_status_t proctable_lock_safe(void) +{ + return proctable_lock_internal(); +} + static apr_status_t proctable_unlock_internal(void) { return apr_global_mutex_unlock(g_sharelock); } +apr_status_t proctable_unlock_safe(void) +{ + return proctable_unlock_internal(); +} + fcgid_procnode *proctable_get_free_list(void) { return g_free_list_header; @@ -272,14 +332,6 @@ { apr_status_t rv; - if (g_global_share->must_exit) { - ap_log_error(APLOG_MARK, APLOG_EMERG, 0, s, - "mod_fcgid: server is restarted, pid %" APR_PID_T_FMT - " must exit", - getpid()); - kill(getpid(), SIGTERM); - } - /* Lock error is a fatal error */ if ((rv = proctable_lock_internal()) != APR_SUCCESS) { ap_log_error(APLOG_MARK, APLOG_EMERG, rv, s, Index: modules/fcgid/fcgid_pm_main.c =================================================================== --- modules/fcgid/fcgid_pm_main.c (revision 1240335) +++ modules/fcgid/fcgid_pm_main.c (working copy) @@ -401,64 +401,93 @@ return 0; } -static void kill_all_subprocess(server_rec *main_server) +static void kill_all_subprocess(server_rec * main_server, int graceful) { - apr_time_t waittime = 1024 * 16; - size_t i, table_size = proctable_get_table_size(); - int not_dead_yet; - int cur_action, next_action; - apr_time_t starttime = apr_time_now(); - struct { - action_t action; - apr_time_t action_time; - } action_table[] = { - {DO_NOTHING, 0}, /* dummy entry for iterations where - * we reap children but take no action - * against stragglers - */ - {KILL_GRACEFULLY, apr_time_from_sec(0)}, - {KILL_GRACEFULLY, apr_time_from_sec(1)}, - {KILL_FORCEFULLY, apr_time_from_sec(8)}, - {HARD_WAIT, apr_time_from_sec(8)} - }; + int wait_attempts = 0; fcgid_procnode *proc_table = proctable_get_table_array(); + fcgid_procnode *free_list_header = proctable_get_free_list(); + fcgid_procnode *busy_list_header = proctable_get_busy_list(); + fcgid_procnode *idle_list_header = proctable_get_idle_list(); + fcgid_procnode *error_list_header = proctable_get_error_list(); + fcgid_procnode *previous_node, *current_node, *next_node; - next_action = 1; - do { - apr_sleep(waittime); - /* don't let waittime get longer than 1 second; otherwise, we don't - * react quickly to the last child exiting, and taking action can - * be delayed - */ - waittime = waittime * 4; - if (waittime > apr_time_from_sec(1)) { - waittime = apr_time_from_sec(1); - } + /* We'll scan the idle and busy lists signaling each process to shut down + The processes in the idle list will be moved to the error list + Processes in the busy list will be moved to the error list only during a hard shutdown/restart. + After we've moved everything to the error list that needs to die, we walk it until everything is cleaned up */ - /* see what action to take, if any */ - if (action_table[next_action].action_time <= apr_time_now() - starttime) { - cur_action = next_action; - ++next_action; + proctable_pm_lock(main_server); + + /* signal all processes in the error list once */ + previous_node = error_list_header; + current_node = &proc_table[previous_node->next_index]; + while (current_node != proc_table) { + proc_kill_gracefully(current_node, main_server); + previous_node = current_node; + current_node = &proc_table[current_node->next_index]; + } + + /* Previous node is going to be the tail end of the error list. Attach all the idle list to it and signal those processes to exit */ + current_node = &proc_table[idle_list_header->next_index]; + if (current_node != proc_table) { + previous_node->next_index = (current_node - proc_table); + idle_list_header->next_index = 0; + while ( current_node != proc_table ) { + proc_kill_gracefully( current_node, main_server ); + current_node->diewhy = FCGID_DIE_SHUTDOWN; + previous_node = current_node; + current_node = &proc_table[ current_node->next_index ]; } - else { - cur_action = 0; /* index of DO_NOTHING entry */ - } + } - /* now see who is done */ - not_dead_yet = 0; - for (i = 0; i < table_size; i++) { - if (!proc_table[i].proc_pool) { - continue; /* unused */ - } + /* If this is a forced shutdown, link the busy list to the error list */ + current_node = &proc_table[busy_list_header->next_index]; + if (!graceful && current_node != proc_table) { + previous_node->next_index = (current_node - proc_table); + busy_list_header->next_index = 0; + } + + /* Signal processes in the busy list */ + while (current_node != proc_table) { + proc_kill_gracefully( current_node, main_server ); + current_node->diewhy = FCGID_DIE_SHUTDOWN; + current_node = &proc_table[ current_node->next_index ]; + } - if (!reclaim_one_pid(main_server, &proc_table[i], - action_table[cur_action].action)) { - ++not_dead_yet; + /* Now clear the error list into the free list... + We are blocking all the bridged processes from completing during this timeframe + since they can't aquire the mutex to check the process tables. This should be + acceptable for both graceful and hard restarts. This loop should end rapidly + with graceful restarts and connection interruptions are going to happen regardless + with hard restarts. */ + while (error_list_header->next_index != 0) { + apr_sleep(apr_time_from_sec(1)); + wait_attempts++; + previous_node = error_list_header; + current_node = &proc_table[previous_node->next_index]; + while (current_node != proc_table) { + next_node = &proc_table[current_node->next_index]; + if (proc_wait_process(main_server, current_node) == APR_CHILD_NOTDONE + && wait_attempts < FCGID_PM_MAX_SHUTDOWN_WAIT_ATTEMPTS ) { + /* kill it hard and check again in 1 second */ + proc_kill_force( current_node, main_server ); + previous_node = current_node; + }else { + /* Unlink from error list */ + previous_node->next_index = current_node->next_index; + + /* Link to free list */ + current_node->next_index = free_list_header->next_index; + free_list_header->next_index = current_node - proc_table; } + current_node = next_node; } + } + + /* At this point in a graceful restart, the busy and free lists have all the process nodes */ + /* In a forced restart, all nodes are in the free list */ - } while (not_dead_yet && - action_table[cur_action].action != HARD_WAIT); + proctable_pm_unlock(main_server); } /* This should be proposed as a stand-alone improvement to the httpd module, @@ -578,7 +607,7 @@ procinfo.gid = command->gid; procinfo.userdir = command->userdir; if ((rv = - apr_pool_create(&procnode->proc_pool, configpool)) != APR_SUCCESS + apr_pool_create(&procnode->proc_pool, main_server->process->pool)) != APR_SUCCESS || (procinfo.proc_environ = apr_table_make(procnode->proc_pool, INITENV_CNT)) == NULL) { /* Link the node back to free list in this case */ @@ -591,6 +620,7 @@ "mod_fcgid: can't create pool for process"); return; } + /* Set up longer, system defaults before falling into parsing fixed-limit * request-by-request variables, so if any are overriden, they preempt * any system default assumptions @@ -633,6 +663,7 @@ apr_status_t pm_main(server_rec * main_server, apr_pool_t * configpool) { fcgid_command command; + apr_status_t rv; while (1) { if (procmgr_must_exit()) @@ -656,7 +687,7 @@ } /* Stop all processes */ - kill_all_subprocess(main_server); + kill_all_subprocess(main_server, procmgr_graceful_restart()); return APR_SUCCESS; } Index: modules/fcgid/fcgid_proctbl.h =================================================================== --- modules/fcgid/fcgid_proctbl.h (revision 1240335) +++ modules/fcgid/fcgid_proctbl.h (working copy) @@ -96,6 +96,8 @@ void proctable_pm_unlock(server_rec *s); void proctable_lock(request_rec *r); void proctable_unlock(request_rec *r); +apr_status_t proctable_lock_safe(void); +apr_status_t proctable_unlock_safe(void); /* Just for debug */ void proctable_print_debug_info(server_rec * main_server); Index: modules/fcgid/fcgid_pm_unix.c =================================================================== --- modules/fcgid/fcgid_pm_unix.c (revision 1240335) +++ modules/fcgid/fcgid_pm_unix.c (working copy) @@ -66,6 +66,7 @@ static const char *g_pipelock_mutex_type = "fcgid-pipe"; static int volatile g_caughtSigTerm = 0; +static int volatile g_caughtSigUsr1 = 0; static pid_t g_pm_pid; static void signal_handler(int signo) { @@ -76,11 +77,16 @@ return; } - if ((signo == SIGTERM) || (signo == SIGUSR1) || (signo == SIGHUP)) { + if ((signo == SIGTERM) || (signo == SIGHUP)) { g_caughtSigTerm = 1; + g_caughtSigUsr1 = 0; /* Tell the world it's time to die */ proctable_get_globalshare()->must_exit = 1; } + if (signo == SIGUSR1 && !g_caughtSigTerm) { + g_caughtSigUsr1 = 1; + proctable_get_globalshare()->must_exit = 1; + } } static apr_status_t init_signal(server_rec * main_server) @@ -164,6 +170,7 @@ } break; case APR_OC_REASON_RESTART: + kill(proc->pid, SIGTERM); apr_proc_other_child_unregister(data); break; case APR_OC_REASON_LOST: @@ -181,7 +188,7 @@ break; case APR_OC_REASON_UNREGISTER: /* I don't think it's going to happen */ - kill(proc->pid, SIGHUP); + kill(proc->pid, SIGUSR1); break; } } @@ -316,7 +323,7 @@ /* I am the parent I will send the stop signal in procmgr_stop_procmgr() */ apr_pool_note_subprocess(configpool, g_process_manager, - APR_KILL_ONLY_ONCE); + APR_JUST_WAIT); apr_proc_other_child_register(g_process_manager, fcgid_maint, g_process_manager, NULL, configpool); @@ -349,8 +356,14 @@ apr_status_t procmgr_post_config(server_rec * main_server, apr_pool_t * configpool) { + const char *userdata_mutex_key = "fcgid_procmgr_mutex"; + const char *userdata_filename_key = "fcgid_procmgr_mutex_file"; + const char *userdata_pipe_key[] = { "fcgid_pipe_pm_read", "fcgid_pipe_pm_write", "fcgid_pipe_ap_read", "fcgid_pipe_ap_write" }; + apr_file_t **userdata_pipe_handle[] = { &g_pm_read_pipe, &g_pm_write_pipe, &g_ap_read_pipe, &g_ap_write_pipe }; apr_status_t rv; apr_finfo_t finfo; + size_t i; + size_t j; fcgid_server_conf *sconf = ap_get_module_config(main_server->module_config, &fcgid_module); @@ -398,22 +411,50 @@ } } - /* Create pipes to communicate between process manager and apache */ - if ((rv = apr_file_pipe_create(&g_pm_read_pipe, &g_ap_write_pipe, - configpool)) != APR_SUCCESS - || (rv = apr_file_pipe_create(&g_ap_read_pipe, &g_pm_write_pipe, - configpool))) { - ap_log_error(APLOG_MARK, APLOG_ERR, rv, main_server, - "mod_fcgid: Can't create pipe between PM and stub"); - return rv; + /* Create pipes to communicate between process manager and apache + The pipes will last across restarts */ + for (i = 0; i < 4; i++) { + apr_pool_userdata_get((void *) userdata_pipe_handle[i], userdata_pipe_key[i], + main_server->process->pool); + if (*userdata_pipe_handle[i] == NULL) { + if ((rv = apr_file_pipe_create(&g_pm_read_pipe, &g_ap_write_pipe, + main_server->process->pool)) != APR_SUCCESS + || (rv = apr_file_pipe_create(&g_ap_read_pipe, &g_pm_write_pipe, + main_server->process->pool))) { + ap_log_error(APLOG_MARK, APLOG_ERR, rv, main_server, + "mod_fcgid: Can't create pipe between PM and stub"); + return rv; + } + for (j = 0; j < 4; j++) { + apr_pool_userdata_set(*(userdata_pipe_handle[j]), userdata_pipe_key[j], + apr_pool_cleanup_null, + main_server->process->pool); + } + break; + } } - /* Create mutex for pipe reading and writing */ - rv = fcgid_mutex_create(&g_pipelock, &g_pipelock_name, - g_pipelock_mutex_type, - main_server->process->pconf, main_server); - if (rv != APR_SUCCESS) { - exit(1); + /* Create mutex for pipe reading and writing + All mutexex are going to persist across restarts */ + g_pipelock = NULL; + g_pipelock_name = NULL; + apr_pool_userdata_get((void *) &g_pipelock, userdata_mutex_key, + main_server->process->pool); + apr_pool_userdata_get((void *) &g_pipelock_name, userdata_filename_key, + main_server->process->pool); + if (g_pipelock == NULL || g_pipelock_name == NULL) { + rv = fcgid_mutex_create(&g_pipelock, &g_pipelock_name, + g_pipelock_mutex_type, + main_server->process->pool, main_server); + if (rv != APR_SUCCESS) { + exit(1); + } + apr_pool_userdata_set(g_pipelock, userdata_mutex_key, + apr_pool_cleanup_null, + main_server->process->pool); + apr_pool_userdata_set(g_pipelock_name, userdata_filename_key, + apr_pool_cleanup_null, + main_server->process->pool); } /* Create process manager process */ @@ -548,9 +589,14 @@ int procmgr_must_exit() { - return g_caughtSigTerm; + return g_caughtSigTerm || g_caughtSigUsr1; } +int procmgr_graceful_restart() +{ + return g_caughtSigUsr1; +} + apr_status_t procmgr_stop_procmgr(void *server) { return APR_SUCCESS; Index: modules/fcgid/fcgid_pm.h =================================================================== --- modules/fcgid/fcgid_pm.h (revision 1240335) +++ modules/fcgid/fcgid_pm.h (working copy) @@ -56,5 +56,6 @@ apr_status_t procmgr_stop_procmgr(void *dummy); int procmgr_must_exit(void); +int procmgr_graceful_restart(void); #endif