Add a "domain" attribute to the workers of the mod_jk load balancer.

What the patch does
  jk_lb_worker.h  adds JK_LB_DEF_DOMAIN_NAME ("unknown"), the domain used
                  when a worker has none configured.
  jk_lb_worker.c  adds worker_record.domain / is_local_domain and
                  lb_worker.num_of_local_workers, plus three helpers:
                    lookup_search_type()   maps a search id (1..search_id_max)
                                           to its name for log output; logs an
                                           error and returns "none" otherwise.
                    is_worker_candidate()  1 = worker name equals the string,
                                           2 = worker domain equals the string,
                                           3 = is_local_worker, 4 = is_local_domain,
                                           5 = always; anything else logs an
                                           error and yields JK_FALSE.
                    get_suitable_worker()  scans lb_workers[start, stop).  A
                                           candidate in error state is skipped
                                           while it is already recovering or
                                           while recover_wait_time has not yet
                                           elapsed.  With use_lb_factor every
                                           remaining candidate gets lb_factor
                                           added to lb_value, the highest
                                           lb_value wins and the sum of the
                                           factors is subtracted from the
                                           winner; without it the first
                                           remaining candidate wins.  A winner
                                           in error state is flagged
                                           in_recovering.  For search id 1,
                                           *domain_id receives the index of the
                                           name match even if that worker is
                                           not usable.
                  get_most_suitable_worker() now takes the logger and tries, in
                  order: the sticky worker named by the session route, any
                  worker of that worker's domain, the local workers, then
                  (unless local_worker_only is set) workers in a local domain,
                  then any non-local worker.
                  validate() reads each worker's domain and marks a worker as
                  is_local_domain when it is local or shares its domain with a
                  local worker.
  jk_util.[ch]    adds jk_get_worker_domain(), which looks up
                  "<PREFIX_OF_WORKER>.<wname>.domain" and returns def when the
                  key is absent (NULL when m or wname is NULL).

Review fixes folded into this revision
  * get_suitable_worker(): "elapsed" was used in a log call but never declared.
  * get_most_suitable_worker(): restored the garbled upper-bound check
    "domain_id < p->num_of_workers".
  * validate(): closed the "if ( !p->lb_workers[i].domain ) {" block.
  * jk_get_worker_domain(): calls jk_map_get_string(), as the rest of jk_util.c.
  * search_type and the local num_of_local_workers start out initialised.

NOTE(review): indentation and blank context lines were reconstructed from a
whitespace-collapsed copy of this patch.  If a hunk is rejected, retry with
"patch -l" or regenerate the patch from the patched tree.

--- jk_lb_worker.h.orig	Mon Nov  8 14:30:14 2004
+++ jk_lb_worker.h.patch	Sun Dec  5 20:08:49 2004
@@ -32,6 +32,7 @@
 #endif /* __cplusplus */
 
 #define JK_LB_WORKER_NAME ("lb")
+#define JK_LB_DEF_DOMAIN_NAME ("unknown")
 
 int JK_METHOD lb_worker_factory(jk_worker_t **w,
                                 const char *name, jk_logger_t *l);
--- jk_lb_worker.c.orig	Fri Nov 26 17:59:51 2004
+++ jk_lb_worker.c.patch	Sun Dec  5 22:07:43 2004
@@ -40,6 +40,16 @@
 #define WAIT_BEFORE_RECOVER (60*1)
 #define WORKER_RECOVER_TIME ("recover_time")
 
+int search_id_max = 5;
+const char *search_types[] = {
+    "none",
+    "sticky",
+    "sticky domain",
+    "local",
+    "local domain",
+    "any"
+};
+
 /**
  * Worker record should be inside shared
  * memory for correct behavior.
@@ -49,9 +59,11 @@
 struct worker_record
 {
     char *name;
+    char *domain;               /* configured domain, or JK_LB_DEF_DOMAIN_NAME */
     int lb_factor;
     int lb_value;
     int is_local_worker;
+    int is_local_domain;        /* local worker, or same domain as a local worker */
     int in_error_state;
     int in_recovering;
     time_t error_time;
@@ -63,6 +75,7 @@
 {
     worker_record_t *lb_workers;
     unsigned num_of_workers;
+    unsigned num_of_local_workers;
 
     jk_pool_t p;
     jk_pool_atom_t buf[TINY_POOL_SIZE];
@@ -198,77 +211,247 @@
     }
 }
 
+const char *lookup_search_type(int search_id,
+                               jk_logger_t *l)
+{
+    if ( ( search_id > 0 ) && ( search_id <= search_id_max ) ) {
+        return search_types[search_id];
+    }
+    jk_log(l, JK_LOG_ERROR,
+           "search id %d does not lie between 1 and %d\n",
+           search_id, search_id_max);
+    return search_types[0];
+}
+
+int is_worker_candidate(worker_record_t *wr,
+                        int search_id,
+                        const char *search_string,
+                        jk_logger_t *l)
+{
+    switch(search_id) {
+    case 1: /* sticky: worker name equals the session route */
+        if(0 == strcmp(search_string, wr->name)) {
+            return JK_TRUE;
+        }
+        return JK_FALSE;
+    case 2: /* sticky domain: worker domain equals the session domain */
+        if(0 == strcmp(search_string, wr->domain)) {
+            return JK_TRUE;
+        }
+        return JK_FALSE;
+    case 3: /* local */
+        if(wr->is_local_worker) {
+            return JK_TRUE;
+        }
+        return JK_FALSE;
+    case 4: /* local domain */
+        if(wr->is_local_domain) {
+            return JK_TRUE;
+        }
+        return JK_FALSE;
+    case 5: /* any */
+        return JK_TRUE;
+    }
+    jk_log(l, JK_LOG_ERROR,
+           "search id %d does not lie between 1 and %d\n",
+           search_id, search_id_max);
+    return JK_FALSE;
+}
+
+static worker_record_t *get_suitable_worker(lb_worker_t *p,
+                                            int search_id,
+                                            const char *search_string,
+                                            int start,
+                                            int stop,
+                                            int use_lb_factor,
+                                            int *domain_id,
+                                            jk_logger_t *l)
+{
+
+    worker_record_t *rc = NULL;
+    int lb_max = 0;
+    int total_factor = 0;
+    const char *search_type = NULL; /* only looked up (and used) when debug logging is on */
+    int i;
+
+    *domain_id = -1;
+    if ( JK_IS_DEBUG_LEVEL(l) ) {
+        search_type = lookup_search_type(search_id, l);
+    }
+
+    if ( JK_IS_DEBUG_LEVEL(l) ) {
+        jk_log(l, JK_LOG_TRACE,
+               "searching for %s worker (%s)\n",
+               search_type, search_string);
+    }
+    for(i = start ; i < stop ; i++) {
+        if ( JK_IS_DEBUG_LEVEL(l) ) {
+            jk_log(l, JK_LOG_TRACE,
+                   "testing worker %s (%d) for match with %s (%s)\n",
+                   p->lb_workers[i].name, i, search_type, search_string);
+        }
+        if(is_worker_candidate(&(p->lb_workers[i]), search_id, search_string, l)) {
+            if(search_id == 1) {
+                *domain_id = i;
+            }
+            if(!p->lb_workers[i].in_error_state || !p->lb_workers[i].in_recovering) {
+                if ( JK_IS_DEBUG_LEVEL(l) ) {
+                    jk_log(l, JK_LOG_TRACE,
+                           "found candidate worker %s (%d) with previous load %d in search with %s (%s)\n",
+                           p->lb_workers[i].name, i, p->lb_workers[i].lb_value, search_type, search_string);
+                }
+
+                if(p->lb_workers[i].in_error_state) {
+                    time_t now = time(0);
+                    unsigned elapsed = (unsigned)(now - p->lb_workers[i].error_time); /* for the log line only */
+                    if ((now - p->lb_workers[i].error_time) <=
+                        p->recover_wait_time) {
+                        if ( JK_IS_DEBUG_LEVEL(l) ) {
+                            jk_log(l, JK_LOG_TRACE,
+                                   "worker candidate %s (%d) is in error state - will not yet recover (%u < %d)\n",
+                                   p->lb_workers[i].name, i, elapsed, p->recover_wait_time);
+                        }
+                        continue;
+                    }
+                }
+
+                if(use_lb_factor) {
+                    p->lb_workers[i].lb_value += p->lb_workers[i].lb_factor;
+                    total_factor += p->lb_workers[i].lb_factor;
+                    if(p->lb_workers[i].lb_value > lb_max || !rc) {
+                        lb_max = p->lb_workers[i].lb_value;
+                        rc = &(p->lb_workers[i]);
+                        if ( JK_IS_DEBUG_LEVEL(l) ) {
+                            jk_log(l, JK_LOG_TRACE,
+                                   "new maximal worker %s (%d) with previous load %d in search with %s (%s)\n",
+                                   rc->name, i, rc->lb_value, search_type, search_string);
+                        }
+                    }
+                } else {
+                    rc = &(p->lb_workers[i]);
+                    break;
+                }
+            } else if ( JK_IS_DEBUG_LEVEL(l) ) {
+                jk_log(l, JK_LOG_TRACE,
+                       "worker candidate %s (%d) is in error state - already recovers\n",
+                       p->lb_workers[i].name, i);
+            }
+        }
+    }
+
+    if(rc) {
+        if(rc->in_error_state) {
+            time_t now = time(0);
+            rc->in_recovering = JK_TRUE;
+            rc->error_time = now;
+            if ( JK_IS_DEBUG_LEVEL(l) ) {
+                jk_log(l, JK_LOG_TRACE,
+                       "found worker %s is in error state - will recover\n",
+                       rc->name);
+            }
+        }
+        rc->lb_value -= total_factor;
+        if ( JK_IS_DEBUG_LEVEL(l) ) {
+            jk_log(l, JK_LOG_TRACE,
+                   "found worker %s with new load %d in search with %s (%s)\n",
+                   rc->name, rc->lb_value, search_type, search_string);
+        }
+        return rc;
+    }
+    if ( JK_IS_DEBUG_LEVEL(l) ) {
+        jk_log(l, JK_LOG_TRACE,
+               "found no %s (%s) worker\n",
+               search_type, search_string);
+    }
+    return rc;
+}
+
 static worker_record_t *get_most_suitable_worker(lb_worker_t * p,
                                                  jk_ws_service_t *s,
-                                                 int attempt)
+                                                 int attempt,
+                                                 jk_logger_t *l)
 {
     worker_record_t *rc = NULL;
     unsigned i;
     char *sessionid = NULL;
     int total_factor = 0;
+    int domain_id =-1;
+    JK_TRACE_ENTER(l);
 
     if (p->sticky_session) {
         sessionid = get_sessionid(s);
     }
 
+    if ( JK_IS_DEBUG_LEVEL(l) ) {
+        jk_log(l, JK_LOG_TRACE,
+               "total sessionid is %s.\n",
+               sessionid ? sessionid : "empty");
+    }
+
     while (sessionid) {
         char *next = strchr(sessionid, ';');
         char *session_route;
+        char *session_domain;
 
         if (next) {
             *next++ = '\0';
         }
 
+        if ( JK_IS_DEBUG_LEVEL(l) ) {
+            jk_log(l, JK_LOG_TRACE,
+                   "searching worker for partial sessionid %s.\n",
+                   sessionid);
+        }
         session_route = strchr(sessionid, '.');
         if (session_route) {
             ++session_route;
-            for (i = 0; i < p->num_of_workers; i++) {
-                if (0 == strcmp(session_route, p->lb_workers[i].name)) {
-                    /* First attempt will allways be to the
-                       correct host. If this is indeed down and no
-                       hope of recovery, we'll go to fail-over
-                     */
-                    if (attempt > 0 && p->lb_workers[i].in_error_state) {
-                        next = NULL;    /* Double break; */
-                        break;
-                    }
-                    else {
-                        return &(p->lb_workers[i]);
-                    }
-                }
+
+            rc = get_suitable_worker(p, 1, session_route, 0, p->num_of_workers, 0, &domain_id, l);
+            if ( rc ) {
+                JK_TRACE_EXIT(l);
+                return rc;
+            }
+
+            if(domain_id>=0 && (unsigned)domain_id < p->num_of_workers) {
+                session_domain = p->lb_workers[domain_id].domain;
+            } else {
+                session_domain = JK_LB_DEF_DOMAIN_NAME;
             }
+            if ( JK_IS_DEBUG_LEVEL(l) ) {
+                jk_log(l, JK_LOG_TRACE,
+                       "found domain %s in route %s\n",
+                       session_domain, session_route);
+            }
+
+            rc = get_suitable_worker(p, 2, session_domain, 0, p->num_of_workers, 1, &domain_id, l);
+            if ( rc ) {
+                JK_TRACE_EXIT(l);
+                return rc;
+            }
+
         }
         sessionid = next;
     }
 
-    for (i = 0; i < p->num_of_workers; i++) {
-        if (!p->in_local_worker_mode || p->lb_workers[i].is_local_worker
-            || !p->local_worker_only) {
-            if (p->lb_workers[i].in_error_state) {
-                if (!p->lb_workers[i].in_recovering) {
-                    time_t now = time(0);
-                    if ((now - p->lb_workers[i].error_time) >
-                        p->recover_wait_time) {
-                        p->lb_workers[i].in_recovering = JK_TRUE;
-                        p->lb_workers[i].error_time = now;
-                        rc = &(p->lb_workers[i]);
-                        break;
-                    }
-                }
-            }
-            else {
-                p->lb_workers[i].lb_value += p->lb_workers[i].lb_factor;
-                total_factor += p->lb_workers[i].lb_factor;
-                if (!rc || p->lb_workers[i].lb_value > rc->lb_value)
-                    rc = &(p->lb_workers[i]);
-            }
-        }
+
+    rc = get_suitable_worker(p, 3, "any", 0, p->num_of_local_workers, 1, &domain_id, l);
+    if ( rc ) {
+        JK_TRACE_EXIT(l);
+        return rc;
     }
 
-    if (rc) {
-        rc->lb_value -= total_factor;
+    if(p->local_worker_only) {
+        JK_TRACE_EXIT(l);
+        return NULL;
+    }
+
+    rc = get_suitable_worker(p, 4, "any", p->num_of_local_workers, p->num_of_workers, 1, &domain_id, l);
+    if ( rc ) {
+        JK_TRACE_EXIT(l);
+        return rc;
     }
-
+    rc = get_suitable_worker(p, 5, "any", p->num_of_local_workers, p->num_of_workers, 1, &domain_id, l);
+    JK_TRACE_EXIT(l);
     return rc;
 }
 
@@ -297,7 +480,7 @@
 
         while (1) {
             worker_record_t *rec =
-                get_most_suitable_worker(p->worker, s, attempt++);
+                get_most_suitable_worker(p->worker, s, attempt++, l);
             int rc;
 
             if (rec) {
@@ -397,9 +580,11 @@
         lb_worker_t *p = pThis->worker_private;
         char **worker_names;
         unsigned num_of_workers;
+        unsigned num_of_local_workers = 0;
         p->in_local_worker_mode = JK_FALSE;
         p->local_worker_only = jk_get_local_worker_only_flag(props, p->name);
         p->sticky_session = jk_get_is_sticky_session(props, p->name);
+        p->num_of_local_workers = 0;
 
         if (jk_get_lb_worker_list(props,
                                   p->name,
@@ -425,6 +610,11 @@
                 if (p->lb_workers[i].lb_factor < 1) {
                     p->lb_workers[i].lb_factor = 1;
                 }
+                p->lb_workers[i].domain = jk_get_worker_domain(props,
+                                              worker_names[i], JK_LB_DEF_DOMAIN_NAME);
+                if ( !p->lb_workers[i].domain ) {
+                    p->lb_workers[i].domain = JK_LB_DEF_DOMAIN_NAME;
+                }
                 p->lb_workers[i].is_local_worker =
                     jk_get_is_local_worker(props, worker_names[i]);
 
@@ -457,6 +647,7 @@
                     j++;
                 }
             }
+            num_of_local_workers = j;
 
             if (!p->in_local_worker_mode) {
                 p->local_worker_only = JK_FALSE;
@@ -470,10 +661,24 @@
 
             }
             else {
+                for (i = 0; i < num_of_local_workers; i++) {
+                    p->lb_workers[i].is_local_domain=1;
+                }
+                for (i = num_of_local_workers; i < num_of_workers; i++) {
+                    p->lb_workers[i].is_local_domain=0;
+                    for (j = 0; j < num_of_local_workers; j++) {
+                        if(0 == strcmp(p->lb_workers[i].domain, p->lb_workers[j].domain)) {
+                            p->lb_workers[i].is_local_domain=1;
+                            break;
+                        }
+                    }
+                }
+
                 for (i = 0; i < num_of_workers; i++) {
                     jk_log(l, JK_LOG_DEBUG,
-                           "Balanced worker %i has name %s\n",
-                           i, p->lb_workers[i].name);
+                           "Balanced worker %i has name %s in domain %s and has local=%d and local_domain=%d\n",
+                           i, p->lb_workers[i].name, p->lb_workers[i].domain,
+                           p->lb_workers[i].is_local_worker, p->lb_workers[i].is_local_domain);
                 }
                 jk_log(l, JK_LOG_DEBUG,
                        "in_local_worker_mode: %s\n",
@@ -482,6 +687,7 @@
                        "local_worker_only: %s\n",
                        (p->local_worker_only ? "true" : "false"));
                 p->num_of_workers = num_of_workers;
+                p->num_of_local_workers = num_of_local_workers;
                 JK_TRACE_EXIT(l);
                 return JK_TRUE;
             }
@@ -576,6 +782,7 @@
 
             private_data->lb_workers = NULL;
             private_data->num_of_workers = 0;
+            private_data->num_of_local_workers = 0;
             private_data->worker.worker_private = private_data;
             private_data->worker.validate = validate;
             private_data->worker.init = init;
--- jk_util.h.orig	Fri Nov 12 19:45:24 2004
+++ jk_util.h.patch	Sun Dec  5 20:05:54 2004
@@ -70,6 +70,8 @@
 
 int jk_get_worker_recycle_timeout(jk_map_t *m, const char *wname, int def);
 
+char *jk_get_worker_domain(jk_map_t *m, const char *wname, const char *def);
+
 char *jk_get_worker_secret_key(jk_map_t *m, const char *wname);
 
 int jk_get_worker_retries(jk_map_t *m, const char *wname, int def);
--- jk_util.c.orig	Fri Nov 19 16:58:51 2004
+++ jk_util.c.patch	Sun Dec  5 20:04:09 2004
@@ -59,6 +59,7 @@
 #define STICKY_SESSION ("sticky_session")
 #define LOCAL_WORKER_ONLY_FLAG ("local_worker_only")
 #define LOCAL_WORKER_FLAG ("local_worker")
+#define DOMAIN_OF_WORKER ("domain")
 #define DEFAULT_WORKER_TYPE JK_AJP13_WORKER_NAME
 #define SECRET_KEY_OF_WORKER ("secretkey")
 
@@ -334,6 +335,16 @@
 
     sprintf(buf, "%s.%s.%s", PREFIX_OF_WORKER, wname, TYPE_OF_WORKER);
     return jk_map_get_string(m, buf, DEFAULT_WORKER_TYPE);
+}
+
+char *jk_get_worker_domain(jk_map_t *m, const char *wname, const char *def)
+{
+    char buf[1024];
+    if (!m || !wname) {
+        return NULL;
+    }
+    sprintf(buf, "%s.%s.%s", PREFIX_OF_WORKER, wname, DOMAIN_OF_WORKER);
+    return jk_map_get_string(m, buf, def);
 }
 
 char *jk_get_worker_secret(jk_map_t *m, const char *wname)