ASF Bugzilla – Attachment 16009 Details for
Bug 36138
mod_jk load balance algorithm that accounts for current worker load
Home
|
New
|
Browse
|
Search
|
[?]
|
Reports
|
Help
|
New Account
|
Log In
Remember
[x]
|
Forgot Password
Login:
[x]
[patch]
New load balancing algorithm for mod_jk
lb_busyness2.patch (text/plain), 18.05 KB, created by
Chris Lamprecht
on 2005-08-11 06:12:45 UTC
(
hide
)
Description:
New load balancing algorithm for mod_jk
Filename:
MIME Type:
Creator:
Chris Lamprecht
Created:
2005-08-11 06:12:45 UTC
Size:
18.05 KB
patch
obsolete
>Index: jk/native/common/jk_lb_worker.c >=================================================================== >RCS file: /home/cvspublic/jakarta-tomcat-connectors/jk/native/common/jk_lb_worker.c,v >retrieving revision 1.91 >diff -u -r1.91 jk_lb_worker.c >--- jk/native/common/jk_lb_worker.c 14 Jun 2005 06:34:13 -0000 1.91 >+++ jk/native/common/jk_lb_worker.c 11 Aug 2005 03:18:57 -0000 >@@ -142,6 +142,17 @@ > return result; > } > >+const char *get_method_value(int t) >+{ >+ if (JK_LB_BYREQUESTS == t) >+ return "by request"; >+ else if (JK_LB_BYTRAFFIC == t) >+ return "by traffic"; >+ else if (JK_LB_BYBUSYNESS == t) >+ return "by busyness"; >+ else >+ return "Unknown"; >+} > > /* Retrieve session id from the cookie or the parameter */ > /* (parameter first) */ >@@ -229,8 +240,6 @@ > jk_u64_t curmin = 0; > > worker_record_t *candidate = NULL; >- if (p->lblock == JK_LB_LOCK_PESSIMISTIC) >- jk_shm_lock(); > /* First try to see if we have available candidate */ > for (i = 0; i < p->num_of_workers; i++) { > /* Skip all workers that are not member of domain */ >@@ -263,8 +272,6 @@ > candidate->s->lb_value -= total_factor; > candidate->r = &(candidate->s->domain[0]); > } >- if (p->lblock == JK_LB_LOCK_PESSIMISTIC) >- jk_shm_unlock(); > > return candidate; > } >@@ -277,8 +284,6 @@ > int total_factor = 0; > worker_record_t *candidate = NULL; > >- if (p->lblock == JK_LB_LOCK_PESSIMISTIC) >- jk_shm_lock(); > /* First try to see if we have available candidate */ > for (i = 0; i < p->num_of_workers; i++) { > /* If the worker is in error state run >@@ -303,8 +308,6 @@ > > if (candidate) > candidate->s->lb_value -= total_factor; >- if (p->lblock == JK_LB_LOCK_PESSIMISTIC) >- jk_shm_unlock(); > > return candidate; > } >@@ -317,8 +320,6 @@ > jk_u64_t curmin = 0; > worker_record_t *candidate = NULL; > >- if (p->lblock == JK_LB_LOCK_PESSIMISTIC) >- jk_shm_lock(); > /* First try to see if we have available candidate */ > for (i = 0; i < p->num_of_workers; i++) { > /* If the worker is in 
error state run >@@ -342,8 +343,74 @@ > } > } > } >- if (p->lblock == JK_LB_LOCK_PESSIMISTIC) >- jk_shm_unlock(); >+ return candidate; >+} >+ >+static worker_record_t *find_best_bybusyness(lb_worker_t *p, >+ jk_logger_t *l) >+{ >+ static unsigned int next_offset = 0; >+ unsigned int i; >+ unsigned int j; >+ unsigned int offset; >+ int bfn = 0; /* Numerator of best busy factor; initialised so the first comparison never reads an indeterminate value */ >+ int bfd = 1; /* Denominator of best busy factor; initialised for the same reason */ >+ int curn; /* Numerator of current busy factor */ >+ int curd; /* Denominator of current busy factor */ >+ >+ int left; /* left and right are used to compare rational numbers */ >+ int right; >+ >+ /* find the least busy worker */ >+ worker_record_t *candidate = NULL; >+ >+ offset = next_offset; >+ >+ /* First try to see if we have available candidate */ >+ for (j = 0; j < p->num_of_workers; j++) { >+ i = (j + offset) % p->num_of_workers; >+ >+ /* If the worker is in error state run >+ * retry on that worker. It will be marked as >+ * operational if the retry timeout is elapsed. >+ * The worker might still be unusable, but we try >+ * anyway. >+ */ >+ if (JK_WORKER_IN_ERROR(p->lb_workers[i].s)) { >+ retry_worker(&p->lb_workers[i], p->s->recover_wait_time, l); >+ } >+ /* Take into calculation only the workers that are >+ * not in error state, stopped or not disabled. >+ */ >+ if (JK_WORKER_USABLE(p->lb_workers[i].s)) { >+ curn = p->lb_workers[i].s->busy; >+ curd = p->lb_workers[i].s->lb_factor; >+ >+ /* If the server is restarted under load there is a bug that causes >+ * busy to be reset to zero before all the outstanding connections >+ * finish, they then finally finish. As a result, the busy value >+ * becomes negative, messing up the busyness load balancing. 
>+ * When this bug is fixed, this section can be removed */ >+ if (curn < 0) { >+ jk_log(l, JK_LOG_WARNING, >+ "busy value is %d for worker %s, resetting it to zero", >+ curn, p->lb_workers[i].s->name); >+ p->lb_workers[i].s->busy = 0; >+ curn = 0; >+ } >+ >+ /* compare rational numbers: (a/b) < (c/d) iff a*d < c*b */ >+ left = curn * bfd; >+ right = bfn * curd; >+ >+ if (!candidate || (left < right)) { >+ candidate = &p->lb_workers[i]; >+ bfn = curn; >+ bfd = curd; >+ next_offset = i + 1; >+ } >+ } >+ } > return candidate; > } > >@@ -386,8 +453,6 @@ > } > if (candidate && !uses_domain && > p->lbmethod == JK_LB_BYREQUESTS) { >- if (p->lblock == JK_LB_LOCK_PESSIMISTIC) >- jk_shm_lock(); > > for (i = 0; i < p->num_of_workers; i++) { > if (JK_WORKER_USABLE(p->lb_workers[i].s)) { >@@ -400,8 +465,6 @@ > } > } > candidate->s->lb_value -= total_factor; >- if (p->lblock == JK_LB_LOCK_PESSIMISTIC) >- jk_shm_unlock(); > } > return candidate; > } >@@ -433,6 +496,9 @@ > rc = find_best_byrequests(p, l); > else if (p->lbmethod == JK_LB_BYTRAFFIC) > rc = find_best_bytraffic(p, l); >+ else if (p->lbmethod == JK_LB_BYBUSYNESS) >+ rc = find_best_bybusyness(p, l); >+ > /* By default use worker name as session route */ > if (rc) > rc->r = &(rc->s->name[0]); >@@ -447,6 +513,7 @@ > jk_logger_t *l) > { > worker_record_t *rc = NULL; >+ int had_session_id = JK_FALSE; > char *sessionid = NULL; > int r; > >@@ -483,8 +550,11 @@ > JK_TRACE_EXIT(l); > return NULL; > } >+ if (p->lblock == JK_LB_LOCK_PESSIMISTIC) >+ jk_shm_lock(); > if (sessionid) { > char *session = sessionid; >+ had_session_id = JK_TRUE; > if (JK_IS_DEBUG_LEVEL(l)) { > jk_log(l, JK_LOG_DEBUG, > "total sessionid is %s", >@@ -511,13 +581,8 @@ > /* We have a session route. Whow! 
*/ > rc = find_bysession_route(p, session_route, l); > if (rc) { >- JK_LEAVE_CS(&(p->cs), r); >- if (JK_IS_DEBUG_LEVEL(l)) >- jk_log(l, JK_LOG_DEBUG, >- "found worker %s for route %s and partial sessionid %s", >- rc->s->name, session_route, sessionid); >- JK_TRACE_EXIT(l); >- return rc; >+ rc->s->sticky_session_count++; >+ break; > } > } > /* Try next partial sessionid if present */ >@@ -525,20 +590,31 @@ > rc = NULL; > } > if (!rc && p->s->sticky_session_force) { >- JK_LEAVE_CS(&(p->cs), r); > jk_log(l, JK_LOG_INFO, > "all workers are in error state for session %s", > session); >- JK_TRACE_EXIT(l); >- return NULL; > } > } >- rc = find_best_worker(p, l); >+ if (!rc && (!had_session_id || !p->s->sticky_session_force)) { >+ rc = find_best_worker(p, l); >+ if (rc) rc->s->elected++; >+ } >+ if (rc) { >+ /* Increment the number of workers serving request */ >+ p->s->busy++; >+ if (p->s->busy > p->s->max_busy) >+ p->s->max_busy = p->s->busy; >+ rc->s->busy++; >+ if (rc->s->busy > rc->s->max_busy) >+ rc->s->max_busy = rc->s->busy; >+ } >+ if (p->lblock == JK_LB_LOCK_PESSIMISTIC) >+ jk_shm_unlock(); > JK_LEAVE_CS(&(p->cs), r); > if (rc && JK_IS_DEBUG_LEVEL(l)) { > jk_log(l, JK_LOG_DEBUG, > "found best worker (%s) using %s method", rc->s->name, >- p->lbmethod == JK_LB_BYREQUESTS ? "by request" : "by traffic"); >+ get_method_value(p->lbmethod)); > } > JK_TRACE_EXIT(l); > return rc; >@@ -572,12 +648,14 @@ > worker_record_t *rec = > get_most_suitable_worker(p->worker, s, attempt++, l); > int rc; >+ int r; > /* Do not reuse previous worker, because > * that worker already failed. 
> */ > if (rec && rec != prec) { > int is_service_error = JK_HTTP_OK; > int service_stat = JK_FALSE; >+ int no_endpoint = JK_TRUE; > jk_endpoint_t *end = NULL; > > s->jvm_route = rec->r; >@@ -587,40 +665,37 @@ > jk_log(l, JK_LOG_DEBUG, > "service worker=%s jvm_route=%s", > rec->s->name, s->jvm_route); >- rec->s->elected++; > if (rc && end) { >+ no_endpoint = JK_FALSE; >+ > /* Reset endpoint read and write sizes for > * this request. > */ > end->rd = end->wr = 0; >- /* Increment the number of workers serving request */ >- p->worker->s->busy++; >- if (p->worker->s->busy > p->worker->s->max_busy) >- p->worker->s->max_busy = p->worker->s->busy; >- rec->s->busy++; >- if (rec->s->busy > rec->s->max_busy) >- rec->s->max_busy = rec->s->busy; >+ > service_stat = end->service(end, s, l, &is_service_error); > /* Update partial reads and writes if any */ > rec->s->readed += end->rd; > rec->s->transferred += end->wr; > end->done(&end, l); >+ > /* When returning the endpoint mark the worker as not busy. 
> * We have at least one endpoint free > */ > rec->s->is_busy = JK_FALSE; >- /* Decrement the busy worker count */ >- rec->s->busy--; >- p->worker->s->busy--; >- if (service_stat == JK_TRUE) { >- rec->s->in_error_state = JK_FALSE; >- rec->s->in_recovering = JK_FALSE; >- rec->s->error_time = 0; >- JK_TRACE_EXIT(l); >- return JK_TRUE; >- } > } >- else { >+ >+ JK_ENTER_CS(&(p->worker->cs), r); >+ if (p->worker->lblock == JK_LB_LOCK_PESSIMISTIC) >+ jk_shm_lock(); >+ /* Decrement the busy worker count */ >+ rec->s->busy--; >+ p->worker->s->busy--; >+ if (p->worker->lblock == JK_LB_LOCK_PESSIMISTIC) >+ jk_shm_unlock(); >+ JK_LEAVE_CS(&(p->worker->cs), r); >+ >+ if (no_endpoint) { > /* If we can not get the endpoint > * mark the worker as busy rather then > * as in error >@@ -634,7 +709,15 @@ > prec = rec; > continue; > } >- if (service_stat == JK_FALSE) { >+ >+ if (service_stat == JK_TRUE) { >+ rec->s->in_error_state = JK_FALSE; >+ rec->s->in_recovering = JK_FALSE; >+ rec->s->error_time = 0; >+ JK_TRACE_EXIT(l); >+ return JK_TRUE; >+ } >+ else if (service_stat == JK_FALSE) { > /* > * Service failed !!! > * >@@ -699,6 +782,17 @@ > "recoverable error... will try to recover on other host"); > } > else { >+ if (rec) { >+ JK_ENTER_CS(&(p->worker->cs), r); >+ if (p->worker->lblock == JK_LB_LOCK_PESSIMISTIC) >+ jk_shm_lock(); >+ /* Decrement the busy worker count */ >+ rec->s->busy--; >+ p->worker->s->busy--; >+ if (p->worker->lblock == JK_LB_LOCK_PESSIMISTIC) >+ jk_shm_unlock(); >+ JK_LEAVE_CS(&(p->worker->cs), r); >+ } > /* NULL record, no more workers left ... 
*/ > jk_log(l, JK_LOG_ERROR, > "All tomcat instances failed, no more workers left"); >Index: jk/native/common/jk_lb_worker.h >=================================================================== >RCS file: /home/cvspublic/jakarta-tomcat-connectors/jk/native/common/jk_lb_worker.h,v >retrieving revision 1.15 >diff -u -r1.15 jk_lb_worker.h >--- jk/native/common/jk_lb_worker.h 15 May 2005 15:22:05 -0000 1.15 >+++ jk/native/common/jk_lb_worker.h 11 Aug 2005 03:18:57 -0000 >@@ -39,8 +39,10 @@ > > #define JK_LB_BYREQUESTS (0) > #define JK_LB_BYTRAFFIC (1) >+#define JK_LB_BYBUSYNESS (2) > #define JK_LB_METHOD_REQUESTS ("Request") > #define JK_LB_METHOD_TRAFFIC ("Traffic") >+#define JK_LB_METHOD_BUSYNESS ("Busyness") > #define JK_LB_LOCK_DEFAULT (0) > #define JK_LB_LOCK_PESSIMISTIC (1) > #define JK_LB_LM_DEFAULT ("Optimistic") >Index: jk/native/common/jk_shm.h >=================================================================== >RCS file: /home/cvspublic/jakarta-tomcat-connectors/jk/native/common/jk_shm.h,v >retrieving revision 1.22 >diff -u -r1.22 jk_shm.h >--- jk/native/common/jk_shm.h 14 Jun 2005 06:34:13 -0000 1.22 >+++ jk/native/common/jk_shm.h 11 Aug 2005 03:18:57 -0000 >@@ -86,6 +86,8 @@ > volatile jk_u64_t transferred; > /* Number of times the worker was elected */ > volatile size_t elected; >+ /* Number of times the worker was reused from sticky session */ >+ volatile size_t sticky_session_count; > /* Number of non 200 responses */ > volatile size_t errors; > }; >Index: jk/native/common/jk_status.c >=================================================================== >RCS file: /home/cvspublic/jakarta-tomcat-connectors/jk/native/common/jk_status.c,v >retrieving revision 1.44 >diff -u -r1.44 jk_status.c >--- jk/native/common/jk_status.c 14 Jun 2005 14:31:24 -0000 1.44 >+++ jk/native/common/jk_status.c 11 Aug 2005 03:18:58 -0000 >@@ -83,6 +83,14 @@ > NULL > }; > >+static const char *lb_method_type[] = { >+ JK_LB_METHOD_REQUESTS, >+ JK_LB_METHOD_TRAFFIC, >+ 
JK_LB_METHOD_BUSYNESS, >+ "unknown", >+ NULL >+}; >+ > static const char *headers_names[] = { > "Content-Type", > "Cache-Control", >@@ -200,6 +208,13 @@ > } while (1); > } > >+static const char *status_lb_method_type(int t) >+{ >+ if (t < 0 || t > 2) >+ t = 3; >+ return lb_method_type[t]; >+} >+ > static const char *status_worker_type(int t) > { > if (t < 0 || t > 6) >@@ -443,12 +458,12 @@ > jk_putv(s, "<td>", status_val_bool(lb->s->sticky_session_force), > "</td>", NULL); > jk_printf(s, "<td>%d</td>", lb->s->retries); >- jk_printf(s, "<td>%s</td>", lb->lbmethod == JK_LB_BYREQUESTS ? JK_LB_METHOD_REQUESTS : JK_LB_METHOD_TRAFFIC); >+ jk_putv(s, "<td>", status_lb_method_type(lb->lbmethod), "</td>", NULL); > jk_printf(s, "<td>%s</td>", lb->lblock == JK_LB_LOCK_DEFAULT ? JK_LB_LM_DEFAULT : JK_LB_LM_PESSIMISTIC); > jk_puts(s, "</tr>\n</table>\n<br/>\n"); > jk_puts(s, "<table><tr>" > "<th>Name</th><th>Type</th><th>Host</th><th>Addr</th>" >- "<th>Stat</th><th>F</th><th>V</th><th>Acc</th><th>Err</th>" >+ "<th>Stat</th><th>F</th><th>V</th><th>Acc</th><th>Ssc</th><th>Err</th>" > "<th>Wr</th><th>Rd</th><th>Busy</th><th>Max</th><th>RR</th><th>Cd</th></tr>\n"); > for (j = 0; j < lb->num_of_workers; j++) { > worker_record_t *wr = &(lb->lb_workers[j]); >@@ -474,6 +489,7 @@ > jk_printf(s, "<td>%d</td>", wr->s->lb_factor); > jk_printf(s, "<td>%d</td>", wr->s->lb_value); > jk_printf(s, "<td>%u</td>", wr->s->elected); >+ jk_printf(s, "<td>%u</td>", wr->s->sticky_session_count); > jk_printf(s, "<td>%u</td>", wr->s->errors); > jk_putv(s, "<td>", status_strfsize(wr->s->transferred, buf), > "</td>", NULL); >@@ -580,7 +596,8 @@ > "<tr><th>Stat</th><td>Worker status</td></tr>\n" > "<tr><th>F</th><td>Load Balancer Factor</td></tr>\n" > "<tr><th>V</th><td>Load Balancer Value</td></tr>\n" >- "<tr><th>Acc</th><td>Number of requests</td></tr>\n" >+ "<tr><th>Acc</th><td>Number of new requests</td></tr>\n" >+ "<tr><th>Ssc</th><td>Number of sticky session requests</td></tr>\n" > 
"<tr><th>Err</th><td>Number of failed requests</td></tr>\n" > "<tr><th>Wr</th><td>Number of bytes transferred</td></tr>\n" > "<tr><th>Rd</th><td>Number of bytes read</td></tr>\n" >Index: jk/native/common/jk_util.c >=================================================================== >RCS file: /home/cvspublic/jakarta-tomcat-connectors/jk/native/common/jk_util.c,v >retrieving revision 1.71 >diff -u -r1.71 jk_util.c >--- jk/native/common/jk_util.c 1 Jul 2005 15:41:08 -0000 1.71 >+++ jk/native/common/jk_util.c 11 Aug 2005 03:18:59 -0000 >@@ -770,6 +770,8 @@ > return JK_LB_BYTRAFFIC; > else if (*v == 'r' || *v == 'R' || *v == '0') > return JK_LB_BYREQUESTS; >+ else if (*v == 'b' || *v == 'B') >+ return JK_LB_BYBUSYNESS; > else > return JK_LB_BYREQUESTS; > }
You cannot view the attachment while viewing its details because your browser does not support IFRAMEs.
View the attachment on a separate page
.
View Attachment As Diff
View Attachment As Raw
Actions:
View
|
Diff
Attachments on
bug 36138
: 16009