Lines 142-147
Link Here
|
142 |
return result; |
142 |
return result; |
143 |
} |
143 |
} |
144 |
|
144 |
|
|
|
145 |
const char *get_method_value(int t) |
146 |
{ |
147 |
if (JK_LB_BYREQUESTS == t) |
148 |
return "by request"; |
149 |
else if (JK_LB_BYTRAFFIC == t) |
150 |
return "by traffic"; |
151 |
else if (JK_LB_BYBUSYNESS == t) |
152 |
return "by busyness"; |
153 |
else |
154 |
return "Unknown"; |
155 |
} |
145 |
|
156 |
|
146 |
/* Retrieve session id from the cookie or the parameter */ |
157 |
/* Retrieve session id from the cookie or the parameter */ |
147 |
/* (parameter first) */ |
158 |
/* (parameter first) */ |
Lines 229-236
Link Here
|
229 |
jk_u64_t curmin = 0; |
240 |
jk_u64_t curmin = 0; |
230 |
|
241 |
|
231 |
worker_record_t *candidate = NULL; |
242 |
worker_record_t *candidate = NULL; |
232 |
if (p->lblock == JK_LB_LOCK_PESSIMISTIC) |
|
|
233 |
jk_shm_lock(); |
234 |
/* First try to see if we have available candidate */ |
243 |
/* First try to see if we have available candidate */ |
235 |
for (i = 0; i < p->num_of_workers; i++) { |
244 |
for (i = 0; i < p->num_of_workers; i++) { |
236 |
/* Skip all workers that are not member of domain */ |
245 |
/* Skip all workers that are not member of domain */ |
Lines 263-270
Link Here
|
263 |
candidate->s->lb_value -= total_factor; |
272 |
candidate->s->lb_value -= total_factor; |
264 |
candidate->r = &(candidate->s->domain[0]); |
273 |
candidate->r = &(candidate->s->domain[0]); |
265 |
} |
274 |
} |
266 |
if (p->lblock == JK_LB_LOCK_PESSIMISTIC) |
|
|
267 |
jk_shm_unlock(); |
268 |
|
275 |
|
269 |
return candidate; |
276 |
return candidate; |
270 |
} |
277 |
} |
Lines 277-284
Link Here
|
277 |
int total_factor = 0; |
284 |
int total_factor = 0; |
278 |
worker_record_t *candidate = NULL; |
285 |
worker_record_t *candidate = NULL; |
279 |
|
286 |
|
280 |
if (p->lblock == JK_LB_LOCK_PESSIMISTIC) |
|
|
281 |
jk_shm_lock(); |
282 |
/* First try to see if we have available candidate */ |
287 |
/* First try to see if we have available candidate */ |
283 |
for (i = 0; i < p->num_of_workers; i++) { |
288 |
for (i = 0; i < p->num_of_workers; i++) { |
284 |
/* If the worker is in error state run |
289 |
/* If the worker is in error state run |
Lines 303-310
Link Here
|
303 |
|
308 |
|
304 |
if (candidate) |
309 |
if (candidate) |
305 |
candidate->s->lb_value -= total_factor; |
310 |
candidate->s->lb_value -= total_factor; |
306 |
if (p->lblock == JK_LB_LOCK_PESSIMISTIC) |
|
|
307 |
jk_shm_unlock(); |
308 |
|
311 |
|
309 |
return candidate; |
312 |
return candidate; |
310 |
} |
313 |
} |
Lines 317-324
Link Here
|
317 |
jk_u64_t curmin = 0; |
320 |
jk_u64_t curmin = 0; |
318 |
worker_record_t *candidate = NULL; |
321 |
worker_record_t *candidate = NULL; |
319 |
|
322 |
|
320 |
if (p->lblock == JK_LB_LOCK_PESSIMISTIC) |
|
|
321 |
jk_shm_lock(); |
322 |
/* First try to see if we have available candidate */ |
323 |
/* First try to see if we have available candidate */ |
323 |
for (i = 0; i < p->num_of_workers; i++) { |
324 |
for (i = 0; i < p->num_of_workers; i++) { |
324 |
/* If the worker is in error state run |
325 |
/* If the worker is in error state run |
Lines 342-349
Link Here
|
342 |
} |
343 |
} |
343 |
} |
344 |
} |
344 |
} |
345 |
} |
345 |
if (p->lblock == JK_LB_LOCK_PESSIMISTIC) |
346 |
return candidate; |
346 |
jk_shm_unlock(); |
347 |
} |
|
|
348 |
|
349 |
static worker_record_t *find_best_bybusyness(lb_worker_t *p, |
350 |
jk_logger_t *l) |
351 |
{ |
352 |
static unsigned int next_offset = 0; |
353 |
unsigned int i; |
354 |
unsigned int j; |
355 |
unsigned int offset; |
356 |
int bfn; /* Numerator of best busy factor */ |
357 |
int bfd; /* Denominator of best busy factor */ |
358 |
int curn; /* Numerator of current busy factor */ |
359 |
int curd; /* Denominator of current busy factor */ |
360 |
|
361 |
int left; /* left and right are used to compare rational numbers */ |
362 |
int right; |
363 |
|
364 |
/* find the least busy worker */ |
365 |
worker_record_t *candidate = NULL; |
366 |
|
367 |
offset = next_offset; |
368 |
|
369 |
/* First try to see if we have available candidate */ |
370 |
for (j = 0; j < p->num_of_workers; j++) { |
371 |
i = (j + offset) % p->num_of_workers; |
372 |
|
373 |
/* If the worker is in error state run |
374 |
* retry on that worker. It will be marked as |
375 |
* operational if the retry timeout is elapsed. |
376 |
* The worker might still be unusable, but we try |
377 |
* anyway. |
378 |
*/ |
379 |
if (JK_WORKER_IN_ERROR(p->lb_workers[i].s)) { |
380 |
retry_worker(&p->lb_workers[i], p->s->recover_wait_time, l); |
381 |
} |
382 |
/* Take into calculation only the workers that are |
383 |
* not in error state, stopped or not disabled. |
384 |
*/ |
385 |
if (JK_WORKER_USABLE(p->lb_workers[i].s)) { |
386 |
curn = p->lb_workers[i].s->busy; |
387 |
curd = p->lb_workers[i].s->lb_factor; |
388 |
|
389 |
/* If the server is restarted under load there is a bug that causes |
390 |
* busy to be reset to zero before all the outstanding connections |
391 |
* finish, they then finally finish. As a result, the busy value |
392 |
* becomes negative, messing up the busyness load balancing. |
393 |
* When this bug is fixed, this section can be removed */ |
394 |
if (curn < 0) { |
395 |
jk_log(l, JK_LOG_WARNING, |
396 |
"busy value is %d for worker %s, resetting it to zero", |
397 |
curn, p->lb_workers[i].s->name); |
398 |
p->lb_workers[i].s->busy = 0; |
399 |
curn = 0; |
400 |
} |
401 |
|
402 |
/* compare rational numbers: (a/b) < (c/d) iff a*d < c*b */ |
403 |
left = curn * bfd; |
404 |
right = bfn * curd; |
405 |
|
406 |
if (!candidate || (left < right)) { |
407 |
candidate = &p->lb_workers[i]; |
408 |
bfn = curn; |
409 |
bfd = curd; |
410 |
next_offset = i + 1; |
411 |
} |
412 |
} |
413 |
} |
347 |
return candidate; |
414 |
return candidate; |
348 |
} |
415 |
} |
349 |
|
416 |
|
Lines 386-393
Link Here
|
386 |
} |
453 |
} |
387 |
if (candidate && !uses_domain && |
454 |
if (candidate && !uses_domain && |
388 |
p->lbmethod == JK_LB_BYREQUESTS) { |
455 |
p->lbmethod == JK_LB_BYREQUESTS) { |
389 |
if (p->lblock == JK_LB_LOCK_PESSIMISTIC) |
|
|
390 |
jk_shm_lock(); |
391 |
|
456 |
|
392 |
for (i = 0; i < p->num_of_workers; i++) { |
457 |
for (i = 0; i < p->num_of_workers; i++) { |
393 |
if (JK_WORKER_USABLE(p->lb_workers[i].s)) { |
458 |
if (JK_WORKER_USABLE(p->lb_workers[i].s)) { |
Lines 400-407
Link Here
|
400 |
} |
465 |
} |
401 |
} |
466 |
} |
402 |
candidate->s->lb_value -= total_factor; |
467 |
candidate->s->lb_value -= total_factor; |
403 |
if (p->lblock == JK_LB_LOCK_PESSIMISTIC) |
|
|
404 |
jk_shm_unlock(); |
405 |
} |
468 |
} |
406 |
return candidate; |
469 |
return candidate; |
407 |
} |
470 |
} |
Lines 433-438
Link Here
|
433 |
rc = find_best_byrequests(p, l); |
496 |
rc = find_best_byrequests(p, l); |
434 |
else if (p->lbmethod == JK_LB_BYTRAFFIC) |
497 |
else if (p->lbmethod == JK_LB_BYTRAFFIC) |
435 |
rc = find_best_bytraffic(p, l); |
498 |
rc = find_best_bytraffic(p, l); |
|
|
499 |
else if (p->lbmethod == JK_LB_BYBUSYNESS) |
500 |
rc = find_best_bybusyness(p, l); |
501 |
|
436 |
/* By default use worker name as session route */ |
502 |
/* By default use worker name as session route */ |
437 |
if (rc) |
503 |
if (rc) |
438 |
rc->r = &(rc->s->name[0]); |
504 |
rc->r = &(rc->s->name[0]); |
Lines 447-452
Link Here
|
447 |
jk_logger_t *l) |
513 |
jk_logger_t *l) |
448 |
{ |
514 |
{ |
449 |
worker_record_t *rc = NULL; |
515 |
worker_record_t *rc = NULL; |
|
|
516 |
int had_session_id = JK_FALSE; |
450 |
char *sessionid = NULL; |
517 |
char *sessionid = NULL; |
451 |
int r; |
518 |
int r; |
452 |
|
519 |
|
Lines 483-490
Link Here
|
483 |
JK_TRACE_EXIT(l); |
550 |
JK_TRACE_EXIT(l); |
484 |
return NULL; |
551 |
return NULL; |
485 |
} |
552 |
} |
|
|
553 |
if (p->lblock == JK_LB_LOCK_PESSIMISTIC) |
554 |
jk_shm_lock(); |
486 |
if (sessionid) { |
555 |
if (sessionid) { |
487 |
char *session = sessionid; |
556 |
char *session = sessionid; |
|
|
557 |
had_session_id = JK_TRUE; |
488 |
if (JK_IS_DEBUG_LEVEL(l)) { |
558 |
if (JK_IS_DEBUG_LEVEL(l)) { |
489 |
jk_log(l, JK_LOG_DEBUG, |
559 |
jk_log(l, JK_LOG_DEBUG, |
490 |
"total sessionid is %s", |
560 |
"total sessionid is %s", |
Lines 511-523
Link Here
|
511 |
/* We have a session route. Whow! */ |
581 |
/* We have a session route. Whow! */ |
512 |
rc = find_bysession_route(p, session_route, l); |
582 |
rc = find_bysession_route(p, session_route, l); |
513 |
if (rc) { |
583 |
if (rc) { |
514 |
JK_LEAVE_CS(&(p->cs), r); |
584 |
rc->s->sticky_session_count++; |
515 |
if (JK_IS_DEBUG_LEVEL(l)) |
585 |
break; |
516 |
jk_log(l, JK_LOG_DEBUG, |
|
|
517 |
"found worker %s for route %s and partial sessionid %s", |
518 |
rc->s->name, session_route, sessionid); |
519 |
JK_TRACE_EXIT(l); |
520 |
return rc; |
521 |
} |
586 |
} |
522 |
} |
587 |
} |
523 |
/* Try next partial sessionid if present */ |
588 |
/* Try next partial sessionid if present */ |
Lines 525-544
Link Here
|
525 |
rc = NULL; |
590 |
rc = NULL; |
526 |
} |
591 |
} |
527 |
if (!rc && p->s->sticky_session_force) { |
592 |
if (!rc && p->s->sticky_session_force) { |
528 |
JK_LEAVE_CS(&(p->cs), r); |
|
|
529 |
jk_log(l, JK_LOG_INFO, |
593 |
jk_log(l, JK_LOG_INFO, |
530 |
"all workers are in error state for session %s", |
594 |
"all workers are in error state for session %s", |
531 |
session); |
595 |
session); |
532 |
JK_TRACE_EXIT(l); |
|
|
533 |
return NULL; |
534 |
} |
596 |
} |
535 |
} |
597 |
} |
536 |
rc = find_best_worker(p, l); |
598 |
if (!rc && (!had_session_id || !p->s->sticky_session_force)) { |
|
|
599 |
rc = find_best_worker(p, l); |
600 |
if (rc) rc->s->elected++; |
601 |
} |
602 |
if (rc) { |
603 |
/* Increment the number of workers serving request */ |
604 |
p->s->busy++; |
605 |
if (p->s->busy > p->s->max_busy) |
606 |
p->s->max_busy = p->s->busy; |
607 |
rc->s->busy++; |
608 |
if (rc->s->busy > rc->s->max_busy) |
609 |
rc->s->max_busy = rc->s->busy; |
610 |
} |
611 |
if (p->lblock == JK_LB_LOCK_PESSIMISTIC) |
612 |
jk_shm_unlock(); |
537 |
JK_LEAVE_CS(&(p->cs), r); |
613 |
JK_LEAVE_CS(&(p->cs), r); |
538 |
if (rc && JK_IS_DEBUG_LEVEL(l)) { |
614 |
if (rc && JK_IS_DEBUG_LEVEL(l)) { |
539 |
jk_log(l, JK_LOG_DEBUG, |
615 |
jk_log(l, JK_LOG_DEBUG, |
540 |
"found best worker (%s) using %s method", rc->s->name, |
616 |
"found best worker (%s) using %s method", rc->s->name, |
541 |
p->lbmethod == JK_LB_BYREQUESTS ? "by request" : "by traffic"); |
617 |
get_method_value(p->lbmethod)); |
542 |
} |
618 |
} |
543 |
JK_TRACE_EXIT(l); |
619 |
JK_TRACE_EXIT(l); |
544 |
return rc; |
620 |
return rc; |
Lines 572-583
Link Here
|
572 |
worker_record_t *rec = |
648 |
worker_record_t *rec = |
573 |
get_most_suitable_worker(p->worker, s, attempt++, l); |
649 |
get_most_suitable_worker(p->worker, s, attempt++, l); |
574 |
int rc; |
650 |
int rc; |
|
|
651 |
int r; |
575 |
/* Do not reuse previous worker, because |
652 |
/* Do not reuse previous worker, because |
576 |
* that worker already failed. |
653 |
* that worker already failed. |
577 |
*/ |
654 |
*/ |
578 |
if (rec && rec != prec) { |
655 |
if (rec && rec != prec) { |
579 |
int is_service_error = JK_HTTP_OK; |
656 |
int is_service_error = JK_HTTP_OK; |
580 |
int service_stat = JK_FALSE; |
657 |
int service_stat = JK_FALSE; |
|
|
658 |
int no_endpoint = JK_TRUE; |
581 |
jk_endpoint_t *end = NULL; |
659 |
jk_endpoint_t *end = NULL; |
582 |
|
660 |
|
583 |
s->jvm_route = rec->r; |
661 |
s->jvm_route = rec->r; |
Lines 587-626
Link Here
|
587 |
jk_log(l, JK_LOG_DEBUG, |
665 |
jk_log(l, JK_LOG_DEBUG, |
588 |
"service worker=%s jvm_route=%s", |
666 |
"service worker=%s jvm_route=%s", |
589 |
rec->s->name, s->jvm_route); |
667 |
rec->s->name, s->jvm_route); |
590 |
rec->s->elected++; |
|
|
591 |
if (rc && end) { |
668 |
if (rc && end) { |
|
|
669 |
no_endpoint = JK_FALSE; |
670 |
|
592 |
/* Reset endpoint read and write sizes for |
671 |
/* Reset endpoint read and write sizes for |
593 |
* this request. |
672 |
* this request. |
594 |
*/ |
673 |
*/ |
595 |
end->rd = end->wr = 0; |
674 |
end->rd = end->wr = 0; |
596 |
/* Increment the number of workers serving request */ |
675 |
|
597 |
p->worker->s->busy++; |
|
|
598 |
if (p->worker->s->busy > p->worker->s->max_busy) |
599 |
p->worker->s->max_busy = p->worker->s->busy; |
600 |
rec->s->busy++; |
601 |
if (rec->s->busy > rec->s->max_busy) |
602 |
rec->s->max_busy = rec->s->busy; |
603 |
service_stat = end->service(end, s, l, &is_service_error); |
676 |
service_stat = end->service(end, s, l, &is_service_error); |
604 |
/* Update partial reads and writes if any */ |
677 |
/* Update partial reads and writes if any */ |
605 |
rec->s->readed += end->rd; |
678 |
rec->s->readed += end->rd; |
606 |
rec->s->transferred += end->wr; |
679 |
rec->s->transferred += end->wr; |
607 |
end->done(&end, l); |
680 |
end->done(&end, l); |
|
|
681 |
|
608 |
/* When returning the endpoint mark the worker as not busy. |
682 |
/* When returning the endpoint mark the worker as not busy. |
609 |
* We have at least one endpoint free |
683 |
* We have at least one endpoint free |
610 |
*/ |
684 |
*/ |
611 |
rec->s->is_busy = JK_FALSE; |
685 |
rec->s->is_busy = JK_FALSE; |
612 |
/* Decrement the busy worker count */ |
|
|
613 |
rec->s->busy--; |
614 |
p->worker->s->busy--; |
615 |
if (service_stat == JK_TRUE) { |
616 |
rec->s->in_error_state = JK_FALSE; |
617 |
rec->s->in_recovering = JK_FALSE; |
618 |
rec->s->error_time = 0; |
619 |
JK_TRACE_EXIT(l); |
620 |
return JK_TRUE; |
621 |
} |
622 |
} |
686 |
} |
623 |
else { |
687 |
|
|
|
688 |
JK_ENTER_CS(&(p->worker->cs), r); |
689 |
if (p->worker->lblock == JK_LB_LOCK_PESSIMISTIC) |
690 |
jk_shm_lock(); |
691 |
/* Decrement the busy worker count */ |
692 |
rec->s->busy--; |
693 |
p->worker->s->busy--; |
694 |
if (p->worker->lblock == JK_LB_LOCK_PESSIMISTIC) |
695 |
jk_shm_unlock(); |
696 |
JK_LEAVE_CS(&(p->worker->cs), r); |
697 |
|
698 |
if (no_endpoint) { |
624 |
/* If we can not get the endpoint |
699 |
/* If we can not get the endpoint |
625 |
* mark the worker as busy rather then |
700 |
* mark the worker as busy rather then |
626 |
* as in error |
701 |
* as in error |
Lines 634-640
Link Here
|
634 |
prec = rec; |
709 |
prec = rec; |
635 |
continue; |
710 |
continue; |
636 |
} |
711 |
} |
637 |
if (service_stat == JK_FALSE) { |
712 |
|
|
|
713 |
if (service_stat == JK_TRUE) { |
714 |
rec->s->in_error_state = JK_FALSE; |
715 |
rec->s->in_recovering = JK_FALSE; |
716 |
rec->s->error_time = 0; |
717 |
JK_TRACE_EXIT(l); |
718 |
return JK_TRUE; |
719 |
} |
720 |
else if (service_stat == JK_FALSE) { |
638 |
/* |
721 |
/* |
639 |
* Service failed !!! |
722 |
* Service failed !!! |
640 |
* |
723 |
* |
Lines 699-704
Link Here
|
699 |
"recoverable error... will try to recover on other host"); |
782 |
"recoverable error... will try to recover on other host"); |
700 |
} |
783 |
} |
701 |
else { |
784 |
else { |
|
|
785 |
if (rec) { |
786 |
JK_ENTER_CS(&(p->worker->cs), r); |
787 |
if (p->worker->lblock == JK_LB_LOCK_PESSIMISTIC) |
788 |
jk_shm_lock(); |
789 |
/* Decrement the busy worker count */ |
790 |
rec->s->busy--; |
791 |
p->worker->s->busy--; |
792 |
if (p->worker->lblock == JK_LB_LOCK_PESSIMISTIC) |
793 |
jk_shm_unlock(); |
794 |
JK_LEAVE_CS(&(p->worker->cs), r); |
795 |
} |
702 |
/* NULL record, no more workers left ... */ |
796 |
/* NULL record, no more workers left ... */ |
703 |
jk_log(l, JK_LOG_ERROR, |
797 |
jk_log(l, JK_LOG_ERROR, |
704 |
"All tomcat instances failed, no more workers left"); |
798 |
"All tomcat instances failed, no more workers left"); |