--- server/util_time.c.orig	2012-12-29 13:47:28.415799779 -0500
+++ server/util_time.c.orig	2012-12-31 09:11:35.350302411 -0500
@@ -15,6 +15,7 @@
  */
 
 #include "util_time.h"
+#include <apr_atomic.h>
 
 /* Number of characters needed to format the microsecond part of a timestamp.
@@ -30,9 +31,8 @@
  */
 
 struct exploded_time_cache_element {
-    apr_int64_t t;
-    apr_time_exp_t xt;
-    apr_int64_t t_validate; /* please see comments in cached_explode() */
+    apr_time_exp_t xt; /* First for alignment of copies */
+    apr_uint32_t key;
 };
 
 /* the "+ 1" is for the current second: */
@@ -51,92 +51,84 @@
 
 static apr_status_t cached_explode(apr_time_exp_t *xt, apr_time_t t,
-                                   struct exploded_time_cache_element *cache,
-                                   int use_gmt)
+                                   struct exploded_time_cache_element *cache,
+                                   apr_status_t (*explode)(apr_time_exp_t *, apr_time_t))
 {
-    apr_int64_t seconds = apr_time_sec(t);
-    struct exploded_time_cache_element *cache_element =
+#define SECONDS_MASK 0x7FFFFFFF
+    /* High bit is used to indicate invalid cache_element */
+    const apr_uint32_t seconds = apr_time_sec(t) & SECONDS_MASK;
+    volatile struct exploded_time_cache_element * const cache_element =
         &(cache[seconds & TIME_CACHE_MASK]);
-    struct exploded_time_cache_element cache_element_snapshot;
-
     /* The cache is implemented as a ring buffer.  Each second,
      * it uses a different element in the buffer.  The timestamp
      * in the element indicates whether the element contains the
     * exploded time for the current second (vs the time
      * 'now - AP_TIME_RECENT_THRESHOLD' seconds ago).  If the
-     * cached value is for the current time, we use it.  Otherwise,
-     * we compute the apr_time_exp_t and store it in this
-     * cache element. Note that the timestamp in the cache
-     * element is updated only after the exploded time.  Thus
-     * if two threads hit this cache element simultaneously
-     * at the start of a new second, they'll both explode the
-     * time and store it.  I.e., the writers will collide, but
-     * they'll be writing the same value.
+     * cached value is for the current time, we copy the exploded time.
+     * After copying, we check the cache_element to see if it still has the
+     * same second. If so, the copy is valid, because we always set the key
+     * after copying the exploded time into the cache, and we're using
+     * memory barriers (implemented with Compare-And-Swap)
+     * that guarantee total memory ordering.
      */
-    if (cache_element->t >= seconds) {
-        /* There is an intentional race condition in this design:
-         * in a multithreaded app, one thread might be reading
-         * from this cache_element to resolve a timestamp from
-         * TIME_CACHE_SIZE seconds ago at the same time that
-         * another thread is copying the exploded form of the
-         * current time into the same cache_element.  (I.e., the
-         * first thread might hit this element of the ring buffer
-         * just as the element is being recycled.)  This can
-         * also happen at the start of a new second, if a
-         * reader accesses the cache_element after a writer
-         * has updated cache_element.t but before the writer
-         * has finished updating the whole cache_element.
-         *
-         * Rather than trying to prevent this race condition
-         * with locks, we allow it to happen and then detect
-         * and correct it.  The detection works like this:
-         *   Step 1: Take a "snapshot" of the cache element by
-         *           copying it into a temporary buffer.
-         *   Step 2: Check whether the snapshot contains consistent
-         *           data: the timestamps at the start and end of
-         *           the cache_element should both match the 'seconds'
-         *           value that we computed from the input time.
-         *           If these three don't match, then the snapshot
-         *           shows the cache_element in the middle of an
-         *           update, and its contents are invalid.
-         *   Step 3: If the snapshot is valid, use it.  Otherwise,
-         *           just give up on the cache and explode the
-         *           input time.
-         */
-        memcpy(&cache_element_snapshot, cache_element,
-               sizeof(struct exploded_time_cache_element));
-        if ((seconds != cache_element_snapshot.t) ||
-            (seconds != cache_element_snapshot.t_validate)) {
-            /* Invalid snapshot */
-            if (use_gmt) {
-                return apr_time_exp_gmt(xt, t);
-            }
-            else {
-                return apr_time_exp_lt(xt, t);
-            }
-        }
-        else {
-            /* Valid snapshot */
-            memcpy(xt, &(cache_element_snapshot.xt),
-                   sizeof(apr_time_exp_t));
+    const apr_uint32_t key = cache_element->key;
+    /* Above is done speculatively, no memory barrier used.
+     * It's doing the same thing as apr_atomic_read32, a read of
+     * memory marked volatile, but without doing the function call. */
+    if (seconds == key && seconds != 0) {
+        /* seconds == 0 may mean cache is uninitialized, so don't use cache */
+        *xt = cache_element->xt;
+        /* After copying xt, make sure cache_element was not marked invalid
+         * by another thread beginning an update, and that cache_element
+         * really contained data for our second.
+         * Requires memory barrier, so use CAS. */
+        if (apr_atomic_cas32(&cache_element->key, seconds, seconds)==seconds) {
+            xt->tm_usec = (int)apr_time_usec(t);
+            return APR_SUCCESS;
         }
     }
-    else {
-        apr_status_t r;
-        if (use_gmt) {
-            r = apr_time_exp_gmt(xt, t);
-        }
-        else {
-            r = apr_time_exp_lt(xt, t);
-        }
+    /* Invalid cache element, so calculate the exploded time value.
+       This is a wait-free algorithm, and we purposely don't spin and
+       retry to get from the cache, we just continue and calculate it
+       and do useful work, instead of spinning. */
+    do {
+        const apr_status_t r = explode(xt, t);
         if (r != APR_SUCCESS) {
             return r;
         }
-        cache_element->t = seconds;
-        memcpy(&(cache_element->xt), xt, sizeof(apr_time_exp_t));
-        cache_element->t_validate = seconds;
+    } while (0);
+
+    /* Attempt to update the cache */
+
+    /* To prevent ABA problem, don't update the cache unless we have a
+     * newer time value (so that we never go from B->A).
+     * Handle cases where seconds overflows (e.g. year 2038),
+     * and cases where cache is uninitialized.
+     * Handle overflow, otherwise it will stop caching after overflow,
+     * until server process is restarted, which may be months later.
+     */
+#define OVERFLOW (((SECONDS_MASK)>>1) + 1)
+    if (key <= SECONDS_MASK /* another thread not updating cache_element */
+        && seconds != 0 /* new key distinguishable from uninitialized */
+        && (
+            (seconds > key && seconds - key < OVERFLOW) ||  /* normal */
+            (seconds < key && key - seconds > OVERFLOW) ||  /* overflow */
+            (key == 0 && seconds < SECONDS_MASK - 0x100)))
+            /* cache is perhaps uninitialized, and not recent overflow */
+    {
+        if (key == apr_atomic_cas32(&cache_element->key, ~seconds, key))
+        {   /* We won the race to update this cache_element.
+             * Above marks cache_element as invalid by using ~seconds,
+             * because we are starting an update: it's the start of a
+             * transaction. */
+            cache_element->xt = *xt;
+            /* Finished copying, now update key with our key,
+             * ending the transaction. Need to use CAS for the
+             * memory barrier.
+             */
+            apr_atomic_cas32(&cache_element->key, seconds, ~seconds);
+        }
     }
-    xt->tm_usec = (int)apr_time_usec(t);
     return APR_SUCCESS;
 }
 
@@ -144,13 +136,13 @@
 AP_DECLARE(apr_status_t) ap_explode_recent_localtime(apr_time_exp_t * tm,
                                                      apr_time_t t)
 {
-    return cached_explode(tm, t, exploded_cache_localtime, 0);
+    return cached_explode(tm, t, exploded_cache_localtime, &apr_time_exp_lt);
 }
 
 AP_DECLARE(apr_status_t) ap_explode_recent_gmt(apr_time_exp_t * tm,
                                                apr_time_t t)
 {
-    return cached_explode(tm, t, exploded_cache_gmt, 1);
+    return cached_explode(tm, t, exploded_cache_gmt, &apr_time_exp_gmt);
 }
 
 AP_DECLARE(apr_status_t) ap_recent_ctime(char *date_str, apr_time_t t)
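
Two illustrative sketches follow. First, the read-validate-publish protocol: a reader
copies the exploded time and then re-checks the key with a CAS that doubles as a
memory barrier, while a writer brackets its update by flipping the key to ~seconds
and back. The program below is a minimal, hypothetical reduction of that protocol to
a single slot caching one 32-bit value; the names (demo_slot, publish, lookup) and
the cached payload are invented for the demo and are not in the patch, and the
ABA/overflow gate is simplified to the invalid-marker check alone. It uses only the
APR call the patch itself relies on (apr_atomic_cas32), plus apr_initialize.

    /* Hypothetical single-slot demo of the patch's key-validated copy protocol.
     * Build against APR, e.g.: gcc demo.c $(apr-1-config --cflags --includes \
     *                                       --link-ld)
     */
    #include <stdio.h>
    #include <apr_general.h>
    #include <apr_atomic.h>

    #define DEMO_KEY_MASK 0x7FFFFFFF /* high bit marks a slot mid-update */

    struct demo_slot {
        apr_uint32_t value; /* cached payload; first, mirroring the patch */
        apr_uint32_t key;   /* which input the payload belongs to */
    };

    static volatile struct demo_slot slot; /* zero-initialized: key 0 = empty */

    /* Writer: publish 'value' for 'key' using the invalid-marker transaction. */
    static void publish(apr_uint32_t key, apr_uint32_t value)
    {
        const apr_uint32_t old = slot.key;
        if (old > DEMO_KEY_MASK) {
            return; /* a writer is mid-update; skip, don't spin (wait-free) */
        }
        /* Begin the transaction: mark the slot invalid with ~key. */
        if (apr_atomic_cas32(&slot.key, ~key, old) != old) {
            return; /* another writer won the race */
        }
        slot.value = value;
        /* End the transaction: publish the real key. The CAS is also a full
         * memory barrier, so the payload store above is visible first. */
        apr_atomic_cas32(&slot.key, key, ~key);
    }

    /* Reader: copy speculatively, then validate the copy via CAS. */
    static int lookup(apr_uint32_t key, apr_uint32_t *out)
    {
        if (key == 0 || slot.key != key) {
            return 0; /* empty, stale, or mid-update */
        }
        *out = slot.value; /* speculative copy, as in cached_explode() */
        /* Re-check the key after the copy. A CAS whose compare and swap
         * values are equal never changes the slot; it is used purely for
         * its memory barrier and its read-back of the current key. */
        return apr_atomic_cas32(&slot.key, key, key) == key;
    }

    int main(void)
    {
        apr_uint32_t v = 0;
        apr_initialize();
        publish(7, 49);
        if (lookup(7, &v)) {
            printf("cached value for key 7: %u\n", (unsigned)v); /* 49 */
        }
        apr_terminate();
        return 0;
    }

The equal-compare-and-swap CAS never modifies the slot; it exists only for its
full-barrier and read-back semantics, which is exactly the trick cached_explode()
uses on both the read and the write path.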
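Second, the update gate: the condition guarding the cache write packs the
no-concurrent-writer, uninitialized-slot, and 2^31-second wraparound cases into one
expression, and it is easier to see at the edges with concrete values. This is a
hypothetical self-check (may_update is an invented name) that replays the predicate
with the patch's own SECONDS_MASK and OVERFLOW definitions; each assert mirrors one
of the comments in the patch.

    #include <assert.h>
    #include <stdint.h>

    #define SECONDS_MASK 0x7FFFFFFF
    #define OVERFLOW (((SECONDS_MASK) >> 1) + 1) /* 0x40000000 */

    /* Nonzero if a writer holding 'seconds' may replace cached 'key'. */
    static int may_update(uint32_t key, uint32_t seconds)
    {
        return key <= SECONDS_MASK       /* no other writer mid-update */
            && seconds != 0              /* distinguishable from empty */
            && ((seconds > key && seconds - key < OVERFLOW)    /* normal */
             || (seconds < key && key - seconds > OVERFLOW)    /* overflow */
             || (key == 0 && seconds < SECONDS_MASK - 0x100)); /* empty slot */
    }

    int main(void)
    {
        assert(may_update(1000, 1001));       /* newer second: accept */
        assert(!may_update(1001, 1000));      /* older second: reject (ABA) */
        assert(may_update(SECONDS_MASK, 5));  /* clock wrapped past 2^31: accept */
        assert(!may_update(~(uint32_t)1000, 1001)); /* slot mid-update: reject */
        assert(may_update(0, 1000));          /* uninitialized slot: accept */
        assert(!may_update(0, SECONDS_MASK - 1)); /* near wrap point: reject */
        return 0;
    }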