--- server/util_time.c.orig	2012-12-29 13:47:28.415799779 -0500
+++ server/util_time.c.orig	2012-12-31 09:11:35.350302411 -0500
@@ -15,6 +15,7 @@
  */
 
 #include "util_time.h"
+#include <apr_atomic.h>
 
 /* Number of characters needed to format the microsecond part of a timestamp.
@@ -30,9 +31,8 @@
  */
 
 struct exploded_time_cache_element {
-    apr_int64_t t;
-    apr_time_exp_t xt;
-    apr_int64_t t_validate; /* please see comments in cached_explode() */
+    apr_time_exp_t xt; /* First for alignment of copies */
+    apr_uint32_t key;
 };
 
 /* the "+ 1" is for the current second: */
@@ -51,92 +51,84 @@
 
 static apr_status_t cached_explode(apr_time_exp_t *xt, apr_time_t t,
-                                   struct exploded_time_cache_element *cache,
-                                   int use_gmt)
+                                   struct exploded_time_cache_element *cache,
+                                   apr_status_t (*explode)(apr_time_exp_t *, apr_time_t))
 {
-    apr_int64_t seconds = apr_time_sec(t);
-    struct exploded_time_cache_element *cache_element =
+#define SECONDS_MASK 0x7FFFFFFF
+    /* High bit is used to indicate invalid cache_element */
+    const apr_uint32_t seconds = apr_time_sec(t) & SECONDS_MASK;
+    volatile struct exploded_time_cache_element * const cache_element =
         &(cache[seconds & TIME_CACHE_MASK]);
-    struct exploded_time_cache_element cache_element_snapshot;
-
     /* The cache is implemented as a ring buffer.  Each second,
      * it uses a different element in the buffer.  The timestamp
      * in the element indicates whether the element contains the
     * exploded time for the current second (vs the time
      * 'now - AP_TIME_RECENT_THRESHOLD' seconds ago).  If the
-     * cached value is for the current time, we use it.  Otherwise,
-     * we compute the apr_time_exp_t and store it in this
-     * cache element. Note that the timestamp in the cache
-     * element is updated only after the exploded time.  Thus
-     * if two threads hit this cache element simultaneously
-     * at the start of a new second, they'll both explode the
-     * time and store it.  I.e., the writers will collide, but
-     * they'll be writing the same value.
+     * cached value is for the current time, we copy the exploded time.
+     * After copying, we check the cache_element to see if it still has the
+     * same second. If so, the copy is valid, because we always set the key
+     * after copying the exploded time into the cache, and we're using
+     * memory barriers (implemented with Compare-And-Swap)
+     * that guarantee total memory ordering.
      */
-    if (cache_element->t >= seconds) {
-        /* There is an intentional race condition in this design:
-         * in a multithreaded app, one thread might be reading
-         * from this cache_element to resolve a timestamp from
-         * TIME_CACHE_SIZE seconds ago at the same time that
-         * another thread is copying the exploded form of the
-         * current time into the same cache_element.  (I.e., the
-         * first thread might hit this element of the ring buffer
-         * just as the element is being recycled.)  This can
-         * also happen at the start of a new second, if a
-         * reader accesses the cache_element after a writer
-         * has updated cache_element.t but before the writer
-         * has finished updating the whole cache_element.
-         *
-         * Rather than trying to prevent this race condition
-         * with locks, we allow it to happen and then detect
-         * and correct it.  The detection works like this:
-         *   Step 1: Take a "snapshot" of the cache element by
-         *           copying it into a temporary buffer.
-         *   Step 2: Check whether the snapshot contains consistent
-         *           data: the timestamps at the start and end of
-         *           the cache_element should both match the 'seconds'
-         *           value that we computed from the input time.
-         *           If these three don't match, then the snapshot
-         *           shows the cache_element in the middle of an
-         *           update, and its contents are invalid.
-         *   Step 3: If the snapshot is valid, use it.  Otherwise,
-         *           just give up on the cache and explode the
-         *           input time.
-         */
-        memcpy(&cache_element_snapshot, cache_element,
-               sizeof(struct exploded_time_cache_element));
-        if ((seconds != cache_element_snapshot.t) ||
-            (seconds != cache_element_snapshot.t_validate)) {
-            /* Invalid snapshot */
-            if (use_gmt) {
-                return apr_time_exp_gmt(xt, t);
-            }
-            else {
-                return apr_time_exp_lt(xt, t);
-            }
-        }
-        else {
-            /* Valid snapshot */
-            memcpy(xt, &(cache_element_snapshot.xt),
-                   sizeof(apr_time_exp_t));
+    const apr_uint32_t key = cache_element->key;
+    /* Above is done speculatively, no memory barrier used.
+     * It's doing the same thing as apr_atomic_read32, a read of
+     * memory marked volatile, but without doing the function call. */
+    if (seconds == key && seconds != 0) {
+        /* seconds == 0 may mean cache is uninitialized, so don't use cache */
+        *xt = cache_element->xt;
+        /* After copying xt, make sure cache_element was not marked invalid
+         * by another thread beginning an update, and that cache_element
+         * really contained data for our second.
+         * Requires memory barrier, so use CAS. */
+        if (apr_atomic_cas32(&cache_element->key, seconds, seconds)==seconds) {
+            xt->tm_usec = (int)apr_time_usec(t);
+            return APR_SUCCESS;
         }
     }
-    else {
-        apr_status_t r;
-        if (use_gmt) {
-            r = apr_time_exp_gmt(xt, t);
-        }
-        else {
-            r = apr_time_exp_lt(xt, t);
-        }
+    /* Invalid cache element, so calculate the exploded time value.
+       This is a wait-free algorithm, and we purposely don't spin and
+       retry to get from the cache, we just continue and calculate it
+       and do useful work, instead of spinning. */
+    do {
+        const apr_status_t r = explode(xt, t);
         if (r != APR_SUCCESS) {
             return r;
         }
-        cache_element->t = seconds;
-        memcpy(&(cache_element->xt), xt, sizeof(apr_time_exp_t));
-        cache_element->t_validate = seconds;
+    } while (0);
+
+    /* Attempt to update the cache */
+
+    /* To prevent ABA problem, don't update the cache unless we have a
+     * newer time value (so that we never go from B->A).
+     * Handle cases where seconds overflows (e.g. year 2038),
+     * and cases where cache is uninitialized.
+     * Handle overflow, otherwise it will stop caching after overflow,
+     * until server process is restarted, which may be months later.
+     */
+#define OVERFLOW (((SECONDS_MASK)>>1) + 1)
+    if (key <= SECONDS_MASK /* another thread not updating cache_element */
+        && seconds != 0 /* new key distinguishable from uninitialized */
+        && (
+            (seconds > key && seconds - key < OVERFLOW) ||  /* normal */
+            (seconds < key && key - seconds > OVERFLOW) ||  /* overflow */
+            (key == 0 && seconds < SECONDS_MASK - 0x100)))
+            /* cache is perhaps uninitialized, and not recent overflow */
+    {
+        if (key == apr_atomic_cas32(&cache_element->key, ~seconds, key))
+        {   /* We won the race to update this cache_element.
+             * Above marks cache_element as invalid by using ~seconds,
+             * because we are starting an update: it's the start of a
+             * transaction. */
+            cache_element->xt = *xt;
+            /* Finished copying, now update key with our key,
+             * ending the transaction. Need to use CAS for the
+             * memory barrier.
+             */
+            apr_atomic_cas32(&cache_element->key, seconds, ~seconds);
+        }
     }
-    xt->tm_usec = (int)apr_time_usec(t);
     return APR_SUCCESS;
 }
 
@@ -144,13 +136,13 @@
 AP_DECLARE(apr_status_t) ap_explode_recent_localtime(apr_time_exp_t * tm,
                                                      apr_time_t t)
 {
-    return cached_explode(tm, t, exploded_cache_localtime, 0);
+    return cached_explode(tm, t, exploded_cache_localtime, &apr_time_exp_lt);
 }
 
 AP_DECLARE(apr_status_t) ap_explode_recent_gmt(apr_time_exp_t * tm,
                                                apr_time_t t)
 {
-    return cached_explode(tm, t, exploded_cache_gmt, 1);
+    return cached_explode(tm, t, exploded_cache_gmt, &apr_time_exp_gmt);
 }
 
 AP_DECLARE(apr_status_t) ap_recent_ctime(char *date_str, apr_time_t t)
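
Two illustrative sketches follow. First, the read-validate-publish protocol: a reader
copies the exploded time and then re-checks the key with a CAS that doubles as a
memory barrier, while a writer brackets its update by flipping the key to ~seconds
and back. The program below is a minimal, hypothetical reduction of that protocol to
a single slot caching one 32-bit value; the names (demo_slot, publish, lookup) and
the cached payload are invented for the demo and are not in the patch, and the
ABA/overflow gate is simplified to the invalid-marker check alone. It uses only the
APR call the patch itself relies on (apr_atomic_cas32), plus apr_initialize.

    /* Hypothetical single-slot demo of the patch's key-validated copy protocol.
     * Build against APR, e.g.: gcc demo.c $(apr-1-config --cflags --includes \
     *                                       --link-ld)
     */
    #include <stdio.h>
    #include <apr_general.h>
    #include <apr_atomic.h>

    #define DEMO_KEY_MASK 0x7FFFFFFF /* high bit marks a slot mid-update */

    struct demo_slot {
        apr_uint32_t value; /* cached payload; first, mirroring the patch */
        apr_uint32_t key;   /* which input the payload belongs to */
    };

    static volatile struct demo_slot slot; /* zero-initialized: key 0 = empty */

    /* Writer: publish 'value' for 'key' using the invalid-marker transaction. */
    static void publish(apr_uint32_t key, apr_uint32_t value)
    {
        const apr_uint32_t old = slot.key;
        if (old > DEMO_KEY_MASK) {
            return; /* a writer is mid-update; skip, don't spin (wait-free) */
        }
        /* Begin the transaction: mark the slot invalid with ~key. */
        if (apr_atomic_cas32(&slot.key, ~key, old) != old) {
            return; /* another writer won the race */
        }
        slot.value = value;
        /* End the transaction: publish the real key. The CAS is also a full
         * memory barrier, so the payload store above is visible first. */
        apr_atomic_cas32(&slot.key, key, ~key);
    }

    /* Reader: copy speculatively, then validate the copy via CAS. */
    static int lookup(apr_uint32_t key, apr_uint32_t *out)
    {
        if (key == 0 || slot.key != key) {
            return 0; /* empty, stale, or mid-update */
        }
        *out = slot.value; /* speculative copy, as in cached_explode() */
        /* Re-check the key after the copy. A CAS whose compare and swap
         * values are equal never changes the slot; it is used purely for
         * its memory barrier and its read-back of the current key. */
        return apr_atomic_cas32(&slot.key, key, key) == key;
    }

    int main(void)
    {
        apr_uint32_t v = 0;
        apr_initialize();
        publish(7, 49);
        if (lookup(7, &v)) {
            printf("cached value for key 7: %u\n", (unsigned)v); /* 49 */
        }
        apr_terminate();
        return 0;
    }

The equal-compare-and-swap CAS never modifies the slot; it exists only for its
full-barrier and read-back semantics, which is exactly the trick cached_explode()
uses on both the read and the write path.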
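Second, the update gate: the condition guarding the cache write packs the
no-concurrent-writer, uninitialized-slot, and 2^31-second wraparound cases into one
expression, and it is easier to see at the edges with concrete values. This is a
hypothetical self-check (may_update is an invented name) that replays the predicate
with the patch's own SECONDS_MASK and OVERFLOW definitions; each assert mirrors one
of the comments in the patch.

    #include <assert.h>
    #include <stdint.h>

    #define SECONDS_MASK 0x7FFFFFFF
    #define OVERFLOW (((SECONDS_MASK) >> 1) + 1) /* 0x40000000 */

    /* Nonzero if a writer holding 'seconds' may replace cached 'key'. */
    static int may_update(uint32_t key, uint32_t seconds)
    {
        return key <= SECONDS_MASK       /* no other writer mid-update */
            && seconds != 0              /* distinguishable from empty */
            && ((seconds > key && seconds - key < OVERFLOW)    /* normal */
             || (seconds < key && key - seconds > OVERFLOW)    /* overflow */
             || (key == 0 && seconds < SECONDS_MASK - 0x100)); /* empty slot */
    }

    int main(void)
    {
        assert(may_update(1000, 1001));       /* newer second: accept */
        assert(!may_update(1001, 1000));      /* older second: reject (ABA) */
        assert(may_update(SECONDS_MASK, 5));  /* clock wrapped past 2^31: accept */
        assert(!may_update(~(uint32_t)1000, 1001)); /* slot mid-update: reject */
        assert(may_update(0, 1000));          /* uninitialized slot: accept */
        assert(!may_update(0, SECONDS_MASK - 1)); /* near wrap point: reject */
        return 0;
    }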