diff -rup ../dist/modules/cache/mod_cache.c ./modules/cache/mod_cache.c --- ../dist/modules/cache/mod_cache.c 2008-05-27 18:23:12.000000000 +0200 +++ ./modules/cache/mod_cache.c 2008-06-11 13:39:12.000000000 +0200 @@ -323,7 +323,7 @@ static int cache_save_filter(ap_filter_t cache_server_conf *conf; const char *cc_out, *cl; const char *exps, *lastmods, *dates, *etag; - apr_time_t exp, date, lastmod, now; + apr_time_t exp, date, lastmod, now, staleexp=APR_DATE_BAD; apr_off_t size; cache_info *info = NULL; char *reason; @@ -606,6 +606,8 @@ static int cache_save_filter(ap_filter_t /* Oh, hey. It isn't that stale! Yay! */ cache->handle = cache->stale_handle; info = &cache->handle->cache_obj->info; + /* Save stale expiry timestamp for later perusal */ + staleexp = info->expire; rv = OK; } else if (!r->header_only) { @@ -763,14 +765,41 @@ static int cache_save_filter(ap_filter_t ap_cache_accept_headers(cache->handle, r, 1); } - /* Write away header information to cache. It is possible that we are - * trying to update headers for an entity which has already been cached. - * - * This may fail, due to an unwritable cache area. E.g. filesystem full, - * permissions problems or a read-only (re)mount. This must be handled - * later. - */ - rv = cache->provider->store_headers(cache->handle, r, info); + rv = APR_EGENERAL; + if(conf->relaxupdates && cache->stale_handle && + staleexp != APR_DATE_BAD && now < staleexp) + { + /* Avoid storing on-disk headers that are never used. When the + * following conditions are fulfilled: + * - The body is NOT stale (ie. HTTP_NOT_MODIFIED when revalidating) + * - The on-disk header hasn't expired. + * - The request has max-age=0 + * Then there is no use to update the on-disk header since it won't be + * used by other max-age=0 requests since they are always revalidated, + * and we know it's likely there will be more max-age=0 requests since + * objects tend to have the same access pattern. 
+ */ + const char *cc_req; + char *val; + + cc_req = apr_table_get(r->headers_in, "Cache-Control"); + if(cc_req && ap_cache_liststr(r->pool, cc_req, "max-age", &val) && + val != NULL && apr_atoi64(val) == 0) + { + /* Yay, we can skip storing the on-disk header */ + rv = APR_SUCCESS; + } + } + if(rv != APR_SUCCESS) { + /* Write away header information to cache. It is possible that we are + * trying to update headers for an entity which has already been cached. + * + * This may fail, due to an unwritable cache area. E.g. filesystem full, + * permissions problems or a read-only (re)mount. This must be handled + * later. + */ + rv = cache->provider->store_headers(cache->handle, r, info); + } /* Did we just update the cached headers on a revalidated response? * @@ -926,6 +955,9 @@ static void * create_cache_config(apr_po /* flag indicating that query-string should be ignored when caching */ ps->ignorequerystring = 0; ps->ignorequerystring_set = 0; + /* Whether to relax header updates on max-age=0 accesses */ + ps->relaxupdates = 0; + ps->relaxupdates_set = 0; return ps; } @@ -975,6 +1007,10 @@ static void * merge_cache_config(apr_poo (overrides->ignorequerystring_set == 0) ? base->ignorequerystring : overrides->ignorequerystring; + ps->relaxupdates = + (overrides->relaxupdates_set == 0) + ? 
base->relaxupdates + : overrides->relaxupdates; return ps; } static const char *set_cache_ignore_no_last_mod(cmd_parms *parms, void *dummy, @@ -1166,6 +1202,19 @@ static const char *set_cache_ignore_quer return NULL; } +static const char *set_cache_relaxupdates(cmd_parms *parms, void *dummy, + int flag) +{ + cache_server_conf *conf; + + conf = + (cache_server_conf *)ap_get_module_config(parms->server->module_config, + &cache_module); + conf->relaxupdates = flag; + conf->relaxupdates_set = 1; + return NULL; +} + static int cache_post_config(apr_pool_t *p, apr_pool_t *plog, apr_pool_t *ptemp, server_rec *s) { @@ -1221,6 +1270,11 @@ static const command_rec cache_cmds[] = AP_INIT_TAKE1("CacheLastModifiedFactor", set_cache_factor, NULL, RSRC_CONF, "The factor used to estimate Expires date from " "LastModified date"), + AP_INIT_FLAG("CacheRelaxUpdates", set_cache_relaxupdates, + NULL, RSRC_CONF, + "Optimize for non-transparent caches by relaxing the " + "requirement that max-age=0 forces an update of headers " + "stored in cache"), {NULL} }; diff -rup ../dist/modules/cache/mod_cache.h ./modules/cache/mod_cache.h --- ../dist/modules/cache/mod_cache.h 2007-05-17 23:03:04.000000000 +0200 +++ ./modules/cache/mod_cache.h 2008-06-11 13:39:12.000000000 +0200 @@ -153,6 +153,9 @@ typedef struct { /** ignore query-string when caching */ int ignorequerystring; int ignorequerystring_set; + /* Relax header updates on max-age=0 accesses */ + int relaxupdates; + int relaxupdates_set; } cache_server_conf; /* cache info information */ diff -rup ../dist/modules/cache/mod_disk_cache.c ./modules/cache/mod_disk_cache.c --- ../dist/modules/cache/mod_disk_cache.c 2007-12-29 17:28:57.000000000 +0100 +++ ./modules/cache/mod_disk_cache.c 2008-06-11 13:39:12.000000000 +0200 @@ -22,33 +22,43 @@ #include "util_filter.h" #include "util_script.h" #include "util_charset.h" +#include "ap_mpm.h" +#include "mpm_common.h" /* * mod_disk_cache: Disk Based HTTP 1.1 Cache. 
* - * Flow to Find the .data file: - * Incoming client requests URI /foo/bar/baz - * Generate off of /foo/bar/baz + * Flow to Find the right cache file: + * Incoming client requests an URL + * Generate .header from URL * Open .header - * Read in .header file (may contain Format #1 or Format #2) - * If format #1 (Contains a list of Vary Headers): - * Use each header name (from .header) with our request values (headers_in) to - * regenerate using HeaderName+HeaderValue+.../foo/bar/baz - * re-read in .header (must be format #2) - * read in .data + * Read in .header file format identifier, which might be: + * VARY_FORMAT_VERSION - Vary headers + * DISK_FORMAT_VERSION - Metadata and headers for a cached file + * Anything else - Unknown header format, remove and return. * - * Format #1: + * If VARY_FORMAT_VERSION (Contains a list of Vary Headers): + * Use each header name with our request values (headers_in) to + * regenerate .header using HeaderName+HeaderValue+URL, + * open it, read format (must be DISK_FORMAT_VERSION). 
+ * + * VARY_FORMAT_VERSION: * apr_uint32_t format; * apr_time_t expire; * apr_array_t vary_headers (delimited by CRLF) * - * Format #2: - * disk_cache_info_t (first sizeof(apr_uint32_t) bytes is the format) + * DISK_FORMAT_VERSION: + * disk_cache_info_t * entity name (dobj->name) [length is in disk_cache_info_t->name_len] - * r->headers_out (delimited by CRLF) - * CRLF - * r->headers_in (delimited by CRLF) - * CRLF + * bodyfile (dobj->bodyfile) [length is in disk_cache_info_t->bodyname_len] + * optional filename (r->filename) + * [length is in disk_cache_info_t->filename_len] + * r->headers_out (see on disk header format below) + * r->headers_in + * + * On disk headers are stored in the following format: + * apr_uint32_t totsize; - size of headers to follow + * totsize amount of headers, HeaderA\0ValueA\0...HeaderN\0ValueN\0 */ module AP_MODULE_DECLARE_DATA disk_cache_module; @@ -62,185 +72,403 @@ static apr_status_t recall_body(cache_ha static apr_status_t read_array(request_rec *r, apr_array_header_t* arr, apr_file_t *file); -/* - * Local static functions - */ -static char *header_file(apr_pool_t *p, disk_cache_conf *conf, - disk_cache_object_t *dobj, const char *name) -{ - if (!dobj->hashfile) { - dobj->hashfile = ap_cache_generate_name(p, conf->dirlevels, - conf->dirlength, name); +#define CACHE_LOOP_INCTIME(x) x <<= 1 +#define CACHE_LOOP_DECTIME(x) x >>= 1 + +static void cache_loop_sleep(apr_interval_time_t *t) { + + if(*t < CACHE_LOOP_MINSLEEP) { + *t = CACHE_LOOP_MINSLEEP; + } + else if(*t > CACHE_LOOP_MAXSLEEP) { + *t = CACHE_LOOP_MAXSLEEP; } - if (dobj->prefix) { - return apr_pstrcat(p, dobj->prefix, CACHE_VDIR_SUFFIX, "/", - dobj->hashfile, CACHE_HEADER_SUFFIX, NULL); - } - else { - return apr_pstrcat(p, conf->cache_root, "/", dobj->hashfile, - CACHE_HEADER_SUFFIX, NULL); - } + apr_sleep(*t); } -static char *data_file(apr_pool_t *p, disk_cache_conf *conf, - disk_cache_object_t *dobj, const char *name) + +/* + * Modified file bucket implementation to be 
able to deliver files + * while caching. + */ + +/* Derived from apr_buckets_file.c */ + +#define BUCKET_IS_DISKCACHE(e) ((e)->type == &bucket_type_diskcache) +static const apr_bucket_type_t bucket_type_diskcache; + +static void diskcache_bucket_destroy(void *data) { - if (!dobj->hashfile) { - dobj->hashfile = ap_cache_generate_name(p, conf->dirlevels, - conf->dirlength, name); - } + diskcache_bucket_data *f = data; - if (dobj->prefix) { - return apr_pstrcat(p, dobj->prefix, CACHE_VDIR_SUFFIX, "/", - dobj->hashfile, CACHE_DATA_SUFFIX, NULL); - } - else { - return apr_pstrcat(p, conf->cache_root, "/", dobj->hashfile, - CACHE_DATA_SUFFIX, NULL); - } + if (apr_bucket_shared_destroy(f)) { + /* no need to close files here; it will get + * done automatically when the pool gets cleaned up */ + apr_bucket_free(f); + } } -static void mkdir_structure(disk_cache_conf *conf, const char *file, apr_pool_t *pool) + +/* The idea here is to convert diskcache buckets to regular file buckets + as data becomes available */ +static apr_status_t diskcache_bucket_read(apr_bucket *e, const char **str, + apr_size_t *len, + apr_read_type_e block) { + diskcache_bucket_data *a = e->data; + apr_file_t *f = a->fd; + apr_bucket *b = NULL; + char *buf; apr_status_t rv; - char *p; + apr_finfo_t finfo; + apr_size_t filelength = e->length; /* bytes remaining in file past offset */ + apr_off_t fileoffset = e->start; + apr_size_t available; + apr_time_t start = apr_time_now(); +#if APR_HAS_THREADS && !APR_HAS_XTHREAD_FILES + apr_int32_t flags; +#endif + +#if APR_HAS_THREADS && !APR_HAS_XTHREAD_FILES + if ((flags = apr_file_flags_get(f)) & APR_XTHREAD) { + /* this file descriptor is shared across multiple threads and + * this OS doesn't support that natively, so as a workaround + * we must reopen the file into a->readpool */ + const char *fname; + apr_file_name_get(&fname, f); + + rv = apr_file_open(&f, fname, (flags & ~APR_XTHREAD), 0, a->readpool); + if (rv != APR_SUCCESS) + return rv; + + a->fd = f; 
+ } +#endif + + /* in case we die prematurely */ + *str = NULL; + *len = 0; + + /* DEBUG + ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, NULL, + "disk_cache: Called diskcache_bucket_read"); + */ + + while(1) { + /* Figure out how big the file is right now, sit here until + it's grown enough or we get bored */ + rv = apr_file_info_get(&finfo, + APR_FINFO_SIZE | APR_FINFO_MTIME | APR_FINFO_NLINK, f); + if(rv != APR_SUCCESS) { + return rv; + } - for (p = (char*)file + conf->cache_root_len + 1;;) { - p = strchr(p, '/'); - if (!p) + if(finfo.size >= fileoffset + MIN(filelength, CACHE_BUCKET_MINCHUNK)) { break; - *p = '\0'; + } + + /* No use to even wait for a deleted file */ + if(finfo.nlink == 0) { + return APR_EGENERAL; + } + + if(block == APR_NONBLOCK_READ) { + return APR_EAGAIN; + } - rv = apr_dir_make(file, - APR_UREAD|APR_UWRITE|APR_UEXECUTE, pool); - if (rv != APR_SUCCESS && !APR_STATUS_IS_EEXIST(rv)) { - /* XXX */ + /* Check for timeout */ + if(finfo.mtime < (apr_time_now() - a->updtimeout) ) { + return APR_EGENERAL; + } + /* If we have progress within half the timeout period, return what + we have so far */ + if(finfo.size > fileoffset && + start < (apr_time_now() - a->updtimeout/2) ) + { + break; } - *p = '/'; - ++p; + + /* Increase loop delay on each pass */ + cache_loop_sleep(&(a->polldelay)); + CACHE_LOOP_INCTIME(a->polldelay); + } + /* Decrease the loop delay a notch so the stored value is the actual + delay needed */ + CACHE_LOOP_DECTIME(a->polldelay); + + /* Convert this bucket to a zero-length heap bucket so we won't be called + again */ + buf = apr_bucket_alloc(0, e->list); + apr_bucket_heap_make(e, buf, 0, apr_bucket_free); + + /* Wrap as much as possible into a regular file bucket */ + available = MIN(filelength, finfo.size-fileoffset); + b = apr_bucket_file_create(f, fileoffset, available, a->readpool, e->list); + APR_BUCKET_INSERT_AFTER(e, b); + + /* DEBUG + ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, NULL, + "disk_cache: diskcache_bucket_read: Converted 
to regular file" + " off %" APR_OFF_T_FMT " len %" APR_SIZE_T_FMT, + fileoffset, available); + */ + + + /* Put any remains in yet another bucket */ + if(available < filelength) { + e=b; + /* for efficiency, we can just build a new apr_bucket struct + * to wrap around the existing bucket */ + b = apr_bucket_alloc(sizeof(*b), e->list); + b->start = fileoffset + available; + b->length = filelength - available; + b->data = a; + b->type = &bucket_type_diskcache; + b->free = apr_bucket_free; + b->list = e->list; + APR_BUCKET_INSERT_AFTER(e, b); + } + else { + diskcache_bucket_destroy(a); } + + *str = buf; + return APR_SUCCESS; } -/* htcacheclean may remove directories underneath us. - * So, we'll try renaming three times at a cost of 0.002 seconds. - */ -static apr_status_t safe_file_rename(disk_cache_conf *conf, - const char *src, const char *dest, - apr_pool_t *pool) +static apr_bucket * diskcache_bucket_make(apr_bucket *b, + apr_file_t *fd, + apr_off_t offset, + apr_size_t len, + apr_interval_time_t timeout, + apr_pool_t *p) { - apr_status_t rv; + diskcache_bucket_data *f; - rv = apr_file_rename(src, dest, pool); + f = apr_bucket_alloc(sizeof(*f), b->list); + f->fd = fd; + f->readpool = p; + f->updtimeout = timeout; + f->polldelay = 0; - if (rv != APR_SUCCESS) { - int i; + b = apr_bucket_shared_make(b, f, offset, len); + b->type = &bucket_type_diskcache; - for (i = 0; i < 2 && rv != APR_SUCCESS; i++) { - /* 1000 micro-seconds aka 0.001 seconds. 
*/ - apr_sleep(1000); + return b; +} - mkdir_structure(conf, dest, pool); +static apr_bucket * diskcache_bucket_create(apr_file_t *fd, + apr_off_t offset, + apr_size_t len, + apr_interval_time_t timeout, + apr_pool_t *p, + apr_bucket_alloc_t *list) +{ + apr_bucket *b = apr_bucket_alloc(sizeof(*b), list); - rv = apr_file_rename(src, dest, pool); + APR_BUCKET_INIT(b); + b->free = apr_bucket_free; + b->list = list; + return diskcache_bucket_make(b, fd, offset, len, timeout, p); +} + + +/* FIXME: This is probably only correct for the first case, that seems + to be the one that occurs all the time... */ +static apr_status_t diskcache_bucket_setaside(apr_bucket *data, + apr_pool_t *reqpool) +{ + diskcache_bucket_data *a = data->data; + apr_file_t *fd = NULL; + apr_file_t *f = a->fd; + apr_pool_t *curpool = apr_file_pool_get(f); + + if (apr_pool_is_ancestor(curpool, reqpool)) { + return APR_SUCCESS; + } + + if (!apr_pool_is_ancestor(a->readpool, reqpool)) { + /* FIXME: Figure out what needs to be done here */ + ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, NULL, + "disk_cache: diskcache_bucket_setaside: FIXME1"); + a->readpool = reqpool; + } + + /* FIXME: Figure out what needs to be done here */ + ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, NULL, + "disk_cache: diskcache_bucket_setaside: FIXME2"); + + apr_file_setaside(&fd, f, reqpool); + a->fd = fd; + return APR_SUCCESS; +} + +static const apr_bucket_type_t bucket_type_diskcache = { + "DISKCACHE", 5, APR_BUCKET_DATA, + diskcache_bucket_destroy, + diskcache_bucket_read, + diskcache_bucket_setaside, + apr_bucket_shared_split, + apr_bucket_shared_copy +}; + +/* From apr_brigade.c */ + +/* A "safe" maximum bucket size, 1Gb */ +#define MAX_BUCKET_SIZE (0x40000000) + +static apr_bucket * diskcache_brigade_insert(apr_bucket_brigade *bb, + apr_file_t *f, apr_off_t + start, apr_off_t length, + apr_interval_time_t timeout, + apr_pool_t *p) +{ + apr_bucket *e; + + if (length < MAX_BUCKET_SIZE) { + e = diskcache_bucket_create(f, start, 
(apr_size_t)length, timeout, p, + bb->bucket_alloc); + } + else { + /* Several buckets are needed. */ + e = diskcache_bucket_create(f, start, MAX_BUCKET_SIZE, timeout, p, + bb->bucket_alloc); + + while (length > MAX_BUCKET_SIZE) { + apr_bucket *ce; + apr_bucket_copy(e, &ce); + APR_BRIGADE_INSERT_TAIL(bb, ce); + e->start += MAX_BUCKET_SIZE; + length -= MAX_BUCKET_SIZE; } + e->length = (apr_size_t)length; /* Resize just the last bucket */ } - return rv; + APR_BRIGADE_INSERT_TAIL(bb, e); + return e; } -static apr_status_t file_cache_el_final(disk_cache_object_t *dobj, - request_rec *r) +/* --------------------------------------------------------------- */ + +/* + * Local static functions + */ + +static char *cache_file(apr_pool_t *p, disk_cache_conf *conf, + const char *prefix, const char *name, + const char *suffix) { - /* move the data over */ - if (dobj->tfd) { - apr_status_t rv; - apr_file_close(dobj->tfd); + char *hashfile; - /* This assumes that the tempfile is on the same file system - * as the cache_root. If not, then we need a file copy/move - * rather than a rename. - */ - rv = apr_file_rename(dobj->tempfile, dobj->datafile, r->pool); - if (rv != APR_SUCCESS) { - ap_log_error(APLOG_MARK, APLOG_WARNING, rv, r->server, - "disk_cache: rename tempfile to datafile failed:" - " %s -> %s", dobj->tempfile, dobj->datafile); - apr_file_remove(dobj->tempfile, r->pool); - } + hashfile = ap_cache_generate_name(p, conf->dirlevels, + conf->dirlength, name); - dobj->tfd = NULL; + /* This assumes that we always deal with Vary-stuff if there's a prefix */ + if (prefix) { + return apr_pstrcat(p, prefix, CACHE_VDIR_SUFFIX, "/", + hashfile, suffix, NULL); + } + else { + return apr_pstrcat(p, conf->cache_root, "/", hashfile, suffix, NULL); } - - return APR_SUCCESS; } -static apr_status_t file_cache_errorcleanup(disk_cache_object_t *dobj, request_rec *r) + +static apr_status_t mkdir_structure(const char *file, apr_pool_t *pool) { - /* Remove the header file and the body file. 
*/ - apr_file_remove(dobj->hdrsfile, r->pool); - apr_file_remove(dobj->datafile, r->pool); + apr_status_t rv; + char *p; + int i; - /* If we opened the temporary data file, close and remove it. */ - if (dobj->tfd) { - apr_file_close(dobj->tfd); - apr_file_remove(dobj->tempfile, r->pool); - dobj->tfd = NULL; + p = strrchr((char *)file, '/'); + if(!p) { + return APR_EGENERAL; } - return APR_SUCCESS; -} + *p = '\0'; + + /* Be stubborn to overcome racyness when others deletes directories + while we're trying to create them */ + for(i=0; i < 10; i++) { + rv = apr_dir_make_recursive(file, + APR_UREAD|APR_UWRITE|APR_UEXECUTE, pool); + if(rv == APR_SUCCESS) { + break; + } + } + *p = '/'; + return rv; +} -/* These two functions get and put state information into the data - * file for an ap_cache_el, this state information will be read - * and written transparent to clients of this module +/* htcacheclean may remove directories underneath us. + * So, we'll try renaming three times at a cost of 0.002 seconds. */ -static int file_cache_recall_mydata(apr_file_t *fd, cache_info *info, - disk_cache_object_t *dobj, request_rec *r) +static apr_status_t safe_file_rename(const char *src, const char *dest, + apr_pool_t *pool) { apr_status_t rv; - char *urlbuff; - disk_cache_info_t disk_info; - apr_size_t len; - /* read the data from the cache file */ - len = sizeof(disk_cache_info_t); - rv = apr_file_read_full(fd, &disk_info, len, &len); + rv = apr_file_rename(src, dest, pool); + if (rv != APR_SUCCESS) { - return rv; + int i; + + for (i = 0; i < 2 && rv != APR_SUCCESS; i++) { + mkdir_structure(dest, pool); + + rv = apr_file_rename(src, dest, pool); + + if(rv != APR_SUCCESS) { + /* 1000 micro-seconds aka 0.001 seconds. */ + apr_sleep(1000); + } + } } - /* Store it away so we can get it later. 
*/ - dobj->disk_info = disk_info; + return rv; +} - info->status = disk_info.status; - info->date = disk_info.date; - info->expire = disk_info.expire; - info->request_time = disk_info.request_time; - info->response_time = disk_info.response_time; - - /* Note that we could optimize this by conditionally doing the palloc - * depending upon the size. */ - urlbuff = apr_palloc(r->pool, disk_info.name_len + 1); - len = disk_info.name_len; - rv = apr_file_read_full(fd, urlbuff, len, &len); - if (rv != APR_SUCCESS) { - return rv; +/* Close fd, remove file if it was opened for writing */ +static void close_and_rm(apr_file_t *fd, const char *file, request_rec *r) +{ + apr_int32_t flags = apr_file_flags_get(fd); + + apr_file_close(fd); + if(flags & APR_FOPEN_WRITE) { + apr_file_remove(file, r->pool); + ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server, + "disk_cache: close_and_rm: Removed %s", + file); } - urlbuff[disk_info.name_len] = '\0'; +} - /* check that we have the same URL */ - /* Would strncmp be correct? 
*/ - if (strcmp(urlbuff, dobj->name) != 0) { - return APR_EGENERAL; + +static apr_status_t file_cache_errorcleanup(disk_cache_object_t *dobj, + request_rec *r) +{ + /* Only remove files that are opened for write when called, files + opened only for reading must be explicitly removed */ + if(dobj->hfd) { + close_and_rm(dobj->hfd, dobj->hdrsfile, r); + dobj->hfd = NULL; + } + if(dobj->bfd) { + close_and_rm(dobj->bfd, dobj->bodyfile, r); + dobj->bfd = NULL; + } + if (dobj->tfd) { + close_and_rm(dobj->tfd, dobj->tempfile, r); + dobj->tfd = NULL; } return APR_SUCCESS; } + static const char* regen_key(apr_pool_t *p, apr_table_t *headers, apr_array_header_t *varray, const char *oldkey) { @@ -319,7 +547,8 @@ static void tokens_to_array(apr_pool_t * /* * Hook and mod_cache callback functions */ -static int create_entity(cache_handle_t *h, request_rec *r, const char *key, apr_off_t len) +static int create_entity(cache_handle_t *h, request_rec *r, const char *key, + apr_off_t len) { disk_cache_conf *conf = ap_get_module_config(r->server->module_config, &disk_cache_module); @@ -330,6 +559,22 @@ static int create_entity(cache_handle_t return DECLINED; } + /* Note, len is -1 if unknown so don't trust it too hard */ + if (len > conf->maxfs) { + ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server, + "disk_cache: URL %s failed the size check " + "(%" APR_OFF_T_FMT " > %" APR_OFF_T_FMT ")", + key, len, conf->maxfs); + return DECLINED; + } + if (len >= 0 && len < conf->minfs) { + ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server, + "disk_cache: URL %s failed the size check " + "(%" APR_OFF_T_FMT " < %" APR_OFF_T_FMT ")", + key, len, conf->minfs); + return DECLINED; + } + /* Allocate and initialize cache_object_t and disk_cache_object_t */ h->cache_obj = obj = apr_pcalloc(r->pool, sizeof(*obj)); obj->vobj = dobj = apr_pcalloc(r->pool, sizeof(*dobj)); @@ -337,74 +582,126 @@ static int create_entity(cache_handle_t obj->key = apr_pstrdup(r->pool, key); dobj->name = obj->key; - 
dobj->prefix = NULL; /* Save the cache root */ dobj->root = apr_pstrndup(r->pool, conf->cache_root, conf->cache_root_len); dobj->root_len = conf->cache_root_len; - dobj->datafile = data_file(r->pool, conf, dobj, key); - dobj->hdrsfile = header_file(r->pool, conf, dobj, key); + dobj->hdrsfile = cache_file(r->pool, conf, NULL, key, CACHE_HEADER_SUFFIX); dobj->tempfile = apr_pstrcat(r->pool, conf->cache_root, AP_TEMPFILE, NULL); + dobj->initial_size = len; + dobj->file_size = -1; + dobj->lastmod = APR_DATE_BAD; + dobj->updtimeout = conf->updtimeout; + dobj->removedirs = conf->removedirs; + dobj->header_only = r->header_only; + dobj->bytes_sent = 0; + + if(r->filename != NULL && strlen(r->filename) > 0) { + char buf[34]; + char *str; + + /* When possible, hash the body on dev:inode to minimize file + duplication. */ + if( (r->finfo.valid & APR_FINFO_IDENT) == APR_FINFO_IDENT) { + apr_uint64_t device = r->finfo.device; /* Avoid ifdef ... */ + apr_uint64_t inode = r->finfo.inode; /* ... type-mess */ + + apr_snprintf(buf, sizeof(buf), "%016" APR_UINT64_T_HEX_FMT ":%016" + APR_UINT64_T_HEX_FMT, device, inode); + str = buf; + } + else { + str = r->filename; + } + dobj->bodyfile = cache_file(r->pool, conf, NULL, str, + CACHE_BODY_SUFFIX); + dobj->filename = r->filename; + ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server, + "disk_cache: File %s was hashed using %s into %s", + r->filename, str, dobj->bodyfile); + } + else { + dobj->bodyfile = cache_file(r->pool, conf, NULL, key, + CACHE_BODY_SUFFIX); + } return OK; } -static int open_entity(cache_handle_t *h, request_rec *r, const char *key) + +static apr_status_t file_read_timeout(apr_file_t *file, char * buf, + apr_size_t len, apr_time_t timeout) { - apr_uint32_t format; - apr_size_t len; - const char *nkey; - apr_status_t rc; - static int error_logged = 0; - disk_cache_conf *conf = ap_get_module_config(r->server->module_config, - &disk_cache_module); + apr_size_t left, done; apr_finfo_t finfo; - cache_object_t *obj; - 
cache_info *info; - disk_cache_object_t *dobj; - int flags; + apr_status_t rc; + apr_interval_time_t delay=0; - h->cache_obj = NULL; + done = 0; + left = len; - /* Look up entity keyed to 'url' */ - if (conf->cache_root == NULL) { - if (!error_logged) { - error_logged = 1; - ap_log_error(APLOG_MARK, APLOG_ERR, 0, r->server, - "disk_cache: Cannot cache files to disk without a CacheRoot specified."); + while(1) { + rc = apr_file_read_full(file, buf+done, left, &len); + if (rc == APR_SUCCESS) { + break; } - return DECLINED; - } + done += len; + left -= len; - /* Create and init the cache object */ - h->cache_obj = obj = apr_pcalloc(r->pool, sizeof(cache_object_t)); - obj->vobj = dobj = apr_pcalloc(r->pool, sizeof(disk_cache_object_t)); + if(!APR_STATUS_IS_EOF(rc)) { + return rc; + } + rc = apr_file_info_get(&finfo, APR_FINFO_MTIME, file); + if(rc != APR_SUCCESS) { + return rc; + } + if(finfo.mtime < (apr_time_now() - timeout) ) { + return APR_ETIMEDOUT; + } + cache_loop_sleep(&delay); + CACHE_LOOP_INCTIME(delay); + } - info = &(obj->info); + return APR_SUCCESS; +} - /* Open the headers file */ - dobj->prefix = NULL; - /* Save the cache root */ - dobj->root = apr_pstrndup(r->pool, conf->cache_root, conf->cache_root_len); - dobj->root_len = conf->cache_root_len; +static apr_status_t open_header(cache_handle_t *h, request_rec *r, + const char *key, disk_cache_conf *conf) +{ + int flags = APR_READ | APR_WRITE | APR_BINARY; + disk_cache_format_t format; + apr_status_t rc; + const char *nkey = key; + disk_cache_info_t disk_info; + cache_object_t *obj = h->cache_obj; + disk_cache_object_t *dobj = obj->vobj; - dobj->hdrsfile = header_file(r->pool, conf, dobj, key); - flags = APR_READ|APR_BINARY|APR_BUFFERED; + /* Open header read/write so it's easy to rewrite it when needed */ rc = apr_file_open(&dobj->hfd, dobj->hdrsfile, flags, 0, r->pool); if (rc != APR_SUCCESS) { - return DECLINED; + return CACHE_EDECLINED; } /* read the format from the cache file */ - len = 
sizeof(format); - apr_file_read_full(dobj->hfd, &format, len, &len); + rc = apr_file_read_full(dobj->hfd, &format, sizeof(format), NULL); + if(APR_STATUS_IS_EOF(rc)) { + return CACHE_ENODATA; + } + else if(rc != APR_SUCCESS) { + return rc; + } + /* Vary-files are being written to tmpfile and moved in place, so + the should always be complete */ if (format == VARY_FORMAT_VERSION) { apr_array_header_t* varray; apr_time_t expire; + char *p; - len = sizeof(expire); - apr_file_read_full(dobj->hfd, &expire, len, &len); + rc = apr_file_read_full(dobj->hfd, &expire, sizeof(expire), NULL); + if(rc != APR_SUCCESS) { + return rc; + } varray = apr_array_make(r->pool, 5, sizeof(char*)); rc = read_array(r, varray, dobj->hfd); @@ -412,93 +709,541 @@ static int open_entity(cache_handle_t *h ap_log_error(APLOG_MARK, APLOG_ERR, rc, r->server, "disk_cache: Cannot parse vary header file: %s", dobj->hdrsfile); - return DECLINED; + return CACHE_EDECLINED; } apr_file_close(dobj->hfd); nkey = regen_key(r->pool, r->headers_in, varray, key); - dobj->hashfile = NULL; dobj->prefix = dobj->hdrsfile; - dobj->hdrsfile = header_file(r->pool, conf, dobj, nkey); + p = strrchr((char *)dobj->prefix, '.'); + if(p) { + /* Cut away the suffix */ + *p = '\0'; + } + dobj->hdrsfile = cache_file(r->pool, conf, dobj->prefix, nkey, + CACHE_HEADER_SUFFIX); - flags = APR_READ|APR_BINARY|APR_BUFFERED; rc = apr_file_open(&dobj->hfd, dobj->hdrsfile, flags, 0, r->pool); if (rc != APR_SUCCESS) { - return DECLINED; + dobj->hfd = NULL; + return CACHE_EDECLINED; + } + rc = apr_file_read_full(dobj->hfd, &format, sizeof(format), NULL); + if(APR_STATUS_IS_EOF(rc)) { + return CACHE_ENODATA; + } + else if(rc != APR_SUCCESS) { + return rc; } } - else if (format != DISK_FORMAT_VERSION) { - ap_log_error(APLOG_MARK, APLOG_ERR, 0, r->server, - "cache_disk: File '%s' has a version mismatch. 
File had version: %d.", - dobj->hdrsfile, format); - return DECLINED; - } - else { - apr_off_t offset = 0; - /* This wasn't a Vary Format file, so we must seek to the - * start of the file again, so that later reads work. - */ - apr_file_seek(dobj->hfd, APR_SET, &offset); - nkey = key; + + if(format != DISK_FORMAT_VERSION) { + ap_log_error(APLOG_MARK, APLOG_INFO, 0, r->server, + "disk_cache: File '%s' had a version mismatch. File had " + "version: %d (current is %d). Deleted.", dobj->hdrsfile, + format, DISK_FORMAT_VERSION); + file_cache_errorcleanup(dobj, r); + apr_file_remove(dobj->hdrsfile, r->pool); + return CACHE_EDECLINED; } obj->key = nkey; - dobj->key = nkey; dobj->name = key; - dobj->datafile = data_file(r->pool, conf, dobj, nkey); - dobj->tempfile = apr_pstrcat(r->pool, conf->cache_root, AP_TEMPFILE, NULL); - /* Open the data file */ - flags = APR_READ|APR_BINARY; -#ifdef APR_SENDFILE_ENABLED - flags |= APR_SENDFILE_ENABLED; -#endif - rc = apr_file_open(&dobj->fd, dobj->datafile, flags, 0, r->pool); - if (rc != APR_SUCCESS) { - /* XXX: Log message */ - return DECLINED; + /* read the data from the header file */ + rc = apr_file_read_full(dobj->hfd, &disk_info, sizeof(disk_info), NULL); + if(APR_STATUS_IS_EOF(rc)) { + return CACHE_ENODATA; } - - rc = apr_file_info_get(&finfo, APR_FINFO_SIZE, dobj->fd); - if (rc == APR_SUCCESS) { - dobj->file_size = finfo.size; + else if(rc != APR_SUCCESS) { + return rc; } - /* Read the bytes to setup the cache_info fields */ - rc = file_cache_recall_mydata(dobj->hfd, info, dobj, r); - if (rc != APR_SUCCESS) { - /* XXX log message */ - return DECLINED; - } + /* Store it away so we can get it later. 
*/ + dobj->disk_info = disk_info; - /* Initialize the cache_handle callback functions */ - ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server, - "disk_cache: Recalled cached URL info header %s", dobj->name); - return OK; + return APR_SUCCESS; } -static int remove_entity(cache_handle_t *h) + +static apr_status_t open_header_timeout(cache_handle_t *h, request_rec *r, + const char *key, disk_cache_conf *conf) { - /* Null out the cache object pointer so next time we start from scratch */ - h->cache_obj = NULL; - return OK; + apr_status_t rc; + apr_finfo_t finfo; + apr_interval_time_t delay = 0; + cache_object_t *obj = h->cache_obj; + disk_cache_object_t *dobj = obj->vobj; + + while(1) { + if(dobj->hfd) { + apr_file_close(dobj->hfd); + dobj->hfd = NULL; + } + rc = open_header(h, r, key, conf); + if(rc != APR_SUCCESS && rc != CACHE_ENODATA) { + if(rc != CACHE_EDECLINED) { + ap_log_error(APLOG_MARK, APLOG_ERR, rc, r->server, + "disk_cache: Cannot load header file: %s", + dobj->hdrsfile); + } + return rc; + } + + /* Objects with unknown body size will have file_size == -1 until the + entire body is written and the header updated with the actual size. 
+ And since we depend on knowing the body size we wait until the size + is written */ + if(rc == APR_SUCCESS && dobj->disk_info.file_size >= 0) { + break; + } + rc = apr_file_info_get(&finfo, APR_FINFO_MTIME, dobj->hfd); + if(rc != APR_SUCCESS) { + return rc; + } + if(finfo.mtime < (apr_time_now() - dobj->updtimeout)) { + ap_log_error(APLOG_MARK, APLOG_WARNING, 0, r->server, + "disk_cache: Timed out waiting for header file %s for " + "URL %s - caching the body failed?", + dobj->hdrsfile, key); + return CACHE_EDECLINED; + } + cache_loop_sleep(&delay); + CACHE_LOOP_INCTIME(delay); + } + + return APR_SUCCESS; } -static int remove_url(cache_handle_t *h, apr_pool_t *p) + +static apr_status_t load_header_strings(request_rec *r, + disk_cache_object_t *dobj) { + apr_size_t len; apr_status_t rc; - disk_cache_object_t *dobj; + char *urlbuff; - /* Get disk cache object from cache handle */ - dobj = (disk_cache_object_t *) h->cache_obj->vobj; - if (!dobj) { - return DECLINED; + if(dobj->disk_info.name_len > MAX_STRING_LEN || + dobj->disk_info.bodyname_len > MAX_STRING_LEN || + dobj->disk_info.filename_len > MAX_STRING_LEN) + { + ap_log_error(APLOG_MARK, APLOG_ERR, 0, r->server, + "disk_cache: Corrupt cache header for URL %s, deleting: %s", + dobj->name, dobj->hdrsfile); + file_cache_errorcleanup(dobj, r); + apr_file_remove(dobj->hdrsfile, r->pool); + return CACHE_EDECLINED; } - /* Delete headers file */ - if (dobj->hdrsfile) { - ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, NULL, + /* FIXME: Enforce that url and bodyname is present */ + + + len = dobj->disk_info.name_len; + urlbuff = apr_palloc(r->pool, len+1); + if(urlbuff == NULL) { + return APR_ENOMEM; + } + + rc = file_read_timeout(dobj->hfd, urlbuff, len, dobj->updtimeout); + if (rc == APR_ETIMEDOUT) { + ap_log_error(APLOG_MARK, APLOG_WARNING, rc, r->server, + "disk_cache: Timed out waiting for urlbuff for " + "URL %s - caching failed?", dobj->name); + return CACHE_EDECLINED; + } + else if(rc != APR_SUCCESS) { + 
ap_log_error(APLOG_MARK, APLOG_WARNING, rc, r->server, + "disk_cache: Error reading urlbuff for URL %s", + dobj->name); + return CACHE_EDECLINED; + } + urlbuff[len] = '\0'; + + /* check that we have the same URL */ + if (strcmp(urlbuff, dobj->name) != 0) { + ap_log_error(APLOG_MARK, APLOG_ERR, 0, r->server, + "disk_cache: Cached URL %s didn't match requested " + "URL %s", urlbuff, dobj->name); + file_cache_errorcleanup(dobj, r); + apr_file_remove(dobj->hdrsfile, r->pool); + apr_file_remove(dobj->bodyfile, r->pool); + return CACHE_EDECLINED; + } + + /* Read in the file the body is stored in */ + len = dobj->disk_info.bodyname_len; + if(len > 0) { + char *bodyfile = apr_palloc(r->pool, len+1); + + if(bodyfile == NULL) { + return APR_ENOMEM; + } + + rc = file_read_timeout(dobj->hfd, bodyfile, len, dobj->updtimeout); + if (rc == APR_ETIMEDOUT) { + ap_log_error(APLOG_MARK, APLOG_WARNING, rc, r->server, + "disk_cache: Timed out waiting for body cache " + "filename for URL %s - caching failed?", dobj->name); + return CACHE_EDECLINED; + } + else if(rc != APR_SUCCESS) { + ap_log_error(APLOG_MARK, APLOG_WARNING, rc, r->server, + "disk_cache: Error reading body cache filename for " + "URL %s", dobj->name); + return CACHE_EDECLINED; + } + bodyfile[len] = '\0'; + dobj->bodyfile = apr_pstrcat(r->pool, dobj->root, "/", bodyfile, NULL); + } + + /* Read in the filename */ + len = dobj->disk_info.filename_len; + if(len > 0) { + char *fnamebuf = apr_palloc(r->pool, len+1); + + if(fnamebuf == NULL) { + return APR_ENOMEM; + } + + rc = file_read_timeout(dobj->hfd, fnamebuf, len, dobj->updtimeout); + if (rc == APR_ETIMEDOUT) { + ap_log_error(APLOG_MARK, APLOG_WARNING, rc, r->server, + "disk_cache: Timed out waiting for filename for " + "URL %s - caching failed?", dobj->name); + return CACHE_EDECLINED; + } + else if(rc != APR_SUCCESS) { + ap_log_error(APLOG_MARK, APLOG_WARNING, rc, r->server, + "disk_cache: Error reading filename for URL %s", + dobj->name); + return CACHE_EDECLINED; + } + 
fnamebuf[len] = '\0'; + + dobj->filename = fnamebuf; + /* We can't set r->filename here because for example mod_rewrite + will exhibit different behaviour compared to a completely + uncached entity (will happen if entity is revalidated for + example). */ + /* Save a pointer to r->filename so we can set it later on in + recall_body which doesn't get r as an argument */ + dobj->rfilename = &(r->filename); + } + + return APR_SUCCESS; +} + + +static apr_status_t open_body_timeout(request_rec *r, cache_object_t *cache_obj) +{ + apr_status_t rc; + apr_finfo_t finfo; + int flags = APR_READ|APR_BINARY; + apr_interval_time_t delay = 0; + disk_cache_object_t *dobj = (disk_cache_object_t *) cache_obj->vobj; + cache_info *info = &(cache_obj->info); + +#if APR_HAS_SENDFILE + core_dir_config *pdconf = ap_get_module_config(r->per_dir_config, + &core_module); + flags |= ((pdconf->enable_sendfile == ENABLE_SENDFILE_OFF) + ? 0 : APR_SENDFILE_ENABLED); +#endif + + if(dobj->bodyfile == NULL || strlen(dobj->bodyfile) == 0) { + ap_log_error(APLOG_MARK, APLOG_ERR, 0, r->server, + "disk_cache: open_body_timeout called with NULL " + "bodyfile for URL %s", + dobj->name); + return APR_EGENERAL; + } + + /* Wait here until we get a body cachefile, data in it, and do quick sanity + * check */ + + while(1) { + if(dobj->bfd == NULL) { + rc = apr_file_open(&dobj->bfd, dobj->bodyfile, flags, 0, r->pool); + if(rc != APR_SUCCESS) { + if(info->response_time < (apr_time_now() - dobj->updtimeout) ) { + /* This usually means that the body simply wasn't cached, + due to HEAD requests for example */ + ap_log_error(APLOG_MARK, APLOG_DEBUG, rc, r->server, + "disk_cache: Timed out waiting for bodyfile " + "%s for URL %s - caching failed?", + dobj->bodyfile, dobj->name); + return CACHE_EDECLINED; + } + cache_loop_sleep(&delay); + CACHE_LOOP_INCTIME(delay); + continue; + } + } + + rc = apr_file_info_get(&finfo, APR_FINFO_SIZE | APR_FINFO_CSIZE | + APR_FINFO_MTIME | APR_FINFO_CTIME | + APR_FINFO_NLINK, + 
dobj->bfd); + if(rc != APR_SUCCESS && !APR_STATUS_IS_INCOMPLETE(rc)) { + return rc; + } + if(finfo.valid & APR_FINFO_NLINK && finfo.nlink == 0) { + /* This file has been deleted, close it and try again */ + apr_file_close(dobj->bfd); + dobj->bfd = NULL; + continue; + } + + /* XFS on Linux can leave corrupted files behind after a system crash, + these are usually detectable by the fact that csize is smaller than + the actual filesize. The occurrences we've seen has had csize=0. + + Note that we can't simply check for csize < size, since a file that + is still being written can legitimately have a small csize; only + treat it as corrupt once it is older than updtimeout. */ + + if(dobj->initial_size > 0 && + finfo.csize == 0 && + finfo.ctime < (apr_time_now() - dobj->updtimeout)) + { + dobj->file_size = 0; + } + else { + dobj->file_size = finfo.size; + } + + /* Note that the body might have been updated by another entity + that uses the same body, which usually means that we should + revalidate too. Don't freak out completely when this happens. + We might have: + - Body in sync with this header. + - Body being cached. + - Body that failed caching. + - Body newer than this header. + */ + + if(dobj->initial_size < dobj->file_size) { + ap_log_error(APLOG_MARK, APLOG_INFO, 0, r->server, + "disk_cache: Cached body for too large for URL %s" + " - revalidating.", dobj->name); + apr_file_remove(dobj->hdrsfile, r->pool); + return CACHE_EDECLINED; + } + else if(dobj->initial_size > dobj->file_size) { + /* Still caching or failed? */ + if(finfo.mtime < (apr_time_now() - dobj->updtimeout) ) { + ap_log_error(APLOG_MARK, APLOG_INFO, rc, r->server, + "disk_cache: Cached body too small for URL %s" + " - revalidating.", dobj->name); + apr_file_remove(dobj->hdrsfile, r->pool); + return CACHE_EDECLINED; + } + } + else { + /* If right size, file has either the correct mtime or + mtime == ctime which means the mtime isn't set. The latter + either means there was no Last-Modified available or + that we're in the window between finished copying and setting + mtime.
+ */ + if(dobj->lastmod != APR_DATE_BAD && + apr_time_sec(finfo.mtime) != apr_time_sec(dobj->lastmod) && + (finfo.mtime != finfo.ctime || + finfo.mtime < (apr_time_now() - dobj->updtimeout)) ) + { + ap_log_error(APLOG_MARK, APLOG_INFO, rc, r->server, + "disk_cache: Cached body Last-Modified mismatch " + "for URL %s - revalidating.", dobj->name); + apr_file_remove(dobj->hdrsfile, r->pool); + return CACHE_EDECLINED; + } + } + + if(dobj->file_size > 0) { + break; + } + cache_loop_sleep(&delay); + CACHE_LOOP_INCTIME(delay); + } + + return APR_SUCCESS; +} + + +static int open_entity(cache_handle_t *h, request_rec *r, const char *key) +{ + apr_status_t rc; + disk_cache_object_t *dobj; + cache_info *info; + static int error_logged = 0; + disk_cache_conf *conf = ap_get_module_config(r->server->module_config, + &disk_cache_module); + + h->cache_obj = NULL; + + /* Look up entity keyed to 'url' */ + if (conf->cache_root == NULL) { + if (!error_logged) { + error_logged = 1; + ap_log_error(APLOG_MARK, APLOG_ERR, 0, r->server, + "disk_cache: Cannot cache files to disk without a " + "CacheRoot specified."); + } + return DECLINED; + } + + /* Create and init the cache object */ + h->cache_obj = apr_pcalloc(r->pool, sizeof(cache_object_t)); + h->cache_obj->vobj = dobj = apr_pcalloc(r->pool, sizeof(disk_cache_object_t)); + info = &(h->cache_obj->info); + + /* Save the cache root */ + dobj->root = apr_pstrndup(r->pool, conf->cache_root, conf->cache_root_len); + dobj->root_len = conf->cache_root_len; + + dobj->hdrsfile = cache_file(r->pool, conf, NULL, key, CACHE_HEADER_SUFFIX); + + dobj->updtimeout = conf->updtimeout; + dobj->removedirs = conf->removedirs; + dobj->header_only = r->header_only; + + /* Open header and read basic info, wait until header contains + valid size information for the body */ + rc = open_header_timeout(h, r, key, conf); + if(rc != APR_SUCCESS) { + if(dobj->hfd != NULL) { + apr_file_close(dobj->hfd); + dobj->hfd = NULL; + } + return DECLINED; + } + + 
info->status = dobj->disk_info.status; + info->date = dobj->disk_info.date; + info->expire = dobj->disk_info.expire; + info->request_time = dobj->disk_info.request_time; + info->response_time = dobj->disk_info.response_time; + + dobj->lastmod = dobj->disk_info.lastmod; + dobj->initial_size = (apr_off_t) dobj->disk_info.file_size; + dobj->tempfile = apr_pstrcat(r->pool, conf->cache_root, AP_TEMPFILE, NULL); + + /* Load and check strings (URL, bodyfile, filename) */ + rc = load_header_strings(r, dobj); + if(rc != APR_SUCCESS) { + if(dobj->hfd != NULL) { + apr_file_close(dobj->hfd); + dobj->hfd = NULL; + } + return DECLINED; + } + + /* Only need body cachefile if we have a body and this isn't a HEAD + request */ + if(dobj->initial_size > 0 && !dobj->header_only) { + rc = open_body_timeout(r, h->cache_obj); + if(rc != APR_SUCCESS) { + if(dobj->hfd != NULL) { + apr_file_close(dobj->hfd); + dobj->hfd = NULL; + } + if(dobj->bfd != NULL) { + apr_file_close(dobj->bfd); + dobj->bfd = NULL; + } + return DECLINED; + } + } + else { + dobj->file_size = 0; + } + + ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server, + "disk_cache: Recalled status for cached URL %s from file %s", + dobj->name, dobj->hdrsfile); + return OK; +} + + +static int remove_entity(cache_handle_t *h) +{ + disk_cache_object_t *dobj; + apr_finfo_t finfo; + apr_pool_t *p; + apr_status_t rv; + + /* Get disk cache object from cache handle */ + dobj = (disk_cache_object_t *) h->cache_obj->vobj; + + /* Null out the cache object pointer so next time we start from scratch */ + h->cache_obj = NULL; + + if(!dobj) { + return OK; + } + + ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, NULL, + "disk_cache: remove_entity: %s", dobj->name); + + /* We really want to remove the cache files here since mod_cache has + deemed it stale, but it seems like an API miss that we don't + have a pool? And why is this function separate from remove_url? 
+ Oh well, beware of kludge ;) */ + + if(dobj->hfd != NULL) { + /* Only remove file if fd isn't already unlinked. Not atomic, but + the best we can do? */ + rv = apr_file_info_get(&finfo, APR_FINFO_NLINK, dobj->hfd); + if(rv == APR_SUCCESS && finfo.nlink != 0) { + p = apr_file_pool_get(dobj->hfd); + apr_file_remove(dobj->hdrsfile, p); + ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, NULL, + "disk_cache: remove_entity: Deleted %s from cache.", + dobj->hdrsfile); + } + apr_file_close(dobj->hfd); + dobj->hfd = NULL; + } + if(dobj->bfd != NULL) { + /* Only remove file if fd isn't already unlinked. Not atomic, but + the best we can do? */ + rv = apr_file_info_get(&finfo, APR_FINFO_NLINK, dobj->bfd); + if(rv == APR_SUCCESS && finfo.nlink != 0) { + p = apr_file_pool_get(dobj->bfd); + apr_file_remove(dobj->bodyfile, p); + ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, NULL, + "disk_cache: remove_entity: Deleted %s from cache.", + dobj->bodyfile); + } + apr_file_close(dobj->bfd); + dobj->bfd = NULL; + } + + return OK; +} + + +/* FIXME: It would make sense to have the errorcleanup and this function + to be the same */ +static int remove_url(cache_handle_t *h, apr_pool_t *p) +{ + apr_status_t rc; + disk_cache_object_t *dobj; + + /* Get disk cache object from cache handle */ + dobj = (disk_cache_object_t *) h->cache_obj->vobj; + if (!dobj) { + return DECLINED; + } + + /* Delete headers file */ + if (dobj->hdrsfile) { + ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, NULL, "disk_cache: Deleting %s from cache.", dobj->hdrsfile); rc = apr_file_remove(dobj->hdrsfile, p); @@ -507,34 +1252,35 @@ static int remove_url(cache_handle_t *h, * For reason see log_error_core for the case s == NULL. 
*/ ap_log_error(APLOG_MARK, APLOG_DEBUG, rc, NULL, - "disk_cache: Failed to delete headers file %s from cache.", - dobj->hdrsfile); + "disk_cache: Failed to delete headers file %s " + "from cache.", dobj->hdrsfile); return DECLINED; } } - /* Delete data file */ - if (dobj->datafile) { + /* Only delete body cache file if it isn't backed by a real file */ + if(!dobj->filename && dobj->bodyfile) { ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, NULL, - "disk_cache: Deleting %s from cache.", dobj->datafile); + "disk_cache: Deleting %s from cache.", dobj->bodyfile); - rc = apr_file_remove(dobj->datafile, p); + rc = apr_file_remove(dobj->bodyfile, p); if ((rc != APR_SUCCESS) && !APR_STATUS_IS_ENOENT(rc)) { - /* Will only result in an output if httpd is started with -e debug. - * For reason see log_error_core for the case s == NULL. - */ ap_log_error(APLOG_MARK, APLOG_DEBUG, rc, NULL, - "disk_cache: Failed to delete data file %s from cache.", - dobj->datafile); + "disk_cache: Failed to delete body file %s " + "from cache.", dobj->bodyfile); return DECLINED; } } + if(!dobj->removedirs) { + return OK; + } + /* now delete directories as far as possible up to our cache root */ if (dobj->root) { const char *str_to_copy; - str_to_copy = dobj->hdrsfile ? dobj->hdrsfile : dobj->datafile; + str_to_copy = dobj->hdrsfile ? dobj->hdrsfile : dobj->bodyfile; if (str_to_copy) { char *dir, *slash, *q; @@ -549,10 +1295,10 @@ static int remove_url(cache_handle_t *h, * in the way as far as possible * * Note: due to the way we constructed the file names in - * header_file and data_file, we are guaranteed that the - * cache_root is suffixed by at least one '/' which will be - * turned into a terminating null by this loop. Therefore, - * we won't either delete or go above our cache root. + * cache_file, we are guaranteed that the cache_root is suffixed by + * at least one '/' which will be turned into a terminating null by + * this loop. 
Therefore, we won't either delete or go above our + * cache root. */ for (q = dir + dobj->root_len; *q ; ) { ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, NULL, @@ -583,7 +1329,7 @@ static apr_status_t read_array(request_r rv = apr_file_gets(w, MAX_STRING_LEN - 1, file); if (rv != APR_SUCCESS) { ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, - "Premature end of vary array."); + "disk_cache: Premature end of vary array."); return rv; } @@ -624,8 +1370,7 @@ static apr_status_t store_array(apr_file iov[1].iov_base = CRLF; iov[1].iov_len = sizeof(CRLF) - 1; - rv = apr_file_writev(fd, (const struct iovec *) &iov, 2, - &amt); + rv = apr_file_writev_full(fd, (const struct iovec *) &iov, 2, &amt); if (rv != APR_SUCCESS) { return rv; } @@ -634,92 +1379,48 @@ static apr_status_t store_array(apr_file iov[0].iov_base = CRLF; iov[0].iov_len = sizeof(CRLF) - 1; - return apr_file_writev(fd, (const struct iovec *) &iov, 1, + return apr_file_writev_full(fd, (const struct iovec *) &iov, 1, &amt); } -static apr_status_t read_table(cache_handle_t *handle, request_rec *r, +/* Load table stored by store_table */ +static apr_status_t read_table(request_rec *r, apr_table_t *table, apr_file_t *file) { - char w[MAX_STRING_LEN]; - char *l; - int p; + char *s, *k, *v; + apr_uint32_t totsize = 0; apr_status_t rv; - while (1) { - - /* ### What about APR_EOF? */ - rv = apr_file_gets(w, MAX_STRING_LEN - 1, file); - if (rv != APR_SUCCESS) { - ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, - "Premature end of cache headers."); - return rv; - } - - /* Delete terminal (CR?)LF */ + rv = apr_file_read_full(file, &totsize, sizeof(totsize), NULL); + if(rv != APR_SUCCESS) { + return rv; + } - p = strlen(w); - /* Indeed, the host's '\n': - '\012' for UNIX; '\015' for MacOS; '\025' for OS/390 - -- whatever the script generates. 
- */ - if (p > 0 && w[p - 1] == '\n') { - if (p > 1 && w[p - 2] == CR) { - w[p - 2] = '\0'; - } - else { - w[p - 1] = '\0'; - } - } + s = apr_palloc(r->pool, totsize); + if(s == NULL) { + return APR_ENOMEM; + } - /* If we've finished reading the headers, break out of the loop. */ - if (w[0] == '\0') { - break; - } + rv = apr_file_read_full(file, s, totsize, NULL); + if(rv != APR_SUCCESS) { + return rv; + } -#if APR_CHARSET_EBCDIC - /* Chances are that we received an ASCII header text instead of - * the expected EBCDIC header lines. Try to auto-detect: + k=s; + while(k < s + totsize) { + /* FIXME: Do a pointer-loop instead of strlen to make sure we don't + walk outside of allocated memory if on-disk data has been + corrupted */ - if (!(l = strchr(w, ':'))) { - int maybeASCII = 0, maybeEBCDIC = 0; - unsigned char *cp, native; - apr_size_t inbytes_left, outbytes_left; - - for (cp = w; *cp != '\0'; ++cp) { - native = apr_xlate_conv_byte(ap_hdrs_from_ascii, *cp); - if (apr_isprint(*cp) && !apr_isprint(native)) - ++maybeEBCDIC; - if (!apr_isprint(*cp) && apr_isprint(native)) - ++maybeASCII; - } - if (maybeASCII > maybeEBCDIC) { - ap_log_error(APLOG_MARK, APLOG_ERR, 0, r->server, - "CGI Interface Error: Script headers apparently ASCII: (CGI = %s)", - r->filename); - inbytes_left = outbytes_left = cp - w; - apr_xlate_conv_buffer(ap_hdrs_from_ascii, - w, &inbytes_left, w, &outbytes_left); - } - } -#endif /*APR_CHARSET_EBCDIC*/ - - /* if we see a bogus header don't ignore it. Shout and scream */ - if (!(l = strchr(w, ':'))) { - return APR_EGENERAL; - } - - *l++ = '\0'; - while (*l && apr_isspace(*l)) { - ++l; - } - - apr_table_add(table, w, l); + v = k + strlen(k) + 1; + apr_table_addn(table, k, v); + k = v + strlen(v) + 1; } return APR_SUCCESS; } + /* * Reads headers from a buffer and returns an array of headers. 
* Returns NULL on file error @@ -730,21 +1431,83 @@ static apr_status_t read_table(cache_han static apr_status_t recall_headers(cache_handle_t *h, request_rec *r) { disk_cache_object_t *dobj = (disk_cache_object_t *) h->cache_obj->vobj; + apr_status_t rv; + apr_off_t off; + apr_finfo_t finfo; + apr_interval_time_t delay = 0; /* This case should not happen... */ if (!dobj->hfd) { - /* XXX log message */ + ap_log_error(APLOG_MARK, APLOG_ERR, 0, r->server, + "disk_cache: recall_headers called without fd for URL %s", + dobj->name); return APR_NOTFOUND; } - h->req_hdrs = apr_table_make(r->pool, 20); + off = 0; + rv = apr_file_seek(dobj->hfd, APR_CUR, &off); + if(rv != APR_SUCCESS) { + return rv; + } + h->resp_hdrs = apr_table_make(r->pool, 20); + h->req_hdrs = apr_table_make(r->pool, 20); - /* Call routine to read the header lines/status line */ - read_table(h, r, h->resp_hdrs, dobj->hfd); - read_table(h, r, h->req_hdrs, dobj->hfd); + while(1) { + rv = read_table(r, h->resp_hdrs, dobj->hfd); + if(rv != APR_SUCCESS) { + apr_table_clear(h->resp_hdrs); + } + else { + rv = read_table(r, h->req_hdrs, dobj->hfd); + if(rv != APR_SUCCESS) { + apr_table_clear(h->req_hdrs); + } + } + if(rv == APR_SUCCESS) { + break; + } + if(!APR_STATUS_IS_EOF(rv)) { + ap_log_rerror(APLOG_MARK, APLOG_ERR, rv, r, + "disk_cache: Error reading cache headers " + "URL %s", dobj->name); + if(dobj->hfd != NULL) { + apr_file_close(dobj->hfd); + dobj->hfd = NULL; + } + if(dobj->bfd != NULL) { + apr_file_close(dobj->bfd); + dobj->bfd = NULL; + } + return rv; + } - apr_file_close(dobj->hfd); + /* FIXME: Check if header file deleted (nlinks==0) and reopen it if + * that's the case */ + rv = apr_file_info_get(&finfo, APR_FINFO_MTIME, dobj->hfd); + if(rv != APR_SUCCESS || + finfo.mtime < (apr_time_now() - dobj->updtimeout) ) + { + ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, + "disk_cache: Timed out waiting for cache headers " + "URL %s", dobj->name); + if(dobj->hfd != NULL) { + apr_file_close(dobj->hfd); + 
dobj->hfd = NULL; + } + if(dobj->bfd != NULL) { + apr_file_close(dobj->bfd); + dobj->bfd = NULL; + } + return APR_EGENERAL; + } + rv = apr_file_seek(dobj->hfd, APR_SET, &off); + if(rv != APR_SUCCESS) { + return rv; + } + cache_loop_sleep(&delay); + CACHE_LOOP_INCTIME(delay); + } ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server, "disk_cache: Recalled headers for URL %s", dobj->name); @@ -755,301 +1518,1285 @@ static apr_status_t recall_body(cache_ha { apr_bucket *e; disk_cache_object_t *dobj = (disk_cache_object_t*) h->cache_obj->vobj; + apr_off_t bytes_already_done; + + if(dobj->hfd != NULL) { + /* Close header cache file, it won't be needed anymore */ + apr_file_close(dobj->hfd); + dobj->hfd = NULL; + } + + if(dobj->initial_size > 0 && !dobj->header_only && dobj->bfd == NULL) { + /* This should never happen, really... */ + ap_log_error(APLOG_MARK, APLOG_ERR, 0, NULL, + "disk_cache: recall_body: Called but no fd open, URL %s " + "from file %s", dobj->name, dobj->bodyfile); + return APR_EGENERAL; + } + + /* Restore r->filename if not present */ + if(dobj->filename != NULL && dobj->rfilename != NULL && + *(dobj->rfilename) == NULL) + { + *(dobj->rfilename) = dobj->filename; + } + + /* Insert as much as possible as regular file (ie. 
sendfile():able) */ + /* We need to make sure to skip the beginning of the file if we've + already sent some bytes, e.g., due to mod_proxy */ + if(dobj->file_size > dobj->bytes_sent) { + if(apr_brigade_insert_file(bb, dobj->bfd, dobj->bytes_sent, + dobj->file_size - dobj->bytes_sent, p) == NULL) + { + return APR_ENOMEM; + } + bytes_already_done = dobj->file_size; + } else { + bytes_already_done = dobj->bytes_sent; + } + + /* Insert any remainder as read-while-caching bucket */ + if(bytes_already_done < dobj->initial_size) { + if(diskcache_brigade_insert(bb, dobj->bfd, bytes_already_done, + dobj->initial_size - bytes_already_done, + dobj->updtimeout, p + ) == NULL) + { + return APR_ENOMEM; + } + } - e = apr_bucket_file_create(dobj->fd, 0, (apr_size_t) dobj->file_size, p, - bb->bucket_alloc); - APR_BRIGADE_INSERT_HEAD(bb, e); e = apr_bucket_eos_create(bb->bucket_alloc); APR_BRIGADE_INSERT_TAIL(bb, e); + ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, NULL, + "disk_cache: recall_body: Succeeded for URL %s from file %s", + dobj->name, dobj->bodyfile); + return APR_SUCCESS; } -static apr_status_t store_table(apr_file_t *fd, apr_table_t *table) +/* Store table on disk. 
+ * Format on disk: apr_uint32_t totsize - total size of data following totsize + * totsize of data, consisting of key\0value\0...key\0value\0 + */ +static apr_status_t store_table(apr_file_t *fd, apr_table_t *table, + request_rec *r) { - int i; - apr_status_t rv; - struct iovec iov[4]; - apr_size_t amt; + int i, nelts, niov; + apr_status_t rv = APR_SUCCESS; + apr_uint32_t totsize = 0; apr_table_entry_t *elts; + struct iovec *iov; + + nelts = apr_table_elts(table)->nelts; + + /* Allocate space for the size-header plus two elements per table entry */ + + iov = apr_palloc(r->pool, (1+nelts*2) * sizeof(struct iovec)); + if(iov == NULL) { + return APR_ENOMEM; + } elts = (apr_table_entry_t *) apr_table_elts(table)->elts; - for (i = 0; i < apr_table_elts(table)->nelts; ++i) { + niov = 1; + for (i = 0; i < nelts; ++i) { if (elts[i].key != NULL) { - iov[0].iov_base = elts[i].key; - iov[0].iov_len = strlen(elts[i].key); - iov[1].iov_base = ": "; - iov[1].iov_len = sizeof(": ") - 1; - iov[2].iov_base = elts[i].val; - iov[2].iov_len = strlen(elts[i].val); - iov[3].iov_base = CRLF; - iov[3].iov_len = sizeof(CRLF) - 1; + iov[niov].iov_base = elts[i].key; + iov[niov].iov_len = strlen(elts[i].key)+1; + totsize += iov[niov++].iov_len; + iov[niov].iov_base = elts[i].val; + iov[niov].iov_len = strlen(elts[i].val)+1; + totsize += iov[niov++].iov_len; + } + } + iov[0].iov_base = (void *) &totsize; + iov[0].iov_len = sizeof(totsize); + i=0; + while(niov > 0) { + /* Need to write this in chunks, APR_MAX_IOVEC_SIZE is really small + on some OS's */ + int chunk = MIN(niov, APR_MAX_IOVEC_SIZE); + apr_size_t amt; + + rv = apr_file_writev_full(fd, (const struct iovec *) &iov[i], chunk, + &amt); + if(rv != APR_SUCCESS) { + return rv; + } + niov -= chunk; + i += chunk; + } + return rv; +} + + +static apr_status_t open_new_file(request_rec *r, const char *filename, + apr_file_t **fd, disk_cache_conf *conf) +{ + int flags = APR_CREATE | APR_WRITE | APR_BINARY | APR_EXCL; + apr_status_t rv; + + 
while(1) { + rv = apr_file_open(fd, filename, flags, + APR_FPROT_UREAD | APR_FPROT_UWRITE, r->pool); + + ap_log_error(APLOG_MARK, APLOG_DEBUG, rv, r->server, + "disk_cache: open_new_file: Opening %s", filename); + + if(APR_STATUS_IS_EEXIST(rv)) { + apr_finfo_t finfo; + + rv = apr_stat(&finfo, filename, APR_FINFO_MTIME, r->pool); + if(APR_STATUS_IS_ENOENT(rv)) { + /* Someone else has already removed it, try again */ + continue; + } + else if(rv != APR_SUCCESS) { + return rv; + } + + /* FIXME: We should really check for size and mtime that matches + the source file too if available */ + if(finfo.mtime < (apr_time_now() - conf->updtimeout) ) { + /* Something stale that's left around */ + + rv = apr_file_remove(filename, r->pool); + if(rv != APR_SUCCESS && !APR_STATUS_IS_ENOENT(rv)) { + ap_log_error(APLOG_MARK, APLOG_ERR, rv, r->server, + "disk_cache: open_new_file: Failed to " + "remove old %s", filename); + return rv; + } + continue; + } + else { + /* Someone else has just created the file, return identifiable + status so calling function can do the right thing */ + + return CACHE_EEXIST; + } + } + else if(APR_STATUS_IS_ENOENT(rv)) { + /* The directory for the file didn't exist */ + + rv = mkdir_structure(filename, r->pool); + if(rv != APR_SUCCESS) { + ap_log_error(APLOG_MARK, APLOG_ERR, rv, r->server, + "disk_cache: open_new_file: Failed to make " + "directory for %s", filename); + return rv; + } + continue; + } + else if(rv == APR_SUCCESS) { + return APR_SUCCESS; + } + else { + ap_log_error(APLOG_MARK, APLOG_ERR, rv, r->server, + "disk_cache: open_new_file: Failed to open %s", + filename); + return rv; + } + } + + /* We should never get here, so */ + return APR_EGENERAL; +} + + +static apr_status_t store_vary_header(cache_handle_t *h, disk_cache_conf *conf, + request_rec *r, cache_info *info, + const char *varyhdr) +{ + disk_cache_object_t *dobj = (disk_cache_object_t*) h->cache_obj->vobj; + apr_array_header_t* varray; + const char *vfile; + apr_status_t rv; + int 
flags; + disk_cache_format_t format = VARY_FORMAT_VERSION; + struct iovec iov[2]; + apr_size_t amt; + + /* We should always write the vary format hints to the original header + * path, otherwise they will never be refreshed. */ + + vfile = dobj->hdrsfile; + + flags = APR_CREATE | APR_WRITE | APR_BINARY | APR_EXCL | APR_BUFFERED; + rv = apr_file_mktemp(&dobj->tfd, dobj->tempfile, flags, r->pool); + if (rv != APR_SUCCESS) { + return rv; + } + + iov[0].iov_base = (void*)&format; + iov[0].iov_len = sizeof(format); + + iov[1].iov_base = (void*)&info->expire; + iov[1].iov_len = sizeof(info->expire); + + rv = apr_file_writev_full(dobj->tfd, (const struct iovec *) &iov, 2, &amt); + if (rv != APR_SUCCESS) { + file_cache_errorcleanup(dobj, r); + return rv; + } + + varray = apr_array_make(r->pool, 6, sizeof(char*)); + tokens_to_array(r->pool, varyhdr, varray); + + rv = store_array(dobj->tfd, varray); + if (rv != APR_SUCCESS) { + file_cache_errorcleanup(dobj, r); + return rv; + } + + rv = apr_file_close(dobj->tfd); + dobj->tfd = NULL; + if (rv != APR_SUCCESS) { + file_cache_errorcleanup(dobj, r); + apr_file_remove(dobj->tempfile, r->pool); + return rv; + } + + rv = safe_file_rename(dobj->tempfile, vfile, r->pool); + if (rv != APR_SUCCESS) { + ap_log_error(APLOG_MARK, APLOG_ERR, rv, r->server, + "disk_cache: rename tempfile to varyfile failed: " + "%s -> %s", dobj->tempfile, vfile); + file_cache_errorcleanup(dobj, r); + apr_file_remove(dobj->tempfile, r->pool); + return rv; + } + + dobj->tempfile = apr_pstrcat(r->pool, conf->cache_root, AP_TEMPFILE, NULL); + + if(dobj->prefix == NULL) { + const char *tmp = regen_key(r->pool, r->headers_in, varray, dobj->name); + char *p; + + dobj->prefix = dobj->hdrsfile; + p = strrchr((char *)dobj->prefix, '.'); + if(p) { + /* Cut away the suffix */ + *p = '\0'; + } + dobj->hdrsfile = cache_file(r->pool, conf, dobj->prefix, tmp, + CACHE_HEADER_SUFFIX); + dobj->bodyfile = cache_file(r->pool, conf, dobj->prefix, tmp, + CACHE_BODY_SUFFIX); + } + 
+ ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server, + "disk_cache: Stored vary header for URL %s", dobj->name); + + return APR_SUCCESS; +} + + +static apr_status_t store_disk_header(disk_cache_object_t *dobj, + request_rec *r, cache_info *info) +{ + disk_cache_format_t format = DISK_FORMAT_VERSION; + struct iovec iov[5]; + int niov; + disk_cache_info_t disk_info; + apr_size_t amt; + apr_status_t rv; + + disk_info.date = info->date; + disk_info.expire = info->expire; + disk_info.entity_version = dobj->disk_info.entity_version++; + disk_info.request_time = info->request_time; + disk_info.response_time = info->response_time; + disk_info.status = info->status; + disk_info.file_size = dobj->initial_size; + disk_info.lastmod = dobj->lastmod; + + niov = 0; + iov[niov].iov_base = (void*)&format; + iov[niov++].iov_len = sizeof(format); + iov[niov].iov_base = (void*)&disk_info; + iov[niov++].iov_len = sizeof(disk_cache_info_t); + + disk_info.name_len = strlen(dobj->name); + iov[niov].iov_base = (void*)dobj->name; + iov[niov++].iov_len = disk_info.name_len; + + if(dobj->initial_size > 0) { + /* We know the bodyfile is root/bodyname ... 
*/ + char *bodyname = (char *) dobj->bodyfile + dobj->root_len + 1; + disk_info.bodyname_len = strlen(bodyname); + iov[niov].iov_base = (void*)bodyname; + iov[niov++].iov_len = disk_info.bodyname_len; + } + else { + disk_info.bodyname_len = 0; + } + + if(r->filename != NULL && strlen(r->filename) > 0) { + disk_info.filename_len = strlen(r->filename); + iov[niov].iov_base = (void*)r->filename; + iov[niov++].iov_len = disk_info.filename_len; + } + else { + disk_info.filename_len = 0; + } + + rv = apr_file_writev_full(dobj->hfd, (const struct iovec *) &iov, niov, + &amt); + if (rv != APR_SUCCESS) { + file_cache_errorcleanup(dobj, r); + return rv; + } + + if (r->headers_out) { + apr_table_t *headers_out; + + headers_out = ap_cache_cacheable_hdrs_out(r->pool, r->headers_out, + r->server); + + if (!apr_table_get(headers_out, "Content-Type") + && r->content_type) { + apr_table_setn(headers_out, "Content-Type", + ap_make_content_type(r, r->content_type)); + } + + headers_out = apr_table_overlay(r->pool, headers_out, + r->err_headers_out); + rv = store_table(dobj->hfd, headers_out, r); + if (rv != APR_SUCCESS) { + file_cache_errorcleanup(dobj, r); + return rv; + } + } + + /* Parse the vary header and dump those fields from the headers_in. */ + /* FIXME: Make call to the same thing cache_select calls to crack Vary. */ + if (r->headers_in) { + apr_table_t *headers_in; + + headers_in = ap_cache_cacheable_hdrs_out(r->pool, r->headers_in, + r->server); + rv = store_table(dobj->hfd, headers_in, r); + if (rv != APR_SUCCESS) { + file_cache_errorcleanup(dobj, r); + return rv; + } + } + + /* Store it away so we can get it later. 
*/ + dobj->disk_info = disk_info; + + return APR_SUCCESS; +} + + +static apr_status_t store_headers(cache_handle_t *h, request_rec *r, + cache_info *info) +{ + disk_cache_conf *conf = ap_get_module_config(r->server->module_config, + &disk_cache_module); + apr_status_t rv; + int rewriting; + disk_cache_object_t *dobj = (disk_cache_object_t*) h->cache_obj->vobj; + const char *lastmods; + + + /* This is flaky... we need to manage the cache_info differently */ + h->cache_obj->info = *info; + + /* Get last-modified timestamp */ + lastmods = apr_table_get(r->err_headers_out, "Last-Modified"); + if (lastmods == NULL) { + lastmods = apr_table_get(r->headers_out, "Last-Modified"); + } + if (lastmods != NULL) { + dobj->lastmod = apr_date_parse_http(lastmods); + } + + if(dobj->hfd) { + rewriting = TRUE; + + /* Don't update header on disk if the following is met: + - The body size is known. + - If Last-Modified is known, it has to be identical. + - It's not expired. + - Date in cached header isn't older than updtimeout. 
+ */ + if( dobj->disk_info.file_size >= 0 && (dobj->lastmod == APR_DATE_BAD || + dobj->lastmod == dobj->disk_info.lastmod) && + dobj->disk_info.expire > r->request_time && + dobj->disk_info.date > info->date - dobj->updtimeout) + { + dobj->skipstore = TRUE; + + ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server, + "disk_cache: store_headers: Headers current for URL " + "%s", dobj->name); + } + else { + ap_log_error(APLOG_MARK, APLOG_INFO, 0, r->server, + "disk_cache: Rewriting headers for URL %s", + dobj->name); + } + } + else { + rewriting = FALSE; + + ap_log_error(APLOG_MARK, APLOG_INFO, 0, r->server, + "disk_cache: Storing new headers for URL %s", dobj->name); + } + + if (r->headers_out) { + const char *tmp; + + tmp = apr_table_get(r->headers_out, "Vary"); + + if (tmp) { + rv = store_vary_header(h, conf, r, info, tmp); + if(rv != APR_SUCCESS) { + return rv; + } + } + } + + if(dobj->skipstore) { + apr_file_close(dobj->hfd); + dobj->hfd = NULL; + return APR_SUCCESS; + } + + if(rewriting) { + apr_finfo_t finfo; + + rv = apr_file_info_get(&finfo, APR_FINFO_MTIME, dobj->hfd); + if(rv != APR_SUCCESS) { + return rv; + } - rv = apr_file_writev(fd, (const struct iovec *) &iov, 4, - &amt); + /* FIXME: Isn't this a bit redundant? 
It probably causes more + trouble than it's fixing, especially since we handle it above + except for looking at mtime */ + /* Don't store disk headers more often than updtimeout */ + if(dobj->disk_info.file_size >= 0 && + dobj->disk_info.expire > r->request_time && + r->request_time < finfo.mtime + dobj->updtimeout) + { + dobj->skipstore = TRUE; + } + else { + /* This triggers bugs in APR when using APR_BUFFERED */ + apr_off_t off=0; + rv = apr_file_seek(dobj->hfd, APR_SET, &off); if (rv != APR_SUCCESS) { return rv; } + rv = apr_file_trunc(dobj->hfd, 0); + if(rv != APR_SUCCESS) { + return rv; + } } + + } + else { + rv = open_new_file(r, dobj->hdrsfile, &(dobj->hfd), conf); + if(rv == CACHE_EEXIST) { + dobj->skipstore = TRUE; + } + else if(rv != APR_SUCCESS) { + return rv; + } + } + + if(dobj->skipstore) { + if(dobj->hfd) { + apr_file_close(dobj->hfd); + dobj->hfd = NULL; + } + ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server, + "disk_cache: Skipping store for URL %s: Someone else " + "beat us to it", dobj->name); + return APR_SUCCESS; + } + + rv = store_disk_header(dobj, r, info); + if(rv != APR_SUCCESS) { + return rv; + } + + /* If the body size is unknown, the header file will be rewritten later + so we can't close it */ + if(dobj->initial_size >= 0) { + rv = apr_file_close(dobj->hfd); + dobj->hfd = NULL; + if(rv != APR_SUCCESS) { + apr_file_remove(dobj->hdrsfile, r->pool); + return rv; + } + } + + ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server, + "disk_cache: Stored headers for URL %s", dobj->name); + return APR_SUCCESS; +} + + +static apr_status_t check_destfd_timeout(apr_file_t *fd, apr_time_t now, + apr_time_t updtimeout) +{ + apr_status_t rc; + apr_finfo_t finfo; + + /* Get mtime and nlink for our opened destfile */ + rc = apr_file_info_get(&finfo, APR_FINFO_MTIME | APR_FINFO_NLINK, fd); + if(rc != APR_SUCCESS) { + return rc; + } + + /* If link count is zero, file is deleted */ + if(finfo.nlink == 0) { + return APR_ETIMEDOUT; + } + + /* Check if mtime 
on destfile shows us having timed out */ + if(now - finfo.mtime > updtimeout) { + return APR_ETIMEDOUT; + } + + return APR_SUCCESS; +} + + +static apr_status_t copy_body(apr_pool_t *p, + apr_file_t *srcfd, apr_off_t srcoff, + apr_file_t *destfd, apr_off_t destoff, + apr_off_t len, apr_interval_time_t updtimeout) +{ + apr_status_t rc; + apr_size_t size; + apr_finfo_t finfo; + apr_time_t starttime = apr_time_now(); + apr_time_t last = starttime; + apr_time_t lastcheck = 0; + unsigned int i=0, freq=1; + apr_interval_time_t minintvl = updtimeout/10; + apr_interval_time_t maxintvl = minintvl*3; + + char *buf = apr_palloc(p, MIN(len, CACHE_BUF_SIZE)); + if (!buf) { + return APR_ENOMEM; + } + + if(srcoff != 0) { + rc = apr_file_seek(srcfd, APR_SET, &srcoff); + if(rc != APR_SUCCESS) { + return rc; + } + } + + if(destoff != 0) { + rc = apr_file_seek(destfd, APR_SET, &destoff); + if(rc != APR_SUCCESS) { + return rc; + } + } + + /* Tried doing this with mmap, but sendfile on Linux got confused when + sending a file while it was being written to from an mmapped area. + The traditional way seems to be good enough, and less complex. + */ + while(len > 0) { + size=MIN(len, CACHE_BUF_SIZE); + + rc = apr_file_read_full (srcfd, buf, size, NULL); + if(rc != APR_SUCCESS) { + return rc; + } + + /* Do timeout checks before we do the write, this is what other clients + will see. Don't waste resources by calling apr_time_now() on each + iteration. */ + if(i++ % freq == 0) { + apr_time_t now = apr_time_now(); + apr_time_t elapsed = now-last; + + /* Do closer inspection at updtimeout intervals */ + if(now-lastcheck > updtimeout) { + rc = check_destfd_timeout(destfd, now, updtimeout); + if(rc != APR_SUCCESS) { + return rc; + } + lastcheck = now; + } + + if(elapsed > updtimeout) { + if(freq > 1) { + /* The close inspection above will catch a timeout. 
+ If we get here, make sure we recalibrate at which + frequency we should check stuff */ + freq = 1; + } + } + else if(elapsed < minintvl) { + freq <<= 1; + freq |= 1; + } + else if(elapsed > maxintvl && freq > 1) { + freq >>= 1; + } + last = now; + } + + rc = apr_file_write_full(destfd, buf, size, NULL); + if(rc != APR_SUCCESS) { + return rc; + } + len -= size; + } + + /* Make sure we are the one having cached the destfile */ + rc = check_destfd_timeout(destfd, apr_time_now(), updtimeout); + if(rc != APR_SUCCESS) { + return rc; + } + + /* Check if file has changed during copying. This is not 100% foolproof + due to NFS attribute caching when on NFS etc. */ + /* FIXME: Can we assume that we're always copying an entire file? In that + case we can check if the current filesize matches the length + we think it is */ + rc = apr_file_info_get(&finfo, APR_FINFO_MTIME, srcfd); + if(rc != APR_SUCCESS) { + return rc; + } + if(starttime < finfo.mtime) { + return APR_EGENERAL; } - iov[0].iov_base = CRLF; - iov[0].iov_len = sizeof(CRLF) - 1; - rv = apr_file_writev(fd, (const struct iovec *) &iov, 1, - &amt); - return rv; + + return APR_SUCCESS; } -static apr_status_t store_headers(cache_handle_t *h, request_rec *r, cache_info *info) + +/* Provide srcfile and srcinfo containing + APR_FINFO_INODE|APR_FINFO_MTIME to make sure we have opened the right file + (someone might have just replaced it which messes up things). 
+*/ +static apr_status_t copy_body_nofd(apr_pool_t *p, const char *srcfile, + apr_off_t srcoff, apr_finfo_t *srcinfo, + const char *destfile, apr_off_t destoff, + apr_off_t len, + apr_interval_time_t updtimeout) { - disk_cache_conf *conf = ap_get_module_config(r->server->module_config, - &disk_cache_module); - apr_status_t rv; - apr_size_t amt; - disk_cache_object_t *dobj = (disk_cache_object_t*) h->cache_obj->vobj; + apr_status_t rc; + apr_file_t *srcfd, *destfd; + apr_finfo_t finfo; - disk_cache_info_t disk_info; - struct iovec iov[2]; + rc = apr_file_open(&srcfd, srcfile, APR_READ | APR_BINARY, 0, p); + if(rc != APR_SUCCESS) { + return rc; + } + rc = apr_file_info_get(&finfo, APR_FINFO_INODE|APR_FINFO_MTIME, srcfd); + if(rc != APR_SUCCESS) { + return rc; + } + /* FIXME: Should probably check device too */ + if(srcinfo->inode != finfo.inode || srcinfo->mtime < finfo.mtime) { + return APR_EGENERAL; + } - /* This is flaky... we need to manage the cache_info differently */ - h->cache_obj->info = *info; + rc = apr_file_open(&destfd, destfile, APR_WRITE | APR_BINARY, 0, p); + if(rc != APR_SUCCESS) { + return rc; + } - if (r->headers_out) { - const char *tmp; + rc = copy_body(p, srcfd, srcoff, destfd, destoff, len, updtimeout); + apr_file_close(srcfd); + if(rc != APR_SUCCESS) { + apr_file_close(destfd); + return rc; + } - tmp = apr_table_get(r->headers_out, "Vary"); + rc = apr_file_close(destfd); - if (tmp) { - apr_array_header_t* varray; - apr_uint32_t format = VARY_FORMAT_VERSION; + /* Set mtime on file */ + apr_file_mtime_set(destfile, finfo.mtime, p); + + return rc; +} - /* If we were initially opened as a vary format, rollback - * that internal state for the moment so we can recreate the - * vary format hints in the appropriate directory. 
- */ - if (dobj->prefix) { - dobj->hdrsfile = dobj->prefix; - dobj->prefix = NULL; - } - mkdir_structure(conf, dobj->hdrsfile, r->pool); +#if APR_HAS_THREADS +static apr_status_t bgcopy_thread_cleanup(void *data) +{ + copyinfo *ci = data; + apr_status_t rc, ret; + apr_pool_t *p; - rv = apr_file_mktemp(&dobj->tfd, dobj->tempfile, - APR_CREATE | APR_WRITE | APR_BINARY | APR_EXCL, - r->pool); + /* FIXME: Debug */ + ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ci->s, + "disk_cache: bgcopy_thread_cleanup: %s -> %s", + ci->srcfile, ci->destfile); - if (rv != APR_SUCCESS) { - return rv; - } + rc = apr_thread_join(&ret, ci->t); + if(rc != APR_SUCCESS) { + ap_log_error(APLOG_MARK, APLOG_ERR, rc, ci->s, + "disk_cache: bgcopy_thread_cleanup: apr_thread_join " + "failed %s -> %s", ci->srcfile, ci->destfile); + return rc; + } + if(ret != APR_SUCCESS) { + ap_log_error(APLOG_MARK, APLOG_ERR, ret, ci->s, + "disk_cache: Background caching body %s -> %s failed", + ci->srcfile, ci->destfile); + } - amt = sizeof(format); - apr_file_write(dobj->tfd, &format, &amt); + /* FIXME: Debug */ + ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ci->s, + "disk_cache: bgcopy_thread_cleanup: SUCCESS %s -> %s", + ci->srcfile, ci->destfile); - amt = sizeof(info->expire); - apr_file_write(dobj->tfd, &info->expire, &amt); + /* Destroy our private pool */ + p = ci->pool; + apr_pool_destroy(p); + + return APR_SUCCESS; +} - varray = apr_array_make(r->pool, 6, sizeof(char*)); - tokens_to_array(r->pool, tmp, varray); - store_array(dobj->tfd, varray); +static void *bgcopy_thread(apr_thread_t *t, void *data) +{ + copyinfo *ci = data; + apr_pool_t *p; + apr_status_t rc; - apr_file_close(dobj->tfd); + p = apr_thread_pool_get(t); - dobj->tfd = NULL; + /* FIXME: Debug */ + ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ci->s, + "disk_cache: bgcopy_thread: start %s -> %s", + ci->srcfile, ci->destfile); - rv = safe_file_rename(conf, dobj->tempfile, dobj->hdrsfile, - r->pool); - if (rv != APR_SUCCESS) { - ap_log_error(APLOG_MARK, 
APLOG_WARNING, rv, r->server, - "disk_cache: rename tempfile to varyfile failed: %s -> %s", - dobj->tempfile, dobj->hdrsfile); - apr_file_remove(dobj->tempfile, r->pool); - return rv; - } + rc = copy_body_nofd(p, ci->srcfile, ci->srcoff, &(ci->srcinfo), + ci->destfile, ci->destoff, ci->len, ci->updtimeout); - dobj->tempfile = apr_pstrcat(r->pool, conf->cache_root, AP_TEMPFILE, NULL); - tmp = regen_key(r->pool, r->headers_in, varray, dobj->name); - dobj->prefix = dobj->hdrsfile; - dobj->hashfile = NULL; - dobj->datafile = data_file(r->pool, conf, dobj, tmp); - dobj->hdrsfile = header_file(r->pool, conf, dobj, tmp); - } + if(rc != APR_ETIMEDOUT && rc != APR_SUCCESS) { + apr_file_remove(ci->destfile, p); } + /* FIXME: Debug */ + ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ci->s, + "disk_cache: bgcopy_thread: done %s -> %s", + ci->srcfile, ci->destfile); + + apr_thread_exit(t, rc); + return NULL; +} +#endif /* APR_HAS_THREADS */ + + +#if APR_HAS_FORK +static apr_status_t bgcopy_child_cleanup(void *data) { + copyinfo *ci = data; + int status; + apr_exit_why_e why; + apr_pool_t *p; + + apr_proc_wait(ci->proc, &status, &why, APR_WAIT); + if(why == APR_PROC_EXIT) { + if(status != APR_SUCCESS) { + ap_log_error(APLOG_MARK, APLOG_ERR, status, ci->s, + "disk_cache: Background caching body %s -> %s failed", + ci->srcfile, ci->destfile); + return APR_SUCCESS; + } + } + else if(status & (APR_PROC_SIGNAL | APR_PROC_SIGNAL_CORE) ) { + ap_log_error(APLOG_MARK, APLOG_ERR, 0, ci->s, + "disk_cache: Background caching body %s -> %s failed, " + "caught signal %d", ci->srcfile, ci->destfile, status); + return APR_SUCCESS; + } + + /* Destroy our private pool */ + p = ci->pool; + apr_pool_destroy(p); - rv = apr_file_mktemp(&dobj->hfd, dobj->tempfile, - APR_CREATE | APR_WRITE | APR_BINARY | - APR_BUFFERED | APR_EXCL, r->pool); + return APR_SUCCESS; +} +#endif /* APR_HAS_FORK */ + +static apr_status_t do_bgcopy(apr_file_t *srcfd, apr_off_t srcoff, + apr_file_t *destfd, apr_off_t destoff, + 
apr_off_t len, apr_interval_time_t updtimeout, + conn_rec *c) +{ + copyinfo *ci; + apr_status_t rv; + apr_pool_t *newpool; + const char *srcfile, *destfile; + int mpm_query_info; + + /* It seems pool gets destroyed (ie. fd's closed) before our cleanup + function is called when an error occurs (a dropped connection, for + example), so we need a pool of our own. + */ + rv = apr_pool_create(&newpool, NULL); if (rv != APR_SUCCESS) { return rv; } - disk_info.format = DISK_FORMAT_VERSION; - disk_info.date = info->date; - disk_info.expire = info->expire; - disk_info.entity_version = dobj->disk_info.entity_version++; - disk_info.request_time = info->request_time; - disk_info.response_time = info->response_time; - disk_info.status = info->status; - - disk_info.name_len = strlen(dobj->name); + ci = apr_palloc(newpool, sizeof(*ci)); + if(ci == NULL) { + apr_pool_destroy(newpool); + return APR_ENOMEM; + } - iov[0].iov_base = (void*)&disk_info; - iov[0].iov_len = sizeof(disk_cache_info_t); - iov[1].iov_base = (void*)dobj->name; - iov[1].iov_len = disk_info.name_len; + rv = apr_file_name_get(&srcfile, srcfd); + if(rv != APR_SUCCESS) { + return rv; + } + rv = apr_file_info_get(&(ci->srcinfo), APR_FINFO_INODE|APR_FINFO_MTIME, + srcfd); + if(rv != APR_SUCCESS) { + return rv; + } - rv = apr_file_writev(dobj->hfd, (const struct iovec *) &iov, 2, &amt); - if (rv != APR_SUCCESS) { + rv = apr_file_name_get(&destfile, destfd); + if(rv != APR_SUCCESS) { return rv; } - if (r->headers_out) { - apr_table_t *headers_out; + ci->pool = newpool; + ci->srcfile = apr_pstrdup(newpool, srcfile); + ci->srcoff = srcoff; + ci->destfile = apr_pstrdup(newpool, destfile); + ci->destoff = destoff; + ci->len = len; + ci->updtimeout = updtimeout; + ci->s = c->base_server; + +#if APR_HAS_THREADS + if(ap_mpm_query(AP_MPMQ_IS_THREADED, &mpm_query_info) == APR_SUCCESS) { + apr_threadattr_t *ta; + apr_thread_t *t; + rv = apr_threadattr_create(&ta, newpool); + if(rv != APR_SUCCESS) { + apr_pool_destroy(newpool); + 
return rv; + } - headers_out = ap_cache_cacheable_hdrs_out(r->pool, r->headers_out, - r->server); + apr_threadattr_detach_set(ta, FALSE); - if (!apr_table_get(headers_out, "Content-Type") - && r->content_type) { - apr_table_setn(headers_out, "Content-Type", - ap_make_content_type(r, r->content_type)); + /* FIXME: This makes module unloadable on AIX */ +#if 0 +#ifdef AP_MPM_WANT_SET_STACKSIZE + if (ap_thread_stacksize != 0) { + apr_threadattr_stacksize_set(ta, ap_thread_stacksize); + ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, c->base_server, + "disk_cache: BG thread stacksize set to %" + APR_SIZE_T_FMT, ap_thread_stacksize); } +#endif /* AP_MPM_WANT_SET_STACKSIZE */ +#endif /* 0 */ - headers_out = apr_table_overlay(r->pool, headers_out, - r->err_headers_out); - rv = store_table(dobj->hfd, headers_out); + rv = apr_thread_create (&t, ta, bgcopy_thread, ci, newpool); if (rv != APR_SUCCESS) { + apr_pool_destroy(newpool); return rv; } - } - - /* Parse the vary header and dump those fields from the headers_in. */ - /* FIXME: Make call to the same thing cache_select calls to crack Vary. 
*/ - if (r->headers_in) { - apr_table_t *headers_in; + ci->t = t; - headers_in = ap_cache_cacheable_hdrs_out(r->pool, r->headers_in, - r->server); - rv = store_table(dobj->hfd, headers_in); - if (rv != APR_SUCCESS) { + apr_pool_cleanup_register(c->pool, ci, + bgcopy_thread_cleanup, apr_pool_cleanup_null); + } + else +#endif /* APR_HAS_THREADS */ +#if APR_HAS_FORK + if(ap_mpm_query(AP_MPMQ_IS_FORKED, &mpm_query_info) == APR_SUCCESS) { + ci->proc = apr_palloc(newpool, sizeof(apr_proc_t)); + if(ci->proc == NULL) { + apr_pool_destroy(newpool); + return APR_ENOMEM; + } + rv = apr_proc_fork(ci->proc, newpool); + if(rv == APR_INCHILD) { + /* Child */ + rv = copy_body_nofd(ci->pool, ci->srcfile, ci->srcoff, + &(ci->srcinfo), ci->destfile, ci->destoff, + ci->len, ci->updtimeout); + if(rv != APR_ETIMEDOUT && rv != APR_SUCCESS) { + apr_file_remove(ci->destfile, ci->pool); + } + exit(rv); + } + else if(rv == APR_INPARENT) { + apr_pool_cleanup_register(c->pool, ci, + bgcopy_child_cleanup, + apr_pool_cleanup_null); + } + else { return rv; } } + else +#endif /* APR_HAS_FORK */ + if(1) + { + rv = copy_body(newpool, srcfd, ci->srcoff, destfd, ci->destoff, + ci->len, ci->updtimeout); + apr_pool_destroy(newpool); + } + + return rv; +} - apr_file_close(dobj->hfd); /* flush and close */ - /* Remove old file with the same name. If remove fails, then - * perhaps we need to create the directory tree where we are - * about to write the new headers file. 
- */ - rv = apr_file_remove(dobj->hdrsfile, r->pool); - if (rv != APR_SUCCESS) { - mkdir_structure(conf, dobj->hdrsfile, r->pool); +static apr_status_t replace_brigade_with_cache(cache_handle_t *h, + request_rec *r, + apr_bucket_brigade *bb) +{ + apr_status_t rv; + apr_bucket *e; + disk_cache_object_t *dobj = (disk_cache_object_t *) h->cache_obj->vobj; + + if(dobj->bfd) { + apr_file_close(dobj->bfd); + dobj->bfd = NULL; } - rv = safe_file_rename(conf, dobj->tempfile, dobj->hdrsfile, r->pool); - if (rv != APR_SUCCESS) { - ap_log_error(APLOG_MARK, APLOG_WARNING, rv, r->server, - "disk_cache: rename tempfile to hdrsfile failed: %s -> %s", - dobj->tempfile, dobj->hdrsfile); - apr_file_remove(dobj->tempfile, r->pool); + rv = open_body_timeout(r, h->cache_obj); + if(rv == CACHE_EDECLINED) { + return APR_ETIMEDOUT; + } + else if(rv != APR_SUCCESS) { + ap_log_error(APLOG_MARK, APLOG_ERR, rv, r->server, + "disk_cache: Error opening bodyfile %s for URL %s", + dobj->bodyfile, dobj->name); return rv; } - dobj->tempfile = apr_pstrcat(r->pool, conf->cache_root, AP_TEMPFILE, NULL); + /* First, empty the brigade */ + e = APR_BRIGADE_FIRST(bb); + while (e != APR_BRIGADE_SENTINEL(bb)) { + apr_bucket *d; + d = e; + e = APR_BUCKET_NEXT(e); + apr_bucket_delete(d); + } + + /* Then, populate it with our cached instance */ + /* in case we've already sent part, e.g. 
via mod_proxy */ + dobj->bytes_sent = r->bytes_sent; + + rv = recall_body(h, r->pool, bb); + if (rv != APR_SUCCESS) { + ap_log_error(APLOG_MARK, APLOG_ERR, 0, r->server, + "disk_cache: Error serving URL %s from cache", dobj->name); + return rv; + } ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server, - "disk_cache: Stored headers for URL %s", dobj->name); + "disk_cache: Serving cached body for URL %s", dobj->name); + return APR_SUCCESS; } + static apr_status_t store_body(cache_handle_t *h, request_rec *r, apr_bucket_brigade *bb) { apr_bucket *e; apr_status_t rv; + int copy_file = FALSE, first_call = FALSE, did_bgcopy = FALSE; disk_cache_object_t *dobj = (disk_cache_object_t *) h->cache_obj->vobj; disk_cache_conf *conf = ap_get_module_config(r->server->module_config, &disk_cache_module); - /* We write to a temp file and then atomically rename the file over - * in file_cache_el_final(). - */ - if (!dobj->tfd) { - rv = apr_file_mktemp(&dobj->tfd, dobj->tempfile, - APR_CREATE | APR_WRITE | APR_BINARY | - APR_BUFFERED | APR_EXCL, r->pool); - if (rv != APR_SUCCESS) { - return rv; + if(r->no_cache) { + ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server, + "disk_cache: store_body called for URL %s even though" + "no_cache is set", dobj->name); + file_cache_errorcleanup(dobj, r); + return APR_EGENERAL; + } + + if(dobj->initial_size == 0) { + /* Don't waste a body cachefile on a 0 length body */ + return APR_SUCCESS; + } + + /* Only perform these actions when called the first time */ + if(dobj->bfd == NULL) { + first_call = TRUE; + + if(dobj->lastmod != APR_DATE_BAD) { + apr_finfo_t finfo; + rv = apr_stat(&finfo, dobj->bodyfile, + APR_FINFO_MTIME | APR_FINFO_SIZE | APR_FINFO_CSIZE, + r->pool); + if(rv == APR_SUCCESS || APR_STATUS_IS_INCOMPLETE(rv)) { + /* Dest-file will have same mtime as source if it's + current */ + /* FIXME: This code and the one used in open_body should + probably be identical... 
*/ + if(dobj->lastmod <= finfo.mtime && + dobj->initial_size == finfo.size && + !(finfo.valid & APR_FINFO_CSIZE && finfo.csize < finfo.size)) + { + /* Assume it's a valid cached body there already */ + dobj->skipstore = TRUE; + } + } + } + + if(!dobj->skipstore) { + /* FIXME: We should pass the source file's size and mtime so + open_new_file() can more reliably determine if the target + file is current or stale. */ + rv = open_new_file(r, dobj->bodyfile, &(dobj->bfd), conf); + if(rv == CACHE_EEXIST) { + /* Someone else beat us to storing this */ + dobj->skipstore = TRUE; + } + else if(rv != APR_SUCCESS) { + file_cache_errorcleanup(dobj, r); + apr_file_remove(dobj->hdrsfile, r->pool); + return rv; + } + else { + dobj->file_size = 0; + } + } + + if(dobj->skipstore) { + /* Someone else beat us to storing this object */ + if( dobj->initial_size > 0 && + APR_BUCKET_IS_EOS(APR_BRIGADE_LAST(bb)) ) + { + /* Yay, we can replace the body with the cached instance */ + return replace_brigade_with_cache(h, r, bb); + } + + return APR_SUCCESS; } - dobj->file_size = 0; } - for (e = APR_BRIGADE_FIRST(bb); - e != APR_BRIGADE_SENTINEL(bb); - e = APR_BUCKET_NEXT(e)) + /* Check if this is a complete single sequential file, eligable for + * file copy. + */ + /* FIXME: Make the min size to do file copy run-time config? 
*/ + if(dobj->file_size == 0 && + dobj->initial_size > APR_BUCKET_BUFF_SIZE && + APR_BUCKET_IS_EOS(APR_BRIGADE_LAST(bb)) ) { - const char *str; - apr_size_t length, written; - rv = apr_bucket_read(e, &str, &length, APR_BLOCK_READ); - if (rv != APR_SUCCESS) { - ap_log_error(APLOG_MARK, APLOG_ERR, 0, r->server, - "cache_disk: Error when reading bucket for URL %s", - h->cache_obj->key); - /* Remove the intermediate cache file and return non-APR_SUCCESS */ - file_cache_errorcleanup(dobj, r); - return rv; + apr_off_t begin = -1; + apr_off_t pos = -1; + apr_file_t *fd = NULL; + apr_bucket_file *a; + + copy_file = TRUE; + + for (e = APR_BRIGADE_FIRST(bb); + e != APR_BRIGADE_SENTINEL(bb); + e = APR_BUCKET_NEXT(e)) + { + if(APR_BUCKET_IS_EOS(e)) { + break; + } + if(!APR_BUCKET_IS_FILE(e)) { + copy_file = FALSE; + break; + } + + a = e->data; + + if(begin < 0) { + begin = pos = e->start; + fd = a->fd; + } + + if(fd != a->fd || pos != e->start) { + copy_file = FALSE; + break; + } + + pos += e->length; } - rv = apr_file_write_full(dobj->tfd, str, length, &written); - if (rv != APR_SUCCESS) { - ap_log_error(APLOG_MARK, APLOG_ERR, 0, r->server, - "cache_disk: Error when writing cache file for URL %s", - h->cache_obj->key); - /* Remove the intermediate cache file and return non-APR_SUCCESS */ - file_cache_errorcleanup(dobj, r); + + if(copy_file) { + dobj->file_size = pos; + } + } + + if(copy_file) { + apr_bucket_file *a; + + ap_log_error(APLOG_MARK, APLOG_INFO, 0, r->server, + "disk_cache: Copying body for URL %s, len %" + APR_OFF_T_FMT, dobj->name, dobj->file_size); + + e = APR_BRIGADE_FIRST(bb); + a = e->data; + + if(dobj->file_size > conf->minbgsize) { + rv = do_bgcopy(a->fd, e->start, dobj->bfd, 0, dobj->file_size, + dobj->updtimeout, r->connection); + did_bgcopy = TRUE; + } + else { + rv = copy_body(r->pool, a->fd, e->start, dobj->bfd, 0, + dobj->file_size, dobj->updtimeout); + } + if(rv != APR_SUCCESS) { + ap_log_error(APLOG_MARK, APLOG_ERR, rv, r->server, + "disk_cache: 
Copying body failed, " + "URL %s", dobj->name); + if(rv != APR_ETIMEDOUT) { + file_cache_errorcleanup(dobj, r); + apr_file_remove(dobj->bodyfile, r->pool); + } return rv; } - dobj->file_size += written; - if (dobj->file_size > conf->maxfs) { - ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server, - "cache_disk: URL %s failed the size check " - "(%" APR_OFF_T_FMT " > %" APR_OFF_T_FMT ")", - h->cache_obj->key, dobj->file_size, conf->maxfs); - /* Remove the intermediate cache file and return non-APR_SUCCESS */ - file_cache_errorcleanup(dobj, r); - return APR_EGENERAL; + + } + else { + if(first_call) { + ap_log_error(APLOG_MARK, APLOG_INFO, 0, r->server, + "disk_cache: Caching body for URL %s, len %" + APR_OFF_T_FMT, dobj->name, dobj->initial_size); + } + + for (e = APR_BRIGADE_FIRST(bb); + e != APR_BRIGADE_SENTINEL(bb); + e = APR_BUCKET_NEXT(e)) + { + const char *str; + apr_size_t length, written; + + /* Ignore the non-data-buckets */ + if(APR_BUCKET_IS_METADATA(e)) { + continue; + } + + rv = apr_bucket_read(e, &str, &length, APR_BLOCK_READ); + if (rv != APR_SUCCESS) { + ap_log_error(APLOG_MARK, APLOG_ERR, rv, r->server, + "disk_cache: Error when reading bucket for URL %s", + dobj->name); + file_cache_errorcleanup(dobj, r); + apr_file_remove(dobj->hdrsfile, r->pool); + apr_file_remove(dobj->bodyfile, r->pool); + return rv; + } + rv = apr_file_write_full(dobj->bfd, str, length, &written); + if (rv != APR_SUCCESS) { + ap_log_error(APLOG_MARK, APLOG_ERR, rv, r->server, + "disk_cache: Error when writing cache file for " + "URL %s", dobj->name); + file_cache_errorcleanup(dobj, r); + apr_file_remove(dobj->hdrsfile, r->pool); + apr_file_remove(dobj->bodyfile, r->pool); + return rv; + } + dobj->file_size += written; + if (dobj->file_size > conf->maxfs) { + ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server, + "disk_cache: URL %s failed the size check " + "(%" APR_OFF_T_FMT " > %" APR_OFF_T_FMT ")", + dobj->name, dobj->file_size, conf->maxfs); + file_cache_errorcleanup(dobj, r); 
+ apr_file_remove(dobj->hdrsfile, r->pool); + apr_file_remove(dobj->bodyfile, r->pool); + return APR_EGENERAL; + } } } - /* Was this the final bucket? If yes, close the temp file and perform - * sanity checks. - */ - if (APR_BUCKET_IS_EOS(APR_BRIGADE_LAST(bb))) { - if (r->connection->aborted || r->no_cache) { + + /* Drop out here if this wasn't the end */ + if (!APR_BUCKET_IS_EOS(APR_BRIGADE_LAST(bb))) { + return APR_SUCCESS; + } + + if(!copy_file) { + ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server, + "disk_cache: Done caching URL %s, len %" APR_OFF_T_FMT, + dobj->name, dobj->file_size); + + /* FIXME: Do we really need to check r->no_cache here since we checked + it in the beginning? */ + /* Assume that if we've got an initial size then bucket brigade + was complete and there's no danger in keeping it even if the + connection was aborted */ + if (r->no_cache || (r->connection->aborted && dobj->initial_size < 0)) { ap_log_error(APLOG_MARK, APLOG_INFO, 0, r->server, "disk_cache: Discarding body for URL %s " "because connection has been aborted.", - h->cache_obj->key); + dobj->name); /* Remove the intermediate cache file and return non-APR_SUCCESS */ file_cache_errorcleanup(dobj, r); + apr_file_remove(dobj->hdrsfile, r->pool); + apr_file_remove(dobj->bodyfile, r->pool); return APR_EGENERAL; } + if (dobj->file_size < conf->minfs) { ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server, - "cache_disk: URL %s failed the size check " + "disk_cache: URL %s failed the size check " "(%" APR_OFF_T_FMT " < %" APR_OFF_T_FMT ")", - h->cache_obj->key, dobj->file_size, conf->minfs); + dobj->name, dobj->file_size, conf->minfs); /* Remove the intermediate cache file and return non-APR_SUCCESS */ file_cache_errorcleanup(dobj, r); + apr_file_remove(dobj->hdrsfile, r->pool); + apr_file_remove(dobj->bodyfile, r->pool); return APR_EGENERAL; } - /* All checks were fine. 
Move tempfile to final destination */ - /* Link to the perm file, and close the descriptor */ - file_cache_el_final(dobj, r); - ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server, - "disk_cache: Body for URL %s cached.", dobj->name); + if(dobj->initial_size < 0) { + /* Update header information now that we know the size */ + dobj->initial_size = dobj->file_size; + rv = store_headers(h, r, &(h->cache_obj->info)); + if(rv != APR_SUCCESS) { + file_cache_errorcleanup(dobj, r); + apr_file_remove(dobj->hdrsfile, r->pool); + apr_file_remove(dobj->bodyfile, r->pool); + return rv; + } + } + else if(dobj->initial_size != dobj->file_size) { + ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server, + "disk_cache: URL %s - body size mismatch: suggested %" + APR_OFF_T_FMT " file_size %" APR_OFF_T_FMT ")", + dobj->name, dobj->initial_size, dobj->file_size); + file_cache_errorcleanup(dobj, r); + apr_file_remove(dobj->hdrsfile, r->pool); + apr_file_remove(dobj->bodyfile, r->pool); + return APR_EGENERAL; + } + } + + /* All checks were fine, close output file */ + rv = apr_file_close(dobj->bfd); + dobj->bfd = NULL; + if(rv != APR_SUCCESS) { + apr_file_remove(dobj->bodyfile, r->pool); + file_cache_errorcleanup(dobj, r); + return rv; + } + + /* Set mtime on body file */ + if(!did_bgcopy && dobj->lastmod != APR_DATE_BAD) { + apr_file_mtime_set(dobj->bodyfile, dobj->lastmod, r->pool); + } + + + /* Redirect to cachefile if we copied a plain file */ + if(copy_file) { + rv = replace_brigade_with_cache(h, r, bb); + if(rv != APR_SUCCESS) { + return rv; + } } return APR_SUCCESS; @@ -1064,6 +2811,8 @@ static void *create_config(apr_pool_t *p conf->dirlength = DEFAULT_DIRLENGTH; conf->maxfs = DEFAULT_MAX_FILE_SIZE; conf->minfs = DEFAULT_MIN_FILE_SIZE; + conf->updtimeout = DEFAULT_UPDATE_TIMEOUT; + conf->minbgsize = DEFAULT_MIN_BACKGROUND_SIZE; conf->cache_root = NULL; conf->cache_root_len = 0; @@ -1105,6 +2854,7 @@ static const char conf->dirlevels = val; return NULL; } + static const char 
*set_cache_dirlength(cmd_parms *parms, void *in_struct_ptr, const char *arg) { @@ -1144,9 +2894,65 @@ { return "CacheMaxFileSize argument must be a non-negative integer representing the max size of a file to cache in bytes."; } + + return NULL; +} + + +static const char +*set_cache_updtimeout(cmd_parms *parms, void *in_struct_ptr, const char *arg) +{ + apr_int64_t val; + disk_cache_conf *conf = ap_get_module_config(parms->server->module_config, + &disk_cache_module); + + if (apr_strtoff(&val, arg, NULL, 0) != APR_SUCCESS || val < 0) + { + return "CacheUpdateTimeout argument must be a non-negative integer representing the timeout in milliseconds for cache update operations"; + } + + conf->updtimeout = val * 1000; + + return NULL; +} + + +static const char +*set_cache_minbgsize(cmd_parms *parms, void *in_struct_ptr, const char *arg) +{ + disk_cache_conf *conf = ap_get_module_config(parms->server->module_config, + &disk_cache_module); + + if (apr_strtoff(&conf->minbgsize, arg, NULL, 0) != APR_SUCCESS || + conf->minbgsize < 0) + { + return "CacheMinBGSize argument must be a non-negative integer representing the min size in bytes for a file to be eligable for background caching"; + } + + return NULL; +} + + +static const char +*set_cache_removedirs(cmd_parms *parms, void *in_struct_ptr, const char *arg) +{ + disk_cache_conf *conf = ap_get_module_config(parms->server->module_config, + &disk_cache_module); + + if (strcasecmp(arg, "on") == 0 || strcasecmp(arg, "true") == 0) { + conf->removedirs = TRUE; + } + else if (strcasecmp(arg, "off") == 0 || strcasecmp(arg, "false") == 0) { + conf->removedirs = FALSE; + } + else { + return "CacheRemoveDirectories argument must be either on, true, off or false"; + } + return NULL; } + static const command_rec disk_cache_cmds[] = { AP_INIT_TAKE1("CacheRoot", set_cache_root, NULL, RSRC_CONF, @@ -1159,6 +2965,12 @@ static const command_rec disk_cache_cmds "The minimum file size to cache a document"), 
AP_INIT_TAKE1("CacheMaxFileSize", set_cache_maxfs, NULL, RSRC_CONF, "The maximum file size to cache a document"), + AP_INIT_TAKE1("CacheUpdateTimeout", set_cache_updtimeout, NULL, RSRC_CONF, + "Timeout in ms for cache updates"), + AP_INIT_TAKE1("CacheMinBGSize", set_cache_minbgsize, NULL, RSRC_CONF, + "The minimum file size for background caching"), + AP_INIT_TAKE1("CacheRemoveDirectories", set_cache_removedirs, NULL, RSRC_CONF, + "Should we try to remove directories when we remove expired cache files."), {NULL} }; diff -rup ../dist/modules/cache/mod_disk_cache.h ./modules/cache/mod_disk_cache.h --- ../dist/modules/cache/mod_disk_cache.h 2006-10-13 01:17:43.000000000 +0200 +++ ./modules/cache/mod_disk_cache.h 2008-06-11 13:39:12.000000000 +0200 @@ -22,12 +22,26 @@ */ #define VARY_FORMAT_VERSION 3 -#define DISK_FORMAT_VERSION 4 +#define DISK_FORMAT_VERSION_OLD 4 +#define DISK_FORMAT_VERSION_OLD2 5 +#define DISK_FORMAT_VERSION_OLD3 7 +#define DISK_FORMAT_VERSION 8 #define CACHE_HEADER_SUFFIX ".header" -#define CACHE_DATA_SUFFIX ".data" +#define CACHE_BODY_SUFFIX ".body" #define CACHE_VDIR_SUFFIX ".vary" +/* Size of buffer used when copying files */ +#define CACHE_BUF_SIZE 262144 + +/* How much the file on disk must have grown beyond the current offset + before diskcache_bucket_read breaks out of the stat/sleep-loop */ +#define CACHE_BUCKET_MINCHUNK 524288 + +/* How long to sleep before retrying while looping (micro-seconds) */ +#define CACHE_LOOP_MINSLEEP 10000 +#define CACHE_LOOP_MAXSLEEP 1000000 + #define AP_TEMPFILE_PREFIX "/" #define AP_TEMPFILE_BASE "aptmp" #define AP_TEMPFILE_SUFFIX "XXXXXX" @@ -35,22 +49,41 @@ #define AP_TEMPFILE_NAMELEN strlen(AP_TEMPFILE_BASE AP_TEMPFILE_SUFFIX) #define AP_TEMPFILE AP_TEMPFILE_PREFIX AP_TEMPFILE_BASE AP_TEMPFILE_SUFFIX +typedef apr_uint32_t disk_cache_format_t; + typedef struct { - /* Indicates the format of the header struct stored on-disk. */ - apr_uint32_t format; /* The HTTP status code returned for this response. 
*/ - int status; - /* The size of the entity name that follows. */ - apr_size_t name_len; + apr_int32_t status; /* The number of times we've cached this entity. */ - apr_size_t entity_version; + apr_uint32_t entity_version; /* Miscellaneous time values. */ apr_time_t date; apr_time_t expire; apr_time_t request_time; apr_time_t response_time; + apr_time_t lastmod; /* Last-Modified (if present) */ + + /* The body size forced to 64bit to not break when people go from non-LFS + * to LFS builds */ + apr_int64_t file_size; + + /* The size of the entity name that follows. */ + apr_uint32_t name_len; + /* The size of the body cache filename */ + apr_uint32_t bodyname_len; + /* The size of the filename that follows, to fill in r->filename */ + apr_uint32_t filename_len; + + /* On disk: + * name_len long string of entity name. + * bodyname_len long string of body cache filename (without cacheroot). + * filename_len long string of filename + */ } disk_cache_info_t; + +/* Don't expose module-related stuff unless needed */ +#ifdef AP_FILTER_H /* * disk_cache_object_t * Pointed to by cache_object_t::vobj @@ -58,18 +91,41 @@ typedef struct { typedef struct disk_cache_object { const char *root; /* the location of the cache directory */ apr_size_t root_len; - char *tempfile; /* temp file tohold the content */ - const char *prefix; - const char *datafile; /* name of file where the data will go */ - const char *hdrsfile; /* name of file where the hdrs will go */ - const char *hashfile; /* Computed hash key for this URI */ - const char *name; /* Requested URI without vary bits - suitable for mortals. */ - const char *key; /* On-disk prefix; URI with Vary bits (if present) */ - apr_file_t *fd; /* data file */ - apr_file_t *hfd; /* headers file */ - apr_file_t *tfd; /* temporary file for data */ - apr_off_t file_size; /* File size of the cached data file */ - disk_cache_info_t disk_info; /* Header information. 
*/ + + /* Temporary file */ + apr_file_t *tfd; + char *tempfile; + + /* Header cache file */ + apr_file_t *hfd; + const char *hdrsfile; + + /* Body cache file */ + apr_file_t *bfd; + const char *bodyfile; + + const char *name; /* Requested URI without vary bits - + suitable for mortals. */ + const char *prefix; /* Prefix to deal with Vary headers */ + char *filename; /* Filename of requested URL (if present) */ + char **rfilename; /* Pointer to r->filename */ + + apr_off_t initial_size; /* Size of body as reported upstreams */ + apr_off_t file_size; /* File size of the cached body */ + + apr_time_t lastmod; /* Last-Modified (if present) */ + + int skipstore; /* Set if we should skip storing stuff */ + + int removedirs; /* Set it we should rmdir when doing rm */ + + int header_only; /* Copy of r->header_only */ + + apr_interval_time_t updtimeout; /* Cache update timeout */ + + disk_cache_info_t disk_info; /* Disk header information. */ + + apr_off_t bytes_sent; /* Copy of r->bytes_sent before calling recall_body */ } disk_cache_object_t; @@ -82,14 +138,70 @@ typedef struct disk_cache_object { #define DEFAULT_DIRLENGTH 2 #define DEFAULT_MIN_FILE_SIZE 1 #define DEFAULT_MAX_FILE_SIZE 1000000 +/* Background caching disabled by default */ +#define DEFAULT_MIN_BACKGROUND_SIZE DEFAULT_MAX_FILE_SIZE +#define DEFAULT_UPDATE_TIMEOUT apr_time_from_sec(10) typedef struct { const char* cache_root; apr_size_t cache_root_len; int dirlevels; /* Number of levels of subdirectories */ int dirlength; /* Length of subdirectory names */ - apr_off_t minfs; /* minimum file size for cached files */ + apr_off_t minfs; /* minumum file size for cached files */ apr_off_t maxfs; /* maximum file size for cached files */ + apr_off_t minbgsize; /* minimum file size to do bg caching */ + apr_interval_time_t updtimeout; /* Cache update timeout */ + int removedirs; /* Should we try to remove directories? 
*/ } disk_cache_conf; +typedef struct diskcache_bucket_data diskcache_bucket_data; +struct diskcache_bucket_data { + /** Number of buckets using this memory */ + apr_bucket_refcount refcount; + apr_file_t *fd; + /** The pool into which any needed structures should + * be created while reading from this file bucket */ + apr_pool_t *readpool; + /* Cache update timeout */ + apr_interval_time_t updtimeout; + /* Adaptive loop delay timeout */ + apr_interval_time_t polldelay; +}; + +/* Stuff needed by the background copy thread */ +typedef struct copyinfo copyinfo; +struct copyinfo { + apr_off_t len; + /* Source info */ + const char *srcfile; + apr_finfo_t srcinfo; + apr_off_t srcoff; + /* Destination info */ + const char *destfile; + apr_off_t destoff; + + /* Cache update timeout */ + apr_interval_time_t updtimeout; + + /* Our private pool */ + apr_pool_t *pool; + +#if APR_HAS_THREADS + /* Background process info */ + apr_thread_t *t; +#endif /* APR_HAS_THREADS */ +#if APR_HAS_FORK + apr_proc_t *proc; +#endif /* APR_HAS_FORK */ + + /* For logging */ + const server_rec *s; +}; + +#define CACHE_ENODATA (APR_OS_START_USERERR+1) +#define CACHE_EDECLINED (APR_OS_START_USERERR+2) +#define CACHE_EEXIST (APR_OS_START_USERERR+3) + +#endif /* AP_FILTER_H */ + #endif /*MOD_DISK_CACHE_H*/ diff -rup ../dist/support/htcacheclean.c ./support/htcacheclean.c --- ../dist/support/htcacheclean.c 2007-07-25 15:13:49.000000000 +0200 +++ ./support/htcacheclean.c 2008-06-11 15:20:24.000000000 +0200 @@ -70,10 +70,8 @@ typedef struct _direntry { APR_RING_ENTRY(_direntry) link; int type; /* type of file/fileset: TEMP, HEADER, DATA, HEADERDATA */ - apr_time_t htime; /* headers file modification time */ - apr_time_t dtime; /* body file modification time */ - apr_off_t hsize; /* headers file size */ - apr_off_t dsize; /* body or temporary file size */ + apr_time_t htime; /* file modification time */ + apr_off_t hsize; /* file size */ char *basename; /* file/fileset base name */ } DIRENTRY; @@ -81,11 
+79,10 @@ typedef struct _entry { APR_RING_ENTRY(_entry) link; apr_time_t expire; /* cache entry exiration time */ apr_time_t response_time; /* cache entry time of last response to client */ - apr_time_t htime; /* headers file modification time */ - apr_time_t dtime; /* body file modification time */ - apr_off_t hsize; /* headers file size */ - apr_off_t dsize; /* body or temporary file size */ + apr_time_t htime; /* file modification time */ + apr_off_t hsize; /* file size */ char *basename; /* fileset base name */ + char *name; /* entity name */ } ENTRY; @@ -255,10 +252,7 @@ static void delete_entry(char *path, cha /* temp pool, otherwise lots of memory could be allocated */ apr_pool_create(&p, pool); - nextpath = apr_pstrcat(p, path, "/", basename, CACHE_HEADER_SUFFIX, NULL); - apr_file_remove(nextpath, p); - - nextpath = apr_pstrcat(p, path, "/", basename, CACHE_DATA_SUFFIX, NULL); + nextpath = apr_pstrcat(p, path, "/", basename, NULL); apr_file_remove(nextpath, p); apr_pool_destroy(p); @@ -286,13 +280,14 @@ static int process_dir(char *path, apr_p apr_finfo_t info; apr_size_t len; apr_time_t current, deviation; - char *nextpath, *base, *ext, *orig_basename; + char *nextpath, *base, *orig_basename; APR_RING_ENTRY(_direntry) anchor; DIRENTRY *d, *t, *n; ENTRY *e; int skip, retries; disk_cache_info_t disk_info; + APR_RING_INIT(&anchor, _direntry, link); apr_pool_create(&p, pool); h = apr_hash_make(p); @@ -329,12 +324,11 @@ static int process_dir(char *path, apr_p if (!base++) { base = d->basename; } - ext = strchr(base, '.'); /* there may be temporary files which may be gone before * processing, always skip these if not in realclean mode */ - if (!ext && !realclean) { + if (!realclean) { if (!strncasecmp(base, AP_TEMPFILE_BASE, AP_TEMPFILE_BASELEN) && strlen(base) == AP_TEMPFILE_NAMELEN) { continue; @@ -386,51 +380,30 @@ static int process_dir(char *path, apr_p continue; } - if (!ext) { + if(strncasecmp(base + strlen(base) - sizeof(CACHE_HEADER_SUFFIX) + 1, 
CACHE_HEADER_SUFFIX, sizeof(CACHE_HEADER_SUFFIX))) { + continue; + } + + if (!strncasecmp(base, AP_TEMPFILE_BASE, AP_TEMPFILE_BASELEN) && strlen(base) == AP_TEMPFILE_NAMELEN) { d->basename += skip; d->type = TEMP; - d->dsize = info.size; + d->hsize = info.size; apr_hash_set(h, d->basename, APR_HASH_KEY_STRING, d); - } continue; } - if (!strcasecmp(ext, CACHE_HEADER_SUFFIX)) { - *ext = '\0'; + /* Assume that everything else are cachefiles */ d->basename += skip; - /* if a user manually creates a '.header' file */ - if (d->basename[0] == '\0') { - continue; - } t = apr_hash_get(h, d->basename, APR_HASH_KEY_STRING); if (t) { d = t; } - d->type |= HEADER; + d->type |= HEADERDATA; d->htime = info.mtime; d->hsize = info.size; apr_hash_set(h, d->basename, APR_HASH_KEY_STRING, d); - continue; - } - - if (!strcasecmp(ext, CACHE_DATA_SUFFIX)) { - *ext = '\0'; - d->basename += skip; - /* if a user manually creates a '.data' file */ - if (d->basename[0] == '\0') { - continue; - } - t = apr_hash_get(h, d->basename, APR_HASH_KEY_STRING); - if (t) { - d = t; - } - d->type |= DATA; - d->dtime = info.mtime; - d->dsize = info.size; - apr_hash_set(h, d->basename, APR_HASH_KEY_STRING, d); - } } if (interrupted) { @@ -448,32 +421,40 @@ static int process_dir(char *path, apr_p switch(d->type) { case HEADERDATA: - nextpath = apr_pstrcat(p, path, "/", d->basename, - CACHE_HEADER_SUFFIX, NULL); + nextpath = apr_pstrcat(p, path, "/", d->basename, NULL); if (apr_file_open(&fd, nextpath, APR_FOPEN_READ | APR_FOPEN_BINARY, APR_OS_DEFAULT, p) == APR_SUCCESS) { len = sizeof(format); if (apr_file_read_full(fd, &format, len, &len) == APR_SUCCESS) { - if (format == DISK_FORMAT_VERSION) { - apr_off_t offset = 0; - - apr_file_seek(fd, APR_SET, &offset); + if (format == DISK_FORMAT_VERSION) { + apr_off_t offset; len = sizeof(disk_cache_info_t); if (apr_file_read_full(fd, &disk_info, len, &len) == APR_SUCCESS) { - apr_file_close(fd); e = apr_palloc(pool, sizeof(ENTRY)); APR_RING_INSERT_TAIL(&root, e, 
_entry, link); - e->expire = disk_info.expire; + e->expire = disk_info.expire; e->response_time = disk_info.response_time; - e->htime = d->htime; - e->dtime = d->dtime; - e->hsize = d->hsize; - e->dsize = d->dsize; + e->htime = disk_info.date; + e->hsize = disk_info.file_size; e->basename = apr_pstrdup(pool, d->basename); + e->name = apr_palloc(pool, disk_info.bodyname_len+1); + offset = disk_info.name_len; + apr_file_seek(fd, APR_CUR, &offset); + if(apr_file_read_full(fd, e->name, + disk_info.bodyname_len, NULL) + == APR_SUCCESS) + { + e->name[disk_info.bodyname_len] = '\0'; + } + else { + e->name = "UNKNOWN"; + } + apr_file_close(fd); + break; } else { @@ -482,11 +463,11 @@ static int process_dir(char *path, apr_p } else if (format == VARY_FORMAT_VERSION) { /* This must be a URL that added Vary headers later, - * so kill the orphaned .data file + * so kill the orphaned cachefile */ apr_file_close(fd); apr_file_remove(apr_pstrcat(p, path, "/", d->basename, - CACHE_DATA_SUFFIX, NULL), + NULL), p); } } @@ -505,58 +486,10 @@ static int process_dir(char *path, apr_p current = apr_time_now(); if (realclean || d->htime < current - deviation || d->htime > current + deviation) { - delete_entry(path, d->basename, p); + apr_file_printf(errfile, "header time %" APR_TIME_T_FMT ", current time %" APR_TIME_T_FMT ", deviation time %" APR_TIME_T_FMT + APR_EOL_STR, d->htime, current, deviation ); + delete_entry(path, d->basename, p); unsolicited += d->hsize; - unsolicited += d->dsize; - } - break; - - /* single data and header files may be deleted either in realclean - * mode or if their modification timestamp is not within a - * specified positive or negative offset to the current time. 
- * this handling is necessary due to possible race conditions - * between apache and this process - */ - case HEADER: - current = apr_time_now(); - nextpath = apr_pstrcat(p, path, "/", d->basename, - CACHE_HEADER_SUFFIX, NULL); - if (apr_file_open(&fd, nextpath, APR_FOPEN_READ | APR_FOPEN_BINARY, - APR_OS_DEFAULT, p) == APR_SUCCESS) { - len = sizeof(format); - if (apr_file_read_full(fd, &format, len, - &len) == APR_SUCCESS) { - if (format == VARY_FORMAT_VERSION) { - apr_time_t expires; - - len = sizeof(expires); - - apr_file_read_full(fd, &expires, len, &len); - - apr_file_close(fd); - - if (expires < current) { - delete_entry(path, d->basename, p); - } - break; - } - } - apr_file_close(fd); - } - - if (realclean || d->htime < current - deviation - || d->htime > current + deviation) { - delete_entry(path, d->basename, p); - unsolicited += d->hsize; - } - break; - - case DATA: - current = apr_time_now(); - if (realclean || d->dtime < current - deviation - || d->dtime > current + deviation) { - delete_entry(path, d->basename, p); - unsolicited += d->dsize; } break; @@ -565,7 +498,7 @@ static int process_dir(char *path, apr_p */ case TEMP: delete_file(path, d->basename, p); - unsolicited += d->dsize; + unsolicited += d->hsize; break; } } @@ -595,6 +528,7 @@ static void purge(char *path, apr_pool_t apr_off_t sum, total, entries, etotal; ENTRY *e, *n, *oldest; + sum = 0; entries = 0; @@ -602,7 +536,6 @@ static void purge(char *path, apr_pool_t e != APR_RING_SENTINEL(&root, _entry, link); e = APR_RING_NEXT(e, link)) { sum += e->hsize; - sum += e->dsize; entries++; } @@ -621,10 +554,10 @@ static void purge(char *path, apr_pool_t for (e = APR_RING_FIRST(&root); e != APR_RING_SENTINEL(&root, _entry, link) && !interrupted;) { n = APR_RING_NEXT(e, link); - if (e->response_time > now || e->htime > now || e->dtime > now) { + if (e->response_time > now || e->htime > now ) { delete_entry(path, e->basename, pool); + delete_entry(path, e->name, pool); sum -= e->hsize; - sum -= 
e->dsize; entries--; APR_RING_REMOVE(e, link); if (sum <= max) { @@ -647,8 +580,11 @@ static void purge(char *path, apr_pool_t n = APR_RING_NEXT(e, link); if (e->expire != APR_DATE_BAD && e->expire < now) { delete_entry(path, e->basename, pool); + delete_entry(path, e->name, pool); + if(verbose >= 1) { + apr_file_printf(errfile, "Expired: %s\n", e->name); + } sum -= e->hsize; - sum -= e->dsize; entries--; APR_RING_REMOVE(e, link); if (sum <= max) { @@ -676,14 +612,20 @@ static void purge(char *path, apr_pool_t for (e = APR_RING_NEXT(oldest, link); e != APR_RING_SENTINEL(&root, _entry, link); e = APR_RING_NEXT(e, link)) { - if (e->dtime < oldest->dtime) { + if (e->htime < oldest->htime) { oldest = e; } } delete_entry(path, oldest->basename, pool); + delete_entry(path, oldest->name, pool); + if(verbose >= 1) { + apr_file_printf(errfile, "Old: (%d s) " + "(%" APR_OFF_T_FMT " b) %s\n", + (int) apr_time_sec(apr_time_now() - oldest->htime), + oldest->hsize, oldest->name); + } sum -= oldest->hsize; - sum -= oldest->dsize; entries--; APR_RING_REMOVE(oldest, link); } @@ -831,10 +773,7 @@ int main(int argc, const char * const ar break; case 'v': - if (verbose) { - usage(); - } - verbose = 1; + verbose++; break; case 'r': @@ -985,7 +924,7 @@ int main(int argc, const char * const ar break; } - if (dowork && !interrupted) { + if (dowork && !interrupted) { if (!process_dir(path, instance) && !interrupted) { purge(path, instance, max); }