diff -rup dist/modules/cache/mod_disk_cache.c site/modules/cache/mod_disk_cache.c --- dist/modules/cache/mod_disk_cache.c 2006-07-12 05:38:44.000000000 +0200 +++ site/modules/cache/mod_disk_cache.c 2006-08-29 13:38:47.000000000 +0200 @@ -22,20 +22,21 @@ #include "util_filter.h" #include "util_script.h" #include "util_charset.h" +#include "ap_mpm.h" +#include "mpm_common.h" /* * mod_disk_cache: Disk Based HTTP 1.1 Cache. * - * Flow to Find the .data file: + * Flow to Find the right cache file: * Incoming client requests URI /foo/bar/baz * Generate off of /foo/bar/baz - * Open .header - * Read in .header file (may contain Format #1 or Format #2) + * Open + * Read in file (may contain Format #1 or Format #2) * If format #1 (Contains a list of Vary Headers): - * Use each header name (from .header) with our request values (headers_in) to + * Use each header name with our request values (headers_in) to * regenerate using HeaderName+HeaderValue+.../foo/bar/baz - * re-read in .header (must be format #2) - * read in .data + * re-read in (must be format #2) * * Format #1: * apr_uint32_t format; @@ -45,10 +46,13 @@ * Format #2: * disk_cache_info_t (first sizeof(apr_uint32_t) bytes is the format) * entity name (dobj->name) [length is in disk_cache_info_t->name_len] + * filename (r->filename) [length is in disk_cache_info_t->filename_len] * r->headers_out (delimited by CRLF) * CRLF * r->headers_in (delimited by CRLF) * CRLF + * + * Data is stored at the offset specified in disk_cache_info_t. */ module AP_MODULE_DECLARE_DATA disk_cache_module; @@ -62,72 +66,318 @@ static apr_status_t recall_body(cache_ha static apr_status_t read_array(request_rec *r, apr_array_header_t* arr, apr_file_t *file); + /* - * Local static functions + * Modified file bucket implementation to be able to deliver files + * while caching. 
*/ -static char *header_file(apr_pool_t *p, disk_cache_conf *conf, - disk_cache_object_t *dobj, const char *name) +/* Derived from apr_buckets_file.c */ + +#define BUCKET_IS_CACHE(e) ((e)->type == &bucket_type_diskcache) +APU_DECLARE_DATA const apr_bucket_type_t bucket_type_diskcache; + + +typedef struct diskcache_bucket_data diskcache_bucket_data; + +struct diskcache_bucket_data { + /** Number of buckets using this memory */ + apr_bucket_refcount refcount; + apr_file_t *fd; + /** The pool into which any needed structures should + * be created while reading from this file bucket */ + apr_pool_t *readpool; + /* Cache update timeout */ + apr_interval_time_t updtimeout; + +}; + +static void diskcache_bucket_destroy(void *data) { - if (!dobj->hashfile) { - dobj->hashfile = ap_cache_generate_name(p, conf->dirlevels, - conf->dirlength, name); + diskcache_bucket_data *f = data; + + if (apr_bucket_shared_destroy(f)) { + /* no need to close files here; it will get + * done automatically when the pool gets cleaned up */ + apr_bucket_free(f); + } +} + + +/* The idea here is to convert diskcache buckets to regular file buckets + as data becomes available */ +/* FIXME: Maybe we should care about the block argument... 
*/ +static apr_status_t diskcache_bucket_read(apr_bucket *e, const char **str, + apr_size_t *len, + apr_read_type_e block) +{ + diskcache_bucket_data *a = e->data; + apr_file_t *f = a->fd; + apr_bucket *b = NULL; + char *buf; + apr_status_t rv; + apr_finfo_t finfo; + apr_size_t filelength = e->length; /* bytes remaining in file past offset */ + apr_off_t fileoffset = e->start; + apr_off_t fileend; + apr_size_t available; +#if APR_HAS_THREADS && !APR_HAS_XTHREAD_FILES + apr_int32_t flags; +#endif + +#if APR_HAS_THREADS && !APR_HAS_XTHREAD_FILES + if ((flags = apr_file_flags_get(f)) & APR_XTHREAD) { + /* this file descriptor is shared across multiple threads and + * this OS doesn't support that natively, so as a workaround + * we must reopen the file into a->readpool */ + const char *fname; + apr_file_name_get(&fname, f); + + rv = apr_file_open(&f, fname, (flags & ~APR_XTHREAD), 0, a->readpool); + if (rv != APR_SUCCESS) + return rv; + + a->fd = f; + } +#endif + + /* in case we die prematurely */ + *str = NULL; + *len = 0; + + /* DEBUG + ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, NULL, + "disk_cache: Called diskcache_bucket_read"); + */ + + while(1) { + /* Figure out how big the file is right now, sit here until + it's grown enough or we get bored */ + fileend = 0; + rv = apr_file_seek(f, APR_END, &fileend); + if(rv != APR_SUCCESS) { + return rv; + } + + if(fileend >= fileoffset + MIN(filelength, CACHE_BUF_SIZE)) { + break; + } + + rv = apr_file_info_get(&finfo, APR_FINFO_MTIME, f); + if(rv != APR_SUCCESS || + finfo.mtime < (apr_time_now() - a->updtimeout) ) + { + return APR_EGENERAL; + } + apr_sleep(CACHE_LOOP_SLEEP); + } + + /* Convert this bucket to a zero-length heap bucket so we won't be called + again */ + buf = apr_bucket_alloc(0, e->list); + apr_bucket_heap_make(e, buf, 0, apr_bucket_free); + + /* Wrap as much as possible into a regular file bucket */ + available = MIN(filelength, fileend-fileoffset); + b = apr_bucket_file_create(f, fileoffset, available, 
a->readpool, e->list); + APR_BUCKET_INSERT_AFTER(e, b); + + /* DEBUG + ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, NULL, + "disk_cache: diskcache_bucket_read: Converted to regular file" + " off %" APR_OFF_T_FMT " len %" APR_SIZE_T_FMT, + fileoffset, available); + */ + + + /* Put any remains in yet another bucket */ + if(available < filelength) { + e=b; + /* for efficiency, we can just build a new apr_bucket struct + * to wrap around the existing bucket */ + b = apr_bucket_alloc(sizeof(*b), e->list); + b->start = fileoffset + available; + b->length = filelength - available; + b->data = a; + b->type = &bucket_type_diskcache; + b->free = apr_bucket_free; + b->list = e->list; + APR_BUCKET_INSERT_AFTER(e, b); + } + else { + diskcache_bucket_destroy(a); } - if (dobj->prefix) { - return apr_pstrcat(p, dobj->prefix, CACHE_VDIR_SUFFIX, "/", - dobj->hashfile, CACHE_HEADER_SUFFIX, NULL); - } - else { - return apr_pstrcat(p, conf->cache_root, "/", dobj->hashfile, - CACHE_HEADER_SUFFIX, NULL); - } + *str = buf; + return APR_SUCCESS; } -static char *data_file(apr_pool_t *p, disk_cache_conf *conf, - disk_cache_object_t *dobj, const char *name) +static apr_bucket * diskcache_bucket_make(apr_bucket *b, + apr_file_t *fd, + apr_off_t offset, + apr_size_t len, + apr_interval_time_t timeout, + apr_pool_t *p) { - if (!dobj->hashfile) { - dobj->hashfile = ap_cache_generate_name(p, conf->dirlevels, - conf->dirlength, name); + diskcache_bucket_data *f; + + f = apr_bucket_alloc(sizeof(*f), b->list); + f->fd = fd; + f->readpool = p; + f->updtimeout = timeout; + + b = apr_bucket_shared_make(b, f, offset, len); + b->type = &bucket_type_diskcache; + + return b; +} + +static apr_bucket * diskcache_bucket_create(apr_file_t *fd, + apr_off_t offset, + apr_size_t len, + apr_interval_time_t timeout, + apr_pool_t *p, + apr_bucket_alloc_t *list) +{ + apr_bucket *b = apr_bucket_alloc(sizeof(*b), list); + + APR_BUCKET_INIT(b); + b->free = apr_bucket_free; + b->list = list; + return diskcache_bucket_make(b, 
fd, offset, len, timeout, p); +} + + +/* FIXME: This is probably only correct for the first case, that seems + to be the one that occurs all the time... */ +static apr_status_t diskcache_bucket_setaside(apr_bucket *data, + apr_pool_t *reqpool) +{ + diskcache_bucket_data *a = data->data; + apr_file_t *fd = NULL; + apr_file_t *f = a->fd; + apr_pool_t *curpool = apr_file_pool_get(f); + + if (apr_pool_is_ancestor(curpool, reqpool)) { + return APR_SUCCESS; + } + + if (!apr_pool_is_ancestor(a->readpool, reqpool)) { + /* FIXME: Debug */ + ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, NULL, + "disk_cache: diskcache_bucket_setaside: apa2"); + a->readpool = reqpool; + } + + /* FIXME: Debug */ + ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, NULL, + "disk_cache: diskcache_bucket_setaside: apa3"); + + apr_file_setaside(&fd, f, reqpool); + a->fd = fd; + return APR_SUCCESS; +} + +APU_DECLARE_DATA const apr_bucket_type_t bucket_type_diskcache = { + "DISKCACHE", 5, APR_BUCKET_DATA, + diskcache_bucket_destroy, + diskcache_bucket_read, + diskcache_bucket_setaside, + apr_bucket_shared_split, + apr_bucket_shared_copy +}; + +/* From apr_brigade.c */ + +/* A "safe" maximum bucket size, 1Gb */ +#define MAX_BUCKET_SIZE (0x40000000) + +static apr_bucket * diskcache_brigade_insert(apr_bucket_brigade *bb, + apr_file_t *f, apr_off_t + start, apr_off_t length, + apr_interval_time_t timeout, + apr_pool_t *p) +{ + apr_bucket *e; + + if (length < MAX_BUCKET_SIZE) { + e = diskcache_bucket_create(f, start, (apr_size_t)length, timeout, p, + bb->bucket_alloc); + } + else { + /* Several buckets are needed. 
*/ + e = diskcache_bucket_create(f, start, MAX_BUCKET_SIZE, timeout, p, + bb->bucket_alloc); + + while (length > MAX_BUCKET_SIZE) { + apr_bucket *ce; + apr_bucket_copy(e, &ce); + APR_BRIGADE_INSERT_TAIL(bb, ce); + e->start += MAX_BUCKET_SIZE; + length -= MAX_BUCKET_SIZE; + } + e->length = (apr_size_t)length; /* Resize just the last bucket */ } - if (dobj->prefix) { - return apr_pstrcat(p, dobj->prefix, CACHE_VDIR_SUFFIX, "/", - dobj->hashfile, CACHE_DATA_SUFFIX, NULL); - } - else { - return apr_pstrcat(p, conf->cache_root, "/", dobj->hashfile, - CACHE_DATA_SUFFIX, NULL); - } + APR_BRIGADE_INSERT_TAIL(bb, e); + return e; +} + +/* --------------------------------------------------------------- */ + +/* + * Local static functions + */ + +static char *cache_file(apr_pool_t *p, disk_cache_conf *conf, + const char *prefix, const char *name) +{ + + char *hashfile; + + hashfile = ap_cache_generate_name(p, conf->dirlevels, + conf->dirlength, name); + if (prefix) { + return apr_pstrcat(p, prefix, CACHE_VDIR_SUFFIX, "/", + hashfile, NULL); + } + else { + return apr_pstrcat(p, conf->cache_root, "/", hashfile, NULL); + } } -static void mkdir_structure(disk_cache_conf *conf, const char *file, apr_pool_t *pool) + +static apr_status_t mkdir_structure(const char *file, apr_pool_t *pool) { apr_status_t rv; char *p; + int i; - for (p = (char*)file + conf->cache_root_len + 1;;) { - p = strchr(p, '/'); - if (!p) - break; - *p = '\0'; + p = strrchr(file, '/'); + if(!p) { + return APR_EGENERAL; + } - rv = apr_dir_make(file, - APR_UREAD|APR_UWRITE|APR_UEXECUTE, pool); - if (rv != APR_SUCCESS && !APR_STATUS_IS_EEXIST(rv)) { - /* XXX */ + *p = '\0'; + + /* Be stubborn to overcome racyness when others deletes directories + while we're trying to create them */ + for(i=0; i < 10; i++) { + rv = apr_dir_make_recursive(file, + APR_UREAD|APR_UWRITE|APR_UEXECUTE, pool); + if(rv == APR_SUCCESS) { + break; } - *p = '/'; - ++p; } + *p = '/'; + + return rv; } /* htcacheclean may remove directories 
underneath us. * So, we'll try renaming three times at a cost of 0.002 seconds. */ -static apr_status_t safe_file_rename(disk_cache_conf *conf, - const char *src, const char *dest, +static apr_status_t safe_file_rename(const char *src, const char *dest, apr_pool_t *pool) { apr_status_t rv; @@ -138,48 +388,32 @@ static apr_status_t safe_file_rename(dis int i; for (i = 0; i < 2 && rv != APR_SUCCESS; i++) { - /* 1000 micro-seconds aka 0.001 seconds. */ - apr_sleep(1000); - - mkdir_structure(conf, dest, pool); + mkdir_structure(dest, pool); rv = apr_file_rename(src, dest, pool); + + if(rv != APR_SUCCESS) { + /* 1000 micro-seconds aka 0.001 seconds. */ + apr_sleep(1000); + } } } return rv; } -static apr_status_t file_cache_el_final(disk_cache_object_t *dobj, - request_rec *r) -{ - /* move the data over */ - if (dobj->tfd) { - apr_status_t rv; - - apr_file_close(dobj->tfd); - /* This assumes that the tempfile is on the same file system - * as the cache_root. If not, then we need a file copy/move - * rather than a rename. - */ - rv = apr_file_rename(dobj->tempfile, dobj->datafile, r->pool); - if (rv != APR_SUCCESS) { - /* XXX log */ - } +static apr_status_t file_cache_errorcleanup(disk_cache_object_t *dobj, + request_rec *r) +{ + /* Remove the cache file */ + apr_file_remove(dobj->cachefile, r->pool); - dobj->tfd = NULL; + /* If there is an alias file, remove it */ + if(dobj->aliasfile != NULL) { + apr_file_remove(dobj->aliasfile, r->pool); } - return APR_SUCCESS; -} - -static apr_status_t file_cache_errorcleanup(disk_cache_object_t *dobj, request_rec *r) -{ - /* Remove the header file and the body file. */ - apr_file_remove(dobj->hdrsfile, r->pool); - apr_file_remove(dobj->datafile, r->pool); - /* If we opened the temporary data file, close and remove it. 
*/ if (dobj->tfd) { apr_file_close(dobj->tfd); @@ -191,53 +425,6 @@ static apr_status_t file_cache_errorclea } -/* These two functions get and put state information into the data - * file for an ap_cache_el, this state information will be read - * and written transparent to clients of this module - */ -static int file_cache_recall_mydata(apr_file_t *fd, cache_info *info, - disk_cache_object_t *dobj, request_rec *r) -{ - apr_status_t rv; - char *urlbuff; - disk_cache_info_t disk_info; - apr_size_t len; - - /* read the data from the cache file */ - len = sizeof(disk_cache_info_t); - rv = apr_file_read_full(fd, &disk_info, len, &len); - if (rv != APR_SUCCESS) { - return rv; - } - - /* Store it away so we can get it later. */ - dobj->disk_info = disk_info; - - info->status = disk_info.status; - info->date = disk_info.date; - info->expire = disk_info.expire; - info->request_time = disk_info.request_time; - info->response_time = disk_info.response_time; - - /* Note that we could optimize this by conditionally doing the palloc - * depending upon the size. */ - urlbuff = apr_palloc(r->pool, disk_info.name_len + 1); - len = disk_info.name_len; - rv = apr_file_read_full(fd, urlbuff, len, &len); - if (rv != APR_SUCCESS) { - return rv; - } - urlbuff[disk_info.name_len] = '\0'; - - /* check that we have the same URL */ - /* Would strncmp be correct? 
*/ - if (strcmp(urlbuff, dobj->name) != 0) { - return APR_EGENERAL; - } - - return APR_SUCCESS; -} - static const char* regen_key(apr_pool_t *p, apr_table_t *headers, apr_array_header_t *varray, const char *oldkey) { @@ -327,6 +514,22 @@ static int create_entity(cache_handle_t return DECLINED; } + /* Note, len is -1 if unknown so don't trust it too hard */ + if (len > conf->maxfs) { + ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server, + "disk_cache: URL %s failed the size check " + "(%" APR_OFF_T_FMT " > %" APR_OFF_T_FMT ")", + key, len, conf->maxfs); + return DECLINED; + } + if (len >= 0 && len < conf->minfs) { + ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server, + "disk_cache: URL %s failed the size check " + "(%" APR_OFF_T_FMT " < %" APR_OFF_T_FMT ")", + key, len, conf->minfs); + return DECLINED; + } + /* Allocate and initialize cache_object_t and disk_cache_object_t */ h->cache_obj = obj = apr_pcalloc(r->pool, sizeof(*obj)); obj->vobj = dobj = apr_pcalloc(r->pool, sizeof(*dobj)); @@ -334,212 +537,520 @@ static int create_entity(cache_handle_t obj->key = apr_pstrdup(r->pool, key); dobj->name = obj->key; - dobj->prefix = NULL; /* Save the cache root */ dobj->root = apr_pstrndup(r->pool, conf->cache_root, conf->cache_root_len); dobj->root_len = conf->cache_root_len; - dobj->datafile = data_file(r->pool, conf, dobj, key); - dobj->hdrsfile = header_file(r->pool, conf, dobj, key); + dobj->cachefile = cache_file(r->pool, conf, dobj->prefix, key); dobj->tempfile = apr_pstrcat(r->pool, conf->cache_root, AP_TEMPFILE, NULL); + dobj->bodysize_in = len; + dobj->bodysize = -1; + dobj->bodyoff = CACHE_DATA_OFFSET; + dobj->updtimeout = conf->updtimeout; + dobj->removedirs = conf->removedirs; + + /* FIXME: Make the minsize to cache files separately runtime config */ + if(len > CACHE_BUF_SIZE && r->filename != NULL && + strlen(r->filename) > 0) + { + dobj->filename = r->filename; + dobj->aliasfile = dobj->cachefile; + dobj->cachefile = cache_file(r->pool, conf, NULL, 
r->filename); + } return OK; } -static int open_entity(cache_handle_t *h, request_rec *r, const char *key) + +static apr_status_t file_read_timeout(apr_file_t *file, char * buf, + apr_size_t len, apr_time_t timeout) { - apr_uint32_t format; - apr_size_t len; - const char *nkey; - apr_status_t rc; - static int error_logged = 0; - disk_cache_conf *conf = ap_get_module_config(r->server->module_config, - &disk_cache_module); + apr_size_t left, done; apr_finfo_t finfo; - cache_object_t *obj; - cache_info *info; - disk_cache_object_t *dobj; - int flags; + apr_status_t rc; - h->cache_obj = NULL; + done = 0; + left = len; - /* Look up entity keyed to 'url' */ - if (conf->cache_root == NULL) { - if (!error_logged) { - error_logged = 1; - ap_log_error(APLOG_MARK, APLOG_ERR, 0, r->server, - "disk_cache: Cannot cache files to disk without a CacheRoot specified."); + while(1) { + rc = apr_file_read_full(file, buf+done, left, &len); + if (rc == APR_SUCCESS) { + break; } - return DECLINED; - } + done += len; + left -= len; - /* Create and init the cache object */ - h->cache_obj = obj = apr_pcalloc(r->pool, sizeof(cache_object_t)); - obj->vobj = dobj = apr_pcalloc(r->pool, sizeof(disk_cache_object_t)); + if(rc != APR_EOF) { + return rc; + } + rc = apr_file_info_get(&finfo, APR_FINFO_MTIME, file); + if(rc != APR_SUCCESS) { + return rc; + } + if(finfo.mtime < (apr_time_now() - timeout) ) { + return APR_ETIMEDOUT; + } + apr_sleep(CACHE_LOOP_SLEEP); + } - info = &(obj->info); + return APR_SUCCESS; +} - /* Open the headers file */ - dobj->prefix = NULL; - /* Save the cache root */ - dobj->root = apr_pstrndup(r->pool, conf->cache_root, conf->cache_root_len); - dobj->root_len = conf->cache_root_len; +static apr_status_t open_header(cache_handle_t *h, request_rec *r, + const char *key, disk_cache_conf *conf) +{ + int flags; + disk_cache_format_t format; + apr_status_t rc; + const char *nkey = key; + disk_cache_info_t disk_info; + cache_object_t *obj = h->cache_obj; + disk_cache_object_t 
*dobj = obj->vobj; + core_dir_config *pdconf = ap_get_module_config(r->per_dir_config, + &core_module); - dobj->hdrsfile = header_file(r->pool, conf, dobj, key); flags = APR_READ|APR_BINARY|APR_BUFFERED; - rc = apr_file_open(&dobj->hfd, dobj->hdrsfile, flags, 0, r->pool); +#if APR_HAS_SENDFILE + flags |= ((pdconf->enable_sendfile == ENABLE_SENDFILE_OFF) + ? 0 : APR_SENDFILE_ENABLED); +#endif + + rc = apr_file_open(&dobj->fd, dobj->cachefile, flags, 0, r->pool); if (rc != APR_SUCCESS) { - return DECLINED; + return CACHE_EDECLINED; } /* read the format from the cache file */ - len = sizeof(format); - apr_file_read_full(dobj->hfd, &format, len, &len); + rc = apr_file_read_full(dobj->fd, &format, sizeof(format), NULL); + if(rc != APR_SUCCESS) { + return CACHE_ENODATA; + } if (format == VARY_FORMAT_VERSION) { apr_array_header_t* varray; apr_time_t expire; - len = sizeof(expire); - apr_file_read_full(dobj->hfd, &expire, len, &len); + rc = apr_file_read_full(dobj->fd, &expire, sizeof(expire), NULL); + if(rc != APR_SUCCESS) { + return CACHE_ENODATA; + } if (expire < r->request_time) { - return DECLINED; + return CACHE_EDECLINED; } varray = apr_array_make(r->pool, 5, sizeof(char*)); - rc = read_array(r, varray, dobj->hfd); + rc = read_array(r, varray, dobj->fd); if (rc != APR_SUCCESS) { ap_log_error(APLOG_MARK, APLOG_ERR, rc, r->server, "disk_cache: Cannot parse vary header file: %s", - dobj->hdrsfile); - return DECLINED; + dobj->cachefile); + return CACHE_EDECLINED; } - apr_file_close(dobj->hfd); + apr_file_close(dobj->fd); nkey = regen_key(r->pool, r->headers_in, varray, key); - dobj->hashfile = NULL; - dobj->prefix = dobj->hdrsfile; - dobj->hdrsfile = header_file(r->pool, conf, dobj, nkey); + dobj->prefix = dobj->cachefile; + dobj->cachefile = cache_file(r->pool, conf, dobj->prefix, nkey); - flags = APR_READ|APR_BINARY|APR_BUFFERED; - rc = apr_file_open(&dobj->hfd, dobj->hdrsfile, flags, 0, r->pool); + rc = apr_file_open(&dobj->fd, dobj->cachefile, flags, 0, r->pool); if 
(rc != APR_SUCCESS) { - return DECLINED; + return CACHE_EDECLINED; + } + rc = apr_file_read_full(dobj->fd, &format, sizeof(format), NULL); + if(rc != APR_SUCCESS) { + return CACHE_ENODATA; } - } - else if (format != DISK_FORMAT_VERSION) { - ap_log_error(APLOG_MARK, APLOG_ERR, 0, r->server, - "cache_disk: File '%s' has a version mismatch. File had version: %d.", - dobj->hdrsfile, format); - return DECLINED; - } - else { - apr_off_t offset = 0; - /* This wasn't a Vary Format file, so we must seek to the - * start of the file again, so that later reads work. - */ - apr_file_seek(dobj->hfd, APR_SET, &offset); - nkey = key; } - obj->key = nkey; - dobj->key = nkey; - dobj->name = key; - dobj->datafile = data_file(r->pool, conf, dobj, nkey); - dobj->tempfile = apr_pstrcat(r->pool, conf->cache_root, AP_TEMPFILE, NULL); + if (format == ALIAS_FORMAT_VERSION) { + disk_cache_alias_t disk_alias; + char name[MAX_STRING_LEN+1]; + char *filename; + + rc = apr_file_read_full(dobj->fd, &disk_alias, sizeof(disk_alias), NULL); + if (rc != APR_SUCCESS) { + ap_log_error(APLOG_MARK, APLOG_ERR, rc, r->server, + "disk_cache: Cannot load file header: %s", + dobj->cachefile); + return CACHE_ENODATA; + } + + /* We must return DECLINED here, since we can't tell whether the + URL->file mapping has changed without letting the request pass + through the filters that determines an eventual r->filename */ + if (disk_alias.expire < r->request_time) { + return CACHE_EDECLINED; + } + + if(disk_alias.name_len > MAX_STRING_LEN || + disk_alias.filename_len > MAX_STRING_LEN) + { + ap_log_error(APLOG_MARK, APLOG_ERR, rc, r->server, + "disk_cache: Corrupt file header, deleting: %s", + dobj->cachefile); + file_cache_errorcleanup(dobj, r); + return CACHE_EDECLINED; + } + + rc = apr_file_read_full(dobj->fd, &name, disk_alias.name_len, NULL); + if(rc != APR_SUCCESS) { + return CACHE_ENODATA; + } + name[disk_alias.name_len] = '\0'; + /* check that we have the same URL */ + if (strcmp(name, key) != 0) { + 
ap_log_error(APLOG_MARK, APLOG_ERR, 0, r->server, + "disk_cache: Cached URL %s didn't match requested URL %s", + name, key); + return CACHE_EDECLINED; + } + + filename = apr_palloc(r->pool, disk_alias.filename_len+1); + rc = apr_file_read_full(dobj->fd, filename, disk_alias.filename_len, + NULL); + if(rc != APR_SUCCESS) { + return CACHE_ENODATA; + } + filename[disk_alias.filename_len] = '\0'; + + dobj->filename = filename; + + apr_file_close(dobj->fd); + + dobj->aliasfile = dobj->cachefile; + dobj->prefix = NULL; + dobj->cachefile = cache_file(r->pool, conf, dobj->prefix, filename); + + rc = apr_file_open(&dobj->fd, dobj->cachefile, flags, 0, r->pool); + if (rc != APR_SUCCESS) { + return CACHE_EDECLINED; + } + rc = apr_file_read_full(dobj->fd, &format, sizeof(format), NULL); + if(rc != APR_SUCCESS) { + return CACHE_ENODATA; + } + + ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server, + "disk_cache: URL %s is a file cached in %s", + key, dobj->cachefile); - /* Open the data file */ - flags = APR_READ|APR_BINARY; -#ifdef APR_SENDFILE_ENABLED - flags |= APR_SENDFILE_ENABLED; -#endif - rc = apr_file_open(&dobj->fd, dobj->datafile, flags, 0, r->pool); - if (rc != APR_SUCCESS) { - /* XXX: Log message */ - return DECLINED; } - rc = apr_file_info_get(&finfo, APR_FINFO_SIZE, dobj->fd); - if (rc == APR_SUCCESS) { - dobj->file_size = finfo.size; + if(format != DISK_FORMAT_VERSION) { + ap_log_error(APLOG_MARK, APLOG_INFO, 0, r->server, + "disk_cache: File '%s' had a version mismatch. File had " + "version: %d (current is %d). 
Deleted.", dobj->cachefile, + format, DISK_FORMAT_VERSION); + file_cache_errorcleanup(dobj, r); + return CACHE_EDECLINED; } - /* Read the bytes to setup the cache_info fields */ - rc = file_cache_recall_mydata(dobj->hfd, info, dobj, r); + obj->key = nkey; + dobj->name = key; + + /* read the data from the cache file */ + rc = apr_file_read_full(dobj->fd, &disk_info, sizeof(disk_info), NULL); if (rc != APR_SUCCESS) { - /* XXX log message */ - return DECLINED; + ap_log_error(APLOG_MARK, APLOG_ERR, rc, r->server, + "disk_cache: Cannot load header file: %s", + dobj->cachefile); + return CACHE_ENODATA; } - /* Initialize the cache_handle callback functions */ - ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server, - "disk_cache: Recalled cached URL info header %s", dobj->name); - return OK; -} + /* Store it away so we can get it later. */ + dobj->disk_info = disk_info; -static int remove_entity(cache_handle_t *h) -{ - /* Null out the cache object pointer so next time we start from scratch */ - h->cache_obj = NULL; - return OK; + return APR_SUCCESS; } -static int remove_url(cache_handle_t *h, apr_pool_t *p) +static apr_status_t open_header_timeout(cache_handle_t *h, request_rec *r, + const char *key, disk_cache_conf *conf, + disk_cache_object_t *dobj) { apr_status_t rc; - disk_cache_object_t *dobj; - - /* Get disk cache object from cache handle */ - dobj = (disk_cache_object_t *) h->cache_obj->vobj; - if (!dobj) { - return DECLINED; - } + apr_finfo_t finfo; - /* Delete headers file */ - if (dobj->hdrsfile) { - ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, NULL, - "disk_cache: Deleting %s from cache.", dobj->hdrsfile); + while(1) { + if(dobj->fd) { + apr_file_close(dobj->fd); + } + rc = open_header(h, r, key, conf); + if(rc != APR_SUCCESS && rc != CACHE_ENODATA) { + return rc; + } - rc = apr_file_remove(dobj->hdrsfile, p); - if ((rc != APR_SUCCESS) && !APR_STATUS_IS_ENOENT(rc)) { - /* Will only result in an output if httpd is started with -e debug. 
- * For reason see log_error_core for the case s == NULL. - */ - ap_log_error(APLOG_MARK, APLOG_DEBUG, rc, NULL, - "disk_cache: Failed to delete headers file %s from cache.", - dobj->hdrsfile); + if(rc == APR_SUCCESS && dobj->disk_info.bodysize >= 0) { + break; + } + rc = apr_file_info_get(&finfo, APR_FINFO_MTIME, dobj->fd); + if(rc != APR_SUCCESS) { + return rc; + } + if(finfo.mtime < (apr_time_now() - dobj->updtimeout)) { + ap_log_error(APLOG_MARK, APLOG_WARNING, 0, r->server, + "disk_cache: Timed out waiting for header for URL %s" + " - caching the body failed?", key); + return CACHE_EDECLINED; + } + apr_sleep(CACHE_LOOP_SLEEP); + } + + return APR_SUCCESS; +} + +static int open_entity(cache_handle_t *h, request_rec *r, const char *key) +{ + apr_status_t rc; + disk_cache_object_t *dobj; + cache_info *info; + apr_size_t len; + apr_off_t off, hdroff; + static int error_logged = 0; + disk_cache_conf *conf = ap_get_module_config(r->server->module_config, + &disk_cache_module); + apr_finfo_t finfo; + + h->cache_obj = NULL; + + /* Look up entity keyed to 'url' */ + if (conf->cache_root == NULL) { + if (!error_logged) { + error_logged = 1; + ap_log_error(APLOG_MARK, APLOG_ERR, 0, r->server, + "disk_cache: Cannot cache files to disk without a " + "CacheRoot specified."); + } + return DECLINED; + } + + /* Create and init the cache object */ + h->cache_obj = apr_pcalloc(r->pool, sizeof(cache_object_t)); + h->cache_obj->vobj = dobj = apr_pcalloc(r->pool, sizeof(disk_cache_object_t)); + info = &(h->cache_obj->info); + + /* Save the cache root */ + dobj->root = apr_pstrndup(r->pool, conf->cache_root, conf->cache_root_len); + dobj->root_len = conf->cache_root_len; + + dobj->cachefile = cache_file(r->pool, conf, dobj->prefix, key); + + dobj->updtimeout = conf->updtimeout; + dobj->removedirs = conf->removedirs; + + /* Open header and read basic info, wait until header contains + valid size information for the body */ + rc = open_header_timeout(h, r, key, conf, dobj); + if(rc != 
APR_SUCCESS) { + return DECLINED; + } + + info->status = dobj->disk_info.status; + info->date = dobj->disk_info.date; + info->expire = dobj->disk_info.expire; + info->request_time = dobj->disk_info.request_time; + info->response_time = dobj->disk_info.response_time; + + dobj->bodyoff = (apr_off_t) dobj->disk_info.bodyoff; + dobj->bodysize_in = (apr_off_t) dobj->disk_info.bodysize; + dobj->tempfile = apr_pstrcat(r->pool, conf->cache_root, AP_TEMPFILE, NULL); + + if(dobj->disk_info.name_len > MAX_STRING_LEN || + dobj->disk_info.filename_len > MAX_STRING_LEN) + { + ap_log_error(APLOG_MARK, APLOG_ERR, rc, r->server, + "disk_cache: Corrupt cache header, deleting: %s", + dobj->cachefile); + file_cache_errorcleanup(dobj, r); + return DECLINED; + } + + if(dobj->aliasfile == NULL) { + char urlbuff[MAX_STRING_LEN]; + + len = dobj->disk_info.name_len; + + if(len > 0) { + rc = file_read_timeout(dobj->fd, urlbuff, len, dobj->updtimeout); + if (rc == APR_ETIMEDOUT) { + ap_log_error(APLOG_MARK, APLOG_WARNING, rc, r->server, + "disk_cache: Timed out waiting for urlbuff for " + "URL %s - caching failed?", key); + return DECLINED; + } + else if(rc != APR_SUCCESS) { + ap_log_error(APLOG_MARK, APLOG_WARNING, rc, r->server, + "disk_cache: Error reading urlbuff for URL %s", + key); + return DECLINED; + } + } + urlbuff[len] = '\0'; + + /* check that we have the same URL */ + if (strcmp(urlbuff, dobj->name) != 0) { + ap_log_error(APLOG_MARK, APLOG_ERR, 0, r->server, + "disk_cache: Cached URL %s didn't match requested " + "URL %s", urlbuff, dobj->name); + return DECLINED; + } + } + else { + /* The URL has already been checked when we loaded the alias header */ + if(dobj->disk_info.name_len > 0) { + /* Seek past the URL */ + off = dobj->disk_info.name_len; + rc = apr_file_seek(dobj->fd, APR_CUR, &off); + if(rc != APR_SUCCESS) { + return rc; + } + } + } + + /* Read in the filename */ + if(dobj->disk_info.filename_len > 0) { + char fnamebuf[MAX_STRING_LEN]; + + len = 
dobj->disk_info.filename_len; + rc = file_read_timeout(dobj->fd, fnamebuf, len, dobj->updtimeout); + if (rc == APR_ETIMEDOUT) { + ap_log_error(APLOG_MARK, APLOG_WARNING, rc, r->server, + "disk_cache: Timed out waiting for filename for " + "URL %s - caching failed?", key); + return DECLINED; + } + else if(rc != APR_SUCCESS) { + ap_log_error(APLOG_MARK, APLOG_WARNING, rc, r->server, + "disk_cache: Error reading filename for URL %s", + key); + return DECLINED; + } + fnamebuf[len] = '\0'; + + if(dobj->aliasfile == NULL) { + dobj->filename = apr_pstrdup(r->pool, fnamebuf); + } + else { + /* Sanity check so we are delivering the right file */ + if(strcmp(fnamebuf, dobj->filename) != 0) { + ap_log_error(APLOG_MARK, APLOG_ERR, 0, r->server, + "disk_cache: Cached file %s didn't match wanted " + "file %s", fnamebuf, dobj->filename); + return DECLINED; + } + } + r->filename = dobj->filename; + } + + /* Remember where we were */ + hdroff = 0; + rc = apr_file_seek(dobj->fd, APR_CUR, &hdroff); + if(rc != APR_SUCCESS) { + return rc; + } + + /* Wait here until we get data in the data segment, do quick sanity check + if it's there already */ + while(1) { + off = 0; + rc = apr_file_seek(dobj->fd, APR_END, &off); + if(rc != APR_SUCCESS) { + return rc; + } + if(off > dobj->bodyoff) { + dobj->bodysize = off - dobj->bodyoff; + } + else { + dobj->bodysize = 0; + } + + if(dobj->bodysize_in < dobj->bodysize) { + ap_log_error(APLOG_MARK, APLOG_ERR, 0, r->server, + "disk_cache: Bad cached body for URL %s, size %" + APR_OFF_T_FMT " != %" APR_OFF_T_FMT, dobj->name, + dobj->bodysize, off - dobj->bodyoff); + file_cache_errorcleanup(dobj, r); return DECLINED; } + else if(dobj->bodysize_in > dobj->bodysize) { + /* Still caching or failed? 
*/ + rc = apr_file_info_get(&finfo, APR_FINFO_MTIME, dobj->fd); + if(rc != APR_SUCCESS || + finfo.mtime < (apr_time_now() - dobj->updtimeout) ) + { + ap_log_error(APLOG_MARK, APLOG_WARNING, rc, r->server, + "disk_cache: Body for URL %s is too small - " + "caching the body failed?", dobj->name); + return DECLINED; + } + } + if(dobj->bodysize > 0) { + break; + } + apr_sleep(CACHE_LOOP_SLEEP); + } + + /* Go back to where we were */ + rc = apr_file_seek(dobj->fd, APR_SET, &hdroff); + if(rc != APR_SUCCESS) { + return rc; } - /* Delete data file */ - if (dobj->datafile) { + ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server, + "disk_cache: Recalled status for cached URL %s", dobj->name); + return OK; +} + + +static int remove_entity(cache_handle_t *h) +{ + /* Null out the cache object pointer so next time we start from scratch */ + h->cache_obj = NULL; + return OK; +} + + +/* FIXME: It would make sense to have the errorcleanup and this function + to be the same */ +static int remove_url(cache_handle_t *h, apr_pool_t *p) +{ + apr_status_t rc; + disk_cache_object_t *dobj; + + /* Get disk cache object from cache handle */ + dobj = (disk_cache_object_t *) h->cache_obj->vobj; + if (!dobj) { + return DECLINED; + } + + /* Delete cache file */ + if (dobj->cachefile) { ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, NULL, - "disk_cache: Deleting %s from cache.", dobj->datafile); + "disk_cache: Deleting %s from cache.", dobj->cachefile); - rc = apr_file_remove(dobj->datafile, p); + rc = apr_file_remove(dobj->cachefile, p); if ((rc != APR_SUCCESS) && !APR_STATUS_IS_ENOENT(rc)) { /* Will only result in an output if httpd is started with -e debug. * For reason see log_error_core for the case s == NULL. 
*/ ap_log_error(APLOG_MARK, APLOG_DEBUG, rc, NULL, - "disk_cache: Failed to delete data file %s from cache.", - dobj->datafile); + "disk_cache: Failed to delete headers file %s " + "from cache.", dobj->cachefile); return DECLINED; } } + if(!dobj->removedirs) { + return OK; + } + /* now delete directories as far as possible up to our cache root */ if (dobj->root) { - const char *str_to_copy; - - str_to_copy = dobj->hdrsfile ? dobj->hdrsfile : dobj->datafile; - if (str_to_copy) { + if (dobj->cachefile) { char *dir, *slash, *q; - dir = apr_pstrdup(p, str_to_copy); + dir = apr_pstrdup(p, dobj->cachefile); /* remove filename */ slash = strrchr(dir, '/'); @@ -550,10 +1061,10 @@ static int remove_url(cache_handle_t *h, * in the way as far as possible * * Note: due to the way we constructed the file names in - * header_file and data_file, we are guaranteed that the - * cache_root is suffixed by at least one '/' which will be - * turned into a terminating null by this loop. Therefore, - * we won't either delete or go above our cache root. + * cache_file, we are guaranteed that the cache_root is suffixed by + * at least one '/' which will be turned into a terminating null by + * this loop. Therefore, we won't either delete or go above our + * cache root. */ for (q = dir + dobj->root_len; *q ; ) { ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, NULL, @@ -584,7 +1095,7 @@ static apr_status_t read_array(request_r rv = apr_file_gets(w, MAX_STRING_LEN - 1, file); if (rv != APR_SUCCESS) { ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, - "Premature end of vary array."); + "disk_cache: Premature end of vary array."); return rv; } @@ -639,7 +1150,7 @@ static apr_status_t store_array(apr_file &amt); } -static apr_status_t read_table(cache_handle_t *handle, request_rec *r, +static apr_status_t read_table(request_rec *r, apr_table_t *table, apr_file_t *file) { char w[MAX_STRING_LEN]; @@ -652,8 +1163,6 @@ static apr_status_t read_table(cache_han /* ### What about APR_EOF? 
*/ rv = apr_file_gets(w, MAX_STRING_LEN - 1, file); if (rv != APR_SUCCESS) { - ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, - "Premature end of cache headers."); return rv; } @@ -696,7 +1205,7 @@ static apr_status_t read_table(cache_han } if (maybeASCII > maybeEBCDIC) { ap_log_error(APLOG_MARK, APLOG_ERR, 0, r->server, - "CGI Interface Error: Script headers apparently ASCII: (CGI = %s)", + "disk_cache: CGI Interface Error: Script headers apparently ASCII: (CGI = %s)", r->filename); inbytes_left = outbytes_left = cp - w; apr_xlate_conv_buffer(ap_hdrs_from_ascii, @@ -721,6 +1230,51 @@ static apr_status_t read_table(cache_han return APR_SUCCESS; } + +static apr_status_t read_table_full(cache_handle_t *handle, request_rec *r, + apr_table_t **table, apr_file_t *file, + apr_time_t timeout) +{ + apr_off_t off; + apr_finfo_t finfo; + apr_status_t rv; + + off = 0; + rv = apr_file_seek(file, APR_CUR, &off); + if(rv != APR_SUCCESS) { + return rv; + } + + while(1) { + *table = apr_table_make(r->pool, 20); + rv = read_table(r, *table, file); + if(rv == APR_SUCCESS) { + break; + } + apr_table_clear(*table); + + rv = apr_file_seek(file, APR_SET, &off); + if(rv != APR_SUCCESS) { + return rv; + } + + rv = apr_file_info_get(&finfo, APR_FINFO_MTIME, file); + if(rv != APR_SUCCESS || + finfo.mtime < (apr_time_now() - timeout) ) + { + ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, + "disk_cache: Timed out waiting for cache headers " + "URL %s", handle->cache_obj->key); + return APR_EGENERAL; + } + apr_sleep(CACHE_LOOP_SLEEP); + } + + return APR_SUCCESS; +} + + + /* * Reads headers from a buffer and returns an array of headers. * Returns NULL on file error @@ -731,21 +1285,40 @@ static apr_status_t read_table(cache_han static apr_status_t recall_headers(cache_handle_t *h, request_rec *r) { disk_cache_object_t *dobj = (disk_cache_object_t *) h->cache_obj->vobj; + apr_status_t rv; + apr_off_t off; /* This case should not happen... 
*/ - if (!dobj->hfd) { + if (!dobj->fd) { /* XXX log message */ return APR_NOTFOUND; } - h->req_hdrs = apr_table_make(r->pool, 20); - h->resp_hdrs = apr_table_make(r->pool, 20); + rv = read_table_full(h, r, &(h->resp_hdrs), dobj->fd, dobj->updtimeout); + if(rv != APR_SUCCESS) { + ap_log_error(APLOG_MARK, APLOG_ERR, rv, r->server, + "disk_cache: Timed out waiting for response headers " + "for URL %s - caching failed?", dobj->name); + return rv; + } - /* Call routine to read the header lines/status line */ - read_table(h, r, h->resp_hdrs, dobj->hfd); - read_table(h, r, h->req_hdrs, dobj->hfd); + rv = read_table_full(h, r, &(h->req_hdrs), dobj->fd, dobj->updtimeout); + if(rv != APR_SUCCESS) { + ap_log_error(APLOG_MARK, APLOG_ERR, rv, r->server, + "disk_cache: Timed out waiting for request headers " + "for URL %s - caching failed?", dobj->name); + return rv; + } - apr_file_close(dobj->hfd); + off = 0; + rv = apr_file_seek(dobj->fd, APR_CUR, &off); + if(rv != APR_SUCCESS || off > dobj->bodyoff) { + ap_log_error(APLOG_MARK, APLOG_ERR, rv, r->server, + "disk_cache: Too much headers (%" APR_OFF_T_FMT " > %" + APR_OFF_T_FMT ") for URL %s, skipping", + off, dobj->bodyoff, dobj->name); + return APR_EGENERAL; + } ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server, "disk_cache: Recalled headers for URL %s", dobj->name); @@ -757,9 +1330,29 @@ static apr_status_t recall_body(cache_ha apr_bucket *e; disk_cache_object_t *dobj = (disk_cache_object_t*) h->cache_obj->vobj; - e = apr_bucket_file_create(dobj->fd, 0, (apr_size_t) dobj->file_size, p, - bb->bucket_alloc); - APR_BRIGADE_INSERT_HEAD(bb, e); + + /* Insert as much as possible as regular file (ie. 
sendfile():able) */ + if(dobj->bodysize > 0) { + if(apr_brigade_insert_file(bb, dobj->fd, dobj->bodyoff, + dobj->bodysize, p) == NULL) + { + return APR_ENOMEM; + } + } + + /* Insert any remainder as read-while-caching bucket */ + if(dobj->bodysize < dobj->bodysize_in) { + if(diskcache_brigade_insert(bb, dobj->fd, + dobj->bodyoff + dobj->bodysize, + dobj->bodysize_in - dobj->bodysize, + dobj->updtimeout, p + ) == NULL) + { + return APR_ENOMEM; + } + } + + e = apr_bucket_eos_create(bb->bucket_alloc); APR_BRIGADE_INSERT_TAIL(bb, e); @@ -800,100 +1393,220 @@ static apr_status_t store_table(apr_file return rv; } -static apr_status_t store_headers(cache_handle_t *h, request_rec *r, cache_info *info) + +static apr_status_t store_vary_header(cache_handle_t *h, disk_cache_conf *conf, + request_rec *r, cache_info *info, + const char *varyhdr) { - disk_cache_conf *conf = ap_get_module_config(r->server->module_config, - &disk_cache_module); + disk_cache_object_t *dobj = (disk_cache_object_t*) h->cache_obj->vobj; + apr_array_header_t* varray; + const char *vfile; apr_status_t rv; + int flags; + disk_cache_format_t format = VARY_FORMAT_VERSION; + struct iovec iov[2]; apr_size_t amt; - disk_cache_object_t *dobj = (disk_cache_object_t*) h->cache_obj->vobj; - disk_cache_info_t disk_info; - struct iovec iov[2]; + if(dobj->prefix != NULL) { + vfile = dobj->prefix; + } + else if(dobj->aliasfile != NULL) { + vfile = dobj->aliasfile; + } + else { + vfile = dobj->cachefile; + } - /* This is flaky... 
we need to manage the cache_info differently */ - h->cache_obj->info = *info; + flags = APR_CREATE | APR_WRITE | APR_BINARY | APR_EXCL | APR_BUFFERED; + rv = apr_file_mktemp(&dobj->tfd, dobj->tempfile, flags, r->pool); + if (rv != APR_SUCCESS) { + return rv; + } - if (r->headers_out) { - const char *tmp; + iov[0].iov_base = (void*)&format; + iov[0].iov_len = sizeof(format); - tmp = apr_table_get(r->headers_out, "Vary"); + iov[1].iov_base = (void*)&info->expire; + iov[1].iov_len = sizeof(info->expire); - if (tmp) { - apr_array_header_t* varray; - apr_uint32_t format = VARY_FORMAT_VERSION; + rv = apr_file_writev(dobj->fd, (const struct iovec *) &iov, 2, &amt); + if (rv != APR_SUCCESS) { + file_cache_errorcleanup(dobj, r); + return rv; + } + + varray = apr_array_make(r->pool, 6, sizeof(char*)); + tokens_to_array(r->pool, varyhdr, varray); - mkdir_structure(conf, dobj->hdrsfile, r->pool); + rv = store_array(dobj->tfd, varray); + if (rv != APR_SUCCESS) { + file_cache_errorcleanup(dobj, r); + return rv; + } - rv = apr_file_mktemp(&dobj->tfd, dobj->tempfile, - APR_CREATE | APR_WRITE | APR_BINARY | APR_EXCL, - r->pool); + rv = apr_file_close(dobj->tfd); + dobj->tfd = NULL; + if (rv != APR_SUCCESS) { + file_cache_errorcleanup(dobj, r); + return rv; + } - if (rv != APR_SUCCESS) { - return rv; - } + rv = safe_file_rename(dobj->tempfile, vfile, r->pool); + if (rv != APR_SUCCESS) { + ap_log_error(APLOG_MARK, APLOG_ERR, rv, r->server, + "disk_cache: rename tempfile to varyfile failed: " + "%s -> %s", dobj->tempfile, vfile); + file_cache_errorcleanup(dobj, r); + return rv; + } + + dobj->tempfile = apr_pstrcat(r->pool, conf->cache_root, AP_TEMPFILE, NULL); + + if(dobj->prefix == NULL) { + const char *tmp = regen_key(r->pool, r->headers_in, varray, dobj->name); - amt = sizeof(format); - apr_file_write(dobj->tfd, &format, &amt); + if(dobj->aliasfile != NULL) { + dobj->prefix = dobj->aliasfile; + dobj->aliasfile = cache_file(r->pool, conf, dobj->prefix, tmp); + } + else { + 
dobj->prefix = dobj->cachefile; + dobj->cachefile = cache_file(r->pool, conf, dobj->prefix, tmp); + } + } - amt = sizeof(info->expire); - apr_file_write(dobj->tfd, &info->expire, &amt); + ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server, + "disk_cache: Stored vary header for URL %s", dobj->name); - varray = apr_array_make(r->pool, 6, sizeof(char*)); - tokens_to_array(r->pool, tmp, varray); + return APR_SUCCESS; +} - store_array(dobj->tfd, varray); - apr_file_close(dobj->tfd); +static apr_status_t store_alias_header(disk_cache_object_t *dobj, + disk_cache_conf *conf, + request_rec *r, cache_info *info) +{ + disk_cache_format_t format = ALIAS_FORMAT_VERSION; + disk_cache_alias_t disk_alias; + apr_status_t rv; + struct iovec iov[4]; + apr_size_t amt; + int flags; - dobj->tfd = NULL; + disk_alias.name_len = strlen(dobj->name); + disk_alias.filename_len = strlen(dobj->filename); + disk_alias.expire = info->expire; + + iov[0].iov_base = (void*)&format; + iov[0].iov_len = sizeof(format); + iov[1].iov_base = (void*)&disk_alias; + iov[1].iov_len = sizeof(disk_alias); + iov[2].iov_base = (void*)dobj->name; + iov[2].iov_len = disk_alias.name_len; + iov[3].iov_base = (void*)dobj->filename; + iov[3].iov_len = disk_alias.filename_len; - rv = safe_file_rename(conf, dobj->tempfile, dobj->hdrsfile, - r->pool); - if (rv != APR_SUCCESS) { - ap_log_error(APLOG_MARK, APLOG_DEBUG, rv, r->server, - "disk_cache: rename tempfile to varyfile failed: %s -> %s", - dobj->tempfile, dobj->hdrsfile); - return rv; - } + ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server, + "disk_cache: store_alias_header %s -> %s in %s", + dobj->name, dobj->filename, dobj->aliasfile); - dobj->tempfile = apr_pstrcat(r->pool, conf->cache_root, AP_TEMPFILE, NULL); - tmp = regen_key(r->pool, r->headers_in, varray, dobj->name); - dobj->prefix = dobj->hdrsfile; - dobj->hashfile = NULL; - dobj->datafile = data_file(r->pool, conf, dobj, tmp); - dobj->hdrsfile = header_file(r->pool, conf, dobj, tmp); - } + flags = 
APR_CREATE | APR_WRITE | APR_BINARY | APR_EXCL | APR_BUFFERED; + rv = apr_file_mktemp(&dobj->tfd, dobj->tempfile, flags, r->pool); + if (rv != APR_SUCCESS) { + ap_log_error(APLOG_MARK, APLOG_DEBUG, rv, r->server, + "disk_cache: store_alias_header mktemp failed URL %s", + dobj->name); + return rv; } + rv = apr_file_writev(dobj->tfd, (const struct iovec *) &iov, 4, &amt); + + if (rv != APR_SUCCESS) { + ap_log_error(APLOG_MARK, APLOG_DEBUG, rv, r->server, + "disk_cache: store_alias_header writev failed URL %s", + dobj->name); + file_cache_errorcleanup(dobj, r); + return rv; + } - rv = apr_file_mktemp(&dobj->hfd, dobj->tempfile, - APR_CREATE | APR_WRITE | APR_BINARY | - APR_BUFFERED | APR_EXCL, r->pool); + rv = apr_file_close(dobj->tfd); + dobj->tfd = NULL; + if (rv != APR_SUCCESS) { + ap_log_error(APLOG_MARK, APLOG_DEBUG, rv, r->server, + "disk_cache: store_alias_header close failed URL %s", + dobj->name); + file_cache_errorcleanup(dobj, r); + return rv; + } + rv = safe_file_rename(dobj->tempfile, dobj->aliasfile, r->pool); if (rv != APR_SUCCESS) { + ap_log_error(APLOG_MARK, APLOG_ERR, rv, r->server, + "disk_cache: rename tempfile to alias header failed: " + "%s -> %s", dobj->tempfile, dobj->aliasfile); + file_cache_errorcleanup(dobj, r); return rv; } - dobj->name = h->cache_obj->key; + dobj->tempfile = apr_pstrcat(r->pool, conf->cache_root, AP_TEMPFILE, NULL); + + ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server, + "disk_cache: Stored alias header for URL %s -> file %s", + dobj->name, dobj->filename); + + return APR_SUCCESS; +} + + +static apr_status_t store_disk_header(disk_cache_object_t *dobj, + request_rec *r, cache_info *info, + apr_time_t lastmod) +{ + disk_cache_format_t format = DISK_FORMAT_VERSION; + struct iovec iov[4]; + int niov; + disk_cache_info_t disk_info; + apr_size_t amt; + apr_status_t rv; - disk_info.format = DISK_FORMAT_VERSION; disk_info.date = info->date; disk_info.expire = info->expire; disk_info.entity_version = 
dobj->disk_info.entity_version++; disk_info.request_time = info->request_time; disk_info.response_time = info->response_time; disk_info.status = info->status; + disk_info.bodyoff = dobj->bodyoff; + disk_info.bodysize = dobj->bodysize_in; + disk_info.lastmod = lastmod; + + niov = 0; + iov[niov].iov_base = (void*)&format; + iov[niov++].iov_len = sizeof(format); + iov[niov].iov_base = (void*)&disk_info; + iov[niov++].iov_len = sizeof(disk_cache_info_t); + + /* Only record the URL if we're the primary record for that URL */ + if (dobj->aliasfile == NULL) { + disk_info.name_len = strlen(dobj->name); + iov[niov].iov_base = (void*)dobj->name; + iov[niov++].iov_len = disk_info.name_len; + } + else { + disk_info.name_len = 0; + } - disk_info.name_len = strlen(dobj->name); - - iov[0].iov_base = (void*)&disk_info; - iov[0].iov_len = sizeof(disk_cache_info_t); - iov[1].iov_base = (void*)dobj->name; - iov[1].iov_len = disk_info.name_len; + if(r->filename != NULL && strlen(r->filename) > 0) { + disk_info.filename_len = strlen(r->filename); + iov[niov].iov_base = (void*)r->filename; + iov[niov++].iov_len = disk_info.filename_len; + } + else { + disk_info.filename_len = 0; + } - rv = apr_file_writev(dobj->hfd, (const struct iovec *) &iov, 2, &amt); + rv = apr_file_writev(dobj->fd, (const struct iovec *) &iov, niov, &amt); if (rv != APR_SUCCESS) { + file_cache_errorcleanup(dobj, r); return rv; } @@ -911,8 +1624,9 @@ static apr_status_t store_headers(cache_ headers_out = apr_table_overlay(r->pool, headers_out, r->err_headers_out); - rv = store_table(dobj->hfd, headers_out); + rv = store_table(dobj->fd, headers_out); if (rv != APR_SUCCESS) { + file_cache_errorcleanup(dobj, r); return rv; } } @@ -924,35 +1638,752 @@ static apr_status_t store_headers(cache_ headers_in = ap_cache_cacheable_hdrs_out(r->pool, r->headers_in, r->server); - rv = store_table(dobj->hfd, headers_in); + rv = store_table(dobj->fd, headers_in); if (rv != APR_SUCCESS) { + file_cache_errorcleanup(dobj, r); return 
rv; } } - apr_file_close(dobj->hfd); /* flush and close */ - - /* Remove old file with the same name. If remove fails, then - * perhaps we need to create the directory tree where we are - * about to write the new headers file. + return APR_SUCCESS; +} + +/* Assumes file positioned just after the initial header, ie. as left by + open_header() */ +static int is_file_current(request_rec *r, disk_cache_object_t *dobj, + apr_time_t lastmod) +{ + char filename[MAX_STRING_LEN+1]; + + if(dobj->disk_info.name_len > 0 || + dobj->disk_info.filename_len == 0 || + dobj->disk_info.filename_len > MAX_STRING_LEN || + dobj->bodysize_in != dobj->disk_info.bodysize || + lastmod == APR_DATE_BAD || + dobj->disk_info.lastmod == APR_DATE_BAD || + lastmod != dobj->disk_info.lastmod) + { + return FALSE; + } + else if (apr_file_read_full(dobj->fd, filename, + dobj->disk_info.filename_len, NULL) + == APR_SUCCESS) + { + filename[dobj->disk_info.filename_len] = '\0'; + if(strcmp(r->filename, filename) != 0) { + return FALSE; + } + + /* This is where a file that's OK ends up */ + return TRUE; + } + + return FALSE; +} + + +static apr_status_t open_new_cachefile(request_rec *r, + disk_cache_object_t *dobj) +{ + int flags = APR_CREATE | APR_WRITE | APR_BINARY | APR_BUFFERED | APR_EXCL; + apr_status_t rv; + + while(1) { + rv = apr_file_open(&dobj->fd, dobj->cachefile, flags, + APR_FPROT_UREAD | APR_FPROT_UWRITE, r->pool); + + /* FIXME: Debug */ + ap_log_error(APLOG_MARK, APLOG_DEBUG, rv, r->server, + "disk_cache: open_new_cachefile: Opening %s", + dobj->cachefile); + + if(APR_STATUS_IS_EEXIST(rv)) { + apr_finfo_t finfo; + + rv = apr_stat(&finfo, dobj->cachefile, APR_FINFO_MTIME, + r->pool); + if(APR_STATUS_IS_ENOENT(rv)) { + /* Someone else has already removed it, try again */ + continue; + } + else if(rv != APR_SUCCESS) { + return rv; + } + + if(finfo.mtime < (apr_time_now() - dobj->updtimeout) ) { + /* Something stale that's left around */ + + rv = apr_file_remove(dobj->cachefile, r->pool); + 
if(rv != APR_SUCCESS && !APR_STATUS_IS_ENOENT(rv)) { + ap_log_error(APLOG_MARK, APLOG_ERR, rv, r->server, + "disk_cache: open_new_cachefile: Failed to " + "remove old %s", dobj->cachefile); + return rv; + } + continue; + } + else { + /* Someone else has just created the file, assume + they're caching the same thing we are and wait + for them to finish */ + + dobj->skipstore = TRUE; + return APR_SUCCESS; + } + } + else if(APR_STATUS_IS_ENOENT(rv)) { + /* The directory for the file didn't exist */ + + rv = mkdir_structure(dobj->cachefile, r->pool); + if(rv != APR_SUCCESS) { + ap_log_error(APLOG_MARK, APLOG_ERR, rv, r->server, + "disk_cache: open_new_cachefile: Failed to make " + "directory for %s", dobj->cachefile); + return rv; + } + continue; + } + else if(rv == APR_SUCCESS) { + return APR_SUCCESS; + } + else { + ap_log_error(APLOG_MARK, APLOG_ERR, rv, r->server, + "disk_cache: open_new_cachefile: Failed to open %s", + dobj->cachefile); + return rv; + } + } + + /* We should never get here, so */ + return APR_EGENERAL; +} + + +static apr_status_t store_headers(cache_handle_t *h, request_rec *r, + cache_info *info) +{ + disk_cache_conf *conf = ap_get_module_config(r->server->module_config, + &disk_cache_module); + apr_status_t rv; + apr_off_t off; + int flags=0, rewriting; + disk_cache_object_t *dobj = (disk_cache_object_t*) h->cache_obj->vobj; + apr_time_t lastmod = APR_DATE_BAD; + const char *lastmods; + + + /* This is flaky... we need to manage the cache_info differently */ + h->cache_obj->info = *info; + + /* Get last-modified timestamp */ + lastmods = apr_table_get(r->err_headers_out, "Last-Modified"); + if (lastmods == NULL) { + lastmods = apr_table_get(r->headers_out, "Last-Modified"); + } + if (lastmods != NULL) { + lastmod = apr_date_parse_http(lastmods); + } + + if(dobj->fd) { + rewriting = TRUE; + + /* FIXME: Configurable threshold for expire-check? 
*/ + if(lastmod != APR_DATE_BAD && lastmod == dobj->disk_info.lastmod && + dobj->disk_info.expire > r->request_time) + { + /* Don't store header if it hasn't changed. That is simply stupid */ + dobj->skipstore = TRUE; + + ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server, + "disk_cache: store_headers: Headers current for URL " + "%s", dobj->name); + } + else { + ap_log_error(APLOG_MARK, APLOG_INFO, 0, r->server, + "disk_cache: Rewriting headers for URL %s", + dobj->name); + } + } + else { + ap_log_error(APLOG_MARK, APLOG_INFO, 0, r->server, + "disk_cache: Storing new headers for URL %s", dobj->name); + + rewriting = FALSE; + } + + if (r->headers_out) { + const char *tmp; + + tmp = apr_table_get(r->headers_out, "Vary"); + + if (tmp) { + rv = store_vary_header(h, conf, r, info, tmp); + if(rv != APR_SUCCESS) { + return rv; + } + } + } + + /* If we're rewriting, this automatically means that the alias header + was up to date */ + if(dobj->aliasfile && !rewriting) { + rv = store_alias_header(dobj, conf, r, info); + if(rv != APR_SUCCESS) { + return rv; + } + + /* We might end up here due to the alias header being expired, which + causes the request to tumble through the filter chain as if it was + totally uncached. We might already have cached the file the URL + refers to though, so try to open the header and check */ + + rv = open_header_timeout(h, r, h->cache_obj->key, conf, dobj); + if(rv == APR_SUCCESS) { + /* OK, there was something there already. But is it the + right thing? */ + if(is_file_current(r, dobj, lastmod)) { + ap_log_error(APLOG_MARK, APLOG_INFO, 0, r->server, + "disk_cache: Found already cached file for " + "URL %s", dobj->name); + dobj->skipstore = TRUE; + } + else { + /* Nopes. 
Let's get rid of it */ + apr_file_close(dobj->fd); + dobj->fd = NULL; + rv = apr_file_remove(dobj->cachefile, r->pool); + if(rv != APR_SUCCESS && !APR_STATUS_IS_ENOENT(rv)) { + return rv; + } + } + } + } + + if(dobj->skipstore) { + return APR_SUCCESS; + } + + if(rewriting) { + /* Assume we are just rewriting the header if we have an fd. The + fd might be readonly though, in that case reopen it for writes. + Something equivalent to fdopen would have been handy. */ + + flags = apr_file_flags_get(dobj->fd); + + if(!(flags & APR_WRITE)) { + apr_file_close(dobj->fd); + rv = apr_file_open(&dobj->fd, dobj->cachefile, + APR_WRITE | APR_BINARY | APR_BUFFERED, 0, r->pool); + if (rv != APR_SUCCESS) { + return rv; + } + } + else { + /* We can write here, so let's just move to the right place */ + apr_off_t off=0; + rv = apr_file_seek(dobj->fd, APR_SET, &off); + if (rv != APR_SUCCESS) { + return rv; + } + } + } + else { + rv = open_new_cachefile(r, dobj); + if(rv != APR_SUCCESS) { + return rv; + } + } + + if(dobj->skipstore) { + ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server, + "disk_cache: Skipping store for URL %s: Someone else " + "beat us to it", dobj->name); + return APR_SUCCESS; + } + + rv = store_disk_header(dobj, r, info, lastmod); + if(rv != APR_SUCCESS) { + return rv; + } + + /* FIXME: It's probably better/safer to just set a maximum header size + and not try to be flexible. 
There are races if writing too much headers + since other threads might think it's a body */ + + /* Find out how much we wrote */ + off = 0; + rv = apr_file_seek(dobj->fd, APR_CUR, &off); + if (rv != APR_SUCCESS) { + return rv; + } + + if(rewriting) { + + /* Reopen if read/write-mode differs, flush otherwise */ + if(!(flags & APR_WRITE)) { + rv = apr_file_close(dobj->fd); + if (rv != APR_SUCCESS) { + file_cache_errorcleanup(dobj, r); + return rv; + } + rv = apr_file_open(&dobj->fd, dobj->cachefile, flags, 0, r->pool); + if (rv != APR_SUCCESS) { + return rv; + } + } + else { + rv = apr_file_flush(dobj->fd); + if (rv != APR_SUCCESS) { + file_cache_errorcleanup(dobj, r); + return rv; + } + } + + if(off > dobj->bodyoff) { + ap_log_error(APLOG_MARK, APLOG_ERR, 0, r->server, + "disk_cache: Header size %" APR_OFF_T_FMT " > %" + APR_OFF_T_FMT " for URL %s, skipping", + off, dobj->bodyoff, dobj->name); + return APR_EGENERAL; + } + } + else { + /* Storing stuff from scratch */ + + if(off * 1.25 > dobj->bodyoff) { + /* Whoops, we wrote too much headers. 
Recalculate body offset + and redo to get breathing room for future header updates */ + + ap_log_error(APLOG_MARK, APLOG_WARNING, 0, r->server, + "disk_cache: Header size %" APR_OFF_T_FMT " > %" + APR_OFF_T_FMT " for URL %s, will rewrite headers again", + (apr_off_t) (off * 1.25), dobj->bodyoff, dobj->name); + + dobj->bodyoff = ((off*1.25)/CACHE_DATA_OFFSET)+1; + dobj->bodyoff *= CACHE_DATA_OFFSET; + + rv = apr_file_close(dobj->fd); + dobj->fd = NULL; + if (rv != APR_SUCCESS) { + file_cache_errorcleanup(dobj, r); + return rv; + } + rv = apr_file_remove(dobj->cachefile, r->pool); + if (rv != APR_SUCCESS) { + return rv; + } + + return store_headers(h, r, info); + } + + rv = apr_file_flush(dobj->fd); + if (rv != APR_SUCCESS) { + file_cache_errorcleanup(dobj, r); + return rv; + } + } + + /* store_body() assumes the fd is correctly positioned */ + off = dobj->bodyoff; + rv = apr_file_seek(dobj->fd, APR_SET, &off); + if (rv != APR_SUCCESS) { + return rv; + } + + ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server, + "disk_cache: Stored headers for URL %s", dobj->name); + return APR_SUCCESS; +} + + +/* Stuff needed by the background copy thread */ +typedef struct copyinfo copyinfo; +struct copyinfo { + apr_off_t len; + /* Source info */ + const char *srcfile; + apr_finfo_t srcinfo; + apr_off_t srcoff; + /* Destination info */ + const char *destfile; + apr_off_t destoff; + + /* Our private pool */ + apr_pool_t *pool; + +#if APR_HAS_THREADS + /* Background process info */ + apr_thread_t *t; +#endif /* APR_HAS_THREADS */ +#if APR_HAS_FORK + apr_proc_t *proc; +#endif /* APR_HAS_FORK */ + + /* For logging */ + const server_rec *s; +}; + + +/* FIXME: Check if source file has changed during copying? */ +/* Either provide srcfd, or provide srcfile and srcinfo containing + APR_FINFO_INODE|APR_FINFO_MTIME to make sure we have opened the right file + (someone might have just replaced it which messes up things). 
+ */ +/* FIXME: If copy_body bails then the cache bucket should be able to revert + to delivering from the original fd. + */ +static apr_status_t copy_body(apr_file_t *srcfd, const char *srcfile, + apr_finfo_t *srcinfo, apr_off_t srcoff, + const char *destfile, apr_off_t destoff, + apr_off_t len, apr_pool_t *p) +{ + apr_status_t rc; + apr_file_t *destfd; + apr_size_t size; + apr_finfo_t finfo; + apr_time_t starttime = apr_time_now(); + char buf[CACHE_BUF_SIZE]; + + /* Sanity-check arguments */ + if((srcfd == NULL && srcfile == NULL) || + (srcfd != NULL && srcfile != NULL) || + (srcfile != NULL && srcinfo == NULL)) + { + return APR_EGENERAL; + } + + if(srcfile != NULL) { + rc = apr_file_open(&srcfd, srcfile, APR_READ | APR_BINARY, 0, p); + if(rc != APR_SUCCESS) { + return rc; + } + rc = apr_file_info_get(&finfo, APR_FINFO_INODE|APR_FINFO_MTIME, srcfd); + if(rc != APR_SUCCESS) { + return rc; + } + if(srcinfo->inode != finfo.inode || srcinfo->mtime < finfo.mtime) { + return APR_EGENERAL; + } + } + if(srcoff != 0) { + rc = apr_file_seek(srcfd, APR_SET, &srcoff); + if(rc != APR_SUCCESS) { + return rc; + } + } + + rc = apr_file_open(&destfd, destfile, APR_WRITE | APR_BINARY, 0, p); + if(rc != APR_SUCCESS) { + return rc; + } + if(destoff != 0) { + rc = apr_file_seek(destfd, APR_SET, &destoff); + if(rc != APR_SUCCESS) { + return rc; + } + } + + /* Tried doing this with mmap, but sendfile on Linux got confused when + sending a file while it was being written to from an mmapped area. + The traditional way seems to be good enough, and less complex. */ - rv = apr_file_remove(dobj->hdrsfile, r->pool); + while(len > 0) { + size=MIN(len, CACHE_BUF_SIZE); + + rc = apr_file_read_full (srcfd, buf, size, NULL); + if(rc != APR_SUCCESS) { + return rc; + } + + rc = apr_file_write_full(destfd, buf, size, NULL); + if(rc != APR_SUCCESS) { + return rc; + } + len -= size; + } + + /* Check if file has changed during copying. 
This is not 100% foolproof + due to NFS attribute caching when on NFS etc. */ + /* FIXME: Can we assume that we're always copying an entire file? In that + case we can check if the current filesize matches the length + we think it is */ + rc = apr_file_info_get(&finfo, APR_FINFO_MTIME, srcfd); + if(rc != APR_SUCCESS) { + apr_file_close(destfd); + return rc; + } + if(starttime < finfo.mtime) { + apr_file_close(destfd); + return APR_EGENERAL; + } + + if(srcfile != NULL) { + apr_file_close(srcfd); + } + return apr_file_close(destfd); +} + + +#if APR_HAS_THREADS +static apr_status_t bgcopy_thread_cleanup(void *data) { + copyinfo *ci = data; + apr_status_t rc, ret; + apr_pool_t *p; + + /* FIXME: Debug */ + ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ci->s, + "disk_cache: bgcopy_thread_cleanup: %s -> %s", + ci->srcfile, ci->destfile); + + rc = apr_thread_join(&ret, ci->t); + if(rc != APR_SUCCESS) { + ap_log_error(APLOG_MARK, APLOG_ERR, rc, ci->s, + "disk_cache: bgcopy_thread_cleanup: apr_thread_join " + "failed %s -> %s", ci->srcfile, ci->destfile); + return rc; + } + if(ret != APR_SUCCESS) { + ap_log_error(APLOG_MARK, APLOG_ERR, ret, ci->s, + "disk_cache: Background caching body %s -> %s failed", + ci->srcfile, ci->destfile); + } + + /* FIXME: Debug */ + ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ci->s, + "disk_cache: bgcopy_thread_cleanup: SUCCESS %s -> %s", + ci->srcfile, ci->destfile); + + /* Destroy our private pool */ + p = ci->pool; + apr_pool_destroy(p); + + return APR_SUCCESS; +} + + +static void *bgcopy_thread(apr_thread_t *t, void *data) +{ + copyinfo *ci = data; + apr_pool_t *p; + apr_status_t rc; + + p = apr_thread_pool_get(t); + + /* FIXME: Debug */ + ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ci->s, + "disk_cache: bgcopy_thread: start %s -> %s", + ci->srcfile, ci->destfile); + + rc = copy_body(NULL, ci->srcfile, &(ci->srcinfo), ci->srcoff, ci->destfile, + ci->destoff, ci->len, p); + + if(rc != APR_SUCCESS) { + apr_file_remove(ci->destfile, p); + } + + /* FIXME: 
Debug */ + ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ci->s, + "disk_cache: bgcopy_thread: done %s -> %s", + ci->srcfile, ci->destfile); + + apr_thread_exit(t, rc); + return NULL; +} +#endif /* APR_HAS_THREADS */ + + +#if APR_HAS_FORK +static apr_status_t bgcopy_child_cleanup(void *data) { + copyinfo *ci = data; + int status; + apr_exit_why_e why; + apr_pool_t *p; + + apr_proc_wait(ci->proc, &status, &why, APR_WAIT); + if(why == APR_PROC_EXIT) { + if(status != APR_SUCCESS) { + ap_log_error(APLOG_MARK, APLOG_ERR, status, ci->s, + "disk_cache: Background caching body %s -> %s failed", + ci->srcfile, ci->destfile); + return APR_SUCCESS; + } + } + else if(status & (APR_PROC_SIGNAL | APR_PROC_SIGNAL_CORE) ) { + ap_log_error(APLOG_MARK, APLOG_ERR, 0, ci->s, + "disk_cache: Background caching body %s -> %s failed, " + "caught signal %d", ci->srcfile, ci->destfile, status); + return APR_SUCCESS; + } + + /* Destroy our private pool */ + p = ci->pool; + apr_pool_destroy(p); + + return APR_SUCCESS; +} +#endif /* APR_HAS_FORK */ + + +static apr_status_t do_bgcopy(apr_file_t *srcfd, apr_off_t off, apr_off_t len, + const char *cachefile, apr_off_t bodyoff, + request_rec *r) +{ + copyinfo *ci; + apr_status_t rv; + apr_pool_t *newpool; + const char *srcfile; + int mpm_query_info; + + /* It seems pool memory gets destroyed before the cleanup functions + are called when an error occur (a dropped connection, for example), + so we need a pool of our own + */ + rv = apr_pool_create(&newpool, NULL); if (rv != APR_SUCCESS) { - mkdir_structure(conf, dobj->hdrsfile, r->pool); + return rv; + } + + ci = apr_palloc(newpool, sizeof(*ci)); + if(ci == NULL) { + apr_pool_destroy(newpool); + return APR_ENOMEM; } - rv = safe_file_rename(conf, dobj->tempfile, dobj->hdrsfile, r->pool); + rv = apr_file_name_get(&srcfile, srcfd); + if(rv != APR_SUCCESS) { + return rv; + } + rv = apr_file_info_get(&(ci->srcinfo), APR_FINFO_INODE|APR_FINFO_MTIME, + srcfd); + if(rv != APR_SUCCESS) { + return rv; + } + + 
ci->pool = newpool; + ci->srcoff = off; + ci->len = len; + ci->srcfile = apr_pstrdup(newpool, srcfile); + ci->destfile = apr_pstrdup(newpool, cachefile); + ci->destoff = bodyoff; + ci->s = r->server; + +#if APR_HAS_THREADS + if(ap_mpm_query(AP_MPMQ_IS_THREADED, &mpm_query_info) == APR_SUCCESS) { + apr_threadattr_t *ta; + apr_thread_t *t; + rv = apr_threadattr_create(&ta, newpool); + if(rv != APR_SUCCESS) { + apr_pool_destroy(newpool); + return rv; + } + + apr_threadattr_detach_set(ta, FALSE); + + /* FIXME: This makes module unloadable on AIX */ +#if 0 +#ifdef AP_MPM_WANT_SET_STACKSIZE + if (ap_thread_stacksize != 0) { + apr_threadattr_stacksize_set(ta, ap_thread_stacksize); + ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server, + "disk_cache: BG thread stacksize set to %" + APR_SIZE_T_FMT, ap_thread_stacksize); + } +#endif /* AP_MPM_WANT_SET_STACKSIZE */ +#endif /* 0 */ + + rv = apr_thread_create (&t, ta, bgcopy_thread, ci, newpool); + if (rv != APR_SUCCESS) { + apr_pool_destroy(newpool); + return rv; + } + ci->t = t; + + apr_pool_cleanup_register(r->pool, ci, bgcopy_thread_cleanup, + apr_pool_cleanup_null); + } + else +#endif /* APR_HAS_THREADS */ +#if APR_HAS_FORK + if(ap_mpm_query(AP_MPMQ_IS_FORKED, &mpm_query_info) == APR_SUCCESS) { + ci->proc = apr_palloc(newpool, sizeof(apr_proc_t)); + if(ci->proc == NULL) { + apr_pool_destroy(newpool); + return APR_ENOMEM; + } + rv = apr_proc_fork(ci->proc, newpool); + if(rv == APR_INCHILD) { + /* Child */ + rv = copy_body(NULL, ci->srcfile, &(ci->srcinfo), ci->srcoff, + ci->destfile, ci->destoff, ci->len, ci->pool); + if(rv != APR_SUCCESS) { + apr_file_remove(ci->destfile, ci->pool); + } + exit(rv); + } + else if(rv == APR_INPARENT) { + apr_pool_cleanup_register(r->pool, ci, bgcopy_child_cleanup, + apr_pool_cleanup_null); + } + else { + return rv; + } + } + else +#endif /* APR_HAS_FORK */ + if(1) + { + rv = copy_body(srcfd, NULL, NULL, ci->srcoff, ci->destfile, ci->destoff, + ci->len, newpool); + apr_pool_destroy(newpool); + 
} + + return rv; +} + +static apr_status_t replace_brigade_with_cache(cache_handle_t *h, + request_rec *r, + apr_bucket_brigade *bb) +{ + apr_status_t rv; + int flags; + apr_bucket *e; + core_dir_config *pdcfg = ap_get_module_config(r->per_dir_config, + &core_module); + disk_cache_object_t *dobj = (disk_cache_object_t *) h->cache_obj->vobj; + + flags = APR_READ|APR_BINARY; +#if APR_HAS_SENDFILE + flags |= ((pdcfg->enable_sendfile == ENABLE_SENDFILE_OFF) + ? 0 : APR_SENDFILE_ENABLED); +#endif + + rv = apr_file_open(&dobj->fd, dobj->cachefile, flags, 0, r->pool); if (rv != APR_SUCCESS) { ap_log_error(APLOG_MARK, APLOG_ERR, rv, r->server, - "disk_cache: rename tempfile to hdrsfile failed: %s -> %s", - dobj->tempfile, dobj->hdrsfile); + "disk_cache: Error opening cachefile %s for URL %s", + dobj->cachefile, dobj->name); return rv; } - dobj->tempfile = apr_pstrcat(r->pool, conf->cache_root, AP_TEMPFILE, NULL); + /* First, empty the brigade */ + e = APR_BRIGADE_FIRST(bb); + while (e != APR_BRIGADE_SENTINEL(bb)) { + apr_bucket *d; + d = e; + e = APR_BUCKET_NEXT(e); + apr_bucket_delete(d); + } + /* Then, populate it with our cached instance */ + rv = recall_body(h, r->pool, bb); + if (rv != APR_SUCCESS) { + ap_log_error(APLOG_MARK, APLOG_ERR, 0, r->server, + "disk_cache: Error serving URL %s from cache", dobj->name); + return rv; + } ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server, - "disk_cache: Stored headers for URL %s", dobj->name); + "disk_cache: Serving cached body for URL %s", dobj->name); + return APR_SUCCESS; } @@ -961,87 +2392,228 @@ static apr_status_t store_body(cache_han { apr_bucket *e; apr_status_t rv; + int copy_file = FALSE; disk_cache_object_t *dobj = (disk_cache_object_t *) h->cache_obj->vobj; disk_cache_conf *conf = ap_get_module_config(r->server->module_config, &disk_cache_module); - /* We write to a temp file and then atomically rename the file over - * in file_cache_el_final(). 
- */
-    if (!dobj->tfd) {
-        rv = apr_file_mktemp(&dobj->tfd, dobj->tempfile,
-                             APR_CREATE | APR_WRITE | APR_BINARY |
-                             APR_BUFFERED | APR_EXCL, r->pool);
-        if (rv != APR_SUCCESS) {
-            return rv;
+    if(r->no_cache) {
+        ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server,
+                     "disk_cache: store_body called for URL %s even though "
+                     "no_cache is set", dobj->name);
+        file_cache_errorcleanup(dobj, r);
+        return APR_EGENERAL;
+    }
+
+    if(dobj->bodysize < 0) {
+        dobj->bodysize = 0;
+    }
+
+    dobj->store_body_called++;
+
+    /* Check if we should skip the whole storing-business */
+    if(dobj->skipstore) {
+        if(dobj->store_body_called == 1 &&
+           dobj->bodysize_in >= 0 &&
+           APR_BUCKET_IS_EOS(APR_BRIGADE_LAST(bb)) )
+        {
+            /* Yay, we can replace the body with the cached instance */
+            return replace_brigade_with_cache(h, r, bb);
        }
-        dobj->file_size = 0;
+        return APR_SUCCESS;
    }

-    for (e = APR_BRIGADE_FIRST(bb);
-         e != APR_BRIGADE_SENTINEL(bb);
-         e = APR_BUCKET_NEXT(e))
+    /* Check if this is a complete single sequential file, eligible for
+     * file copy.
+     */
+    /* FIXME: Make the min size to do file copy run-time config? 
*/ + if(dobj->store_body_called == 1 && + dobj->bodysize_in > APR_BUCKET_BUFF_SIZE && + APR_BUCKET_IS_EOS(APR_BRIGADE_LAST(bb)) ) { - const char *str; - apr_size_t length, written; - rv = apr_bucket_read(e, &str, &length, APR_BLOCK_READ); - if (rv != APR_SUCCESS) { - ap_log_error(APLOG_MARK, APLOG_ERR, 0, r->server, - "cache_disk: Error when reading bucket for URL %s", - h->cache_obj->key); - /* Remove the intermediate cache file and return non-APR_SUCCESS */ - file_cache_errorcleanup(dobj, r); - return rv; + apr_off_t begin = -1; + apr_off_t pos = -1; + apr_file_t *fd = NULL; + apr_bucket_file *a; + + copy_file = TRUE; + + for (e = APR_BRIGADE_FIRST(bb); + e != APR_BRIGADE_SENTINEL(bb); + e = APR_BUCKET_NEXT(e)) + { + if(APR_BUCKET_IS_EOS(e)) { + break; + } + if(!APR_BUCKET_IS_FILE(e)) { + copy_file = FALSE; + break; + } + + a = e->data; + + if(begin < 0) { + begin = pos = e->start; + fd = a->fd; + } + + if(fd != a->fd || pos != e->start) { + copy_file = FALSE; + break; + } + + pos += e->length; } - rv = apr_file_write_full(dobj->tfd, str, length, &written); - if (rv != APR_SUCCESS) { - ap_log_error(APLOG_MARK, APLOG_ERR, 0, r->server, - "cache_disk: Error when writing cache file for URL %s", - h->cache_obj->key); - /* Remove the intermediate cache file and return non-APR_SUCCESS */ - file_cache_errorcleanup(dobj, r); - return rv; + } + + if(copy_file) { + apr_bucket_file *a; + + ap_log_error(APLOG_MARK, APLOG_INFO, 0, r->server, + "disk_cache: Copying body for URL %s, len %" + APR_OFF_T_FMT, dobj->name, dobj->bodysize_in); + + e = APR_BRIGADE_FIRST(bb); + a = e->data; + + if(dobj->bodysize_in > conf->minbgsize) { + rv = do_bgcopy(a->fd, e->start, dobj->bodysize_in, dobj->cachefile, + dobj->bodyoff, r); + if(rv != APR_SUCCESS) { + ap_log_error(APLOG_MARK, APLOG_ERR, rv, r->server, + "disk_cache: Initiating background copy failed, " + "URL %s", dobj->name); + file_cache_errorcleanup(dobj, r); + return rv; + } } - dobj->file_size += written; - if (dobj->file_size > 
conf->maxfs) { - ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server, - "cache_disk: URL %s failed the size check " - "(%" APR_OFF_T_FMT ">%" APR_SIZE_T_FMT ")", - h->cache_obj->key, dobj->file_size, conf->maxfs); - /* Remove the intermediate cache file and return non-APR_SUCCESS */ - file_cache_errorcleanup(dobj, r); - return APR_EGENERAL; + else { + rv = copy_body(a->fd, NULL, NULL, e->start, dobj->cachefile, + dobj->bodyoff, dobj->bodysize_in, r->pool); + if(rv != APR_SUCCESS) { + ap_log_error(APLOG_MARK, APLOG_ERR, rv, r->server, + "disk_cache: Copying body failed, " + "URL %s", dobj->name); + file_cache_errorcleanup(dobj, r); + return rv; + } } + } + else { + if(dobj->store_body_called == 1) { + ap_log_error(APLOG_MARK, APLOG_INFO, 0, r->server, + "disk_cache: Caching body for URL %s, len %" + APR_OFF_T_FMT, dobj->name, dobj->bodysize_in); + } - /* Was this the final bucket? If yes, close the temp file and perform - * sanity checks. - */ - if (APR_BUCKET_IS_EOS(APR_BRIGADE_LAST(bb))) { - if (r->connection->aborted || r->no_cache) { + for (e = APR_BRIGADE_FIRST(bb); + e != APR_BRIGADE_SENTINEL(bb); + e = APR_BUCKET_NEXT(e)) + { + const char *str; + apr_size_t length, written; + + /* Ignore the non-data-buckets */ + if(APR_BUCKET_IS_METADATA(e)) { + continue; + } + + rv = apr_bucket_read(e, &str, &length, APR_BLOCK_READ); + if (rv != APR_SUCCESS) { + ap_log_error(APLOG_MARK, APLOG_ERR, rv, r->server, + "disk_cache: Error when reading bucket for URL %s", + dobj->name); + file_cache_errorcleanup(dobj, r); + return rv; + } + rv = apr_file_write_full(dobj->fd, str, length, &written); + if (rv != APR_SUCCESS) { + ap_log_error(APLOG_MARK, APLOG_ERR, rv, r->server, + "disk_cache: Error when writing cache file for " + "URL %s", dobj->name); + file_cache_errorcleanup(dobj, r); + return rv; + } + dobj->bodysize += written; + if (dobj->bodysize > conf->maxfs) { + ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server, + "disk_cache: URL %s failed the size check " + "(%" 
APR_OFF_T_FMT " > %" APR_OFF_T_FMT ")", + dobj->name, dobj->bodysize, conf->maxfs); + file_cache_errorcleanup(dobj, r); + return APR_EGENERAL; + } + } + } + + + /* Drop out here if this wasn't the end */ + if (!APR_BUCKET_IS_EOS(APR_BRIGADE_LAST(bb))) { + return APR_SUCCESS; + } + + if(!copy_file) { + ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server, + "disk_cache: Done caching URL %s, len %" APR_OFF_T_FMT, + dobj->name, dobj->bodysize); + + /* FIXME: Do we really need to check r->no_cache here since we checked + it in the beginning? */ + /* Assume that if we've got an initial size then bucket brigade + was complete and there's no danger in keeping it even if the + connection was aborted */ + if (r->no_cache || (r->connection->aborted && dobj->bodysize_in < 0)) { ap_log_error(APLOG_MARK, APLOG_INFO, 0, r->server, "disk_cache: Discarding body for URL %s " "because connection has been aborted.", - h->cache_obj->key); + dobj->name); /* Remove the intermediate cache file and return non-APR_SUCCESS */ file_cache_errorcleanup(dobj, r); return APR_EGENERAL; } - if (dobj->file_size < conf->minfs) { + + if (dobj->bodysize < conf->minfs) { ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server, - "cache_disk: URL %s failed the size check " - "(%" APR_OFF_T_FMT "<%" APR_SIZE_T_FMT ")", - h->cache_obj->key, dobj->file_size, conf->minfs); + "disk_cache: URL %s failed the size check " + "(%" APR_OFF_T_FMT " < %" APR_OFF_T_FMT ")", + dobj->name, dobj->bodysize, conf->minfs); /* Remove the intermediate cache file and return non-APR_SUCCESS */ file_cache_errorcleanup(dobj, r); return APR_EGENERAL; } - /* All checks were fine. 
Move tempfile to final destination */ - /* Link to the perm file, and close the descriptor */ - file_cache_el_final(dobj, r); - ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server, - "disk_cache: Body for URL %s cached.", dobj->name); + if(dobj->bodysize_in < 0) { + /* Update header information now that we know the size */ + dobj->bodysize_in = dobj->bodysize; + rv = store_headers(h, r, &(h->cache_obj->info)); + if(rv != APR_SUCCESS) { + file_cache_errorcleanup(dobj, r); + return rv; + } + } + else if(dobj->bodysize_in != dobj->bodysize) { + ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server, + "disk_cache: URL %s - body size mismatch: suggested %" + APR_OFF_T_FMT " bodysize %" APR_OFF_T_FMT ")", + dobj->name, dobj->bodysize_in, dobj->bodysize); + file_cache_errorcleanup(dobj, r); + return APR_EGENERAL; + } + } + + rv = apr_file_close(dobj->fd); + if(rv != APR_SUCCESS) { + file_cache_errorcleanup(dobj, r); + return rv; + } + + /* Redirect to cachefile if we copied a plain file */ + if(copy_file) { + rv = replace_brigade_with_cache(h, r, bb); + if(rv != APR_SUCCESS) { + return rv; + } } return APR_SUCCESS; @@ -1056,6 +2628,8 @@ static void *create_config(apr_pool_t *p conf->dirlength = DEFAULT_DIRLENGTH; conf->maxfs = DEFAULT_MAX_FILE_SIZE; conf->minfs = DEFAULT_MIN_FILE_SIZE; + conf->minbgsize = DEFAULT_MIN_BACKGROUND_SIZE; + conf->updtimeout = DEFAULT_UPDATE_TIMEOUT; conf->cache_root = NULL; conf->cache_root_len = 0; @@ -1117,7 +2691,12 @@ static const char { disk_cache_conf *conf = ap_get_module_config(parms->server->module_config, &disk_cache_module); - conf->minfs = atoi(arg); + + if (apr_strtoff(&conf->minfs, arg, NULL, 0) != APR_SUCCESS || + conf->minfs < 0) + { + return "CacheMinFileSize argument must be a non-negative integer representing the min size of a file to cache in bytes."; + } return NULL; } static const char @@ -1125,10 +2704,71 @@ static const char { disk_cache_conf *conf = ap_get_module_config(parms->server->module_config, &disk_cache_module); - 
conf->maxfs = atoi(arg);
+
+    if (apr_strtoff(&conf->maxfs, arg, NULL, 0) != APR_SUCCESS ||
+        conf->maxfs < 0)
+    {
+        return "CacheMaxFileSize argument must be a non-negative integer representing the max size of a file to cache in bytes.";
+    }
+
    return NULL;
}

+
+static const char
+*set_cache_minbgsize(cmd_parms *parms, void *in_struct_ptr, const char *arg)
+{
+    disk_cache_conf *conf = ap_get_module_config(parms->server->module_config,
+                                                 &disk_cache_module);
+
+    if (apr_strtoff(&conf->minbgsize, arg, NULL, 0) != APR_SUCCESS ||
+        conf->minbgsize < 0)
+    {
+        return "CacheMinBGSize argument must be a non-negative integer representing the min size in bytes for a file to be eligible for background caching";
+    }
+
+    return NULL;
+}
+
+
+static const char
+*set_cache_updtimeout(cmd_parms *parms, void *in_struct_ptr, const char *arg)
+{
+    apr_off_t val;
+    disk_cache_conf *conf = ap_get_module_config(parms->server->module_config,
+                                                 &disk_cache_module);
+
+    if (apr_strtoff(&val, arg, NULL, 0) != APR_SUCCESS || val < 0)
+    {
+        return "CacheUpdateTimeout argument must be a non-negative integer representing the timeout in milliseconds for cache update operations";
+    }
+
+    conf->updtimeout = val * 1000;
+
+    return NULL;
+}
+
+
+static const char
+*set_cache_removedirs(cmd_parms *parms, void *in_struct_ptr, const char *arg)
+{
+    disk_cache_conf *conf = ap_get_module_config(parms->server->module_config,
+                                                 &disk_cache_module);
+
+    if (strcasecmp(arg, "on") == 0 || strcasecmp(arg, "true") == 0) {
+        conf->removedirs = TRUE;
+    }
+    else if (strcasecmp(arg, "off") == 0 || strcasecmp(arg, "false") == 0) {
+        conf->removedirs = FALSE;
+    }
+    else {
+        return "CacheRemoveDirectories argument must be either on, true, off or false";
+    }
+
+    return NULL;
+}
+
+
 static const command_rec disk_cache_cmds[] =
 {
     AP_INIT_TAKE1("CacheRoot", set_cache_root, NULL, RSRC_CONF,
@@ -1141,6 +2781,12 @@ static const command_rec disk_cache_cmds
                   "The minimum file size to cache a document"),
     AP_INIT_TAKE1("CacheMaxFileSize", 
set_cache_maxfs, NULL, RSRC_CONF, "The maximum file size to cache a document"), + AP_INIT_TAKE1("CacheMinBGSize", set_cache_minbgsize, NULL, RSRC_CONF, + "The minimum file size for background caching"), + AP_INIT_TAKE1("CacheUpdateTimeout", set_cache_updtimeout, NULL, RSRC_CONF, + "Timeout in ms for cache updates"), + AP_INIT_TAKE1("CacheRemoveDirectories", set_cache_removedirs, NULL, RSRC_CONF, + "Should we try to remove directories when we remove expired cache files."), {NULL} }; diff -rup dist/modules/cache/mod_disk_cache.h site/modules/cache/mod_disk_cache.h --- dist/modules/cache/mod_disk_cache.h 2006-07-12 05:38:44.000000000 +0200 +++ site/modules/cache/mod_disk_cache.h 2006-08-19 14:13:16.000000000 +0200 @@ -22,12 +22,19 @@ */ #define VARY_FORMAT_VERSION 3 -#define DISK_FORMAT_VERSION 4 +#define DISK_FORMAT_VERSION_OLD 4 +#define DISK_FORMAT_VERSION_OLD2 5 +#define ALIAS_FORMAT_VERSION 6 +#define DISK_FORMAT_VERSION 7 -#define CACHE_HEADER_SUFFIX ".header" -#define CACHE_DATA_SUFFIX ".data" #define CACHE_VDIR_SUFFIX ".vary" +#define CACHE_BUF_SIZE 65536 +#define CACHE_DATA_OFFSET 16384 + +/* How long to sleep before retrying while looping */ +#define CACHE_LOOP_SLEEP 200000 + #define AP_TEMPFILE_PREFIX "/" #define AP_TEMPFILE_BASE "aptmp" #define AP_TEMPFILE_SUFFIX "XXXXXX" @@ -35,22 +42,51 @@ #define AP_TEMPFILE_NAMELEN strlen(AP_TEMPFILE_BASE AP_TEMPFILE_SUFFIX) #define AP_TEMPFILE AP_TEMPFILE_PREFIX AP_TEMPFILE_BASE AP_TEMPFILE_SUFFIX +typedef apr_uint32_t disk_cache_format_t; + typedef struct { - /* Indicates the format of the header struct stored on-disk. */ - apr_uint32_t format; /* The HTTP status code returned for this response. */ - int status; - /* The size of the entity name that follows. */ - apr_size_t name_len; + apr_int32_t status; /* The number of times we've cached this entity. */ - apr_size_t entity_version; + apr_uint32_t entity_version; /* Miscellaneous time values. 
*/ apr_time_t date; apr_time_t expire; apr_time_t request_time; apr_time_t response_time; + apr_time_t lastmod; /* Last-Modified (if present) */ + + /* The body size forced to 64bit to not break when people go from non-LFS + * to LFS builds */ + apr_int64_t bodysize; + /* Where the data is located in the file */ + apr_int64_t bodyoff; + + /* The size of the entity name that follows. */ + apr_uint32_t name_len; + /* The size of the filename that follows, to fill in r->filename */ + apr_uint32_t filename_len; + + /* On disk: + * name_len long string of entity name. + * filename_len long string of filename + */ } disk_cache_info_t; +typedef struct { + apr_uint32_t name_len; + apr_uint32_t filename_len; + apr_time_t expire; + + /* On disk: + * name_len long string of entity name. + * filename_len long string of filename the URL refers to. + */ +} disk_cache_alias_t; + + +/* Don't expose module-related stuff unless needed */ +#ifdef AP_FILTER_H /* * disk_cache_object_t * Pointed to by cache_object_t::vobj @@ -58,18 +94,34 @@ typedef struct { typedef struct disk_cache_object { const char *root; /* the location of the cache directory */ apr_size_t root_len; - char *tempfile; /* temp file tohold the content */ - const char *prefix; - const char *datafile; /* name of file where the data will go */ - const char *hdrsfile; /* name of file where the hdrs will go */ - const char *hashfile; /* Computed hash key for this URI */ - const char *name; /* Requested URI without vary bits - suitable for mortals. */ - const char *key; /* On-disk prefix; URI with Vary bits (if present) */ - apr_file_t *fd; /* data file */ - apr_file_t *hfd; /* headers file */ - apr_file_t *tfd; /* temporary file for data */ - apr_off_t file_size; /* File size of the cached data file */ - disk_cache_info_t disk_info; /* Header information. 
*/ + + /* Temporary file */ + apr_file_t *tfd; + char *tempfile; + + /* Cache file */ + apr_file_t *fd; + const char *cachefile; + + const char *name; /* Requested URI without vary bits - + suitable for mortals. */ + const char *prefix; /* Prefix to deal with Vary headers */ + char *filename; /* Filename of requested URL (if present) */ + const char *aliasfile; /* File that contains the URL->file mapping */ + + apr_off_t bodysize_in; /* Size of body as reported upstreams */ + apr_off_t bodysize; /* File size of the cached body */ + apr_off_t bodyoff; /* At which offset to store the body */ + + int store_body_called; /* Number of times store_body() has executed */ + + int skipstore; /* Set if we should skip storing stuff */ + + int removedirs; /* Set it we should rmdir when doing rm */ + + apr_interval_time_t updtimeout; /* Cache update timeout */ + + disk_cache_info_t disk_info; /* Disk header information. */ } disk_cache_object_t; @@ -82,14 +134,25 @@ typedef struct disk_cache_object { #define DEFAULT_DIRLENGTH 2 #define DEFAULT_MIN_FILE_SIZE 1 #define DEFAULT_MAX_FILE_SIZE 1000000 +/* Background caching disabled by default */ +#define DEFAULT_MIN_BACKGROUND_SIZE DEFAULT_MAX_FILE_SIZE +#define DEFAULT_UPDATE_TIMEOUT apr_time_from_sec(10) typedef struct { const char* cache_root; apr_size_t cache_root_len; int dirlevels; /* Number of levels of subdirectories */ int dirlength; /* Length of subdirectory names */ - apr_size_t minfs; /* minumum file size for cached files */ - apr_size_t maxfs; /* maximum file size for cached files */ + apr_off_t minfs; /* minumum file size for cached files */ + apr_off_t maxfs; /* maximum file size for cached files */ + apr_off_t minbgsize; /* minimum file size to do bg caching */ + apr_interval_time_t updtimeout; /* Cache update timeout */ + int removedirs; /* Should we try to remove directories? 
*/ } disk_cache_conf; +#define CACHE_ENODATA (APR_OS_START_USERERR+1) +#define CACHE_EDECLINED (APR_OS_START_USERERR+2) + +#endif /* AP_FILTER_H */ + #endif /*MOD_DISK_CACHE_H*/ diff -rup dist/support/htcacheclean.c site/support/htcacheclean.c --- dist/support/htcacheclean.c 2006-07-12 05:38:44.000000000 +0200 +++ site/support/htcacheclean.c 2006-08-15 18:15:52.000000000 +0200 @@ -70,10 +70,8 @@ typedef struct _direntry { APR_RING_ENTRY(_direntry) link; int type; /* type of file/fileset: TEMP, HEADER, DATA, HEADERDATA */ - apr_time_t htime; /* headers file modification time */ - apr_time_t dtime; /* body file modification time */ - apr_off_t hsize; /* headers file size */ - apr_off_t dsize; /* body or temporary file size */ + apr_time_t htime; /* file modification time */ + apr_off_t hsize; /* file size */ char *basename; /* file/fileset base name */ } DIRENTRY; @@ -81,11 +79,10 @@ typedef struct _entry { APR_RING_ENTRY(_entry) link; apr_time_t expire; /* cache entry exiration time */ apr_time_t response_time; /* cache entry time of last response to client */ - apr_time_t htime; /* headers file modification time */ - apr_time_t dtime; /* body file modification time */ - apr_off_t hsize; /* headers file size */ - apr_off_t dsize; /* body or temporary file size */ + apr_time_t htime; /* file modification time */ + apr_off_t hsize; /* file size */ char *basename; /* fileset base name */ + char *name; /* entity name */ } ENTRY; @@ -255,10 +252,7 @@ static void delete_entry(char *path, cha /* temp pool, otherwise lots of memory could be allocated */ apr_pool_create(&p, pool); - nextpath = apr_pstrcat(p, path, "/", basename, CACHE_HEADER_SUFFIX, NULL); - apr_file_remove(nextpath, p); - - nextpath = apr_pstrcat(p, path, "/", basename, CACHE_DATA_SUFFIX, NULL); + nextpath = apr_pstrcat(p, path, "/", basename, NULL); apr_file_remove(nextpath, p); apr_pool_destroy(p); @@ -286,7 +280,7 @@ static int process_dir(char *path, apr_p apr_finfo_t info; apr_size_t len; apr_time_t 
current, deviation; - char *nextpath, *base, *ext, *orig_basename; + char *nextpath, *base, *orig_basename; APR_RING_ENTRY(_direntry) anchor; DIRENTRY *d, *t, *n; ENTRY *e; @@ -329,12 +323,11 @@ static int process_dir(char *path, apr_p if (!base++) { base = d->basename; } - ext = strchr(base, '.'); /* there may be temporary files which may be gone before * processing, always skip these if not in realclean mode */ - if (!ext && !realclean) { + if (!realclean) { if (!strncasecmp(base, AP_TEMPFILE_BASE, AP_TEMPFILE_BASELEN) && strlen(base) == AP_TEMPFILE_NAMELEN) { continue; @@ -386,51 +379,25 @@ static int process_dir(char *path, apr_p continue; } - if (!ext) { - if (!strncasecmp(base, AP_TEMPFILE_BASE, AP_TEMPFILE_BASELEN) + if (!strncasecmp(base, AP_TEMPFILE_BASE, AP_TEMPFILE_BASELEN) && strlen(base) == AP_TEMPFILE_NAMELEN) { - d->basename += skip; - d->type = TEMP; - d->dsize = info.size; - apr_hash_set(h, d->basename, APR_HASH_KEY_STRING, d); - } - continue; - } - - if (!strcasecmp(ext, CACHE_HEADER_SUFFIX)) { - *ext = '\0'; d->basename += skip; - /* if a user manually creates a '.header' file */ - if (d->basename[0] == '\0') { - continue; - } - t = apr_hash_get(h, d->basename, APR_HASH_KEY_STRING); - if (t) { - d = t; - } - d->type |= HEADER; - d->htime = info.mtime; + d->type = TEMP; d->hsize = info.size; apr_hash_set(h, d->basename, APR_HASH_KEY_STRING, d); continue; } - if (!strcasecmp(ext, CACHE_DATA_SUFFIX)) { - *ext = '\0'; - d->basename += skip; - /* if a user manually creates a '.data' file */ - if (d->basename[0] == '\0') { - continue; - } - t = apr_hash_get(h, d->basename, APR_HASH_KEY_STRING); - if (t) { - d = t; - } - d->type |= DATA; - d->dtime = info.mtime; - d->dsize = info.size; - apr_hash_set(h, d->basename, APR_HASH_KEY_STRING, d); - } + /* Assume that everything else are cachefiles */ + d->basename += skip; + t = apr_hash_get(h, d->basename, APR_HASH_KEY_STRING); + if (t) { + d = t; + } + d->type |= HEADERDATA; + d->htime = info.mtime; + 
d->hsize = info.size; + apr_hash_set(h, d->basename, APR_HASH_KEY_STRING, d); } if (interrupted) { @@ -448,8 +415,7 @@ static int process_dir(char *path, apr_p switch(d->type) { case HEADERDATA: - nextpath = apr_pstrcat(p, path, "/", d->basename, - CACHE_HEADER_SUFFIX, NULL); + nextpath = apr_pstrcat(p, path, "/", d->basename, NULL); if (apr_file_open(&fd, nextpath, APR_FOPEN_READ | APR_FOPEN_BINARY, APR_OS_DEFAULT, p) == APR_SUCCESS) { len = sizeof(format); @@ -464,18 +430,26 @@ static int process_dir(char *path, apr_p if (apr_file_read_full(fd, &disk_info, len, &len) == APR_SUCCESS) { - apr_file_close(fd); e = apr_palloc(pool, sizeof(ENTRY)); APR_RING_INSERT_TAIL(&root, e, _entry, link); e->expire = disk_info.expire; e->response_time = disk_info.response_time; e->htime = d->htime; - e->dtime = d->dtime; e->hsize = d->hsize; - e->dsize = d->dsize; e->basename = apr_palloc(pool, strlen(d->basename) + 1); strcpy(e->basename, d->basename); + e->name = apr_palloc(pool, disk_info.name_len+1); + if(apr_file_read_full(fd, e->name, + disk_info.name_len, NULL) + == APR_SUCCESS) + { + e->name[disk_info.name_len] = '\0'; + } + else { + e->name = "UNKNOWN"; + } + apr_file_close(fd); break; } else { @@ -484,11 +458,11 @@ static int process_dir(char *path, apr_p } else if (format == VARY_FORMAT_VERSION) { /* This must be a URL that added Vary headers later, - * so kill the orphaned .data file + * so kill the orphaned cachefile */ apr_file_close(fd); apr_file_remove(apr_pstrcat(p, path, "/", d->basename, - CACHE_DATA_SUFFIX, NULL), + NULL), p); } } @@ -509,56 +483,6 @@ static int process_dir(char *path, apr_p || d->htime > current + deviation) { delete_entry(path, d->basename, p); unsolicited += d->hsize; - unsolicited += d->dsize; - } - break; - - /* single data and header files may be deleted either in realclean - * mode or if their modification timestamp is not within a - * specified positive or negative offset to the current time. 
- * this handling is necessary due to possible race conditions - * between apache and this process - */ - case HEADER: - current = apr_time_now(); - nextpath = apr_pstrcat(p, path, "/", d->basename, - CACHE_HEADER_SUFFIX, NULL); - if (apr_file_open(&fd, nextpath, APR_FOPEN_READ | APR_FOPEN_BINARY, - APR_OS_DEFAULT, p) == APR_SUCCESS) { - len = sizeof(format); - if (apr_file_read_full(fd, &format, len, - &len) == APR_SUCCESS) { - if (format == VARY_FORMAT_VERSION) { - apr_time_t expires; - - len = sizeof(expires); - - apr_file_read_full(fd, &expires, len, &len); - - apr_file_close(fd); - - if (expires < current) { - delete_entry(path, d->basename, p); - } - break; - } - } - apr_file_close(fd); - } - - if (realclean || d->htime < current - deviation - || d->htime > current + deviation) { - delete_entry(path, d->basename, p); - unsolicited += d->hsize; - } - break; - - case DATA: - current = apr_time_now(); - if (realclean || d->dtime < current - deviation - || d->dtime > current + deviation) { - delete_entry(path, d->basename, p); - unsolicited += d->dsize; } break; @@ -567,7 +491,7 @@ static int process_dir(char *path, apr_p */ case TEMP: delete_file(path, d->basename, p); - unsolicited += d->dsize; + unsolicited += d->hsize; break; } } @@ -604,7 +528,6 @@ static void purge(char *path, apr_pool_t e != APR_RING_SENTINEL(&root, _entry, link); e = APR_RING_NEXT(e, link)) { sum += e->hsize; - sum += e->dsize; entries++; } @@ -623,10 +546,9 @@ static void purge(char *path, apr_pool_t for (e = APR_RING_FIRST(&root); e != APR_RING_SENTINEL(&root, _entry, link) && !interrupted;) { n = APR_RING_NEXT(e, link); - if (e->response_time > now || e->htime > now || e->dtime > now) { + if (e->response_time > now || e->htime > now ) { delete_entry(path, e->basename, pool); sum -= e->hsize; - sum -= e->dsize; entries--; APR_RING_REMOVE(e, link); if (sum <= max) { @@ -649,8 +571,10 @@ static void purge(char *path, apr_pool_t n = APR_RING_NEXT(e, link); if (e->expire != APR_DATE_BAD && 
e->expire < now) { delete_entry(path, e->basename, pool); + if(verbose > 1) { + apr_file_printf(errfile, "Expired: %s\n", e->name); + } sum -= e->hsize; - sum -= e->dsize; entries--; APR_RING_REMOVE(e, link); if (sum <= max) { @@ -678,14 +602,19 @@ static void purge(char *path, apr_pool_t for (e = APR_RING_NEXT(oldest, link); e != APR_RING_SENTINEL(&root, _entry, link); e = APR_RING_NEXT(e, link)) { - if (e->dtime < oldest->dtime) { + if (e->htime < oldest->htime) { oldest = e; } } delete_entry(path, oldest->basename, pool); + if(verbose > 1) { + apr_file_printf(errfile, "Old: (%d s) " + "(%" APR_OFF_T_FMT " b) %s\n", + (int) apr_time_sec(apr_time_now() - oldest->htime), + oldest->hsize, oldest->name); + } sum -= oldest->hsize; - sum -= oldest->dsize; entries--; APR_RING_REMOVE(oldest, link); } @@ -833,10 +762,7 @@ int main(int argc, const char * const ar break; case 'v': - if (verbose) { - usage(); - } - verbose = 1; + verbose++; break; case 'r':