Bug 40196

Summary: EILSEQ handling, NoImplicitAdd should work, and so on...
Product: Apache httpd-2 Reporter: Tsuyoshi SASAMOTO <nazonazo>
Component: mod_charset_liteAssignee: Apache HTTPD Bugs Mailing List <bugs>
Status: NEW ---    
Severity: normal    
Priority: P3    
Version: 2.5-HEAD   
Target Milestone: ---   
Hardware: All   
OS: All   

Description Tsuyoshi SASAMOTO 2006-08-06 15:43:02 UTC
1. Add options of CharsetOptions: IgnoreIlSeq, NotIgnoreIlSeq;
   if IgnoreIlSeq is specified, translation errors caused by illegal
   byte sequences are ignored to prevent the connection being aborted.
   This is similar to "-c" option of the iconv(1).

2. It's still necessary for NoImplicitAdd to work, when only either
   input or output is required to be filtered, but not both.

3. Catch only forward proxy requests, but not reverse proxy ones.

4. It's better to allocate a slightly large buffer from the heap,
   rather than from the stack.

# It seems Rev. 380232 (Remove Content-Length) isn't backported to 2.2,
# shouldn't it?

--- httpd/httpd/trunk/modules/filters/mod_charset_lite.c	Tue Jul 11 20:33:53 2006
+++ httpd/httpd/trunk/modules/filters/mod_charset_lite.c	Tue Jul 11 20:33:53 2006
@@ -76,6 +76,8 @@
     const char *charset_default; /* how to ship on wire */
     /** module does ap_add_*_filter()? */
     enum {IA_INIT, IA_IMPADD, IA_NOIMPADD} implicit_add;
+    /** whether to ignore EILSEQ */
+    enum {II_INIT, II_IGNOREILSEQ, II_NOTIGNOREILSEQ} ignore_ilseq;
 } charset_dir_t;
 
 /* charset_filter_ctx_t is created for each filter instance; because the same
@@ -86,6 +88,11 @@
 typedef struct charset_filter_ctx_t {
     apr_xlate_t *xlate;
     int is_sb;              /* single-byte translation? */
+    apr_status_t (*xlate_conv_buffer)(apr_xlate_t *,
+                                      const char *,
+                                      apr_size_t *,
+                                      char *,
+                                      apr_size_t *);
     charset_dir_t *dc;
     ees_t ees;              /* extended error status */
     apr_size_t saved;
@@ -92,7 +99,7 @@
     char buf[FATTEST_CHAR]; /* we want to be able to build a complete char here */
     int ran;                /* has filter instance run before? */
     int noop;               /* should we pass brigades through unchanged? */
-    char *tmp;              /* buffer for input filtering */
+    char *tmp;              /* buffer for input & output filtering */
     apr_bucket_brigade *bb; /* input buckets we couldn't finish translating */
 } charset_filter_ctx_t;
 
@@ -138,6 +145,8 @@
         over->charset_source ? over->charset_source : base->charset_source;
     a->implicit_add =
         over->implicit_add != IA_INIT ? over->implicit_add : base->implicit_add;
+    a->ignore_ilseq =
+        over->ignore_ilseq != II_INIT ? over->ignore_ilseq : base->ignore_ilseq;
     return a;
 }
 
@@ -176,6 +185,12 @@
     else if (!strcasecmp(flag, "NoImplicitAdd")) {
         dc->implicit_add = IA_NOIMPADD;
     }
+    else if (!strcasecmp(flag, "IgnoreIlSeq")) {
+        dc->ignore_ilseq = II_IGNOREILSEQ;
+    }
+    else if (!strcasecmp(flag, "NotIgnoreIlSeq")) {
+        dc->ignore_ilseq = II_NOTIGNOREILSEQ;
+    }
     else if (!strncasecmp(flag, "DebugLevel=", 11)) {
         dc->debug = atoi(flag + 11);
     }
@@ -189,6 +204,32 @@
     return NULL;
 }
 
+/* xlate_conv_buffer_no_eilseq() calls apr_xlate_conv_buffer(),
+ * and traps APR_EINVAL (EILSEQ) to ignore it.
+ */
+static apr_status_t xlate_conv_buffer_no_eilseq(apr_xlate_t *convset,
+                                                const char *inbuf,
+                                                apr_size_t *inbytes_left,
+                                                char *outbuf,
+                                                apr_size_t *outbytes_left)
+{
+    apr_status_t rv;
+
+    while (inbytes_left && outbytes_left) {
+        apr_size_t inbytes = *inbytes_left, outbytes = *outbytes_left;
+
+        if ((rv = apr_xlate_conv_buffer(convset, inbuf, inbytes_left,
+                                        outbuf, outbytes_left)) != APR_EINVAL)
/* EILSEQ */
+            break;
+        if (*inbytes_left)
+            inbuf += inbytes - --(*inbytes_left);
+        if (*outbytes_left)
+            (outbuf += outbytes - --(*outbytes_left))[-1] = '?';
+    }
+
+    return rv != APR_EINVAL ? rv : APR_SUCCESS;
+}
+
 /* find_code_page() is a fixup hook that decides if translation should be
  * enabled; if so, it sets up request data for use by the filter registration
  * hook so that it knows what to do
@@ -226,8 +267,8 @@
         return DECLINED;
     }
 
-    /* catch proxy requests */
-    if (r->proxyreq) return DECLINED;
+    /* catch forward proxy requests */
+    if (r->proxyreq == PROXYREQ_PROXY) return DECLINED;
     /* mod_rewrite indicators */
     if (!strncmp(r->filename, "redirect:", 9)) return DECLINED;
     if (!strncmp(r->filename, "gone:", 5)) return DECLINED;
@@ -329,6 +370,8 @@
         if (apr_xlate_sb_get(input_ctx->xlate, &input_ctx->is_sb) != APR_SUCCESS) {
             input_ctx->is_sb = 0;
         }
+        input_ctx->xlate_conv_buffer = dc->ignore_ilseq == II_IGNOREILSEQ
+                                       ? xlate_conv_buffer_no_eilseq :
apr_xlate_conv_buffer;
     }
 
     return DECLINED;
@@ -369,7 +412,7 @@
     charset_dir_t *dc = ap_get_module_config(r->per_dir_config,
                                              &charset_lite_module);
 
-    if (reqinfo) {
+    if (reqinfo && dc->implicit_add != IA_NOIMPADD) {
         if (reqinfo->output_ctx && !configured_on_output(r,
XLATEOUT_FILTER_NAME)) {
             ap_add_output_filter(XLATEOUT_FILTER_NAME, reqinfo->output_ctx, r,
                                  r->connection);
@@ -492,11 +535,11 @@
         ++*cur_str;
         --*cur_len;
         tmp_input_len = ctx->saved;
-        rv = apr_xlate_conv_buffer(ctx->xlate,
-                                   ctx->buf,
-                                   &tmp_input_len,
-                                   *out_str,
-                                   out_len);
+        rv = ctx->xlate_conv_buffer(ctx->xlate,
+                                    ctx->buf,
+                                    &tmp_input_len,
+                                    *out_str,
+                                    out_len);
     } while (rv == APR_INCOMPLETE && *cur_len);
 
     if (rv == APR_SUCCESS) {
@@ -737,10 +780,10 @@
             else {
                 apr_size_t old_buffer_avail = *buffer_avail;
                 apr_size_t old_bucket_avail = bucket_avail;
-                rv = apr_xlate_conv_buffer(ctx->xlate,
-                                           bucket, &bucket_avail,
-                                           buffer,
-                                           buffer_avail);
+                rv = ctx->xlate_conv_buffer(ctx->xlate,
+                                            bucket, &bucket_avail,
+                                            buffer,
+                                            buffer_avail);
                 buffer  += old_buffer_avail - *buffer_avail;
                 bucket  += old_bucket_avail - bucket_avail;
 
@@ -803,7 +846,6 @@
     apr_bucket *dptr, *consumed_bucket;
     const char *cur_str;
     apr_size_t cur_len, cur_avail;
-    char tmp[OUTPUT_XLATE_BUF_SIZE];
     apr_size_t space_avail;
     int done;
     apr_status_t rv = APR_SUCCESS;
@@ -869,6 +911,8 @@
                 if (apr_xlate_sb_get(ctx->xlate, &ctx->is_sb) != APR_SUCCESS) {
                     ctx->is_sb = 0;
                 }
+                ctx->xlate_conv_buffer = dc->ignore_ilseq == II_IGNOREILSEQ
+                                         ? xlate_conv_buffer_no_eilseq :
apr_xlate_conv_buffer;
             }
         }
         else {
@@ -891,11 +935,14 @@
     if (!ctx->ran) {  /* filter never ran before */
         chk_filter_chain(f);
         ctx->ran = 1;
-        if (!ctx->noop && !ctx->is_sb) {
-            /* We're not converting between two single-byte charsets, so unset
-             * Content-Length since it is unlikely to remain the same.
-             */
-            apr_table_unset(f->r->headers_out, "Content-Length");
+        if (!ctx->noop) {
+            ctx->tmp = apr_palloc(f->r->pool, OUTPUT_XLATE_BUF_SIZE);
+            if (!ctx->is_sb) {
+                /* We're not converting between two single-byte charsets, so unset
+                 * Content-Length since it is unlikely to remain the same.
+                 */
+                apr_table_unset(f->r->headers_out, "Content-Length");
+            }
         }
     }
 
@@ -906,7 +953,7 @@
     dptr = APR_BRIGADE_FIRST(bb);
     done = 0;
     cur_len = 0;
-    space_avail = sizeof(tmp);
+    space_avail = OUTPUT_XLATE_BUF_SIZE;
     consumed_bucket = NULL;
     while (!done) {
         if (!cur_len) { /* no bytes left to process in the current bucket... */
@@ -942,7 +989,7 @@
             dptr = APR_BUCKET_NEXT(dptr); /* get ready for when we access the
                                           * next bucket */
         }
-        /* Try to fill up our tmp buffer with translated data. */
+        /* Try to fill up our ctx->tmp buffer with translated data. */
         cur_avail = cur_len;
 
         if (cur_len) { /* maybe we just hit the end of a pipe (len = 0) ? */
@@ -952,15 +999,15 @@
                  */
                 char *tmp_tmp;
 
-                tmp_tmp = tmp + sizeof(tmp) - space_avail;
+                tmp_tmp = ctx->tmp + OUTPUT_XLATE_BUF_SIZE - space_avail;
                 rv = finish_partial_char(ctx,
                                          &cur_str, &cur_len,
                                          &tmp_tmp, &space_avail);
             }
             else {
-                rv = apr_xlate_conv_buffer(ctx->xlate,
-                                           cur_str, &cur_avail,
-                                           tmp + sizeof(tmp) - space_avail,
&space_avail);
+                rv = ctx->xlate_conv_buffer(ctx->xlate,
+                                            cur_str, &cur_avail,
+                                            ctx->tmp + OUTPUT_XLATE_BUF_SIZE -
space_avail, &space_avail);
 
                 /* Update input ptr and len after consuming some bytes */
                 cur_str += cur_len - cur_avail;
@@ -985,19 +1032,19 @@
             /* It is time to flush, as there is not enough space left in the
              * current output buffer to bother with converting more data.
              */
-            rv = send_downstream(f, tmp, sizeof(tmp) - space_avail);
+            rv = send_downstream(f, ctx->tmp, OUTPUT_XLATE_BUF_SIZE - space_avail);
             if (rv != APR_SUCCESS) {
                 done = 1;
             }
 
-            /* tmp is now empty */
-            space_avail = sizeof(tmp);
+            /* ctx->tmp is now empty */
+            space_avail = OUTPUT_XLATE_BUF_SIZE;
         }
     }
 
     if (rv == APR_SUCCESS) {
-        if (space_avail < sizeof(tmp)) { /* gotta write out what we converted */
-            rv = send_downstream(f, tmp, sizeof(tmp) - space_avail);
+        if (space_avail < OUTPUT_XLATE_BUF_SIZE) { /* gotta write out what we
converted */
+            rv = send_downstream(f, ctx->tmp, OUTPUT_XLATE_BUF_SIZE - space_avail);
         }
     }
     if (rv == APR_SUCCESS) {
@@ -1136,7 +1183,7 @@
                     add_charset_options,
                     NULL,
                     OR_FILEINFO,
-                    "valid options: ImplicitAdd, NoImplicitAdd, DebugLevel=n"),
+                    "valid options: ImplicitAdd, NoImplicitAdd, IgnoreIlSeq,
NotIgnoreIlSeq, DebugLevel=n"),
     {NULL}
 };
 
--- httpd/httpd/trunk/docs/manual/mod/mod_charset_lite.xml	Tue Jul 11 20:55:32 2006
+++ httpd/httpd/trunk/docs/manual/mod/mod_charset_lite.xml	Tue Jul 11 20:55:32 2006
@@ -158,7 +158,7 @@
 <name>CharsetOptions</name>
 <description>Configures charset translation behavior</description>
 <syntax>CharsetOptions <var>option</var> [<var>option</var>] ...</syntax>
-<default>CharsetOptions DebugLevel=0 NoImplicitAdd</default>
+<default>CharsetOptions DebugLevel=0 ImplicitAdd NotIgnoreIlSeq</default>
 <contextlist><context>server config</context>
 <context>virtual host</context><context>directory</context>
 <context>.htaccess</context>
@@ -193,6 +193,15 @@
       >AddOutputFilter</directive> directive, <code>NoImplicitAdd</code>
       should be specified so that <module>mod_charset_lite</module>
       doesn't add its filter.</dd>
+
+      <dt><code>IgnoreIlSeq | NotIgnoreIlSeq</code></dt>
+
+      <dd>The <code>IgnoreIlSeq</code> keyword specifies that
+      <module>mod_charset_lite</module> should ignore translation
+      errors caused by illegal byte sequences of the input buffer.
+      This is similar to &quot;-c&quot; option of the iconv(1) program.
+      If <code>NotIgnoreIlSeq</code> is specified, an illegal byte
+      sequence may cause the connection to be aborted.</dd>
     </dl>
 </usage>
 </directivesynopsis>