Index: java/org/apache/coyote/http11/Http11InputBuffer.java =================================================================== --- java/org/apache/coyote/http11/Http11InputBuffer.java (revision 1828885) +++ java/org/apache/coyote/http11/Http11InputBuffer.java (working copy) @@ -456,7 +456,8 @@ end = pos; } else if (chr == Constants.QUESTION && parsingRequestLineQPos == -1) { parsingRequestLineQPos = pos; - } else if (HttpParser.isNotRequestTarget(chr)) { + } else if ((parsingRequestLineQPos == -1 && HttpParser.isNotRequestTarget(chr)) + || (parsingRequestLineQPos >= 0 && HttpParser.isNotQuery(chr))) { throw new IllegalArgumentException(sm.getString("iib.invalidRequestTarget")); } } Index: java/org/apache/tomcat/util/http/parser/HttpParser.java =================================================================== --- java/org/apache/tomcat/util/http/parser/HttpParser.java (revision 1828885) +++ java/org/apache/tomcat/util/http/parser/HttpParser.java (working copy) @@ -38,11 +38,15 @@ private static final int ARRAY_SIZE = 128; + private static final boolean USE_URL_LIVING_STANDARD = + Boolean.getBoolean("org.apache.tomcat.util.http.parser.HttpParser.USE_URL_LIVING_STANDARD"); + private static final boolean[] IS_CONTROL = new boolean[ARRAY_SIZE]; private static final boolean[] IS_SEPARATOR = new boolean[ARRAY_SIZE]; private static final boolean[] IS_TOKEN = new boolean[ARRAY_SIZE]; private static final boolean[] IS_HEX = new boolean[ARRAY_SIZE]; private static final boolean[] IS_NOT_REQUEST_TARGET = new boolean[ARRAY_SIZE]; + private static final boolean[] IS_NOT_QUERY = new boolean[ARRAY_SIZE]; private static final boolean[] IS_HTTP_PROTOCOL = new boolean[ARRAY_SIZE]; private static final boolean[] IS_ALPHA = new boolean[ARRAY_SIZE]; private static final boolean[] IS_NUMERIC = new boolean[ARRAY_SIZE]; @@ -72,13 +76,31 @@ IS_HEX[i] = true; } - // Not valid for request target. - // Combination of multiple rules from RFC7230 and RFC 3986. 
Must be - // ASCII, no controls plus a few additional characters excluded - if (IS_CONTROL[i] || i > 127 || - i == ' ' || i == '\"' || i == '#' || i == '<' || i == '>' || i == '\\' || - i == '^' || i == '`' || i == '{' || i == '|' || i == '}') { - IS_NOT_REQUEST_TARGET[i] = true; + // Not valid for request target and query. + if (USE_URL_LIVING_STANDARD) { + // This complies with the URL living standard: https://url.spec.whatwg.org/#query-state + // If byte is less than 0x21 (!), greater than 0x7E (~), or is 0x22 ("), 0x23 (#), 0x3C (<), or 0x3E (>), append byte, percent encoded, to url's query. + if (IS_CONTROL[i] || i > 127 || + i == ' ' || i == '\"' || i == '#' || i == '<' || i == '>') { + IS_NOT_QUERY[i] = true; + } + // The C0 control percent-encode set are the C0 controls and all code points greater than U+007E (~). + // The fragment percent-encode set is the C0 control percent-encode set and U+0020 SPACE, U+0022 ("), U+003C (<), U+003E (>), and U+0060 (`). + // The path percent-encode set is the fragment percent-encode set and U+0023 (#), U+003F (?), U+007B ({), and U+007D (}). + if (IS_CONTROL[i] || i > 127 || + i == ' ' || i == '\"' || i == '#' || i == '<' || i == '>' || + i == '`' || i == '{' || i == '}') { + IS_NOT_REQUEST_TARGET[i] = true; + } + } else { + // Combination of multiple rules from RFC7230 and RFC 3986. 
Must be + // ASCII, no controls plus a few additional characters excluded + if (IS_CONTROL[i] || i > 127 || + i == ' ' || i == '\"' || i == '#' || i == '<' || i == '>' || i == '\\' || + i == '^' || i == '`' || i == '{' || i == '|' || i == '}') { + IS_NOT_REQUEST_TARGET[i] = true; + IS_NOT_QUERY[i] = true; + } } // Not valid for HTTP protocol @@ -163,6 +185,16 @@ } + public static boolean isNotQuery(int c) { + // Fast for valid query characters, slower for some incorrect ones + try { + return IS_NOT_QUERY[c]; + } catch (ArrayIndexOutOfBoundsException ex) { + return true; + } + } + + public static boolean isHttpProtocol(int c) { // Fast for valid HTTP protocol characters, slower for some incorrect // ones Index: java/org/apache/tomcat/util/net/TLSClientHelloExtractor.java =================================================================== --- java/org/apache/tomcat/util/net/TLSClientHelloExtractor.java (revision 1828885) +++ java/org/apache/tomcat/util/net/TLSClientHelloExtractor.java (working copy) @@ -271,7 +271,7 @@ // Read the target while (chr != ' ' && chr != '\t') { - if (HttpParser.isNotRequestTarget(chr) || !bb.hasRemaining()) { + if (HttpParser.isNotQuery(chr) || !bb.hasRemaining()) { return false; } chr = bb.get();