Resolve character encodings through a pre-populated Charset cache.

What this patch changes:

  Request.java
      The validity check that precedes coyoteRequest.setCharacterEncoding(enc)
      no longer builds a throwaway String from a one-byte buffer; it calls
      B2CConverter.getCharset(enc) instead.

  B2CConverter.java
      Adds a static map from lower-cased (Locale.US) charset names and
      aliases to Charset objects, filled from Charset.availableCharsets()
      in a static initializer, and a public static getCharset(String) that
      throws UnsupportedEncodingException for any name not in that map.
      ReadConvertor is now constructed from a Charset instead of an
      encoding name and no longer declares UnsupportedEncodingException.

  ByteChunk.java
      Adds DEFAULT_CHARSET (the Charset for DEFAULT_CHARACTER_ENCODING) and
      makes toStringInternal() decode via Charset.decode(ByteBuffer) rather
      than new String(byte[], int, int, String).

Review notes:

  The old toStringInternal() assigned DEFAULT_CHARACTER_ENCODING to enc
  when enc was null; the new code picks DEFAULT_CHARSET without assigning
  to enc.

  The cache is declared as Map<String, Charset>. getCharset() assigns the
  result of the map lookup directly to a Charset variable, which requires
  the typed declaration.

  Javadoc was added for the cache field and for getCharset(); the new-side
  line counts and offsets of the B2CConverter.java hunks include those
  twelve extra lines.

  Indentation and the position of blank context lines in this copy are
  reconstructed from the hunk line counts, so apply it with
  whitespace-insensitive matching (for example "patch -l"). Spelling in
  context lines is left untouched because context must match the target
  files.

--- java/org/apache/catalina/connector/Request.java (revision 1139393)
+++ java/org/apache/catalina/connector/Request.java (working copy)
@@ -1516,15 +1516,12 @@
 
         if (usingReader)
             return;
-
-        // Ensure that the specified encoding is valid
-        byte buffer[] = new byte[1];
-        buffer[0] = (byte) 'a';
-        String dummy = new String(buffer, enc);
 
+        // Confirm that the encoding name is valid
+        B2CConverter.getCharset(enc);
+
         // Save the validated encoding
         coyoteRequest.setCharacterEncoding(enc);
-
     }
 
 
--- java/org/apache/tomcat/util/buf/B2CConverter.java (revision 1139393)
+++ java/org/apache/tomcat/util/buf/B2CConverter.java (working copy)
@@ -22,6 +22,10 @@
 import java.io.InputStream;
 import java.io.InputStreamReader;
 import java.io.UnsupportedEncodingException;
+import java.nio.charset.Charset;
+import java.util.HashMap;
+import java.util.Locale;
+import java.util.Map;
 
 /** Efficient conversion of bytes to character .
  *
@@ -39,7 +43,48 @@
 
     private static org.apache.juli.logging.Log log=
         org.apache.juli.logging.LogFactory.getLog( B2CConverter.class );
+
+    /**
+     * Lower-cased charset names and aliases mapped to their Charset.
+     * Filled by the static initializer below.
+     */
+    private static final Map<String, Charset> encodingToCharsetCache =
+        new HashMap<String, Charset>();
 
+    static {
+        for (Charset charset: Charset.availableCharsets().values()) {
+            encodingToCharsetCache.put(
+                    charset.name().toLowerCase(Locale.US), charset);
+            for (String alias : charset.aliases()) {
+                encodingToCharsetCache.put(
+                        alias.toLowerCase(Locale.US), charset);
+            }
+        }
+    }
+
+    /**
+     * Returns the Charset cached for the given encoding name. The name is
+     * lower-cased with Locale.US before the lookup.
+     *
+     * @param enc the charset name or alias to resolve
+     * @return the Charset cached for that name
+     * @throws UnsupportedEncodingException if the name is not in the cache
+     */
+    public static Charset getCharset(String enc)
+            throws UnsupportedEncodingException {
+
+        // Encoding names should all be ASCII
+        String lowerCaseEnc = enc.toLowerCase(Locale.US);
+
+        Charset charset = encodingToCharsetCache.get(lowerCaseEnc);
+
+        if (charset == null) {
+            // Pre-population of the cache means this must be invalid
+            throw new UnsupportedEncodingException(enc);
+        }
+        return charset;
+    }
+
     private IntermediateInputStream iis;
     private ReadConvertor conv;
     private String encoding;
@@ -114,7 +159,7 @@
     {
         // destroy the reader/iis
         iis=new IntermediateInputStream();
-        conv=new ReadConvertor( iis, encoding );
+        conv=new ReadConvertor( iis, getCharset(encoding) );
     }
 
     private final int debug=0;
@@ -192,10 +237,9 @@
 
     /** Create a converter.
      */
-    public ReadConvertor( IntermediateInputStream in, String enc )
-        throws UnsupportedEncodingException
+    public ReadConvertor( IntermediateInputStream in, Charset charset )
     {
-        super( in, enc );
+        super( in, charset );
     }
 
     /** Overriden - will do nothing but reset internal state.
--- java/org/apache/tomcat/util/buf/ByteChunk.java (revision 1139393)
+++ java/org/apache/tomcat/util/buf/ByteChunk.java (working copy)
@@ -19,6 +19,8 @@
 
 import java.io.IOException;
 import java.io.Serializable;
+import java.nio.ByteBuffer;
+import java.nio.charset.Charset;
 
 /*
  * In a server it is very important to be able to operate on
@@ -95,7 +97,12 @@
         8859_1, and this object is used mostly for servlets.
     */
     public static final String DEFAULT_CHARACTER_ENCODING="ISO-8859-1";
-
+
+    /** Default Charset to use for interpreting byte[] as a String
+    */
+    public static final Charset DEFAULT_CHARSET =
+        Charset.forName(DEFAULT_CHARACTER_ENCODING);
+
     // byte[]
     private byte[] buff;
 
@@ -493,8 +500,14 @@
     public String toStringInternal() {
         String strValue=null;
         try {
-            if( enc==null ) enc=DEFAULT_CHARACTER_ENCODING;
-            strValue = new String( buff, start, end-start, enc );
+            Charset charset;
+            if (enc == null) {
+                charset = DEFAULT_CHARSET;
+            } else {
+                charset = B2CConverter.getCharset(enc);
+            }
+            strValue = charset.decode(
+                ByteBuffer.wrap(buff, start, end-start)).toString();
             /*
              Does not improve the speed too much on most systems,
              it's safer to use the "clasical" new String().