Index: src/java/org/apache/poi/hssf/record/SSTRecord.java =================================================================== RCS file: /home/cvspublic/jakarta-poi/src/java/org/apache/poi/hssf/record/SSTRecord.java,v retrieving revision 1.8 diff -u -r1.8 SSTRecord.java --- src/java/org/apache/poi/hssf/record/SSTRecord.java 26 Jun 2003 13:33:47 -0000 1.8 +++ src/java/org/apache/poi/hssf/record/SSTRecord.java 5 Sep 2003 09:53:45 -0000 @@ -72,6 +72,7 @@ * @author Andrew C. Oliver (acoliver at apache dot org) * @author Marc Johnson (mjohnson at apache dot org) * @author Glen Stampoultzis (glens at apache.org) + * @author Amichai Rothman * * @see org.apache.poi.hssf.record.LabelSSTRecord * @see org.apache.poi.hssf.record.ContinueRecord @@ -206,14 +207,15 @@ * Add a string and assert the encoding (8-bit or 16-bit) to be * used. *

- * USE THIS METHOD AT YOUR OWN RISK. IF YOU FORCE 8-BIT ENCODING, - * YOU MAY CORRUPT YOUR STRING. IF YOU FORCE 16-BIT ENCODING AND - * IT ISN'T NECESSARY, YOU WILL WASTE SPACE WHEN THIS RECORD IS - * WRITTEN OUT. + * Note: If 8-bit encoding is requested but the given string + * cannot be represented in 8-bit, 16-bit encoding will be used. + * On the other hand, requesting 16-bit encoding for a string + * that can be represented using 8-bit, is wasteful of space. * * @param string string to be added * @param useUTF16 if true, forces 16-bit encoding. If false, - * forces 8-bit encoding + * uses 8-bit encoding unless the string cannot + * be represented by 8-bit characters. * * @return the index of that string in the table */ @@ -228,8 +230,7 @@ ucs.setString( str ); ucs.setCharCount( (short) str.length() ); - ucs.setOptionFlags( (byte) ( useUTF16 ? 1 - : 0 ) ); + ucs.setOptionFlags( (byte) ( useUTF16 || ucs.isUncompressedUnicode() ? 1 : 0 ) ); Integer integer = (Integer) field_3_strings.getKeyForValue( ucs ); if ( integer != null ) Index: src/java/org/apache/poi/hssf/record/UnicodeString.java =================================================================== RCS file: /home/cvspublic/jakarta-poi/src/java/org/apache/poi/hssf/record/UnicodeString.java,v retrieving revision 1.11 diff -u -r1.11 UnicodeString.java --- src/java/org/apache/poi/hssf/record/UnicodeString.java 19 Aug 2003 14:07:40 -0000 1.11 +++ src/java/org/apache/poi/hssf/record/UnicodeString.java 5 Sep 2003 09:53:46 -0000 @@ -68,6 +68,7 @@ * @author Andrew C. Oliver * @author Marc Johnson (mjohnson at apache dot org) * @author Glen Stampoultzis (glens at apache.org) + * @author Amichai Rothman * @version 2.0-pre */ @@ -155,13 +156,13 @@ if ((field_2_optionflags & 1) == 0) { try { - field_3_string = new String(data, 3, getCharCount(), + field_3_string = new String(data, 3, getCharCount(), StringUtil.getPreferredEncoding()); } catch (UnsupportedEncodingException e) { // Extract the message out of our encoding // error and then bubble a runtime exception. String errorMessage = e.getMessage(); - + // Make sure the message isn't null if (errorMessage == null) { errorMessage = e.toString(); @@ -264,6 +265,16 @@ public void setString(String string) { field_3_string = string; + // check if using compressed unicode will ruin data + try { + if (!isUncompressedUnicode() && + !getString().equals(new String(getString().getBytes("ISO-8859-1"),"ISO-8859-1"))) { + setOptionFlags((byte)(getOptionFlags() | 0x01)); // if so, make it uncompressed + } + } catch (UnsupportedEncodingException uee) { + // should never happen, since "ISO-8859-1" support is required by the Java specification + } + // adjust length if (getCharCount() < field_3_string.length()) { setCharCount(); @@ -306,48 +317,20 @@ public int serialize(int offset, byte [] data) { - int charsize = 1; - - if (getOptionFlags() == 1) - { - charsize = 2; - } - - // byte[] retval = new byte[ 3 + (getString().length() * charsize)]; + // header fields LittleEndian.putShort(data, 0 + offset, getCharCount()); data[ 2 + offset ] = getOptionFlags(); -// System.out.println("Unicode: We've got "+retval[2]+" for our option flag"); - try { - String unicodeString = new -String(getString().getBytes("Unicode"),"Unicode"); - if (getOptionFlags() == 0) - { - StringUtil.putCompressedUnicode(unicodeString, data, 0x3 + -offset); - } - else - { - StringUtil.putUnicodeLE(unicodeString, data, - 0x3 + offset); - } - } - catch (Exception e) { - if (getOptionFlags() == 0) - { - StringUtil.putCompressedUnicode(getString(), data, 0x3 + - offset); - } - else - { - StringUtil.putUnicodeLE(getString(), data, - 0x3 + offset); - } - } + // compressed/uncompressed string + if (isUncompressedUnicode()) + StringUtil.putUnicodeLE(getString(), data, 0x3 + offset); + else + StringUtil.putCompressedUnicode(getString(), data, 0x3 + offset); + return getRecordSize(); } - private boolean isUncompressedUnicode() + protected boolean isUncompressedUnicode() { return (getOptionFlags() & 0x01) == 1; }