*** misc/icu/source/common/brkiter.cpp Thu Aug 8 10:39:14 2002 --- misc/build/icu/source/common/brkiter.cpp Tue May 27 15:21:32 2003 *************** *** 39,51 **** // Creates a break iterator for word breaks. BreakIterator* ! BreakIterator::createWordInstance(const Locale& key, UErrorCode& status) { // WARNING: This routine is currently written specifically to handle only the // default rules files and the alternate rules files for Thai. This function // will have to be made fully general at some time in the future! BreakIterator* result = NULL; - const char* filename = "word"; if (U_FAILURE(status)) return NULL; --- 39,50 ---- // Creates a break iterator for word breaks. BreakIterator* ! BreakIterator::createWordInstance(const Locale& key, const char* filename, UErrorCode& status) { // WARNING: This routine is currently written specifically to handle only the // default rules files and the alternate rules files for Thai. This function // will have to be made fully general at some time in the future! BreakIterator* result = NULL; if (U_FAILURE(status)) return NULL; *************** *** 78,83 **** --- 77,88 ---- } return result; + } + + BreakIterator* + BreakIterator::createWordInstance(const Locale& key, UErrorCode& status) + { + return createWordInstance(key, "word", status); } // ------------------------------------- *** misc/icu/source/common/unicode/brkiter.h Thu Aug 8 10:39:14 2002 --- misc/build/icu/source/common/unicode/brkiter.h Tue May 27 15:21:32 2003 *************** *** 348,353 **** --- 348,355 ---- */ static BreakIterator* createWordInstance(const Locale& where, UErrorCode& status); + static BreakIterator* createWordInstance(const Locale& where, const char* filename, + UErrorCode& status); /** * Create BreakIterator for line-breaks using specified locale. 
*** misc/icu/source/config/mh-linux Fri Jan 25 04:35:04 2002 --- misc/build/icu/source/config/mh-linux Tue May 27 15:21:32 2003 *************** *** 22,27 **** --- 22,34 ---- LD_RPATH= LD_RPATH_PRE = -Wl,-rpath, + ## Force RPATH=$ORIGIN to locate own dependencies w/o need for LD_LIBRARY_PATH + ENABLE_RPATH=YES + RPATHLDFLAGS=${LD_RPATH_PRE}'$$ORIGIN' + + #SH# ENABLE_RPATH=YES + #SH# RPATHLDFLAGS="${LD_RPATH_PRE}'$$ORIGIN'" + ## Compiler switch to embed a library name LD_SONAME = -Wl,-soname -Wl,$(MIDDLE_SO_TARGET) *** misc/icu/source/config/mh-solaris Tue Jul 30 12:46:10 2002 --- misc/build/icu/source/config/mh-solaris Tue May 27 15:21:32 2003 *************** *** 23,38 **** ## Commands to link ## For Sun Workshop, use CC to link to bring in C++ runtime ! LINK.c= $(CXX) $(CXXFLAGS) $(LDFLAGS) ! LINK.cc= $(CXX) $(CXXFLAGS) $(LDFLAGS) ## Commands to make a shared library SHLIB.c= $(CC) $(CFLAGS) $(LDFLAGS) -G ! SHLIB.cc= $(CXX) $(CXXFLAGS) $(LDFLAGS) -G ## Compiler switch to embed a runtime search path LD_RPATH= -R LD_RPATH_PRE= ## Compiler switch to embed a library name LD_SONAME = -h $(MIDDLE_SO_TARGET) --- 23,46 ---- ## Commands to link ## For Sun Workshop, use CC to link to bring in C++ runtime ! ## For Sun Workshop, -norunpath stops the compiler from recording a useless RPATH ! LINK.c= $(CXX) $(CXXFLAGS) $(LDFLAGS) -norunpath ! LINK.cc= $(CXX) $(CXXFLAGS) $(LDFLAGS) -norunpath ## Commands to make a shared library SHLIB.c= $(CC) $(CFLAGS) $(LDFLAGS) -G ! 
SHLIB.cc= $(CXX) $(CXXFLAGS) $(LDFLAGS) -G -norunpath ## Compiler switch to embed a runtime search path LD_RPATH= -R LD_RPATH_PRE= + + ## Force RPATH=$ORIGIN to locate own dependencies w/o need for LD_LIBRARY_PATH + ENABLE_RPATH=YES + RPATHLDFLAGS=${LD_RPATH}'$$ORIGIN' + + #SH# ENABLE_RPATH=YES + #SH# RPATHLDFLAGS="${LD_RPATH}'$$ORIGIN'" ## Compiler switch to embed a library name LD_SONAME = -h $(MIDDLE_SO_TARGET) *** misc/icu/source/data/Makefile.in Fri Aug 16 03:08:02 2002 --- misc/build/icu/source/data/Makefile.in Tue May 27 15:21:32 2003 *************** *** 127,133 **** ## BRK files # ALL of these files can be deleted (the following BRK files) - they are copied ! BRK_FILES_SHORT=char.brk line.brk line_th.brk sent.brk word.brk title.brk word_th.brk BRK_FILES=$(BRK_FILES_SHORT:%=$(BUILDDIR)/$(ICUDT)%) # don't include thaidict.brk - it goes into a resource bundle - plus it isn't deleted --- 127,133 ---- ## BRK files # ALL of these files can be deleted (the following BRK files) - they are copied ! BRK_FILES_SHORT=char.brk line.brk line_th.brk sent.brk word.brk edit_word.brk dict_word.brk count_word.brk title.brk word_th.brk BRK_FILES=$(BRK_FILES_SHORT:%=$(BUILDDIR)/$(ICUDT)%) # don't include thaidict.brk - it goes into a resource bundle - plus it isn't deleted *** misc/icu/source/data/brkitr/count_word.txt Tue May 27 15:41:24 2003 --- misc/build/icu/source/data/brkitr/count_word.txt Tue May 27 15:21:32 2003 *************** *** 1 **** ! dummy --- 1,127 ---- ! # ! # Copyright (C) 2002, International Business Machines Corporation and others. ! # All Rights Reserved. ! # ! # file: word.txt ! # ! # ICU Word Break Rules ! # See Unicode Technical Report #29. ! # These rules are based on the proposed draft dated 2002-08-06 ! # ! ! ! ! #################################################################################### ! # ! # Definitions imported from Line Break Rules. ! # ! #################################################################################### ! 
$Numeric = [ \u0030-\u0039 \u0660-\u0669 \u06F0-\u06F9 \u0966-\u096F \u09E6-\u09EF ! \u0A66-\u0A6F \u0AE6-\u0AEF \u0B66-\u0B6F \u0BE7-\u0BEF \u0C66-\u0C6F ! \u0CE6-\u0CEF \u0D66-\u0D6F \u0E50-\u0E59 \u0ED0-\u0ED9 \u0F20-\u0F29 ! \u1040-\u1049 \u1369-\u1371 \u17E0-\u17E9 \u1810-\u1819 \U0001D7CE-\U0001D7FF]; ! ! ! ! #################################################################################### ! # ! # Definitions imported from Character Break Rules. ! # ! #################################################################################### ! # ! # Character Class Definitions. ! # The names are those from TR29. ! # ! $Control = [[:Zl:] [:Zp:] [:Cc:] [:Cf:]]; ! ! # Note on $Extend: Earlier versions of TR29 included Mc characters. ! # To avoid test breakage, Mc is still included for the time being. ! # $Extend = [[:Mn:] [:Me:] \uff9e-\uff9f]; # FF9E..FF9F ; Other_Grapheme_Extend ! $Extend = [[:Mn:] [:Me:] [:Mc:] \uff9e-\uff9f]; # FF9E..FF9F ; Other_Grapheme_Extend ! ! ! ! ! #################################################################################### ! # ! # Word Break Rules. Definitions and Rules specific to word break begin Here. ! # ! #################################################################################### ! ! $Katakana = [[:Kana:] \u30fc \uff70 \uff9e-\uff9f]; ! $Hiragana = [[:Hira:]]; ! $Letter = [[[:Alphabetic:] \u02b9-\u02ba \u02c2-\u02cf \u02d2-\u02df \u02e5-\u02ed \u05f3] - ! [[:THAI:] [:LAO:] $Hiragana $Katakana ]]; ! $Format = [[:Cf:]]; ! ! $MidLetter = [\u0027 \u00ad \u05f4 \u2019]; ! ! $MidNumLet = [\u002e]; ! ! ! # From Line Break, IS - Numeric Separator (Infix) ! # $IS = [\u002c \u002e \u003a \u003b \u0589]; ! $MidNum = [\u002c \u003b \u0589]; ! ! # ! # "Extended" definitions. Classes of characters including trailing combining chars and, ! # for types of chars that can appear in the interior of a word only, ! # trailing format characters. ! # ! $LetterEx = $Letter $Extend*; ! $NumericEx = $Numeric $Extend*; ! 
$MidNumExF = $MidNum $Extend* $Format*; ! $MidNumLetExF = $MidNumLet $Extend* $Format*; ! $MidLetterExF = $MidLetter $Extend* $Format*; ! ! $word_pad=[[:P:][:S:][:Z:][:C:]]; ! ! # ! # Numbers. Rules 6, 9, 10 form the TR. ! # ! $NumberSequence = $NumericEx ($Format* ($MidNumExF | $MidNumLetExF)? $NumericEx)*; ! $NumberSequence $word_pad* {100}; ! ! # ! # Words. Alpha-numerics. Rule 3 - 10 ! # - must include at least one letter. ! # - may include both letters and numbers. ! # - may include MideLetter, MidNumber punctuation. ! # ! $LetterSequence = $LetterEx ($Format* ($MidLetterExF | $MidNumLetExF)? $LetterEx)*; ! $NumberSequence? $LetterSequence ($NumberSequence | $LetterSequence)* $word_pad* {200}; ! ! # ! # Hiragana and Katakana ! # ! $Hiragana $Extend* {300}; ! $Katakana $Extend* ($Format* $Katakana $Extend*)* {300}; ! ! # ! # Ideographic Characters. Stand by themselves as words. ! # ! # [:IDEOGRAPHIC:] $Extend* $word_pad* {400}; ! ! # ! # Everything Else, with no tag. ! # Non-Control chars combine with $Extend (combining) chars. ! # Controls are returned by themselves. ! # ! [^$Control] $Extend* $word_pad*; ! \r\n; ! .; ! ! # ! # Reverse Rules. Back up over any of the chars that can group together. ! # (Reverse rules do not need to be exact; they can back up too far, ! # but must back up at least enough, and must stop on a boundary.) ! # ! ! # NonStarters are the set of all characters that can appear at the 2nd - nth position of ! # a word. (They may also be the first.) The reverse rule skips over these, until it ! # reaches something that can only be the start (and probably only) char in a "word". ! # A space or punctuation meets the test. ! # ! $NonStarters = [$Numeric $Letter $Katakana $MidLetter $MidNum $MidNumLet $Extend $Format \u000a]; ! ! ! $NonStarters* .; *** misc/icu/source/data/brkitr/dict_word.txt Tue May 27 15:41:24 2003 --- misc/build/icu/source/data/brkitr/dict_word.txt Tue May 27 15:21:32 2003 *************** *** 1 **** ! dummy --- 1,126 ---- ! 
# ! # Copyright (C) 2002, International Business Machines Corporation and others. ! # All Rights Reserved. ! # ! # file: word.txt ! # ! # ICU Word Break Rules ! # See Unicode Technical Report #29. ! # These rules are based on the proposed draft dated 2002-08-06 ! # ! ! ! ! #################################################################################### ! # ! # Definitions imported from Line Break Rules. ! # ! #################################################################################### ! $Numeric = [ \u0030-\u0039 \u0660-\u0669 \u06F0-\u06F9 \u0966-\u096F \u09E6-\u09EF ! \u0A66-\u0A6F \u0AE6-\u0AEF \u0B66-\u0B6F \u0BE7-\u0BEF \u0C66-\u0C6F ! \u0CE6-\u0CEF \u0D66-\u0D6F \u0E50-\u0E59 \u0ED0-\u0ED9 \u0F20-\u0F29 ! \u1040-\u1049 \u1369-\u1371 \u17E0-\u17E9 \u1810-\u1819 \U0001D7CE-\U0001D7FF]; ! ! ! ! #################################################################################### ! # ! # Definitions imported from Character Break Rules. ! # ! #################################################################################### ! # ! # Character Class Definitions. ! # The names are those from TR29. ! # ! $Control = [[:Zl:] [:Zp:] [:Cc:] [:Cf:]]; ! ! # Note on $Extend: Earlier versions of TR29 included Mc characters. ! # To avoid test breakage, Mc is still included for the time being. ! # $Extend = [[:Mn:] [:Me:] \uff9e-\uff9f]; # FF9E..FF9F ; Other_Grapheme_Extend ! $Extend = [[:Mn:] [:Me:] [:Mc:] \uff9e-\uff9f]; # FF9E..FF9F ; Other_Grapheme_Extend ! ! ! ! ! #################################################################################### ! # ! # Word Break Rules. Definitions and Rules specific to word break begin Here. ! # ! #################################################################################### ! ! $Katakana = [[:Kana:] \u30fc \uff70 \uff9e-\uff9f]; ! $Hiragana = [[:Hira:]]; ! $Letter = [[[:Alphabetic:] \u0002 \u002e \u0040 \u02b9-\u02ba \u02c2-\u02cf \u02d2-\u02df \u02e5-\u02ed \u05f3] - ! [[:THAI:] [:LAO:] $Hiragana $Katakana ]]; ! 
$Format = [[:Cf:]]; ! ! $MidLetter = [\u0027 \u0060 \u0084 \u00ad \u0384 \u05f4 \u2016 \u2018 \u2019 \u2032 ]; ! ! ! # From Line Break, IS - Numeric Separator (Infix) ! # $IS = [\u002c \u002e \u003a \u003b \u0589]; ! $MidNum = [\u002c \u002e \u003b \u0040 \u0084 \u0384 \u0589 \u066b \u2018 \u2019 \u201b \u2032]; ! ! # ! # "Extended" definitions. Classes of characters including trailing combining chars and, ! # for types of chars that can appear in the interior of a word only, ! # trailing format characters. ! # ! $LetterEx = $Letter $Extend*; ! $NumericEx = $Numeric $Extend*; ! $MidNumExF = $MidNum $Extend* $Format*; ! $MidLetterExF = $MidLetter $Extend* $Format*; ! ! ! # ! # Numbers. Rules 6, 9, 10 from the TR. ! # ! $NumberSequence = $NumericEx ($Format* $MidNumExF? $NumericEx)*; ! $NumberSequence {100}; ! ! # ! # Words. Alpha-numerics. Rule 3 - 10 ! # - must include at least one letter. ! # - may include both letters and numbers. ! # - may include MidLetter, MidNumber punctuation. ! # ! $LetterSequence = $LetterEx ($Format* $MidLetterExF? $LetterEx)*; ! $NumberSequence? $LetterSequence ($NumberSequence | $LetterSequence)* {200}; ! ! # punctuations by themselves ! [:P:]*; ! ! # ! # Hiragana and Katakana ! # ! $Hiragana $Extend* {300}; ! $Katakana $Extend* ($Format* $Katakana $Extend*)* {300}; ! ! # ! # Ideographic Characters. Stand by themselves as words. ! # ! # [:IDEOGRAPHIC:] $Extend* {400}; ! ! # ! # Everything Else, with no tag. ! # Non-Control chars combine with $Extend (combining) chars. ! # Controls are returned by themselves. ! # ! [^$Control] $Extend*; ! \r\n; ! .; ! ! # ! # Reverse Rules. Back up over any of the chars that can group together. ! # (Reverse rules do not need to be exact; they can back up too far, ! # but must back up at least enough, and must stop on a boundary.) ! # ! ! # NonStarters are the set of all characters that can appear at the 2nd - nth position of ! # a word. (They may also be the first.) 
The reverse rule skips over these, until it ! # reaches something that can only be the start (and probably only) char in a "word". ! # A space or punctuation meets the test. ! # ! $NonStarters = [$Numeric $Letter $Katakana $MidLetter $MidNum $Extend $Format \u000a]; ! ! ! $NonStarters* .; *** misc/icu/source/data/brkitr/edit_word.txt Tue May 27 15:41:24 2003 --- misc/build/icu/source/data/brkitr/edit_word.txt Tue May 27 15:21:32 2003 *************** *** 1 **** ! dummy --- 1,130 ---- ! # ! # Copyright (C) 2002, International Business Machines Corporation and others. ! # All Rights Reserved. ! # ! # file: word.txt ! # ! # ICU Word Break Rules ! # See Unicode Technical Report #29. ! # These rules are based on the proposed draft dated 2002-08-06 ! # ! ! ! ! #################################################################################### ! # ! # Definitions imported from Line Break Rules. ! # ! #################################################################################### ! $Numeric = [ \u0030-\u0039 \u0660-\u0669 \u06F0-\u06F9 \u0966-\u096F \u09E6-\u09EF ! \u0A66-\u0A6F \u0AE6-\u0AEF \u0B66-\u0B6F \u0BE7-\u0BEF \u0C66-\u0C6F ! \u0CE6-\u0CEF \u0D66-\u0D6F \u0E50-\u0E59 \u0ED0-\u0ED9 \u0F20-\u0F29 ! \u1040-\u1049 \u1369-\u1371 \u17E0-\u17E9 \u1810-\u1819 \U0001D7CE-\U0001D7FF]; ! ! ! ! #################################################################################### ! # ! # Definitions imported from Character Break Rules. ! # ! #################################################################################### ! # ! # Character Class Definitions. ! # The names are those from TR29. ! # ! $Control = [[:Zl:] [:Zp:] [:Cc:] [:Cf:]]; ! ! # Note on $Extend: Earlier versions of TR29 included Mc characters. ! # To avoid test breakage, Mc is still included for the time being. ! # $Extend = [[:Mn:] [:Me:] \uff9e-\uff9f]; # FF9E..FF9F ; Other_Grapheme_Extend ! $Extend = [[:Mn:] [:Me:] [:Mc:] \uff9e-\uff9f]; # FF9E..FF9F ; Other_Grapheme_Extend ! ! ! ! ! 
#################################################################################### ! # ! # Word Break Rules. Definitions and Rules specific to word break begin Here. ! # ! #################################################################################### ! ! $Katakana = [[:Kana:] \u30fc \uff70 \uff9e-\uff9f]; ! $Hiragana = [[:Hira:]]; ! $Letter = [[[:Alphabetic:] \u0002 \u00a0 \u02b9-\u02ba \u02c2-\u02cf \u02d2-\u02df \u02e5-\u02ed \u05f3] - ! [[:THAI:] [:LAO:] $Hiragana $Katakana ]]; ! $Format = [[:Cf:]]; ! ! $MidLetter = [\u0027 \u00ad \u05f4 \u2019]; ! ! $MidNumLet = [\u002e]; ! ! ! # From Line Break, IS - Numeric Separator (Infix) ! # $IS = [\u002c \u002e \u003a \u003b \u0589]; ! $MidNum = [\u002c \u003b \u0589]; ! ! # ! # "Extended" definitions. Classes of characters including trailing combining chars and, ! # for types of chars that can appear in the interior of a word only, ! # trailing format characters. ! # ! $LetterEx = $Letter $Extend*; ! $NumericEx = $Numeric $Extend*; ! $MidNumExF = $MidNum $Extend* $Format*; ! $MidNumLetExF = $MidNumLet $Extend* $Format*; ! $MidLetterExF = $MidLetter $Extend* $Format*; ! ! ! # ! # Numbers. Rules 6, 9, 10 form the TR. ! # ! $NumberSequence = $NumericEx ($Format* ($MidNumExF | $MidNumLetExF)? $NumericEx)*; ! $NumberSequence {100}; ! ! # ! # Words. Alpha-numerics. Rule 3 - 10 ! # - must include at least one letter. ! # - may include both letters and numbers. ! # - may include MideLetter, MidNumber punctuation. ! # ! $LetterSequence = $LetterEx ($Format* ($MidLetterExF | $MidNumLetExF)? $LetterEx)*; ! $NumberSequence? $LetterSequence ($NumberSequence | $LetterSequence)* {200}; ! ! # Punctuations by themselves ! [[:P:]-[\u002E]]*; ! [\u002E]*; ! ! # ! # Hiragana and Katakana ! # ! $Hiragana $Extend* {300}; ! $Katakana $Extend* ($Format* $Katakana $Extend*)* {300}; ! ! # ! # Ideographic Characters. Stand by themselves as words. ! # ! # [:IDEOGRAPHIC:] $Extend* {400}; ! ! # ! # Everything Else, with no tag. ! 
# Non-Control chars combine with $Extend (combining) chars. ! # Controls are returned by themselves. ! # ! [^$Control] $Extend*; ! \r\n; ! .; ! ! # ! # Reverse Rules. Back up over any of the chars that can group together. ! # (Reverse rules do not need to be exact; they can back up too far, ! # but must back up at least enough, and must stop on a boundary.) ! # ! ! # NonStarters are the set of all characters that can appear at the 2nd - nth position of ! # a word. (They may also be the first.) The reverse rule skips over these, until it ! # reaches something that can only be the start (and probably only) char in a "word". ! # A space or punctuation meets the test. ! # ! $NonStarters = [$Numeric $Letter $Katakana $MidLetter $MidNum $MidNumLet $Extend $Format \u000a]; ! ! ! $NonStarters* .; *** misc/icu/source/data/brkitr/line.txt Tue Jul 23 08:02:06 2002 --- misc/build/icu/source/data/brkitr/line.txt Tue May 27 15:21:32 2003 *************** *** 68,74 **** $SG = [ \uD800-\uDFFF]; ! $AL = [ \u0023 \u0026 \u002A \u003C-\u003E \u0040-\u005A \u005E-\u007A \u007E \u00A6 \u00A9 \u00AC \u00AE-\u00AF \u00B5 \u00C0-\u00C5 \u00C7-\u00CF \u00D1-\u00D6 \u00D9-\u00DD \u00E2-\u00E5 \u00E7 \u00EB \u00EE-\u00EF \u00F1 \u00F4-\u00F6 \u00FB \u00FD \u00FF-\u0100 \u0102-\u0110 --- 68,75 ---- $SG = [ \uD800-\uDFFF]; ! $AL = [ $SA $XX $AI ! \u0023 \u0026 \u002A \u002B \u003C-\u003E \u0040-\u005A \u005E-\u007A \u007E \u00A6 \u00A9 \u00AC \u00AE-\u00AF \u00B5 \u00C0-\u00C5 \u00C7-\u00CF \u00D1-\u00D6 \u00D9-\u00DD \u00E2-\u00E5 \u00E7 \u00EB \u00EE-\u00EF \u00F1 \u00F4-\u00F6 \u00FB \u00FD \u00FF-\u0100 \u0102-\u0110 *************** *** 210,216 **** \U0001D165-\U0001D169 \U0001D16D-\U0001D182 \U0001D185-\U0001D18B \U0001D1AA-\U0001D1AD \U000E0001 \U000E0020-\U000E007F]; ! 
$PR = [ \u0024 \u002B \u005C \u00A3-\u00A5 \u00B1 \u09F2-\u09F3 \u0E3F \u17DB \u20A0-\u20A6 \u20A8-\u20B1 \u2116 \u2212-\u2213 \uFE69 \uFF04 \uFFE1 \uFFE5-\uFFE6]; --- 211,217 ---- \U0001D165-\U0001D169 \U0001D16D-\U0001D182 \U0001D185-\U0001D18B \U0001D1AA-\U0001D1AD \U000E0001 \U000E0020-\U000E007F]; ! $PR = [ \u0024 \u005C \u00A3-\u00A5 \u00B1 \u09F2-\u09F3 \u0E3F \u17DB \u20A0-\u20A6 \u20A8-\u20B1 \u2116 \u2212-\u2213 \uFE69 \uFF04 \uFFE1 \uFFE5-\uFFE6]; *************** *** 326,332 **** # $NumberInterior = $IDcm | ($NUcm | $ALcm | $IS $NUcm)+; $Number = $PR? ($OPcm | $HYcm)? $NumberInterior $CL? $POcm?; # Fancy Number 18 ! $Word = (($IDcm | ($ALcm | $NUcm)+) ($POcm? | $INcm?)) ; # Alpha-numeric. 16, 17 $Dashes = (($B2cm $SPcm*)*); # Dashes 11a --- 327,333 ---- # $NumberInterior = $IDcm | ($NUcm | $ALcm | $IS $NUcm)+; $Number = $PR? ($OPcm | $HYcm)? $NumberInterior $CL? $POcm?; # Fancy Number 18 ! $Word = (($IDcm | ($ALcm | $NUcm | $EX | $IS | $SY | $PR)+) ($POcm? | $INcm?)) ; # Alpha-numeric. 16, 17 $Dashes = (($B2cm $SPcm*)*); # Dashes 11a *** misc/icu/source/data/makedata.mak Sat Aug 10 06:55:36 2002 --- misc/build/icu/source/data/makedata.mak Tue May 27 15:21:32 2003 *************** *** 237,243 **** # # Break iterator data files. # ! BRK_FILES = "$(ICUBLD)\$(ICUDT)sent.brk" "$(ICUBLD)\$(ICUDT)char.brk" "$(ICUBLD)\$(ICUDT)line.brk" "$(ICUBLD)\$(ICUDT)word.brk" "$(ICUBLD)\$(ICUDT)title.brk" "$(ICUBLD)\$(ICUDT)line_th.brk" "$(ICUBLD)\$(ICUDT)word_th.brk" #invoke pkgdata for ICU common data # pkgdata will drop all output files (.dat, .dll, .lib) into the target (ICUBLD) directory. --- 237,243 ---- # # Break iterator data files. # ! 
BRK_FILES = "$(ICUBLD)\$(ICUDT)sent.brk" "$(ICUBLD)\$(ICUDT)char.brk" "$(ICUBLD)\$(ICUDT)line.brk" "$(ICUBLD)\$(ICUDT)word.brk" "$(ICUBLD)\$(ICUDT)edit_word.brk" "$(ICUBLD)\$(ICUDT)dict_word.brk" "$(ICUBLD)\$(ICUDT)count_word.brk" "$(ICUBLD)\$(ICUDT)title.brk" "$(ICUBLD)\$(ICUDT)line_th.brk" "$(ICUBLD)\$(ICUDT)word_th.brk" #invoke pkgdata for ICU common data # pkgdata will drop all output files (.dat, .dll, .lib) into the target (ICUBLD) directory. *************** *** 280,285 **** --- 280,294 ---- "$(ICUBLD)\$(ICUDT)word.brk" : "$(ICUBRK)\word.txt" "$(ICUBLD)\$(ICUDT)uprops.icu" genbrk -r "$(ICUBRK)\word.txt" -o "$(ICUBLD)\$(ICUDT)word.brk" -i "$(ICUBLD)\\" + + "$(ICUBLD)\$(ICUDT)edit_word.brk" : "$(ICUBRK)\edit_word.txt" "$(ICUBLD)\$(ICUDT)uprops.icu" + genbrk -r "$(ICUBRK)\edit_word.txt" -o "$(ICUBLD)\$(ICUDT)edit_word.brk" -i "$(ICUBLD)\\" + + "$(ICUBLD)\$(ICUDT)dict_word.brk" : "$(ICUBRK)\dict_word.txt" "$(ICUBLD)\$(ICUDT)uprops.icu" + genbrk -r "$(ICUBRK)\dict_word.txt" -o "$(ICUBLD)\$(ICUDT)dict_word.brk" -i "$(ICUBLD)\\" + + "$(ICUBLD)\$(ICUDT)count_word.brk" : "$(ICUBRK)\count_word.txt" "$(ICUBLD)\$(ICUDT)uprops.icu" + genbrk -r "$(ICUBRK)\count_word.txt" -o "$(ICUBLD)\$(ICUDT)count_word.brk" -i "$(ICUBLD)\\" "$(ICUBLD)\$(ICUDT)line.brk" : "$(ICUBRK)\line.txt" "$(ICUBLD)\$(ICUDT)uprops.icu" genbrk -r "$(ICUBRK)\line.txt" -o "$(ICUBLD)\$(ICUDT)line.brk" -i "$(ICUBLD)\\" *** misc/icu/source/icudefs.mk.in Thu Aug 1 06:28:32 2002 --- misc/build/icu/source/icudefs.mk.in Tue May 27 15:37:19 2003 *************** *** 133,139 **** --- 133,143 ---- SHLIB.cc= $(CXX) $(CXXFLAGS) $(LDFLAGS) -shared # Environment variable to set a runtime search path + ifeq ($(OS), IRIX) + LDLIBRARYPATH_ENVVAR = LD_LIBRARYN32_PATH + else LDLIBRARYPATH_ENVVAR = LD_LIBRARY_PATH + endif # Versioned target for a shared library. 
FINAL_SO_TARGET = $(SO_TARGET).$(SO_TARGET_VERSION) *** misc/icu/source/layout/ArabicLayoutEngine.cpp Wed Jul 17 05:29:18 2002 --- misc/build/icu/source/layout/ArabicLayoutEngine.cpp Tue May 27 15:21:32 2003 *************** *** 3,8 **** --- 3,9 ---- * %W% %E% * * (C) Copyright IBM Corp. 1998, 1999, 2000, 2001 - All Rights Reserved + * with some additions by Sun MicroSystems 2002 * */ *************** *** 146,152 **** --- 147,157 ---- GDEFMarkFilter filter(fGDEFTable); adjustMarkGlyphs(glyphs, glyphCount, false, &filter, positions, success); + #if 1 // HDU: #105697# TODO: improve handling of deleted/mark glyphs + } else if( count == glyphCount ) { + #else } else { + #endif GlyphDefinitionTableHeader *gdefTable = (GlyphDefinitionTableHeader *) ArabicShaping::glyphDefinitionTable; GDEFMarkFilter filter(gdefTable); *************** *** 194,199 **** --- 199,219 ---- charIndices = tempCharIndices; + #if 1 // HDU TODO: improve handling of deleted/mark glyphs + // remove deleted glyphs (0xFFFF) and deleted marks (0xFFFE) + // NOTE: we are removing them inplace + int nGlyphCount = 0; + for( int i = 0; i < tempGlyphCount; ++i ) + if( tempGlyphs[i] < 0xFFFE ) + { + tempGlyphs[ nGlyphCount ] = tempGlyphs[ i ]; + charIndices[ nGlyphCount ] = charIndices[ i ]; + ++nGlyphCount; + } + + tempGlyphCount = nGlyphCount; + #endif + // NOTE: need to copy tempGlyphs to an LEUnicode array if LEGlyphID and LEUnicode aren't the same size... 
ArabicOpenTypeLayoutEngine::mapCharsToGlyphs((LEUnicode *) tempGlyphs, 0, tempGlyphCount, false, true, glyphs, charIndices, success); *************** *** 253,258 **** --- 273,283 ---- success = LE_ILLEGAL_ARGUMENT_ERROR; return; } + + #if 1 // HDU: #105697# TODO: improve handling of deleted/mark glyphs + if( count != glyphCount ) + return; + #endif GDEFMarkFilter filter(fGDEFTable); *** misc/icu/source/layout/GXLayoutEngine.cpp Sat Jun 29 09:58:44 2002 --- misc/build/icu/source/layout/GXLayoutEngine.cpp Tue May 27 15:21:32 2003 *************** *** 39,45 **** return 0; } ! mapCharsToGlyphs(chars, offset, count, false, rightToLeft, glyphs, charIndices, success); if (LE_FAILURE(success)) { return 0; --- 39,45 ---- return 0; } ! mapCharsToGlyphs(chars, offset, count, rightToLeft, rightToLeft, glyphs, charIndices, success); if (LE_FAILURE(success)) { return 0; *** misc/icu/source/layout/LESwaps.h Wed Aug 14 10:17:50 2002 --- misc/build/icu/source/layout/LESwaps.h Tue May 27 15:21:32 2003 *************** *** 3,8 **** --- 3,9 ---- * @(#)LESwaps.h 1.3 00/03/15 * * (C) Copyright IBM Corp. 1998, 1999, 2000, 2001, 2002 - All Rights Reserved + * with additions by Sun Microsystems 2002 * */ *************** *** 13,26 **** U_NAMESPACE_BEGIN ! /** ! * These are convenience macros which invoke the swap functions ! * from a concise call. ! * ! * @draft ICU 2.2 ! */ ! #define SWAPW(value) (LESwaps::isBigEndian() ? (value) : LESwaps::swapWord(value)) ! #define SWAPL(value) (LESwaps::isBigEndian() ? (value) : LESwaps::swapLong(value)) /** * This class is used to access data which stored in big endian order --- 14,50 ---- U_NAMESPACE_BEGIN ! // the ALLOW_UNALIGNED hack prevents crashes with font files ! // containing unaligned tables on platforms that do not tolerate ! // unaligned memory accesses; it works by assuming that every ! // use of the SWAPx macros is intended for accessing a big endian ! // value e.g. for "Watanabe Gothic"'s "mort" table ! #define ALLOW_UNALIGNED_HACK ! ! 
#ifndef ALLOW_UNALIGNED_HACK ! /** ! * These are convenience macros which invoke the swap functions ! * from a concise call. ! */ ! #define SWAPW(value) (LESwaps::isBigEndian() ? (value) : LESwaps::swapWord(value)) ! #define SWAPL(value) (LESwaps::isBigEndian() ? (value) : LESwaps::swapLong(value)) ! #else ! #define SWAPW(rValue) loadBigEndianWord(reinterpret_cast<const le_uint16&>(rValue)) ! #define SWAPL(rValue) loadBigEndianLong(reinterpret_cast<const le_uint32&>(rValue)) ! ! inline le_uint16 loadBigEndianWord( const le_uint16& rValue ) // reads the two bytes at rValue one at a time, most significant first ! { ! const le_uint8* p = reinterpret_cast<const le_uint8*>(&rValue); ! return ((p[0] << 8) + p[1]); ! } ! ! inline le_uint32 loadBigEndianLong( const le_uint32& rValue ) // reads the four bytes at rValue one at a time, most significant first ! { ! const le_uint8* p = reinterpret_cast<const le_uint8*>(&rValue); ! return ((static_cast<le_uint32>(p[0])<<24) + (p[1]<<16) + (p[2]<<8) + p[3]); // widen before <<24 so the top byte never shifts into the sign bit of int ! } ! ! #endif /** * This class is used to access data which stored in big endian order *** misc/icu/source/layout/LookupProcessor.cpp Wed Jul 17 05:29:20 2002 --- misc/build/icu/source/layout/LookupProcessor.cpp Tue May 27 15:21:32 2003 *************** *** 109,115 **** Offset scriptListOffset, Offset featureListOffset, Offset lookupListOffset, LETag scriptTag, LETag languageTag, const LETag *featureOrder) : lookupListTable(NULL), featureListTable(NULL), lookupSelectArray(NULL), ! requiredFeatureTag(notSelected) { const ScriptListTable *scriptListTable = NULL; const LangSysTable *langSysTable = NULL; --- 109,115 ---- Offset scriptListOffset, Offset featureListOffset, Offset lookupListOffset, LETag scriptTag, LETag languageTag, const LETag *featureOrder) : lookupListTable(NULL), featureListTable(NULL), lookupSelectArray(NULL), ! requiredFeatureTag(notSelected), lookupOrderArray(NULL) { const ScriptListTable *scriptListTable = NULL; const LangSysTable *langSysTable = NULL; *** misc/icu/source/layout/OpenTypeLayoutEngine.h Wed Aug 14 10:17:50 2002 --- misc/build/icu/source/layout/OpenTypeLayoutEngine.h Tue May 27 15:21:32 2003 *************** *** 3,8 **** --- 3,9 ---- * %W% %E% * * (C) Copyright IBM Corp. 
1998, 1999, 2000, 2001, 2002 - All Rights Reserved + * with some additions by Sun MicroSystems 2002 * */ *************** *** 315,320 **** --- 316,334 ---- glyphs = tempGlyphs; charIndices = tempCharIndices; + + #if 1 // HDU: remove deleted glyphs (0xFFFF) and deleted marks (0xFFFE) + int nGlyphCount = 0; + for( int i = 0; i < tempGlyphCount; ++i ) + if( glyphs[i] < 0xFFFE ) + { + glyphs[ nGlyphCount ] = glyphs[ i ]; + charIndices[ nGlyphCount ] = charIndices[ i ]; + ++nGlyphCount; + } + + tempGlyphCount = nGlyphCount; + #endif return tempGlyphCount; };