*** misc/icu/source/common/brkiter.cpp	Thu Aug  8 10:39:14 2002
--- misc/build/icu/source/common/brkiter.cpp	Tue May 27 15:21:32 2003
***************
*** 39,51 ****
  
  // Creates a break iterator for word breaks.
  BreakIterator*
! BreakIterator::createWordInstance(const Locale& key, UErrorCode& status)
  {
      // WARNING: This routine is currently written specifically to handle only the
      // default rules files and the alternate rules files for Thai.  This function
      // will have to be made fully general at some time in the future!
      BreakIterator* result = NULL;
-     const char* filename = "word";
  
      if (U_FAILURE(status))
          return NULL;
--- 39,50 ----
  
  // Creates a break iterator for word breaks.
  BreakIterator*
! BreakIterator::createWordInstance(const Locale& key, const char* filename, UErrorCode& status)
  {
      // WARNING: This routine is currently written specifically to handle only the
      // default rules files and the alternate rules files for Thai.  This function
      // will have to be made fully general at some time in the future!
      BreakIterator* result = NULL;
  
      if (U_FAILURE(status))
          return NULL;
***************
*** 78,83 ****
--- 77,88 ----
      }
  
      return result;
+ }
+ 
+ BreakIterator*
+ BreakIterator::createWordInstance(const Locale& key, UErrorCode& status)
+ {
+     return createWordInstance(key, "word", status);
  }
  
  // -------------------------------------
*** misc/icu/source/common/unicode/brkiter.h	Thu Aug  8 10:39:14 2002
--- misc/build/icu/source/common/unicode/brkiter.h	Tue May 27 15:21:32 2003
***************
*** 348,353 ****
--- 348,355 ----
       */
      static BreakIterator* createWordInstance(const Locale& where,
                                                     UErrorCode& status);
+     static BreakIterator* createWordInstance(const Locale& where, const char* filename,
+                                                    UErrorCode& status);
  
      /**
       * Create BreakIterator for line-breaks using specified locale.
*** misc/icu/source/config/mh-linux	Fri Jan 25 04:35:04 2002
--- misc/build/icu/source/config/mh-linux	Tue May 27 15:21:32 2003
***************
*** 22,27 ****
--- 22,34 ----
  LD_RPATH=	
  LD_RPATH_PRE = -Wl,-rpath,
  
+ ## Force RPATH=$ORIGIN to locate own dependencies w/o need for LD_LIBRARY_PATH
+ ENABLE_RPATH=YES
+ RPATHLDFLAGS=${LD_RPATH_PRE}'$$ORIGIN'
+ 
+ #SH#	ENABLE_RPATH=YES
+ #SH#	RPATHLDFLAGS="${LD_RPATH_PRE}'$$ORIGIN'"
+ 
  ## Compiler switch to embed a library name
  LD_SONAME = -Wl,-soname -Wl,$(MIDDLE_SO_TARGET)
  
*** misc/icu/source/config/mh-solaris	Tue Jul 30 12:46:10 2002
--- misc/build/icu/source/config/mh-solaris	Tue May 27 15:21:32 2003
***************
*** 23,38 ****
  
  ## Commands to link
  ## For Sun Workshop, use CC to link to bring in C++ runtime
! LINK.c=		$(CXX) $(CXXFLAGS) $(LDFLAGS)
! LINK.cc=	$(CXX) $(CXXFLAGS) $(LDFLAGS)
  
  ## Commands to make a shared library
  SHLIB.c=	$(CC) $(CFLAGS) $(LDFLAGS) -G
! SHLIB.cc=	$(CXX) $(CXXFLAGS) $(LDFLAGS) -G
  
  ## Compiler switch to embed a runtime search path
  LD_RPATH=	-R
  LD_RPATH_PRE=	
  
  ## Compiler switch to embed a library name
  LD_SONAME = -h $(MIDDLE_SO_TARGET)
--- 23,46 ----
  
  ## Commands to link
  ## For Sun Workshop, use CC to link to bring in C++ runtime
! ## For Sun Workshop, -norunpath stops the compiler from recording a useless RPATH
! LINK.c=		$(CXX) $(CXXFLAGS) $(LDFLAGS) -norunpath
! LINK.cc=	$(CXX) $(CXXFLAGS) $(LDFLAGS) -norunpath
  
  ## Commands to make a shared library
  SHLIB.c=	$(CC) $(CFLAGS) $(LDFLAGS) -G
! SHLIB.cc=	$(CXX) $(CXXFLAGS) $(LDFLAGS) -G -norunpath
  
  ## Compiler switch to embed a runtime search path
  LD_RPATH=	-R
  LD_RPATH_PRE=	
+ 
+ ## Force RPATH=$ORIGIN to locate own dependencies w/o need for LD_LIBRARY_PATH
+ ENABLE_RPATH=YES
+ RPATHLDFLAGS=${LD_RPATH}'$$ORIGIN'
+ 
+ #SH#	ENABLE_RPATH=YES
+ #SH#	RPATHLDFLAGS="${LD_RPATH}'$$ORIGIN'"
  
  ## Compiler switch to embed a library name
  LD_SONAME = -h $(MIDDLE_SO_TARGET)
*** misc/icu/source/data/Makefile.in	Fri Aug 16 03:08:02 2002
--- misc/build/icu/source/data/Makefile.in	Tue May 27 15:21:32 2003
***************
*** 127,133 ****
  
  ## BRK files
  # ALL of these files can be deleted (the following BRK files) - they are copied
! BRK_FILES_SHORT=char.brk line.brk line_th.brk sent.brk word.brk title.brk word_th.brk
  BRK_FILES=$(BRK_FILES_SHORT:%=$(BUILDDIR)/$(ICUDT)%)
  # don't include thaidict.brk - it goes into a resource bundle - plus it isn't deleted
  
--- 127,133 ----
  
  ## BRK files
  # ALL of these files can be deleted (the following BRK files) - they are copied
! BRK_FILES_SHORT=char.brk line.brk line_th.brk sent.brk word.brk edit_word.brk dict_word.brk count_word.brk title.brk word_th.brk
  BRK_FILES=$(BRK_FILES_SHORT:%=$(BUILDDIR)/$(ICUDT)%)
  # don't include thaidict.brk - it goes into a resource bundle - plus it isn't deleted
  
*** misc/icu/source/data/brkitr/count_word.txt	Tue May 27 15:41:24 2003
--- misc/build/icu/source/data/brkitr/count_word.txt	Tue May 27 15:21:32 2003
***************
*** 1 ****
! dummy
--- 1,127 ----
! #
! #   Copyright (C) 2002, International Business Machines Corporation and others.
! #       All Rights Reserved.
! #
! #   file:  count_word.txt   (derived from word.txt)
! #
! #   ICU Word Break Rules
! #      See Unicode Technical Report #29.
! #      These rules are based on the proposed draft dated 2002-08-06
! #
! 
! 
! 
! ####################################################################################
! #
! #  Definitions imported from Line Break Rules.
! #
! ####################################################################################
! $Numeric = [ \u0030-\u0039 \u0660-\u0669 \u06F0-\u06F9 \u0966-\u096F \u09E6-\u09EF
!         \u0A66-\u0A6F \u0AE6-\u0AEF \u0B66-\u0B6F \u0BE7-\u0BEF \u0C66-\u0C6F
!         \u0CE6-\u0CEF \u0D66-\u0D6F \u0E50-\u0E59 \u0ED0-\u0ED9 \u0F20-\u0F29
!         \u1040-\u1049 \u1369-\u1371 \u17E0-\u17E9 \u1810-\u1819 \U0001D7CE-\U0001D7FF];
! 
! 
! 
! ####################################################################################
! #
! #  Definitions imported from Character Break Rules.
! #
! ####################################################################################
! #
! #  Character Class Definitions.
! #    The names are those from TR29.
! #
! $Control    = [[:Zl:] [:Zp:] [:Cc:] [:Cf:]];
! 
! # Note on $Extend:  Earlier versions of TR29 included Mc characters.
! #                   To avoid test breakage, Mc is still included for the time being.
! # $Extend     = [[:Mn:] [:Me:] \uff9e-\uff9f];   #  FF9E..FF9F    ; Other_Grapheme_Extend
! $Extend     = [[:Mn:] [:Me:] [:Mc:] \uff9e-\uff9f];   #  FF9E..FF9F    ; Other_Grapheme_Extend
! 
! 
! 
! 
! ####################################################################################
! #
! #  Word Break Rules.    Definitions and Rules specific to word break begin Here. 
! #
! ####################################################################################
! 
! $Katakana  = [[:Kana:]  \u30fc \uff70 \uff9e-\uff9f];
! $Hiragana  = [[:Hira:]];
! $Letter    = [[[:Alphabetic:]  \u02b9-\u02ba \u02c2-\u02cf \u02d2-\u02df \u02e5-\u02ed \u05f3] -
!              [[:THAI:] [:LAO:] $Hiragana $Katakana ]];
! $Format    = [[:Cf:]];
! 
! $MidLetter = [\u0027 \u00ad \u05f4 \u2019];
! 
! $MidNumLet = [\u002e];
! 
! 
! # From Line Break, IS - Numeric Separator (Infix)
! #  $IS = [\u002c \u002e \u003a \u003b \u0589];
! $MidNum    = [\u002c \u003b \u0589];
! 
! #
! #  "Extended"  definitions.  Classes of characters including trailing combining chars and,
! #                            for types of chars that can appear in the interior of a word only,
! #                            trailing format characters.
! #
! $LetterEx     = $Letter    $Extend*; 
! $NumericEx    = $Numeric   $Extend*;
! $MidNumExF    = $MidNum    $Extend* $Format*;
! $MidNumLetExF = $MidNumLet $Extend* $Format*;
! $MidLetterExF = $MidLetter $Extend* $Format*;
! 
! $word_pad=[[:P:][:S:][:Z:][:C:]];
! 
! #
! #  Numbers.  Rules 6, 9, 10 from the TR.
! #
! $NumberSequence = $NumericEx ($Format* ($MidNumExF | $MidNumLetExF)? $NumericEx)*;
! $NumberSequence $word_pad* {100};
! 
! #
! #  Words.  Alpha-numerics.  Rule 3 - 10
! #     - must include at least one letter. 
! #     - may include both letters and numbers.
! #     - may include MidLetter, MidNum punctuation.
! #
! $LetterSequence = $LetterEx ($Format* ($MidLetterExF | $MidNumLetExF)? $LetterEx)*;
! $NumberSequence? $LetterSequence ($NumberSequence | $LetterSequence)* $word_pad* {200};
! 
! #
! #  Hiragana and Katakana
! #
! $Hiragana $Extend* {300};
! $Katakana $Extend* ($Format* $Katakana $Extend*)* {300};
! 
! #
! #  Ideographic Characters.  Stand by themselves as words.
! #
! # [:IDEOGRAPHIC:] $Extend* $word_pad* {400};
! 
! #
! #  Everything Else, with no tag.
! #                   Non-Control chars combine with $Extend (combining) chars.
! #                   Controls are returned by themselves.
! #
! [^$Control] $Extend* $word_pad*;
! \r\n;
! .;
! 
! #
! #  Reverse Rules.   Back up over any of the chars that can group together.
! #                   (Reverse rules do not need to be exact; they can back up  too far,
! #                   but must back up at least enough, and must stop on a boundary.)
! #
! 
! # NonStarters are the set of all characters that can appear at the 2nd - nth position of
! #    a word.   (They may also be the first.)   The reverse rule skips over these, until it
! #    reaches something that can only be the start (and probably only) char in a "word".
! #    A space or punctuation meets the test.
! #
! $NonStarters = [$Numeric $Letter $Katakana $MidLetter $MidNum $MidNumLet $Extend $Format \u000a];
! 
! ! $NonStarters* .;
*** misc/icu/source/data/brkitr/dict_word.txt	Tue May 27 15:41:24 2003
--- misc/build/icu/source/data/brkitr/dict_word.txt	Tue May 27 15:21:32 2003
***************
*** 1 ****
! dummy
--- 1,126 ----
! #
! #   Copyright (C) 2002, International Business Machines Corporation and others.
! #       All Rights Reserved.
! #
! #   file:  dict_word.txt   (derived from word.txt)
! #
! #   ICU Word Break Rules
! #      See Unicode Technical Report #29.
! #      These rules are based on the proposed draft dated 2002-08-06
! #
! 
! 
! 
! ####################################################################################
! #
! #  Definitions imported from Line Break Rules.
! #
! ####################################################################################
! $Numeric = [ \u0030-\u0039 \u0660-\u0669 \u06F0-\u06F9 \u0966-\u096F \u09E6-\u09EF
!         \u0A66-\u0A6F \u0AE6-\u0AEF \u0B66-\u0B6F \u0BE7-\u0BEF \u0C66-\u0C6F
!         \u0CE6-\u0CEF \u0D66-\u0D6F \u0E50-\u0E59 \u0ED0-\u0ED9 \u0F20-\u0F29
!         \u1040-\u1049 \u1369-\u1371 \u17E0-\u17E9 \u1810-\u1819 \U0001D7CE-\U0001D7FF];
! 
! 
! 
! ####################################################################################
! #
! #  Definitions imported from Character Break Rules.
! #
! ####################################################################################
! #
! #  Character Class Definitions.
! #    The names are those from TR29.
! #
! $Control    = [[:Zl:] [:Zp:] [:Cc:] [:Cf:]];
! 
! # Note on $Extend:  Earlier versions of TR29 included Mc characters.
! #                   To avoid test breakage, Mc is still included for the time being.
! # $Extend     = [[:Mn:] [:Me:] \uff9e-\uff9f];   #  FF9E..FF9F    ; Other_Grapheme_Extend
! $Extend     = [[:Mn:] [:Me:] [:Mc:] \uff9e-\uff9f];   #  FF9E..FF9F    ; Other_Grapheme_Extend
! 
! 
! 
! 
! ####################################################################################
! #
! #  Word Break Rules.    Definitions and Rules specific to word break begin Here. 
! #
! ####################################################################################
! 
! $Katakana  = [[:Kana:]  \u30fc \uff70 \uff9e-\uff9f];
! $Hiragana  = [[:Hira:]];
! $Letter    = [[[:Alphabetic:] \u0002 \u002e \u0040 \u02b9-\u02ba \u02c2-\u02cf \u02d2-\u02df \u02e5-\u02ed \u05f3] -
!              [[:THAI:] [:LAO:] $Hiragana $Katakana ]];
! $Format    = [[:Cf:]];
! 
! $MidLetter = [\u0027 \u0060 \u0084 \u00ad \u0384 \u05f4 \u2016 \u2018 \u2019 \u2032 ];
! 
! 
! # From Line Break, IS - Numeric Separator (Infix)
! #  $IS = [\u002c \u002e \u003a \u003b \u0589];
! $MidNum    = [\u002c \u002e \u003b \u0040 \u0084 \u0384 \u0589 \u066b \u2018 \u2019 \u201b \u2032];
! 
! #
! #  "Extended"  definitions.  Classes of characters including trailing combining chars and,
! #                            for types of chars that can appear in the interior of a word only,
! #                            trailing format characters.
! #
! $LetterEx     = $Letter    $Extend*; 
! $NumericEx    = $Numeric   $Extend*;
! $MidNumExF    = $MidNum    $Extend* $Format*;
! $MidLetterExF = $MidLetter $Extend* $Format*;
! 
! 
! #
! #  Numbers.  Rules 6, 9, 10 from the TR.
! #
! $NumberSequence = $NumericEx ($Format* $MidNumExF? $NumericEx)*;
! $NumberSequence {100};
! 
! #
! #  Words.  Alpha-numerics.  Rule 3 - 10
! #     - must include at least one letter. 
! #     - may include both letters and numbers.
! #     - may include MidLetter, MidNum punctuation.
! #
! $LetterSequence = $LetterEx ($Format* $MidLetterExF? $LetterEx)*;
! $NumberSequence? $LetterSequence ($NumberSequence | $LetterSequence)* {200};
! 
! # Punctuation characters by themselves
! [:P:]*;
! 
! #
! #  Hiragana and Katakana
! #
! $Hiragana $Extend* {300};
! $Katakana $Extend* ($Format* $Katakana $Extend*)* {300};
! 
! #
! #  Ideographic Characters.  Stand by themselves as words.
! #
! # [:IDEOGRAPHIC:] $Extend* {400};
! 
! #
! #  Everything Else, with no tag.
! #                   Non-Control chars combine with $Extend (combining) chars.
! #                   Controls are returned by themselves.
! #
! [^$Control] $Extend*;
! \r\n;
! .;
! 
! #
! #  Reverse Rules.   Back up over any of the chars that can group together.
! #                   (Reverse rules do not need to be exact; they can back up  too far,
! #                   but must back up at least enough, and must stop on a boundary.)
! #
! 
! # NonStarters are the set of all characters that can appear at the 2nd - nth position of
! #    a word.   (They may also be the first.)   The reverse rule skips over these, until it
! #    reaches something that can only be the start (and probably only) char in a "word".
! #    A space or punctuation meets the test.
! #
! $NonStarters = [$Numeric $Letter $Katakana $MidLetter $MidNum $Extend $Format \u000a];
! 
! ! $NonStarters* .;
*** misc/icu/source/data/brkitr/edit_word.txt	Tue May 27 15:41:24 2003
--- misc/build/icu/source/data/brkitr/edit_word.txt	Tue May 27 15:21:32 2003
***************
*** 1 ****
! dummy
--- 1,130 ----
! #
! #   Copyright (C) 2002, International Business Machines Corporation and others.
! #       All Rights Reserved.
! #
! #   file:  edit_word.txt   (derived from word.txt)
! #
! #   ICU Word Break Rules
! #      See Unicode Technical Report #29.
! #      These rules are based on the proposed draft dated 2002-08-06
! #
! 
! 
! 
! ####################################################################################
! #
! #  Definitions imported from Line Break Rules.
! #
! ####################################################################################
! $Numeric = [ \u0030-\u0039 \u0660-\u0669 \u06F0-\u06F9 \u0966-\u096F \u09E6-\u09EF
!         \u0A66-\u0A6F \u0AE6-\u0AEF \u0B66-\u0B6F \u0BE7-\u0BEF \u0C66-\u0C6F
!         \u0CE6-\u0CEF \u0D66-\u0D6F \u0E50-\u0E59 \u0ED0-\u0ED9 \u0F20-\u0F29
!         \u1040-\u1049 \u1369-\u1371 \u17E0-\u17E9 \u1810-\u1819 \U0001D7CE-\U0001D7FF];
! 
! 
! 
! ####################################################################################
! #
! #  Definitions imported from Character Break Rules.
! #
! ####################################################################################
! #
! #  Character Class Definitions.
! #    The names are those from TR29.
! #
! $Control    = [[:Zl:] [:Zp:] [:Cc:] [:Cf:]];
! 
! # Note on $Extend:  Earlier versions of TR29 included Mc characters.
! #                   To avoid test breakage, Mc is still included for the time being.
! # $Extend     = [[:Mn:] [:Me:] \uff9e-\uff9f];   #  FF9E..FF9F    ; Other_Grapheme_Extend
! $Extend     = [[:Mn:] [:Me:] [:Mc:] \uff9e-\uff9f];   #  FF9E..FF9F    ; Other_Grapheme_Extend
! 
! 
! 
! 
! ####################################################################################
! #
! #  Word Break Rules.    Definitions and Rules specific to word break begin Here. 
! #
! ####################################################################################
! 
! $Katakana  = [[:Kana:]  \u30fc \uff70 \uff9e-\uff9f];
! $Hiragana  = [[:Hira:]];
! $Letter    = [[[:Alphabetic:] \u0002 \u00a0 \u02b9-\u02ba \u02c2-\u02cf \u02d2-\u02df \u02e5-\u02ed \u05f3] -
!              [[:THAI:] [:LAO:] $Hiragana $Katakana ]];
! $Format    = [[:Cf:]];
! 
! $MidLetter = [\u0027 \u00ad \u05f4 \u2019];
! 
! $MidNumLet = [\u002e];
! 
! 
! # From Line Break, IS - Numeric Separator (Infix)
! #  $IS = [\u002c \u002e \u003a \u003b \u0589];
! $MidNum    = [\u002c \u003b \u0589];
! 
! #
! #  "Extended"  definitions.  Classes of characters including trailing combining chars and,
! #                            for types of chars that can appear in the interior of a word only,
! #                            trailing format characters.
! #
! $LetterEx     = $Letter    $Extend*; 
! $NumericEx    = $Numeric   $Extend*;
! $MidNumExF    = $MidNum    $Extend* $Format*;
! $MidNumLetExF = $MidNumLet $Extend* $Format*;
! $MidLetterExF = $MidLetter $Extend* $Format*;
! 
! 
! #
! #  Numbers.  Rules 6, 9, 10 from the TR.
! #
! $NumberSequence = $NumericEx ($Format* ($MidNumExF | $MidNumLetExF)? $NumericEx)*;
! $NumberSequence {100};
! 
! #
! #  Words.  Alpha-numerics.  Rule 3 - 10
! #     - must include at least one letter. 
! #     - may include both letters and numbers.
! #     - may include MidLetter, MidNum punctuation.
! #
! $LetterSequence = $LetterEx ($Format* ($MidLetterExF | $MidNumLetExF)? $LetterEx)*;
! $NumberSequence? $LetterSequence ($NumberSequence | $LetterSequence)* {200};
! 
! # Punctuations by themselves
! [[:P:]-[\u002E]]*;
! [\u002E]*;
! 
! #
! #  Hiragana and Katakana
! #
! $Hiragana $Extend* {300};
! $Katakana $Extend* ($Format* $Katakana $Extend*)* {300};
! 
! #
! #  Ideographic Characters.  Stand by themselves as words.
! #
! # [:IDEOGRAPHIC:] $Extend* {400};
! 
! #
! #  Everything Else, with no tag.
! #                   Non-Control chars combine with $Extend (combining) chars.
! #                   Controls are returned by themselves.
! #
! [^$Control] $Extend*;
! \r\n;
! .;
! 
! #
! #  Reverse Rules.   Back up over any of the chars that can group together.
! #                   (Reverse rules do not need to be exact; they can back up  too far,
! #                   but must back up at least enough, and must stop on a boundary.)
! #
! 
! # NonStarters are the set of all characters that can appear at the 2nd - nth position of
! #    a word.   (They may also be the first.)   The reverse rule skips over these, until it
! #    reaches something that can only be the start (and probably only) char in a "word".
! #    A space or punctuation meets the test.
! #
! $NonStarters = [$Numeric $Letter $Katakana $MidLetter $MidNum $MidNumLet $Extend $Format \u000a];
! 
! ! $NonStarters* .;
*** misc/icu/source/data/brkitr/line.txt	Tue Jul 23 08:02:06 2002
--- misc/build/icu/source/data/brkitr/line.txt	Tue May 27 15:21:32 2003
***************
*** 68,74 ****
  
  $SG = [ \uD800-\uDFFF];
  
! $AL = [ \u0023 \u0026 \u002A \u003C-\u003E \u0040-\u005A \u005E-\u007A \u007E
          \u00A6 \u00A9 \u00AC \u00AE-\u00AF \u00B5 \u00C0-\u00C5 \u00C7-\u00CF
          \u00D1-\u00D6 \u00D9-\u00DD \u00E2-\u00E5 \u00E7 \u00EB \u00EE-\u00EF
          \u00F1 \u00F4-\u00F6 \u00FB \u00FD \u00FF-\u0100 \u0102-\u0110
--- 68,75 ----
  
  $SG = [ \uD800-\uDFFF];
  
! $AL = [ $SA $XX $AI
! 	\u0023 \u0026 \u002A \u002B \u003C-\u003E \u0040-\u005A \u005E-\u007A \u007E
          \u00A6 \u00A9 \u00AC \u00AE-\u00AF \u00B5 \u00C0-\u00C5 \u00C7-\u00CF
          \u00D1-\u00D6 \u00D9-\u00DD \u00E2-\u00E5 \u00E7 \u00EB \u00EE-\u00EF
          \u00F1 \u00F4-\u00F6 \u00FB \u00FD \u00FF-\u0100 \u0102-\u0110
***************
*** 210,216 ****
          \U0001D165-\U0001D169 \U0001D16D-\U0001D182 \U0001D185-\U0001D18B
          \U0001D1AA-\U0001D1AD \U000E0001 \U000E0020-\U000E007F];
  
! $PR = [ \u0024 \u002B \u005C \u00A3-\u00A5 \u00B1 \u09F2-\u09F3 \u0E3F \u17DB
          \u20A0-\u20A6 \u20A8-\u20B1 \u2116 \u2212-\u2213 \uFE69 \uFF04
          \uFFE1 \uFFE5-\uFFE6];
  
--- 211,217 ----
          \U0001D165-\U0001D169 \U0001D16D-\U0001D182 \U0001D185-\U0001D18B
          \U0001D1AA-\U0001D1AD \U000E0001 \U000E0020-\U000E007F];
  
! $PR = [ \u0024 \u005C \u00A3-\u00A5 \u00B1 \u09F2-\u09F3 \u0E3F \u17DB
          \u20A0-\u20A6 \u20A8-\u20B1 \u2116 \u2212-\u2213 \uFE69 \uFF04
          \uFFE1 \uFFE5-\uFFE6];
  
***************
*** 326,332 ****
  #
  $NumberInterior = $IDcm | ($NUcm | $ALcm | $IS $NUcm)+;  
  $Number         =  $PR? ($OPcm | $HYcm)? $NumberInterior $CL? $POcm?; # Fancy Number     18 
! $Word   = (($IDcm | ($ALcm | $NUcm)+) ($POcm? | $INcm?))  ;           # Alpha-numeric.   16, 17 
  $Dashes = (($B2cm $SPcm*)*);                                          # Dashes           11a   
          
          
--- 327,333 ----
  #
  $NumberInterior = $IDcm | ($NUcm | $ALcm | $IS $NUcm)+;  
  $Number         =  $PR? ($OPcm | $HYcm)? $NumberInterior $CL? $POcm?; # Fancy Number     18 
! $Word   = (($IDcm | ($ALcm | $NUcm | $EX | $IS  | $SY | $PR)+) ($POcm? | $INcm?))  ;           # Alpha-numeric.   16, 17 
  $Dashes = (($B2cm $SPcm*)*);                                          # Dashes           11a   
          
          
*** misc/icu/source/data/makedata.mak	Sat Aug 10 06:55:36 2002
--- misc/build/icu/source/data/makedata.mak	Tue May 27 15:21:32 2003
***************
*** 237,243 ****
  #
  #  Break iterator data files.
  #
! BRK_FILES = "$(ICUBLD)\$(ICUDT)sent.brk" "$(ICUBLD)\$(ICUDT)char.brk" "$(ICUBLD)\$(ICUDT)line.brk" "$(ICUBLD)\$(ICUDT)word.brk" "$(ICUBLD)\$(ICUDT)title.brk" "$(ICUBLD)\$(ICUDT)line_th.brk" "$(ICUBLD)\$(ICUDT)word_th.brk"
  
  #invoke pkgdata for ICU common data
  #  pkgdata will drop all output files (.dat, .dll, .lib) into the target (ICUBLD) directory.
--- 237,243 ----
  #
  #  Break iterator data files.
  #
! BRK_FILES = "$(ICUBLD)\$(ICUDT)sent.brk" "$(ICUBLD)\$(ICUDT)char.brk" "$(ICUBLD)\$(ICUDT)line.brk" "$(ICUBLD)\$(ICUDT)word.brk" "$(ICUBLD)\$(ICUDT)edit_word.brk" "$(ICUBLD)\$(ICUDT)dict_word.brk" "$(ICUBLD)\$(ICUDT)count_word.brk" "$(ICUBLD)\$(ICUDT)title.brk" "$(ICUBLD)\$(ICUDT)line_th.brk" "$(ICUBLD)\$(ICUDT)word_th.brk"
  
  #invoke pkgdata for ICU common data
  #  pkgdata will drop all output files (.dat, .dll, .lib) into the target (ICUBLD) directory.
***************
*** 280,285 ****
--- 280,294 ----
  
  "$(ICUBLD)\$(ICUDT)word.brk" : "$(ICUBRK)\word.txt" "$(ICUBLD)\$(ICUDT)uprops.icu"
  	genbrk -r "$(ICUBRK)\word.txt" -o "$(ICUBLD)\$(ICUDT)word.brk" -i "$(ICUBLD)\\"
+ 
+ "$(ICUBLD)\$(ICUDT)edit_word.brk" : "$(ICUBRK)\edit_word.txt" "$(ICUBLD)\$(ICUDT)uprops.icu"
+ 	genbrk -r "$(ICUBRK)\edit_word.txt" -o "$(ICUBLD)\$(ICUDT)edit_word.brk" -i "$(ICUBLD)\\"
+ 
+ "$(ICUBLD)\$(ICUDT)dict_word.brk" : "$(ICUBRK)\dict_word.txt" "$(ICUBLD)\$(ICUDT)uprops.icu"
+ 	genbrk -r "$(ICUBRK)\dict_word.txt" -o "$(ICUBLD)\$(ICUDT)dict_word.brk" -i "$(ICUBLD)\\"
+ 
+ "$(ICUBLD)\$(ICUDT)count_word.brk" : "$(ICUBRK)\count_word.txt" "$(ICUBLD)\$(ICUDT)uprops.icu"
+ 	genbrk -r "$(ICUBRK)\count_word.txt" -o "$(ICUBLD)\$(ICUDT)count_word.brk" -i "$(ICUBLD)\\"
  
  "$(ICUBLD)\$(ICUDT)line.brk" : "$(ICUBRK)\line.txt" "$(ICUBLD)\$(ICUDT)uprops.icu"
  	genbrk -r "$(ICUBRK)\line.txt" -o "$(ICUBLD)\$(ICUDT)line.brk" -i "$(ICUBLD)\\"
*** misc/icu/source/icudefs.mk.in	Thu Aug  1 06:28:32 2002
--- misc/build/icu/source/icudefs.mk.in	Tue May 27 15:37:19 2003
***************
*** 133,139 ****
--- 133,143 ----
  SHLIB.cc=     $(CXX) $(CXXFLAGS) $(LDFLAGS) -shared
  
  # Environment variable to set a runtime search path
+ ifeq ($(OS), IRIX)
+ LDLIBRARYPATH_ENVVAR = LD_LIBRARYN32_PATH
+ else
  LDLIBRARYPATH_ENVVAR = LD_LIBRARY_PATH
+ endif
  
  # Versioned target for a shared library.
  FINAL_SO_TARGET = $(SO_TARGET).$(SO_TARGET_VERSION)
*** misc/icu/source/layout/ArabicLayoutEngine.cpp	Wed Jul 17 05:29:18 2002
--- misc/build/icu/source/layout/ArabicLayoutEngine.cpp	Tue May 27 15:21:32 2003
***************
*** 3,8 ****
--- 3,9 ----
   * %W% %E%
   *
   * (C) Copyright IBM Corp. 1998, 1999, 2000, 2001 - All Rights Reserved
+  * with some additions by Sun MicroSystems 2002
   *
   */
  
***************
*** 146,152 ****
--- 147,157 ----
          GDEFMarkFilter filter(fGDEFTable);
  
          adjustMarkGlyphs(glyphs, glyphCount, false, &filter, positions, success);
+ #if 1 // HDU: #105697# TODO: improve handling of deleted/mark glyphs
+     } else if( count == glyphCount ) {
+ #else
      } else {
+ #endif
          GlyphDefinitionTableHeader *gdefTable = (GlyphDefinitionTableHeader *) ArabicShaping::glyphDefinitionTable;
          GDEFMarkFilter filter(gdefTable);
  
***************
*** 194,199 ****
--- 199,219 ----
  
      charIndices = tempCharIndices;
  
+ #if 1 // HDU TODO: improve handling of deleted/mark glyphs
+     // remove deleted glyphs (0xFFFF) and deleted marks (0xFFFE)
+     // NOTE: we are removing them inplace
+     int nGlyphCount = 0;
+     for( int i = 0; i < tempGlyphCount; ++i )
+         if( tempGlyphs[i] < 0xFFFE )
+         {
+             tempGlyphs[ nGlyphCount ]   = tempGlyphs[ i ];
+             charIndices[ nGlyphCount ]  = charIndices[ i ];
+             ++nGlyphCount;
+         }
+ 
+     tempGlyphCount = nGlyphCount;
+ #endif
+ 
      // NOTE: need to copy tempGlyphs to an LEUnicode array if LEGlyphID and LEUnicode aren't the same size...
      ArabicOpenTypeLayoutEngine::mapCharsToGlyphs((LEUnicode *) tempGlyphs, 0, tempGlyphCount, false, true, glyphs, charIndices, success);
  
***************
*** 253,258 ****
--- 273,283 ----
          success = LE_ILLEGAL_ARGUMENT_ERROR;
          return;
      }
+ 
+ #if 1 // HDU: #105697# TODO: improve handling of deleted/mark glyphs
+     if( count != glyphCount )
+         return;
+ #endif
  
      GDEFMarkFilter filter(fGDEFTable);
  
*** misc/icu/source/layout/GXLayoutEngine.cpp	Sat Jun 29 09:58:44 2002
--- misc/build/icu/source/layout/GXLayoutEngine.cpp	Tue May 27 15:21:32 2003
***************
*** 39,45 ****
          return 0;
      }
  
!     mapCharsToGlyphs(chars, offset, count, false, rightToLeft, glyphs, charIndices, success);
  
      if (LE_FAILURE(success)) {
          return 0;
--- 39,45 ----
          return 0;
      }
  
!     mapCharsToGlyphs(chars, offset, count, rightToLeft, rightToLeft, glyphs, charIndices, success);
  
      if (LE_FAILURE(success)) {
          return 0;
*** misc/icu/source/layout/LESwaps.h	Wed Aug 14 10:17:50 2002
--- misc/build/icu/source/layout/LESwaps.h	Tue May 27 15:21:32 2003
***************
*** 3,8 ****
--- 3,9 ----
   * @(#)LESwaps.h	1.3 00/03/15
   *
   * (C) Copyright IBM Corp. 1998, 1999, 2000, 2001, 2002 - All Rights Reserved
+  * with additions by Sun Microsystems 2002
   *
   */
  
***************
*** 13,26 ****
  
  U_NAMESPACE_BEGIN
  
! /**
!  * These are convenience macros which invoke the swap functions
!  * from a concise call.
!  *
!  * @draft ICU 2.2
!  */
! #define SWAPW(value) (LESwaps::isBigEndian() ? (value) : LESwaps::swapWord(value))
! #define SWAPL(value) (LESwaps::isBigEndian() ? (value) : LESwaps::swapLong(value))
  
  /**
   * This class is used to access data which stored in big endian order
--- 14,50 ----
  
  U_NAMESPACE_BEGIN
  
! // The ALLOW_UNALIGNED_HACK prevents crashes with font files that
! // contain unaligned tables (e.g. the "mort" table of "Watanabe
! // Gothic") on platforms that do not tolerate unaligned memory
! // accesses; it works by assuming that every use of the SWAPx
! // macros is intended for accessing a big endian value.
! #define ALLOW_UNALIGNED_HACK
! 
! #ifndef ALLOW_UNALIGNED_HACK
!     /**
!      * These are convenience macros which invoke the swap functions
!      * from a concise call.
!      */
!     #define SWAPW(value) (LESwaps::isBigEndian() ? (value) : LESwaps::swapWord(value))
!     #define SWAPL(value) (LESwaps::isBigEndian() ? (value) : LESwaps::swapLong(value))
! #else
!     #define SWAPW(rValue) loadBigEndianWord(reinterpret_cast<const le_uint16&>(rValue))
!     #define SWAPL(rValue) loadBigEndianLong(reinterpret_cast<const le_uint32&>(rValue))
! 
! inline le_uint16 loadBigEndianWord( const le_uint16& rValue )  // assemble a 16-bit big endian value one byte at a time
! {
!     const le_uint8* pBytes = reinterpret_cast<const le_uint8*>(&rValue);
!     return ((pBytes[0] << 8) | pBytes[1]);  // high byte first; the two operands share no bits, so | equals +
! }
! 
! inline le_uint32 loadBigEndianLong( const le_uint32& rValue )  // assemble a 32-bit big endian value one byte at a time
! {
!     const le_uint8* p = reinterpret_cast<const le_uint8*>(&rValue);  // widen each byte before shifting: p[0]<<24 as a promoted int overflows when p[0] >= 0x80
!     return ((static_cast<le_uint32>(p[0])<<24) | (static_cast<le_uint32>(p[1])<<16) | (static_cast<le_uint32>(p[2])<<8) | static_cast<le_uint32>(p[3]));
! }
! 
! #endif
  
  /**
   * This class is used to access data which stored in big endian order
*** misc/icu/source/layout/LookupProcessor.cpp	Wed Jul 17 05:29:20 2002
--- misc/build/icu/source/layout/LookupProcessor.cpp	Tue May 27 15:21:32 2003
***************
*** 109,115 ****
          Offset scriptListOffset, Offset featureListOffset, Offset lookupListOffset,
          LETag scriptTag, LETag languageTag, const LETag *featureOrder)
      : lookupListTable(NULL), featureListTable(NULL), lookupSelectArray(NULL),
!       requiredFeatureTag(notSelected)
  {
      const ScriptListTable *scriptListTable = NULL;
      const LangSysTable *langSysTable = NULL;
--- 109,115 ----
          Offset scriptListOffset, Offset featureListOffset, Offset lookupListOffset,
          LETag scriptTag, LETag languageTag, const LETag *featureOrder)
      : lookupListTable(NULL), featureListTable(NULL), lookupSelectArray(NULL),
!       requiredFeatureTag(notSelected), lookupOrderArray(NULL)
  {
      const ScriptListTable *scriptListTable = NULL;
      const LangSysTable *langSysTable = NULL;
*** misc/icu/source/layout/OpenTypeLayoutEngine.h	Wed Aug 14 10:17:50 2002
--- misc/build/icu/source/layout/OpenTypeLayoutEngine.h	Tue May 27 15:21:32 2003
***************
*** 3,8 ****
--- 3,9 ----
   * %W% %E%
   *
   * (C) Copyright IBM Corp. 1998, 1999, 2000, 2001, 2002 - All Rights Reserved
+  * with some additions by Sun MicroSystems 2002
   *
   */
  
***************
*** 315,320 ****
--- 316,334 ----
  
          glyphs = tempGlyphs;
          charIndices = tempCharIndices;
+ 
+ #if 1 // HDU: remove deleted glyphs (0xFFFF) and deleted marks (0xFFFE)
+         int nGlyphCount = 0;
+         for( int i = 0; i < tempGlyphCount; ++i )
+             if( glyphs[i] < 0xFFFE )
+             {
+                 glyphs[ nGlyphCount ]       = glyphs[ i ];
+                 charIndices[ nGlyphCount ]  = charIndices[ i ];
+                 ++nGlyphCount;
+             }
+ 
+         tempGlyphCount = nGlyphCount;
+ #endif
  
          return tempGlyphCount;
      };