? i74034_alternative_breakiterator_01.patch
NOTE(review): line breaks and indentation of this patch were reconstructed from a
NOTE(review): collapsed copy; apply with "patch -l".  The bare "#include" lines lost
NOTE(review): their targets and must be restored from the original sources first;
NOTE(review): "<mecab.h>" below was filled in as MeCab's public C++ header - confirm.
Index: inc/breakiterator_cjk.hxx
===================================================================
RCS file: /cvs/l10n/i18npool/inc/breakiterator_cjk.hxx,v
retrieving revision 1.5
diff -u -r1.5 breakiterator_cjk.hxx
--- inc/breakiterator_cjk.hxx	7 Sep 2005 16:46:55 -0000	1.5
+++ inc/breakiterator_cjk.hxx	31 Jan 2007 07:00:27 -0000
@@ -37,6 +37,7 @@
 
 #include
 #include
+#include <mecab.h>
 
 namespace com { namespace sun { namespace star { namespace i18n {
 // ----------------------------------------------------
@@ -74,11 +75,33 @@
 
 #ifdef BREAKITERATOR_ALL
 BREAKITERATOR_CJK( zh )
-BREAKITERATOR_CJK( ja )
+//BREAKITERATOR_CJK( ja )
 BREAKITERATOR_CJK( ko )
 #endif
 #undef BREAKITERATOR__CJK
 
+// Japanese break iterator that takes its word boundaries from a MeCab tagger
+// instead of the xdictionary used by the macro-generated BreakIterator_CJK
+// subclasses.
+class BreakIterator_ja : public BreakIterator_CJK {
+    MeCab::Tagger *tagger;  // owned; deleted in the destructor
+    // Not copyable: a copy would delete the same tagger a second time.
+    BreakIterator_ja( const BreakIterator_ja& );
+    BreakIterator_ja& operator=( const BreakIterator_ja& );
+public:
+    BreakIterator_ja ();
+    ~BreakIterator_ja ();
+    Boundary SAL_CALL nextWord( const rtl::OUString& Text, sal_Int32 nStartPos,
+            const com::sun::star::lang::Locale& nLocale, sal_Int16 WordType)
+            throw(com::sun::star::uno::RuntimeException);
+    Boundary SAL_CALL previousWord( const rtl::OUString& Text, sal_Int32 nStartPos,
+            const com::sun::star::lang::Locale& nLocale, sal_Int16 WordType)
+            throw(com::sun::star::uno::RuntimeException);
+    Boundary SAL_CALL getWordBoundary( const rtl::OUString& Text, sal_Int32 nPos,
+            const com::sun::star::lang::Locale& nLocale, sal_Int16 WordType, sal_Bool bDirection )
+            throw(com::sun::star::uno::RuntimeException);
+};
+
 } } } }
 
 #endif // _I18N_BREAKITERATOR_CJK_HXX_
Index: source/breakiterator/breakiterator_cjk.cxx
===================================================================
RCS file: /cvs/l10n/i18npool/source/breakiterator/breakiterator_cjk.cxx,v
retrieving revision 1.14
diff -u -r1.14 breakiterator_cjk.cxx
--- source/breakiterator/breakiterator_cjk.cxx	24 Oct 2006 13:53:13 -0000	1.14
+++ source/breakiterator/breakiterator_cjk.cxx	31 Jan 2007 07:00:27 -0000
@@ -40,6 +40,10 @@
 #include
 #include
 
+#include
+#include
+#include
+
using namespace ::com::sun::star::uno; using namespace ::com::sun::star::lang; using namespace ::rtl; @@ -140,30 +144,136 @@ } // ---------------------------------------------------- +// class BreakIterator_ko +// ----------------------------------------------------; +BreakIterator_ko::BreakIterator_ko() +{ + cBreakIterator = "com.sun.star.i18n.BreakIterator_ko"; +} + +BreakIterator_ko::~BreakIterator_ko() +{ +} + +// ---------------------------------------------------- // class BreakIterator_ja // ----------------------------------------------------; BreakIterator_ja::BreakIterator_ja() { - dict = new xdictionary("ja"); - dict->setJapaneseWordBreak(); - cBreakIterator = "com.sun.star.i18n.BreakIterator_ja"; +// dict = new xdictionary("ja"); +// dict->setJapaneseWordBreak(); + tagger = MeCab::createTagger(""); + cBreakIterator = "com.sun.star.i18n.BreakIterator_ja"; } BreakIterator_ja::~BreakIterator_ja() { - delete dict; +// delete dict; + delete tagger; } -// ---------------------------------------------------- -// class BreakIterator_ko -// ----------------------------------------------------; -BreakIterator_ko::BreakIterator_ko() +Boundary SAL_CALL +BreakIterator_ja::previousWord(const OUString& text, sal_Int32 anyPos, + const lang::Locale& nLocale, sal_Int16 wordType) throw(RuntimeException) { - cBreakIterator = "com.sun.star.i18n.BreakIterator_ko"; + return getWordBoundary(text, anyPos, nLocale, wordType, sal_False); } -BreakIterator_ko::~BreakIterator_ko() +Boundary SAL_CALL +BreakIterator_ja::nextWord(const OUString& text, sal_Int32 anyPos, + const lang::Locale& nLocale, sal_Int16 wordType) throw(RuntimeException) { + Boundary boundary; + + if( anyPos >= text.getLength() ) + { + boundary.startPos = boundary.endPos = text.getLength(); + return boundary; + } + + OStringBuffer aStr = OUStringToOString(text, RTL_TEXTENCODING_EUC_JP); + aStr.append( static_cast< char >(0) ); + MeCab::Node *node = tagger->parseToNode(aStr.getStr()); + + for( int cnt = 0; node; 
node = node->next ) + { + OUString chunk = OUString(node->surface, node->length, + RTL_TEXTENCODING_EUC_JP, + OSTRING_TO_OUSTRING_CVTFLAGS); + cnt += chunk.getLength() + (node->rlength - node->length); + if( cnt > anyPos ) + { + chunk = OUString(node->next->surface, node->next->length, + RTL_TEXTENCODING_EUC_JP, + OSTRING_TO_OUSTRING_CVTFLAGS); + + boundary.startPos = cnt + (node->next->rlength - node->next->length); + if( boundary.startPos >= text.getLength() ) + boundary.endPos = boundary.startPos; + else + boundary.endPos = cnt + chunk.getLength(); + + return boundary; + } + } + + boundary.startPos = boundary.endPos = text.getLength(); + return boundary; +} + +Boundary SAL_CALL +BreakIterator_ja::getWordBoundary( const OUString& text, sal_Int32 anyPos, + const lang::Locale& nLocale, sal_Int16 wordType, sal_Bool bDirection ) + throw(RuntimeException) +{ + Boundary boundary; + sal_Int32 len = text.getLength(); + + if( anyPos < 0 || + (anyPos == 0 && !bDirection) ) + { + boundary.startPos = boundary.endPos = 0; + } + else if( anyPos > len || + (anyPos == len && bDirection) ) + { + boundary.startPos = boundary.endPos = len; + } + else + { + OStringBuffer aStr = OUStringToOString(text, RTL_TEXTENCODING_EUC_JP); + aStr.append( static_cast< char >(0) ); + MeCab::Node *node = tagger->parseToNode( aStr.getStr() ); + + for( int cnt = 0; node; node = node->next ) + { + OUString chunk = OUString(node->surface, node->length, + RTL_TEXTENCODING_EUC_JP, + OSTRING_TO_OUSTRING_CVTFLAGS); + cnt += chunk.getLength() + (node->rlength - node->length); + if( cnt == anyPos && bDirection ) + { + chunk = OUString(node->next->surface, node->next->length, + RTL_TEXTENCODING_EUC_JP, + OSTRING_TO_OUSTRING_CVTFLAGS); + + boundary.startPos = cnt + (node->next->rlength - node->next->length); + if( boundary.startPos >= len ) + boundary.endPos = boundary.startPos; + else + boundary.endPos = cnt + chunk.getLength(); + break; + } + else if( cnt >= anyPos ) + { + boundary.startPos = cnt - 
chunk.getLength(); + boundary.endPos = cnt; + break; + } + } + } + + return boundary; } } } } } Index: util/makefile.mk =================================================================== RCS file: /cvs/l10n/i18npool/util/makefile.mk,v retrieving revision 1.21 diff -u -r1.21 makefile.mk --- util/makefile.mk 19 Dec 2006 18:05:56 -0000 1.21 +++ util/makefile.mk 31 Jan 2007 07:00:30 -0000 @@ -79,6 +79,8 @@ $(ICUINLIB) \ $(ICUUCLIB) +ENVLINKFLAGS= -lmecab + # --- Targets ------------------------------------------------------------ .INCLUDE : target.mk