View | Details | Raw Unified | Return to issue 74034
Collapse All | Expand All

(-)inc/breakiterator_cjk.hxx (-1 / +18 lines)
Lines 37-42 Link Here
37
37
38
#include <breakiterator_unicode.hxx>
38
#include <breakiterator_unicode.hxx>
39
#include <xdictionary.hxx>
39
#include <xdictionary.hxx>
40
#include <mecab.h>
40
41
41
namespace com { namespace sun { namespace star { namespace i18n {
42
namespace com { namespace sun { namespace star { namespace i18n {
42
//	----------------------------------------------------
43
//	----------------------------------------------------
Lines 74-84 Link Here
74
75
75
#ifdef BREAKITERATOR_ALL
76
#ifdef BREAKITERATOR_ALL
76
BREAKITERATOR_CJK( zh )
77
BREAKITERATOR_CJK( zh )
77
BREAKITERATOR_CJK( ja )
78
//BREAKITERATOR_CJK( ja )
78
BREAKITERATOR_CJK( ko )
79
BREAKITERATOR_CJK( ko )
79
#endif
80
#endif
80
#undef BREAKITERATOR__CJK
81
#undef BREAKITERATOR__CJK
81
82
83
class BreakIterator_ja : public BreakIterator_CJK {
84
    MeCab::Tagger *tagger;
85
public:
86
    BreakIterator_ja ();
87
    ~BreakIterator_ja ();
88
	Boundary SAL_CALL nextWord( const rtl::OUString& Text, sal_Int32 nStartPos,
89
		    const com::sun::star::lang::Locale& nLocale, sal_Int16 WordType)
90
		    throw(com::sun::star::uno::RuntimeException);
91
	Boundary SAL_CALL previousWord( const rtl::OUString& Text, sal_Int32 nStartPos,
92
		    const com::sun::star::lang::Locale& nLocale, sal_Int16 WordType)
93
		    throw(com::sun::star::uno::RuntimeException);
94
	Boundary SAL_CALL getWordBoundary( const rtl::OUString& Text, sal_Int32 nPos,
95
		    const com::sun::star::lang::Locale& nLocale, sal_Int16 WordType, sal_Bool bDirection )
96
		    throw(com::sun::star::uno::RuntimeException);
97
};
98
82
} } } }
99
} } } }
83
100
84
#endif // _I18N_BREAKITERATOR_CJK_HXX_
101
#endif // _I18N_BREAKITERATOR_CJK_HXX_
(-)source/breakiterator/breakiterator_cjk.cxx (-10 / +120 lines)
Lines 40-45 Link Here
40
#include <breakiterator_cjk.hxx>
40
#include <breakiterator_cjk.hxx>
41
#include <i18nutil/unicode.hxx>
41
#include <i18nutil/unicode.hxx>
42
42
43
#include <rtl/string.hxx>
44
#include <rtl/strbuf.hxx>
45
#include <mecab.h>
46
43
using namespace ::com::sun::star::uno;
47
using namespace ::com::sun::star::uno;
44
using namespace ::com::sun::star::lang;
48
using namespace ::com::sun::star::lang;
45
using namespace ::rtl;
49
using namespace ::rtl;
Lines 140-169 Link Here
140
}
144
}
141
145
142
//      ----------------------------------------------------
146
//      ----------------------------------------------------
147
//      class BreakIterator_ko
148
//      ----------------------------------------------------;
149
/// Constructs the Korean break iterator; the only work done here is to
/// record the implementation name in cBreakIterator.
BreakIterator_ko::BreakIterator_ko()
{
    cBreakIterator = "com.sun.star.i18n.BreakIterator_ko";
}
153
154
/// Nothing to release: the Korean iterator's constructor allocates nothing.
BreakIterator_ko::~BreakIterator_ko()
{
}
157
158
//      ----------------------------------------------------
143
//      class BreakIterator_ja
159
//      class BreakIterator_ja
144
//      ----------------------------------------------------;
160
//      ----------------------------------------------------;
145
/// Constructs the Japanese break iterator.
///
/// The former dictionary-based setup (xdictionary("ja") plus
/// setJapaneseWordBreak()) is no longer performed; a MeCab tagger created
/// with an empty argument string is used instead.
///
/// NOTE(review): the pointer returned by MeCab::createTagger() is stored
/// without being checked here.
BreakIterator_ja::BreakIterator_ja()
    : tagger( MeCab::createTagger("") )
{
    cBreakIterator = "com.sun.star.i18n.BreakIterator_ja";
}
151
168
152
/// Destroys the MeCab tagger created by the constructor.  The dictionary
/// is no longer allocated by this class, so it is not deleted here.
BreakIterator_ja::~BreakIterator_ja()
{
    delete tagger;
}
156
174
157
//      ----------------------------------------------------
175
/**
 * Finds the word boundary located by searching backwards from anyPos.
 *
 * This is a thin wrapper: all arguments are forwarded unchanged to
 * getWordBoundary() with bDirection fixed to sal_False.
 *
 * @return whatever getWordBoundary() reports for the backward direction.
 */
Boundary SAL_CALL
BreakIterator_ja::previousWord(const OUString& text, sal_Int32 anyPos,
        const lang::Locale& nLocale, sal_Int16 wordType) throw(RuntimeException)
{
    const sal_Bool bForward = sal_False;
    const Boundary aResult( getWordBoundary(text, anyPos, nLocale, wordType, bForward) );
    return aResult;
}
164
181
165
BreakIterator_ko::~BreakIterator_ko()
182
/**
 * Returns the boundary of the token that follows the token containing
 * anyPos, as segmented by the MeCab tagger.
 *
 * The text is converted to EUC-JP, tokenised, and the token list is walked
 * while `cnt` tracks the UTF-16 offset of the end of the current token
 * (including the whitespace MeCab skipped in front of it).  The first token
 * whose end lies beyond anyPos is the "current" word; its successor is the
 * result.
 *
 * @param text      text to segment.
 * @param anyPos    UTF-16 index inside the current word.
 * @param nLocale   unused by this implementation.
 * @param wordType  unused by this implementation.
 * @return boundary of the next token, or an empty boundary at the end of the
 *         text when anyPos is at/after the end or no following token exists.
 * @throws RuntimeException if no MeCab tagger is available.
 */
Boundary SAL_CALL
BreakIterator_ja::nextWord(const OUString& text, sal_Int32 anyPos,
        const lang::Locale& nLocale, sal_Int16 wordType) throw(RuntimeException)
{
    const sal_Int32 len = text.getLength();

    // Default answer for every "nothing follows" case: empty boundary at
    // the end of the text.
    Boundary boundary;
    boundary.startPos = boundary.endPos = len;

    if( anyPos >= len )
        return boundary;

    // createTagger() result is stored unchecked by the constructor; refuse
    // to dereference a null tagger.
    if( !tagger )
        throw RuntimeException();

    // getStr() is already NUL-terminated, so no explicit terminator is needed.
    const OString aStr( OUStringToOString(text, RTL_TEXTENCODING_EUC_JP) );

    sal_Int32 cnt = 0;
    for( const MeCab::Node *node = tagger->parseToNode(aStr.getStr());
         node; node = node->next )
    {
        const OUString chunk(node->surface, node->length,
                             RTL_TEXTENCODING_EUC_JP,
                             OSTRING_TO_OUSTRING_CVTFLAGS);
        // rlength - length is the byte count of the whitespace skipped
        // before this token.
        // NOTE(review): this assumes that whitespace is single-byte in
        // EUC-JP, so bytes == UTF-16 units -- confirm.
        cnt += chunk.getLength() + (node->rlength - node->length);
        if( cnt <= anyPos )
            continue;

        // `node` is the token containing anyPos; the answer is its
        // successor.  The last node of the list has no successor, in which
        // case the end-of-text default is returned instead of dereferencing
        // a null pointer.
        const MeCab::Node *next = node->next;
        if( !next )
            break;

        const OUString nextChunk(next->surface, next->length,
                                 RTL_TEXTENCODING_EUC_JP,
                                 OSTRING_TO_OUSTRING_CVTFLAGS);

        // The next word starts after any whitespace that precedes it ...
        boundary.startPos = cnt + (next->rlength - next->length);
        // ... and ends one token length after its start (the end must be
        // measured from startPos, not from cnt, or the skipped whitespace
        // would be lost).
        if( boundary.startPos >= len )
            boundary.endPos = boundary.startPos;
        else
            boundary.endPos = boundary.startPos + nextChunk.getLength();

        return boundary;
    }

    return boundary;
}
223
224
/**
 * Returns the boundary of the MeCab token at anyPos.
 *
 * The text is converted to EUC-JP, tokenised, and the token list is walked
 * while `cnt` tracks the UTF-16 offset of the end of the current token
 * (including the whitespace MeCab skipped in front of it).
 *
 *  - If a token ends exactly at anyPos and bDirection is true, the token
 *    that follows it is returned.
 *  - Otherwise the first token whose end is at or beyond anyPos is returned.
 *
 * @param text        text to segment.
 * @param anyPos      UTF-16 index to look up.
 * @param nLocale     unused by this implementation.
 * @param wordType    unused by this implementation.
 * @param bDirection  true to resolve forwards, false to resolve backwards.
 * @return the token boundary.  An empty boundary at 0 is returned for
 *         positions before the text (or position 0 looking backwards), an
 *         empty boundary at the text length for positions after the text
 *         (or the end position looking forwards).  If the tokeniser yields
 *         no matching token, an empty boundary at the text length (forward)
 *         or at 0 (backward) is returned.
 * @throws RuntimeException if no MeCab tagger is available.
 */
Boundary SAL_CALL
BreakIterator_ja::getWordBoundary( const OUString& text, sal_Int32 anyPos,
        const lang::Locale& nLocale, sal_Int16 wordType, sal_Bool bDirection )
        throw(RuntimeException)
{
    const sal_Int32 len = text.getLength();
    Boundary boundary;

    // Positions outside the text, or at an edge while looking outwards,
    // need no tokenisation.
    if( anyPos < 0 || (anyPos == 0 && !bDirection) )
    {
        boundary.startPos = boundary.endPos = 0;
        return boundary;
    }
    if( anyPos > len || (anyPos == len && bDirection) )
    {
        boundary.startPos = boundary.endPos = len;
        return boundary;
    }

    // createTagger() result is stored unchecked by the constructor; refuse
    // to dereference a null tagger.
    if( !tagger )
        throw RuntimeException();

    // Fallback used when no token matches (parse failure, or the forward
    // case hitting the last node): mirror the edge handling above so the
    // result is always explicitly assigned.
    boundary.startPos = boundary.endPos = bDirection ? len : 0;

    // getStr() is already NUL-terminated, so no explicit terminator is needed.
    const OString aStr( OUStringToOString(text, RTL_TEXTENCODING_EUC_JP) );

    sal_Int32 cnt = 0;
    for( const MeCab::Node *node = tagger->parseToNode( aStr.getStr() );
         node; node = node->next )
    {
        const OUString chunk(node->surface, node->length,
                             RTL_TEXTENCODING_EUC_JP,
                             OSTRING_TO_OUSTRING_CVTFLAGS);
        // rlength - length is the byte count of the whitespace skipped
        // before this token.
        // NOTE(review): this assumes that whitespace is single-byte in
        // EUC-JP, so bytes == UTF-16 units -- confirm.
        cnt += chunk.getLength() + (node->rlength - node->length);
        if( cnt < anyPos )
            continue;

        if( cnt == anyPos && bDirection )
        {
            // anyPos sits exactly on a token end and we look forwards: the
            // answer is the following token, if there is one.  Without a
            // successor the fallback set above is kept.
            const MeCab::Node *next = node->next;
            if( next )
            {
                const OUString nextChunk(next->surface, next->length,
                                         RTL_TEXTENCODING_EUC_JP,
                                         OSTRING_TO_OUSTRING_CVTFLAGS);

                boundary.startPos = cnt + (next->rlength - next->length);
                // The end is measured from startPos so that whitespace
                // skipped before the token is not lost.
                if( boundary.startPos >= len )
                    boundary.endPos = boundary.startPos;
                else
                    boundary.endPos = boundary.startPos + nextChunk.getLength();
            }
        }
        else
        {
            // Current token ends at or beyond anyPos: report the token
            // itself, excluding the whitespace in front of it.
            boundary.startPos = cnt - chunk.getLength();
            boundary.endPos = cnt;
        }
        break;
    }

    return boundary;
}
168
278
169
} } } }
279
} } } }
(-)util/makefile.mk (+2 lines)
Lines 79-84 Link Here
79
		$(ICUINLIB) \
79
		$(ICUINLIB) \
80
		$(ICUUCLIB)
80
		$(ICUUCLIB)
81
81
82
ENVLINKFLAGS= -lmecab
83
82
# --- Targets ------------------------------------------------------------
84
# --- Targets ------------------------------------------------------------
83
85
84
.INCLUDE :	target.mk
86
.INCLUDE :	target.mk

Return to issue 74034