Lines 300-309
Link Here
|
300 |
|
300 |
|
301 |
sal_Int32 strLen = Text.getLength(); |
301 |
sal_Int32 strLen = Text.getLength(); |
302 |
sal_uInt32 ch=0; |
302 |
sal_uInt32 ch=0; |
|
|
303 |
sal_uInt32 prev_ch=Text.getStr()[0]; |
303 |
while(iterateCodePoints(Text, nStartPos, 1, ch) < strLen ) { |
304 |
while(iterateCodePoints(Text, nStartPos, 1, ch) < strLen ) { |
304 |
sal_Int16 currentCharScriptType = getScriptClass(ch); |
305 |
sal_Int16 currentCharScriptType = getScriptClass(ch, prev_ch); |
305 |
if(ScriptType != currentCharScriptType && currentCharScriptType != ScriptType::WEAK) |
306 |
if(ScriptType != currentCharScriptType && currentCharScriptType != ScriptType::WEAK) |
306 |
break; |
307 |
break; |
|
|
308 |
prev_ch = ch; |
307 |
} |
309 |
} |
308 |
return nStartPos; |
310 |
return nStartPos; |
309 |
} |
311 |
} |
Lines 456-474
Link Here
|
456 |
|
458 |
|
457 |
#define scriptListCount sizeof (scriptList) / sizeof (UBlock2Script) |
459 |
#define scriptListCount sizeof (scriptList) / sizeof (UBlock2Script) |
458 |
|
460 |
|
459 |
sal_Int16 BreakIteratorImpl::getScriptClass(sal_uInt32 currentChar) |
461 |
// Reports whether the code point `ch` should be treated as a weak character,
// optionally depending on the code point `prev_ch` that precedes it.
//
//   ch      - code point being classified
//   prev_ch - code point immediately before `ch`
//
// Returns true when
//   * ch is 1, 2, 0x20 (space) or 0xA0 (no-break space), regardless of prev_ch;
//   * ch is a quotation mark (0x22), apostrophe (0x27), Geresh (0x05F3) or
//     Gershayim (0x05F4) AND prev_ch lies in 0x05D0..0x05EA (the "following
//     Hebrew" case).
// Returns false for everything else.
sal_Bool BreakIteratorImpl::changeToWeak(sal_uInt32 ch, sal_uInt32 prev_ch)
{
    // Unconditionally weak code points.
    if (ch == 1 || ch == 2 || ch == 0x20 || ch == 0xA0)
        return true;

    // Quote-like marks only count as weak when they directly follow a
    // code point in the 0x05D0..0x05EA range.
    const bool bQuoteLike = ch == 0x22        // quotation mark
                         || ch == 0x27        // apostrophe
                         || ch == 0x05F3      // Geresh
                         || ch == 0x05F4;     // Gershayim
    const bool bPrevIsHebrew = prev_ch >= 0x05D0 && prev_ch <= 0x05EA;

    return bQuoteLike && bPrevIsHebrew;
}
480 |
|
481 |
sal_Int16 BreakIteratorImpl::getScriptClass(sal_uInt32 currentChar, sal_uInt32 lastChar) |
460 |
{ |
482 |
{ |
461 |
static sal_uInt32 lastChar = 0; |
|
|
462 |
static sal_Int16 nRet = 0; |
483 |
static sal_Int16 nRet = 0; |
463 |
|
484 |
|
464 |
if (currentChar != lastChar) { |
485 |
if (currentChar != lastChar) { |
465 |
lastChar = currentChar; |
|
|
466 |
|
467 |
//JP 21.9.2001: handle specific characters - always as weak |
486 |
//JP 21.9.2001: handle specific characters - always as weak |
468 |
// definition of 1 - this breaks a word |
487 |
// definition of 1 - this breaks a word |
469 |
// 2 - this can be inside a word |
488 |
// 2 - this can be inside a word |
470 |
// 0x20 & 0xA0 - Bug 102975, declare western space and non-break space as WEAK char. |
489 |
// 0x20 & 0xA0 - Bug 102975, declare western space and non-break space as WEAK char. |
471 |
if( 1 == currentChar || 2 == currentChar || 0x20 == currentChar || 0xA0 == currentChar) |
490 |
if ( changeToWeak(currentChar, lastChar)) |
472 |
nRet = ScriptType::WEAK; |
491 |
nRet = ScriptType::WEAK; |
473 |
// workaround for Coptic |
492 |
// workaround for Coptic |
474 |
else if ( 0x2C80 <= currentChar && 0x2CE3 >= currentChar) |
493 |
else if ( 0x2C80 <= currentChar && 0x2CE3 >= currentChar) |
Lines 481-486
Link Here
|
481 |
} |
500 |
} |
482 |
nRet=(i < scriptListCount && block >= scriptList[i].from) ? scriptList[i].script : ScriptType::WEAK; |
501 |
nRet=(i < scriptListCount && block >= scriptList[i].from) ? scriptList[i].script : ScriptType::WEAK; |
483 |
} |
502 |
} |
|
|
503 |
lastChar = currentChar; |
484 |
} |
504 |
} |
485 |
return nRet; |
505 |
return nRet; |
486 |
} |
506 |
} |