Index: third_party/WebKit/Source/platform/fonts/shaping/CachingWordShapeIterator.h |
diff --git a/third_party/WebKit/Source/platform/fonts/shaping/CachingWordShapeIterator.h b/third_party/WebKit/Source/platform/fonts/shaping/CachingWordShapeIterator.h |
index 6a2b9e69cacfda40f495c8d6bec9bd90532575a4..fc7946df9b84e5250beeab35fc795bba6e306787 100644 |
--- a/third_party/WebKit/Source/platform/fonts/shaping/CachingWordShapeIterator.h |
+++ b/third_party/WebKit/Source/platform/fonts/shaping/CachingWordShapeIterator.h |
@@ -112,19 +112,55 @@ private: |
if (m_startIndex >= length) |
return TextRun::Range(0, 0); |
- if (m_textRun[m_startIndex] == spaceCharacter |
+ if (m_startIndex + 1u == length |
+ || m_textRun[m_startIndex] == spaceCharacter |
|| m_textRun[m_startIndex] == tabulationCharacter) { |
return TextRun::Range(m_startIndex++, 1); |
} |
+ // Delimit every CJK character because these scripts do not delimit |
drott
2015/12/21 10:36:01
AFAICS we have now three modes in this file: Separ
kojii
2015/12/21 11:59:56
I think that's not the correct way to see the opti
|
+ // words by spaces. |
+ if (!m_textRun.is8Bit()) { |
+ UChar32 ch; |
+ unsigned end = m_startIndex; |
+ U16_NEXT(m_textRun.characters16(), end, length, ch); |
+ if (Character::isCJKIdeographOrSymbol(ch)) |
+ return nextWordRangeForCJK(end, ch); |
+ } |
+ |
for (unsigned i = m_startIndex + 1; ; i++) { |
if (i == length || m_textRun[i] == spaceCharacter |
- || m_textRun[i] == tabulationCharacter) { |
+ || m_textRun[i] == tabulationCharacter |
+ || (!m_textRun.is8Bit() && Character::isCJKIdeographOrSymbol(m_textRun[i]))) { |
return nextRangeToEndOffset(i); |
} |
} |
} |
+ TextRun::Range nextWordRangeForCJK(unsigned end, UChar32 ch) |
+ { |
+ const unsigned length = m_textRun.length(); |
+ bool hasAnySpecificScript = !Character::isCommonOrInheritedScript(ch); |
+ for (unsigned i = end; i < length; end = i) { |
+ U16_NEXT(m_textRun.characters16(), i, length, ch); |
+ // Keep marks (M*), modifier letters (Lm) and modifier symbols (Sk) in the current range. |
+ if (U_GET_GC_MASK(ch) & (U_GC_M_MASK | U_GC_LM_MASK | U_GC_SK_MASK)) |
+ continue; |
+ // Avoid delimiting COMMON/INHERITED characters alone, which makes it harder to |
+ // identify the script; the range absorbs the first script-specific CJK character only. |
+ if (Character::isCJKIdeographOrSymbol(ch)) { |
+ if (Character::isCommonOrInheritedScript(ch)) |
+ continue; |
+ if (!hasAnySpecificScript) { |
+ hasAnySpecificScript = true; |
+ continue; |
+ } |
+ } |
+ return nextRangeToEndOffset(end); |
+ } |
+ return nextRangeToEndOffset(length); |
+ } |
+ |
TextRun::Range nextRangeUntil(UChar ch) |
{ |
const unsigned length = m_textRun.length(); |