Index: third_party/WebKit/Source/platform/fonts/shaping/CachingWordShapeIterator.h |
diff --git a/third_party/WebKit/Source/platform/fonts/shaping/CachingWordShapeIterator.h b/third_party/WebKit/Source/platform/fonts/shaping/CachingWordShapeIterator.h |
index f9598b3501849016ea11490884377679adf7a335..e9d9a6eb1c4dbc9973e11ab64ccc34d13a0515a7 100644 |
--- a/third_party/WebKit/Source/platform/fonts/shaping/CachingWordShapeIterator.h |
+++ b/third_party/WebKit/Source/platform/fonts/shaping/CachingWordShapeIterator.h |
@@ -111,7 +111,7 @@ private: |
return ch == spaceCharacter || ch == tabulationCharacter; |
} |
- unsigned nextWordEndIndex() |
+ unsigned nextWordEndIndex() const |
{ |
const unsigned length = m_textRun.length(); |
if (m_startIndex >= length) |
@@ -120,47 +120,49 @@ private: |
if (m_startIndex + 1u == length || isWordDelimiter(m_textRun[m_startIndex])) |
return m_startIndex + 1; |
- // Delimit every CJK character because these scripts do not delimit |
- // words by spaces, and not delimiting hits the performance. |
- if (!m_textRun.is8Bit()) { |
- UChar32 ch; |
- unsigned end = m_startIndex; |
- U16_NEXT(m_textRun.characters16(), end, length, ch); |
- if (Character::isCJKIdeographOrSymbol(ch)) { |
- bool hasAnyScript = !Character::isCommonOrInheritedScript(ch); |
- for (unsigned i = end; i < length; end = i) { |
- U16_NEXT(m_textRun.characters16(), i, length, ch); |
- // ZWJ and modifier check in order not to split those Emoji sequences. |
- if (U_GET_GC_MASK(ch) & (U_GC_M_MASK | U_GC_LM_MASK | U_GC_SK_MASK) |
- || ch == zeroWidthJoinerCharacter || Character::isModifier(ch)) |
- continue; |
- // Avoid delimiting COMMON/INHERITED alone, which makes harder to |
- // identify the script. |
- if (Character::isCJKIdeographOrSymbol(ch)) { |
- if (Character::isCommonOrInheritedScript(ch)) |
- continue; |
- if (!hasAnyScript) { |
- hasAnyScript = true; |
- continue; |
- } |
- } |
- return end; |
- } |
- return length; |
+ // 8Bit words end at isWordDelimiter(). |
+ if (m_textRun.is8Bit()) { |
+ for (unsigned i = m_startIndex + 1; ; i++) { |
+ if (i == length || isWordDelimiter(m_textRun[i])) |
+ return i; |
} |
} |
- for (unsigned i = m_startIndex + 1; ; i++) { |
- if (i == length || isWordDelimiter(m_textRun[i])) { |
- return i; |
+ // Non-CJK/Emoji words end at isWordDelimiter() or CJK/Emoji characters. |
+ unsigned end = m_startIndex; |
+ UChar32 ch = m_textRun.codepointAtAndNext(end); |
+ if (!Character::isCJKIdeographOrSymbol(ch)) { |
+ for (unsigned nextEnd = end; end < length; end = nextEnd) { |
+ ch = m_textRun.codepointAtAndNext(nextEnd); |
+ if (isWordDelimiter(ch) || Character::isCJKIdeographOrSymbolBase(ch)) |
+ return end; |
} |
- if (!m_textRun.is8Bit()) { |
- UChar32 nextChar; |
- U16_GET(m_textRun.characters16(), 0, i, length, nextChar); |
- if (Character::isCJKIdeographOrSymbolBase(nextChar)) |
- return i; |
+ return length; |
+ } |
+ |
+ // For CJK/Emoji words, delimit every character because these scripts do |
+ // not delimit words by spaces, and delimiting only at isWordDelimiter() |
+ // worsens cache efficiency. |
+ bool hasAnyScript = !Character::isCommonOrInheritedScript(ch); |
+ for (unsigned nextEnd = end; end < length; end = nextEnd) { |
+ ch = m_textRun.codepointAtAndNext(nextEnd); |
+ // Check for ZWJ and modifiers so that Emoji sequences are not split. |
+ if (U_GET_GC_MASK(ch) & (U_GC_M_MASK | U_GC_LM_MASK | U_GC_SK_MASK) |
+ || ch == zeroWidthJoinerCharacter || Character::isModifier(ch)) |
+ continue; |
+ // Avoid delimiting COMMON/INHERITED alone, which makes it harder to |
+ // identify the script. |
+ if (Character::isCJKIdeographOrSymbol(ch)) { |
+ if (Character::isCommonOrInheritedScript(ch)) |
+ continue; |
+ if (!hasAnyScript) { |
+ hasAnyScript = true; |
+ continue; |
+ } |
} |
+ return end; |
} |
+ return length; |
} |
bool shapeToEndIndex(RefPtr<const ShapeResult>* result, unsigned endIndex) |
@@ -180,7 +182,7 @@ private: |
return result->get(); |
} |
- unsigned endIndexUntil(UChar ch) |
+ unsigned endIndexUntil(UChar ch) const |
{ |
unsigned length = m_textRun.length(); |
ASSERT(m_startIndex < length); |