| Index: third_party/WebKit/Source/platform/fonts/shaping/CachingWordShapeIterator.h
|
| diff --git a/third_party/WebKit/Source/platform/fonts/shaping/CachingWordShapeIterator.h b/third_party/WebKit/Source/platform/fonts/shaping/CachingWordShapeIterator.h
|
| index f9598b3501849016ea11490884377679adf7a335..e9d9a6eb1c4dbc9973e11ab64ccc34d13a0515a7 100644
|
| --- a/third_party/WebKit/Source/platform/fonts/shaping/CachingWordShapeIterator.h
|
| +++ b/third_party/WebKit/Source/platform/fonts/shaping/CachingWordShapeIterator.h
|
| @@ -111,7 +111,7 @@ private:
|
| return ch == spaceCharacter || ch == tabulationCharacter;
|
| }
|
|
|
| - unsigned nextWordEndIndex()
|
| + unsigned nextWordEndIndex() const
|
| {
|
| const unsigned length = m_textRun.length();
|
| if (m_startIndex >= length)
|
| @@ -120,47 +120,49 @@ private:
|
| if (m_startIndex + 1u == length || isWordDelimiter(m_textRun[m_startIndex]))
|
| return m_startIndex + 1;
|
|
|
| - // Delimit every CJK character because these scripts do not delimit
|
| - // words by spaces, and not delimiting hits the performance.
|
| - if (!m_textRun.is8Bit()) {
|
| - UChar32 ch;
|
| - unsigned end = m_startIndex;
|
| - U16_NEXT(m_textRun.characters16(), end, length, ch);
|
| - if (Character::isCJKIdeographOrSymbol(ch)) {
|
| - bool hasAnyScript = !Character::isCommonOrInheritedScript(ch);
|
| - for (unsigned i = end; i < length; end = i) {
|
| - U16_NEXT(m_textRun.characters16(), i, length, ch);
|
| - // ZWJ and modifier check in order not to split those Emoji sequences.
|
| - if (U_GET_GC_MASK(ch) & (U_GC_M_MASK | U_GC_LM_MASK | U_GC_SK_MASK)
|
| - || ch == zeroWidthJoinerCharacter || Character::isModifier(ch))
|
| - continue;
|
| - // Avoid delimiting COMMON/INHERITED alone, which makes harder to
|
| - // identify the script.
|
| - if (Character::isCJKIdeographOrSymbol(ch)) {
|
| - if (Character::isCommonOrInheritedScript(ch))
|
| - continue;
|
| - if (!hasAnyScript) {
|
| - hasAnyScript = true;
|
| - continue;
|
| - }
|
| - }
|
| - return end;
|
| - }
|
| - return length;
|
| + // 8Bit words end at isWordDelimiter().
|
| + if (m_textRun.is8Bit()) {
|
| + for (unsigned i = m_startIndex + 1; ; i++) {
|
| + if (i == length || isWordDelimiter(m_textRun[i]))
|
| + return i;
|
| }
|
| }
|
|
|
| - for (unsigned i = m_startIndex + 1; ; i++) {
|
| - if (i == length || isWordDelimiter(m_textRun[i])) {
|
| - return i;
|
| + // Non-CJK/Emoji words end at isWordDelimiter() or CJK/Emoji characters.
|
| + unsigned end = m_startIndex;
|
| + UChar32 ch = m_textRun.codepointAtAndNext(end);
|
| + if (!Character::isCJKIdeographOrSymbol(ch)) {
|
| + for (unsigned nextEnd = end; end < length; end = nextEnd) {
|
| + ch = m_textRun.codepointAtAndNext(nextEnd);
|
| + if (isWordDelimiter(ch) || Character::isCJKIdeographOrSymbolBase(ch))
|
| + return end;
|
| }
|
| - if (!m_textRun.is8Bit()) {
|
| - UChar32 nextChar;
|
| - U16_GET(m_textRun.characters16(), 0, i, length, nextChar);
|
| - if (Character::isCJKIdeographOrSymbolBase(nextChar))
|
| - return i;
|
| + return length;
|
| + }
|
| +
|
| + // For CJK/Emoji words, delimit every character because these scripts do
|
| + // not delimit words by spaces, and delimiting only at isWordDelimiter()
|
| + // worsen the cache efficiency.
|
| + bool hasAnyScript = !Character::isCommonOrInheritedScript(ch);
|
| + for (unsigned nextEnd = end; end < length; end = nextEnd) {
|
| + ch = m_textRun.codepointAtAndNext(nextEnd);
|
| + // ZWJ and modifier check in order not to split those Emoji sequences.
|
| + if (U_GET_GC_MASK(ch) & (U_GC_M_MASK | U_GC_LM_MASK | U_GC_SK_MASK)
|
| + || ch == zeroWidthJoinerCharacter || Character::isModifier(ch))
|
| + continue;
|
| + // Avoid delimiting COMMON/INHERITED alone, which makes harder to
|
| + // identify the script.
|
| + if (Character::isCJKIdeographOrSymbol(ch)) {
|
| + if (Character::isCommonOrInheritedScript(ch))
|
| + continue;
|
| + if (!hasAnyScript) {
|
| + hasAnyScript = true;
|
| + continue;
|
| + }
|
| }
|
| + return end;
|
| }
|
| + return length;
|
| }
|
|
|
| bool shapeToEndIndex(RefPtr<const ShapeResult>* result, unsigned endIndex)
|
| @@ -180,7 +182,7 @@ private:
|
| return result->get();
|
| }
|
|
|
| - unsigned endIndexUntil(UChar ch)
|
| + unsigned endIndexUntil(UChar ch) const
|
| {
|
| unsigned length = m_textRun.length();
|
| ASSERT(m_startIndex < length);
|
|
|