third_party/WebKit/Source/platform/fonts/shaping/CachingWordShapeIterator.h - Issue 2263083002: Refactor CachingWordShapeIterator::nextWordEndIndex

Unified Diff: third_party/WebKit/Source/platform/fonts/shaping/CachingWordShapeIterator.h

Issue 2263083002: Refactor CachingWordShapeIterator::nextWordEndIndex (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: Comment editorial Created 4 years, 4 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: third_party/WebKit/Source/platform/fonts/shaping/CachingWordShapeIterator.h

diff --git a/third_party/WebKit/Source/platform/fonts/shaping/CachingWordShapeIterator.h b/third_party/WebKit/Source/platform/fonts/shaping/CachingWordShapeIterator.h

index f9598b3501849016ea11490884377679adf7a335..e9d9a6eb1c4dbc9973e11ab64ccc34d13a0515a7 100644

--- a/third_party/WebKit/Source/platform/fonts/shaping/CachingWordShapeIterator.h

+++ b/third_party/WebKit/Source/platform/fonts/shaping/CachingWordShapeIterator.h

@@ -111,7 +111,7 @@ private:

return ch == spaceCharacter || ch == tabulationCharacter;

}

- unsigned nextWordEndIndex()

+ unsigned nextWordEndIndex() const

{

const unsigned length = m_textRun.length();

if (m_startIndex >= length)

@@ -120,47 +120,49 @@ private:

if (m_startIndex + 1u == length || isWordDelimiter(m_textRun[m_startIndex]))

return m_startIndex + 1;

- // Delimit every CJK character because these scripts do not delimit

- // words by spaces, and not delimiting hits the performance.

- if (!m_textRun.is8Bit()) {

- UChar32 ch;

- unsigned end = m_startIndex;

- U16_NEXT(m_textRun.characters16(), end, length, ch);

- if (Character::isCJKIdeographOrSymbol(ch)) {

- bool hasAnyScript = !Character::isCommonOrInheritedScript(ch);

- for (unsigned i = end; i < length; end = i) {

- U16_NEXT(m_textRun.characters16(), i, length, ch);

- // ZWJ and modifier check in order not to split those Emoji sequences.

- if (U_GET_GC_MASK(ch) & (U_GC_M_MASK | U_GC_LM_MASK | U_GC_SK_MASK)

- || ch == zeroWidthJoinerCharacter || Character::isModifier(ch))

- continue;

- // Avoid delimiting COMMON/INHERITED alone, which makes harder to

- // identify the script.

- if (Character::isCJKIdeographOrSymbol(ch)) {

- if (Character::isCommonOrInheritedScript(ch))

- continue;

- if (!hasAnyScript) {

- hasAnyScript = true;

- continue;

- }

- return end;

- }

- return length;

+ // 8Bit words end at isWordDelimiter().

+ if (m_textRun.is8Bit()) {

+ for (unsigned i = m_startIndex + 1; ; i++) {

+ if (i == length || isWordDelimiter(m_textRun[i]))

+ return i;

}

- for (unsigned i = m_startIndex + 1; ; i++) {

- if (i == length || isWordDelimiter(m_textRun[i])) {

- return i;

+ // Non-CJK/Emoji words end at isWordDelimiter() or CJK/Emoji characters.

+ unsigned end = m_startIndex;

+ UChar32 ch = m_textRun.codepointAtAndNext(end);

+ if (!Character::isCJKIdeographOrSymbol(ch)) {

+ for (unsigned nextEnd = end; end < length; end = nextEnd) {

+ ch = m_textRun.codepointAtAndNext(nextEnd);

+ if (isWordDelimiter(ch) || Character::isCJKIdeographOrSymbolBase(ch))

+ return end;

}

- if (!m_textRun.is8Bit()) {

- UChar32 nextChar;

- U16_GET(m_textRun.characters16(), 0, i, length, nextChar);

- if (Character::isCJKIdeographOrSymbolBase(nextChar))

- return i;

+ return length;

+ }

+ // For CJK/Emoji words, delimit every character because these scripts do

+ // not delimit words by spaces, and delimiting only at isWordDelimiter()

+ // worsens the cache efficiency.

+ bool hasAnyScript = !Character::isCommonOrInheritedScript(ch);

+ for (unsigned nextEnd = end; end < length; end = nextEnd) {

+ ch = m_textRun.codepointAtAndNext(nextEnd);

+ // ZWJ and modifier check in order not to split those Emoji sequences.

+ if (U_GET_GC_MASK(ch) & (U_GC_M_MASK | U_GC_LM_MASK | U_GC_SK_MASK)

+ || ch == zeroWidthJoinerCharacter || Character::isModifier(ch))

+ continue;

+ // Avoid delimiting COMMON/INHERITED alone, which makes it harder to

+ // identify the script.

+ if (Character::isCJKIdeographOrSymbol(ch)) {

+ if (Character::isCommonOrInheritedScript(ch))

+ continue;

+ if (!hasAnyScript) {

+ hasAnyScript = true;

+ continue;

+ }

}

+ return end;

}

+ return length;

}

bool shapeToEndIndex(RefPtr<const ShapeResult>* result, unsigned endIndex)

@@ -180,7 +182,7 @@ private:

return result->get();

}

- unsigned endIndexUntil(UChar ch)

+ unsigned endIndexUntil(UChar ch) const

{

unsigned length = m_textRun.length();

ASSERT(m_startIndex < length);

« no previous file with comments | « no previous file | third_party/WebKit/Source/platform/text/TextRun.h » ('j') | no next file with comments »