| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright (C) 2015 Google Inc. All rights reserved. | 2 * Copyright (C) 2015 Google Inc. All rights reserved. |
| 3 * | 3 * |
| 4 * Redistribution and use in source and binary forms, with or without | 4 * Redistribution and use in source and binary forms, with or without |
| 5 * modification, are permitted provided that the following conditions | 5 * modification, are permitted provided that the following conditions |
| 6 * are met: | 6 * are met: |
| 7 * 1. Redistributions of source code must retain the above copyright | 7 * 1. Redistributions of source code must retain the above copyright |
| 8 * notice, this list of conditions and the following disclaimer. | 8 * notice, this list of conditions and the following disclaimer. |
| 9 * 2. Redistributions in binary form must reproduce the above copyright | 9 * 2. Redistributions in binary form must reproduce the above copyright |
| 10 * notice, this list of conditions and the following disclaimer in the | 10 * notice, this list of conditions and the following disclaimer in the |
| (...skipping 93 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 104 bool nextWord(RefPtr<const ShapeResult>* wordResult) | 104 bool nextWord(RefPtr<const ShapeResult>* wordResult) |
| 105 { | 105 { |
| 106 return shapeToEndIndex(wordResult, nextWordEndIndex()); | 106 return shapeToEndIndex(wordResult, nextWordEndIndex()); |
| 107 } | 107 } |
| 108 | 108 |
| 109 static bool isWordDelimiter(UChar ch) | 109 static bool isWordDelimiter(UChar ch) |
| 110 { | 110 { |
| 111 return ch == spaceCharacter || ch == tabulationCharacter; | 111 return ch == spaceCharacter || ch == tabulationCharacter; |
| 112 } | 112 } |
| 113 | 113 |
| 114 unsigned nextWordEndIndex() | 114 unsigned nextWordEndIndex() const |
| 115 { | 115 { |
| 116 const unsigned length = m_textRun.length(); | 116 const unsigned length = m_textRun.length(); |
| 117 if (m_startIndex >= length) | 117 if (m_startIndex >= length) |
| 118 return 0; | 118 return 0; |
| 119 | 119 |
| 120 if (m_startIndex + 1u == length || isWordDelimiter(m_textRun[m_startInde
x])) | 120 if (m_startIndex + 1u == length || isWordDelimiter(m_textRun[m_startInde
x])) |
| 121 return m_startIndex + 1; | 121 return m_startIndex + 1; |
| 122 | 122 |
| 123 // Delimit every CJK character because these scripts do not delimit | 123 // 8Bit words end at isWordDelimiter(). |
| 124 // words by spaces, and not delimiting hits the performance. | 124 if (m_textRun.is8Bit()) { |
| 125 if (!m_textRun.is8Bit()) { | 125 for (unsigned i = m_startIndex + 1; ; i++) { |
| 126 UChar32 ch; | 126 if (i == length || isWordDelimiter(m_textRun[i])) |
| 127 unsigned end = m_startIndex; | 127 return i; |
| 128 U16_NEXT(m_textRun.characters16(), end, length, ch); | |
| 129 if (Character::isCJKIdeographOrSymbol(ch)) { | |
| 130 bool hasAnyScript = !Character::isCommonOrInheritedScript(ch); | |
| 131 for (unsigned i = end; i < length; end = i) { | |
| 132 U16_NEXT(m_textRun.characters16(), i, length, ch); | |
| 133 // ZWJ and modifier check in order not to split those Emoji
sequences. | |
| 134 if (U_GET_GC_MASK(ch) & (U_GC_M_MASK | U_GC_LM_MASK | U_GC_S
K_MASK) | |
| 135 || ch == zeroWidthJoinerCharacter || Character::isModifi
er(ch)) | |
| 136 continue; | |
| 137 // Avoid delimiting COMMON/INHERITED alone, which makes hard
er to | |
| 138 // identify the script. | |
| 139 if (Character::isCJKIdeographOrSymbol(ch)) { | |
| 140 if (Character::isCommonOrInheritedScript(ch)) | |
| 141 continue; | |
| 142 if (!hasAnyScript) { | |
| 143 hasAnyScript = true; | |
| 144 continue; | |
| 145 } | |
| 146 } | |
| 147 return end; | |
| 148 } | |
| 149 return length; | |
| 150 } | 128 } |
| 151 } | 129 } |
| 152 | 130 |
| 153 for (unsigned i = m_startIndex + 1; ; i++) { | 131 // Non-CJK/Emoji words end at isWordDelimiter() or CJK/Emoji characters. |
| 154 if (i == length || isWordDelimiter(m_textRun[i])) { | 132 unsigned end = m_startIndex; |
| 155 return i; | 133 UChar32 ch = m_textRun.codepointAtAndNext(end); |
| 134 if (!Character::isCJKIdeographOrSymbol(ch)) { |
| 135 for (unsigned nextEnd = end; end < length; end = nextEnd) { |
| 136 ch = m_textRun.codepointAtAndNext(nextEnd); |
| 137 if (isWordDelimiter(ch) || Character::isCJKIdeographOrSymbolBase
(ch)) |
| 138 return end; |
| 156 } | 139 } |
| 157 if (!m_textRun.is8Bit()) { | 140 return length; |
| 158 UChar32 nextChar; | 141 } |
| 159 U16_GET(m_textRun.characters16(), 0, i, length, nextChar); | 142 |
| 160 if (Character::isCJKIdeographOrSymbolBase(nextChar)) | 143 // For CJK/Emoji words, delimit every character because these scripts do |
| 161 return i; | 144 // not delimit words by spaces, and delimiting only at isWordDelimiter() |
| 145 // worsen the cache efficiency. |
| 146 bool hasAnyScript = !Character::isCommonOrInheritedScript(ch); |
| 147 for (unsigned nextEnd = end; end < length; end = nextEnd) { |
| 148 ch = m_textRun.codepointAtAndNext(nextEnd); |
| 149 // ZWJ and modifier check in order not to split those Emoji sequence
s. |
| 150 if (U_GET_GC_MASK(ch) & (U_GC_M_MASK | U_GC_LM_MASK | U_GC_SK_MASK) |
| 151 || ch == zeroWidthJoinerCharacter || Character::isModifier(ch)) |
| 152 continue; |
| 153 // Avoid delimiting COMMON/INHERITED alone, which makes harder to |
| 154 // identify the script. |
| 155 if (Character::isCJKIdeographOrSymbol(ch)) { |
| 156 if (Character::isCommonOrInheritedScript(ch)) |
| 157 continue; |
| 158 if (!hasAnyScript) { |
| 159 hasAnyScript = true; |
| 160 continue; |
| 161 } |
| 162 } | 162 } |
| 163 return end; |
| 163 } | 164 } |
| 165 return length; |
| 164 } | 166 } |
| 165 | 167 |
| 166 bool shapeToEndIndex(RefPtr<const ShapeResult>* result, unsigned endIndex) | 168 bool shapeToEndIndex(RefPtr<const ShapeResult>* result, unsigned endIndex) |
| 167 { | 169 { |
| 168 if (!endIndex || endIndex <= m_startIndex) | 170 if (!endIndex || endIndex <= m_startIndex) |
| 169 return false; | 171 return false; |
| 170 | 172 |
| 171 const unsigned length = m_textRun.length(); | 173 const unsigned length = m_textRun.length(); |
| 172 if (!m_startIndex && endIndex == length) { | 174 if (!m_startIndex && endIndex == length) { |
| 173 *result = shapeWord(m_textRun, m_font); | 175 *result = shapeWord(m_textRun, m_font); |
| 174 } else { | 176 } else { |
| 175 ASSERT(endIndex <= length); | 177 ASSERT(endIndex <= length); |
| 176 TextRun subRun = m_textRun.subRun(m_startIndex, endIndex - m_startIn
dex); | 178 TextRun subRun = m_textRun.subRun(m_startIndex, endIndex - m_startIn
dex); |
| 177 *result = shapeWord(subRun, m_font); | 179 *result = shapeWord(subRun, m_font); |
| 178 } | 180 } |
| 179 m_startIndex = endIndex; | 181 m_startIndex = endIndex; |
| 180 return result->get(); | 182 return result->get(); |
| 181 } | 183 } |
| 182 | 184 |
| 183 unsigned endIndexUntil(UChar ch) | 185 unsigned endIndexUntil(UChar ch) const |
| 184 { | 186 { |
| 185 unsigned length = m_textRun.length(); | 187 unsigned length = m_textRun.length(); |
| 186 ASSERT(m_startIndex < length); | 188 ASSERT(m_startIndex < length); |
| 187 for (unsigned i = m_startIndex + 1; ; i++) { | 189 for (unsigned i = m_startIndex + 1; ; i++) { |
| 188 if (i == length || m_textRun[i] == ch) | 190 if (i == length || m_textRun[i] == ch) |
| 189 return i; | 191 return i; |
| 190 } | 192 } |
| 191 } | 193 } |
| 192 | 194 |
| 193 bool nextForAllowTabs(RefPtr<const ShapeResult>* wordResult) | 195 bool nextForAllowTabs(RefPtr<const ShapeResult>* wordResult) |
| (...skipping 28 matching lines...) Expand all Loading... |
| 222 const Font* m_font; | 224 const Font* m_font; |
| 223 ShapeResultSpacing m_spacing; | 225 ShapeResultSpacing m_spacing; |
| 224 float m_widthSoFar; // Used only when allowTabs() | 226 float m_widthSoFar; // Used only when allowTabs() |
| 225 unsigned m_startIndex : 31; | 227 unsigned m_startIndex : 31; |
| 226 unsigned m_shapeByWord : 1; | 228 unsigned m_shapeByWord : 1; |
| 227 }; | 229 }; |
| 228 | 230 |
| 229 } // namespace blink | 231 } // namespace blink |
| 230 | 232 |
| 231 #endif // CachingWordShapeIterator_h | 233 #endif // CachingWordShapeIterator_h |
| OLD | NEW |