OLD | NEW |
1 /* | 1 /* |
2 * Copyright (C) 2015 Google Inc. All rights reserved. | 2 * Copyright (C) 2015 Google Inc. All rights reserved. |
3 * | 3 * |
4 * Redistribution and use in source and binary forms, with or without | 4 * Redistribution and use in source and binary forms, with or without |
5 * modification, are permitted provided that the following conditions | 5 * modification, are permitted provided that the following conditions |
6 * are met: | 6 * are met: |
7 * 1. Redistributions of source code must retain the above copyright | 7 * 1. Redistributions of source code must retain the above copyright |
8 * notice, this list of conditions and the following disclaimer. | 8 * notice, this list of conditions and the following disclaimer. |
9 * 2. Redistributions in binary form must reproduce the above copyright | 9 * 2. Redistributions in binary form must reproduce the above copyright |
10 * notice, this list of conditions and the following disclaimer in the | 10 * notice, this list of conditions and the following disclaimer in the |
(...skipping 93 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
104 bool nextWord(RefPtr<const ShapeResult>* wordResult) | 104 bool nextWord(RefPtr<const ShapeResult>* wordResult) |
105 { | 105 { |
106 return shapeToEndIndex(wordResult, nextWordEndIndex()); | 106 return shapeToEndIndex(wordResult, nextWordEndIndex()); |
107 } | 107 } |
108 | 108 |
109 static bool isWordDelimiter(UChar ch) | 109 static bool isWordDelimiter(UChar ch) |
110 { | 110 { |
111 return ch == spaceCharacter || ch == tabulationCharacter; | 111 return ch == spaceCharacter || ch == tabulationCharacter; |
112 } | 112 } |
113 | 113 |
114 unsigned nextWordEndIndex() | 114 unsigned nextWordEndIndex() const |
115 { | 115 { |
116 const unsigned length = m_textRun.length(); | 116 const unsigned length = m_textRun.length(); |
117 if (m_startIndex >= length) | 117 if (m_startIndex >= length) |
118 return 0; | 118 return 0; |
119 | 119 |
120 if (m_startIndex + 1u == length || isWordDelimiter(m_textRun[m_startInde
x])) | 120 if (m_startIndex + 1u == length || isWordDelimiter(m_textRun[m_startInde
x])) |
121 return m_startIndex + 1; | 121 return m_startIndex + 1; |
122 | 122 |
123 // Delimit every CJK character because these scripts do not delimit | 123 // 8Bit words end at isWordDelimiter(). |
124 // words by spaces, and not delimiting hits the performance. | 124 if (m_textRun.is8Bit()) { |
125 if (!m_textRun.is8Bit()) { | 125 for (unsigned i = m_startIndex + 1; ; i++) { |
126 UChar32 ch; | 126 if (i == length || isWordDelimiter(m_textRun[i])) |
127 unsigned end = m_startIndex; | 127 return i; |
128 U16_NEXT(m_textRun.characters16(), end, length, ch); | |
129 if (Character::isCJKIdeographOrSymbol(ch)) { | |
130 bool hasAnyScript = !Character::isCommonOrInheritedScript(ch); | |
131 for (unsigned i = end; i < length; end = i) { | |
132 U16_NEXT(m_textRun.characters16(), i, length, ch); | |
133 // ZWJ and modifier check in order not to split those Emoji
sequences. | |
134 if (U_GET_GC_MASK(ch) & (U_GC_M_MASK | U_GC_LM_MASK | U_GC_S
K_MASK) | |
135 || ch == zeroWidthJoinerCharacter || Character::isModifi
er(ch)) | |
136 continue; | |
137 // Avoid delimiting COMMON/INHERITED alone, which makes hard
er to | |
138 // identify the script. | |
139 if (Character::isCJKIdeographOrSymbol(ch)) { | |
140 if (Character::isCommonOrInheritedScript(ch)) | |
141 continue; | |
142 if (!hasAnyScript) { | |
143 hasAnyScript = true; | |
144 continue; | |
145 } | |
146 } | |
147 return end; | |
148 } | |
149 return length; | |
150 } | 128 } |
151 } | 129 } |
152 | 130 |
153 for (unsigned i = m_startIndex + 1; ; i++) { | 131 // Non-CJK/Emoji words end at isWordDelimiter() or CJK/Emoji characters. |
154 if (i == length || isWordDelimiter(m_textRun[i])) { | 132 unsigned end = m_startIndex; |
155 return i; | 133 UChar32 ch = m_textRun.codepointAtAndNext(end); |
| 134 if (!Character::isCJKIdeographOrSymbol(ch)) { |
| 135 for (unsigned nextEnd = end; end < length; end = nextEnd) { |
| 136 ch = m_textRun.codepointAtAndNext(nextEnd); |
| 137 if (isWordDelimiter(ch) || Character::isCJKIdeographOrSymbolBase
(ch)) |
| 138 return end; |
156 } | 139 } |
157 if (!m_textRun.is8Bit()) { | 140 return length; |
158 UChar32 nextChar; | 141 } |
159 U16_GET(m_textRun.characters16(), 0, i, length, nextChar); | 142 |
160 if (Character::isCJKIdeographOrSymbolBase(nextChar)) | 143 // For CJK/Emoji words, delimit every character because these scripts do |
161 return i; | 144 // not delimit words by spaces, and delimiting only at isWordDelimiter() |
| 145 // worsen the cache efficiency. |
| 146 bool hasAnyScript = !Character::isCommonOrInheritedScript(ch); |
| 147 for (unsigned nextEnd = end; end < length; end = nextEnd) { |
| 148 ch = m_textRun.codepointAtAndNext(nextEnd); |
| 149 // ZWJ and modifier check in order not to split those Emoji sequence
s. |
| 150 if (U_GET_GC_MASK(ch) & (U_GC_M_MASK | U_GC_LM_MASK | U_GC_SK_MASK) |
| 151 || ch == zeroWidthJoinerCharacter || Character::isModifier(ch)) |
| 152 continue; |
| 153 // Avoid delimiting COMMON/INHERITED alone, which makes harder to |
| 154 // identify the script. |
| 155 if (Character::isCJKIdeographOrSymbol(ch)) { |
| 156 if (Character::isCommonOrInheritedScript(ch)) |
| 157 continue; |
| 158 if (!hasAnyScript) { |
| 159 hasAnyScript = true; |
| 160 continue; |
| 161 } |
162 } | 162 } |
| 163 return end; |
163 } | 164 } |
| 165 return length; |
164 } | 166 } |
165 | 167 |
166 bool shapeToEndIndex(RefPtr<const ShapeResult>* result, unsigned endIndex) | 168 bool shapeToEndIndex(RefPtr<const ShapeResult>* result, unsigned endIndex) |
167 { | 169 { |
168 if (!endIndex || endIndex <= m_startIndex) | 170 if (!endIndex || endIndex <= m_startIndex) |
169 return false; | 171 return false; |
170 | 172 |
171 const unsigned length = m_textRun.length(); | 173 const unsigned length = m_textRun.length(); |
172 if (!m_startIndex && endIndex == length) { | 174 if (!m_startIndex && endIndex == length) { |
173 *result = shapeWord(m_textRun, m_font); | 175 *result = shapeWord(m_textRun, m_font); |
174 } else { | 176 } else { |
175 ASSERT(endIndex <= length); | 177 ASSERT(endIndex <= length); |
176 TextRun subRun = m_textRun.subRun(m_startIndex, endIndex - m_startIn
dex); | 178 TextRun subRun = m_textRun.subRun(m_startIndex, endIndex - m_startIn
dex); |
177 *result = shapeWord(subRun, m_font); | 179 *result = shapeWord(subRun, m_font); |
178 } | 180 } |
179 m_startIndex = endIndex; | 181 m_startIndex = endIndex; |
180 return result->get(); | 182 return result->get(); |
181 } | 183 } |
182 | 184 |
183 unsigned endIndexUntil(UChar ch) | 185 unsigned endIndexUntil(UChar ch) const |
184 { | 186 { |
185 unsigned length = m_textRun.length(); | 187 unsigned length = m_textRun.length(); |
186 ASSERT(m_startIndex < length); | 188 ASSERT(m_startIndex < length); |
187 for (unsigned i = m_startIndex + 1; ; i++) { | 189 for (unsigned i = m_startIndex + 1; ; i++) { |
188 if (i == length || m_textRun[i] == ch) | 190 if (i == length || m_textRun[i] == ch) |
189 return i; | 191 return i; |
190 } | 192 } |
191 } | 193 } |
192 | 194 |
193 bool nextForAllowTabs(RefPtr<const ShapeResult>* wordResult) | 195 bool nextForAllowTabs(RefPtr<const ShapeResult>* wordResult) |
(...skipping 28 matching lines...) Expand all Loading... |
222 const Font* m_font; | 224 const Font* m_font; |
223 ShapeResultSpacing m_spacing; | 225 ShapeResultSpacing m_spacing; |
224 float m_widthSoFar; // Used only when allowTabs() | 226 float m_widthSoFar; // Used only when allowTabs() |
225 unsigned m_startIndex : 31; | 227 unsigned m_startIndex : 31; |
226 unsigned m_shapeByWord : 1; | 228 unsigned m_shapeByWord : 1; |
227 }; | 229 }; |
228 | 230 |
229 } // namespace blink | 231 } // namespace blink |
230 | 232 |
231 #endif // CachingWordShapeIterator_h | 233 #endif // CachingWordShapeIterator_h |
OLD | NEW |