Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(146)

Side by Side Diff: third_party/WebKit/Source/platform/fonts/Character.cpp

Issue 1644893002: Revert of Improve performance of Character::isCJKIdeographOrSymbol by using trie tree (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Created 4 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 * Copyright (C) 2014 Google Inc. All rights reserved. 2 * Copyright (C) 2014 Google Inc. All rights reserved.
3 * 3 *
4 * Redistribution and use in source and binary forms, with or without 4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are 5 * modification, are permitted provided that the following conditions are
6 * met: 6 * met:
7 * 7 *
8 * * Redistributions of source code must retain the above copyright 8 * * Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer. 9 * notice, this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above 10 * * Redistributions in binary form must reproduce the above
(...skipping 15 matching lines...) Expand all
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */ 29 */
30 30
31 #include "platform/fonts/Character.h" 31 #include "platform/fonts/Character.h"
32 32
33 #include "wtf/StdLibExtras.h" 33 #include "wtf/StdLibExtras.h"
34 #include "wtf/text/StringBuilder.h" 34 #include "wtf/text/StringBuilder.h"
35 #include <algorithm> 35 #include <algorithm>
36 #include <unicode/uobject.h>
37 #include <unicode/uscript.h> 36 #include <unicode/uscript.h>
38 #define MUTEX_H // Prevent compile failure of utrie2.h on Windows
39 #include <utrie2.h>
40 37
41 using namespace WTF; 38 using namespace WTF;
42 using namespace Unicode; 39 using namespace Unicode;
43 40
44 namespace blink { 41 namespace blink {
45 42
46 // Freezed trie tree, see CharacterDataGenerator.cpp. 43 static const UChar32 cjkIsolatedSymbolsArray[] = {
47 extern int32_t serializedCharacterDataSize; 44 // 0x2C7 Caron, Mandarin Chinese 3rd Tone
48 extern uint8_t serializedCharacterData[]; 45 0x2C7,
49 46 // 0x2CA Modifier Letter Acute Accent, Mandarin Chinese 2nd Tone
50 static UTrie2* createTrie() 47 0x2CA,
51 { 48 // 0x2CB Modifier Letter Grave Access, Mandarin Chinese 4th Tone
52 // Create a Trie from the value array. 49 0x2CB,
53 UErrorCode error = U_ZERO_ERROR; 50 // 0x2D9 Dot Above, Mandarin Chinese 5th Tone
54 UTrie2* trie = utrie2_openFromSerialized( 51 0x2D9,
55 UTrie2ValueBits::UTRIE2_16_VALUE_BITS, 52 0x2020, 0x2021, 0x2030, 0x203B, 0x203C, 0x2042, 0x2047, 0x2048, 0x2049, 0x2051,
56 serializedCharacterData, serializedCharacterDataSize, 53 0x20DD, 0x20DE, 0x2100, 0x2103, 0x2105, 0x2109, 0x210A, 0x2113, 0x2116, 0x2121,
57 nullptr, &error); 54 0x212B, 0x213B, 0x2150, 0x2151, 0x2152, 0x217F, 0x2189, 0x2307, 0x2312, 0x23CE,
58 ASSERT(error == U_ZERO_ERROR); 55 0x2423, 0x25A0, 0x25A1, 0x25A2, 0x25AA, 0x25AB, 0x25B1, 0x25B2, 0x25B3, 0x25B6,
59 return trie; 56 0x25B7, 0x25BC, 0x25BD, 0x25C0, 0x25C1, 0x25C6, 0x25C7, 0x25C9, 0x25CB, 0x25CC,
60 } 57 0x25EF, 0x2605, 0x2606, 0x260E, 0x2616, 0x2617, 0x2640, 0x2642, 0x26A0, 0x26BD,
61 58 0x26BE, 0x2713, 0x271A, 0x273F, 0x2740, 0x2756, 0x2B1A, 0xFE10, 0xFE11, 0xFE12,
62 bool Character::hasProperty(UChar32 c, CharacterProperty property) 59 0xFE19, 0xFF1D,
63 { 60 // Emoji.
64 static UTrie2* trie = nullptr; 61 0x1F100
65 if (!trie) 62 };
66 trie = createTrie();
67 return UTRIE2_GET16(trie, c)
68 & static_cast<CharacterPropertyType>(property);
69 }
70 63
71 // Takes a flattened list of closed intervals 64 // Takes a flattened list of closed intervals
72 template <class T, size_t size> 65 template <class T, size_t size>
73 bool valueInIntervalList(const T (&intervalList)[size], const T& value) 66 bool valueInIntervalList(const T (&intervalList)[size], const T& value)
74 { 67 {
75 const T* bound = std::upper_bound(&intervalList[0], &intervalList[size], value); 68 const T* bound = std::upper_bound(&intervalList[0], &intervalList[size], value);
76 if ((bound - intervalList) % 2 == 1) 69 if ((bound - intervalList) % 2 == 1)
77 return true; 70 return true;
78 return bound > intervalList && *(bound - 1) == value; 71 return bound > intervalList && *(bound - 1) == value;
79 } 72 }
(...skipping 97 matching lines...) Expand 10 before | Expand all | Expand 10 after
177 // Search for other Complex cases 170 // Search for other Complex cases
178 if (valueInIntervalList(complexCodePathRanges, c)) 171 if (valueInIntervalList(complexCodePathRanges, c))
179 return ComplexPath; 172 return ComplexPath;
180 } 173 }
181 174
182 return result; 175 return result;
183 } 176 }
184 177
185 bool Character::isUprightInMixedVertical(UChar32 character) 178 bool Character::isUprightInMixedVertical(UChar32 character)
186 { 179 {
187 return hasProperty(character, CharacterProperty::isUprightInMixedVertical); 180 // Fast path for common non-CJK
181 if (character < 0x000A7)
182 return false;
183
184 // Fast path for common CJK
185 if (isInRange(character, 0x02E80, 0x0A4CF))
186 return true;
187
188 if (isInRange(character, 0x0FF01, 0x0FFE7)) {
189 if (character <= 0x0FF0C || isInRange(character, 0x0FF0E, 0x0FF1B)
190 || isInRange(character, 0x0FF1F, 0x0FF60) || character >= 0x0FFE0)
191 return true;
192 return false;
193 }
194
195 // Fast path for medium-common non-CJK
196 if (character == 0x000A7 || character == 0x000A9 || character == 0x000AE)
197 return true;
198 if (character == 0x000B1 || character == 0x000BC || character == 0x000BD || character == 0x000BE)
199 return true;
200 if (character == 0x000D7 || character == 0x000F7)
201 return true;
202 if (character < 0x002EA)
203 return false;
204
205 static const UChar32 uprightRanges[] = {
206 // Spacing Modifier Letters (Part of)
207 0x002EA, 0x002EB,
208 // Hangul Jamo
209 0x01100, 0x011FF,
210 // Unified Canadian Aboriginal Syllabics
211 0x01401, 0x0167F,
212 // Unified Canadian Aboriginal Syllabics Extended
213 0x018B0, 0x018FF,
214 // General Punctuation (Part of)
215 0x02016, 0x02016,
216 0x02020, 0x02021,
217 0x02030, 0x02031,
218 0x0203B, 0x0203C,
219 0x02042, 0x02042,
220 0x02047, 0x02049,
221 0x02051, 0x02051,
222 0x02065, 0x02069,
223 // Combining Diacritical Marks for Symbols (Part of)
224 0x020DD, 0x020E0,
225 0x020E2, 0x020E4,
226 // Letterlike Symbols (Part of)/Number Forms
227 0x02100, 0x02101,
228 0x02103, 0x02109,
229 0x0210F, 0x0210F,
230 0x02113, 0x02114,
231 0x02116, 0x02117,
232 0x0211E, 0x02123,
233 0x02125, 0x02125,
234 0x02127, 0x02127,
235 0x02129, 0x02129,
236 0x0212E, 0x0212E,
237 0x02135, 0x0213F,
238 0x02145, 0x0214A,
239 0x0214C, 0x0214D,
240 0x0214F, 0x0218F,
241 // Mathematical Operators (Part of)
242 0x0221E, 0x0221E,
243 0x02234, 0x02235,
244 // Miscellaneous Technical (Part of)
245 0x02300, 0x02307,
246 0x0230C, 0x0231F,
247 0x02324, 0x0232B,
248 0x0237D, 0x0239A,
249 0x023BE, 0x023CD,
250 0x023CF, 0x023CF,
251 0x023D1, 0x023DB,
252 0x023E2, 0x02422,
253 // Control Pictures (Part of)/Optical Character Recognition/Enclosed Alphanumerics
254 0x02424, 0x024FF,
255 // Geometric Shapes/Miscellaneous Symbols (Part of)
256 0x025A0, 0x02619,
257 0x02620, 0x02767,
258 0x02776, 0x02793,
259 // Miscellaneous Symbols and Arrows (Part of)
260 0x02B12, 0x02B2F,
261 0x02B50, 0x02B59,
262 0x02BB8, 0x02BFF,
263 // Hangul Jamo Extended-A
264 0x0A960, 0x0A97F,
265 // Hangul Syllables/Hangul Jamo Extended-B
266 0x0AC00, 0x0D7FF,
267 // Private Use Area/CJK Compatibility Ideographs
268 0x0E000, 0x0FAFF,
269 // Vertical Forms
270 0x0FE10, 0x0FE1F,
271 // CJK Compatibility Forms (Part of)
272 0x0FE30, 0x0FE48,
273 // Small Form Variants (Part of)
274 0x0FE50, 0x0FE57,
275 0x0FE59, 0x0FE62,
276 0x0FE67, 0x0FE6F,
277 // Specials (Part of)
278 0x0FFF0, 0x0FFF8,
279 0x0FFFC, 0x0FFFD,
280 // Meroitic Hieroglyphs
281 0x10980, 0x1099F,
282 // Siddham
283 0x11580, 0x115FF,
284 // Egyptian Hieroglyphs
285 0x13000, 0x1342F,
286 // Kana Supplement
287 0x1B000, 0x1B0FF,
288 // Byzantine Musical Symbols/Musical Symbols
289 0x1D000, 0x1D1FF,
290 // Tai Xuan Jing Symbols/Counting Rod Numerals
291 0x1D300, 0x1D37F,
292 // Mahjong Tiles/Domino Tiles/Playing Cards/Enclosed Alphanumeric Supplement
293 // Enclosed Ideographic Supplement/Enclosed Ideographic Supplement
294 // Emoticons/Ornamental Dingbats/Transport and Map Symbols/Alchemical Symbols
295 // Alchemical Symbols
296 0x1F000, 0x1F7FF,
297 // CJK Unified Ideographs Extension B/C/D
298 // CJK Compatibility Ideographs Supplement
299 0x20000, 0x2FFFD,
300 0x30000, 0x3FFFD,
301 // Supplementary Private Use Area-A
302 0xF0000, 0xFFFFD,
303 // Supplementary Private Use Area-B
304 0x100000, 0x10FFFD,
305 };
306 return valueInIntervalList(uprightRanges, character);
307 }
308
309 bool Character::isCJKIdeograph(UChar32 c)
310 {
311 static const UChar32 cjkIdeographRanges[] = {
312 // CJK Radicals Supplement and Kangxi Radicals.
313 0x2E80, 0x2FDF,
314 // CJK Strokes.
315 0x31C0, 0x31EF,
316 // CJK Unified Ideographs Extension A.
317 0x3400, 0x4DBF,
318 // The basic CJK Unified Ideographs block.
319 0x4E00, 0x9FFF,
320 // CJK Compatibility Ideographs.
321 0xF900, 0xFAFF,
322 // CJK Unified Ideographs Extension B.
323 0x20000, 0x2A6DF,
324 // CJK Unified Ideographs Extension C.
325 // CJK Unified Ideographs Extension D.
326 0x2A700, 0x2B81F,
327 // CJK Compatibility Ideographs Supplement.
328 0x2F800, 0x2FA1F
329 };
330 static size_t cjkIdeographRangesCount = WTF_ARRAY_LENGTH(cjkIdeographRanges);
331
332 // Early out
333 if (c < cjkIdeographRanges[0] || c > cjkIdeographRanges[cjkIdeographRangesCount - 1])
334 return false;
335
336 return valueInIntervalList(cjkIdeographRanges, c);
188 } 337 }
189 338
190 bool Character::isCJKIdeographOrSymbol(UChar32 c) 339 bool Character::isCJKIdeographOrSymbol(UChar32 c)
191 { 340 {
192 // Likely common case 341 // Likely common case
193 if (c < 0x2C7) 342 if (c < 0x2C7)
194 return false; 343 return false;
195 344
196 return hasProperty(c, CharacterProperty::isCJKIdeographOrSymbol); 345 if (isCJKIdeograph(c))
346 return true;
347
348 static const UChar32 cjkSymbolRanges[] = {
349 0x2156, 0x215A,
350 0x2160, 0x216B,
351 0x2170, 0x217B,
352 0x23BE, 0x23CC,
353 0x2460, 0x2492,
354 0x249C, 0x24FF,
355 0x25CE, 0x25D3,
356 0x25E2, 0x25E6,
357 0x2600, 0x2603,
358 0x2660, 0x266F,
359 0x2672, 0x267D,
360 // Emoji HEAVY HEART EXCLAMATION MARK ORNAMENT..HEAVY BLACK HEART
361 // Needed in order not to break Emoji heart-kiss sequences in
362 // CachingWordShapeIterator.
363 // cmp. http://www.unicode.org/emoji/charts/emoji-zwj-sequences.html
364 0x2763, 0x2764,
365 0x2776, 0x277F,
366 // Ideographic Description Characters, with CJK Symbols and Punctuation, excluding 0x3030.
367 // Then Hiragana 0x3040 .. 0x309F, Katakana 0x30A0 .. 0x30FF, Bopomofo 0x3100 .. 0x312F
368 0x2FF0, 0x302F,
369 0x3031, 0x312F,
370 // More Bopomofo and Bopomofo Extended 0x31A0 .. 0x31BF
371 0x3190, 0x31BF,
372 // Enclosed CJK Letters and Months (0x3200 .. 0x32FF).
373 // CJK Compatibility (0x3300 .. 0x33FF).
374 0x3200, 0x33FF,
375 0xF860, 0xF862,
376 // CJK Compatibility Forms.
377 0xFE30, 0xFE4F,
378 // Halfwidth and Fullwidth Forms
379 // Usually only used in CJK
380 0xFF00, 0xFF0C,
381 0xFF0E, 0xFF1A,
382 0xFF1F, 0xFFEF,
383 // Emoji.
384 0x1F110, 0x1F129,
385 0x1F130, 0x1F149,
386 0x1F150, 0x1F169,
387 0x1F170, 0x1F189,
388 0x1F200, 0x1F6FF
389 };
390
391 if (c >= cjkSymbolRanges[0]
392 && c <= cjkSymbolRanges[WTF_ARRAY_LENGTH(cjkSymbolRanges) - 1]
393 && valueInIntervalList(cjkSymbolRanges, c)) {
394 return true;
395 }
396
397 if (c < 0x2020 && c > 0x2D9)
398 return false;
399
400 // Hash lookup for isolated symbols (those not part of a contiguous range)
401 static HashSet<UChar32>* cjkIsolatedSymbols = 0;
402 if (!cjkIsolatedSymbols) {
403 cjkIsolatedSymbols = new HashSet<UChar32>();
404 for (size_t i = 0; i < WTF_ARRAY_LENGTH(cjkIsolatedSymbolsArray); ++i)
405 cjkIsolatedSymbols->add(cjkIsolatedSymbolsArray[i]);
406 }
407 return cjkIsolatedSymbols->contains(c);
197 } 408 }
198 409
199 unsigned Character::expansionOpportunityCount(const LChar* characters, size_t length, TextDirection direction, bool& isAfterExpansion, const TextJustify textJustify) 410 unsigned Character::expansionOpportunityCount(const LChar* characters, size_t length, TextDirection direction, bool& isAfterExpansion, const TextJustify textJustify)
200 { 411 {
201 unsigned count = 0; 412 unsigned count = 0;
202 if (textJustify == TextJustifyDistribute) { 413 if (textJustify == TextJustifyDistribute) {
203 isAfterExpansion = true; 414 isAfterExpansion = true;
204 return length; 415 return length;
205 } 416 }
206 417
(...skipping 106 matching lines...) Expand 10 before | Expand all | Expand 10 after
313 } 524 }
314 525
315 bool Character::isCommonOrInheritedScript(UChar32 character) 526 bool Character::isCommonOrInheritedScript(UChar32 character)
316 { 527 {
317 UErrorCode status = U_ZERO_ERROR; 528 UErrorCode status = U_ZERO_ERROR;
318 UScriptCode script = uscript_getScript(character, &status); 529 UScriptCode script = uscript_getScript(character, &status);
319 return U_SUCCESS(status) && (script == USCRIPT_COMMON || script == USCRIPT_INHERITED); 530 return U_SUCCESS(status) && (script == USCRIPT_COMMON || script == USCRIPT_INHERITED);
320 } 531 }
321 532
322 } // namespace blink 533 } // namespace blink
OLDNEW
« no previous file with comments | « third_party/WebKit/Source/platform/fonts/Character.h ('k') | third_party/WebKit/Source/platform/fonts/CharacterData.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698