OLD | NEW |
1 /* | 1 /* |
2 * Copyright (C) 2014 Google Inc. All rights reserved. | 2 * Copyright (C) 2014 Google Inc. All rights reserved. |
3 * | 3 * |
4 * Redistribution and use in source and binary forms, with or without | 4 * Redistribution and use in source and binary forms, with or without |
5 * modification, are permitted provided that the following conditions are | 5 * modification, are permitted provided that the following conditions are |
6 * met: | 6 * met: |
7 * | 7 * |
8 * * Redistributions of source code must retain the above copyright | 8 * * Redistributions of source code must retain the above copyright |
9 * notice, this list of conditions and the following disclaimer. | 9 * notice, this list of conditions and the following disclaimer. |
10 * * Redistributions in binary form must reproduce the above | 10 * * Redistributions in binary form must reproduce the above |
(...skipping 15 matching lines...) Expand all Loading... |
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | 26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
29 */ | 29 */ |
30 | 30 |
31 #include "platform/fonts/Character.h" | 31 #include "platform/fonts/Character.h" |
32 | 32 |
33 #include "wtf/StdLibExtras.h" | 33 #include "wtf/StdLibExtras.h" |
34 #include "wtf/text/StringBuilder.h" | 34 #include "wtf/text/StringBuilder.h" |
35 #include <algorithm> | 35 #include <algorithm> |
36 #include <unicode/uobject.h> | |
37 #include <unicode/uscript.h> | 36 #include <unicode/uscript.h> |
38 #define MUTEX_H // Prevent compile failure of utrie2.h on Windows | |
39 #include <utrie2.h> | |
40 | 37 |
41 using namespace WTF; | 38 using namespace WTF; |
42 using namespace Unicode; | 39 using namespace Unicode; |
43 | 40 |
44 namespace blink { | 41 namespace blink { |
45 | 42 |
46 // Freezed trie tree, see CharacterDataGenerator.cpp. | 43 static const UChar32 cjkIsolatedSymbolsArray[] = { |
47 extern int32_t serializedCharacterDataSize; | 44 // 0x2C7 Caron, Mandarin Chinese 3rd Tone |
48 extern uint8_t serializedCharacterData[]; | 45 0x2C7, |
49 | 46 // 0x2CA Modifier Letter Acute Accent, Mandarin Chinese 2nd Tone |
50 static UTrie2* createTrie() | 47 0x2CA, |
51 { | 48 // 0x2CB Modifier Letter Grave Access, Mandarin Chinese 4th Tone |
52 // Create a Trie from the value array. | 49 0x2CB, |
53 UErrorCode error = U_ZERO_ERROR; | 50 // 0x2D9 Dot Above, Mandarin Chinese 5th Tone |
54 UTrie2* trie = utrie2_openFromSerialized( | 51 0x2D9, |
55 UTrie2ValueBits::UTRIE2_16_VALUE_BITS, | 52 0x2020, 0x2021, 0x2030, 0x203B, 0x203C, 0x2042, 0x2047, 0x2048, 0x2049, 0x20
51, |
56 serializedCharacterData, serializedCharacterDataSize, | 53 0x20DD, 0x20DE, 0x2100, 0x2103, 0x2105, 0x2109, 0x210A, 0x2113, 0x2116, 0x21
21, |
57 nullptr, &error); | 54 0x212B, 0x213B, 0x2150, 0x2151, 0x2152, 0x217F, 0x2189, 0x2307, 0x2312, 0x23
CE, |
58 ASSERT(error == U_ZERO_ERROR); | 55 0x2423, 0x25A0, 0x25A1, 0x25A2, 0x25AA, 0x25AB, 0x25B1, 0x25B2, 0x25B3, 0x25
B6, |
59 return trie; | 56 0x25B7, 0x25BC, 0x25BD, 0x25C0, 0x25C1, 0x25C6, 0x25C7, 0x25C9, 0x25CB, 0x25
CC, |
60 } | 57 0x25EF, 0x2605, 0x2606, 0x260E, 0x2616, 0x2617, 0x2640, 0x2642, 0x26A0, 0x26
BD, |
61 | 58 0x26BE, 0x2713, 0x271A, 0x273F, 0x2740, 0x2756, 0x2B1A, 0xFE10, 0xFE11, 0xFE
12, |
62 bool Character::hasProperty(UChar32 c, CharacterProperty property) | 59 0xFE19, 0xFF1D, |
63 { | 60 // Emoji. |
64 static UTrie2* trie = nullptr; | 61 0x1F100 |
65 if (!trie) | 62 }; |
66 trie = createTrie(); | |
67 return UTRIE2_GET16(trie, c) | |
68 & static_cast<CharacterPropertyType>(property); | |
69 } | |
70 | 63 |
71 // Takes a flattened list of closed intervals | 64 // Takes a flattened list of closed intervals |
72 template <class T, size_t size> | 65 template <class T, size_t size> |
73 bool valueInIntervalList(const T (&intervalList)[size], const T& value) | 66 bool valueInIntervalList(const T (&intervalList)[size], const T& value) |
74 { | 67 { |
75 const T* bound = std::upper_bound(&intervalList[0], &intervalList[size], val
ue); | 68 const T* bound = std::upper_bound(&intervalList[0], &intervalList[size], val
ue); |
76 if ((bound - intervalList) % 2 == 1) | 69 if ((bound - intervalList) % 2 == 1) |
77 return true; | 70 return true; |
78 return bound > intervalList && *(bound - 1) == value; | 71 return bound > intervalList && *(bound - 1) == value; |
79 } | 72 } |
(...skipping 97 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
177 // Search for other Complex cases | 170 // Search for other Complex cases |
178 if (valueInIntervalList(complexCodePathRanges, c)) | 171 if (valueInIntervalList(complexCodePathRanges, c)) |
179 return ComplexPath; | 172 return ComplexPath; |
180 } | 173 } |
181 | 174 |
182 return result; | 175 return result; |
183 } | 176 } |
184 | 177 |
185 bool Character::isUprightInMixedVertical(UChar32 character) | 178 bool Character::isUprightInMixedVertical(UChar32 character) |
186 { | 179 { |
187 return hasProperty(character, CharacterProperty::isUprightInMixedVertical); | 180 // Fast path for common non-CJK |
| 181 if (character < 0x000A7) |
| 182 return false; |
| 183 |
| 184 // Fast path for common CJK |
| 185 if (isInRange(character, 0x02E80, 0x0A4CF)) |
| 186 return true; |
| 187 |
| 188 if (isInRange(character, 0x0FF01, 0x0FFE7)) { |
| 189 if (character <= 0x0FF0C || isInRange(character, 0x0FF0E, 0x0FF1B) |
| 190 || isInRange(character, 0x0FF1F, 0x0FF60) || character >= 0x0FFE0) |
| 191 return true; |
| 192 return false; |
| 193 } |
| 194 |
| 195 // Fast path for medium-common non-CJK |
| 196 if (character == 0x000A7 || character == 0x000A9 || character == 0x000AE) |
| 197 return true; |
| 198 if (character == 0x000B1 || character == 0x000BC || character == 0x000BD ||
character == 0x000BE) |
| 199 return true; |
| 200 if (character == 0x000D7 || character == 0x000F7) |
| 201 return true; |
| 202 if (character < 0x002EA) |
| 203 return false; |
| 204 |
| 205 static const UChar32 uprightRanges[] = { |
| 206 // Spacing Modifier Letters (Part of) |
| 207 0x002EA, 0x002EB, |
| 208 // Hangul Jamo |
| 209 0x01100, 0x011FF, |
| 210 // Unified Canadian Aboriginal Syllabics |
| 211 0x01401, 0x0167F, |
| 212 // Unified Canadian Aboriginal Syllabics Extended |
| 213 0x018B0, 0x018FF, |
| 214 // General Punctuation (Part of) |
| 215 0x02016, 0x02016, |
| 216 0x02020, 0x02021, |
| 217 0x02030, 0x02031, |
| 218 0x0203B, 0x0203C, |
| 219 0x02042, 0x02042, |
| 220 0x02047, 0x02049, |
| 221 0x02051, 0x02051, |
| 222 0x02065, 0x02069, |
| 223 // Combining Diacritical Marks for Symbols (Part of) |
| 224 0x020DD, 0x020E0, |
| 225 0x020E2, 0x020E4, |
| 226 // Letterlike Symbols (Part of)/Number Forms |
| 227 0x02100, 0x02101, |
| 228 0x02103, 0x02109, |
| 229 0x0210F, 0x0210F, |
| 230 0x02113, 0x02114, |
| 231 0x02116, 0x02117, |
| 232 0x0211E, 0x02123, |
| 233 0x02125, 0x02125, |
| 234 0x02127, 0x02127, |
| 235 0x02129, 0x02129, |
| 236 0x0212E, 0x0212E, |
| 237 0x02135, 0x0213F, |
| 238 0x02145, 0x0214A, |
| 239 0x0214C, 0x0214D, |
| 240 0x0214F, 0x0218F, |
| 241 // Mathematical Operators (Part of) |
| 242 0x0221E, 0x0221E, |
| 243 0x02234, 0x02235, |
| 244 // Miscellaneous Technical (Part of) |
| 245 0x02300, 0x02307, |
| 246 0x0230C, 0x0231F, |
| 247 0x02324, 0x0232B, |
| 248 0x0237D, 0x0239A, |
| 249 0x023BE, 0x023CD, |
| 250 0x023CF, 0x023CF, |
| 251 0x023D1, 0x023DB, |
| 252 0x023E2, 0x02422, |
| 253 // Control Pictures (Part of)/Optical Character Recognition/Enclosed Alp
hanumerics |
| 254 0x02424, 0x024FF, |
| 255 // Geometric Shapes/Miscellaneous Symbols (Part of) |
| 256 0x025A0, 0x02619, |
| 257 0x02620, 0x02767, |
| 258 0x02776, 0x02793, |
| 259 // Miscellaneous Symbols and Arrows (Part of) |
| 260 0x02B12, 0x02B2F, |
| 261 0x02B50, 0x02B59, |
| 262 0x02BB8, 0x02BFF, |
| 263 // Hangul Jamo Extended-A |
| 264 0x0A960, 0x0A97F, |
| 265 // Hangul Syllables/Hangul Jamo Extended-B |
| 266 0x0AC00, 0x0D7FF, |
| 267 // Private Use Area/CJK Compatibility Ideographs |
| 268 0x0E000, 0x0FAFF, |
| 269 // Vertical Forms |
| 270 0x0FE10, 0x0FE1F, |
| 271 // CJK Compatibility Forms (Part of) |
| 272 0x0FE30, 0x0FE48, |
| 273 // Small Form Variants (Part of) |
| 274 0x0FE50, 0x0FE57, |
| 275 0x0FE59, 0x0FE62, |
| 276 0x0FE67, 0x0FE6F, |
| 277 // Specials (Part of) |
| 278 0x0FFF0, 0x0FFF8, |
| 279 0x0FFFC, 0x0FFFD, |
| 280 // Meroitic Hieroglyphs |
| 281 0x10980, 0x1099F, |
| 282 // Siddham |
| 283 0x11580, 0x115FF, |
| 284 // Egyptian Hieroglyphs |
| 285 0x13000, 0x1342F, |
| 286 // Kana Supplement |
| 287 0x1B000, 0x1B0FF, |
| 288 // Byzantine Musical Symbols/Musical Symbols |
| 289 0x1D000, 0x1D1FF, |
| 290 // Tai Xuan Jing Symbols/Counting Rod Numerals |
| 291 0x1D300, 0x1D37F, |
| 292 // Mahjong Tiles/Domino Tiles/Playing Cards/Enclosed Alphanumeric Supple
ment |
| 293 // Enclosed Ideographic Supplement/Enclosed Ideographic Supplement |
| 294 // Emoticons/Ornamental Dingbats/Transport and Map Symbols/Alchemical Sy
mbols |
| 295 // Alchemical Symbols |
| 296 0x1F000, 0x1F7FF, |
| 297 // CJK Unified Ideographs Extension B/C/D |
| 298 // CJK Compatibility Ideographs Supplement |
| 299 0x20000, 0x2FFFD, |
| 300 0x30000, 0x3FFFD, |
| 301 // Supplementary Private Use Area-A |
| 302 0xF0000, 0xFFFFD, |
| 303 // Supplementary Private Use Area-B |
| 304 0x100000, 0x10FFFD, |
| 305 }; |
| 306 return valueInIntervalList(uprightRanges, character); |
| 307 } |
| 308 |
| 309 bool Character::isCJKIdeograph(UChar32 c) |
| 310 { |
| 311 static const UChar32 cjkIdeographRanges[] = { |
| 312 // CJK Radicals Supplement and Kangxi Radicals. |
| 313 0x2E80, 0x2FDF, |
| 314 // CJK Strokes. |
| 315 0x31C0, 0x31EF, |
| 316 // CJK Unified Ideographs Extension A. |
| 317 0x3400, 0x4DBF, |
| 318 // The basic CJK Unified Ideographs block. |
| 319 0x4E00, 0x9FFF, |
| 320 // CJK Compatibility Ideographs. |
| 321 0xF900, 0xFAFF, |
| 322 // CJK Unified Ideographs Extension B. |
| 323 0x20000, 0x2A6DF, |
| 324 // CJK Unified Ideographs Extension C. |
| 325 // CJK Unified Ideographs Extension D. |
| 326 0x2A700, 0x2B81F, |
| 327 // CJK Compatibility Ideographs Supplement. |
| 328 0x2F800, 0x2FA1F |
| 329 }; |
| 330 static size_t cjkIdeographRangesCount = WTF_ARRAY_LENGTH(cjkIdeographRanges)
; |
| 331 |
| 332 // Early out |
| 333 if (c < cjkIdeographRanges[0] || c > cjkIdeographRanges[cjkIdeographRangesCo
unt - 1]) |
| 334 return false; |
| 335 |
| 336 return valueInIntervalList(cjkIdeographRanges, c); |
188 } | 337 } |
189 | 338 |
190 bool Character::isCJKIdeographOrSymbol(UChar32 c) | 339 bool Character::isCJKIdeographOrSymbol(UChar32 c) |
191 { | 340 { |
192 // Likely common case | 341 // Likely common case |
193 if (c < 0x2C7) | 342 if (c < 0x2C7) |
194 return false; | 343 return false; |
195 | 344 |
196 return hasProperty(c, CharacterProperty::isCJKIdeographOrSymbol); | 345 if (isCJKIdeograph(c)) |
| 346 return true; |
| 347 |
| 348 static const UChar32 cjkSymbolRanges[] = { |
| 349 0x2156, 0x215A, |
| 350 0x2160, 0x216B, |
| 351 0x2170, 0x217B, |
| 352 0x23BE, 0x23CC, |
| 353 0x2460, 0x2492, |
| 354 0x249C, 0x24FF, |
| 355 0x25CE, 0x25D3, |
| 356 0x25E2, 0x25E6, |
| 357 0x2600, 0x2603, |
| 358 0x2660, 0x266F, |
| 359 0x2672, 0x267D, |
| 360 // Emoji HEAVY HEART EXCLAMATION MARK ORNAMENT..HEAVY BLACK HEART |
| 361 // Needed in order not to break Emoji heart-kiss sequences in |
| 362 // CachingWordShapeIterator. |
| 363 // cmp. http://www.unicode.org/emoji/charts/emoji-zwj-sequences.html |
| 364 0x2763, 0x2764, |
| 365 0x2776, 0x277F, |
| 366 // Ideographic Description Characters, with CJK Symbols and Punctuation,
excluding 0x3030. |
| 367 // Then Hiragana 0x3040 .. 0x309F, Katakana 0x30A0 .. 0x30FF, Bopomofo 0
x3100 .. 0x312F |
| 368 0x2FF0, 0x302F, |
| 369 0x3031, 0x312F, |
| 370 // More Bopomofo and Bopomofo Extended 0x31A0 .. 0x31BF |
| 371 0x3190, 0x31BF, |
| 372 // Enclosed CJK Letters and Months (0x3200 .. 0x32FF). |
| 373 // CJK Compatibility (0x3300 .. 0x33FF). |
| 374 0x3200, 0x33FF, |
| 375 0xF860, 0xF862, |
| 376 // CJK Compatibility Forms. |
| 377 0xFE30, 0xFE4F, |
| 378 // Halfwidth and Fullwidth Forms |
| 379 // Usually only used in CJK |
| 380 0xFF00, 0xFF0C, |
| 381 0xFF0E, 0xFF1A, |
| 382 0xFF1F, 0xFFEF, |
| 383 // Emoji. |
| 384 0x1F110, 0x1F129, |
| 385 0x1F130, 0x1F149, |
| 386 0x1F150, 0x1F169, |
| 387 0x1F170, 0x1F189, |
| 388 0x1F200, 0x1F6FF |
| 389 }; |
| 390 |
| 391 if (c >= cjkSymbolRanges[0] |
| 392 && c <= cjkSymbolRanges[WTF_ARRAY_LENGTH(cjkSymbolRanges) - 1] |
| 393 && valueInIntervalList(cjkSymbolRanges, c)) { |
| 394 return true; |
| 395 } |
| 396 |
| 397 if (c < 0x2020 && c > 0x2D9) |
| 398 return false; |
| 399 |
| 400 // Hash lookup for isolated symbols (those not part of a contiguous range) |
| 401 static HashSet<UChar32>* cjkIsolatedSymbols = 0; |
| 402 if (!cjkIsolatedSymbols) { |
| 403 cjkIsolatedSymbols = new HashSet<UChar32>(); |
| 404 for (size_t i = 0; i < WTF_ARRAY_LENGTH(cjkIsolatedSymbolsArray); ++i) |
| 405 cjkIsolatedSymbols->add(cjkIsolatedSymbolsArray[i]); |
| 406 } |
| 407 return cjkIsolatedSymbols->contains(c); |
197 } | 408 } |
198 | 409 |
199 unsigned Character::expansionOpportunityCount(const LChar* characters, size_t le
ngth, TextDirection direction, bool& isAfterExpansion, const TextJustify textJus
tify) | 410 unsigned Character::expansionOpportunityCount(const LChar* characters, size_t le
ngth, TextDirection direction, bool& isAfterExpansion, const TextJustify textJus
tify) |
200 { | 411 { |
201 unsigned count = 0; | 412 unsigned count = 0; |
202 if (textJustify == TextJustifyDistribute) { | 413 if (textJustify == TextJustifyDistribute) { |
203 isAfterExpansion = true; | 414 isAfterExpansion = true; |
204 return length; | 415 return length; |
205 } | 416 } |
206 | 417 |
(...skipping 106 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
313 } | 524 } |
314 | 525 |
315 bool Character::isCommonOrInheritedScript(UChar32 character) | 526 bool Character::isCommonOrInheritedScript(UChar32 character) |
316 { | 527 { |
317 UErrorCode status = U_ZERO_ERROR; | 528 UErrorCode status = U_ZERO_ERROR; |
318 UScriptCode script = uscript_getScript(character, &status); | 529 UScriptCode script = uscript_getScript(character, &status); |
319 return U_SUCCESS(status) && (script == USCRIPT_COMMON || script == USCRIPT_I
NHERITED); | 530 return U_SUCCESS(status) && (script == USCRIPT_COMMON || script == USCRIPT_I
NHERITED); |
320 } | 531 } |
321 | 532 |
322 } // namespace blink | 533 } // namespace blink |
OLD | NEW |