OLD | NEW |
---|---|
1 /* | 1 /* |
2 * Copyright (C) 1999 Lars Knoll (knoll@kde.org) | 2 * Copyright (C) 1999 Lars Knoll (knoll@kde.org) |
3 * (C) 1999 Antti Koivisto (koivisto@kde.org) | 3 * (C) 1999 Antti Koivisto (koivisto@kde.org) |
4 * (C) 2000 Dirk Mueller (mueller@kde.org) | 4 * (C) 2000 Dirk Mueller (mueller@kde.org) |
5 * Copyright (C) 2003, 2006, 2010, 2011 Apple Inc. All rights reserved. | 5 * Copyright (C) 2003, 2006, 2010, 2011 Apple Inc. All rights reserved. |
6 * | 6 * |
7 * This library is free software; you can redistribute it and/or | 7 * This library is free software; you can redistribute it and/or |
8 * modify it under the terms of the GNU Library General Public | 8 * modify it under the terms of the GNU Library General Public |
9 * License as published by the Free Software Foundation; either | 9 * License as published by the Free Software Foundation; either |
10 * version 2 of the License, or (at your option) any later version. | 10 * version 2 of the License, or (at your option) any later version. |
(...skipping 10 matching lines...) Expand all Loading... | |
21 * | 21 * |
22 */ | 22 */ |
23 | 23 |
24 #include "config.h" | 24 #include "config.h" |
25 #include "core/platform/graphics/Font.h" | 25 #include "core/platform/graphics/Font.h" |
26 | 26 |
27 #include "core/platform/graphics/FloatRect.h" | 27 #include "core/platform/graphics/FloatRect.h" |
28 #include "core/platform/graphics/TextRun.h" | 28 #include "core/platform/graphics/TextRun.h" |
29 #include "core/platform/graphics/WidthIterator.h" | 29 #include "core/platform/graphics/WidthIterator.h" |
30 #include "core/platform/text/transcoder/FontTranscoder.h" | 30 #include "core/platform/text/transcoder/FontTranscoder.h" |
31 #include <wtf/MainThread.h> | 31 #include "wtf/MainThread.h" |
32 #include <wtf/MathExtras.h> | 32 #include "wtf/MathExtras.h" |
33 #include <wtf/text/StringBuilder.h> | 33 #include "wtf/StdLibExtras.h" |
34 #include <wtf/UnusedParam.h> | 34 #include "wtf/UnusedParam.h" |
35 #include "wtf/text/StringBuilder.h" | |
35 | 36 |
36 using namespace WTF; | 37 using namespace WTF; |
37 using namespace Unicode; | 38 using namespace Unicode; |
38 | 39 |
39 namespace WTF { | 40 namespace WTF { |
40 | 41 |
41 // allow compilation of OwnPtr<TextLayout> in source files that don't have access to the TextLayout class definition | 42 // allow compilation of OwnPtr<TextLayout> in source files that don't have access to the TextLayout class definition |
42 template <> void deleteOwnedPtr<WebCore::TextLayout>(WebCore::TextLayout* ptr) | 43 template <> void deleteOwnedPtr<WebCore::TextLayout>(WebCore::TextLayout* ptr) |
43 { | 44 { |
44 WebCore::Font::deleteLayout(ptr); | 45 WebCore::Font::deleteLayout(ptr); |
(...skipping 301 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
346 if (!run.characterScanForCodePath()) | 347 if (!run.characterScanForCodePath()) |
347 return Simple; | 348 return Simple; |
348 | 349 |
349 if (run.is8Bit()) | 350 if (run.is8Bit()) |
350 return Simple; | 351 return Simple; |
351 | 352 |
352 // Start from 0 since drawing and highlighting also measure the characters before run->from. | 353 // Start from 0 since drawing and highlighting also measure the characters before run->from. |
353 return characterRangeCodePath(run.characters16(), run.length()); | 354 return characterRangeCodePath(run.characters16(), run.length()); |
354 } | 355 } |
355 | 356 |
357 static inline UChar keyExtractorUChar(const UChar* value) | |
358 { | |
359 return *value; | |
360 } | |
361 | |
362 static inline UChar32 keyExtractorUChar32(const UChar32* value) | |
363 { | |
364 return *value; | |
365 } | |
366 | |
356 Font::CodePath Font::characterRangeCodePath(const UChar* characters, unsigned le n) | 367 Font::CodePath Font::characterRangeCodePath(const UChar* characters, unsigned le n) |
357 { | 368 { |
358 // FIXME: Should use a UnicodeSet in ports where ICU is used. Note that we | 369 static UChar complexCodePathRanges[] = { |
359 // can't simply use UnicodeCharacter Property/class because some characters | 370 // U+02E5 through U+02E9 (Modifier Letters : Tone letters) |
360 // are not 'combining', but still need to go to the complex path. | 371 0x2E5, 0x2E9, |
361 // Alternatively, we may as well consider binary search over a sorted | 372 // U+0300 through U+036F Combining diacritical marks |
362 // list of ranges. | 373 0x300, 0x36F, |
374 // U+0591 through U+05CF excluding U+05BE Hebrew combining marks, ... | |
375 0x0591, 0x05BD, | |
376 // ... Hebrew punctuation Paseq, Sof Pasuq and Nun Hafukha | |
377 0x05BF, 0x05CF, | |
378 // U+0600 through U+109F Arabic, Syriac, Thaana, NKo, Samaritan, Mandaic, | |
379 // Devanagari, Bengali, Gurmukhi, Gujarati, Oriya, Tamil, Telugu, Kannada, | |
380 // Malayalam, Sinhala, Thai, Lao, Tibetan, Myanmar | |
381 0x0600, 0x109F, | |
382 // U+1100 through U+11FF Hangul Jamo (only Ancient Korean should be left | |
383 // here if you precompose; Modern Korean will be precomposed as a result of step A) | |
384 0x1100, 0x11FF, | |
385 // U+135D through U+135F Ethiopic combining marks | |
386 0x135D, 0x135F, | |
387 // U+1700 through U+18AF Tagalog, Hanunoo, Buhid, Tagbanwa, Khmer, Mongolian | |
388 0x1700, 0x18AF, | |
389 // U+1900 through U+194F Limbu (Unicode 4.0) | |
390 0x1900, 0x194F, | |
391 // U+1980 through U+19DF New Tai Lue | |
392 0x1980, 0x19DF, | |
393 // U+1A00 through U+1CFF Buginese, Tai Tham, Balinese, Batak, Lepcha, Vedic | |
394 0x1A00, 0x1CFF, | |
395 // U+1DC0 through U+1DFF Combining diacritical marks supplement | |
396 0x1DC0, 0x1DFF, | |
397 // U+20D0 through U+20FF Combining marks for symbols | |
398 0x20D0, 0x20FF, | |
399 // U+2CEF through U+2CF1 Combining marks for Coptic | |
400 0x2CEF, 0x2CF1, | |
401 // U+302A through U+302F Ideographic and Hangul Tone marks | |
402 0x302A, 0x302F, | |
403 // U+A67C through U+A67D Combining marks for old Cyrillic | |
404 0xA67C, 0xA67D, | |
405 // U+A6F0 through U+A6F1 Combining mark for Bamum | |
406 0xA6F0, 0xA6F1, | |
407 // U+A800 through U+ABFF Nagri, Phags-pa, Saurashtra, Devanagari Extended, | |
408 // Hangul Jamo Ext. A, Javanese, Myanmar Extended A, Tai Viet, Meetei Mayek | |
409 0xA800, 0xABFF, | |
410 // U+D7B0 through U+D7FF Hangul Jamo Ext. B | |
411 0xD7B0, 0xD7FF, | |
412 // U+FE00 through U+FE0F Unicode variation selectors | |
413 0xFE00, 0xFE0F, | |
414 // U+FE20 through U+FE2F Combining half marks | |
415 0xFE20, 0xFE2F | |
416 }; | |
417 static size_t complexCodePathRangesCount = WTF_ARRAY_LENGTH(complexCodePathR anges); | |
418 | |
363 CodePath result = Simple; | 419 CodePath result = Simple; |
364 for (unsigned i = 0; i < len; i++) { | 420 for (unsigned i = 0; i < len; i++) { |
365 const UChar c = characters[i]; | 421 const UChar c = characters[i]; |
366 if (c < 0x2E5) // U+02E5 through U+02E9 (Modifier Letters : Tone letters ) | 422 |
423 // Shortcut for common case | |
424 if (c < 0x2E5) | |
367 continue; | 425 continue; |
368 if (c <= 0x2E9) | |
369 return Complex; | |
370 | |
371 if (c < 0x300) // U+0300 through U+036F Combining diacritical marks | |
372 continue; | |
373 if (c <= 0x36F) | |
374 return Complex; | |
375 | |
376 if (c < 0x0591 || c == 0x05BE) // U+0591 through U+05CF excluding U+05BE Hebrew combining marks, Hebrew punctuation Paseq, Sof Pasuq and Nun Hafukha | |
377 continue; | |
378 if (c <= 0x05CF) | |
379 return Complex; | |
380 | |
381 // U+0600 through U+109F Arabic, Syriac, Thaana, NKo, Samaritan, Mandaic , | |
382 // Devanagari, Bengali, Gurmukhi, Gujarati, Oriya, Tamil, Telugu, Kannad a, | |
383 // Malayalam, Sinhala, Thai, Lao, Tibetan, Myanmar | |
384 if (c < 0x0600) | |
385 continue; | |
386 if (c <= 0x109F) | |
387 return Complex; | |
388 | |
389 // U+1100 through U+11FF Hangul Jamo (only Ancient Korean should be left here if you precompose; | |
390 // Modern Korean will be precomposed as a result of step A) | |
391 if (c < 0x1100) | |
392 continue; | |
393 if (c <= 0x11FF) | |
394 return Complex; | |
395 | |
396 if (c < 0x135D) // U+135D through U+135F Ethiopic combining marks | |
397 continue; | |
398 if (c <= 0x135F) | |
399 return Complex; | |
400 | |
401 if (c < 0x1700) // U+1780 through U+18AF Tagalog, Hanunoo, Buhid, Taghan wa,Khmer, Mongolian | |
402 continue; | |
403 if (c <= 0x18AF) | |
404 return Complex; | |
405 | |
406 if (c < 0x1900) // U+1900 through U+194F Limbu (Unicode 4.0) | |
407 continue; | |
408 if (c <= 0x194F) | |
409 return Complex; | |
410 | |
411 if (c < 0x1980) // U+1980 through U+19DF New Tai Lue | |
412 continue; | |
413 if (c <= 0x19DF) | |
414 return Complex; | |
415 | |
416 if (c < 0x1A00) // U+1A00 through U+1CFF Buginese, Tai Tham, Balinese, B atak, Lepcha, Vedic | |
417 continue; | |
418 if (c <= 0x1CFF) | |
419 return Complex; | |
420 | |
421 if (c < 0x1DC0) // U+1DC0 through U+1DFF Comining diacritical mark suppl ement | |
422 continue; | |
423 if (c <= 0x1DFF) | |
424 return Complex; | |
425 | 426 |
426 // U+1E00 through U+2000 characters with diacritics and stacked diacriti cs | 427 // U+1E00 through U+2000 characters with diacritics and stacked diacriti cs |
427 if (c <= 0x2000) { | 428 if (c >= 0x1E00 && c <= 0x2000) { |
428 result = SimpleWithGlyphOverflow; | 429 result = SimpleWithGlyphOverflow; |
429 continue; | 430 continue; |
430 } | 431 } |
431 | 432 |
432 if (c < 0x20D0) // U+20D0 through U+20FF Combining marks for symbols | 433 // Surrogate pairs |
433 continue; | 434 if (c > 0xD7FF && c <= 0xDBFF) { |
434 if (c <= 0x20FF) | |
435 return Complex; | |
436 | |
437 if (c < 0x2CEF) // U+2CEF through U+2CF1 Combining marks for Coptic | |
438 continue; | |
439 if (c <= 0x2CF1) | |
440 return Complex; | |
441 | |
442 if (c < 0x302A) // U+302A through U+302F Ideographic and Hangul Tone mar ks | |
443 continue; | |
444 if (c <= 0x302F) | |
445 return Complex; | |
446 | |
447 if (c < 0xA67C) // U+A67C through U+A67D Combining marks for old Cyrilli c | |
448 continue; | |
449 if (c <= 0xA67D) | |
450 return Complex; | |
451 | |
452 if (c < 0xA6F0) // U+A6F0 through U+A6F1 Combining mark for Bamum | |
453 continue; | |
454 if (c <= 0xA6F1) | |
455 return Complex; | |
456 | |
457 // U+A800 through U+ABFF Nagri, Phags-pa, Saurashtra, Devanagari Extended , | |
458 // Hangul Jamo Ext. A, Javanese, Myanmar Extended A, Tai Viet, Meetei May ek, | |
459 if (c < 0xA800) | |
460 continue; | |
461 if (c <= 0xABFF) | |
462 return Complex; | |
463 | |
464 if (c < 0xD7B0) // U+D7B0 through U+D7FF Hangul Jamo Ext. B | |
465 continue; | |
466 if (c <= 0xD7FF) | |
467 return Complex; | |
468 | |
469 if (c <= 0xDBFF) { | |
470 // High surrogate | |
471 | |
472 if (i == len - 1) | 435 if (i == len - 1) |
473 continue; | 436 continue; |
474 | 437 |
475 UChar next = characters[++i]; | 438 UChar next = characters[++i]; |
476 if (!U16_IS_TRAIL(next)) | 439 if (!U16_IS_TRAIL(next)) |
477 continue; | 440 continue; |
478 | 441 |
479 UChar32 supplementaryCharacter = U16_GET_SUPPLEMENTARY(c, next); | 442 UChar32 supplementaryCharacter = U16_GET_SUPPLEMENTARY(c, next); |
480 | 443 |
481 if (supplementaryCharacter < 0x1F1E6) // U+1F1E6 through U+1F1FF Reg ional Indicator Symbols | 444 if (supplementaryCharacter < 0x1F1E6) // U+1F1E6 through U+1F1FF Reg ional Indicator Symbols |
482 continue; | 445 continue; |
483 if (supplementaryCharacter <= 0x1F1FF) | 446 if (supplementaryCharacter <= 0x1F1FF) |
484 return Complex; | 447 return Complex; |
485 | 448 |
486 if (supplementaryCharacter < 0xE0100) // U+E0100 through U+E01EF Uni code variation selectors. | 449 if (supplementaryCharacter < 0xE0100) // U+E0100 through U+E01EF Uni code variation selectors. |
487 continue; | 450 continue; |
488 if (supplementaryCharacter <= 0xE01EF) | 451 if (supplementaryCharacter <= 0xE01EF) |
489 return Complex; | 452 return Complex; |
490 | 453 |
491 // FIXME: Check for Brahmi (U+11000 block), Kaithi (U+11080 block) and other complex scripts | 454 // FIXME: Check for Brahmi (U+11000 block), Kaithi (U+11080 block) and other complex scripts |
492 // in plane 1 or higher. | 455 // in plane 1 or higher. |
493 | 456 |
494 continue; | 457 continue; |
495 } | 458 } |
496 | 459 |
497 if (c < 0xFE00) // U+FE00 through U+FE0F Unicode variation selectors | 460 // Search for other Complex cases |
461 UChar* boundingCharacter = approximateBinarySearch<UChar, UChar>( | |
462 (UChar*)complexCodePathRanges, complexCodePathRangesCount, c, keyExt ractorUChar); | |
463 // Exact matches are complex | |
464 if (*boundingCharacter == c) | |
465 return Complex; | |
466 bool isEndOfRange = ((boundingCharacter - complexCodePathRanges) % 2); | |
467 if (*boundingCharacter < c) { | |
468 // Determine if we are in a range or out | |
469 if (!isEndOfRange) | |
470 return Complex; | |
498 continue; | 471 continue; |
499 if (c <= 0xFE0F) | 472 } |
500 return Complex; | 473 ASSERT(*boundingCharacter > c); |
501 | 474 // Determine if we are in a range or out - opposite condition to above |
502 if (c < 0xFE20) // U+FE20 through U+FE2F Combining half marks | 475 if (isEndOfRange) |
503 continue; | |
504 if (c <= 0xFE2F) | |
505 return Complex; | 476 return Complex; |
506 } | 477 } |
478 | |
507 return result; | 479 return result; |
508 } | 480 } |
509 | 481 |
510 bool Font::isCJKIdeograph(UChar32 c) | 482 bool Font::isCJKIdeograph(UChar32 c) |
511 { | 483 { |
512 // The basic CJK Unified Ideographs block. | 484 static UChar32 cjkIdeographRanges[] = { |
513 if (c >= 0x4E00 && c <= 0x9FFF) | 485 // CJK Radicals Supplement and Kangxi Radicals. |
514 return true; | 486 0x2E80, 0x2FDF, |
515 | 487 // CJK Strokes. |
516 // CJK Unified Ideographs Extension A. | 488 0x31C0, 0x31EF, |
517 if (c >= 0x3400 && c <= 0x4DBF) | 489 // CJK Unified Ideographs Extension A. |
518 return true; | 490 0x3400, 0x4DBF, |
519 | 491 // The basic CJK Unified Ideographs block. |
520 // CJK Radicals Supplement. | 492 0x4E00, 0x9FFF, |
521 if (c >= 0x2E80 && c <= 0x2EFF) | 493 // CJK Compatibility Ideographs. |
522 return true; | 494 0xF900, 0xFAFF, |
523 | 495 // CJK Unified Ideographs Extension B. |
524 // Kangxi Radicals. | 496 0x20000, 0x2A6DF, |
525 if (c >= 0x2F00 && c <= 0x2FDF) | 497 // CJK Unified Ideographs Extension C. |
526 return true; | 498 // CJK Unified Ideographs Extension D. |
527 | 499 0x2A700, 0x2B81F, |
528 // CJK Strokes. | 500 // CJK Compatibility Ideographs Supplement. |
529 if (c >= 0x31C0 && c <= 0x31EF) | 501 0x2F800, 0x2FA1F |
530 return true; | 502 }; |
531 | 503 static size_t cjkIdeographRangesCount = WTF_ARRAY_LENGTH(cjkIdeographRanges) ; |
532 // CJK Compatibility Ideographs. | 504 |
533 if (c >= 0xF900 && c <= 0xFAFF) | 505 // Early out |
534 return true; | 506 if (c < cjkIdeographRanges[0] || c > cjkIdeographRanges[cjkIdeographRangesCo unt - 1]) |
535 | 507 return false; |
536 // CJK Unified Ideographs Extension B. | 508 |
537 if (c >= 0x20000 && c <= 0x2A6DF) | 509 UChar32* boundingCharacter = approximateBinarySearch<UChar32, UChar32>( |
538 return true; | 510 (UChar32*)cjkIdeographRanges, cjkIdeographRangesCount, c, keyExtractorUC har32); |
539 | 511 // Exact matches are CJK |
540 // CJK Unified Ideographs Extension C. | 512 if (*boundingCharacter == c) |
541 if (c >= 0x2A700 && c <= 0x2B73F) | 513 return true; |
542 return true; | 514 bool isEndOfRange = ((boundingCharacter - cjkIdeographRanges) % 2); |
543 | 515 if (*boundingCharacter < c) |
544 // CJK Unified Ideographs Extension D. | 516 return !isEndOfRange; |
545 if (c >= 0x2B740 && c <= 0x2B81F) | 517 return isEndOfRange; |
546 return true; | |
547 | |
548 // CJK Compatibility Ideographs Supplement. | |
549 if (c >= 0x2F800 && c <= 0x2FA1F) | |
550 return true; | |
551 | |
552 return false; | |
553 } | 518 } |
554 | 519 |
555 bool Font::isCJKIdeographOrSymbol(UChar32 c) | 520 bool Font::isCJKIdeographOrSymbol(UChar32 c) |
556 { | 521 { |
557 // 0x2C7 Caron, Mandarin Chinese 3rd Tone | 522 // Likely common case |
558 // 0x2CA Modifier Letter Acute Accent, Mandarin Chinese 2nd Tone | 523 if (c < 0x2C7) |
559 // 0x2CB Modifier Letter Grave Accent, Mandarin Chinese 4th Tone | |
560 // 0x2D9 Dot Above, Mandarin Chinese 5th Tone | |
561 if ((c == 0x2C7) || (c == 0x2CA) || (c == 0x2CB) || (c == 0x2D9)) | |
562 return true; | |
563 | |
564 if ((c == 0x2020) || (c == 0x2021) || (c == 0x2030) || (c == 0x203B) || (c = = 0x203C) | |
565 || (c == 0x2042) || (c == 0x2047) || (c == 0x2048) || (c == 0x2049) || ( c == 0x2051) | |
566 || (c == 0x20DD) || (c == 0x20DE) || (c == 0x2100) || (c == 0x2103) || ( c == 0x2105) | |
567 || (c == 0x2109) || (c == 0x210A) || (c == 0x2113) || (c == 0x2116) || ( c == 0x2121) | |
568 || (c == 0x212B) || (c == 0x213B) || (c == 0x2150) || (c == 0x2151) || ( c == 0x2152)) | |
569 return true; | |
570 | |
571 if (c >= 0x2156 && c <= 0x215A) | |
572 return true; | |
573 | |
574 if (c >= 0x2160 && c <= 0x216B) | |
575 return true; | |
576 | |
577 if (c >= 0x2170 && c <= 0x217B) | |
578 return true; | |
579 | |
580 if ((c == 0x217F) || (c == 0x2189) || (c == 0x2307) || (c == 0x2312) || (c = = 0x23BE) || (c == 0x23BF)) | |
581 return true; | |
582 | |
583 if (c >= 0x23C0 && c <= 0x23CC) | |
584 return true; | |
585 | |
586 if ((c == 0x23CE) || (c == 0x2423)) | |
587 return true; | |
588 | |
589 if (c >= 0x2460 && c <= 0x2492) | |
590 return true; | |
591 | |
592 if (c >= 0x249C && c <= 0x24FF) | |
593 return true; | |
594 | |
595 if ((c == 0x25A0) || (c == 0x25A1) || (c == 0x25A2) || (c == 0x25AA) || (c = = 0x25AB)) | |
596 return true; | |
597 | |
598 if ((c == 0x25B1) || (c == 0x25B2) || (c == 0x25B3) || (c == 0x25B6) || (c = = 0x25B7) || (c == 0x25BC) || (c == 0x25BD)) | |
599 return true; | |
600 | |
601 if ((c == 0x25C0) || (c == 0x25C1) || (c == 0x25C6) || (c == 0x25C7) || (c = = 0x25C9) || (c == 0x25CB) || (c == 0x25CC)) | |
602 return true; | |
603 | |
604 if (c >= 0x25CE && c <= 0x25D3) | |
605 return true; | |
606 | |
607 if (c >= 0x25E2 && c <= 0x25E6) | |
608 return true; | |
609 | |
610 if (c == 0x25EF) | |
611 return true; | |
612 | |
613 if (c >= 0x2600 && c <= 0x2603) | |
614 return true; | |
615 | |
616 if ((c == 0x2605) || (c == 0x2606) || (c == 0x260E) || (c == 0x2616) || (c = = 0x2617) || (c == 0x2640) || (c == 0x2642)) | |
617 return true; | |
618 | |
619 if (c >= 0x2660 && c <= 0x266F) | |
620 return true; | |
621 | |
622 if (c >= 0x2672 && c <= 0x267D) | |
623 return true; | |
624 | |
625 if ((c == 0x26A0) || (c == 0x26BD) || (c == 0x26BE) || (c == 0x2713) || (c = = 0x271A) || (c == 0x273F) || (c == 0x2740) || (c == 0x2756)) | |
626 return true; | |
627 | |
628 if (c >= 0x2776 && c <= 0x277F) | |
629 return true; | |
630 | |
631 if (c == 0x2B1A) | |
632 return true; | |
633 | |
634 // Ideographic Description Characters. | |
635 if (c >= 0x2FF0 && c <= 0x2FFF) | |
636 return true; | |
637 | |
638 // CJK Symbols and Punctuation, excluding 0x3030. | |
639 if (c >= 0x3000 && c < 0x3030) | |
640 return true; | |
641 | |
642 if (c > 0x3030 && c <= 0x303F) | |
643 return true; | |
644 | |
645 // Hiragana | |
646 if (c >= 0x3040 && c <= 0x309F) | |
647 return true; | |
648 | |
649 // Katakana | |
650 if (c >= 0x30A0 && c <= 0x30FF) | |
651 return true; | |
652 | |
653 // Bopomofo | |
654 if (c >= 0x3100 && c <= 0x312F) | |
655 return true; | |
656 | |
657 if (c >= 0x3190 && c <= 0x319F) | |
658 return true; | |
659 | |
660 // Bopomofo Extended | |
661 if (c >= 0x31A0 && c <= 0x31BF) | |
662 return true; | |
663 | |
664 // Enclosed CJK Letters and Months. | |
665 if (c >= 0x3200 && c <= 0x32FF) | |
666 return true; | |
667 | |
668 // CJK Compatibility. | |
669 if (c >= 0x3300 && c <= 0x33FF) | |
670 return true; | |
671 | |
672 if (c >= 0xF860 && c <= 0xF862) | |
673 return true; | |
674 | |
675 // CJK Compatibility Forms. | |
676 if (c >= 0xFE30 && c <= 0xFE4F) | |
677 return true; | |
678 | |
679 if ((c == 0xFE10) || (c == 0xFE11) || (c == 0xFE12) || (c == 0xFE19)) | |
680 return true; | |
681 | |
682 if ((c == 0xFF0D) || (c == 0xFF1B) || (c == 0xFF1C) || (c == 0xFF1E)) | |
683 return false; | 524 return false; |
684 | 525 |
685 // Halfwidth and Fullwidth Forms | 526 // Hash lookup for isolated symbols (those not part of a contiguous range) |
686 // Usually only used in CJK | 527 static HashSet<UChar32>* cjkIsolatedSymbols = 0; |
eseidel
2013/07/23 20:49:16
abarth made us a LiteralHashSet/StaticHashSet at s
| |
687 if (c >= 0xFF00 && c <= 0xFFEF) | 528 if (!cjkIsolatedSymbols) { |
688 return true; | 529 cjkIsolatedSymbols = new HashSet<UChar32>(); |
689 | 530 // 0x2C7 Caron, Mandarin Chinese 3rd Tone |
690 // Emoji. | 531 cjkIsolatedSymbols->add(0x2C7); |
691 if (c == 0x1F100) | 532 // 0x2CA Modifier Letter Acute Accent, Mandarin Chinese 2nd Tone |
692 return true; | 533 cjkIsolatedSymbols->add(0x2CA); |
693 | 534 // 0x2CB Modifier Letter Grave Accent, Mandarin Chinese 4th Tone |
694 if (c >= 0x1F110 && c <= 0x1F129) | 535 cjkIsolatedSymbols->add(0x2CB); |
695 return true; | 536 // 0x2D9 Dot Above, Mandarin Chinese 5th Tone |
696 | 537 cjkIsolatedSymbols->add(0x2D9); |
697 if (c >= 0x1F130 && c <= 0x1F149) | 538 |
698 return true; | 539 cjkIsolatedSymbols->add(0x2020); |
699 | 540 cjkIsolatedSymbols->add(0x2021); |
700 if (c >= 0x1F150 && c <= 0x1F169) | 541 cjkIsolatedSymbols->add(0x2030); |
701 return true; | 542 cjkIsolatedSymbols->add(0x203B); |
702 | 543 cjkIsolatedSymbols->add(0x203C); |
703 if (c >= 0x1F170 && c <= 0x1F189) | 544 cjkIsolatedSymbols->add(0x2042); |
704 return true; | 545 cjkIsolatedSymbols->add(0x2047); |
705 | 546 cjkIsolatedSymbols->add(0x2048); |
706 if (c >= 0x1F200 && c <= 0x1F6F) | 547 cjkIsolatedSymbols->add(0x2049); |
707 return true; | 548 cjkIsolatedSymbols->add(0x2051); |
708 | 549 cjkIsolatedSymbols->add(0x20DD); |
709 return isCJKIdeograph(c); | 550 cjkIsolatedSymbols->add(0x20DE); |
551 cjkIsolatedSymbols->add(0x2100); | |
552 cjkIsolatedSymbols->add(0x2103); | |
553 cjkIsolatedSymbols->add(0x2105); | |
554 cjkIsolatedSymbols->add(0x2109); | |
555 cjkIsolatedSymbols->add(0x210A); | |
556 cjkIsolatedSymbols->add(0x2113); | |
557 cjkIsolatedSymbols->add(0x2116); | |
558 cjkIsolatedSymbols->add(0x2121); | |
559 cjkIsolatedSymbols->add(0x212B); | |
560 cjkIsolatedSymbols->add(0x213B); | |
561 cjkIsolatedSymbols->add(0x2150); | |
562 cjkIsolatedSymbols->add(0x2151); | |
563 cjkIsolatedSymbols->add(0x2152); | |
564 cjkIsolatedSymbols->add(0x217F); | |
565 cjkIsolatedSymbols->add(0x2189); | |
566 cjkIsolatedSymbols->add(0x2307); | |
567 cjkIsolatedSymbols->add(0x2312); | |
568 cjkIsolatedSymbols->add(0x23CE); | |
569 cjkIsolatedSymbols->add(0x2423); | |
570 cjkIsolatedSymbols->add(0x25A0); | |
571 cjkIsolatedSymbols->add(0x25A1); | |
572 cjkIsolatedSymbols->add(0x25A2); | |
573 cjkIsolatedSymbols->add(0x25AA); | |
574 cjkIsolatedSymbols->add(0x25AB); | |
575 cjkIsolatedSymbols->add(0x25B1); | |
576 cjkIsolatedSymbols->add(0x25B2); | |
577 cjkIsolatedSymbols->add(0x25B3); | |
578 cjkIsolatedSymbols->add(0x25B6); | |
579 cjkIsolatedSymbols->add(0x25B7); | |
580 cjkIsolatedSymbols->add(0x25BC); | |
581 cjkIsolatedSymbols->add(0x25BD); | |
582 cjkIsolatedSymbols->add(0x25C0); | |
583 cjkIsolatedSymbols->add(0x25C1); | |
584 cjkIsolatedSymbols->add(0x25C6); | |
585 cjkIsolatedSymbols->add(0x25C7); | |
586 cjkIsolatedSymbols->add(0x25C9); | |
587 cjkIsolatedSymbols->add(0x25CB); | |
588 cjkIsolatedSymbols->add(0x25CC); | |
589 cjkIsolatedSymbols->add(0x25EF); | |
590 cjkIsolatedSymbols->add(0x2605); | |
591 cjkIsolatedSymbols->add(0x2606); | |
592 cjkIsolatedSymbols->add(0x260E); | |
593 cjkIsolatedSymbols->add(0x2616); | |
594 cjkIsolatedSymbols->add(0x2617); | |
595 cjkIsolatedSymbols->add(0x2640); | |
596 cjkIsolatedSymbols->add(0x2642); | |
597 cjkIsolatedSymbols->add(0x26A0); | |
598 cjkIsolatedSymbols->add(0x26BD); | |
599 cjkIsolatedSymbols->add(0x26BE); | |
600 cjkIsolatedSymbols->add(0x2713); | |
601 cjkIsolatedSymbols->add(0x271A); | |
602 cjkIsolatedSymbols->add(0x273F); | |
603 cjkIsolatedSymbols->add(0x2740); | |
604 cjkIsolatedSymbols->add(0x2756); | |
605 cjkIsolatedSymbols->add(0x2B1A); | |
606 cjkIsolatedSymbols->add(0xFE10); | |
607 cjkIsolatedSymbols->add(0xFE11); | |
608 cjkIsolatedSymbols->add(0xFE12); | |
609 cjkIsolatedSymbols->add(0xFE19); | |
610 cjkIsolatedSymbols->add(0xFF1D); | |
611 // Emoji. | |
612 cjkIsolatedSymbols->add(0x1F100); | |
abarth-chromium
2013/07/24 22:34:24
This is likely to blow up the binary. Can you add
Stephen Chennney
2013/07/24 23:02:19
You mean make a static array and then iterate over
| |
613 } | |
614 if (cjkIsolatedSymbols->contains(c)) | |
615 return true; | |
616 | |
617 if (isCJKIdeograph(c)) | |
618 return true; | |
619 | |
620 static UChar32 cjkSymbolRanges[] = { | |
621 0x2156, 0x215A, | |
622 0x2160, 0x216B, | |
623 0x2170, 0x217B, | |
624 0x23BE, 0x23CC, | |
625 0x2460, 0x2492, | |
626 0x249C, 0x24FF, | |
627 0x25CE, 0x25D3, | |
628 0x25E2, 0x25E6, | |
629 0x2600, 0x2603, | |
630 0x2660, 0x266F, | |
631 0x2672, 0x267D, | |
632 0x2776, 0x277F, | |
633 // Ideographic Description Characters, with CJK Symbols and Punctuation, excluding 0x3030. | |
634 // Then Hiragana 0x3040 .. 0x309F, Katakana 0x30A0 .. 0x30FF, Bopomofo 0x3100 .. 0x312F | |
635 0x2FF0, 0x302F, | |
636 0x3031, 0x312F, | |
637 // More Bopomofo and Bopomofo Extended 0x31A0 .. 0x31BF | |
638 0x3190, 0x31BF, | |
639 // Enclosed CJK Letters and Months (0x3200 .. 0x32FF). | |
640 // CJK Compatibility (0x3300 .. 0x33FF). | |
641 0x3200, 0x33FF, | |
642 0xF860, 0xF862, | |
643 // CJK Compatibility Forms. | |
644 0xFE30, 0xFE4F, | |
645 // Halfwidth and Fullwidth Forms | |
646 // Usually only used in CJK | |
647 0xFF00, 0xFF0C, | |
648 0xFF0E, 0xFF1A, | |
649 0xFF1F, 0xFFEF, | |
650 // Emoji. | |
651 0x1F110, 0x1F129, | |
652 0x1F130, 0x1F149, | |
653 0x1F150, 0x1F169, | |
654 0x1F170, 0x1F189, | |
655 0x1F200, 0x1F6FF | |
656 }; | |
657 static size_t cjkSymbolRangesCount = WTF_ARRAY_LENGTH(cjkSymbolRanges); | |
658 | |
659 UChar32* boundingCharacter = approximateBinarySearch<UChar32, UChar32>( | |
eseidel
2013/07/23 20:50:52
You mentioned you fixed an error here? Can it be
| |
660 (UChar32*)cjkSymbolRanges, cjkSymbolRangesCount, c, keyExtractorUChar32) ; | |
661 // Exact matches are CJK Symbols | |
662 if (*boundingCharacter == c) | |
663 return true; | |
664 bool isEndOfRange = ((boundingCharacter - cjkSymbolRanges) % 2); | |
665 if (*boundingCharacter < c) | |
666 return !isEndOfRange; | |
667 return isEndOfRange; | |
710 } | 668 } |
711 | 669 |
712 unsigned Font::expansionOpportunityCount(const LChar* characters, size_t length, TextDirection direction, bool& isAfterExpansion) | 670 unsigned Font::expansionOpportunityCount(const LChar* characters, size_t length, TextDirection direction, bool& isAfterExpansion) |
713 { | 671 { |
714 unsigned count = 0; | 672 unsigned count = 0; |
715 if (direction == LTR) { | 673 if (direction == LTR) { |
716 for (size_t i = 0; i < length; ++i) { | 674 for (size_t i = 0; i < length; ++i) { |
717 if (treatAsSpace(characters[i])) { | 675 if (treatAsSpace(characters[i])) { |
718 count++; | 676 count++; |
719 isAfterExpansion = true; | 677 isAfterExpansion = true; |
(...skipping 70 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
790 | 748 |
791 // Additional word-separator characters listed in CSS Text Level 3 Editor's Draft 3 November 2010. | 749 // Additional word-separator characters listed in CSS Text Level 3 Editor's Draft 3 November 2010. |
792 if (c == ethiopicWordspace || c == aegeanWordSeparatorLine || c == aegeanWor dSeparatorDot | 750 if (c == ethiopicWordspace || c == aegeanWordSeparatorLine || c == aegeanWor dSeparatorDot |
793 || c == ugariticWordDivider || c == tibetanMarkIntersyllabicTsheg || c = = tibetanMarkDelimiterTshegBstar) | 751 || c == ugariticWordDivider || c == tibetanMarkIntersyllabicTsheg || c = = tibetanMarkDelimiterTshegBstar) |
794 return false; | 752 return false; |
795 | 753 |
796 return true; | 754 return true; |
797 } | 755 } |
798 | 756 |
799 } | 757 } |
OLD | NEW |