| Index: Source/core/platform/graphics/Font.cpp
|
| diff --git a/Source/core/platform/graphics/Font.cpp b/Source/core/platform/graphics/Font.cpp
|
| index f948c7e8c9de9c02fc4e0ea40e31a48f61e23e9a..51a0b291bc4faa283c39418f6e320644c0a13495 100644
|
| --- a/Source/core/platform/graphics/Font.cpp
|
| +++ b/Source/core/platform/graphics/Font.cpp
|
| @@ -28,10 +28,11 @@
|
| #include "core/platform/graphics/TextRun.h"
|
| #include "core/platform/graphics/WidthIterator.h"
|
| #include "core/platform/text/transcoder/FontTranscoder.h"
|
| -#include <wtf/MainThread.h>
|
| -#include <wtf/MathExtras.h>
|
| -#include <wtf/text/StringBuilder.h>
|
| -#include <wtf/UnusedParam.h>
|
| +#include "wtf/MainThread.h"
|
| +#include "wtf/MathExtras.h"
|
| +#include "wtf/StdLibExtras.h"
|
| +#include "wtf/UnusedParam.h"
|
| +#include "wtf/text/StringBuilder.h"
|
|
|
| using namespace WTF;
|
| using namespace Unicode;
|
| @@ -353,122 +354,79 @@ Font::CodePath Font::codePath(const TextRun& run) const
|
| return characterRangeCodePath(run.characters16(), run.length());
|
| }
|
|
|
| -Font::CodePath Font::characterRangeCodePath(const UChar* characters, unsigned len)
|
| +static inline UChar keyExtractor(const UChar* value)
|
| {
|
| - // FIXME: Should use a UnicodeSet in ports where ICU is used. Note that we
|
| - // can't simply use UnicodeCharacter Property/class because some characters
|
| - // are not 'combining', but still need to go to the complex path.
|
| - // Alternatively, we may as well consider binary search over a sorted
|
| - // list of ranges.
|
| - CodePath result = Simple;
|
| - for (unsigned i = 0; i < len; i++) {
|
| - const UChar c = characters[i];
|
| - if (c < 0x2E5) // U+02E5 through U+02E9 (Modifier Letters : Tone letters)
|
| - continue;
|
| - if (c <= 0x2E9)
|
| - return Complex;
|
| -
|
| - if (c < 0x300) // U+0300 through U+036F Combining diacritical marks
|
| - continue;
|
| - if (c <= 0x36F)
|
| - return Complex;
|
| -
|
| - if (c < 0x0591 || c == 0x05BE) // U+0591 through U+05CF excluding U+05BE Hebrew combining marks, Hebrew punctuation Paseq, Sof Pasuq and Nun Hafukha
|
| - continue;
|
| - if (c <= 0x05CF)
|
| - return Complex;
|
| + return *value;
|
| +}
|
|
|
| +Font::CodePath Font::characterRangeCodePath(const UChar* characters, unsigned len)
|
| +{
|
| + static UChar complexCodePathRanges[] = {
|
| + // U+02E5 through U+02E9 (Modifier Letters : Tone letters)
|
| + 0x2E5, 0x2E9,
|
| + // U+0300 through U+036F Combining diacritical marks
|
| + 0x300, 0x36F,
|
| + // U+0591 through U+05CF excluding U+05BE Hebrew combining marks, ...
|
| + 0x0591, 0x05BD,
|
| + // ... Hebrew punctuation Paseq, Sof Pasuq and Nun Hafukha
|
| + 0x05BF, 0x05CF,
|
| // U+0600 through U+109F Arabic, Syriac, Thaana, NKo, Samaritan, Mandaic,
|
| - // Devanagari, Bengali, Gurmukhi, Gujarati, Oriya, Tamil, Telugu, Kannada,
|
| + // Devanagari, Bengali, Gurmukhi, Gujarati, Oriya, Tamil, Telugu, Kannada,
|
| // Malayalam, Sinhala, Thai, Lao, Tibetan, Myanmar
|
| - if (c < 0x0600)
|
| - continue;
|
| - if (c <= 0x109F)
|
| - return Complex;
|
| -
|
| - // U+1100 through U+11FF Hangul Jamo (only Ancient Korean should be left here if you precompose;
|
| - // Modern Korean will be precomposed as a result of step A)
|
| - if (c < 0x1100)
|
| - continue;
|
| - if (c <= 0x11FF)
|
| - return Complex;
|
| + 0x0600, 0x109F,
|
| + // U+1100 through U+11FF Hangul Jamo (only Ancient Korean should be left
|
| + // here if you precompose; Modern Korean will be precomposed as a result of step A)
|
| + 0x1100, 0x11FF,
|
| + // U+135D through U+135F Ethiopic combining marks
|
| + 0x135D, 0x135F,
|
| + // U+1700 through U+18AF Tagalog, Hanunoo, Buhid, Tagbanwa, Khmer, Mongolian
|
| + 0x1700, 0x18AF,
|
| + // U+1900 through U+194F Limbu (Unicode 4.0)
|
| + 0x1900, 0x194F,
|
| + // U+1980 through U+19DF New Tai Lue
|
| + 0x1980, 0x19DF,
|
| + // U+1A00 through U+1CFF Buginese, Tai Tham, Balinese, Batak, Lepcha, Vedic
|
| + 0x1A00, 0x1CFF,
|
| + // U+1DC0 through U+1DFF Combining diacritical marks supplement
|
| + 0x1DC0, 0x1DFF,
|
| + // U+20D0 through U+20FF Combining marks for symbols
|
| + 0x20D0, 0x20FF,
|
| + // U+2CEF through U+2CF1 Combining marks for Coptic
|
| + 0x2CEF, 0x2CF1,
|
| + // U+302A through U+302F Ideographic and Hangul Tone marks
|
| + 0x302A, 0x302F,
|
| + // U+A67C through U+A67D Combining marks for old Cyrillic
|
| + 0xA67C, 0xA67D,
|
| + // U+A6F0 through U+A6F1 Combining mark for Bamum
|
| + 0xA6F0, 0xA6F1,
|
| + // U+A800 through U+ABFF Nagri, Phags-pa, Saurashtra, Devanagari Extended,
|
| + // Hangul Jamo Ext. A, Javanese, Myanmar Extended A, Tai Viet, Meetei Mayek
|
| + 0xA800, 0xABFF,
|
| + // U+D7B0 through U+D7FF Hangul Jamo Ext. B
|
| + 0xD7B0, 0xD7FF,
|
| + // U+FE00 through U+FE0F Unicode variation selectors
|
| + 0xFE00, 0xFE0F,
|
| + // U+FE20 through U+FE2F Combining half marks
|
| + 0xFE20, 0xFE2F,
|
| + };
|
| + static size_t complexCodePathRangesCount = WTF_ARRAY_LENGTH(complexCodePathRanges);
|
|
|
| - if (c < 0x135D) // U+135D through U+135F Ethiopic combining marks
|
| - continue;
|
| - if (c <= 0x135F)
|
| - return Complex;
|
| -
|
| - if (c < 0x1700) // U+1780 through U+18AF Tagalog, Hanunoo, Buhid, Taghanwa,Khmer, Mongolian
|
| - continue;
|
| - if (c <= 0x18AF)
|
| - return Complex;
|
| -
|
| - if (c < 0x1900) // U+1900 through U+194F Limbu (Unicode 4.0)
|
| - continue;
|
| - if (c <= 0x194F)
|
| - return Complex;
|
| -
|
| - if (c < 0x1980) // U+1980 through U+19DF New Tai Lue
|
| - continue;
|
| - if (c <= 0x19DF)
|
| - return Complex;
|
| -
|
| - if (c < 0x1A00) // U+1A00 through U+1CFF Buginese, Tai Tham, Balinese, Batak, Lepcha, Vedic
|
| - continue;
|
| - if (c <= 0x1CFF)
|
| - return Complex;
|
| + CodePath result = Simple;
|
| + for (unsigned i = 0; i < len; i++) {
|
| + const UChar c = characters[i];
|
|
|
| - if (c < 0x1DC0) // U+1DC0 through U+1DFF Comining diacritical mark supplement
|
| + // Shortcut for common case
|
| + if (c < 0x2E5)
|
| continue;
|
| - if (c <= 0x1DFF)
|
| - return Complex;
|
|
|
| // U+1E00 through U+2000 characters with diacritics and stacked diacritics
|
| - if (c <= 0x2000) {
|
| + if (c >= 0x1E00 && c <= 0x2000) {
|
| result = SimpleWithGlyphOverflow;
|
| continue;
|
| }
|
|
|
| - if (c < 0x20D0) // U+20D0 through U+20FF Combining marks for symbols
|
| - continue;
|
| - if (c <= 0x20FF)
|
| - return Complex;
|
| -
|
| - if (c < 0x2CEF) // U+2CEF through U+2CF1 Combining marks for Coptic
|
| - continue;
|
| - if (c <= 0x2CF1)
|
| - return Complex;
|
| -
|
| - if (c < 0x302A) // U+302A through U+302F Ideographic and Hangul Tone marks
|
| - continue;
|
| - if (c <= 0x302F)
|
| - return Complex;
|
| -
|
| - if (c < 0xA67C) // U+A67C through U+A67D Combining marks for old Cyrillic
|
| - continue;
|
| - if (c <= 0xA67D)
|
| - return Complex;
|
| -
|
| - if (c < 0xA6F0) // U+A6F0 through U+A6F1 Combining mark for Bamum
|
| - continue;
|
| - if (c <= 0xA6F1)
|
| - return Complex;
|
| -
|
| - // U+A800 through U+ABFF Nagri, Phags-pa, Saurashtra, Devanagari Extended,
|
| - // Hangul Jamo Ext. A, Javanese, Myanmar Extended A, Tai Viet, Meetei Mayek,
|
| - if (c < 0xA800)
|
| - continue;
|
| - if (c <= 0xABFF)
|
| - return Complex;
|
| -
|
| - if (c < 0xD7B0) // U+D7B0 through U+D7FF Hangul Jamo Ext. B
|
| - continue;
|
| - if (c <= 0xD7FF)
|
| - return Complex;
|
| -
|
| - if (c <= 0xDBFF) {
|
| - // High surrogate
|
| -
|
| + // High surrogate (U+D800 through U+DBFF), i.e. the first half of a surrogate pair
|
| + if (c > 0xD7FF && c <= 0xDBFF) {
|
| if (i == len - 1)
|
| continue;
|
|
|
| @@ -494,16 +452,25 @@ Font::CodePath Font::characterRangeCodePath(const UChar* characters, unsigned le
|
| continue;
|
| }
|
|
|
| - if (c < 0xFE00) // U+FE00 through U+FE0F Unicode variation selectors
|
| - continue;
|
| - if (c <= 0xFE0F)
|
| + // Search for other Complex cases
|
| + UChar* boundingCharacter = approximateBinarySearch<UChar, UChar>(
|
| + (UChar*)complexCodePathRanges, complexCodePathRangesCount, c, keyExtractor);
|
| + // Exact matches are complex
|
| + if (*boundingCharacter == c)
|
| return Complex;
|
| -
|
| - if (c < 0xFE20) // U+FE20 through U+FE2F Combining half marks
|
| + bool isEndOfRange = ((boundingCharacter - complexCodePathRanges) % 2);
|
| + if (*boundingCharacter < c) {
|
| + // Determine if we are in a range or out
|
| + if (!isEndOfRange)
|
| + return Complex;
|
| continue;
|
| - if (c <= 0xFE2F)
|
| + }
|
| + ASSERT(*boundingCharacter > c);
|
| + // Determine if we are in a range or out - opposite condition to above
|
| + if (isEndOfRange)
|
| return Complex;
|
| }
|
| +
|
| return result;
|
| }
|
|
|
| @@ -517,12 +484,8 @@ bool Font::isCJKIdeograph(UChar32 c)
|
| if (c >= 0x3400 && c <= 0x4DBF)
|
| return true;
|
|
|
| - // CJK Radicals Supplement.
|
| - if (c >= 0x2E80 && c <= 0x2EFF)
|
| - return true;
|
| -
|
| - // Kangxi Radicals.
|
| - if (c >= 0x2F00 && c <= 0x2FDF)
|
| + // CJK Radicals Supplement and Kangxi Radicals.
|
| + if (c >= 0x2E80 && c <= 0x2FDF)
|
| return true;
|
|
|
| // CJK Strokes.
|
| @@ -577,10 +540,10 @@ bool Font::isCJKIdeographOrSymbol(UChar32 c)
|
| if (c >= 0x2170 && c <= 0x217B)
|
| return true;
|
|
|
| - if ((c == 0x217F) || (c == 0x2189) || (c == 0x2307) || (c == 0x2312) || (c == 0x23BE) || (c == 0x23BF))
|
| + if ((c == 0x217F) || (c == 0x2189) || (c == 0x2307) || (c == 0x2312))
|
| return true;
|
|
|
| - if (c >= 0x23C0 && c <= 0x23CC)
|
| + if (c >= 0x23BE && c <= 0x23CC)
|
| return true;
|
|
|
| if ((c == 0x23CE) || (c == 0x2423))
|
| @@ -631,42 +594,21 @@ bool Font::isCJKIdeographOrSymbol(UChar32 c)
|
| if (c == 0x2B1A)
|
| return true;
|
|
|
| - // Ideographic Description Characters.
|
| - if (c >= 0x2FF0 && c <= 0x2FFF)
|
| - return true;
|
| -
|
| - // CJK Symbols and Punctuation, excluding 0x3030.
|
| - if (c >= 0x3000 && c < 0x3030)
|
| + // Ideographic Description Characters, with CJK Symbols and Punctuation, excluding 0x3030.
|
| + // Then Hiragana 0x3040 .. 0x309F, Katakana 0x30A0 .. 0x30FF, Bopomofo 0x3100 .. 0x312F
|
| + if (c >= 0x2FF0 && c < 0x3030)
|
| return true;
|
|
|
| - if (c > 0x3030 && c <= 0x303F)
|
| + if (c > 0x3030 && c <= 0x312F)
|
| return true;
|
|
|
| - // Hiragana
|
| - if (c >= 0x3040 && c <= 0x309F)
|
| - return true;
|
| -
|
| - // Katakana
|
| - if (c >= 0x30A0 && c <= 0x30FF)
|
| - return true;
|
| -
|
| - // Bopomofo
|
| - if (c >= 0x3100 && c <= 0x312F)
|
| - return true;
|
| -
|
| - if (c >= 0x3190 && c <= 0x319F)
|
| - return true;
|
| -
|
| - // Bopomofo Extended
|
| - if (c >= 0x31A0 && c <= 0x31BF)
|
| + // Kanbun (0x3190 .. 0x319F) and Bopomofo Extended (0x31A0 .. 0x31BF)
|
| + if (c >= 0x3190 && c <= 0x31BF)
|
| return true;
|
|
|
| - // Enclosed CJK Letters and Months.
|
| - if (c >= 0x3200 && c <= 0x32FF)
|
| - return true;
|
| -
|
| - // CJK Compatibility.
|
| - if (c >= 0x3300 && c <= 0x33FF)
|
| + // Enclosed CJK Letters and Months (0x3200 .. 0x32FF).
|
| + // CJK Compatibility (0x3300 .. 0x33FF).
|
| + if (c >= 0x3200 && c <= 0x33FF)
|
| return true;
|
|
|
| if (c >= 0xF860 && c <= 0xF862)
|
| @@ -703,7 +645,7 @@ bool Font::isCJKIdeographOrSymbol(UChar32 c)
|
| if (c >= 0x1F170 && c <= 0x1F189)
|
| return true;
|
|
|
| - if (c >= 0x1F200 && c <= 0x1F6F)
|
| + if (c >= 0x1F200 && c <= 0x1F6FF)
|
| return true;
|
|
|
| return isCJKIdeograph(c);
|
|
|