Index: Source/core/platform/graphics/Font.cpp |
diff --git a/Source/core/platform/graphics/Font.cpp b/Source/core/platform/graphics/Font.cpp |
index f948c7e8c9de9c02fc4e0ea40e31a48f61e23e9a..51a0b291bc4faa283c39418f6e320644c0a13495 100644 |
--- a/Source/core/platform/graphics/Font.cpp |
+++ b/Source/core/platform/graphics/Font.cpp |
@@ -28,10 +28,11 @@ |
#include "core/platform/graphics/TextRun.h" |
#include "core/platform/graphics/WidthIterator.h" |
#include "core/platform/text/transcoder/FontTranscoder.h" |
-#include <wtf/MainThread.h> |
-#include <wtf/MathExtras.h> |
-#include <wtf/text/StringBuilder.h> |
-#include <wtf/UnusedParam.h> |
+#include "wtf/MainThread.h" |
+#include "wtf/MathExtras.h" |
+#include "wtf/StdLibExtras.h" |
+#include "wtf/UnusedParam.h" |
+#include "wtf/text/StringBuilder.h" |
using namespace WTF; |
using namespace Unicode; |
@@ -353,122 +354,79 @@ Font::CodePath Font::codePath(const TextRun& run) const |
return characterRangeCodePath(run.characters16(), run.length()); |
} |
-Font::CodePath Font::characterRangeCodePath(const UChar* characters, unsigned len) |
+static inline UChar keyExtractor(const UChar* value) |
{ |
- // FIXME: Should use a UnicodeSet in ports where ICU is used. Note that we |
- // can't simply use UnicodeCharacter Property/class because some characters |
- // are not 'combining', but still need to go to the complex path. |
- // Alternatively, we may as well consider binary search over a sorted |
- // list of ranges. |
- CodePath result = Simple; |
- for (unsigned i = 0; i < len; i++) { |
- const UChar c = characters[i]; |
- if (c < 0x2E5) // U+02E5 through U+02E9 (Modifier Letters : Tone letters) |
- continue; |
- if (c <= 0x2E9) |
- return Complex; |
- |
- if (c < 0x300) // U+0300 through U+036F Combining diacritical marks |
- continue; |
- if (c <= 0x36F) |
- return Complex; |
- |
- if (c < 0x0591 || c == 0x05BE) // U+0591 through U+05CF excluding U+05BE Hebrew combining marks, Hebrew punctuation Paseq, Sof Pasuq and Nun Hafukha |
- continue; |
- if (c <= 0x05CF) |
- return Complex; |
+ return *value; |
+} |
+Font::CodePath Font::characterRangeCodePath(const UChar* characters, unsigned len) |
+{ |
+ static UChar complexCodePathRanges[] = { |
+ // U+02E5 through U+02E9 (Modifier Letters : Tone letters) |
+ 0x2E5, 0x2E9, |
+ // U+0300 through U+036F Combining diacritical marks |
+ 0x300, 0x36F, |
+ // U+0591 through U+05CF excluding U+05BE Hebrew combining marks, ... |
+ 0x0591, 0x05BD, |
+ // ... Hebrew punctuation Paseq, Sof Pasuq and Nun Hafukha |
+ 0x05BF, 0x05CF, |
// U+0600 through U+109F Arabic, Syriac, Thaana, NKo, Samaritan, Mandaic, |
- // Devanagari, Bengali, Gurmukhi, Gujarati, Oriya, Tamil, Telugu, Kannada, |
+ // Devanagari, Bengali, Gurmukhi, Gujarati, Oriya, Tamil, Telugu, Kannada, |
// Malayalam, Sinhala, Thai, Lao, Tibetan, Myanmar |
- if (c < 0x0600) |
- continue; |
- if (c <= 0x109F) |
- return Complex; |
- |
- // U+1100 through U+11FF Hangul Jamo (only Ancient Korean should be left here if you precompose; |
- // Modern Korean will be precomposed as a result of step A) |
- if (c < 0x1100) |
- continue; |
- if (c <= 0x11FF) |
- return Complex; |
+ 0x0600, 0x109F, |
+ // U+1100 through U+11FF Hangul Jamo (only Ancient Korean should be left |
+ // here if you precompose; Modern Korean will be precomposed as a result of step A) |
+ 0x1100, 0x11FF, |
+ // U+135D through U+135F Ethiopic combining marks |
+ 0x135D, 0x135F, |
+ // U+1700 through U+18AF Tagalog, Hanunoo, Buhid, Tagbanwa, Khmer, Mongolian |
+ 0x1700, 0x18AF, |
+ // U+1900 through U+194F Limbu (Unicode 4.0) |
+ 0x1900, 0x194F, |
+ // U+1980 through U+19DF New Tai Lue |
+ 0x1980, 0x19DF, |
+ // U+1A00 through U+1CFF Buginese, Tai Tham, Balinese, Batak, Lepcha, Vedic |
+ 0x1A00, 0x1CFF, |
+ // U+1DC0 through U+1DFF Combining diacritical marks supplement |
+ 0x1DC0, 0x1DFF, |
+ // U+20D0 through U+20FF Combining marks for symbols |
+ 0x20D0, 0x20FF, |
+ // U+2CEF through U+2CF1 Combining marks for Coptic |
+ 0x2CEF, 0x2CF1, |
+ // U+302A through U+302F Ideographic and Hangul Tone marks |
+ 0x302A, 0x302F, |
+ // U+A67C through U+A67D Combining marks for old Cyrillic |
+ 0xA67C, 0xA67D, |
+ // U+A6F0 through U+A6F1 Combining mark for Bamum |
+ 0xA6F0, 0xA6F1, |
+ // U+A800 through U+ABFF Nagri, Phags-pa, Saurashtra, Devanagari Extended, |
+ // Hangul Jamo Ext. A, Javanese, Myanmar Extended A, Tai Viet, Meetei Mayek |
+ 0xA800, 0xABFF, |
+ // U+D7B0 through U+D7FF Hangul Jamo Ext. B |
+ 0xD7B0, 0xD7FF, |
+ // U+FE00 through U+FE0F Unicode variation selectors |
+ 0xFE00, 0xFE0F, |
+ // U+FE20 through U+FE2F Combining half marks |
+ 0xFE20, 0xFE2F, |
+ }; |
+ static size_t complexCodePathRangesCount = WTF_ARRAY_LENGTH(complexCodePathRanges); |
- if (c < 0x135D) // U+135D through U+135F Ethiopic combining marks |
- continue; |
- if (c <= 0x135F) |
- return Complex; |
- |
- if (c < 0x1700) // U+1780 through U+18AF Tagalog, Hanunoo, Buhid, Taghanwa,Khmer, Mongolian |
- continue; |
- if (c <= 0x18AF) |
- return Complex; |
- |
- if (c < 0x1900) // U+1900 through U+194F Limbu (Unicode 4.0) |
- continue; |
- if (c <= 0x194F) |
- return Complex; |
- |
- if (c < 0x1980) // U+1980 through U+19DF New Tai Lue |
- continue; |
- if (c <= 0x19DF) |
- return Complex; |
- |
- if (c < 0x1A00) // U+1A00 through U+1CFF Buginese, Tai Tham, Balinese, Batak, Lepcha, Vedic |
- continue; |
- if (c <= 0x1CFF) |
- return Complex; |
+ CodePath result = Simple; |
+ for (unsigned i = 0; i < len; i++) { |
+ const UChar c = characters[i]; |
- if (c < 0x1DC0) // U+1DC0 through U+1DFF Comining diacritical mark supplement |
+ // Shortcut for common case |
+ if (c < 0x2E5) |
continue; |
- if (c <= 0x1DFF) |
- return Complex; |
// U+1E00 through U+2000 characters with diacritics and stacked diacritics |
- if (c <= 0x2000) { |
+ if (c >= 0x1E00 && c <= 0x2000) { |
result = SimpleWithGlyphOverflow; |
continue; |
} |
- if (c < 0x20D0) // U+20D0 through U+20FF Combining marks for symbols |
- continue; |
- if (c <= 0x20FF) |
- return Complex; |
- |
- if (c < 0x2CEF) // U+2CEF through U+2CF1 Combining marks for Coptic |
- continue; |
- if (c <= 0x2CF1) |
- return Complex; |
- |
- if (c < 0x302A) // U+302A through U+302F Ideographic and Hangul Tone marks |
- continue; |
- if (c <= 0x302F) |
- return Complex; |
- |
- if (c < 0xA67C) // U+A67C through U+A67D Combining marks for old Cyrillic |
- continue; |
- if (c <= 0xA67D) |
- return Complex; |
- |
- if (c < 0xA6F0) // U+A6F0 through U+A6F1 Combining mark for Bamum |
- continue; |
- if (c <= 0xA6F1) |
- return Complex; |
- |
- // U+A800 through U+ABFF Nagri, Phags-pa, Saurashtra, Devanagari Extended, |
- // Hangul Jamo Ext. A, Javanese, Myanmar Extended A, Tai Viet, Meetei Mayek, |
- if (c < 0xA800) |
- continue; |
- if (c <= 0xABFF) |
- return Complex; |
- |
- if (c < 0xD7B0) // U+D7B0 through U+D7FF Hangul Jamo Ext. B |
- continue; |
- if (c <= 0xD7FF) |
- return Complex; |
- |
- if (c <= 0xDBFF) { |
- // High surrogate |
- |
+ // Surrogate pairs |
+ if (c > 0xD7FF && c <= 0xDBFF) { |
if (i == len - 1) |
continue; |
@@ -494,16 +452,25 @@ Font::CodePath Font::characterRangeCodePath(const UChar* characters, unsigned le |
continue; |
} |
- if (c < 0xFE00) // U+FE00 through U+FE0F Unicode variation selectors |
- continue; |
- if (c <= 0xFE0F) |
+ // Search for other Complex cases |
+ UChar* boundingCharacter = approximateBinarySearch<UChar, UChar>( |
+ (UChar*)complexCodePathRanges, complexCodePathRangesCount, c, keyExtractor); |
+ // Exact matches are complex |
+ if (*boundingCharacter == c) |
return Complex; |
- |
- if (c < 0xFE20) // U+FE20 through U+FE2F Combining half marks |
+ bool isEndOfRange = ((boundingCharacter - complexCodePathRanges) % 2); |
+ if (*boundingCharacter < c) { |
+ // Determine if we are in a range or out |
+ if (!isEndOfRange) |
+ return Complex; |
continue; |
- if (c <= 0xFE2F) |
+ } |
+ ASSERT(*boundingCharacter > c); |
+ // Determine if we are in a range or out - opposite condition to above |
+ if (isEndOfRange) |
return Complex; |
} |
+ |
return result; |
} |
@@ -517,12 +484,8 @@ bool Font::isCJKIdeograph(UChar32 c) |
if (c >= 0x3400 && c <= 0x4DBF) |
return true; |
- // CJK Radicals Supplement. |
- if (c >= 0x2E80 && c <= 0x2EFF) |
- return true; |
- |
- // Kangxi Radicals. |
- if (c >= 0x2F00 && c <= 0x2FDF) |
+ // CJK Radicals Supplement and Kangxi Radicals. |
+ if (c >= 0x2E80 && c <= 0x2FDF) |
return true; |
// CJK Strokes. |
@@ -577,10 +540,10 @@ bool Font::isCJKIdeographOrSymbol(UChar32 c) |
if (c >= 0x2170 && c <= 0x217B) |
return true; |
- if ((c == 0x217F) || (c == 0x2189) || (c == 0x2307) || (c == 0x2312) || (c == 0x23BE) || (c == 0x23BF)) |
+ if ((c == 0x217F) || (c == 0x2189) || (c == 0x2307) || (c == 0x2312)) |
return true; |
- if (c >= 0x23C0 && c <= 0x23CC) |
+ if (c >= 0x23BE && c <= 0x23CC) |
return true; |
if ((c == 0x23CE) || (c == 0x2423)) |
@@ -631,42 +594,21 @@ bool Font::isCJKIdeographOrSymbol(UChar32 c) |
if (c == 0x2B1A) |
return true; |
- // Ideographic Description Characters. |
- if (c >= 0x2FF0 && c <= 0x2FFF) |
- return true; |
- |
- // CJK Symbols and Punctuation, excluding 0x3030. |
- if (c >= 0x3000 && c < 0x3030) |
+ // Ideographic Description Characters, with CJK Symbols and Punctuation, excluding 0x3030. |
+ // Then Hiragana 0x3040 .. 0x309F, Katakana 0x30A0 .. 0x30FF, Bopomofo 0x3100 .. 0x312F |
+ if (c >= 0x2FF0 && c < 0x3030) |
return true; |
- if (c > 0x3030 && c <= 0x303F) |
+ if (c > 0x3030 && c <= 0x312F) |
return true; |
- // Hiragana |
- if (c >= 0x3040 && c <= 0x309F) |
- return true; |
- |
- // Katakana |
- if (c >= 0x30A0 && c <= 0x30FF) |
- return true; |
- |
- // Bopomofo |
- if (c >= 0x3100 && c <= 0x312F) |
- return true; |
- |
- if (c >= 0x3190 && c <= 0x319F) |
- return true; |
- |
- // Bopomofo Extended |
- if (c >= 0x31A0 && c <= 0x31BF) |
+ // Kanbun 0x3190 .. 0x319F and Bopomofo Extended 0x31A0 .. 0x31BF |
+ if (c >= 0x3190 && c <= 0x31BF) |
return true; |
- // Enclosed CJK Letters and Months. |
- if (c >= 0x3200 && c <= 0x32FF) |
- return true; |
- |
- // CJK Compatibility. |
- if (c >= 0x3300 && c <= 0x33FF) |
+ // Enclosed CJK Letters and Months (0x3200 .. 0x32FF). |
+ // CJK Compatibility (0x3300 .. 0x33FF). |
+ if (c >= 0x3200 && c <= 0x33FF) |
return true; |
if (c >= 0xF860 && c <= 0xF862) |
@@ -703,7 +645,7 @@ bool Font::isCJKIdeographOrSymbol(UChar32 c) |
if (c >= 0x1F170 && c <= 0x1F189) |
return true; |
- if (c >= 0x1F200 && c <= 0x1F6F) |
+ if (c >= 0x1F200 && c <= 0x1F6FF) |
return true; |
return isCJKIdeograph(c); |