| OLD | NEW |
| (Empty) |
| 1 /* | |
| 2 * Copyright (c) 2006, 2007, 2008, 2009, 2010, 2012 Google Inc. All rights reser
ved. | |
| 3 * | |
| 4 * Redistribution and use in source and binary forms, with or without | |
| 5 * modification, are permitted provided that the following conditions are | |
| 6 * met: | |
| 7 * | |
| 8 * * Redistributions of source code must retain the above copyright | |
| 9 * notice, this list of conditions and the following disclaimer. | |
| 10 * * Redistributions in binary form must reproduce the above | |
| 11 * copyright notice, this list of conditions and the following disclaimer | |
| 12 * in the documentation and/or other materials provided with the | |
| 13 * distribution. | |
| 14 * * Neither the name of Google Inc. nor the names of its | |
| 15 * contributors may be used to endorse or promote products derived from | |
| 16 * this software without specific prior written permission. | |
| 17 * | |
| 18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
| 19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
| 20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
| 21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
| 22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
| 23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
| 24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
| 25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
| 26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
| 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
| 28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
| 29 */ | |
| 30 | |
| 31 #include "config.h" | |
| 32 #include "platform/fonts/FontFallbackWin.h" | |
| 33 | |
| 34 #include "platform/win/HWndDC.h" | |
| 35 #include "wtf/HashMap.h" | |
| 36 #include "wtf/text/StringHash.h" | |
| 37 #include "wtf/text/WTFString.h" | |
| 38 #include <limits> | |
| 39 #include <unicode/locid.h> | |
| 40 #include <unicode/uchar.h> | |
| 41 | |
| 42 namespace WebCore { | |
| 43 | |
| 44 namespace { | |
| 45 | |
| 46 bool isFontPresent(const UChar* fontName) | |
| 47 { | |
| 48 HFONT hfont = CreateFont(12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, fontName); | |
| 49 if (!hfont) | |
| 50 return false; | |
| 51 HWndDC dc(0); | |
| 52 HGDIOBJ oldFont = static_cast<HFONT>(SelectObject(dc, hfont)); | |
| 53 WCHAR actualFontName[LF_FACESIZE]; | |
| 54 GetTextFace(dc, LF_FACESIZE, actualFontName); | |
| 55 actualFontName[LF_FACESIZE - 1] = 0; | |
| 56 SelectObject(dc, oldFont); | |
| 57 DeleteObject(hfont); | |
| 58 // We don't have to worry about East Asian fonts with locale-dependent | |
| 59 // names here for now. | |
| 60 // FIXME: Why not? | |
| 61 return !wcscmp(fontName, actualFontName); | |
| 62 } | |
| 63 | |
| 64 // A simple mapping from UScriptCode to family name. This is a sparse array, | |
| 65 // which works well since the range of UScriptCode values is small. | |
| 66 typedef const UChar* ScriptToFontMap[USCRIPT_CODE_LIMIT]; | |
| 67 | |
| 68 void initializeScriptFontMap(ScriptToFontMap& scriptFontMap) | |
| 69 { | |
| 70 struct FontMap { | |
| 71 UScriptCode script; | |
| 72 const UChar* family; | |
| 73 }; | |
| 74 | |
| 75 static const FontMap fontMap[] = { | |
| 76 {USCRIPT_LATIN, L"times new roman"}, | |
| 77 {USCRIPT_GREEK, L"times new roman"}, | |
| 78 {USCRIPT_CYRILLIC, L"times new roman"}, | |
| 79 // FIXME: Consider trying new Vista fonts before XP fonts for CJK. | |
| 80 // Some Vista users do want to use Vista cleartype CJK fonts. If we | |
| 81 // did, the results of tests with CJK characters would have to be | |
| 82 // regenerated for Vista. | |
| 83 {USCRIPT_SIMPLIFIED_HAN, L"simsun"}, | |
| 84 {USCRIPT_TRADITIONAL_HAN, L"pmingliu"}, | |
| 85 {USCRIPT_HIRAGANA, L"ms pgothic"}, | |
| 86 {USCRIPT_KATAKANA, L"ms pgothic"}, | |
| 87 {USCRIPT_KATAKANA_OR_HIRAGANA, L"ms pgothic"}, | |
| 88 {USCRIPT_HANGUL, L"gulim"}, | |
| 89 {USCRIPT_THAI, L"tahoma"}, | |
| 90 {USCRIPT_HEBREW, L"david"}, | |
| 91 {USCRIPT_ARABIC, L"tahoma"}, | |
| 92 {USCRIPT_DEVANAGARI, L"mangal"}, | |
| 93 {USCRIPT_BENGALI, L"vrinda"}, | |
| 94 {USCRIPT_GURMUKHI, L"raavi"}, | |
| 95 {USCRIPT_GUJARATI, L"shruti"}, | |
| 96 {USCRIPT_TAMIL, L"latha"}, | |
| 97 {USCRIPT_TELUGU, L"gautami"}, | |
| 98 {USCRIPT_KANNADA, L"tunga"}, | |
| 99 {USCRIPT_GEORGIAN, L"sylfaen"}, | |
| 100 {USCRIPT_ARMENIAN, L"sylfaen"}, | |
| 101 {USCRIPT_THAANA, L"mv boli"}, | |
| 102 {USCRIPT_CANADIAN_ABORIGINAL, L"euphemia"}, | |
| 103 {USCRIPT_CHEROKEE, L"plantagenet cherokee"}, | |
| 104 {USCRIPT_MONGOLIAN, L"mongolian balti"}, | |
| 105 // For USCRIPT_COMMON, we map blocks to scripts when | |
| 106 // that makes sense. | |
| 107 }; | |
| 108 | |
| 109 struct ScriptToFontFamilies { | |
| 110 UScriptCode script; | |
| 111 const UChar** families; | |
| 112 }; | |
| 113 | |
| 114 // Kartika on Vista or earlier lacks the support for Chillu | |
| 115 // letters added to Unicode 5.1. | |
| 116 // Try AnjaliOldLipi (a very widely used Malaylalam font with the full | |
| 117 // Unicode 5.x support) before falling back to Kartika. | |
| 118 static const UChar* malayalamFonts[] = {L"AnjaliOldLipi", L"Lohit Malayalam"
, L"Kartika", L"Rachana", 0}; | |
| 119 // Try Khmer OS before Vista fonts because 'Khmer OS' goes along better | |
| 120 // with Latin and looks better/larger for the same size. | |
| 121 static const UChar* khmerFonts[] = {L"Khmer OS", L"MoolBoran", L"DaunPenh",
L"Code2000", 0}; | |
| 122 // For the following 6 scripts, two or fonts are listed. The fonts in | |
| 123 // the 1st slot are not available on Windows XP. To support these | |
| 124 // scripts on XP, listed in the rest of slots are widely used | |
| 125 // fonts. | |
| 126 static const UChar* ethiopicFonts[] = {L"Nyala", L"Abyssinica SIL", L"Ethiop
ia Jiret", L"Visual Geez Unicode", L"GF Zemen Unicode", 0}; | |
| 127 static const UChar* oriyaFonts[] = {L"Kalinga", L"ori1Uni", L"Lohit Oriya",
0}; | |
| 128 static const UChar* laoFonts[] = {L"DokChampa", L"Saysettha OT", L"Phetsarat
h OT", L"Code2000", 0}; | |
| 129 static const UChar* tibetanFonts[] = {L"Microsoft Himalaya", L"Jomolhari", L
"Tibetan Machine Uni", 0}; | |
| 130 static const UChar* sinhalaFonts[] = {L"Iskoola Pota", L"AksharUnicode", 0}; | |
| 131 static const UChar* yiFonts[] = {L"Microsoft Yi Balti", L"Nuosu SIL", L"Code
2000", 0}; | |
| 132 // http://www.bethmardutho.org/support/meltho/download/index.php | |
| 133 static const UChar* syriacFonts[] = {L"Estrangelo Edessa", L"Estrangelo Nisi
bin", L"Code2000", 0}; | |
| 134 // No Myanmar/Burmese font is shipped with Windows, yet. Try a few | |
| 135 // widely available/used ones that supports Unicode 5.1 or later. | |
| 136 static const UChar* myanmarFonts[] = {L"Padauk", L"Parabaik", L"Myanmar3", L
"Code2000", 0}; | |
| 137 | |
| 138 static const ScriptToFontFamilies scriptToFontFamilies[] = { | |
| 139 {USCRIPT_MALAYALAM, malayalamFonts}, | |
| 140 {USCRIPT_KHMER, khmerFonts}, | |
| 141 {USCRIPT_ETHIOPIC, ethiopicFonts}, | |
| 142 {USCRIPT_ORIYA, oriyaFonts}, | |
| 143 {USCRIPT_LAO, laoFonts}, | |
| 144 {USCRIPT_TIBETAN, tibetanFonts}, | |
| 145 {USCRIPT_SINHALA, sinhalaFonts}, | |
| 146 {USCRIPT_YI, yiFonts}, | |
| 147 {USCRIPT_SYRIAC, syriacFonts}, | |
| 148 {USCRIPT_MYANMAR, myanmarFonts}, | |
| 149 }; | |
| 150 | |
| 151 for (size_t i = 0; i < WTF_ARRAY_LENGTH(fontMap); ++i) | |
| 152 scriptFontMap[fontMap[i].script] = fontMap[i].family; | |
| 153 | |
| 154 // FIXME: Instead of scanning the hard-coded list, we have to | |
| 155 // use EnumFont* to 'inspect' fonts to pick up fonts covering scripts | |
| 156 // when it's possible (e.g. using OS/2 table). If we do that, this | |
| 157 // had better be pulled out of here. | |
| 158 for (size_t i = 0; i < WTF_ARRAY_LENGTH(scriptToFontFamilies); ++i) { | |
| 159 UScriptCode script = scriptToFontFamilies[i].script; | |
| 160 scriptFontMap[script] = 0; | |
| 161 const UChar** familyPtr = scriptToFontFamilies[i].families; | |
| 162 while (*familyPtr) { | |
| 163 if (isFontPresent(*familyPtr)) { | |
| 164 scriptFontMap[script] = *familyPtr; | |
| 165 break; | |
| 166 } | |
| 167 ++familyPtr; | |
| 168 } | |
| 169 } | |
| 170 | |
| 171 // Initialize the locale-dependent mapping. | |
| 172 // Since Chrome synchronizes the ICU default locale with its UI locale, | |
| 173 // this ICU locale tells the current UI locale of Chrome. | |
| 174 icu::Locale locale = icu::Locale::getDefault(); | |
| 175 const UChar* localeFamily = 0; | |
| 176 if (locale == icu::Locale::getJapanese()) { | |
| 177 localeFamily = scriptFontMap[USCRIPT_HIRAGANA]; | |
| 178 } else if (locale == icu::Locale::getKorean()) { | |
| 179 localeFamily = scriptFontMap[USCRIPT_HANGUL]; | |
| 180 } else if (locale == icu::Locale::getTraditionalChinese()) { | |
| 181 localeFamily = scriptFontMap[USCRIPT_TRADITIONAL_HAN]; | |
| 182 } else { | |
| 183 // For other locales, use the simplified Chinese font for Han. | |
| 184 localeFamily = scriptFontMap[USCRIPT_SIMPLIFIED_HAN]; | |
| 185 } | |
| 186 if (localeFamily) | |
| 187 scriptFontMap[USCRIPT_HAN] = localeFamily; | |
| 188 } | |
| 189 | |
| 190 // There are a lot of characters in USCRIPT_COMMON that can be covered | |
| 191 // by fonts for scripts closely related to them. See | |
| 192 // http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[:Script=Common:] | |
| 193 // FIXME: make this more efficient with a wider coverage | |
| 194 UScriptCode getScriptBasedOnUnicodeBlock(int ucs4) | |
| 195 { | |
| 196 UBlockCode block = ublock_getCode(ucs4); | |
| 197 switch (block) { | |
| 198 case UBLOCK_CJK_SYMBOLS_AND_PUNCTUATION: | |
| 199 return USCRIPT_HAN; | |
| 200 case UBLOCK_HIRAGANA: | |
| 201 case UBLOCK_KATAKANA: | |
| 202 return USCRIPT_HIRAGANA; | |
| 203 case UBLOCK_ARABIC: | |
| 204 return USCRIPT_ARABIC; | |
| 205 case UBLOCK_THAI: | |
| 206 return USCRIPT_THAI; | |
| 207 case UBLOCK_GREEK: | |
| 208 return USCRIPT_GREEK; | |
| 209 case UBLOCK_DEVANAGARI: | |
| 210 // For Danda and Double Danda (U+0964, U+0965), use a Devanagari | |
| 211 // font for now although they're used by other scripts as well. | |
| 212 // Without a context, we can't do any better. | |
| 213 return USCRIPT_DEVANAGARI; | |
| 214 case UBLOCK_ARMENIAN: | |
| 215 return USCRIPT_ARMENIAN; | |
| 216 case UBLOCK_GEORGIAN: | |
| 217 return USCRIPT_GEORGIAN; | |
| 218 case UBLOCK_KANNADA: | |
| 219 return USCRIPT_KANNADA; | |
| 220 default: | |
| 221 return USCRIPT_COMMON; | |
| 222 } | |
| 223 } | |
| 224 | |
| 225 UScriptCode getScript(int ucs4) | |
| 226 { | |
| 227 UErrorCode err = U_ZERO_ERROR; | |
| 228 UScriptCode script = uscript_getScript(ucs4, &err); | |
| 229 // If script is invalid, common or inherited or there's an error, | |
| 230 // infer a script based on the unicode block of a character. | |
| 231 if (script <= USCRIPT_INHERITED || U_FAILURE(err)) | |
| 232 script = getScriptBasedOnUnicodeBlock(ucs4); | |
| 233 return script; | |
| 234 } | |
| 235 | |
| 236 } // namespace | |
| 237 | |
| 238 // FIXME: this is font fallback code version 0.1 | |
| 239 // - Cover all the scripts | |
| 240 // - Get the default font for each script/generic family from the | |
| 241 // preference instead of hardcoding in the source. | |
| 242 // (at least, read values from the registry for IE font settings). | |
| 243 // - Support generic families (from FontDescription) | |
| 244 // - If the default font for a script is not available, | |
| 245 // try some more fonts known to support it. Finally, we can | |
| 246 // use EnumFontFamilies or similar APIs to come up with a list of | |
| 247 // fonts supporting the script and cache the result. | |
| 248 // - Consider using UnicodeSet (or UnicodeMap) converted from | |
| 249 // GLYPHSET (BMP) or directly read from truetype cmap tables to | |
| 250 // keep track of which character is supported by which font | |
| 251 // - Update script_font_cache in response to WM_FONTCHANGE | |
| 252 | |
| 253 const UChar* getFontFamilyForScript(UScriptCode script, | |
| 254 FontDescription::GenericFamilyType generic) | |
| 255 { | |
| 256 static ScriptToFontMap scriptFontMap; | |
| 257 static bool initialized = false; | |
| 258 if (!initialized) { | |
| 259 initializeScriptFontMap(scriptFontMap); | |
| 260 initialized = true; | |
| 261 } | |
| 262 if (script == USCRIPT_INVALID_CODE) | |
| 263 return 0; | |
| 264 ASSERT(script < USCRIPT_CODE_LIMIT); | |
| 265 return scriptFontMap[script]; | |
| 266 } | |
| 267 | |
| 268 // FIXME: | |
| 269 // - Handle 'Inherited', 'Common' and 'Unknown' | |
| 270 // (see http://www.unicode.org/reports/tr24/#Usage_Model ) | |
| 271 // For 'Inherited' and 'Common', perhaps we need to | |
| 272 // accept another parameter indicating the previous family | |
| 273 // and just return it. | |
| 274 // - All the characters (or characters up to the point a single | |
| 275 // font can cover) need to be taken into account | |
| 276 const UChar* getFallbackFamily(UChar32 character, | |
| 277 FontDescription::GenericFamilyType generic, | |
| 278 UScriptCode* scriptChecked) | |
| 279 { | |
| 280 ASSERT(character); | |
| 281 UScriptCode script = getScript(character); | |
| 282 | |
| 283 // For the full-width ASCII characters (U+FF00 - U+FF5E), use the font for | |
| 284 // Han (determined in a locale-dependent way above). Full-width ASCII | |
| 285 // characters are rather widely used in Japanese and Chinese documents and | |
| 286 // they're fully covered by Chinese, Japanese and Korean fonts. | |
| 287 if (0xFF00 < character && character < 0xFF5F) | |
| 288 script = USCRIPT_HAN; | |
| 289 | |
| 290 if (script == USCRIPT_COMMON) | |
| 291 script = getScriptBasedOnUnicodeBlock(character); | |
| 292 | |
| 293 const UChar* family = getFontFamilyForScript(script, generic); | |
| 294 // Another lame work-around to cover non-BMP characters. | |
| 295 // If the font family for script is not found or the character is | |
| 296 // not in BMP (> U+FFFF), we resort to the hard-coded list of | |
| 297 // fallback fonts for now. | |
| 298 if (!family || character > 0xFFFF) { | |
| 299 int plane = character >> 16; | |
| 300 switch (plane) { | |
| 301 case 1: | |
| 302 family = L"code2001"; | |
| 303 break; | |
| 304 case 2: | |
| 305 // Use a Traditional Chinese ExtB font if in Traditional Chinese loc
ale. | |
| 306 // Otherwise, use a Simplified Chinese ExtB font. Windows Japanese | |
| 307 // fonts do support a small subset of ExtB (that are included in JIS
X 0213), | |
| 308 // but its coverage is rather sparse. | |
| 309 // Eventually, this should be controlled by lang/xml:lang. | |
| 310 if (icu::Locale::getDefault() == icu::Locale::getTraditionalChinese(
)) | |
| 311 family = L"pmingliu-extb"; | |
| 312 else | |
| 313 family = L"simsun-extb"; | |
| 314 break; | |
| 315 default: | |
| 316 family = L"lucida sans unicode"; | |
| 317 } | |
| 318 } | |
| 319 | |
| 320 if (scriptChecked) | |
| 321 *scriptChecked = script; | |
| 322 return family; | |
| 323 } | |
| 324 | |
| 325 | |
| 326 const UChar* getFallbackFamilyForFirstNonCommonCharacter(const UChar* characters
, | |
| 327 int length, | |
| 328 FontDescription::GenericFamilyType generic) | |
| 329 { | |
| 330 ASSERT(characters && characters[0] && length > 0); | |
| 331 UScriptCode script = USCRIPT_COMMON; | |
| 332 | |
| 333 // Sometimes characters common to script (e.g. space) is at | |
| 334 // the beginning of a string so that we need to skip them | |
| 335 // to get a font required to render the string. | |
| 336 int i = 0; | |
| 337 UChar32 ucs4 = 0; | |
| 338 while (i < length && script == USCRIPT_COMMON) { | |
| 339 U16_NEXT(characters, i, length, ucs4); | |
| 340 script = getScript(ucs4); | |
| 341 } | |
| 342 | |
| 343 const UChar* family = getFallbackFamily(ucs4, generic, 0); | |
| 344 | |
| 345 return family; | |
| 346 } | |
| 347 | |
| 348 } // namespace WebCore | |
| OLD | NEW |