| OLD | NEW | 
|---|
|  | (Empty) | 
| 1 /* |  | 
| 2  * Copyright (c) 2006, 2007, 2008, 2009, 2010, 2012 Google Inc. All rights reser
     ved. |  | 
| 3  * |  | 
| 4  * Redistribution and use in source and binary forms, with or without |  | 
| 5  * modification, are permitted provided that the following conditions are |  | 
| 6  * met: |  | 
| 7  * |  | 
| 8  *     * Redistributions of source code must retain the above copyright |  | 
| 9  * notice, this list of conditions and the following disclaimer. |  | 
| 10  *     * Redistributions in binary form must reproduce the above |  | 
| 11  * copyright notice, this list of conditions and the following disclaimer |  | 
| 12  * in the documentation and/or other materials provided with the |  | 
| 13  * distribution. |  | 
| 14  *     * Neither the name of Google Inc. nor the names of its |  | 
| 15  * contributors may be used to endorse or promote products derived from |  | 
| 16  * this software without specific prior written permission. |  | 
| 17  * |  | 
| 18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |  | 
| 19  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |  | 
| 20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |  | 
| 21  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |  | 
| 22  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |  | 
| 23  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |  | 
| 24  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |  | 
| 25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |  | 
| 26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |  | 
| 27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |  | 
| 28  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |  | 
| 29  */ |  | 
| 30 |  | 
| 31 #include "config.h" |  | 
| 32 #include "core/platform/graphics/chromium/FontFallbackWin.h" |  | 
| 33 |  | 
| 34 #include "platform/win/HWndDC.h" |  | 
| 35 #include "wtf/HashMap.h" |  | 
| 36 #include "wtf/text/StringHash.h" |  | 
| 37 #include "wtf/text/WTFString.h" |  | 
| 38 #include <limits> |  | 
| 39 #include <unicode/locid.h> |  | 
| 40 #include <unicode/uchar.h> |  | 
| 41 |  | 
| 42 namespace WebCore { |  | 
| 43 |  | 
| 44 namespace { |  | 
| 45 |  | 
| 46 bool isFontPresent(const UChar* fontName) |  | 
| 47 { |  | 
| 48     HFONT hfont = CreateFont(12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, fontName); |  | 
| 49     if (!hfont) |  | 
| 50         return false; |  | 
| 51     HWndDC dc(0); |  | 
| 52     HGDIOBJ oldFont = static_cast<HFONT>(SelectObject(dc, hfont)); |  | 
| 53     WCHAR actualFontName[LF_FACESIZE]; |  | 
| 54     GetTextFace(dc, LF_FACESIZE, actualFontName); |  | 
| 55     actualFontName[LF_FACESIZE - 1] = 0; |  | 
| 56     SelectObject(dc, oldFont); |  | 
| 57     DeleteObject(hfont); |  | 
| 58     // We don't have to worry about East Asian fonts with locale-dependent |  | 
| 59     // names here for now. |  | 
| 60     // FIXME: Why not? |  | 
| 61     return !wcscmp(fontName, actualFontName); |  | 
| 62 } |  | 
| 63 |  | 
| 64 // A simple mapping from UScriptCode to family name. This is a sparse array, |  | 
| 65 // which works well since the range of UScriptCode values is small. |  | 
| 66 typedef const UChar* ScriptToFontMap[USCRIPT_CODE_LIMIT]; |  | 
| 67 |  | 
| 68 void initializeScriptFontMap(ScriptToFontMap& scriptFontMap) |  | 
| 69 { |  | 
| 70     struct FontMap { |  | 
| 71         UScriptCode script; |  | 
| 72         const UChar* family; |  | 
| 73     }; |  | 
| 74 |  | 
| 75     static const FontMap fontMap[] = { |  | 
| 76         {USCRIPT_LATIN, L"times new roman"}, |  | 
| 77         {USCRIPT_GREEK, L"times new roman"}, |  | 
| 78         {USCRIPT_CYRILLIC, L"times new roman"}, |  | 
| 79         // FIXME: Consider trying new Vista fonts before XP fonts for CJK. |  | 
| 80         // Some Vista users do want to use Vista cleartype CJK fonts. If we |  | 
| 81         // did, the results of tests with CJK characters would have to be |  | 
| 82         // regenerated for Vista. |  | 
| 83         {USCRIPT_SIMPLIFIED_HAN, L"simsun"}, |  | 
| 84         {USCRIPT_TRADITIONAL_HAN, L"pmingliu"}, |  | 
| 85         {USCRIPT_HIRAGANA, L"ms pgothic"}, |  | 
| 86         {USCRIPT_KATAKANA, L"ms pgothic"}, |  | 
| 87         {USCRIPT_KATAKANA_OR_HIRAGANA, L"ms pgothic"}, |  | 
| 88         {USCRIPT_HANGUL, L"gulim"}, |  | 
| 89         {USCRIPT_THAI, L"tahoma"}, |  | 
| 90         {USCRIPT_HEBREW, L"david"}, |  | 
| 91         {USCRIPT_ARABIC, L"tahoma"}, |  | 
| 92         {USCRIPT_DEVANAGARI, L"mangal"}, |  | 
| 93         {USCRIPT_BENGALI, L"vrinda"}, |  | 
| 94         {USCRIPT_GURMUKHI, L"raavi"}, |  | 
| 95         {USCRIPT_GUJARATI, L"shruti"}, |  | 
| 96         {USCRIPT_TAMIL, L"latha"}, |  | 
| 97         {USCRIPT_TELUGU, L"gautami"}, |  | 
| 98         {USCRIPT_KANNADA, L"tunga"}, |  | 
| 99         {USCRIPT_GEORGIAN, L"sylfaen"}, |  | 
| 100         {USCRIPT_ARMENIAN, L"sylfaen"}, |  | 
| 101         {USCRIPT_THAANA, L"mv boli"}, |  | 
| 102         {USCRIPT_CANADIAN_ABORIGINAL, L"euphemia"}, |  | 
| 103         {USCRIPT_CHEROKEE, L"plantagenet cherokee"}, |  | 
| 104         {USCRIPT_MONGOLIAN, L"mongolian balti"}, |  | 
| 105         // For USCRIPT_COMMON, we map blocks to scripts when |  | 
| 106         // that makes sense. |  | 
| 107     }; |  | 
| 108 |  | 
| 109     struct ScriptToFontFamilies { |  | 
| 110         UScriptCode script; |  | 
| 111         const UChar** families; |  | 
| 112     }; |  | 
| 113 |  | 
| 114     // Kartika on Vista or earlier lacks the support for Chillu |  | 
| 115     // letters added to Unicode 5.1. |  | 
| 116     // Try AnjaliOldLipi (a very widely used Malaylalam font with the full |  | 
| 117     // Unicode 5.x support) before falling back to Kartika. |  | 
| 118     static const UChar* malayalamFonts[] = {L"AnjaliOldLipi", L"Lohit Malayalam"
     , L"Kartika", L"Rachana", 0}; |  | 
| 119     // Try Khmer OS before Vista fonts because 'Khmer OS' goes along better |  | 
| 120     // with Latin and looks better/larger for the same size. |  | 
| 121     static const UChar* khmerFonts[] = {L"Khmer OS", L"MoolBoran", L"DaunPenh", 
     L"Code2000", 0}; |  | 
| 122     // For the following 6 scripts, two or fonts are listed. The fonts in |  | 
| 123     // the 1st slot are not available on Windows XP. To support these |  | 
| 124     // scripts on XP, listed in the rest of slots are widely used |  | 
| 125     // fonts. |  | 
| 126     static const UChar* ethiopicFonts[] = {L"Nyala", L"Abyssinica SIL", L"Ethiop
     ia Jiret", L"Visual Geez Unicode", L"GF Zemen Unicode", 0}; |  | 
| 127     static const UChar* oriyaFonts[] = {L"Kalinga", L"ori1Uni", L"Lohit Oriya", 
     0}; |  | 
| 128     static const UChar* laoFonts[] = {L"DokChampa", L"Saysettha OT", L"Phetsarat
     h OT", L"Code2000", 0}; |  | 
| 129     static const UChar* tibetanFonts[] = {L"Microsoft Himalaya", L"Jomolhari", L
     "Tibetan Machine Uni", 0}; |  | 
| 130     static const UChar* sinhalaFonts[] = {L"Iskoola Pota", L"AksharUnicode", 0}; |  | 
| 131     static const UChar* yiFonts[] = {L"Microsoft Yi Balti", L"Nuosu SIL", L"Code
     2000", 0}; |  | 
| 132     // http://www.bethmardutho.org/support/meltho/download/index.php |  | 
| 133     static const UChar* syriacFonts[] = {L"Estrangelo Edessa", L"Estrangelo Nisi
     bin", L"Code2000", 0}; |  | 
| 134     // No Myanmar/Burmese font is shipped with Windows, yet. Try a few |  | 
| 135     // widely available/used ones that supports Unicode 5.1 or later. |  | 
| 136     static const UChar* myanmarFonts[] = {L"Padauk", L"Parabaik", L"Myanmar3", L
     "Code2000", 0}; |  | 
| 137 |  | 
| 138     static const ScriptToFontFamilies scriptToFontFamilies[] = { |  | 
| 139         {USCRIPT_MALAYALAM, malayalamFonts}, |  | 
| 140         {USCRIPT_KHMER, khmerFonts}, |  | 
| 141         {USCRIPT_ETHIOPIC, ethiopicFonts}, |  | 
| 142         {USCRIPT_ORIYA, oriyaFonts}, |  | 
| 143         {USCRIPT_LAO, laoFonts}, |  | 
| 144         {USCRIPT_TIBETAN, tibetanFonts}, |  | 
| 145         {USCRIPT_SINHALA, sinhalaFonts}, |  | 
| 146         {USCRIPT_YI, yiFonts}, |  | 
| 147         {USCRIPT_SYRIAC, syriacFonts}, |  | 
| 148         {USCRIPT_MYANMAR, myanmarFonts}, |  | 
| 149     }; |  | 
| 150 |  | 
| 151     for (size_t i = 0; i < WTF_ARRAY_LENGTH(fontMap); ++i) |  | 
| 152         scriptFontMap[fontMap[i].script] = fontMap[i].family; |  | 
| 153 |  | 
| 154     // FIXME: Instead of scanning the hard-coded list, we have to |  | 
| 155     // use EnumFont* to 'inspect' fonts to pick up fonts covering scripts |  | 
| 156     // when it's possible (e.g. using OS/2 table). If we do that, this |  | 
| 157     // had better be pulled out of here. |  | 
| 158     for (size_t i = 0; i < WTF_ARRAY_LENGTH(scriptToFontFamilies); ++i) { |  | 
| 159         UScriptCode script = scriptToFontFamilies[i].script; |  | 
| 160         scriptFontMap[script] = 0; |  | 
| 161         const UChar** familyPtr = scriptToFontFamilies[i].families; |  | 
| 162         while (*familyPtr) { |  | 
| 163             if (isFontPresent(*familyPtr)) { |  | 
| 164                 scriptFontMap[script] = *familyPtr; |  | 
| 165                 break; |  | 
| 166             } |  | 
| 167             ++familyPtr; |  | 
| 168         } |  | 
| 169     } |  | 
| 170 |  | 
| 171     // Initialize the locale-dependent mapping. |  | 
| 172     // Since Chrome synchronizes the ICU default locale with its UI locale, |  | 
| 173     // this ICU locale tells the current UI locale of Chrome. |  | 
| 174     icu::Locale locale = icu::Locale::getDefault(); |  | 
| 175     const UChar* localeFamily = 0; |  | 
| 176     if (locale == icu::Locale::getJapanese()) { |  | 
| 177         localeFamily = scriptFontMap[USCRIPT_HIRAGANA]; |  | 
| 178     } else if (locale == icu::Locale::getKorean()) { |  | 
| 179         localeFamily = scriptFontMap[USCRIPT_HANGUL]; |  | 
| 180     } else if (locale == icu::Locale::getTraditionalChinese()) { |  | 
| 181         localeFamily = scriptFontMap[USCRIPT_TRADITIONAL_HAN]; |  | 
| 182     } else { |  | 
| 183         // For other locales, use the simplified Chinese font for Han. |  | 
| 184         localeFamily = scriptFontMap[USCRIPT_SIMPLIFIED_HAN]; |  | 
| 185     } |  | 
| 186     if (localeFamily) |  | 
| 187         scriptFontMap[USCRIPT_HAN] = localeFamily; |  | 
| 188 } |  | 
| 189 |  | 
| 190 // There are a lot of characters in USCRIPT_COMMON that can be covered |  | 
| 191 // by fonts for scripts closely related to them. See |  | 
| 192 // http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[:Script=Common:] |  | 
| 193 // FIXME: make this more efficient with a wider coverage |  | 
| 194 UScriptCode getScriptBasedOnUnicodeBlock(int ucs4) |  | 
| 195 { |  | 
| 196     UBlockCode block = ublock_getCode(ucs4); |  | 
| 197     switch (block) { |  | 
| 198     case UBLOCK_CJK_SYMBOLS_AND_PUNCTUATION: |  | 
| 199         return USCRIPT_HAN; |  | 
| 200     case UBLOCK_HIRAGANA: |  | 
| 201     case UBLOCK_KATAKANA: |  | 
| 202         return USCRIPT_HIRAGANA; |  | 
| 203     case UBLOCK_ARABIC: |  | 
| 204         return USCRIPT_ARABIC; |  | 
| 205     case UBLOCK_THAI: |  | 
| 206         return USCRIPT_THAI; |  | 
| 207     case UBLOCK_GREEK: |  | 
| 208         return USCRIPT_GREEK; |  | 
| 209     case UBLOCK_DEVANAGARI: |  | 
| 210         // For Danda and Double Danda (U+0964, U+0965), use a Devanagari |  | 
| 211         // font for now although they're used by other scripts as well. |  | 
| 212         // Without a context, we can't do any better. |  | 
| 213         return USCRIPT_DEVANAGARI; |  | 
| 214     case UBLOCK_ARMENIAN: |  | 
| 215         return USCRIPT_ARMENIAN; |  | 
| 216     case UBLOCK_GEORGIAN: |  | 
| 217         return USCRIPT_GEORGIAN; |  | 
| 218     case UBLOCK_KANNADA: |  | 
| 219         return USCRIPT_KANNADA; |  | 
| 220     default: |  | 
| 221         return USCRIPT_COMMON; |  | 
| 222     } |  | 
| 223 } |  | 
| 224 |  | 
| 225 UScriptCode getScript(int ucs4) |  | 
| 226 { |  | 
| 227     UErrorCode err = U_ZERO_ERROR; |  | 
| 228     UScriptCode script = uscript_getScript(ucs4, &err); |  | 
| 229     // If script is invalid, common or inherited or there's an error, |  | 
| 230     // infer a script based on the unicode block of a character. |  | 
| 231     if (script <= USCRIPT_INHERITED || U_FAILURE(err)) |  | 
| 232         script = getScriptBasedOnUnicodeBlock(ucs4); |  | 
| 233     return script; |  | 
| 234 } |  | 
| 235 |  | 
| 236 } // namespace |  | 
| 237 |  | 
| 238 // FIXME: this is font fallback code version 0.1 |  | 
| 239 //  - Cover all the scripts |  | 
| 240 //  - Get the default font for each script/generic family from the |  | 
| 241 //    preference instead of hardcoding in the source. |  | 
| 242 //    (at least, read values from the registry for IE font settings). |  | 
| 243 //  - Support generic families (from FontDescription) |  | 
| 244 //  - If the default font for a script is not available, |  | 
| 245 //    try some more fonts known to support it. Finally, we can |  | 
| 246 //    use EnumFontFamilies or similar APIs to come up with a list of |  | 
| 247 //    fonts supporting the script and cache the result. |  | 
| 248 //  - Consider using UnicodeSet (or UnicodeMap) converted from |  | 
| 249 //    GLYPHSET (BMP) or directly read from truetype cmap tables to |  | 
| 250 //    keep track of which character is supported by which font |  | 
| 251 //  - Update script_font_cache in response to WM_FONTCHANGE |  | 
| 252 |  | 
| 253 const UChar* getFontFamilyForScript(UScriptCode script, |  | 
| 254     FontDescription::GenericFamilyType generic) |  | 
| 255 { |  | 
| 256     static ScriptToFontMap scriptFontMap; |  | 
| 257     static bool initialized = false; |  | 
| 258     if (!initialized) { |  | 
| 259         initializeScriptFontMap(scriptFontMap); |  | 
| 260         initialized = true; |  | 
| 261     } |  | 
| 262     if (script == USCRIPT_INVALID_CODE) |  | 
| 263         return 0; |  | 
| 264     ASSERT(script < USCRIPT_CODE_LIMIT); |  | 
| 265     return scriptFontMap[script]; |  | 
| 266 } |  | 
| 267 |  | 
| 268 // FIXME: |  | 
| 269 //  - Handle 'Inherited', 'Common' and 'Unknown' |  | 
| 270 //    (see http://www.unicode.org/reports/tr24/#Usage_Model ) |  | 
| 271 //    For 'Inherited' and 'Common', perhaps we need to |  | 
| 272 //    accept another parameter indicating the previous family |  | 
| 273 //    and just return it. |  | 
| 274 //  - All the characters (or characters up to the point a single |  | 
| 275 //    font can cover) need to be taken into account |  | 
| 276 const UChar* getFallbackFamily(UChar32 character, |  | 
| 277     FontDescription::GenericFamilyType generic, |  | 
| 278     UScriptCode* scriptChecked) |  | 
| 279 { |  | 
| 280     ASSERT(character); |  | 
| 281     UScriptCode script = getScript(character); |  | 
| 282 |  | 
| 283     // For the full-width ASCII characters (U+FF00 - U+FF5E), use the font for |  | 
| 284     // Han (determined in a locale-dependent way above). Full-width ASCII |  | 
| 285     // characters are rather widely used in Japanese and Chinese documents and |  | 
| 286     // they're fully covered by Chinese, Japanese and Korean fonts. |  | 
| 287     if (0xFF00 < character && character < 0xFF5F) |  | 
| 288         script = USCRIPT_HAN; |  | 
| 289 |  | 
| 290     if (script == USCRIPT_COMMON) |  | 
| 291         script = getScriptBasedOnUnicodeBlock(character); |  | 
| 292 |  | 
| 293     const UChar* family = getFontFamilyForScript(script, generic); |  | 
| 294     // Another lame work-around to cover non-BMP characters. |  | 
| 295     // If the font family for script is not found or the character is |  | 
| 296     // not in BMP (> U+FFFF), we resort to the hard-coded list of |  | 
| 297     // fallback fonts for now. |  | 
| 298     if (!family || character > 0xFFFF) { |  | 
| 299         int plane = character >> 16; |  | 
| 300         switch (plane) { |  | 
| 301         case 1: |  | 
| 302             family = L"code2001"; |  | 
| 303             break; |  | 
| 304         case 2: |  | 
| 305             // Use a Traditional Chinese ExtB font if in Traditional Chinese loc
     ale. |  | 
| 306             // Otherwise, use a Simplified Chinese ExtB font. Windows Japanese |  | 
| 307             // fonts do support a small subset of ExtB (that are included in JIS
      X 0213), |  | 
| 308             // but its coverage is rather sparse. |  | 
| 309             // Eventually, this should be controlled by lang/xml:lang. |  | 
| 310             if (icu::Locale::getDefault() == icu::Locale::getTraditionalChinese(
     )) |  | 
| 311                 family = L"pmingliu-extb"; |  | 
| 312             else |  | 
| 313                 family = L"simsun-extb"; |  | 
| 314             break; |  | 
| 315         default: |  | 
| 316             family = L"lucida sans unicode"; |  | 
| 317         } |  | 
| 318     } |  | 
| 319 |  | 
| 320     if (scriptChecked) |  | 
| 321         *scriptChecked = script; |  | 
| 322     return family; |  | 
| 323 } |  | 
| 324 |  | 
| 325 |  | 
| 326 const UChar* getFallbackFamilyForFirstNonCommonCharacter(const UChar* characters
     , |  | 
| 327     int length, |  | 
| 328     FontDescription::GenericFamilyType generic) |  | 
| 329 { |  | 
| 330     ASSERT(characters && characters[0] && length > 0); |  | 
| 331     UScriptCode script = USCRIPT_COMMON; |  | 
| 332 |  | 
| 333     // Sometimes characters common to script (e.g. space) is at |  | 
| 334     // the beginning of a string so that we need to skip them |  | 
| 335     // to get a font required to render the string. |  | 
| 336     int i = 0; |  | 
| 337     UChar32 ucs4 = 0; |  | 
| 338     while (i < length && script == USCRIPT_COMMON) { |  | 
| 339         U16_NEXT(characters, i, length, ucs4); |  | 
| 340         script = getScript(ucs4); |  | 
| 341     } |  | 
| 342 |  | 
| 343     const UChar* family = getFallbackFamily(ucs4, generic, 0); |  | 
| 344 |  | 
| 345     return family; |  | 
| 346 } |  | 
| 347 |  | 
| 348 } // namespace WebCore |  | 
| OLD | NEW | 
|---|