| OLD | NEW |
| (Empty) |
| 1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 #include "base/gfx/font_utils.h" | |
| 6 | |
| 7 #include <limits> | |
| 8 #include <map> | |
| 9 | |
| 10 #include "base/gfx/uniscribe.h" | |
| 11 #include "base/logging.h" | |
| 12 #include "base/singleton.h" | |
| 13 #include "base/string_util.h" | |
| 14 #include "unicode/locid.h" | |
| 15 #include "unicode/uchar.h" | |
| 16 | |
| 17 namespace gfx { | |
| 18 | |
| 19 namespace { | |
| 20 | |
| 21 // hash_map has extra cost with no sizable gain for a small number of integer | |
| 22 // key items. When the map size becomes much bigger (which will be later as | |
| 23 // more scripts are added) and this turns out to be prominent in the profile, we | |
| 24 // may consider switching to hash_map (or just an array if we support all the | |
| 25 // scripts) | |
| 26 typedef std::map<UScriptCode, const wchar_t*> ScriptToFontMap; | |
| 27 | |
| 28 struct ScriptToFontMapSingletonTraits | |
| 29 : public DefaultSingletonTraits<ScriptToFontMap> { | |
| 30 static ScriptToFontMap* New() { | |
| 31 struct FontMap { | |
| 32 UScriptCode script; | |
| 33 const wchar_t* family; | |
| 34 }; | |
| 35 | |
| 36 const static FontMap font_map[] = { | |
| 37 {USCRIPT_LATIN, L"times new roman"}, | |
| 38 {USCRIPT_GREEK, L"times new roman"}, | |
| 39 {USCRIPT_CYRILLIC, L"times new roman"}, | |
| 40 {USCRIPT_SIMPLIFIED_HAN, L"simsun"}, | |
| 41 //{USCRIPT_TRADITIONAL_HAN, L"pmingliu"}, | |
| 42 {USCRIPT_HIRAGANA, L"ms pgothic"}, | |
| 43 {USCRIPT_KATAKANA, L"ms pgothic"}, | |
| 44 {USCRIPT_KATAKANA_OR_HIRAGANA, L"ms pgothic"}, | |
| 45 {USCRIPT_HANGUL, L"gulim"}, | |
| 46 {USCRIPT_THAI, L"tahoma"}, | |
| 47 {USCRIPT_HEBREW, L"david"}, | |
| 48 {USCRIPT_ARABIC, L"tahoma"}, | |
| 49 {USCRIPT_DEVANAGARI, L"mangal"}, | |
| 50 {USCRIPT_BENGALI, L"vrinda"}, | |
| 51 {USCRIPT_GURMUKHI, L"raavi"}, | |
| 52 {USCRIPT_GUJARATI, L"shruti"}, | |
| 53 {USCRIPT_ORIYA, L"kalinga"}, | |
| 54 {USCRIPT_TAMIL, L"latha"}, | |
| 55 {USCRIPT_TELUGU, L"gautami"}, | |
| 56 {USCRIPT_KANNADA, L"tunga"}, | |
| 57 {USCRIPT_MALAYALAM, L"kartika"}, | |
| 58 {USCRIPT_LAO, L"dokchampa"}, | |
| 59 {USCRIPT_TIBETAN, L"microsoft himalaya"}, | |
| 60 {USCRIPT_GEORGIAN, L"sylfaen"}, | |
| 61 {USCRIPT_ARMENIAN, L"sylfaen"}, | |
| 62 {USCRIPT_ETHIOPIC, L"nyala"}, | |
| 63 {USCRIPT_CANADIAN_ABORIGINAL, L"euphemia"}, | |
| 64 {USCRIPT_CHEROKEE, L"plantagenet cherokee"}, | |
| 65 {USCRIPT_YI, L"microsoft yi balti"}, | |
| 66 {USCRIPT_SINHALA, L"iskoola pota"}, | |
| 67 {USCRIPT_SYRIAC, L"estrangelo edessa"}, | |
| 68 {USCRIPT_KHMER, L"daunpenh"}, | |
| 69 {USCRIPT_THAANA, L"mv boli"}, | |
| 70 {USCRIPT_MONGOLIAN, L"mongolian balti"}, | |
| 71 {USCRIPT_MYANMAR, L"padauk"}, | |
| 72 // For USCRIPT_COMMON, we map blocks to scripts when | |
| 73 // that makes sense. | |
| 74 }; | |
| 75 | |
| 76 ScriptToFontMap* new_instance = new ScriptToFontMap; | |
| 77 // Cannot recover from OOM so that there's no need to check. | |
| 78 for (int i = 0; i < arraysize(font_map); ++i) | |
| 79 (*new_instance)[font_map[i].script] = font_map[i].family; | |
| 80 | |
| 81 // Initialize the locale-dependent mapping. | |
| 82 // Since Chrome synchronizes the ICU default locale with its UI locale, | |
| 83 // this ICU locale tells the current UI locale of Chrome. | |
| 84 Locale locale = Locale::getDefault(); | |
| 85 ScriptToFontMap::const_iterator iter; | |
| 86 if (locale == Locale::getJapanese()) { | |
| 87 iter = new_instance->find(USCRIPT_HIRAGANA); | |
| 88 } else if (locale == Locale::getKorean()) { | |
| 89 iter = new_instance->find(USCRIPT_HANGUL); | |
| 90 } else { | |
| 91 // Use Simplified Chinese font for all other locales including | |
| 92 // Traditional Chinese because Simsun (SC font) has a wider | |
| 93 // coverage (covering both SC and TC) than PMingLiu (TC font). | |
| 94 // This also speeds up the TC version of Chrome when rendering SC pages. | |
| 95 iter = new_instance->find(USCRIPT_SIMPLIFIED_HAN); | |
| 96 } | |
| 97 if (iter != new_instance->end()) | |
| 98 (*new_instance)[USCRIPT_HAN] = iter->second; | |
| 99 | |
| 100 return new_instance; | |
| 101 } | |
| 102 }; | |
| 103 | |
| 104 Singleton<ScriptToFontMap, ScriptToFontMapSingletonTraits> script_font_map; | |
| 105 | |
| 106 const int kUndefinedAscent = std::numeric_limits<int>::min(); | |
| 107 | |
| 108 // Given an HFONT, return the ascent. If GetTextMetrics fails, | |
| 109 // kUndefinedAscent is returned, instead. | |
| 110 int GetAscent(HFONT hfont) { | |
| 111 HDC dc = GetDC(NULL); | |
| 112 HGDIOBJ oldFont = SelectObject(dc, hfont); | |
| 113 TEXTMETRIC tm; | |
| 114 BOOL got_metrics = GetTextMetrics(dc, &tm); | |
| 115 SelectObject(dc, oldFont); | |
| 116 ReleaseDC(NULL, dc); | |
| 117 return got_metrics ? tm.tmAscent : kUndefinedAscent; | |
| 118 } | |
| 119 | |
| 120 struct FontData { | |
| 121 FontData() : hfont(NULL), ascent(kUndefinedAscent), script_cache(NULL) {} | |
| 122 HFONT hfont; | |
| 123 int ascent; | |
| 124 mutable SCRIPT_CACHE script_cache; | |
| 125 }; | |
| 126 | |
| 127 // Again, using hash_map does not earn us much here. | |
| 128 // page_cycler_test intl2 gave us a 'better' result with map than with hash_map | |
| 129 // even though they're well-within 1-sigma of each other so that the difference | |
| 130 // is not significant. On the other hand, some pages in intl2 seem to | |
| 131 // take longer to load with map in the 1st pass. Need to experiment further. | |
| 132 typedef std::map<std::wstring, FontData*> FontDataCache; | |
| 133 struct FontDataCacheSingletonTraits | |
| 134 : public DefaultSingletonTraits<FontDataCache> { | |
| 135 static void Delete(FontDataCache* cache) { | |
| 136 FontDataCache::iterator iter = cache->begin(); | |
| 137 while (iter != cache->end()) { | |
| 138 SCRIPT_CACHE script_cache = iter->second->script_cache; | |
| 139 if (script_cache) | |
| 140 ScriptFreeCache(&script_cache); | |
| 141 delete iter->second; | |
| 142 ++iter; | |
| 143 } | |
| 144 delete cache; | |
| 145 } | |
| 146 }; | |
| 147 | |
| 148 } // namespace | |
| 149 | |
| 150 // TODO(jungshik) : this is font fallback code version 0.1 | |
| 151 // - Cover all the scripts | |
| 152 // - Get the default font for each script/generic family from the | |
| 153 // preference instead of hardcoding in the source. | |
| 154 // (at least, read values from the registry for IE font settings). | |
| 155 // - Support generic families (from FontDescription) | |
| 156 // - If the default font for a script is not available, | |
| 157 // try some more fonts known to support it. Finally, we can | |
| 158 // use EnumFontFamilies or similar APIs to come up with a list of | |
| 159 // fonts supporting the script and cache the result. | |
| 160 // - Consider using UnicodeSet (or UnicodeMap) converted from | |
| 161 // GLYPHSET (BMP) or directly read from truetype cmap tables to | |
| 162 // keep track of which character is supported by which font | |
| 163 // - Update script_font_cache in response to WM_FONTCHANGE | |
| 164 | |
| 165 const wchar_t* GetFontFamilyForScript(UScriptCode script, | |
| 166 GenericFamilyType generic) { | |
| 167 ScriptToFontMap::const_iterator iter = script_font_map->find(script); | |
| 168 const wchar_t* family = NULL; | |
| 169 if (iter != script_font_map->end()) { | |
| 170 family = iter->second; | |
| 171 } | |
| 172 return family; | |
| 173 } | |
| 174 | |
| 175 // TODO(jungshik) | |
| 176 // - Handle 'Inherited', 'Common' and 'Unknown' | |
| 177 // (see http://www.unicode.org/reports/tr24/#Usage_Model ) | |
| 178 // For 'Inherited' and 'Common', perhaps we need to | |
| 179 // accept another parameter indicating the previous family | |
| 180 // and just return it. | |
| 181 // - All the characters (or characters up to the point a single | |
| 182 // font can cover) need to be taken into account | |
| 183 const wchar_t* GetFallbackFamily(const wchar_t *characters, | |
| 184 int length, | |
| 185 GenericFamilyType generic, | |
| 186 UChar32 *char_checked, | |
| 187 UScriptCode *script_checked) { | |
| 188 DCHECK(characters && characters[0] && length > 0); | |
| 189 UScriptCode script = USCRIPT_COMMON; | |
| 190 | |
| 191 // Sometimes characters common to script (e.g. space) is at | |
| 192 // the beginning of a string so that we need to skip them | |
| 193 // to get a font required to render the string. | |
| 194 int i = 0; | |
| 195 UChar32 ucs4 = 0; | |
| 196 while (i < length && script == USCRIPT_COMMON || | |
| 197 script == USCRIPT_INVALID_CODE) { | |
| 198 U16_NEXT(characters, i, length, ucs4); | |
| 199 UErrorCode err = U_ZERO_ERROR; | |
| 200 script = uscript_getScript(ucs4, &err); | |
| 201 // silently ignore the error | |
| 202 } | |
| 203 | |
| 204 // hack for full width ASCII. For the full-width ASCII, use the font | |
| 205 // for Han (which is locale-dependent). | |
| 206 if (0xFF00 < ucs4 && ucs4 < 0xFF5F) | |
| 207 script = USCRIPT_HAN; | |
| 208 | |
| 209 // There are a lot of characters in USCRIPT_COMMON that can be covered | |
| 210 // by fonts for scripts closely related to them. | |
| 211 // See http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[:Script=Common:] | |
| 212 // TODO(jungshik): make this more efficient with a wider coverage | |
| 213 if (script == USCRIPT_COMMON || script == USCRIPT_INHERITED) { | |
| 214 UBlockCode block = ublock_getCode(ucs4); | |
| 215 switch (block) { | |
| 216 case UBLOCK_BASIC_LATIN: | |
| 217 script = USCRIPT_LATIN; | |
| 218 break; | |
| 219 case UBLOCK_CJK_SYMBOLS_AND_PUNCTUATION: | |
| 220 script = USCRIPT_HAN; | |
| 221 break; | |
| 222 case UBLOCK_HIRAGANA: | |
| 223 case UBLOCK_KATAKANA: | |
| 224 script = USCRIPT_HIRAGANA; | |
| 225 break; | |
| 226 case UBLOCK_ARABIC: | |
| 227 script = USCRIPT_ARABIC; | |
| 228 break; | |
| 229 case UBLOCK_GREEK: | |
| 230 script = USCRIPT_GREEK; | |
| 231 break; | |
| 232 case UBLOCK_DEVANAGARI: | |
| 233 // For Danda and Double Danda (U+0964, U+0965), use a Devanagari | |
| 234 // font for now although they're used by other scripts as well. | |
| 235 // Without a context, we can't do any better. | |
| 236 script = USCRIPT_DEVANAGARI; | |
| 237 break; | |
| 238 case UBLOCK_ARMENIAN: | |
| 239 script = USCRIPT_ARMENIAN; | |
| 240 break; | |
| 241 case UBLOCK_GEORGIAN: | |
| 242 script = USCRIPT_GEORGIAN; | |
| 243 break; | |
| 244 case UBLOCK_KANNADA: | |
| 245 script = USCRIPT_KANNADA; | |
| 246 break; | |
| 247 } | |
| 248 } | |
| 249 | |
| 250 // Another lame work-around to cover non-BMP characters. | |
| 251 const wchar_t* family = GetFontFamilyForScript(script, generic); | |
| 252 if (!family) { | |
| 253 int plane = ucs4 >> 16; | |
| 254 switch (plane) { | |
| 255 case 1: | |
| 256 family = L"code2001"; | |
| 257 break; | |
| 258 case 2: | |
| 259 family = L"simsun-extb"; | |
| 260 break; | |
| 261 default: | |
| 262 family = L"lucida sans unicode"; | |
| 263 } | |
| 264 } | |
| 265 | |
| 266 if (char_checked) *char_checked = ucs4; | |
| 267 if (script_checked) *script_checked = script; | |
| 268 return family; | |
| 269 } | |
| 270 | |
| 271 | |
| 272 | |
| 273 // Be aware that this is not thread-safe. | |
| 274 bool GetDerivedFontData(const wchar_t *family, | |
| 275 int style, | |
| 276 LOGFONT *logfont, | |
| 277 int *ascent, | |
| 278 HFONT *hfont, | |
| 279 SCRIPT_CACHE **script_cache) { | |
| 280 DCHECK(logfont && family && *family); | |
| 281 // Using |Singleton| here is not free, but the intl2 page cycler test | |
| 282 // does not show any noticeable difference with and without it. Leaking | |
| 283 // the contents of FontDataCache (especially SCRIPT_CACHE) at the end | |
| 284 // of a renderer process may not be a good idea. We may use | |
| 285 // atexit(). However, with no noticeable performance difference, |Singleton| | |
| 286 // is cleaner, I believe. | |
| 287 FontDataCache* font_data_cache = | |
| 288 Singleton<FontDataCache, FontDataCacheSingletonTraits>::get(); | |
| 289 // TODO(jungshik) : This comes up pretty high in the profile so that | |
| 290 // we need to measure whether using SHA256 (after coercing all the | |
| 291 // fields to char*) is faster than StringPrintf. | |
| 292 std::wstring font_key = StringPrintf(L"%1d:%d:%ls", style, logfont->lfHeight, | |
| 293 family); | |
| 294 FontDataCache::const_iterator iter = font_data_cache->find(font_key); | |
| 295 FontData *derived; | |
| 296 if (iter == font_data_cache->end()) { | |
| 297 DCHECK(wcslen(family) < LF_FACESIZE); | |
| 298 wcscpy_s(logfont->lfFaceName, LF_FACESIZE, family); | |
| 299 // TODO(jungshik): CreateFontIndirect always comes up with | |
| 300 // a font even if there's no font matching the name. Need to | |
| 301 // check it against what we actually want (as is done in FontCacheWin.cpp) | |
| 302 derived = new FontData; | |
| 303 derived->hfont = CreateFontIndirect(logfont); | |
| 304 // GetAscent may return kUndefinedAscent, but we still want to | |
| 305 // cache it so that we won't have to call CreateFontIndirect once | |
| 306 // more for HFONT next time. | |
| 307 derived->ascent = GetAscent(derived->hfont); | |
| 308 (*font_data_cache)[font_key] = derived; | |
| 309 } else { | |
| 310 derived = iter->second; | |
| 311 // Last time, GetAscent failed so that only HFONT was | |
| 312 // cached. Try once more assuming that TryPreloadFont | |
| 313 // was called by a caller between calls. | |
| 314 if (kUndefinedAscent == derived->ascent) | |
| 315 derived->ascent = GetAscent(derived->hfont); | |
| 316 } | |
| 317 *hfont = derived->hfont; | |
| 318 *ascent = derived->ascent; | |
| 319 *script_cache = &(derived->script_cache); | |
| 320 return *ascent != kUndefinedAscent; | |
| 321 } | |
| 322 | |
| 323 int GetStyleFromLogfont(const LOGFONT* logfont) { | |
| 324 // TODO(jungshik) : consider defining UNDEFINED or INVALID for style and | |
| 325 // returning it when logfont is NULL | |
| 326 if (!logfont) { | |
| 327 NOTREACHED(); | |
| 328 return FONT_STYLE_NORMAL; | |
| 329 } | |
| 330 return (logfont->lfItalic ? FONT_STYLE_ITALIC : FONT_STYLE_NORMAL) | | |
| 331 (logfont->lfUnderline ? FONT_STYLE_UNDERLINED : FONT_STYLE_NORMAL) | | |
| 332 (logfont->lfWeight >= 700 ? FONT_STYLE_BOLD : FONT_STYLE_NORMAL); | |
| 333 } | |
| 334 | |
| 335 } // namespace gfx | |
| 336 | |
| OLD | NEW |