OLD | NEW |
| (Empty) |
1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 #include "base/gfx/font_utils.h" | |
6 | |
7 #include <limits> | |
8 #include <map> | |
9 | |
10 #include "base/gfx/uniscribe.h" | |
11 #include "base/logging.h" | |
12 #include "base/singleton.h" | |
13 #include "base/string_util.h" | |
14 #include "unicode/locid.h" | |
15 #include "unicode/uchar.h" | |
16 | |
17 namespace gfx { | |
18 | |
19 namespace { | |
20 | |
21 // hash_map has extra cost with no sizable gain for a small number of integer | |
22 // key items. When the map size becomes much bigger (which will be later as | |
23 // more scripts are added) and this turns out to be prominent in the profile, we | |
24 // may consider switching to hash_map (or just an array if we support all the | |
25 // scripts) | |
26 typedef std::map<UScriptCode, const wchar_t*> ScriptToFontMap; | |
27 | |
28 struct ScriptToFontMapSingletonTraits | |
29 : public DefaultSingletonTraits<ScriptToFontMap> { | |
30 static ScriptToFontMap* New() { | |
31 struct FontMap { | |
32 UScriptCode script; | |
33 const wchar_t* family; | |
34 }; | |
35 | |
36 const static FontMap font_map[] = { | |
37 {USCRIPT_LATIN, L"times new roman"}, | |
38 {USCRIPT_GREEK, L"times new roman"}, | |
39 {USCRIPT_CYRILLIC, L"times new roman"}, | |
40 {USCRIPT_SIMPLIFIED_HAN, L"simsun"}, | |
41 //{USCRIPT_TRADITIONAL_HAN, L"pmingliu"}, | |
42 {USCRIPT_HIRAGANA, L"ms pgothic"}, | |
43 {USCRIPT_KATAKANA, L"ms pgothic"}, | |
44 {USCRIPT_KATAKANA_OR_HIRAGANA, L"ms pgothic"}, | |
45 {USCRIPT_HANGUL, L"gulim"}, | |
46 {USCRIPT_THAI, L"tahoma"}, | |
47 {USCRIPT_HEBREW, L"david"}, | |
48 {USCRIPT_ARABIC, L"tahoma"}, | |
49 {USCRIPT_DEVANAGARI, L"mangal"}, | |
50 {USCRIPT_BENGALI, L"vrinda"}, | |
51 {USCRIPT_GURMUKHI, L"raavi"}, | |
52 {USCRIPT_GUJARATI, L"shruti"}, | |
53 {USCRIPT_ORIYA, L"kalinga"}, | |
54 {USCRIPT_TAMIL, L"latha"}, | |
55 {USCRIPT_TELUGU, L"gautami"}, | |
56 {USCRIPT_KANNADA, L"tunga"}, | |
57 {USCRIPT_MALAYALAM, L"kartika"}, | |
58 {USCRIPT_LAO, L"dokchampa"}, | |
59 {USCRIPT_TIBETAN, L"microsoft himalaya"}, | |
60 {USCRIPT_GEORGIAN, L"sylfaen"}, | |
61 {USCRIPT_ARMENIAN, L"sylfaen"}, | |
62 {USCRIPT_ETHIOPIC, L"nyala"}, | |
63 {USCRIPT_CANADIAN_ABORIGINAL, L"euphemia"}, | |
64 {USCRIPT_CHEROKEE, L"plantagenet cherokee"}, | |
65 {USCRIPT_YI, L"microsoft yi balti"}, | |
66 {USCRIPT_SINHALA, L"iskoola pota"}, | |
67 {USCRIPT_SYRIAC, L"estrangelo edessa"}, | |
68 {USCRIPT_KHMER, L"daunpenh"}, | |
69 {USCRIPT_THAANA, L"mv boli"}, | |
70 {USCRIPT_MONGOLIAN, L"mongolian balti"}, | |
71 {USCRIPT_MYANMAR, L"padauk"}, | |
72 // For USCRIPT_COMMON, we map blocks to scripts when | |
73 // that makes sense. | |
74 }; | |
75 | |
76 ScriptToFontMap* new_instance = new ScriptToFontMap; | |
77 // Cannot recover from OOM so that there's no need to check. | |
78 for (int i = 0; i < arraysize(font_map); ++i) | |
79 (*new_instance)[font_map[i].script] = font_map[i].family; | |
80 | |
81 // Initialize the locale-dependent mapping. | |
82 // Since Chrome synchronizes the ICU default locale with its UI locale, | |
83 // this ICU locale tells the current UI locale of Chrome. | |
84 Locale locale = Locale::getDefault(); | |
85 ScriptToFontMap::const_iterator iter; | |
86 if (locale == Locale::getJapanese()) { | |
87 iter = new_instance->find(USCRIPT_HIRAGANA); | |
88 } else if (locale == Locale::getKorean()) { | |
89 iter = new_instance->find(USCRIPT_HANGUL); | |
90 } else { | |
91 // Use Simplified Chinese font for all other locales including | |
92 // Traditional Chinese because Simsun (SC font) has a wider | |
93 // coverage (covering both SC and TC) than PMingLiu (TC font). | |
94 // This also speeds up the TC version of Chrome when rendering SC pages. | |
95 iter = new_instance->find(USCRIPT_SIMPLIFIED_HAN); | |
96 } | |
97 if (iter != new_instance->end()) | |
98 (*new_instance)[USCRIPT_HAN] = iter->second; | |
99 | |
100 return new_instance; | |
101 } | |
102 }; | |
103 | |
104 Singleton<ScriptToFontMap, ScriptToFontMapSingletonTraits> script_font_map; | |
105 | |
106 const int kUndefinedAscent = std::numeric_limits<int>::min(); | |
107 | |
108 // Given an HFONT, return the ascent. If GetTextMetrics fails, | |
109 // kUndefinedAscent is returned, instead. | |
110 int GetAscent(HFONT hfont) { | |
111 HDC dc = GetDC(NULL); | |
112 HGDIOBJ oldFont = SelectObject(dc, hfont); | |
113 TEXTMETRIC tm; | |
114 BOOL got_metrics = GetTextMetrics(dc, &tm); | |
115 SelectObject(dc, oldFont); | |
116 ReleaseDC(NULL, dc); | |
117 return got_metrics ? tm.tmAscent : kUndefinedAscent; | |
118 } | |
119 | |
120 struct FontData { | |
121 FontData() : hfont(NULL), ascent(kUndefinedAscent), script_cache(NULL) {} | |
122 HFONT hfont; | |
123 int ascent; | |
124 mutable SCRIPT_CACHE script_cache; | |
125 }; | |
126 | |
127 // Again, using hash_map does not earn us much here. | |
128 // page_cycler_test intl2 gave us a 'better' result with map than with hash_map | |
129 // even though they're well-within 1-sigma of each other so that the difference | |
130 // is not significant. On the other hand, some pages in intl2 seem to | |
131 // take longer to load with map in the 1st pass. Need to experiment further. | |
132 typedef std::map<std::wstring, FontData*> FontDataCache; | |
133 struct FontDataCacheSingletonTraits | |
134 : public DefaultSingletonTraits<FontDataCache> { | |
135 static void Delete(FontDataCache* cache) { | |
136 FontDataCache::iterator iter = cache->begin(); | |
137 while (iter != cache->end()) { | |
138 SCRIPT_CACHE script_cache = iter->second->script_cache; | |
139 if (script_cache) | |
140 ScriptFreeCache(&script_cache); | |
141 delete iter->second; | |
142 ++iter; | |
143 } | |
144 delete cache; | |
145 } | |
146 }; | |
147 | |
148 } // namespace | |
149 | |
150 // TODO(jungshik) : this is font fallback code version 0.1 | |
151 // - Cover all the scripts | |
152 // - Get the default font for each script/generic family from the | |
153 // preference instead of hardcoding in the source. | |
154 // (at least, read values from the registry for IE font settings). | |
155 // - Support generic families (from FontDescription) | |
156 // - If the default font for a script is not available, | |
157 // try some more fonts known to support it. Finally, we can | |
158 // use EnumFontFamilies or similar APIs to come up with a list of | |
159 // fonts supporting the script and cache the result. | |
160 // - Consider using UnicodeSet (or UnicodeMap) converted from | |
161 // GLYPHSET (BMP) or directly read from truetype cmap tables to | |
162 // keep track of which character is supported by which font | |
163 // - Update script_font_cache in response to WM_FONTCHANGE | |
164 | |
165 const wchar_t* GetFontFamilyForScript(UScriptCode script, | |
166 GenericFamilyType generic) { | |
167 ScriptToFontMap::const_iterator iter = script_font_map->find(script); | |
168 const wchar_t* family = NULL; | |
169 if (iter != script_font_map->end()) { | |
170 family = iter->second; | |
171 } | |
172 return family; | |
173 } | |
174 | |
175 // TODO(jungshik) | |
176 // - Handle 'Inherited', 'Common' and 'Unknown' | |
177 // (see http://www.unicode.org/reports/tr24/#Usage_Model ) | |
178 // For 'Inherited' and 'Common', perhaps we need to | |
179 // accept another parameter indicating the previous family | |
180 // and just return it. | |
181 // - All the characters (or characters up to the point a single | |
182 // font can cover) need to be taken into account | |
183 const wchar_t* GetFallbackFamily(const wchar_t *characters, | |
184 int length, | |
185 GenericFamilyType generic, | |
186 UChar32 *char_checked, | |
187 UScriptCode *script_checked) { | |
188 DCHECK(characters && characters[0] && length > 0); | |
189 UScriptCode script = USCRIPT_COMMON; | |
190 | |
191 // Sometimes characters common to script (e.g. space) is at | |
192 // the beginning of a string so that we need to skip them | |
193 // to get a font required to render the string. | |
194 int i = 0; | |
195 UChar32 ucs4 = 0; | |
196 while (i < length && script == USCRIPT_COMMON || | |
197 script == USCRIPT_INVALID_CODE) { | |
198 U16_NEXT(characters, i, length, ucs4); | |
199 UErrorCode err = U_ZERO_ERROR; | |
200 script = uscript_getScript(ucs4, &err); | |
201 // silently ignore the error | |
202 } | |
203 | |
204 // hack for full width ASCII. For the full-width ASCII, use the font | |
205 // for Han (which is locale-dependent). | |
206 if (0xFF00 < ucs4 && ucs4 < 0xFF5F) | |
207 script = USCRIPT_HAN; | |
208 | |
209 // There are a lot of characters in USCRIPT_COMMON that can be covered | |
210 // by fonts for scripts closely related to them. | |
211 // See http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[:Script=Common:] | |
212 // TODO(jungshik): make this more efficient with a wider coverage | |
213 if (script == USCRIPT_COMMON || script == USCRIPT_INHERITED) { | |
214 UBlockCode block = ublock_getCode(ucs4); | |
215 switch (block) { | |
216 case UBLOCK_BASIC_LATIN: | |
217 script = USCRIPT_LATIN; | |
218 break; | |
219 case UBLOCK_CJK_SYMBOLS_AND_PUNCTUATION: | |
220 script = USCRIPT_HAN; | |
221 break; | |
222 case UBLOCK_HIRAGANA: | |
223 case UBLOCK_KATAKANA: | |
224 script = USCRIPT_HIRAGANA; | |
225 break; | |
226 case UBLOCK_ARABIC: | |
227 script = USCRIPT_ARABIC; | |
228 break; | |
229 case UBLOCK_GREEK: | |
230 script = USCRIPT_GREEK; | |
231 break; | |
232 case UBLOCK_DEVANAGARI: | |
233 // For Danda and Double Danda (U+0964, U+0965), use a Devanagari | |
234 // font for now although they're used by other scripts as well. | |
235 // Without a context, we can't do any better. | |
236 script = USCRIPT_DEVANAGARI; | |
237 break; | |
238 case UBLOCK_ARMENIAN: | |
239 script = USCRIPT_ARMENIAN; | |
240 break; | |
241 case UBLOCK_GEORGIAN: | |
242 script = USCRIPT_GEORGIAN; | |
243 break; | |
244 case UBLOCK_KANNADA: | |
245 script = USCRIPT_KANNADA; | |
246 break; | |
247 } | |
248 } | |
249 | |
250 // Another lame work-around to cover non-BMP characters. | |
251 const wchar_t* family = GetFontFamilyForScript(script, generic); | |
252 if (!family) { | |
253 int plane = ucs4 >> 16; | |
254 switch (plane) { | |
255 case 1: | |
256 family = L"code2001"; | |
257 break; | |
258 case 2: | |
259 family = L"simsun-extb"; | |
260 break; | |
261 default: | |
262 family = L"lucida sans unicode"; | |
263 } | |
264 } | |
265 | |
266 if (char_checked) *char_checked = ucs4; | |
267 if (script_checked) *script_checked = script; | |
268 return family; | |
269 } | |
270 | |
271 | |
272 | |
273 // Be aware that this is not thread-safe. | |
274 bool GetDerivedFontData(const wchar_t *family, | |
275 int style, | |
276 LOGFONT *logfont, | |
277 int *ascent, | |
278 HFONT *hfont, | |
279 SCRIPT_CACHE **script_cache) { | |
280 DCHECK(logfont && family && *family); | |
281 // Using |Singleton| here is not free, but the intl2 page cycler test | |
282 // does not show any noticeable difference with and without it. Leaking | |
283 // the contents of FontDataCache (especially SCRIPT_CACHE) at the end | |
284 // of a renderer process may not be a good idea. We may use | |
285 // atexit(). However, with no noticeable performance difference, |Singleton| | |
286 // is cleaner, I believe. | |
287 FontDataCache* font_data_cache = | |
288 Singleton<FontDataCache, FontDataCacheSingletonTraits>::get(); | |
289 // TODO(jungshik) : This comes up pretty high in the profile so that | |
290 // we need to measure whether using SHA256 (after coercing all the | |
291 // fields to char*) is faster than StringPrintf. | |
292 std::wstring font_key = StringPrintf(L"%1d:%d:%ls", style, logfont->lfHeight, | |
293 family); | |
294 FontDataCache::const_iterator iter = font_data_cache->find(font_key); | |
295 FontData *derived; | |
296 if (iter == font_data_cache->end()) { | |
297 DCHECK(wcslen(family) < LF_FACESIZE); | |
298 wcscpy_s(logfont->lfFaceName, LF_FACESIZE, family); | |
299 // TODO(jungshik): CreateFontIndirect always comes up with | |
300 // a font even if there's no font matching the name. Need to | |
301 // check it against what we actually want (as is done in FontCacheWin.cpp) | |
302 derived = new FontData; | |
303 derived->hfont = CreateFontIndirect(logfont); | |
304 // GetAscent may return kUndefinedAscent, but we still want to | |
305 // cache it so that we won't have to call CreateFontIndirect once | |
306 // more for HFONT next time. | |
307 derived->ascent = GetAscent(derived->hfont); | |
308 (*font_data_cache)[font_key] = derived; | |
309 } else { | |
310 derived = iter->second; | |
311 // Last time, GetAscent failed so that only HFONT was | |
312 // cached. Try once more assuming that TryPreloadFont | |
313 // was called by a caller between calls. | |
314 if (kUndefinedAscent == derived->ascent) | |
315 derived->ascent = GetAscent(derived->hfont); | |
316 } | |
317 *hfont = derived->hfont; | |
318 *ascent = derived->ascent; | |
319 *script_cache = &(derived->script_cache); | |
320 return *ascent != kUndefinedAscent; | |
321 } | |
322 | |
323 int GetStyleFromLogfont(const LOGFONT* logfont) { | |
324 // TODO(jungshik) : consider defining UNDEFINED or INVALID for style and | |
325 // returning it when logfont is NULL | |
326 if (!logfont) { | |
327 NOTREACHED(); | |
328 return FONT_STYLE_NORMAL; | |
329 } | |
330 return (logfont->lfItalic ? FONT_STYLE_ITALIC : FONT_STYLE_NORMAL) | | |
331 (logfont->lfUnderline ? FONT_STYLE_UNDERLINED : FONT_STYLE_NORMAL) | | |
332 (logfont->lfWeight >= 700 ? FONT_STYLE_BOLD : FONT_STYLE_NORMAL); | |
333 } | |
334 | |
335 } // namespace gfx | |
336 | |
OLD | NEW |