Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(529)

Side by Side Diff: base/gfx/font_utils.cc

Issue 10785: Debase our Uniscribe code. This moves FontUtils and all our Uniscribe code fr... (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/
Patch Set: '' Created 12 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "base/gfx/font_utils.h"
6
7 #include <limits>
8 #include <map>
9
10 #include "base/gfx/uniscribe.h"
11 #include "base/logging.h"
12 #include "base/singleton.h"
13 #include "base/string_util.h"
14 #include "unicode/locid.h"
15 #include "unicode/uchar.h"
16
17 namespace gfx {
18
19 namespace {
20
21 // hash_map has extra cost with no sizable gain for a small number of integer
22 // key items. When the map size becomes much bigger (which will be later as
23 // more scripts are added) and this turns out to be prominent in the profile, we
24 // may consider switching to hash_map (or just an array if we support all the
25 // scripts)
26 typedef std::map<UScriptCode, const wchar_t*> ScriptToFontMap;
27
28 struct ScriptToFontMapSingletonTraits
29 : public DefaultSingletonTraits<ScriptToFontMap> {
30 static ScriptToFontMap* New() {
31 struct FontMap {
32 UScriptCode script;
33 const wchar_t* family;
34 };
35
36 const static FontMap font_map[] = {
37 {USCRIPT_LATIN, L"times new roman"},
38 {USCRIPT_GREEK, L"times new roman"},
39 {USCRIPT_CYRILLIC, L"times new roman"},
40 {USCRIPT_SIMPLIFIED_HAN, L"simsun"},
41 //{USCRIPT_TRADITIONAL_HAN, L"pmingliu"},
42 {USCRIPT_HIRAGANA, L"ms pgothic"},
43 {USCRIPT_KATAKANA, L"ms pgothic"},
44 {USCRIPT_KATAKANA_OR_HIRAGANA, L"ms pgothic"},
45 {USCRIPT_HANGUL, L"gulim"},
46 {USCRIPT_THAI, L"tahoma"},
47 {USCRIPT_HEBREW, L"david"},
48 {USCRIPT_ARABIC, L"tahoma"},
49 {USCRIPT_DEVANAGARI, L"mangal"},
50 {USCRIPT_BENGALI, L"vrinda"},
51 {USCRIPT_GURMUKHI, L"raavi"},
52 {USCRIPT_GUJARATI, L"shruti"},
53 {USCRIPT_ORIYA, L"kalinga"},
54 {USCRIPT_TAMIL, L"latha"},
55 {USCRIPT_TELUGU, L"gautami"},
56 {USCRIPT_KANNADA, L"tunga"},
57 {USCRIPT_MALAYALAM, L"kartika"},
58 {USCRIPT_LAO, L"dokchampa"},
59 {USCRIPT_TIBETAN, L"microsoft himalaya"},
60 {USCRIPT_GEORGIAN, L"sylfaen"},
61 {USCRIPT_ARMENIAN, L"sylfaen"},
62 {USCRIPT_ETHIOPIC, L"nyala"},
63 {USCRIPT_CANADIAN_ABORIGINAL, L"euphemia"},
64 {USCRIPT_CHEROKEE, L"plantagenet cherokee"},
65 {USCRIPT_YI, L"microsoft yi balti"},
66 {USCRIPT_SINHALA, L"iskoola pota"},
67 {USCRIPT_SYRIAC, L"estrangelo edessa"},
68 {USCRIPT_KHMER, L"daunpenh"},
69 {USCRIPT_THAANA, L"mv boli"},
70 {USCRIPT_MONGOLIAN, L"mongolian balti"},
71 {USCRIPT_MYANMAR, L"padauk"},
72 // For USCRIPT_COMMON, we map blocks to scripts when
73 // that makes sense.
74 };
75
76 ScriptToFontMap* new_instance = new ScriptToFontMap;
77 // Cannot recover from OOM so that there's no need to check.
78 for (int i = 0; i < arraysize(font_map); ++i)
79 (*new_instance)[font_map[i].script] = font_map[i].family;
80
81 // Initialize the locale-dependent mapping.
82 // Since Chrome synchronizes the ICU default locale with its UI locale,
83 // this ICU locale tells the current UI locale of Chrome.
84 Locale locale = Locale::getDefault();
85 ScriptToFontMap::const_iterator iter;
86 if (locale == Locale::getJapanese()) {
87 iter = new_instance->find(USCRIPT_HIRAGANA);
88 } else if (locale == Locale::getKorean()) {
89 iter = new_instance->find(USCRIPT_HANGUL);
90 } else {
91 // Use Simplified Chinese font for all other locales including
92 // Traditional Chinese because Simsun (SC font) has a wider
93 // coverage (covering both SC and TC) than PMingLiu (TC font).
94 // This also speeds up the TC version of Chrome when rendering SC pages.
95 iter = new_instance->find(USCRIPT_SIMPLIFIED_HAN);
96 }
97 if (iter != new_instance->end())
98 (*new_instance)[USCRIPT_HAN] = iter->second;
99
100 return new_instance;
101 }
102 };
103
104 Singleton<ScriptToFontMap, ScriptToFontMapSingletonTraits> script_font_map;
105
106 const int kUndefinedAscent = std::numeric_limits<int>::min();
107
108 // Given an HFONT, return the ascent. If GetTextMetrics fails,
109 // kUndefinedAscent is returned, instead.
110 int GetAscent(HFONT hfont) {
111 HDC dc = GetDC(NULL);
112 HGDIOBJ oldFont = SelectObject(dc, hfont);
113 TEXTMETRIC tm;
114 BOOL got_metrics = GetTextMetrics(dc, &tm);
115 SelectObject(dc, oldFont);
116 ReleaseDC(NULL, dc);
117 return got_metrics ? tm.tmAscent : kUndefinedAscent;
118 }
119
120 struct FontData {
121 FontData() : hfont(NULL), ascent(kUndefinedAscent), script_cache(NULL) {}
122 HFONT hfont;
123 int ascent;
124 mutable SCRIPT_CACHE script_cache;
125 };
126
127 // Again, using hash_map does not earn us much here.
128 // page_cycler_test intl2 gave us a 'better' result with map than with hash_map
129 // even though they're well-within 1-sigma of each other so that the difference
130 // is not significant. On the other hand, some pages in intl2 seem to
131 // take longer to load with map in the 1st pass. Need to experiment further.
132 typedef std::map<std::wstring, FontData*> FontDataCache;
133 struct FontDataCacheSingletonTraits
134 : public DefaultSingletonTraits<FontDataCache> {
135 static void Delete(FontDataCache* cache) {
136 FontDataCache::iterator iter = cache->begin();
137 while (iter != cache->end()) {
138 SCRIPT_CACHE script_cache = iter->second->script_cache;
139 if (script_cache)
140 ScriptFreeCache(&script_cache);
141 delete iter->second;
142 ++iter;
143 }
144 delete cache;
145 }
146 };
147
148 } // namespace
149
150 // TODO(jungshik) : this is font fallback code version 0.1
151 // - Cover all the scripts
152 // - Get the default font for each script/generic family from the
153 // preference instead of hardcoding in the source.
154 // (at least, read values from the registry for IE font settings).
155 // - Support generic families (from FontDescription)
156 // - If the default font for a script is not available,
157 // try some more fonts known to support it. Finally, we can
158 // use EnumFontFamilies or similar APIs to come up with a list of
159 // fonts supporting the script and cache the result.
160 // - Consider using UnicodeSet (or UnicodeMap) converted from
161 // GLYPHSET (BMP) or directly read from truetype cmap tables to
162 // keep track of which character is supported by which font
163 // - Update script_font_cache in response to WM_FONTCHANGE
164
165 const wchar_t* GetFontFamilyForScript(UScriptCode script,
166 GenericFamilyType generic) {
167 ScriptToFontMap::const_iterator iter = script_font_map->find(script);
168 const wchar_t* family = NULL;
169 if (iter != script_font_map->end()) {
170 family = iter->second;
171 }
172 return family;
173 }
174
175 // TODO(jungshik)
176 // - Handle 'Inherited', 'Common' and 'Unknown'
177 // (see http://www.unicode.org/reports/tr24/#Usage_Model )
178 // For 'Inherited' and 'Common', perhaps we need to
179 // accept another parameter indicating the previous family
180 // and just return it.
181 // - All the characters (or characters up to the point a single
182 // font can cover) need to be taken into account
183 const wchar_t* GetFallbackFamily(const wchar_t *characters,
184 int length,
185 GenericFamilyType generic,
186 UChar32 *char_checked,
187 UScriptCode *script_checked) {
188 DCHECK(characters && characters[0] && length > 0);
189 UScriptCode script = USCRIPT_COMMON;
190
191 // Sometimes characters common to script (e.g. space) is at
192 // the beginning of a string so that we need to skip them
193 // to get a font required to render the string.
194 int i = 0;
195 UChar32 ucs4 = 0;
196 while (i < length && script == USCRIPT_COMMON ||
197 script == USCRIPT_INVALID_CODE) {
198 U16_NEXT(characters, i, length, ucs4);
199 UErrorCode err = U_ZERO_ERROR;
200 script = uscript_getScript(ucs4, &err);
201 // silently ignore the error
202 }
203
204 // hack for full width ASCII. For the full-width ASCII, use the font
205 // for Han (which is locale-dependent).
206 if (0xFF00 < ucs4 && ucs4 < 0xFF5F)
207 script = USCRIPT_HAN;
208
209 // There are a lot of characters in USCRIPT_COMMON that can be covered
210 // by fonts for scripts closely related to them.
211 // See http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[:Script=Common:]
212 // TODO(jungshik): make this more efficient with a wider coverage
213 if (script == USCRIPT_COMMON || script == USCRIPT_INHERITED) {
214 UBlockCode block = ublock_getCode(ucs4);
215 switch (block) {
216 case UBLOCK_BASIC_LATIN:
217 script = USCRIPT_LATIN;
218 break;
219 case UBLOCK_CJK_SYMBOLS_AND_PUNCTUATION:
220 script = USCRIPT_HAN;
221 break;
222 case UBLOCK_HIRAGANA:
223 case UBLOCK_KATAKANA:
224 script = USCRIPT_HIRAGANA;
225 break;
226 case UBLOCK_ARABIC:
227 script = USCRIPT_ARABIC;
228 break;
229 case UBLOCK_GREEK:
230 script = USCRIPT_GREEK;
231 break;
232 case UBLOCK_DEVANAGARI:
233 // For Danda and Double Danda (U+0964, U+0965), use a Devanagari
234 // font for now although they're used by other scripts as well.
235 // Without a context, we can't do any better.
236 script = USCRIPT_DEVANAGARI;
237 break;
238 case UBLOCK_ARMENIAN:
239 script = USCRIPT_ARMENIAN;
240 break;
241 case UBLOCK_GEORGIAN:
242 script = USCRIPT_GEORGIAN;
243 break;
244 case UBLOCK_KANNADA:
245 script = USCRIPT_KANNADA;
246 break;
247 }
248 }
249
250 // Another lame work-around to cover non-BMP characters.
251 const wchar_t* family = GetFontFamilyForScript(script, generic);
252 if (!family) {
253 int plane = ucs4 >> 16;
254 switch (plane) {
255 case 1:
256 family = L"code2001";
257 break;
258 case 2:
259 family = L"simsun-extb";
260 break;
261 default:
262 family = L"lucida sans unicode";
263 }
264 }
265
266 if (char_checked) *char_checked = ucs4;
267 if (script_checked) *script_checked = script;
268 return family;
269 }
270
271
272
273 // Be aware that this is not thread-safe.
274 bool GetDerivedFontData(const wchar_t *family,
275 int style,
276 LOGFONT *logfont,
277 int *ascent,
278 HFONT *hfont,
279 SCRIPT_CACHE **script_cache) {
280 DCHECK(logfont && family && *family);
281 // Using |Singleton| here is not free, but the intl2 page cycler test
282 // does not show any noticeable difference with and without it. Leaking
283 // the contents of FontDataCache (especially SCRIPT_CACHE) at the end
284 // of a renderer process may not be a good idea. We may use
285 // atexit(). However, with no noticeable performance difference, |Singleton|
286 // is cleaner, I believe.
287 FontDataCache* font_data_cache =
288 Singleton<FontDataCache, FontDataCacheSingletonTraits>::get();
289 // TODO(jungshik) : This comes up pretty high in the profile so that
290 // we need to measure whether using SHA256 (after coercing all the
291 // fields to char*) is faster than StringPrintf.
292 std::wstring font_key = StringPrintf(L"%1d:%d:%ls", style, logfont->lfHeight,
293 family);
294 FontDataCache::const_iterator iter = font_data_cache->find(font_key);
295 FontData *derived;
296 if (iter == font_data_cache->end()) {
297 DCHECK(wcslen(family) < LF_FACESIZE);
298 wcscpy_s(logfont->lfFaceName, LF_FACESIZE, family);
299 // TODO(jungshik): CreateFontIndirect always comes up with
300 // a font even if there's no font matching the name. Need to
301 // check it against what we actually want (as is done in FontCacheWin.cpp)
302 derived = new FontData;
303 derived->hfont = CreateFontIndirect(logfont);
304 // GetAscent may return kUndefinedAscent, but we still want to
305 // cache it so that we won't have to call CreateFontIndirect once
306 // more for HFONT next time.
307 derived->ascent = GetAscent(derived->hfont);
308 (*font_data_cache)[font_key] = derived;
309 } else {
310 derived = iter->second;
311 // Last time, GetAscent failed so that only HFONT was
312 // cached. Try once more assuming that TryPreloadFont
313 // was called by a caller between calls.
314 if (kUndefinedAscent == derived->ascent)
315 derived->ascent = GetAscent(derived->hfont);
316 }
317 *hfont = derived->hfont;
318 *ascent = derived->ascent;
319 *script_cache = &(derived->script_cache);
320 return *ascent != kUndefinedAscent;
321 }
322
323 int GetStyleFromLogfont(const LOGFONT* logfont) {
324 // TODO(jungshik) : consider defining UNDEFINED or INVALID for style and
325 // returning it when logfont is NULL
326 if (!logfont) {
327 NOTREACHED();
328 return FONT_STYLE_NORMAL;
329 }
330 return (logfont->lfItalic ? FONT_STYLE_ITALIC : FONT_STYLE_NORMAL) |
331 (logfont->lfUnderline ? FONT_STYLE_UNDERLINED : FONT_STYLE_NORMAL) |
332 (logfont->lfWeight >= 700 ? FONT_STYLE_BOLD : FONT_STYLE_NORMAL);
333 }
334
335 } // namespace gfx
336
OLDNEW
« no previous file with comments | « base/gfx/font_utils.h ('k') | base/gfx/uniscribe.h » ('j') | webkit/build/port/port.vcproj » ('J')

Powered by Google App Engine
This is Rietveld 408576698