base/gfx/font_utils.cc - Issue 10785: Debase our Uniscribe code. This moves FontUtils and all our Uniscribe code fr...

Side by Side Diff: base/gfx/font_utils.cc

Issue 10785: Debase our Uniscribe code. This moves FontUtils and all our Uniscribe code fr... (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/

Patch Set: '' Created 12 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
	(Empty)
1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.

4

5 #include "base/gfx/font_utils.h"

6

7 #include <limits>

8 #include <map>

9

10 #include "base/gfx/uniscribe.h"

11 #include "base/logging.h"

12 #include "base/singleton.h"

13 #include "base/string_util.h"

14 #include "unicode/locid.h"

15 #include "unicode/uchar.h"

16

17 namespace gfx {

18

19 namespace {

20

21 // hash_map has extra cost with no sizable gain for a small number of integer

22 // key items. When the map size becomes much bigger (which will be later as

23 // more scripts are added) and this turns out to be prominent in the profile, we

24 // may consider switching to hash_map (or just an array if we support all the

25 // scripts)

26 typedef std::map<UScriptCode, const wchar_t*> ScriptToFontMap;

27

28 struct ScriptToFontMapSingletonTraits

29 : public DefaultSingletonTraits<ScriptToFontMap> {

30 static ScriptToFontMap* New() {

31 struct FontMap {

32 UScriptCode script;

33 const wchar_t* family;

34 };

35

36 const static FontMap font_map[] = {

37 {USCRIPT_LATIN, L"times new roman"},

38 {USCRIPT_GREEK, L"times new roman"},

39 {USCRIPT_CYRILLIC, L"times new roman"},

40 {USCRIPT_SIMPLIFIED_HAN, L"simsun"},

41 //{USCRIPT_TRADITIONAL_HAN, L"pmingliu"},

42 {USCRIPT_HIRAGANA, L"ms pgothic"},

43 {USCRIPT_KATAKANA, L"ms pgothic"},

44 {USCRIPT_KATAKANA_OR_HIRAGANA, L"ms pgothic"},

45 {USCRIPT_HANGUL, L"gulim"},

46 {USCRIPT_THAI, L"tahoma"},

47 {USCRIPT_HEBREW, L"david"},

48 {USCRIPT_ARABIC, L"tahoma"},

49 {USCRIPT_DEVANAGARI, L"mangal"},

50 {USCRIPT_BENGALI, L"vrinda"},

51 {USCRIPT_GURMUKHI, L"raavi"},

52 {USCRIPT_GUJARATI, L"shruti"},

53 {USCRIPT_ORIYA, L"kalinga"},

54 {USCRIPT_TAMIL, L"latha"},

55 {USCRIPT_TELUGU, L"gautami"},

56 {USCRIPT_KANNADA, L"tunga"},

57 {USCRIPT_MALAYALAM, L"kartika"},

58 {USCRIPT_LAO, L"dokchampa"},

59 {USCRIPT_TIBETAN, L"microsoft himalaya"},

60 {USCRIPT_GEORGIAN, L"sylfaen"},

61 {USCRIPT_ARMENIAN, L"sylfaen"},

62 {USCRIPT_ETHIOPIC, L"nyala"},

63 {USCRIPT_CANADIAN_ABORIGINAL, L"euphemia"},

64 {USCRIPT_CHEROKEE, L"plantagenet cherokee"},

65 {USCRIPT_YI, L"microsoft yi balti"},

66 {USCRIPT_SINHALA, L"iskoola pota"},

67 {USCRIPT_SYRIAC, L"estrangelo edessa"},

68 {USCRIPT_KHMER, L"daunpenh"},

69 {USCRIPT_THAANA, L"mv boli"},

70 {USCRIPT_MONGOLIAN, L"mongolian balti"},

71 {USCRIPT_MYANMAR, L"padauk"},

72 // For USCRIPT_COMMON, we map blocks to scripts when

73 // that makes sense.

74 };

75

76 ScriptToFontMap* new_instance = new ScriptToFontMap;

77 // Cannot recover from OOM so that there's no need to check.

78 for (int i = 0; i < arraysize(font_map); ++i)

79 (*new_instance)[font_map[i].script] = font_map[i].family;

80

81 // Initialize the locale-dependent mapping.

82 // Since Chrome synchronizes the ICU default locale with its UI locale,

83 // this ICU locale tells the current UI locale of Chrome.

84 Locale locale = Locale::getDefault();

85 ScriptToFontMap::const_iterator iter;

86 if (locale == Locale::getJapanese()) {

87 iter = new_instance->find(USCRIPT_HIRAGANA);

88 } else if (locale == Locale::getKorean()) {

89 iter = new_instance->find(USCRIPT_HANGUL);

90 } else {

91 // Use Simplified Chinese font for all other locales including

92 // Traditional Chinese because Simsun (SC font) has a wider

93 // coverage (covering both SC and TC) than PMingLiu (TC font).

94 // This also speeds up the TC version of Chrome when rendering SC pages.

95 iter = new_instance->find(USCRIPT_SIMPLIFIED_HAN);

96 }

97 if (iter != new_instance->end())

98 (*new_instance)[USCRIPT_HAN] = iter->second;

99

100 return new_instance;

101 }

102 };

103

104 Singleton<ScriptToFontMap, ScriptToFontMapSingletonTraits> script_font_map;

105

106 const int kUndefinedAscent = std::numeric_limits<int>::min();

107

108 // Given an HFONT, return the ascent. If GetTextMetrics fails,

109 // kUndefinedAscent is returned, instead.

110 int GetAscent(HFONT hfont) {

111 HDC dc = GetDC(NULL);

112 HGDIOBJ oldFont = SelectObject(dc, hfont);

113 TEXTMETRIC tm;

114 BOOL got_metrics = GetTextMetrics(dc, &tm);

115 SelectObject(dc, oldFont);

116 ReleaseDC(NULL, dc);

117 return got_metrics ? tm.tmAscent : kUndefinedAscent;

118 }

119

120 struct FontData {

121 FontData() : hfont(NULL), ascent(kUndefinedAscent), script_cache(NULL) {}

122 HFONT hfont;

123 int ascent;

124 mutable SCRIPT_CACHE script_cache;

125 };

126

127 // Again, using hash_map does not earn us much here.

128 // page_cycler_test intl2 gave us a 'better' result with map than with hash_map

129 // even though they're well-within 1-sigma of each other so that the difference

130 // is not significant. On the other hand, some pages in intl2 seem to

131 // take longer to load with map in the 1st pass. Need to experiment further.

132 typedef std::map<std::wstring, FontData*> FontDataCache;

133 struct FontDataCacheSingletonTraits

134 : public DefaultSingletonTraits<FontDataCache> {

135 static void Delete(FontDataCache* cache) {

136 FontDataCache::iterator iter = cache->begin();

137 while (iter != cache->end()) {

138 SCRIPT_CACHE script_cache = iter->second->script_cache;

139 if (script_cache)

140 ScriptFreeCache(&script_cache);

141 delete iter->second;

142 ++iter;

143 }

144 delete cache;

145 }

146 };

147

148 } // namespace

149

150 // TODO(jungshik) : this is font fallback code version 0.1

151 // - Cover all the scripts

152 // - Get the default font for each script/generic family from the

153 // preference instead of hardcoding in the source.

154 // (at least, read values from the registry for IE font settings).

155 // - Support generic families (from FontDescription)

156 // - If the default font for a script is not available,

157 // try some more fonts known to support it. Finally, we can

158 // use EnumFontFamilies or similar APIs to come up with a list of

159 // fonts supporting the script and cache the result.

160 // - Consider using UnicodeSet (or UnicodeMap) converted from

161 // GLYPHSET (BMP) or directly read from truetype cmap tables to

162 // keep track of which character is supported by which font

163 // - Update script_font_cache in response to WM_FONTCHANGE

164

165 const wchar_t* GetFontFamilyForScript(UScriptCode script,

166 GenericFamilyType generic) {

167 ScriptToFontMap::const_iterator iter = script_font_map->find(script);

168 const wchar_t* family = NULL;

169 if (iter != script_font_map->end()) {

170 family = iter->second;

171 }

172 return family;

173 }

174

175 // TODO(jungshik)

176 // - Handle 'Inherited', 'Common' and 'Unknown'

177 // (see http://www.unicode.org/reports/tr24/#Usage_Model )

178 // For 'Inherited' and 'Common', perhaps we need to

179 // accept another parameter indicating the previous family

180 // and just return it.

181 // - All the characters (or characters up to the point a single

182 // font can cover) need to be taken into account

183 const wchar_t* GetFallbackFamily(const wchar_t *characters,

184 int length,

185 GenericFamilyType generic,

186 UChar32 *char_checked,

187 UScriptCode *script_checked) {

188 DCHECK(characters && characters[0] && length > 0);

189 UScriptCode script = USCRIPT_COMMON;

190

191 // Sometimes characters common to script (e.g. space) is at

192 // the beginning of a string so that we need to skip them

193 // to get a font required to render the string.

194 int i = 0;

195 UChar32 ucs4 = 0;

196 while (i < length && script == USCRIPT_COMMON \|\|

197 script == USCRIPT_INVALID_CODE) {

198 U16_NEXT(characters, i, length, ucs4);

199 UErrorCode err = U_ZERO_ERROR;

200 script = uscript_getScript(ucs4, &err);

201 // silently ignore the error

202 }

203

204 // hack for full width ASCII. For the full-width ASCII, use the font

205 // for Han (which is locale-dependent).

206 if (0xFF00 < ucs4 && ucs4 < 0xFF5F)

207 script = USCRIPT_HAN;

208

209 // There are a lot of characters in USCRIPT_COMMON that can be covered

210 // by fonts for scripts closely related to them.

211 // See http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[:Script=Common:]

212 // TODO(jungshik): make this more efficient with a wider coverage

213 if (script == USCRIPT_COMMON \|\| script == USCRIPT_INHERITED) {

214 UBlockCode block = ublock_getCode(ucs4);

215 switch (block) {

216 case UBLOCK_BASIC_LATIN:

217 script = USCRIPT_LATIN;

218 break;

219 case UBLOCK_CJK_SYMBOLS_AND_PUNCTUATION:

220 script = USCRIPT_HAN;

221 break;

222 case UBLOCK_HIRAGANA:

223 case UBLOCK_KATAKANA:

224 script = USCRIPT_HIRAGANA;

225 break;

226 case UBLOCK_ARABIC:

227 script = USCRIPT_ARABIC;

228 break;

229 case UBLOCK_GREEK:

230 script = USCRIPT_GREEK;

231 break;

232 case UBLOCK_DEVANAGARI:

233 // For Danda and Double Danda (U+0964, U+0965), use a Devanagari

234 // font for now although they're used by other scripts as well.

235 // Without a context, we can't do any better.

236 script = USCRIPT_DEVANAGARI;

237 break;

238 case UBLOCK_ARMENIAN:

239 script = USCRIPT_ARMENIAN;

240 break;

241 case UBLOCK_GEORGIAN:

242 script = USCRIPT_GEORGIAN;

243 break;

244 case UBLOCK_KANNADA:

245 script = USCRIPT_KANNADA;

246 break;

247 }

248 }

249

250 // Another lame work-around to cover non-BMP characters.

251 const wchar_t* family = GetFontFamilyForScript(script, generic);

252 if (!family) {

253 int plane = ucs4 >> 16;

254 switch (plane) {

255 case 1:

256 family = L"code2001";

257 break;

258 case 2:

259 family = L"simsun-extb";

260 break;

261 default:

262 family = L"lucida sans unicode";

263 }

264 }

265

266 if (char_checked) *char_checked = ucs4;

267 if (script_checked) *script_checked = script;

268 return family;

269 }

270

271

272

273 // Be aware that this is not thread-safe.

274 bool GetDerivedFontData(const wchar_t *family,

275 int style,

276 LOGFONT *logfont,

277 int *ascent,

278 HFONT *hfont,

279 SCRIPT_CACHE **script_cache) {

280 DCHECK(logfont && family && *family);

281 // Using \|Singleton\| here is not free, but the intl2 page cycler test

282 // does not show any noticeable difference with and without it. Leaking

283 // the contents of FontDataCache (especially SCRIPT_CACHE) at the end

284 // of a renderer process may not be a good idea. We may use

285 // atexit(). However, with no noticeable performance difference, \|Singleton\|

286 // is cleaner, I believe.

287 FontDataCache* font_data_cache =

288 Singleton<FontDataCache, FontDataCacheSingletonTraits>::get();

289 // TODO(jungshik) : This comes up pretty high in the profile so that

290 // we need to measure whether using SHA256 (after coercing all the

291 // fields to char*) is faster than StringPrintf.

292 std::wstring font_key = StringPrintf(L"%1d:%d:%ls", style, logfont->lfHeight,

293 family);

294 FontDataCache::const_iterator iter = font_data_cache->find(font_key);

295 FontData *derived;

296 if (iter == font_data_cache->end()) {

297 DCHECK(wcslen(family) < LF_FACESIZE);

298 wcscpy_s(logfont->lfFaceName, LF_FACESIZE, family);

299 // TODO(jungshik): CreateFontIndirect always comes up with

300 // a font even if there's no font matching the name. Need to

301 // check it against what we actually want (as is done in FontCacheWin.cpp)

302 derived = new FontData;

303 derived->hfont = CreateFontIndirect(logfont);

304 // GetAscent may return kUndefinedAscent, but we still want to

305 // cache it so that we won't have to call CreateFontIndirect once

306 // more for HFONT next time.

307 derived->ascent = GetAscent(derived->hfont);

308 (*font_data_cache)[font_key] = derived;

309 } else {

310 derived = iter->second;

311 // Last time, GetAscent failed so that only HFONT was

312 // cached. Try once more assuming that TryPreloadFont

313 // was called by a caller between calls.

314 if (kUndefinedAscent == derived->ascent)

315 derived->ascent = GetAscent(derived->hfont);

316 }

317 *hfont = derived->hfont;

318 *ascent = derived->ascent;

319 *script_cache = &(derived->script_cache);

320 return *ascent != kUndefinedAscent;

321 }

322

323 int GetStyleFromLogfont(const LOGFONT* logfont) {

324 // TODO(jungshik) : consider defining UNDEFINED or INVALID for style and

325 // returning it when logfont is NULL

326 if (!logfont) {

327 NOTREACHED();

328 return FONT_STYLE_NORMAL;

329 }

330 return (logfont->lfItalic ? FONT_STYLE_ITALIC : FONT_STYLE_NORMAL) \|

331 (logfont->lfUnderline ? FONT_STYLE_UNDERLINED : FONT_STYLE_NORMAL) \|

332 (logfont->lfWeight >= 700 ? FONT_STYLE_BOLD : FONT_STYLE_NORMAL);

333 }

334

335 } // namespace gfx

336

OLD	NEW

« no previous file with comments | « base/gfx/font_utils.h ('k') | base/gfx/uniscribe.h » ('j') | webkit/build/port/port.vcproj » ('J')