base/gfx/uniscribe.h - Issue 10785: Debase our Uniscribe code. This moves FontUtils and all our Uniscribe code fr...

Side by Side Diff: base/gfx/uniscribe.h

Issue 10785: Debase our Uniscribe code. This moves FontUtils and all our Uniscribe code fr... (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/

Patch Set: '' Created 12 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
	(Empty)
1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.

4 //

5 // A wrapper around Uniscribe that provides a reasonable API.

6

7 #ifndef BASE_GFX_UNISCRIBE_H__

8 #define BASE_GFX_UNISCRIBE_H__

9

10 #include <windows.h>

11 #include <usp10.h>

12 #include <wchar.h>

13 #include <map>

14 #include <vector>

15

16 #include "base/stack_container.h"

17 #include "testing/gtest/include/gtest/gtest_prod.h"

18

19 namespace gfx {

20

21 #define UNISCRIBE_STATE_STACK_RUNS 8

22 #define UNISCRIBE_STATE_STACK_CHARS 32

23

24 // This object should be safe to create & destroy frequently, as long as the

25 // caller preserves the script_cache when possible (this data may be slow to

26 // compute).

27 //

28 // This object is "kind of large" (~1K) because it reserves a lot of space for

29 // working with to avoid expensive heap operations. Therefore, not only should

30 // you not worry about creating and destroying it, you should try to not keep

31 // them around.

32 class UniscribeState {

33 public:

34 // Initializes this Uniscribe run with the text pointed to by |input| with

35 // |input_length|. The input is NOT null terminated.

36 //

37 // The is_rtl flag should be set if the input script is RTL. It is assumed

38 // that the caller has already divided up the input text (using ICU, for

39 // example) into runs of the same direction of script. This avoids

40 // disagreements between the caller and Uniscribe later (see FillRuns).

41 //

42 // A script cache should be provided by the caller that is initialized to

43 // NULL. When the caller is done with the cache (it may be stored between

44 // runs as long as it is used consistently with the same HFONT), it should

45 // call ScriptFreeCache().

46 UniscribeState(const wchar_t* input,

47 int input_length,

48 bool is_rtl,

49 HFONT hfont,

50 SCRIPT_CACHE* script_cache,

51 SCRIPT_FONTPROPERTIES* font_properties);

52

53 virtual ~UniscribeState();

54

55 // Sets Uniscribe's directional override flag. False by default.

56 bool directional_override() const {

57 return directional_override_;

58 }

59 void set_directional_override(bool override) {

60 directional_override_ = override;

61 }

62

63 // Set's Uniscribe's no-ligate override flag. False by default.

64 bool inhibit_ligate() const {

65 return inhibit_ligate_;

66 }

67 void set_inhibit_ligate(bool inhibit) {

68 inhibit_ligate_ = inhibit;

69 }

70

71 // Set letter spacing. We will try to insert this much space between

72 // graphemes (one or more glyphs perceived as a single unit by ordinary users

73 // of a script). Positive values increase letter spacing, negative values

74 // decrease it. 0 by default.

75 int letter_spacing() const {

76 return letter_spacing_;

77 }

78 void set_letter_spacing(int letter_spacing) {

79 letter_spacing_ = letter_spacing;

80 }

81

82 // Set the width of a standard space character. We use this to normalize

83 // space widths. Windows will make spaces after Hindi characters larger than

84 // other spaces. A space_width of 0 means to use the default space width.

85 //

86 // Must be set before Init() is called.

87 int space_width() const {

88 return space_width_;

89 }

90 void set_space_width(int space_width) {

91 space_width_ = space_width;

92 }

93

94 // Set word spacing. We will try to insert this much extra space between

95 // each word in the input (beyond whatever whitespace character separates

96 // words). Positive values lead to increased letter spacing, negative values

97 // decrease it. 0 by default.

98 //

99 // Must be set before Init() is called.

100 int word_spacing() const {

101 return word_spacing_;

102 }

103 void set_word_spacing(int word_spacing) {

104 word_spacing_ = word_spacing;

105 }

106 void set_ascent(int ascent) {

107 ascent_ = ascent;

108 }

109

110 // You must call this after setting any options but before doing any

111 // other calls like asking for widths or drawing.

112 void Init() { InitWithOptionalLengthProtection(true); }

113

114 // Returns the total width in pixels of the text run.

115 int Width() const;

116

117 // Call to justify the text, with the amount of space that should be ADDED to

118 // get the desired width that the column should be justified to. Normally,

119 // spaces are inserted, but for Arabic there will be kashidas (extra strokes)

120 // inserted instead.

121 //

122 // This function MUST be called AFTER Init().

123 void Justify(int additional_space);

124

125 // Computes the given character offset into a pixel offset of the beginning

126 // of that character.

127 int CharacterToX(int offset) const;

128

129 // Converts the given pixel X position into a logical character offset into

130 // the run. For positions appearing before the first character, this will

131 // return -1.

132 int XToCharacter(int x) const;

133

134 // Draws the given characters to (x, y) in the given DC. The font will be

135 // handled by this function, but the font color and other attributes should

136 // be pre-set.

137 //

138 // The y position is the upper left corner, NOT the baseline.

139 void Draw(HDC dc, int x, int y, int from, int to);

140

141 // Returns the first glyph assigned to the character at the given offset.

142 // This function is used to retrieve glyph information when Uniscribe is

143 // being used to generate glyphs for non-complex, non-BMP (above U+FFFF)

144 // characters. These characters are not otherwise special and have no

145 // complex shaping rules, so we don't otherwise need Uniscribe, except

146 // Uniscribe is the only way to get glyphs for non-BMP characters.

147 //

148 // Returns 0 if there is no glyph for the given character.

149 WORD FirstGlyphForCharacter(int char_offset) const;

150

151 protected:

152 // Backend for init. The flag allows the unit test to specify whether we

153 // should fail early for very long strings like normal, or try to pass the

154 // long string to Uniscribe. The latter provides a way to force failure of

155 // shaping.

156 void InitWithOptionalLengthProtection(bool length_protection);

157

158 // Tries to preload the font when the it is not accessible.

159 // This is the default implementation and it does not do anything.

160 virtual void TryToPreloadFont(HFONT font) {}

161

162 private:

163 FRIEND_TEST(UniscribeTest, TooBig);

164

165 // An array corresponding to each item in runs_ containing information

166 // on each of the glyphs that were generated. Like runs_, this is in

167 // reading order. However, for rtl text, the characters within each

168 // item will be reversed.

169 struct Shaping {

170 Shaping()

171 : pre_padding(0),

172 hfont_(NULL),

173 script_cache_(NULL),

174 ascent_offset_(0) {

175 abc.abcA = 0;

176 abc.abcB = 0;

177 abc.abcC = 0;

178 }

179

180 // Returns the number of glyphs (which will be drawn to the screen)

181 // in this run.

182 int glyph_length() const {

183 return static_cast<int>(glyphs->size());

184 }

185

186 // Returns the number of characters (that we started with) in this run.

187 int char_length() const {

188 return static_cast<int>(logs->size());

189 }

190

191 // Returns the advance array that should be used when measuring glyphs.

192 // The returned pointer will indicate an array with glyph_length() elements

193 // and the advance that should be used for each one. This is either the

194 // real advance, or the justified advances if there is one, and is the

195 // array we want to use for measurement.

196 const int* effective_advances() const {

197 if (advance->empty())

198 return 0;

199 if (justify->empty())

200 return &advance[0];

201 return &justify[0];

202 }

203

204 // This is the advance amount of space that we have added to the beginning

205 // of the run. It is like the ABC's \|A\| advance but one that we create and

206 // must handle internally whenever computing with pixel offsets.

207 int pre_padding;

208

209 // Glyph indices in the font used to display this item. These indices

210 // are in screen order.

211 StackVector<WORD, UNISCRIBE_STATE_STACK_CHARS> glyphs;

212

213 // For each input character, this tells us the first glyph index it

214 // generated. This is the only array with size of the input chars.

215 //

216 // All offsets are from the beginning of this run. Multiple characters can

217 // generate one glyph, in which case there will be adjacent duplicates in

218 // this list. One character can also generate multiple glyphs, in which

219 // case there will be skipped indices in this list.

220 StackVector<WORD, UNISCRIBE_STATE_STACK_CHARS> logs;

221

222 // Flags and such for each glyph.

223 StackVector<SCRIPT_VISATTR, UNISCRIBE_STATE_STACK_CHARS> visattr;

224

225 // Horizontal advances for each glyph listed above, this is basically

226 // how wide each glyph is.

227 StackVector<int, UNISCRIBE_STATE_STACK_CHARS> advance;

228

229 // This contains glyph offsets, from the nominal position of a glyph. It

230 // is used to adjust the positions of multiple combining characters

231 // around/above/below base characters in a context-sensitive manner so

232 // that they don't bump against each other and the base character.

233 StackVector<GOFFSET, UNISCRIBE_STATE_STACK_CHARS> offsets;

234

235 // Filled by a call to Justify, this is empty for nonjustified text.

236 // If nonempty, this contains the array of justify characters for each

237 // character as returned by ScriptJustify.

238 //

239 // This is the same as the advance array, but with extra space added for

240 // some characters. The difference between a glyph's \|justify\| width and

241 // it's \|advance\| width is the extra space added.

242 StackVector<int, UNISCRIBE_STATE_STACK_CHARS> justify;

243

244 // Sizing information for this run. This treats the entire run as a

245 // character with a preceeding advance, width, and ending advance.

246 // The B width is the sum of the \|advance\| array, and the A and C widths

247 // are any extra spacing applied to each end.

248 //

249 // It is unclear from the documentation what this actually means. From

250 // experimentation, it seems that the sum of the character advances is

251 // always the sum of the ABC values, and I'm not sure what you're supposed

252 // to do with the ABC values.

253 ABC abc;

254

255 // Pointers to windows font data used to render this run.

256 HFONT hfont_;

257 SCRIPT_CACHE* script_cache_;

258

259 // Ascent offset between the ascent of the primary font

260 // and that of the fallback font. The offset needs to be applied,

261 // when drawing a string, to align multiple runs rendered with

262 // different fonts.

263 int ascent_offset_;

264 };

265

266 // Computes the runs_ array from the text run.

267 void FillRuns();

268

269 // Computes the shapes_ array given a runs_ array already filled in.

270 void FillShapes();

271

272 // Fills in the screen_order_ array (see below).

273 void FillScreenOrder();

274

275 // Called to update the glyph positions based on the current spacing options

276 // that are set.

277 void ApplySpacing();

278

279 // Normalizes all advances for spaces to the same width. This keeps windows

280 // from making spaces after Hindi characters larger, which is then

281 // inconsistent with our measure of the width since WebKit doesn't include

282 // spaces in text-runs sent to uniscribe unless white-space:pre.

283 void AdjustSpaceAdvances();

284

285 // Returns the total width of a single item.

286 int AdvanceForItem(int item_index) const;

287

288 // Shapes a run (pointed to by |input|) using |hfont| first.

289 // Tries a series of fonts retrieved with NextWinFontData

290 // and finally a font covering characters in |*input|. A string pointed

291 // to by |input| comes from ScriptItemize and is supposed to contain

292 // characters belonging to a single script aside from characters

293 // common to all scripts (e.g. space).

294 bool Shape(const wchar_t* input,

295 int item_length,

296 int num_glyphs,

297 SCRIPT_ITEM& run,

298 Shaping& shaping);

299

300 // Gets Windows font data for the next best font to try in the list

301 // of fonts. When there's no more font available, returns false

302 // without touching any of out params. Need to call ResetFontIndex

303 // to start scanning of the font list from the beginning.

304 virtual bool NextWinFontData(HFONT* hfont,

305 SCRIPT_CACHE** script_cache,

306 SCRIPT_FONTPROPERTIES** font_properties,

307 int* ascent) {

308 return false;

309 }

310

311 // Resets the font index to the first in the list of fonts

312 // to try after the primaryFont turns out not to work. With font_index

313 // reset, NextWinFontData scans fallback fonts from the beginning.

314 virtual void ResetFontIndex() {}

315

316 // The input data for this run of Uniscribe. See the constructor.

317 const wchar_t* input_;

318 const int input_length_;

319 const bool is_rtl_;

320

321 // Windows font data for the primary font :

322 // In a sense, logfont_ and style_ are redundant because

323 // hfont_ contains all the information. However, invoking GetObject

324 // every time we need the height and the style is rather expensive, so

325 // we cache them. Would it be better to add getter and (virtual)

326 // setter for the height and the style of the primary font, instead of

327 // logfont_? Then, a derived class ctor can set ascent_, height_ and style_

328 // if they're known. Getters for them would have to 'infer' their values from

329 // hfont_ ONLY when they're not set.

330 HFONT hfont_;

331 SCRIPT_CACHE* script_cache_;

332 SCRIPT_FONTPROPERTIES* font_properties_;

333 int ascent_;

334 LOGFONT logfont_;

335 int style_;

336

337 // Options, see the getters/setters above.

338 bool directional_override_;

339 bool inhibit_ligate_;

340 int letter_spacing_;

341 int space_width_;

342 int word_spacing_;

343 int justification_width_;

344

345 // Uniscribe breaks the text into Runs. These are one length of text that is

346 // in one script and one direction. This array is in reading order.

347 StackVector<SCRIPT_ITEM, UNISCRIBE_STATE_STACK_RUNS> runs_;

348

349 StackVector<Shaping, UNISCRIBE_STATE_STACK_RUNS> shapes_;

350

351 // This is a mapping between reading order and screen order for the items.

352 // Uniscribe's items array is in reading order. For right-to-left text,

353 // or mixed (although WebKit's |TextRun| should really be only one

354 // direction), this makes it very difficult to compute character offsets

355 // and positions. This list is in screen order from left to right, and

356 // gives the index into the |runs_| and |shapes_| arrays of each

357 // subsequent item.

358 StackVector<int, UNISCRIBE_STATE_STACK_RUNS> screen_order_;

359

360 DISALLOW_EVIL_CONSTRUCTORS(UniscribeState);

361 };

362

363 } // namespace gfx

364

365 #endif // BASE_GFX_UNISCRIBE_H__

366

OLD	NEW

« no previous file with comments | « base/gfx/font_utils.cc ('k') | base/gfx/uniscribe.cc » ('j') | webkit/build/port/port.vcproj » ('J')