OLD | NEW |
| (Empty) |
1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 // | |
5 // A wrapper around Uniscribe that provides a reasonable API. | |
6 | |
7 #ifndef BASE_GFX_UNISCRIBE_H__ | |
8 #define BASE_GFX_UNISCRIBE_H__ | |
9 | |
10 #include <windows.h> | |
11 #include <usp10.h> | |
12 #include <wchar.h> | |
13 #include <map> | |
14 #include <vector> | |
15 | |
16 #include "base/stack_container.h" | |
17 #include "testing/gtest/include/gtest/gtest_prod.h" | |
18 | |
19 namespace gfx { | |
20 | |
21 #define UNISCRIBE_STATE_STACK_RUNS 8 | |
22 #define UNISCRIBE_STATE_STACK_CHARS 32 | |
23 | |
24 // This object should be safe to create & destroy frequently, as long as the | |
25 // caller preserves the script_cache when possible (this data may be slow to | |
26 // compute). | |
27 // | |
28 // This object is "kind of large" (~1K) because it reserves a lot of space for | |
29 // working with to avoid expensive heap operations. Therefore, not only should | |
30 // you not worry about creating and destroying it, you should try to not keep | |
31 // them around. | |
32 class UniscribeState { | |
33 public: | |
34 // Initializes this Uniscribe run with the text pointed to by |run| with | |
35 // |length|. The input is NOT null terminated. | |
36 // | |
37 // The is_rtl flag should be set if the input script is RTL. It is assumed | |
38 // that the caller has already divided up the input text (using ICU, for | |
39 // example) into runs of the same direction of script. This avoids | |
40 // disagreements between the caller and Uniscribe later (see FillItems). | |
41 // | |
42 // A script cache should be provided by the caller that is initialized to | |
43 // NULL. When the caller is done with the cache (it may be stored between | |
44 // runs as long as it is used consistently with the same HFONT), it should | |
45 // call ScriptFreeCache(). | |
46 UniscribeState(const wchar_t* input, | |
47 int input_length, | |
48 bool is_rtl, | |
49 HFONT hfont, | |
50 SCRIPT_CACHE* script_cache, | |
51 SCRIPT_FONTPROPERTIES* font_properties); | |
52 | |
53 virtual ~UniscribeState(); | |
54 | |
55 // Sets Uniscribe's directional override flag. False by default. | |
56 bool directional_override() const { | |
57 return directional_override_; | |
58 } | |
59 void set_directional_override(bool override) { | |
60 directional_override_ = override; | |
61 } | |
62 | |
63 // Set's Uniscribe's no-ligate override flag. False by default. | |
64 bool inhibit_ligate() const { | |
65 return inhibit_ligate_; | |
66 } | |
67 void set_inhibit_ligate(bool inhibit) { | |
68 inhibit_ligate_ = inhibit; | |
69 } | |
70 | |
71 // Set letter spacing. We will try to insert this much space between | |
72 // graphemes (one or more glyphs perceived as a single unit by ordinary users | |
73 // of a script). Positive values increase letter spacing, negative values | |
74 // decrease it. 0 by default. | |
75 int letter_spacing() const { | |
76 return letter_spacing_; | |
77 } | |
78 void set_letter_spacing(int letter_spacing) { | |
79 letter_spacing_ = letter_spacing; | |
80 } | |
81 | |
82 // Set the width of a standard space character. We use this to normalize | |
83 // space widths. Windows will make spaces after Hindi characters larger than | |
84 // other spaces. A space_width of 0 means to use the default space width. | |
85 // | |
86 // Must be set before Init() is called. | |
87 int space_width() const { | |
88 return space_width_; | |
89 } | |
90 void set_space_width(int space_width) { | |
91 space_width_ = space_width; | |
92 } | |
93 | |
94 // Set word spacing. We will try to insert this much extra space between | |
95 // each word in the input (beyond whatever whitespace character separates | |
96 // words). Positive values lead to increased letter spacing, negative values | |
97 // decrease it. 0 by default. | |
98 // | |
99 // Must be set before Init() is called. | |
100 int word_spacing() const { | |
101 return word_spacing_; | |
102 } | |
103 void set_word_spacing(int word_spacing) { | |
104 word_spacing_ = word_spacing; | |
105 } | |
106 void set_ascent(int ascent) { | |
107 ascent_ = ascent; | |
108 } | |
109 | |
110 // You must call this after setting any options but before doing any | |
111 // other calls like asking for widths or drawing. | |
112 void Init() { InitWithOptionalLengthProtection(true); } | |
113 | |
114 // Returns the total width in pixels of the text run. | |
115 int Width() const; | |
116 | |
117 // Call to justify the text, with the amount of space that should be ADDED to | |
118 // get the desired width that the column should be justified to. Normally, | |
119 // spaces are inserted, but for Arabic there will be kashidas (extra strokes) | |
120 // inserted instead. | |
121 // | |
122 // This function MUST be called AFTER Init(). | |
123 void Justify(int additional_space); | |
124 | |
125 // Computes the given character offset into a pixel offset of the beginning | |
126 // of that character. | |
127 int CharacterToX(int offset) const; | |
128 | |
129 // Converts the given pixel X position into a logical character offset into | |
130 // the run. For positions appearing before the first character, this will | |
131 // return -1. | |
132 int XToCharacter(int x) const; | |
133 | |
134 // Draws the given characters to (x, y) in the given DC. The font will be | |
135 // handled by this function, but the font color and other attributes should | |
136 // be pre-set. | |
137 // | |
138 // The y position is the upper left corner, NOT the baseline. | |
139 void Draw(HDC dc, int x, int y, int from, int to); | |
140 | |
141 // Returns the first glyph assigned to the character at the given offset. | |
142 // This function is used to retrieve glyph information when Uniscribe is | |
143 // being used to generate glyphs for non-complex, non-BMP (above U+FFFF) | |
144 // characters. These characters are not otherwise special and have no | |
145 // complex shaping rules, so we don't otherwise need Uniscribe, except | |
146 // Uniscribe is the only way to get glyphs for non-BMP characters. | |
147 // | |
148 // Returns 0 if there is no glyph for the given character. | |
149 WORD FirstGlyphForCharacter(int char_offset) const; | |
150 | |
151 protected: | |
152 // Backend for init. The flag allows the unit test to specify whether we | |
153 // should fail early for very long strings like normal, or try to pass the | |
154 // long string to Uniscribe. The latter provides a way to force failure of | |
155 // shaping. | |
156 void InitWithOptionalLengthProtection(bool length_protection); | |
157 | |
158 // Tries to preload the font when the it is not accessible. | |
159 // This is the default implementation and it does not do anything. | |
160 virtual void TryToPreloadFont(HFONT font) {} | |
161 | |
162 private: | |
163 FRIEND_TEST(UniscribeTest, TooBig); | |
164 | |
165 // An array corresponding to each item in runs_ containing information | |
166 // on each of the glyphs that were generated. Like runs_, this is in | |
167 // reading order. However, for rtl text, the characters within each | |
168 // item will be reversed. | |
169 struct Shaping { | |
170 Shaping() | |
171 : pre_padding(0), | |
172 hfont_(NULL), | |
173 script_cache_(NULL), | |
174 ascent_offset_(0) { | |
175 abc.abcA = 0; | |
176 abc.abcB = 0; | |
177 abc.abcC = 0; | |
178 } | |
179 | |
180 // Returns the number of glyphs (which will be drawn to the screen) | |
181 // in this run. | |
182 int glyph_length() const { | |
183 return static_cast<int>(glyphs->size()); | |
184 } | |
185 | |
186 // Returns the number of characters (that we started with) in this run. | |
187 int char_length() const { | |
188 return static_cast<int>(logs->size()); | |
189 } | |
190 | |
191 // Returns the advance array that should be used when measuring glyphs. | |
192 // The returned pointer will indicate an array with glyph_length() elements | |
193 // and the advance that should be used for each one. This is either the | |
194 // real advance, or the justified advances if there is one, and is the | |
195 // array we want to use for measurement. | |
196 const int* effective_advances() const { | |
197 if (advance->empty()) | |
198 return 0; | |
199 if (justify->empty()) | |
200 return &advance[0]; | |
201 return &justify[0]; | |
202 } | |
203 | |
204 // This is the advance amount of space that we have added to the beginning | |
205 // of the run. It is like the ABC's |A| advance but one that we create and | |
206 // must handle internally whenever computing with pixel offsets. | |
207 int pre_padding; | |
208 | |
209 // Glyph indices in the font used to display this item. These indices | |
210 // are in screen order. | |
211 StackVector<WORD, UNISCRIBE_STATE_STACK_CHARS> glyphs; | |
212 | |
213 // For each input character, this tells us the first glyph index it | |
214 // generated. This is the only array with size of the input chars. | |
215 // | |
216 // All offsets are from the beginning of this run. Multiple characters can | |
217 // generate one glyph, in which case there will be adjacent duplicates in | |
218 // this list. One character can also generate multiple glyphs, in which | |
219 // case there will be skipped indices in this list. | |
220 StackVector<WORD, UNISCRIBE_STATE_STACK_CHARS> logs; | |
221 | |
222 // Flags and such for each glyph. | |
223 StackVector<SCRIPT_VISATTR, UNISCRIBE_STATE_STACK_CHARS> visattr; | |
224 | |
225 // Horizontal advances for each glyph listed above, this is basically | |
226 // how wide each glyph is. | |
227 StackVector<int, UNISCRIBE_STATE_STACK_CHARS> advance; | |
228 | |
229 // This contains glyph offsets, from the nominal position of a glyph. It | |
230 // is used to adjust the positions of multiple combining characters | |
231 // around/above/below base characters in a context-sensitive manner so | |
232 // that they don't bump against each other and the base character. | |
233 StackVector<GOFFSET, UNISCRIBE_STATE_STACK_CHARS> offsets; | |
234 | |
235 // Filled by a call to Justify, this is empty for nonjustified text. | |
236 // If nonempty, this contains the array of justify characters for each | |
237 // character as returned by ScriptJustify. | |
238 // | |
239 // This is the same as the advance array, but with extra space added for | |
240 // some characters. The difference between a glyph's |justify| width and | |
241 // it's |advance| width is the extra space added. | |
242 StackVector<int, UNISCRIBE_STATE_STACK_CHARS> justify; | |
243 | |
244 // Sizing information for this run. This treats the entire run as a | |
245 // character with a preceeding advance, width, and ending advance. | |
246 // The B width is the sum of the |advance| array, and the A and C widths | |
247 // are any extra spacing applied to each end. | |
248 // | |
249 // It is unclear from the documentation what this actually means. From | |
250 // experimentation, it seems that the sum of the character advances is | |
251 // always the sum of the ABC values, and I'm not sure what you're supposed | |
252 // to do with the ABC values. | |
253 ABC abc; | |
254 | |
255 // Pointers to windows font data used to render this run. | |
256 HFONT hfont_; | |
257 SCRIPT_CACHE* script_cache_; | |
258 | |
259 // Ascent offset between the ascent of the primary font | |
260 // and that of the fallback font. The offset needs to be applied, | |
261 // when drawing a string, to align multiple runs rendered with | |
262 // different fonts. | |
263 int ascent_offset_; | |
264 }; | |
265 | |
266 // Computes the runs_ array from the text run. | |
267 void FillRuns(); | |
268 | |
269 // Computes the shapes_ array given an runs_ array already filled in. | |
270 void FillShapes(); | |
271 | |
272 // Fills in the screen_order_ array (see below). | |
273 void FillScreenOrder(); | |
274 | |
275 // Called to update the glyph positions based on the current spacing options | |
276 // that are set. | |
277 void ApplySpacing(); | |
278 | |
279 // Normalizes all advances for spaces to the same width. This keeps windows | |
280 // from making spaces after Hindi characters larger, which is then | |
281 // inconsistent with our meaure of the width since WebKit doesn't include | |
282 // spaces in text-runs sent to uniscribe unless white-space:pre. | |
283 void AdjustSpaceAdvances(); | |
284 | |
285 // Returns the total width of a single item. | |
286 int AdvanceForItem(int item_index) const; | |
287 | |
288 // Shapes a run (pointed to by |input|) using |hfont| first. | |
289 // Tries a series of fonts specified retrieved with NextWinFontData | |
290 // and finally a font covering characters in |*input|. A string pointed | |
291 // by |input| comes from ScriptItemize and is supposed to contain | |
292 // characters belonging to a single script aside from characters | |
293 // common to all scripts (e.g. space). | |
294 bool Shape(const wchar_t* input, | |
295 int item_length, | |
296 int num_glyphs, | |
297 SCRIPT_ITEM& run, | |
298 Shaping& shaping); | |
299 | |
300 // Gets Windows font data for the next best font to try in the list | |
301 // of fonts. When there's no more font available, returns false | |
302 // without touching any of out params. Need to call ResetFontIndex | |
303 // to start scanning of the font list from the beginning. | |
304 virtual bool NextWinFontData(HFONT* hfont, | |
305 SCRIPT_CACHE** script_cache, | |
306 SCRIPT_FONTPROPERTIES** font_properties, | |
307 int* ascent) { | |
308 return false; | |
309 } | |
310 | |
311 // Resets the font index to the first in the list of fonts | |
312 // to try after the primaryFont turns out not to work. With font_index | |
313 // reset, NextWinFontData scans fallback fonts from the beginning. | |
314 virtual void ResetFontIndex() {} | |
315 | |
316 // The input data for this run of Uniscribe. See the constructor. | |
317 const wchar_t* input_; | |
318 const int input_length_; | |
319 const bool is_rtl_; | |
320 | |
321 // Windows font data for the primary font : | |
322 // In a sense, logfont_ and style_ are redundant because | |
323 // hfont_ contains all the information. However, invoking GetObject, | |
324 // everytime we need the height and the style, is rather expensive so | |
325 // that we cache them. Would it be better to add getter and (virtual) | |
326 // setter for the height and the style of the primary font, instead of | |
327 // logfont_? Then, a derived class ctor can set ascent_, height_ and style_ | |
328 // if they're known. Getters for them would have to 'infer' their values from | |
329 // hfont_ ONLY when they're not set. | |
330 HFONT hfont_; | |
331 SCRIPT_CACHE* script_cache_; | |
332 SCRIPT_FONTPROPERTIES* font_properties_; | |
333 int ascent_; | |
334 LOGFONT logfont_; | |
335 int style_; | |
336 | |
337 // Options, see the getters/setters above. | |
338 bool directional_override_; | |
339 bool inhibit_ligate_; | |
340 int letter_spacing_; | |
341 int space_width_; | |
342 int word_spacing_; | |
343 int justification_width_; | |
344 | |
345 // Uniscribe breaks the text into Runs. These are one length of text that is | |
346 // in one script and one direction. This array is in reading order. | |
347 StackVector<SCRIPT_ITEM, UNISCRIBE_STATE_STACK_RUNS> runs_; | |
348 | |
349 StackVector<Shaping, UNISCRIBE_STATE_STACK_RUNS> shapes_; | |
350 | |
351 // This is a mapping between reading order and screen order for the items. | |
352 // Uniscribe's items array are in reading order. For right-to-left text, | |
353 // or mixed (although WebKit's |TextRun| should really be only one | |
354 // direction), this makes it very difficult to compute character offsets | |
355 // and positions. This list is in screen order from left to right, and | |
356 // gives the index into the |runs_| and |shapes_| arrays of each | |
357 // subsequent item. | |
358 StackVector<int, UNISCRIBE_STATE_STACK_RUNS> screen_order_; | |
359 | |
360 DISALLOW_EVIL_CONSTRUCTORS(UniscribeState); | |
361 }; | |
362 | |
363 } // namespace gfx | |
364 | |
365 #endif // BASE_GFX_UNISCRIBE_H__ | |
366 | |
OLD | NEW |