OLD | NEW |
| (Empty) |
1 /* | |
2 * Copyright (c) 2006, 2007, 2008, 2009, Google Inc. All rights reserved. | |
3 * | |
4 * Redistribution and use in source and binary forms, with or without | |
5 * modification, are permitted provided that the following conditions are | |
6 * met: | |
7 * | |
8 * * Redistributions of source code must retain the above copyright | |
9 * notice, this list of conditions and the following disclaimer. | |
10 * * Redistributions in binary form must reproduce the above | |
11 * copyright notice, this list of conditions and the following disclaimer | |
12 * in the documentation and/or other materials provided with the | |
13 * distribution. | |
14 * * Neither the name of Google Inc. nor the names of its | |
15 * contributors may be used to endorse or promote products derived from | |
16 * this software without specific prior written permission. | |
17 * | |
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
29 */ | |
30 | |
31 // A wrapper around Uniscribe that provides a reasonable API. | |
32 | |
33 #ifndef UniscribeHelper_h | |
34 #define UniscribeHelper_h | |
35 | |
36 #include <windows.h> | |
37 #include <usp10.h> | |
38 #include <map> | |
39 | |
40 #include <unicode/uchar.h> | |
41 #include "wtf/Vector.h" | |
42 | |
43 class UniscribeTest_TooBig_Test; // A gunit test for UniscribeHelper. | |
44 | |
45 namespace WebCore { | |
46 | |
47 class FloatRect; | |
48 class FontFeatureSettings; | |
49 class FontPlatformData; | |
50 class GraphicsContext; | |
51 | |
// Inline capacities for the WTF::Vector members of UniscribeHelper.

// Inline capacity of the per-run vectors (m_runs, m_shapes, m_scriptTags and
// m_screenOrder).
const unsigned cUniscribeHelperStackRuns = 8u;

// Inline capacity of the per-glyph / per-character vectors stored in each
// UniscribeHelper::Shaping.
const unsigned cUniscribeHelperStackChars = 32u;

// Inline capacity of the OpenType feature record vector (m_featureRecords).
const unsigned cUniscribeHelperFeatures = 4u;
55 | |
56 // This object should be safe to create & destroy frequently, as long as the | |
57 // caller preserves the script_cache when possible (this data may be slow to | |
58 // compute). | |
59 // | |
60 // This object is "kind of large" (~1K) because it reserves a lot of space for | |
61 // working with to avoid expensive heap operations. Therefore, not only should | |
62 // you not worry about creating and destroying it, you should try to not keep | |
63 // them around. | |
64 class UniscribeHelper { | |
65 public: | |
66 // Initializes this Uniscribe run with the text pointed to by |run| with | |
67 // |length|. The input is NOT null terminated. | |
68 // | |
69 // The is_rtl flag should be set if the input script is RTL. It is assumed | |
70 // that the caller has already divided up the input text (using ICU, for | |
71 // example) into runs of the same direction of script. This avoids | |
72 // disagreements between the caller and Uniscribe later (see FillItems). | |
73 // | |
74 // A script cache should be provided by the caller that is initialized to | |
75 // NULL. When the caller is done with the cache (it may be stored between | |
76 // runs as long as it is used consistently with the same HFONT), it should | |
77 // call ScriptFreeCache(). | |
78 UniscribeHelper(const UChar* input, | |
79 int inputLength, | |
80 bool isRtl, | |
81 HFONT, | |
82 SCRIPT_CACHE*, | |
83 SCRIPT_FONTPROPERTIES*, | |
84 WORD); | |
85 | |
86 virtual ~UniscribeHelper(); | |
87 | |
88 // Sets Uniscribe's directional override flag. False by default. | |
89 bool directionalOverride() const | |
90 { | |
91 return m_directionalOverride; | |
92 } | |
93 void setDirectionalOverride(bool override) | |
94 { | |
95 m_directionalOverride = override; | |
96 } | |
97 | |
98 // Set's Uniscribe's no-ligate override flag. False by default. | |
99 bool inhibitLigate() const | |
100 { | |
101 return m_inhibitLigate; | |
102 } | |
103 void setInhibitLigate(bool inhibit) | |
104 { | |
105 m_inhibitLigate = inhibit; | |
106 } | |
107 | |
108 // Set letter spacing. We will try to insert this much space between | |
109 // graphemes (one or more glyphs perceived as a single unit by ordinary | |
110 // users of a script). Positive values increase letter spacing, negative | |
111 // values decrease it. 0 by default. | |
112 int letterSpacing() const | |
113 { | |
114 return m_letterSpacing; | |
115 } | |
116 void setLetterSpacing(int letterSpacing) | |
117 { | |
118 m_letterSpacing = letterSpacing; | |
119 } | |
120 | |
121 // Set the width of a standard space character. We use this to normalize | |
122 // space widths. Windows will make spaces after Hindi characters larger than | |
123 // other spaces. A space_width of 0 means to use the default space width. | |
124 // | |
125 // Must be set before Init() is called. | |
126 int spaceWidth() const | |
127 { | |
128 return m_spaceWidth; | |
129 } | |
130 void setSpaceWidth(int spaceWidth) | |
131 { | |
132 m_spaceWidth = spaceWidth; | |
133 } | |
134 | |
135 // Set word spacing. We will try to insert this much extra space between | |
136 // each word in the input (beyond whatever whitespace character separates | |
137 // words). Positive values lead to increased letter spacing, negative values | |
138 // decrease it. 0 by default. | |
139 // | |
140 // Must be set before Init() is called. | |
141 int wordSpacing() const | |
142 { | |
143 return m_wordSpacing; | |
144 } | |
145 void setWordSpacing(int wordSpacing) | |
146 { | |
147 m_wordSpacing = wordSpacing; | |
148 } | |
149 | |
150 void setAscent(int ascent) | |
151 { | |
152 m_ascent = ascent; | |
153 } | |
154 | |
155 // When set to true, this class is used only to look up glyph | |
156 // indices for a range of Unicode characters without glyph placement. | |
157 // By default, it's false. This should be set to true when this | |
158 // class is used for glyph index look-up for non-BMP characters | |
159 // in GlyphPageNodeChromiumWin.cpp. | |
160 void setDisableFontFallback(bool disableFontFallback) | |
161 { | |
162 m_disableFontFallback = true; | |
163 } | |
164 | |
165 // Set TEXTRANGE_PROPERTIES structure which contains | |
166 // OpenType feature records generated from FontFeatureSettings. | |
167 void setRangeProperties(const FontFeatureSettings*); | |
168 | |
169 // You must call this after setting any options but before doing any | |
170 // other calls like asking for widths or drawing. | |
171 void init() | |
172 { | |
173 initWithOptionalLengthProtection(true); | |
174 } | |
175 | |
176 // Returns the total width in pixels of the text run. | |
177 int width() const; | |
178 | |
179 // Call to justify the text, with the amount of space that should be ADDED | |
180 // to get the desired width that the column should be justified to. | |
181 // Normally, spaces are inserted, but for Arabic there will be kashidas | |
182 // (extra strokes) inserted instead. | |
183 // | |
184 // This function MUST be called AFTER Init(). | |
185 void justify(int additionalSpace); | |
186 | |
187 // Computes the given character offset into a pixel offset of the beginning | |
188 // of that character. | |
189 int characterToX(int offset) const; | |
190 | |
191 // Converts the given pixel X position into a logical character offset into | |
192 // the run. For positions appearing before the first character, this will | |
193 // return -1. | |
194 int xToCharacter(int x) const; | |
195 | |
196 // Draws the given characters to (x, y) in the given DC. The font will be | |
197 // handled by this function, but the font color and other attributes should | |
198 // be pre-set. | |
199 // | |
200 // The y position is the upper left corner, NOT the baseline. | |
201 void draw(GraphicsContext*, const FontPlatformData&, HDC, | |
202 int x, int y, const FloatRect& textRect, | |
203 int from, int to); | |
204 | |
205 // Returns the first glyph assigned to the character at the given offset. | |
206 // This function is used to retrieve glyph information when Uniscribe is | |
207 // being used to generate glyphs for non-complex, non-BMP (above U+FFFF) | |
208 // characters. These characters are not otherwise special and have no | |
209 // complex shaping rules, so we don't otherwise need Uniscribe, except | |
210 // Uniscribe is the only way to get glyphs for non-BMP characters. | |
211 // | |
212 // Returns 0 if there is no glyph for the given character. | |
213 WORD firstGlyphForCharacter(int charOffset) const; | |
214 | |
215 protected: | |
216 // Backend for init. The flag allows the unit test to specify whether we | |
217 // should fail early for very long strings like normal, or try to pass the | |
218 // long string to Uniscribe. The latter provides a way to force failure of | |
219 // shaping. | |
220 void initWithOptionalLengthProtection(bool lengthProtection); | |
221 | |
222 // Tries to preload the font when the it is not accessible. | |
223 // This is the default implementation and it does not do anything. | |
224 virtual void tryToPreloadFont(HFONT) {} | |
225 | |
226 // Let our subclasses provide the input lazily in case they can't compute | |
227 // it in their constructors. Once we have input, however, we don't let | |
228 // our subclasses change it. | |
229 void setInput(const UChar* input) { ASSERT(!m_input); m_input = input; } | |
230 | |
231 private: | |
232 friend class UniscribeTest_TooBig_Test; | |
233 | |
234 // An array corresponding to each item in runs_ containing information | |
235 // on each of the glyphs that were generated. Like runs_, this is in | |
236 // reading order. However, for rtl text, the characters within each | |
237 // item will be reversed. | |
238 struct Shaping { | |
239 Shaping() | |
240 : m_prePadding(0) | |
241 , m_hfont(NULL) | |
242 , m_scriptCache(NULL) | |
243 , m_ascentOffset(0) | |
244 , m_spaceGlyph(0) | |
245 { | |
246 m_abc.abcA = 0; | |
247 m_abc.abcB = 0; | |
248 m_abc.abcC = 0; | |
249 } | |
250 | |
251 // Returns the number of glyphs (which will be drawn to the screen) | |
252 // in this run. | |
253 int glyphLength() const | |
254 { | |
255 return static_cast<int>(m_glyphs.size()); | |
256 } | |
257 | |
258 // Returns the number of characters (that we started with) in this run. | |
259 int charLength() const | |
260 { | |
261 return static_cast<int>(m_logs.size()); | |
262 } | |
263 | |
264 // Returns the advance array that should be used when measuring glyphs. | |
265 // The returned pointer will indicate an array with glyph_length() | |
266 // elements and the advance that should be used for each one. This is | |
267 // either the real advance, or the justified advances if there is one, | |
268 // and is the array we want to use for measurement. | |
269 const int* effectiveAdvances() const | |
270 { | |
271 if (m_advance.size() == 0) | |
272 return 0; | |
273 if (m_justify.size() == 0) | |
274 return &m_advance[0]; | |
275 return &m_justify[0]; | |
276 } | |
277 | |
278 // This is the advance amount of space that we have added to the | |
279 // beginning of the run. It is like the ABC's |A| advance but one that | |
280 // we create and must handle internally whenever computing with pixel | |
281 // offsets. | |
282 int m_prePadding; | |
283 | |
284 // Glyph indices in the font used to display this item. These indices | |
285 // are in screen order. | |
286 Vector<WORD, cUniscribeHelperStackChars> m_glyphs; | |
287 | |
288 // For each input character, this tells us the first glyph index it | |
289 // generated. This is the only array with size of the input chars. | |
290 // | |
291 // All offsets are from the beginning of this run. Multiple characters | |
292 // can generate one glyph, in which case there will be adjacent | |
293 // duplicates in this list. One character can also generate multiple | |
294 // glyphs, in which case there will be skipped indices in this list. | |
295 Vector<WORD, cUniscribeHelperStackChars> m_logs; | |
296 | |
297 // Flags and such for each glyph. | |
298 Vector<SCRIPT_VISATTR, cUniscribeHelperStackChars> m_visualAttributes; | |
299 | |
300 // Horizontal advances for each glyph listed above, this is basically | |
301 // how wide each glyph is. | |
302 Vector<int, cUniscribeHelperStackChars> m_advance; | |
303 | |
304 // This contains glyph offsets, from the nominal position of a glyph. | |
305 // It is used to adjust the positions of multiple combining characters | |
306 // around/above/below base characters in a context-sensitive manner so | |
307 // that they don't bump against each other and the base character. | |
308 Vector<GOFFSET, cUniscribeHelperStackChars> m_offsets; | |
309 | |
310 // Filled by a call to Justify, this is empty for nonjustified text. | |
311 // If nonempty, this contains the array of justify characters for each | |
312 // character as returned by ScriptJustify. | |
313 // | |
314 // This is the same as the advance array, but with extra space added | |
315 // for some characters. The difference between a glyph's |justify| | |
316 // width and it's |advance| width is the extra space added. | |
317 Vector<int, cUniscribeHelperStackChars> m_justify; | |
318 | |
319 // Sizing information for this run. This treats the entire run as a | |
320 // character with a preceeding advance, width, and ending advance. The | |
321 // B width is the sum of the |advance| array, and the A and C widths | |
322 // are any extra spacing applied to each end. | |
323 // | |
324 // It is unclear from the documentation what this actually means. From | |
325 // experimentation, it seems that the sum of the character advances is | |
326 // always the sum of the ABC values, and I'm not sure what you're | |
327 // supposed to do with the ABC values. | |
328 ABC m_abc; | |
329 | |
330 // Pointers to windows font data used to render this run. | |
331 HFONT m_hfont; | |
332 SCRIPT_CACHE* m_scriptCache; | |
333 | |
334 // Ascent offset between the ascent of the primary font | |
335 // and that of the fallback font. The offset needs to be applied, | |
336 // when drawing a string, to align multiple runs rendered with | |
337 // different fonts. | |
338 int m_ascentOffset; | |
339 | |
340 WORD m_spaceGlyph; | |
341 }; | |
342 | |
343 // Computes the runs_ array from the text run. | |
344 void fillRuns(); | |
345 | |
346 // Computes the shapes_ array given an runs_ array already filled in. | |
347 void fillShapes(); | |
348 | |
349 // Fills in the screen_order_ array (see below). | |
350 void fillScreenOrder(); | |
351 | |
352 // Called to update the glyph positions based on the current spacing | |
353 // options that are set. | |
354 void applySpacing(); | |
355 | |
356 // Normalizes all advances for spaces to the same width. This keeps windows | |
357 // from making spaces after Hindi characters larger, which is then | |
358 // inconsistent with our meaure of the width since WebKit doesn't include | |
359 // spaces in text-runs sent to uniscribe unless white-space:pre. | |
360 void adjustSpaceAdvances(); | |
361 | |
362 // Returns the total width of a single item. | |
363 int advanceForItem(int) const; | |
364 | |
365 bool containsMissingGlyphs(const Shaping&, | |
366 const SCRIPT_ITEM&, | |
367 const SCRIPT_FONTPROPERTIES*) const; | |
368 | |
369 // Shapes a run (pointed to by |input|) using |hfont| first. | |
370 // Tries a series of fonts specified retrieved with NextWinFontData | |
371 // and finally a font covering characters in |*input|. A string pointed | |
372 // by |input| comes from ScriptItemize and is supposed to contain | |
373 // characters belonging to a single script aside from characters common to | |
374 // all scripts (e.g. space). | |
375 bool shape(const UChar* input, int itemLength, int numGlyphs, SCRIPT_ITEM& r
un, OPENTYPE_TAG, Shaping&); | |
376 | |
377 // Gets Windows font data for the next best font to try in the list | |
378 // of fonts. When there's no more font available, returns false | |
379 // without touching any of out params. Need to call ResetFontIndex | |
380 // to start scanning of the font list from the beginning. | |
381 virtual bool nextWinFontData(HFONT&, SCRIPT_CACHE*&, SCRIPT_FONTPROPERTIES*&
, int&, WORD&) | |
382 { | |
383 return false; | |
384 } | |
385 | |
386 // Resets the font index to the first in the list of fonts to try after the | |
387 // primaryFont turns out not to work. With fontIndex reset, | |
388 // NextWinFontData scans fallback fonts from the beginning. | |
389 virtual void resetFontIndex() {} | |
390 | |
391 // If m_cachedDC is 0, creates one that is compatible with the screen DC. | |
392 void EnsureCachedDCCreated(); | |
393 | |
394 // The input data for this run of Uniscribe. See the constructor. | |
395 const UChar* m_input; | |
396 const int m_inputLength; | |
397 const bool m_isRtl; | |
398 | |
399 // Windows font data for the primary font. In a sense, m_logfont and m_style | |
400 // are redundant because m_hfont contains all the information. However, | |
401 // invoking GetObject, everytime we need the height and the style, is rather | |
402 // expensive so that we cache them. Would it be better to add getter and | |
403 // (virtual) setter for the height and the style of the primary font, | |
404 // instead of m_logfont? Then, a derived class ctor can set m_ascent, | |
405 // m_height and m_style if they're known. Getters for them would have to | |
406 // 'infer' their values from m_hfont ONLY when they're not set. | |
407 HFONT m_hfont; | |
408 // We cache the DC to use with ScriptShape/ScriptPlace. | |
409 static HDC m_cachedDC; | |
410 SCRIPT_CACHE* m_scriptCache; | |
411 SCRIPT_FONTPROPERTIES* m_fontProperties; | |
412 int m_ascent; | |
413 LOGFONT m_logfont; | |
414 int m_style; | |
415 WORD m_spaceGlyph; | |
416 | |
417 // Options, see the getters/setters above. | |
418 bool m_directionalOverride; | |
419 bool m_inhibitLigate; | |
420 int m_letterSpacing; | |
421 int m_spaceWidth; | |
422 int m_wordSpacing; | |
423 bool m_disableFontFallback; | |
424 | |
425 // Uniscribe breaks the text into Runs. These are one length of text that is | |
426 // in one script and one direction. This array is in reading order. | |
427 Vector<SCRIPT_ITEM, cUniscribeHelperStackRuns> m_runs; | |
428 | |
429 Vector<Shaping, cUniscribeHelperStackRuns> m_shapes; | |
430 Vector<OPENTYPE_TAG, cUniscribeHelperStackRuns> m_scriptTags; | |
431 | |
432 // This is a mapping between reading order and screen order for the items. | |
433 // Uniscribe's items array are in reading order. For right-to-left text, | |
434 // or mixed (although WebKit's |TextRun| should really be only one | |
435 // direction), this makes it very difficult to compute character offsets | |
436 // and positions. This list is in screen order from left to right, and | |
437 // gives the index into the |m_runs| and |m_shapes| arrays of each | |
438 // subsequent item. | |
439 Vector<int, cUniscribeHelperStackRuns> m_screenOrder; | |
440 | |
441 // This contains Uniscribe's OpenType feature settings. This structure | |
442 // is filled by using WebKit's |FontFeatureSettings|. | |
443 TEXTRANGE_PROPERTIES m_rangeProperties; | |
444 Vector<OPENTYPE_FEATURE_RECORD, cUniscribeHelperFeatures> m_featureRecords; | |
445 }; | |
446 | |
447 } // namespace WebCore | |
448 | |
449 #endif // UniscribeHelper_h | |
OLD | NEW |