| OLD | NEW |
| (Empty) |
| 1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 // | |
| 5 // A wrapper around Uniscribe that provides a reasonable API. | |
| 6 | |
| 7 #ifndef BASE_GFX_UNISCRIBE_H__ | |
| 8 #define BASE_GFX_UNISCRIBE_H__ | |
| 9 | |
| 10 #include <windows.h> | |
| 11 #include <usp10.h> | |
| 12 #include <wchar.h> | |
| 13 #include <map> | |
| 14 #include <vector> | |
| 15 | |
| 16 #include "base/stack_container.h" | |
| 17 #include "testing/gtest/include/gtest/gtest_prod.h" | |
| 18 | |
| 19 namespace gfx { | |
| 20 | |
| 21 #define UNISCRIBE_STATE_STACK_RUNS 8 | |
| 22 #define UNISCRIBE_STATE_STACK_CHARS 32 | |
| 23 | |
| 24 // This object should be safe to create & destroy frequently, as long as the | |
| 25 // caller preserves the script_cache when possible (this data may be slow to | |
| 26 // compute). | |
| 27 // | |
| 28 // This object is "kind of large" (~1K) because it reserves a lot of working | |
| 29 // space to avoid expensive heap operations. Therefore, not only should | |
| 30 // you not worry about creating and destroying it, you should try to not keep | |
| 31 // them around. | |
| 32 class UniscribeState { | |
| 33 public: | |
| 34 // Initializes this Uniscribe run with the text pointed to by |input| with | |
| 35 // |input_length|. The input is NOT null terminated. | |
| 36 // | |
| 37 // The is_rtl flag should be set if the input script is RTL. It is assumed | |
| 38 // that the caller has already divided up the input text (using ICU, for | |
| 39 // example) into runs of the same direction of script. This avoids | |
| 40 // disagreements between the caller and Uniscribe later (see FillRuns). | |
| 41 // | |
| 42 // A script cache should be provided by the caller that is initialized to | |
| 43 // NULL. When the caller is done with the cache (it may be stored between | |
| 44 // runs as long as it is used consistently with the same HFONT), it should | |
| 45 // call ScriptFreeCache(). | |
| 46 UniscribeState(const wchar_t* input, | |
| 47 int input_length, | |
| 48 bool is_rtl, | |
| 49 HFONT hfont, | |
| 50 SCRIPT_CACHE* script_cache, | |
| 51 SCRIPT_FONTPROPERTIES* font_properties); | |
| 52 | |
| 53 virtual ~UniscribeState(); | |
| 54 | |
| 55 // Sets Uniscribe's directional override flag. False by default. | |
| 56 bool directional_override() const { | |
| 57 return directional_override_; | |
| 58 } | |
| 59 void set_directional_override(bool override) { | |
| 60 directional_override_ = override; | |
| 61 } | |
| 62 | |
| 63 // Sets Uniscribe's no-ligate override flag. False by default. | |
| 64 bool inhibit_ligate() const { | |
| 65 return inhibit_ligate_; | |
| 66 } | |
| 67 void set_inhibit_ligate(bool inhibit) { | |
| 68 inhibit_ligate_ = inhibit; | |
| 69 } | |
| 70 | |
| 71 // Set letter spacing. We will try to insert this much space between | |
| 72 // graphemes (one or more glyphs perceived as a single unit by ordinary users | |
| 73 // of a script). Positive values increase letter spacing, negative values | |
| 74 // decrease it. 0 by default. | |
| 75 int letter_spacing() const { | |
| 76 return letter_spacing_; | |
| 77 } | |
| 78 void set_letter_spacing(int letter_spacing) { | |
| 79 letter_spacing_ = letter_spacing; | |
| 80 } | |
| 81 | |
| 82 // Set the width of a standard space character. We use this to normalize | |
| 83 // space widths. Windows will make spaces after Hindi characters larger than | |
| 84 // other spaces. A space_width of 0 means to use the default space width. | |
| 85 // | |
| 86 // Must be set before Init() is called. | |
| 87 int space_width() const { | |
| 88 return space_width_; | |
| 89 } | |
| 90 void set_space_width(int space_width) { | |
| 91 space_width_ = space_width; | |
| 92 } | |
| 93 | |
| 94 // Set word spacing. We will try to insert this much extra space between | |
| 95 // each word in the input (beyond whatever whitespace character separates | |
| 96 // words). Positive values lead to increased word spacing, negative values | |
| 97 // decrease it. 0 by default. | |
| 98 // | |
| 99 // Must be set before Init() is called. | |
| 100 int word_spacing() const { | |
| 101 return word_spacing_; | |
| 102 } | |
| 103 void set_word_spacing(int word_spacing) { | |
| 104 word_spacing_ = word_spacing; | |
| 105 } | |
| 106 void set_ascent(int ascent) { | |
| 107 ascent_ = ascent; | |
| 108 } | |
| 109 | |
| 110 // You must call this after setting any options but before doing any | |
| 111 // other calls like asking for widths or drawing. | |
| 112 void Init() { InitWithOptionalLengthProtection(true); } | |
| 113 | |
| 114 // Returns the total width in pixels of the text run. | |
| 115 int Width() const; | |
| 116 | |
| 117 // Call to justify the text, with the amount of space that should be ADDED to | |
| 118 // get the desired width that the column should be justified to. Normally, | |
| 119 // spaces are inserted, but for Arabic there will be kashidas (extra strokes) | |
| 120 // inserted instead. | |
| 121 // | |
| 122 // This function MUST be called AFTER Init(). | |
| 123 void Justify(int additional_space); | |
| 124 | |
| 125 // Converts the given character offset into the pixel offset of the beginning | |
| 126 // of that character. | |
| 127 int CharacterToX(int offset) const; | |
| 128 | |
| 129 // Converts the given pixel X position into a logical character offset into | |
| 130 // the run. For positions appearing before the first character, this will | |
| 131 // return -1. | |
| 132 int XToCharacter(int x) const; | |
| 133 | |
| 134 // Draws the given characters to (x, y) in the given DC. The font will be | |
| 135 // handled by this function, but the font color and other attributes should | |
| 136 // be pre-set. | |
| 137 // | |
| 138 // The y position is the upper left corner, NOT the baseline. | |
| 139 void Draw(HDC dc, int x, int y, int from, int to); | |
| 140 | |
| 141 // Returns the first glyph assigned to the character at the given offset. | |
| 142 // This function is used to retrieve glyph information when Uniscribe is | |
| 143 // being used to generate glyphs for non-complex, non-BMP (above U+FFFF) | |
| 144 // characters. These characters are not otherwise special and have no | |
| 145 // complex shaping rules, so we don't otherwise need Uniscribe, except | |
| 146 // Uniscribe is the only way to get glyphs for non-BMP characters. | |
| 147 // | |
| 148 // Returns 0 if there is no glyph for the given character. | |
| 149 WORD FirstGlyphForCharacter(int char_offset) const; | |
| 150 | |
| 151 protected: | |
| 152 // Backend for init. The flag allows the unit test to specify whether we | |
| 153 // should fail early for very long strings like normal, or try to pass the | |
| 154 // long string to Uniscribe. The latter provides a way to force failure of | |
| 155 // shaping. | |
| 156 void InitWithOptionalLengthProtection(bool length_protection); | |
| 157 | |
| 158 // Tries to preload the font when it is not accessible. | |
| 159 // This is the default implementation and it does not do anything. | |
| 160 virtual void TryToPreloadFont(HFONT font) {} | |
| 161 | |
| 162 private: | |
| 163 FRIEND_TEST(UniscribeTest, TooBig); | |
| 164 | |
| 165 // An array corresponding to each item in runs_ containing information | |
| 166 // on each of the glyphs that were generated. Like runs_, this is in | |
| 167 // reading order. However, for rtl text, the characters within each | |
| 168 // item will be reversed. | |
| 169 struct Shaping { | |
| 170 Shaping() | |
| 171 : pre_padding(0), | |
| 172 hfont_(NULL), | |
| 173 script_cache_(NULL), | |
| 174 ascent_offset_(0) { | |
| 175 abc.abcA = 0; | |
| 176 abc.abcB = 0; | |
| 177 abc.abcC = 0; | |
| 178 } | |
| 179 | |
| 180 // Returns the number of glyphs (which will be drawn to the screen) | |
| 181 // in this run. | |
| 182 int glyph_length() const { | |
| 183 return static_cast<int>(glyphs->size()); | |
| 184 } | |
| 185 | |
| 186 // Returns the number of characters (that we started with) in this run. | |
| 187 int char_length() const { | |
| 188 return static_cast<int>(logs->size()); | |
| 189 } | |
| 190 | |
| 191 // Returns the advance array that should be used when measuring glyphs. | |
| 192 // The returned pointer will indicate an array with glyph_length() elements | |
| 193 // and the advance that should be used for each one. This is either the | |
| 194 // real advances, or the justified advances if |justify| is non-empty. | |
| 195 // Returns 0 (a null pointer) when the advance array is empty. | |
| 196 const int* effective_advances() const { | |
| 197 if (advance->empty()) | |
| 198 return 0; | |
| 199 if (justify->empty()) | |
| 200 return &advance[0]; | |
| 201 return &justify[0]; | |
| 202 } | |
| 203 | |
| 204 // This is the advance amount of space that we have added to the beginning | |
| 205 // of the run. It is like the ABC's |A| advance but one that we create and | |
| 206 // must handle internally whenever computing with pixel offsets. | |
| 207 int pre_padding; | |
| 208 | |
| 209 // Glyph indices in the font used to display this item. These indices | |
| 210 // are in screen order. | |
| 211 StackVector<WORD, UNISCRIBE_STATE_STACK_CHARS> glyphs; | |
| 212 | |
| 213 // For each input character, this tells us the first glyph index it | |
| 214 // generated. This is the only array with size of the input chars. | |
| 215 // | |
| 216 // All offsets are from the beginning of this run. Multiple characters can | |
| 217 // generate one glyph, in which case there will be adjacent duplicates in | |
| 218 // this list. One character can also generate multiple glyphs, in which | |
| 219 // case there will be skipped indices in this list. | |
| 220 StackVector<WORD, UNISCRIBE_STATE_STACK_CHARS> logs; | |
| 221 | |
| 222 // Flags and such for each glyph. | |
| 223 StackVector<SCRIPT_VISATTR, UNISCRIBE_STATE_STACK_CHARS> visattr; | |
| 224 | |
| 225 // Horizontal advances for each glyph listed above, this is basically | |
| 226 // how wide each glyph is. | |
| 227 StackVector<int, UNISCRIBE_STATE_STACK_CHARS> advance; | |
| 228 | |
| 229 // This contains glyph offsets, from the nominal position of a glyph. It | |
| 230 // is used to adjust the positions of multiple combining characters | |
| 231 // around/above/below base characters in a context-sensitive manner so | |
| 232 // that they don't bump against each other and the base character. | |
| 233 StackVector<GOFFSET, UNISCRIBE_STATE_STACK_CHARS> offsets; | |
| 234 | |
| 235 // Filled by a call to Justify, this is empty for nonjustified text. | |
| 236 // If nonempty, this contains the array of justify characters for each | |
| 237 // character as returned by ScriptJustify. | |
| 238 // | |
| 239 // This is the same as the advance array, but with extra space added for | |
| 240 // some characters. The difference between a glyph's |justify| width and | |
| 241 // its |advance| width is the extra space added. | |
| 242 StackVector<int, UNISCRIBE_STATE_STACK_CHARS> justify; | |
| 243 | |
| 244 // Sizing information for this run. This treats the entire run as a | |
| 245 // character with a preceding advance, width, and ending advance. | |
| 246 // The B width is the sum of the |advance| array, and the A and C widths | |
| 247 // are any extra spacing applied to each end. | |
| 248 // | |
| 249 // It is unclear from the documentation what this actually means. From | |
| 250 // experimentation, it seems that the sum of the character advances is | |
| 251 // always the sum of the ABC values, and I'm not sure what you're supposed | |
| 252 // to do with the ABC values. | |
| 253 ABC abc; | |
| 254 | |
| 255 // Pointers to windows font data used to render this run. | |
| 256 HFONT hfont_; | |
| 257 SCRIPT_CACHE* script_cache_; | |
| 258 | |
| 259 // Ascent offset between the ascent of the primary font | |
| 260 // and that of the fallback font. The offset needs to be applied, | |
| 261 // when drawing a string, to align multiple runs rendered with | |
| 262 // different fonts. | |
| 263 int ascent_offset_; | |
| 264 }; | |
| 265 | |
| 266 // Computes the runs_ array from the text run. | |
| 267 void FillRuns(); | |
| 268 | |
| 269 // Computes the shapes_ array given a runs_ array already filled in. | |
| 270 void FillShapes(); | |
| 271 | |
| 272 // Fills in the screen_order_ array (see below). | |
| 273 void FillScreenOrder(); | |
| 274 | |
| 275 // Called to update the glyph positions based on the current spacing options | |
| 276 // that are set. | |
| 277 void ApplySpacing(); | |
| 278 | |
| 279 // Normalizes all advances for spaces to the same width. This keeps windows | |
| 280 // from making spaces after Hindi characters larger, which is then | |
| 281 // inconsistent with our measure of the width since WebKit doesn't include | |
| 282 // spaces in text-runs sent to uniscribe unless white-space:pre. | |
| 283 void AdjustSpaceAdvances(); | |
| 284 | |
| 285 // Returns the total width of a single item. | |
| 286 int AdvanceForItem(int item_index) const; | |
| 287 | |
| 288 // Shapes a run (pointed to by |input|) using |hfont| first. | |
| 289 // Tries a series of fonts retrieved with NextWinFontData | |
| 290 // and finally a font covering characters in |*input|. A string pointed to | |
| 291 // by |input| comes from ScriptItemize and is supposed to contain | |
| 292 // characters belonging to a single script aside from characters | |
| 293 // common to all scripts (e.g. space). | |
| 294 bool Shape(const wchar_t* input, | |
| 295 int item_length, | |
| 296 int num_glyphs, | |
| 297 SCRIPT_ITEM& run, | |
| 298 Shaping& shaping); | |
| 299 | |
| 300 // Gets Windows font data for the next best font to try in the list | |
| 301 // of fonts. When there's no more font available, returns false | |
| 302 // without touching any of the out params. Need to call ResetFontIndex | |
| 303 // to start scanning the font list from the beginning. | |
| 304 virtual bool NextWinFontData(HFONT* hfont, | |
| 305 SCRIPT_CACHE** script_cache, | |
| 306 SCRIPT_FONTPROPERTIES** font_properties, | |
| 307 int* ascent) { | |
| 308 return false; | |
| 309 } | |
| 310 | |
| 311 // Resets the font index to the first in the list of fonts | |
| 312 // to try after the primary font turns out not to work. With font_index | |
| 313 // reset, NextWinFontData scans fallback fonts from the beginning. | |
| 314 virtual void ResetFontIndex() {} | |
| 315 | |
| 316 // The input data for this run of Uniscribe. See the constructor. | |
| 317 const wchar_t* input_; | |
| 318 const int input_length_; | |
| 319 const bool is_rtl_; | |
| 320 | |
| 321 // Windows font data for the primary font: | |
| 322 // In a sense, logfont_ and style_ are redundant because | |
| 323 // hfont_ contains all the information. However, invoking GetObject, | |
| 324 // every time we need the height and the style, is rather expensive so | |
| 325 // that we cache them. Would it be better to add getter and (virtual) | |
| 326 // setter for the height and the style of the primary font, instead of | |
| 327 // logfont_? Then, a derived class ctor can set ascent_, height_ and style_ | |
| 328 // if they're known. Getters for them would have to 'infer' their values from | |
| 329 // hfont_ ONLY when they're not set. | |
| 330 HFONT hfont_; | |
| 331 SCRIPT_CACHE* script_cache_; | |
| 332 SCRIPT_FONTPROPERTIES* font_properties_; | |
| 333 int ascent_; | |
| 334 LOGFONT logfont_; | |
| 335 int style_; | |
| 336 | |
| 337 // Options, see the getters/setters above. | |
| 338 bool directional_override_; | |
| 339 bool inhibit_ligate_; | |
| 340 int letter_spacing_; | |
| 341 int space_width_; | |
| 342 int word_spacing_; | |
| 343 int justification_width_; | |
| 344 | |
| 345 // Uniscribe breaks the text into Runs. These are one length of text that is | |
| 346 // in one script and one direction. This array is in reading order. | |
| 347 StackVector<SCRIPT_ITEM, UNISCRIBE_STATE_STACK_RUNS> runs_; | |
| 348 | |
| 349 StackVector<Shaping, UNISCRIBE_STATE_STACK_RUNS> shapes_; | |
| 350 | |
| 351 // This is a mapping between reading order and screen order for the items. | |
| 352 // Uniscribe's items array are in reading order. For right-to-left text, | |
| 353 // or mixed (although WebKit's |TextRun| should really be only one | |
| 354 // direction), this makes it very difficult to compute character offsets | |
| 355 // and positions. This list is in screen order from left to right, and | |
| 356 // gives the index into the |runs_| and |shapes_| arrays of each | |
| 357 // subsequent item. | |
| 358 StackVector<int, UNISCRIBE_STATE_STACK_RUNS> screen_order_; | |
| 359 | |
| 360 DISALLOW_EVIL_CONSTRUCTORS(UniscribeState); | |
| 361 }; | |
| 362 | |
| 363 } // namespace gfx | |
| 364 | |
| 365 #endif // BASE_GFX_UNISCRIBE_H__ | |
| 366 | |
| OLD | NEW |