Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(85)

Side by Side Diff: third_party/WebKit/Source/platform/fonts/Character.cpp

Issue 1847853004: Move Character.h from platform/fonts to platform/text (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: rebase again Created 4 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 /*
2 * Copyright (C) 2014 Google Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are
6 * met:
7 *
8 * * Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above
11 * copyright notice, this list of conditions and the following disclaimer
12 * in the documentation and/or other materials provided with the
13 * distribution.
14 * * Neither the name of Google Inc. nor the names of its
15 * contributors may be used to endorse or promote products derived from
16 * this software without specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30
31 #include "platform/fonts/Character.h"
32
33 #include "wtf/StdLibExtras.h"
34 #include "wtf/text/StringBuilder.h"
35 #include <algorithm>
36 #include <unicode/uobject.h>
37 #include <unicode/uscript.h>
38
39 #if defined(USING_SYSTEM_ICU)
40 #include "platform/fonts/CharacterPropertyDataGenerator.h"
41 #include <unicode/uniset.h>
42 #else
43 #define MUTEX_H // Prevent compile failure of utrie2.h on Windows
44 #include <utrie2.h>
45 #endif
46
47 using namespace WTF;
48 using namespace Unicode;
49
50 namespace blink {
51
52 #if defined(USING_SYSTEM_ICU)
53 static icu::UnicodeSet* createUnicodeSet(
54 const UChar32* characters, size_t charactersCount,
55 const UChar32* ranges, size_t rangesCount)
56 {
57 icu::UnicodeSet* unicodeSet = new icu::UnicodeSet();
58 for (size_t i = 0; i < charactersCount; i++)
59 unicodeSet->add(characters[i]);
60 for (size_t i = 0; i < rangesCount; i += 2)
61 unicodeSet->add(ranges[i], ranges[i + 1]);
62 unicodeSet->freeze();
63 return unicodeSet;
64 }
65
// Expands to a createUnicodeSet() call over the two tables belonging to
// property |name|: name##Array (single code points) and name##Ranges
// (flattened range bounds).
#define CREATE_UNICODE_SET(name) \
    createUnicodeSet( \
        name##Array, WTF_ARRAY_LENGTH(name##Array), \
        name##Ranges, WTF_ARRAY_LENGTH(name##Ranges))

// Function-body helper: creates the UnicodeSet for |name| on first use, keeps
// it in a function-local static, and returns whether it contains |c|.
// The expansion already ends in a complete return statement, so call sites
// do not add a trailing semicolon.
#define RETURN_HAS_PROPERTY(c, name) \
    static icu::UnicodeSet* unicodeSet = nullptr; \
    if (!unicodeSet) \
        unicodeSet = CREATE_UNICODE_SET(name); \
    return unicodeSet->contains(c);
76 #else
77 // Freezed trie tree, see CharacterDataGenerator.cpp.
78 extern int32_t serializedCharacterDataSize;
79 extern uint8_t serializedCharacterData[];
80
81 static UTrie2* createTrie()
82 {
83 // Create a Trie from the value array.
84 UErrorCode error = U_ZERO_ERROR;
85 UTrie2* trie = utrie2_openFromSerialized(
86 UTrie2ValueBits::UTRIE2_16_VALUE_BITS,
87 serializedCharacterData, serializedCharacterDataSize,
88 nullptr, &error);
89 ASSERT(error == U_ZERO_ERROR);
90 return trie;
91 }
92
93 static bool hasProperty(UChar32 c, CharacterProperty property)
94 {
95 static UTrie2* trie = nullptr;
96 if (!trie)
97 trie = createTrie();
98 return UTRIE2_GET16(trie, c)
99 & static_cast<CharacterPropertyType>(property);
100 }
101
// Function-body helper: returns whether |c| has CharacterProperty::name
// according to hasProperty(). The expansion already ends in a complete return
// statement, so call sites do not add a trailing semicolon.
#define RETURN_HAS_PROPERTY(c, name) \
    return hasProperty(c, CharacterProperty::name);
104 #endif
105
// Reports whether |value| lies inside any interval of |intervalList|.
//
// |intervalList| is a flattened list of closed intervals laid out as
// { first0, last0, first1, last1, ... }. std::upper_bound is used for the
// lookup, so the entries must be sorted in ascending order.
template <class T, size_t size>
bool valueInIntervalList(const T (&intervalList)[size], const T& value)
{
    const T* const begin = intervalList;
    const T* const bound = std::upper_bound(begin, begin + size, value);

    // An odd number of entries are <= value: the last of them is an interval
    // start and the matching end is > value, so value is inside that interval.
    if ((bound - begin) % 2 == 1)
        return true;

    // An even number of entries are <= value: value is past the preceding
    // interval unless it is exactly that interval's (inclusive) end.
    return bound > begin && *(bound - 1) == value;
}
115
116 CodePath Character::characterRangeCodePath(const UChar* characters, unsigned len )
117 {
118 static const UChar complexCodePathRanges[] = {
119 // U+02E5 through U+02E9 (Modifier Letters : Tone letters)
120 0x2E5, 0x2E9,
121 // U+0300 through U+036F Combining diacritical marks
122 0x300, 0x36F,
123 // U+0591 through U+05CF excluding U+05BE Hebrew combining marks, ...
124 0x0591, 0x05BD,
125 // ... Hebrew punctuation Paseq, Sof Pasuq and Nun Hafukha
126 0x05BF, 0x05CF,
127 // U+0600 through U+109F Arabic, Syriac, Thaana, NKo, Samaritan, Mandaic ,
128 // Devanagari, Bengali, Gurmukhi, Gujarati, Oriya, Tamil, Telugu, Kannad a,
129 // Malayalam, Sinhala, Thai, Lao, Tibetan, Myanmar
130 0x0600, 0x109F,
131 // U+1100 through U+11FF Hangul Jamo (only Ancient Korean should be left
132 // here if you precompose; Modern Korean will be precomposed as a result of step A)
133 0x1100, 0x11FF,
134 // U+135D through U+135F Ethiopic combining marks
135 0x135D, 0x135F,
136 // U+1780 through U+18AF Tagalog, Hanunoo, Buhid, Taghanwa,Khmer, Mongol ian
137 0x1700, 0x18AF,
138 // U+1900 through U+194F Limbu (Unicode 4.0)
139 0x1900, 0x194F,
140 // U+1980 through U+19DF New Tai Lue
141 0x1980, 0x19DF,
142 // U+1A00 through U+1CFF Buginese, Tai Tham, Balinese, Batak, Lepcha, Ve dic
143 0x1A00, 0x1CFF,
144 // U+1DC0 through U+1DFF Comining diacritical mark supplement
145 0x1DC0, 0x1DFF,
146 // U+20D0 through U+20FF Combining marks for symbols
147 0x20D0, 0x20FF,
148 // U+2CEF through U+2CF1 Combining marks for Coptic
149 0x2CEF, 0x2CF1,
150 // U+302A through U+302F Ideographic and Hangul Tone marks
151 0x302A, 0x302F,
152 // Combining Katakana-Hiragana Voiced/Semi-voiced Sound Mark
153 0x3099, 0x309A,
154 // U+A67C through U+A67D Combining marks for old Cyrillic
155 0xA67C, 0xA67D,
156 // U+A6F0 through U+A6F1 Combining mark for Bamum
157 0xA6F0, 0xA6F1,
158 // U+A800 through U+ABFF Nagri, Phags-pa, Saurashtra, Devanagari Extende d,
159 // Hangul Jamo Ext. A, Javanese, Myanmar Extended A, Tai Viet, Meetei Ma yek
160 0xA800, 0xABFF,
161 // U+D7B0 through U+D7FF Hangul Jamo Ext. B
162 0xD7B0, 0xD7FF,
163 // U+FE00 through U+FE0F Unicode variation selectors
164 0xFE00, 0xFE0F,
165 // U+FE20 through U+FE2F Combining half marks
166 0xFE20, 0xFE2F
167 };
168
169 CodePath result = SimplePath;
170 for (unsigned i = 0; i < len; i++) {
171 const UChar c = characters[i];
172
173 // Shortcut for common case
174 if (c < 0x2E5)
175 continue;
176
177 // Surrogate pairs
178 if (c > 0xD7FF && c <= 0xDBFF) {
179 if (i == len - 1)
180 continue;
181
182 UChar next = characters[++i];
183 if (!U16_IS_TRAIL(next))
184 continue;
185
186 UChar32 supplementaryCharacter = U16_GET_SUPPLEMENTARY(c, next);
187
188 if (supplementaryCharacter < 0x1F1E6) // U+1F1E6 through U+1F1FF Reg ional Indicator Symbols
189 continue;
190 if (supplementaryCharacter <= 0x1F1FF)
191 return ComplexPath;
192
193 // Emoji Fitzpatrick modifiers trigger upgrade to complex path for s haping them.
194 if (supplementaryCharacter < 0x1F3FB)
195 continue;
196 if (supplementaryCharacter <= 0x1F3FF)
197 return ComplexPath;
198
199 if (supplementaryCharacter == eyeCharacter)
200 return ComplexPath;
201
202 // Man and Woman Emojies,
203 // in order to support emoji joiner combinations for family and coup le pictographs.
204 // Compare http://unicode.org/reports/tr51/#Emoji_ZWJ_Sequences
205 if (supplementaryCharacter < 0x1F468)
206 continue;
207 if (supplementaryCharacter <= 0x1F469)
208 return ComplexPath;
209
210 if (supplementaryCharacter == leftSpeechBubbleCharacter)
211 return ComplexPath;
212
213 if (supplementaryCharacter < 0xE0100) // U+E0100 through U+E01EF Uni code variation selectors.
214 continue;
215 if (supplementaryCharacter <= 0xE01EF)
216 return ComplexPath;
217
218 // FIXME: Check for Brahmi (U+11000 block), Kaithi (U+11080 block) a nd other complex scripts
219 // in plane 1 or higher.
220
221 continue;
222 }
223
224 // Search for other Complex cases
225 if (valueInIntervalList(complexCodePathRanges, c))
226 return ComplexPath;
227 }
228
229 return result;
230 }
231
232 bool Character::isUprightInMixedVertical(UChar32 character)
233 {
234 RETURN_HAS_PROPERTY(character, isUprightInMixedVertical)
235 }
236
237 bool Character::isCJKIdeographOrSymbol(UChar32 c)
238 {
239 // Likely common case
240 if (c < 0x2C7)
241 return false;
242
243 RETURN_HAS_PROPERTY(c, isCJKIdeographOrSymbol)
244 }
245
246 unsigned Character::expansionOpportunityCount(const LChar* characters, size_t le ngth, TextDirection direction, bool& isAfterExpansion, const TextJustify textJus tify)
247 {
248 unsigned count = 0;
249 if (textJustify == TextJustifyDistribute) {
250 isAfterExpansion = true;
251 return length;
252 }
253
254 if (direction == LTR) {
255 for (size_t i = 0; i < length; ++i) {
256 if (treatAsSpace(characters[i])) {
257 count++;
258 isAfterExpansion = true;
259 } else {
260 isAfterExpansion = false;
261 }
262 }
263 } else {
264 for (size_t i = length; i > 0; --i) {
265 if (treatAsSpace(characters[i - 1])) {
266 count++;
267 isAfterExpansion = true;
268 } else {
269 isAfterExpansion = false;
270 }
271 }
272 }
273
274 return count;
275 }
276
277 unsigned Character::expansionOpportunityCount(const UChar* characters, size_t le ngth, TextDirection direction, bool& isAfterExpansion, const TextJustify textJus tify)
278 {
279 unsigned count = 0;
280 if (direction == LTR) {
281 for (size_t i = 0; i < length; ++i) {
282 UChar32 character = characters[i];
283 if (treatAsSpace(character)) {
284 count++;
285 isAfterExpansion = true;
286 continue;
287 }
288 if (U16_IS_LEAD(character) && i + 1 < length && U16_IS_TRAIL(charact ers[i + 1])) {
289 character = U16_GET_SUPPLEMENTARY(character, characters[i + 1]);
290 i++;
291 }
292 if (textJustify == TextJustify::TextJustifyAuto && isCJKIdeographOrS ymbol(character)) {
293 if (!isAfterExpansion)
294 count++;
295 count++;
296 isAfterExpansion = true;
297 continue;
298 }
299 isAfterExpansion = false;
300 }
301 } else {
302 for (size_t i = length; i > 0; --i) {
303 UChar32 character = characters[i - 1];
304 if (treatAsSpace(character)) {
305 count++;
306 isAfterExpansion = true;
307 continue;
308 }
309 if (U16_IS_TRAIL(character) && i > 1 && U16_IS_LEAD(characters[i - 2 ])) {
310 character = U16_GET_SUPPLEMENTARY(characters[i - 2], character);
311 i--;
312 }
313 if (textJustify == TextJustify::TextJustifyAuto && isCJKIdeographOrS ymbol(character)) {
314 if (!isAfterExpansion)
315 count++;
316 count++;
317 isAfterExpansion = true;
318 continue;
319 }
320 isAfterExpansion = false;
321 }
322 }
323 return count;
324 }
325
326 bool Character::canReceiveTextEmphasis(UChar32 c)
327 {
328 CharCategory category = Unicode::category(c);
329 if (category & (Separator_Space | Separator_Line | Separator_Paragraph | Oth er_NotAssigned | Other_Control | Other_Format))
330 return false;
331
332 // Additional word-separator characters listed in CSS Text Level 3 Editor's Draft 3 November 2010.
333 if (c == ethiopicWordspaceCharacter || c == aegeanWordSeparatorLineCharacter || c == aegeanWordSeparatorDotCharacter
334 || c == ugariticWordDividerCharacter || c == tibetanMarkIntersyllabicTsh egCharacter || c == tibetanMarkDelimiterTshegBstarCharacter)
335 return false;
336
337 return true;
338 }
339
340 template <typename CharacterType>
341 static inline String normalizeSpacesInternal(const CharacterType* characters, un signed length)
342 {
343 StringBuilder normalized;
344 normalized.reserveCapacity(length);
345
346 for (unsigned i = 0; i < length; ++i)
347 normalized.append(Character::normalizeSpaces(characters[i]));
348
349 return normalized.toString();
350 }
351
352 String Character::normalizeSpaces(const LChar* characters, unsigned length)
353 {
354 return normalizeSpacesInternal(characters, length);
355 }
356
357 String Character::normalizeSpaces(const UChar* characters, unsigned length)
358 {
359 return normalizeSpacesInternal(characters, length);
360 }
361
362 bool Character::isCommonOrInheritedScript(UChar32 character)
363 {
364 UErrorCode status = U_ZERO_ERROR;
365 UScriptCode script = uscript_getScript(character, &status);
366 return U_SUCCESS(status) && (script == USCRIPT_COMMON || script == USCRIPT_I NHERITED);
367 }
368
369 } // namespace blink
OLDNEW
« no previous file with comments | « third_party/WebKit/Source/platform/fonts/Character.h ('k') | third_party/WebKit/Source/platform/fonts/CharacterEmoji.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698