Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(869)

Side by Side Diff: third_party/WebKit/Source/platform/fonts/CharacterDataGenerator.cpp

Issue 1742293002: Support src/build/linux/unbundle in CharacterDataGenerator (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Cleanup Created 4 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « third_party/WebKit/Source/platform/fonts/CharacterData.h ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2016 The Chromium Authors. All rights reserved. 1 // Copyright 2016 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "CharacterData.h"
6
5 #include "CharacterProperty.h" 7 #include "CharacterProperty.h"
6
7 #include <cassert> 8 #include <cassert>
8 #include <cstring> 9 #include <cstring>
9 #include <stdio.h> 10 #include <stdio.h>
10 #include <unicode/uobject.h> 11 #if !defined(USING_SYSTEM_ICU)
11 #define MUTEX_H // Prevent compile failure of utrie2.h on Windows 12 #define MUTEX_H // Prevent compile failure of utrie2.h on Windows
12 #include <utrie2.h> 13 #include <utrie2.h>
14 #endif
15
16 #if defined(USING_SYSTEM_ICU)
17 static void generate(FILE*)
18 {
19 }
20 #else
21
22 using namespace blink;
13 23
14 const UChar32 kMaxCodepoint = 0x10FFFF; 24 const UChar32 kMaxCodepoint = 0x10FFFF;
15 using CharacterProperty = blink::CharacterProperty;
16 #define ARRAY_LENGTH(a) (sizeof(a) / sizeof((a)[0])) 25 #define ARRAY_LENGTH(a) (sizeof(a) / sizeof((a)[0]))
17 26
18 static const UChar32 cjkIsolatedSymbolsArray[] = {
19 // 0x2C7 Caron, Mandarin Chinese 3rd Tone
20 0x2C7,
21 // 0x2CA Modifier Letter Acute Accent, Mandarin Chinese 2nd Tone
22 0x2CA,
23 // 0x2CB Modifier Letter Grave Accent, Mandarin Chinese 4th Tone
24 0x2CB,
25 // 0x2D9 Dot Above, Mandarin Chinese 5th Tone
26 0x2D9,
27 0x2020, 0x2021, 0x2030, 0x203B, 0x203C, 0x2042, 0x2047, 0x2048, 0x2049, 0x2051,
28 0x20DD, 0x20DE, 0x2100, 0x2103, 0x2105, 0x2109, 0x210A, 0x2113, 0x2116, 0x2121,
29 0x212B, 0x213B, 0x2150, 0x2151, 0x2152, 0x217F, 0x2189, 0x2307, 0x2312, 0x23CE,
30 0x2423, 0x25A0, 0x25A1, 0x25A2, 0x25AA, 0x25AB, 0x25B1, 0x25B2, 0x25B3, 0x25B6,
31 0x25B7, 0x25BC, 0x25BD, 0x25C0, 0x25C1, 0x25C6, 0x25C7, 0x25C9, 0x25CB, 0x25CC,
32 0x25EF, 0x2605, 0x2606, 0x260E, 0x2616, 0x2617, 0x2640, 0x2642, 0x26A0, 0x26BD,
33 0x26BE, 0x2713, 0x271A, 0x273F, 0x2740, 0x2756, 0x2B1A, 0xFE10, 0xFE11, 0xFE12,
34 0xFE19, 0xFF1D,
35 // Emoji.
36 0x1F100
37 };
38
39 static const UChar32 cjkIdeographRanges[] = {
40 // CJK Radicals Supplement and Kangxi Radicals.
41 0x2E80, 0x2FDF,
42 // CJK Strokes.
43 0x31C0, 0x31EF,
44 // CJK Unified Ideographs Extension A.
45 0x3400, 0x4DBF,
46 // The basic CJK Unified Ideographs block.
47 0x4E00, 0x9FFF,
48 // CJK Compatibility Ideographs.
49 0xF900, 0xFAFF,
50 // CJK Unified Ideographs Extension B.
51 0x20000, 0x2A6DF,
52 // CJK Unified Ideographs Extension C.
53 // CJK Unified Ideographs Extension D.
54 0x2A700, 0x2B81F,
55 // CJK Compatibility Ideographs Supplement.
56 0x2F800, 0x2FA1F
57 };
58
59 static const UChar32 cjkSymbolRanges[] = {
60 0x2156, 0x215A,
61 0x2160, 0x216B,
62 0x2170, 0x217B,
63 0x23BE, 0x23CC,
64 0x2460, 0x2492,
65 0x249C, 0x24FF,
66 0x25CE, 0x25D3,
67 0x25E2, 0x25E6,
68 0x2600, 0x2603,
69 0x2660, 0x266F,
70 // Emoji HEAVY HEART EXCLAMATION MARK ORNAMENT..HEAVY BLACK HEART
71 // Needed in order not to break Emoji heart-kiss sequences in
72 // CachingWordShapeIterator.
73 // cmp. http://www.unicode.org/emoji/charts/emoji-zwj-sequences.html
74 0x2763, 0x2764,
75 0x2672, 0x267D,
76 0x2776, 0x277F,
77 // Ideographic Description Characters, with CJK Symbols and Punctuation,
78 // excluding 0x3030.
79 // Then Hiragana 0x3040 .. 0x309F, Katakana 0x30A0 .. 0x30FF, Bopomofo
80 // 0x3100 .. 0x312F
81 0x2FF0, 0x302F,
82 0x3031, 0x312F,
83 // More Bopomofo and Bopomofo Extended 0x31A0 .. 0x31BF
84 0x3190, 0x31BF,
85 // Enclosed CJK Letters and Months (0x3200 .. 0x32FF).
86 // CJK Compatibility (0x3300 .. 0x33FF).
87 0x3200, 0x33FF,
88 0xF860, 0xF862,
89 // CJK Compatibility Forms.
90 0xFE30, 0xFE4F,
91 // Halfwidth and Fullwidth Forms
92 // Usually only used in CJK
93 0xFF00, 0xFF0C,
94 0xFF0E, 0xFF1A,
95 0xFF1F, 0xFFEF,
96 // Emoji.
97 0x1F110, 0x1F129,
98 0x1F130, 0x1F149,
99 0x1F150, 0x1F169,
100 0x1F170, 0x1F189,
101 0x1F200, 0x1F6FF
102 };
103
104 // Individual codepoints needed for Unicode vertical text layout according to
105 // http://www.unicode.org/reports/tr50/
106 // Taken from the corresponding data file:
107 // http://www.unicode.org/Public/vertical/revision-13/VerticalOrientation-13.txt
108 static const UChar32 isUprightInMixedVerticalArray[] = {
109 0x000A7,
110 0x000A9,
111 0x000AE,
112 0x000B1,
113 0x000D7,
114 0x000F7
115 };
116
117 static const UChar32 isUprightInMixedVerticalRanges[] = {
118 0x000BC, 0x000BE,
119 // Spacing Modifier Letters (Part of)
120 0x002EA, 0x002EB,
121 // Hangul Jamo
122 0x01100, 0x011FF,
123 // Unified Canadian Aboriginal Syllabics
124 0x01401, 0x0167F,
125 // Unified Canadian Aboriginal Syllabics Extended
126 0x018B0, 0x018FF,
127 // General Punctuation (Part of)
128 0x02016, 0x02016,
129 0x02020, 0x02021,
130 0x02030, 0x02031,
131 0x0203B, 0x0203C,
132 0x02042, 0x02042,
133 0x02047, 0x02049,
134 0x02051, 0x02051,
135 0x02065, 0x02069,
136 // Combining Diacritical Marks for Symbols (Part of)
137 0x020DD, 0x020E0,
138 0x020E2, 0x020E4,
139 // Letterlike Symbols (Part of)/Number Forms
140 0x02100, 0x02101,
141 0x02103, 0x02109,
142 0x0210F, 0x0210F,
143 0x02113, 0x02114,
144 0x02116, 0x02117,
145 0x0211E, 0x02123,
146 0x02125, 0x02125,
147 0x02127, 0x02127,
148 0x02129, 0x02129,
149 0x0212E, 0x0212E,
150 0x02135, 0x0213F,
151 0x02145, 0x0214A,
152 0x0214C, 0x0214D,
153 0x0214F, 0x0218F,
154 // Mathematical Operators (Part of)
155 0x0221E, 0x0221E,
156 0x02234, 0x02235,
157 // Miscellaneous Technical (Part of)
158 0x02300, 0x02307,
159 0x0230C, 0x0231F,
160 0x02324, 0x0232B,
161 0x0237D, 0x0239A,
162 0x023BE, 0x023CD,
163 0x023CF, 0x023CF,
164 0x023D1, 0x023DB,
165 0x023E2, 0x02422,
166 // Control Pictures (Part of)/Optical Character Recognition/Enclosed
167 // Alphanumerics
168 0x02424, 0x024FF,
169 // Geometric Shapes/Miscellaneous Symbols (Part of)
170 0x025A0, 0x02619,
171 0x02620, 0x02767,
172 0x02776, 0x02793,
173 // Miscellaneous Symbols and Arrows (Part of)
174 0x02B12, 0x02B2F,
175 0x02B50, 0x02B59,
176 0x02BB8, 0x02BFF,
177 // Common CJK
178 0x02E80, 0x0A4CF,
179 // Hangul Jamo Extended-A
180 0x0A960, 0x0A97F,
181 // Hangul Syllables/Hangul Jamo Extended-B
182 0x0AC00, 0x0D7FF,
183 // Private Use Area/CJK Compatibility Ideographs
184 0x0E000, 0x0FAFF,
185 // Vertical Forms
186 0x0FE10, 0x0FE1F,
187 // CJK Compatibility Forms (Part of)
188 0x0FE30, 0x0FE48,
189 // Small Form Variants (Part of)
190 0x0FE50, 0x0FE57,
191 0x0FE59, 0x0FE62,
192 0x0FE67, 0x0FE6F,
193 // Halfwidth and Fullwidth Forms
194 0x0FF01, 0x0FF0C,
195 0x0FF0E, 0x0FF1B,
196 0x0FF1F, 0x0FF60,
197 0x0FFE0, 0x0FFE7,
198 // Specials (Part of)
199 0x0FFF0, 0x0FFF8,
200 0x0FFFC, 0x0FFFD,
201 // Meroitic Hieroglyphs
202 0x10980, 0x1099F,
203 // Siddham
204 0x11580, 0x115FF,
205 // Egyptian Hieroglyphs
206 0x13000, 0x1342F,
207 // Kana Supplement
208 0x1B000, 0x1B0FF,
209 // Byzantine Musical Symbols/Musical Symbols
210 0x1D000, 0x1D1FF,
211 // Tai Xuan Jing Symbols/Counting Rod Numerals
212 0x1D300, 0x1D37F,
213 // Mahjong Tiles/Domino Tiles/Playing Cards/Enclosed Alphanumeric Supplement
214 // Enclosed Ideographic Supplement/Miscellaneous Symbols and Pictographs
215 // Emoticons/Ornamental Dingbats/Transport and Map Symbols/Alchemical
216 // Symbols
217 0x1F000, 0x1F7FF,
218 // CJK Unified Ideographs Extension B/C/D
219 // CJK Compatibility Ideographs Supplement
220 0x20000, 0x2FFFD,
221 0x30000, 0x3FFFD,
222 // Supplementary Private Use Area-A
223 0xF0000, 0xFFFFD,
224 // Supplementary Private Use Area-B
225 0x100000, 0x10FFFD,
226 };
227
228 static void setRanges(CharacterProperty* values, 27 static void setRanges(CharacterProperty* values,
229 const UChar32* ranges, size_t length, 28 const UChar32* ranges, size_t length,
230 CharacterProperty value) 29 CharacterProperty value)
231 { 30 {
232 assert(length % 2 == 0); 31 assert(length % 2 == 0);
233 const UChar32* end = ranges + length; 32 const UChar32* end = ranges + length;
234 for (; ranges != end; ranges += 2) { 33 for (; ranges != end; ranges += 2) {
235 assert(ranges[0] <= ranges[1] 34 assert(ranges[0] <= ranges[1]
236 && ranges[1] <= kMaxCodepoint); 35 && ranges[1] <= kMaxCodepoint);
237 for (UChar32 c = ranges[0]; c <= ranges[1]; c++) 36 for (UChar32 c = ranges[0]; c <= ranges[1]; c++)
238 values[c] |= value; 37 values[c] |= value;
239 } 38 }
240 } 39 }
241 40
242 static void setValues(CharacterProperty* values, 41 static void setValues(CharacterProperty* values,
243 const UChar32* begin, size_t length, 42 const UChar32* begin, size_t length,
244 CharacterProperty value) 43 CharacterProperty value)
245 { 44 {
246 const UChar32* end = begin + length; 45 const UChar32* end = begin + length;
247 for (; begin != end; begin++) { 46 for (; begin != end; begin++) {
248 assert(*begin <= kMaxCodepoint); 47 assert(*begin <= kMaxCodepoint);
249 values[*begin] |= value; 48 values[*begin] |= value;
250 } 49 }
251 } 50 }
252 51
253 static void generate(FILE* fp, int32_t size, uint8_t* array) 52 static void generateUTrieSerialized(FILE* fp, int32_t size, uint8_t* array)
254 { 53 {
255 fprintf(fp, 54 fprintf(fp,
256 "#include <cstdint>\n\n" 55 "#include <cstdint>\n\n"
257 "namespace blink {\n\n" 56 "namespace blink {\n\n"
258 "int32_t serializedCharacterDataSize = %d;\n" 57 "int32_t serializedCharacterDataSize = %d;\n"
259 "uint8_t serializedCharacterData[] = {", size); 58 "uint8_t serializedCharacterData[] = {", size);
260 for (int32_t i = 0; i < size; ) { 59 for (int32_t i = 0; i < size; ) {
261 fprintf(fp, "\n "); 60 fprintf(fp, "\n ");
262 for (int col = 0; col < 16 && i < size; col++, i++) 61 for (int col = 0; col < 16 && i < size; col++, i++)
263 fprintf(fp, " 0x%02X,", array[i]); 62 fprintf(fp, " 0x%02X,", array[i]);
264 } 63 }
265 fprintf(fp, 64 fprintf(fp,
266 "\n};\n\n" 65 "\n};\n\n"
267 "} // namespace blink\n"); 66 "} // namespace blink\n");
268 } 67 }
269 68
270 int main(int argc, char** argv) 69 static void generate(FILE* fp)
271 { 70 {
272 // Create a value array of all possible code points. 71 // Create a value array of all possible code points.
273 const UChar32 size = kMaxCodepoint + 1; 72 const UChar32 size = kMaxCodepoint + 1;
274 CharacterProperty* values = new CharacterProperty[size]; 73 CharacterProperty* values = new CharacterProperty[size];
275 memset(values, 0, sizeof(CharacterProperty) * size); 74 memset(values, 0, sizeof(CharacterProperty) * size);
276 75
277 setRanges(values, 76 #define SET(name) \
278 cjkIdeographRanges, ARRAY_LENGTH(cjkIdeographRanges), 77 setRanges(values, name##Ranges, ARRAY_LENGTH(name##Ranges), \
279 CharacterProperty::isCJKIdeographOrSymbol); 78 CharacterProperty::name); \
280 setRanges(values, 79 setValues(values, name##Array, ARRAY_LENGTH(name##Array), \
281 cjkSymbolRanges, ARRAY_LENGTH(cjkSymbolRanges), 80 CharacterProperty::name);
282 CharacterProperty::isCJKIdeographOrSymbol);
283 setValues(values,
284 cjkIsolatedSymbolsArray, ARRAY_LENGTH(cjkIsolatedSymbolsArray),
285 CharacterProperty::isCJKIdeographOrSymbol);
286 81
287 setRanges(values, 82 SET(isCJKIdeographOrSymbol);
288 isUprightInMixedVerticalRanges, 83 SET(isUprightInMixedVertical);
289 ARRAY_LENGTH(isUprightInMixedVerticalRanges),
290 CharacterProperty::isUprightInMixedVertical);
291 setValues(values,
292 isUprightInMixedVerticalArray,
293 ARRAY_LENGTH(isUprightInMixedVerticalArray),
294 CharacterProperty::isUprightInMixedVertical);
295 84
296 // Create a trie from the value array. 85 // Create a trie from the value array.
297 UErrorCode error = U_ZERO_ERROR; 86 UErrorCode error = U_ZERO_ERROR;
298 UTrie2* trie = utrie2_open(0, 0, &error); 87 UTrie2* trie = utrie2_open(0, 0, &error);
299 assert(error == U_ZERO_ERROR); 88 assert(error == U_ZERO_ERROR);
300 UChar32 start = 0; 89 UChar32 start = 0;
301 CharacterProperty value = values[0]; 90 CharacterProperty value = values[0];
302 for (UChar32 c = 1;; c++) { 91 for (UChar32 c = 1;; c++) {
303 if (c < size && values[c] == value) 92 if (c < size && values[c] == value)
304 continue; 93 continue;
(...skipping 10 matching lines...) Expand all
315 104
316 // Freeze and serialize the trie to a byte array. 105 // Freeze and serialize the trie to a byte array.
317 utrie2_freeze(trie, UTrie2ValueBits::UTRIE2_16_VALUE_BITS, &error); 106 utrie2_freeze(trie, UTrie2ValueBits::UTRIE2_16_VALUE_BITS, &error);
318 assert(error == U_ZERO_ERROR); 107 assert(error == U_ZERO_ERROR);
319 int32_t serializedSize = utrie2_serialize(trie, nullptr, 0, &error); 108 int32_t serializedSize = utrie2_serialize(trie, nullptr, 0, &error);
320 error = U_ZERO_ERROR; 109 error = U_ZERO_ERROR;
321 uint8_t* serialized = new uint8_t[serializedSize]; 110 uint8_t* serialized = new uint8_t[serializedSize];
322 serializedSize = utrie2_serialize(trie, serialized, serializedSize, &error); 111 serializedSize = utrie2_serialize(trie, serialized, serializedSize, &error);
323 assert(error == U_ZERO_ERROR); 112 assert(error == U_ZERO_ERROR);
324 113
114 generateUTrieSerialized(fp, serializedSize, serialized);
115
116 utrie2_close(trie);
117 }
118 #endif
119
120 int main(int argc, char** argv)
121 {
122
325 // Write the serialized array to the source file. 123 // Write the serialized array to the source file.
326 if (argc <= 1) { 124 if (argc <= 1) {
327 generate(stdout, serializedSize, serialized); 125 generate(stdout);
328 } else { 126 } else {
329 FILE* fp = fopen(argv[1], "wb"); 127 FILE* fp = fopen(argv[1], "wb");
330 generate(fp, serializedSize, serialized); 128 generate(fp);
331 fclose(fp); 129 fclose(fp);
332 } 130 }
333 131
334 utrie2_close(trie);
335
336 return 0; 132 return 0;
337 } 133 }
OLDNEW
« no previous file with comments | « third_party/WebKit/Source/platform/fonts/CharacterData.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698