Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(2)

Side by Side Diff: third_party/ots/src/cmap.cc

Issue 658573004: Updating to new OTS repo from https://github.com/khaledhosny/ots.git (Closed) Base URL: https://chromium.googlesource.com/external/ots@master
Patch Set: Removing Emoji files from Build.gn Created 6 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « third_party/ots/src/cmap.h ('k') | third_party/ots/src/cvt.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 // Copyright (c) 2009 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "cmap.h"
6
7 #include <algorithm>
8 #include <set>
9 #include <utility>
10 #include <vector>
11
12 #include "maxp.h"
13 #include "os2.h"
14
// cmap - Character To Glyph Index Mapping Table
// http://www.microsoft.com/typography/otspec/cmap.htm

// Table tag for this translation unit.
// NOTE(review): presumably consumed by the OTS_FAILURE_MSG macro to prefix
// diagnostics; the macro definition is not visible here - confirm.
#define TABLE_NAME "cmap"
19
20 namespace {
21
// One entry of the cmap encoding-record directory, extended with the fields
// that are read from the start of the subtable the record points at.
struct CMAPSubtableHeader {
  uint16_t platform;   // platform ID from the encoding record
  uint16_t encoding;   // platform-specific encoding ID from the record
  uint32_t offset;     // byte offset of the subtable from the cmap table start
  uint16_t format;     // subtable format, read from the subtable itself
  uint32_t length;     // subtable length in bytes (0 for unhandled formats)
  uint32_t language;   // subtable language field (0 when the format has none)
};
30
// One segment of a format 4 subtable, gathered from its four parallel arrays.
struct Subtable314Range {
  uint16_t start_range;             // first character code of the segment
  uint16_t end_range;               // last character code of the segment
  int16_t id_delta;                 // delta added to the code (modulo 65536)
  uint16_t id_range_offset;         // 0, or byte offset into the glyph array
  uint32_t id_range_offset_offset;  // position of id_range_offset itself
                                    // within the subtable; offsets are
                                    // relative to this position
};
38
// The maximum number of groups in format 12, 13 or 14 subtables.
// Note: 0xFFFF is the maximum number of glyphs in a single font file.
const unsigned kMaxCMAPGroups = 0xFFFF;

// Glyph array size for the Mac Roman (format 0) table.
const size_t kFormat0ArraySize = 256;

// The upper limit of the Unicode code point.
const uint32_t kUnicodeUpperLimit = 0x10FFFF;

// The ranges of Unicode variation selectors (UVSes) are:
//   0x180B-0x180D   (3 code points)
//   0xFE00-0xFE0F   (16 code points)
//   0xE0100-0xE01EF (240 code points)
const uint32_t kMongolianVSStart = 0x180B;
const uint32_t kMongolianVSEnd = 0x180D;
const uint32_t kVSStart = 0xFE00;
const uint32_t kVSEnd = 0xFE0F;
const uint32_t kIVSStart = 0xE0100;
const uint32_t kIVSEnd = 0xE01EF;

// The maximum number of UVS records: 3 + 16 + 240, one per selector above.
const uint32_t kMaxCMAPSelectorRecords = 259;

// Upper bound for a value stored in a 24-bit field of a format 14 subtable.
const uint32_t kUVSUpperLimit = 0xFFFFFF;
62
63 // Parses Format 4 tables
64 bool ParseFormat4(ots::OpenTypeFile *file, int platform, int encoding,
65 const uint8_t *data, size_t length, uint16_t num_glyphs) {
66 ots::Buffer subtable(data, length);
67
68 // 0.3.4, 3.0.4 or 3.1.4 subtables are complex and, rather than expanding the
69 // whole thing and recompacting it, we validate it and include it verbatim
70 // in the output.
71
72 if (!file->os2) {
73 return OTS_FAILURE_MSG("Required OS/2 table missing");
74 }
75
76 if (!subtable.Skip(4)) {
77 return OTS_FAILURE_MSG("Can't read 4 bytes at start of cmap format 4 subtabl e");
78 }
79 uint16_t language = 0;
80 if (!subtable.ReadU16(&language)) {
81 return OTS_FAILURE_MSG("Can't read language");
82 }
83 if (language) {
84 // Platform ID 3 (windows) subtables should have language '0'.
85 return OTS_FAILURE_MSG("Languages should be 0 (%d)", language);
86 }
87
88 uint16_t segcountx2, search_range, entry_selector, range_shift;
89 segcountx2 = search_range = entry_selector = range_shift = 0;
90 if (!subtable.ReadU16(&segcountx2) ||
91 !subtable.ReadU16(&search_range) ||
92 !subtable.ReadU16(&entry_selector) ||
93 !subtable.ReadU16(&range_shift)) {
94 return OTS_FAILURE_MSG("Failed to read subcmap structure");
95 }
96
97 if (segcountx2 & 1 || search_range & 1) {
98 return OTS_FAILURE_MSG("Bad subcmap structure");
99 }
100 const uint16_t segcount = segcountx2 >> 1;
101 // There must be at least one segment according the spec.
102 if (segcount < 1) {
103 return OTS_FAILURE_MSG("Segcount < 1 (%d)", segcount);
104 }
105
106 // log2segcount is the maximal x s.t. 2^x < segcount
107 unsigned log2segcount = 0;
108 while (1u << (log2segcount + 1) <= segcount) {
109 log2segcount++;
110 }
111
112 const uint16_t expected_search_range = 2 * 1u << log2segcount;
113 if (expected_search_range != search_range) {
114 return OTS_FAILURE_MSG("expected search range != search range (%d != %d)", e xpected_search_range, search_range);
115 }
116
117 if (entry_selector != log2segcount) {
118 return OTS_FAILURE_MSG("entry selector != log2(segement count) (%d != %d)", entry_selector, log2segcount);
119 }
120
121 const uint16_t expected_range_shift = segcountx2 - search_range;
122 if (range_shift != expected_range_shift) {
123 return OTS_FAILURE_MSG("unexpected range shift (%d != %d)", range_shift, exp ected_range_shift);
124 }
125
126 std::vector<Subtable314Range> ranges(segcount);
127
128 for (unsigned i = 0; i < segcount; ++i) {
129 if (!subtable.ReadU16(&ranges[i].end_range)) {
130 return OTS_FAILURE_MSG("Failed to read segment %d", i);
131 }
132 }
133
134 uint16_t padding;
135 if (!subtable.ReadU16(&padding)) {
136 return OTS_FAILURE_MSG("Failed to read cmap subtable segment padding");
137 }
138 if (padding) {
139 return OTS_FAILURE_MSG("Non zero cmap subtable segment padding (%d)", paddin g);
140 }
141
142 for (unsigned i = 0; i < segcount; ++i) {
143 if (!subtable.ReadU16(&ranges[i].start_range)) {
144 return OTS_FAILURE_MSG("Failed to read segment start range %d", i);
145 }
146 }
147 for (unsigned i = 0; i < segcount; ++i) {
148 if (!subtable.ReadS16(&ranges[i].id_delta)) {
149 return OTS_FAILURE_MSG("Failed to read segment delta %d", i);
150 }
151 }
152 for (unsigned i = 0; i < segcount; ++i) {
153 ranges[i].id_range_offset_offset = subtable.offset();
154 if (!subtable.ReadU16(&ranges[i].id_range_offset)) {
155 return OTS_FAILURE_MSG("Failed to read segment range offset %d", i);
156 }
157
158 if (ranges[i].id_range_offset & 1) {
159 // Some font generators seem to put 65535 on id_range_offset
160 // for 0xFFFF-0xFFFF range.
161 // (e.g., many fonts in http://www.princexml.com/fonts/)
162 if (i == segcount - 1u) {
163 OTS_WARNING("bad id_range_offset");
164 ranges[i].id_range_offset = 0;
165 // The id_range_offset value in the transcoded font will not change
166 // since this table is not actually "transcoded" yet.
167 } else {
168 return OTS_FAILURE_MSG("Bad segment offset (%d)", ranges[i].id_range_off set);
169 }
170 }
171 }
172
173 // ranges must be ascending order, based on the end_code. Ranges may not
174 // overlap.
175 for (unsigned i = 1; i < segcount; ++i) {
176 if ((i == segcount - 1u) &&
177 (ranges[i - 1].start_range == 0xffff) &&
178 (ranges[i - 1].end_range == 0xffff) &&
179 (ranges[i].start_range == 0xffff) &&
180 (ranges[i].end_range == 0xffff)) {
181 // Some fonts (e.g., Germania.ttf) have multiple 0xffff terminators.
182 // We'll accept them as an exception.
183 OTS_WARNING("multiple 0xffff terminators found");
184 continue;
185 }
186
187 // Note: some Linux fonts (e.g., LucidaSansOblique.ttf, bsmi00lp.ttf) have
188 // unsorted table...
189 if (ranges[i].end_range <= ranges[i - 1].end_range) {
190 return OTS_FAILURE_MSG("Out of order end range (%d <= %d)", ranges[i].end_ range, ranges[i-1].end_range);
191 }
192 if (ranges[i].start_range <= ranges[i - 1].end_range) {
193 return OTS_FAILURE_MSG("out of order start range (%d <= %d)", ranges[i].st art_range, ranges[i-1].end_range);
194 }
195
196 // On many fonts, the value of {first, last}_char_index are incorrect.
197 // Fix them.
198 if (file->os2->first_char_index != 0xFFFF &&
199 ranges[i].start_range != 0xFFFF &&
200 file->os2->first_char_index > ranges[i].start_range) {
201 file->os2->first_char_index = ranges[i].start_range;
202 }
203 if (file->os2->last_char_index != 0xFFFF &&
204 ranges[i].end_range != 0xFFFF &&
205 file->os2->last_char_index < ranges[i].end_range) {
206 file->os2->last_char_index = ranges[i].end_range;
207 }
208 }
209
210 // The last range must end at 0xffff
211 if (ranges[segcount - 1].start_range != 0xffff || ranges[segcount - 1].end_ran ge != 0xffff) {
212 return OTS_FAILURE_MSG("Final segment start and end must be 0xFFFF (0x%04X-0 x%04X)",
213 ranges[segcount - 1].start_range, ranges[segcount - 1 ].end_range);
214 }
215
216 // A format 4 CMAP subtable is complex. To be safe we simulate a lookup of
217 // each code-point defined in the table and make sure that they are all valid
218 // glyphs and that we don't access anything out-of-bounds.
219 for (unsigned i = 0; i < segcount; ++i) {
220 for (unsigned cp = ranges[i].start_range; cp <= ranges[i].end_range; ++cp) {
221 const uint16_t code_point = static_cast<uint16_t>(cp);
222 if (ranges[i].id_range_offset == 0) {
223 // this is explictly allowed to overflow in the spec
224 const uint16_t glyph = code_point + ranges[i].id_delta;
225 if (glyph >= num_glyphs) {
226 return OTS_FAILURE_MSG("Range glyph reference too high (%d > %d)", gly ph, num_glyphs - 1);
227 }
228 } else {
229 const uint16_t range_delta = code_point - ranges[i].start_range;
230 // this might seem odd, but it's true. The offset is relative to the
231 // location of the offset value itself.
232 const uint32_t glyph_id_offset = ranges[i].id_range_offset_offset +
233 ranges[i].id_range_offset +
234 range_delta * 2;
235 // We need to be able to access a 16-bit value from this offset
236 if (glyph_id_offset + 1 >= length) {
237 return OTS_FAILURE_MSG("bad glyph id offset (%d > %ld)", glyph_id_offs et, length);
238 }
239 uint16_t glyph;
240 std::memcpy(&glyph, data + glyph_id_offset, 2);
241 glyph = ntohs(glyph);
242 if (glyph >= num_glyphs) {
243 return OTS_FAILURE_MSG("Range glyph reference too high (%d > %d)", gly ph, num_glyphs - 1);
244 }
245 }
246 }
247 }
248
249 // We accept the table.
250 // TODO(yusukes): transcode the subtable.
251 if (platform == 3 && encoding == 0) {
252 file->cmap->subtable_3_0_4_data = data;
253 file->cmap->subtable_3_0_4_length = length;
254 } else if (platform == 3 && encoding == 1) {
255 file->cmap->subtable_3_1_4_data = data;
256 file->cmap->subtable_3_1_4_length = length;
257 } else if (platform == 0 && encoding == 3) {
258 file->cmap->subtable_0_3_4_data = data;
259 file->cmap->subtable_0_3_4_length = length;
260 } else {
261 return OTS_FAILURE_MSG("Unknown cmap subtable type (platform=%d, encoding=%d )", platform, encoding);
262 }
263
264 return true;
265 }
266
267 bool Parse31012(ots::OpenTypeFile *file,
268 const uint8_t *data, size_t length, uint16_t num_glyphs) {
269 ots::Buffer subtable(data, length);
270
271 // Format 12 tables are simple. We parse these and fully serialise them
272 // later.
273
274 if (!subtable.Skip(8)) {
275 return OTS_FAILURE_MSG("failed to skip the first 8 bytes of format 12 subtab le");
276 }
277 uint32_t language = 0;
278 if (!subtable.ReadU32(&language)) {
279 return OTS_FAILURE_MSG("can't read format 12 subtable language");
280 }
281 if (language) {
282 return OTS_FAILURE_MSG("format 12 subtable language should be zero (%d)", la nguage);
283 }
284
285 uint32_t num_groups = 0;
286 if (!subtable.ReadU32(&num_groups)) {
287 return OTS_FAILURE_MSG("can't read number of format 12 subtable groups");
288 }
289 if (num_groups == 0 || num_groups > kMaxCMAPGroups) {
290 return OTS_FAILURE_MSG("bad format 12 subtable group count %d", num_groups);
291 }
292
293 std::vector<ots::OpenTypeCMAPSubtableRange> &groups
294 = file->cmap->subtable_3_10_12;
295 groups.resize(num_groups);
296
297 for (unsigned i = 0; i < num_groups; ++i) {
298 if (!subtable.ReadU32(&groups[i].start_range) ||
299 !subtable.ReadU32(&groups[i].end_range) ||
300 !subtable.ReadU32(&groups[i].start_glyph_id)) {
301 return OTS_FAILURE_MSG("can't read format 12 subtable group");
302 }
303
304 if (groups[i].start_range > kUnicodeUpperLimit ||
305 groups[i].end_range > kUnicodeUpperLimit ||
306 groups[i].start_glyph_id > 0xFFFF) {
307 return OTS_FAILURE_MSG("bad format 12 subtable group (startCharCode=0x%4X, endCharCode=0x%4X, startGlyphID=%d)",
308 groups[i].start_range, groups[i].end_range, groups[ i].start_glyph_id);
309 }
310
311 // [0xD800, 0xDFFF] are surrogate code points.
312 if (groups[i].start_range >= 0xD800 &&
313 groups[i].start_range <= 0xDFFF) {
314 return OTS_FAILURE_MSG("format 12 subtable out of range group startCharCod e (0x%4X)", groups[i].start_range);
315 }
316 if (groups[i].end_range >= 0xD800 &&
317 groups[i].end_range <= 0xDFFF) {
318 return OTS_FAILURE_MSG("format 12 subtable out of range group endCharCode (0x%4X)", groups[i].end_range);
319 }
320 if (groups[i].start_range < 0xD800 &&
321 groups[i].end_range > 0xDFFF) {
322 return OTS_FAILURE_MSG("bad format 12 subtable group startCharCode (0x%4X) or endCharCode (0x%4X)",
323 groups[i].start_range, groups[i].end_range);
324 }
325
326 // We assert that the glyph value is within range. Because of the range
327 // limits, above, we don't need to worry about overflow.
328 if (groups[i].end_range < groups[i].start_range) {
329 return OTS_FAILURE_MSG("format 12 subtable group endCharCode before startC harCode (0x%4X < 0x%4X)",
330 groups[i].end_range, groups[i].start_range);
331 }
332 if ((groups[i].end_range - groups[i].start_range) +
333 groups[i].start_glyph_id > num_glyphs) {
334 return OTS_FAILURE_MSG("bad format 12 subtable group startGlyphID (%d)", g roups[i].start_glyph_id);
335 }
336 }
337
338 // the groups must be sorted by start code and may not overlap
339 for (unsigned i = 1; i < num_groups; ++i) {
340 if (groups[i].start_range <= groups[i - 1].start_range) {
341 return OTS_FAILURE_MSG("out of order format 12 subtable group (startCharCo de=0x%4X <= startCharCode=0x%4X of previous group)",
342 groups[i].start_range, groups[i-1].start_range);
343 }
344 if (groups[i].start_range <= groups[i - 1].end_range) {
345 return OTS_FAILURE_MSG("overlapping format 12 subtable groups (startCharCo de=0x%4X <= endCharCode=0x%4X of previous group)",
346 groups[i].start_range, groups[i-1].end_range);
347 }
348 }
349
350 return true;
351 }
352
353 bool Parse31013(ots::OpenTypeFile *file,
354 const uint8_t *data, size_t length, uint16_t num_glyphs) {
355 ots::Buffer subtable(data, length);
356
357 // Format 13 tables are simple. We parse these and fully serialise them
358 // later.
359
360 if (!subtable.Skip(8)) {
361 return OTS_FAILURE_MSG("Bad cmap subtable length");
362 }
363 uint32_t language = 0;
364 if (!subtable.ReadU32(&language)) {
365 return OTS_FAILURE_MSG("Can't read cmap subtable language");
366 }
367 if (language) {
368 return OTS_FAILURE_MSG("Cmap subtable language should be zero but is %d", la nguage);
369 }
370
371 uint32_t num_groups = 0;
372 if (!subtable.ReadU32(&num_groups)) {
373 return OTS_FAILURE_MSG("Can't read number of groups in a cmap subtable");
374 }
375
376 // We limit the number of groups in the same way as in 3.10.12 tables. See
377 // the comment there in
378 if (num_groups == 0 || num_groups > kMaxCMAPGroups) {
379 return OTS_FAILURE_MSG("Bad number of groups (%d) in a cmap subtable", num_g roups);
380 }
381
382 std::vector<ots::OpenTypeCMAPSubtableRange> &groups
383 = file->cmap->subtable_3_10_13;
384 groups.resize(num_groups);
385
386 for (unsigned i = 0; i < num_groups; ++i) {
387 if (!subtable.ReadU32(&groups[i].start_range) ||
388 !subtable.ReadU32(&groups[i].end_range) ||
389 !subtable.ReadU32(&groups[i].start_glyph_id)) {
390 return OTS_FAILURE_MSG("Can't read subrange structure in a cmap subtable") ;
391 }
392
393 // We conservatively limit all of the values to protect some parsers from
394 // overflows
395 if (groups[i].start_range > kUnicodeUpperLimit ||
396 groups[i].end_range > kUnicodeUpperLimit ||
397 groups[i].start_glyph_id > 0xFFFF) {
398 return OTS_FAILURE_MSG("Bad subrange with start_range=%d, end_range=%d, st art_glyph_id=%d", groups[i].start_range, groups[i].end_range, groups[i].start_gl yph_id);
399 }
400
401 if (groups[i].start_glyph_id >= num_glyphs) {
402 return OTS_FAILURE_MSG("Subrange starting glyph id too high (%d > %d)", gr oups[i].start_glyph_id, num_glyphs);
403 }
404 }
405
406 // the groups must be sorted by start code and may not overlap
407 for (unsigned i = 1; i < num_groups; ++i) {
408 if (groups[i].start_range <= groups[i - 1].start_range) {
409 return OTS_FAILURE_MSG("Overlapping subrange starts (%d >= %d)", groups[i] . start_range, groups[i-1].start_range);
410 }
411 if (groups[i].start_range <= groups[i - 1].end_range) {
412 return OTS_FAILURE_MSG("Overlapping subranges (%d <= %d)", groups[i].start _range, groups[i-1].end_range);
413 }
414 }
415
416 return true;
417 }
418
419 bool Parse0514(ots::OpenTypeFile *file,
420 const uint8_t *data, size_t length, uint16_t num_glyphs) {
421 // Unicode Variation Selector table
422 ots::Buffer subtable(data, length);
423
424 // Format 14 tables are simple. We parse these and fully serialise them
425 // later.
426
427 // Skip format (USHORT) and length (ULONG)
428 if (!subtable.Skip(6)) {
429 return OTS_FAILURE_MSG("Can't read start of cmap subtable");
430 }
431
432 uint32_t num_records = 0;
433 if (!subtable.ReadU32(&num_records)) {
434 return OTS_FAILURE_MSG("Can't read number of records in cmap subtable");
435 }
436 if (num_records == 0 || num_records > kMaxCMAPSelectorRecords) {
437 return OTS_FAILURE_MSG("Bad number of records (%d) in cmap subtable", num_re cords);
438 }
439
440 std::vector<ots::OpenTypeCMAPSubtableVSRecord>& records
441 = file->cmap->subtable_0_5_14;
442 records.resize(num_records);
443
444 for (unsigned i = 0; i < num_records; ++i) {
445 if (!subtable.ReadU24(&records[i].var_selector) ||
446 !subtable.ReadU32(&records[i].default_offset) ||
447 !subtable.ReadU32(&records[i].non_default_offset)) {
448 return OTS_FAILURE_MSG("Can't read record structure of record %d in cmap s ubtale", i);
449 }
450 // Checks the value of variation selector
451 if (!((records[i].var_selector >= kMongolianVSStart &&
452 records[i].var_selector <= kMongolianVSEnd) ||
453 (records[i].var_selector >= kVSStart &&
454 records[i].var_selector <= kVSEnd) ||
455 (records[i].var_selector >= kIVSStart &&
456 records[i].var_selector <= kIVSEnd))) {
457 return OTS_FAILURE_MSG("Bad record variation selector (%04X) in record %i" , records[i].var_selector, i);
458 }
459 if (i > 0 &&
460 records[i-1].var_selector >= records[i].var_selector) {
461 return OTS_FAILURE_MSG("Out of order variation selector (%04X >= %04X) in record %d", records[i-1].var_selector, records[i].var_selector, i);
462 }
463
464 // Checks offsets
465 if (!records[i].default_offset && !records[i].non_default_offset) {
466 return OTS_FAILURE_MSG("No default aoffset in variation selector record %d ", i);
467 }
468 if (records[i].default_offset &&
469 records[i].default_offset >= length) {
470 return OTS_FAILURE_MSG("Default offset too high (%d >= %ld) in record %d", records[i].default_offset, length, i);
471 }
472 if (records[i].non_default_offset &&
473 records[i].non_default_offset >= length) {
474 return OTS_FAILURE_MSG("Non default offset too high (%d >= %ld) in record %d", records[i].non_default_offset, length, i);
475 }
476 }
477
478 for (unsigned i = 0; i < num_records; ++i) {
479 // Checks default UVS table
480 if (records[i].default_offset) {
481 subtable.set_offset(records[i].default_offset);
482 uint32_t num_ranges = 0;
483 if (!subtable.ReadU32(&num_ranges)) {
484 return OTS_FAILURE_MSG("Can't read number of ranges in record %d", i);
485 }
486 if (!num_ranges || num_ranges > kMaxCMAPGroups) {
487 return OTS_FAILURE_MSG("number of ranges too high (%d > %d) in record %d ", num_ranges, kMaxCMAPGroups, i);
488 }
489
490 uint32_t last_unicode_value = 0;
491 std::vector<ots::OpenTypeCMAPSubtableVSRange>& ranges
492 = records[i].ranges;
493 ranges.resize(num_ranges);
494
495 for (unsigned j = 0; j < num_ranges; ++j) {
496 if (!subtable.ReadU24(&ranges[j].unicode_value) ||
497 !subtable.ReadU8(&ranges[j].additional_count)) {
498 return OTS_FAILURE_MSG("Can't read range info in variation selector re cord %d", i);
499 }
500 const uint32_t check_value =
501 ranges[j].unicode_value + ranges[j].additional_count;
502 if (ranges[j].unicode_value == 0 ||
503 ranges[j].unicode_value > kUnicodeUpperLimit ||
504 check_value > kUVSUpperLimit ||
505 (last_unicode_value &&
506 ranges[j].unicode_value <= last_unicode_value)) {
507 return OTS_FAILURE_MSG("Bad Unicode value *%04X) in variation selector range %d record %d", ranges[j].unicode_value, j, i);
508 }
509 last_unicode_value = check_value;
510 }
511 }
512
513 // Checks non default UVS table
514 if (records[i].non_default_offset) {
515 subtable.set_offset(records[i].non_default_offset);
516 uint32_t num_mappings = 0;
517 if (!subtable.ReadU32(&num_mappings)) {
518 return OTS_FAILURE_MSG("Can't read number of mappings in variation selec tor record %d", i);
519 }
520 if (!num_mappings || num_mappings > kMaxCMAPGroups) {
521 return OTS_FAILURE_MSG("Number of mappings too high (%d) in variation se lector record %d", num_mappings, i);
522 }
523
524 uint32_t last_unicode_value = 0;
525 std::vector<ots::OpenTypeCMAPSubtableVSMapping>& mappings
526 = records[i].mappings;
527 mappings.resize(num_mappings);
528
529 for (unsigned j = 0; j < num_mappings; ++j) {
530 if (!subtable.ReadU24(&mappings[j].unicode_value) ||
531 !subtable.ReadU16(&mappings[j].glyph_id)) {
532 return OTS_FAILURE_MSG("Can't read mapping %d in variation selector re cord %d", j, i);
533 }
534 if (mappings[j].glyph_id == 0 ||
535 mappings[j].unicode_value == 0 ||
536 mappings[j].unicode_value > kUnicodeUpperLimit ||
537 (last_unicode_value &&
538 mappings[j].unicode_value <= last_unicode_value)) {
539 return OTS_FAILURE_MSG("Bad mapping (%04X -> %d) in mapping %d of vari ation selector %d", mappings[j].unicode_value, mappings[j].glyph_id, j, i);
540 }
541 last_unicode_value = mappings[j].unicode_value;
542 }
543 }
544 }
545
546 if (subtable.offset() != length) {
547 return OTS_FAILURE_MSG("Bad subtable offset (%ld != %ld)", subtable.offset() , length);
548 }
549 file->cmap->subtable_0_5_14_length = subtable.offset();
550 return true;
551 }
552
553 bool Parse100(ots::OpenTypeFile *file, const uint8_t *data, size_t length) {
554 // Mac Roman table
555 ots::Buffer subtable(data, length);
556
557 if (!subtable.Skip(4)) {
558 return OTS_FAILURE_MSG("Bad cmap subtable");
559 }
560 uint16_t language = 0;
561 if (!subtable.ReadU16(&language)) {
562 return OTS_FAILURE_MSG("Can't read language in cmap subtable");
563 }
564 if (language) {
565 // simsun.ttf has non-zero language id.
566 OTS_WARNING("language id should be zero: %u", language);
567 }
568
569 file->cmap->subtable_1_0_0.reserve(kFormat0ArraySize);
570 for (size_t i = 0; i < kFormat0ArraySize; ++i) {
571 uint8_t glyph_id = 0;
572 if (!subtable.ReadU8(&glyph_id)) {
573 return OTS_FAILURE_MSG("Can't read glyph id at array[%ld] in cmap subtable ", i);
574 }
575 file->cmap->subtable_1_0_0.push_back(glyph_id);
576 }
577
578 return true;
579 }
580
581 } // namespace
582
583 namespace ots {
584
585 bool ots_cmap_parse(OpenTypeFile *file, const uint8_t *data, size_t length) {
586 Buffer table(data, length);
587 file->cmap = new OpenTypeCMAP;
588
589 uint16_t version = 0;
590 uint16_t num_tables = 0;
591 if (!table.ReadU16(&version) ||
592 !table.ReadU16(&num_tables)) {
593 return OTS_FAILURE_MSG("Can't read structure of cmap");
594 }
595
596 if (version != 0) {
597 return OTS_FAILURE_MSG("Non zero cmap version (%d)", version);
598 }
599 if (!num_tables) {
600 return OTS_FAILURE_MSG("No subtables in cmap!");
601 }
602
603 std::vector<CMAPSubtableHeader> subtable_headers;
604
605 // read the subtable headers
606 subtable_headers.reserve(num_tables);
607 for (unsigned i = 0; i < num_tables; ++i) {
608 CMAPSubtableHeader subt;
609
610 if (!table.ReadU16(&subt.platform) ||
611 !table.ReadU16(&subt.encoding) ||
612 !table.ReadU32(&subt.offset)) {
613 return OTS_FAILURE_MSG("Can't read subtable information cmap subtable %d", i);
614 }
615
616 subtable_headers.push_back(subt);
617 }
618
619 const size_t data_offset = table.offset();
620
621 // make sure that all the offsets are valid.
622 for (unsigned i = 0; i < num_tables; ++i) {
623 if (subtable_headers[i].offset > 1024 * 1024 * 1024) {
624 return OTS_FAILURE_MSG("Bad subtable offset in cmap subtable %d", i);
625 }
626 if (subtable_headers[i].offset < data_offset ||
627 subtable_headers[i].offset >= length) {
628 return OTS_FAILURE_MSG("Bad subtable offset (%d) in cmap subtable %d", sub table_headers[i].offset, i);
629 }
630 }
631
632 // the format of the table is the first couple of bytes in the table. The
633 // length of the table is stored in a format-specific way.
634 for (unsigned i = 0; i < num_tables; ++i) {
635 table.set_offset(subtable_headers[i].offset);
636 if (!table.ReadU16(&subtable_headers[i].format)) {
637 return OTS_FAILURE_MSG("Can't read cmap subtable header format %d", i);
638 }
639
640 uint16_t len = 0;
641 uint16_t lang = 0;
642 switch (subtable_headers[i].format) {
643 case 0:
644 case 4:
645 if (!table.ReadU16(&len)) {
646 return OTS_FAILURE_MSG("Can't read cmap subtable %d length", i);
647 }
648 if (!table.ReadU16(&lang)) {
649 return OTS_FAILURE_MSG("Can't read cmap subtable %d language", i);
650 }
651 subtable_headers[i].length = len;
652 subtable_headers[i].language = lang;
653 break;
654 case 12:
655 case 13:
656 if (!table.Skip(2)) {
657 return OTS_FAILURE_MSG("Bad cmap subtable %d structure", i);
658 }
659 if (!table.ReadU32(&subtable_headers[i].length)) {
660 return OTS_FAILURE_MSG("Can read cmap subtable %d length", i);
661 }
662 if (!table.ReadU32(&subtable_headers[i].language)) {
663 return OTS_FAILURE_MSG("Can't read cmap subtable %d language", i);
664 }
665 break;
666 case 14:
667 if (!table.ReadU32(&subtable_headers[i].length)) {
668 return OTS_FAILURE_MSG("Can't read cmap subtable %d length", i);
669 }
670 subtable_headers[i].language = 0;
671 break;
672 default:
673 subtable_headers[i].length = 0;
674 subtable_headers[i].language = 0;
675 break;
676 }
677 }
678
679 // check if the table is sorted first by platform ID, then by encoding ID.
680 uint32_t last_id = 0;
681 for (unsigned i = 0; i < num_tables; ++i) {
682 uint32_t current_id
683 = (subtable_headers[i].platform << 24)
684 + (subtable_headers[i].encoding << 16)
685 + subtable_headers[i].language;
686 if ((i != 0) && (last_id >= current_id)) {
687 return OTS_FAILURE_MSG("subtable %d with platform ID %d, encoding ID %d, l anguage ID %d "
688 "following subtable with platform ID %d, encoding I D %d, language ID %d",
689 i,
690 (uint8_t)(current_id >> 24), (uint8_t)(current_id > > 16), (uint8_t)(current_id),
691 (uint8_t)(last_id >> 24), (uint8_t)(last_id >> 16), (uint8_t)(last_id));
692 }
693 last_id = current_id;
694 }
695
696 // Now, verify that all the lengths are sane
697 for (unsigned i = 0; i < num_tables; ++i) {
698 if (!subtable_headers[i].length) continue;
699 if (subtable_headers[i].length > 1024 * 1024 * 1024) {
700 return OTS_FAILURE_MSG("Bad cmap subtable %d length", i);
701 }
702 // We know that both the offset and length are < 1GB, so the following
703 // addition doesn't overflow
704 const uint32_t end_byte
705 = subtable_headers[i].offset + subtable_headers[i].length;
706 if (end_byte > length) {
707 return OTS_FAILURE_MSG("Over long cmap subtable %d @ %d for %d", i, subtab le_headers[i].offset, subtable_headers[i].length);
708 }
709 }
710
711 // check that the cmap subtables are not overlapping.
712 std::set<std::pair<uint32_t, uint32_t> > uniq_checker;
713 std::vector<std::pair<uint32_t, uint8_t> > overlap_checker;
714 for (unsigned i = 0; i < num_tables; ++i) {
715 const uint32_t end_byte
716 = subtable_headers[i].offset + subtable_headers[i].length;
717
718 if (!uniq_checker.insert(std::make_pair(subtable_headers[i].offset,
719 end_byte)).second) {
720 // Sometimes Unicode table and MS table share exactly the same data.
721 // We'll allow this.
722 continue;
723 }
724 overlap_checker.push_back(
725 std::make_pair(subtable_headers[i].offset,
726 static_cast<uint8_t>(1) /* start */));
727 overlap_checker.push_back(
728 std::make_pair(end_byte, static_cast<uint8_t>(0) /* end */));
729 }
730 std::sort(overlap_checker.begin(), overlap_checker.end());
731 int overlap_count = 0;
732 for (unsigned i = 0; i < overlap_checker.size(); ++i) {
733 overlap_count += (overlap_checker[i].second ? 1 : -1);
734 if (overlap_count > 1) {
735 return OTS_FAILURE_MSG("Excessive overlap count %d", overlap_count);
736 }
737 }
738
739 // we grab the number of glyphs in the file from the maxp table to make sure
740 // that the character map isn't referencing anything beyound this range.
741 if (!file->maxp) {
742 return OTS_FAILURE_MSG("No maxp table in font! Needed by cmap.");
743 }
744 const uint16_t num_glyphs = file->maxp->num_glyphs;
745
746 // We only support a subset of the possible character map tables. Microsoft
747 // 'strongly recommends' that everyone supports the Unicode BMP table with
748 // the UCS-4 table for non-BMP glyphs. We'll pass the following subtables:
749 // Platform ID Encoding ID Format
750 // 0 0 4 (Unicode Default)
751 // 0 1 4 (Unicode 1.1)
752 // 0 3 4 (Unicode BMP)
753 // 0 3 12 (Unicode UCS-4)
754 // 0 5 14 (Unicode Variation Sequences)
755 // 1 0 0 (Mac Roman)
756 // 3 0 4 (MS Symbol)
757 // 3 1 4 (MS Unicode BMP)
758 // 3 10 12 (MS Unicode UCS-4)
759 // 3 10 13 (MS UCS-4 Fallback mapping)
760 //
761 // Note:
762 // * 0-0-4 and 0-1-4 tables are (usually) written as a 3-1-4 table. If 3-1-4 table
763 // also exists, the 0-0-4 or 0-1-4 tables are ignored.
764 // * Unlike 0-0-4 table, 0-3-4 table is written as a 0-3-4 table.
765 // Some fonts which include 0-5-14 table seems to be required 0-3-4
766 // table. The 0-3-4 table will be wriiten even if 3-1-4 table also exists.
767 // * 0-3-12 table is written as a 3-10-12 table. If 3-10-12 table also
768 // exists, the 0-3-12 table is ignored.
769 //
770
771 for (unsigned i = 0; i < num_tables; ++i) {
772 if (subtable_headers[i].platform == 0) {
773 // Unicode platform
774
775 if ((subtable_headers[i].encoding == 0 || subtable_headers[i].encoding == 1) &&
776 (subtable_headers[i].format == 4)) {
777 // parse and output the 0-0-4 and 0-1-4 tables as 3-1-4 table. Sometimes the 0-0-4
778 // table actually points to MS symbol data and thus should be parsed as
779 // 3-0-4 table (e.g., marqueem.ttf and quixotic.ttf). This error will be
780 // recovered in ots_cmap_serialise().
781 if (!ParseFormat4(file, 3, 1, data + subtable_headers[i].offset,
782 subtable_headers[i].length, num_glyphs)) {
783 return OTS_FAILURE_MSG("Failed to parse format 4 cmap subtable %d", i) ;
784 }
785 } else if ((subtable_headers[i].encoding == 3) &&
786 (subtable_headers[i].format == 4)) {
787 // parse and output the 0-3-4 table as 0-3-4 table.
788 if (!ParseFormat4(file, 0, 3, data + subtable_headers[i].offset,
789 subtable_headers[i].length, num_glyphs)) {
790 return OTS_FAILURE_MSG("Failed to parse format 4 cmap subtable %d", i) ;
791 }
792 } else if ((subtable_headers[i].encoding == 3) &&
793 (subtable_headers[i].format == 12)) {
794 // parse and output the 0-3-12 table as 3-10-12 table.
795 if (!Parse31012(file, data + subtable_headers[i].offset,
796 subtable_headers[i].length, num_glyphs)) {
797 return OTS_FAILURE_MSG("Failed to parse format 12 cmap subtable %d", i );
798 }
799 } else if ((subtable_headers[i].encoding == 5) &&
800 (subtable_headers[i].format == 14)) {
801 if (!Parse0514(file, data + subtable_headers[i].offset,
802 subtable_headers[i].length, num_glyphs)) {
803 return OTS_FAILURE_MSG("Failed to parse format 14 cmap subtable %d", i );
804 }
805 }
806 } else if (subtable_headers[i].platform == 1) {
807 // Mac platform
808
809 if ((subtable_headers[i].encoding == 0) &&
810 (subtable_headers[i].format == 0)) {
811 // parse and output the 1-0-0 table.
812 if (!Parse100(file, data + subtable_headers[i].offset,
813 subtable_headers[i].length)) {
814 return OTS_FAILURE();
815 }
816 }
817 } else if (subtable_headers[i].platform == 3) {
818 // MS platform
819
820 switch (subtable_headers[i].encoding) {
821 case 0:
822 case 1:
823 if (subtable_headers[i].format == 4) {
824 // parse 3-0-4 or 3-1-4 table.
825 if (!ParseFormat4(file, subtable_headers[i].platform,
826 subtable_headers[i].encoding,
827 data + subtable_headers[i].offset,
828 subtable_headers[i].length, num_glyphs)) {
829 return OTS_FAILURE();
830 }
831 }
832 break;
833 case 10:
834 if (subtable_headers[i].format == 12) {
835 file->cmap->subtable_3_10_12.clear();
836 if (!Parse31012(file, data + subtable_headers[i].offset,
837 subtable_headers[i].length, num_glyphs)) {
838 return OTS_FAILURE();
839 }
840 } else if (subtable_headers[i].format == 13) {
841 file->cmap->subtable_3_10_13.clear();
842 if (!Parse31013(file, data + subtable_headers[i].offset,
843 subtable_headers[i].length, num_glyphs)) {
844 return OTS_FAILURE();
845 }
846 }
847 break;
848 }
849 }
850 }
851
852 return true;
853 }
854
855 bool ots_cmap_should_serialise(OpenTypeFile *file) {
856 return file->cmap != NULL;
857 }
858
859 bool ots_cmap_serialise(OTSStream *out, OpenTypeFile *file) {
860 const bool have_034 = file->cmap->subtable_0_3_4_data != NULL;
861 const bool have_0514 = file->cmap->subtable_0_5_14.size() != 0;
862 const bool have_100 = file->cmap->subtable_1_0_0.size() != 0;
863 const bool have_304 = file->cmap->subtable_3_0_4_data != NULL;
864 // MS Symbol and MS Unicode tables should not co-exist.
865 // See the comment above in 0-0-4 parser.
866 const bool have_314 = (!have_304) && file->cmap->subtable_3_1_4_data;
867 const bool have_31012 = file->cmap->subtable_3_10_12.size() != 0;
868 const bool have_31013 = file->cmap->subtable_3_10_13.size() != 0;
869 const uint16_t num_subtables = static_cast<uint16_t>(have_034) +
870 static_cast<uint16_t>(have_0514) +
871 static_cast<uint16_t>(have_100) +
872 static_cast<uint16_t>(have_304) +
873 static_cast<uint16_t>(have_314) +
874 static_cast<uint16_t>(have_31012) +
875 static_cast<uint16_t>(have_31013);
876 const off_t table_start = out->Tell();
877
878 // Some fonts don't have 3-0-4 MS Symbol nor 3-1-4 Unicode BMP tables
879 // (e.g., old fonts for Mac). We don't support them.
880 if (!have_304 && !have_314 && !have_034 && !have_31012 && !have_31013) {
881 return OTS_FAILURE_MSG("no supported subtables were found");
882 }
883
884 if (!out->WriteU16(0) ||
885 !out->WriteU16(num_subtables)) {
886 return OTS_FAILURE();
887 }
888
889 const off_t record_offset = out->Tell();
890 if (!out->Pad(num_subtables * 8)) {
891 return OTS_FAILURE();
892 }
893
894 const off_t offset_034 = out->Tell();
895 if (have_034) {
896 if (!out->Write(file->cmap->subtable_0_3_4_data,
897 file->cmap->subtable_0_3_4_length)) {
898 return OTS_FAILURE();
899 }
900 }
901
902 const off_t offset_0514 = out->Tell();
903 if (have_0514) {
904 const std::vector<ots::OpenTypeCMAPSubtableVSRecord> &records
905 = file->cmap->subtable_0_5_14;
906 const unsigned num_records = records.size();
907 if (!out->WriteU16(14) ||
908 !out->WriteU32(file->cmap->subtable_0_5_14_length) ||
909 !out->WriteU32(num_records)) {
910 return OTS_FAILURE();
911 }
912 for (unsigned i = 0; i < num_records; ++i) {
913 if (!out->WriteU24(records[i].var_selector) ||
914 !out->WriteU32(records[i].default_offset) ||
915 !out->WriteU32(records[i].non_default_offset)) {
916 return OTS_FAILURE();
917 }
918 }
919 for (unsigned i = 0; i < num_records; ++i) {
920 if (records[i].default_offset) {
921 const std::vector<ots::OpenTypeCMAPSubtableVSRange> &ranges
922 = records[i].ranges;
923 const unsigned num_ranges = ranges.size();
924 if (!out->Seek(records[i].default_offset + offset_0514) ||
925 !out->WriteU32(num_ranges)) {
926 return OTS_FAILURE();
927 }
928 for (unsigned j = 0; j < num_ranges; ++j) {
929 if (!out->WriteU24(ranges[j].unicode_value) ||
930 !out->WriteU8(ranges[j].additional_count)) {
931 return OTS_FAILURE();
932 }
933 }
934 }
935 if (records[i].non_default_offset) {
936 const std::vector<ots::OpenTypeCMAPSubtableVSMapping> &mappings
937 = records[i].mappings;
938 const unsigned num_mappings = mappings.size();
939 if (!out->Seek(records[i].non_default_offset + offset_0514) ||
940 !out->WriteU32(num_mappings)) {
941 return OTS_FAILURE();
942 }
943 for (unsigned j = 0; j < num_mappings; ++j) {
944 if (!out->WriteU24(mappings[j].unicode_value) ||
945 !out->WriteU16(mappings[j].glyph_id)) {
946 return OTS_FAILURE();
947 }
948 }
949 }
950 }
951 }
952
953 const off_t offset_100 = out->Tell();
954 if (have_100) {
955 if (!out->WriteU16(0) || // format
956 !out->WriteU16(6 + kFormat0ArraySize) || // length
957 !out->WriteU16(0)) { // language
958 return OTS_FAILURE();
959 }
960 if (!out->Write(&(file->cmap->subtable_1_0_0[0]), kFormat0ArraySize)) {
961 return OTS_FAILURE();
962 }
963 }
964
965 const off_t offset_304 = out->Tell();
966 if (have_304) {
967 if (!out->Write(file->cmap->subtable_3_0_4_data,
968 file->cmap->subtable_3_0_4_length)) {
969 return OTS_FAILURE();
970 }
971 }
972
973 const off_t offset_314 = out->Tell();
974 if (have_314) {
975 if (!out->Write(file->cmap->subtable_3_1_4_data,
976 file->cmap->subtable_3_1_4_length)) {
977 return OTS_FAILURE();
978 }
979 }
980
981 const off_t offset_31012 = out->Tell();
982 if (have_31012) {
983 std::vector<OpenTypeCMAPSubtableRange> &groups
984 = file->cmap->subtable_3_10_12;
985 const unsigned num_groups = groups.size();
986 if (!out->WriteU16(12) ||
987 !out->WriteU16(0) ||
988 !out->WriteU32(num_groups * 12 + 16) ||
989 !out->WriteU32(0) ||
990 !out->WriteU32(num_groups)) {
991 return OTS_FAILURE();
992 }
993
994 for (unsigned i = 0; i < num_groups; ++i) {
995 if (!out->WriteU32(groups[i].start_range) ||
996 !out->WriteU32(groups[i].end_range) ||
997 !out->WriteU32(groups[i].start_glyph_id)) {
998 return OTS_FAILURE();
999 }
1000 }
1001 }
1002
1003 const off_t offset_31013 = out->Tell();
1004 if (have_31013) {
1005 std::vector<OpenTypeCMAPSubtableRange> &groups
1006 = file->cmap->subtable_3_10_13;
1007 const unsigned num_groups = groups.size();
1008 if (!out->WriteU16(13) ||
1009 !out->WriteU16(0) ||
1010 !out->WriteU32(num_groups * 12 + 16) ||
1011 !out->WriteU32(0) ||
1012 !out->WriteU32(num_groups)) {
1013 return OTS_FAILURE();
1014 }
1015
1016 for (unsigned i = 0; i < num_groups; ++i) {
1017 if (!out->WriteU32(groups[i].start_range) ||
1018 !out->WriteU32(groups[i].end_range) ||
1019 !out->WriteU32(groups[i].start_glyph_id)) {
1020 return OTS_FAILURE();
1021 }
1022 }
1023 }
1024
1025 const off_t table_end = out->Tell();
1026 // We might have hanging bytes from the above's checksum which the OTSStream
1027 // then merges into the table of offsets.
1028 OTSStream::ChecksumState saved_checksum = out->SaveChecksumState();
1029 out->ResetChecksum();
1030
1031 // Now seek back and write the table of offsets
1032 if (!out->Seek(record_offset)) {
1033 return OTS_FAILURE();
1034 }
1035
1036 if (have_034) {
1037 if (!out->WriteU16(0) ||
1038 !out->WriteU16(3) ||
1039 !out->WriteU32(offset_034 - table_start)) {
1040 return OTS_FAILURE();
1041 }
1042 }
1043
1044 if (have_0514) {
1045 if (!out->WriteU16(0) ||
1046 !out->WriteU16(5) ||
1047 !out->WriteU32(offset_0514 - table_start)) {
1048 return OTS_FAILURE();
1049 }
1050 }
1051
1052 if (have_100) {
1053 if (!out->WriteU16(1) ||
1054 !out->WriteU16(0) ||
1055 !out->WriteU32(offset_100 - table_start)) {
1056 return OTS_FAILURE();
1057 }
1058 }
1059
1060 if (have_304) {
1061 if (!out->WriteU16(3) ||
1062 !out->WriteU16(0) ||
1063 !out->WriteU32(offset_304 - table_start)) {
1064 return OTS_FAILURE();
1065 }
1066 }
1067
1068 if (have_314) {
1069 if (!out->WriteU16(3) ||
1070 !out->WriteU16(1) ||
1071 !out->WriteU32(offset_314 - table_start)) {
1072 return OTS_FAILURE();
1073 }
1074 }
1075
1076 if (have_31012) {
1077 if (!out->WriteU16(3) ||
1078 !out->WriteU16(10) ||
1079 !out->WriteU32(offset_31012 - table_start)) {
1080 return OTS_FAILURE();
1081 }
1082 }
1083
1084 if (have_31013) {
1085 if (!out->WriteU16(3) ||
1086 !out->WriteU16(10) ||
1087 !out->WriteU32(offset_31013 - table_start)) {
1088 return OTS_FAILURE();
1089 }
1090 }
1091
1092 if (!out->Seek(table_end)) {
1093 return OTS_FAILURE();
1094 }
1095 out->RestoreChecksum(saved_checksum);
1096
1097 return true;
1098 }
1099
1100 void ots_cmap_free(OpenTypeFile *file) {
1101 delete file->cmap;
1102 }
1103
1104 } // namespace ots
1105
1106 #undef TABLE_NAME
OLDNEW
« no previous file with comments | « third_party/ots/src/cmap.h ('k') | third_party/ots/src/cvt.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698