OLD | NEW |
| (Empty) |
1 // Copyright 2014 PDFium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com | |
6 | |
7 #include "core/fpdfapi/fpdf_font/font_int.h" | |
8 | |
9 #include "core/fpdfapi/cmaps/cmap_int.h" | |
10 #include "core/fpdfapi/cpdf_modulemgr.h" | |
11 #include "core/fpdfapi/fpdf_font/ttgsubtable.h" | |
12 #include "core/fpdfapi/fpdf_page/cpdf_pagemodule.h" | |
13 #include "core/fpdfapi/fpdf_parser/cpdf_array.h" | |
14 #include "core/fpdfapi/fpdf_parser/cpdf_dictionary.h" | |
15 #include "core/fpdfapi/fpdf_parser/cpdf_simple_parser.h" | |
16 #include "core/fxcrt/fx_ext.h" | |
17 #include "core/fxge/fx_freetype.h" | |
18 | |
19 namespace { | |
20 | |
21 const FX_CHAR* const g_CharsetNames[CIDSET_NUM_SETS] = { | |
22 nullptr, "GB1", "CNS1", "Japan1", "Korea1", "UCS"}; | |
23 | |
24 | |
25 class CPDF_PredefinedCMap { | |
26 public: | |
27 const FX_CHAR* m_pName; | |
28 CIDSet m_Charset; | |
29 CIDCoding m_Coding; | |
30 CPDF_CMap::CodingScheme m_CodingScheme; | |
31 uint8_t m_LeadingSegCount; | |
32 uint8_t m_LeadingSegs[4]; | |
33 }; | |
34 | |
35 const CPDF_PredefinedCMap g_PredefinedCMaps[] = { | |
36 {"GB-EUC", | |
37 CIDSET_GB1, | |
38 CIDCODING_GB, | |
39 CPDF_CMap::MixedTwoBytes, | |
40 1, | |
41 {0xa1, 0xfe}}, | |
42 {"GBpc-EUC", | |
43 CIDSET_GB1, | |
44 CIDCODING_GB, | |
45 CPDF_CMap::MixedTwoBytes, | |
46 1, | |
47 {0xa1, 0xfc}}, | |
48 {"GBK-EUC", | |
49 CIDSET_GB1, | |
50 CIDCODING_GB, | |
51 CPDF_CMap::MixedTwoBytes, | |
52 1, | |
53 {0x81, 0xfe}}, | |
54 {"GBKp-EUC", | |
55 CIDSET_GB1, | |
56 CIDCODING_GB, | |
57 CPDF_CMap::MixedTwoBytes, | |
58 1, | |
59 {0x81, 0xfe}}, | |
60 {"GBK2K-EUC", | |
61 CIDSET_GB1, | |
62 CIDCODING_GB, | |
63 CPDF_CMap::MixedTwoBytes, | |
64 1, | |
65 {0x81, 0xfe}}, | |
66 {"GBK2K", | |
67 CIDSET_GB1, | |
68 CIDCODING_GB, | |
69 CPDF_CMap::MixedTwoBytes, | |
70 1, | |
71 {0x81, 0xfe}}, | |
72 {"UniGB-UCS2", CIDSET_GB1, CIDCODING_UCS2, CPDF_CMap::TwoBytes, 0, {}}, | |
73 {"UniGB-UTF16", CIDSET_GB1, CIDCODING_UTF16, CPDF_CMap::TwoBytes, 0, {}}, | |
74 {"B5pc", | |
75 CIDSET_CNS1, | |
76 CIDCODING_BIG5, | |
77 CPDF_CMap::MixedTwoBytes, | |
78 1, | |
79 {0xa1, 0xfc}}, | |
80 {"HKscs-B5", | |
81 CIDSET_CNS1, | |
82 CIDCODING_BIG5, | |
83 CPDF_CMap::MixedTwoBytes, | |
84 1, | |
85 {0x88, 0xfe}}, | |
86 {"ETen-B5", | |
87 CIDSET_CNS1, | |
88 CIDCODING_BIG5, | |
89 CPDF_CMap::MixedTwoBytes, | |
90 1, | |
91 {0xa1, 0xfe}}, | |
92 {"ETenms-B5", | |
93 CIDSET_CNS1, | |
94 CIDCODING_BIG5, | |
95 CPDF_CMap::MixedTwoBytes, | |
96 1, | |
97 {0xa1, 0xfe}}, | |
98 {"UniCNS-UCS2", CIDSET_CNS1, CIDCODING_UCS2, CPDF_CMap::TwoBytes, 0, {}}, | |
99 {"UniCNS-UTF16", CIDSET_CNS1, CIDCODING_UTF16, CPDF_CMap::TwoBytes, 0, {}}, | |
100 {"83pv-RKSJ", | |
101 CIDSET_JAPAN1, | |
102 CIDCODING_JIS, | |
103 CPDF_CMap::MixedTwoBytes, | |
104 2, | |
105 {0x81, 0x9f, 0xe0, 0xfc}}, | |
106 {"90ms-RKSJ", | |
107 CIDSET_JAPAN1, | |
108 CIDCODING_JIS, | |
109 CPDF_CMap::MixedTwoBytes, | |
110 2, | |
111 {0x81, 0x9f, 0xe0, 0xfc}}, | |
112 {"90msp-RKSJ", | |
113 CIDSET_JAPAN1, | |
114 CIDCODING_JIS, | |
115 CPDF_CMap::MixedTwoBytes, | |
116 2, | |
117 {0x81, 0x9f, 0xe0, 0xfc}}, | |
118 {"90pv-RKSJ", | |
119 CIDSET_JAPAN1, | |
120 CIDCODING_JIS, | |
121 CPDF_CMap::MixedTwoBytes, | |
122 2, | |
123 {0x81, 0x9f, 0xe0, 0xfc}}, | |
124 {"Add-RKSJ", | |
125 CIDSET_JAPAN1, | |
126 CIDCODING_JIS, | |
127 CPDF_CMap::MixedTwoBytes, | |
128 2, | |
129 {0x81, 0x9f, 0xe0, 0xfc}}, | |
130 {"EUC", | |
131 CIDSET_JAPAN1, | |
132 CIDCODING_JIS, | |
133 CPDF_CMap::MixedTwoBytes, | |
134 2, | |
135 {0x8e, 0x8e, 0xa1, 0xfe}}, | |
136 {"H", CIDSET_JAPAN1, CIDCODING_JIS, CPDF_CMap::TwoBytes, 1, {0x21, 0x7e}}, | |
137 {"V", CIDSET_JAPAN1, CIDCODING_JIS, CPDF_CMap::TwoBytes, 1, {0x21, 0x7e}}, | |
138 {"Ext-RKSJ", | |
139 CIDSET_JAPAN1, | |
140 CIDCODING_JIS, | |
141 CPDF_CMap::MixedTwoBytes, | |
142 2, | |
143 {0x81, 0x9f, 0xe0, 0xfc}}, | |
144 {"UniJIS-UCS2", CIDSET_JAPAN1, CIDCODING_UCS2, CPDF_CMap::TwoBytes, 0, {}}, | |
145 {"UniJIS-UCS2-HW", | |
146 CIDSET_JAPAN1, | |
147 CIDCODING_UCS2, | |
148 CPDF_CMap::TwoBytes, | |
149 0, | |
150 {}}, | |
151 {"UniJIS-UTF16", | |
152 CIDSET_JAPAN1, | |
153 CIDCODING_UTF16, | |
154 CPDF_CMap::TwoBytes, | |
155 0, | |
156 {}}, | |
157 {"KSC-EUC", | |
158 CIDSET_KOREA1, | |
159 CIDCODING_KOREA, | |
160 CPDF_CMap::MixedTwoBytes, | |
161 1, | |
162 {0xa1, 0xfe}}, | |
163 {"KSCms-UHC", | |
164 CIDSET_KOREA1, | |
165 CIDCODING_KOREA, | |
166 CPDF_CMap::MixedTwoBytes, | |
167 1, | |
168 {0x81, 0xfe}}, | |
169 {"KSCms-UHC-HW", | |
170 CIDSET_KOREA1, | |
171 CIDCODING_KOREA, | |
172 CPDF_CMap::MixedTwoBytes, | |
173 1, | |
174 {0x81, 0xfe}}, | |
175 {"KSCpc-EUC", | |
176 CIDSET_KOREA1, | |
177 CIDCODING_KOREA, | |
178 CPDF_CMap::MixedTwoBytes, | |
179 1, | |
180 {0xa1, 0xfd}}, | |
181 {"UniKS-UCS2", CIDSET_KOREA1, CIDCODING_UCS2, CPDF_CMap::TwoBytes, 0, {}}, | |
182 {"UniKS-UTF16", CIDSET_KOREA1, CIDCODING_UTF16, CPDF_CMap::TwoBytes, 0, {}}, | |
183 }; | |
184 | |
185 CIDSet CIDSetFromSizeT(size_t index) { | |
186 if (index >= CIDSET_NUM_SETS) { | |
187 NOTREACHED(); | |
188 return CIDSET_UNKNOWN; | |
189 } | |
190 return static_cast<CIDSet>(index); | |
191 } | |
192 | |
193 CFX_ByteStringC CMap_GetString(const CFX_ByteStringC& word) { | |
194 if (word.GetLength() <= 2) | |
195 return CFX_ByteStringC(); | |
196 return CFX_ByteStringC(&word[1], word.GetLength() - 2); | |
197 } | |
198 | |
// qsort() comparator ordering records by their leading uint32_t.
//
// Returns a negative value, zero, or a positive value when the value at
// |data1| is less than, equal to, or greater than the value at |data2|.
// The values are compared explicitly rather than subtracted: an unsigned
// difference of 2^31 or more would flip sign once converted to int and give
// qsort() an inconsistent ordering.
int CompareDWORD(const void* data1, const void* data2) {
  const uint32_t lhs = *static_cast<const uint32_t*>(data1);
  const uint32_t rhs = *static_cast<const uint32_t*>(data2);
  if (lhs < rhs)
    return -1;
  return lhs > rhs ? 1 : 0;
}
202 | |
// bsearch() comparator locating the range record that contains a char code.
//
// |key| points at the char code. |element| points at two uint32_t values:
// the first code of the range, then a word whose upper 16 bits hold the
// range length (last code minus first code). Returns -1 when the code lies
// below the range, 1 when it lies above it, and 0 when it falls inside.
int CompareCID(const void* key, const void* element) {
  const uint32_t code = *static_cast<const uint32_t*>(key);
  const uint32_t* record = static_cast<const uint32_t*>(element);
  const uint32_t first = record[0];
  if (code < first)
    return -1;

  const uint32_t last = first + record[1] / 65536;
  return code > last ? 1 : 0;
}
213 | |
214 int CheckCodeRange(uint8_t* codes, | |
215 int size, | |
216 CMap_CodeRange* pRanges, | |
217 int nRanges) { | |
218 int iSeg = nRanges - 1; | |
219 while (iSeg >= 0) { | |
220 if (pRanges[iSeg].m_CharSize < size) { | |
221 --iSeg; | |
222 continue; | |
223 } | |
224 int iChar = 0; | |
225 while (iChar < size) { | |
226 if (codes[iChar] < pRanges[iSeg].m_Lower[iChar] || | |
227 codes[iChar] > pRanges[iSeg].m_Upper[iChar]) { | |
228 break; | |
229 } | |
230 ++iChar; | |
231 } | |
232 if (iChar == pRanges[iSeg].m_CharSize) | |
233 return 2; | |
234 | |
235 if (iChar) | |
236 return (size == pRanges[iSeg].m_CharSize) ? 2 : 1; | |
237 iSeg--; | |
238 } | |
239 return 0; | |
240 } | |
241 | |
242 int GetCharSizeImpl(uint32_t charcode, | |
243 CMap_CodeRange* pRanges, | |
244 int iRangesSize) { | |
245 if (!iRangesSize) | |
246 return 1; | |
247 | |
248 uint8_t codes[4]; | |
249 codes[0] = codes[1] = 0x00; | |
250 codes[2] = (uint8_t)(charcode >> 8 & 0xFF); | |
251 codes[3] = (uint8_t)charcode; | |
252 int offset = 0; | |
253 int size = 4; | |
254 for (int i = 0; i < 4; ++i) { | |
255 int iSeg = iRangesSize - 1; | |
256 while (iSeg >= 0) { | |
257 if (pRanges[iSeg].m_CharSize < size) { | |
258 --iSeg; | |
259 continue; | |
260 } | |
261 int iChar = 0; | |
262 while (iChar < size) { | |
263 if (codes[offset + iChar] < pRanges[iSeg].m_Lower[iChar] || | |
264 codes[offset + iChar] > pRanges[iSeg].m_Upper[iChar]) { | |
265 break; | |
266 } | |
267 ++iChar; | |
268 } | |
269 if (iChar == pRanges[iSeg].m_CharSize) | |
270 return size; | |
271 --iSeg; | |
272 } | |
273 --size; | |
274 ++offset; | |
275 } | |
276 return 1; | |
277 } | |
278 | |
279 } // namespace | |
280 | |
281 CPDF_CMapManager::CPDF_CMapManager() { | |
282 FXSYS_memset(m_CID2UnicodeMaps, 0, sizeof m_CID2UnicodeMaps); | |
283 } | |
284 CPDF_CMapManager::~CPDF_CMapManager() { | |
285 for (const auto& pair : m_CMaps) { | |
286 delete pair.second; | |
287 } | |
288 m_CMaps.clear(); | |
289 for (size_t i = 0; i < FX_ArraySize(m_CID2UnicodeMaps); ++i) { | |
290 delete m_CID2UnicodeMaps[i]; | |
291 } | |
292 } | |
293 CPDF_CMap* CPDF_CMapManager::GetPredefinedCMap(const CFX_ByteString& name, | |
294 bool bPromptCJK) { | |
295 auto it = m_CMaps.find(name); | |
296 if (it != m_CMaps.end()) { | |
297 return it->second; | |
298 } | |
299 CPDF_CMap* pCMap = LoadPredefinedCMap(name, bPromptCJK); | |
300 if (!name.IsEmpty()) { | |
301 m_CMaps[name] = pCMap; | |
302 } | |
303 return pCMap; | |
304 } | |
305 CPDF_CMap* CPDF_CMapManager::LoadPredefinedCMap(const CFX_ByteString& name, | |
306 bool bPromptCJK) { | |
307 CPDF_CMap* pCMap = new CPDF_CMap; | |
308 const FX_CHAR* pname = name.c_str(); | |
309 if (*pname == '/') { | |
310 pname++; | |
311 } | |
312 pCMap->LoadPredefined(this, pname, bPromptCJK); | |
313 return pCMap; | |
314 } | |
315 | |
316 CPDF_CID2UnicodeMap* CPDF_CMapManager::GetCID2UnicodeMap(CIDSet charset, | |
317 bool bPromptCJK) { | |
318 if (!m_CID2UnicodeMaps[charset]) | |
319 m_CID2UnicodeMaps[charset] = LoadCID2UnicodeMap(charset, bPromptCJK); | |
320 return m_CID2UnicodeMaps[charset]; | |
321 } | |
322 CPDF_CID2UnicodeMap* CPDF_CMapManager::LoadCID2UnicodeMap(CIDSet charset, | |
323 bool bPromptCJK) { | |
324 CPDF_CID2UnicodeMap* pMap = new CPDF_CID2UnicodeMap(); | |
325 pMap->Load(this, charset, bPromptCJK); | |
326 return pMap; | |
327 } | |
328 | |
329 CPDF_CMapParser::CPDF_CMapParser() | |
330 : m_pCMap(nullptr), m_Status(0), m_CodeSeq(0) {} | |
331 | |
332 CPDF_CMapParser::~CPDF_CMapParser() {} | |
333 | |
334 void CPDF_CMapParser::Initialize(CPDF_CMap* pCMap) { | |
335 m_pCMap = pCMap; | |
336 m_Status = 0; | |
337 m_CodeSeq = 0; | |
338 m_AddMaps.EstimateSize(0, 10240); | |
339 } | |
340 | |
341 void CPDF_CMapParser::ParseWord(const CFX_ByteStringC& word) { | |
342 if (word.IsEmpty()) { | |
343 return; | |
344 } | |
345 if (word == "begincidchar") { | |
346 m_Status = 1; | |
347 m_CodeSeq = 0; | |
348 } else if (word == "begincidrange") { | |
349 m_Status = 2; | |
350 m_CodeSeq = 0; | |
351 } else if (word == "endcidrange" || word == "endcidchar") { | |
352 m_Status = 0; | |
353 } else if (word == "/WMode") { | |
354 m_Status = 6; | |
355 } else if (word == "/Registry") { | |
356 m_Status = 3; | |
357 } else if (word == "/Ordering") { | |
358 m_Status = 4; | |
359 } else if (word == "/Supplement") { | |
360 m_Status = 5; | |
361 } else if (word == "begincodespacerange") { | |
362 m_Status = 7; | |
363 m_CodeSeq = 0; | |
364 } else if (word == "usecmap") { | |
365 } else if (m_Status == 1 || m_Status == 2) { | |
366 m_CodePoints[m_CodeSeq] = CMap_GetCode(word); | |
367 m_CodeSeq++; | |
368 uint32_t StartCode, EndCode; | |
369 uint16_t StartCID; | |
370 if (m_Status == 1) { | |
371 if (m_CodeSeq < 2) { | |
372 return; | |
373 } | |
374 EndCode = StartCode = m_CodePoints[0]; | |
375 StartCID = (uint16_t)m_CodePoints[1]; | |
376 } else { | |
377 if (m_CodeSeq < 3) { | |
378 return; | |
379 } | |
380 StartCode = m_CodePoints[0]; | |
381 EndCode = m_CodePoints[1]; | |
382 StartCID = (uint16_t)m_CodePoints[2]; | |
383 } | |
384 if (EndCode < 0x10000) { | |
385 for (uint32_t code = StartCode; code <= EndCode; code++) { | |
386 m_pCMap->m_pMapping[code] = (uint16_t)(StartCID + code - StartCode); | |
387 } | |
388 } else { | |
389 uint32_t buf[2]; | |
390 buf[0] = StartCode; | |
391 buf[1] = ((EndCode - StartCode) << 16) + StartCID; | |
392 m_AddMaps.AppendBlock(buf, sizeof buf); | |
393 } | |
394 m_CodeSeq = 0; | |
395 } else if (m_Status == 3) { | |
396 m_Status = 0; | |
397 } else if (m_Status == 4) { | |
398 m_pCMap->m_Charset = CharsetFromOrdering(CMap_GetString(word)); | |
399 m_Status = 0; | |
400 } else if (m_Status == 5) { | |
401 m_Status = 0; | |
402 } else if (m_Status == 6) { | |
403 m_pCMap->m_bVertical = CMap_GetCode(word) != 0; | |
404 m_Status = 0; | |
405 } else if (m_Status == 7) { | |
406 if (word == "endcodespacerange") { | |
407 int nSegs = m_CodeRanges.GetSize(); | |
408 if (nSegs > 1) { | |
409 m_pCMap->m_CodingScheme = CPDF_CMap::MixedFourBytes; | |
410 m_pCMap->m_nCodeRanges = nSegs; | |
411 FX_Free(m_pCMap->m_pLeadingBytes); | |
412 m_pCMap->m_pLeadingBytes = | |
413 FX_Alloc2D(uint8_t, nSegs, sizeof(CMap_CodeRange)); | |
414 FXSYS_memcpy(m_pCMap->m_pLeadingBytes, m_CodeRanges.GetData(), | |
415 nSegs * sizeof(CMap_CodeRange)); | |
416 } else if (nSegs == 1) { | |
417 m_pCMap->m_CodingScheme = (m_CodeRanges[0].m_CharSize == 2) | |
418 ? CPDF_CMap::TwoBytes | |
419 : CPDF_CMap::OneByte; | |
420 } | |
421 m_Status = 0; | |
422 } else { | |
423 if (word.GetLength() == 0 || word.GetAt(0) != '<') { | |
424 return; | |
425 } | |
426 if (m_CodeSeq % 2) { | |
427 CMap_CodeRange range; | |
428 if (CMap_GetCodeRange(range, m_LastWord.AsStringC(), word)) { | |
429 m_CodeRanges.Add(range); | |
430 } | |
431 } | |
432 m_CodeSeq++; | |
433 } | |
434 } | |
435 m_LastWord = word; | |
436 } | |
437 | |
438 // Static. | |
439 uint32_t CPDF_CMapParser::CMap_GetCode(const CFX_ByteStringC& word) { | |
440 pdfium::base::CheckedNumeric<uint32_t> num = 0; | |
441 if (word.GetAt(0) == '<') { | |
442 for (int i = 1; i < word.GetLength() && std::isxdigit(word.GetAt(i)); ++i) { | |
443 num = num * 16 + FXSYS_toHexDigit(word.GetAt(i)); | |
444 if (!num.IsValid()) | |
445 return 0; | |
446 } | |
447 return num.ValueOrDie(); | |
448 } | |
449 | |
450 for (int i = 0; i < word.GetLength() && std::isdigit(word.GetAt(i)); ++i) { | |
451 num = num * 10 + FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(word.GetAt(i))); | |
452 if (!num.IsValid()) | |
453 return 0; | |
454 } | |
455 return num.ValueOrDie(); | |
456 } | |
457 | |
458 // Static. | |
459 bool CPDF_CMapParser::CMap_GetCodeRange(CMap_CodeRange& range, | |
460 const CFX_ByteStringC& first, | |
461 const CFX_ByteStringC& second) { | |
462 if (first.GetLength() == 0 || first.GetAt(0) != '<') | |
463 return false; | |
464 | |
465 int i; | |
466 for (i = 1; i < first.GetLength(); ++i) { | |
467 if (first.GetAt(i) == '>') { | |
468 break; | |
469 } | |
470 } | |
471 range.m_CharSize = (i - 1) / 2; | |
472 if (range.m_CharSize > 4) | |
473 return false; | |
474 | |
475 for (i = 0; i < range.m_CharSize; ++i) { | |
476 uint8_t digit1 = first.GetAt(i * 2 + 1); | |
477 uint8_t digit2 = first.GetAt(i * 2 + 2); | |
478 range.m_Lower[i] = FXSYS_toHexDigit(digit1) * 16 + FXSYS_toHexDigit(digit2); | |
479 } | |
480 | |
481 uint32_t size = second.GetLength(); | |
482 for (i = 0; i < range.m_CharSize; ++i) { | |
483 uint8_t digit1 = ((uint32_t)i * 2 + 1 < size) | |
484 ? second.GetAt((FX_STRSIZE)i * 2 + 1) | |
485 : '0'; | |
486 uint8_t digit2 = ((uint32_t)i * 2 + 2 < size) | |
487 ? second.GetAt((FX_STRSIZE)i * 2 + 2) | |
488 : '0'; | |
489 range.m_Upper[i] = FXSYS_toHexDigit(digit1) * 16 + FXSYS_toHexDigit(digit2); | |
490 } | |
491 return true; | |
492 } | |
493 | |
494 CPDF_CMap::CPDF_CMap() { | |
495 m_Charset = CIDSET_UNKNOWN; | |
496 m_Coding = CIDCODING_UNKNOWN; | |
497 m_CodingScheme = TwoBytes; | |
498 m_bVertical = false; | |
499 m_bLoaded = false; | |
500 m_pMapping = nullptr; | |
501 m_pLeadingBytes = nullptr; | |
502 m_pAddMapping = nullptr; | |
503 m_pEmbedMap = nullptr; | |
504 m_nCodeRanges = 0; | |
505 } | |
506 CPDF_CMap::~CPDF_CMap() { | |
507 FX_Free(m_pMapping); | |
508 FX_Free(m_pAddMapping); | |
509 FX_Free(m_pLeadingBytes); | |
510 } | |
511 | |
512 bool CPDF_CMap::IsLoaded() const { | |
513 return m_bLoaded; | |
514 } | |
515 | |
516 bool CPDF_CMap::IsVertWriting() const { | |
517 return m_bVertical; | |
518 } | |
519 | |
520 void CPDF_CMap::LoadPredefined(CPDF_CMapManager* pMgr, | |
521 const CFX_ByteString& bsName, | |
522 bool bPromptCJK) { | |
523 m_PredefinedCMap = bsName; | |
524 if (m_PredefinedCMap == "Identity-H" || m_PredefinedCMap == "Identity-V") { | |
525 m_Coding = CIDCODING_CID; | |
526 m_bVertical = bsName[9] == 'V'; | |
527 m_bLoaded = true; | |
528 return; | |
529 } | |
530 CFX_ByteString cmapid = m_PredefinedCMap; | |
531 m_bVertical = cmapid.Right(1) == "V"; | |
532 if (cmapid.GetLength() > 2) { | |
533 cmapid = cmapid.Left(cmapid.GetLength() - 2); | |
534 } | |
535 const CPDF_PredefinedCMap* map = nullptr; | |
536 for (size_t i = 0; i < FX_ArraySize(g_PredefinedCMaps); ++i) { | |
537 if (cmapid == CFX_ByteStringC(g_PredefinedCMaps[i].m_pName)) { | |
538 map = &g_PredefinedCMaps[i]; | |
539 break; | |
540 } | |
541 } | |
542 if (!map) | |
543 return; | |
544 | |
545 m_Charset = map->m_Charset; | |
546 m_Coding = map->m_Coding; | |
547 m_CodingScheme = map->m_CodingScheme; | |
548 if (m_CodingScheme == MixedTwoBytes) { | |
549 m_pLeadingBytes = FX_Alloc(uint8_t, 256); | |
550 for (uint32_t i = 0; i < map->m_LeadingSegCount; ++i) { | |
551 const uint8_t* segs = map->m_LeadingSegs; | |
552 for (int b = segs[i * 2]; b <= segs[i * 2 + 1]; ++b) { | |
553 m_pLeadingBytes[b] = 1; | |
554 } | |
555 } | |
556 } | |
557 FPDFAPI_FindEmbeddedCMap(bsName, m_Charset, m_Coding, m_pEmbedMap); | |
558 if (!m_pEmbedMap) | |
559 return; | |
560 | |
561 m_bLoaded = true; | |
562 } | |
563 | |
564 void CPDF_CMap::LoadEmbedded(const uint8_t* pData, uint32_t size) { | |
565 m_pMapping = FX_Alloc(uint16_t, 65536); | |
566 CPDF_CMapParser parser; | |
567 parser.Initialize(this); | |
568 CPDF_SimpleParser syntax(pData, size); | |
569 while (1) { | |
570 CFX_ByteStringC word = syntax.GetWord(); | |
571 if (word.IsEmpty()) { | |
572 break; | |
573 } | |
574 parser.ParseWord(word); | |
575 } | |
576 if (m_CodingScheme == MixedFourBytes && parser.m_AddMaps.GetSize()) { | |
577 m_pAddMapping = FX_Alloc(uint8_t, parser.m_AddMaps.GetSize() + 4); | |
578 *(uint32_t*)m_pAddMapping = parser.m_AddMaps.GetSize() / 8; | |
579 FXSYS_memcpy(m_pAddMapping + 4, parser.m_AddMaps.GetBuffer(), | |
580 parser.m_AddMaps.GetSize()); | |
581 FXSYS_qsort(m_pAddMapping + 4, parser.m_AddMaps.GetSize() / 8, 8, | |
582 CompareDWORD); | |
583 } | |
584 } | |
585 | |
586 uint16_t CPDF_CMap::CIDFromCharCode(uint32_t charcode) const { | |
587 if (m_Coding == CIDCODING_CID) { | |
588 return (uint16_t)charcode; | |
589 } | |
590 if (m_pEmbedMap) { | |
591 return FPDFAPI_CIDFromCharCode(m_pEmbedMap, charcode); | |
592 } | |
593 if (!m_pMapping) { | |
594 return (uint16_t)charcode; | |
595 } | |
596 if (charcode >> 16) { | |
597 if (m_pAddMapping) { | |
598 void* found = FXSYS_bsearch(&charcode, m_pAddMapping + 4, | |
599 *(uint32_t*)m_pAddMapping, 8, CompareCID); | |
600 if (!found) | |
601 return 0; | |
602 return (uint16_t)(((uint32_t*)found)[1] % 65536 + charcode - | |
603 *(uint32_t*)found); | |
604 } | |
605 return 0; | |
606 } | |
607 return m_pMapping[charcode]; | |
608 } | |
609 | |
610 uint32_t CPDF_CMap::GetNextChar(const FX_CHAR* pString, | |
611 int nStrLen, | |
612 int& offset) const { | |
613 switch (m_CodingScheme) { | |
614 case OneByte: | |
615 return ((uint8_t*)pString)[offset++]; | |
616 case TwoBytes: | |
617 offset += 2; | |
618 return ((uint8_t*)pString)[offset - 2] * 256 + | |
619 ((uint8_t*)pString)[offset - 1]; | |
620 case MixedTwoBytes: { | |
621 uint8_t byte1 = ((uint8_t*)pString)[offset++]; | |
622 if (!m_pLeadingBytes[byte1]) { | |
623 return byte1; | |
624 } | |
625 uint8_t byte2 = ((uint8_t*)pString)[offset++]; | |
626 return byte1 * 256 + byte2; | |
627 } | |
628 case MixedFourBytes: { | |
629 uint8_t codes[4]; | |
630 int char_size = 1; | |
631 codes[0] = ((uint8_t*)pString)[offset++]; | |
632 CMap_CodeRange* pRanges = (CMap_CodeRange*)m_pLeadingBytes; | |
633 while (1) { | |
634 int ret = CheckCodeRange(codes, char_size, pRanges, m_nCodeRanges); | |
635 if (ret == 0) { | |
636 return 0; | |
637 } | |
638 if (ret == 2) { | |
639 uint32_t charcode = 0; | |
640 for (int i = 0; i < char_size; i++) { | |
641 charcode = (charcode << 8) + codes[i]; | |
642 } | |
643 return charcode; | |
644 } | |
645 if (char_size == 4 || offset == nStrLen) { | |
646 return 0; | |
647 } | |
648 codes[char_size++] = ((uint8_t*)pString)[offset++]; | |
649 } | |
650 break; | |
651 } | |
652 } | |
653 return 0; | |
654 } | |
655 int CPDF_CMap::GetCharSize(uint32_t charcode) const { | |
656 switch (m_CodingScheme) { | |
657 case OneByte: | |
658 return 1; | |
659 case TwoBytes: | |
660 return 2; | |
661 case MixedTwoBytes: | |
662 case MixedFourBytes: | |
663 if (charcode < 0x100) { | |
664 return 1; | |
665 } | |
666 if (charcode < 0x10000) { | |
667 return 2; | |
668 } | |
669 if (charcode < 0x1000000) { | |
670 return 3; | |
671 } | |
672 return 4; | |
673 } | |
674 return 1; | |
675 } | |
676 int CPDF_CMap::CountChar(const FX_CHAR* pString, int size) const { | |
677 switch (m_CodingScheme) { | |
678 case OneByte: | |
679 return size; | |
680 case TwoBytes: | |
681 return (size + 1) / 2; | |
682 case MixedTwoBytes: { | |
683 int count = 0; | |
684 for (int i = 0; i < size; i++) { | |
685 count++; | |
686 if (m_pLeadingBytes[((uint8_t*)pString)[i]]) { | |
687 i++; | |
688 } | |
689 } | |
690 return count; | |
691 } | |
692 case MixedFourBytes: { | |
693 int count = 0, offset = 0; | |
694 while (offset < size) { | |
695 GetNextChar(pString, size, offset); | |
696 count++; | |
697 } | |
698 return count; | |
699 } | |
700 } | |
701 return size; | |
702 } | |
703 | |
704 int CPDF_CMap::AppendChar(FX_CHAR* str, uint32_t charcode) const { | |
705 switch (m_CodingScheme) { | |
706 case OneByte: | |
707 str[0] = (uint8_t)charcode; | |
708 return 1; | |
709 case TwoBytes: | |
710 str[0] = (uint8_t)(charcode / 256); | |
711 str[1] = (uint8_t)(charcode % 256); | |
712 return 2; | |
713 case MixedTwoBytes: | |
714 case MixedFourBytes: | |
715 if (charcode < 0x100) { | |
716 CMap_CodeRange* pRanges = (CMap_CodeRange*)m_pLeadingBytes; | |
717 int iSize = GetCharSizeImpl(charcode, pRanges, m_nCodeRanges); | |
718 if (iSize == 0) { | |
719 iSize = 1; | |
720 } | |
721 if (iSize > 1) { | |
722 FXSYS_memset(str, 0, sizeof(uint8_t) * iSize); | |
723 } | |
724 str[iSize - 1] = (uint8_t)charcode; | |
725 return iSize; | |
726 } | |
727 if (charcode < 0x10000) { | |
728 str[0] = (uint8_t)(charcode >> 8); | |
729 str[1] = (uint8_t)charcode; | |
730 return 2; | |
731 } | |
732 if (charcode < 0x1000000) { | |
733 str[0] = (uint8_t)(charcode >> 16); | |
734 str[1] = (uint8_t)(charcode >> 8); | |
735 str[2] = (uint8_t)charcode; | |
736 return 3; | |
737 } | |
738 str[0] = (uint8_t)(charcode >> 24); | |
739 str[1] = (uint8_t)(charcode >> 16); | |
740 str[2] = (uint8_t)(charcode >> 8); | |
741 str[3] = (uint8_t)charcode; | |
742 return 4; | |
743 } | |
744 return 0; | |
745 } | |
746 | |
747 CPDF_CID2UnicodeMap::CPDF_CID2UnicodeMap() { | |
748 m_EmbeddedCount = 0; | |
749 } | |
750 | |
751 CPDF_CID2UnicodeMap::~CPDF_CID2UnicodeMap() {} | |
752 | |
753 bool CPDF_CID2UnicodeMap::IsLoaded() { | |
754 return m_EmbeddedCount != 0; | |
755 } | |
756 | |
757 FX_WCHAR CPDF_CID2UnicodeMap::UnicodeFromCID(uint16_t CID) { | |
758 if (m_Charset == CIDSET_UNICODE) { | |
759 return CID; | |
760 } | |
761 if (CID < m_EmbeddedCount) { | |
762 return m_pEmbeddedMap[CID]; | |
763 } | |
764 return 0; | |
765 } | |
766 | |
767 void CPDF_CID2UnicodeMap::Load(CPDF_CMapManager* pMgr, | |
768 CIDSet charset, | |
769 bool bPromptCJK) { | |
770 m_Charset = charset; | |
771 | |
772 CPDF_FontGlobals* pFontGlobals = | |
773 CPDF_ModuleMgr::Get()->GetPageModule()->GetFontGlobals(); | |
774 m_pEmbeddedMap = pFontGlobals->m_EmbeddedToUnicodes[charset].m_pMap; | |
775 m_EmbeddedCount = pFontGlobals->m_EmbeddedToUnicodes[charset].m_Count; | |
776 } | |
777 | |
778 CIDSet CharsetFromOrdering(const CFX_ByteStringC& ordering) { | |
779 for (size_t charset = 1; charset < FX_ArraySize(g_CharsetNames); ++charset) { | |
780 if (ordering == g_CharsetNames[charset]) | |
781 return CIDSetFromSizeT(charset); | |
782 } | |
783 return CIDSET_UNKNOWN; | |
784 } | |
OLD | NEW |