OLD | NEW |
| (Empty) |
1 // Copyright 2016 PDFium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com | |
6 | |
7 #include "core/fpdfapi/fpdf_parser/cpdf_syntax_parser.h" | |
8 | |
9 #include <vector> | |
10 | |
11 #include "core/fpdfapi/cpdf_modulemgr.h" | |
12 #include "core/fpdfapi/fpdf_parser/cpdf_array.h" | |
13 #include "core/fpdfapi/fpdf_parser/cpdf_boolean.h" | |
14 #include "core/fpdfapi/fpdf_parser/cpdf_crypto_handler.h" | |
15 #include "core/fpdfapi/fpdf_parser/cpdf_dictionary.h" | |
16 #include "core/fpdfapi/fpdf_parser/cpdf_name.h" | |
17 #include "core/fpdfapi/fpdf_parser/cpdf_null.h" | |
18 #include "core/fpdfapi/fpdf_parser/cpdf_number.h" | |
19 #include "core/fpdfapi/fpdf_parser/cpdf_reference.h" | |
20 #include "core/fpdfapi/fpdf_parser/cpdf_stream.h" | |
21 #include "core/fpdfapi/fpdf_parser/cpdf_string.h" | |
22 #include "core/fpdfapi/fpdf_parser/fpdf_parser_decode.h" | |
23 #include "core/fpdfapi/fpdf_parser/fpdf_parser_utility.h" | |
24 #include "core/fxcrt/fx_ext.h" | |
25 #include "third_party/base/numerics/safe_math.h" | |
26 | |
27 namespace { | |
28 | |
29 struct SearchTagRecord { | |
30 CFX_ByteStringC m_bsTag; | |
31 FX_STRSIZE m_Offset; | |
32 }; | |
33 | |
34 } // namespace | |
35 | |
36 // static | |
37 int CPDF_SyntaxParser::s_CurrentRecursionDepth = 0; | |
38 | |
39 CPDF_SyntaxParser::CPDF_SyntaxParser() | |
40 : CPDF_SyntaxParser(CFX_WeakPtr<CFX_ByteStringPool>()) {} | |
41 | |
42 CPDF_SyntaxParser::CPDF_SyntaxParser( | |
43 const CFX_WeakPtr<CFX_ByteStringPool>& pPool) | |
44 : m_MetadataObjnum(0), | |
45 m_pFileAccess(nullptr), | |
46 m_pFileBuf(nullptr), | |
47 m_BufSize(CPDF_ModuleMgr::kFileBufSize), | |
48 m_pPool(pPool) {} | |
49 | |
50 CPDF_SyntaxParser::~CPDF_SyntaxParser() { | |
51 FX_Free(m_pFileBuf); | |
52 } | |
53 | |
54 FX_BOOL CPDF_SyntaxParser::GetCharAt(FX_FILESIZE pos, uint8_t& ch) { | |
55 CFX_AutoRestorer<FX_FILESIZE> save_pos(&m_Pos); | |
56 m_Pos = pos; | |
57 return GetNextChar(ch); | |
58 } | |
59 | |
60 FX_BOOL CPDF_SyntaxParser::GetNextChar(uint8_t& ch) { | |
61 FX_FILESIZE pos = m_Pos + m_HeaderOffset; | |
62 if (pos >= m_FileLen) | |
63 return FALSE; | |
64 | |
65 if (m_BufOffset >= pos || (FX_FILESIZE)(m_BufOffset + m_BufSize) <= pos) { | |
66 FX_FILESIZE read_pos = pos; | |
67 uint32_t read_size = m_BufSize; | |
68 if ((FX_FILESIZE)read_size > m_FileLen) | |
69 read_size = (uint32_t)m_FileLen; | |
70 | |
71 if ((FX_FILESIZE)(read_pos + read_size) > m_FileLen) { | |
72 if (m_FileLen < (FX_FILESIZE)read_size) { | |
73 read_pos = 0; | |
74 read_size = (uint32_t)m_FileLen; | |
75 } else { | |
76 read_pos = m_FileLen - read_size; | |
77 } | |
78 } | |
79 | |
80 if (!m_pFileAccess->ReadBlock(m_pFileBuf, read_pos, read_size)) | |
81 return FALSE; | |
82 | |
83 m_BufOffset = read_pos; | |
84 } | |
85 ch = m_pFileBuf[pos - m_BufOffset]; | |
86 m_Pos++; | |
87 return TRUE; | |
88 } | |
89 | |
90 FX_BOOL CPDF_SyntaxParser::GetCharAtBackward(FX_FILESIZE pos, uint8_t& ch) { | |
91 pos += m_HeaderOffset; | |
92 if (pos >= m_FileLen) | |
93 return FALSE; | |
94 | |
95 if (m_BufOffset >= pos || (FX_FILESIZE)(m_BufOffset + m_BufSize) <= pos) { | |
96 FX_FILESIZE read_pos; | |
97 if (pos < (FX_FILESIZE)m_BufSize) | |
98 read_pos = 0; | |
99 else | |
100 read_pos = pos - m_BufSize + 1; | |
101 | |
102 uint32_t read_size = m_BufSize; | |
103 if ((FX_FILESIZE)(read_pos + read_size) > m_FileLen) { | |
104 if (m_FileLen < (FX_FILESIZE)read_size) { | |
105 read_pos = 0; | |
106 read_size = (uint32_t)m_FileLen; | |
107 } else { | |
108 read_pos = m_FileLen - read_size; | |
109 } | |
110 } | |
111 | |
112 if (!m_pFileAccess->ReadBlock(m_pFileBuf, read_pos, read_size)) | |
113 return FALSE; | |
114 | |
115 m_BufOffset = read_pos; | |
116 } | |
117 ch = m_pFileBuf[pos - m_BufOffset]; | |
118 return TRUE; | |
119 } | |
120 | |
121 FX_BOOL CPDF_SyntaxParser::ReadBlock(uint8_t* pBuf, uint32_t size) { | |
122 if (!m_pFileAccess->ReadBlock(pBuf, m_Pos + m_HeaderOffset, size)) | |
123 return FALSE; | |
124 m_Pos += size; | |
125 return TRUE; | |
126 } | |
127 | |
128 void CPDF_SyntaxParser::GetNextWordInternal(bool* bIsNumber) { | |
129 m_WordSize = 0; | |
130 if (bIsNumber) | |
131 *bIsNumber = true; | |
132 | |
133 uint8_t ch; | |
134 if (!GetNextChar(ch)) | |
135 return; | |
136 | |
137 while (1) { | |
138 while (PDFCharIsWhitespace(ch)) { | |
139 if (!GetNextChar(ch)) | |
140 return; | |
141 } | |
142 | |
143 if (ch != '%') | |
144 break; | |
145 | |
146 while (1) { | |
147 if (!GetNextChar(ch)) | |
148 return; | |
149 if (PDFCharIsLineEnding(ch)) | |
150 break; | |
151 } | |
152 } | |
153 | |
154 if (PDFCharIsDelimiter(ch)) { | |
155 if (bIsNumber) | |
156 *bIsNumber = false; | |
157 | |
158 m_WordBuffer[m_WordSize++] = ch; | |
159 if (ch == '/') { | |
160 while (1) { | |
161 if (!GetNextChar(ch)) | |
162 return; | |
163 | |
164 if (!PDFCharIsOther(ch) && !PDFCharIsNumeric(ch)) { | |
165 m_Pos--; | |
166 return; | |
167 } | |
168 | |
169 if (m_WordSize < sizeof(m_WordBuffer) - 1) | |
170 m_WordBuffer[m_WordSize++] = ch; | |
171 } | |
172 } else if (ch == '<') { | |
173 if (!GetNextChar(ch)) | |
174 return; | |
175 | |
176 if (ch == '<') | |
177 m_WordBuffer[m_WordSize++] = ch; | |
178 else | |
179 m_Pos--; | |
180 } else if (ch == '>') { | |
181 if (!GetNextChar(ch)) | |
182 return; | |
183 | |
184 if (ch == '>') | |
185 m_WordBuffer[m_WordSize++] = ch; | |
186 else | |
187 m_Pos--; | |
188 } | |
189 return; | |
190 } | |
191 | |
192 while (1) { | |
193 if (m_WordSize < sizeof(m_WordBuffer) - 1) | |
194 m_WordBuffer[m_WordSize++] = ch; | |
195 | |
196 if (!PDFCharIsNumeric(ch)) { | |
197 if (bIsNumber) | |
198 *bIsNumber = false; | |
199 } | |
200 | |
201 if (!GetNextChar(ch)) | |
202 return; | |
203 | |
204 if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) { | |
205 m_Pos--; | |
206 break; | |
207 } | |
208 } | |
209 } | |
210 | |
211 CFX_ByteString CPDF_SyntaxParser::ReadString() { | |
212 uint8_t ch; | |
213 if (!GetNextChar(ch)) | |
214 return CFX_ByteString(); | |
215 | |
216 CFX_ByteTextBuf buf; | |
217 int32_t parlevel = 0; | |
218 int32_t status = 0; | |
219 int32_t iEscCode = 0; | |
220 while (1) { | |
221 switch (status) { | |
222 case 0: | |
223 if (ch == ')') { | |
224 if (parlevel == 0) { | |
225 return buf.MakeString(); | |
226 } | |
227 parlevel--; | |
228 buf.AppendChar(')'); | |
229 } else if (ch == '(') { | |
230 parlevel++; | |
231 buf.AppendChar('('); | |
232 } else if (ch == '\\') { | |
233 status = 1; | |
234 } else { | |
235 buf.AppendChar(ch); | |
236 } | |
237 break; | |
238 case 1: | |
239 if (ch >= '0' && ch <= '7') { | |
240 iEscCode = FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch)); | |
241 status = 2; | |
242 break; | |
243 } | |
244 | |
245 if (ch == 'n') { | |
246 buf.AppendChar('\n'); | |
247 } else if (ch == 'r') { | |
248 buf.AppendChar('\r'); | |
249 } else if (ch == 't') { | |
250 buf.AppendChar('\t'); | |
251 } else if (ch == 'b') { | |
252 buf.AppendChar('\b'); | |
253 } else if (ch == 'f') { | |
254 buf.AppendChar('\f'); | |
255 } else if (ch == '\r') { | |
256 status = 4; | |
257 break; | |
258 } else if (ch != '\n') { | |
259 buf.AppendChar(ch); | |
260 } | |
261 status = 0; | |
262 break; | |
263 case 2: | |
264 if (ch >= '0' && ch <= '7') { | |
265 iEscCode = | |
266 iEscCode * 8 + FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch)); | |
267 status = 3; | |
268 } else { | |
269 buf.AppendChar(iEscCode); | |
270 status = 0; | |
271 continue; | |
272 } | |
273 break; | |
274 case 3: | |
275 if (ch >= '0' && ch <= '7') { | |
276 iEscCode = | |
277 iEscCode * 8 + FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch)); | |
278 buf.AppendChar(iEscCode); | |
279 status = 0; | |
280 } else { | |
281 buf.AppendChar(iEscCode); | |
282 status = 0; | |
283 continue; | |
284 } | |
285 break; | |
286 case 4: | |
287 status = 0; | |
288 if (ch != '\n') | |
289 continue; | |
290 break; | |
291 } | |
292 | |
293 if (!GetNextChar(ch)) | |
294 break; | |
295 } | |
296 | |
297 GetNextChar(ch); | |
298 return buf.MakeString(); | |
299 } | |
300 | |
301 CFX_ByteString CPDF_SyntaxParser::ReadHexString() { | |
302 uint8_t ch; | |
303 if (!GetNextChar(ch)) | |
304 return CFX_ByteString(); | |
305 | |
306 CFX_ByteTextBuf buf; | |
307 bool bFirst = true; | |
308 uint8_t code = 0; | |
309 while (1) { | |
310 if (ch == '>') | |
311 break; | |
312 | |
313 if (std::isxdigit(ch)) { | |
314 int val = FXSYS_toHexDigit(ch); | |
315 if (bFirst) { | |
316 code = val * 16; | |
317 } else { | |
318 code += val; | |
319 buf.AppendByte(code); | |
320 } | |
321 bFirst = !bFirst; | |
322 } | |
323 | |
324 if (!GetNextChar(ch)) | |
325 break; | |
326 } | |
327 if (!bFirst) | |
328 buf.AppendByte(code); | |
329 | |
330 return buf.MakeString(); | |
331 } | |
332 | |
333 void CPDF_SyntaxParser::ToNextLine() { | |
334 uint8_t ch; | |
335 while (GetNextChar(ch)) { | |
336 if (ch == '\n') | |
337 break; | |
338 | |
339 if (ch == '\r') { | |
340 GetNextChar(ch); | |
341 if (ch != '\n') | |
342 --m_Pos; | |
343 break; | |
344 } | |
345 } | |
346 } | |
347 | |
348 void CPDF_SyntaxParser::ToNextWord() { | |
349 uint8_t ch; | |
350 if (!GetNextChar(ch)) | |
351 return; | |
352 | |
353 while (1) { | |
354 while (PDFCharIsWhitespace(ch)) { | |
355 if (!GetNextChar(ch)) | |
356 return; | |
357 } | |
358 | |
359 if (ch != '%') | |
360 break; | |
361 | |
362 while (1) { | |
363 if (!GetNextChar(ch)) | |
364 return; | |
365 if (PDFCharIsLineEnding(ch)) | |
366 break; | |
367 } | |
368 } | |
369 m_Pos--; | |
370 } | |
371 | |
372 CFX_ByteString CPDF_SyntaxParser::GetNextWord(bool* bIsNumber) { | |
373 GetNextWordInternal(bIsNumber); | |
374 return CFX_ByteString((const FX_CHAR*)m_WordBuffer, m_WordSize); | |
375 } | |
376 | |
377 CFX_ByteString CPDF_SyntaxParser::GetKeyword() { | |
378 return GetNextWord(nullptr); | |
379 } | |
380 | |
381 CPDF_Object* CPDF_SyntaxParser::GetObject(CPDF_IndirectObjectHolder* pObjList, | |
382 uint32_t objnum, | |
383 uint32_t gennum, | |
384 FX_BOOL bDecrypt) { | |
385 CFX_AutoRestorer<int> restorer(&s_CurrentRecursionDepth); | |
386 if (++s_CurrentRecursionDepth > kParserMaxRecursionDepth) | |
387 return nullptr; | |
388 | |
389 FX_FILESIZE SavedObjPos = m_Pos; | |
390 bool bIsNumber; | |
391 CFX_ByteString word = GetNextWord(&bIsNumber); | |
392 if (word.GetLength() == 0) | |
393 return nullptr; | |
394 | |
395 if (bIsNumber) { | |
396 FX_FILESIZE SavedPos = m_Pos; | |
397 CFX_ByteString nextword = GetNextWord(&bIsNumber); | |
398 if (bIsNumber) { | |
399 CFX_ByteString nextword2 = GetNextWord(nullptr); | |
400 if (nextword2 == "R") | |
401 return new CPDF_Reference(pObjList, FXSYS_atoui(word.c_str())); | |
402 } | |
403 m_Pos = SavedPos; | |
404 return new CPDF_Number(word.AsStringC()); | |
405 } | |
406 | |
407 if (word == "true" || word == "false") | |
408 return new CPDF_Boolean(word == "true"); | |
409 | |
410 if (word == "null") | |
411 return new CPDF_Null; | |
412 | |
413 if (word == "(") { | |
414 CFX_ByteString str = ReadString(); | |
415 if (m_pCryptoHandler && bDecrypt) | |
416 m_pCryptoHandler->Decrypt(objnum, gennum, str); | |
417 return new CPDF_String(MaybeIntern(str), FALSE); | |
418 } | |
419 | |
420 if (word == "<") { | |
421 CFX_ByteString str = ReadHexString(); | |
422 if (m_pCryptoHandler && bDecrypt) | |
423 m_pCryptoHandler->Decrypt(objnum, gennum, str); | |
424 return new CPDF_String(MaybeIntern(str), TRUE); | |
425 } | |
426 | |
427 if (word == "[") { | |
428 CPDF_Array* pArray = new CPDF_Array; | |
429 while (CPDF_Object* pObj = GetObject(pObjList, objnum, gennum, true)) | |
430 pArray->Add(pObj); | |
431 | |
432 return pArray; | |
433 } | |
434 | |
435 if (word[0] == '/') { | |
436 return new CPDF_Name(MaybeIntern( | |
437 PDF_NameDecode(CFX_ByteStringC(m_WordBuffer + 1, m_WordSize - 1)))); | |
438 } | |
439 | |
440 if (word == "<<") { | |
441 int32_t nKeys = 0; | |
442 FX_FILESIZE dwSignValuePos = 0; | |
443 | |
444 std::unique_ptr<CPDF_Dictionary, ReleaseDeleter<CPDF_Dictionary>> pDict( | |
445 new CPDF_Dictionary(m_pPool)); | |
446 while (1) { | |
447 CFX_ByteString key = GetNextWord(nullptr); | |
448 if (key.IsEmpty()) | |
449 return nullptr; | |
450 | |
451 FX_FILESIZE SavedPos = m_Pos - key.GetLength(); | |
452 if (key == ">>") | |
453 break; | |
454 | |
455 if (key == "endobj") { | |
456 m_Pos = SavedPos; | |
457 break; | |
458 } | |
459 | |
460 if (key[0] != '/') | |
461 continue; | |
462 | |
463 ++nKeys; | |
464 key = PDF_NameDecode(key); | |
465 if (key.IsEmpty()) | |
466 continue; | |
467 | |
468 if (key == "/Contents") | |
469 dwSignValuePos = m_Pos; | |
470 | |
471 CPDF_Object* pObj = GetObject(pObjList, objnum, gennum, true); | |
472 if (!pObj) | |
473 continue; | |
474 | |
475 CFX_ByteString keyNoSlash(key.raw_str() + 1, key.GetLength() - 1); | |
476 pDict->SetFor(keyNoSlash, pObj); | |
477 } | |
478 | |
479 // Only when this is a signature dictionary and has contents, we reset the | |
480 // contents to the un-decrypted form. | |
481 if (pDict->IsSignatureDict() && dwSignValuePos) { | |
482 CFX_AutoRestorer<FX_FILESIZE> save_pos(&m_Pos); | |
483 m_Pos = dwSignValuePos; | |
484 pDict->SetFor("Contents", GetObject(pObjList, objnum, gennum, false)); | |
485 } | |
486 | |
487 FX_FILESIZE SavedPos = m_Pos; | |
488 CFX_ByteString nextword = GetNextWord(nullptr); | |
489 if (nextword != "stream") { | |
490 m_Pos = SavedPos; | |
491 return pDict.release(); | |
492 } | |
493 return ReadStream(pDict.release(), objnum, gennum); | |
494 } | |
495 | |
496 if (word == ">>") | |
497 m_Pos = SavedObjPos; | |
498 | |
499 return nullptr; | |
500 } | |
501 | |
502 CPDF_Object* CPDF_SyntaxParser::GetObjectForStrict( | |
503 CPDF_IndirectObjectHolder* pObjList, | |
504 uint32_t objnum, | |
505 uint32_t gennum) { | |
506 CFX_AutoRestorer<int> restorer(&s_CurrentRecursionDepth); | |
507 if (++s_CurrentRecursionDepth > kParserMaxRecursionDepth) | |
508 return nullptr; | |
509 | |
510 FX_FILESIZE SavedObjPos = m_Pos; | |
511 bool bIsNumber; | |
512 CFX_ByteString word = GetNextWord(&bIsNumber); | |
513 if (word.GetLength() == 0) | |
514 return nullptr; | |
515 | |
516 if (bIsNumber) { | |
517 FX_FILESIZE SavedPos = m_Pos; | |
518 CFX_ByteString nextword = GetNextWord(&bIsNumber); | |
519 if (bIsNumber) { | |
520 CFX_ByteString nextword2 = GetNextWord(nullptr); | |
521 if (nextword2 == "R") | |
522 return new CPDF_Reference(pObjList, FXSYS_atoui(word.c_str())); | |
523 } | |
524 m_Pos = SavedPos; | |
525 return new CPDF_Number(word.AsStringC()); | |
526 } | |
527 | |
528 if (word == "true" || word == "false") | |
529 return new CPDF_Boolean(word == "true"); | |
530 | |
531 if (word == "null") | |
532 return new CPDF_Null; | |
533 | |
534 if (word == "(") { | |
535 CFX_ByteString str = ReadString(); | |
536 if (m_pCryptoHandler) | |
537 m_pCryptoHandler->Decrypt(objnum, gennum, str); | |
538 return new CPDF_String(MaybeIntern(str), FALSE); | |
539 } | |
540 | |
541 if (word == "<") { | |
542 CFX_ByteString str = ReadHexString(); | |
543 if (m_pCryptoHandler) | |
544 m_pCryptoHandler->Decrypt(objnum, gennum, str); | |
545 return new CPDF_String(MaybeIntern(str), TRUE); | |
546 } | |
547 | |
548 if (word == "[") { | |
549 std::unique_ptr<CPDF_Array, ReleaseDeleter<CPDF_Array>> pArray( | |
550 new CPDF_Array); | |
551 while (CPDF_Object* pObj = GetObject(pObjList, objnum, gennum, true)) | |
552 pArray->Add(pObj); | |
553 | |
554 return m_WordBuffer[0] == ']' ? pArray.release() : nullptr; | |
555 } | |
556 | |
557 if (word[0] == '/') { | |
558 return new CPDF_Name(MaybeIntern( | |
559 PDF_NameDecode(CFX_ByteStringC(m_WordBuffer + 1, m_WordSize - 1)))); | |
560 } | |
561 | |
562 if (word == "<<") { | |
563 std::unique_ptr<CPDF_Dictionary, ReleaseDeleter<CPDF_Dictionary>> pDict( | |
564 new CPDF_Dictionary(m_pPool)); | |
565 while (1) { | |
566 FX_FILESIZE SavedPos = m_Pos; | |
567 CFX_ByteString key = GetNextWord(nullptr); | |
568 if (key.IsEmpty()) | |
569 return nullptr; | |
570 | |
571 if (key == ">>") | |
572 break; | |
573 | |
574 if (key == "endobj") { | |
575 m_Pos = SavedPos; | |
576 break; | |
577 } | |
578 | |
579 if (key[0] != '/') | |
580 continue; | |
581 | |
582 key = PDF_NameDecode(key); | |
583 std::unique_ptr<CPDF_Object, ReleaseDeleter<CPDF_Object>> obj( | |
584 GetObject(pObjList, objnum, gennum, true)); | |
585 if (!obj) { | |
586 uint8_t ch; | |
587 while (GetNextChar(ch) && ch != 0x0A && ch != 0x0D) { | |
588 continue; | |
589 } | |
590 return nullptr; | |
591 } | |
592 | |
593 if (key.GetLength() > 1) { | |
594 pDict->SetFor(CFX_ByteString(key.c_str() + 1, key.GetLength() - 1), | |
595 obj.release()); | |
596 } | |
597 } | |
598 | |
599 FX_FILESIZE SavedPos = m_Pos; | |
600 CFX_ByteString nextword = GetNextWord(nullptr); | |
601 if (nextword != "stream") { | |
602 m_Pos = SavedPos; | |
603 return pDict.release(); | |
604 } | |
605 | |
606 return ReadStream(pDict.release(), objnum, gennum); | |
607 } | |
608 | |
609 if (word == ">>") | |
610 m_Pos = SavedObjPos; | |
611 | |
612 return nullptr; | |
613 } | |
614 | |
615 unsigned int CPDF_SyntaxParser::ReadEOLMarkers(FX_FILESIZE pos) { | |
616 unsigned char byte1 = 0; | |
617 unsigned char byte2 = 0; | |
618 | |
619 GetCharAt(pos, byte1); | |
620 GetCharAt(pos + 1, byte2); | |
621 | |
622 if (byte1 == '\r' && byte2 == '\n') | |
623 return 2; | |
624 | |
625 if (byte1 == '\r' || byte1 == '\n') | |
626 return 1; | |
627 | |
628 return 0; | |
629 } | |
630 | |
631 CPDF_Stream* CPDF_SyntaxParser::ReadStream(CPDF_Dictionary* pDict, | |
632 uint32_t objnum, | |
633 uint32_t gennum) { | |
634 CPDF_Object* pLenObj = pDict->GetObjectFor("Length"); | |
635 FX_FILESIZE len = -1; | |
636 CPDF_Reference* pLenObjRef = ToReference(pLenObj); | |
637 | |
638 bool differingObjNum = !pLenObjRef || (pLenObjRef->GetObjList() && | |
639 pLenObjRef->GetRefObjNum() != objnum); | |
640 if (pLenObj && differingObjNum) | |
641 len = pLenObj->GetInteger(); | |
642 | |
643 // Locate the start of stream. | |
644 ToNextLine(); | |
645 FX_FILESIZE streamStartPos = m_Pos; | |
646 | |
647 const CFX_ByteStringC kEndStreamStr("endstream"); | |
648 const CFX_ByteStringC kEndObjStr("endobj"); | |
649 | |
650 CPDF_CryptoHandler* pCryptoHandler = | |
651 objnum == (uint32_t)m_MetadataObjnum ? nullptr : m_pCryptoHandler.get(); | |
652 if (!pCryptoHandler) { | |
653 FX_BOOL bSearchForKeyword = TRUE; | |
654 if (len >= 0) { | |
655 pdfium::base::CheckedNumeric<FX_FILESIZE> pos = m_Pos; | |
656 pos += len; | |
657 if (pos.IsValid() && pos.ValueOrDie() < m_FileLen) | |
658 m_Pos = pos.ValueOrDie(); | |
659 | |
660 m_Pos += ReadEOLMarkers(m_Pos); | |
661 FXSYS_memset(m_WordBuffer, 0, kEndStreamStr.GetLength() + 1); | |
662 GetNextWordInternal(nullptr); | |
663 // Earlier version of PDF specification doesn't require EOL marker before | |
664 // 'endstream' keyword. If keyword 'endstream' follows the bytes in | |
665 // specified length, it signals the end of stream. | |
666 if (FXSYS_memcmp(m_WordBuffer, kEndStreamStr.raw_str(), | |
667 kEndStreamStr.GetLength()) == 0) { | |
668 bSearchForKeyword = FALSE; | |
669 } | |
670 } | |
671 | |
672 if (bSearchForKeyword) { | |
673 // If len is not available, len needs to be calculated | |
674 // by searching the keywords "endstream" or "endobj". | |
675 m_Pos = streamStartPos; | |
676 FX_FILESIZE endStreamOffset = 0; | |
677 while (endStreamOffset >= 0) { | |
678 endStreamOffset = FindTag(kEndStreamStr, 0); | |
679 | |
680 // Can't find "endstream". | |
681 if (endStreamOffset < 0) | |
682 break; | |
683 | |
684 // Stop searching when "endstream" is found. | |
685 if (IsWholeWord(m_Pos - kEndStreamStr.GetLength(), m_FileLen, | |
686 kEndStreamStr, TRUE)) { | |
687 endStreamOffset = m_Pos - streamStartPos - kEndStreamStr.GetLength(); | |
688 break; | |
689 } | |
690 } | |
691 | |
692 m_Pos = streamStartPos; | |
693 FX_FILESIZE endObjOffset = 0; | |
694 while (endObjOffset >= 0) { | |
695 endObjOffset = FindTag(kEndObjStr, 0); | |
696 | |
697 // Can't find "endobj". | |
698 if (endObjOffset < 0) | |
699 break; | |
700 | |
701 // Stop searching when "endobj" is found. | |
702 if (IsWholeWord(m_Pos - kEndObjStr.GetLength(), m_FileLen, kEndObjStr, | |
703 TRUE)) { | |
704 endObjOffset = m_Pos - streamStartPos - kEndObjStr.GetLength(); | |
705 break; | |
706 } | |
707 } | |
708 | |
709 // Can't find "endstream" or "endobj". | |
710 if (endStreamOffset < 0 && endObjOffset < 0) { | |
711 pDict->Release(); | |
712 return nullptr; | |
713 } | |
714 | |
715 if (endStreamOffset < 0 && endObjOffset >= 0) { | |
716 // Correct the position of end stream. | |
717 endStreamOffset = endObjOffset; | |
718 } else if (endStreamOffset >= 0 && endObjOffset < 0) { | |
719 // Correct the position of end obj. | |
720 endObjOffset = endStreamOffset; | |
721 } else if (endStreamOffset > endObjOffset) { | |
722 endStreamOffset = endObjOffset; | |
723 } | |
724 | |
725 len = endStreamOffset; | |
726 int numMarkers = ReadEOLMarkers(streamStartPos + endStreamOffset - 2); | |
727 if (numMarkers == 2) { | |
728 len -= 2; | |
729 } else { | |
730 numMarkers = ReadEOLMarkers(streamStartPos + endStreamOffset - 1); | |
731 if (numMarkers == 1) { | |
732 len -= 1; | |
733 } | |
734 } | |
735 | |
736 if (len < 0) { | |
737 pDict->Release(); | |
738 return nullptr; | |
739 } | |
740 pDict->SetIntegerFor("Length", len); | |
741 } | |
742 m_Pos = streamStartPos; | |
743 } | |
744 | |
745 if (len < 0) { | |
746 pDict->Release(); | |
747 return nullptr; | |
748 } | |
749 | |
750 uint8_t* pData = nullptr; | |
751 if (len > 0) { | |
752 pData = FX_Alloc(uint8_t, len); | |
753 ReadBlock(pData, len); | |
754 if (pCryptoHandler) { | |
755 CFX_BinaryBuf dest_buf; | |
756 dest_buf.EstimateSize(pCryptoHandler->DecryptGetSize(len)); | |
757 | |
758 void* context = pCryptoHandler->DecryptStart(objnum, gennum); | |
759 pCryptoHandler->DecryptStream(context, pData, len, dest_buf); | |
760 pCryptoHandler->DecryptFinish(context, dest_buf); | |
761 | |
762 FX_Free(pData); | |
763 pData = dest_buf.GetBuffer(); | |
764 len = dest_buf.GetSize(); | |
765 dest_buf.DetachBuffer(); | |
766 } | |
767 } | |
768 | |
769 CPDF_Stream* pStream = new CPDF_Stream(pData, len, pDict); | |
770 streamStartPos = m_Pos; | |
771 FXSYS_memset(m_WordBuffer, 0, kEndObjStr.GetLength() + 1); | |
772 | |
773 GetNextWordInternal(nullptr); | |
774 | |
775 int numMarkers = ReadEOLMarkers(m_Pos); | |
776 if (m_WordSize == static_cast<unsigned int>(kEndObjStr.GetLength()) && | |
777 numMarkers != 0 && | |
778 FXSYS_memcmp(m_WordBuffer, kEndObjStr.raw_str(), | |
779 kEndObjStr.GetLength()) == 0) { | |
780 m_Pos = streamStartPos; | |
781 } | |
782 return pStream; | |
783 } | |
784 | |
785 void CPDF_SyntaxParser::InitParser(IFX_FileRead* pFileAccess, | |
786 uint32_t HeaderOffset) { | |
787 FX_Free(m_pFileBuf); | |
788 | |
789 m_pFileBuf = FX_Alloc(uint8_t, m_BufSize); | |
790 m_HeaderOffset = HeaderOffset; | |
791 m_FileLen = pFileAccess->GetSize(); | |
792 m_Pos = 0; | |
793 m_pFileAccess = pFileAccess; | |
794 m_BufOffset = 0; | |
795 pFileAccess->ReadBlock( | |
796 m_pFileBuf, 0, | |
797 (size_t)((FX_FILESIZE)m_BufSize > m_FileLen ? m_FileLen : m_BufSize)); | |
798 } | |
799 | |
800 uint32_t CPDF_SyntaxParser::GetDirectNum() { | |
801 bool bIsNumber; | |
802 GetNextWordInternal(&bIsNumber); | |
803 if (!bIsNumber) | |
804 return 0; | |
805 | |
806 m_WordBuffer[m_WordSize] = 0; | |
807 return FXSYS_atoui(reinterpret_cast<const FX_CHAR*>(m_WordBuffer)); | |
808 } | |
809 | |
810 bool CPDF_SyntaxParser::IsWholeWord(FX_FILESIZE startpos, | |
811 FX_FILESIZE limit, | |
812 const CFX_ByteStringC& tag, | |
813 FX_BOOL checkKeyword) { | |
814 const uint32_t taglen = tag.GetLength(); | |
815 | |
816 bool bCheckLeft = !PDFCharIsDelimiter(tag[0]) && !PDFCharIsWhitespace(tag[0]); | |
817 bool bCheckRight = !PDFCharIsDelimiter(tag[taglen - 1]) && | |
818 !PDFCharIsWhitespace(tag[taglen - 1]); | |
819 | |
820 uint8_t ch; | |
821 if (bCheckRight && startpos + (int32_t)taglen <= limit && | |
822 GetCharAt(startpos + (int32_t)taglen, ch)) { | |
823 if (PDFCharIsNumeric(ch) || PDFCharIsOther(ch) || | |
824 (checkKeyword && PDFCharIsDelimiter(ch))) { | |
825 return false; | |
826 } | |
827 } | |
828 | |
829 if (bCheckLeft && startpos > 0 && GetCharAt(startpos - 1, ch)) { | |
830 if (PDFCharIsNumeric(ch) || PDFCharIsOther(ch) || | |
831 (checkKeyword && PDFCharIsDelimiter(ch))) { | |
832 return false; | |
833 } | |
834 } | |
835 return true; | |
836 } | |
837 | |
838 // TODO(dsinclair): Split into a SearchWordForward and SearchWordBackwards | |
839 // and drop the bool. | |
840 FX_BOOL CPDF_SyntaxParser::SearchWord(const CFX_ByteStringC& tag, | |
841 FX_BOOL bWholeWord, | |
842 FX_BOOL bForward, | |
843 FX_FILESIZE limit) { | |
844 int32_t taglen = tag.GetLength(); | |
845 if (taglen == 0) | |
846 return FALSE; | |
847 | |
848 FX_FILESIZE pos = m_Pos; | |
849 int32_t offset = 0; | |
850 if (!bForward) | |
851 offset = taglen - 1; | |
852 | |
853 const uint8_t* tag_data = tag.raw_str(); | |
854 uint8_t byte; | |
855 while (1) { | |
856 if (bForward) { | |
857 if (limit && pos >= m_Pos + limit) | |
858 return FALSE; | |
859 | |
860 if (!GetCharAt(pos, byte)) | |
861 return FALSE; | |
862 | |
863 } else { | |
864 if (limit && pos <= m_Pos - limit) | |
865 return FALSE; | |
866 | |
867 if (!GetCharAtBackward(pos, byte)) | |
868 return FALSE; | |
869 } | |
870 | |
871 if (byte == tag_data[offset]) { | |
872 if (bForward) { | |
873 offset++; | |
874 if (offset < taglen) { | |
875 pos++; | |
876 continue; | |
877 } | |
878 } else { | |
879 offset--; | |
880 if (offset >= 0) { | |
881 pos--; | |
882 continue; | |
883 } | |
884 } | |
885 | |
886 FX_FILESIZE startpos = bForward ? pos - taglen + 1 : pos; | |
887 if (!bWholeWord || IsWholeWord(startpos, limit, tag, FALSE)) { | |
888 m_Pos = startpos; | |
889 return TRUE; | |
890 } | |
891 } | |
892 | |
893 if (bForward) { | |
894 offset = byte == tag_data[0] ? 1 : 0; | |
895 pos++; | |
896 } else { | |
897 offset = byte == tag_data[taglen - 1] ? taglen - 2 : taglen - 1; | |
898 pos--; | |
899 } | |
900 | |
901 if (pos < 0) | |
902 return FALSE; | |
903 } | |
904 | |
905 return FALSE; | |
906 } | |
907 | |
908 int32_t CPDF_SyntaxParser::SearchMultiWord(const CFX_ByteStringC& tags, | |
909 FX_BOOL bWholeWord, | |
910 FX_FILESIZE limit) { | |
911 int32_t ntags = 1; | |
912 for (int i = 0; i < tags.GetLength(); ++i) { | |
913 if (tags[i] == 0) | |
914 ++ntags; | |
915 } | |
916 | |
917 // Ensure that the input byte string happens to be nul-terminated. This | |
918 // need not be the case, but the loop below uses this guarantee to put | |
919 // the last pattern into the vector. | |
920 ASSERT(tags[tags.GetLength()] == 0); | |
921 std::vector<SearchTagRecord> patterns(ntags); | |
922 uint32_t start = 0; | |
923 uint32_t itag = 0; | |
924 uint32_t max_len = 0; | |
925 for (int i = 0; i <= tags.GetLength(); ++i) { | |
926 if (tags[i] == 0) { | |
927 uint32_t len = i - start; | |
928 max_len = std::max(len, max_len); | |
929 patterns[itag].m_bsTag = tags.Mid(start, len); | |
930 patterns[itag].m_Offset = 0; | |
931 start = i + 1; | |
932 ++itag; | |
933 } | |
934 } | |
935 | |
936 const FX_FILESIZE pos_limit = m_Pos + limit; | |
937 for (FX_FILESIZE pos = m_Pos; !limit || pos < pos_limit; ++pos) { | |
938 uint8_t byte; | |
939 if (!GetCharAt(pos, byte)) | |
940 break; | |
941 | |
942 for (int i = 0; i < ntags; ++i) { | |
943 SearchTagRecord& pat = patterns[i]; | |
944 if (pat.m_bsTag[pat.m_Offset] != byte) { | |
945 pat.m_Offset = (pat.m_bsTag[0] == byte) ? 1 : 0; | |
946 continue; | |
947 } | |
948 | |
949 ++pat.m_Offset; | |
950 if (pat.m_Offset != pat.m_bsTag.GetLength()) | |
951 continue; | |
952 | |
953 if (!bWholeWord || IsWholeWord(pos - pat.m_bsTag.GetLength(), limit, | |
954 pat.m_bsTag, FALSE)) { | |
955 return i; | |
956 } | |
957 | |
958 pat.m_Offset = (pat.m_bsTag[0] == byte) ? 1 : 0; | |
959 } | |
960 } | |
961 return -1; | |
962 } | |
963 | |
964 FX_FILESIZE CPDF_SyntaxParser::FindTag(const CFX_ByteStringC& tag, | |
965 FX_FILESIZE limit) { | |
966 int32_t taglen = tag.GetLength(); | |
967 int32_t match = 0; | |
968 limit += m_Pos; | |
969 FX_FILESIZE startpos = m_Pos; | |
970 | |
971 while (1) { | |
972 uint8_t ch; | |
973 if (!GetNextChar(ch)) | |
974 return -1; | |
975 | |
976 if (ch == tag[match]) { | |
977 match++; | |
978 if (match == taglen) | |
979 return m_Pos - startpos - taglen; | |
980 } else { | |
981 match = ch == tag[0] ? 1 : 0; | |
982 } | |
983 | |
984 if (limit && m_Pos == limit) | |
985 return -1; | |
986 } | |
987 return -1; | |
988 } | |
989 | |
990 void CPDF_SyntaxParser::SetEncrypt( | |
991 std::unique_ptr<CPDF_CryptoHandler> pCryptoHandler) { | |
992 m_pCryptoHandler = std::move(pCryptoHandler); | |
993 } | |
994 | |
995 CFX_ByteString CPDF_SyntaxParser::MaybeIntern(const CFX_ByteString& str) { | |
996 return m_pPool ? m_pPool->Intern(str) : str; | |
997 } | |
OLD | NEW |