OLD | NEW |
---|---|
(Empty) | |
1 // Copyright 2016 PDFium Authors. All rights reserved. | |
dsinclair
2016/03/08 01:15:05
nit: 2016?
Tom Sepez
2016/03/08 19:35:41
Done.
| |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com | |
6 | |
7 #include "core/src/fpdfapi/fpdf_parser/cpdf_syntax_parser.h" | |
8 | |
9 #include "core/include/fpdfapi/fpdf_module.h" | |
10 #include "core/include/fpdfapi/fpdf_parser.h" | |
11 #include "core/include/fxcrt/fx_ext.h" | |
12 #include "third_party/base/numerics/safe_math.h" | |
13 | |
14 struct SearchTagRecord { | |
dsinclair
2016/03/08 01:15:05
Can this go in namespace {}?
Tom Sepez
2016/03/08 19:35:42
Done.
| |
15 const char* m_pTag; | |
16 FX_DWORD m_Len; | |
17 FX_DWORD m_Offset; | |
18 }; | |
19 | |
20 // static | |
21 int CPDF_SyntaxParser::s_CurrentRecursionDepth = 0; | |
22 | |
23 CPDF_SyntaxParser::CPDF_SyntaxParser() | |
24 : m_MetadataObjnum(0), | |
25 m_pFileAccess(nullptr), | |
26 m_pFileBuf(nullptr), | |
27 m_BufSize(CPDF_ModuleMgr::kFileBufSize) {} | |
28 | |
29 CPDF_SyntaxParser::~CPDF_SyntaxParser() { | |
30 FX_Free(m_pFileBuf); | |
31 } | |
32 | |
33 FX_BOOL CPDF_SyntaxParser::GetCharAt(FX_FILESIZE pos, uint8_t& ch) { | |
34 CFX_AutoRestorer<FX_FILESIZE> save_pos(&m_Pos); | |
35 m_Pos = pos; | |
36 return GetNextChar(ch); | |
37 } | |
38 | |
39 FX_BOOL CPDF_SyntaxParser::GetNextChar(uint8_t& ch) { | |
40 FX_FILESIZE pos = m_Pos + m_HeaderOffset; | |
41 if (pos >= m_FileLen) | |
42 return FALSE; | |
43 | |
44 if (m_BufOffset >= pos || (FX_FILESIZE)(m_BufOffset + m_BufSize) <= pos) { | |
45 FX_FILESIZE read_pos = pos; | |
46 FX_DWORD read_size = m_BufSize; | |
47 if ((FX_FILESIZE)read_size > m_FileLen) | |
48 read_size = (FX_DWORD)m_FileLen; | |
49 | |
50 if ((FX_FILESIZE)(read_pos + read_size) > m_FileLen) { | |
51 if (m_FileLen < (FX_FILESIZE)read_size) { | |
52 read_pos = 0; | |
53 read_size = (FX_DWORD)m_FileLen; | |
54 } else { | |
55 read_pos = m_FileLen - read_size; | |
56 } | |
57 } | |
58 | |
59 if (!m_pFileAccess->ReadBlock(m_pFileBuf, read_pos, read_size)) | |
60 return FALSE; | |
61 | |
62 m_BufOffset = read_pos; | |
63 } | |
64 ch = m_pFileBuf[pos - m_BufOffset]; | |
65 m_Pos++; | |
66 return TRUE; | |
67 } | |
68 | |
69 FX_BOOL CPDF_SyntaxParser::GetCharAtBackward(FX_FILESIZE pos, uint8_t& ch) { | |
70 pos += m_HeaderOffset; | |
71 if (pos >= m_FileLen) | |
72 return FALSE; | |
73 | |
74 if (m_BufOffset >= pos || (FX_FILESIZE)(m_BufOffset + m_BufSize) <= pos) { | |
75 FX_FILESIZE read_pos; | |
76 if (pos < (FX_FILESIZE)m_BufSize) | |
77 read_pos = 0; | |
78 else | |
79 read_pos = pos - m_BufSize + 1; | |
80 | |
81 FX_DWORD read_size = m_BufSize; | |
82 if ((FX_FILESIZE)(read_pos + read_size) > m_FileLen) { | |
83 if (m_FileLen < (FX_FILESIZE)read_size) { | |
84 read_pos = 0; | |
85 read_size = (FX_DWORD)m_FileLen; | |
86 } else { | |
87 read_pos = m_FileLen - read_size; | |
88 } | |
89 } | |
90 | |
91 if (!m_pFileAccess->ReadBlock(m_pFileBuf, read_pos, read_size)) | |
92 return FALSE; | |
93 | |
94 m_BufOffset = read_pos; | |
95 } | |
96 ch = m_pFileBuf[pos - m_BufOffset]; | |
97 return TRUE; | |
98 } | |
99 | |
100 FX_BOOL CPDF_SyntaxParser::ReadBlock(uint8_t* pBuf, FX_DWORD size) { | |
101 if (!m_pFileAccess->ReadBlock(pBuf, m_Pos + m_HeaderOffset, size)) | |
102 return FALSE; | |
103 m_Pos += size; | |
104 return TRUE; | |
105 } | |
106 | |
107 void CPDF_SyntaxParser::GetNextWordInternal(bool* bIsNumber) { | |
108 m_WordSize = 0; | |
109 if (bIsNumber) | |
110 *bIsNumber = true; | |
111 | |
112 uint8_t ch; | |
113 if (!GetNextChar(ch)) | |
114 return; | |
115 | |
116 while (1) { | |
117 while (PDFCharIsWhitespace(ch)) { | |
118 if (!GetNextChar(ch)) | |
119 return; | |
120 } | |
121 | |
122 if (ch != '%') | |
123 break; | |
124 | |
125 while (1) { | |
126 if (!GetNextChar(ch)) | |
127 return; | |
128 if (PDFCharIsLineEnding(ch)) | |
129 break; | |
130 } | |
131 } | |
132 | |
133 if (PDFCharIsDelimiter(ch)) { | |
134 if (bIsNumber) | |
135 *bIsNumber = false; | |
136 | |
137 m_WordBuffer[m_WordSize++] = ch; | |
138 if (ch == '/') { | |
139 while (1) { | |
140 if (!GetNextChar(ch)) | |
141 return; | |
142 | |
143 if (!PDFCharIsOther(ch) && !PDFCharIsNumeric(ch)) { | |
144 m_Pos--; | |
145 return; | |
146 } | |
147 | |
148 if (m_WordSize < sizeof(m_WordBuffer) - 1) | |
149 m_WordBuffer[m_WordSize++] = ch; | |
150 } | |
151 } else if (ch == '<') { | |
152 if (!GetNextChar(ch)) | |
153 return; | |
154 | |
155 if (ch == '<') | |
156 m_WordBuffer[m_WordSize++] = ch; | |
157 else | |
158 m_Pos--; | |
159 } else if (ch == '>') { | |
160 if (!GetNextChar(ch)) | |
161 return; | |
162 | |
163 if (ch == '>') | |
164 m_WordBuffer[m_WordSize++] = ch; | |
165 else | |
166 m_Pos--; | |
167 } | |
168 return; | |
169 } | |
170 | |
171 while (1) { | |
172 if (m_WordSize < sizeof(m_WordBuffer) - 1) | |
173 m_WordBuffer[m_WordSize++] = ch; | |
174 | |
175 if (!PDFCharIsNumeric(ch)) { | |
176 if (bIsNumber) | |
177 *bIsNumber = false; | |
178 } | |
179 | |
180 if (!GetNextChar(ch)) | |
181 return; | |
182 | |
183 if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) { | |
184 m_Pos--; | |
185 break; | |
186 } | |
187 } | |
188 } | |
189 | |
190 CFX_ByteString CPDF_SyntaxParser::ReadString() { | |
191 uint8_t ch; | |
192 if (!GetNextChar(ch)) | |
193 return CFX_ByteString(); | |
194 | |
195 CFX_ByteTextBuf buf; | |
196 int32_t parlevel = 0; | |
197 int32_t status = 0; | |
198 int32_t iEscCode = 0; | |
199 while (1) { | |
200 switch (status) { | |
201 case 0: | |
202 if (ch == ')') { | |
203 if (parlevel == 0) { | |
204 return buf.GetByteString(); | |
205 } | |
206 parlevel--; | |
207 buf.AppendChar(')'); | |
208 } else if (ch == '(') { | |
209 parlevel++; | |
210 buf.AppendChar('('); | |
211 } else if (ch == '\\') { | |
212 status = 1; | |
213 } else { | |
214 buf.AppendChar(ch); | |
215 } | |
216 break; | |
217 case 1: | |
218 if (ch >= '0' && ch <= '7') { | |
219 iEscCode = FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch)); | |
220 status = 2; | |
221 break; | |
222 } | |
223 | |
224 if (ch == 'n') { | |
225 buf.AppendChar('\n'); | |
226 } else if (ch == 'r') { | |
227 buf.AppendChar('\r'); | |
228 } else if (ch == 't') { | |
229 buf.AppendChar('\t'); | |
230 } else if (ch == 'b') { | |
231 buf.AppendChar('\b'); | |
232 } else if (ch == 'f') { | |
233 buf.AppendChar('\f'); | |
234 } else if (ch == '\r') { | |
235 status = 4; | |
236 break; | |
237 } else if (ch != '\n') { | |
238 buf.AppendChar(ch); | |
239 } | |
240 status = 0; | |
241 break; | |
242 case 2: | |
243 if (ch >= '0' && ch <= '7') { | |
244 iEscCode = | |
245 iEscCode * 8 + FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch)); | |
246 status = 3; | |
247 } else { | |
248 buf.AppendChar(iEscCode); | |
249 status = 0; | |
250 continue; | |
251 } | |
252 break; | |
253 case 3: | |
254 if (ch >= '0' && ch <= '7') { | |
255 iEscCode = | |
256 iEscCode * 8 + FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch)); | |
257 buf.AppendChar(iEscCode); | |
258 status = 0; | |
259 } else { | |
260 buf.AppendChar(iEscCode); | |
261 status = 0; | |
262 continue; | |
263 } | |
264 break; | |
265 case 4: | |
266 status = 0; | |
267 if (ch != '\n') | |
268 continue; | |
269 break; | |
270 } | |
271 | |
272 if (!GetNextChar(ch)) | |
273 break; | |
274 } | |
275 | |
276 GetNextChar(ch); | |
277 return buf.GetByteString(); | |
278 } | |
279 | |
280 CFX_ByteString CPDF_SyntaxParser::ReadHexString() { | |
281 uint8_t ch; | |
282 if (!GetNextChar(ch)) | |
283 return CFX_ByteString(); | |
284 | |
285 CFX_ByteTextBuf buf; | |
286 bool bFirst = true; | |
287 uint8_t code = 0; | |
288 while (1) { | |
289 if (ch == '>') | |
290 break; | |
291 | |
292 if (std::isxdigit(ch)) { | |
293 int val = FXSYS_toHexDigit(ch); | |
294 if (bFirst) { | |
295 code = val * 16; | |
296 } else { | |
297 code += val; | |
298 buf.AppendByte(code); | |
299 } | |
300 bFirst = !bFirst; | |
301 } | |
302 | |
303 if (!GetNextChar(ch)) | |
304 break; | |
305 } | |
306 if (!bFirst) | |
307 buf.AppendByte(code); | |
308 | |
309 return buf.GetByteString(); | |
310 } | |
311 | |
312 void CPDF_SyntaxParser::ToNextLine() { | |
313 uint8_t ch; | |
314 while (GetNextChar(ch)) { | |
315 if (ch == '\n') | |
316 break; | |
317 | |
318 if (ch == '\r') { | |
319 GetNextChar(ch); | |
320 if (ch != '\n') | |
321 --m_Pos; | |
322 break; | |
323 } | |
324 } | |
325 } | |
326 | |
327 void CPDF_SyntaxParser::ToNextWord() { | |
328 uint8_t ch; | |
329 if (!GetNextChar(ch)) | |
330 return; | |
331 | |
332 while (1) { | |
333 while (PDFCharIsWhitespace(ch)) { | |
334 if (!GetNextChar(ch)) | |
335 return; | |
336 } | |
337 | |
338 if (ch != '%') | |
339 break; | |
340 | |
341 while (1) { | |
342 if (!GetNextChar(ch)) | |
343 return; | |
344 if (PDFCharIsLineEnding(ch)) | |
345 break; | |
346 } | |
347 } | |
348 m_Pos--; | |
349 } | |
350 | |
351 CFX_ByteString CPDF_SyntaxParser::GetNextWord(bool* bIsNumber) { | |
352 GetNextWordInternal(bIsNumber); | |
353 return CFX_ByteString((const FX_CHAR*)m_WordBuffer, m_WordSize); | |
354 } | |
355 | |
356 CFX_ByteString CPDF_SyntaxParser::GetKeyword() { | |
357 return GetNextWord(nullptr); | |
358 } | |
359 | |
360 CPDF_Object* CPDF_SyntaxParser::GetObject(CPDF_IndirectObjectHolder* pObjList, | |
361 FX_DWORD objnum, | |
362 FX_DWORD gennum, | |
363 FX_BOOL bDecrypt) { | |
364 CFX_AutoRestorer<int> restorer(&s_CurrentRecursionDepth); | |
365 if (++s_CurrentRecursionDepth > kParserMaxRecursionDepth) | |
366 return nullptr; | |
367 | |
368 FX_FILESIZE SavedPos = m_Pos; | |
369 bool bIsNumber; | |
370 CFX_ByteString word = GetNextWord(&bIsNumber); | |
371 if (word.GetLength() == 0) | |
372 return nullptr; | |
373 | |
374 if (bIsNumber) { | |
375 FX_FILESIZE SavedPos = m_Pos; | |
376 CFX_ByteString nextword = GetNextWord(&bIsNumber); | |
377 if (bIsNumber) { | |
378 CFX_ByteString nextword2 = GetNextWord(nullptr); | |
379 if (nextword2 == "R") { | |
380 FX_DWORD objnum = FXSYS_atoui(word); | |
381 return new CPDF_Reference(pObjList, objnum); | |
382 } | |
383 } | |
384 m_Pos = SavedPos; | |
385 return new CPDF_Number(word); | |
386 } | |
387 | |
388 if (word == "true" || word == "false") | |
389 return new CPDF_Boolean(word == "true"); | |
390 | |
391 if (word == "null") | |
392 return new CPDF_Null; | |
393 | |
394 if (word == "(") { | |
395 CFX_ByteString str = ReadString(); | |
396 if (m_pCryptoHandler && bDecrypt) | |
397 m_pCryptoHandler->Decrypt(objnum, gennum, str); | |
398 return new CPDF_String(str, FALSE); | |
399 } | |
400 | |
401 if (word == "<") { | |
402 CFX_ByteString str = ReadHexString(); | |
403 if (m_pCryptoHandler && bDecrypt) | |
404 m_pCryptoHandler->Decrypt(objnum, gennum, str); | |
405 | |
406 return new CPDF_String(str, TRUE); | |
407 } | |
408 | |
409 if (word == "[") { | |
410 CPDF_Array* pArray = new CPDF_Array; | |
411 while (CPDF_Object* pObj = GetObject(pObjList, objnum, gennum, true)) | |
412 pArray->Add(pObj); | |
413 | |
414 return pArray; | |
415 } | |
416 | |
417 if (word[0] == '/') { | |
418 return new CPDF_Name( | |
419 PDF_NameDecode(CFX_ByteStringC(m_WordBuffer + 1, m_WordSize - 1))); | |
420 } | |
421 | |
422 if (word == "<<") { | |
423 int32_t nKeys = 0; | |
424 FX_FILESIZE dwSignValuePos = 0; | |
425 | |
426 std::unique_ptr<CPDF_Dictionary, ReleaseDeleter<CPDF_Dictionary>> pDict( | |
427 new CPDF_Dictionary); | |
428 while (1) { | |
429 CFX_ByteString key = GetNextWord(nullptr); | |
430 if (key.IsEmpty()) | |
431 return nullptr; | |
432 | |
433 FX_FILESIZE SavedPos = m_Pos - key.GetLength(); | |
434 if (key == ">>") | |
435 break; | |
436 | |
437 if (key == "endobj") { | |
438 m_Pos = SavedPos; | |
439 break; | |
440 } | |
441 | |
442 if (key[0] != '/') | |
443 continue; | |
444 | |
445 ++nKeys; | |
446 key = PDF_NameDecode(key); | |
447 if (key.IsEmpty()) | |
448 continue; | |
449 | |
450 if (key == "/Contents") | |
451 dwSignValuePos = m_Pos; | |
452 | |
453 CPDF_Object* pObj = GetObject(pObjList, objnum, gennum, true); | |
454 if (!pObj) | |
455 continue; | |
456 | |
457 CFX_ByteStringC keyNoSlash(key.c_str() + 1, key.GetLength() - 1); | |
458 pDict->SetAt(keyNoSlash, pObj); | |
459 } | |
460 | |
461 // Only when this is a signature dictionary and has contents, we reset the | |
462 // contents to the un-decrypted form. | |
463 if (IsSignatureDict(pDict.get()) && dwSignValuePos) { | |
464 CFX_AutoRestorer<FX_FILESIZE> save_pos(&m_Pos); | |
465 m_Pos = dwSignValuePos; | |
466 pDict->SetAt("Contents", GetObject(pObjList, objnum, gennum, false)); | |
467 } | |
468 | |
469 FX_FILESIZE SavedPos = m_Pos; | |
470 CFX_ByteString nextword = GetNextWord(nullptr); | |
471 if (nextword != "stream") { | |
472 m_Pos = SavedPos; | |
473 return pDict.release(); | |
474 } | |
475 return ReadStream(pDict.release(), objnum, gennum); | |
476 } | |
477 | |
478 if (word == ">>") | |
479 m_Pos = SavedPos; | |
480 | |
481 return nullptr; | |
482 } | |
483 | |
484 CPDF_Object* CPDF_SyntaxParser::GetObjectByStrict( | |
485 CPDF_IndirectObjectHolder* pObjList, | |
486 FX_DWORD objnum, | |
487 FX_DWORD gennum) { | |
488 CFX_AutoRestorer<int> restorer(&s_CurrentRecursionDepth); | |
489 if (++s_CurrentRecursionDepth > kParserMaxRecursionDepth) | |
490 return nullptr; | |
491 | |
492 FX_FILESIZE SavedPos = m_Pos; | |
493 bool bIsNumber; | |
494 CFX_ByteString word = GetNextWord(&bIsNumber); | |
495 if (word.GetLength() == 0) | |
496 return nullptr; | |
497 | |
498 if (bIsNumber) { | |
499 FX_FILESIZE SavedPos = m_Pos; | |
500 CFX_ByteString nextword = GetNextWord(&bIsNumber); | |
501 if (bIsNumber) { | |
502 CFX_ByteString nextword2 = GetNextWord(nullptr); | |
503 if (nextword2 == "R") | |
504 return new CPDF_Reference(pObjList, FXSYS_atoui(word)); | |
505 } | |
506 m_Pos = SavedPos; | |
507 return new CPDF_Number(word); | |
508 } | |
509 | |
510 if (word == "true" || word == "false") | |
511 return new CPDF_Boolean(word == "true"); | |
512 | |
513 if (word == "null") | |
514 return new CPDF_Null; | |
515 | |
516 if (word == "(") { | |
517 CFX_ByteString str = ReadString(); | |
518 if (m_pCryptoHandler) | |
519 m_pCryptoHandler->Decrypt(objnum, gennum, str); | |
520 return new CPDF_String(str, FALSE); | |
521 } | |
522 | |
523 if (word == "<") { | |
524 CFX_ByteString str = ReadHexString(); | |
525 if (m_pCryptoHandler) | |
526 m_pCryptoHandler->Decrypt(objnum, gennum, str); | |
527 return new CPDF_String(str, TRUE); | |
528 } | |
529 | |
530 if (word == "[") { | |
531 std::unique_ptr<CPDF_Array, ReleaseDeleter<CPDF_Array>> pArray( | |
532 new CPDF_Array); | |
533 while (CPDF_Object* pObj = GetObject(pObjList, objnum, gennum, true)) | |
534 pArray->Add(pObj); | |
535 | |
536 return m_WordBuffer[0] == ']' ? pArray.release() : nullptr; | |
537 } | |
538 | |
539 if (word[0] == '/') { | |
540 return new CPDF_Name( | |
541 PDF_NameDecode(CFX_ByteStringC(m_WordBuffer + 1, m_WordSize - 1))); | |
542 } | |
543 | |
544 if (word == "<<") { | |
545 std::unique_ptr<CPDF_Dictionary, ReleaseDeleter<CPDF_Dictionary>> pDict( | |
546 new CPDF_Dictionary); | |
547 while (1) { | |
548 FX_FILESIZE SavedPos = m_Pos; | |
549 CFX_ByteString key = GetNextWord(nullptr); | |
550 if (key.IsEmpty()) | |
551 return nullptr; | |
552 | |
553 if (key == ">>") | |
554 break; | |
555 | |
556 if (key == "endobj") { | |
557 m_Pos = SavedPos; | |
558 break; | |
559 } | |
560 | |
561 if (key[0] != '/') | |
562 continue; | |
563 | |
564 key = PDF_NameDecode(key); | |
565 std::unique_ptr<CPDF_Object, ReleaseDeleter<CPDF_Object>> obj( | |
566 GetObject(pObjList, objnum, gennum, true)); | |
567 if (!obj) { | |
568 uint8_t ch; | |
569 while (GetNextChar(ch) && ch != 0x0A && ch != 0x0D) { | |
570 continue; | |
571 } | |
572 return nullptr; | |
573 } | |
574 | |
575 if (key.GetLength() > 1) { | |
576 pDict->SetAt(CFX_ByteStringC(key.c_str() + 1, key.GetLength() - 1), | |
577 obj.release()); | |
578 } | |
579 } | |
580 | |
581 FX_FILESIZE SavedPos = m_Pos; | |
582 CFX_ByteString nextword = GetNextWord(nullptr); | |
583 if (nextword != "stream") { | |
584 m_Pos = SavedPos; | |
585 return pDict.release(); | |
586 } | |
587 | |
588 return ReadStream(pDict.release(), objnum, gennum); | |
589 } | |
590 | |
591 if (word == ">>") | |
592 m_Pos = SavedPos; | |
593 | |
594 return nullptr; | |
595 } | |
596 | |
597 unsigned int CPDF_SyntaxParser::ReadEOLMarkers(FX_FILESIZE pos) { | |
598 unsigned char byte1 = 0; | |
599 unsigned char byte2 = 0; | |
600 | |
601 GetCharAt(pos, byte1); | |
602 GetCharAt(pos + 1, byte2); | |
603 | |
604 if (byte1 == '\r' && byte2 == '\n') | |
605 return 2; | |
606 | |
607 if (byte1 == '\r' || byte1 == '\n') | |
608 return 1; | |
609 | |
610 return 0; | |
611 } | |
612 | |
613 CPDF_Stream* CPDF_SyntaxParser::ReadStream(CPDF_Dictionary* pDict, | |
614 FX_DWORD objnum, | |
615 FX_DWORD gennum) { | |
616 CPDF_Object* pLenObj = pDict->GetElement("Length"); | |
617 FX_FILESIZE len = -1; | |
618 CPDF_Reference* pLenObjRef = ToReference(pLenObj); | |
619 | |
620 bool differingObjNum = !pLenObjRef || (pLenObjRef->GetObjList() && | |
621 pLenObjRef->GetRefObjNum() != objnum); | |
622 if (pLenObj && differingObjNum) | |
623 len = pLenObj->GetInteger(); | |
624 | |
625 // Locate the start of stream. | |
626 ToNextLine(); | |
627 FX_FILESIZE streamStartPos = m_Pos; | |
628 | |
629 const CFX_ByteStringC kEndStreamStr("endstream"); | |
630 const CFX_ByteStringC kEndObjStr("endobj"); | |
631 | |
632 CPDF_CryptoHandler* pCryptoHandler = | |
633 objnum == (FX_DWORD)m_MetadataObjnum ? nullptr : m_pCryptoHandler.get(); | |
634 if (!pCryptoHandler) { | |
635 FX_BOOL bSearchForKeyword = TRUE; | |
636 if (len >= 0) { | |
637 pdfium::base::CheckedNumeric<FX_FILESIZE> pos = m_Pos; | |
638 pos += len; | |
639 if (pos.IsValid() && pos.ValueOrDie() < m_FileLen) | |
640 m_Pos = pos.ValueOrDie(); | |
641 | |
642 m_Pos += ReadEOLMarkers(m_Pos); | |
643 FXSYS_memset(m_WordBuffer, 0, kEndStreamStr.GetLength() + 1); | |
644 GetNextWordInternal(nullptr); | |
645 // Earlier version of PDF specification doesn't require EOL marker before | |
646 // 'endstream' keyword. If keyword 'endstream' follows the bytes in | |
647 // specified length, it signals the end of stream. | |
648 if (FXSYS_memcmp(m_WordBuffer, kEndStreamStr.GetPtr(), | |
649 kEndStreamStr.GetLength()) == 0) { | |
650 bSearchForKeyword = FALSE; | |
651 } | |
652 } | |
653 | |
654 if (bSearchForKeyword) { | |
655 // If len is not available, len needs to be calculated | |
656 // by searching the keywords "endstream" or "endobj". | |
657 m_Pos = streamStartPos; | |
658 FX_FILESIZE endStreamOffset = 0; | |
659 while (endStreamOffset >= 0) { | |
660 endStreamOffset = FindTag(kEndStreamStr, 0); | |
661 | |
662 // Can't find "endstream". | |
663 if (endStreamOffset < 0) | |
664 break; | |
665 | |
666 // Stop searching when "endstream" is found. | |
667 if (IsWholeWord(m_Pos - kEndStreamStr.GetLength(), m_FileLen, | |
668 kEndStreamStr, TRUE)) { | |
669 endStreamOffset = m_Pos - streamStartPos - kEndStreamStr.GetLength(); | |
670 break; | |
671 } | |
672 } | |
673 | |
674 m_Pos = streamStartPos; | |
675 FX_FILESIZE endObjOffset = 0; | |
676 while (endObjOffset >= 0) { | |
677 endObjOffset = FindTag(kEndObjStr, 0); | |
678 | |
679 // Can't find "endobj". | |
680 if (endObjOffset < 0) | |
681 break; | |
682 | |
683 // Stop searching when "endobj" is found. | |
684 if (IsWholeWord(m_Pos - kEndObjStr.GetLength(), m_FileLen, kEndObjStr, | |
685 TRUE)) { | |
686 endObjOffset = m_Pos - streamStartPos - kEndObjStr.GetLength(); | |
687 break; | |
688 } | |
689 } | |
690 | |
691 // Can't find "endstream" or "endobj". | |
692 if (endStreamOffset < 0 && endObjOffset < 0) { | |
693 pDict->Release(); | |
694 return nullptr; | |
695 } | |
696 | |
697 if (endStreamOffset < 0 && endObjOffset >= 0) { | |
698 // Correct the position of end stream. | |
699 endStreamOffset = endObjOffset; | |
700 } else if (endStreamOffset >= 0 && endObjOffset < 0) { | |
701 // Correct the position of end obj. | |
702 endObjOffset = endStreamOffset; | |
703 } else if (endStreamOffset > endObjOffset) { | |
704 endStreamOffset = endObjOffset; | |
705 } | |
706 | |
707 len = endStreamOffset; | |
708 int numMarkers = ReadEOLMarkers(streamStartPos + endStreamOffset - 2); | |
709 if (numMarkers == 2) { | |
710 len -= 2; | |
711 } else { | |
712 numMarkers = ReadEOLMarkers(streamStartPos + endStreamOffset - 1); | |
713 if (numMarkers == 1) { | |
714 len -= 1; | |
715 } | |
716 } | |
717 | |
718 if (len < 0) { | |
719 pDict->Release(); | |
720 return nullptr; | |
721 } | |
722 pDict->SetAtInteger("Length", len); | |
723 } | |
724 m_Pos = streamStartPos; | |
725 } | |
726 | |
727 if (len < 0) { | |
728 pDict->Release(); | |
729 return nullptr; | |
730 } | |
731 | |
732 uint8_t* pData = nullptr; | |
733 if (len > 0) { | |
734 pData = FX_Alloc(uint8_t, len); | |
735 ReadBlock(pData, len); | |
736 if (pCryptoHandler) { | |
737 CFX_BinaryBuf dest_buf; | |
738 dest_buf.EstimateSize(pCryptoHandler->DecryptGetSize(len)); | |
739 | |
740 void* context = pCryptoHandler->DecryptStart(objnum, gennum); | |
741 pCryptoHandler->DecryptStream(context, pData, len, dest_buf); | |
742 pCryptoHandler->DecryptFinish(context, dest_buf); | |
743 | |
744 FX_Free(pData); | |
745 pData = dest_buf.GetBuffer(); | |
746 len = dest_buf.GetSize(); | |
747 dest_buf.DetachBuffer(); | |
748 } | |
749 } | |
750 | |
751 CPDF_Stream* pStream = new CPDF_Stream(pData, len, pDict); | |
752 streamStartPos = m_Pos; | |
753 FXSYS_memset(m_WordBuffer, 0, kEndObjStr.GetLength() + 1); | |
754 | |
755 GetNextWordInternal(nullptr); | |
756 | |
757 int numMarkers = ReadEOLMarkers(m_Pos); | |
758 if (m_WordSize == kEndObjStr.GetLength() && numMarkers != 0 && | |
759 FXSYS_memcmp(m_WordBuffer, kEndObjStr.GetPtr(), kEndObjStr.GetLength()) == | |
760 0) { | |
761 m_Pos = streamStartPos; | |
762 } | |
763 return pStream; | |
764 } | |
765 | |
766 void CPDF_SyntaxParser::InitParser(IFX_FileRead* pFileAccess, | |
767 FX_DWORD HeaderOffset) { | |
768 FX_Free(m_pFileBuf); | |
769 | |
770 m_pFileBuf = FX_Alloc(uint8_t, m_BufSize); | |
771 m_HeaderOffset = HeaderOffset; | |
772 m_FileLen = pFileAccess->GetSize(); | |
773 m_Pos = 0; | |
774 m_pFileAccess = pFileAccess; | |
775 m_BufOffset = 0; | |
776 pFileAccess->ReadBlock( | |
777 m_pFileBuf, 0, | |
778 (size_t)((FX_FILESIZE)m_BufSize > m_FileLen ? m_FileLen : m_BufSize)); | |
779 } | |
780 | |
781 uint32_t CPDF_SyntaxParser::GetDirectNum() { | |
782 bool bIsNumber; | |
783 GetNextWordInternal(&bIsNumber); | |
784 if (!bIsNumber) | |
785 return 0; | |
786 | |
787 m_WordBuffer[m_WordSize] = 0; | |
788 return FXSYS_atoui(reinterpret_cast<const FX_CHAR*>(m_WordBuffer)); | |
789 } | |
790 | |
791 bool CPDF_SyntaxParser::IsWholeWord(FX_FILESIZE startpos, | |
792 FX_FILESIZE limit, | |
793 const CFX_ByteStringC& tag, | |
794 FX_BOOL checkKeyword) { | |
795 const FX_DWORD taglen = tag.GetLength(); | |
796 | |
797 bool bCheckLeft = !PDFCharIsDelimiter(tag[0]) && !PDFCharIsWhitespace(tag[0]); | |
798 bool bCheckRight = !PDFCharIsDelimiter(tag[taglen - 1]) && | |
799 !PDFCharIsWhitespace(tag[taglen - 1]); | |
800 | |
801 uint8_t ch; | |
802 if (bCheckRight && startpos + (int32_t)taglen <= limit && | |
803 GetCharAt(startpos + (int32_t)taglen, ch)) { | |
804 if (PDFCharIsNumeric(ch) || PDFCharIsOther(ch) || | |
805 (checkKeyword && PDFCharIsDelimiter(ch))) { | |
806 return false; | |
807 } | |
808 } | |
809 | |
810 if (bCheckLeft && startpos > 0 && GetCharAt(startpos - 1, ch)) { | |
811 if (PDFCharIsNumeric(ch) || PDFCharIsOther(ch) || | |
812 (checkKeyword && PDFCharIsDelimiter(ch))) { | |
813 return false; | |
814 } | |
815 } | |
816 return true; | |
817 } | |
818 | |
819 // TODO(dsinclair): Split into a SearchWordForward and SearchWordBackwards | |
820 // and drop the bool. | |
821 FX_BOOL CPDF_SyntaxParser::SearchWord(const CFX_ByteStringC& tag, | |
822 FX_BOOL bWholeWord, | |
823 FX_BOOL bForward, | |
824 FX_FILESIZE limit) { | |
825 int32_t taglen = tag.GetLength(); | |
826 if (taglen == 0) | |
827 return FALSE; | |
828 | |
829 FX_FILESIZE pos = m_Pos; | |
830 int32_t offset = 0; | |
831 if (!bForward) | |
832 offset = taglen - 1; | |
833 | |
834 const uint8_t* tag_data = tag.GetPtr(); | |
835 uint8_t byte; | |
836 while (1) { | |
837 if (bForward) { | |
838 if (limit && pos >= m_Pos + limit) | |
839 return FALSE; | |
840 | |
841 if (!GetCharAt(pos, byte)) | |
842 return FALSE; | |
843 | |
844 } else { | |
845 if (limit && pos <= m_Pos - limit) | |
846 return FALSE; | |
847 | |
848 if (!GetCharAtBackward(pos, byte)) | |
849 return FALSE; | |
850 } | |
851 | |
852 if (byte == tag_data[offset]) { | |
853 if (bForward) { | |
854 offset++; | |
855 if (offset < taglen) { | |
856 pos++; | |
857 continue; | |
858 } | |
859 } else { | |
860 offset--; | |
861 if (offset >= 0) { | |
862 pos--; | |
863 continue; | |
864 } | |
865 } | |
866 | |
867 FX_FILESIZE startpos = bForward ? pos - taglen + 1 : pos; | |
868 if (!bWholeWord || IsWholeWord(startpos, limit, tag, FALSE)) { | |
869 m_Pos = startpos; | |
870 return TRUE; | |
871 } | |
872 } | |
873 | |
874 if (bForward) { | |
875 offset = byte == tag_data[0] ? 1 : 0; | |
876 pos++; | |
877 } else { | |
878 offset = byte == tag_data[taglen - 1] ? taglen - 2 : taglen - 1; | |
879 pos--; | |
880 } | |
881 | |
882 if (pos < 0) | |
883 return FALSE; | |
884 } | |
885 | |
886 return FALSE; | |
887 } | |
888 | |
889 int32_t CPDF_SyntaxParser::SearchMultiWord(const CFX_ByteStringC& tags, | |
890 FX_BOOL bWholeWord, | |
891 FX_FILESIZE limit) { | |
892 int32_t ntags = 1; | |
893 for (int i = 0; i < tags.GetLength(); ++i) { | |
894 if (tags[i] == 0) | |
895 ++ntags; | |
896 } | |
897 | |
898 std::vector<SearchTagRecord> patterns(ntags); | |
899 FX_DWORD start = 0; | |
900 FX_DWORD itag = 0; | |
901 FX_DWORD max_len = 0; | |
902 for (int i = 0; i <= tags.GetLength(); ++i) { | |
903 if (tags[i] == 0) { | |
904 FX_DWORD len = i - start; | |
905 max_len = std::max(len, max_len); | |
906 patterns[itag].m_pTag = tags.GetCStr() + start; | |
907 patterns[itag].m_Len = len; | |
908 patterns[itag].m_Offset = 0; | |
909 start = i + 1; | |
910 ++itag; | |
911 } | |
912 } | |
913 | |
914 const FX_FILESIZE pos_limit = m_Pos + limit; | |
915 for (FX_FILESIZE pos = m_Pos; !limit || pos < pos_limit; ++pos) { | |
916 uint8_t byte; | |
917 if (!GetCharAt(pos, byte)) | |
918 break; | |
919 | |
920 for (int i = 0; i < ntags; ++i) { | |
921 SearchTagRecord& pat = patterns[i]; | |
922 if (pat.m_pTag[pat.m_Offset] != byte) { | |
923 pat.m_Offset = (pat.m_pTag[0] == byte) ? 1 : 0; | |
924 continue; | |
925 } | |
926 | |
927 ++pat.m_Offset; | |
928 if (pat.m_Offset != pat.m_Len) | |
929 continue; | |
930 | |
931 if (!bWholeWord || | |
932 IsWholeWord(pos - pat.m_Len, limit, | |
933 CFX_ByteStringC(pat.m_pTag, pat.m_Len), FALSE)) { | |
934 return i; | |
935 } | |
936 | |
937 pat.m_Offset = (pat.m_pTag[0] == byte) ? 1 : 0; | |
938 } | |
939 } | |
940 return -1; | |
941 } | |
942 | |
943 FX_FILESIZE CPDF_SyntaxParser::FindTag(const CFX_ByteStringC& tag, | |
944 FX_FILESIZE limit) { | |
945 int32_t taglen = tag.GetLength(); | |
946 int32_t match = 0; | |
947 limit += m_Pos; | |
948 FX_FILESIZE startpos = m_Pos; | |
949 | |
950 while (1) { | |
951 uint8_t ch; | |
952 if (!GetNextChar(ch)) | |
953 return -1; | |
954 | |
955 if (ch == tag[match]) { | |
956 match++; | |
957 if (match == taglen) | |
958 return m_Pos - startpos - taglen; | |
959 } else { | |
960 match = ch == tag[0] ? 1 : 0; | |
961 } | |
962 | |
963 if (limit && m_Pos == limit) | |
964 return -1; | |
965 } | |
966 return -1; | |
967 } | |
968 | |
969 void CPDF_SyntaxParser::SetEncrypt( | |
970 std::unique_ptr<CPDF_CryptoHandler> pCryptoHandler) { | |
971 m_pCryptoHandler = std::move(pCryptoHandler); | |
972 } | |
OLD | NEW |