Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(66)

Side by Side Diff: core/fpdfapi/fpdf_parser/cpdf_syntax_parser.cpp

Issue 2392603004: Move core/fpdfapi/fpdf_parser to core/fpdfapi/parser (Closed)
Patch Set: Rebase to master Created 4 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 // Copyright 2016 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7 #include "core/fpdfapi/fpdf_parser/cpdf_syntax_parser.h"
8
9 #include <vector>
10
11 #include "core/fpdfapi/cpdf_modulemgr.h"
12 #include "core/fpdfapi/fpdf_parser/cpdf_array.h"
13 #include "core/fpdfapi/fpdf_parser/cpdf_boolean.h"
14 #include "core/fpdfapi/fpdf_parser/cpdf_crypto_handler.h"
15 #include "core/fpdfapi/fpdf_parser/cpdf_dictionary.h"
16 #include "core/fpdfapi/fpdf_parser/cpdf_name.h"
17 #include "core/fpdfapi/fpdf_parser/cpdf_null.h"
18 #include "core/fpdfapi/fpdf_parser/cpdf_number.h"
19 #include "core/fpdfapi/fpdf_parser/cpdf_reference.h"
20 #include "core/fpdfapi/fpdf_parser/cpdf_stream.h"
21 #include "core/fpdfapi/fpdf_parser/cpdf_string.h"
22 #include "core/fpdfapi/fpdf_parser/fpdf_parser_decode.h"
23 #include "core/fpdfapi/fpdf_parser/fpdf_parser_utility.h"
24 #include "core/fxcrt/fx_ext.h"
25 #include "third_party/base/numerics/safe_math.h"
26
27 namespace {
28
29 struct SearchTagRecord {
30 CFX_ByteStringC m_bsTag;
31 FX_STRSIZE m_Offset;
32 };
33
34 } // namespace
35
36 // static
37 int CPDF_SyntaxParser::s_CurrentRecursionDepth = 0;
38
39 CPDF_SyntaxParser::CPDF_SyntaxParser()
40 : CPDF_SyntaxParser(CFX_WeakPtr<CFX_ByteStringPool>()) {}
41
42 CPDF_SyntaxParser::CPDF_SyntaxParser(
43 const CFX_WeakPtr<CFX_ByteStringPool>& pPool)
44 : m_MetadataObjnum(0),
45 m_pFileAccess(nullptr),
46 m_pFileBuf(nullptr),
47 m_BufSize(CPDF_ModuleMgr::kFileBufSize),
48 m_pPool(pPool) {}
49
50 CPDF_SyntaxParser::~CPDF_SyntaxParser() {
51 FX_Free(m_pFileBuf);
52 }
53
54 FX_BOOL CPDF_SyntaxParser::GetCharAt(FX_FILESIZE pos, uint8_t& ch) {
55 CFX_AutoRestorer<FX_FILESIZE> save_pos(&m_Pos);
56 m_Pos = pos;
57 return GetNextChar(ch);
58 }
59
60 FX_BOOL CPDF_SyntaxParser::GetNextChar(uint8_t& ch) {
61 FX_FILESIZE pos = m_Pos + m_HeaderOffset;
62 if (pos >= m_FileLen)
63 return FALSE;
64
65 if (m_BufOffset >= pos || (FX_FILESIZE)(m_BufOffset + m_BufSize) <= pos) {
66 FX_FILESIZE read_pos = pos;
67 uint32_t read_size = m_BufSize;
68 if ((FX_FILESIZE)read_size > m_FileLen)
69 read_size = (uint32_t)m_FileLen;
70
71 if ((FX_FILESIZE)(read_pos + read_size) > m_FileLen) {
72 if (m_FileLen < (FX_FILESIZE)read_size) {
73 read_pos = 0;
74 read_size = (uint32_t)m_FileLen;
75 } else {
76 read_pos = m_FileLen - read_size;
77 }
78 }
79
80 if (!m_pFileAccess->ReadBlock(m_pFileBuf, read_pos, read_size))
81 return FALSE;
82
83 m_BufOffset = read_pos;
84 }
85 ch = m_pFileBuf[pos - m_BufOffset];
86 m_Pos++;
87 return TRUE;
88 }
89
90 FX_BOOL CPDF_SyntaxParser::GetCharAtBackward(FX_FILESIZE pos, uint8_t& ch) {
91 pos += m_HeaderOffset;
92 if (pos >= m_FileLen)
93 return FALSE;
94
95 if (m_BufOffset >= pos || (FX_FILESIZE)(m_BufOffset + m_BufSize) <= pos) {
96 FX_FILESIZE read_pos;
97 if (pos < (FX_FILESIZE)m_BufSize)
98 read_pos = 0;
99 else
100 read_pos = pos - m_BufSize + 1;
101
102 uint32_t read_size = m_BufSize;
103 if ((FX_FILESIZE)(read_pos + read_size) > m_FileLen) {
104 if (m_FileLen < (FX_FILESIZE)read_size) {
105 read_pos = 0;
106 read_size = (uint32_t)m_FileLen;
107 } else {
108 read_pos = m_FileLen - read_size;
109 }
110 }
111
112 if (!m_pFileAccess->ReadBlock(m_pFileBuf, read_pos, read_size))
113 return FALSE;
114
115 m_BufOffset = read_pos;
116 }
117 ch = m_pFileBuf[pos - m_BufOffset];
118 return TRUE;
119 }
120
121 FX_BOOL CPDF_SyntaxParser::ReadBlock(uint8_t* pBuf, uint32_t size) {
122 if (!m_pFileAccess->ReadBlock(pBuf, m_Pos + m_HeaderOffset, size))
123 return FALSE;
124 m_Pos += size;
125 return TRUE;
126 }
127
// Scans the next token at m_Pos into m_WordBuffer and sets m_WordSize to its
// length. The buffer is not nul-terminated here.
//
// Leading whitespace and '%' comments (which run up to a line-ending
// character) are skipped first. Then:
//  - a delimiter starts a token of its own: '/' is followed by every byte that
//    is "other" or numeric, "<<" and ">>" are recognised as two-byte tokens,
//    and any other delimiter is returned as a single byte;
//  - anything else is gathered up to, but not including, the next delimiter
//    or whitespace byte.
// A byte that was read but does not belong to the token is pushed back by
// decrementing m_Pos. Bytes beyond sizeof(m_WordBuffer) - 1 are consumed but
// not stored.
//
// |bIsNumber| may be null. When provided it is set to true on entry and
// cleared if the token starts with a delimiter or contains a non-numeric
// byte. If the input ends before any token starts, m_WordSize stays 0 and
// |*bIsNumber| stays true.
128 void CPDF_SyntaxParser::GetNextWordInternal(bool* bIsNumber) {
129 m_WordSize = 0;
130 if (bIsNumber)
131 *bIsNumber = true;
132
133 uint8_t ch;
134 if (!GetNextChar(ch))
135 return;
136
// Skip any mix of whitespace runs and comments preceding the token.
137 while (1) {
138 while (PDFCharIsWhitespace(ch)) {
139 if (!GetNextChar(ch))
140 return;
141 }
142
143 if (ch != '%')
144 break;
145
// Inside a comment: discard bytes until a line-ending character.
146 while (1) {
147 if (!GetNextChar(ch))
148 return;
149 if (PDFCharIsLineEnding(ch))
150 break;
151 }
152 }
153
154 if (PDFCharIsDelimiter(ch)) {
155 if (bIsNumber)
156 *bIsNumber = false;
157
158 m_WordBuffer[m_WordSize++] = ch;
159 if (ch == '/') {
// Name token: keep the '/' and append the name bytes that follow it.
160 while (1) {
161 if (!GetNextChar(ch))
162 return;
163
164 if (!PDFCharIsOther(ch) && !PDFCharIsNumeric(ch)) {
165 m_Pos--;  // Push back the byte that ended the name.
166 return;
167 }
168
169 if (m_WordSize < sizeof(m_WordBuffer) - 1)
170 m_WordBuffer[m_WordSize++] = ch;
171 }
172 } else if (ch == '<') {
// "<<" is one token; a lone '<' is returned by itself.
173 if (!GetNextChar(ch))
174 return;
175
176 if (ch == '<')
177 m_WordBuffer[m_WordSize++] = ch;
178 else
179 m_Pos--;
180 } else if (ch == '>') {
// ">>" is one token; a lone '>' is returned by itself.
181 if (!GetNextChar(ch))
182 return;
183
184 if (ch == '>')
185 m_WordBuffer[m_WordSize++] = ch;
186 else
187 m_Pos--;
188 }
189 return;
190 }
191
// Regular token: gather bytes until a delimiter or whitespace is seen.
192 while (1) {
193 if (m_WordSize < sizeof(m_WordBuffer) - 1)
194 m_WordBuffer[m_WordSize++] = ch;
195
196 if (!PDFCharIsNumeric(ch)) {
197 if (bIsNumber)
198 *bIsNumber = false;
199 }
200
201 if (!GetNextChar(ch))
202 return;
203
204 if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) {
205 m_Pos--;  // Leave the terminator for the next call.
206 break;
207 }
208 }
209 }
210
// Reads the body of a literal string and returns its decoded bytes. GetObject()
// calls this right after the "(" token has been read, so scanning starts on
// the first byte of the string and stops after the matching ')'.
//
// |status| drives a small state machine:
//   0 - ordinary text; nested '(' / ')' are copied and tracked in |parlevel|.
//   1 - the previous byte was a backslash.
//   2 - one octal digit of an escape has been read.
//   3 - two octal digits of an escape have been read.
//   4 - the previous bytes were a backslash followed by '\r'.
// A `continue` inside the switch re-dispatches the current byte in state 0
// without reading a new one.
//
// If the input ends before the closing ')', whatever was collected so far is
// returned; an octal escape still pending at that point is not appended.
211 CFX_ByteString CPDF_SyntaxParser::ReadString() {
212 uint8_t ch;
213 if (!GetNextChar(ch))
214 return CFX_ByteString();
215
216 CFX_ByteTextBuf buf;
217 int32_t parlevel = 0;
218 int32_t status = 0;
219 int32_t iEscCode = 0;
220 while (1) {
221 switch (status) {
222 case 0:
223 if (ch == ')') {
224 if (parlevel == 0) {
225 return buf.MakeString();
226 }
227 parlevel--;
228 buf.AppendChar(')');
229 } else if (ch == '(') {
230 parlevel++;
231 buf.AppendChar('(');
232 } else if (ch == '\\') {
233 status = 1;
234 } else {
235 buf.AppendChar(ch);
236 }
237 break;
238 case 1:
239 if (ch >= '0' && ch <= '7') {
240 iEscCode = FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch));
241 status = 2;
242 break;
243 }
244
245 if (ch == 'n') {
246 buf.AppendChar('\n');
247 } else if (ch == 'r') {
248 buf.AppendChar('\r');
249 } else if (ch == 't') {
250 buf.AppendChar('\t');
251 } else if (ch == 'b') {
252 buf.AppendChar('\b');
253 } else if (ch == 'f') {
254 buf.AppendChar('\f');
255 } else if (ch == '\r') {
// Backslash + CR: nothing is emitted; state 4 also drops a following LF.
256 status = 4;
257 break;
258 } else if (ch != '\n') {
// Any other escaped byte stands for itself; backslash + LF emits nothing.
259 buf.AppendChar(ch);
260 }
261 status = 0;
262 break;
263 case 2:
264 if (ch >= '0' && ch <= '7') {
265 iEscCode =
266 iEscCode * 8 + FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch));
267 status = 3;
268 } else {
// Escape ended after one digit: emit it, then reprocess |ch| as text.
269 buf.AppendChar(iEscCode);
270 status = 0;
271 continue;
272 }
273 break;
274 case 3:
275 if (ch >= '0' && ch <= '7') {
276 iEscCode =
277 iEscCode * 8 + FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch));
278 buf.AppendChar(iEscCode);
279 status = 0;
280 } else {
// Escape ended after two digits: emit it, then reprocess |ch| as text.
281 buf.AppendChar(iEscCode);
282 status = 0;
283 continue;
284 }
285 break;
286 case 4:
287 status = 0;
// Only an LF directly after the escaped CR is swallowed.
288 if (ch != '\n')
289 continue;
290 break;
291 }
292
293 if (!GetNextChar(ch))
294 break;
295 }
296
// Reached only when the input ran out before the closing ')'.
297 GetNextChar(ch);
298 return buf.MakeString();
299 }
300
301 CFX_ByteString CPDF_SyntaxParser::ReadHexString() {
302 uint8_t ch;
303 if (!GetNextChar(ch))
304 return CFX_ByteString();
305
306 CFX_ByteTextBuf buf;
307 bool bFirst = true;
308 uint8_t code = 0;
309 while (1) {
310 if (ch == '>')
311 break;
312
313 if (std::isxdigit(ch)) {
314 int val = FXSYS_toHexDigit(ch);
315 if (bFirst) {
316 code = val * 16;
317 } else {
318 code += val;
319 buf.AppendByte(code);
320 }
321 bFirst = !bFirst;
322 }
323
324 if (!GetNextChar(ch))
325 break;
326 }
327 if (!bFirst)
328 buf.AppendByte(code);
329
330 return buf.MakeString();
331 }
332
333 void CPDF_SyntaxParser::ToNextLine() {
334 uint8_t ch;
335 while (GetNextChar(ch)) {
336 if (ch == '\n')
337 break;
338
339 if (ch == '\r') {
340 GetNextChar(ch);
341 if (ch != '\n')
342 --m_Pos;
343 break;
344 }
345 }
346 }
347
348 void CPDF_SyntaxParser::ToNextWord() {
349 uint8_t ch;
350 if (!GetNextChar(ch))
351 return;
352
353 while (1) {
354 while (PDFCharIsWhitespace(ch)) {
355 if (!GetNextChar(ch))
356 return;
357 }
358
359 if (ch != '%')
360 break;
361
362 while (1) {
363 if (!GetNextChar(ch))
364 return;
365 if (PDFCharIsLineEnding(ch))
366 break;
367 }
368 }
369 m_Pos--;
370 }
371
372 CFX_ByteString CPDF_SyntaxParser::GetNextWord(bool* bIsNumber) {
373 GetNextWordInternal(bIsNumber);
374 return CFX_ByteString((const FX_CHAR*)m_WordBuffer, m_WordSize);
375 }
376
377 CFX_ByteString CPDF_SyntaxParser::GetKeyword() {
378 return GetNextWord(nullptr);
379 }
380
// Parses one PDF object starting at m_Pos and returns it as a newly created
// CPDF_Object (caller takes ownership), or nullptr when no object could be
// read. This is the lenient parser; GetObjectForStrict() is the strict one.
//
// |pObjList| is handed to any CPDF_Reference created. |objnum| and |gennum|
// identify the enclosing indirect object and are passed to the crypto handler
// and to ReadStream(). |bDecrypt| controls whether a string read directly by
// this call is decrypted; nested calls made for array elements and dictionary
// values always pass true.
//
// Handled forms: numbers, "N G R" references, true/false/null, literal and
// hex strings, arrays, names, dictionaries and, when a dictionary is followed
// by the "stream" keyword, streams. Any other token yields nullptr; for ">>"
// the position is rewound first so the caller can read it again.
//
// Recursion is bounded: nullptr is returned once s_CurrentRecursionDepth
// exceeds kParserMaxRecursionDepth. The CFX_AutoRestorer guard presumably
// puts the counter back to its previous value when this call returns.
381 CPDF_Object* CPDF_SyntaxParser::GetObject(CPDF_IndirectObjectHolder* pObjList,
382 uint32_t objnum,
383 uint32_t gennum,
384 FX_BOOL bDecrypt) {
385 CFX_AutoRestorer<int> restorer(&s_CurrentRecursionDepth);
386 if (++s_CurrentRecursionDepth > kParserMaxRecursionDepth)
387 return nullptr;
388
389 FX_FILESIZE SavedObjPos = m_Pos;
390 bool bIsNumber;
391 CFX_ByteString word = GetNextWord(&bIsNumber);
392 if (word.GetLength() == 0)
393 return nullptr;
394
395 if (bIsNumber) {
// Look ahead for "<number> R". If it is not there, rewind and return the
// first word as a plain number. Only the first number is used to build the
// reference; the second (generation) number is not.
396 FX_FILESIZE SavedPos = m_Pos;
397 CFX_ByteString nextword = GetNextWord(&bIsNumber);
398 if (bIsNumber) {
399 CFX_ByteString nextword2 = GetNextWord(nullptr);
400 if (nextword2 == "R")
401 return new CPDF_Reference(pObjList, FXSYS_atoui(word.c_str()));
402 }
403 m_Pos = SavedPos;
404 return new CPDF_Number(word.AsStringC());
405 }
406
407 if (word == "true" || word == "false")
408 return new CPDF_Boolean(word == "true");
409
410 if (word == "null")
411 return new CPDF_Null;
412
413 if (word == "(") {
414 CFX_ByteString str = ReadString();
415 if (m_pCryptoHandler && bDecrypt)
416 m_pCryptoHandler->Decrypt(objnum, gennum, str);
417 return new CPDF_String(MaybeIntern(str), FALSE);
418 }
419
420 if (word == "<") {
421 CFX_ByteString str = ReadHexString();
422 if (m_pCryptoHandler && bDecrypt)
423 m_pCryptoHandler->Decrypt(objnum, gennum, str);
424 return new CPDF_String(MaybeIntern(str), TRUE);
425 }
426
427 if (word == "[") {
// Elements are read until a nested call returns nullptr. That happens on
// "]" but also on any token GetObject() does not recognise and at the end
// of the input, so an unterminated array is accepted here.
428 CPDF_Array* pArray = new CPDF_Array;
429 while (CPDF_Object* pObj = GetObject(pObjList, objnum, gennum, true))
430 pArray->Add(pObj);
431
432 return pArray;
433 }
434
435 if (word[0] == '/') {
// The name is taken from m_WordBuffer, skipping the leading '/'.
436 return new CPDF_Name(MaybeIntern(
437 PDF_NameDecode(CFX_ByteStringC(m_WordBuffer + 1, m_WordSize - 1))));
438 }
439
440 if (word == "<<") {
// nKeys is incremented below but not otherwise used in this function.
441 int32_t nKeys = 0;
// Position just after a "/Contents" key, or 0 if none was seen.
442 FX_FILESIZE dwSignValuePos = 0;
443
444 std::unique_ptr<CPDF_Dictionary, ReleaseDeleter<CPDF_Dictionary>> pDict(
445 new CPDF_Dictionary(m_pPool));
446 while (1) {
447 CFX_ByteString key = GetNextWord(nullptr);
448 if (key.IsEmpty())
449 return nullptr;
450
// Start of the word just read, used to un-read "endobj" below.
451 FX_FILESIZE SavedPos = m_Pos - key.GetLength();
452 if (key == ">>")
453 break;
454
455 if (key == "endobj") {
// The dictionary was never closed: stop and leave "endobj" unread.
456 m_Pos = SavedPos;
457 break;
458 }
459
// Tokens that are not names are skipped while looking for the next key.
460 if (key[0] != '/')
461 continue;
462
463 ++nKeys;
464 key = PDF_NameDecode(key);
465 if (key.IsEmpty())
466 continue;
467
468 if (key == "/Contents")
469 dwSignValuePos = m_Pos;
470
471 CPDF_Object* pObj = GetObject(pObjList, objnum, gennum, true);
472 if (!pObj)
473 continue;
474
// Store the entry under the key without its leading '/'.
475 CFX_ByteString keyNoSlash(key.raw_str() + 1, key.GetLength() - 1);
476 pDict->SetFor(keyNoSlash, pObj);
477 }
478
479 // Only when this is a signature dictionary and has contents, we reset the
480 // contents to the un-decrypted form.
481 if (pDict->IsSignatureDict() && dwSignValuePos) {
// Re-parse the value with bDecrypt == false; the guard presumably restores
// m_Pos to the end of the dictionary when this scope ends.
482 CFX_AutoRestorer<FX_FILESIZE> save_pos(&m_Pos);
483 m_Pos = dwSignValuePos;
484 pDict->SetFor("Contents", GetObject(pObjList, objnum, gennum, false));
485 }
486
// A dictionary followed by "stream" is a stream object; otherwise the
// look-ahead word is un-read and the dictionary itself is returned.
487 FX_FILESIZE SavedPos = m_Pos;
488 CFX_ByteString nextword = GetNextWord(nullptr);
489 if (nextword != "stream") {
490 m_Pos = SavedPos;
491 return pDict.release();
492 }
493 return ReadStream(pDict.release(), objnum, gennum);
494 }
495
// Un-read a stray ">>" so the caller that is parsing the enclosing
// dictionary can see it.
496 if (word == ">>")
497 m_Pos = SavedObjPos;
498
499 return nullptr;
500 }
501
// Strict counterpart of GetObject(): parses one object at m_Pos and returns
// it (caller takes ownership) or nullptr. Differences visible in this
// function:
//  - strings are decrypted whenever a crypto handler is set (there is no
//    bDecrypt switch);
//  - an array is returned only if the token that ended it is ']';
//  - inside a dictionary, a key whose value cannot be parsed makes the whole
//    call fail after the rest of the current line has been skipped;
//  - signature dictionaries get no special treatment.
// Array elements and dictionary values are still parsed with the lenient
// GetObject().
//
// |pObjList|, |objnum| and |gennum| have the same roles as in GetObject().
// Recursion depth is limited via s_CurrentRecursionDepth in the same way.
502 CPDF_Object* CPDF_SyntaxParser::GetObjectForStrict(
503 CPDF_IndirectObjectHolder* pObjList,
504 uint32_t objnum,
505 uint32_t gennum) {
506 CFX_AutoRestorer<int> restorer(&s_CurrentRecursionDepth);
507 if (++s_CurrentRecursionDepth > kParserMaxRecursionDepth)
508 return nullptr;
509
510 FX_FILESIZE SavedObjPos = m_Pos;
511 bool bIsNumber;
512 CFX_ByteString word = GetNextWord(&bIsNumber);
513 if (word.GetLength() == 0)
514 return nullptr;
515
516 if (bIsNumber) {
// Look ahead for "<number> R"; otherwise rewind and return a plain number.
517 FX_FILESIZE SavedPos = m_Pos;
518 CFX_ByteString nextword = GetNextWord(&bIsNumber);
519 if (bIsNumber) {
520 CFX_ByteString nextword2 = GetNextWord(nullptr);
521 if (nextword2 == "R")
522 return new CPDF_Reference(pObjList, FXSYS_atoui(word.c_str()));
523 }
524 m_Pos = SavedPos;
525 return new CPDF_Number(word.AsStringC());
526 }
527
528 if (word == "true" || word == "false")
529 return new CPDF_Boolean(word == "true");
530
531 if (word == "null")
532 return new CPDF_Null;
533
534 if (word == "(") {
535 CFX_ByteString str = ReadString();
536 if (m_pCryptoHandler)
537 m_pCryptoHandler->Decrypt(objnum, gennum, str);
538 return new CPDF_String(MaybeIntern(str), FALSE);
539 }
540
541 if (word == "<") {
542 CFX_ByteString str = ReadHexString();
543 if (m_pCryptoHandler)
544 m_pCryptoHandler->Decrypt(objnum, gennum, str);
545 return new CPDF_String(MaybeIntern(str), TRUE);
546 }
547
548 if (word == "[") {
549 std::unique_ptr<CPDF_Array, ReleaseDeleter<CPDF_Array>> pArray(
550 new CPDF_Array);
551 while (CPDF_Object* pObj = GetObject(pObjList, objnum, gennum, true))
552 pArray->Add(pObj);
553
// m_WordBuffer still holds the token on which the element loop stopped;
// anything other than ']' means the array was not properly closed.
554 return m_WordBuffer[0] == ']' ? pArray.release() : nullptr;
555 }
556
557 if (word[0] == '/') {
558 return new CPDF_Name(MaybeIntern(
559 PDF_NameDecode(CFX_ByteStringC(m_WordBuffer + 1, m_WordSize - 1))));
560 }
561
562 if (word == "<<") {
563 std::unique_ptr<CPDF_Dictionary, ReleaseDeleter<CPDF_Dictionary>> pDict(
564 new CPDF_Dictionary(m_pPool));
565 while (1) {
566 FX_FILESIZE SavedPos = m_Pos;
567 CFX_ByteString key = GetNextWord(nullptr);
568 if (key.IsEmpty())
569 return nullptr;
570
571 if (key == ">>")
572 break;
573
574 if (key == "endobj") {
// Unterminated dictionary: rewind to before the word and stop.
575 m_Pos = SavedPos;
576 break;
577 }
578
579 if (key[0] != '/')
580 continue;
581
582 key = PDF_NameDecode(key);
583 std::unique_ptr<CPDF_Object, ReleaseDeleter<CPDF_Object>> obj(
584 GetObject(pObjList, objnum, gennum, true));
585 if (!obj) {
// No value for this key: consume up to the next LF/CR byte and fail.
586 uint8_t ch;
587 while (GetNextChar(ch) && ch != 0x0A && ch != 0x0D) {
588 continue;
589 }
590 return nullptr;
591 }
592
// A key consisting of just "/" is parsed but its value is dropped.
593 if (key.GetLength() > 1) {
594 pDict->SetFor(CFX_ByteString(key.c_str() + 1, key.GetLength() - 1),
595 obj.release());
596 }
597 }
598
// A dictionary followed by "stream" becomes a stream object.
599 FX_FILESIZE SavedPos = m_Pos;
600 CFX_ByteString nextword = GetNextWord(nullptr);
601 if (nextword != "stream") {
602 m_Pos = SavedPos;
603 return pDict.release();
604 }
605
606 return ReadStream(pDict.release(), objnum, gennum);
607 }
608
// Un-read a stray ">>" for the caller.
609 if (word == ">>")
610 m_Pos = SavedObjPos;
611
612 return nullptr;
613 }
614
615 unsigned int CPDF_SyntaxParser::ReadEOLMarkers(FX_FILESIZE pos) {
616 unsigned char byte1 = 0;
617 unsigned char byte2 = 0;
618
619 GetCharAt(pos, byte1);
620 GetCharAt(pos + 1, byte2);
621
622 if (byte1 == '\r' && byte2 == '\n')
623 return 2;
624
625 if (byte1 == '\r' || byte1 == '\n')
626 return 1;
627
628 return 0;
629 }
630
// Reads the data of a stream whose dictionary |pDict| has just been parsed
// and whose "stream" keyword has been consumed, and returns a new CPDF_Stream
// built from the data and |pDict|. On failure |pDict| is released and nullptr
// is returned.
//
// |objnum| / |gennum| identify the object for decryption. The object whose
// number equals m_MetadataObjnum is read without the crypto handler.
//
// Length handling:
//  - The "Length" entry is used when it is present and is either not a
//    reference, or a reference that has an object list and points at a
//    different object number than |objnum|.
//  - Without a crypto handler the length is verified: the word found after
//    |len| bytes (plus an optional EOL) must start with "endstream".
//    Otherwise the data is scanned for whole-word "endstream" / "endobj"
//    keywords, the nearer one marks the end of the data, one trailing EOL
//    marker is excluded, and "Length" in |pDict| is overwritten.
//  - With a crypto handler the length from the dictionary is used as is.
//    NOTE(review): no check against the file size is visible on that path.
//
// On return m_Pos is after the word following the data, unless that word is
// "endobj" followed by an EOL marker, in which case m_Pos is rewound to the
// end of the data so the caller can read "endobj" itself.
631 CPDF_Stream* CPDF_SyntaxParser::ReadStream(CPDF_Dictionary* pDict,
632 uint32_t objnum,
633 uint32_t gennum) {
634 CPDF_Object* pLenObj = pDict->GetObjectFor("Length");
635 FX_FILESIZE len = -1;
636 CPDF_Reference* pLenObjRef = ToReference(pLenObj);
637
638 bool differingObjNum = !pLenObjRef || (pLenObjRef->GetObjList() &&
639 pLenObjRef->GetRefObjNum() != objnum);
640 if (pLenObj && differingObjNum)
641 len = pLenObj->GetInteger();
642
643 // Locate the start of stream.
644 ToNextLine();
645 FX_FILESIZE streamStartPos = m_Pos;
646
647 const CFX_ByteStringC kEndStreamStr("endstream");
648 const CFX_ByteStringC kEndObjStr("endobj");
649
650 CPDF_CryptoHandler* pCryptoHandler =
651 objnum == (uint32_t)m_MetadataObjnum ? nullptr : m_pCryptoHandler.get();
652 if (!pCryptoHandler) {
653 FX_BOOL bSearchForKeyword = TRUE;
654 if (len >= 0) {
// Jump over the declared data, but only if that stays inside the file
// (the addition is overflow-checked).
655 pdfium::base::CheckedNumeric<FX_FILESIZE> pos = m_Pos;
656 pos += len;
657 if (pos.IsValid() && pos.ValueOrDie() < m_FileLen)
658 m_Pos = pos.ValueOrDie();
659
660 m_Pos += ReadEOLMarkers(m_Pos);
// Clear the part of the word buffer that is compared below so a shorter
// word cannot match on stale bytes.
661 FXSYS_memset(m_WordBuffer, 0, kEndStreamStr.GetLength() + 1);
662 GetNextWordInternal(nullptr);
663 // Earlier version of PDF specification doesn't require EOL marker before
664 // 'endstream' keyword. If keyword 'endstream' follows the bytes in
665 // specified length, it signals the end of stream.
666 if (FXSYS_memcmp(m_WordBuffer, kEndStreamStr.raw_str(),
667 kEndStreamStr.GetLength()) == 0) {
668 bSearchForKeyword = FALSE;
669 }
670 }
671
672 if (bSearchForKeyword) {
673 // If len is not available, len needs to be calculated
674 // by searching the keywords "endstream" or "endobj".
675 m_Pos = streamStartPos;
676 FX_FILESIZE endStreamOffset = 0;
677 while (endStreamOffset >= 0) {
678 endStreamOffset = FindTag(kEndStreamStr, 0);
679
680 // Can't find "endstream".
681 if (endStreamOffset < 0)
682 break;
683
684 // Stop searching when "endstream" is found.
685 if (IsWholeWord(m_Pos - kEndStreamStr.GetLength(), m_FileLen,
686 kEndStreamStr, TRUE)) {
// Convert to an offset relative to the start of the stream data.
687 endStreamOffset = m_Pos - streamStartPos - kEndStreamStr.GetLength();
688 break;
689 }
690 }
691
// Same search again from the start of the data, this time for "endobj".
692 m_Pos = streamStartPos;
693 FX_FILESIZE endObjOffset = 0;
694 while (endObjOffset >= 0) {
695 endObjOffset = FindTag(kEndObjStr, 0);
696
697 // Can't find "endobj".
698 if (endObjOffset < 0)
699 break;
700
701 // Stop searching when "endobj" is found.
702 if (IsWholeWord(m_Pos - kEndObjStr.GetLength(), m_FileLen, kEndObjStr,
703 TRUE)) {
704 endObjOffset = m_Pos - streamStartPos - kEndObjStr.GetLength();
705 break;
706 }
707 }
708
709 // Can't find "endstream" or "endobj".
710 if (endStreamOffset < 0 && endObjOffset < 0) {
711 pDict->Release();
712 return nullptr;
713 }
714
// Use whichever keyword comes first as the end of the data.
715 if (endStreamOffset < 0 && endObjOffset >= 0) {
716 // Correct the position of end stream.
717 endStreamOffset = endObjOffset;
718 } else if (endStreamOffset >= 0 && endObjOffset < 0) {
719 // Correct the position of end obj.
720 endObjOffset = endStreamOffset;
721 } else if (endStreamOffset > endObjOffset) {
722 endStreamOffset = endObjOffset;
723 }
724
// Exclude the EOL marker (2 bytes for CR LF, else 1 byte) that sits
// directly before the keyword, if there is one.
725 len = endStreamOffset;
726 int numMarkers = ReadEOLMarkers(streamStartPos + endStreamOffset - 2);
727 if (numMarkers == 2) {
728 len -= 2;
729 } else {
730 numMarkers = ReadEOLMarkers(streamStartPos + endStreamOffset - 1);
731 if (numMarkers == 1) {
732 len -= 1;
733 }
734 }
735
736 if (len < 0) {
737 pDict->Release();
738 return nullptr;
739 }
740 pDict->SetIntegerFor("Length", len);
741 }
742 m_Pos = streamStartPos;
743 }
744
// Reached with len < 0 only on the crypto-handler path, when no usable
// "Length" entry was found.
745 if (len < 0) {
746 pDict->Release();
747 return nullptr;
748 }
749
750 uint8_t* pData = nullptr;
751 if (len > 0) {
752 pData = FX_Alloc(uint8_t, len);
// NOTE(review): the result of ReadBlock() is not checked here.
753 ReadBlock(pData, len);
754 if (pCryptoHandler) {
755 CFX_BinaryBuf dest_buf;
756 dest_buf.EstimateSize(pCryptoHandler->DecryptGetSize(len));
757
758 void* context = pCryptoHandler->DecryptStart(objnum, gennum);
759 pCryptoHandler->DecryptStream(context, pData, len, dest_buf);
760 pCryptoHandler->DecryptFinish(context, dest_buf);
761
// Replace the raw bytes with the decrypted buffer; DetachBuffer() is
// called so that dest_buf no longer refers to the memory handed on below.
762 FX_Free(pData);
763 pData = dest_buf.GetBuffer();
764 len = dest_buf.GetSize();
765 dest_buf.DetachBuffer();
766 }
767 }
768
769 CPDF_Stream* pStream = new CPDF_Stream(pData, len, pDict);
// From here on streamStartPos is reused to remember the end of the data.
770 streamStartPos = m_Pos;
771 FXSYS_memset(m_WordBuffer, 0, kEndObjStr.GetLength() + 1);
772
773 GetNextWordInternal(nullptr);
774
// If the word after the data is exactly "endobj" followed by an EOL marker,
// rewind so that the caller reads "endobj" itself.
775 int numMarkers = ReadEOLMarkers(m_Pos);
776 if (m_WordSize == static_cast<unsigned int>(kEndObjStr.GetLength()) &&
777 numMarkers != 0 &&
778 FXSYS_memcmp(m_WordBuffer, kEndObjStr.raw_str(),
779 kEndObjStr.GetLength()) == 0) {
780 m_Pos = streamStartPos;
781 }
782 return pStream;
783 }
784
785 void CPDF_SyntaxParser::InitParser(IFX_FileRead* pFileAccess,
786 uint32_t HeaderOffset) {
787 FX_Free(m_pFileBuf);
788
789 m_pFileBuf = FX_Alloc(uint8_t, m_BufSize);
790 m_HeaderOffset = HeaderOffset;
791 m_FileLen = pFileAccess->GetSize();
792 m_Pos = 0;
793 m_pFileAccess = pFileAccess;
794 m_BufOffset = 0;
795 pFileAccess->ReadBlock(
796 m_pFileBuf, 0,
797 (size_t)((FX_FILESIZE)m_BufSize > m_FileLen ? m_FileLen : m_BufSize));
798 }
799
800 uint32_t CPDF_SyntaxParser::GetDirectNum() {
801 bool bIsNumber;
802 GetNextWordInternal(&bIsNumber);
803 if (!bIsNumber)
804 return 0;
805
806 m_WordBuffer[m_WordSize] = 0;
807 return FXSYS_atoui(reinterpret_cast<const FX_CHAR*>(m_WordBuffer));
808 }
809
810 bool CPDF_SyntaxParser::IsWholeWord(FX_FILESIZE startpos,
811 FX_FILESIZE limit,
812 const CFX_ByteStringC& tag,
813 FX_BOOL checkKeyword) {
814 const uint32_t taglen = tag.GetLength();
815
816 bool bCheckLeft = !PDFCharIsDelimiter(tag[0]) && !PDFCharIsWhitespace(tag[0]);
817 bool bCheckRight = !PDFCharIsDelimiter(tag[taglen - 1]) &&
818 !PDFCharIsWhitespace(tag[taglen - 1]);
819
820 uint8_t ch;
821 if (bCheckRight && startpos + (int32_t)taglen <= limit &&
822 GetCharAt(startpos + (int32_t)taglen, ch)) {
823 if (PDFCharIsNumeric(ch) || PDFCharIsOther(ch) ||
824 (checkKeyword && PDFCharIsDelimiter(ch))) {
825 return false;
826 }
827 }
828
829 if (bCheckLeft && startpos > 0 && GetCharAt(startpos - 1, ch)) {
830 if (PDFCharIsNumeric(ch) || PDFCharIsOther(ch) ||
831 (checkKeyword && PDFCharIsDelimiter(ch))) {
832 return false;
833 }
834 }
835 return true;
836 }
837
// Searches for |tag| starting at m_Pos, forwards when |bForward| is set and
// backwards otherwise. When |bWholeWord| is set, a candidate is accepted only
// if IsWholeWord() approves it. A non-zero |limit| bounds how far from m_Pos
// the scan may go; 0 means no bound.
//
// Returns TRUE and sets m_Pos to the first byte of the match on success.
// Returns FALSE for an empty tag, when the bound or either end of the file is
// reached, or when a read fails; m_Pos is not assigned on those paths.
//
// NOTE(review): after a mismatch the matcher restarts at offset 0 or 1 only,
// so an occurrence that overlaps a partially matched prefix can be missed.
838 // TODO(dsinclair): Split into a SearchWordForward and SearchWordBackwards
839 // and drop the bool.
840 FX_BOOL CPDF_SyntaxParser::SearchWord(const CFX_ByteStringC& tag,
841 FX_BOOL bWholeWord,
842 FX_BOOL bForward,
843 FX_FILESIZE limit) {
844 int32_t taglen = tag.GetLength();
845 if (taglen == 0)
846 return FALSE;
847
848 FX_FILESIZE pos = m_Pos;
// |offset| is the index in the tag of the byte expected next: it counts up
// from 0 when searching forwards and down from the last byte otherwise.
849 int32_t offset = 0;
850 if (!bForward)
851 offset = taglen - 1;
852
853 const uint8_t* tag_data = tag.raw_str();
854 uint8_t byte;
855 while (1) {
856 if (bForward) {
857 if (limit && pos >= m_Pos + limit)
858 return FALSE;
859
860 if (!GetCharAt(pos, byte))
861 return FALSE;
862
863 } else {
864 if (limit && pos <= m_Pos - limit)
865 return FALSE;
866
867 if (!GetCharAtBackward(pos, byte))
868 return FALSE;
869 }
870
871 if (byte == tag_data[offset]) {
872 if (bForward) {
873 offset++;
874 if (offset < taglen) {
875 pos++;
876 continue;
877 }
878 } else {
879 offset--;
880 if (offset >= 0) {
881 pos--;
882 continue;
883 }
884 }
885
// The whole tag matched. |pos| is on its last byte when going forwards
// and on its first byte when going backwards.
886 FX_FILESIZE startpos = bForward ? pos - taglen + 1 : pos;
887 if (!bWholeWord || IsWholeWord(startpos, limit, tag, FALSE)) {
888 m_Pos = startpos;
889 return TRUE;
890 }
891 }
892
// Mismatch, or a match rejected as not being a whole word: restart the
// pattern, counting the current byte if it equals the pattern's first
// (forward) or last (backward) byte.
893 if (bForward) {
894 offset = byte == tag_data[0] ? 1 : 0;
895 pos++;
896 } else {
897 offset = byte == tag_data[taglen - 1] ? taglen - 2 : taglen - 1;
898 pos--;
899 }
900
// A backward search has run past the start of the file.
901 if (pos < 0)
902 return FALSE;
903 }
904
905 return FALSE;
906 }
907
908 int32_t CPDF_SyntaxParser::SearchMultiWord(const CFX_ByteStringC& tags,
909 FX_BOOL bWholeWord,
910 FX_FILESIZE limit) {
911 int32_t ntags = 1;
912 for (int i = 0; i < tags.GetLength(); ++i) {
913 if (tags[i] == 0)
914 ++ntags;
915 }
916
917 // Ensure that the input byte string happens to be nul-terminated. This
918 // need not be the case, but the loop below uses this guarantee to put
919 // the last pattern into the vector.
920 ASSERT(tags[tags.GetLength()] == 0);
921 std::vector<SearchTagRecord> patterns(ntags);
922 uint32_t start = 0;
923 uint32_t itag = 0;
924 uint32_t max_len = 0;
925 for (int i = 0; i <= tags.GetLength(); ++i) {
926 if (tags[i] == 0) {
927 uint32_t len = i - start;
928 max_len = std::max(len, max_len);
929 patterns[itag].m_bsTag = tags.Mid(start, len);
930 patterns[itag].m_Offset = 0;
931 start = i + 1;
932 ++itag;
933 }
934 }
935
936 const FX_FILESIZE pos_limit = m_Pos + limit;
937 for (FX_FILESIZE pos = m_Pos; !limit || pos < pos_limit; ++pos) {
938 uint8_t byte;
939 if (!GetCharAt(pos, byte))
940 break;
941
942 for (int i = 0; i < ntags; ++i) {
943 SearchTagRecord& pat = patterns[i];
944 if (pat.m_bsTag[pat.m_Offset] != byte) {
945 pat.m_Offset = (pat.m_bsTag[0] == byte) ? 1 : 0;
946 continue;
947 }
948
949 ++pat.m_Offset;
950 if (pat.m_Offset != pat.m_bsTag.GetLength())
951 continue;
952
953 if (!bWholeWord || IsWholeWord(pos - pat.m_bsTag.GetLength(), limit,
954 pat.m_bsTag, FALSE)) {
955 return i;
956 }
957
958 pat.m_Offset = (pat.m_bsTag[0] == byte) ? 1 : 0;
959 }
960 }
961 return -1;
962 }
963
964 FX_FILESIZE CPDF_SyntaxParser::FindTag(const CFX_ByteStringC& tag,
965 FX_FILESIZE limit) {
966 int32_t taglen = tag.GetLength();
967 int32_t match = 0;
968 limit += m_Pos;
969 FX_FILESIZE startpos = m_Pos;
970
971 while (1) {
972 uint8_t ch;
973 if (!GetNextChar(ch))
974 return -1;
975
976 if (ch == tag[match]) {
977 match++;
978 if (match == taglen)
979 return m_Pos - startpos - taglen;
980 } else {
981 match = ch == tag[0] ? 1 : 0;
982 }
983
984 if (limit && m_Pos == limit)
985 return -1;
986 }
987 return -1;
988 }
989
990 void CPDF_SyntaxParser::SetEncrypt(
991 std::unique_ptr<CPDF_CryptoHandler> pCryptoHandler) {
992 m_pCryptoHandler = std::move(pCryptoHandler);
993 }
994
995 CFX_ByteString CPDF_SyntaxParser::MaybeIntern(const CFX_ByteString& str) {
996 return m_pPool ? m_pPool->Intern(str) : str;
997 }
OLDNEW
« no previous file with comments | « core/fpdfapi/fpdf_parser/cpdf_syntax_parser.h ('k') | core/fpdfapi/fpdf_parser/cpdf_syntax_parser_unittest.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698