Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(517)

Side by Side Diff: core/src/fpdfapi/fpdf_parser/cpdf_syntax_parser.cpp

Issue 1775023003: Re-land "Split CPDF_SyntaxParser into its own named .cpp/.h files." (Closed) Base URL: https://pdfium.googlesource.com/pdfium.git@master
Patch Set: Include <vector>. Created 4 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 // Copyright 2016 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7 #include "core/src/fpdfapi/fpdf_parser/cpdf_syntax_parser.h"
8
9 #include <vector>
10
11 #include "core/include/fpdfapi/fpdf_module.h"
12 #include "core/include/fpdfapi/fpdf_parser.h"
13 #include "core/include/fxcrt/fx_ext.h"
14 #include "third_party/base/numerics/safe_math.h"
15
16 namespace {
17
18 struct SearchTagRecord {
19 const char* m_pTag;
20 FX_DWORD m_Len;
21 FX_DWORD m_Offset;
22 };
23
24 } // namespace
25
26 // static
27 int CPDF_SyntaxParser::s_CurrentRecursionDepth = 0;
28
29 CPDF_SyntaxParser::CPDF_SyntaxParser()
30 : m_MetadataObjnum(0),
31 m_pFileAccess(nullptr),
32 m_pFileBuf(nullptr),
33 m_BufSize(CPDF_ModuleMgr::kFileBufSize) {}
34
35 CPDF_SyntaxParser::~CPDF_SyntaxParser() {
36 FX_Free(m_pFileBuf);
37 }
38
39 FX_BOOL CPDF_SyntaxParser::GetCharAt(FX_FILESIZE pos, uint8_t& ch) {
40 CFX_AutoRestorer<FX_FILESIZE> save_pos(&m_Pos);
41 m_Pos = pos;
42 return GetNextChar(ch);
43 }
44
45 FX_BOOL CPDF_SyntaxParser::GetNextChar(uint8_t& ch) {
46 FX_FILESIZE pos = m_Pos + m_HeaderOffset;
47 if (pos >= m_FileLen)
48 return FALSE;
49
50 if (m_BufOffset >= pos || (FX_FILESIZE)(m_BufOffset + m_BufSize) <= pos) {
51 FX_FILESIZE read_pos = pos;
52 FX_DWORD read_size = m_BufSize;
53 if ((FX_FILESIZE)read_size > m_FileLen)
54 read_size = (FX_DWORD)m_FileLen;
55
56 if ((FX_FILESIZE)(read_pos + read_size) > m_FileLen) {
57 if (m_FileLen < (FX_FILESIZE)read_size) {
58 read_pos = 0;
59 read_size = (FX_DWORD)m_FileLen;
60 } else {
61 read_pos = m_FileLen - read_size;
62 }
63 }
64
65 if (!m_pFileAccess->ReadBlock(m_pFileBuf, read_pos, read_size))
66 return FALSE;
67
68 m_BufOffset = read_pos;
69 }
70 ch = m_pFileBuf[pos - m_BufOffset];
71 m_Pos++;
72 return TRUE;
73 }
74
75 FX_BOOL CPDF_SyntaxParser::GetCharAtBackward(FX_FILESIZE pos, uint8_t& ch) {
76 pos += m_HeaderOffset;
77 if (pos >= m_FileLen)
78 return FALSE;
79
80 if (m_BufOffset >= pos || (FX_FILESIZE)(m_BufOffset + m_BufSize) <= pos) {
81 FX_FILESIZE read_pos;
82 if (pos < (FX_FILESIZE)m_BufSize)
83 read_pos = 0;
84 else
85 read_pos = pos - m_BufSize + 1;
86
87 FX_DWORD read_size = m_BufSize;
88 if ((FX_FILESIZE)(read_pos + read_size) > m_FileLen) {
89 if (m_FileLen < (FX_FILESIZE)read_size) {
90 read_pos = 0;
91 read_size = (FX_DWORD)m_FileLen;
92 } else {
93 read_pos = m_FileLen - read_size;
94 }
95 }
96
97 if (!m_pFileAccess->ReadBlock(m_pFileBuf, read_pos, read_size))
98 return FALSE;
99
100 m_BufOffset = read_pos;
101 }
102 ch = m_pFileBuf[pos - m_BufOffset];
103 return TRUE;
104 }
105
106 FX_BOOL CPDF_SyntaxParser::ReadBlock(uint8_t* pBuf, FX_DWORD size) {
107 if (!m_pFileAccess->ReadBlock(pBuf, m_Pos + m_HeaderOffset, size))
108 return FALSE;
109 m_Pos += size;
110 return TRUE;
111 }
112
113 void CPDF_SyntaxParser::GetNextWordInternal(bool* bIsNumber) {
114 m_WordSize = 0;
115 if (bIsNumber)
116 *bIsNumber = true;
117
118 uint8_t ch;
119 if (!GetNextChar(ch))
120 return;
121
122 while (1) {
123 while (PDFCharIsWhitespace(ch)) {
124 if (!GetNextChar(ch))
125 return;
126 }
127
128 if (ch != '%')
129 break;
130
131 while (1) {
132 if (!GetNextChar(ch))
133 return;
134 if (PDFCharIsLineEnding(ch))
135 break;
136 }
137 }
138
139 if (PDFCharIsDelimiter(ch)) {
140 if (bIsNumber)
141 *bIsNumber = false;
142
143 m_WordBuffer[m_WordSize++] = ch;
144 if (ch == '/') {
145 while (1) {
146 if (!GetNextChar(ch))
147 return;
148
149 if (!PDFCharIsOther(ch) && !PDFCharIsNumeric(ch)) {
150 m_Pos--;
151 return;
152 }
153
154 if (m_WordSize < sizeof(m_WordBuffer) - 1)
155 m_WordBuffer[m_WordSize++] = ch;
156 }
157 } else if (ch == '<') {
158 if (!GetNextChar(ch))
159 return;
160
161 if (ch == '<')
162 m_WordBuffer[m_WordSize++] = ch;
163 else
164 m_Pos--;
165 } else if (ch == '>') {
166 if (!GetNextChar(ch))
167 return;
168
169 if (ch == '>')
170 m_WordBuffer[m_WordSize++] = ch;
171 else
172 m_Pos--;
173 }
174 return;
175 }
176
177 while (1) {
178 if (m_WordSize < sizeof(m_WordBuffer) - 1)
179 m_WordBuffer[m_WordSize++] = ch;
180
181 if (!PDFCharIsNumeric(ch)) {
182 if (bIsNumber)
183 *bIsNumber = false;
184 }
185
186 if (!GetNextChar(ch))
187 return;
188
189 if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) {
190 m_Pos--;
191 break;
192 }
193 }
194 }
195
196 CFX_ByteString CPDF_SyntaxParser::ReadString() {
197 uint8_t ch;
198 if (!GetNextChar(ch))
199 return CFX_ByteString();
200
201 CFX_ByteTextBuf buf;
202 int32_t parlevel = 0;
203 int32_t status = 0;
204 int32_t iEscCode = 0;
205 while (1) {
206 switch (status) {
207 case 0:
208 if (ch == ')') {
209 if (parlevel == 0) {
210 return buf.GetByteString();
211 }
212 parlevel--;
213 buf.AppendChar(')');
214 } else if (ch == '(') {
215 parlevel++;
216 buf.AppendChar('(');
217 } else if (ch == '\\') {
218 status = 1;
219 } else {
220 buf.AppendChar(ch);
221 }
222 break;
223 case 1:
224 if (ch >= '0' && ch <= '7') {
225 iEscCode = FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch));
226 status = 2;
227 break;
228 }
229
230 if (ch == 'n') {
231 buf.AppendChar('\n');
232 } else if (ch == 'r') {
233 buf.AppendChar('\r');
234 } else if (ch == 't') {
235 buf.AppendChar('\t');
236 } else if (ch == 'b') {
237 buf.AppendChar('\b');
238 } else if (ch == 'f') {
239 buf.AppendChar('\f');
240 } else if (ch == '\r') {
241 status = 4;
242 break;
243 } else if (ch != '\n') {
244 buf.AppendChar(ch);
245 }
246 status = 0;
247 break;
248 case 2:
249 if (ch >= '0' && ch <= '7') {
250 iEscCode =
251 iEscCode * 8 + FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch));
252 status = 3;
253 } else {
254 buf.AppendChar(iEscCode);
255 status = 0;
256 continue;
257 }
258 break;
259 case 3:
260 if (ch >= '0' && ch <= '7') {
261 iEscCode =
262 iEscCode * 8 + FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch));
263 buf.AppendChar(iEscCode);
264 status = 0;
265 } else {
266 buf.AppendChar(iEscCode);
267 status = 0;
268 continue;
269 }
270 break;
271 case 4:
272 status = 0;
273 if (ch != '\n')
274 continue;
275 break;
276 }
277
278 if (!GetNextChar(ch))
279 break;
280 }
281
282 GetNextChar(ch);
283 return buf.GetByteString();
284 }
285
286 CFX_ByteString CPDF_SyntaxParser::ReadHexString() {
287 uint8_t ch;
288 if (!GetNextChar(ch))
289 return CFX_ByteString();
290
291 CFX_ByteTextBuf buf;
292 bool bFirst = true;
293 uint8_t code = 0;
294 while (1) {
295 if (ch == '>')
296 break;
297
298 if (std::isxdigit(ch)) {
299 int val = FXSYS_toHexDigit(ch);
300 if (bFirst) {
301 code = val * 16;
302 } else {
303 code += val;
304 buf.AppendByte(code);
305 }
306 bFirst = !bFirst;
307 }
308
309 if (!GetNextChar(ch))
310 break;
311 }
312 if (!bFirst)
313 buf.AppendByte(code);
314
315 return buf.GetByteString();
316 }
317
318 void CPDF_SyntaxParser::ToNextLine() {
319 uint8_t ch;
320 while (GetNextChar(ch)) {
321 if (ch == '\n')
322 break;
323
324 if (ch == '\r') {
325 GetNextChar(ch);
326 if (ch != '\n')
327 --m_Pos;
328 break;
329 }
330 }
331 }
332
333 void CPDF_SyntaxParser::ToNextWord() {
334 uint8_t ch;
335 if (!GetNextChar(ch))
336 return;
337
338 while (1) {
339 while (PDFCharIsWhitespace(ch)) {
340 if (!GetNextChar(ch))
341 return;
342 }
343
344 if (ch != '%')
345 break;
346
347 while (1) {
348 if (!GetNextChar(ch))
349 return;
350 if (PDFCharIsLineEnding(ch))
351 break;
352 }
353 }
354 m_Pos--;
355 }
356
357 CFX_ByteString CPDF_SyntaxParser::GetNextWord(bool* bIsNumber) {
358 GetNextWordInternal(bIsNumber);
359 return CFX_ByteString((const FX_CHAR*)m_WordBuffer, m_WordSize);
360 }
361
362 CFX_ByteString CPDF_SyntaxParser::GetKeyword() {
363 return GetNextWord(nullptr);
364 }
365
366 CPDF_Object* CPDF_SyntaxParser::GetObject(CPDF_IndirectObjectHolder* pObjList,
367 FX_DWORD objnum,
368 FX_DWORD gennum,
369 FX_BOOL bDecrypt) {
370 CFX_AutoRestorer<int> restorer(&s_CurrentRecursionDepth);
371 if (++s_CurrentRecursionDepth > kParserMaxRecursionDepth)
372 return nullptr;
373
374 FX_FILESIZE SavedPos = m_Pos;
375 bool bIsNumber;
376 CFX_ByteString word = GetNextWord(&bIsNumber);
377 if (word.GetLength() == 0)
378 return nullptr;
379
380 if (bIsNumber) {
381 FX_FILESIZE SavedPos = m_Pos;
382 CFX_ByteString nextword = GetNextWord(&bIsNumber);
383 if (bIsNumber) {
384 CFX_ByteString nextword2 = GetNextWord(nullptr);
385 if (nextword2 == "R") {
386 FX_DWORD objnum = FXSYS_atoui(word);
387 return new CPDF_Reference(pObjList, objnum);
388 }
389 }
390 m_Pos = SavedPos;
391 return new CPDF_Number(word);
392 }
393
394 if (word == "true" || word == "false")
395 return new CPDF_Boolean(word == "true");
396
397 if (word == "null")
398 return new CPDF_Null;
399
400 if (word == "(") {
401 CFX_ByteString str = ReadString();
402 if (m_pCryptoHandler && bDecrypt)
403 m_pCryptoHandler->Decrypt(objnum, gennum, str);
404 return new CPDF_String(str, FALSE);
405 }
406
407 if (word == "<") {
408 CFX_ByteString str = ReadHexString();
409 if (m_pCryptoHandler && bDecrypt)
410 m_pCryptoHandler->Decrypt(objnum, gennum, str);
411
412 return new CPDF_String(str, TRUE);
413 }
414
415 if (word == "[") {
416 CPDF_Array* pArray = new CPDF_Array;
417 while (CPDF_Object* pObj = GetObject(pObjList, objnum, gennum, true))
418 pArray->Add(pObj);
419
420 return pArray;
421 }
422
423 if (word[0] == '/') {
424 return new CPDF_Name(
425 PDF_NameDecode(CFX_ByteStringC(m_WordBuffer + 1, m_WordSize - 1)));
426 }
427
428 if (word == "<<") {
429 int32_t nKeys = 0;
430 FX_FILESIZE dwSignValuePos = 0;
431
432 std::unique_ptr<CPDF_Dictionary, ReleaseDeleter<CPDF_Dictionary>> pDict(
433 new CPDF_Dictionary);
434 while (1) {
435 CFX_ByteString key = GetNextWord(nullptr);
436 if (key.IsEmpty())
437 return nullptr;
438
439 FX_FILESIZE SavedPos = m_Pos - key.GetLength();
440 if (key == ">>")
441 break;
442
443 if (key == "endobj") {
444 m_Pos = SavedPos;
445 break;
446 }
447
448 if (key[0] != '/')
449 continue;
450
451 ++nKeys;
452 key = PDF_NameDecode(key);
453 if (key.IsEmpty())
454 continue;
455
456 if (key == "/Contents")
457 dwSignValuePos = m_Pos;
458
459 CPDF_Object* pObj = GetObject(pObjList, objnum, gennum, true);
460 if (!pObj)
461 continue;
462
463 CFX_ByteStringC keyNoSlash(key.c_str() + 1, key.GetLength() - 1);
464 pDict->SetAt(keyNoSlash, pObj);
465 }
466
467 // Only when this is a signature dictionary and has contents, we reset the
468 // contents to the un-decrypted form.
469 if (IsSignatureDict(pDict.get()) && dwSignValuePos) {
470 CFX_AutoRestorer<FX_FILESIZE> save_pos(&m_Pos);
471 m_Pos = dwSignValuePos;
472 pDict->SetAt("Contents", GetObject(pObjList, objnum, gennum, false));
473 }
474
475 FX_FILESIZE SavedPos = m_Pos;
476 CFX_ByteString nextword = GetNextWord(nullptr);
477 if (nextword != "stream") {
478 m_Pos = SavedPos;
479 return pDict.release();
480 }
481 return ReadStream(pDict.release(), objnum, gennum);
482 }
483
484 if (word == ">>")
485 m_Pos = SavedPos;
486
487 return nullptr;
488 }
489
490 CPDF_Object* CPDF_SyntaxParser::GetObjectByStrict(
491 CPDF_IndirectObjectHolder* pObjList,
492 FX_DWORD objnum,
493 FX_DWORD gennum) {
494 CFX_AutoRestorer<int> restorer(&s_CurrentRecursionDepth);
495 if (++s_CurrentRecursionDepth > kParserMaxRecursionDepth)
496 return nullptr;
497
498 FX_FILESIZE SavedPos = m_Pos;
499 bool bIsNumber;
500 CFX_ByteString word = GetNextWord(&bIsNumber);
501 if (word.GetLength() == 0)
502 return nullptr;
503
504 if (bIsNumber) {
505 FX_FILESIZE SavedPos = m_Pos;
506 CFX_ByteString nextword = GetNextWord(&bIsNumber);
507 if (bIsNumber) {
508 CFX_ByteString nextword2 = GetNextWord(nullptr);
509 if (nextword2 == "R")
510 return new CPDF_Reference(pObjList, FXSYS_atoui(word));
511 }
512 m_Pos = SavedPos;
513 return new CPDF_Number(word);
514 }
515
516 if (word == "true" || word == "false")
517 return new CPDF_Boolean(word == "true");
518
519 if (word == "null")
520 return new CPDF_Null;
521
522 if (word == "(") {
523 CFX_ByteString str = ReadString();
524 if (m_pCryptoHandler)
525 m_pCryptoHandler->Decrypt(objnum, gennum, str);
526 return new CPDF_String(str, FALSE);
527 }
528
529 if (word == "<") {
530 CFX_ByteString str = ReadHexString();
531 if (m_pCryptoHandler)
532 m_pCryptoHandler->Decrypt(objnum, gennum, str);
533 return new CPDF_String(str, TRUE);
534 }
535
536 if (word == "[") {
537 std::unique_ptr<CPDF_Array, ReleaseDeleter<CPDF_Array>> pArray(
538 new CPDF_Array);
539 while (CPDF_Object* pObj = GetObject(pObjList, objnum, gennum, true))
540 pArray->Add(pObj);
541
542 return m_WordBuffer[0] == ']' ? pArray.release() : nullptr;
543 }
544
545 if (word[0] == '/') {
546 return new CPDF_Name(
547 PDF_NameDecode(CFX_ByteStringC(m_WordBuffer + 1, m_WordSize - 1)));
548 }
549
550 if (word == "<<") {
551 std::unique_ptr<CPDF_Dictionary, ReleaseDeleter<CPDF_Dictionary>> pDict(
552 new CPDF_Dictionary);
553 while (1) {
554 FX_FILESIZE SavedPos = m_Pos;
555 CFX_ByteString key = GetNextWord(nullptr);
556 if (key.IsEmpty())
557 return nullptr;
558
559 if (key == ">>")
560 break;
561
562 if (key == "endobj") {
563 m_Pos = SavedPos;
564 break;
565 }
566
567 if (key[0] != '/')
568 continue;
569
570 key = PDF_NameDecode(key);
571 std::unique_ptr<CPDF_Object, ReleaseDeleter<CPDF_Object>> obj(
572 GetObject(pObjList, objnum, gennum, true));
573 if (!obj) {
574 uint8_t ch;
575 while (GetNextChar(ch) && ch != 0x0A && ch != 0x0D) {
576 continue;
577 }
578 return nullptr;
579 }
580
581 if (key.GetLength() > 1) {
582 pDict->SetAt(CFX_ByteStringC(key.c_str() + 1, key.GetLength() - 1),
583 obj.release());
584 }
585 }
586
587 FX_FILESIZE SavedPos = m_Pos;
588 CFX_ByteString nextword = GetNextWord(nullptr);
589 if (nextword != "stream") {
590 m_Pos = SavedPos;
591 return pDict.release();
592 }
593
594 return ReadStream(pDict.release(), objnum, gennum);
595 }
596
597 if (word == ">>")
598 m_Pos = SavedPos;
599
600 return nullptr;
601 }
602
603 unsigned int CPDF_SyntaxParser::ReadEOLMarkers(FX_FILESIZE pos) {
604 unsigned char byte1 = 0;
605 unsigned char byte2 = 0;
606
607 GetCharAt(pos, byte1);
608 GetCharAt(pos + 1, byte2);
609
610 if (byte1 == '\r' && byte2 == '\n')
611 return 2;
612
613 if (byte1 == '\r' || byte1 == '\n')
614 return 1;
615
616 return 0;
617 }
618
619 CPDF_Stream* CPDF_SyntaxParser::ReadStream(CPDF_Dictionary* pDict,
620 FX_DWORD objnum,
621 FX_DWORD gennum) {
622 CPDF_Object* pLenObj = pDict->GetElement("Length");
623 FX_FILESIZE len = -1;
624 CPDF_Reference* pLenObjRef = ToReference(pLenObj);
625
626 bool differingObjNum = !pLenObjRef || (pLenObjRef->GetObjList() &&
627 pLenObjRef->GetRefObjNum() != objnum);
628 if (pLenObj && differingObjNum)
629 len = pLenObj->GetInteger();
630
631 // Locate the start of stream.
632 ToNextLine();
633 FX_FILESIZE streamStartPos = m_Pos;
634
635 const CFX_ByteStringC kEndStreamStr("endstream");
636 const CFX_ByteStringC kEndObjStr("endobj");
637
638 CPDF_CryptoHandler* pCryptoHandler =
639 objnum == (FX_DWORD)m_MetadataObjnum ? nullptr : m_pCryptoHandler.get();
640 if (!pCryptoHandler) {
641 FX_BOOL bSearchForKeyword = TRUE;
642 if (len >= 0) {
643 pdfium::base::CheckedNumeric<FX_FILESIZE> pos = m_Pos;
644 pos += len;
645 if (pos.IsValid() && pos.ValueOrDie() < m_FileLen)
646 m_Pos = pos.ValueOrDie();
647
648 m_Pos += ReadEOLMarkers(m_Pos);
649 FXSYS_memset(m_WordBuffer, 0, kEndStreamStr.GetLength() + 1);
650 GetNextWordInternal(nullptr);
651 // Earlier version of PDF specification doesn't require EOL marker before
652 // 'endstream' keyword. If keyword 'endstream' follows the bytes in
653 // specified length, it signals the end of stream.
654 if (FXSYS_memcmp(m_WordBuffer, kEndStreamStr.GetPtr(),
655 kEndStreamStr.GetLength()) == 0) {
656 bSearchForKeyword = FALSE;
657 }
658 }
659
660 if (bSearchForKeyword) {
661 // If len is not available, len needs to be calculated
662 // by searching the keywords "endstream" or "endobj".
663 m_Pos = streamStartPos;
664 FX_FILESIZE endStreamOffset = 0;
665 while (endStreamOffset >= 0) {
666 endStreamOffset = FindTag(kEndStreamStr, 0);
667
668 // Can't find "endstream".
669 if (endStreamOffset < 0)
670 break;
671
672 // Stop searching when "endstream" is found.
673 if (IsWholeWord(m_Pos - kEndStreamStr.GetLength(), m_FileLen,
674 kEndStreamStr, TRUE)) {
675 endStreamOffset = m_Pos - streamStartPos - kEndStreamStr.GetLength();
676 break;
677 }
678 }
679
680 m_Pos = streamStartPos;
681 FX_FILESIZE endObjOffset = 0;
682 while (endObjOffset >= 0) {
683 endObjOffset = FindTag(kEndObjStr, 0);
684
685 // Can't find "endobj".
686 if (endObjOffset < 0)
687 break;
688
689 // Stop searching when "endobj" is found.
690 if (IsWholeWord(m_Pos - kEndObjStr.GetLength(), m_FileLen, kEndObjStr,
691 TRUE)) {
692 endObjOffset = m_Pos - streamStartPos - kEndObjStr.GetLength();
693 break;
694 }
695 }
696
697 // Can't find "endstream" or "endobj".
698 if (endStreamOffset < 0 && endObjOffset < 0) {
699 pDict->Release();
700 return nullptr;
701 }
702
703 if (endStreamOffset < 0 && endObjOffset >= 0) {
704 // Correct the position of end stream.
705 endStreamOffset = endObjOffset;
706 } else if (endStreamOffset >= 0 && endObjOffset < 0) {
707 // Correct the position of end obj.
708 endObjOffset = endStreamOffset;
709 } else if (endStreamOffset > endObjOffset) {
710 endStreamOffset = endObjOffset;
711 }
712
713 len = endStreamOffset;
714 int numMarkers = ReadEOLMarkers(streamStartPos + endStreamOffset - 2);
715 if (numMarkers == 2) {
716 len -= 2;
717 } else {
718 numMarkers = ReadEOLMarkers(streamStartPos + endStreamOffset - 1);
719 if (numMarkers == 1) {
720 len -= 1;
721 }
722 }
723
724 if (len < 0) {
725 pDict->Release();
726 return nullptr;
727 }
728 pDict->SetAtInteger("Length", len);
729 }
730 m_Pos = streamStartPos;
731 }
732
733 if (len < 0) {
734 pDict->Release();
735 return nullptr;
736 }
737
738 uint8_t* pData = nullptr;
739 if (len > 0) {
740 pData = FX_Alloc(uint8_t, len);
741 ReadBlock(pData, len);
742 if (pCryptoHandler) {
743 CFX_BinaryBuf dest_buf;
744 dest_buf.EstimateSize(pCryptoHandler->DecryptGetSize(len));
745
746 void* context = pCryptoHandler->DecryptStart(objnum, gennum);
747 pCryptoHandler->DecryptStream(context, pData, len, dest_buf);
748 pCryptoHandler->DecryptFinish(context, dest_buf);
749
750 FX_Free(pData);
751 pData = dest_buf.GetBuffer();
752 len = dest_buf.GetSize();
753 dest_buf.DetachBuffer();
754 }
755 }
756
757 CPDF_Stream* pStream = new CPDF_Stream(pData, len, pDict);
758 streamStartPos = m_Pos;
759 FXSYS_memset(m_WordBuffer, 0, kEndObjStr.GetLength() + 1);
760
761 GetNextWordInternal(nullptr);
762
763 int numMarkers = ReadEOLMarkers(m_Pos);
764 if (m_WordSize == kEndObjStr.GetLength() && numMarkers != 0 &&
765 FXSYS_memcmp(m_WordBuffer, kEndObjStr.GetPtr(), kEndObjStr.GetLength()) ==
766 0) {
767 m_Pos = streamStartPos;
768 }
769 return pStream;
770 }
771
772 void CPDF_SyntaxParser::InitParser(IFX_FileRead* pFileAccess,
773 FX_DWORD HeaderOffset) {
774 FX_Free(m_pFileBuf);
775
776 m_pFileBuf = FX_Alloc(uint8_t, m_BufSize);
777 m_HeaderOffset = HeaderOffset;
778 m_FileLen = pFileAccess->GetSize();
779 m_Pos = 0;
780 m_pFileAccess = pFileAccess;
781 m_BufOffset = 0;
782 pFileAccess->ReadBlock(
783 m_pFileBuf, 0,
784 (size_t)((FX_FILESIZE)m_BufSize > m_FileLen ? m_FileLen : m_BufSize));
785 }
786
787 uint32_t CPDF_SyntaxParser::GetDirectNum() {
788 bool bIsNumber;
789 GetNextWordInternal(&bIsNumber);
790 if (!bIsNumber)
791 return 0;
792
793 m_WordBuffer[m_WordSize] = 0;
794 return FXSYS_atoui(reinterpret_cast<const FX_CHAR*>(m_WordBuffer));
795 }
796
797 bool CPDF_SyntaxParser::IsWholeWord(FX_FILESIZE startpos,
798 FX_FILESIZE limit,
799 const CFX_ByteStringC& tag,
800 FX_BOOL checkKeyword) {
801 const FX_DWORD taglen = tag.GetLength();
802
803 bool bCheckLeft = !PDFCharIsDelimiter(tag[0]) && !PDFCharIsWhitespace(tag[0]);
804 bool bCheckRight = !PDFCharIsDelimiter(tag[taglen - 1]) &&
805 !PDFCharIsWhitespace(tag[taglen - 1]);
806
807 uint8_t ch;
808 if (bCheckRight && startpos + (int32_t)taglen <= limit &&
809 GetCharAt(startpos + (int32_t)taglen, ch)) {
810 if (PDFCharIsNumeric(ch) || PDFCharIsOther(ch) ||
811 (checkKeyword && PDFCharIsDelimiter(ch))) {
812 return false;
813 }
814 }
815
816 if (bCheckLeft && startpos > 0 && GetCharAt(startpos - 1, ch)) {
817 if (PDFCharIsNumeric(ch) || PDFCharIsOther(ch) ||
818 (checkKeyword && PDFCharIsDelimiter(ch))) {
819 return false;
820 }
821 }
822 return true;
823 }
824
825 // TODO(dsinclair): Split into a SearchWordForward and SearchWordBackwards
826 // and drop the bool.
827 FX_BOOL CPDF_SyntaxParser::SearchWord(const CFX_ByteStringC& tag,
828 FX_BOOL bWholeWord,
829 FX_BOOL bForward,
830 FX_FILESIZE limit) {
831 int32_t taglen = tag.GetLength();
832 if (taglen == 0)
833 return FALSE;
834
835 FX_FILESIZE pos = m_Pos;
836 int32_t offset = 0;
837 if (!bForward)
838 offset = taglen - 1;
839
840 const uint8_t* tag_data = tag.GetPtr();
841 uint8_t byte;
842 while (1) {
843 if (bForward) {
844 if (limit && pos >= m_Pos + limit)
845 return FALSE;
846
847 if (!GetCharAt(pos, byte))
848 return FALSE;
849
850 } else {
851 if (limit && pos <= m_Pos - limit)
852 return FALSE;
853
854 if (!GetCharAtBackward(pos, byte))
855 return FALSE;
856 }
857
858 if (byte == tag_data[offset]) {
859 if (bForward) {
860 offset++;
861 if (offset < taglen) {
862 pos++;
863 continue;
864 }
865 } else {
866 offset--;
867 if (offset >= 0) {
868 pos--;
869 continue;
870 }
871 }
872
873 FX_FILESIZE startpos = bForward ? pos - taglen + 1 : pos;
874 if (!bWholeWord || IsWholeWord(startpos, limit, tag, FALSE)) {
875 m_Pos = startpos;
876 return TRUE;
877 }
878 }
879
880 if (bForward) {
881 offset = byte == tag_data[0] ? 1 : 0;
882 pos++;
883 } else {
884 offset = byte == tag_data[taglen - 1] ? taglen - 2 : taglen - 1;
885 pos--;
886 }
887
888 if (pos < 0)
889 return FALSE;
890 }
891
892 return FALSE;
893 }
894
895 int32_t CPDF_SyntaxParser::SearchMultiWord(const CFX_ByteStringC& tags,
896 FX_BOOL bWholeWord,
897 FX_FILESIZE limit) {
898 int32_t ntags = 1;
899 for (int i = 0; i < tags.GetLength(); ++i) {
900 if (tags[i] == 0)
901 ++ntags;
902 }
903
904 std::vector<SearchTagRecord> patterns(ntags);
905 FX_DWORD start = 0;
906 FX_DWORD itag = 0;
907 FX_DWORD max_len = 0;
908 for (int i = 0; i <= tags.GetLength(); ++i) {
909 if (tags[i] == 0) {
910 FX_DWORD len = i - start;
911 max_len = std::max(len, max_len);
912 patterns[itag].m_pTag = tags.GetCStr() + start;
913 patterns[itag].m_Len = len;
914 patterns[itag].m_Offset = 0;
915 start = i + 1;
916 ++itag;
917 }
918 }
919
920 const FX_FILESIZE pos_limit = m_Pos + limit;
921 for (FX_FILESIZE pos = m_Pos; !limit || pos < pos_limit; ++pos) {
922 uint8_t byte;
923 if (!GetCharAt(pos, byte))
924 break;
925
926 for (int i = 0; i < ntags; ++i) {
927 SearchTagRecord& pat = patterns[i];
928 if (pat.m_pTag[pat.m_Offset] != byte) {
929 pat.m_Offset = (pat.m_pTag[0] == byte) ? 1 : 0;
930 continue;
931 }
932
933 ++pat.m_Offset;
934 if (pat.m_Offset != pat.m_Len)
935 continue;
936
937 if (!bWholeWord ||
938 IsWholeWord(pos - pat.m_Len, limit,
939 CFX_ByteStringC(pat.m_pTag, pat.m_Len), FALSE)) {
940 return i;
941 }
942
943 pat.m_Offset = (pat.m_pTag[0] == byte) ? 1 : 0;
944 }
945 }
946 return -1;
947 }
948
949 FX_FILESIZE CPDF_SyntaxParser::FindTag(const CFX_ByteStringC& tag,
950 FX_FILESIZE limit) {
951 int32_t taglen = tag.GetLength();
952 int32_t match = 0;
953 limit += m_Pos;
954 FX_FILESIZE startpos = m_Pos;
955
956 while (1) {
957 uint8_t ch;
958 if (!GetNextChar(ch))
959 return -1;
960
961 if (ch == tag[match]) {
962 match++;
963 if (match == taglen)
964 return m_Pos - startpos - taglen;
965 } else {
966 match = ch == tag[0] ? 1 : 0;
967 }
968
969 if (limit && m_Pos == limit)
970 return -1;
971 }
972 return -1;
973 }
974
975 void CPDF_SyntaxParser::SetEncrypt(
976 std::unique_ptr<CPDF_CryptoHandler> pCryptoHandler) {
977 m_pCryptoHandler = std::move(pCryptoHandler);
978 }
OLDNEW
« no previous file with comments | « core/src/fpdfapi/fpdf_parser/cpdf_syntax_parser.h ('k') | core/src/fpdfapi/fpdf_parser/fpdf_parser_fdf.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698