Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(16)

Side by Side Diff: core/src/fpdfapi/fpdf_parser/cpdf_syntax_parser.cpp

Issue 1774753002: Split CPDF_SyntaxParser into its own named .cpp/.h files. (Closed) Base URL: https://pdfium.googlesource.com/pdfium.git@master
Patch Set: Copyright 2016. Created 4 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 // Copyright 2016 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7 #include "core/src/fpdfapi/fpdf_parser/cpdf_syntax_parser.h"
8
9 #include "core/include/fpdfapi/fpdf_module.h"
10 #include "core/include/fpdfapi/fpdf_parser.h"
11 #include "core/include/fxcrt/fx_ext.h"
12 #include "third_party/base/numerics/safe_math.h"
13
14 namespace {
15
// Per-tag matching state used by CPDF_SyntaxParser::SearchMultiWord(), which
// creates one record for every tag in its NUL-separated tag list.
16 struct SearchTagRecord {
// Points at the first byte of this tag inside the caller's tag list.
17 const char* m_pTag;
// Number of bytes in this tag.
18 FX_DWORD m_Len;
// Number of leading bytes of the tag matched by the input scanned so far.
19 FX_DWORD m_Offset;
20 };
21
22 } // namespace
23
24 // static
// Counter shared by every parser instance. GetObject() and
// GetObjectByStrict() increment it on entry (under a CFX_AutoRestorer) and
// give up once it exceeds kParserMaxRecursionDepth.
25 int CPDF_SyntaxParser::s_CurrentRecursionDepth = 0;
26
27 CPDF_SyntaxParser::CPDF_SyntaxParser()
28 : m_MetadataObjnum(0),
29 m_pFileAccess(nullptr),
30 m_pFileBuf(nullptr),
31 m_BufSize(CPDF_ModuleMgr::kFileBufSize) {}
32
33 CPDF_SyntaxParser::~CPDF_SyntaxParser() {
34 FX_Free(m_pFileBuf);
35 }
36
37 FX_BOOL CPDF_SyntaxParser::GetCharAt(FX_FILESIZE pos, uint8_t& ch) {
38 CFX_AutoRestorer<FX_FILESIZE> save_pos(&m_Pos);
39 m_Pos = pos;
40 return GetNextChar(ch);
41 }
42
43 FX_BOOL CPDF_SyntaxParser::GetNextChar(uint8_t& ch) {
44 FX_FILESIZE pos = m_Pos + m_HeaderOffset;
45 if (pos >= m_FileLen)
46 return FALSE;
47
48 if (m_BufOffset >= pos || (FX_FILESIZE)(m_BufOffset + m_BufSize) <= pos) {
49 FX_FILESIZE read_pos = pos;
50 FX_DWORD read_size = m_BufSize;
51 if ((FX_FILESIZE)read_size > m_FileLen)
52 read_size = (FX_DWORD)m_FileLen;
53
54 if ((FX_FILESIZE)(read_pos + read_size) > m_FileLen) {
55 if (m_FileLen < (FX_FILESIZE)read_size) {
56 read_pos = 0;
57 read_size = (FX_DWORD)m_FileLen;
58 } else {
59 read_pos = m_FileLen - read_size;
60 }
61 }
62
63 if (!m_pFileAccess->ReadBlock(m_pFileBuf, read_pos, read_size))
64 return FALSE;
65
66 m_BufOffset = read_pos;
67 }
68 ch = m_pFileBuf[pos - m_BufOffset];
69 m_Pos++;
70 return TRUE;
71 }
72
73 FX_BOOL CPDF_SyntaxParser::GetCharAtBackward(FX_FILESIZE pos, uint8_t& ch) {
74 pos += m_HeaderOffset;
75 if (pos >= m_FileLen)
76 return FALSE;
77
78 if (m_BufOffset >= pos || (FX_FILESIZE)(m_BufOffset + m_BufSize) <= pos) {
79 FX_FILESIZE read_pos;
80 if (pos < (FX_FILESIZE)m_BufSize)
81 read_pos = 0;
82 else
83 read_pos = pos - m_BufSize + 1;
84
85 FX_DWORD read_size = m_BufSize;
86 if ((FX_FILESIZE)(read_pos + read_size) > m_FileLen) {
87 if (m_FileLen < (FX_FILESIZE)read_size) {
88 read_pos = 0;
89 read_size = (FX_DWORD)m_FileLen;
90 } else {
91 read_pos = m_FileLen - read_size;
92 }
93 }
94
95 if (!m_pFileAccess->ReadBlock(m_pFileBuf, read_pos, read_size))
96 return FALSE;
97
98 m_BufOffset = read_pos;
99 }
100 ch = m_pFileBuf[pos - m_BufOffset];
101 return TRUE;
102 }
103
104 FX_BOOL CPDF_SyntaxParser::ReadBlock(uint8_t* pBuf, FX_DWORD size) {
105 if (!m_pFileAccess->ReadBlock(pBuf, m_Pos + m_HeaderOffset, size))
106 return FALSE;
107 m_Pos += size;
108 return TRUE;
109 }
110
111 void CPDF_SyntaxParser::GetNextWordInternal(bool* bIsNumber) {
112 m_WordSize = 0;
113 if (bIsNumber)
114 *bIsNumber = true;
115
116 uint8_t ch;
117 if (!GetNextChar(ch))
118 return;
119
120 while (1) {
121 while (PDFCharIsWhitespace(ch)) {
122 if (!GetNextChar(ch))
123 return;
124 }
125
126 if (ch != '%')
127 break;
128
129 while (1) {
130 if (!GetNextChar(ch))
131 return;
132 if (PDFCharIsLineEnding(ch))
133 break;
134 }
135 }
136
137 if (PDFCharIsDelimiter(ch)) {
138 if (bIsNumber)
139 *bIsNumber = false;
140
141 m_WordBuffer[m_WordSize++] = ch;
142 if (ch == '/') {
143 while (1) {
144 if (!GetNextChar(ch))
145 return;
146
147 if (!PDFCharIsOther(ch) && !PDFCharIsNumeric(ch)) {
148 m_Pos--;
149 return;
150 }
151
152 if (m_WordSize < sizeof(m_WordBuffer) - 1)
153 m_WordBuffer[m_WordSize++] = ch;
154 }
155 } else if (ch == '<') {
156 if (!GetNextChar(ch))
157 return;
158
159 if (ch == '<')
160 m_WordBuffer[m_WordSize++] = ch;
161 else
162 m_Pos--;
163 } else if (ch == '>') {
164 if (!GetNextChar(ch))
165 return;
166
167 if (ch == '>')
168 m_WordBuffer[m_WordSize++] = ch;
169 else
170 m_Pos--;
171 }
172 return;
173 }
174
175 while (1) {
176 if (m_WordSize < sizeof(m_WordBuffer) - 1)
177 m_WordBuffer[m_WordSize++] = ch;
178
179 if (!PDFCharIsNumeric(ch)) {
180 if (bIsNumber)
181 *bIsNumber = false;
182 }
183
184 if (!GetNextChar(ch))
185 return;
186
187 if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) {
188 m_Pos--;
189 break;
190 }
191 }
192 }
193
194 CFX_ByteString CPDF_SyntaxParser::ReadString() {
195 uint8_t ch;
196 if (!GetNextChar(ch))
197 return CFX_ByteString();
198
199 CFX_ByteTextBuf buf;
200 int32_t parlevel = 0;
201 int32_t status = 0;
202 int32_t iEscCode = 0;
203 while (1) {
204 switch (status) {
205 case 0:
206 if (ch == ')') {
207 if (parlevel == 0) {
208 return buf.GetByteString();
209 }
210 parlevel--;
211 buf.AppendChar(')');
212 } else if (ch == '(') {
213 parlevel++;
214 buf.AppendChar('(');
215 } else if (ch == '\\') {
216 status = 1;
217 } else {
218 buf.AppendChar(ch);
219 }
220 break;
221 case 1:
222 if (ch >= '0' && ch <= '7') {
223 iEscCode = FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch));
224 status = 2;
225 break;
226 }
227
228 if (ch == 'n') {
229 buf.AppendChar('\n');
230 } else if (ch == 'r') {
231 buf.AppendChar('\r');
232 } else if (ch == 't') {
233 buf.AppendChar('\t');
234 } else if (ch == 'b') {
235 buf.AppendChar('\b');
236 } else if (ch == 'f') {
237 buf.AppendChar('\f');
238 } else if (ch == '\r') {
239 status = 4;
240 break;
241 } else if (ch != '\n') {
242 buf.AppendChar(ch);
243 }
244 status = 0;
245 break;
246 case 2:
247 if (ch >= '0' && ch <= '7') {
248 iEscCode =
249 iEscCode * 8 + FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch));
250 status = 3;
251 } else {
252 buf.AppendChar(iEscCode);
253 status = 0;
254 continue;
255 }
256 break;
257 case 3:
258 if (ch >= '0' && ch <= '7') {
259 iEscCode =
260 iEscCode * 8 + FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch));
261 buf.AppendChar(iEscCode);
262 status = 0;
263 } else {
264 buf.AppendChar(iEscCode);
265 status = 0;
266 continue;
267 }
268 break;
269 case 4:
270 status = 0;
271 if (ch != '\n')
272 continue;
273 break;
274 }
275
276 if (!GetNextChar(ch))
277 break;
278 }
279
280 GetNextChar(ch);
281 return buf.GetByteString();
282 }
283
284 CFX_ByteString CPDF_SyntaxParser::ReadHexString() {
285 uint8_t ch;
286 if (!GetNextChar(ch))
287 return CFX_ByteString();
288
289 CFX_ByteTextBuf buf;
290 bool bFirst = true;
291 uint8_t code = 0;
292 while (1) {
293 if (ch == '>')
294 break;
295
296 if (std::isxdigit(ch)) {
297 int val = FXSYS_toHexDigit(ch);
298 if (bFirst) {
299 code = val * 16;
300 } else {
301 code += val;
302 buf.AppendByte(code);
303 }
304 bFirst = !bFirst;
305 }
306
307 if (!GetNextChar(ch))
308 break;
309 }
310 if (!bFirst)
311 buf.AppendByte(code);
312
313 return buf.GetByteString();
314 }
315
316 void CPDF_SyntaxParser::ToNextLine() {
317 uint8_t ch;
318 while (GetNextChar(ch)) {
319 if (ch == '\n')
320 break;
321
322 if (ch == '\r') {
323 GetNextChar(ch);
324 if (ch != '\n')
325 --m_Pos;
326 break;
327 }
328 }
329 }
330
331 void CPDF_SyntaxParser::ToNextWord() {
332 uint8_t ch;
333 if (!GetNextChar(ch))
334 return;
335
336 while (1) {
337 while (PDFCharIsWhitespace(ch)) {
338 if (!GetNextChar(ch))
339 return;
340 }
341
342 if (ch != '%')
343 break;
344
345 while (1) {
346 if (!GetNextChar(ch))
347 return;
348 if (PDFCharIsLineEnding(ch))
349 break;
350 }
351 }
352 m_Pos--;
353 }
354
355 CFX_ByteString CPDF_SyntaxParser::GetNextWord(bool* bIsNumber) {
356 GetNextWordInternal(bIsNumber);
357 return CFX_ByteString((const FX_CHAR*)m_WordBuffer, m_WordSize);
358 }
359
360 CFX_ByteString CPDF_SyntaxParser::GetKeyword() {
361 return GetNextWord(nullptr);
362 }
363
364 CPDF_Object* CPDF_SyntaxParser::GetObject(CPDF_IndirectObjectHolder* pObjList,
365 FX_DWORD objnum,
366 FX_DWORD gennum,
367 FX_BOOL bDecrypt) {
368 CFX_AutoRestorer<int> restorer(&s_CurrentRecursionDepth);
369 if (++s_CurrentRecursionDepth > kParserMaxRecursionDepth)
370 return nullptr;
371
372 FX_FILESIZE SavedPos = m_Pos;
373 bool bIsNumber;
374 CFX_ByteString word = GetNextWord(&bIsNumber);
375 if (word.GetLength() == 0)
376 return nullptr;
377
378 if (bIsNumber) {
379 FX_FILESIZE SavedPos = m_Pos;
380 CFX_ByteString nextword = GetNextWord(&bIsNumber);
381 if (bIsNumber) {
382 CFX_ByteString nextword2 = GetNextWord(nullptr);
383 if (nextword2 == "R") {
384 FX_DWORD objnum = FXSYS_atoui(word);
385 return new CPDF_Reference(pObjList, objnum);
386 }
387 }
388 m_Pos = SavedPos;
389 return new CPDF_Number(word);
390 }
391
392 if (word == "true" || word == "false")
393 return new CPDF_Boolean(word == "true");
394
395 if (word == "null")
396 return new CPDF_Null;
397
398 if (word == "(") {
399 CFX_ByteString str = ReadString();
400 if (m_pCryptoHandler && bDecrypt)
401 m_pCryptoHandler->Decrypt(objnum, gennum, str);
402 return new CPDF_String(str, FALSE);
403 }
404
405 if (word == "<") {
406 CFX_ByteString str = ReadHexString();
407 if (m_pCryptoHandler && bDecrypt)
408 m_pCryptoHandler->Decrypt(objnum, gennum, str);
409
410 return new CPDF_String(str, TRUE);
411 }
412
413 if (word == "[") {
414 CPDF_Array* pArray = new CPDF_Array;
415 while (CPDF_Object* pObj = GetObject(pObjList, objnum, gennum, true))
416 pArray->Add(pObj);
417
418 return pArray;
419 }
420
421 if (word[0] == '/') {
422 return new CPDF_Name(
423 PDF_NameDecode(CFX_ByteStringC(m_WordBuffer + 1, m_WordSize - 1)));
424 }
425
426 if (word == "<<") {
427 int32_t nKeys = 0;
428 FX_FILESIZE dwSignValuePos = 0;
429
430 std::unique_ptr<CPDF_Dictionary, ReleaseDeleter<CPDF_Dictionary>> pDict(
431 new CPDF_Dictionary);
432 while (1) {
433 CFX_ByteString key = GetNextWord(nullptr);
434 if (key.IsEmpty())
435 return nullptr;
436
437 FX_FILESIZE SavedPos = m_Pos - key.GetLength();
438 if (key == ">>")
439 break;
440
441 if (key == "endobj") {
442 m_Pos = SavedPos;
443 break;
444 }
445
446 if (key[0] != '/')
447 continue;
448
449 ++nKeys;
450 key = PDF_NameDecode(key);
451 if (key.IsEmpty())
452 continue;
453
454 if (key == "/Contents")
455 dwSignValuePos = m_Pos;
456
457 CPDF_Object* pObj = GetObject(pObjList, objnum, gennum, true);
458 if (!pObj)
459 continue;
460
461 CFX_ByteStringC keyNoSlash(key.c_str() + 1, key.GetLength() - 1);
462 pDict->SetAt(keyNoSlash, pObj);
463 }
464
465 // Only when this is a signature dictionary and has contents, we reset the
466 // contents to the un-decrypted form.
467 if (IsSignatureDict(pDict.get()) && dwSignValuePos) {
468 CFX_AutoRestorer<FX_FILESIZE> save_pos(&m_Pos);
469 m_Pos = dwSignValuePos;
470 pDict->SetAt("Contents", GetObject(pObjList, objnum, gennum, false));
471 }
472
473 FX_FILESIZE SavedPos = m_Pos;
474 CFX_ByteString nextword = GetNextWord(nullptr);
475 if (nextword != "stream") {
476 m_Pos = SavedPos;
477 return pDict.release();
478 }
479 return ReadStream(pDict.release(), objnum, gennum);
480 }
481
482 if (word == ">>")
483 m_Pos = SavedPos;
484
485 return nullptr;
486 }
487
488 CPDF_Object* CPDF_SyntaxParser::GetObjectByStrict(
489 CPDF_IndirectObjectHolder* pObjList,
490 FX_DWORD objnum,
491 FX_DWORD gennum) {
492 CFX_AutoRestorer<int> restorer(&s_CurrentRecursionDepth);
493 if (++s_CurrentRecursionDepth > kParserMaxRecursionDepth)
494 return nullptr;
495
496 FX_FILESIZE SavedPos = m_Pos;
497 bool bIsNumber;
498 CFX_ByteString word = GetNextWord(&bIsNumber);
499 if (word.GetLength() == 0)
500 return nullptr;
501
502 if (bIsNumber) {
503 FX_FILESIZE SavedPos = m_Pos;
504 CFX_ByteString nextword = GetNextWord(&bIsNumber);
505 if (bIsNumber) {
506 CFX_ByteString nextword2 = GetNextWord(nullptr);
507 if (nextword2 == "R")
508 return new CPDF_Reference(pObjList, FXSYS_atoui(word));
509 }
510 m_Pos = SavedPos;
511 return new CPDF_Number(word);
512 }
513
514 if (word == "true" || word == "false")
515 return new CPDF_Boolean(word == "true");
516
517 if (word == "null")
518 return new CPDF_Null;
519
520 if (word == "(") {
521 CFX_ByteString str = ReadString();
522 if (m_pCryptoHandler)
523 m_pCryptoHandler->Decrypt(objnum, gennum, str);
524 return new CPDF_String(str, FALSE);
525 }
526
527 if (word == "<") {
528 CFX_ByteString str = ReadHexString();
529 if (m_pCryptoHandler)
530 m_pCryptoHandler->Decrypt(objnum, gennum, str);
531 return new CPDF_String(str, TRUE);
532 }
533
534 if (word == "[") {
535 std::unique_ptr<CPDF_Array, ReleaseDeleter<CPDF_Array>> pArray(
536 new CPDF_Array);
537 while (CPDF_Object* pObj = GetObject(pObjList, objnum, gennum, true))
538 pArray->Add(pObj);
539
540 return m_WordBuffer[0] == ']' ? pArray.release() : nullptr;
541 }
542
543 if (word[0] == '/') {
544 return new CPDF_Name(
545 PDF_NameDecode(CFX_ByteStringC(m_WordBuffer + 1, m_WordSize - 1)));
546 }
547
548 if (word == "<<") {
549 std::unique_ptr<CPDF_Dictionary, ReleaseDeleter<CPDF_Dictionary>> pDict(
550 new CPDF_Dictionary);
551 while (1) {
552 FX_FILESIZE SavedPos = m_Pos;
553 CFX_ByteString key = GetNextWord(nullptr);
554 if (key.IsEmpty())
555 return nullptr;
556
557 if (key == ">>")
558 break;
559
560 if (key == "endobj") {
561 m_Pos = SavedPos;
562 break;
563 }
564
565 if (key[0] != '/')
566 continue;
567
568 key = PDF_NameDecode(key);
569 std::unique_ptr<CPDF_Object, ReleaseDeleter<CPDF_Object>> obj(
570 GetObject(pObjList, objnum, gennum, true));
571 if (!obj) {
572 uint8_t ch;
573 while (GetNextChar(ch) && ch != 0x0A && ch != 0x0D) {
574 continue;
575 }
576 return nullptr;
577 }
578
579 if (key.GetLength() > 1) {
580 pDict->SetAt(CFX_ByteStringC(key.c_str() + 1, key.GetLength() - 1),
581 obj.release());
582 }
583 }
584
585 FX_FILESIZE SavedPos = m_Pos;
586 CFX_ByteString nextword = GetNextWord(nullptr);
587 if (nextword != "stream") {
588 m_Pos = SavedPos;
589 return pDict.release();
590 }
591
592 return ReadStream(pDict.release(), objnum, gennum);
593 }
594
595 if (word == ">>")
596 m_Pos = SavedPos;
597
598 return nullptr;
599 }
600
601 unsigned int CPDF_SyntaxParser::ReadEOLMarkers(FX_FILESIZE pos) {
602 unsigned char byte1 = 0;
603 unsigned char byte2 = 0;
604
605 GetCharAt(pos, byte1);
606 GetCharAt(pos + 1, byte2);
607
608 if (byte1 == '\r' && byte2 == '\n')
609 return 2;
610
611 if (byte1 == '\r' || byte1 == '\n')
612 return 1;
613
614 return 0;
615 }
616
617 CPDF_Stream* CPDF_SyntaxParser::ReadStream(CPDF_Dictionary* pDict,
618 FX_DWORD objnum,
619 FX_DWORD gennum) {
620 CPDF_Object* pLenObj = pDict->GetElement("Length");
621 FX_FILESIZE len = -1;
622 CPDF_Reference* pLenObjRef = ToReference(pLenObj);
623
624 bool differingObjNum = !pLenObjRef || (pLenObjRef->GetObjList() &&
625 pLenObjRef->GetRefObjNum() != objnum);
626 if (pLenObj && differingObjNum)
627 len = pLenObj->GetInteger();
628
629 // Locate the start of stream.
630 ToNextLine();
631 FX_FILESIZE streamStartPos = m_Pos;
632
633 const CFX_ByteStringC kEndStreamStr("endstream");
634 const CFX_ByteStringC kEndObjStr("endobj");
635
636 CPDF_CryptoHandler* pCryptoHandler =
637 objnum == (FX_DWORD)m_MetadataObjnum ? nullptr : m_pCryptoHandler.get();
638 if (!pCryptoHandler) {
639 FX_BOOL bSearchForKeyword = TRUE;
640 if (len >= 0) {
641 pdfium::base::CheckedNumeric<FX_FILESIZE> pos = m_Pos;
642 pos += len;
643 if (pos.IsValid() && pos.ValueOrDie() < m_FileLen)
644 m_Pos = pos.ValueOrDie();
645
646 m_Pos += ReadEOLMarkers(m_Pos);
647 FXSYS_memset(m_WordBuffer, 0, kEndStreamStr.GetLength() + 1);
648 GetNextWordInternal(nullptr);
649 // Earlier version of PDF specification doesn't require EOL marker before
650 // 'endstream' keyword. If keyword 'endstream' follows the bytes in
651 // specified length, it signals the end of stream.
652 if (FXSYS_memcmp(m_WordBuffer, kEndStreamStr.GetPtr(),
653 kEndStreamStr.GetLength()) == 0) {
654 bSearchForKeyword = FALSE;
655 }
656 }
657
658 if (bSearchForKeyword) {
659 // If len is not available, len needs to be calculated
660 // by searching the keywords "endstream" or "endobj".
661 m_Pos = streamStartPos;
662 FX_FILESIZE endStreamOffset = 0;
663 while (endStreamOffset >= 0) {
664 endStreamOffset = FindTag(kEndStreamStr, 0);
665
666 // Can't find "endstream".
667 if (endStreamOffset < 0)
668 break;
669
670 // Stop searching when "endstream" is found.
671 if (IsWholeWord(m_Pos - kEndStreamStr.GetLength(), m_FileLen,
672 kEndStreamStr, TRUE)) {
673 endStreamOffset = m_Pos - streamStartPos - kEndStreamStr.GetLength();
674 break;
675 }
676 }
677
678 m_Pos = streamStartPos;
679 FX_FILESIZE endObjOffset = 0;
680 while (endObjOffset >= 0) {
681 endObjOffset = FindTag(kEndObjStr, 0);
682
683 // Can't find "endobj".
684 if (endObjOffset < 0)
685 break;
686
687 // Stop searching when "endobj" is found.
688 if (IsWholeWord(m_Pos - kEndObjStr.GetLength(), m_FileLen, kEndObjStr,
689 TRUE)) {
690 endObjOffset = m_Pos - streamStartPos - kEndObjStr.GetLength();
691 break;
692 }
693 }
694
695 // Can't find "endstream" or "endobj".
696 if (endStreamOffset < 0 && endObjOffset < 0) {
697 pDict->Release();
698 return nullptr;
699 }
700
701 if (endStreamOffset < 0 && endObjOffset >= 0) {
702 // Correct the position of end stream.
703 endStreamOffset = endObjOffset;
704 } else if (endStreamOffset >= 0 && endObjOffset < 0) {
705 // Correct the position of end obj.
706 endObjOffset = endStreamOffset;
707 } else if (endStreamOffset > endObjOffset) {
708 endStreamOffset = endObjOffset;
709 }
710
711 len = endStreamOffset;
712 int numMarkers = ReadEOLMarkers(streamStartPos + endStreamOffset - 2);
713 if (numMarkers == 2) {
714 len -= 2;
715 } else {
716 numMarkers = ReadEOLMarkers(streamStartPos + endStreamOffset - 1);
717 if (numMarkers == 1) {
718 len -= 1;
719 }
720 }
721
722 if (len < 0) {
723 pDict->Release();
724 return nullptr;
725 }
726 pDict->SetAtInteger("Length", len);
727 }
728 m_Pos = streamStartPos;
729 }
730
731 if (len < 0) {
732 pDict->Release();
733 return nullptr;
734 }
735
736 uint8_t* pData = nullptr;
737 if (len > 0) {
738 pData = FX_Alloc(uint8_t, len);
739 ReadBlock(pData, len);
740 if (pCryptoHandler) {
741 CFX_BinaryBuf dest_buf;
742 dest_buf.EstimateSize(pCryptoHandler->DecryptGetSize(len));
743
744 void* context = pCryptoHandler->DecryptStart(objnum, gennum);
745 pCryptoHandler->DecryptStream(context, pData, len, dest_buf);
746 pCryptoHandler->DecryptFinish(context, dest_buf);
747
748 FX_Free(pData);
749 pData = dest_buf.GetBuffer();
750 len = dest_buf.GetSize();
751 dest_buf.DetachBuffer();
752 }
753 }
754
755 CPDF_Stream* pStream = new CPDF_Stream(pData, len, pDict);
756 streamStartPos = m_Pos;
757 FXSYS_memset(m_WordBuffer, 0, kEndObjStr.GetLength() + 1);
758
759 GetNextWordInternal(nullptr);
760
761 int numMarkers = ReadEOLMarkers(m_Pos);
762 if (m_WordSize == kEndObjStr.GetLength() && numMarkers != 0 &&
763 FXSYS_memcmp(m_WordBuffer, kEndObjStr.GetPtr(), kEndObjStr.GetLength()) ==
764 0) {
765 m_Pos = streamStartPos;
766 }
767 return pStream;
768 }
769
770 void CPDF_SyntaxParser::InitParser(IFX_FileRead* pFileAccess,
771 FX_DWORD HeaderOffset) {
772 FX_Free(m_pFileBuf);
773
774 m_pFileBuf = FX_Alloc(uint8_t, m_BufSize);
775 m_HeaderOffset = HeaderOffset;
776 m_FileLen = pFileAccess->GetSize();
777 m_Pos = 0;
778 m_pFileAccess = pFileAccess;
779 m_BufOffset = 0;
780 pFileAccess->ReadBlock(
781 m_pFileBuf, 0,
782 (size_t)((FX_FILESIZE)m_BufSize > m_FileLen ? m_FileLen : m_BufSize));
783 }
784
785 uint32_t CPDF_SyntaxParser::GetDirectNum() {
786 bool bIsNumber;
787 GetNextWordInternal(&bIsNumber);
788 if (!bIsNumber)
789 return 0;
790
791 m_WordBuffer[m_WordSize] = 0;
792 return FXSYS_atoui(reinterpret_cast<const FX_CHAR*>(m_WordBuffer));
793 }
794
795 bool CPDF_SyntaxParser::IsWholeWord(FX_FILESIZE startpos,
796 FX_FILESIZE limit,
797 const CFX_ByteStringC& tag,
798 FX_BOOL checkKeyword) {
799 const FX_DWORD taglen = tag.GetLength();
800
801 bool bCheckLeft = !PDFCharIsDelimiter(tag[0]) && !PDFCharIsWhitespace(tag[0]);
802 bool bCheckRight = !PDFCharIsDelimiter(tag[taglen - 1]) &&
803 !PDFCharIsWhitespace(tag[taglen - 1]);
804
805 uint8_t ch;
806 if (bCheckRight && startpos + (int32_t)taglen <= limit &&
807 GetCharAt(startpos + (int32_t)taglen, ch)) {
808 if (PDFCharIsNumeric(ch) || PDFCharIsOther(ch) ||
809 (checkKeyword && PDFCharIsDelimiter(ch))) {
810 return false;
811 }
812 }
813
814 if (bCheckLeft && startpos > 0 && GetCharAt(startpos - 1, ch)) {
815 if (PDFCharIsNumeric(ch) || PDFCharIsOther(ch) ||
816 (checkKeyword && PDFCharIsDelimiter(ch))) {
817 return false;
818 }
819 }
820 return true;
821 }
822
823 // TODO(dsinclair): Split into a SearchWordForward and SearchWordBackwards
824 // and drop the bool.
825 FX_BOOL CPDF_SyntaxParser::SearchWord(const CFX_ByteStringC& tag,
826 FX_BOOL bWholeWord,
827 FX_BOOL bForward,
828 FX_FILESIZE limit) {
829 int32_t taglen = tag.GetLength();
830 if (taglen == 0)
831 return FALSE;
832
833 FX_FILESIZE pos = m_Pos;
834 int32_t offset = 0;
835 if (!bForward)
836 offset = taglen - 1;
837
838 const uint8_t* tag_data = tag.GetPtr();
839 uint8_t byte;
840 while (1) {
841 if (bForward) {
842 if (limit && pos >= m_Pos + limit)
843 return FALSE;
844
845 if (!GetCharAt(pos, byte))
846 return FALSE;
847
848 } else {
849 if (limit && pos <= m_Pos - limit)
850 return FALSE;
851
852 if (!GetCharAtBackward(pos, byte))
853 return FALSE;
854 }
855
856 if (byte == tag_data[offset]) {
857 if (bForward) {
858 offset++;
859 if (offset < taglen) {
860 pos++;
861 continue;
862 }
863 } else {
864 offset--;
865 if (offset >= 0) {
866 pos--;
867 continue;
868 }
869 }
870
871 FX_FILESIZE startpos = bForward ? pos - taglen + 1 : pos;
872 if (!bWholeWord || IsWholeWord(startpos, limit, tag, FALSE)) {
873 m_Pos = startpos;
874 return TRUE;
875 }
876 }
877
878 if (bForward) {
879 offset = byte == tag_data[0] ? 1 : 0;
880 pos++;
881 } else {
882 offset = byte == tag_data[taglen - 1] ? taglen - 2 : taglen - 1;
883 pos--;
884 }
885
886 if (pos < 0)
887 return FALSE;
888 }
889
890 return FALSE;
891 }
892
893 int32_t CPDF_SyntaxParser::SearchMultiWord(const CFX_ByteStringC& tags,
894 FX_BOOL bWholeWord,
895 FX_FILESIZE limit) {
896 int32_t ntags = 1;
897 for (int i = 0; i < tags.GetLength(); ++i) {
898 if (tags[i] == 0)
899 ++ntags;
900 }
901
902 std::vector<SearchTagRecord> patterns(ntags);
903 FX_DWORD start = 0;
904 FX_DWORD itag = 0;
905 FX_DWORD max_len = 0;
906 for (int i = 0; i <= tags.GetLength(); ++i) {
907 if (tags[i] == 0) {
908 FX_DWORD len = i - start;
909 max_len = std::max(len, max_len);
910 patterns[itag].m_pTag = tags.GetCStr() + start;
911 patterns[itag].m_Len = len;
912 patterns[itag].m_Offset = 0;
913 start = i + 1;
914 ++itag;
915 }
916 }
917
918 const FX_FILESIZE pos_limit = m_Pos + limit;
919 for (FX_FILESIZE pos = m_Pos; !limit || pos < pos_limit; ++pos) {
920 uint8_t byte;
921 if (!GetCharAt(pos, byte))
922 break;
923
924 for (int i = 0; i < ntags; ++i) {
925 SearchTagRecord& pat = patterns[i];
926 if (pat.m_pTag[pat.m_Offset] != byte) {
927 pat.m_Offset = (pat.m_pTag[0] == byte) ? 1 : 0;
928 continue;
929 }
930
931 ++pat.m_Offset;
932 if (pat.m_Offset != pat.m_Len)
933 continue;
934
935 if (!bWholeWord ||
936 IsWholeWord(pos - pat.m_Len, limit,
937 CFX_ByteStringC(pat.m_pTag, pat.m_Len), FALSE)) {
938 return i;
939 }
940
941 pat.m_Offset = (pat.m_pTag[0] == byte) ? 1 : 0;
942 }
943 }
944 return -1;
945 }
946
947 FX_FILESIZE CPDF_SyntaxParser::FindTag(const CFX_ByteStringC& tag,
948 FX_FILESIZE limit) {
949 int32_t taglen = tag.GetLength();
950 int32_t match = 0;
951 limit += m_Pos;
952 FX_FILESIZE startpos = m_Pos;
953
954 while (1) {
955 uint8_t ch;
956 if (!GetNextChar(ch))
957 return -1;
958
959 if (ch == tag[match]) {
960 match++;
961 if (match == taglen)
962 return m_Pos - startpos - taglen;
963 } else {
964 match = ch == tag[0] ? 1 : 0;
965 }
966
967 if (limit && m_Pos == limit)
968 return -1;
969 }
970 return -1;
971 }
972
973 void CPDF_SyntaxParser::SetEncrypt(
974 std::unique_ptr<CPDF_CryptoHandler> pCryptoHandler) {
975 m_pCryptoHandler = std::move(pCryptoHandler);
976 }
OLDNEW
« no previous file with comments | « core/src/fpdfapi/fpdf_parser/cpdf_syntax_parser.h ('k') | core/src/fpdfapi/fpdf_parser/fpdf_parser_fdf.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698