Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(934)

Side by Side Diff: core/src/fpdfapi/fpdf_parser/cpdf_syntax_parser.cpp

Issue 1774753002: Split CPDF_SyntaxParser into its own named .cpp/.h files. (Closed) Base URL: https://pdfium.googlesource.com/pdfium.git@master
Patch Set: Stray file. Created 4 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 // Copyright 2014 PDFium Authors. All rights reserved.
dsinclair 2016/03/08 01:15:05 nit: 2016?
Tom Sepez 2016/03/08 19:35:41 Done.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7 #include "core/src/fpdfapi/fpdf_parser/cpdf_syntax_parser.h"
8
9 #include "core/include/fpdfapi/fpdf_module.h"
10 #include "core/include/fpdfapi/fpdf_parser.h"
11 #include "core/include/fxcrt/fx_ext.h"
12 #include "third_party/base/numerics/safe_math.h"
13
// Per-tag matching state used by CPDF_SyntaxParser::SearchMultiWord():
// m_pTag points at the first byte of one tag, m_Len is that tag's length, and
// m_Offset is the number of leading tag bytes matched so far.
14 struct SearchTagRecord {
dsinclair 2016/03/08 01:15:05 Can this go in namespace {}?
Tom Sepez 2016/03/08 19:35:42 Done.
15 const char* m_pTag;
16 FX_DWORD m_Len;
17 FX_DWORD m_Offset;
18 };
19
// Nesting depth shared by GetObject() and GetObjectByStrict(): each call
// increments it and gives up once it exceeds kParserMaxRecursionDepth.
20 // static
21 int CPDF_SyntaxParser::s_CurrentRecursionDepth = 0;
22
// Creates a parser with no file attached and no read buffer allocated.
// Only the four members below are set here; the position, file length, header
// offset and buffer offset are assigned later by InitParser().
23 CPDF_SyntaxParser::CPDF_SyntaxParser()
24 : m_MetadataObjnum(0),
25 m_pFileAccess(nullptr),
26 m_pFileBuf(nullptr),
27 m_BufSize(CPDF_ModuleMgr::kFileBufSize) {}
28
// Frees the read buffer allocated by InitParser(). m_pFileAccess is not
// released here.
29 CPDF_SyntaxParser::~CPDF_SyntaxParser() {
30 FX_Free(m_pFileBuf);
31 }
32
33 FX_BOOL CPDF_SyntaxParser::GetCharAt(FX_FILESIZE pos, uint8_t& ch) {
34 CFX_AutoRestorer<FX_FILESIZE> save_pos(&m_Pos);
35 m_Pos = pos;
36 return GetNextChar(ch);
37 }
38
39 FX_BOOL CPDF_SyntaxParser::GetNextChar(uint8_t& ch) {
40 FX_FILESIZE pos = m_Pos + m_HeaderOffset;
41 if (pos >= m_FileLen)
42 return FALSE;
43
44 if (m_BufOffset >= pos || (FX_FILESIZE)(m_BufOffset + m_BufSize) <= pos) {
45 FX_FILESIZE read_pos = pos;
46 FX_DWORD read_size = m_BufSize;
47 if ((FX_FILESIZE)read_size > m_FileLen)
48 read_size = (FX_DWORD)m_FileLen;
49
50 if ((FX_FILESIZE)(read_pos + read_size) > m_FileLen) {
51 if (m_FileLen < (FX_FILESIZE)read_size) {
52 read_pos = 0;
53 read_size = (FX_DWORD)m_FileLen;
54 } else {
55 read_pos = m_FileLen - read_size;
56 }
57 }
58
59 if (!m_pFileAccess->ReadBlock(m_pFileBuf, read_pos, read_size))
60 return FALSE;
61
62 m_BufOffset = read_pos;
63 }
64 ch = m_pFileBuf[pos - m_BufOffset];
65 m_Pos++;
66 return TRUE;
67 }
68
69 FX_BOOL CPDF_SyntaxParser::GetCharAtBackward(FX_FILESIZE pos, uint8_t& ch) {
70 pos += m_HeaderOffset;
71 if (pos >= m_FileLen)
72 return FALSE;
73
74 if (m_BufOffset >= pos || (FX_FILESIZE)(m_BufOffset + m_BufSize) <= pos) {
75 FX_FILESIZE read_pos;
76 if (pos < (FX_FILESIZE)m_BufSize)
77 read_pos = 0;
78 else
79 read_pos = pos - m_BufSize + 1;
80
81 FX_DWORD read_size = m_BufSize;
82 if ((FX_FILESIZE)(read_pos + read_size) > m_FileLen) {
83 if (m_FileLen < (FX_FILESIZE)read_size) {
84 read_pos = 0;
85 read_size = (FX_DWORD)m_FileLen;
86 } else {
87 read_pos = m_FileLen - read_size;
88 }
89 }
90
91 if (!m_pFileAccess->ReadBlock(m_pFileBuf, read_pos, read_size))
92 return FALSE;
93
94 m_BufOffset = read_pos;
95 }
96 ch = m_pFileBuf[pos - m_BufOffset];
97 return TRUE;
98 }
99
100 FX_BOOL CPDF_SyntaxParser::ReadBlock(uint8_t* pBuf, FX_DWORD size) {
101 if (!m_pFileAccess->ReadBlock(pBuf, m_Pos + m_HeaderOffset, size))
102 return FALSE;
103 m_Pos += size;
104 return TRUE;
105 }
106
// Reads the next token starting at m_Pos into m_WordBuffer and stores its
// length in m_WordSize. m_WordSize is reset to 0 on entry, so running out of
// data before a token starts leaves an empty word. Leading whitespace and '%'
// comments are skipped.
// |bIsNumber| is optional. When non-null it is set to true on entry and is
// cleared once the token is found to start with a delimiter or to contain a
// byte for which PDFCharIsNumeric() is false. It therefore stays true when
// no token is read at all.
107 void CPDF_SyntaxParser::GetNextWordInternal(bool* bIsNumber) {
108 m_WordSize = 0;
109 if (bIsNumber)
110 *bIsNumber = true;
111
112 uint8_t ch;
113 if (!GetNextChar(ch))
114 return;
115
// Skip alternating runs of whitespace and '%' comments until a token byte
// is reached.
116 while (1) {
117 while (PDFCharIsWhitespace(ch)) {
118 if (!GetNextChar(ch))
119 return;
120 }
121
122 if (ch != '%')
123 break;
124
// Inside a comment: discard bytes up to the first line-ending byte.
125 while (1) {
126 if (!GetNextChar(ch))
127 return;
128 if (PDFCharIsLineEnding(ch))
129 break;
130 }
131 }
132
// Tokens that start with a delimiter: a name ("/..."), "<<", ">>", or the
// single delimiter byte itself.
133 if (PDFCharIsDelimiter(ch)) {
134 if (bIsNumber)
135 *bIsNumber = false;
136
137 m_WordBuffer[m_WordSize++] = ch;
138 if (ch == '/') {
139 while (1) {
140 if (!GetNextChar(ch))
141 return;
142
// The name ends at the first byte that is neither "other" nor numeric;
// un-read that byte so the next token starts with it.
143 if (!PDFCharIsOther(ch) && !PDFCharIsNumeric(ch)) {
144 m_Pos--;
145 return;
146 }
147
// Bytes that do not fit in the word buffer are consumed but dropped.
148 if (m_WordSize < sizeof(m_WordBuffer) - 1)
149 m_WordBuffer[m_WordSize++] = ch;
150 }
151 } else if (ch == '<') {
152 if (!GetNextChar(ch))
153 return;
154
// A second '<' completes the "<<" token; any other byte is pushed back.
155 if (ch == '<')
156 m_WordBuffer[m_WordSize++] = ch;
157 else
158 m_Pos--;
159 } else if (ch == '>') {
160 if (!GetNextChar(ch))
161 return;
162
// Likewise, a second '>' completes the ">>" token.
163 if (ch == '>')
164 m_WordBuffer[m_WordSize++] = ch;
165 else
166 m_Pos--;
167 }
168 return;
169 }
170
// Regular token: accumulate bytes until a delimiter or whitespace byte is
// seen, truncating silently if the word buffer fills up.
171 while (1) {
172 if (m_WordSize < sizeof(m_WordBuffer) - 1)
173 m_WordBuffer[m_WordSize++] = ch;
174
175 if (!PDFCharIsNumeric(ch)) {
176 if (bIsNumber)
177 *bIsNumber = false;
178 }
179
180 if (!GetNextChar(ch))
181 return;
182
183 if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) {
// Leave the terminating byte unread for the next token.
184 m_Pos--;
185 break;
186 }
187 }
188 }
189
// Reads the body of a literal string; GetObject() calls this right after it
// has consumed the opening "(". Returns the decoded bytes once the matching
// ')' is found, or whatever was accumulated if the data runs out first.
//
// |status| drives a small state machine:
//   0 - ordinary text, tracking nested parentheses in |parlevel|;
//   1 - immediately after a backslash;
//   2 - one octal digit of an escape has been read;
//   3 - two octal digits of an escape have been read;
//   4 - immediately after backslash + CR (an LF may follow).
// A `continue` inside the switch re-dispatches the current byte in the new
// state without fetching another byte.
190 CFX_ByteString CPDF_SyntaxParser::ReadString() {
191 uint8_t ch;
192 if (!GetNextChar(ch))
193 return CFX_ByteString();
194
195 CFX_ByteTextBuf buf;
196 int32_t parlevel = 0;
197 int32_t status = 0;
198 int32_t iEscCode = 0;
199 while (1) {
200 switch (status) {
201 case 0:
202 if (ch == ')') {
// An unbalanced ')' terminates the string; a balanced one is literal text.
203 if (parlevel == 0) {
204 return buf.GetByteString();
205 }
206 parlevel--;
207 buf.AppendChar(')');
208 } else if (ch == '(') {
209 parlevel++;
210 buf.AppendChar('(');
211 } else if (ch == '\\') {
212 status = 1;
213 } else {
214 buf.AppendChar(ch);
215 }
216 break;
217 case 1:
// Start of an octal escape (up to three digits).
218 if (ch >= '0' && ch <= '7') {
219 iEscCode = FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch));
220 status = 2;
221 break;
222 }
223
224 if (ch == 'n') {
225 buf.AppendChar('\n');
226 } else if (ch == 'r') {
227 buf.AppendChar('\r');
228 } else if (ch == 't') {
229 buf.AppendChar('\t');
230 } else if (ch == 'b') {
231 buf.AppendChar('\b');
232 } else if (ch == 'f') {
233 buf.AppendChar('\f');
234 } else if (ch == '\r') {
// Backslash + CR: emit nothing and check for a following LF in state 4.
235 status = 4;
236 break;
237 } else if (ch != '\n') {
// Any other escaped byte stands for itself; backslash + LF emits nothing.
238 buf.AppendChar(ch);
239 }
240 status = 0;
241 break;
242 case 2:
243 if (ch >= '0' && ch <= '7') {
244 iEscCode =
245 iEscCode * 8 + FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch));
246 status = 3;
247 } else {
// The escape ended after one digit: emit it and reprocess |ch| as text.
248 buf.AppendChar(iEscCode);
249 status = 0;
250 continue;
251 }
252 break;
253 case 3:
254 if (ch >= '0' && ch <= '7') {
255 iEscCode =
256 iEscCode * 8 + FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch));
257 buf.AppendChar(iEscCode);
258 status = 0;
259 } else {
// The escape ended after two digits: emit it and reprocess |ch| as text.
260 buf.AppendChar(iEscCode);
261 status = 0;
262 continue;
263 }
264 break;
265 case 4:
// Swallow the LF of a CR LF pair; anything else is reprocessed as text.
266 status = 0;
267 if (ch != '\n')
268 continue;
269 break;
270 }
271
272 if (!GetNextChar(ch))
273 break;
274 }
275
// Reached only when the data ended before the closing ')'.
// NOTE(review): the result of this extra read is not used.
276 GetNextChar(ch);
277 return buf.GetByteString();
278 }
279
280 CFX_ByteString CPDF_SyntaxParser::ReadHexString() {
281 uint8_t ch;
282 if (!GetNextChar(ch))
283 return CFX_ByteString();
284
285 CFX_ByteTextBuf buf;
286 bool bFirst = true;
287 uint8_t code = 0;
288 while (1) {
289 if (ch == '>')
290 break;
291
292 if (std::isxdigit(ch)) {
293 int val = FXSYS_toHexDigit(ch);
294 if (bFirst) {
295 code = val * 16;
296 } else {
297 code += val;
298 buf.AppendByte(code);
299 }
300 bFirst = !bFirst;
301 }
302
303 if (!GetNextChar(ch))
304 break;
305 }
306 if (!bFirst)
307 buf.AppendByte(code);
308
309 return buf.GetByteString();
310 }
311
312 void CPDF_SyntaxParser::ToNextLine() {
313 uint8_t ch;
314 while (GetNextChar(ch)) {
315 if (ch == '\n')
316 break;
317
318 if (ch == '\r') {
319 GetNextChar(ch);
320 if (ch != '\n')
321 --m_Pos;
322 break;
323 }
324 }
325 }
326
327 void CPDF_SyntaxParser::ToNextWord() {
328 uint8_t ch;
329 if (!GetNextChar(ch))
330 return;
331
332 while (1) {
333 while (PDFCharIsWhitespace(ch)) {
334 if (!GetNextChar(ch))
335 return;
336 }
337
338 if (ch != '%')
339 break;
340
341 while (1) {
342 if (!GetNextChar(ch))
343 return;
344 if (PDFCharIsLineEnding(ch))
345 break;
346 }
347 }
348 m_Pos--;
349 }
350
351 CFX_ByteString CPDF_SyntaxParser::GetNextWord(bool* bIsNumber) {
352 GetNextWordInternal(bIsNumber);
353 return CFX_ByteString((const FX_CHAR*)m_WordBuffer, m_WordSize);
354 }
355
// Returns the next token, without reporting whether it is numeric.
356 CFX_ByteString CPDF_SyntaxParser::GetKeyword() {
357 return GetNextWord(nullptr);
358 }
359
// Parses one object starting at m_Pos and returns it as a newly allocated
// object. Returns nullptr when nothing could be parsed: no word was read, the
// word is not the start of a known object, or the nesting depth exceeded
// kParserMaxRecursionDepth.
//
// |pObjList| is handed to any CPDF_Reference that gets created.
// |objnum| and |gennum| are forwarded to the crypto handler and to
// ReadStream(). |bDecrypt| controls whether strings read directly by this
// call are decrypted; nested calls for array elements and dictionary values
// always pass true, while the signature /Contents re-read passes false.
360 CPDF_Object* CPDF_SyntaxParser::GetObject(CPDF_IndirectObjectHolder* pObjList,
361 FX_DWORD objnum,
362 FX_DWORD gennum,
363 FX_BOOL bDecrypt) {
// NOTE(review): |restorer| is presumed to put the previous depth back on
// every return path; CFX_AutoRestorer is defined elsewhere.
364 CFX_AutoRestorer<int> restorer(&s_CurrentRecursionDepth);
365 if (++s_CurrentRecursionDepth > kParserMaxRecursionDepth)
366 return nullptr;
367
368 FX_FILESIZE SavedPos = m_Pos;
369 bool bIsNumber;
370 CFX_ByteString word = GetNextWord(&bIsNumber);
371 if (word.GetLength() == 0)
372 return nullptr;
373
// A number may begin an indirect reference of the form "N G R". Look ahead
// two words and rewind to just after the number if they do not complete one.
374 if (bIsNumber) {
375 FX_FILESIZE SavedPos = m_Pos;
376 CFX_ByteString nextword = GetNextWord(&bIsNumber);
377 if (bIsNumber) {
378 CFX_ByteString nextword2 = GetNextWord(nullptr);
379 if (nextword2 == "R") {
// This local shadows the |objnum| parameter: it is the referenced object
// number parsed from |word|.
380 FX_DWORD objnum = FXSYS_atoui(word);
381 return new CPDF_Reference(pObjList, objnum);
382 }
383 }
384 m_Pos = SavedPos;
385 return new CPDF_Number(word);
386 }
387
388 if (word == "true" || word == "false")
389 return new CPDF_Boolean(word == "true");
390
391 if (word == "null")
392 return new CPDF_Null;
393
394 if (word == "(") {
395 CFX_ByteString str = ReadString();
396 if (m_pCryptoHandler && bDecrypt)
397 m_pCryptoHandler->Decrypt(objnum, gennum, str);
398 return new CPDF_String(str, FALSE);
399 }
400
401 if (word == "<") {
402 CFX_ByteString str = ReadHexString();
403 if (m_pCryptoHandler && bDecrypt)
404 m_pCryptoHandler->Decrypt(objnum, gennum, str);
405
406 return new CPDF_String(str, TRUE);
407 }
408
// Array: elements are collected until a nested GetObject() returns nullptr.
// No check is made here that the terminating word was "]".
409 if (word == "[") {
410 CPDF_Array* pArray = new CPDF_Array;
411 while (CPDF_Object* pObj = GetObject(pObjList, objnum, gennum, true))
412 pArray->Add(pObj);
413
414 return pArray;
415 }
416
// Name: decode the token without its leading '/'. m_WordBuffer still holds
// the token that was just read into |word|.
417 if (word[0] == '/') {
418 return new CPDF_Name(
419 PDF_NameDecode(CFX_ByteStringC(m_WordBuffer + 1, m_WordSize - 1)));
420 }
421
// Dictionary, optionally followed by a stream.
422 if (word == "<<") {
// NOTE(review): nKeys is incremented below but never read in this function.
423 int32_t nKeys = 0;
424 FX_FILESIZE dwSignValuePos = 0;
425
426 std::unique_ptr<CPDF_Dictionary, ReleaseDeleter<CPDF_Dictionary>> pDict(
427 new CPDF_Dictionary);
428 while (1) {
429 CFX_ByteString key = GetNextWord(nullptr);
430 if (key.IsEmpty())
431 return nullptr;
432
// Position of the first byte of |key|, computed back from the current
// position; used to push an "endobj" back for the caller.
433 FX_FILESIZE SavedPos = m_Pos - key.GetLength();
434 if (key == ">>")
435 break;
436
// A dictionary that runs into "endobj" is treated as closed.
437 if (key == "endobj") {
438 m_Pos = SavedPos;
439 break;
440 }
441
// Anything that is not a name cannot be a key; skip it.
442 if (key[0] != '/')
443 continue;
444
445 ++nKeys;
446 key = PDF_NameDecode(key);
447 if (key.IsEmpty())
448 continue;
449
// Remember where the /Contents value starts so that it can be re-read
// without decryption below.
450 if (key == "/Contents")
451 dwSignValuePos = m_Pos;
452
// A value that fails to parse is skipped and its key is dropped.
453 CPDF_Object* pObj = GetObject(pObjList, objnum, gennum, true);
454 if (!pObj)
455 continue;
456
457 CFX_ByteStringC keyNoSlash(key.c_str() + 1, key.GetLength() - 1);
458 pDict->SetAt(keyNoSlash, pObj);
459 }
460
461 // Only when this is a signature dictionary and has contents, we reset the
462 // contents to the un-decrypted form.
463 if (IsSignatureDict(pDict.get()) && dwSignValuePos) {
464 CFX_AutoRestorer<FX_FILESIZE> save_pos(&m_Pos);
465 m_Pos = dwSignValuePos;
466 pDict->SetAt("Contents", GetObject(pObjList, objnum, gennum, false));
467 }
468
// If the next word is "stream", the dictionary becomes the stream's
// dictionary; otherwise the word is pushed back and the dictionary returned.
469 FX_FILESIZE SavedPos = m_Pos;
470 CFX_ByteString nextword = GetNextWord(nullptr);
471 if (nextword != "stream") {
472 m_Pos = SavedPos;
473 return pDict.release();
474 }
475 return ReadStream(pDict.release(), objnum, gennum);
476 }
477
// A stray ">>" is pushed back so that an enclosing dictionary loop reads it
// as its terminator.
478 if (word == ">>")
479 m_Pos = SavedPos;
480
481 return nullptr;
482 }
483
// Stricter variant of GetObject(). Differences visible in this function:
//  - strings are decrypted whenever a crypto handler is set (there is no
//    bDecrypt parameter);
//  - an array is only returned if the word that ended it starts with ']';
//  - a dictionary value that fails to parse aborts the whole dictionary:
//    the rest of the current line is skipped and nullptr is returned;
//  - signature /Contents receive no special treatment.
// Nested elements and values are still parsed with the non-strict
// GetObject(). Returns a newly allocated object, or nullptr on failure or
// when the nesting depth exceeds kParserMaxRecursionDepth.
484 CPDF_Object* CPDF_SyntaxParser::GetObjectByStrict(
485 CPDF_IndirectObjectHolder* pObjList,
486 FX_DWORD objnum,
487 FX_DWORD gennum) {
488 CFX_AutoRestorer<int> restorer(&s_CurrentRecursionDepth);
489 if (++s_CurrentRecursionDepth > kParserMaxRecursionDepth)
490 return nullptr;
491
492 FX_FILESIZE SavedPos = m_Pos;
493 bool bIsNumber;
494 CFX_ByteString word = GetNextWord(&bIsNumber);
495 if (word.GetLength() == 0)
496 return nullptr;
497
// A number may begin an indirect reference "N G R"; rewind to just after the
// number if the next two words do not complete one.
498 if (bIsNumber) {
499 FX_FILESIZE SavedPos = m_Pos;
500 CFX_ByteString nextword = GetNextWord(&bIsNumber);
501 if (bIsNumber) {
502 CFX_ByteString nextword2 = GetNextWord(nullptr);
503 if (nextword2 == "R")
504 return new CPDF_Reference(pObjList, FXSYS_atoui(word));
505 }
506 m_Pos = SavedPos;
507 return new CPDF_Number(word);
508 }
509
510 if (word == "true" || word == "false")
511 return new CPDF_Boolean(word == "true");
512
513 if (word == "null")
514 return new CPDF_Null;
515
516 if (word == "(") {
517 CFX_ByteString str = ReadString();
518 if (m_pCryptoHandler)
519 m_pCryptoHandler->Decrypt(objnum, gennum, str);
520 return new CPDF_String(str, FALSE);
521 }
522
523 if (word == "<") {
524 CFX_ByteString str = ReadHexString();
525 if (m_pCryptoHandler)
526 m_pCryptoHandler->Decrypt(objnum, gennum, str);
527 return new CPDF_String(str, TRUE);
528 }
529
530 if (word == "[") {
531 std::unique_ptr<CPDF_Array, ReleaseDeleter<CPDF_Array>> pArray(
532 new CPDF_Array);
533 while (CPDF_Object* pObj = GetObject(pObjList, objnum, gennum, true))
534 pArray->Add(pObj);
535
// m_WordBuffer holds the last token read by the nested call that returned
// nullptr; the array is accepted only if that token starts with ']'.
536 return m_WordBuffer[0] == ']' ? pArray.release() : nullptr;
537 }
538
539 if (word[0] == '/') {
540 return new CPDF_Name(
541 PDF_NameDecode(CFX_ByteStringC(m_WordBuffer + 1, m_WordSize - 1)));
542 }
543
544 if (word == "<<") {
545 std::unique_ptr<CPDF_Dictionary, ReleaseDeleter<CPDF_Dictionary>> pDict(
546 new CPDF_Dictionary);
547 while (1) {
// Position before the key is read (including any whitespace before it), used
// to push an "endobj" back for the caller.
548 FX_FILESIZE SavedPos = m_Pos;
549 CFX_ByteString key = GetNextWord(nullptr);
550 if (key.IsEmpty())
551 return nullptr;
552
553 if (key == ">>")
554 break;
555
556 if (key == "endobj") {
557 m_Pos = SavedPos;
558 break;
559 }
560
// Anything that is not a name cannot be a key; skip it.
561 if (key[0] != '/')
562 continue;
563
564 key = PDF_NameDecode(key);
565 std::unique_ptr<CPDF_Object, ReleaseDeleter<CPDF_Object>> obj(
566 GetObject(pObjList, objnum, gennum, true));
567 if (!obj) {
// Strict mode: a bad value invalidates the dictionary. Consume the rest of
// the line (up to and including the first LF or CR) before giving up.
568 uint8_t ch;
569 while (GetNextChar(ch) && ch != 0x0A && ch != 0x0D) {
570 continue;
571 }
572 return nullptr;
573 }
574
// A key consisting only of "/" is ignored; |obj| is then released by its
// owner when it goes out of scope.
575 if (key.GetLength() > 1) {
576 pDict->SetAt(CFX_ByteStringC(key.c_str() + 1, key.GetLength() - 1),
577 obj.release());
578 }
579 }
580
// If the next word is "stream", the dictionary becomes the stream's
// dictionary; otherwise the word is pushed back and the dictionary returned.
581 FX_FILESIZE SavedPos = m_Pos;
582 CFX_ByteString nextword = GetNextWord(nullptr);
583 if (nextword != "stream") {
584 m_Pos = SavedPos;
585 return pDict.release();
586 }
587
588 return ReadStream(pDict.release(), objnum, gennum);
589 }
590
// A stray ">>" is pushed back so that an enclosing dictionary loop reads it
// as its terminator.
591 if (word == ">>")
592 m_Pos = SavedPos;
593
594 return nullptr;
595 }
596
597 unsigned int CPDF_SyntaxParser::ReadEOLMarkers(FX_FILESIZE pos) {
598 unsigned char byte1 = 0;
599 unsigned char byte2 = 0;
600
601 GetCharAt(pos, byte1);
602 GetCharAt(pos + 1, byte2);
603
604 if (byte1 == '\r' && byte2 == '\n')
605 return 2;
606
607 if (byte1 == '\r' || byte1 == '\n')
608 return 1;
609
610 return 0;
611 }
612
// Reads the stream data that follows a dictionary whose "stream" keyword has
// just been consumed, and returns a new CPDF_Stream built from |pDict| and
// the data. On every failure path |pDict| is released and nullptr is
// returned.
//
// The data length is taken from the dictionary's "Length" entry when usable.
// When no crypto handler applies to this object, that length is checked by
// looking for "endstream" right after the data. If the check fails, the
// length is recomputed by searching for "endstream" / "endobj" and written
// back to the dictionary.
613 CPDF_Stream* CPDF_SyntaxParser::ReadStream(CPDF_Dictionary* pDict,
614 FX_DWORD objnum,
615 FX_DWORD gennum) {
616 CPDF_Object* pLenObj = pDict->GetElement("Length");
617 FX_FILESIZE len = -1;
618 CPDF_Reference* pLenObjRef = ToReference(pLenObj);
619
// "Length" is used if it is not a reference, or if it is a reference that has
// an object list and points at an object other than this stream itself.
620 bool differingObjNum = !pLenObjRef || (pLenObjRef->GetObjList() &&
621 pLenObjRef->GetRefObjNum() != objnum);
622 if (pLenObj && differingObjNum)
623 len = pLenObj->GetInteger();
624
625 // Locate the start of stream.
626 ToNextLine();
627 FX_FILESIZE streamStartPos = m_Pos;
628
629 const CFX_ByteStringC kEndStreamStr("endstream");
630 const CFX_ByteStringC kEndObjStr("endobj");
631
// No decryption is applied to the object whose number is m_MetadataObjnum.
632 CPDF_CryptoHandler* pCryptoHandler =
633 objnum == (FX_DWORD)m_MetadataObjnum ? nullptr : m_pCryptoHandler.get();
// NOTE(review): the length validation below runs only when no crypto handler
// is in use; with a handler, |len| from the dictionary is used as-is.
634 if (!pCryptoHandler) {
635 FX_BOOL bSearchForKeyword = TRUE;
636 if (len >= 0) {
// Jump to where the data should end, provided that position is inside the
// file and computing it does not overflow.
637 pdfium::base::CheckedNumeric<FX_FILESIZE> pos = m_Pos;
638 pos += len;
639 if (pos.IsValid() && pos.ValueOrDie() < m_FileLen)
640 m_Pos = pos.ValueOrDie();
641
642 m_Pos += ReadEOLMarkers(m_Pos);
// Clear the start of the word buffer so that a short or empty next word
// cannot compare equal to "endstream" through stale bytes.
643 FXSYS_memset(m_WordBuffer, 0, kEndStreamStr.GetLength() + 1);
644 GetNextWordInternal(nullptr);
645 // Earlier version of PDF specification doesn't require EOL marker before
646 // 'endstream' keyword. If keyword 'endstream' follows the bytes in
647 // specified length, it signals the end of stream.
648 if (FXSYS_memcmp(m_WordBuffer, kEndStreamStr.GetPtr(),
649 kEndStreamStr.GetLength()) == 0) {
650 bSearchForKeyword = FALSE;
651 }
652 }
653
654 if (bSearchForKeyword) {
655 // If len is not available, len needs to be calculated
656 // by searching the keywords "endstream" or "endobj".
657 m_Pos = streamStartPos;
658 FX_FILESIZE endStreamOffset = 0;
659 while (endStreamOffset >= 0) {
660 endStreamOffset = FindTag(kEndStreamStr, 0);
661
662 // Can't find "endstream".
663 if (endStreamOffset < 0)
664 break;
665
666 // Stop searching when "endstream" is found.
// FindTag() leaves m_Pos just past the tag, so the tag starts at
// m_Pos - length; the offset is re-expressed relative to streamStartPos.
667 if (IsWholeWord(m_Pos - kEndStreamStr.GetLength(), m_FileLen,
668 kEndStreamStr, TRUE)) {
669 endStreamOffset = m_Pos - streamStartPos - kEndStreamStr.GetLength();
670 break;
671 }
672 }
673
674 m_Pos = streamStartPos;
675 FX_FILESIZE endObjOffset = 0;
676 while (endObjOffset >= 0) {
677 endObjOffset = FindTag(kEndObjStr, 0);
678
679 // Can't find "endobj".
680 if (endObjOffset < 0)
681 break;
682
683 // Stop searching when "endobj" is found.
684 if (IsWholeWord(m_Pos - kEndObjStr.GetLength(), m_FileLen, kEndObjStr,
685 TRUE)) {
686 endObjOffset = m_Pos - streamStartPos - kEndObjStr.GetLength();
687 break;
688 }
689 }
690
691 // Can't find "endstream" or "endobj".
692 if (endStreamOffset < 0 && endObjOffset < 0) {
693 pDict->Release();
694 return nullptr;
695 }
696
// Use whichever keyword was found; if both were found, the earlier one marks
// the end of the data.
697 if (endStreamOffset < 0 && endObjOffset >= 0) {
698 // Correct the position of end stream.
699 endStreamOffset = endObjOffset;
700 } else if (endStreamOffset >= 0 && endObjOffset < 0) {
701 // Correct the position of end obj.
702 endObjOffset = endStreamOffset;
703 } else if (endStreamOffset > endObjOffset) {
704 endStreamOffset = endObjOffset;
705 }
706
// An end-of-line marker directly before the keyword is not part of the data:
// drop CR LF (2 bytes) or a single CR / LF (1 byte) from the length.
707 len = endStreamOffset;
708 int numMarkers = ReadEOLMarkers(streamStartPos + endStreamOffset - 2);
709 if (numMarkers == 2) {
710 len -= 2;
711 } else {
712 numMarkers = ReadEOLMarkers(streamStartPos + endStreamOffset - 1);
713 if (numMarkers == 1) {
714 len -= 1;
715 }
716 }
717
718 if (len < 0) {
719 pDict->Release();
720 return nullptr;
721 }
// Store the recomputed length back into the dictionary.
722 pDict->SetAtInteger("Length", len);
723 }
724 m_Pos = streamStartPos;
725 }
726
727 if (len < 0) {
728 pDict->Release();
729 return nullptr;
730 }
731
732 uint8_t* pData = nullptr;
733 if (len > 0) {
// NOTE(review): the result of ReadBlock() is not checked here.
734 pData = FX_Alloc(uint8_t, len);
735 ReadBlock(pData, len);
736 if (pCryptoHandler) {
737 CFX_BinaryBuf dest_buf;
738 dest_buf.EstimateSize(pCryptoHandler->DecryptGetSize(len));
739
740 void* context = pCryptoHandler->DecryptStart(objnum, gennum);
741 pCryptoHandler->DecryptStream(context, pData, len, dest_buf);
742 pCryptoHandler->DecryptFinish(context, dest_buf);
743
// Replace the raw bytes with the decrypted buffer. DetachBuffer() is called
// after taking the pointer, presumably so |dest_buf| does not free it.
744 FX_Free(pData);
745 pData = dest_buf.GetBuffer();
746 len = dest_buf.GetSize();
747 dest_buf.DetachBuffer();
748 }
749 }
750
751 CPDF_Stream* pStream = new CPDF_Stream(pData, len, pDict);
// |streamStartPos| is reused here as "position right after the data".
752 streamStartPos = m_Pos;
753 FXSYS_memset(m_WordBuffer, 0, kEndObjStr.GetLength() + 1);
754
755 GetNextWordInternal(nullptr);
756
// If the word right after the data is exactly "endobj" followed by an
// end-of-line marker, rewind so that the caller still sees it. Any other
// word (such as "endstream") stays consumed.
757 int numMarkers = ReadEOLMarkers(m_Pos);
758 if (m_WordSize == kEndObjStr.GetLength() && numMarkers != 0 &&
759 FXSYS_memcmp(m_WordBuffer, kEndObjStr.GetPtr(), kEndObjStr.GetLength()) ==
760 0) {
761 m_Pos = streamStartPos;
762 }
763 return pStream;
764 }
765
766 void CPDF_SyntaxParser::InitParser(IFX_FileRead* pFileAccess,
767 FX_DWORD HeaderOffset) {
768 FX_Free(m_pFileBuf);
769
770 m_pFileBuf = FX_Alloc(uint8_t, m_BufSize);
771 m_HeaderOffset = HeaderOffset;
772 m_FileLen = pFileAccess->GetSize();
773 m_Pos = 0;
774 m_pFileAccess = pFileAccess;
775 m_BufOffset = 0;
776 pFileAccess->ReadBlock(
777 m_pFileBuf, 0,
778 (size_t)((FX_FILESIZE)m_BufSize > m_FileLen ? m_FileLen : m_BufSize));
779 }
780
781 uint32_t CPDF_SyntaxParser::GetDirectNum() {
782 bool bIsNumber;
783 GetNextWordInternal(&bIsNumber);
784 if (!bIsNumber)
785 return 0;
786
787 m_WordBuffer[m_WordSize] = 0;
788 return FXSYS_atoui(reinterpret_cast<const FX_CHAR*>(m_WordBuffer));
789 }
790
791 bool CPDF_SyntaxParser::IsWholeWord(FX_FILESIZE startpos,
792 FX_FILESIZE limit,
793 const CFX_ByteStringC& tag,
794 FX_BOOL checkKeyword) {
795 const FX_DWORD taglen = tag.GetLength();
796
797 bool bCheckLeft = !PDFCharIsDelimiter(tag[0]) && !PDFCharIsWhitespace(tag[0]);
798 bool bCheckRight = !PDFCharIsDelimiter(tag[taglen - 1]) &&
799 !PDFCharIsWhitespace(tag[taglen - 1]);
800
801 uint8_t ch;
802 if (bCheckRight && startpos + (int32_t)taglen <= limit &&
803 GetCharAt(startpos + (int32_t)taglen, ch)) {
804 if (PDFCharIsNumeric(ch) || PDFCharIsOther(ch) ||
805 (checkKeyword && PDFCharIsDelimiter(ch))) {
806 return false;
807 }
808 }
809
810 if (bCheckLeft && startpos > 0 && GetCharAt(startpos - 1, ch)) {
811 if (PDFCharIsNumeric(ch) || PDFCharIsOther(ch) ||
812 (checkKeyword && PDFCharIsDelimiter(ch))) {
813 return false;
814 }
815 }
816 return true;
817 }
818
// Searches for |tag| starting at m_Pos, scanning towards the end of the file
// when |bForward| is true and towards the beginning otherwise. On success
// m_Pos is set to the position of the first byte of the match and TRUE is
// returned. On failure FALSE is returned and this function does not assign
// m_Pos. An empty |tag| always fails.
// |bWholeWord| additionally requires IsWholeWord() to accept the match.
// |limit|, when non-zero, bounds how many positions away from m_Pos the scan
// may go.
819 // TODO(dsinclair): Split into a SearchWordForward and SearchWordBackwards
820 // and drop the bool.
821 FX_BOOL CPDF_SyntaxParser::SearchWord(const CFX_ByteStringC& tag,
822 FX_BOOL bWholeWord,
823 FX_BOOL bForward,
824 FX_FILESIZE limit) {
825 int32_t taglen = tag.GetLength();
826 if (taglen == 0)
827 return FALSE;
828
// |offset| is the index of the tag byte expected next: it starts at the first
// byte for a forward scan and at the last byte for a backward scan.
829 FX_FILESIZE pos = m_Pos;
830 int32_t offset = 0;
831 if (!bForward)
832 offset = taglen - 1;
833
834 const uint8_t* tag_data = tag.GetPtr();
835 uint8_t byte;
836 while (1) {
837 if (bForward) {
838 if (limit && pos >= m_Pos + limit)
839 return FALSE;
840
841 if (!GetCharAt(pos, byte))
842 return FALSE;
843
844 } else {
845 if (limit && pos <= m_Pos - limit)
846 return FALSE;
847
848 if (!GetCharAtBackward(pos, byte))
849 return FALSE;
850 }
851
852 if (byte == tag_data[offset]) {
853 if (bForward) {
854 offset++;
855 if (offset < taglen) {
856 pos++;
857 continue;
858 }
859 } else {
860 offset--;
861 if (offset >= 0) {
862 pos--;
863 continue;
864 }
865 }
866
// Every tag byte matched. Forward, |pos| is on the last tag byte; backward,
// it is already on the first.
867 FX_FILESIZE startpos = bForward ? pos - taglen + 1 : pos;
868 if (!bWholeWord || IsWholeWord(startpos, limit, tag, FALSE)) {
869 m_Pos = startpos;
870 return TRUE;
871 }
// A match rejected by the whole-word test falls through to the restart below.
872 }
873
// Restart the match. The current byte is reused as the first matched byte if
// it equals the tag's first byte (forward) or last byte (backward).
874 if (bForward) {
875 offset = byte == tag_data[0] ? 1 : 0;
876 pos++;
877 } else {
878 offset = byte == tag_data[taglen - 1] ? taglen - 2 : taglen - 1;
879 pos--;
880 }
881
// A backward scan that runs off the start of the file has failed.
882 if (pos < 0)
883 return FALSE;
884 }
885
886 return FALSE;
887 }
888
889 int32_t CPDF_SyntaxParser::SearchMultiWord(const CFX_ByteStringC& tags,
890 FX_BOOL bWholeWord,
891 FX_FILESIZE limit) {
892 int32_t ntags = 1;
893 for (int i = 0; i < tags.GetLength(); ++i) {
894 if (tags[i] == 0)
895 ++ntags;
896 }
897
898 std::vector<SearchTagRecord> patterns(ntags);
899 FX_DWORD start = 0;
900 FX_DWORD itag = 0;
901 FX_DWORD max_len = 0;
902 for (int i = 0; i <= tags.GetLength(); ++i) {
903 if (tags[i] == 0) {
904 FX_DWORD len = i - start;
905 max_len = std::max(len, max_len);
906 patterns[itag].m_pTag = tags.GetCStr() + start;
907 patterns[itag].m_Len = len;
908 patterns[itag].m_Offset = 0;
909 start = i + 1;
910 ++itag;
911 }
912 }
913
914 const FX_FILESIZE pos_limit = m_Pos + limit;
915 for (FX_FILESIZE pos = m_Pos; !limit || pos < pos_limit; ++pos) {
916 uint8_t byte;
917 if (!GetCharAt(pos, byte))
918 break;
919
920 for (int i = 0; i < ntags; ++i) {
921 SearchTagRecord& pat = patterns[i];
922 if (pat.m_pTag[pat.m_Offset] != byte) {
923 pat.m_Offset = (pat.m_pTag[0] == byte) ? 1 : 0;
924 continue;
925 }
926
927 ++pat.m_Offset;
928 if (pat.m_Offset != pat.m_Len)
929 continue;
930
931 if (!bWholeWord ||
932 IsWholeWord(pos - pat.m_Len, limit,
933 CFX_ByteStringC(pat.m_pTag, pat.m_Len), FALSE)) {
934 return i;
935 }
936
937 pat.m_Offset = (pat.m_pTag[0] == byte) ? 1 : 0;
938 }
939 }
940 return -1;
941 }
942
943 FX_FILESIZE CPDF_SyntaxParser::FindTag(const CFX_ByteStringC& tag,
944 FX_FILESIZE limit) {
945 int32_t taglen = tag.GetLength();
946 int32_t match = 0;
947 limit += m_Pos;
948 FX_FILESIZE startpos = m_Pos;
949
950 while (1) {
951 uint8_t ch;
952 if (!GetNextChar(ch))
953 return -1;
954
955 if (ch == tag[match]) {
956 match++;
957 if (match == taglen)
958 return m_Pos - startpos - taglen;
959 } else {
960 match = ch == tag[0] ? 1 : 0;
961 }
962
963 if (limit && m_Pos == limit)
964 return -1;
965 }
966 return -1;
967 }
968
// Installs |pCryptoHandler| as the handler used to decrypt strings and
// streams, taking ownership of it. Whatever m_pCryptoHandler held before is
// replaced.
969 void CPDF_SyntaxParser::SetEncrypt(
970 std::unique_ptr<CPDF_CryptoHandler> pCryptoHandler) {
971 m_pCryptoHandler = std::move(pCryptoHandler);
972 }
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698