Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(6)

Side by Side Diff: core/fpdfapi/parser/cpdf_syntax_parser.cpp

Issue 2469833002: Clean up CPDF_SyntaxParser a little bit (Closed)
Patch Set: Comments Created 4 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLD | NEW
1 // Copyright 2016 PDFium Authors. All rights reserved. 1 // Copyright 2016 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 6
7 #include "core/fpdfapi/parser/cpdf_syntax_parser.h" 7 #include "core/fpdfapi/parser/cpdf_syntax_parser.h"
8 8
9 #include <vector> 9 #include <vector>
10 10
11 #include "core/fpdfapi/cpdf_modulemgr.h" 11 #include "core/fpdfapi/cpdf_modulemgr.h"
12 #include "core/fpdfapi/parser/cpdf_array.h" 12 #include "core/fpdfapi/parser/cpdf_array.h"
13 #include "core/fpdfapi/parser/cpdf_boolean.h" 13 #include "core/fpdfapi/parser/cpdf_boolean.h"
14 #include "core/fpdfapi/parser/cpdf_crypto_handler.h" 14 #include "core/fpdfapi/parser/cpdf_crypto_handler.h"
15 #include "core/fpdfapi/parser/cpdf_dictionary.h" 15 #include "core/fpdfapi/parser/cpdf_dictionary.h"
16 #include "core/fpdfapi/parser/cpdf_name.h" 16 #include "core/fpdfapi/parser/cpdf_name.h"
17 #include "core/fpdfapi/parser/cpdf_null.h" 17 #include "core/fpdfapi/parser/cpdf_null.h"
18 #include "core/fpdfapi/parser/cpdf_number.h" 18 #include "core/fpdfapi/parser/cpdf_number.h"
19 #include "core/fpdfapi/parser/cpdf_reference.h" 19 #include "core/fpdfapi/parser/cpdf_reference.h"
20 #include "core/fpdfapi/parser/cpdf_stream.h" 20 #include "core/fpdfapi/parser/cpdf_stream.h"
21 #include "core/fpdfapi/parser/cpdf_string.h" 21 #include "core/fpdfapi/parser/cpdf_string.h"
22 #include "core/fpdfapi/parser/fpdf_parser_decode.h" 22 #include "core/fpdfapi/parser/fpdf_parser_decode.h"
23 #include "core/fpdfapi/parser/fpdf_parser_utility.h" 23 #include "core/fpdfapi/parser/fpdf_parser_utility.h"
24 #include "core/fxcrt/fx_ext.h" 24 #include "core/fxcrt/fx_ext.h"
25 #include "third_party/base/numerics/safe_math.h" 25 #include "third_party/base/numerics/safe_math.h"
26 26
27 namespace { 27 namespace {
28 28
29 struct SearchTagRecord { 29 enum class ReadStatus { Normal, Backslash, Octal, FinishOctal, CarriageReturn };
30 CFX_ByteStringC m_bsTag;
31 FX_STRSIZE m_Offset;
32 };
33 30
34 } // namespace 31 } // namespace
35 32
36 // static 33 // static
37 int CPDF_SyntaxParser::s_CurrentRecursionDepth = 0; 34 int CPDF_SyntaxParser::s_CurrentRecursionDepth = 0;
38 35
39 CPDF_SyntaxParser::CPDF_SyntaxParser() 36 CPDF_SyntaxParser::CPDF_SyntaxParser()
40 : CPDF_SyntaxParser(CFX_WeakPtr<CFX_ByteStringPool>()) {} 37 : CPDF_SyntaxParser(CFX_WeakPtr<CFX_ByteStringPool>()) {}
41 38
42 CPDF_SyntaxParser::CPDF_SyntaxParser( 39 CPDF_SyntaxParser::CPDF_SyntaxParser(
43 const CFX_WeakPtr<CFX_ByteStringPool>& pPool) 40 const CFX_WeakPtr<CFX_ByteStringPool>& pPool)
44 : m_MetadataObjnum(0), 41 : m_MetadataObjnum(0),
45 m_pFileAccess(nullptr), 42 m_pFileAccess(nullptr),
46 m_pFileBuf(nullptr), 43 m_pFileBuf(nullptr),
47 m_BufSize(CPDF_ModuleMgr::kFileBufSize), 44 m_BufSize(CPDF_ModuleMgr::kFileBufSize),
48 m_pPool(pPool) {} 45 m_pPool(pPool) {}
49 46
50 CPDF_SyntaxParser::~CPDF_SyntaxParser() { 47 CPDF_SyntaxParser::~CPDF_SyntaxParser() {
51 FX_Free(m_pFileBuf); 48 FX_Free(m_pFileBuf);
52 } 49 }
53 50
54 bool CPDF_SyntaxParser::GetCharAt(FX_FILESIZE pos, uint8_t& ch) { 51 bool CPDF_SyntaxParser::GetCharAt(FX_FILESIZE pos, uint8_t& ch) {
55 CFX_AutoRestorer<FX_FILESIZE> save_pos(&m_Pos); 52 CFX_AutoRestorer<FX_FILESIZE> save_pos(&m_Pos);
56 m_Pos = pos; 53 m_Pos = pos;
57 return GetNextChar(ch); 54 return GetNextChar(ch);
58 } 55 }
59 56
57 bool CPDF_SyntaxParser::ReadChar(FX_FILESIZE read_pos, uint32_t read_size) {
58 if (static_cast<FX_FILESIZE>(read_pos + read_size) > m_FileLen) {
59 if (m_FileLen < static_cast<FX_FILESIZE>(read_size)) {
60 read_pos = 0;
61 read_size = static_cast<uint32_t>(m_FileLen);
62 } else {
63 read_pos = m_FileLen - read_size;
64 }
65 }
66 if (!m_pFileAccess->ReadBlock(m_pFileBuf, read_pos, read_size))
67 return false;
68
69 m_BufOffset = read_pos;
70 return true;
71 }
72
60 bool CPDF_SyntaxParser::GetNextChar(uint8_t& ch) { 73 bool CPDF_SyntaxParser::GetNextChar(uint8_t& ch) {
61 FX_FILESIZE pos = m_Pos + m_HeaderOffset; 74 FX_FILESIZE pos = m_Pos + m_HeaderOffset;
62 if (pos >= m_FileLen) 75 if (pos >= m_FileLen)
63 return false; 76 return false;
64 77
65 if (m_BufOffset >= pos || (FX_FILESIZE)(m_BufOffset + m_BufSize) <= pos) { 78 if (CheckPosition(pos)) {
66 FX_FILESIZE read_pos = pos; 79 FX_FILESIZE read_pos = pos;
67 uint32_t read_size = m_BufSize; 80 uint32_t read_size = m_BufSize;
68 if ((FX_FILESIZE)read_size > m_FileLen) 81 read_size = std::min(read_size, static_cast<uint32_t>(m_FileLen));
69 read_size = (uint32_t)m_FileLen; 82 if (!ReadChar(read_pos, read_size))
70
71 if ((FX_FILESIZE)(read_pos + read_size) > m_FileLen) {
72 if (m_FileLen < (FX_FILESIZE)read_size) {
73 read_pos = 0;
74 read_size = (uint32_t)m_FileLen;
75 } else {
76 read_pos = m_FileLen - read_size;
77 }
78 }
79
80 if (!m_pFileAccess->ReadBlock(m_pFileBuf, read_pos, read_size))
81 return false; 83 return false;
82
83 m_BufOffset = read_pos;
84 } 84 }
85 ch = m_pFileBuf[pos - m_BufOffset]; 85 ch = m_pFileBuf[pos - m_BufOffset];
86 m_Pos++; 86 m_Pos++;
87 return true; 87 return true;
88 } 88 }
89 89
90 bool CPDF_SyntaxParser::GetCharAtBackward(FX_FILESIZE pos, uint8_t& ch) { 90 bool CPDF_SyntaxParser::GetCharAtBackward(FX_FILESIZE pos, uint8_t& ch) {
91 pos += m_HeaderOffset; 91 pos += m_HeaderOffset;
92 if (pos >= m_FileLen) 92 if (pos >= m_FileLen)
93 return false; 93 return false;
94 94
95 if (m_BufOffset >= pos || (FX_FILESIZE)(m_BufOffset + m_BufSize) <= pos) { 95 if (CheckPosition(pos)) {
96 FX_FILESIZE read_pos; 96 FX_FILESIZE read_pos;
97 if (pos < (FX_FILESIZE)m_BufSize) 97 if (pos < static_cast<FX_FILESIZE>(m_BufSize))
98 read_pos = 0; 98 read_pos = 0;
99 else 99 else
100 read_pos = pos - m_BufSize + 1; 100 read_pos = pos - m_BufSize + 1;
101
102 uint32_t read_size = m_BufSize; 101 uint32_t read_size = m_BufSize;
103 if ((FX_FILESIZE)(read_pos + read_size) > m_FileLen) { 102 if (!ReadChar(read_pos, read_size))
104 if (m_FileLen < (FX_FILESIZE)read_size) {
105 read_pos = 0;
106 read_size = (uint32_t)m_FileLen;
107 } else {
108 read_pos = m_FileLen - read_size;
109 }
110 }
111
112 if (!m_pFileAccess->ReadBlock(m_pFileBuf, read_pos, read_size))
113 return false; 103 return false;
114
115 m_BufOffset = read_pos;
116 } 104 }
117 ch = m_pFileBuf[pos - m_BufOffset]; 105 ch = m_pFileBuf[pos - m_BufOffset];
118 return true; 106 return true;
119 } 107 }
120 108
121 bool CPDF_SyntaxParser::ReadBlock(uint8_t* pBuf, uint32_t size) { 109 bool CPDF_SyntaxParser::ReadBlock(uint8_t* pBuf, uint32_t size) {
122 if (!m_pFileAccess->ReadBlock(pBuf, m_Pos + m_HeaderOffset, size)) 110 if (!m_pFileAccess->ReadBlock(pBuf, m_Pos + m_HeaderOffset, size))
123 return false; 111 return false;
124 m_Pos += size; 112 m_Pos += size;
125 return true; 113 return true;
(...skipping 82 matching lines...) Expand 10 before | Expand all | Expand 10 after
208 } 196 }
209 } 197 }
210 198
211 CFX_ByteString CPDF_SyntaxParser::ReadString() { 199 CFX_ByteString CPDF_SyntaxParser::ReadString() {
212 uint8_t ch; 200 uint8_t ch;
213 if (!GetNextChar(ch)) 201 if (!GetNextChar(ch))
214 return CFX_ByteString(); 202 return CFX_ByteString();
215 203
216 CFX_ByteTextBuf buf; 204 CFX_ByteTextBuf buf;
217 int32_t parlevel = 0; 205 int32_t parlevel = 0;
218 int32_t status = 0; 206 ReadStatus status = ReadStatus::Normal;
219 int32_t iEscCode = 0; 207 int32_t iEscCode = 0;
220 while (1) { 208 while (1) {
221 switch (status) { 209 switch (status) {
222 case 0: 210 case ReadStatus::Normal:
223 if (ch == ')') { 211 if (ch == ')') {
224 if (parlevel == 0) { 212 if (parlevel == 0)
225 return buf.MakeString(); 213 return buf.MakeString();
226 }
227 parlevel--; 214 parlevel--;
228 buf.AppendChar(')');
229 } else if (ch == '(') { 215 } else if (ch == '(') {
230 parlevel++; 216 parlevel++;
231 buf.AppendChar('('); 217 }
232 } else if (ch == '\\') { 218 if (ch == '\\')
233 status = 1; 219 status = ReadStatus::Backslash;
234 } else { 220 else
235 buf.AppendChar(ch); 221 buf.AppendChar(ch);
236 }
237 break; 222 break;
238 case 1: 223 case ReadStatus::Backslash:
239 if (ch >= '0' && ch <= '7') { 224 if (ch >= '0' && ch <= '7') {
240 iEscCode = FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch)); 225 iEscCode = FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch));
241 status = 2; 226 status = ReadStatus::Octal;
242 break; 227 break;
243 } 228 }
244 229
245 if (ch == 'n') { 230 if (ch == 'n') {
246 buf.AppendChar('\n'); 231 buf.AppendChar('\n');
247 } else if (ch == 'r') { 232 } else if (ch == 'r') {
248 buf.AppendChar('\r'); 233 buf.AppendChar('\r');
249 } else if (ch == 't') { 234 } else if (ch == 't') {
250 buf.AppendChar('\t'); 235 buf.AppendChar('\t');
251 } else if (ch == 'b') { 236 } else if (ch == 'b') {
252 buf.AppendChar('\b'); 237 buf.AppendChar('\b');
253 } else if (ch == 'f') { 238 } else if (ch == 'f') {
254 buf.AppendChar('\f'); 239 buf.AppendChar('\f');
255 } else if (ch == '\r') { 240 } else if (ch == '\r') {
256 status = 4; 241 status = ReadStatus::CarriageReturn;
257 break; 242 break;
258 } else if (ch != '\n') { 243 } else if (ch != '\n') {
259 buf.AppendChar(ch); 244 buf.AppendChar(ch);
260 } 245 }
261 status = 0; 246 status = ReadStatus::Normal;
262 break; 247 break;
263 case 2: 248 case ReadStatus::Octal:
264 if (ch >= '0' && ch <= '7') { 249 if (ch >= '0' && ch <= '7') {
265 iEscCode = 250 iEscCode =
266 iEscCode * 8 + FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch)); 251 iEscCode * 8 + FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch));
267 status = 3; 252 status = ReadStatus::FinishOctal;
268 } else { 253 } else {
269 buf.AppendChar(iEscCode); 254 buf.AppendChar(iEscCode);
270 status = 0; 255 status = ReadStatus::Normal;
271 continue; 256 continue;
272 } 257 }
273 break; 258 break;
274 case 3: 259 case ReadStatus::FinishOctal:
260 status = ReadStatus::Normal;
275 if (ch >= '0' && ch <= '7') { 261 if (ch >= '0' && ch <= '7') {
276 iEscCode = 262 iEscCode =
277 iEscCode * 8 + FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch)); 263 iEscCode * 8 + FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch));
278 buf.AppendChar(iEscCode); 264 buf.AppendChar(iEscCode);
279 status = 0;
280 } else { 265 } else {
281 buf.AppendChar(iEscCode); 266 buf.AppendChar(iEscCode);
282 status = 0;
283 continue; 267 continue;
284 } 268 }
285 break; 269 break;
286 case 4: 270 case ReadStatus::CarriageReturn:
287 status = 0; 271 status = ReadStatus::Normal;
288 if (ch != '\n') 272 if (ch != '\n')
289 continue; 273 continue;
290 break; 274 break;
291 } 275 }
292 276
293 if (!GetNextChar(ch)) 277 if (!GetNextChar(ch))
294 break; 278 break;
295 } 279 }
296 280
297 GetNextChar(ch); 281 GetNextChar(ch);
(...skipping 343 matching lines...) Expand 10 before | Expand all | Expand 10 after
641 len = pLenObj->GetInteger(); 625 len = pLenObj->GetInteger();
642 626
643 // Locate the start of stream. 627 // Locate the start of stream.
644 ToNextLine(); 628 ToNextLine();
645 FX_FILESIZE streamStartPos = m_Pos; 629 FX_FILESIZE streamStartPos = m_Pos;
646 630
647 const CFX_ByteStringC kEndStreamStr("endstream"); 631 const CFX_ByteStringC kEndStreamStr("endstream");
648 const CFX_ByteStringC kEndObjStr("endobj"); 632 const CFX_ByteStringC kEndObjStr("endobj");
649 633
650 CPDF_CryptoHandler* pCryptoHandler = 634 CPDF_CryptoHandler* pCryptoHandler =
651 objnum == (uint32_t)m_MetadataObjnum ? nullptr : m_pCryptoHandler.get(); 635 objnum == m_MetadataObjnum ? nullptr : m_pCryptoHandler.get();
652 if (!pCryptoHandler) { 636 if (!pCryptoHandler) {
653 bool bSearchForKeyword = true; 637 bool bSearchForKeyword = true;
654 if (len >= 0) { 638 if (len >= 0) {
655 pdfium::base::CheckedNumeric<FX_FILESIZE> pos = m_Pos; 639 pdfium::base::CheckedNumeric<FX_FILESIZE> pos = m_Pos;
656 pos += len; 640 pos += len;
657 if (pos.IsValid() && pos.ValueOrDie() < m_FileLen) 641 if (pos.IsValid() && pos.ValueOrDie() < m_FileLen)
658 m_Pos = pos.ValueOrDie(); 642 m_Pos = pos.ValueOrDie();
659 643
660 m_Pos += ReadEOLMarkers(m_Pos); 644 m_Pos += ReadEOLMarkers(m_Pos);
661 FXSYS_memset(m_WordBuffer, 0, kEndStreamStr.GetLength() + 1); 645 FXSYS_memset(m_WordBuffer, 0, kEndStreamStr.GetLength() + 1);
(...skipping 123 matching lines...) Expand 10 before | Expand all | Expand 10 after
785 void CPDF_SyntaxParser::InitParser(IFX_SeekableReadStream* pFileAccess, 769 void CPDF_SyntaxParser::InitParser(IFX_SeekableReadStream* pFileAccess,
786 uint32_t HeaderOffset) { 770 uint32_t HeaderOffset) {
787 FX_Free(m_pFileBuf); 771 FX_Free(m_pFileBuf);
788 772
789 m_pFileBuf = FX_Alloc(uint8_t, m_BufSize); 773 m_pFileBuf = FX_Alloc(uint8_t, m_BufSize);
790 m_HeaderOffset = HeaderOffset; 774 m_HeaderOffset = HeaderOffset;
791 m_FileLen = pFileAccess->GetSize(); 775 m_FileLen = pFileAccess->GetSize();
792 m_Pos = 0; 776 m_Pos = 0;
793 m_pFileAccess = pFileAccess; 777 m_pFileAccess = pFileAccess;
794 m_BufOffset = 0; 778 m_BufOffset = 0;
795 pFileAccess->ReadBlock( 779 pFileAccess->ReadBlock(m_pFileBuf, 0,
796 m_pFileBuf, 0, 780 std::min(m_BufSize, static_cast<uint32_t>(m_FileLen)));
797 (size_t)((FX_FILESIZE)m_BufSize > m_FileLen ? m_FileLen : m_BufSize));
798 } 781 }
799 782
800 uint32_t CPDF_SyntaxParser::GetDirectNum() { 783 uint32_t CPDF_SyntaxParser::GetDirectNum() {
801 bool bIsNumber; 784 bool bIsNumber;
802 GetNextWordInternal(&bIsNumber); 785 GetNextWordInternal(&bIsNumber);
803 if (!bIsNumber) 786 if (!bIsNumber)
804 return 0; 787 return 0;
805 788
806 m_WordBuffer[m_WordSize] = 0; 789 m_WordBuffer[m_WordSize] = 0;
807 return FXSYS_atoui(reinterpret_cast<const FX_CHAR*>(m_WordBuffer)); 790 return FXSYS_atoui(reinterpret_cast<const FX_CHAR*>(m_WordBuffer));
(...skipping 90 matching lines...) Expand 10 before | Expand all | Expand 10 after
898 pos--; 881 pos--;
899 } 882 }
900 883
901 if (pos < 0) 884 if (pos < 0)
902 return false; 885 return false;
903 } 886 }
904 887
905 return false; 888 return false;
906 } 889 }
907 890
908 int32_t CPDF_SyntaxParser::SearchMultiWord(const CFX_ByteStringC& tags,
909 bool bWholeWord,
910 FX_FILESIZE limit) {
911 int32_t ntags = 1;
912 for (int i = 0; i < tags.GetLength(); ++i) {
913 if (tags[i] == 0)
914 ++ntags;
915 }
916
917 // Ensure that the input byte string happens to be nul-terminated. This
918 // need not be the case, but the loop below uses this guarantee to put
919 // the last pattern into the vector.
920 ASSERT(tags[tags.GetLength()] == 0);
921 std::vector<SearchTagRecord> patterns(ntags);
922 uint32_t start = 0;
923 uint32_t itag = 0;
924 uint32_t max_len = 0;
925 for (int i = 0; i <= tags.GetLength(); ++i) {
926 if (tags[i] == 0) {
927 uint32_t len = i - start;
928 max_len = std::max(len, max_len);
929 patterns[itag].m_bsTag = tags.Mid(start, len);
930 patterns[itag].m_Offset = 0;
931 start = i + 1;
932 ++itag;
933 }
934 }
935
936 const FX_FILESIZE pos_limit = m_Pos + limit;
937 for (FX_FILESIZE pos = m_Pos; !limit || pos < pos_limit; ++pos) {
938 uint8_t byte;
939 if (!GetCharAt(pos, byte))
940 break;
941
942 for (int i = 0; i < ntags; ++i) {
943 SearchTagRecord& pat = patterns[i];
944 if (pat.m_bsTag[pat.m_Offset] != byte) {
945 pat.m_Offset = (pat.m_bsTag[0] == byte) ? 1 : 0;
946 continue;
947 }
948
949 ++pat.m_Offset;
950 if (pat.m_Offset != pat.m_bsTag.GetLength())
951 continue;
952
953 if (!bWholeWord || IsWholeWord(pos - pat.m_bsTag.GetLength(), limit,
954 pat.m_bsTag, false)) {
955 return i;
956 }
957
958 pat.m_Offset = (pat.m_bsTag[0] == byte) ? 1 : 0;
959 }
960 }
961 return -1;
962 }
963
964 FX_FILESIZE CPDF_SyntaxParser::FindTag(const CFX_ByteStringC& tag, 891 FX_FILESIZE CPDF_SyntaxParser::FindTag(const CFX_ByteStringC& tag,
965 FX_FILESIZE limit) { 892 FX_FILESIZE limit) {
966 int32_t taglen = tag.GetLength(); 893 int32_t taglen = tag.GetLength();
967 int32_t match = 0; 894 int32_t match = 0;
968 limit += m_Pos; 895 limit += m_Pos;
969 FX_FILESIZE startpos = m_Pos; 896 FX_FILESIZE startpos = m_Pos;
970 897
971 while (1) { 898 while (1) {
972 uint8_t ch; 899 uint8_t ch;
973 if (!GetNextChar(ch)) 900 if (!GetNextChar(ch))
(...skipping 14 matching lines...) Expand all
988 } 915 }
989 916
990 void CPDF_SyntaxParser::SetEncrypt( 917 void CPDF_SyntaxParser::SetEncrypt(
991 std::unique_ptr<CPDF_CryptoHandler> pCryptoHandler) { 918 std::unique_ptr<CPDF_CryptoHandler> pCryptoHandler) {
992 m_pCryptoHandler = std::move(pCryptoHandler); 919 m_pCryptoHandler = std::move(pCryptoHandler);
993 } 920 }
994 921
995 CFX_ByteString CPDF_SyntaxParser::MaybeIntern(const CFX_ByteString& str) { 922 CFX_ByteString CPDF_SyntaxParser::MaybeIntern(const CFX_ByteString& str) {
996 return m_pPool ? m_pPool->Intern(str) : str; 923 return m_pPool ? m_pPool->Intern(str) : str;
997 } 924 }
OLD | NEW
« core/fpdfapi/parser/cpdf_syntax_parser.h ('K') | « core/fpdfapi/parser/cpdf_syntax_parser.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698