Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(95)

Side by Side Diff: core/src/fpdfapi/fpdf_parser/fpdf_parser_parser.cpp

Issue 1774753002: Split CPDF_SyntaxParser into its own named .cpp/.h files. (Closed) Base URL: https://pdfium.googlesource.com/pdfium.git@master
Patch Set: Copyright 2016. Created 4 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2014 PDFium Authors. All rights reserved. 1 // Copyright 2014 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 6
7 #include "core/include/fpdfapi/fpdf_parser.h" 7 #include "core/include/fpdfapi/fpdf_parser.h"
8 8
9 #include <algorithm> 9 #include <algorithm>
10 #include <memory> 10 #include <memory>
11 #include <set> 11 #include <set>
12 #include <utility> 12 #include <utility>
13 #include <vector> 13 #include <vector>
14 14
15 #include "core/include/fpdfapi/fpdf_module.h" 15 #include "core/include/fpdfapi/fpdf_module.h"
16 #include "core/include/fpdfapi/fpdf_page.h" 16 #include "core/include/fpdfapi/fpdf_page.h"
17 #include "core/include/fxcrt/fx_ext.h" 17 #include "core/include/fxcrt/fx_ext.h"
18 #include "core/include/fxcrt/fx_safe_types.h" 18 #include "core/include/fxcrt/fx_safe_types.h"
19 #include "core/src/fpdfapi/fpdf_page/pageint.h" 19 #include "core/src/fpdfapi/fpdf_page/pageint.h"
20 #include "core/src/fpdfapi/fpdf_parser/cpdf_syntax_parser.h"
20 #include "core/src/fpdfapi/fpdf_parser/parser_int.h" 21 #include "core/src/fpdfapi/fpdf_parser/parser_int.h"
21 #include "third_party/base/stl_util.h" 22 #include "third_party/base/stl_util.h"
22 23
23 namespace { 24 namespace {
24 25
25 // A limit on the size of the xref table. Theoretical limits are higher, but 26 // A limit on the size of the xref table. Theoretical limits are higher, but
26 // this may be large enough in practice. 27 // this may be large enough in practice.
27 const int32_t kMaxXRefSize = 1048576; 28 const int32_t kMaxXRefSize = 1048576;
28 29
29 // A limit on the maximum object number in the xref table. Theoretical limits 30 // A limit on the maximum object number in the xref table. Theoretical limits
30 // are higher, but this may be large enough in practice. 31 // are higher, but this may be large enough in practice.
31 const FX_DWORD kMaxObjectNumber = 1048576; 32 const FX_DWORD kMaxObjectNumber = 1048576;
32 33
33 struct SearchTagRecord {
34 const char* m_pTag;
35 FX_DWORD m_Len;
36 FX_DWORD m_Offset;
37 };
38
39 int32_t GetHeaderOffset(IFX_FileRead* pFile) { 34 int32_t GetHeaderOffset(IFX_FileRead* pFile) {
40 // TODO(dsinclair): This is a complicated way of saying %PDF, simplify? 35 // TODO(dsinclair): This is a complicated way of saying %PDF, simplify?
41 const FX_DWORD tag = FXDWORD_FROM_LSBFIRST(0x46445025); 36 const FX_DWORD tag = FXDWORD_FROM_LSBFIRST(0x46445025);
42 37
43 const size_t kBufSize = 4; 38 const size_t kBufSize = 4;
44 uint8_t buf[kBufSize]; 39 uint8_t buf[kBufSize];
45 int32_t offset = 0; 40 int32_t offset = 0;
46 while (offset <= 1024) { 41 while (offset <= 1024) {
47 if (!pFile->ReadBlock(buf, offset, kBufSize)) 42 if (!pFile->ReadBlock(buf, offset, kBufSize))
48 return -1; 43 return -1;
(...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after
91 } 86 }
92 87
93 CPDF_Parser::CPDF_Parser() 88 CPDF_Parser::CPDF_Parser()
94 : m_pDocument(nullptr), 89 : m_pDocument(nullptr),
95 m_bOwnFileRead(true), 90 m_bOwnFileRead(true),
96 m_FileVersion(0), 91 m_FileVersion(0),
97 m_pTrailer(nullptr), 92 m_pTrailer(nullptr),
98 m_pEncryptDict(nullptr), 93 m_pEncryptDict(nullptr),
99 m_pLinearized(nullptr), 94 m_pLinearized(nullptr),
100 m_dwFirstPageNo(0), 95 m_dwFirstPageNo(0),
101 m_dwXrefStartObjNum(0) {} 96 m_dwXrefStartObjNum(0) {
97 m_pSyntax.reset(new CPDF_SyntaxParser);
98 }
102 99
103 CPDF_Parser::~CPDF_Parser() { 100 CPDF_Parser::~CPDF_Parser() {
104 CloseParser(); 101 CloseParser();
105 } 102 }
106 103
107 FX_DWORD CPDF_Parser::GetLastObjNum() const { 104 FX_DWORD CPDF_Parser::GetLastObjNum() const {
108 return m_ObjectInfo.empty() ? 0 : m_ObjectInfo.rbegin()->first; 105 return m_ObjectInfo.empty() ? 0 : m_ObjectInfo.rbegin()->first;
109 } 106 }
110 107
111 bool CPDF_Parser::IsValidObjectNumber(FX_DWORD objnum) const { 108 bool CPDF_Parser::IsValidObjectNumber(FX_DWORD objnum) const {
(...skipping 19 matching lines...) Expand all
131 128
132 bool CPDF_Parser::IsObjectFreeOrNull(FX_DWORD objnum) const { 129 bool CPDF_Parser::IsObjectFreeOrNull(FX_DWORD objnum) const {
133 uint8_t type = GetObjectType(objnum); 130 uint8_t type = GetObjectType(objnum);
134 return type == 0 || type == 255; 131 return type == 0 || type == 255;
135 } 132 }
136 133
137 void CPDF_Parser::SetEncryptDictionary(CPDF_Dictionary* pDict) { 134 void CPDF_Parser::SetEncryptDictionary(CPDF_Dictionary* pDict) {
138 m_pEncryptDict = pDict; 135 m_pEncryptDict = pDict;
139 } 136 }
140 137
138 CPDF_CryptoHandler* CPDF_Parser::GetCryptoHandler() {
139 return m_pSyntax->m_pCryptoHandler.get();
140 }
141
142 IFX_FileRead* CPDF_Parser::GetFileAccess() const {
143 return m_pSyntax->m_pFileAccess;
144 }
145
141 void CPDF_Parser::ShrinkObjectMap(FX_DWORD objnum) { 146 void CPDF_Parser::ShrinkObjectMap(FX_DWORD objnum) {
142 if (objnum == 0) { 147 if (objnum == 0) {
143 m_ObjectInfo.clear(); 148 m_ObjectInfo.clear();
144 return; 149 return;
145 } 150 }
146 151
147 auto it = m_ObjectInfo.lower_bound(objnum); 152 auto it = m_ObjectInfo.lower_bound(objnum);
148 while (it != m_ObjectInfo.end()) { 153 while (it != m_ObjectInfo.end()) {
149 auto saved_it = it++; 154 auto saved_it = it++;
150 m_ObjectInfo.erase(saved_it); 155 m_ObjectInfo.erase(saved_it);
151 } 156 }
152 157
153 if (!pdfium::ContainsKey(m_ObjectInfo, objnum - 1)) 158 if (!pdfium::ContainsKey(m_ObjectInfo, objnum - 1))
154 m_ObjectInfo[objnum - 1].pos = 0; 159 m_ObjectInfo[objnum - 1].pos = 0;
155 } 160 }
156 161
157 void CPDF_Parser::CloseParser() { 162 void CPDF_Parser::CloseParser() {
158 m_bVersionUpdated = FALSE; 163 m_bVersionUpdated = FALSE;
159 delete m_pDocument; 164 delete m_pDocument;
160 m_pDocument = nullptr; 165 m_pDocument = nullptr;
161 166
162 if (m_pTrailer) { 167 if (m_pTrailer) {
163 m_pTrailer->Release(); 168 m_pTrailer->Release();
164 m_pTrailer = nullptr; 169 m_pTrailer = nullptr;
165 } 170 }
166 ReleaseEncryptHandler(); 171 ReleaseEncryptHandler();
167 SetEncryptDictionary(nullptr); 172 SetEncryptDictionary(nullptr);
168 173
169 if (m_bOwnFileRead && m_Syntax.m_pFileAccess) { 174 if (m_bOwnFileRead && m_pSyntax->m_pFileAccess) {
170 m_Syntax.m_pFileAccess->Release(); 175 m_pSyntax->m_pFileAccess->Release();
171 m_Syntax.m_pFileAccess = nullptr; 176 m_pSyntax->m_pFileAccess = nullptr;
172 } 177 }
173 178
174 m_ObjectStreamMap.clear(); 179 m_ObjectStreamMap.clear();
175 m_ObjCache.clear(); 180 m_ObjCache.clear();
176 m_SortedOffset.clear(); 181 m_SortedOffset.clear();
177 m_ObjectInfo.clear(); 182 m_ObjectInfo.clear();
178 183
179 int32_t iLen = m_Trailers.GetSize(); 184 int32_t iLen = m_Trailers.GetSize();
180 for (int32_t i = 0; i < iLen; ++i) { 185 for (int32_t i = 0; i < iLen; ++i) {
181 if (CPDF_Dictionary* trailer = m_Trailers.GetAt(i)) 186 if (CPDF_Dictionary* trailer = m_Trailers.GetAt(i))
(...skipping 13 matching lines...) Expand all
195 m_bXRefStream = FALSE; 200 m_bXRefStream = FALSE;
196 m_LastXRefOffset = 0; 201 m_LastXRefOffset = 0;
197 m_bOwnFileRead = true; 202 m_bOwnFileRead = true;
198 203
199 int32_t offset = GetHeaderOffset(pFileAccess); 204 int32_t offset = GetHeaderOffset(pFileAccess);
200 if (offset == -1) { 205 if (offset == -1) {
201 if (pFileAccess) 206 if (pFileAccess)
202 pFileAccess->Release(); 207 pFileAccess->Release();
203 return FORMAT_ERROR; 208 return FORMAT_ERROR;
204 } 209 }
205 m_Syntax.InitParser(pFileAccess, offset); 210 m_pSyntax->InitParser(pFileAccess, offset);
206 211
207 uint8_t ch; 212 uint8_t ch;
208 if (!m_Syntax.GetCharAt(5, ch)) 213 if (!m_pSyntax->GetCharAt(5, ch))
209 return FORMAT_ERROR; 214 return FORMAT_ERROR;
210 if (std::isdigit(ch)) 215 if (std::isdigit(ch))
211 m_FileVersion = FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch)) * 10; 216 m_FileVersion = FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch)) * 10;
212 217
213 if (!m_Syntax.GetCharAt(7, ch)) 218 if (!m_pSyntax->GetCharAt(7, ch))
214 return FORMAT_ERROR; 219 return FORMAT_ERROR;
215 if (std::isdigit(ch)) 220 if (std::isdigit(ch))
216 m_FileVersion += FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch)); 221 m_FileVersion += FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch));
217 222
218 if (m_Syntax.m_FileLen < m_Syntax.m_HeaderOffset + 9) 223 if (m_pSyntax->m_FileLen < m_pSyntax->m_HeaderOffset + 9)
219 return FORMAT_ERROR; 224 return FORMAT_ERROR;
220 225
221 m_Syntax.RestorePos(m_Syntax.m_FileLen - m_Syntax.m_HeaderOffset - 9); 226 m_pSyntax->RestorePos(m_pSyntax->m_FileLen - m_pSyntax->m_HeaderOffset - 9);
222 m_pDocument = new CPDF_Document(this); 227 m_pDocument = new CPDF_Document(this);
223 228
224 FX_BOOL bXRefRebuilt = FALSE; 229 FX_BOOL bXRefRebuilt = FALSE;
225 if (m_Syntax.SearchWord("startxref", TRUE, FALSE, 4096)) { 230 if (m_pSyntax->SearchWord("startxref", TRUE, FALSE, 4096)) {
226 m_SortedOffset.insert(m_Syntax.SavePos()); 231 m_SortedOffset.insert(m_pSyntax->SavePos());
227 m_Syntax.GetKeyword(); 232 m_pSyntax->GetKeyword();
228 233
229 bool bNumber; 234 bool bNumber;
230 CFX_ByteString xrefpos_str = m_Syntax.GetNextWord(&bNumber); 235 CFX_ByteString xrefpos_str = m_pSyntax->GetNextWord(&bNumber);
231 if (!bNumber) 236 if (!bNumber)
232 return FORMAT_ERROR; 237 return FORMAT_ERROR;
233 238
234 m_LastXRefOffset = (FX_FILESIZE)FXSYS_atoi64(xrefpos_str); 239 m_LastXRefOffset = (FX_FILESIZE)FXSYS_atoi64(xrefpos_str);
235 if (!LoadAllCrossRefV4(m_LastXRefOffset) && 240 if (!LoadAllCrossRefV4(m_LastXRefOffset) &&
236 !LoadAllCrossRefV5(m_LastXRefOffset)) { 241 !LoadAllCrossRefV5(m_LastXRefOffset)) {
237 if (!RebuildCrossRef()) 242 if (!RebuildCrossRef())
238 return FORMAT_ERROR; 243 return FORMAT_ERROR;
239 244
240 bXRefRebuilt = TRUE; 245 bXRefRebuilt = TRUE;
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after
273 return FORMAT_ERROR; 278 return FORMAT_ERROR;
274 279
275 eRet = SetEncryptHandler(); 280 eRet = SetEncryptHandler();
276 if (eRet != SUCCESS) 281 if (eRet != SUCCESS)
277 return eRet; 282 return eRet;
278 } 283 }
279 if (m_pSecurityHandler && !m_pSecurityHandler->IsMetadataEncrypted()) { 284 if (m_pSecurityHandler && !m_pSecurityHandler->IsMetadataEncrypted()) {
280 CPDF_Reference* pMetadata = 285 CPDF_Reference* pMetadata =
281 ToReference(m_pDocument->GetRoot()->GetElement("Metadata")); 286 ToReference(m_pDocument->GetRoot()->GetElement("Metadata"));
282 if (pMetadata) 287 if (pMetadata)
283 m_Syntax.m_MetadataObjnum = pMetadata->GetRefObjNum(); 288 m_pSyntax->m_MetadataObjnum = pMetadata->GetRefObjNum();
284 } 289 }
285 return SUCCESS; 290 return SUCCESS;
286 } 291 }
287 CPDF_Parser::Error CPDF_Parser::SetEncryptHandler() { 292 CPDF_Parser::Error CPDF_Parser::SetEncryptHandler() {
288 ReleaseEncryptHandler(); 293 ReleaseEncryptHandler();
289 SetEncryptDictionary(nullptr); 294 SetEncryptDictionary(nullptr);
290 295
291 if (!m_pTrailer) 296 if (!m_pTrailer)
292 return FORMAT_ERROR; 297 return FORMAT_ERROR;
293 298
(...skipping 20 matching lines...) Expand all
314 return HANDLER_ERROR; 319 return HANDLER_ERROR;
315 320
316 if (!pSecurityHandler->OnInit(this, m_pEncryptDict)) 321 if (!pSecurityHandler->OnInit(this, m_pEncryptDict))
317 return err; 322 return err;
318 323
319 m_pSecurityHandler = std::move(pSecurityHandler); 324 m_pSecurityHandler = std::move(pSecurityHandler);
320 std::unique_ptr<CPDF_CryptoHandler> pCryptoHandler( 325 std::unique_ptr<CPDF_CryptoHandler> pCryptoHandler(
321 m_pSecurityHandler->CreateCryptoHandler()); 326 m_pSecurityHandler->CreateCryptoHandler());
322 if (!pCryptoHandler->Init(m_pEncryptDict, m_pSecurityHandler.get())) 327 if (!pCryptoHandler->Init(m_pEncryptDict, m_pSecurityHandler.get()))
323 return HANDLER_ERROR; 328 return HANDLER_ERROR;
324 m_Syntax.SetEncrypt(std::move(pCryptoHandler)); 329 m_pSyntax->SetEncrypt(std::move(pCryptoHandler));
325 } 330 }
326 return SUCCESS; 331 return SUCCESS;
327 } 332 }
328 333
329 void CPDF_Parser::ReleaseEncryptHandler() { 334 void CPDF_Parser::ReleaseEncryptHandler() {
330 m_Syntax.m_pCryptoHandler.reset(); 335 m_pSyntax->m_pCryptoHandler.reset();
331 m_pSecurityHandler.reset(); 336 m_pSecurityHandler.reset();
332 } 337 }
333 338
334 FX_FILESIZE CPDF_Parser::GetObjectOffset(FX_DWORD objnum) const { 339 FX_FILESIZE CPDF_Parser::GetObjectOffset(FX_DWORD objnum) const {
335 if (!IsValidObjectNumber(objnum)) 340 if (!IsValidObjectNumber(objnum))
336 return 0; 341 return 0;
337 342
338 if (GetObjectType(objnum) == 1) 343 if (GetObjectType(objnum) == 1)
339 return GetObjectPositionOrZero(objnum); 344 return GetObjectPositionOrZero(objnum);
340 345
(...skipping 106 matching lines...) Expand 10 before | Expand all | Expand 10 after
447 452
448 for (size_t i = 1; i < CrossRefList.size(); ++i) { 453 for (size_t i = 1; i < CrossRefList.size(); ++i) {
449 if (!LoadCrossRefV4(CrossRefList[i], XRefStreamList[i], FALSE)) 454 if (!LoadCrossRefV4(CrossRefList[i], XRefStreamList[i], FALSE))
450 return FALSE; 455 return FALSE;
451 } 456 }
452 return TRUE; 457 return TRUE;
453 } 458 }
454 459
455 FX_BOOL CPDF_Parser::LoadLinearizedCrossRefV4(FX_FILESIZE pos, 460 FX_BOOL CPDF_Parser::LoadLinearizedCrossRefV4(FX_FILESIZE pos,
456 FX_DWORD dwObjCount) { 461 FX_DWORD dwObjCount) {
457 FX_FILESIZE dwStartPos = pos - m_Syntax.m_HeaderOffset; 462 FX_FILESIZE dwStartPos = pos - m_pSyntax->m_HeaderOffset;
458 463
459 m_Syntax.RestorePos(dwStartPos); 464 m_pSyntax->RestorePos(dwStartPos);
460 m_SortedOffset.insert(pos); 465 m_SortedOffset.insert(pos);
461 466
462 FX_DWORD start_objnum = 0; 467 FX_DWORD start_objnum = 0;
463 FX_DWORD count = dwObjCount; 468 FX_DWORD count = dwObjCount;
464 FX_FILESIZE SavedPos = m_Syntax.SavePos(); 469 FX_FILESIZE SavedPos = m_pSyntax->SavePos();
465 470
466 const int32_t recordsize = 20; 471 const int32_t recordsize = 20;
467 std::vector<char> buf(1024 * recordsize + 1); 472 std::vector<char> buf(1024 * recordsize + 1);
468 buf[1024 * recordsize] = '\0'; 473 buf[1024 * recordsize] = '\0';
469 474
470 int32_t nBlocks = count / 1024 + 1; 475 int32_t nBlocks = count / 1024 + 1;
471 for (int32_t block = 0; block < nBlocks; block++) { 476 for (int32_t block = 0; block < nBlocks; block++) {
472 int32_t block_size = block == nBlocks - 1 ? count % 1024 : 1024; 477 int32_t block_size = block == nBlocks - 1 ? count % 1024 : 1024;
473 FX_DWORD dwReadSize = block_size * recordsize; 478 FX_DWORD dwReadSize = block_size * recordsize;
474 if ((FX_FILESIZE)(dwStartPos + dwReadSize) > m_Syntax.m_FileLen) 479 if ((FX_FILESIZE)(dwStartPos + dwReadSize) > m_pSyntax->m_FileLen)
475 return FALSE; 480 return FALSE;
476 481
477 if (!m_Syntax.ReadBlock(reinterpret_cast<uint8_t*>(buf.data()), 482 if (!m_pSyntax->ReadBlock(reinterpret_cast<uint8_t*>(buf.data()),
478 dwReadSize)) { 483 dwReadSize)) {
479 return FALSE; 484 return FALSE;
480 } 485 }
481 486
482 for (int32_t i = 0; i < block_size; i++) { 487 for (int32_t i = 0; i < block_size; i++) {
483 FX_DWORD objnum = start_objnum + block * 1024 + i; 488 FX_DWORD objnum = start_objnum + block * 1024 + i;
484 char* pEntry = &buf[i * recordsize]; 489 char* pEntry = &buf[i * recordsize];
485 if (pEntry[17] == 'f') { 490 if (pEntry[17] == 'f') {
486 m_ObjectInfo[objnum].pos = 0; 491 m_ObjectInfo[objnum].pos = 0;
487 m_ObjectInfo[objnum].type = 0; 492 m_ObjectInfo[objnum].type = 0;
488 } else { 493 } else {
489 int32_t offset = FXSYS_atoi(pEntry); 494 int32_t offset = FXSYS_atoi(pEntry);
490 if (offset == 0) { 495 if (offset == 0) {
491 for (int32_t c = 0; c < 10; c++) { 496 for (int32_t c = 0; c < 10; c++) {
492 if (!std::isdigit(pEntry[c])) 497 if (!std::isdigit(pEntry[c]))
493 return FALSE; 498 return FALSE;
494 } 499 }
495 } 500 }
496 501
497 m_ObjectInfo[objnum].pos = offset; 502 m_ObjectInfo[objnum].pos = offset;
498 int32_t version = FXSYS_atoi(pEntry + 11); 503 int32_t version = FXSYS_atoi(pEntry + 11);
499 if (version >= 1) 504 if (version >= 1)
500 m_bVersionUpdated = TRUE; 505 m_bVersionUpdated = TRUE;
501 506
502 m_ObjectInfo[objnum].gennum = version; 507 m_ObjectInfo[objnum].gennum = version;
503 if (m_ObjectInfo[objnum].pos < m_Syntax.m_FileLen) 508 if (m_ObjectInfo[objnum].pos < m_pSyntax->m_FileLen)
504 m_SortedOffset.insert(m_ObjectInfo[objnum].pos); 509 m_SortedOffset.insert(m_ObjectInfo[objnum].pos);
505 510
506 m_ObjectInfo[objnum].type = 1; 511 m_ObjectInfo[objnum].type = 1;
507 } 512 }
508 } 513 }
509 } 514 }
510 m_Syntax.RestorePos(SavedPos + count * recordsize); 515 m_pSyntax->RestorePos(SavedPos + count * recordsize);
511 return TRUE; 516 return TRUE;
512 } 517 }
513 518
514 bool CPDF_Parser::LoadCrossRefV4(FX_FILESIZE pos, 519 bool CPDF_Parser::LoadCrossRefV4(FX_FILESIZE pos,
515 FX_FILESIZE streampos, 520 FX_FILESIZE streampos,
516 FX_BOOL bSkip) { 521 FX_BOOL bSkip) {
517 m_Syntax.RestorePos(pos); 522 m_pSyntax->RestorePos(pos);
518 if (m_Syntax.GetKeyword() != "xref") 523 if (m_pSyntax->GetKeyword() != "xref")
519 return false; 524 return false;
520 525
521 m_SortedOffset.insert(pos); 526 m_SortedOffset.insert(pos);
522 if (streampos) 527 if (streampos)
523 m_SortedOffset.insert(streampos); 528 m_SortedOffset.insert(streampos);
524 529
525 while (1) { 530 while (1) {
526 FX_FILESIZE SavedPos = m_Syntax.SavePos(); 531 FX_FILESIZE SavedPos = m_pSyntax->SavePos();
527 bool bIsNumber; 532 bool bIsNumber;
528 CFX_ByteString word = m_Syntax.GetNextWord(&bIsNumber); 533 CFX_ByteString word = m_pSyntax->GetNextWord(&bIsNumber);
529 if (word.IsEmpty()) 534 if (word.IsEmpty())
530 return false; 535 return false;
531 536
532 if (!bIsNumber) { 537 if (!bIsNumber) {
533 m_Syntax.RestorePos(SavedPos); 538 m_pSyntax->RestorePos(SavedPos);
534 break; 539 break;
535 } 540 }
536 541
537 FX_DWORD start_objnum = FXSYS_atoui(word); 542 FX_DWORD start_objnum = FXSYS_atoui(word);
538 if (start_objnum >= kMaxObjectNumber) 543 if (start_objnum >= kMaxObjectNumber)
539 return false; 544 return false;
540 545
541 FX_DWORD count = m_Syntax.GetDirectNum(); 546 FX_DWORD count = m_pSyntax->GetDirectNum();
542 m_Syntax.ToNextWord(); 547 m_pSyntax->ToNextWord();
543 SavedPos = m_Syntax.SavePos(); 548 SavedPos = m_pSyntax->SavePos();
544 const int32_t recordsize = 20; 549 const int32_t recordsize = 20;
545 550
546 m_dwXrefStartObjNum = start_objnum; 551 m_dwXrefStartObjNum = start_objnum;
547 if (!bSkip) { 552 if (!bSkip) {
548 std::vector<char> buf(1024 * recordsize + 1); 553 std::vector<char> buf(1024 * recordsize + 1);
549 buf[1024 * recordsize] = '\0'; 554 buf[1024 * recordsize] = '\0';
550 555
551 int32_t nBlocks = count / 1024 + 1; 556 int32_t nBlocks = count / 1024 + 1;
552 for (int32_t block = 0; block < nBlocks; block++) { 557 for (int32_t block = 0; block < nBlocks; block++) {
553 int32_t block_size = block == nBlocks - 1 ? count % 1024 : 1024; 558 int32_t block_size = block == nBlocks - 1 ? count % 1024 : 1024;
554 m_Syntax.ReadBlock(reinterpret_cast<uint8_t*>(buf.data()), 559 m_pSyntax->ReadBlock(reinterpret_cast<uint8_t*>(buf.data()),
555 block_size * recordsize); 560 block_size * recordsize);
556 561
557 for (int32_t i = 0; i < block_size; i++) { 562 for (int32_t i = 0; i < block_size; i++) {
558 FX_DWORD objnum = start_objnum + block * 1024 + i; 563 FX_DWORD objnum = start_objnum + block * 1024 + i;
559 char* pEntry = &buf[i * recordsize]; 564 char* pEntry = &buf[i * recordsize];
560 if (pEntry[17] == 'f') { 565 if (pEntry[17] == 'f') {
561 m_ObjectInfo[objnum].pos = 0; 566 m_ObjectInfo[objnum].pos = 0;
562 m_ObjectInfo[objnum].type = 0; 567 m_ObjectInfo[objnum].type = 0;
563 } else { 568 } else {
564 FX_FILESIZE offset = (FX_FILESIZE)FXSYS_atoi64(pEntry); 569 FX_FILESIZE offset = (FX_FILESIZE)FXSYS_atoi64(pEntry);
565 if (offset == 0) { 570 if (offset == 0) {
566 for (int32_t c = 0; c < 10; c++) { 571 for (int32_t c = 0; c < 10; c++) {
567 if (!std::isdigit(pEntry[c])) 572 if (!std::isdigit(pEntry[c]))
568 return false; 573 return false;
569 } 574 }
570 } 575 }
571 576
572 m_ObjectInfo[objnum].pos = offset; 577 m_ObjectInfo[objnum].pos = offset;
573 int32_t version = FXSYS_atoi(pEntry + 11); 578 int32_t version = FXSYS_atoi(pEntry + 11);
574 if (version >= 1) 579 if (version >= 1)
575 m_bVersionUpdated = TRUE; 580 m_bVersionUpdated = TRUE;
576 581
577 m_ObjectInfo[objnum].gennum = version; 582 m_ObjectInfo[objnum].gennum = version;
578 if (m_ObjectInfo[objnum].pos < m_Syntax.m_FileLen) 583 if (m_ObjectInfo[objnum].pos < m_pSyntax->m_FileLen)
579 m_SortedOffset.insert(m_ObjectInfo[objnum].pos); 584 m_SortedOffset.insert(m_ObjectInfo[objnum].pos);
580 585
581 m_ObjectInfo[objnum].type = 1; 586 m_ObjectInfo[objnum].type = 1;
582 } 587 }
583 } 588 }
584 } 589 }
585 } 590 }
586 m_Syntax.RestorePos(SavedPos + count * recordsize); 591 m_pSyntax->RestorePos(SavedPos + count * recordsize);
587 } 592 }
588 return !streampos || LoadCrossRefV5(&streampos, FALSE); 593 return !streampos || LoadCrossRefV5(&streampos, FALSE);
589 } 594 }
590 595
591 FX_BOOL CPDF_Parser::LoadAllCrossRefV5(FX_FILESIZE xrefpos) { 596 FX_BOOL CPDF_Parser::LoadAllCrossRefV5(FX_FILESIZE xrefpos) {
592 if (!LoadCrossRefV5(&xrefpos, TRUE)) 597 if (!LoadCrossRefV5(&xrefpos, TRUE))
593 return FALSE; 598 return FALSE;
594 599
595 std::set<FX_FILESIZE> seen_xrefpos; 600 std::set<FX_FILESIZE> seen_xrefpos;
596 while (xrefpos) { 601 while (xrefpos) {
(...skipping 21 matching lines...) Expand all
618 ParserState state = ParserState::kDefault; 623 ParserState state = ParserState::kDefault;
619 624
620 int32_t inside_index = 0; 625 int32_t inside_index = 0;
621 FX_DWORD objnum = 0; 626 FX_DWORD objnum = 0;
622 FX_DWORD gennum = 0; 627 FX_DWORD gennum = 0;
623 int32_t depth = 0; 628 int32_t depth = 0;
624 629
625 const FX_DWORD kBufferSize = 4096; 630 const FX_DWORD kBufferSize = 4096;
626 std::vector<uint8_t> buffer(kBufferSize); 631 std::vector<uint8_t> buffer(kBufferSize);
627 632
628 FX_FILESIZE pos = m_Syntax.m_HeaderOffset; 633 FX_FILESIZE pos = m_pSyntax->m_HeaderOffset;
629 FX_FILESIZE start_pos = 0; 634 FX_FILESIZE start_pos = 0;
630 FX_FILESIZE start_pos1 = 0; 635 FX_FILESIZE start_pos1 = 0;
631 FX_FILESIZE last_obj = -1; 636 FX_FILESIZE last_obj = -1;
632 FX_FILESIZE last_xref = -1; 637 FX_FILESIZE last_xref = -1;
633 FX_FILESIZE last_trailer = -1; 638 FX_FILESIZE last_trailer = -1;
634 639
635 while (pos < m_Syntax.m_FileLen) { 640 while (pos < m_pSyntax->m_FileLen) {
636 const FX_FILESIZE saved_pos = pos; 641 const FX_FILESIZE saved_pos = pos;
637 bool bOverFlow = false; 642 bool bOverFlow = false;
638 FX_DWORD size = std::min((FX_DWORD)(m_Syntax.m_FileLen - pos), kBufferSize); 643 FX_DWORD size =
639 if (!m_Syntax.m_pFileAccess->ReadBlock(buffer.data(), pos, size)) 644 std::min((FX_DWORD)(m_pSyntax->m_FileLen - pos), kBufferSize);
645 if (!m_pSyntax->m_pFileAccess->ReadBlock(buffer.data(), pos, size))
640 break; 646 break;
641 647
642 for (FX_DWORD i = 0; i < size; i++) { 648 for (FX_DWORD i = 0; i < size; i++) {
643 uint8_t byte = buffer[i]; 649 uint8_t byte = buffer[i];
644 switch (state) { 650 switch (state) {
645 case ParserState::kDefault: 651 case ParserState::kDefault:
646 if (PDFCharIsWhitespace(byte)) { 652 if (PDFCharIsWhitespace(byte)) {
647 state = ParserState::kWhitespace; 653 state = ParserState::kWhitespace;
648 } else if (std::isdigit(byte)) { 654 } else if (std::isdigit(byte)) {
649 --i; 655 --i;
(...skipping 103 matching lines...) Expand 10 before | Expand all | Expand 10 after
753 case 2: 759 case 2:
754 if (byte != 'j') { 760 if (byte != 'j') {
755 --i; 761 --i;
756 state = ParserState::kDefault; 762 state = ParserState::kDefault;
757 } else { 763 } else {
758 inside_index++; 764 inside_index++;
759 } 765 }
760 break; 766 break;
761 case 3: 767 case 3:
762 if (PDFCharIsWhitespace(byte) || PDFCharIsDelimiter(byte)) { 768 if (PDFCharIsWhitespace(byte) || PDFCharIsDelimiter(byte)) {
763 FX_FILESIZE obj_pos = start_pos - m_Syntax.m_HeaderOffset; 769 FX_FILESIZE obj_pos = start_pos - m_pSyntax->m_HeaderOffset;
764 m_SortedOffset.insert(obj_pos); 770 m_SortedOffset.insert(obj_pos);
765 last_obj = start_pos; 771 last_obj = start_pos;
766 FX_FILESIZE obj_end = 0; 772 FX_FILESIZE obj_end = 0;
767 CPDF_Object* pObject = ParseIndirectObjectAtByStrict( 773 CPDF_Object* pObject = ParseIndirectObjectAtByStrict(
768 m_pDocument, obj_pos, objnum, &obj_end); 774 m_pDocument, obj_pos, objnum, &obj_end);
769 if (CPDF_Stream* pStream = ToStream(pObject)) { 775 if (CPDF_Stream* pStream = ToStream(pObject)) {
770 if (CPDF_Dictionary* pDict = pStream->GetDict()) { 776 if (CPDF_Dictionary* pDict = pStream->GetDict()) {
771 if ((pDict->KeyExist("Type")) && 777 if ((pDict->KeyExist("Type")) &&
772 (pDict->GetStringBy("Type") == "XRef" && 778 (pDict->GetStringBy("Type") == "XRef" &&
773 pDict->KeyExist("Size"))) { 779 pDict->KeyExist("Size"))) {
774 CPDF_Object* pRoot = pDict->GetElement("Root"); 780 CPDF_Object* pRoot = pDict->GetElement("Root");
775 if (pRoot && pRoot->GetDict() && 781 if (pRoot && pRoot->GetDict() &&
776 pRoot->GetDict()->GetElement("Pages")) { 782 pRoot->GetDict()->GetElement("Pages")) {
777 if (m_pTrailer) 783 if (m_pTrailer)
778 m_pTrailer->Release(); 784 m_pTrailer->Release();
779 m_pTrailer = ToDictionary(pDict->Clone()); 785 m_pTrailer = ToDictionary(pDict->Clone());
780 } 786 }
781 } 787 }
782 } 788 }
783 } 789 }
784 790
785 FX_FILESIZE offset = 0; 791 FX_FILESIZE offset = 0;
786 m_Syntax.RestorePos(obj_pos); 792 m_pSyntax->RestorePos(obj_pos);
787 offset = m_Syntax.FindTag("obj", 0); 793 offset = m_pSyntax->FindTag("obj", 0);
788 if (offset == -1) 794 if (offset == -1)
789 offset = 0; 795 offset = 0;
790 else 796 else
791 offset += 3; 797 offset += 3;
792 798
793 FX_FILESIZE nLen = obj_end - obj_pos - offset; 799 FX_FILESIZE nLen = obj_end - obj_pos - offset;
794 if ((FX_DWORD)nLen > size - i) { 800 if ((FX_DWORD)nLen > size - i) {
795 pos = obj_end + m_Syntax.m_HeaderOffset; 801 pos = obj_end + m_pSyntax->m_HeaderOffset;
796 bOverFlow = true; 802 bOverFlow = true;
797 } else { 803 } else {
798 i += (FX_DWORD)nLen; 804 i += (FX_DWORD)nLen;
799 } 805 }
800 806
801 if (!m_ObjectInfo.empty() && IsValidObjectNumber(objnum) && 807 if (!m_ObjectInfo.empty() && IsValidObjectNumber(objnum) &&
802 m_ObjectInfo[objnum].pos) { 808 m_ObjectInfo[objnum].pos) {
803 if (pObject) { 809 if (pObject) {
804 FX_DWORD oldgen = GetObjectGenNum(objnum); 810 FX_DWORD oldgen = GetObjectGenNum(objnum);
805 m_ObjectInfo[objnum].pos = obj_pos; 811 m_ObjectInfo[objnum].pos = obj_pos;
(...skipping 13 matching lines...) Expand all
819 --i; 825 --i;
820 state = ParserState::kDefault; 826 state = ParserState::kDefault;
821 break; 827 break;
822 } 828 }
823 break; 829 break;
824 830
825 case ParserState::kTrailer: 831 case ParserState::kTrailer:
826 if (inside_index == 7) { 832 if (inside_index == 7) {
827 if (PDFCharIsWhitespace(byte) || PDFCharIsDelimiter(byte)) { 833 if (PDFCharIsWhitespace(byte) || PDFCharIsDelimiter(byte)) {
828 last_trailer = pos + i - 7; 834 last_trailer = pos + i - 7;
829 m_Syntax.RestorePos(pos + i - m_Syntax.m_HeaderOffset); 835 m_pSyntax->RestorePos(pos + i - m_pSyntax->m_HeaderOffset);
830 836
831 CPDF_Object* pObj = m_Syntax.GetObject(m_pDocument, 0, 0, true); 837 CPDF_Object* pObj = m_pSyntax->GetObject(m_pDocument, 0, 0, true);
832 if (pObj) { 838 if (pObj) {
833 if (!pObj->IsDictionary() && !pObj->AsStream()) { 839 if (!pObj->IsDictionary() && !pObj->AsStream()) {
834 pObj->Release(); 840 pObj->Release();
835 } else { 841 } else {
836 CPDF_Stream* pStream = pObj->AsStream(); 842 CPDF_Stream* pStream = pObj->AsStream();
837 if (CPDF_Dictionary* pTrailer = 843 if (CPDF_Dictionary* pTrailer =
838 pStream ? pStream->GetDict() : pObj->AsDictionary()) { 844 pStream ? pStream->GetDict() : pObj->AsDictionary()) {
839 if (m_pTrailer) { 845 if (m_pTrailer) {
840 CPDF_Object* pRoot = pTrailer->GetElement("Root"); 846 CPDF_Object* pRoot = pTrailer->GetElement("Root");
841 CPDF_Reference* pRef = ToReference(pRoot); 847 CPDF_Reference* pRef = ToReference(pRoot);
(...skipping 17 matching lines...) Expand all
859 } 865 }
860 pObj->Release(); 866 pObj->Release();
861 } else { 867 } else {
862 if (pObj->IsStream()) { 868 if (pObj->IsStream()) {
863 m_pTrailer = ToDictionary(pTrailer->Clone()); 869 m_pTrailer = ToDictionary(pTrailer->Clone());
864 pObj->Release(); 870 pObj->Release();
865 } else { 871 } else {
866 m_pTrailer = pTrailer; 872 m_pTrailer = pTrailer;
867 } 873 }
868 874
869 FX_FILESIZE dwSavePos = m_Syntax.SavePos(); 875 FX_FILESIZE dwSavePos = m_pSyntax->SavePos();
870 CFX_ByteString strWord = m_Syntax.GetKeyword(); 876 CFX_ByteString strWord = m_pSyntax->GetKeyword();
871 if (!strWord.Compare("startxref")) { 877 if (!strWord.Compare("startxref")) {
872 bool bNumber; 878 bool bNumber;
873 CFX_ByteString bsOffset = 879 CFX_ByteString bsOffset =
874 m_Syntax.GetNextWord(&bNumber); 880 m_pSyntax->GetNextWord(&bNumber);
875 if (bNumber) 881 if (bNumber)
876 m_LastXRefOffset = FXSYS_atoi(bsOffset); 882 m_LastXRefOffset = FXSYS_atoi(bsOffset);
877 } 883 }
878 m_Syntax.RestorePos(dwSavePos); 884 m_pSyntax->RestorePos(dwSavePos);
879 } 885 }
880 } else { 886 } else {
881 pObj->Release(); 887 pObj->Release();
882 } 888 }
883 } 889 }
884 } 890 }
885 } 891 }
886 --i; 892 --i;
887 state = ParserState::kDefault; 893 state = ParserState::kDefault;
888 } else if (byte == "trailer"[inside_index]) { 894 } else if (byte == "trailer"[inside_index]) {
(...skipping 71 matching lines...) Expand 10 before | Expand all | Expand 10 after
960 966
961 // If the position has not changed at all in a loop iteration, then break 967 // If the position has not changed at all in a loop iteration, then break
962 // out to prevent infinite looping. 968 // out to prevent infinite looping.
963 if (pos == saved_pos) 969 if (pos == saved_pos)
964 break; 970 break;
965 } 971 }
966 972
967 if (last_xref != -1 && last_xref > last_obj) 973 if (last_xref != -1 && last_xref > last_obj)
968 last_trailer = last_xref; 974 last_trailer = last_xref;
969 else if (last_trailer == -1 || last_xref < last_obj) 975 else if (last_trailer == -1 || last_xref < last_obj)
970 last_trailer = m_Syntax.m_FileLen; 976 last_trailer = m_pSyntax->m_FileLen;
971 977
972 m_SortedOffset.insert(last_trailer - m_Syntax.m_HeaderOffset); 978 m_SortedOffset.insert(last_trailer - m_pSyntax->m_HeaderOffset);
973 return m_pTrailer && !m_ObjectInfo.empty(); 979 return m_pTrailer && !m_ObjectInfo.empty();
974 } 980 }
975 981
976 FX_BOOL CPDF_Parser::LoadCrossRefV5(FX_FILESIZE* pos, FX_BOOL bMainXRef) { 982 FX_BOOL CPDF_Parser::LoadCrossRefV5(FX_FILESIZE* pos, FX_BOOL bMainXRef) {
977 CPDF_Object* pObject = ParseIndirectObjectAt(m_pDocument, *pos, 0); 983 CPDF_Object* pObject = ParseIndirectObjectAt(m_pDocument, *pos, 0);
978 if (!pObject) 984 if (!pObject)
979 return FALSE; 985 return FALSE;
980 986
981 if (m_pDocument) { 987 if (m_pDocument) {
982 FX_BOOL bInserted = FALSE; 988 FX_BOOL bInserted = FALSE;
(...skipping 176 matching lines...) Expand 10 before | Expand all | Expand 10 after
1159 1165
1160 FX_FILESIZE pos = m_ObjectInfo[objnum].pos; 1166 FX_FILESIZE pos = m_ObjectInfo[objnum].pos;
1161 auto it = m_SortedOffset.find(pos); 1167 auto it = m_SortedOffset.find(pos);
1162 if (it == m_SortedOffset.end()) 1168 if (it == m_SortedOffset.end())
1163 return TRUE; 1169 return TRUE;
1164 1170
1165 if (++it == m_SortedOffset.end()) 1171 if (++it == m_SortedOffset.end())
1166 return FALSE; 1172 return FALSE;
1167 1173
1168 FX_FILESIZE size = *it - pos; 1174 FX_FILESIZE size = *it - pos;
1169 FX_FILESIZE SavedPos = m_Syntax.SavePos(); 1175 FX_FILESIZE SavedPos = m_pSyntax->SavePos();
1170 m_Syntax.RestorePos(pos); 1176 m_pSyntax->RestorePos(pos);
1171 1177
1172 const char kFormStream[] = "/Form\0stream"; 1178 const char kFormStream[] = "/Form\0stream";
1173 const CFX_ByteStringC kFormStreamStr(kFormStream, sizeof(kFormStream) - 1); 1179 const CFX_ByteStringC kFormStreamStr(kFormStream, sizeof(kFormStream) - 1);
1174 bForm = m_Syntax.SearchMultiWord(kFormStreamStr, TRUE, size) == 0; 1180 bForm = m_pSyntax->SearchMultiWord(kFormStreamStr, TRUE, size) == 0;
1175 m_Syntax.RestorePos(SavedPos); 1181 m_pSyntax->RestorePos(SavedPos);
1176 return TRUE; 1182 return TRUE;
1177 } 1183 }
1178 1184
1179 CPDF_Object* CPDF_Parser::ParseIndirectObject( 1185 CPDF_Object* CPDF_Parser::ParseIndirectObject(
1180 CPDF_IndirectObjectHolder* pObjList, 1186 CPDF_IndirectObjectHolder* pObjList,
1181 FX_DWORD objnum) { 1187 FX_DWORD objnum) {
1182 if (!IsValidObjectNumber(objnum)) 1188 if (!IsValidObjectNumber(objnum))
1183 return nullptr; 1189 return nullptr;
1184 1190
1185 // Prevent circular parsing the same object. 1191 // Prevent circular parsing the same object.
(...skipping 118 matching lines...) Expand 10 before | Expand all | Expand 10 after
1304 return; 1310 return;
1305 } 1311 }
1306 1312
1307 if (GetObjectType(objnum) != 1) 1313 if (GetObjectType(objnum) != 1)
1308 return; 1314 return;
1309 1315
1310 FX_FILESIZE pos = m_ObjectInfo[objnum].pos; 1316 FX_FILESIZE pos = m_ObjectInfo[objnum].pos;
1311 if (pos == 0) 1317 if (pos == 0)
1312 return; 1318 return;
1313 1319
1314 FX_FILESIZE SavedPos = m_Syntax.SavePos(); 1320 FX_FILESIZE SavedPos = m_pSyntax->SavePos();
1315 m_Syntax.RestorePos(pos); 1321 m_pSyntax->RestorePos(pos);
1316 1322
1317 bool bIsNumber; 1323 bool bIsNumber;
1318 CFX_ByteString word = m_Syntax.GetNextWord(&bIsNumber); 1324 CFX_ByteString word = m_pSyntax->GetNextWord(&bIsNumber);
1319 if (!bIsNumber) { 1325 if (!bIsNumber) {
1320 m_Syntax.RestorePos(SavedPos); 1326 m_pSyntax->RestorePos(SavedPos);
1321 return; 1327 return;
1322 } 1328 }
1323 1329
1324 FX_DWORD parser_objnum = FXSYS_atoui(word); 1330 FX_DWORD parser_objnum = FXSYS_atoui(word);
1325 if (parser_objnum && parser_objnum != objnum) { 1331 if (parser_objnum && parser_objnum != objnum) {
1326 m_Syntax.RestorePos(SavedPos); 1332 m_pSyntax->RestorePos(SavedPos);
1327 return; 1333 return;
1328 } 1334 }
1329 1335
1330 word = m_Syntax.GetNextWord(&bIsNumber); 1336 word = m_pSyntax->GetNextWord(&bIsNumber);
1331 if (!bIsNumber) { 1337 if (!bIsNumber) {
1332 m_Syntax.RestorePos(SavedPos); 1338 m_pSyntax->RestorePos(SavedPos);
1333 return; 1339 return;
1334 } 1340 }
1335 1341
1336 if (m_Syntax.GetKeyword() != "obj") { 1342 if (m_pSyntax->GetKeyword() != "obj") {
1337 m_Syntax.RestorePos(SavedPos); 1343 m_pSyntax->RestorePos(SavedPos);
1338 return; 1344 return;
1339 } 1345 }
1340 1346
1341 auto it = m_SortedOffset.find(pos); 1347 auto it = m_SortedOffset.find(pos);
1342 if (it == m_SortedOffset.end() || ++it == m_SortedOffset.end()) { 1348 if (it == m_SortedOffset.end() || ++it == m_SortedOffset.end()) {
1343 m_Syntax.RestorePos(SavedPos); 1349 m_pSyntax->RestorePos(SavedPos);
1344 return; 1350 return;
1345 } 1351 }
1346 1352
1347 FX_FILESIZE nextoff = *it; 1353 FX_FILESIZE nextoff = *it;
1348 FX_BOOL bNextOffValid = FALSE; 1354 FX_BOOL bNextOffValid = FALSE;
1349 if (nextoff != pos) { 1355 if (nextoff != pos) {
1350 m_Syntax.RestorePos(nextoff); 1356 m_pSyntax->RestorePos(nextoff);
1351 word = m_Syntax.GetNextWord(&bIsNumber); 1357 word = m_pSyntax->GetNextWord(&bIsNumber);
1352 if (word == "xref") { 1358 if (word == "xref") {
1353 bNextOffValid = TRUE; 1359 bNextOffValid = TRUE;
1354 } else if (bIsNumber) { 1360 } else if (bIsNumber) {
1355 word = m_Syntax.GetNextWord(&bIsNumber); 1361 word = m_pSyntax->GetNextWord(&bIsNumber);
1356 if (bIsNumber && m_Syntax.GetKeyword() == "obj") { 1362 if (bIsNumber && m_pSyntax->GetKeyword() == "obj") {
1357 bNextOffValid = TRUE; 1363 bNextOffValid = TRUE;
1358 } 1364 }
1359 } 1365 }
1360 } 1366 }
1361 1367
1362 if (!bNextOffValid) { 1368 if (!bNextOffValid) {
1363 m_Syntax.RestorePos(pos); 1369 m_pSyntax->RestorePos(pos);
1364 while (1) { 1370 while (1) {
1365 if (m_Syntax.GetKeyword() == "endobj") 1371 if (m_pSyntax->GetKeyword() == "endobj")
1366 break; 1372 break;
1367 1373
1368 if (m_Syntax.SavePos() == m_Syntax.m_FileLen) 1374 if (m_pSyntax->SavePos() == m_pSyntax->m_FileLen)
1369 break; 1375 break;
1370 } 1376 }
1371 nextoff = m_Syntax.SavePos(); 1377 nextoff = m_pSyntax->SavePos();
1372 } 1378 }
1373 1379
1374 size = (FX_DWORD)(nextoff - pos); 1380 size = (FX_DWORD)(nextoff - pos);
1375 pBuffer = FX_Alloc(uint8_t, size); 1381 pBuffer = FX_Alloc(uint8_t, size);
1376 m_Syntax.RestorePos(pos); 1382 m_pSyntax->RestorePos(pos);
1377 m_Syntax.ReadBlock(pBuffer, size); 1383 m_pSyntax->ReadBlock(pBuffer, size);
1378 m_Syntax.RestorePos(SavedPos); 1384 m_pSyntax->RestorePos(SavedPos);
1379 } 1385 }
1380 1386
1381 CPDF_Object* CPDF_Parser::ParseIndirectObjectAt( 1387 CPDF_Object* CPDF_Parser::ParseIndirectObjectAt(
1382 CPDF_IndirectObjectHolder* pObjList, 1388 CPDF_IndirectObjectHolder* pObjList,
1383 FX_FILESIZE pos, 1389 FX_FILESIZE pos,
1384 FX_DWORD objnum) { 1390 FX_DWORD objnum) {
1385 FX_FILESIZE SavedPos = m_Syntax.SavePos(); 1391 FX_FILESIZE SavedPos = m_pSyntax->SavePos();
1386 m_Syntax.RestorePos(pos); 1392 m_pSyntax->RestorePos(pos);
1387 bool bIsNumber; 1393 bool bIsNumber;
1388 CFX_ByteString word = m_Syntax.GetNextWord(&bIsNumber); 1394 CFX_ByteString word = m_pSyntax->GetNextWord(&bIsNumber);
1389 if (!bIsNumber) { 1395 if (!bIsNumber) {
1390 m_Syntax.RestorePos(SavedPos); 1396 m_pSyntax->RestorePos(SavedPos);
1391 return nullptr; 1397 return nullptr;
1392 } 1398 }
1393 1399
1394 FX_FILESIZE objOffset = m_Syntax.SavePos(); 1400 FX_FILESIZE objOffset = m_pSyntax->SavePos();
1395 objOffset -= word.GetLength(); 1401 objOffset -= word.GetLength();
1396 FX_DWORD parser_objnum = FXSYS_atoui(word); 1402 FX_DWORD parser_objnum = FXSYS_atoui(word);
1397 if (objnum && parser_objnum != objnum) { 1403 if (objnum && parser_objnum != objnum) {
1398 m_Syntax.RestorePos(SavedPos); 1404 m_pSyntax->RestorePos(SavedPos);
1399 return nullptr; 1405 return nullptr;
1400 } 1406 }
1401 1407
1402 word = m_Syntax.GetNextWord(&bIsNumber); 1408 word = m_pSyntax->GetNextWord(&bIsNumber);
1403 if (!bIsNumber) { 1409 if (!bIsNumber) {
1404 m_Syntax.RestorePos(SavedPos); 1410 m_pSyntax->RestorePos(SavedPos);
1405 return nullptr; 1411 return nullptr;
1406 } 1412 }
1407 1413
1408 FX_DWORD parser_gennum = FXSYS_atoui(word); 1414 FX_DWORD parser_gennum = FXSYS_atoui(word);
1409 if (m_Syntax.GetKeyword() != "obj") { 1415 if (m_pSyntax->GetKeyword() != "obj") {
1410 m_Syntax.RestorePos(SavedPos); 1416 m_pSyntax->RestorePos(SavedPos);
1411 return nullptr; 1417 return nullptr;
1412 } 1418 }
1413 1419
1414 CPDF_Object* pObj = m_Syntax.GetObject(pObjList, objnum, parser_gennum, true); 1420 CPDF_Object* pObj =
1415 m_Syntax.SavePos(); 1421 m_pSyntax->GetObject(pObjList, objnum, parser_gennum, true);
1422 m_pSyntax->SavePos();
1416 1423
1417 CFX_ByteString bsWord = m_Syntax.GetKeyword(); 1424 CFX_ByteString bsWord = m_pSyntax->GetKeyword();
1418 if (bsWord == "endobj") 1425 if (bsWord == "endobj")
1419 m_Syntax.SavePos(); 1426 m_pSyntax->SavePos();
1420 1427
1421 m_Syntax.RestorePos(SavedPos); 1428 m_pSyntax->RestorePos(SavedPos);
1422 if (pObj) { 1429 if (pObj) {
1423 if (!objnum) 1430 if (!objnum)
1424 pObj->m_ObjNum = parser_objnum; 1431 pObj->m_ObjNum = parser_objnum;
1425 pObj->m_GenNum = parser_gennum; 1432 pObj->m_GenNum = parser_gennum;
1426 } 1433 }
1427 return pObj; 1434 return pObj;
1428 } 1435 }
1429 1436
1430 CPDF_Object* CPDF_Parser::ParseIndirectObjectAtByStrict( 1437 CPDF_Object* CPDF_Parser::ParseIndirectObjectAtByStrict(
1431 CPDF_IndirectObjectHolder* pObjList, 1438 CPDF_IndirectObjectHolder* pObjList,
1432 FX_FILESIZE pos, 1439 FX_FILESIZE pos,
1433 FX_DWORD objnum, 1440 FX_DWORD objnum,
1434 FX_FILESIZE* pResultPos) { 1441 FX_FILESIZE* pResultPos) {
1435 FX_FILESIZE SavedPos = m_Syntax.SavePos(); 1442 FX_FILESIZE SavedPos = m_pSyntax->SavePos();
1436 m_Syntax.RestorePos(pos); 1443 m_pSyntax->RestorePos(pos);
1437 1444
1438 bool bIsNumber; 1445 bool bIsNumber;
1439 CFX_ByteString word = m_Syntax.GetNextWord(&bIsNumber); 1446 CFX_ByteString word = m_pSyntax->GetNextWord(&bIsNumber);
1440 if (!bIsNumber) { 1447 if (!bIsNumber) {
1441 m_Syntax.RestorePos(SavedPos); 1448 m_pSyntax->RestorePos(SavedPos);
1442 return nullptr; 1449 return nullptr;
1443 } 1450 }
1444 1451
1445 FX_DWORD parser_objnum = FXSYS_atoui(word); 1452 FX_DWORD parser_objnum = FXSYS_atoui(word);
1446 if (objnum && parser_objnum != objnum) { 1453 if (objnum && parser_objnum != objnum) {
1447 m_Syntax.RestorePos(SavedPos); 1454 m_pSyntax->RestorePos(SavedPos);
1448 return nullptr; 1455 return nullptr;
1449 } 1456 }
1450 1457
1451 word = m_Syntax.GetNextWord(&bIsNumber); 1458 word = m_pSyntax->GetNextWord(&bIsNumber);
1452 if (!bIsNumber) { 1459 if (!bIsNumber) {
1453 m_Syntax.RestorePos(SavedPos); 1460 m_pSyntax->RestorePos(SavedPos);
1454 return nullptr; 1461 return nullptr;
1455 } 1462 }
1456 1463
1457 FX_DWORD gennum = FXSYS_atoui(word); 1464 FX_DWORD gennum = FXSYS_atoui(word);
1458 if (m_Syntax.GetKeyword() != "obj") { 1465 if (m_pSyntax->GetKeyword() != "obj") {
1459 m_Syntax.RestorePos(SavedPos); 1466 m_pSyntax->RestorePos(SavedPos);
1460 return nullptr; 1467 return nullptr;
1461 } 1468 }
1462 1469
1463 CPDF_Object* pObj = m_Syntax.GetObjectByStrict(pObjList, objnum, gennum); 1470 CPDF_Object* pObj = m_pSyntax->GetObjectByStrict(pObjList, objnum, gennum);
1464 if (pResultPos) 1471 if (pResultPos)
1465 *pResultPos = m_Syntax.m_Pos; 1472 *pResultPos = m_pSyntax->m_Pos;
1466 1473
1467 m_Syntax.RestorePos(SavedPos); 1474 m_pSyntax->RestorePos(SavedPos);
1468 return pObj; 1475 return pObj;
1469 } 1476 }
1470 1477
1471 CPDF_Dictionary* CPDF_Parser::LoadTrailerV4() { 1478 CPDF_Dictionary* CPDF_Parser::LoadTrailerV4() {
1472 if (m_Syntax.GetKeyword() != "trailer") 1479 if (m_pSyntax->GetKeyword() != "trailer")
1473 return nullptr; 1480 return nullptr;
1474 1481
1475 std::unique_ptr<CPDF_Object, ReleaseDeleter<CPDF_Object>> pObj( 1482 std::unique_ptr<CPDF_Object, ReleaseDeleter<CPDF_Object>> pObj(
1476 m_Syntax.GetObject(m_pDocument, 0, 0, true)); 1483 m_pSyntax->GetObject(m_pDocument, 0, 0, true));
1477 if (!ToDictionary(pObj.get())) 1484 if (!ToDictionary(pObj.get()))
1478 return nullptr; 1485 return nullptr;
1479 return pObj.release()->AsDictionary(); 1486 return pObj.release()->AsDictionary();
1480 } 1487 }
1481 1488
1482 FX_DWORD CPDF_Parser::GetPermissions(FX_BOOL bCheckRevision) { 1489 FX_DWORD CPDF_Parser::GetPermissions(FX_BOOL bCheckRevision) {
1483 if (!m_pSecurityHandler) 1490 if (!m_pSecurityHandler)
1484 return (FX_DWORD)-1; 1491 return (FX_DWORD)-1;
1485 1492
1486 FX_DWORD dwPermission = m_pSecurityHandler->GetPermissions(); 1493 FX_DWORD dwPermission = m_pSecurityHandler->GetPermissions();
1487 if (m_pEncryptDict && m_pEncryptDict->GetStringBy("Filter") == "Standard") { 1494 if (m_pEncryptDict && m_pEncryptDict->GetStringBy("Filter") == "Standard") {
1488 dwPermission &= 0xFFFFFFFC; 1495 dwPermission &= 0xFFFFFFFC;
1489 dwPermission |= 0xFFFFF0C0; 1496 dwPermission |= 0xFFFFF0C0;
1490 if (bCheckRevision && m_pEncryptDict->GetIntegerBy("R") == 2) 1497 if (bCheckRevision && m_pEncryptDict->GetIntegerBy("R") == 2)
1491 dwPermission &= 0xFFFFF0FF; 1498 dwPermission &= 0xFFFFF0FF;
1492 } 1499 }
1493 return dwPermission; 1500 return dwPermission;
1494 } 1501 }
1495 1502
1496 FX_BOOL CPDF_Parser::IsLinearizedFile(IFX_FileRead* pFileAccess, 1503 FX_BOOL CPDF_Parser::IsLinearizedFile(IFX_FileRead* pFileAccess,
1497 FX_DWORD offset) { 1504 FX_DWORD offset) {
1498 m_Syntax.InitParser(pFileAccess, offset); 1505 m_pSyntax->InitParser(pFileAccess, offset);
1499 m_Syntax.RestorePos(m_Syntax.m_HeaderOffset + 9); 1506 m_pSyntax->RestorePos(m_pSyntax->m_HeaderOffset + 9);
1500 1507
1501 FX_FILESIZE SavedPos = m_Syntax.SavePos(); 1508 FX_FILESIZE SavedPos = m_pSyntax->SavePos();
1502 bool bIsNumber; 1509 bool bIsNumber;
1503 CFX_ByteString word = m_Syntax.GetNextWord(&bIsNumber); 1510 CFX_ByteString word = m_pSyntax->GetNextWord(&bIsNumber);
1504 if (!bIsNumber) 1511 if (!bIsNumber)
1505 return FALSE; 1512 return FALSE;
1506 1513
1507 FX_DWORD objnum = FXSYS_atoui(word); 1514 FX_DWORD objnum = FXSYS_atoui(word);
1508 word = m_Syntax.GetNextWord(&bIsNumber); 1515 word = m_pSyntax->GetNextWord(&bIsNumber);
1509 if (!bIsNumber) 1516 if (!bIsNumber)
1510 return FALSE; 1517 return FALSE;
1511 1518
1512 FX_DWORD gennum = FXSYS_atoui(word); 1519 FX_DWORD gennum = FXSYS_atoui(word);
1513 if (m_Syntax.GetKeyword() != "obj") { 1520 if (m_pSyntax->GetKeyword() != "obj") {
1514 m_Syntax.RestorePos(SavedPos); 1521 m_pSyntax->RestorePos(SavedPos);
1515 return FALSE; 1522 return FALSE;
1516 } 1523 }
1517 1524
1518 m_pLinearized = m_Syntax.GetObject(nullptr, objnum, gennum, true); 1525 m_pLinearized = m_pSyntax->GetObject(nullptr, objnum, gennum, true);
1519 if (!m_pLinearized) 1526 if (!m_pLinearized)
1520 return FALSE; 1527 return FALSE;
1521 1528
1522 CPDF_Dictionary* pDict = m_pLinearized->GetDict(); 1529 CPDF_Dictionary* pDict = m_pLinearized->GetDict();
1523 if (pDict && pDict->GetElement("Linearized")) { 1530 if (pDict && pDict->GetElement("Linearized")) {
1524 m_Syntax.GetNextWord(nullptr); 1531 m_pSyntax->GetNextWord(nullptr);
1525 1532
1526 CPDF_Object* pLen = pDict->GetElement("L"); 1533 CPDF_Object* pLen = pDict->GetElement("L");
1527 if (!pLen) { 1534 if (!pLen) {
1528 m_pLinearized->Release(); 1535 m_pLinearized->Release();
1529 m_pLinearized = nullptr; 1536 m_pLinearized = nullptr;
1530 return FALSE; 1537 return FALSE;
1531 } 1538 }
1532 1539
1533 if (pLen->GetInteger() != (int)pFileAccess->GetSize()) 1540 if (pLen->GetInteger() != (int)pFileAccess->GetSize())
1534 return FALSE; 1541 return FALSE;
(...skipping 15 matching lines...) Expand all
1550 CloseParser(); 1557 CloseParser();
1551 m_bXRefStream = FALSE; 1558 m_bXRefStream = FALSE;
1552 m_LastXRefOffset = 0; 1559 m_LastXRefOffset = 0;
1553 m_bOwnFileRead = true; 1560 m_bOwnFileRead = true;
1554 1561
1555 int32_t offset = GetHeaderOffset(pFileAccess); 1562 int32_t offset = GetHeaderOffset(pFileAccess);
1556 if (offset == -1) 1563 if (offset == -1)
1557 return FORMAT_ERROR; 1564 return FORMAT_ERROR;
1558 1565
1559 if (!IsLinearizedFile(pFileAccess, offset)) { 1566 if (!IsLinearizedFile(pFileAccess, offset)) {
1560 m_Syntax.m_pFileAccess = nullptr; 1567 m_pSyntax->m_pFileAccess = nullptr;
1561 return StartParse(pFileAccess); 1568 return StartParse(pFileAccess);
1562 } 1569 }
1563 1570
1564 m_pDocument = new CPDF_Document(this); 1571 m_pDocument = new CPDF_Document(this);
1565 FX_FILESIZE dwFirstXRefOffset = m_Syntax.SavePos(); 1572 FX_FILESIZE dwFirstXRefOffset = m_pSyntax->SavePos();
1566 1573
1567 FX_BOOL bXRefRebuilt = FALSE; 1574 FX_BOOL bXRefRebuilt = FALSE;
1568 FX_BOOL bLoadV4 = FALSE; 1575 FX_BOOL bLoadV4 = FALSE;
1569 if (!(bLoadV4 = LoadCrossRefV4(dwFirstXRefOffset, 0, FALSE)) && 1576 if (!(bLoadV4 = LoadCrossRefV4(dwFirstXRefOffset, 0, FALSE)) &&
1570 !LoadCrossRefV5(&dwFirstXRefOffset, TRUE)) { 1577 !LoadCrossRefV5(&dwFirstXRefOffset, TRUE)) {
1571 if (!RebuildCrossRef()) 1578 if (!RebuildCrossRef())
1572 return FORMAT_ERROR; 1579 return FORMAT_ERROR;
1573 1580
1574 bXRefRebuilt = TRUE; 1581 bXRefRebuilt = TRUE;
1575 m_LastXRefOffset = 0; 1582 m_LastXRefOffset = 0;
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after
1613 return FORMAT_ERROR; 1620 return FORMAT_ERROR;
1614 1621
1615 eRet = SetEncryptHandler(); 1622 eRet = SetEncryptHandler();
1616 if (eRet != SUCCESS) 1623 if (eRet != SUCCESS)
1617 return eRet; 1624 return eRet;
1618 } 1625 }
1619 1626
1620 if (m_pSecurityHandler && m_pSecurityHandler->IsMetadataEncrypted()) { 1627 if (m_pSecurityHandler && m_pSecurityHandler->IsMetadataEncrypted()) {
1621 if (CPDF_Reference* pMetadata = 1628 if (CPDF_Reference* pMetadata =
1622 ToReference(m_pDocument->GetRoot()->GetElement("Metadata"))) 1629 ToReference(m_pDocument->GetRoot()->GetElement("Metadata")))
1623 m_Syntax.m_MetadataObjnum = pMetadata->GetRefObjNum(); 1630 m_pSyntax->m_MetadataObjnum = pMetadata->GetRefObjNum();
1624 } 1631 }
1625 return SUCCESS; 1632 return SUCCESS;
1626 } 1633 }
1627 1634
1628 FX_BOOL CPDF_Parser::LoadLinearizedAllCrossRefV5(FX_FILESIZE xrefpos) { 1635 FX_BOOL CPDF_Parser::LoadLinearizedAllCrossRefV5(FX_FILESIZE xrefpos) {
1629 if (!LoadCrossRefV5(&xrefpos, FALSE)) 1636 if (!LoadCrossRefV5(&xrefpos, FALSE))
1630 return FALSE; 1637 return FALSE;
1631 1638
1632 std::set<FX_FILESIZE> seen_xrefpos; 1639 std::set<FX_FILESIZE> seen_xrefpos;
1633 while (xrefpos) { 1640 while (xrefpos) {
1634 seen_xrefpos.insert(xrefpos); 1641 seen_xrefpos.insert(xrefpos);
1635 if (!LoadCrossRefV5(&xrefpos, FALSE)) 1642 if (!LoadCrossRefV5(&xrefpos, FALSE))
1636 return FALSE; 1643 return FALSE;
1637 1644
1638 // Check for circular references. 1645 // Check for circular references.
1639 if (pdfium::ContainsKey(seen_xrefpos, xrefpos)) 1646 if (pdfium::ContainsKey(seen_xrefpos, xrefpos))
1640 return FALSE; 1647 return FALSE;
1641 } 1648 }
1642 m_ObjectStreamMap.clear(); 1649 m_ObjectStreamMap.clear();
1643 m_bXRefStream = TRUE; 1650 m_bXRefStream = TRUE;
1644 return TRUE; 1651 return TRUE;
1645 } 1652 }
1646 1653
1647 CPDF_Parser::Error CPDF_Parser::LoadLinearizedMainXRefTable() { 1654 CPDF_Parser::Error CPDF_Parser::LoadLinearizedMainXRefTable() {
1648 FX_DWORD dwSaveMetadataObjnum = m_Syntax.m_MetadataObjnum; 1655 FX_DWORD dwSaveMetadataObjnum = m_pSyntax->m_MetadataObjnum;
1649 m_Syntax.m_MetadataObjnum = 0; 1656 m_pSyntax->m_MetadataObjnum = 0;
1650 if (m_pTrailer) { 1657 if (m_pTrailer) {
1651 m_pTrailer->Release(); 1658 m_pTrailer->Release();
1652 m_pTrailer = nullptr; 1659 m_pTrailer = nullptr;
1653 } 1660 }
1654 1661
1655 m_Syntax.RestorePos(m_LastXRefOffset - m_Syntax.m_HeaderOffset); 1662 m_pSyntax->RestorePos(m_LastXRefOffset - m_pSyntax->m_HeaderOffset);
1656 uint8_t ch = 0; 1663 uint8_t ch = 0;
1657 FX_DWORD dwCount = 0; 1664 FX_DWORD dwCount = 0;
1658 m_Syntax.GetNextChar(ch); 1665 m_pSyntax->GetNextChar(ch);
1659 while (PDFCharIsWhitespace(ch)) { 1666 while (PDFCharIsWhitespace(ch)) {
1660 ++dwCount; 1667 ++dwCount;
1661 if (m_Syntax.m_FileLen >= 1668 if (m_pSyntax->m_FileLen >=
1662 (FX_FILESIZE)(m_Syntax.SavePos() + m_Syntax.m_HeaderOffset)) { 1669 (FX_FILESIZE)(m_pSyntax->SavePos() + m_pSyntax->m_HeaderOffset)) {
1663 break; 1670 break;
1664 } 1671 }
1665 m_Syntax.GetNextChar(ch); 1672 m_pSyntax->GetNextChar(ch);
1666 } 1673 }
1667 m_LastXRefOffset += dwCount; 1674 m_LastXRefOffset += dwCount;
1668 m_ObjectStreamMap.clear(); 1675 m_ObjectStreamMap.clear();
1669 m_ObjCache.clear(); 1676 m_ObjCache.clear();
1670 1677
1671 if (!LoadLinearizedAllCrossRefV4(m_LastXRefOffset, m_dwXrefStartObjNum) && 1678 if (!LoadLinearizedAllCrossRefV4(m_LastXRefOffset, m_dwXrefStartObjNum) &&
1672 !LoadLinearizedAllCrossRefV5(m_LastXRefOffset)) { 1679 !LoadLinearizedAllCrossRefV5(m_LastXRefOffset)) {
1673 m_LastXRefOffset = 0; 1680 m_LastXRefOffset = 0;
1674 m_Syntax.m_MetadataObjnum = dwSaveMetadataObjnum; 1681 m_pSyntax->m_MetadataObjnum = dwSaveMetadataObjnum;
1675 return FORMAT_ERROR; 1682 return FORMAT_ERROR;
1676 } 1683 }
1677 1684
1678 m_Syntax.m_MetadataObjnum = dwSaveMetadataObjnum; 1685 m_pSyntax->m_MetadataObjnum = dwSaveMetadataObjnum;
1679 return SUCCESS; 1686 return SUCCESS;
1680 } 1687 }
1681 1688
1682 // static
1683 int CPDF_SyntaxParser::s_CurrentRecursionDepth = 0;
1684
1685 CPDF_SyntaxParser::CPDF_SyntaxParser()
1686 : m_MetadataObjnum(0),
1687 m_pFileAccess(nullptr),
1688 m_pFileBuf(nullptr),
1689 m_BufSize(CPDF_ModuleMgr::kFileBufSize) {}
1690
1691 CPDF_SyntaxParser::~CPDF_SyntaxParser() {
1692 FX_Free(m_pFileBuf);
1693 }
1694
1695 FX_BOOL CPDF_SyntaxParser::GetCharAt(FX_FILESIZE pos, uint8_t& ch) {
1696 CFX_AutoRestorer<FX_FILESIZE> save_pos(&m_Pos);
1697 m_Pos = pos;
1698 return GetNextChar(ch);
1699 }
1700
1701 FX_BOOL CPDF_SyntaxParser::GetNextChar(uint8_t& ch) {
1702 FX_FILESIZE pos = m_Pos + m_HeaderOffset;
1703 if (pos >= m_FileLen)
1704 return FALSE;
1705
1706 if (m_BufOffset >= pos || (FX_FILESIZE)(m_BufOffset + m_BufSize) <= pos) {
1707 FX_FILESIZE read_pos = pos;
1708 FX_DWORD read_size = m_BufSize;
1709 if ((FX_FILESIZE)read_size > m_FileLen)
1710 read_size = (FX_DWORD)m_FileLen;
1711
1712 if ((FX_FILESIZE)(read_pos + read_size) > m_FileLen) {
1713 if (m_FileLen < (FX_FILESIZE)read_size) {
1714 read_pos = 0;
1715 read_size = (FX_DWORD)m_FileLen;
1716 } else {
1717 read_pos = m_FileLen - read_size;
1718 }
1719 }
1720
1721 if (!m_pFileAccess->ReadBlock(m_pFileBuf, read_pos, read_size))
1722 return FALSE;
1723
1724 m_BufOffset = read_pos;
1725 }
1726 ch = m_pFileBuf[pos - m_BufOffset];
1727 m_Pos++;
1728 return TRUE;
1729 }
1730
1731 FX_BOOL CPDF_SyntaxParser::GetCharAtBackward(FX_FILESIZE pos, uint8_t& ch) {
1732 pos += m_HeaderOffset;
1733 if (pos >= m_FileLen)
1734 return FALSE;
1735
1736 if (m_BufOffset >= pos || (FX_FILESIZE)(m_BufOffset + m_BufSize) <= pos) {
1737 FX_FILESIZE read_pos;
1738 if (pos < (FX_FILESIZE)m_BufSize)
1739 read_pos = 0;
1740 else
1741 read_pos = pos - m_BufSize + 1;
1742
1743 FX_DWORD read_size = m_BufSize;
1744 if ((FX_FILESIZE)(read_pos + read_size) > m_FileLen) {
1745 if (m_FileLen < (FX_FILESIZE)read_size) {
1746 read_pos = 0;
1747 read_size = (FX_DWORD)m_FileLen;
1748 } else {
1749 read_pos = m_FileLen - read_size;
1750 }
1751 }
1752
1753 if (!m_pFileAccess->ReadBlock(m_pFileBuf, read_pos, read_size))
1754 return FALSE;
1755
1756 m_BufOffset = read_pos;
1757 }
1758 ch = m_pFileBuf[pos - m_BufOffset];
1759 return TRUE;
1760 }
1761
1762 FX_BOOL CPDF_SyntaxParser::ReadBlock(uint8_t* pBuf, FX_DWORD size) {
1763 if (!m_pFileAccess->ReadBlock(pBuf, m_Pos + m_HeaderOffset, size))
1764 return FALSE;
1765 m_Pos += size;
1766 return TRUE;
1767 }
1768
1769 void CPDF_SyntaxParser::GetNextWordInternal(bool* bIsNumber) {
1770 m_WordSize = 0;
1771 if (bIsNumber)
1772 *bIsNumber = true;
1773
1774 uint8_t ch;
1775 if (!GetNextChar(ch))
1776 return;
1777
1778 while (1) {
1779 while (PDFCharIsWhitespace(ch)) {
1780 if (!GetNextChar(ch))
1781 return;
1782 }
1783
1784 if (ch != '%')
1785 break;
1786
1787 while (1) {
1788 if (!GetNextChar(ch))
1789 return;
1790 if (PDFCharIsLineEnding(ch))
1791 break;
1792 }
1793 }
1794
1795 if (PDFCharIsDelimiter(ch)) {
1796 if (bIsNumber)
1797 *bIsNumber = false;
1798
1799 m_WordBuffer[m_WordSize++] = ch;
1800 if (ch == '/') {
1801 while (1) {
1802 if (!GetNextChar(ch))
1803 return;
1804
1805 if (!PDFCharIsOther(ch) && !PDFCharIsNumeric(ch)) {
1806 m_Pos--;
1807 return;
1808 }
1809
1810 if (m_WordSize < sizeof(m_WordBuffer) - 1)
1811 m_WordBuffer[m_WordSize++] = ch;
1812 }
1813 } else if (ch == '<') {
1814 if (!GetNextChar(ch))
1815 return;
1816
1817 if (ch == '<')
1818 m_WordBuffer[m_WordSize++] = ch;
1819 else
1820 m_Pos--;
1821 } else if (ch == '>') {
1822 if (!GetNextChar(ch))
1823 return;
1824
1825 if (ch == '>')
1826 m_WordBuffer[m_WordSize++] = ch;
1827 else
1828 m_Pos--;
1829 }
1830 return;
1831 }
1832
1833 while (1) {
1834 if (m_WordSize < sizeof(m_WordBuffer) - 1)
1835 m_WordBuffer[m_WordSize++] = ch;
1836
1837 if (!PDFCharIsNumeric(ch)) {
1838 if (bIsNumber)
1839 *bIsNumber = false;
1840 }
1841
1842 if (!GetNextChar(ch))
1843 return;
1844
1845 if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) {
1846 m_Pos--;
1847 break;
1848 }
1849 }
1850 }
1851
1852 CFX_ByteString CPDF_SyntaxParser::ReadString() {
1853 uint8_t ch;
1854 if (!GetNextChar(ch))
1855 return CFX_ByteString();
1856
1857 CFX_ByteTextBuf buf;
1858 int32_t parlevel = 0;
1859 int32_t status = 0;
1860 int32_t iEscCode = 0;
1861 while (1) {
1862 switch (status) {
1863 case 0:
1864 if (ch == ')') {
1865 if (parlevel == 0) {
1866 return buf.GetByteString();
1867 }
1868 parlevel--;
1869 buf.AppendChar(')');
1870 } else if (ch == '(') {
1871 parlevel++;
1872 buf.AppendChar('(');
1873 } else if (ch == '\\') {
1874 status = 1;
1875 } else {
1876 buf.AppendChar(ch);
1877 }
1878 break;
1879 case 1:
1880 if (ch >= '0' && ch <= '7') {
1881 iEscCode = FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch));
1882 status = 2;
1883 break;
1884 }
1885
1886 if (ch == 'n') {
1887 buf.AppendChar('\n');
1888 } else if (ch == 'r') {
1889 buf.AppendChar('\r');
1890 } else if (ch == 't') {
1891 buf.AppendChar('\t');
1892 } else if (ch == 'b') {
1893 buf.AppendChar('\b');
1894 } else if (ch == 'f') {
1895 buf.AppendChar('\f');
1896 } else if (ch == '\r') {
1897 status = 4;
1898 break;
1899 } else if (ch != '\n') {
1900 buf.AppendChar(ch);
1901 }
1902 status = 0;
1903 break;
1904 case 2:
1905 if (ch >= '0' && ch <= '7') {
1906 iEscCode =
1907 iEscCode * 8 + FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch));
1908 status = 3;
1909 } else {
1910 buf.AppendChar(iEscCode);
1911 status = 0;
1912 continue;
1913 }
1914 break;
1915 case 3:
1916 if (ch >= '0' && ch <= '7') {
1917 iEscCode =
1918 iEscCode * 8 + FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch));
1919 buf.AppendChar(iEscCode);
1920 status = 0;
1921 } else {
1922 buf.AppendChar(iEscCode);
1923 status = 0;
1924 continue;
1925 }
1926 break;
1927 case 4:
1928 status = 0;
1929 if (ch != '\n')
1930 continue;
1931 break;
1932 }
1933
1934 if (!GetNextChar(ch))
1935 break;
1936 }
1937
1938 GetNextChar(ch);
1939 return buf.GetByteString();
1940 }
1941
1942 CFX_ByteString CPDF_SyntaxParser::ReadHexString() {
1943 uint8_t ch;
1944 if (!GetNextChar(ch))
1945 return CFX_ByteString();
1946
1947 CFX_ByteTextBuf buf;
1948 bool bFirst = true;
1949 uint8_t code = 0;
1950 while (1) {
1951 if (ch == '>')
1952 break;
1953
1954 if (std::isxdigit(ch)) {
1955 int val = FXSYS_toHexDigit(ch);
1956 if (bFirst) {
1957 code = val * 16;
1958 } else {
1959 code += val;
1960 buf.AppendByte(code);
1961 }
1962 bFirst = !bFirst;
1963 }
1964
1965 if (!GetNextChar(ch))
1966 break;
1967 }
1968 if (!bFirst)
1969 buf.AppendByte(code);
1970
1971 return buf.GetByteString();
1972 }
1973
1974 void CPDF_SyntaxParser::ToNextLine() {
1975 uint8_t ch;
1976 while (GetNextChar(ch)) {
1977 if (ch == '\n')
1978 break;
1979
1980 if (ch == '\r') {
1981 GetNextChar(ch);
1982 if (ch != '\n')
1983 --m_Pos;
1984 break;
1985 }
1986 }
1987 }
1988
1989 void CPDF_SyntaxParser::ToNextWord() {
1990 uint8_t ch;
1991 if (!GetNextChar(ch))
1992 return;
1993
1994 while (1) {
1995 while (PDFCharIsWhitespace(ch)) {
1996 if (!GetNextChar(ch))
1997 return;
1998 }
1999
2000 if (ch != '%')
2001 break;
2002
2003 while (1) {
2004 if (!GetNextChar(ch))
2005 return;
2006 if (PDFCharIsLineEnding(ch))
2007 break;
2008 }
2009 }
2010 m_Pos--;
2011 }
2012
2013 CFX_ByteString CPDF_SyntaxParser::GetNextWord(bool* bIsNumber) {
2014 GetNextWordInternal(bIsNumber);
2015 return CFX_ByteString((const FX_CHAR*)m_WordBuffer, m_WordSize);
2016 }
2017
2018 CFX_ByteString CPDF_SyntaxParser::GetKeyword() {
2019 return GetNextWord(nullptr);
2020 }
2021
2022 CPDF_Object* CPDF_SyntaxParser::GetObject(CPDF_IndirectObjectHolder* pObjList,
2023 FX_DWORD objnum,
2024 FX_DWORD gennum,
2025 FX_BOOL bDecrypt) {
2026 CFX_AutoRestorer<int> restorer(&s_CurrentRecursionDepth);
2027 if (++s_CurrentRecursionDepth > kParserMaxRecursionDepth)
2028 return nullptr;
2029
2030 FX_FILESIZE SavedPos = m_Pos;
2031 bool bIsNumber;
2032 CFX_ByteString word = GetNextWord(&bIsNumber);
2033 if (word.GetLength() == 0)
2034 return nullptr;
2035
2036 if (bIsNumber) {
2037 FX_FILESIZE SavedPos = m_Pos;
2038 CFX_ByteString nextword = GetNextWord(&bIsNumber);
2039 if (bIsNumber) {
2040 CFX_ByteString nextword2 = GetNextWord(nullptr);
2041 if (nextword2 == "R") {
2042 FX_DWORD objnum = FXSYS_atoui(word);
2043 return new CPDF_Reference(pObjList, objnum);
2044 }
2045 }
2046 m_Pos = SavedPos;
2047 return new CPDF_Number(word);
2048 }
2049
2050 if (word == "true" || word == "false")
2051 return new CPDF_Boolean(word == "true");
2052
2053 if (word == "null")
2054 return new CPDF_Null;
2055
2056 if (word == "(") {
2057 CFX_ByteString str = ReadString();
2058 if (m_pCryptoHandler && bDecrypt)
2059 m_pCryptoHandler->Decrypt(objnum, gennum, str);
2060 return new CPDF_String(str, FALSE);
2061 }
2062
2063 if (word == "<") {
2064 CFX_ByteString str = ReadHexString();
2065 if (m_pCryptoHandler && bDecrypt)
2066 m_pCryptoHandler->Decrypt(objnum, gennum, str);
2067
2068 return new CPDF_String(str, TRUE);
2069 }
2070
2071 if (word == "[") {
2072 CPDF_Array* pArray = new CPDF_Array;
2073 while (CPDF_Object* pObj = GetObject(pObjList, objnum, gennum, true))
2074 pArray->Add(pObj);
2075
2076 return pArray;
2077 }
2078
2079 if (word[0] == '/') {
2080 return new CPDF_Name(
2081 PDF_NameDecode(CFX_ByteStringC(m_WordBuffer + 1, m_WordSize - 1)));
2082 }
2083
2084 if (word == "<<") {
2085 int32_t nKeys = 0;
2086 FX_FILESIZE dwSignValuePos = 0;
2087
2088 std::unique_ptr<CPDF_Dictionary, ReleaseDeleter<CPDF_Dictionary>> pDict(
2089 new CPDF_Dictionary);
2090 while (1) {
2091 CFX_ByteString key = GetNextWord(nullptr);
2092 if (key.IsEmpty())
2093 return nullptr;
2094
2095 FX_FILESIZE SavedPos = m_Pos - key.GetLength();
2096 if (key == ">>")
2097 break;
2098
2099 if (key == "endobj") {
2100 m_Pos = SavedPos;
2101 break;
2102 }
2103
2104 if (key[0] != '/')
2105 continue;
2106
2107 ++nKeys;
2108 key = PDF_NameDecode(key);
2109 if (key.IsEmpty())
2110 continue;
2111
2112 if (key == "/Contents")
2113 dwSignValuePos = m_Pos;
2114
2115 CPDF_Object* pObj = GetObject(pObjList, objnum, gennum, true);
2116 if (!pObj)
2117 continue;
2118
2119 CFX_ByteStringC keyNoSlash(key.c_str() + 1, key.GetLength() - 1);
2120 pDict->SetAt(keyNoSlash, pObj);
2121 }
2122
2123 // Only when this is a signature dictionary and has contents, we reset the
2124 // contents to the un-decrypted form.
2125 if (IsSignatureDict(pDict.get()) && dwSignValuePos) {
2126 CFX_AutoRestorer<FX_FILESIZE> save_pos(&m_Pos);
2127 m_Pos = dwSignValuePos;
2128 pDict->SetAt("Contents", GetObject(pObjList, objnum, gennum, false));
2129 }
2130
2131 FX_FILESIZE SavedPos = m_Pos;
2132 CFX_ByteString nextword = GetNextWord(nullptr);
2133 if (nextword != "stream") {
2134 m_Pos = SavedPos;
2135 return pDict.release();
2136 }
2137 return ReadStream(pDict.release(), objnum, gennum);
2138 }
2139
2140 if (word == ">>")
2141 m_Pos = SavedPos;
2142
2143 return nullptr;
2144 }
2145
2146 CPDF_Object* CPDF_SyntaxParser::GetObjectByStrict(
2147 CPDF_IndirectObjectHolder* pObjList,
2148 FX_DWORD objnum,
2149 FX_DWORD gennum) {
2150 CFX_AutoRestorer<int> restorer(&s_CurrentRecursionDepth);
2151 if (++s_CurrentRecursionDepth > kParserMaxRecursionDepth)
2152 return nullptr;
2153
2154 FX_FILESIZE SavedPos = m_Pos;
2155 bool bIsNumber;
2156 CFX_ByteString word = GetNextWord(&bIsNumber);
2157 if (word.GetLength() == 0)
2158 return nullptr;
2159
2160 if (bIsNumber) {
2161 FX_FILESIZE SavedPos = m_Pos;
2162 CFX_ByteString nextword = GetNextWord(&bIsNumber);
2163 if (bIsNumber) {
2164 CFX_ByteString nextword2 = GetNextWord(nullptr);
2165 if (nextword2 == "R")
2166 return new CPDF_Reference(pObjList, FXSYS_atoui(word));
2167 }
2168 m_Pos = SavedPos;
2169 return new CPDF_Number(word);
2170 }
2171
2172 if (word == "true" || word == "false")
2173 return new CPDF_Boolean(word == "true");
2174
2175 if (word == "null")
2176 return new CPDF_Null;
2177
2178 if (word == "(") {
2179 CFX_ByteString str = ReadString();
2180 if (m_pCryptoHandler)
2181 m_pCryptoHandler->Decrypt(objnum, gennum, str);
2182 return new CPDF_String(str, FALSE);
2183 }
2184
2185 if (word == "<") {
2186 CFX_ByteString str = ReadHexString();
2187 if (m_pCryptoHandler)
2188 m_pCryptoHandler->Decrypt(objnum, gennum, str);
2189 return new CPDF_String(str, TRUE);
2190 }
2191
2192 if (word == "[") {
2193 std::unique_ptr<CPDF_Array, ReleaseDeleter<CPDF_Array>> pArray(
2194 new CPDF_Array);
2195 while (CPDF_Object* pObj = GetObject(pObjList, objnum, gennum, true))
2196 pArray->Add(pObj);
2197
2198 return m_WordBuffer[0] == ']' ? pArray.release() : nullptr;
2199 }
2200
2201 if (word[0] == '/') {
2202 return new CPDF_Name(
2203 PDF_NameDecode(CFX_ByteStringC(m_WordBuffer + 1, m_WordSize - 1)));
2204 }
2205
2206 if (word == "<<") {
2207 std::unique_ptr<CPDF_Dictionary, ReleaseDeleter<CPDF_Dictionary>> pDict(
2208 new CPDF_Dictionary);
2209 while (1) {
2210 FX_FILESIZE SavedPos = m_Pos;
2211 CFX_ByteString key = GetNextWord(nullptr);
2212 if (key.IsEmpty())
2213 return nullptr;
2214
2215 if (key == ">>")
2216 break;
2217
2218 if (key == "endobj") {
2219 m_Pos = SavedPos;
2220 break;
2221 }
2222
2223 if (key[0] != '/')
2224 continue;
2225
2226 key = PDF_NameDecode(key);
2227 std::unique_ptr<CPDF_Object, ReleaseDeleter<CPDF_Object>> obj(
2228 GetObject(pObjList, objnum, gennum, true));
2229 if (!obj) {
2230 uint8_t ch;
2231 while (GetNextChar(ch) && ch != 0x0A && ch != 0x0D) {
2232 continue;
2233 }
2234 return nullptr;
2235 }
2236
2237 if (key.GetLength() > 1) {
2238 pDict->SetAt(CFX_ByteStringC(key.c_str() + 1, key.GetLength() - 1),
2239 obj.release());
2240 }
2241 }
2242
2243 FX_FILESIZE SavedPos = m_Pos;
2244 CFX_ByteString nextword = GetNextWord(nullptr);
2245 if (nextword != "stream") {
2246 m_Pos = SavedPos;
2247 return pDict.release();
2248 }
2249
2250 return ReadStream(pDict.release(), objnum, gennum);
2251 }
2252
2253 if (word == ">>")
2254 m_Pos = SavedPos;
2255
2256 return nullptr;
2257 }
2258
2259 unsigned int CPDF_SyntaxParser::ReadEOLMarkers(FX_FILESIZE pos) {
2260 unsigned char byte1 = 0;
2261 unsigned char byte2 = 0;
2262
2263 GetCharAt(pos, byte1);
2264 GetCharAt(pos + 1, byte2);
2265
2266 if (byte1 == '\r' && byte2 == '\n')
2267 return 2;
2268
2269 if (byte1 == '\r' || byte1 == '\n')
2270 return 1;
2271
2272 return 0;
2273 }
2274
// Reads the stream data that follows the dictionary |pDict| and returns a new
// CPDF_Stream built from that data and |pDict|. On every failure path in this
// function |pDict| is released and nullptr is returned.
// |objnum| and |gennum| are handed to the crypto handler when decrypting;
// |objnum| also decides whether the "Length" entry is used and whether the
// crypto handler is applied at all.
2275 CPDF_Stream* CPDF_SyntaxParser::ReadStream(CPDF_Dictionary* pDict,
2276 FX_DWORD objnum,
2277 FX_DWORD gennum) {
2278 CPDF_Object* pLenObj = pDict->GetElement("Length");
2279 FX_FILESIZE len = -1;
2280 CPDF_Reference* pLenObjRef = ToReference(pLenObj);
2281
// "Length" is used when it is not a reference, or when it is a reference that
// has an object list and refers to an object number other than |objnum|.
2282 bool differingObjNum = !pLenObjRef || (pLenObjRef->GetObjList() &&
2283 pLenObjRef->GetRefObjNum() != objnum);
2284 if (pLenObj && differingObjNum)
2285 len = pLenObj->GetInteger();
2286
2287 // Locate the start of stream.
2288 ToNextLine();
2289 FX_FILESIZE streamStartPos = m_Pos;
2290
2291 const CFX_ByteStringC kEndStreamStr("endstream");
2292 const CFX_ByteStringC kEndObjStr("endobj");
2293
// The object whose number equals m_MetadataObjnum is read without the crypto
// handler.
2294 CPDF_CryptoHandler* pCryptoHandler =
2295 objnum == (FX_DWORD)m_MetadataObjnum ? nullptr : m_pCryptoHandler.get();
// Without a crypto handler the declared length is verified against the file
// contents, and recomputed by keyword search when the check fails.
2296 if (!pCryptoHandler) {
2297 FX_BOOL bSearchForKeyword = TRUE;
2298 if (len >= 0) {
// Skip over the declared length only when the resulting position is valid
// and still inside the file.
2299 pdfium::base::CheckedNumeric<FX_FILESIZE> pos = m_Pos;
2300 pos += len;
2301 if (pos.IsValid() && pos.ValueOrDie() < m_FileLen)
2302 m_Pos = pos.ValueOrDie();
2303
2304 m_Pos += ReadEOLMarkers(m_Pos);
// Clear the part of the word buffer that is compared below, then read the
// next word into it.
2305 FXSYS_memset(m_WordBuffer, 0, kEndStreamStr.GetLength() + 1);
2306 GetNextWordInternal(nullptr);
2307 // Earlier version of PDF specification doesn't require EOL marker before
2308 // 'endstream' keyword. If keyword 'endstream' follows the bytes in
2309 // specified length, it signals the end of stream.
2310 if (FXSYS_memcmp(m_WordBuffer, kEndStreamStr.GetPtr(),
2311 kEndStreamStr.GetLength()) == 0) {
2312 bSearchForKeyword = FALSE;
2313 }
2314 }
2315
2316 if (bSearchForKeyword) {
2317 // If len is not available, len needs to be calculated
2318 // by searching the keywords "endstream" or "endobj".
2319 m_Pos = streamStartPos;
2320 FX_FILESIZE endStreamOffset = 0;
2321 while (endStreamOffset >= 0) {
2322 endStreamOffset = FindTag(kEndStreamStr, 0);
2323
2324 // Can't find "endstream".
2325 if (endStreamOffset < 0)
2326 break;
2327
2328 // Stop searching when "endstream" is found.
2329 if (IsWholeWord(m_Pos - kEndStreamStr.GetLength(), m_FileLen,
2330 kEndStreamStr, TRUE)) {
// Offset of the keyword relative to the start of the stream data.
2331 endStreamOffset = m_Pos - streamStartPos - kEndStreamStr.GetLength();
2332 break;
2333 }
2334 }
2335
// Repeat the same search, from the stream start, for "endobj".
2336 m_Pos = streamStartPos;
2337 FX_FILESIZE endObjOffset = 0;
2338 while (endObjOffset >= 0) {
2339 endObjOffset = FindTag(kEndObjStr, 0);
2340
2341 // Can't find "endobj".
2342 if (endObjOffset < 0)
2343 break;
2344
2345 // Stop searching when "endobj" is found.
2346 if (IsWholeWord(m_Pos - kEndObjStr.GetLength(), m_FileLen, kEndObjStr,
2347 TRUE)) {
2348 endObjOffset = m_Pos - streamStartPos - kEndObjStr.GetLength();
2349 break;
2350 }
2351 }
2352
2353 // Can't find "endstream" or "endobj".
2354 if (endStreamOffset < 0 && endObjOffset < 0) {
2355 pDict->Release();
2356 return nullptr;
2357 }
2358
// Use whichever keyword was found; when both were found the data ends at the
// earlier of the two.
2359 if (endStreamOffset < 0 && endObjOffset >= 0) {
2360 // Correct the position of end stream.
2361 endStreamOffset = endObjOffset;
2362 } else if (endStreamOffset >= 0 && endObjOffset < 0) {
2363 // Correct the position of end obj.
2364 endObjOffset = endStreamOffset;
2365 } else if (endStreamOffset > endObjOffset) {
2366 endStreamOffset = endObjOffset;
2367 }
2368
// Exclude the EOL marker directly before the keyword from the length: two
// bytes for CR LF, otherwise one byte for a single CR or LF.
2369 len = endStreamOffset;
2370 int numMarkers = ReadEOLMarkers(streamStartPos + endStreamOffset - 2);
2371 if (numMarkers == 2) {
2372 len -= 2;
2373 } else {
2374 numMarkers = ReadEOLMarkers(streamStartPos + endStreamOffset - 1);
2375 if (numMarkers == 1) {
2376 len -= 1;
2377 }
2378 }
2379
2380 if (len < 0) {
2381 pDict->Release();
2382 return nullptr;
2383 }
// Record the computed length in the dictionary.
2384 pDict->SetAtInteger("Length", len);
2385 }
// Rewind so the data is read from the start of the stream.
2386 m_Pos = streamStartPos;
2387 }
2388
// Still no usable length (reachable when a crypto handler is in use and
// "Length" was missing or not used).
2389 if (len < 0) {
2390 pDict->Release();
2391 return nullptr;
2392 }
2393
2394 uint8_t* pData = nullptr;
2395 if (len > 0) {
2396 pData = FX_Alloc(uint8_t, len);
2397 ReadBlock(pData, len);
2398 if (pCryptoHandler) {
2399 CFX_BinaryBuf dest_buf;
2400 dest_buf.EstimateSize(pCryptoHandler->DecryptGetSize(len));
2401
2402 void* context = pCryptoHandler->DecryptStart(objnum, gennum);
2403 pCryptoHandler->DecryptStream(context, pData, len, dest_buf);
2404 pCryptoHandler->DecryptFinish(context, dest_buf);
2405
// Swap the raw bytes for the decrypted output. The buffer is detached from
// dest_buf after its pointer is taken, presumably so dest_buf does not free
// it on destruction - TODO confirm against CFX_BinaryBuf.
2406 FX_Free(pData);
2407 pData = dest_buf.GetBuffer();
2408 len = dest_buf.GetSize();
2409 dest_buf.DetachBuffer();
2410 }
2411 }
2412
2413 CPDF_Stream* pStream = new CPDF_Stream(pData, len, pDict);
// Remember the position right after the stream data, then read one more word.
2414 streamStartPos = m_Pos;
2415 FXSYS_memset(m_WordBuffer, 0, kEndObjStr.GetLength() + 1);
2416
2417 GetNextWordInternal(nullptr);
2418
// If that word is exactly "endobj" and an EOL marker follows it, m_Pos is
// moved back to the position right after the stream data; otherwise m_Pos
// stays where the word read left it.
2419 int numMarkers = ReadEOLMarkers(m_Pos);
2420 if (m_WordSize == kEndObjStr.GetLength() && numMarkers != 0 &&
2421 FXSYS_memcmp(m_WordBuffer, kEndObjStr.GetPtr(), kEndObjStr.GetLength()) ==
2422 0) {
2423 m_Pos = streamStartPos;
2424 }
2425 return pStream;
2426 }
2427
2428 void CPDF_SyntaxParser::InitParser(IFX_FileRead* pFileAccess,
2429 FX_DWORD HeaderOffset) {
2430 FX_Free(m_pFileBuf);
2431
2432 m_pFileBuf = FX_Alloc(uint8_t, m_BufSize);
2433 m_HeaderOffset = HeaderOffset;
2434 m_FileLen = pFileAccess->GetSize();
2435 m_Pos = 0;
2436 m_pFileAccess = pFileAccess;
2437 m_BufOffset = 0;
2438 pFileAccess->ReadBlock(
2439 m_pFileBuf, 0,
2440 (size_t)((FX_FILESIZE)m_BufSize > m_FileLen ? m_FileLen : m_BufSize));
2441 }
2442
2443 uint32_t CPDF_SyntaxParser::GetDirectNum() {
2444 bool bIsNumber;
2445 GetNextWordInternal(&bIsNumber);
2446 if (!bIsNumber)
2447 return 0;
2448
2449 m_WordBuffer[m_WordSize] = 0;
2450 return FXSYS_atoui(reinterpret_cast<const FX_CHAR*>(m_WordBuffer));
2451 }
2452
2453 bool CPDF_SyntaxParser::IsWholeWord(FX_FILESIZE startpos,
2454 FX_FILESIZE limit,
2455 const CFX_ByteStringC& tag,
2456 FX_BOOL checkKeyword) {
2457 const FX_DWORD taglen = tag.GetLength();
2458
2459 bool bCheckLeft = !PDFCharIsDelimiter(tag[0]) && !PDFCharIsWhitespace(tag[0]);
2460 bool bCheckRight = !PDFCharIsDelimiter(tag[taglen - 1]) &&
2461 !PDFCharIsWhitespace(tag[taglen - 1]);
2462
2463 uint8_t ch;
2464 if (bCheckRight && startpos + (int32_t)taglen <= limit &&
2465 GetCharAt(startpos + (int32_t)taglen, ch)) {
2466 if (PDFCharIsNumeric(ch) || PDFCharIsOther(ch) ||
2467 (checkKeyword && PDFCharIsDelimiter(ch))) {
2468 return false;
2469 }
2470 }
2471
2472 if (bCheckLeft && startpos > 0 && GetCharAt(startpos - 1, ch)) {
2473 if (PDFCharIsNumeric(ch) || PDFCharIsOther(ch) ||
2474 (checkKeyword && PDFCharIsDelimiter(ch))) {
2475 return false;
2476 }
2477 }
2478 return true;
2479 }
2480
2481 // TODO(dsinclair): Split into a SearchWordForward and SearchWordBackwards
2482 // and drop the bool.
2483 FX_BOOL CPDF_SyntaxParser::SearchWord(const CFX_ByteStringC& tag,
2484 FX_BOOL bWholeWord,
2485 FX_BOOL bForward,
2486 FX_FILESIZE limit) {
2487 int32_t taglen = tag.GetLength();
2488 if (taglen == 0)
2489 return FALSE;
2490
2491 FX_FILESIZE pos = m_Pos;
2492 int32_t offset = 0;
2493 if (!bForward)
2494 offset = taglen - 1;
2495
2496 const uint8_t* tag_data = tag.GetPtr();
2497 uint8_t byte;
2498 while (1) {
2499 if (bForward) {
2500 if (limit && pos >= m_Pos + limit)
2501 return FALSE;
2502
2503 if (!GetCharAt(pos, byte))
2504 return FALSE;
2505
2506 } else {
2507 if (limit && pos <= m_Pos - limit)
2508 return FALSE;
2509
2510 if (!GetCharAtBackward(pos, byte))
2511 return FALSE;
2512 }
2513
2514 if (byte == tag_data[offset]) {
2515 if (bForward) {
2516 offset++;
2517 if (offset < taglen) {
2518 pos++;
2519 continue;
2520 }
2521 } else {
2522 offset--;
2523 if (offset >= 0) {
2524 pos--;
2525 continue;
2526 }
2527 }
2528
2529 FX_FILESIZE startpos = bForward ? pos - taglen + 1 : pos;
2530 if (!bWholeWord || IsWholeWord(startpos, limit, tag, FALSE)) {
2531 m_Pos = startpos;
2532 return TRUE;
2533 }
2534 }
2535
2536 if (bForward) {
2537 offset = byte == tag_data[0] ? 1 : 0;
2538 pos++;
2539 } else {
2540 offset = byte == tag_data[taglen - 1] ? taglen - 2 : taglen - 1;
2541 pos--;
2542 }
2543
2544 if (pos < 0)
2545 return FALSE;
2546 }
2547
2548 return FALSE;
2549 }
2550
2551 int32_t CPDF_SyntaxParser::SearchMultiWord(const CFX_ByteStringC& tags,
2552 FX_BOOL bWholeWord,
2553 FX_FILESIZE limit) {
2554 int32_t ntags = 1;
2555 for (int i = 0; i < tags.GetLength(); ++i) {
2556 if (tags[i] == 0)
2557 ++ntags;
2558 }
2559
2560 std::vector<SearchTagRecord> patterns(ntags);
2561 FX_DWORD start = 0;
2562 FX_DWORD itag = 0;
2563 FX_DWORD max_len = 0;
2564 for (int i = 0; i <= tags.GetLength(); ++i) {
2565 if (tags[i] == 0) {
2566 FX_DWORD len = i - start;
2567 max_len = std::max(len, max_len);
2568 patterns[itag].m_pTag = tags.GetCStr() + start;
2569 patterns[itag].m_Len = len;
2570 patterns[itag].m_Offset = 0;
2571 start = i + 1;
2572 ++itag;
2573 }
2574 }
2575
2576 const FX_FILESIZE pos_limit = m_Pos + limit;
2577 for (FX_FILESIZE pos = m_Pos; !limit || pos < pos_limit; ++pos) {
2578 uint8_t byte;
2579 if (!GetCharAt(pos, byte))
2580 break;
2581
2582 for (int i = 0; i < ntags; ++i) {
2583 SearchTagRecord& pat = patterns[i];
2584 if (pat.m_pTag[pat.m_Offset] != byte) {
2585 pat.m_Offset = (pat.m_pTag[0] == byte) ? 1 : 0;
2586 continue;
2587 }
2588
2589 ++pat.m_Offset;
2590 if (pat.m_Offset != pat.m_Len)
2591 continue;
2592
2593 if (!bWholeWord ||
2594 IsWholeWord(pos - pat.m_Len, limit,
2595 CFX_ByteStringC(pat.m_pTag, pat.m_Len), FALSE)) {
2596 return i;
2597 }
2598
2599 pat.m_Offset = (pat.m_pTag[0] == byte) ? 1 : 0;
2600 }
2601 }
2602 return -1;
2603 }
2604
2605 FX_FILESIZE CPDF_SyntaxParser::FindTag(const CFX_ByteStringC& tag,
2606 FX_FILESIZE limit) {
2607 int32_t taglen = tag.GetLength();
2608 int32_t match = 0;
2609 limit += m_Pos;
2610 FX_FILESIZE startpos = m_Pos;
2611
2612 while (1) {
2613 uint8_t ch;
2614 if (!GetNextChar(ch))
2615 return -1;
2616
2617 if (ch == tag[match]) {
2618 match++;
2619 if (match == taglen)
2620 return m_Pos - startpos - taglen;
2621 } else {
2622 match = ch == tag[0] ? 1 : 0;
2623 }
2624
2625 if (limit && m_Pos == limit)
2626 return -1;
2627 }
2628 return -1;
2629 }
2630
2631 void CPDF_SyntaxParser::SetEncrypt(
2632 std::unique_ptr<CPDF_CryptoHandler> pCryptoHandler) {
2633 m_pCryptoHandler = std::move(pCryptoHandler);
2634 }
2635 1689
2636 class CPDF_DataAvail final : public IPDF_DataAvail { 1690 class CPDF_DataAvail final : public IPDF_DataAvail {
2637 public: 1691 public:
2638 CPDF_DataAvail(IFX_FileAvail* pFileAvail, 1692 CPDF_DataAvail(IFX_FileAvail* pFileAvail,
2639 IFX_FileRead* pFileRead, 1693 IFX_FileRead* pFileRead,
2640 FX_BOOL bSupportHintTable); 1694 FX_BOOL bSupportHintTable);
2641 ~CPDF_DataAvail() override; 1695 ~CPDF_DataAvail() override;
2642 1696
2643 // IPDF_DataAvail: 1697 // IPDF_DataAvail:
2644 DocAvailStatus IsDocAvail(IFX_DownloadHints* pHints) override; 1698 DocAvailStatus IsDocAvail(IFX_DownloadHints* pHints) override;
(...skipping 460 matching lines...) Expand 10 before | Expand all | Expand 10 after
3105 if (m_pFileAvail->IsDataAvail(0, (FX_DWORD)m_dwFileLen)) { 2159 if (m_pFileAvail->IsDataAvail(0, (FX_DWORD)m_dwFileLen)) {
3106 m_docStatus = PDF_DATAAVAIL_DONE; 2160 m_docStatus = PDF_DATAAVAIL_DONE;
3107 return TRUE; 2161 return TRUE;
3108 } 2162 }
3109 2163
3110 pHints->AddSegment(0, (FX_DWORD)m_dwFileLen); 2164 pHints->AddSegment(0, (FX_DWORD)m_dwFileLen);
3111 return FALSE; 2165 return FALSE;
3112 } 2166 }
3113 2167
3114 FX_BOOL CPDF_DataAvail::LoadAllXref(IFX_DownloadHints* pHints) { 2168 FX_BOOL CPDF_DataAvail::LoadAllXref(IFX_DownloadHints* pHints) {
3115 m_parser.m_Syntax.InitParser(m_pFileRead, (FX_DWORD)m_dwHeaderOffset); 2169 m_parser.m_pSyntax->InitParser(m_pFileRead, (FX_DWORD)m_dwHeaderOffset);
3116 m_parser.m_bOwnFileRead = false; 2170 m_parser.m_bOwnFileRead = false;
3117 if (!m_parser.LoadAllCrossRefV4(m_dwLastXRefOffset) && 2171 if (!m_parser.LoadAllCrossRefV4(m_dwLastXRefOffset) &&
3118 !m_parser.LoadAllCrossRefV5(m_dwLastXRefOffset)) { 2172 !m_parser.LoadAllCrossRefV5(m_dwLastXRefOffset)) {
3119 m_docStatus = PDF_DATAAVAIL_LOADALLFILE; 2173 m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
3120 return FALSE; 2174 return FALSE;
3121 } 2175 }
3122 2176
3123 m_dwRootObjNum = m_parser.GetRootObjNum(); 2177 m_dwRootObjNum = m_parser.GetRootObjNum();
3124 m_dwInfoObjNum = m_parser.GetInfoObjNum(); 2178 m_dwInfoObjNum = m_parser.GetInfoObjNum();
3125 m_pCurrentParser = &m_parser; 2179 m_pCurrentParser = &m_parser;
(...skipping 564 matching lines...) Expand 10 before | Expand all | Expand 10 after
3690 (FX_DWORD)(m_Pos + 512 > m_dwFileLen ? m_dwFileLen - m_Pos : 512); 2744 (FX_DWORD)(m_Pos + 512 > m_dwFileLen ? m_dwFileLen - m_Pos : 512);
3691 2745
3692 if (m_pFileAvail->IsDataAvail(m_Pos, req_size)) { 2746 if (m_pFileAvail->IsDataAvail(m_Pos, req_size)) {
3693 int32_t iSize = (int32_t)(m_Pos + req_size - m_dwCurrentXRefSteam); 2747 int32_t iSize = (int32_t)(m_Pos + req_size - m_dwCurrentXRefSteam);
3694 CFX_BinaryBuf buf(iSize); 2748 CFX_BinaryBuf buf(iSize);
3695 uint8_t* pBuf = buf.GetBuffer(); 2749 uint8_t* pBuf = buf.GetBuffer();
3696 2750
3697 m_pFileRead->ReadBlock(pBuf, m_dwCurrentXRefSteam, iSize); 2751 m_pFileRead->ReadBlock(pBuf, m_dwCurrentXRefSteam, iSize);
3698 2752
3699 ScopedFileStream file(FX_CreateMemoryStream(pBuf, (size_t)iSize, FALSE)); 2753 ScopedFileStream file(FX_CreateMemoryStream(pBuf, (size_t)iSize, FALSE));
3700 m_parser.m_Syntax.InitParser(file.get(), 0); 2754 m_parser.m_pSyntax->InitParser(file.get(), 0);
3701 2755
3702 bool bNumber; 2756 bool bNumber;
3703 CFX_ByteString objnum = m_parser.m_Syntax.GetNextWord(&bNumber); 2757 CFX_ByteString objnum = m_parser.m_pSyntax->GetNextWord(&bNumber);
3704 if (!bNumber) 2758 if (!bNumber)
3705 return -1; 2759 return -1;
3706 2760
3707 FX_DWORD objNum = FXSYS_atoui(objnum); 2761 FX_DWORD objNum = FXSYS_atoui(objnum);
3708 CPDF_Object* pObj = m_parser.ParseIndirectObjectAt(nullptr, 0, objNum); 2762 CPDF_Object* pObj = m_parser.ParseIndirectObjectAt(nullptr, 0, objNum);
3709 if (!pObj) { 2763 if (!pObj) {
3710 m_Pos += m_parser.m_Syntax.SavePos(); 2764 m_Pos += m_parser.m_pSyntax->SavePos();
3711 return 0; 2765 return 0;
3712 } 2766 }
3713 2767
3714 CPDF_Dictionary* pDict = pObj->GetDict(); 2768 CPDF_Dictionary* pDict = pObj->GetDict();
3715 CPDF_Name* pName = ToName(pDict ? pDict->GetElement("Type") : nullptr); 2769 CPDF_Name* pName = ToName(pDict ? pDict->GetElement("Type") : nullptr);
3716 if (pName) { 2770 if (pName) {
3717 if (pName->GetString() == "XRef") { 2771 if (pName->GetString() == "XRef") {
3718 m_Pos += m_parser.m_Syntax.SavePos(); 2772 m_Pos += m_parser.m_pSyntax->SavePos();
3719 xref_offset = pObj->GetDict()->GetIntegerBy("Prev"); 2773 xref_offset = pObj->GetDict()->GetIntegerBy("Prev");
3720 pObj->Release(); 2774 pObj->Release();
3721 return 1; 2775 return 1;
3722 } 2776 }
3723 } 2777 }
3724 pObj->Release(); 2778 pObj->Release();
3725 return -1; 2779 return -1;
3726 } 2780 }
3727 pHints->AddSegment(m_Pos, req_size); 2781 pHints->AddSegment(m_Pos, req_size);
3728 return 0; 2782 return 0;
(...skipping 1301 matching lines...) Expand 10 before | Expand all | Expand 10 after
5030 CPDF_Array* pRange = m_pLinearizedDict->GetArrayBy("H"); 4084 CPDF_Array* pRange = m_pLinearizedDict->GetArrayBy("H");
5031 if (!pRange) 4085 if (!pRange)
5032 return -1; 4086 return -1;
5033 4087
5034 CPDF_Object* pStreamLen = pRange->GetElementValue(1); 4088 CPDF_Object* pStreamLen = pRange->GetElementValue(1);
5035 if (!pStreamLen) 4089 if (!pStreamLen)
5036 return -1; 4090 return -1;
5037 4091
5038 return pStreamLen->GetInteger(); 4092 return pStreamLen->GetInteger();
5039 } 4093 }
OLDNEW
« no previous file with comments | « core/src/fpdfapi/fpdf_parser/fpdf_parser_fdf.cpp ('k') | core/src/fpdfapi/fpdf_parser/fpdf_parser_parser_unittest.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698