Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(52)

Side by Side Diff: core/src/fpdfapi/fpdf_parser/cpdf_parser.cpp

Issue 1773103003: Split off CPDF_Parser and CPDF_SimpleParser into .h/.cpp files (Closed) Base URL: https://pdfium.googlesource.com/pdfium.git@master
Patch Set: Then address C#3. Created 4 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 // Copyright 2016 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7 #include "core/include/fpdfapi/cpdf_parser.h"
8
9 #include "core/include/fpdfapi/cpdf_document.h"
10 #include "core/include/fpdfapi/fpdf_parser.h"
11 #include "core/include/fxcrt/fx_ext.h"
12 #include "core/include/fxcrt/fx_safe_types.h"
13 #include "core/src/fpdfapi/fpdf_parser/cpdf_syntax_parser.h"
14 #include "core/src/fpdfapi/fpdf_parser/fpdf_parser_utility.h"
15 #include "third_party/base/stl_util.h"
16
17 namespace {
18
19 // A limit on the size of the xref table. Theoretical limits are higher, but
20 // this may be large enough in practice.
21 const int32_t kMaxXRefSize = 1048576;
22
23 // A limit on the maximum object number in the xref table. Theoretical limits
24 // are higher, but this may be large enough in practice.
25 const FX_DWORD kMaxObjectNumber = 1048576;
26
27 FX_DWORD GetVarInt(const uint8_t* p, int32_t n) {
28 FX_DWORD result = 0;
29 for (int32_t i = 0; i < n; ++i)
30 result = result * 256 + p[i];
31 return result;
32 }
33
34 int32_t GetStreamNCount(CPDF_StreamAcc* pObjStream) {
35 return pObjStream->GetDict()->GetIntegerBy("N");
36 }
37
38 int32_t GetStreamFirst(CPDF_StreamAcc* pObjStream) {
39 return pObjStream->GetDict()->GetIntegerBy("First");
40 }
41
42 } // namespace
43
44 CPDF_Parser::CPDF_Parser()
45 : m_pDocument(nullptr),
46 m_bOwnFileRead(true),
47 m_FileVersion(0),
48 m_pTrailer(nullptr),
49 m_pEncryptDict(nullptr),
50 m_pLinearized(nullptr),
51 m_dwFirstPageNo(0),
52 m_dwXrefStartObjNum(0) {
53 m_pSyntax.reset(new CPDF_SyntaxParser);
54 }
55
56 CPDF_Parser::~CPDF_Parser() {
57 CloseParser();
58 }
59
60 FX_DWORD CPDF_Parser::GetLastObjNum() const {
61 return m_ObjectInfo.empty() ? 0 : m_ObjectInfo.rbegin()->first;
62 }
63
64 bool CPDF_Parser::IsValidObjectNumber(FX_DWORD objnum) const {
65 return !m_ObjectInfo.empty() && objnum <= m_ObjectInfo.rbegin()->first;
66 }
67
68 FX_FILESIZE CPDF_Parser::GetObjectPositionOrZero(FX_DWORD objnum) const {
69 auto it = m_ObjectInfo.find(objnum);
70 return it != m_ObjectInfo.end() ? it->second.pos : 0;
71 }
72
73 uint8_t CPDF_Parser::GetObjectType(FX_DWORD objnum) const {
74 ASSERT(IsValidObjectNumber(objnum));
75 auto it = m_ObjectInfo.find(objnum);
76 return it != m_ObjectInfo.end() ? it->second.type : 0;
77 }
78
79 uint16_t CPDF_Parser::GetObjectGenNum(FX_DWORD objnum) const {
80 ASSERT(IsValidObjectNumber(objnum));
81 auto it = m_ObjectInfo.find(objnum);
82 return it != m_ObjectInfo.end() ? it->second.gennum : 0;
83 }
84
85 bool CPDF_Parser::IsObjectFreeOrNull(FX_DWORD objnum) const {
86 uint8_t type = GetObjectType(objnum);
87 return type == 0 || type == 255;
88 }
89
90 void CPDF_Parser::SetEncryptDictionary(CPDF_Dictionary* pDict) {
91 m_pEncryptDict = pDict;
92 }
93
94 CPDF_CryptoHandler* CPDF_Parser::GetCryptoHandler() {
95 return m_pSyntax->m_pCryptoHandler.get();
96 }
97
98 IFX_FileRead* CPDF_Parser::GetFileAccess() const {
99 return m_pSyntax->m_pFileAccess;
100 }
101
102 void CPDF_Parser::ShrinkObjectMap(FX_DWORD objnum) {
103 if (objnum == 0) {
104 m_ObjectInfo.clear();
105 return;
106 }
107
108 auto it = m_ObjectInfo.lower_bound(objnum);
109 while (it != m_ObjectInfo.end()) {
110 auto saved_it = it++;
111 m_ObjectInfo.erase(saved_it);
112 }
113
114 if (!pdfium::ContainsKey(m_ObjectInfo, objnum - 1))
115 m_ObjectInfo[objnum - 1].pos = 0;
116 }
117
118 void CPDF_Parser::CloseParser() {
119 m_bVersionUpdated = FALSE;
120 delete m_pDocument;
121 m_pDocument = nullptr;
122
123 if (m_pTrailer) {
124 m_pTrailer->Release();
125 m_pTrailer = nullptr;
126 }
127 ReleaseEncryptHandler();
128 SetEncryptDictionary(nullptr);
129
130 if (m_bOwnFileRead && m_pSyntax->m_pFileAccess) {
131 m_pSyntax->m_pFileAccess->Release();
132 m_pSyntax->m_pFileAccess = nullptr;
133 }
134
135 m_ObjectStreamMap.clear();
136 m_ObjCache.clear();
137 m_SortedOffset.clear();
138 m_ObjectInfo.clear();
139
140 int32_t iLen = m_Trailers.GetSize();
141 for (int32_t i = 0; i < iLen; ++i) {
142 if (CPDF_Dictionary* trailer = m_Trailers.GetAt(i))
143 trailer->Release();
144 }
145 m_Trailers.RemoveAll();
146
147 if (m_pLinearized) {
148 m_pLinearized->Release();
149 m_pLinearized = nullptr;
150 }
151 }
152
153 CPDF_Parser::Error CPDF_Parser::StartParse(IFX_FileRead* pFileAccess) {
154 CloseParser();
155
156 m_bXRefStream = FALSE;
157 m_LastXRefOffset = 0;
158 m_bOwnFileRead = true;
159
160 int32_t offset = GetHeaderOffset(pFileAccess);
161 if (offset == -1) {
162 if (pFileAccess)
163 pFileAccess->Release();
164 return FORMAT_ERROR;
165 }
166 m_pSyntax->InitParser(pFileAccess, offset);
167
168 uint8_t ch;
169 if (!m_pSyntax->GetCharAt(5, ch))
170 return FORMAT_ERROR;
171 if (std::isdigit(ch))
172 m_FileVersion = FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch)) * 10;
173
174 if (!m_pSyntax->GetCharAt(7, ch))
175 return FORMAT_ERROR;
176 if (std::isdigit(ch))
177 m_FileVersion += FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch));
178
179 if (m_pSyntax->m_FileLen < m_pSyntax->m_HeaderOffset + 9)
180 return FORMAT_ERROR;
181
182 m_pSyntax->RestorePos(m_pSyntax->m_FileLen - m_pSyntax->m_HeaderOffset - 9);
183 m_pDocument = new CPDF_Document(this);
184
185 FX_BOOL bXRefRebuilt = FALSE;
186 if (m_pSyntax->SearchWord("startxref", TRUE, FALSE, 4096)) {
187 m_SortedOffset.insert(m_pSyntax->SavePos());
188 m_pSyntax->GetKeyword();
189
190 bool bNumber;
191 CFX_ByteString xrefpos_str = m_pSyntax->GetNextWord(&bNumber);
192 if (!bNumber)
193 return FORMAT_ERROR;
194
195 m_LastXRefOffset = (FX_FILESIZE)FXSYS_atoi64(xrefpos_str);
196 if (!LoadAllCrossRefV4(m_LastXRefOffset) &&
197 !LoadAllCrossRefV5(m_LastXRefOffset)) {
198 if (!RebuildCrossRef())
199 return FORMAT_ERROR;
200
201 bXRefRebuilt = TRUE;
202 m_LastXRefOffset = 0;
203 }
204 } else {
205 if (!RebuildCrossRef())
206 return FORMAT_ERROR;
207
208 bXRefRebuilt = TRUE;
209 }
210 Error eRet = SetEncryptHandler();
211 if (eRet != SUCCESS)
212 return eRet;
213
214 m_pDocument->LoadDoc();
215 if (!m_pDocument->GetRoot() || m_pDocument->GetPageCount() == 0) {
216 if (bXRefRebuilt)
217 return FORMAT_ERROR;
218
219 ReleaseEncryptHandler();
220 if (!RebuildCrossRef())
221 return FORMAT_ERROR;
222
223 eRet = SetEncryptHandler();
224 if (eRet != SUCCESS)
225 return eRet;
226
227 m_pDocument->LoadDoc();
228 if (!m_pDocument->GetRoot())
229 return FORMAT_ERROR;
230 }
231 if (GetRootObjNum() == 0) {
232 ReleaseEncryptHandler();
233 if (!RebuildCrossRef() || GetRootObjNum() == 0)
234 return FORMAT_ERROR;
235
236 eRet = SetEncryptHandler();
237 if (eRet != SUCCESS)
238 return eRet;
239 }
240 if (m_pSecurityHandler && !m_pSecurityHandler->IsMetadataEncrypted()) {
241 CPDF_Reference* pMetadata =
242 ToReference(m_pDocument->GetRoot()->GetElement("Metadata"));
243 if (pMetadata)
244 m_pSyntax->m_MetadataObjnum = pMetadata->GetRefObjNum();
245 }
246 return SUCCESS;
247 }
248 CPDF_Parser::Error CPDF_Parser::SetEncryptHandler() {
249 ReleaseEncryptHandler();
250 SetEncryptDictionary(nullptr);
251
252 if (!m_pTrailer)
253 return FORMAT_ERROR;
254
255 CPDF_Object* pEncryptObj = m_pTrailer->GetElement("Encrypt");
256 if (pEncryptObj) {
257 if (CPDF_Dictionary* pEncryptDict = pEncryptObj->AsDictionary()) {
258 SetEncryptDictionary(pEncryptDict);
259 } else if (CPDF_Reference* pRef = pEncryptObj->AsReference()) {
260 pEncryptObj = m_pDocument->GetIndirectObject(pRef->GetRefObjNum());
261 if (pEncryptObj)
262 SetEncryptDictionary(pEncryptObj->GetDict());
263 }
264 }
265
266 if (m_pEncryptDict) {
267 CFX_ByteString filter = m_pEncryptDict->GetStringBy("Filter");
268 std::unique_ptr<IPDF_SecurityHandler> pSecurityHandler;
269 Error err = HANDLER_ERROR;
270 if (filter == "Standard") {
271 pSecurityHandler.reset(new CPDF_StandardSecurityHandler);
272 err = PASSWORD_ERROR;
273 }
274 if (!pSecurityHandler)
275 return HANDLER_ERROR;
276
277 if (!pSecurityHandler->OnInit(this, m_pEncryptDict))
278 return err;
279
280 m_pSecurityHandler = std::move(pSecurityHandler);
281 std::unique_ptr<CPDF_CryptoHandler> pCryptoHandler(
282 m_pSecurityHandler->CreateCryptoHandler());
283 if (!pCryptoHandler->Init(m_pEncryptDict, m_pSecurityHandler.get()))
284 return HANDLER_ERROR;
285 m_pSyntax->SetEncrypt(std::move(pCryptoHandler));
286 }
287 return SUCCESS;
288 }
289
290 void CPDF_Parser::ReleaseEncryptHandler() {
291 m_pSyntax->m_pCryptoHandler.reset();
292 m_pSecurityHandler.reset();
293 }
294
295 FX_FILESIZE CPDF_Parser::GetObjectOffset(FX_DWORD objnum) const {
296 if (!IsValidObjectNumber(objnum))
297 return 0;
298
299 if (GetObjectType(objnum) == 1)
300 return GetObjectPositionOrZero(objnum);
301
302 if (GetObjectType(objnum) == 2) {
303 FX_FILESIZE pos = GetObjectPositionOrZero(objnum);
304 return GetObjectPositionOrZero(pos);
305 }
306 return 0;
307 }
308
309 FX_BOOL CPDF_Parser::LoadAllCrossRefV4(FX_FILESIZE xrefpos) {
310 if (!LoadCrossRefV4(xrefpos, 0, TRUE))
311 return FALSE;
312
313 m_pTrailer = LoadTrailerV4();
314 if (!m_pTrailer)
315 return FALSE;
316
317 int32_t xrefsize = GetDirectInteger(m_pTrailer, "Size");
318 if (xrefsize > 0 && xrefsize <= kMaxXRefSize)
319 ShrinkObjectMap(xrefsize);
320
321 std::vector<FX_FILESIZE> CrossRefList;
322 std::vector<FX_FILESIZE> XRefStreamList;
323 std::set<FX_FILESIZE> seen_xrefpos;
324
325 CrossRefList.push_back(xrefpos);
326 XRefStreamList.push_back(GetDirectInteger(m_pTrailer, "XRefStm"));
327 seen_xrefpos.insert(xrefpos);
328
329 // When |m_pTrailer| doesn't have Prev entry or Prev entry value is not
330 // numerical, GetDirectInteger() returns 0. Loading will end.
331 xrefpos = GetDirectInteger(m_pTrailer, "Prev");
332 while (xrefpos) {
333 // Check for circular references.
334 if (pdfium::ContainsKey(seen_xrefpos, xrefpos))
335 return FALSE;
336
337 seen_xrefpos.insert(xrefpos);
338
339 // SLOW ...
340 CrossRefList.insert(CrossRefList.begin(), xrefpos);
341 LoadCrossRefV4(xrefpos, 0, TRUE);
342
343 std::unique_ptr<CPDF_Dictionary, ReleaseDeleter<CPDF_Dictionary>> pDict(
344 LoadTrailerV4());
345 if (!pDict)
346 return FALSE;
347
348 xrefpos = GetDirectInteger(pDict.get(), "Prev");
349
350 // SLOW ...
351 XRefStreamList.insert(XRefStreamList.begin(),
352 pDict->GetIntegerBy("XRefStm"));
353 m_Trailers.Add(pDict.release());
354 }
355
356 for (size_t i = 0; i < CrossRefList.size(); ++i) {
357 if (!LoadCrossRefV4(CrossRefList[i], XRefStreamList[i], FALSE))
358 return FALSE;
359 }
360 return TRUE;
361 }
362
363 FX_BOOL CPDF_Parser::LoadLinearizedAllCrossRefV4(FX_FILESIZE xrefpos,
364 FX_DWORD dwObjCount) {
365 if (!LoadLinearizedCrossRefV4(xrefpos, dwObjCount))
366 return FALSE;
367
368 m_pTrailer = LoadTrailerV4();
369 if (!m_pTrailer)
370 return FALSE;
371
372 int32_t xrefsize = GetDirectInteger(m_pTrailer, "Size");
373 if (xrefsize == 0)
374 return FALSE;
375
376 std::vector<FX_FILESIZE> CrossRefList;
377 std::vector<FX_FILESIZE> XRefStreamList;
378 std::set<FX_FILESIZE> seen_xrefpos;
379
380 CrossRefList.push_back(xrefpos);
381 XRefStreamList.push_back(GetDirectInteger(m_pTrailer, "XRefStm"));
382 seen_xrefpos.insert(xrefpos);
383
384 xrefpos = GetDirectInteger(m_pTrailer, "Prev");
385 while (xrefpos) {
386 // Check for circular references.
387 if (pdfium::ContainsKey(seen_xrefpos, xrefpos))
388 return FALSE;
389
390 seen_xrefpos.insert(xrefpos);
391
392 // SLOW ...
393 CrossRefList.insert(CrossRefList.begin(), xrefpos);
394 LoadCrossRefV4(xrefpos, 0, TRUE);
395
396 std::unique_ptr<CPDF_Dictionary, ReleaseDeleter<CPDF_Dictionary>> pDict(
397 LoadTrailerV4());
398 if (!pDict)
399 return FALSE;
400
401 xrefpos = GetDirectInteger(pDict.get(), "Prev");
402
403 // SLOW ...
404 XRefStreamList.insert(XRefStreamList.begin(),
405 pDict->GetIntegerBy("XRefStm"));
406 m_Trailers.Add(pDict.release());
407 }
408
409 for (size_t i = 1; i < CrossRefList.size(); ++i) {
410 if (!LoadCrossRefV4(CrossRefList[i], XRefStreamList[i], FALSE))
411 return FALSE;
412 }
413 return TRUE;
414 }
415
416 FX_BOOL CPDF_Parser::LoadLinearizedCrossRefV4(FX_FILESIZE pos,
417 FX_DWORD dwObjCount) {
418 FX_FILESIZE dwStartPos = pos - m_pSyntax->m_HeaderOffset;
419
420 m_pSyntax->RestorePos(dwStartPos);
421 m_SortedOffset.insert(pos);
422
423 FX_DWORD start_objnum = 0;
424 FX_DWORD count = dwObjCount;
425 FX_FILESIZE SavedPos = m_pSyntax->SavePos();
426
427 const int32_t recordsize = 20;
428 std::vector<char> buf(1024 * recordsize + 1);
429 buf[1024 * recordsize] = '\0';
430
431 int32_t nBlocks = count / 1024 + 1;
432 for (int32_t block = 0; block < nBlocks; block++) {
433 int32_t block_size = block == nBlocks - 1 ? count % 1024 : 1024;
434 FX_DWORD dwReadSize = block_size * recordsize;
435 if ((FX_FILESIZE)(dwStartPos + dwReadSize) > m_pSyntax->m_FileLen)
436 return FALSE;
437
438 if (!m_pSyntax->ReadBlock(reinterpret_cast<uint8_t*>(buf.data()),
439 dwReadSize)) {
440 return FALSE;
441 }
442
443 for (int32_t i = 0; i < block_size; i++) {
444 FX_DWORD objnum = start_objnum + block * 1024 + i;
445 char* pEntry = &buf[i * recordsize];
446 if (pEntry[17] == 'f') {
447 m_ObjectInfo[objnum].pos = 0;
448 m_ObjectInfo[objnum].type = 0;
449 } else {
450 int32_t offset = FXSYS_atoi(pEntry);
451 if (offset == 0) {
452 for (int32_t c = 0; c < 10; c++) {
453 if (!std::isdigit(pEntry[c]))
454 return FALSE;
455 }
456 }
457
458 m_ObjectInfo[objnum].pos = offset;
459 int32_t version = FXSYS_atoi(pEntry + 11);
460 if (version >= 1)
461 m_bVersionUpdated = TRUE;
462
463 m_ObjectInfo[objnum].gennum = version;
464 if (m_ObjectInfo[objnum].pos < m_pSyntax->m_FileLen)
465 m_SortedOffset.insert(m_ObjectInfo[objnum].pos);
466
467 m_ObjectInfo[objnum].type = 1;
468 }
469 }
470 }
471 m_pSyntax->RestorePos(SavedPos + count * recordsize);
472 return TRUE;
473 }
474
475 bool CPDF_Parser::LoadCrossRefV4(FX_FILESIZE pos,
476 FX_FILESIZE streampos,
477 FX_BOOL bSkip) {
478 m_pSyntax->RestorePos(pos);
479 if (m_pSyntax->GetKeyword() != "xref")
480 return false;
481
482 m_SortedOffset.insert(pos);
483 if (streampos)
484 m_SortedOffset.insert(streampos);
485
486 while (1) {
487 FX_FILESIZE SavedPos = m_pSyntax->SavePos();
488 bool bIsNumber;
489 CFX_ByteString word = m_pSyntax->GetNextWord(&bIsNumber);
490 if (word.IsEmpty())
491 return false;
492
493 if (!bIsNumber) {
494 m_pSyntax->RestorePos(SavedPos);
495 break;
496 }
497
498 FX_DWORD start_objnum = FXSYS_atoui(word);
499 if (start_objnum >= kMaxObjectNumber)
500 return false;
501
502 FX_DWORD count = m_pSyntax->GetDirectNum();
503 m_pSyntax->ToNextWord();
504 SavedPos = m_pSyntax->SavePos();
505 const int32_t recordsize = 20;
506
507 m_dwXrefStartObjNum = start_objnum;
508 if (!bSkip) {
509 std::vector<char> buf(1024 * recordsize + 1);
510 buf[1024 * recordsize] = '\0';
511
512 int32_t nBlocks = count / 1024 + 1;
513 for (int32_t block = 0; block < nBlocks; block++) {
514 int32_t block_size = block == nBlocks - 1 ? count % 1024 : 1024;
515 m_pSyntax->ReadBlock(reinterpret_cast<uint8_t*>(buf.data()),
516 block_size * recordsize);
517
518 for (int32_t i = 0; i < block_size; i++) {
519 FX_DWORD objnum = start_objnum + block * 1024 + i;
520 char* pEntry = &buf[i * recordsize];
521 if (pEntry[17] == 'f') {
522 m_ObjectInfo[objnum].pos = 0;
523 m_ObjectInfo[objnum].type = 0;
524 } else {
525 FX_FILESIZE offset = (FX_FILESIZE)FXSYS_atoi64(pEntry);
526 if (offset == 0) {
527 for (int32_t c = 0; c < 10; c++) {
528 if (!std::isdigit(pEntry[c]))
529 return false;
530 }
531 }
532
533 m_ObjectInfo[objnum].pos = offset;
534 int32_t version = FXSYS_atoi(pEntry + 11);
535 if (version >= 1)
536 m_bVersionUpdated = TRUE;
537
538 m_ObjectInfo[objnum].gennum = version;
539 if (m_ObjectInfo[objnum].pos < m_pSyntax->m_FileLen)
540 m_SortedOffset.insert(m_ObjectInfo[objnum].pos);
541
542 m_ObjectInfo[objnum].type = 1;
543 }
544 }
545 }
546 }
547 m_pSyntax->RestorePos(SavedPos + count * recordsize);
548 }
549 return !streampos || LoadCrossRefV5(&streampos, FALSE);
550 }
551
552 FX_BOOL CPDF_Parser::LoadAllCrossRefV5(FX_FILESIZE xrefpos) {
553 if (!LoadCrossRefV5(&xrefpos, TRUE))
554 return FALSE;
555
556 std::set<FX_FILESIZE> seen_xrefpos;
557 while (xrefpos) {
558 seen_xrefpos.insert(xrefpos);
559 if (!LoadCrossRefV5(&xrefpos, FALSE))
560 return FALSE;
561
562 // Check for circular references.
563 if (pdfium::ContainsKey(seen_xrefpos, xrefpos))
564 return FALSE;
565 }
566 m_ObjectStreamMap.clear();
567 m_bXRefStream = TRUE;
568 return TRUE;
569 }
570
571 FX_BOOL CPDF_Parser::RebuildCrossRef() {
572 m_ObjectInfo.clear();
573 m_SortedOffset.clear();
574 if (m_pTrailer) {
575 m_pTrailer->Release();
576 m_pTrailer = nullptr;
577 }
578
579 ParserState state = ParserState::kDefault;
580
581 int32_t inside_index = 0;
582 FX_DWORD objnum = 0;
583 FX_DWORD gennum = 0;
584 int32_t depth = 0;
585
586 const FX_DWORD kBufferSize = 4096;
587 std::vector<uint8_t> buffer(kBufferSize);
588
589 FX_FILESIZE pos = m_pSyntax->m_HeaderOffset;
590 FX_FILESIZE start_pos = 0;
591 FX_FILESIZE start_pos1 = 0;
592 FX_FILESIZE last_obj = -1;
593 FX_FILESIZE last_xref = -1;
594 FX_FILESIZE last_trailer = -1;
595
596 while (pos < m_pSyntax->m_FileLen) {
597 const FX_FILESIZE saved_pos = pos;
598 bool bOverFlow = false;
599 FX_DWORD size =
600 std::min((FX_DWORD)(m_pSyntax->m_FileLen - pos), kBufferSize);
601 if (!m_pSyntax->m_pFileAccess->ReadBlock(buffer.data(), pos, size))
602 break;
603
604 for (FX_DWORD i = 0; i < size; i++) {
605 uint8_t byte = buffer[i];
606 switch (state) {
607 case ParserState::kDefault:
608 if (PDFCharIsWhitespace(byte)) {
609 state = ParserState::kWhitespace;
610 } else if (std::isdigit(byte)) {
611 --i;
612 state = ParserState::kWhitespace;
613 } else if (byte == '%') {
614 inside_index = 0;
615 state = ParserState::kComment;
616 } else if (byte == '(') {
617 state = ParserState::kString;
618 depth = 1;
619 } else if (byte == '<') {
620 inside_index = 1;
621 state = ParserState::kHexString;
622 } else if (byte == '\\') {
623 state = ParserState::kEscapedString;
624 } else if (byte == 't') {
625 state = ParserState::kTrailer;
626 inside_index = 1;
627 }
628 break;
629
630 case ParserState::kWhitespace:
631 if (std::isdigit(byte)) {
632 start_pos = pos + i;
633 state = ParserState::kObjNum;
634 objnum = FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(byte));
635 } else if (byte == 't') {
636 state = ParserState::kTrailer;
637 inside_index = 1;
638 } else if (byte == 'x') {
639 state = ParserState::kXref;
640 inside_index = 1;
641 } else if (!PDFCharIsWhitespace(byte)) {
642 --i;
643 state = ParserState::kDefault;
644 }
645 break;
646
647 case ParserState::kObjNum:
648 if (std::isdigit(byte)) {
649 objnum =
650 objnum * 10 + FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(byte));
651 } else if (PDFCharIsWhitespace(byte)) {
652 state = ParserState::kPostObjNum;
653 } else {
654 --i;
655 state = ParserState::kEndObj;
656 inside_index = 0;
657 }
658 break;
659
660 case ParserState::kPostObjNum:
661 if (std::isdigit(byte)) {
662 start_pos1 = pos + i;
663 state = ParserState::kGenNum;
664 gennum = FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(byte));
665 } else if (byte == 't') {
666 state = ParserState::kTrailer;
667 inside_index = 1;
668 } else if (!PDFCharIsWhitespace(byte)) {
669 --i;
670 state = ParserState::kDefault;
671 }
672 break;
673
674 case ParserState::kGenNum:
675 if (std::isdigit(byte)) {
676 gennum =
677 gennum * 10 + FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(byte));
678 } else if (PDFCharIsWhitespace(byte)) {
679 state = ParserState::kPostGenNum;
680 } else {
681 --i;
682 state = ParserState::kDefault;
683 }
684 break;
685
686 case ParserState::kPostGenNum:
687 if (byte == 'o') {
688 state = ParserState::kBeginObj;
689 inside_index = 1;
690 } else if (std::isdigit(byte)) {
691 objnum = gennum;
692 gennum = FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(byte));
693 start_pos = start_pos1;
694 start_pos1 = pos + i;
695 state = ParserState::kGenNum;
696 } else if (byte == 't') {
697 state = ParserState::kTrailer;
698 inside_index = 1;
699 } else if (!PDFCharIsWhitespace(byte)) {
700 --i;
701 state = ParserState::kDefault;
702 }
703 break;
704
705 case ParserState::kBeginObj:
706 switch (inside_index) {
707 case 1:
708 if (byte != 'b') {
709 --i;
710 state = ParserState::kDefault;
711 } else {
712 inside_index++;
713 }
714 break;
715 case 2:
716 if (byte != 'j') {
717 --i;
718 state = ParserState::kDefault;
719 } else {
720 inside_index++;
721 }
722 break;
723 case 3:
724 if (PDFCharIsWhitespace(byte) || PDFCharIsDelimiter(byte)) {
725 FX_FILESIZE obj_pos = start_pos - m_pSyntax->m_HeaderOffset;
726 m_SortedOffset.insert(obj_pos);
727 last_obj = start_pos;
728 FX_FILESIZE obj_end = 0;
729 CPDF_Object* pObject = ParseIndirectObjectAtByStrict(
730 m_pDocument, obj_pos, objnum, &obj_end);
731 if (CPDF_Stream* pStream = ToStream(pObject)) {
732 if (CPDF_Dictionary* pDict = pStream->GetDict()) {
733 if ((pDict->KeyExist("Type")) &&
734 (pDict->GetStringBy("Type") == "XRef" &&
735 pDict->KeyExist("Size"))) {
736 CPDF_Object* pRoot = pDict->GetElement("Root");
737 if (pRoot && pRoot->GetDict() &&
738 pRoot->GetDict()->GetElement("Pages")) {
739 if (m_pTrailer)
740 m_pTrailer->Release();
741 m_pTrailer = ToDictionary(pDict->Clone());
742 }
743 }
744 }
745 }
746
747 FX_FILESIZE offset = 0;
748 m_pSyntax->RestorePos(obj_pos);
749 offset = m_pSyntax->FindTag("obj", 0);
750 if (offset == -1)
751 offset = 0;
752 else
753 offset += 3;
754
755 FX_FILESIZE nLen = obj_end - obj_pos - offset;
756 if ((FX_DWORD)nLen > size - i) {
757 pos = obj_end + m_pSyntax->m_HeaderOffset;
758 bOverFlow = true;
759 } else {
760 i += (FX_DWORD)nLen;
761 }
762
763 if (!m_ObjectInfo.empty() && IsValidObjectNumber(objnum) &&
764 m_ObjectInfo[objnum].pos) {
765 if (pObject) {
766 FX_DWORD oldgen = GetObjectGenNum(objnum);
767 m_ObjectInfo[objnum].pos = obj_pos;
768 m_ObjectInfo[objnum].gennum = gennum;
769 if (oldgen != gennum)
770 m_bVersionUpdated = TRUE;
771 }
772 } else {
773 m_ObjectInfo[objnum].pos = obj_pos;
774 m_ObjectInfo[objnum].type = 1;
775 m_ObjectInfo[objnum].gennum = gennum;
776 }
777
778 if (pObject)
779 pObject->Release();
780 }
781 --i;
782 state = ParserState::kDefault;
783 break;
784 }
785 break;
786
787 case ParserState::kTrailer:
788 if (inside_index == 7) {
789 if (PDFCharIsWhitespace(byte) || PDFCharIsDelimiter(byte)) {
790 last_trailer = pos + i - 7;
791 m_pSyntax->RestorePos(pos + i - m_pSyntax->m_HeaderOffset);
792
793 CPDF_Object* pObj = m_pSyntax->GetObject(m_pDocument, 0, 0, true);
794 if (pObj) {
795 if (!pObj->IsDictionary() && !pObj->AsStream()) {
796 pObj->Release();
797 } else {
798 CPDF_Stream* pStream = pObj->AsStream();
799 if (CPDF_Dictionary* pTrailer =
800 pStream ? pStream->GetDict() : pObj->AsDictionary()) {
801 if (m_pTrailer) {
802 CPDF_Object* pRoot = pTrailer->GetElement("Root");
803 CPDF_Reference* pRef = ToReference(pRoot);
804 if (!pRoot ||
805 (pRef && IsValidObjectNumber(pRef->GetRefObjNum()) &&
806 m_ObjectInfo[pRef->GetRefObjNum()].pos != 0)) {
807 auto it = pTrailer->begin();
808 while (it != pTrailer->end()) {
809 const CFX_ByteString& key = it->first;
810 CPDF_Object* pElement = it->second;
811 ++it;
812 FX_DWORD dwObjNum =
813 pElement ? pElement->GetObjNum() : 0;
814 if (dwObjNum) {
815 m_pTrailer->SetAtReference(key, m_pDocument,
816 dwObjNum);
817 } else {
818 m_pTrailer->SetAt(key, pElement->Clone());
819 }
820 }
821 }
822 pObj->Release();
823 } else {
824 if (pObj->IsStream()) {
825 m_pTrailer = ToDictionary(pTrailer->Clone());
826 pObj->Release();
827 } else {
828 m_pTrailer = pTrailer;
829 }
830
831 FX_FILESIZE dwSavePos = m_pSyntax->SavePos();
832 CFX_ByteString strWord = m_pSyntax->GetKeyword();
833 if (!strWord.Compare("startxref")) {
834 bool bNumber;
835 CFX_ByteString bsOffset =
836 m_pSyntax->GetNextWord(&bNumber);
837 if (bNumber)
838 m_LastXRefOffset = FXSYS_atoi(bsOffset);
839 }
840 m_pSyntax->RestorePos(dwSavePos);
841 }
842 } else {
843 pObj->Release();
844 }
845 }
846 }
847 }
848 --i;
849 state = ParserState::kDefault;
850 } else if (byte == "trailer"[inside_index]) {
851 inside_index++;
852 } else {
853 --i;
854 state = ParserState::kDefault;
855 }
856 break;
857
858 case ParserState::kXref:
859 if (inside_index == 4) {
860 last_xref = pos + i - 4;
861 state = ParserState::kWhitespace;
862 } else if (byte == "xref"[inside_index]) {
863 inside_index++;
864 } else {
865 --i;
866 state = ParserState::kDefault;
867 }
868 break;
869
870 case ParserState::kComment:
871 if (PDFCharIsLineEnding(byte))
872 state = ParserState::kDefault;
873 break;
874
875 case ParserState::kString:
876 if (byte == ')') {
877 if (depth > 0)
878 depth--;
879 } else if (byte == '(') {
880 depth++;
881 }
882
883 if (!depth)
884 state = ParserState::kDefault;
885 break;
886
887 case ParserState::kHexString:
888 if (byte == '>' || (byte == '<' && inside_index == 1))
889 state = ParserState::kDefault;
890 inside_index = 0;
891 break;
892
893 case ParserState::kEscapedString:
894 if (PDFCharIsDelimiter(byte) || PDFCharIsWhitespace(byte)) {
895 --i;
896 state = ParserState::kDefault;
897 }
898 break;
899
900 case ParserState::kEndObj:
901 if (PDFCharIsWhitespace(byte)) {
902 state = ParserState::kDefault;
903 } else if (byte == '%' || byte == '(' || byte == '<' ||
904 byte == '\\') {
905 state = ParserState::kDefault;
906 --i;
907 } else if (inside_index == 6) {
908 state = ParserState::kDefault;
909 --i;
910 } else if (byte == "endobj"[inside_index]) {
911 inside_index++;
912 }
913 break;
914 }
915
916 if (bOverFlow) {
917 size = 0;
918 break;
919 }
920 }
921 pos += size;
922
923 // If the position has not changed at all in a loop iteration, then break
924 // out to prevent infinite looping.
925 if (pos == saved_pos)
926 break;
927 }
928
929 if (last_xref != -1 && last_xref > last_obj)
930 last_trailer = last_xref;
931 else if (last_trailer == -1 || last_xref < last_obj)
932 last_trailer = m_pSyntax->m_FileLen;
933
934 m_SortedOffset.insert(last_trailer - m_pSyntax->m_HeaderOffset);
935 return m_pTrailer && !m_ObjectInfo.empty();
936 }
937
938 FX_BOOL CPDF_Parser::LoadCrossRefV5(FX_FILESIZE* pos, FX_BOOL bMainXRef) {
939 CPDF_Object* pObject = ParseIndirectObjectAt(m_pDocument, *pos, 0);
940 if (!pObject)
941 return FALSE;
942
943 if (m_pDocument) {
944 FX_BOOL bInserted = FALSE;
945 CPDF_Dictionary* pDict = m_pDocument->GetRoot();
946 if (!pDict || pDict->GetObjNum() != pObject->m_ObjNum) {
947 bInserted = m_pDocument->InsertIndirectObject(pObject->m_ObjNum, pObject);
948 } else {
949 if (pObject->IsStream())
950 pObject->Release();
951 }
952
953 if (!bInserted)
954 return FALSE;
955 }
956
957 CPDF_Stream* pStream = pObject->AsStream();
958 if (!pStream)
959 return FALSE;
960
961 *pos = pStream->GetDict()->GetIntegerBy("Prev");
962 int32_t size = pStream->GetDict()->GetIntegerBy("Size");
963 if (size < 0) {
964 pStream->Release();
965 return FALSE;
966 }
967
968 if (bMainXRef) {
969 m_pTrailer = ToDictionary(pStream->GetDict()->Clone());
970 ShrinkObjectMap(size);
971 for (auto& it : m_ObjectInfo)
972 it.second.type = 0;
973 } else {
974 m_Trailers.Add(ToDictionary(pStream->GetDict()->Clone()));
975 }
976
977 std::vector<std::pair<int32_t, int32_t>> arrIndex;
978 CPDF_Array* pArray = pStream->GetDict()->GetArrayBy("Index");
979 if (pArray) {
980 FX_DWORD nPairSize = pArray->GetCount() / 2;
981 for (FX_DWORD i = 0; i < nPairSize; i++) {
982 CPDF_Object* pStartNumObj = pArray->GetElement(i * 2);
983 CPDF_Object* pCountObj = pArray->GetElement(i * 2 + 1);
984
985 if (ToNumber(pStartNumObj) && ToNumber(pCountObj)) {
986 int nStartNum = pStartNumObj->GetInteger();
987 int nCount = pCountObj->GetInteger();
988 if (nStartNum >= 0 && nCount > 0)
989 arrIndex.push_back(std::make_pair(nStartNum, nCount));
990 }
991 }
992 }
993
994 if (arrIndex.size() == 0)
995 arrIndex.push_back(std::make_pair(0, size));
996
997 pArray = pStream->GetDict()->GetArrayBy("W");
998 if (!pArray) {
999 pStream->Release();
1000 return FALSE;
1001 }
1002
1003 CFX_DWordArray WidthArray;
1004 FX_SAFE_DWORD dwAccWidth = 0;
1005 for (FX_DWORD i = 0; i < pArray->GetCount(); i++) {
1006 WidthArray.Add(pArray->GetIntegerAt(i));
1007 dwAccWidth += WidthArray[i];
1008 }
1009
1010 if (!dwAccWidth.IsValid() || WidthArray.GetSize() < 3) {
1011 pStream->Release();
1012 return FALSE;
1013 }
1014
1015 FX_DWORD totalWidth = dwAccWidth.ValueOrDie();
1016 CPDF_StreamAcc acc;
1017 acc.LoadAllData(pStream);
1018
1019 const uint8_t* pData = acc.GetData();
1020 FX_DWORD dwTotalSize = acc.GetSize();
1021 FX_DWORD segindex = 0;
1022 for (FX_DWORD i = 0; i < arrIndex.size(); i++) {
1023 int32_t startnum = arrIndex[i].first;
1024 if (startnum < 0)
1025 continue;
1026
1027 m_dwXrefStartObjNum =
1028 pdfium::base::checked_cast<FX_DWORD, int32_t>(startnum);
1029 FX_DWORD count =
1030 pdfium::base::checked_cast<FX_DWORD, int32_t>(arrIndex[i].second);
1031 FX_SAFE_DWORD dwCaculatedSize = segindex;
1032 dwCaculatedSize += count;
1033 dwCaculatedSize *= totalWidth;
1034 if (!dwCaculatedSize.IsValid() ||
1035 dwCaculatedSize.ValueOrDie() > dwTotalSize) {
1036 continue;
1037 }
1038
1039 const uint8_t* segstart = pData + segindex * totalWidth;
1040 FX_SAFE_DWORD dwMaxObjNum = startnum;
1041 dwMaxObjNum += count;
1042 FX_DWORD dwV5Size = m_ObjectInfo.empty() ? 0 : GetLastObjNum() + 1;
1043 if (!dwMaxObjNum.IsValid() || dwMaxObjNum.ValueOrDie() > dwV5Size)
1044 continue;
1045
1046 for (FX_DWORD j = 0; j < count; j++) {
1047 int32_t type = 1;
1048 const uint8_t* entrystart = segstart + j * totalWidth;
1049 if (WidthArray[0])
1050 type = GetVarInt(entrystart, WidthArray[0]);
1051
1052 if (GetObjectType(startnum + j) == 255) {
1053 FX_FILESIZE offset =
1054 GetVarInt(entrystart + WidthArray[0], WidthArray[1]);
1055 m_ObjectInfo[startnum + j].pos = offset;
1056 m_SortedOffset.insert(offset);
1057 continue;
1058 }
1059
1060 if (GetObjectType(startnum + j))
1061 continue;
1062
1063 m_ObjectInfo[startnum + j].type = type;
1064 if (type == 0) {
1065 m_ObjectInfo[startnum + j].pos = 0;
1066 } else {
1067 FX_FILESIZE offset =
1068 GetVarInt(entrystart + WidthArray[0], WidthArray[1]);
1069 m_ObjectInfo[startnum + j].pos = offset;
1070 if (type == 1) {
1071 m_SortedOffset.insert(offset);
1072 } else {
1073 if (offset < 0 || !IsValidObjectNumber(offset)) {
1074 pStream->Release();
1075 return FALSE;
1076 }
1077 m_ObjectInfo[offset].type = 255;
1078 }
1079 }
1080 }
1081 segindex += count;
1082 }
1083 pStream->Release();
1084 return TRUE;
1085 }
1086
1087 CPDF_Array* CPDF_Parser::GetIDArray() {
1088 CPDF_Object* pID = m_pTrailer ? m_pTrailer->GetElement("ID") : nullptr;
1089 if (!pID)
1090 return nullptr;
1091
1092 if (CPDF_Reference* pRef = pID->AsReference()) {
1093 pID = ParseIndirectObject(nullptr, pRef->GetRefObjNum());
1094 m_pTrailer->SetAt("ID", pID);
1095 }
1096 return ToArray(pID);
1097 }
1098
1099 FX_DWORD CPDF_Parser::GetRootObjNum() {
1100 CPDF_Reference* pRef =
1101 ToReference(m_pTrailer ? m_pTrailer->GetElement("Root") : nullptr);
1102 return pRef ? pRef->GetRefObjNum() : 0;
1103 }
1104
1105 FX_DWORD CPDF_Parser::GetInfoObjNum() {
1106 CPDF_Reference* pRef =
1107 ToReference(m_pTrailer ? m_pTrailer->GetElement("Info") : nullptr);
1108 return pRef ? pRef->GetRefObjNum() : 0;
1109 }
1110
1111 FX_BOOL CPDF_Parser::IsFormStream(FX_DWORD objnum, FX_BOOL& bForm) {
1112 bForm = FALSE;
1113 if (!IsValidObjectNumber(objnum))
1114 return TRUE;
1115
1116 if (GetObjectType(objnum) == 0)
1117 return TRUE;
1118
1119 if (GetObjectType(objnum) == 2)
1120 return TRUE;
1121
1122 FX_FILESIZE pos = m_ObjectInfo[objnum].pos;
1123 auto it = m_SortedOffset.find(pos);
1124 if (it == m_SortedOffset.end())
1125 return TRUE;
1126
1127 if (++it == m_SortedOffset.end())
1128 return FALSE;
1129
1130 FX_FILESIZE size = *it - pos;
1131 FX_FILESIZE SavedPos = m_pSyntax->SavePos();
1132 m_pSyntax->RestorePos(pos);
1133
1134 const char kFormStream[] = "/Form\0stream";
1135 const CFX_ByteStringC kFormStreamStr(kFormStream, sizeof(kFormStream) - 1);
1136 bForm = m_pSyntax->SearchMultiWord(kFormStreamStr, TRUE, size) == 0;
1137 m_pSyntax->RestorePos(SavedPos);
1138 return TRUE;
1139 }
1140
1141 CPDF_Object* CPDF_Parser::ParseIndirectObject(
1142 CPDF_IndirectObjectHolder* pObjList,
1143 FX_DWORD objnum) {
1144 if (!IsValidObjectNumber(objnum))
1145 return nullptr;
1146
1147 // Prevent circular parsing the same object.
1148 if (pdfium::ContainsKey(m_ParsingObjNums, objnum))
1149 return nullptr;
1150 ScopedSetInsertion<FX_DWORD> local_insert(&m_ParsingObjNums, objnum);
1151
1152 if (GetObjectType(objnum) == 1 || GetObjectType(objnum) == 255) {
1153 FX_FILESIZE pos = m_ObjectInfo[objnum].pos;
1154 if (pos <= 0)
1155 return nullptr;
1156 return ParseIndirectObjectAt(pObjList, pos, objnum);
1157 }
1158 if (GetObjectType(objnum) != 2)
1159 return nullptr;
1160
1161 CPDF_StreamAcc* pObjStream = GetObjectStream(m_ObjectInfo[objnum].pos);
1162 if (!pObjStream)
1163 return nullptr;
1164
1165 ScopedFileStream file(FX_CreateMemoryStream(
1166 (uint8_t*)pObjStream->GetData(), (size_t)pObjStream->GetSize(), FALSE));
1167 CPDF_SyntaxParser syntax;
1168 syntax.InitParser(file.get(), 0);
1169 const int32_t offset = GetStreamFirst(pObjStream);
1170
1171 // Read object numbers from |pObjStream| into a cache.
1172 if (!pdfium::ContainsKey(m_ObjCache, pObjStream)) {
1173 for (int32_t i = GetStreamNCount(pObjStream); i > 0; --i) {
1174 FX_DWORD thisnum = syntax.GetDirectNum();
1175 FX_DWORD thisoff = syntax.GetDirectNum();
1176 m_ObjCache[pObjStream][thisnum] = thisoff;
1177 }
1178 }
1179
1180 const auto it = m_ObjCache[pObjStream].find(objnum);
1181 if (it == m_ObjCache[pObjStream].end())
1182 return nullptr;
1183
1184 syntax.RestorePos(offset + it->second);
1185 return syntax.GetObject(pObjList, 0, 0, true);
1186 }
1187
1188 CPDF_StreamAcc* CPDF_Parser::GetObjectStream(FX_DWORD objnum) {
1189 auto it = m_ObjectStreamMap.find(objnum);
1190 if (it != m_ObjectStreamMap.end())
1191 return it->second.get();
1192
1193 if (!m_pDocument)
1194 return nullptr;
1195
1196 const CPDF_Stream* pStream = ToStream(m_pDocument->GetIndirectObject(objnum));
1197 if (!pStream)
1198 return nullptr;
1199
1200 CPDF_StreamAcc* pStreamAcc = new CPDF_StreamAcc;
1201 pStreamAcc->LoadAllData(pStream);
1202 m_ObjectStreamMap[objnum].reset(pStreamAcc);
1203 return pStreamAcc;
1204 }
1205
1206 FX_FILESIZE CPDF_Parser::GetObjectSize(FX_DWORD objnum) const {
1207 if (!IsValidObjectNumber(objnum))
1208 return 0;
1209
1210 if (GetObjectType(objnum) == 2)
1211 objnum = GetObjectPositionOrZero(objnum);
1212
1213 if (GetObjectType(objnum) != 1 && GetObjectType(objnum) != 255)
1214 return 0;
1215
1216 FX_FILESIZE offset = GetObjectPositionOrZero(objnum);
1217 if (offset == 0)
1218 return 0;
1219
1220 auto it = m_SortedOffset.find(offset);
1221 if (it == m_SortedOffset.end() || ++it == m_SortedOffset.end())
1222 return 0;
1223
1224 return *it - offset;
1225 }
1226
1227 void CPDF_Parser::GetIndirectBinary(FX_DWORD objnum,
1228 uint8_t*& pBuffer,
1229 FX_DWORD& size) {
1230 pBuffer = nullptr;
1231 size = 0;
1232 if (!IsValidObjectNumber(objnum))
1233 return;
1234
1235 if (GetObjectType(objnum) == 2) {
1236 CPDF_StreamAcc* pObjStream = GetObjectStream(m_ObjectInfo[objnum].pos);
1237 if (!pObjStream)
1238 return;
1239
1240 int32_t offset = GetStreamFirst(pObjStream);
1241 const uint8_t* pData = pObjStream->GetData();
1242 FX_DWORD totalsize = pObjStream->GetSize();
1243 ScopedFileStream file(
1244 FX_CreateMemoryStream((uint8_t*)pData, (size_t)totalsize, FALSE));
1245
1246 CPDF_SyntaxParser syntax;
1247 syntax.InitParser(file.get(), 0);
1248 for (int i = GetStreamNCount(pObjStream); i > 0; --i) {
1249 FX_DWORD thisnum = syntax.GetDirectNum();
1250 FX_DWORD thisoff = syntax.GetDirectNum();
1251 if (thisnum != objnum)
1252 continue;
1253
1254 if (i == 1) {
1255 size = totalsize - (thisoff + offset);
1256 } else {
1257 syntax.GetDirectNum(); // Skip nextnum.
1258 FX_DWORD nextoff = syntax.GetDirectNum();
1259 size = nextoff - thisoff;
1260 }
1261
1262 pBuffer = FX_Alloc(uint8_t, size);
1263 FXSYS_memcpy(pBuffer, pData + thisoff + offset, size);
1264 return;
1265 }
1266 return;
1267 }
1268
1269 if (GetObjectType(objnum) != 1)
1270 return;
1271
1272 FX_FILESIZE pos = m_ObjectInfo[objnum].pos;
1273 if (pos == 0)
1274 return;
1275
1276 FX_FILESIZE SavedPos = m_pSyntax->SavePos();
1277 m_pSyntax->RestorePos(pos);
1278
1279 bool bIsNumber;
1280 CFX_ByteString word = m_pSyntax->GetNextWord(&bIsNumber);
1281 if (!bIsNumber) {
1282 m_pSyntax->RestorePos(SavedPos);
1283 return;
1284 }
1285
1286 FX_DWORD parser_objnum = FXSYS_atoui(word);
1287 if (parser_objnum && parser_objnum != objnum) {
1288 m_pSyntax->RestorePos(SavedPos);
1289 return;
1290 }
1291
1292 word = m_pSyntax->GetNextWord(&bIsNumber);
1293 if (!bIsNumber) {
1294 m_pSyntax->RestorePos(SavedPos);
1295 return;
1296 }
1297
1298 if (m_pSyntax->GetKeyword() != "obj") {
1299 m_pSyntax->RestorePos(SavedPos);
1300 return;
1301 }
1302
1303 auto it = m_SortedOffset.find(pos);
1304 if (it == m_SortedOffset.end() || ++it == m_SortedOffset.end()) {
1305 m_pSyntax->RestorePos(SavedPos);
1306 return;
1307 }
1308
1309 FX_FILESIZE nextoff = *it;
1310 FX_BOOL bNextOffValid = FALSE;
1311 if (nextoff != pos) {
1312 m_pSyntax->RestorePos(nextoff);
1313 word = m_pSyntax->GetNextWord(&bIsNumber);
1314 if (word == "xref") {
1315 bNextOffValid = TRUE;
1316 } else if (bIsNumber) {
1317 word = m_pSyntax->GetNextWord(&bIsNumber);
1318 if (bIsNumber && m_pSyntax->GetKeyword() == "obj") {
1319 bNextOffValid = TRUE;
1320 }
1321 }
1322 }
1323
1324 if (!bNextOffValid) {
1325 m_pSyntax->RestorePos(pos);
1326 while (1) {
1327 if (m_pSyntax->GetKeyword() == "endobj")
1328 break;
1329
1330 if (m_pSyntax->SavePos() == m_pSyntax->m_FileLen)
1331 break;
1332 }
1333 nextoff = m_pSyntax->SavePos();
1334 }
1335
1336 size = (FX_DWORD)(nextoff - pos);
1337 pBuffer = FX_Alloc(uint8_t, size);
1338 m_pSyntax->RestorePos(pos);
1339 m_pSyntax->ReadBlock(pBuffer, size);
1340 m_pSyntax->RestorePos(SavedPos);
1341 }
1342
1343 CPDF_Object* CPDF_Parser::ParseIndirectObjectAt(
1344 CPDF_IndirectObjectHolder* pObjList,
1345 FX_FILESIZE pos,
1346 FX_DWORD objnum) {
1347 FX_FILESIZE SavedPos = m_pSyntax->SavePos();
1348 m_pSyntax->RestorePos(pos);
1349 bool bIsNumber;
1350 CFX_ByteString word = m_pSyntax->GetNextWord(&bIsNumber);
1351 if (!bIsNumber) {
1352 m_pSyntax->RestorePos(SavedPos);
1353 return nullptr;
1354 }
1355
1356 FX_FILESIZE objOffset = m_pSyntax->SavePos();
1357 objOffset -= word.GetLength();
1358 FX_DWORD parser_objnum = FXSYS_atoui(word);
1359 if (objnum && parser_objnum != objnum) {
1360 m_pSyntax->RestorePos(SavedPos);
1361 return nullptr;
1362 }
1363
1364 word = m_pSyntax->GetNextWord(&bIsNumber);
1365 if (!bIsNumber) {
1366 m_pSyntax->RestorePos(SavedPos);
1367 return nullptr;
1368 }
1369
1370 FX_DWORD parser_gennum = FXSYS_atoui(word);
1371 if (m_pSyntax->GetKeyword() != "obj") {
1372 m_pSyntax->RestorePos(SavedPos);
1373 return nullptr;
1374 }
1375
1376 CPDF_Object* pObj =
1377 m_pSyntax->GetObject(pObjList, objnum, parser_gennum, true);
1378 m_pSyntax->SavePos();
1379
1380 CFX_ByteString bsWord = m_pSyntax->GetKeyword();
1381 if (bsWord == "endobj")
1382 m_pSyntax->SavePos();
1383
1384 m_pSyntax->RestorePos(SavedPos);
1385 if (pObj) {
1386 if (!objnum)
1387 pObj->m_ObjNum = parser_objnum;
1388 pObj->m_GenNum = parser_gennum;
1389 }
1390 return pObj;
1391 }
1392
1393 CPDF_Object* CPDF_Parser::ParseIndirectObjectAtByStrict(
1394 CPDF_IndirectObjectHolder* pObjList,
1395 FX_FILESIZE pos,
1396 FX_DWORD objnum,
1397 FX_FILESIZE* pResultPos) {
1398 FX_FILESIZE SavedPos = m_pSyntax->SavePos();
1399 m_pSyntax->RestorePos(pos);
1400
1401 bool bIsNumber;
1402 CFX_ByteString word = m_pSyntax->GetNextWord(&bIsNumber);
1403 if (!bIsNumber) {
1404 m_pSyntax->RestorePos(SavedPos);
1405 return nullptr;
1406 }
1407
1408 FX_DWORD parser_objnum = FXSYS_atoui(word);
1409 if (objnum && parser_objnum != objnum) {
1410 m_pSyntax->RestorePos(SavedPos);
1411 return nullptr;
1412 }
1413
1414 word = m_pSyntax->GetNextWord(&bIsNumber);
1415 if (!bIsNumber) {
1416 m_pSyntax->RestorePos(SavedPos);
1417 return nullptr;
1418 }
1419
1420 FX_DWORD gennum = FXSYS_atoui(word);
1421 if (m_pSyntax->GetKeyword() != "obj") {
1422 m_pSyntax->RestorePos(SavedPos);
1423 return nullptr;
1424 }
1425
1426 CPDF_Object* pObj = m_pSyntax->GetObjectByStrict(pObjList, objnum, gennum);
1427 if (pResultPos)
1428 *pResultPos = m_pSyntax->m_Pos;
1429
1430 m_pSyntax->RestorePos(SavedPos);
1431 return pObj;
1432 }
1433
1434 CPDF_Dictionary* CPDF_Parser::LoadTrailerV4() {
1435 if (m_pSyntax->GetKeyword() != "trailer")
1436 return nullptr;
1437
1438 std::unique_ptr<CPDF_Object, ReleaseDeleter<CPDF_Object>> pObj(
1439 m_pSyntax->GetObject(m_pDocument, 0, 0, true));
1440 if (!ToDictionary(pObj.get()))
1441 return nullptr;
1442 return pObj.release()->AsDictionary();
1443 }
1444
1445 FX_DWORD CPDF_Parser::GetPermissions(FX_BOOL bCheckRevision) {
1446 if (!m_pSecurityHandler)
1447 return (FX_DWORD)-1;
1448
1449 FX_DWORD dwPermission = m_pSecurityHandler->GetPermissions();
1450 if (m_pEncryptDict && m_pEncryptDict->GetStringBy("Filter") == "Standard") {
1451 dwPermission &= 0xFFFFFFFC;
1452 dwPermission |= 0xFFFFF0C0;
1453 if (bCheckRevision && m_pEncryptDict->GetIntegerBy("R") == 2)
1454 dwPermission &= 0xFFFFF0FF;
1455 }
1456 return dwPermission;
1457 }
1458
1459 FX_BOOL CPDF_Parser::IsLinearizedFile(IFX_FileRead* pFileAccess,
1460 FX_DWORD offset) {
1461 m_pSyntax->InitParser(pFileAccess, offset);
1462 m_pSyntax->RestorePos(m_pSyntax->m_HeaderOffset + 9);
1463
1464 FX_FILESIZE SavedPos = m_pSyntax->SavePos();
1465 bool bIsNumber;
1466 CFX_ByteString word = m_pSyntax->GetNextWord(&bIsNumber);
1467 if (!bIsNumber)
1468 return FALSE;
1469
1470 FX_DWORD objnum = FXSYS_atoui(word);
1471 word = m_pSyntax->GetNextWord(&bIsNumber);
1472 if (!bIsNumber)
1473 return FALSE;
1474
1475 FX_DWORD gennum = FXSYS_atoui(word);
1476 if (m_pSyntax->GetKeyword() != "obj") {
1477 m_pSyntax->RestorePos(SavedPos);
1478 return FALSE;
1479 }
1480
1481 m_pLinearized = m_pSyntax->GetObject(nullptr, objnum, gennum, true);
1482 if (!m_pLinearized)
1483 return FALSE;
1484
1485 CPDF_Dictionary* pDict = m_pLinearized->GetDict();
1486 if (pDict && pDict->GetElement("Linearized")) {
1487 m_pSyntax->GetNextWord(nullptr);
1488
1489 CPDF_Object* pLen = pDict->GetElement("L");
1490 if (!pLen) {
1491 m_pLinearized->Release();
1492 m_pLinearized = nullptr;
1493 return FALSE;
1494 }
1495
1496 if (pLen->GetInteger() != (int)pFileAccess->GetSize())
1497 return FALSE;
1498
1499 if (CPDF_Number* pNo = ToNumber(pDict->GetElement("P")))
1500 m_dwFirstPageNo = pNo->GetInteger();
1501
1502 if (CPDF_Number* pTable = ToNumber(pDict->GetElement("T")))
1503 m_LastXRefOffset = pTable->GetInteger();
1504
1505 return TRUE;
1506 }
1507 m_pLinearized->Release();
1508 m_pLinearized = nullptr;
1509 return FALSE;
1510 }
1511
1512 CPDF_Parser::Error CPDF_Parser::StartAsyncParse(IFX_FileRead* pFileAccess) {
1513 CloseParser();
1514 m_bXRefStream = FALSE;
1515 m_LastXRefOffset = 0;
1516 m_bOwnFileRead = true;
1517
1518 int32_t offset = GetHeaderOffset(pFileAccess);
1519 if (offset == -1)
1520 return FORMAT_ERROR;
1521
1522 if (!IsLinearizedFile(pFileAccess, offset)) {
1523 m_pSyntax->m_pFileAccess = nullptr;
1524 return StartParse(pFileAccess);
1525 }
1526
1527 m_pDocument = new CPDF_Document(this);
1528 FX_FILESIZE dwFirstXRefOffset = m_pSyntax->SavePos();
1529
1530 FX_BOOL bXRefRebuilt = FALSE;
1531 FX_BOOL bLoadV4 = FALSE;
1532 if (!(bLoadV4 = LoadCrossRefV4(dwFirstXRefOffset, 0, FALSE)) &&
1533 !LoadCrossRefV5(&dwFirstXRefOffset, TRUE)) {
1534 if (!RebuildCrossRef())
1535 return FORMAT_ERROR;
1536
1537 bXRefRebuilt = TRUE;
1538 m_LastXRefOffset = 0;
1539 }
1540
1541 if (bLoadV4) {
1542 m_pTrailer = LoadTrailerV4();
1543 if (!m_pTrailer)
1544 return SUCCESS;
1545
1546 int32_t xrefsize = GetDirectInteger(m_pTrailer, "Size");
1547 if (xrefsize > 0)
1548 ShrinkObjectMap(xrefsize);
1549 }
1550
1551 Error eRet = SetEncryptHandler();
1552 if (eRet != SUCCESS)
1553 return eRet;
1554
1555 m_pDocument->LoadAsynDoc(m_pLinearized->GetDict());
1556 if (!m_pDocument->GetRoot() || m_pDocument->GetPageCount() == 0) {
1557 if (bXRefRebuilt)
1558 return FORMAT_ERROR;
1559
1560 ReleaseEncryptHandler();
1561 if (!RebuildCrossRef())
1562 return FORMAT_ERROR;
1563
1564 eRet = SetEncryptHandler();
1565 if (eRet != SUCCESS)
1566 return eRet;
1567
1568 m_pDocument->LoadAsynDoc(m_pLinearized->GetDict());
1569 if (!m_pDocument->GetRoot())
1570 return FORMAT_ERROR;
1571 }
1572
1573 if (GetRootObjNum() == 0) {
1574 ReleaseEncryptHandler();
1575 if (!RebuildCrossRef() || GetRootObjNum() == 0)
1576 return FORMAT_ERROR;
1577
1578 eRet = SetEncryptHandler();
1579 if (eRet != SUCCESS)
1580 return eRet;
1581 }
1582
1583 if (m_pSecurityHandler && m_pSecurityHandler->IsMetadataEncrypted()) {
1584 if (CPDF_Reference* pMetadata =
1585 ToReference(m_pDocument->GetRoot()->GetElement("Metadata")))
1586 m_pSyntax->m_MetadataObjnum = pMetadata->GetRefObjNum();
1587 }
1588 return SUCCESS;
1589 }
1590
1591 FX_BOOL CPDF_Parser::LoadLinearizedAllCrossRefV5(FX_FILESIZE xrefpos) {
1592 if (!LoadCrossRefV5(&xrefpos, FALSE))
1593 return FALSE;
1594
1595 std::set<FX_FILESIZE> seen_xrefpos;
1596 while (xrefpos) {
1597 seen_xrefpos.insert(xrefpos);
1598 if (!LoadCrossRefV5(&xrefpos, FALSE))
1599 return FALSE;
1600
1601 // Check for circular references.
1602 if (pdfium::ContainsKey(seen_xrefpos, xrefpos))
1603 return FALSE;
1604 }
1605 m_ObjectStreamMap.clear();
1606 m_bXRefStream = TRUE;
1607 return TRUE;
1608 }
1609
1610 CPDF_Parser::Error CPDF_Parser::LoadLinearizedMainXRefTable() {
1611 FX_DWORD dwSaveMetadataObjnum = m_pSyntax->m_MetadataObjnum;
1612 m_pSyntax->m_MetadataObjnum = 0;
1613 if (m_pTrailer) {
1614 m_pTrailer->Release();
1615 m_pTrailer = nullptr;
1616 }
1617
1618 m_pSyntax->RestorePos(m_LastXRefOffset - m_pSyntax->m_HeaderOffset);
1619 uint8_t ch = 0;
1620 FX_DWORD dwCount = 0;
1621 m_pSyntax->GetNextChar(ch);
1622 while (PDFCharIsWhitespace(ch)) {
1623 ++dwCount;
1624 if (m_pSyntax->m_FileLen >=
1625 (FX_FILESIZE)(m_pSyntax->SavePos() + m_pSyntax->m_HeaderOffset)) {
1626 break;
1627 }
1628 m_pSyntax->GetNextChar(ch);
1629 }
1630 m_LastXRefOffset += dwCount;
1631 m_ObjectStreamMap.clear();
1632 m_ObjCache.clear();
1633
1634 if (!LoadLinearizedAllCrossRefV4(m_LastXRefOffset, m_dwXrefStartObjNum) &&
1635 !LoadLinearizedAllCrossRefV5(m_LastXRefOffset)) {
1636 m_LastXRefOffset = 0;
1637 m_pSyntax->m_MetadataObjnum = dwSaveMetadataObjnum;
1638 return FORMAT_ERROR;
1639 }
1640
1641 m_pSyntax->m_MetadataObjnum = dwSaveMetadataObjnum;
1642 return SUCCESS;
1643 }
OLDNEW
« no previous file with comments | « core/src/fpdfapi/fpdf_parser/cpdf_document.cpp ('k') | core/src/fpdfapi/fpdf_parser/cpdf_simple_parser.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698