Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(190)

Side by Side Diff: core/fpdfapi/fpdf_parser/cpdf_parser.cpp

Issue 2392603004: Move core/fpdfapi/fpdf_parser to core/fpdfapi/parser (Closed)
Patch Set: Rebase to master Created 4 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 // Copyright 2016 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7 #include "core/fpdfapi/fpdf_parser/cpdf_parser.h"
8
9 #include <vector>
10
11 #include "core/fpdfapi/fpdf_parser/cpdf_array.h"
12 #include "core/fpdfapi/fpdf_parser/cpdf_crypto_handler.h"
13 #include "core/fpdfapi/fpdf_parser/cpdf_dictionary.h"
14 #include "core/fpdfapi/fpdf_parser/cpdf_document.h"
15 #include "core/fpdfapi/fpdf_parser/cpdf_number.h"
16 #include "core/fpdfapi/fpdf_parser/cpdf_reference.h"
17 #include "core/fpdfapi/fpdf_parser/cpdf_security_handler.h"
18 #include "core/fpdfapi/fpdf_parser/cpdf_stream.h"
19 #include "core/fpdfapi/fpdf_parser/cpdf_stream_acc.h"
20 #include "core/fpdfapi/fpdf_parser/cpdf_syntax_parser.h"
21 #include "core/fpdfapi/fpdf_parser/fpdf_parser_utility.h"
22 #include "core/fxcrt/fx_ext.h"
23 #include "core/fxcrt/fx_safe_types.h"
24 #include "third_party/base/stl_util.h"
25
26 namespace {
27
// Cap on the cross reference table size (2^20 entries). The format permits
// larger tables in theory, but this bound is expected to suffice in practice.
const int32_t kMaxXRefSize = 1 << 20;
31
// Decodes the first |n| bytes at |p| as a big-endian unsigned integer.
// Returns 0 when |n| is not positive. Only the low 32 bits are kept, so with
// more than four bytes the leading ones fall off the top.
uint32_t GetVarInt(const uint8_t* p, int32_t n) {
  uint32_t value = 0;
  for (int32_t remaining = n; remaining > 0; --remaining)
    value = (value << 8) | *p++;
  return value;
}
38
39 int32_t GetStreamNCount(CPDF_StreamAcc* pObjStream) {
40 return pObjStream->GetDict()->GetIntegerFor("N");
41 }
42
43 int32_t GetStreamFirst(CPDF_StreamAcc* pObjStream) {
44 return pObjStream->GetDict()->GetIntegerFor("First");
45 }
46
47 } // namespace
48
49 CPDF_Parser::CPDF_Parser()
50 : m_pDocument(nullptr),
51 m_bHasParsed(false),
52 m_bOwnFileRead(true),
53 m_FileVersion(0),
54 m_pTrailer(nullptr),
55 m_pEncryptDict(nullptr),
56 m_bVersionUpdated(false),
57 m_pLinearized(nullptr),
58 m_dwFirstPageNo(0),
59 m_dwXrefStartObjNum(0) {
60 m_pSyntax.reset(new CPDF_SyntaxParser);
61 }
62
63 CPDF_Parser::~CPDF_Parser() {
64 if (m_pTrailer)
65 m_pTrailer->Release();
66
67 ReleaseEncryptHandler();
68 SetEncryptDictionary(nullptr);
69
70 if (m_bOwnFileRead && m_pSyntax->m_pFileAccess) {
71 m_pSyntax->m_pFileAccess->Release();
72 m_pSyntax->m_pFileAccess = nullptr;
73 }
74
75 int32_t iLen = m_Trailers.GetSize();
76 for (int32_t i = 0; i < iLen; ++i) {
77 if (CPDF_Dictionary* trailer = m_Trailers.GetAt(i))
78 trailer->Release();
79 }
80
81 if (m_pLinearized)
82 m_pLinearized->Release();
83 }
84
85 uint32_t CPDF_Parser::GetLastObjNum() const {
86 return m_ObjectInfo.empty() ? 0 : m_ObjectInfo.rbegin()->first;
87 }
88
89 bool CPDF_Parser::IsValidObjectNumber(uint32_t objnum) const {
90 return !m_ObjectInfo.empty() && objnum <= m_ObjectInfo.rbegin()->first;
91 }
92
93 FX_FILESIZE CPDF_Parser::GetObjectPositionOrZero(uint32_t objnum) const {
94 auto it = m_ObjectInfo.find(objnum);
95 return it != m_ObjectInfo.end() ? it->second.pos : 0;
96 }
97
98 uint8_t CPDF_Parser::GetObjectType(uint32_t objnum) const {
99 ASSERT(IsValidObjectNumber(objnum));
100 auto it = m_ObjectInfo.find(objnum);
101 return it != m_ObjectInfo.end() ? it->second.type : 0;
102 }
103
104 uint16_t CPDF_Parser::GetObjectGenNum(uint32_t objnum) const {
105 ASSERT(IsValidObjectNumber(objnum));
106 auto it = m_ObjectInfo.find(objnum);
107 return it != m_ObjectInfo.end() ? it->second.gennum : 0;
108 }
109
110 bool CPDF_Parser::IsObjectFreeOrNull(uint32_t objnum) const {
111 uint8_t type = GetObjectType(objnum);
112 return type == 0 || type == 255;
113 }
114
115 void CPDF_Parser::SetEncryptDictionary(CPDF_Dictionary* pDict) {
116 m_pEncryptDict = pDict;
117 }
118
119 CPDF_CryptoHandler* CPDF_Parser::GetCryptoHandler() {
120 return m_pSyntax->m_pCryptoHandler.get();
121 }
122
123 IFX_FileRead* CPDF_Parser::GetFileAccess() const {
124 return m_pSyntax->m_pFileAccess;
125 }
126
127 void CPDF_Parser::ShrinkObjectMap(uint32_t objnum) {
128 if (objnum == 0) {
129 m_ObjectInfo.clear();
130 return;
131 }
132
133 auto it = m_ObjectInfo.lower_bound(objnum);
134 while (it != m_ObjectInfo.end()) {
135 auto saved_it = it++;
136 m_ObjectInfo.erase(saved_it);
137 }
138
139 if (!pdfium::ContainsKey(m_ObjectInfo, objnum - 1))
140 m_ObjectInfo[objnum - 1].pos = 0;
141 }
142
// Parses the file behind |pFileAccess| and loads |pDocument| from it.
//
// Steps, in order: find the header, read the two version digits, search for
// "startxref" near the end of the file, load the cross reference data (falling
// back to RebuildCrossRef() when that fails or when no "startxref" is found),
// install the encryption handlers, and call LoadDoc() on the document.
//
// Returns SUCCESS on success, FORMAT_ERROR for structural problems, or the
// error reported by SetEncryptHandler(). May only be called once per parser
// (asserted via |m_bHasParsed|).
143 CPDF_Parser::Error CPDF_Parser::StartParse(IFX_FileRead* pFileAccess,
144 CPDF_Document* pDocument) {
145 ASSERT(!m_bHasParsed);
146 m_bHasParsed = true;
147
148 m_bXRefStream = FALSE;
149 m_LastXRefOffset = 0;
150 m_bOwnFileRead = true;
151
152 int32_t offset = GetHeaderOffset(pFileAccess);
153 if (offset == -1) {
// The file has not been handed to |m_pSyntax| yet, so it is released here.
154 if (pFileAccess)
155 pFileAccess->Release();
156 return FORMAT_ERROR;
157 }
158 m_pSyntax->InitParser(pFileAccess, offset);
159
// |m_FileVersion| becomes 10 * (digit at index 5) + (digit at index 7);
// presumably these are the major/minor digits of a "%PDF-M.m" header.
160 uint8_t ch;
161 if (!m_pSyntax->GetCharAt(5, ch))
162 return FORMAT_ERROR;
163 if (std::isdigit(ch))
164 m_FileVersion = FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch)) * 10;
165
166 if (!m_pSyntax->GetCharAt(7, ch))
167 return FORMAT_ERROR;
168 if (std::isdigit(ch))
169 m_FileVersion += FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch));
170
171 if (m_pSyntax->m_FileLen < m_pSyntax->m_HeaderOffset + 9)
172 return FORMAT_ERROR;
173
// Position 9 bytes (the length of "startxref") before the end of the data
// before searching for the keyword.
174 m_pSyntax->RestorePos(m_pSyntax->m_FileLen - m_pSyntax->m_HeaderOffset - 9);
175 m_pDocument = pDocument;
176
177 FX_BOOL bXRefRebuilt = FALSE;
178 if (m_pSyntax->SearchWord("startxref", TRUE, FALSE, 4096)) {
179 m_SortedOffset.insert(m_pSyntax->SavePos());
180 m_pSyntax->GetKeyword();
181
182 bool bNumber;
183 CFX_ByteString xrefpos_str = m_pSyntax->GetNextWord(&bNumber);
184 if (!bNumber)
185 return FORMAT_ERROR;
186
// Try a classic table first, then a cross reference stream, and only rebuild
// from a full scan when both fail.
187 m_LastXRefOffset = (FX_FILESIZE)FXSYS_atoi64(xrefpos_str.c_str());
188 if (!LoadAllCrossRefV4(m_LastXRefOffset) &&
189 !LoadAllCrossRefV5(m_LastXRefOffset)) {
190 if (!RebuildCrossRef())
191 return FORMAT_ERROR;
192
193 bXRefRebuilt = TRUE;
194 m_LastXRefOffset = 0;
195 }
196 } else {
197 if (!RebuildCrossRef())
198 return FORMAT_ERROR;
199
200 bXRefRebuilt = TRUE;
201 }
202 Error eRet = SetEncryptHandler();
203 if (eRet != SUCCESS)
204 return eRet;
205
206 m_pDocument->LoadDoc();
207 if (!m_pDocument->GetRoot() || m_pDocument->GetPageCount() == 0) {
// No root or no pages. If the table already came from a rebuild there is
// nothing left to try; otherwise rebuild once and load again.
208 if (bXRefRebuilt)
209 return FORMAT_ERROR;
210
211 ReleaseEncryptHandler();
212 if (!RebuildCrossRef())
213 return FORMAT_ERROR;
214
215 eRet = SetEncryptHandler();
216 if (eRet != SUCCESS)
217 return eRet;
218
219 m_pDocument->LoadDoc();
220 if (!m_pDocument->GetRoot())
221 return FORMAT_ERROR;
222 }
// The trailer's "Root" entry is not a reference: rebuild and require one.
223 if (GetRootObjNum() == 0) {
224 ReleaseEncryptHandler();
225 if (!RebuildCrossRef() || GetRootObjNum() == 0)
226 return FORMAT_ERROR;
227
228 eRet = SetEncryptHandler();
229 if (eRet != SUCCESS)
230 return eRet;
231 }
// When metadata is not encrypted, tell the syntax parser which object number
// holds it.
232 if (m_pSecurityHandler && !m_pSecurityHandler->IsMetadataEncrypted()) {
233 CPDF_Reference* pMetadata =
234 ToReference(m_pDocument->GetRoot()->GetObjectFor("Metadata"));
235 if (pMetadata)
236 m_pSyntax->m_MetadataObjnum = pMetadata->GetRefObjNum();
237 }
238 return SUCCESS;
239 }
240 CPDF_Parser::Error CPDF_Parser::SetEncryptHandler() {
241 ReleaseEncryptHandler();
242 SetEncryptDictionary(nullptr);
243
244 if (!m_pTrailer)
245 return FORMAT_ERROR;
246
247 CPDF_Object* pEncryptObj = m_pTrailer->GetObjectFor("Encrypt");
248 if (pEncryptObj) {
249 if (CPDF_Dictionary* pEncryptDict = pEncryptObj->AsDictionary()) {
250 SetEncryptDictionary(pEncryptDict);
251 } else if (CPDF_Reference* pRef = pEncryptObj->AsReference()) {
252 pEncryptObj = m_pDocument->GetOrParseIndirectObject(pRef->GetRefObjNum());
253 if (pEncryptObj)
254 SetEncryptDictionary(pEncryptObj->GetDict());
255 }
256 }
257
258 if (m_pEncryptDict) {
259 CFX_ByteString filter = m_pEncryptDict->GetStringFor("Filter");
260 std::unique_ptr<CPDF_SecurityHandler> pSecurityHandler;
261 Error err = HANDLER_ERROR;
262 if (filter == "Standard") {
263 pSecurityHandler.reset(new CPDF_SecurityHandler);
264 err = PASSWORD_ERROR;
265 }
266 if (!pSecurityHandler)
267 return HANDLER_ERROR;
268
269 if (!pSecurityHandler->OnInit(this, m_pEncryptDict))
270 return err;
271
272 m_pSecurityHandler = std::move(pSecurityHandler);
273 std::unique_ptr<CPDF_CryptoHandler> pCryptoHandler(
274 m_pSecurityHandler->CreateCryptoHandler());
275 if (!pCryptoHandler->Init(m_pEncryptDict, m_pSecurityHandler.get()))
276 return HANDLER_ERROR;
277 m_pSyntax->SetEncrypt(std::move(pCryptoHandler));
278 }
279 return SUCCESS;
280 }
281
282 void CPDF_Parser::ReleaseEncryptHandler() {
283 m_pSyntax->m_pCryptoHandler.reset();
284 m_pSecurityHandler.reset();
285 }
286
287 FX_FILESIZE CPDF_Parser::GetObjectOffset(uint32_t objnum) const {
288 if (!IsValidObjectNumber(objnum))
289 return 0;
290
291 if (GetObjectType(objnum) == 1)
292 return GetObjectPositionOrZero(objnum);
293
294 if (GetObjectType(objnum) == 2) {
295 FX_FILESIZE pos = GetObjectPositionOrZero(objnum);
296 return GetObjectPositionOrZero(pos);
297 }
298 return 0;
299 }
300
301 // Ideally, all the cross reference entries should be verified.
302 // In reality, we rarely see well-formed cross references don't match
303 // with the objects. crbug/602650 showed a case where object numbers
304 // in the cross reference table are all off by one.
305 bool CPDF_Parser::VerifyCrossRefV4() {
306 for (const auto& it : m_ObjectInfo) {
307 if (it.second.pos == 0)
308 continue;
309 // Find the first non-zero position.
310 FX_FILESIZE SavedPos = m_pSyntax->SavePos();
311 m_pSyntax->RestorePos(it.second.pos);
312 bool is_num = false;
313 CFX_ByteString num_str = m_pSyntax->GetNextWord(&is_num);
314 m_pSyntax->RestorePos(SavedPos);
315 if (!is_num || num_str.IsEmpty() ||
316 FXSYS_atoui(num_str.c_str()) != it.first) {
317 // If the object number read doesn't match the one stored,
318 // something is wrong with the cross reference table.
319 return false;
320 } else {
321 return true;
322 }
323 }
324 return true;
325 }
326
327 FX_BOOL CPDF_Parser::LoadAllCrossRefV4(FX_FILESIZE xrefpos) {
328 if (!LoadCrossRefV4(xrefpos, 0, TRUE))
329 return FALSE;
330
331 m_pTrailer = LoadTrailerV4();
332 if (!m_pTrailer)
333 return FALSE;
334
335 int32_t xrefsize = GetDirectInteger(m_pTrailer, "Size");
336 if (xrefsize > 0 && xrefsize <= kMaxXRefSize)
337 ShrinkObjectMap(xrefsize);
338
339 std::vector<FX_FILESIZE> CrossRefList;
340 std::vector<FX_FILESIZE> XRefStreamList;
341 std::set<FX_FILESIZE> seen_xrefpos;
342
343 CrossRefList.push_back(xrefpos);
344 XRefStreamList.push_back(GetDirectInteger(m_pTrailer, "XRefStm"));
345 seen_xrefpos.insert(xrefpos);
346
347 // When |m_pTrailer| doesn't have Prev entry or Prev entry value is not
348 // numerical, GetDirectInteger() returns 0. Loading will end.
349 xrefpos = GetDirectInteger(m_pTrailer, "Prev");
350 while (xrefpos) {
351 // Check for circular references.
352 if (pdfium::ContainsKey(seen_xrefpos, xrefpos))
353 return FALSE;
354
355 seen_xrefpos.insert(xrefpos);
356
357 // SLOW ...
358 CrossRefList.insert(CrossRefList.begin(), xrefpos);
359 LoadCrossRefV4(xrefpos, 0, TRUE);
360
361 std::unique_ptr<CPDF_Dictionary, ReleaseDeleter<CPDF_Dictionary>> pDict(
362 LoadTrailerV4());
363 if (!pDict)
364 return FALSE;
365
366 xrefpos = GetDirectInteger(pDict.get(), "Prev");
367
368 // SLOW ...
369 XRefStreamList.insert(XRefStreamList.begin(),
370 pDict->GetIntegerFor("XRefStm"));
371 m_Trailers.Add(pDict.release());
372 }
373
374 for (size_t i = 0; i < CrossRefList.size(); ++i) {
375 if (!LoadCrossRefV4(CrossRefList[i], XRefStreamList[i], FALSE))
376 return FALSE;
377 if (i == 0 && !VerifyCrossRefV4())
378 return FALSE;
379 }
380 return TRUE;
381 }
382
383 FX_BOOL CPDF_Parser::LoadLinearizedAllCrossRefV4(FX_FILESIZE xrefpos,
384 uint32_t dwObjCount) {
385 if (!LoadLinearizedCrossRefV4(xrefpos, dwObjCount))
386 return FALSE;
387
388 m_pTrailer = LoadTrailerV4();
389 if (!m_pTrailer)
390 return FALSE;
391
392 int32_t xrefsize = GetDirectInteger(m_pTrailer, "Size");
393 if (xrefsize == 0)
394 return FALSE;
395
396 std::vector<FX_FILESIZE> CrossRefList;
397 std::vector<FX_FILESIZE> XRefStreamList;
398 std::set<FX_FILESIZE> seen_xrefpos;
399
400 CrossRefList.push_back(xrefpos);
401 XRefStreamList.push_back(GetDirectInteger(m_pTrailer, "XRefStm"));
402 seen_xrefpos.insert(xrefpos);
403
404 xrefpos = GetDirectInteger(m_pTrailer, "Prev");
405 while (xrefpos) {
406 // Check for circular references.
407 if (pdfium::ContainsKey(seen_xrefpos, xrefpos))
408 return FALSE;
409
410 seen_xrefpos.insert(xrefpos);
411
412 // SLOW ...
413 CrossRefList.insert(CrossRefList.begin(), xrefpos);
414 LoadCrossRefV4(xrefpos, 0, TRUE);
415
416 std::unique_ptr<CPDF_Dictionary, ReleaseDeleter<CPDF_Dictionary>> pDict(
417 LoadTrailerV4());
418 if (!pDict)
419 return FALSE;
420
421 xrefpos = GetDirectInteger(pDict.get(), "Prev");
422
423 // SLOW ...
424 XRefStreamList.insert(XRefStreamList.begin(),
425 pDict->GetIntegerFor("XRefStm"));
426 m_Trailers.Add(pDict.release());
427 }
428
429 for (size_t i = 1; i < CrossRefList.size(); ++i) {
430 if (!LoadCrossRefV4(CrossRefList[i], XRefStreamList[i], FALSE))
431 return FALSE;
432 }
433 return TRUE;
434 }
435
436 FX_BOOL CPDF_Parser::LoadLinearizedCrossRefV4(FX_FILESIZE pos,
437 uint32_t dwObjCount) {
438 FX_FILESIZE dwStartPos = pos - m_pSyntax->m_HeaderOffset;
439
440 m_pSyntax->RestorePos(dwStartPos);
441 m_SortedOffset.insert(pos);
442
443 uint32_t start_objnum = 0;
444 uint32_t count = dwObjCount;
445 FX_FILESIZE SavedPos = m_pSyntax->SavePos();
446
447 const int32_t recordsize = 20;
448 std::vector<char> buf(1024 * recordsize + 1);
449 buf[1024 * recordsize] = '\0';
450
451 int32_t nBlocks = count / 1024 + 1;
452 for (int32_t block = 0; block < nBlocks; block++) {
453 int32_t block_size = block == nBlocks - 1 ? count % 1024 : 1024;
454 uint32_t dwReadSize = block_size * recordsize;
455 if ((FX_FILESIZE)(dwStartPos + dwReadSize) > m_pSyntax->m_FileLen)
456 return FALSE;
457
458 if (!m_pSyntax->ReadBlock(reinterpret_cast<uint8_t*>(buf.data()),
459 dwReadSize)) {
460 return FALSE;
461 }
462
463 for (int32_t i = 0; i < block_size; i++) {
464 uint32_t objnum = start_objnum + block * 1024 + i;
465 char* pEntry = &buf[i * recordsize];
466 if (pEntry[17] == 'f') {
467 m_ObjectInfo[objnum].pos = 0;
468 m_ObjectInfo[objnum].type = 0;
469 } else {
470 int32_t offset = FXSYS_atoi(pEntry);
471 if (offset == 0) {
472 for (int32_t c = 0; c < 10; c++) {
473 if (!std::isdigit(pEntry[c]))
474 return FALSE;
475 }
476 }
477
478 m_ObjectInfo[objnum].pos = offset;
479 int32_t version = FXSYS_atoi(pEntry + 11);
480 if (version >= 1)
481 m_bVersionUpdated = true;
482
483 m_ObjectInfo[objnum].gennum = version;
484 if (m_ObjectInfo[objnum].pos < m_pSyntax->m_FileLen)
485 m_SortedOffset.insert(m_ObjectInfo[objnum].pos);
486
487 m_ObjectInfo[objnum].type = 1;
488 }
489 }
490 }
491 m_pSyntax->RestorePos(SavedPos + count * recordsize);
492 return TRUE;
493 }
494
495 bool CPDF_Parser::LoadCrossRefV4(FX_FILESIZE pos,
496 FX_FILESIZE streampos,
497 FX_BOOL bSkip) {
498 m_pSyntax->RestorePos(pos);
499 if (m_pSyntax->GetKeyword() != "xref")
500 return false;
501
502 m_SortedOffset.insert(pos);
503 if (streampos)
504 m_SortedOffset.insert(streampos);
505
506 while (1) {
507 FX_FILESIZE SavedPos = m_pSyntax->SavePos();
508 bool bIsNumber;
509 CFX_ByteString word = m_pSyntax->GetNextWord(&bIsNumber);
510 if (word.IsEmpty())
511 return false;
512
513 if (!bIsNumber) {
514 m_pSyntax->RestorePos(SavedPos);
515 break;
516 }
517
518 uint32_t start_objnum = FXSYS_atoui(word.c_str());
519 if (start_objnum >= kMaxObjectNumber)
520 return false;
521
522 uint32_t count = m_pSyntax->GetDirectNum();
523 m_pSyntax->ToNextWord();
524 SavedPos = m_pSyntax->SavePos();
525 const int32_t recordsize = 20;
526
527 m_dwXrefStartObjNum = start_objnum;
528 if (!bSkip) {
529 std::vector<char> buf(1024 * recordsize + 1);
530 buf[1024 * recordsize] = '\0';
531
532 int32_t nBlocks = count / 1024 + 1;
533 for (int32_t block = 0; block < nBlocks; block++) {
534 int32_t block_size = block == nBlocks - 1 ? count % 1024 : 1024;
535 m_pSyntax->ReadBlock(reinterpret_cast<uint8_t*>(buf.data()),
536 block_size * recordsize);
537
538 for (int32_t i = 0; i < block_size; i++) {
539 uint32_t objnum = start_objnum + block * 1024 + i;
540 char* pEntry = &buf[i * recordsize];
541 if (pEntry[17] == 'f') {
542 m_ObjectInfo[objnum].pos = 0;
543 m_ObjectInfo[objnum].type = 0;
544 } else {
545 FX_FILESIZE offset = (FX_FILESIZE)FXSYS_atoi64(pEntry);
546 if (offset == 0) {
547 for (int32_t c = 0; c < 10; c++) {
548 if (!std::isdigit(pEntry[c]))
549 return false;
550 }
551 }
552
553 m_ObjectInfo[objnum].pos = offset;
554 int32_t version = FXSYS_atoi(pEntry + 11);
555 if (version >= 1)
556 m_bVersionUpdated = true;
557
558 m_ObjectInfo[objnum].gennum = version;
559 if (m_ObjectInfo[objnum].pos < m_pSyntax->m_FileLen)
560 m_SortedOffset.insert(m_ObjectInfo[objnum].pos);
561
562 m_ObjectInfo[objnum].type = 1;
563 }
564 }
565 }
566 }
567 m_pSyntax->RestorePos(SavedPos + count * recordsize);
568 }
569 return !streampos || LoadCrossRefV5(&streampos, FALSE);
570 }
571
572 FX_BOOL CPDF_Parser::LoadAllCrossRefV5(FX_FILESIZE xrefpos) {
573 if (!LoadCrossRefV5(&xrefpos, TRUE))
574 return FALSE;
575
576 std::set<FX_FILESIZE> seen_xrefpos;
577 while (xrefpos) {
578 seen_xrefpos.insert(xrefpos);
579 if (!LoadCrossRefV5(&xrefpos, FALSE))
580 return FALSE;
581
582 // Check for circular references.
583 if (pdfium::ContainsKey(seen_xrefpos, xrefpos))
584 return FALSE;
585 }
586 m_ObjectStreamMap.clear();
587 m_bXRefStream = TRUE;
588 return TRUE;
589 }
590
// Rebuilds the object table and trailer by scanning the whole file.
//
// Existing object info, sorted offsets and the current trailer are discarded
// first. The file is then read in 4096-byte chunks starting at the header
// offset and fed, byte by byte, through a state machine that recognises
// "<objnum> <gennum> obj", "trailer" and "xref" while stepping over comments,
// literal strings and hex strings. Each object found is parsed and recorded
// in |m_ObjectInfo|; trailer dictionaries (and qualifying XRef stream
// dictionaries) are used to build |m_pTrailer|.
//
// Returns true only when a trailer was found and at least one object was
// recorded.
591 FX_BOOL CPDF_Parser::RebuildCrossRef() {
592 m_ObjectInfo.clear();
593 m_SortedOffset.clear();
594 if (m_pTrailer) {
595 m_pTrailer->Release();
596 m_pTrailer = nullptr;
597 }
598
599 ParserState state = ParserState::kDefault;
600
// |inside_index| counts how many characters of the keyword currently being
// matched have been seen; |depth| tracks nested parentheses inside strings.
601 int32_t inside_index = 0;
602 uint32_t objnum = 0;
603 uint32_t gennum = 0;
604 int32_t depth = 0;
605
606 const uint32_t kBufferSize = 4096;
607 std::vector<uint8_t> buffer(kBufferSize);
608
// |start_pos| / |start_pos1| remember where the two most recent numbers began,
// so that the object start is known once "obj" is confirmed.
609 FX_FILESIZE pos = m_pSyntax->m_HeaderOffset;
610 FX_FILESIZE start_pos = 0;
611 FX_FILESIZE start_pos1 = 0;
612 FX_FILESIZE last_obj = -1;
613 FX_FILESIZE last_xref = -1;
614 FX_FILESIZE last_trailer = -1;
615
616 while (pos < m_pSyntax->m_FileLen) {
617 const FX_FILESIZE saved_pos = pos;
618 bool bOverFlow = false;
619 uint32_t size =
620 std::min((uint32_t)(m_pSyntax->m_FileLen - pos), kBufferSize);
621 if (!m_pSyntax->m_pFileAccess->ReadBlock(buffer.data(), pos, size))
622 break;
623
// In the cases below, "--i" makes the same byte be examined again under the
// state that was just selected.
624 for (uint32_t i = 0; i < size; i++) {
625 uint8_t byte = buffer[i];
626 switch (state) {
627 case ParserState::kDefault:
628 if (PDFCharIsWhitespace(byte)) {
629 state = ParserState::kWhitespace;
630 } else if (std::isdigit(byte)) {
631 --i;
632 state = ParserState::kWhitespace;
633 } else if (byte == '%') {
634 inside_index = 0;
635 state = ParserState::kComment;
636 } else if (byte == '(') {
637 state = ParserState::kString;
638 depth = 1;
639 } else if (byte == '<') {
640 inside_index = 1;
641 state = ParserState::kHexString;
642 } else if (byte == '\\') {
643 state = ParserState::kEscapedString;
644 } else if (byte == 't') {
645 state = ParserState::kTrailer;
646 inside_index = 1;
647 }
648 break;
649
650 case ParserState::kWhitespace:
651 if (std::isdigit(byte)) {
652 start_pos = pos + i;
653 state = ParserState::kObjNum;
654 objnum = FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(byte));
655 } else if (byte == 't') {
656 state = ParserState::kTrailer;
657 inside_index = 1;
658 } else if (byte == 'x') {
659 state = ParserState::kXref;
660 inside_index = 1;
661 } else if (!PDFCharIsWhitespace(byte)) {
662 --i;
663 state = ParserState::kDefault;
664 }
665 break;
666
667 case ParserState::kObjNum:
668 if (std::isdigit(byte)) {
669 objnum =
670 objnum * 10 + FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(byte));
671 } else if (PDFCharIsWhitespace(byte)) {
672 state = ParserState::kPostObjNum;
673 } else {
674 --i;
675 state = ParserState::kEndObj;
676 inside_index = 0;
677 }
678 break;
679
680 case ParserState::kPostObjNum:
681 if (std::isdigit(byte)) {
682 start_pos1 = pos + i;
683 state = ParserState::kGenNum;
684 gennum = FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(byte));
685 } else if (byte == 't') {
686 state = ParserState::kTrailer;
687 inside_index = 1;
688 } else if (!PDFCharIsWhitespace(byte)) {
689 --i;
690 state = ParserState::kDefault;
691 }
692 break;
693
694 case ParserState::kGenNum:
695 if (std::isdigit(byte)) {
696 gennum =
697 gennum * 10 + FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(byte));
698 } else if (PDFCharIsWhitespace(byte)) {
699 state = ParserState::kPostGenNum;
700 } else {
701 --i;
702 state = ParserState::kDefault;
703 }
704 break;
705
706 case ParserState::kPostGenNum:
707 if (byte == 'o') {
708 state = ParserState::kBeginObj;
709 inside_index = 1;
710 } else if (std::isdigit(byte)) {
// A third number in a row: shift the window so the previous generation number
// becomes the object number candidate.
711 objnum = gennum;
712 gennum = FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(byte));
713 start_pos = start_pos1;
714 start_pos1 = pos + i;
715 state = ParserState::kGenNum;
716 } else if (byte == 't') {
717 state = ParserState::kTrailer;
718 inside_index = 1;
719 } else if (!PDFCharIsWhitespace(byte)) {
720 --i;
721 state = ParserState::kDefault;
722 }
723 break;
724
725 case ParserState::kBeginObj:
726 switch (inside_index) {
727 case 1:
728 if (byte != 'b') {
729 --i;
730 state = ParserState::kDefault;
731 } else {
732 inside_index++;
733 }
734 break;
735 case 2:
736 if (byte != 'j') {
737 --i;
738 state = ParserState::kDefault;
739 } else {
740 inside_index++;
741 }
742 break;
743 case 3:
// "obj" only counts when followed by whitespace or a delimiter.
744 if (PDFCharIsWhitespace(byte) || PDFCharIsDelimiter(byte)) {
745 FX_FILESIZE obj_pos = start_pos - m_pSyntax->m_HeaderOffset;
746 m_SortedOffset.insert(obj_pos);
747 last_obj = start_pos;
748 FX_FILESIZE obj_end = 0;
749 CPDF_Object* pObject = ParseIndirectObjectAtByStrict(
750 m_pDocument, obj_pos, objnum, &obj_end);
// A stream whose dictionary has Type "XRef" and a "Size", and whose "Root"
// dictionary has "Pages", supplies the trailer.
751 if (CPDF_Stream* pStream = ToStream(pObject)) {
752 if (CPDF_Dictionary* pDict = pStream->GetDict()) {
753 if ((pDict->KeyExist("Type")) &&
754 (pDict->GetStringFor("Type") == "XRef" &&
755 pDict->KeyExist("Size"))) {
756 CPDF_Object* pRoot = pDict->GetObjectFor("Root");
757 if (pRoot && pRoot->GetDict() &&
758 pRoot->GetDict()->GetObjectFor("Pages")) {
759 if (m_pTrailer)
760 m_pTrailer->Release();
761 m_pTrailer = ToDictionary(pDict->Clone());
762 }
763 }
764 }
765 }
766
767 FX_FILESIZE offset = 0;
768 m_pSyntax->RestorePos(obj_pos);
769 offset = m_pSyntax->FindTag("obj", 0);
770 if (offset == -1)
771 offset = 0;
772 else
773 offset += 3;
774
// Skip the body of the object just parsed: either advance |i| within this
// buffer or, when the object ends beyond it, restart reading at the object's
// end.
775 FX_FILESIZE nLen = obj_end - obj_pos - offset;
776 if ((uint32_t)nLen > size - i) {
777 pos = obj_end + m_pSyntax->m_HeaderOffset;
778 bOverFlow = true;
779 } else {
780 i += (uint32_t)nLen;
781 }
782
// An entry that already has a position is only overwritten when the object
// parsed successfully; otherwise a new type-1 entry is created.
783 if (!m_ObjectInfo.empty() && IsValidObjectNumber(objnum) &&
784 m_ObjectInfo[objnum].pos) {
785 if (pObject) {
786 uint32_t oldgen = GetObjectGenNum(objnum);
787 m_ObjectInfo[objnum].pos = obj_pos;
788 m_ObjectInfo[objnum].gennum = gennum;
789 if (oldgen != gennum)
790 m_bVersionUpdated = true;
791 }
792 } else {
793 m_ObjectInfo[objnum].pos = obj_pos;
794 m_ObjectInfo[objnum].type = 1;
795 m_ObjectInfo[objnum].gennum = gennum;
796 }
797
798 if (pObject)
799 pObject->Release();
800 }
801 --i;
802 state = ParserState::kDefault;
803 break;
804 }
805 break;
806
807 case ParserState::kTrailer:
808 if (inside_index == 7) {
809 if (PDFCharIsWhitespace(byte) || PDFCharIsDelimiter(byte)) {
810 last_trailer = pos + i - 7;
811 m_pSyntax->RestorePos(pos + i - m_pSyntax->m_HeaderOffset);
812
813 CPDF_Object* pObj = m_pSyntax->GetObject(m_pDocument, 0, 0, true);
814 if (pObj) {
815 if (!pObj->IsDictionary() && !pObj->AsStream()) {
816 pObj->Release();
817 } else {
818 CPDF_Stream* pStream = pObj->AsStream();
819 if (CPDF_Dictionary* pTrailer =
820 pStream ? pStream->GetDict() : pObj->AsDictionary()) {
821 if (m_pTrailer) {
// A trailer already exists: copy this one's entries into it, but only when it
// has no "Root" or its "Root" refers to an object with a known position.
822 CPDF_Object* pRoot = pTrailer->GetObjectFor("Root");
823 CPDF_Reference* pRef = ToReference(pRoot);
824 if (!pRoot ||
825 (pRef && IsValidObjectNumber(pRef->GetRefObjNum()) &&
826 m_ObjectInfo[pRef->GetRefObjNum()].pos != 0)) {
827 auto it = pTrailer->begin();
828 while (it != pTrailer->end()) {
829 const CFX_ByteString& key = it->first;
830 CPDF_Object* pElement = it->second;
831 ++it;
832 uint32_t dwObjNum =
833 pElement ? pElement->GetObjNum() : 0;
834 if (dwObjNum) {
835 m_pTrailer->SetReferenceFor(key, m_pDocument,
836 dwObjNum);
837 } else {
// NOTE(review): |dwObjNum| is also 0 when |pElement| is null, in which case
// the Clone() call below dereferences null - confirm whether a dictionary can
// hold null values here.
838 m_pTrailer->SetFor(key, pElement->Clone());
839 }
840 }
841 }
842 pObj->Release();
843 } else {
// First trailer seen: clone the dictionary when it belongs to a stream,
// otherwise adopt the parsed dictionary itself.
844 if (pObj->IsStream()) {
845 m_pTrailer = ToDictionary(pTrailer->Clone());
846 pObj->Release();
847 } else {
848 m_pTrailer = pTrailer;
849 }
850
// Peek for a following "startxref <offset>" and remember the offset; the
// syntax parser position is restored afterwards.
851 FX_FILESIZE dwSavePos = m_pSyntax->SavePos();
852 CFX_ByteString strWord = m_pSyntax->GetKeyword();
853 if (!strWord.Compare("startxref")) {
854 bool bNumber;
855 CFX_ByteString bsOffset =
856 m_pSyntax->GetNextWord(&bNumber);
857 if (bNumber)
858 m_LastXRefOffset = FXSYS_atoi(bsOffset.c_str());
859 }
860 m_pSyntax->RestorePos(dwSavePos);
861 }
862 } else {
863 pObj->Release();
864 }
865 }
866 }
867 }
868 --i;
869 state = ParserState::kDefault;
870 } else if (byte == "trailer"[inside_index]) {
871 inside_index++;
872 } else {
873 --i;
874 state = ParserState::kDefault;
875 }
876 break;
877
878 case ParserState::kXref:
879 if (inside_index == 4) {
880 last_xref = pos + i - 4;
881 state = ParserState::kWhitespace;
882 } else if (byte == "xref"[inside_index]) {
883 inside_index++;
884 } else {
885 --i;
886 state = ParserState::kDefault;
887 }
888 break;
889
890 case ParserState::kComment:
891 if (PDFCharIsLineEnding(byte))
892 state = ParserState::kDefault;
893 break;
894
895 case ParserState::kString:
896 if (byte == ')') {
897 if (depth > 0)
898 depth--;
899 } else if (byte == '(') {
900 depth++;
901 }
902
903 if (!depth)
904 state = ParserState::kDefault;
905 break;
906
907 case ParserState::kHexString:
// A second '<' directly after the first also leaves this state.
908 if (byte == '>' || (byte == '<' && inside_index == 1))
909 state = ParserState::kDefault;
910 inside_index = 0;
911 break;
912
913 case ParserState::kEscapedString:
914 if (PDFCharIsDelimiter(byte) || PDFCharIsWhitespace(byte)) {
915 --i;
916 state = ParserState::kDefault;
917 }
918 break;
919
920 case ParserState::kEndObj:
921 if (PDFCharIsWhitespace(byte)) {
922 state = ParserState::kDefault;
923 } else if (byte == '%' || byte == '(' || byte == '<' ||
924 byte == '\\') {
925 state = ParserState::kDefault;
926 --i;
927 } else if (inside_index == 6) {
928 state = ParserState::kDefault;
929 --i;
930 } else if (byte == "endobj"[inside_index]) {
931 inside_index++;
932 }
933 break;
934 }
935
// |pos| was already moved to the end of a large object; zero |size| so the
// addition after the loop leaves it there.
936 if (bOverFlow) {
937 size = 0;
938 break;
939 }
940 }
941 pos += size;
942
943 // If the position has not changed at all or went backwards in a loop
944 // iteration, then break out to prevent infinite looping.
945 if (pos <= saved_pos)
946 break;
947 }
948
// Record one final offset: the last "xref" keyword when it follows the last
// object, otherwise the last "trailer" keyword, falling back to the end of
// the file.
949 if (last_xref != -1 && last_xref > last_obj)
950 last_trailer = last_xref;
951 else if (last_trailer == -1 || last_xref < last_obj)
952 last_trailer = m_pSyntax->m_FileLen;
953
954 m_SortedOffset.insert(last_trailer - m_pSyntax->m_HeaderOffset);
955 return m_pTrailer && !m_ObjectInfo.empty();
956 }
957
// Loads one cross reference stream located at |*pos|.
//
// The object at |*pos| is parsed and must be a stream. Its dictionary is
// cloned and stored as |m_pTrailer| (when |bMainXRef| is set, which also
// shrinks the object map to "Size" and resets every entry's type to 0) or
// appended to |m_Trailers|. The stream data is then decoded using the field
// widths from "W" and the (start, count) pairs from "Index", filling in
// |m_ObjectInfo| and |m_SortedOffset|.
//
// On return |*pos| holds the dictionary's "Prev" value, once the object has
// been confirmed to be a stream. Returns FALSE when the object cannot be
// parsed, is not a stream, collides with the document root, has a negative
// "Size", lacks a usable "W" array, or contains a type-2 entry that names an
// invalid object number.
958 FX_BOOL CPDF_Parser::LoadCrossRefV5(FX_FILESIZE* pos, FX_BOOL bMainXRef) {
959 CPDF_Object* pObject = ParseIndirectObjectAt(m_pDocument, *pos, 0);
960 if (!pObject)
961 return FALSE;
962
963 if (m_pDocument) {
964 CPDF_Dictionary* pRootDict = m_pDocument->GetRoot();
965 if (pRootDict && pRootDict->GetObjNum() == pObject->m_ObjNum) {
966 // If |pObject| has an objnum assigned then this will leak as Release()
967 // will early exit.
968 if (pObject->IsStream())
969 pObject->Release();
970 return FALSE;
971 }
972 if (!m_pDocument->ReplaceIndirectObjectIfHigherGeneration(pObject->m_ObjNum,
973 pObject)) {
974 return FALSE;
975 }
976 }
977
// NOTE(review): when |pObject| is not a stream it is not released on this
// path - confirm who owns it, particularly when |m_pDocument| is null.
978 CPDF_Stream* pStream = pObject->AsStream();
979 if (!pStream)
980 return FALSE;
981
982 CPDF_Dictionary* pDict = pStream->GetDict();
983 *pos = pDict->GetIntegerFor("Prev");
984 int32_t size = pDict->GetIntegerFor("Size");
985 if (size < 0) {
986 pStream->Release();
987 return FALSE;
988 }
989
990 CPDF_Dictionary* pNewTrailer = ToDictionary(pDict->Clone());
991 if (bMainXRef) {
// NOTE(review): a previously set |m_pTrailer| is overwritten here without
// Release(). StartParse() calls LoadAllCrossRefV4() first, which can set a
// trailer and then fail - confirm this does not leak.
992 m_pTrailer = pNewTrailer;
993 ShrinkObjectMap(size);
994 for (auto& it : m_ObjectInfo)
995 it.second.type = 0;
996 } else {
997 m_Trailers.Add(pNewTrailer);
998 }
999
// Collect the (start object number, count) pairs from "Index"; pairs with a
// negative start or a non-positive count are dropped.
1000 std::vector<std::pair<int32_t, int32_t>> arrIndex;
1001 CPDF_Array* pArray = pDict->GetArrayFor("Index");
1002 if (pArray) {
1003 for (size_t i = 0; i < pArray->GetCount() / 2; i++) {
1004 CPDF_Object* pStartNumObj = pArray->GetObjectAt(i * 2);
1005 CPDF_Object* pCountObj = pArray->GetObjectAt(i * 2 + 1);
1006
1007 if (ToNumber(pStartNumObj) && ToNumber(pCountObj)) {
1008 int nStartNum = pStartNumObj->GetInteger();
1009 int nCount = pCountObj->GetInteger();
1010 if (nStartNum >= 0 && nCount > 0)
1011 arrIndex.push_back(std::make_pair(nStartNum, nCount));
1012 }
1013 }
1014 }
1015
// Without usable pairs, fall back to a single section covering [0, size).
1016 if (arrIndex.size() == 0)
1017 arrIndex.push_back(std::make_pair(0, size));
1018
1019 pArray = pDict->GetArrayFor("W");
1020 if (!pArray) {
1021 pStream->Release();
1022 return FALSE;
1023 }
1024
// "W" gives the byte width of each field in an entry; at least three widths
// are required and their sum must fit in 32 bits.
1025 CFX_ArrayTemplate<uint32_t> WidthArray;
1026 FX_SAFE_UINT32 dwAccWidth = 0;
1027 for (size_t i = 0; i < pArray->GetCount(); ++i) {
1028 WidthArray.Add(pArray->GetIntegerAt(i));
1029 dwAccWidth += WidthArray[i];
1030 }
1031
1032 if (!dwAccWidth.IsValid() || WidthArray.GetSize() < 3) {
1033 pStream->Release();
1034 return FALSE;
1035 }
1036
1037 uint32_t totalWidth = dwAccWidth.ValueOrDie();
1038 CPDF_StreamAcc acc;
1039 acc.LoadAllData(pStream);
1040
// |segindex| is the number of entries consumed so far, i.e. the index of the
// next section's first entry within the stream data.
1041 const uint8_t* pData = acc.GetData();
1042 uint32_t dwTotalSize = acc.GetSize();
1043 uint32_t segindex = 0;
1044 for (uint32_t i = 0; i < arrIndex.size(); i++) {
1045 int32_t startnum = arrIndex[i].first;
1046 if (startnum < 0)
1047 continue;
1048
1049 m_dwXrefStartObjNum =
1050 pdfium::base::checked_cast<uint32_t, int32_t>(startnum);
1051 uint32_t count =
1052 pdfium::base::checked_cast<uint32_t, int32_t>(arrIndex[i].second);
// Skip sections that would read past the end of the decoded data.
1053 FX_SAFE_UINT32 dwCaculatedSize = segindex;
1054 dwCaculatedSize += count;
1055 dwCaculatedSize *= totalWidth;
1056 if (!dwCaculatedSize.IsValid() ||
1057 dwCaculatedSize.ValueOrDie() > dwTotalSize) {
1058 continue;
1059 }
1060
// Skip sections that reach beyond the highest object number currently known.
1061 const uint8_t* segstart = pData + segindex * totalWidth;
1062 FX_SAFE_UINT32 dwMaxObjNum = startnum;
1063 dwMaxObjNum += count;
1064 uint32_t dwV5Size = m_ObjectInfo.empty() ? 0 : GetLastObjNum() + 1;
1065 if (!dwMaxObjNum.IsValid() || dwMaxObjNum.ValueOrDie() > dwV5Size)
1066 continue;
1067
1068 for (uint32_t j = 0; j < count; j++) {
// The type field defaults to 1 when its width is 0.
1069 int32_t type = 1;
1070 const uint8_t* entrystart = segstart + j * totalWidth;
1071 if (WidthArray[0])
1072 type = GetVarInt(entrystart, WidthArray[0]);
1073
// Objects tagged 255 (see the type-2 handling below) only get their position
// filled in; their type is left alone.
1074 if (GetObjectType(startnum + j) == 255) {
1075 FX_FILESIZE offset =
1076 GetVarInt(entrystart + WidthArray[0], WidthArray[1]);
1077 m_ObjectInfo[startnum + j].pos = offset;
1078 m_SortedOffset.insert(offset);
1079 continue;
1080 }
1081
// An entry that already has a non-zero type is not overwritten.
1082 if (GetObjectType(startnum + j))
1083 continue;
1084
1085 m_ObjectInfo[startnum + j].type = type;
1086 if (type == 0) {
1087 m_ObjectInfo[startnum + j].pos = 0;
1088 } else {
1089 FX_FILESIZE offset =
1090 GetVarInt(entrystart + WidthArray[0], WidthArray[1]);
1091 m_ObjectInfo[startnum + j].pos = offset;
1092 if (type == 1) {
1093 m_SortedOffset.insert(offset);
1094 } else {
// For other types the second field is treated as an object number; that
// object must be valid and is tagged with type 255.
1095 if (offset < 0 || !IsValidObjectNumber(offset)) {
1096 pStream->Release();
1097 return FALSE;
1098 }
1099 m_ObjectInfo[offset].type = 255;
1100 }
1101 }
1102 }
1103 segindex += count;
1104 }
1105 pStream->Release();
1106 return TRUE;
1107 }
1108
1109 CPDF_Array* CPDF_Parser::GetIDArray() {
1110 CPDF_Object* pID = m_pTrailer ? m_pTrailer->GetObjectFor("ID") : nullptr;
1111 if (!pID)
1112 return nullptr;
1113
1114 if (CPDF_Reference* pRef = pID->AsReference()) {
1115 pID = ParseIndirectObject(nullptr, pRef->GetRefObjNum());
1116 m_pTrailer->SetFor("ID", pID);
1117 }
1118 return ToArray(pID);
1119 }
1120
1121 uint32_t CPDF_Parser::GetRootObjNum() {
1122 CPDF_Reference* pRef =
1123 ToReference(m_pTrailer ? m_pTrailer->GetObjectFor("Root") : nullptr);
1124 return pRef ? pRef->GetRefObjNum() : 0;
1125 }
1126
1127 uint32_t CPDF_Parser::GetInfoObjNum() {
1128 CPDF_Reference* pRef =
1129 ToReference(m_pTrailer ? m_pTrailer->GetObjectFor("Info") : nullptr);
1130 return pRef ? pRef->GetRefObjNum() : 0;
1131 }
1132
1133 CPDF_Object* CPDF_Parser::ParseIndirectObject(
1134 CPDF_IndirectObjectHolder* pObjList,
1135 uint32_t objnum) {
1136 if (!IsValidObjectNumber(objnum))
1137 return nullptr;
1138
1139 // Prevent circular parsing the same object.
1140 if (pdfium::ContainsKey(m_ParsingObjNums, objnum))
1141 return nullptr;
1142
1143 pdfium::ScopedSetInsertion<uint32_t> local_insert(&m_ParsingObjNums, objnum);
1144 if (GetObjectType(objnum) == 1 || GetObjectType(objnum) == 255) {
1145 FX_FILESIZE pos = m_ObjectInfo[objnum].pos;
1146 if (pos <= 0)
1147 return nullptr;
1148 return ParseIndirectObjectAt(pObjList, pos, objnum);
1149 }
1150 if (GetObjectType(objnum) != 2)
1151 return nullptr;
1152
1153 CPDF_StreamAcc* pObjStream = GetObjectStream(m_ObjectInfo[objnum].pos);
1154 if (!pObjStream)
1155 return nullptr;
1156
1157 ScopedFileStream file(FX_CreateMemoryStream(
1158 (uint8_t*)pObjStream->GetData(), (size_t)pObjStream->GetSize(), FALSE));
1159 CPDF_SyntaxParser syntax;
1160 syntax.InitParser(file.get(), 0);
1161 const int32_t offset = GetStreamFirst(pObjStream);
1162
1163 // Read object numbers from |pObjStream| into a cache.
1164 if (!pdfium::ContainsKey(m_ObjCache, pObjStream)) {
1165 for (int32_t i = GetStreamNCount(pObjStream); i > 0; --i) {
1166 uint32_t thisnum = syntax.GetDirectNum();
1167 uint32_t thisoff = syntax.GetDirectNum();
1168 m_ObjCache[pObjStream][thisnum] = thisoff;
1169 }
1170 }
1171
1172 const auto it = m_ObjCache[pObjStream].find(objnum);
1173 if (it == m_ObjCache[pObjStream].end())
1174 return nullptr;
1175
1176 syntax.RestorePos(offset + it->second);
1177 return syntax.GetObject(pObjList, 0, 0, true);
1178 }
1179
1180 CPDF_StreamAcc* CPDF_Parser::GetObjectStream(uint32_t objnum) {
1181 auto it = m_ObjectStreamMap.find(objnum);
1182 if (it != m_ObjectStreamMap.end())
1183 return it->second.get();
1184
1185 if (!m_pDocument)
1186 return nullptr;
1187
1188 const CPDF_Stream* pStream =
1189 ToStream(m_pDocument->GetOrParseIndirectObject(objnum));
1190 if (!pStream)
1191 return nullptr;
1192
1193 CPDF_StreamAcc* pStreamAcc = new CPDF_StreamAcc;
1194 pStreamAcc->LoadAllData(pStream);
1195 m_ObjectStreamMap[objnum].reset(pStreamAcc);
1196 return pStreamAcc;
1197 }
1198
1199 FX_FILESIZE CPDF_Parser::GetObjectSize(uint32_t objnum) const {
1200 if (!IsValidObjectNumber(objnum))
1201 return 0;
1202
1203 if (GetObjectType(objnum) == 2)
1204 objnum = GetObjectPositionOrZero(objnum);
1205
1206 if (GetObjectType(objnum) != 1 && GetObjectType(objnum) != 255)
1207 return 0;
1208
1209 FX_FILESIZE offset = GetObjectPositionOrZero(objnum);
1210 if (offset == 0)
1211 return 0;
1212
1213 auto it = m_SortedOffset.find(offset);
1214 if (it == m_SortedOffset.end() || ++it == m_SortedOffset.end())
1215 return 0;
1216
1217 return *it - offset;
1218 }
1219
1220 void CPDF_Parser::GetIndirectBinary(uint32_t objnum,
1221 uint8_t*& pBuffer,
1222 uint32_t& size) {
1223 pBuffer = nullptr;
1224 size = 0;
1225 if (!IsValidObjectNumber(objnum))
1226 return;
1227
1228 if (GetObjectType(objnum) == 2) {
1229 CPDF_StreamAcc* pObjStream = GetObjectStream(m_ObjectInfo[objnum].pos);
1230 if (!pObjStream)
1231 return;
1232
1233 int32_t offset = GetStreamFirst(pObjStream);
1234 const uint8_t* pData = pObjStream->GetData();
1235 uint32_t totalsize = pObjStream->GetSize();
1236 ScopedFileStream file(
1237 FX_CreateMemoryStream((uint8_t*)pData, (size_t)totalsize, FALSE));
1238
1239 CPDF_SyntaxParser syntax;
1240 syntax.InitParser(file.get(), 0);
1241 for (int i = GetStreamNCount(pObjStream); i > 0; --i) {
1242 uint32_t thisnum = syntax.GetDirectNum();
1243 uint32_t thisoff = syntax.GetDirectNum();
1244 if (thisnum != objnum)
1245 continue;
1246
1247 if (i == 1) {
1248 size = totalsize - (thisoff + offset);
1249 } else {
1250 syntax.GetDirectNum(); // Skip nextnum.
1251 uint32_t nextoff = syntax.GetDirectNum();
1252 size = nextoff - thisoff;
1253 }
1254
1255 pBuffer = FX_Alloc(uint8_t, size);
1256 FXSYS_memcpy(pBuffer, pData + thisoff + offset, size);
1257 return;
1258 }
1259 return;
1260 }
1261
1262 if (GetObjectType(objnum) != 1)
1263 return;
1264
1265 FX_FILESIZE pos = m_ObjectInfo[objnum].pos;
1266 if (pos == 0)
1267 return;
1268
1269 FX_FILESIZE SavedPos = m_pSyntax->SavePos();
1270 m_pSyntax->RestorePos(pos);
1271
1272 bool bIsNumber;
1273 CFX_ByteString word = m_pSyntax->GetNextWord(&bIsNumber);
1274 if (!bIsNumber) {
1275 m_pSyntax->RestorePos(SavedPos);
1276 return;
1277 }
1278
1279 uint32_t parser_objnum = FXSYS_atoui(word.c_str());
1280 if (parser_objnum && parser_objnum != objnum) {
1281 m_pSyntax->RestorePos(SavedPos);
1282 return;
1283 }
1284
1285 word = m_pSyntax->GetNextWord(&bIsNumber);
1286 if (!bIsNumber) {
1287 m_pSyntax->RestorePos(SavedPos);
1288 return;
1289 }
1290
1291 if (m_pSyntax->GetKeyword() != "obj") {
1292 m_pSyntax->RestorePos(SavedPos);
1293 return;
1294 }
1295
1296 auto it = m_SortedOffset.find(pos);
1297 if (it == m_SortedOffset.end() || ++it == m_SortedOffset.end()) {
1298 m_pSyntax->RestorePos(SavedPos);
1299 return;
1300 }
1301
1302 FX_FILESIZE nextoff = *it;
1303 FX_BOOL bNextOffValid = FALSE;
1304 if (nextoff != pos) {
1305 m_pSyntax->RestorePos(nextoff);
1306 word = m_pSyntax->GetNextWord(&bIsNumber);
1307 if (word == "xref") {
1308 bNextOffValid = TRUE;
1309 } else if (bIsNumber) {
1310 word = m_pSyntax->GetNextWord(&bIsNumber);
1311 if (bIsNumber && m_pSyntax->GetKeyword() == "obj") {
1312 bNextOffValid = TRUE;
1313 }
1314 }
1315 }
1316
1317 if (!bNextOffValid) {
1318 m_pSyntax->RestorePos(pos);
1319 while (1) {
1320 if (m_pSyntax->GetKeyword() == "endobj")
1321 break;
1322
1323 if (m_pSyntax->SavePos() == m_pSyntax->m_FileLen)
1324 break;
1325 }
1326 nextoff = m_pSyntax->SavePos();
1327 }
1328
1329 size = (uint32_t)(nextoff - pos);
1330 pBuffer = FX_Alloc(uint8_t, size);
1331 m_pSyntax->RestorePos(pos);
1332 m_pSyntax->ReadBlock(pBuffer, size);
1333 m_pSyntax->RestorePos(SavedPos);
1334 }
1335
1336 CPDF_Object* CPDF_Parser::ParseIndirectObjectAt(
1337 CPDF_IndirectObjectHolder* pObjList,
1338 FX_FILESIZE pos,
1339 uint32_t objnum) {
1340 FX_FILESIZE SavedPos = m_pSyntax->SavePos();
1341 m_pSyntax->RestorePos(pos);
1342 bool bIsNumber;
1343 CFX_ByteString word = m_pSyntax->GetNextWord(&bIsNumber);
1344 if (!bIsNumber) {
1345 m_pSyntax->RestorePos(SavedPos);
1346 return nullptr;
1347 }
1348
1349 FX_FILESIZE objOffset = m_pSyntax->SavePos();
1350 objOffset -= word.GetLength();
1351 uint32_t parser_objnum = FXSYS_atoui(word.c_str());
1352 if (objnum && parser_objnum != objnum) {
1353 m_pSyntax->RestorePos(SavedPos);
1354 return nullptr;
1355 }
1356
1357 word = m_pSyntax->GetNextWord(&bIsNumber);
1358 if (!bIsNumber) {
1359 m_pSyntax->RestorePos(SavedPos);
1360 return nullptr;
1361 }
1362
1363 uint32_t parser_gennum = FXSYS_atoui(word.c_str());
1364 if (m_pSyntax->GetKeyword() != "obj") {
1365 m_pSyntax->RestorePos(SavedPos);
1366 return nullptr;
1367 }
1368
1369 CPDF_Object* pObj =
1370 m_pSyntax->GetObject(pObjList, objnum, parser_gennum, true);
1371 m_pSyntax->SavePos();
1372
1373 CFX_ByteString bsWord = m_pSyntax->GetKeyword();
1374 if (bsWord == "endobj")
1375 m_pSyntax->SavePos();
1376
1377 m_pSyntax->RestorePos(SavedPos);
1378 if (pObj) {
1379 if (!objnum)
1380 pObj->m_ObjNum = parser_objnum;
1381 pObj->m_GenNum = parser_gennum;
1382 }
1383 return pObj;
1384 }
1385
1386 CPDF_Object* CPDF_Parser::ParseIndirectObjectAtByStrict(
1387 CPDF_IndirectObjectHolder* pObjList,
1388 FX_FILESIZE pos,
1389 uint32_t objnum,
1390 FX_FILESIZE* pResultPos) {
1391 FX_FILESIZE SavedPos = m_pSyntax->SavePos();
1392 m_pSyntax->RestorePos(pos);
1393
1394 bool bIsNumber;
1395 CFX_ByteString word = m_pSyntax->GetNextWord(&bIsNumber);
1396 if (!bIsNumber) {
1397 m_pSyntax->RestorePos(SavedPos);
1398 return nullptr;
1399 }
1400
1401 uint32_t parser_objnum = FXSYS_atoui(word.c_str());
1402 if (objnum && parser_objnum != objnum) {
1403 m_pSyntax->RestorePos(SavedPos);
1404 return nullptr;
1405 }
1406
1407 word = m_pSyntax->GetNextWord(&bIsNumber);
1408 if (!bIsNumber) {
1409 m_pSyntax->RestorePos(SavedPos);
1410 return nullptr;
1411 }
1412
1413 uint32_t gennum = FXSYS_atoui(word.c_str());
1414 if (m_pSyntax->GetKeyword() != "obj") {
1415 m_pSyntax->RestorePos(SavedPos);
1416 return nullptr;
1417 }
1418
1419 CPDF_Object* pObj = m_pSyntax->GetObjectForStrict(pObjList, objnum, gennum);
1420 if (pResultPos)
1421 *pResultPos = m_pSyntax->m_Pos;
1422
1423 m_pSyntax->RestorePos(SavedPos);
1424 return pObj;
1425 }
1426
1427 CPDF_Dictionary* CPDF_Parser::LoadTrailerV4() {
1428 if (m_pSyntax->GetKeyword() != "trailer")
1429 return nullptr;
1430
1431 std::unique_ptr<CPDF_Object, ReleaseDeleter<CPDF_Object>> pObj(
1432 m_pSyntax->GetObject(m_pDocument, 0, 0, true));
1433 if (!ToDictionary(pObj.get()))
1434 return nullptr;
1435 return pObj.release()->AsDictionary();
1436 }
1437
1438 uint32_t CPDF_Parser::GetPermissions() const {
1439 if (!m_pSecurityHandler)
1440 return 0xFFFFFFFF;
1441
1442 uint32_t dwPermission = m_pSecurityHandler->GetPermissions();
1443 if (m_pEncryptDict && m_pEncryptDict->GetStringFor("Filter") == "Standard") {
1444 // See PDF Reference 1.7, page 123, table 3.20.
1445 dwPermission &= 0xFFFFFFFC;
1446 dwPermission |= 0xFFFFF0C0;
1447 }
1448 return dwPermission;
1449 }
1450
1451 FX_BOOL CPDF_Parser::IsLinearizedFile(IFX_FileRead* pFileAccess,
1452 uint32_t offset) {
1453 m_pSyntax->InitParser(pFileAccess, offset);
1454 m_pSyntax->RestorePos(m_pSyntax->m_HeaderOffset + 9);
1455
1456 FX_FILESIZE SavedPos = m_pSyntax->SavePos();
1457 bool bIsNumber;
1458 CFX_ByteString word = m_pSyntax->GetNextWord(&bIsNumber);
1459 if (!bIsNumber)
1460 return FALSE;
1461
1462 uint32_t objnum = FXSYS_atoui(word.c_str());
1463 word = m_pSyntax->GetNextWord(&bIsNumber);
1464 if (!bIsNumber)
1465 return FALSE;
1466
1467 uint32_t gennum = FXSYS_atoui(word.c_str());
1468 if (m_pSyntax->GetKeyword() != "obj") {
1469 m_pSyntax->RestorePos(SavedPos);
1470 return FALSE;
1471 }
1472
1473 m_pLinearized = m_pSyntax->GetObject(nullptr, objnum, gennum, true);
1474 if (!m_pLinearized)
1475 return FALSE;
1476
1477 CPDF_Dictionary* pDict = m_pLinearized->GetDict();
1478 if (pDict && pDict->GetObjectFor("Linearized")) {
1479 m_pSyntax->GetNextWord(nullptr);
1480
1481 CPDF_Object* pLen = pDict->GetObjectFor("L");
1482 if (!pLen) {
1483 m_pLinearized->Release();
1484 m_pLinearized = nullptr;
1485 return FALSE;
1486 }
1487
1488 if (pLen->GetInteger() != (int)pFileAccess->GetSize())
1489 return FALSE;
1490
1491 if (CPDF_Number* pNo = ToNumber(pDict->GetObjectFor("P")))
1492 m_dwFirstPageNo = pNo->GetInteger();
1493
1494 if (CPDF_Number* pTable = ToNumber(pDict->GetObjectFor("T")))
1495 m_LastXRefOffset = pTable->GetInteger();
1496
1497 return TRUE;
1498 }
1499 m_pLinearized->Release();
1500 m_pLinearized = nullptr;
1501 return FALSE;
1502 }
1503
1504 CPDF_Parser::Error CPDF_Parser::StartLinearizedParse(IFX_FileRead* pFileAccess,
1505 CPDF_Document* pDocument) {
1506 ASSERT(!m_bHasParsed);
1507
1508 m_bXRefStream = FALSE;
1509 m_LastXRefOffset = 0;
1510 m_bOwnFileRead = true;
1511
1512 int32_t offset = GetHeaderOffset(pFileAccess);
1513 if (offset == -1)
1514 return FORMAT_ERROR;
1515
1516 if (!IsLinearizedFile(pFileAccess, offset)) {
1517 m_pSyntax->m_pFileAccess = nullptr;
1518 return StartParse(pFileAccess, std::move(pDocument));
1519 }
1520 m_bHasParsed = true;
1521 m_pDocument = pDocument;
1522
1523 FX_FILESIZE dwFirstXRefOffset = m_pSyntax->SavePos();
1524
1525 FX_BOOL bXRefRebuilt = FALSE;
1526 FX_BOOL bLoadV4 = LoadCrossRefV4(dwFirstXRefOffset, 0, FALSE);
1527 if (!bLoadV4 && !LoadCrossRefV5(&dwFirstXRefOffset, TRUE)) {
1528 if (!RebuildCrossRef())
1529 return FORMAT_ERROR;
1530
1531 bXRefRebuilt = TRUE;
1532 m_LastXRefOffset = 0;
1533 }
1534
1535 if (bLoadV4) {
1536 m_pTrailer = LoadTrailerV4();
1537 if (!m_pTrailer)
1538 return SUCCESS;
1539
1540 int32_t xrefsize = GetDirectInteger(m_pTrailer, "Size");
1541 if (xrefsize > 0)
1542 ShrinkObjectMap(xrefsize);
1543 }
1544
1545 Error eRet = SetEncryptHandler();
1546 if (eRet != SUCCESS)
1547 return eRet;
1548
1549 m_pDocument->LoadLinearizedDoc(m_pLinearized->GetDict());
1550 if (!m_pDocument->GetRoot() || m_pDocument->GetPageCount() == 0) {
1551 if (bXRefRebuilt)
1552 return FORMAT_ERROR;
1553
1554 ReleaseEncryptHandler();
1555 if (!RebuildCrossRef())
1556 return FORMAT_ERROR;
1557
1558 eRet = SetEncryptHandler();
1559 if (eRet != SUCCESS)
1560 return eRet;
1561
1562 m_pDocument->LoadLinearizedDoc(m_pLinearized->GetDict());
1563 if (!m_pDocument->GetRoot())
1564 return FORMAT_ERROR;
1565 }
1566
1567 if (GetRootObjNum() == 0) {
1568 ReleaseEncryptHandler();
1569 if (!RebuildCrossRef() || GetRootObjNum() == 0)
1570 return FORMAT_ERROR;
1571
1572 eRet = SetEncryptHandler();
1573 if (eRet != SUCCESS)
1574 return eRet;
1575 }
1576
1577 if (m_pSecurityHandler && m_pSecurityHandler->IsMetadataEncrypted()) {
1578 if (CPDF_Reference* pMetadata =
1579 ToReference(m_pDocument->GetRoot()->GetObjectFor("Metadata")))
1580 m_pSyntax->m_MetadataObjnum = pMetadata->GetRefObjNum();
1581 }
1582 return SUCCESS;
1583 }
1584
1585 FX_BOOL CPDF_Parser::LoadLinearizedAllCrossRefV5(FX_FILESIZE xrefpos) {
1586 if (!LoadCrossRefV5(&xrefpos, FALSE))
1587 return FALSE;
1588
1589 std::set<FX_FILESIZE> seen_xrefpos;
1590 while (xrefpos) {
1591 seen_xrefpos.insert(xrefpos);
1592 if (!LoadCrossRefV5(&xrefpos, FALSE))
1593 return FALSE;
1594
1595 // Check for circular references.
1596 if (pdfium::ContainsKey(seen_xrefpos, xrefpos))
1597 return FALSE;
1598 }
1599 m_ObjectStreamMap.clear();
1600 m_bXRefStream = TRUE;
1601 return TRUE;
1602 }
1603
1604 CPDF_Parser::Error CPDF_Parser::LoadLinearizedMainXRefTable() {
1605 uint32_t dwSaveMetadataObjnum = m_pSyntax->m_MetadataObjnum;
1606 m_pSyntax->m_MetadataObjnum = 0;
1607 if (m_pTrailer) {
1608 m_pTrailer->Release();
1609 m_pTrailer = nullptr;
1610 }
1611
1612 m_pSyntax->RestorePos(m_LastXRefOffset - m_pSyntax->m_HeaderOffset);
1613 uint8_t ch = 0;
1614 uint32_t dwCount = 0;
1615 m_pSyntax->GetNextChar(ch);
1616 while (PDFCharIsWhitespace(ch)) {
1617 ++dwCount;
1618 if (m_pSyntax->m_FileLen >=
1619 (FX_FILESIZE)(m_pSyntax->SavePos() + m_pSyntax->m_HeaderOffset)) {
1620 break;
1621 }
1622 m_pSyntax->GetNextChar(ch);
1623 }
1624 m_LastXRefOffset += dwCount;
1625 m_ObjectStreamMap.clear();
1626 m_ObjCache.clear();
1627
1628 if (!LoadLinearizedAllCrossRefV4(m_LastXRefOffset, m_dwXrefStartObjNum) &&
1629 !LoadLinearizedAllCrossRefV5(m_LastXRefOffset)) {
1630 m_LastXRefOffset = 0;
1631 m_pSyntax->m_MetadataObjnum = dwSaveMetadataObjnum;
1632 return FORMAT_ERROR;
1633 }
1634
1635 m_pSyntax->m_MetadataObjnum = dwSaveMetadataObjnum;
1636 return SUCCESS;
1637 }
OLDNEW
« no previous file with comments | « core/fpdfapi/fpdf_parser/cpdf_parser.h ('k') | core/fpdfapi/fpdf_parser/cpdf_parser_embeddertest.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698