Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(117)

Side by Side Diff: core/src/fpdfapi/fpdf_parser/fpdf_parser_parser.cpp

Issue 1773103003: Split off CPDF_Parser and CPDF_SimpleParser into .h/.cpp files (Closed) Base URL: https://pdfium.googlesource.com/pdfium.git@master
Patch Set: Then address C#3. Created 4 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2014 PDFium Authors. All rights reserved. 1 // Copyright 2014 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 6
7 #include "core/include/fpdfapi/fpdf_parser.h" 7 #include "core/include/fpdfapi/fpdf_parser.h"
8 8
9 #include <algorithm> 9 #include <algorithm>
10 #include <memory> 10 #include <memory>
11 #include <set> 11 #include <set>
12 #include <utility> 12 #include <utility>
13 #include <vector> 13 #include <vector>
14 14
15 #include "core/include/fpdfapi/cpdf_document.h" 15 #include "core/include/fpdfapi/cpdf_document.h"
16 #include "core/include/fpdfapi/cpdf_parser.h"
16 #include "core/include/fpdfapi/fpdf_module.h" 17 #include "core/include/fpdfapi/fpdf_module.h"
17 #include "core/include/fpdfapi/fpdf_page.h" 18 #include "core/include/fpdfapi/fpdf_page.h"
18 #include "core/include/fxcrt/fx_ext.h" 19 #include "core/include/fxcrt/fx_ext.h"
19 #include "core/include/fxcrt/fx_safe_types.h" 20 #include "core/include/fxcrt/fx_safe_types.h"
20 #include "core/src/fpdfapi/fpdf_page/pageint.h" 21 #include "core/src/fpdfapi/fpdf_page/pageint.h"
21 #include "core/src/fpdfapi/fpdf_parser/cpdf_syntax_parser.h" 22 #include "core/src/fpdfapi/fpdf_parser/cpdf_syntax_parser.h"
23 #include "core/src/fpdfapi/fpdf_parser/fpdf_parser_utility.h"
22 #include "core/src/fpdfapi/fpdf_parser/parser_int.h" 24 #include "core/src/fpdfapi/fpdf_parser/parser_int.h"
23 #include "third_party/base/stl_util.h" 25 #include "third_party/base/stl_util.h"
24 26
25 namespace { 27 namespace {
26 28
27 // A limit on the size of the xref table. Theoretical limits are higher, but
28 // this may be large enough in practice.
29 const int32_t kMaxXRefSize = 1048576;
30
31 // A limit on the maximum object number in the xref table. Theoretical limits
32 // are higher, but this may be large enough in practice.
33 const FX_DWORD kMaxObjectNumber = 1048576;
34
35 int32_t GetHeaderOffset(IFX_FileRead* pFile) {
36 // TODO(dsinclair): This is a complicated way of saying %PDF, simplify?
37 const FX_DWORD tag = FXDWORD_FROM_LSBFIRST(0x46445025);
38
39 const size_t kBufSize = 4;
40 uint8_t buf[kBufSize];
41 int32_t offset = 0;
42 while (offset <= 1024) {
43 if (!pFile->ReadBlock(buf, offset, kBufSize))
44 return -1;
45
46 if (*(FX_DWORD*)buf == tag)
47 return offset;
48
49 ++offset;
50 }
51 return -1;
52 }
53
54 int32_t GetDirectInteger(CPDF_Dictionary* pDict, const CFX_ByteStringC& key) {
55 CPDF_Number* pObj = ToNumber(pDict->GetElement(key));
56 return pObj ? pObj->GetInteger() : 0;
57 }
58
59 FX_DWORD GetVarInt(const uint8_t* p, int32_t n) {
60 FX_DWORD result = 0;
61 for (int32_t i = 0; i < n; ++i)
62 result = result * 256 + p[i];
63 return result;
64 }
65
66 int32_t GetStreamNCount(CPDF_StreamAcc* pObjStream) {
67 return pObjStream->GetDict()->GetIntegerBy("N");
68 }
69
70 int32_t GetStreamFirst(CPDF_StreamAcc* pObjStream) {
71 return pObjStream->GetDict()->GetIntegerBy("First");
72 }
73
74 bool CanReadFromBitStream(const CFX_BitStream* hStream, 29 bool CanReadFromBitStream(const CFX_BitStream* hStream,
75 const FX_SAFE_DWORD& num_bits) { 30 const FX_SAFE_DWORD& num_bits) {
76 return (num_bits.IsValid() && 31 return num_bits.IsValid() &&
77 hStream->BitsRemaining() >= num_bits.ValueOrDie()); 32 hStream->BitsRemaining() >= num_bits.ValueOrDie();
78 } 33 }
79 34
80 } // namespace 35 } // namespace
81 36
82 bool IsSignatureDict(const CPDF_Dictionary* pDict) { 37 bool IsSignatureDict(const CPDF_Dictionary* pDict) {
83 CPDF_Object* pType = pDict->GetElementValue("Type"); 38 CPDF_Object* pType = pDict->GetElementValue("Type");
84 if (!pType) 39 if (!pType)
85 pType = pDict->GetElementValue("FT"); 40 pType = pDict->GetElementValue("FT");
86 return pType && pType->GetString() == "Sig"; 41 return pType && pType->GetString() == "Sig";
87 } 42 }
88 43
89 CPDF_Parser::CPDF_Parser()
90 : m_pDocument(nullptr),
91 m_bOwnFileRead(true),
92 m_FileVersion(0),
93 m_pTrailer(nullptr),
94 m_pEncryptDict(nullptr),
95 m_pLinearized(nullptr),
96 m_dwFirstPageNo(0),
97 m_dwXrefStartObjNum(0) {
98 m_pSyntax.reset(new CPDF_SyntaxParser);
99 }
100
101 CPDF_Parser::~CPDF_Parser() {
102 CloseParser();
103 }
104
105 FX_DWORD CPDF_Parser::GetLastObjNum() const {
106 return m_ObjectInfo.empty() ? 0 : m_ObjectInfo.rbegin()->first;
107 }
108
109 bool CPDF_Parser::IsValidObjectNumber(FX_DWORD objnum) const {
110 return !m_ObjectInfo.empty() && objnum <= m_ObjectInfo.rbegin()->first;
111 }
112
113 FX_FILESIZE CPDF_Parser::GetObjectPositionOrZero(FX_DWORD objnum) const {
114 auto it = m_ObjectInfo.find(objnum);
115 return it != m_ObjectInfo.end() ? it->second.pos : 0;
116 }
117
118 uint8_t CPDF_Parser::GetObjectType(FX_DWORD objnum) const {
119 ASSERT(IsValidObjectNumber(objnum));
120 auto it = m_ObjectInfo.find(objnum);
121 return it != m_ObjectInfo.end() ? it->second.type : 0;
122 }
123
124 uint16_t CPDF_Parser::GetObjectGenNum(FX_DWORD objnum) const {
125 ASSERT(IsValidObjectNumber(objnum));
126 auto it = m_ObjectInfo.find(objnum);
127 return it != m_ObjectInfo.end() ? it->second.gennum : 0;
128 }
129
130 bool CPDF_Parser::IsObjectFreeOrNull(FX_DWORD objnum) const {
131 uint8_t type = GetObjectType(objnum);
132 return type == 0 || type == 255;
133 }
134
135 void CPDF_Parser::SetEncryptDictionary(CPDF_Dictionary* pDict) {
136 m_pEncryptDict = pDict;
137 }
138
139 CPDF_CryptoHandler* CPDF_Parser::GetCryptoHandler() {
140 return m_pSyntax->m_pCryptoHandler.get();
141 }
142
143 IFX_FileRead* CPDF_Parser::GetFileAccess() const {
144 return m_pSyntax->m_pFileAccess;
145 }
146
147 void CPDF_Parser::ShrinkObjectMap(FX_DWORD objnum) {
148 if (objnum == 0) {
149 m_ObjectInfo.clear();
150 return;
151 }
152
153 auto it = m_ObjectInfo.lower_bound(objnum);
154 while (it != m_ObjectInfo.end()) {
155 auto saved_it = it++;
156 m_ObjectInfo.erase(saved_it);
157 }
158
159 if (!pdfium::ContainsKey(m_ObjectInfo, objnum - 1))
160 m_ObjectInfo[objnum - 1].pos = 0;
161 }
162
163 void CPDF_Parser::CloseParser() {
164 m_bVersionUpdated = FALSE;
165 delete m_pDocument;
166 m_pDocument = nullptr;
167
168 if (m_pTrailer) {
169 m_pTrailer->Release();
170 m_pTrailer = nullptr;
171 }
172 ReleaseEncryptHandler();
173 SetEncryptDictionary(nullptr);
174
175 if (m_bOwnFileRead && m_pSyntax->m_pFileAccess) {
176 m_pSyntax->m_pFileAccess->Release();
177 m_pSyntax->m_pFileAccess = nullptr;
178 }
179
180 m_ObjectStreamMap.clear();
181 m_ObjCache.clear();
182 m_SortedOffset.clear();
183 m_ObjectInfo.clear();
184
185 int32_t iLen = m_Trailers.GetSize();
186 for (int32_t i = 0; i < iLen; ++i) {
187 if (CPDF_Dictionary* trailer = m_Trailers.GetAt(i))
188 trailer->Release();
189 }
190 m_Trailers.RemoveAll();
191
192 if (m_pLinearized) {
193 m_pLinearized->Release();
194 m_pLinearized = nullptr;
195 }
196 }
197
// Parses the PDF exposed through |pFileAccess|.
//
// Steps: reset any previous state, locate the "%PDF" header, read the two
// version digits, load the cross-reference data (classic table, then xref
// stream, then a full rebuild as a last resort), set up the encryption
// handler and load the document.
//
// Returns SUCCESS, FORMAT_ERROR, or whatever error SetEncryptHandler()
// reports.
198 CPDF_Parser::Error CPDF_Parser::StartParse(IFX_FileRead* pFileAccess) {
199 CloseParser();
200
201 m_bXRefStream = FALSE;
202 m_LastXRefOffset = 0;
203 m_bOwnFileRead = true;
204
205 int32_t offset = GetHeaderOffset(pFileAccess);
// No header found: the syntax parser has not been given the file yet, so it
// is released here directly.
// NOTE(review): |pFileAccess| is null-checked only after GetHeaderOffset()
// has already used it - confirm whether null can reach this function.
206 if (offset == -1) {
207 if (pFileAccess)
208 pFileAccess->Release();
209 return FORMAT_ERROR;
210 }
211 m_pSyntax->InitParser(pFileAccess, offset);
212
// Characters 5 and 7 are read as the major and minor version digits
// (presumably of a "%PDF-M.N" header); the version is stored as M * 10 + N.
213 uint8_t ch;
214 if (!m_pSyntax->GetCharAt(5, ch))
215 return FORMAT_ERROR;
216 if (std::isdigit(ch))
217 m_FileVersion = FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch)) * 10;
218
219 if (!m_pSyntax->GetCharAt(7, ch))
220 return FORMAT_ERROR;
221 if (std::isdigit(ch))
222 m_FileVersion += FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch));
223
224 if (m_pSyntax->m_FileLen < m_pSyntax->m_HeaderOffset + 9)
225 return FORMAT_ERROR;
226
// Position the parser 9 bytes before the end of the data (9 is the length of
// "startxref") before searching for that keyword.
227 m_pSyntax->RestorePos(m_pSyntax->m_FileLen - m_pSyntax->m_HeaderOffset - 9);
228 m_pDocument = new CPDF_Document(this);
229
230 FX_BOOL bXRefRebuilt = FALSE;
231 if (m_pSyntax->SearchWord("startxref", TRUE, FALSE, 4096)) {
232 m_SortedOffset.insert(m_pSyntax->SavePos());
233 m_pSyntax->GetKeyword();
234
235 bool bNumber;
236 CFX_ByteString xrefpos_str = m_pSyntax->GetNextWord(&bNumber);
237 if (!bNumber)
238 return FORMAT_ERROR;
239
// Try the classic table first, then the cross-reference stream form; if both
// fail, rebuild the table by scanning the file.
240 m_LastXRefOffset = (FX_FILESIZE)FXSYS_atoi64(xrefpos_str);
241 if (!LoadAllCrossRefV4(m_LastXRefOffset) &&
242 !LoadAllCrossRefV5(m_LastXRefOffset)) {
243 if (!RebuildCrossRef())
244 return FORMAT_ERROR;
245
246 bXRefRebuilt = TRUE;
247 m_LastXRefOffset = 0;
248 }
249 } else {
// No "startxref" keyword found: rebuilding is the only option.
250 if (!RebuildCrossRef())
251 return FORMAT_ERROR;
252
253 bXRefRebuilt = TRUE;
254 }
255 Error eRet = SetEncryptHandler();
256 if (eRet != SUCCESS)
257 return eRet;
258
259 m_pDocument->LoadDoc();
// A document without a root or without pages gets one rebuild attempt,
// unless the cross-reference data was already rebuilt above.
260 if (!m_pDocument->GetRoot() || m_pDocument->GetPageCount() == 0) {
261 if (bXRefRebuilt)
262 return FORMAT_ERROR;
263
264 ReleaseEncryptHandler();
265 if (!RebuildCrossRef())
266 return FORMAT_ERROR;
267
268 eRet = SetEncryptHandler();
269 if (eRet != SUCCESS)
270 return eRet;
271
272 m_pDocument->LoadDoc();
273 if (!m_pDocument->GetRoot())
274 return FORMAT_ERROR;
275 }
// A root object number of 0 also triggers a rebuild; it must be non-zero
// afterwards.
276 if (GetRootObjNum() == 0) {
277 ReleaseEncryptHandler();
278 if (!RebuildCrossRef() || GetRootObjNum() == 0)
279 return FORMAT_ERROR;
280
281 eRet = SetEncryptHandler();
282 if (eRet != SUCCESS)
283 return eRet;
284 }
// When a security handler exists and reports that metadata is not encrypted,
// record the object number of the root's "Metadata" reference in the syntax
// parser.
285 if (m_pSecurityHandler && !m_pSecurityHandler->IsMetadataEncrypted()) {
286 CPDF_Reference* pMetadata =
287 ToReference(m_pDocument->GetRoot()->GetElement("Metadata"));
288 if (pMetadata)
289 m_pSyntax->m_MetadataObjnum = pMetadata->GetRefObjNum();
290 }
291 return SUCCESS;
292 }
293 CPDF_Parser::Error CPDF_Parser::SetEncryptHandler() {
294 ReleaseEncryptHandler();
295 SetEncryptDictionary(nullptr);
296
297 if (!m_pTrailer)
298 return FORMAT_ERROR;
299
300 CPDF_Object* pEncryptObj = m_pTrailer->GetElement("Encrypt");
301 if (pEncryptObj) {
302 if (CPDF_Dictionary* pEncryptDict = pEncryptObj->AsDictionary()) {
303 SetEncryptDictionary(pEncryptDict);
304 } else if (CPDF_Reference* pRef = pEncryptObj->AsReference()) {
305 pEncryptObj = m_pDocument->GetIndirectObject(pRef->GetRefObjNum());
306 if (pEncryptObj)
307 SetEncryptDictionary(pEncryptObj->GetDict());
308 }
309 }
310
311 if (m_pEncryptDict) {
312 CFX_ByteString filter = m_pEncryptDict->GetStringBy("Filter");
313 std::unique_ptr<IPDF_SecurityHandler> pSecurityHandler;
314 Error err = HANDLER_ERROR;
315 if (filter == "Standard") {
316 pSecurityHandler.reset(new CPDF_StandardSecurityHandler);
317 err = PASSWORD_ERROR;
318 }
319 if (!pSecurityHandler)
320 return HANDLER_ERROR;
321
322 if (!pSecurityHandler->OnInit(this, m_pEncryptDict))
323 return err;
324
325 m_pSecurityHandler = std::move(pSecurityHandler);
326 std::unique_ptr<CPDF_CryptoHandler> pCryptoHandler(
327 m_pSecurityHandler->CreateCryptoHandler());
328 if (!pCryptoHandler->Init(m_pEncryptDict, m_pSecurityHandler.get()))
329 return HANDLER_ERROR;
330 m_pSyntax->SetEncrypt(std::move(pCryptoHandler));
331 }
332 return SUCCESS;
333 }
334
335 void CPDF_Parser::ReleaseEncryptHandler() {
336 m_pSyntax->m_pCryptoHandler.reset();
337 m_pSecurityHandler.reset();
338 }
339
340 FX_FILESIZE CPDF_Parser::GetObjectOffset(FX_DWORD objnum) const {
341 if (!IsValidObjectNumber(objnum))
342 return 0;
343
344 if (GetObjectType(objnum) == 1)
345 return GetObjectPositionOrZero(objnum);
346
347 if (GetObjectType(objnum) == 2) {
348 FX_FILESIZE pos = GetObjectPositionOrZero(objnum);
349 return GetObjectPositionOrZero(pos);
350 }
351 return 0;
352 }
353
354 FX_BOOL CPDF_Parser::LoadAllCrossRefV4(FX_FILESIZE xrefpos) {
355 if (!LoadCrossRefV4(xrefpos, 0, TRUE))
356 return FALSE;
357
358 m_pTrailer = LoadTrailerV4();
359 if (!m_pTrailer)
360 return FALSE;
361
362 int32_t xrefsize = GetDirectInteger(m_pTrailer, "Size");
363 if (xrefsize > 0 && xrefsize <= kMaxXRefSize)
364 ShrinkObjectMap(xrefsize);
365
366 std::vector<FX_FILESIZE> CrossRefList;
367 std::vector<FX_FILESIZE> XRefStreamList;
368 std::set<FX_FILESIZE> seen_xrefpos;
369
370 CrossRefList.push_back(xrefpos);
371 XRefStreamList.push_back(GetDirectInteger(m_pTrailer, "XRefStm"));
372 seen_xrefpos.insert(xrefpos);
373
374 // When |m_pTrailer| doesn't have Prev entry or Prev entry value is not
375 // numerical, GetDirectInteger() returns 0. Loading will end.
376 xrefpos = GetDirectInteger(m_pTrailer, "Prev");
377 while (xrefpos) {
378 // Check for circular references.
379 if (pdfium::ContainsKey(seen_xrefpos, xrefpos))
380 return FALSE;
381
382 seen_xrefpos.insert(xrefpos);
383
384 // SLOW ...
385 CrossRefList.insert(CrossRefList.begin(), xrefpos);
386 LoadCrossRefV4(xrefpos, 0, TRUE);
387
388 std::unique_ptr<CPDF_Dictionary, ReleaseDeleter<CPDF_Dictionary>> pDict(
389 LoadTrailerV4());
390 if (!pDict)
391 return FALSE;
392
393 xrefpos = GetDirectInteger(pDict.get(), "Prev");
394
395 // SLOW ...
396 XRefStreamList.insert(XRefStreamList.begin(),
397 pDict->GetIntegerBy("XRefStm"));
398 m_Trailers.Add(pDict.release());
399 }
400
401 for (size_t i = 0; i < CrossRefList.size(); ++i) {
402 if (!LoadCrossRefV4(CrossRefList[i], XRefStreamList[i], FALSE))
403 return FALSE;
404 }
405 return TRUE;
406 }
407
408 FX_BOOL CPDF_Parser::LoadLinearizedAllCrossRefV4(FX_FILESIZE xrefpos,
409 FX_DWORD dwObjCount) {
410 if (!LoadLinearizedCrossRefV4(xrefpos, dwObjCount))
411 return FALSE;
412
413 m_pTrailer = LoadTrailerV4();
414 if (!m_pTrailer)
415 return FALSE;
416
417 int32_t xrefsize = GetDirectInteger(m_pTrailer, "Size");
418 if (xrefsize == 0)
419 return FALSE;
420
421 std::vector<FX_FILESIZE> CrossRefList;
422 std::vector<FX_FILESIZE> XRefStreamList;
423 std::set<FX_FILESIZE> seen_xrefpos;
424
425 CrossRefList.push_back(xrefpos);
426 XRefStreamList.push_back(GetDirectInteger(m_pTrailer, "XRefStm"));
427 seen_xrefpos.insert(xrefpos);
428
429 xrefpos = GetDirectInteger(m_pTrailer, "Prev");
430 while (xrefpos) {
431 // Check for circular references.
432 if (pdfium::ContainsKey(seen_xrefpos, xrefpos))
433 return FALSE;
434
435 seen_xrefpos.insert(xrefpos);
436
437 // SLOW ...
438 CrossRefList.insert(CrossRefList.begin(), xrefpos);
439 LoadCrossRefV4(xrefpos, 0, TRUE);
440
441 std::unique_ptr<CPDF_Dictionary, ReleaseDeleter<CPDF_Dictionary>> pDict(
442 LoadTrailerV4());
443 if (!pDict)
444 return FALSE;
445
446 xrefpos = GetDirectInteger(pDict.get(), "Prev");
447
448 // SLOW ...
449 XRefStreamList.insert(XRefStreamList.begin(),
450 pDict->GetIntegerBy("XRefStm"));
451 m_Trailers.Add(pDict.release());
452 }
453
454 for (size_t i = 1; i < CrossRefList.size(); ++i) {
455 if (!LoadCrossRefV4(CrossRefList[i], XRefStreamList[i], FALSE))
456 return FALSE;
457 }
458 return TRUE;
459 }
460
461 FX_BOOL CPDF_Parser::LoadLinearizedCrossRefV4(FX_FILESIZE pos,
462 FX_DWORD dwObjCount) {
463 FX_FILESIZE dwStartPos = pos - m_pSyntax->m_HeaderOffset;
464
465 m_pSyntax->RestorePos(dwStartPos);
466 m_SortedOffset.insert(pos);
467
468 FX_DWORD start_objnum = 0;
469 FX_DWORD count = dwObjCount;
470 FX_FILESIZE SavedPos = m_pSyntax->SavePos();
471
472 const int32_t recordsize = 20;
473 std::vector<char> buf(1024 * recordsize + 1);
474 buf[1024 * recordsize] = '\0';
475
476 int32_t nBlocks = count / 1024 + 1;
477 for (int32_t block = 0; block < nBlocks; block++) {
478 int32_t block_size = block == nBlocks - 1 ? count % 1024 : 1024;
479 FX_DWORD dwReadSize = block_size * recordsize;
480 if ((FX_FILESIZE)(dwStartPos + dwReadSize) > m_pSyntax->m_FileLen)
481 return FALSE;
482
483 if (!m_pSyntax->ReadBlock(reinterpret_cast<uint8_t*>(buf.data()),
484 dwReadSize)) {
485 return FALSE;
486 }
487
488 for (int32_t i = 0; i < block_size; i++) {
489 FX_DWORD objnum = start_objnum + block * 1024 + i;
490 char* pEntry = &buf[i * recordsize];
491 if (pEntry[17] == 'f') {
492 m_ObjectInfo[objnum].pos = 0;
493 m_ObjectInfo[objnum].type = 0;
494 } else {
495 int32_t offset = FXSYS_atoi(pEntry);
496 if (offset == 0) {
497 for (int32_t c = 0; c < 10; c++) {
498 if (!std::isdigit(pEntry[c]))
499 return FALSE;
500 }
501 }
502
503 m_ObjectInfo[objnum].pos = offset;
504 int32_t version = FXSYS_atoi(pEntry + 11);
505 if (version >= 1)
506 m_bVersionUpdated = TRUE;
507
508 m_ObjectInfo[objnum].gennum = version;
509 if (m_ObjectInfo[objnum].pos < m_pSyntax->m_FileLen)
510 m_SortedOffset.insert(m_ObjectInfo[objnum].pos);
511
512 m_ObjectInfo[objnum].type = 1;
513 }
514 }
515 }
516 m_pSyntax->RestorePos(SavedPos + count * recordsize);
517 return TRUE;
518 }
519
520 bool CPDF_Parser::LoadCrossRefV4(FX_FILESIZE pos,
521 FX_FILESIZE streampos,
522 FX_BOOL bSkip) {
523 m_pSyntax->RestorePos(pos);
524 if (m_pSyntax->GetKeyword() != "xref")
525 return false;
526
527 m_SortedOffset.insert(pos);
528 if (streampos)
529 m_SortedOffset.insert(streampos);
530
531 while (1) {
532 FX_FILESIZE SavedPos = m_pSyntax->SavePos();
533 bool bIsNumber;
534 CFX_ByteString word = m_pSyntax->GetNextWord(&bIsNumber);
535 if (word.IsEmpty())
536 return false;
537
538 if (!bIsNumber) {
539 m_pSyntax->RestorePos(SavedPos);
540 break;
541 }
542
543 FX_DWORD start_objnum = FXSYS_atoui(word);
544 if (start_objnum >= kMaxObjectNumber)
545 return false;
546
547 FX_DWORD count = m_pSyntax->GetDirectNum();
548 m_pSyntax->ToNextWord();
549 SavedPos = m_pSyntax->SavePos();
550 const int32_t recordsize = 20;
551
552 m_dwXrefStartObjNum = start_objnum;
553 if (!bSkip) {
554 std::vector<char> buf(1024 * recordsize + 1);
555 buf[1024 * recordsize] = '\0';
556
557 int32_t nBlocks = count / 1024 + 1;
558 for (int32_t block = 0; block < nBlocks; block++) {
559 int32_t block_size = block == nBlocks - 1 ? count % 1024 : 1024;
560 m_pSyntax->ReadBlock(reinterpret_cast<uint8_t*>(buf.data()),
561 block_size * recordsize);
562
563 for (int32_t i = 0; i < block_size; i++) {
564 FX_DWORD objnum = start_objnum + block * 1024 + i;
565 char* pEntry = &buf[i * recordsize];
566 if (pEntry[17] == 'f') {
567 m_ObjectInfo[objnum].pos = 0;
568 m_ObjectInfo[objnum].type = 0;
569 } else {
570 FX_FILESIZE offset = (FX_FILESIZE)FXSYS_atoi64(pEntry);
571 if (offset == 0) {
572 for (int32_t c = 0; c < 10; c++) {
573 if (!std::isdigit(pEntry[c]))
574 return false;
575 }
576 }
577
578 m_ObjectInfo[objnum].pos = offset;
579 int32_t version = FXSYS_atoi(pEntry + 11);
580 if (version >= 1)
581 m_bVersionUpdated = TRUE;
582
583 m_ObjectInfo[objnum].gennum = version;
584 if (m_ObjectInfo[objnum].pos < m_pSyntax->m_FileLen)
585 m_SortedOffset.insert(m_ObjectInfo[objnum].pos);
586
587 m_ObjectInfo[objnum].type = 1;
588 }
589 }
590 }
591 }
592 m_pSyntax->RestorePos(SavedPos + count * recordsize);
593 }
594 return !streampos || LoadCrossRefV5(&streampos, FALSE);
595 }
596
597 FX_BOOL CPDF_Parser::LoadAllCrossRefV5(FX_FILESIZE xrefpos) {
598 if (!LoadCrossRefV5(&xrefpos, TRUE))
599 return FALSE;
600
601 std::set<FX_FILESIZE> seen_xrefpos;
602 while (xrefpos) {
603 seen_xrefpos.insert(xrefpos);
604 if (!LoadCrossRefV5(&xrefpos, FALSE))
605 return FALSE;
606
607 // Check for circular references.
608 if (pdfium::ContainsKey(seen_xrefpos, xrefpos))
609 return FALSE;
610 }
611 m_ObjectStreamMap.clear();
612 m_bXRefStream = TRUE;
613 return TRUE;
614 }
615
// Reconstructs the object map and the trailer without relying on the file's
// own cross-reference data.
//
// The file is read in 4096-byte chunks and fed byte by byte through a state
// machine that recognizes "<objnum> <gennum> obj" headers and the "trailer"
// and "xref" keywords while stepping over comments and strings. Every object
// header found is recorded in m_ObjectInfo; trailer dictionaries (and
// cross-reference stream dictionaries that carry a usable Root) feed
// m_pTrailer.
//
// Returns true when a trailer was found and at least one object was recorded.
616 FX_BOOL CPDF_Parser::RebuildCrossRef() {
// Start from a clean slate: drop the old object map, offsets and trailer.
617 m_ObjectInfo.clear();
618 m_SortedOffset.clear();
619 if (m_pTrailer) {
620 m_pTrailer->Release();
621 m_pTrailer = nullptr;
622 }
623
624 ParserState state = ParserState::kDefault;
625
// |inside_index| counts matched characters of the keyword being recognized;
// |depth| tracks nested parentheses inside a literal string.
626 int32_t inside_index = 0;
627 FX_DWORD objnum = 0;
628 FX_DWORD gennum = 0;
629 int32_t depth = 0;
630
631 const FX_DWORD kBufferSize = 4096;
632 std::vector<uint8_t> buffer(kBufferSize);
633
// |start_pos| / |start_pos1| remember where the two most recent numbers
// began; |last_obj|, |last_xref| and |last_trailer| remember where the last
// object header, "xref" keyword and "trailer" keyword were seen (-1 = never).
634 FX_FILESIZE pos = m_pSyntax->m_HeaderOffset;
635 FX_FILESIZE start_pos = 0;
636 FX_FILESIZE start_pos1 = 0;
637 FX_FILESIZE last_obj = -1;
638 FX_FILESIZE last_xref = -1;
639 FX_FILESIZE last_trailer = -1;
640
641 while (pos < m_pSyntax->m_FileLen) {
642 const FX_FILESIZE saved_pos = pos;
643 bool bOverFlow = false;
644 FX_DWORD size =
645 std::min((FX_DWORD)(m_pSyntax->m_FileLen - pos), kBufferSize);
646 if (!m_pSyntax->m_pFileAccess->ReadBlock(buffer.data(), pos, size))
647 break;
648
// Note: several states do "--i" before switching state so that the same byte
// is examined again under the new state.
649 for (FX_DWORD i = 0; i < size; i++) {
650 uint8_t byte = buffer[i];
651 switch (state) {
// kDefault: classify the byte that starts the next token.
652 case ParserState::kDefault:
653 if (PDFCharIsWhitespace(byte)) {
654 state = ParserState::kWhitespace;
655 } else if (std::isdigit(byte)) {
656 --i;
657 state = ParserState::kWhitespace;
658 } else if (byte == '%') {
659 inside_index = 0;
660 state = ParserState::kComment;
661 } else if (byte == '(') {
662 state = ParserState::kString;
663 depth = 1;
664 } else if (byte == '<') {
665 inside_index = 1;
666 state = ParserState::kHexString;
667 } else if (byte == '\\') {
668 state = ParserState::kEscapedString;
669 } else if (byte == 't') {
670 state = ParserState::kTrailer;
671 inside_index = 1;
672 }
673 break;
674
// kWhitespace: a digit starts a candidate object number; 't' and 'x' start
// the "trailer" and "xref" keywords.
675 case ParserState::kWhitespace:
676 if (std::isdigit(byte)) {
677 start_pos = pos + i;
678 state = ParserState::kObjNum;
679 objnum = FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(byte));
680 } else if (byte == 't') {
681 state = ParserState::kTrailer;
682 inside_index = 1;
683 } else if (byte == 'x') {
684 state = ParserState::kXref;
685 inside_index = 1;
686 } else if (!PDFCharIsWhitespace(byte)) {
687 --i;
688 state = ParserState::kDefault;
689 }
690 break;
691
// kObjNum: accumulate the decimal digits of the candidate object number.
692 case ParserState::kObjNum:
693 if (std::isdigit(byte)) {
694 objnum =
695 objnum * 10 + FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(byte));
696 } else if (PDFCharIsWhitespace(byte)) {
697 state = ParserState::kPostObjNum;
698 } else {
699 --i;
700 state = ParserState::kEndObj;
701 inside_index = 0;
702 }
703 break;
704
// kPostObjNum: whitespace after the first number; a digit starts the
// candidate generation number.
705 case ParserState::kPostObjNum:
706 if (std::isdigit(byte)) {
707 start_pos1 = pos + i;
708 state = ParserState::kGenNum;
709 gennum = FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(byte));
710 } else if (byte == 't') {
711 state = ParserState::kTrailer;
712 inside_index = 1;
713 } else if (!PDFCharIsWhitespace(byte)) {
714 --i;
715 state = ParserState::kDefault;
716 }
717 break;
718
// kGenNum: accumulate the decimal digits of the candidate generation number.
719 case ParserState::kGenNum:
720 if (std::isdigit(byte)) {
721 gennum =
722 gennum * 10 + FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(byte));
723 } else if (PDFCharIsWhitespace(byte)) {
724 state = ParserState::kPostGenNum;
725 } else {
726 --i;
727 state = ParserState::kDefault;
728 }
729 break;
730
// kPostGenNum: 'o' may begin "obj". Another digit means a third number in a
// row, so the previous second number becomes the object number candidate and
// a new generation number starts here.
731 case ParserState::kPostGenNum:
732 if (byte == 'o') {
733 state = ParserState::kBeginObj;
734 inside_index = 1;
735 } else if (std::isdigit(byte)) {
736 objnum = gennum;
737 gennum = FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(byte));
738 start_pos = start_pos1;
739 start_pos1 = pos + i;
740 state = ParserState::kGenNum;
741 } else if (byte == 't') {
742 state = ParserState::kTrailer;
743 inside_index = 1;
744 } else if (!PDFCharIsWhitespace(byte)) {
745 --i;
746 state = ParserState::kDefault;
747 }
748 break;
749
// kBeginObj: match the remaining "bj" of "obj"; once the keyword is followed
// by whitespace or a delimiter, parse and record the object.
750 case ParserState::kBeginObj:
751 switch (inside_index) {
752 case 1:
753 if (byte != 'b') {
754 --i;
755 state = ParserState::kDefault;
756 } else {
757 inside_index++;
758 }
759 break;
760 case 2:
761 if (byte != 'j') {
762 --i;
763 state = ParserState::kDefault;
764 } else {
765 inside_index++;
766 }
767 break;
768 case 3:
769 if (PDFCharIsWhitespace(byte) || PDFCharIsDelimiter(byte)) {
770 FX_FILESIZE obj_pos = start_pos - m_pSyntax->m_HeaderOffset;
771 m_SortedOffset.insert(obj_pos);
772 last_obj = start_pos;
773 FX_FILESIZE obj_end = 0;
774 CPDF_Object* pObject = ParseIndirectObjectAtByStrict(
775 m_pDocument, obj_pos, objnum, &obj_end);
// A stream whose dictionary has Type "XRef" and a Size entry, and whose Root
// resolves to a dictionary containing "Pages", supplies the trailer (a clone
// of the stream dictionary).
776 if (CPDF_Stream* pStream = ToStream(pObject)) {
777 if (CPDF_Dictionary* pDict = pStream->GetDict()) {
778 if ((pDict->KeyExist("Type")) &&
779 (pDict->GetStringBy("Type") == "XRef" &&
780 pDict->KeyExist("Size"))) {
781 CPDF_Object* pRoot = pDict->GetElement("Root");
782 if (pRoot && pRoot->GetDict() &&
783 pRoot->GetDict()->GetElement("Pages")) {
784 if (m_pTrailer)
785 m_pTrailer->Release();
786 m_pTrailer = ToDictionary(pDict->Clone());
787 }
788 }
789 }
790 }
791
// Work out how many bytes the parsed object occupies after its "obj" keyword
// so the scan can jump over the object body.
792 FX_FILESIZE offset = 0;
793 m_pSyntax->RestorePos(obj_pos);
794 offset = m_pSyntax->FindTag("obj", 0);
795 if (offset == -1)
796 offset = 0;
797 else
798 offset += 3;
799
// If the object runs past the current chunk, restart reading at its end;
// otherwise simply advance within the chunk.
800 FX_FILESIZE nLen = obj_end - obj_pos - offset;
801 if ((FX_DWORD)nLen > size - i) {
802 pos = obj_end + m_pSyntax->m_HeaderOffset;
803 bOverFlow = true;
804 } else {
805 i += (FX_DWORD)nLen;
806 }
807
// An object number that already has a non-zero position is only overwritten
// when this occurrence parsed successfully; a generation change is noted in
// m_bVersionUpdated. Otherwise a fresh type-1 entry is recorded.
808 if (!m_ObjectInfo.empty() && IsValidObjectNumber(objnum) &&
809 m_ObjectInfo[objnum].pos) {
810 if (pObject) {
811 FX_DWORD oldgen = GetObjectGenNum(objnum);
812 m_ObjectInfo[objnum].pos = obj_pos;
813 m_ObjectInfo[objnum].gennum = gennum;
814 if (oldgen != gennum)
815 m_bVersionUpdated = TRUE;
816 }
817 } else {
818 m_ObjectInfo[objnum].pos = obj_pos;
819 m_ObjectInfo[objnum].type = 1;
820 m_ObjectInfo[objnum].gennum = gennum;
821 }
822
823 if (pObject)
824 pObject->Release();
825 }
826 --i;
827 state = ParserState::kDefault;
828 break;
829 }
830 break;
831
// kTrailer: match "trailer"; once complete and followed by whitespace or a
// delimiter, parse the object that follows and use its dictionary.
832 case ParserState::kTrailer:
833 if (inside_index == 7) {
834 if (PDFCharIsWhitespace(byte) || PDFCharIsDelimiter(byte)) {
835 last_trailer = pos + i - 7;
836 m_pSyntax->RestorePos(pos + i - m_pSyntax->m_HeaderOffset);
837
838 CPDF_Object* pObj = m_pSyntax->GetObject(m_pDocument, 0, 0, true);
839 if (pObj) {
840 if (!pObj->IsDictionary() && !pObj->AsStream()) {
841 pObj->Release();
842 } else {
843 CPDF_Stream* pStream = pObj->AsStream();
844 if (CPDF_Dictionary* pTrailer =
845 pStream ? pStream->GetDict() : pObj->AsDictionary()) {
// A trailer already exists: merge this one's entries into it, but only when
// it has no Root or its Root reference points at a recorded object.
846 if (m_pTrailer) {
847 CPDF_Object* pRoot = pTrailer->GetElement("Root");
848 CPDF_Reference* pRef = ToReference(pRoot);
849 if (!pRoot ||
850 (pRef && IsValidObjectNumber(pRef->GetRefObjNum()) &&
851 m_ObjectInfo[pRef->GetRefObjNum()].pos != 0)) {
852 auto it = pTrailer->begin();
853 while (it != pTrailer->end()) {
854 const CFX_ByteString& key = it->first;
855 CPDF_Object* pElement = it->second;
856 ++it;
// Elements with an object number are stored as references; others are cloned.
// NOTE(review): a null |pElement| yields dwObjNum == 0 and would then be
// dereferenced by Clone() below - confirm dictionaries never hold null values.
857 FX_DWORD dwObjNum =
858 pElement ? pElement->GetObjNum() : 0;
859 if (dwObjNum) {
860 m_pTrailer->SetAtReference(key, m_pDocument,
861 dwObjNum);
862 } else {
863 m_pTrailer->SetAt(key, pElement->Clone());
864 }
865 }
866 }
867 pObj->Release();
868 } else {
// First trailer seen: adopt the dictionary itself, or a clone when it
// belongs to a stream that is released here.
869 if (pObj->IsStream()) {
870 m_pTrailer = ToDictionary(pTrailer->Clone());
871 pObj->Release();
872 } else {
873 m_pTrailer = pTrailer;
874 }
875
// Pick up the offset that follows a "startxref" keyword, if one comes next,
// then restore the parser position.
876 FX_FILESIZE dwSavePos = m_pSyntax->SavePos();
877 CFX_ByteString strWord = m_pSyntax->GetKeyword();
878 if (!strWord.Compare("startxref")) {
879 bool bNumber;
880 CFX_ByteString bsOffset =
881 m_pSyntax->GetNextWord(&bNumber);
882 if (bNumber)
883 m_LastXRefOffset = FXSYS_atoi(bsOffset);
884 }
885 m_pSyntax->RestorePos(dwSavePos);
886 }
887 } else {
888 pObj->Release();
889 }
890 }
891 }
892 }
893 --i;
894 state = ParserState::kDefault;
895 } else if (byte == "trailer"[inside_index]) {
896 inside_index++;
897 } else {
898 --i;
899 state = ParserState::kDefault;
900 }
901 break;
902
// kXref: match "xref" and remember where the keyword started.
903 case ParserState::kXref:
904 if (inside_index == 4) {
905 last_xref = pos + i - 4;
906 state = ParserState::kWhitespace;
907 } else if (byte == "xref"[inside_index]) {
908 inside_index++;
909 } else {
910 --i;
911 state = ParserState::kDefault;
912 }
913 break;
914
// kComment: skip to the end of the line.
915 case ParserState::kComment:
916 if (byte == '\r' || byte == '\n')
917 state = ParserState::kDefault;
918 break;
919
// kString: skip a parenthesized string, tracking nesting depth.
920 case ParserState::kString:
921 if (byte == ')') {
922 if (depth > 0)
923 depth--;
924 } else if (byte == '(') {
925 depth++;
926 }
927
928 if (!depth)
929 state = ParserState::kDefault;
930 break;
931
// kHexString: leave on '>' or when the byte right after the opening '<' is
// another '<'.
932 case ParserState::kHexString:
933 if (byte == '>' || (byte == '<' && inside_index == 1))
934 state = ParserState::kDefault;
935 inside_index = 0;
936 break;
937
// kEscapedString: skip until a delimiter or whitespace.
938 case ParserState::kEscapedString:
939 if (PDFCharIsDelimiter(byte) || PDFCharIsWhitespace(byte)) {
940 --i;
941 state = ParserState::kDefault;
942 }
943 break;
944
// kEndObj: entered when a number is followed directly by a non-space byte;
// consumes an "endobj" keyword if that is what follows.
945 case ParserState::kEndObj:
946 if (PDFCharIsWhitespace(byte)) {
947 state = ParserState::kDefault;
948 } else if (byte == '%' || byte == '(' || byte == '<' ||
949 byte == '\\') {
950 state = ParserState::kDefault;
951 --i;
952 } else if (inside_index == 6) {
953 state = ParserState::kDefault;
954 --i;
955 } else if (byte == "endobj"[inside_index]) {
956 inside_index++;
957 }
958 break;
959 }
960
// |pos| was already moved past a large object; abandon the rest of this
// chunk without advancing further.
961 if (bOverFlow) {
962 size = 0;
963 break;
964 }
965 }
966 pos += size;
967
968 // If the position has not changed at all in a loop iteration, then break
969 // out to prevent infinite looping.
970 if (pos == saved_pos)
971 break;
972 }
973
// Decide which position to record as the last sorted offset: the last "xref"
// keyword when it follows the last object, otherwise the last "trailer"
// keyword, falling back to the end of the file.
974 if (last_xref != -1 && last_xref > last_obj)
975 last_trailer = last_xref;
976 else if (last_trailer == -1 || last_xref < last_obj)
977 last_trailer = m_pSyntax->m_FileLen;
978
979 m_SortedOffset.insert(last_trailer - m_pSyntax->m_HeaderOffset);
980 return m_pTrailer && !m_ObjectInfo.empty();
981 }
982
// Loads one cross-reference stream located at |*pos|.
//
// On return |*pos| holds the stream dictionary's "Prev" value (the next
// stream to load, 0 when there is none). When |bMainXRef| is TRUE the
// stream's dictionary becomes m_pTrailer, the object map is trimmed to the
// stream's "Size" and every existing entry's type is reset to 0; otherwise a
// clone of the dictionary is appended to m_Trailers.
//
// Returns FALSE when the object cannot be parsed or inserted into the
// document, is not a stream, has a negative "Size", lacks a usable "W" array,
// or contains a type-2 entry that points at an invalid object number.
983 FX_BOOL CPDF_Parser::LoadCrossRefV5(FX_FILESIZE* pos, FX_BOOL bMainXRef) {
984 CPDF_Object* pObject = ParseIndirectObjectAt(m_pDocument, *pos, 0);
985 if (!pObject)
986 return FALSE;
987
// Register the parsed object with the document unless its number equals the
// root's object number.
// NOTE(review): in that second case |bInserted| stays FALSE, so the function
// returns FALSE, and a non-stream |pObject| is not released on that path -
// confirm this is intended.
988 if (m_pDocument) {
989 FX_BOOL bInserted = FALSE;
990 CPDF_Dictionary* pDict = m_pDocument->GetRoot();
991 if (!pDict || pDict->GetObjNum() != pObject->m_ObjNum) {
992 bInserted = m_pDocument->InsertIndirectObject(pObject->m_ObjNum, pObject);
993 } else {
994 if (pObject->IsStream())
995 pObject->Release();
996 }
997
998 if (!bInserted)
999 return FALSE;
1000 }
1001
1002 CPDF_Stream* pStream = pObject->AsStream();
1003 if (!pStream)
1004 return FALSE;
1005
1006 *pos = pStream->GetDict()->GetIntegerBy("Prev");
1007 int32_t size = pStream->GetDict()->GetIntegerBy("Size");
1008 if (size < 0) {
1009 pStream->Release();
1010 return FALSE;
1011 }
1012
1013 if (bMainXRef) {
1014 m_pTrailer = ToDictionary(pStream->GetDict()->Clone());
1015 ShrinkObjectMap(size);
1016 for (auto& it : m_ObjectInfo)
1017 it.second.type = 0;
1018 } else {
1019 m_Trailers.Add(ToDictionary(pStream->GetDict()->Clone()));
1020 }
1021
// Collect (first object number, count) pairs from the "Index" array, keeping
// only pairs of numbers with a non-negative start and a positive count. With
// no usable pairs, a single section (0, Size) is assumed.
1022 std::vector<std::pair<int32_t, int32_t> > arrIndex;
1023 CPDF_Array* pArray = pStream->GetDict()->GetArrayBy("Index");
1024 if (pArray) {
1025 FX_DWORD nPairSize = pArray->GetCount() / 2;
1026 for (FX_DWORD i = 0; i < nPairSize; i++) {
1027 CPDF_Object* pStartNumObj = pArray->GetElement(i * 2);
1028 CPDF_Object* pCountObj = pArray->GetElement(i * 2 + 1);
1029
1030 if (ToNumber(pStartNumObj) && ToNumber(pCountObj)) {
1031 int nStartNum = pStartNumObj->GetInteger();
1032 int nCount = pCountObj->GetInteger();
1033 if (nStartNum >= 0 && nCount > 0)
1034 arrIndex.push_back(std::make_pair(nStartNum, nCount));
1035 }
1036 }
1037 }
1038
1039 if (arrIndex.size() == 0)
1040 arrIndex.push_back(std::make_pair(0, size));
1041
// The "W" array gives the byte width of each field in an entry; at least
// three widths are required and their sum must not overflow.
1042 pArray = pStream->GetDict()->GetArrayBy("W");
1043 if (!pArray) {
1044 pStream->Release();
1045 return FALSE;
1046 }
1047
1048 CFX_DWordArray WidthArray;
1049 FX_SAFE_DWORD dwAccWidth = 0;
1050 for (FX_DWORD i = 0; i < pArray->GetCount(); i++) {
1051 WidthArray.Add(pArray->GetIntegerAt(i));
1052 dwAccWidth += WidthArray[i];
1053 }
1054
1055 if (!dwAccWidth.IsValid() || WidthArray.GetSize() < 3) {
1056 pStream->Release();
1057 return FALSE;
1058 }
1059
1060 FX_DWORD totalWidth = dwAccWidth.ValueOrDie();
1061 CPDF_StreamAcc acc;
1062 acc.LoadAllData(pStream);
1063
// |segindex| counts the entries consumed by the sections accepted so far, so
// each section's data starts at segindex * totalWidth.
1064 const uint8_t* pData = acc.GetData();
1065 FX_DWORD dwTotalSize = acc.GetSize();
1066 FX_DWORD segindex = 0;
1067 for (FX_DWORD i = 0; i < arrIndex.size(); i++) {
1068 int32_t startnum = arrIndex[i].first;
1069 if (startnum < 0)
1070 continue;
1071
1072 m_dwXrefStartObjNum =
1073 pdfium::base::checked_cast<FX_DWORD, int32_t>(startnum);
1074 FX_DWORD count =
1075 pdfium::base::checked_cast<FX_DWORD, int32_t>(arrIndex[i].second);
// Skip sections whose entries would run past the end of the stream data.
1076 FX_SAFE_DWORD dwCaculatedSize = segindex;
1077 dwCaculatedSize += count;
1078 dwCaculatedSize *= totalWidth;
1079 if (!dwCaculatedSize.IsValid() ||
1080 dwCaculatedSize.ValueOrDie() > dwTotalSize) {
1081 continue;
1082 }
1083
// Skip sections that describe object numbers beyond the current object map.
1084 const uint8_t* segstart = pData + segindex * totalWidth;
1085 FX_SAFE_DWORD dwMaxObjNum = startnum;
1086 dwMaxObjNum += count;
1087 FX_DWORD dwV5Size = m_ObjectInfo.empty() ? 0 : GetLastObjNum() + 1;
1088 if (!dwMaxObjNum.IsValid() || dwMaxObjNum.ValueOrDie() > dwV5Size)
1089 continue;
1090
1091 for (FX_DWORD j = 0; j < count; j++) {
// The entry type defaults to 1 when the first field has zero width.
1092 int32_t type = 1;
1093 const uint8_t* entrystart = segstart + j * totalWidth;
1094 if (WidthArray[0])
1095 type = GetVarInt(entrystart, WidthArray[0]);
1096
// Objects already marked 255 (set below for objects referenced by a type-2
// entry) only get their position filled in.
1097 if (GetObjectType(startnum + j) == 255) {
1098 FX_FILESIZE offset =
1099 GetVarInt(entrystart + WidthArray[0], WidthArray[1]);
1100 m_ObjectInfo[startnum + j].pos = offset;
1101 m_SortedOffset.insert(offset);
1102 continue;
1103 }
1104
// Entries that already have a non-zero type are left untouched.
1105 if (GetObjectType(startnum + j))
1106 continue;
1107
1108 m_ObjectInfo[startnum + j].type = type;
1109 if (type == 0) {
1110 m_ObjectInfo[startnum + j].pos = 0;
1111 } else {
1112 FX_FILESIZE offset =
1113 GetVarInt(entrystart + WidthArray[0], WidthArray[1]);
1114 m_ObjectInfo[startnum + j].pos = offset;
// Type 1: the second field is recorded as a file offset. Any other non-zero
// type: the second field is used as an object number, which must be valid
// and is marked with type 255.
1115 if (type == 1) {
1116 m_SortedOffset.insert(offset);
1117 } else {
1118 if (offset < 0 || !IsValidObjectNumber(offset)) {
1119 pStream->Release();
1120 return FALSE;
1121 }
1122 m_ObjectInfo[offset].type = 255;
1123 }
1124 }
1125 }
1126 segindex += count;
1127 }
1128 pStream->Release();
1129 return TRUE;
1130 }
1131
1132 CPDF_Array* CPDF_Parser::GetIDArray() {
1133 CPDF_Object* pID = m_pTrailer ? m_pTrailer->GetElement("ID") : nullptr;
1134 if (!pID)
1135 return nullptr;
1136
1137 if (CPDF_Reference* pRef = pID->AsReference()) {
1138 pID = ParseIndirectObject(nullptr, pRef->GetRefObjNum());
1139 m_pTrailer->SetAt("ID", pID);
1140 }
1141 return ToArray(pID);
1142 }
1143
1144 FX_DWORD CPDF_Parser::GetRootObjNum() {
1145 CPDF_Reference* pRef =
1146 ToReference(m_pTrailer ? m_pTrailer->GetElement("Root") : nullptr);
1147 return pRef ? pRef->GetRefObjNum() : 0;
1148 }
1149
1150 FX_DWORD CPDF_Parser::GetInfoObjNum() {
1151 CPDF_Reference* pRef =
1152 ToReference(m_pTrailer ? m_pTrailer->GetElement("Info") : nullptr);
1153 return pRef ? pRef->GetRefObjNum() : 0;
1154 }
1155
1156 FX_BOOL CPDF_Parser::IsFormStream(FX_DWORD objnum, FX_BOOL& bForm) {
1157 bForm = FALSE;
1158 if (!IsValidObjectNumber(objnum))
1159 return TRUE;
1160
1161 if (GetObjectType(objnum) == 0)
1162 return TRUE;
1163
1164 if (GetObjectType(objnum) == 2)
1165 return TRUE;
1166
1167 FX_FILESIZE pos = m_ObjectInfo[objnum].pos;
1168 auto it = m_SortedOffset.find(pos);
1169 if (it == m_SortedOffset.end())
1170 return TRUE;
1171
1172 if (++it == m_SortedOffset.end())
1173 return FALSE;
1174
1175 FX_FILESIZE size = *it - pos;
1176 FX_FILESIZE SavedPos = m_pSyntax->SavePos();
1177 m_pSyntax->RestorePos(pos);
1178
1179 const char kFormStream[] = "/Form\0stream";
1180 const CFX_ByteStringC kFormStreamStr(kFormStream, sizeof(kFormStream) - 1);
1181 bForm = m_pSyntax->SearchMultiWord(kFormStreamStr, TRUE, size) == 0;
1182 m_pSyntax->RestorePos(SavedPos);
1183 return TRUE;
1184 }
1185
1186 CPDF_Object* CPDF_Parser::ParseIndirectObject(
1187 CPDF_IndirectObjectHolder* pObjList,
1188 FX_DWORD objnum) {
1189 if (!IsValidObjectNumber(objnum))
1190 return nullptr;
1191
1192 // Prevent circular parsing the same object.
1193 if (pdfium::ContainsKey(m_ParsingObjNums, objnum))
1194 return nullptr;
1195 ScopedSetInsertion<FX_DWORD> local_insert(&m_ParsingObjNums, objnum);
1196
1197 if (GetObjectType(objnum) == 1 || GetObjectType(objnum) == 255) {
1198 FX_FILESIZE pos = m_ObjectInfo[objnum].pos;
1199 if (pos <= 0)
1200 return nullptr;
1201 return ParseIndirectObjectAt(pObjList, pos, objnum);
1202 }
1203 if (GetObjectType(objnum) != 2)
1204 return nullptr;
1205
1206 CPDF_StreamAcc* pObjStream = GetObjectStream(m_ObjectInfo[objnum].pos);
1207 if (!pObjStream)
1208 return nullptr;
1209
1210 ScopedFileStream file(FX_CreateMemoryStream(
1211 (uint8_t*)pObjStream->GetData(), (size_t)pObjStream->GetSize(), FALSE));
1212 CPDF_SyntaxParser syntax;
1213 syntax.InitParser(file.get(), 0);
1214 const int32_t offset = GetStreamFirst(pObjStream);
1215
1216 // Read object numbers from |pObjStream| into a cache.
1217 if (!pdfium::ContainsKey(m_ObjCache, pObjStream)) {
1218 for (int32_t i = GetStreamNCount(pObjStream); i > 0; --i) {
1219 FX_DWORD thisnum = syntax.GetDirectNum();
1220 FX_DWORD thisoff = syntax.GetDirectNum();
1221 m_ObjCache[pObjStream][thisnum] = thisoff;
1222 }
1223 }
1224
1225 const auto it = m_ObjCache[pObjStream].find(objnum);
1226 if (it == m_ObjCache[pObjStream].end())
1227 return nullptr;
1228
1229 syntax.RestorePos(offset + it->second);
1230 return syntax.GetObject(pObjList, 0, 0, true);
1231 }
1232
1233 CPDF_StreamAcc* CPDF_Parser::GetObjectStream(FX_DWORD objnum) {
1234 auto it = m_ObjectStreamMap.find(objnum);
1235 if (it != m_ObjectStreamMap.end())
1236 return it->second.get();
1237
1238 if (!m_pDocument)
1239 return nullptr;
1240
1241 const CPDF_Stream* pStream = ToStream(m_pDocument->GetIndirectObject(objnum));
1242 if (!pStream)
1243 return nullptr;
1244
1245 CPDF_StreamAcc* pStreamAcc = new CPDF_StreamAcc;
1246 pStreamAcc->LoadAllData(pStream);
1247 m_ObjectStreamMap[objnum].reset(pStreamAcc);
1248 return pStreamAcc;
1249 }
1250
1251 FX_FILESIZE CPDF_Parser::GetObjectSize(FX_DWORD objnum) const {
1252 if (!IsValidObjectNumber(objnum))
1253 return 0;
1254
1255 if (GetObjectType(objnum) == 2)
1256 objnum = GetObjectPositionOrZero(objnum);
1257
1258 if (GetObjectType(objnum) != 1 && GetObjectType(objnum) != 255)
1259 return 0;
1260
1261 FX_FILESIZE offset = GetObjectPositionOrZero(objnum);
1262 if (offset == 0)
1263 return 0;
1264
1265 auto it = m_SortedOffset.find(offset);
1266 if (it == m_SortedOffset.end() || ++it == m_SortedOffset.end())
1267 return 0;
1268
1269 return *it - offset;
1270 }
1271
1272 void CPDF_Parser::GetIndirectBinary(FX_DWORD objnum,
1273 uint8_t*& pBuffer,
1274 FX_DWORD& size) {
1275 pBuffer = nullptr;
1276 size = 0;
1277 if (!IsValidObjectNumber(objnum))
1278 return;
1279
1280 if (GetObjectType(objnum) == 2) {
1281 CPDF_StreamAcc* pObjStream = GetObjectStream(m_ObjectInfo[objnum].pos);
1282 if (!pObjStream)
1283 return;
1284
1285 int32_t offset = GetStreamFirst(pObjStream);
1286 const uint8_t* pData = pObjStream->GetData();
1287 FX_DWORD totalsize = pObjStream->GetSize();
1288 ScopedFileStream file(
1289 FX_CreateMemoryStream((uint8_t*)pData, (size_t)totalsize, FALSE));
1290
1291 CPDF_SyntaxParser syntax;
1292 syntax.InitParser(file.get(), 0);
1293 for (int i = GetStreamNCount(pObjStream); i > 0; --i) {
1294 FX_DWORD thisnum = syntax.GetDirectNum();
1295 FX_DWORD thisoff = syntax.GetDirectNum();
1296 if (thisnum != objnum)
1297 continue;
1298
1299 if (i == 1) {
1300 size = totalsize - (thisoff + offset);
1301 } else {
1302 syntax.GetDirectNum(); // Skip nextnum.
1303 FX_DWORD nextoff = syntax.GetDirectNum();
1304 size = nextoff - thisoff;
1305 }
1306
1307 pBuffer = FX_Alloc(uint8_t, size);
1308 FXSYS_memcpy(pBuffer, pData + thisoff + offset, size);
1309 return;
1310 }
1311 return;
1312 }
1313
1314 if (GetObjectType(objnum) != 1)
1315 return;
1316
1317 FX_FILESIZE pos = m_ObjectInfo[objnum].pos;
1318 if (pos == 0)
1319 return;
1320
1321 FX_FILESIZE SavedPos = m_pSyntax->SavePos();
1322 m_pSyntax->RestorePos(pos);
1323
1324 bool bIsNumber;
1325 CFX_ByteString word = m_pSyntax->GetNextWord(&bIsNumber);
1326 if (!bIsNumber) {
1327 m_pSyntax->RestorePos(SavedPos);
1328 return;
1329 }
1330
1331 FX_DWORD parser_objnum = FXSYS_atoui(word);
1332 if (parser_objnum && parser_objnum != objnum) {
1333 m_pSyntax->RestorePos(SavedPos);
1334 return;
1335 }
1336
1337 word = m_pSyntax->GetNextWord(&bIsNumber);
1338 if (!bIsNumber) {
1339 m_pSyntax->RestorePos(SavedPos);
1340 return;
1341 }
1342
1343 if (m_pSyntax->GetKeyword() != "obj") {
1344 m_pSyntax->RestorePos(SavedPos);
1345 return;
1346 }
1347
1348 auto it = m_SortedOffset.find(pos);
1349 if (it == m_SortedOffset.end() || ++it == m_SortedOffset.end()) {
1350 m_pSyntax->RestorePos(SavedPos);
1351 return;
1352 }
1353
1354 FX_FILESIZE nextoff = *it;
1355 FX_BOOL bNextOffValid = FALSE;
1356 if (nextoff != pos) {
1357 m_pSyntax->RestorePos(nextoff);
1358 word = m_pSyntax->GetNextWord(&bIsNumber);
1359 if (word == "xref") {
1360 bNextOffValid = TRUE;
1361 } else if (bIsNumber) {
1362 word = m_pSyntax->GetNextWord(&bIsNumber);
1363 if (bIsNumber && m_pSyntax->GetKeyword() == "obj") {
1364 bNextOffValid = TRUE;
1365 }
1366 }
1367 }
1368
1369 if (!bNextOffValid) {
1370 m_pSyntax->RestorePos(pos);
1371 while (1) {
1372 if (m_pSyntax->GetKeyword() == "endobj")
1373 break;
1374
1375 if (m_pSyntax->SavePos() == m_pSyntax->m_FileLen)
1376 break;
1377 }
1378 nextoff = m_pSyntax->SavePos();
1379 }
1380
1381 size = (FX_DWORD)(nextoff - pos);
1382 pBuffer = FX_Alloc(uint8_t, size);
1383 m_pSyntax->RestorePos(pos);
1384 m_pSyntax->ReadBlock(pBuffer, size);
1385 m_pSyntax->RestorePos(SavedPos);
1386 }
1387
1388 CPDF_Object* CPDF_Parser::ParseIndirectObjectAt(
1389 CPDF_IndirectObjectHolder* pObjList,
1390 FX_FILESIZE pos,
1391 FX_DWORD objnum) {
1392 FX_FILESIZE SavedPos = m_pSyntax->SavePos();
1393 m_pSyntax->RestorePos(pos);
1394 bool bIsNumber;
1395 CFX_ByteString word = m_pSyntax->GetNextWord(&bIsNumber);
1396 if (!bIsNumber) {
1397 m_pSyntax->RestorePos(SavedPos);
1398 return nullptr;
1399 }
1400
1401 FX_FILESIZE objOffset = m_pSyntax->SavePos();
1402 objOffset -= word.GetLength();
1403 FX_DWORD parser_objnum = FXSYS_atoui(word);
1404 if (objnum && parser_objnum != objnum) {
1405 m_pSyntax->RestorePos(SavedPos);
1406 return nullptr;
1407 }
1408
1409 word = m_pSyntax->GetNextWord(&bIsNumber);
1410 if (!bIsNumber) {
1411 m_pSyntax->RestorePos(SavedPos);
1412 return nullptr;
1413 }
1414
1415 FX_DWORD parser_gennum = FXSYS_atoui(word);
1416 if (m_pSyntax->GetKeyword() != "obj") {
1417 m_pSyntax->RestorePos(SavedPos);
1418 return nullptr;
1419 }
1420
1421 CPDF_Object* pObj =
1422 m_pSyntax->GetObject(pObjList, objnum, parser_gennum, true);
1423 m_pSyntax->SavePos();
1424
1425 CFX_ByteString bsWord = m_pSyntax->GetKeyword();
1426 if (bsWord == "endobj")
1427 m_pSyntax->SavePos();
1428
1429 m_pSyntax->RestorePos(SavedPos);
1430 if (pObj) {
1431 if (!objnum)
1432 pObj->m_ObjNum = parser_objnum;
1433 pObj->m_GenNum = parser_gennum;
1434 }
1435 return pObj;
1436 }
1437
1438 CPDF_Object* CPDF_Parser::ParseIndirectObjectAtByStrict(
1439 CPDF_IndirectObjectHolder* pObjList,
1440 FX_FILESIZE pos,
1441 FX_DWORD objnum,
1442 FX_FILESIZE* pResultPos) {
1443 FX_FILESIZE SavedPos = m_pSyntax->SavePos();
1444 m_pSyntax->RestorePos(pos);
1445
1446 bool bIsNumber;
1447 CFX_ByteString word = m_pSyntax->GetNextWord(&bIsNumber);
1448 if (!bIsNumber) {
1449 m_pSyntax->RestorePos(SavedPos);
1450 return nullptr;
1451 }
1452
1453 FX_DWORD parser_objnum = FXSYS_atoui(word);
1454 if (objnum && parser_objnum != objnum) {
1455 m_pSyntax->RestorePos(SavedPos);
1456 return nullptr;
1457 }
1458
1459 word = m_pSyntax->GetNextWord(&bIsNumber);
1460 if (!bIsNumber) {
1461 m_pSyntax->RestorePos(SavedPos);
1462 return nullptr;
1463 }
1464
1465 FX_DWORD gennum = FXSYS_atoui(word);
1466 if (m_pSyntax->GetKeyword() != "obj") {
1467 m_pSyntax->RestorePos(SavedPos);
1468 return nullptr;
1469 }
1470
1471 CPDF_Object* pObj = m_pSyntax->GetObjectByStrict(pObjList, objnum, gennum);
1472 if (pResultPos)
1473 *pResultPos = m_pSyntax->m_Pos;
1474
1475 m_pSyntax->RestorePos(SavedPos);
1476 return pObj;
1477 }
1478
1479 CPDF_Dictionary* CPDF_Parser::LoadTrailerV4() {
1480 if (m_pSyntax->GetKeyword() != "trailer")
1481 return nullptr;
1482
1483 std::unique_ptr<CPDF_Object, ReleaseDeleter<CPDF_Object>> pObj(
1484 m_pSyntax->GetObject(m_pDocument, 0, 0, true));
1485 if (!ToDictionary(pObj.get()))
1486 return nullptr;
1487 return pObj.release()->AsDictionary();
1488 }
1489
1490 FX_DWORD CPDF_Parser::GetPermissions(FX_BOOL bCheckRevision) {
1491 if (!m_pSecurityHandler)
1492 return (FX_DWORD)-1;
1493
1494 FX_DWORD dwPermission = m_pSecurityHandler->GetPermissions();
1495 if (m_pEncryptDict && m_pEncryptDict->GetStringBy("Filter") == "Standard") {
1496 dwPermission &= 0xFFFFFFFC;
1497 dwPermission |= 0xFFFFF0C0;
1498 if (bCheckRevision && m_pEncryptDict->GetIntegerBy("R") == 2)
1499 dwPermission &= 0xFFFFF0FF;
1500 }
1501 return dwPermission;
1502 }
1503
1504 FX_BOOL CPDF_Parser::IsLinearizedFile(IFX_FileRead* pFileAccess,
1505 FX_DWORD offset) {
1506 m_pSyntax->InitParser(pFileAccess, offset);
1507 m_pSyntax->RestorePos(m_pSyntax->m_HeaderOffset + 9);
1508
1509 FX_FILESIZE SavedPos = m_pSyntax->SavePos();
1510 bool bIsNumber;
1511 CFX_ByteString word = m_pSyntax->GetNextWord(&bIsNumber);
1512 if (!bIsNumber)
1513 return FALSE;
1514
1515 FX_DWORD objnum = FXSYS_atoui(word);
1516 word = m_pSyntax->GetNextWord(&bIsNumber);
1517 if (!bIsNumber)
1518 return FALSE;
1519
1520 FX_DWORD gennum = FXSYS_atoui(word);
1521 if (m_pSyntax->GetKeyword() != "obj") {
1522 m_pSyntax->RestorePos(SavedPos);
1523 return FALSE;
1524 }
1525
1526 m_pLinearized = m_pSyntax->GetObject(nullptr, objnum, gennum, true);
1527 if (!m_pLinearized)
1528 return FALSE;
1529
1530 CPDF_Dictionary* pDict = m_pLinearized->GetDict();
1531 if (pDict && pDict->GetElement("Linearized")) {
1532 m_pSyntax->GetNextWord(nullptr);
1533
1534 CPDF_Object* pLen = pDict->GetElement("L");
1535 if (!pLen) {
1536 m_pLinearized->Release();
1537 m_pLinearized = nullptr;
1538 return FALSE;
1539 }
1540
1541 if (pLen->GetInteger() != (int)pFileAccess->GetSize())
1542 return FALSE;
1543
1544 if (CPDF_Number* pNo = ToNumber(pDict->GetElement("P")))
1545 m_dwFirstPageNo = pNo->GetInteger();
1546
1547 if (CPDF_Number* pTable = ToNumber(pDict->GetElement("T")))
1548 m_LastXRefOffset = pTable->GetInteger();
1549
1550 return TRUE;
1551 }
1552 m_pLinearized->Release();
1553 m_pLinearized = nullptr;
1554 return FALSE;
1555 }
1556
1557 CPDF_Parser::Error CPDF_Parser::StartAsyncParse(IFX_FileRead* pFileAccess) {
1558 CloseParser();
1559 m_bXRefStream = FALSE;
1560 m_LastXRefOffset = 0;
1561 m_bOwnFileRead = true;
1562
1563 int32_t offset = GetHeaderOffset(pFileAccess);
1564 if (offset == -1)
1565 return FORMAT_ERROR;
1566
1567 if (!IsLinearizedFile(pFileAccess, offset)) {
1568 m_pSyntax->m_pFileAccess = nullptr;
1569 return StartParse(pFileAccess);
1570 }
1571
1572 m_pDocument = new CPDF_Document(this);
1573 FX_FILESIZE dwFirstXRefOffset = m_pSyntax->SavePos();
1574
1575 FX_BOOL bXRefRebuilt = FALSE;
1576 FX_BOOL bLoadV4 = FALSE;
1577 if (!(bLoadV4 = LoadCrossRefV4(dwFirstXRefOffset, 0, FALSE)) &&
1578 !LoadCrossRefV5(&dwFirstXRefOffset, TRUE)) {
1579 if (!RebuildCrossRef())
1580 return FORMAT_ERROR;
1581
1582 bXRefRebuilt = TRUE;
1583 m_LastXRefOffset = 0;
1584 }
1585
1586 if (bLoadV4) {
1587 m_pTrailer = LoadTrailerV4();
1588 if (!m_pTrailer)
1589 return SUCCESS;
1590
1591 int32_t xrefsize = GetDirectInteger(m_pTrailer, "Size");
1592 if (xrefsize > 0)
1593 ShrinkObjectMap(xrefsize);
1594 }
1595
1596 Error eRet = SetEncryptHandler();
1597 if (eRet != SUCCESS)
1598 return eRet;
1599
1600 m_pDocument->LoadAsynDoc(m_pLinearized->GetDict());
1601 if (!m_pDocument->GetRoot() || m_pDocument->GetPageCount() == 0) {
1602 if (bXRefRebuilt)
1603 return FORMAT_ERROR;
1604
1605 ReleaseEncryptHandler();
1606 if (!RebuildCrossRef())
1607 return FORMAT_ERROR;
1608
1609 eRet = SetEncryptHandler();
1610 if (eRet != SUCCESS)
1611 return eRet;
1612
1613 m_pDocument->LoadAsynDoc(m_pLinearized->GetDict());
1614 if (!m_pDocument->GetRoot())
1615 return FORMAT_ERROR;
1616 }
1617
1618 if (GetRootObjNum() == 0) {
1619 ReleaseEncryptHandler();
1620 if (!RebuildCrossRef() || GetRootObjNum() == 0)
1621 return FORMAT_ERROR;
1622
1623 eRet = SetEncryptHandler();
1624 if (eRet != SUCCESS)
1625 return eRet;
1626 }
1627
1628 if (m_pSecurityHandler && m_pSecurityHandler->IsMetadataEncrypted()) {
1629 if (CPDF_Reference* pMetadata =
1630 ToReference(m_pDocument->GetRoot()->GetElement("Metadata")))
1631 m_pSyntax->m_MetadataObjnum = pMetadata->GetRefObjNum();
1632 }
1633 return SUCCESS;
1634 }
1635
1636 FX_BOOL CPDF_Parser::LoadLinearizedAllCrossRefV5(FX_FILESIZE xrefpos) {
1637 if (!LoadCrossRefV5(&xrefpos, FALSE))
1638 return FALSE;
1639
1640 std::set<FX_FILESIZE> seen_xrefpos;
1641 while (xrefpos) {
1642 seen_xrefpos.insert(xrefpos);
1643 if (!LoadCrossRefV5(&xrefpos, FALSE))
1644 return FALSE;
1645
1646 // Check for circular references.
1647 if (pdfium::ContainsKey(seen_xrefpos, xrefpos))
1648 return FALSE;
1649 }
1650 m_ObjectStreamMap.clear();
1651 m_bXRefStream = TRUE;
1652 return TRUE;
1653 }
1654
1655 CPDF_Parser::Error CPDF_Parser::LoadLinearizedMainXRefTable() {
1656 FX_DWORD dwSaveMetadataObjnum = m_pSyntax->m_MetadataObjnum;
1657 m_pSyntax->m_MetadataObjnum = 0;
1658 if (m_pTrailer) {
1659 m_pTrailer->Release();
1660 m_pTrailer = nullptr;
1661 }
1662
1663 m_pSyntax->RestorePos(m_LastXRefOffset - m_pSyntax->m_HeaderOffset);
1664 uint8_t ch = 0;
1665 FX_DWORD dwCount = 0;
1666 m_pSyntax->GetNextChar(ch);
1667 while (PDFCharIsWhitespace(ch)) {
1668 ++dwCount;
1669 if (m_pSyntax->m_FileLen >=
1670 (FX_FILESIZE)(m_pSyntax->SavePos() + m_pSyntax->m_HeaderOffset)) {
1671 break;
1672 }
1673 m_pSyntax->GetNextChar(ch);
1674 }
1675 m_LastXRefOffset += dwCount;
1676 m_ObjectStreamMap.clear();
1677 m_ObjCache.clear();
1678
1679 if (!LoadLinearizedAllCrossRefV4(m_LastXRefOffset, m_dwXrefStartObjNum) &&
1680 !LoadLinearizedAllCrossRefV5(m_LastXRefOffset)) {
1681 m_LastXRefOffset = 0;
1682 m_pSyntax->m_MetadataObjnum = dwSaveMetadataObjnum;
1683 return FORMAT_ERROR;
1684 }
1685
1686 m_pSyntax->m_MetadataObjnum = dwSaveMetadataObjnum;
1687 return SUCCESS;
1688 }
1689
1690 44
1691 class CPDF_DataAvail final : public IPDF_DataAvail { 45 class CPDF_DataAvail final : public IPDF_DataAvail {
1692 public: 46 public:
1693 CPDF_DataAvail(IFX_FileAvail* pFileAvail, 47 CPDF_DataAvail(IFX_FileAvail* pFileAvail,
1694 IFX_FileRead* pFileRead, 48 IFX_FileRead* pFileRead,
1695 FX_BOOL bSupportHintTable); 49 FX_BOOL bSupportHintTable);
1696 ~CPDF_DataAvail() override; 50 ~CPDF_DataAvail() override;
1697 51
1698 // IPDF_DataAvail: 52 // IPDF_DataAvail:
1699 DocAvailStatus IsDocAvail(IFX_DownloadHints* pHints) override; 53 DocAvailStatus IsDocAvail(IFX_DownloadHints* pHints) override;
(...skipping 2387 matching lines...) Expand 10 before | Expand all | Expand 10 after
4087 CPDF_Array* pRange = m_pLinearizedDict->GetArrayBy("H"); 2441 CPDF_Array* pRange = m_pLinearizedDict->GetArrayBy("H");
4088 if (!pRange) 2442 if (!pRange)
4089 return -1; 2443 return -1;
4090 2444
4091 CPDF_Object* pStreamLen = pRange->GetElementValue(1); 2445 CPDF_Object* pStreamLen = pRange->GetElementValue(1);
4092 if (!pStreamLen) 2446 if (!pStreamLen)
4093 return -1; 2447 return -1;
4094 2448
4095 return pStreamLen->GetInteger(); 2449 return pStreamLen->GetInteger();
4096 } 2450 }
OLDNEW
« no previous file with comments | « core/src/fpdfapi/fpdf_parser/fpdf_parser_objects.cpp ('k') | core/src/fpdfapi/fpdf_parser/fpdf_parser_parser_unittest.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698