core/src/fpdfapi/fpdf_parser/fpdf_parser_parser.cpp - Issue 1775023003: Re-land "Split CPDF_SyntaxParser into its own named .cpp/.h files."

Side by Side Diff: core/src/fpdfapi/fpdf_parser/fpdf_parser_parser.cpp

Issue 1775023003: Re-land "Split CPDF_SyntaxParser into its own named .cpp/.h files." (Closed) Base URL: https://pdfium.googlesource.com/pdfium.git@master

Patch Set: Include <vector>. Created 4 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 // Copyright 2014 PDFium Authors. All rights reserved.	1 // Copyright 2014 PDFium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com	5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com

6	6

7 #include "core/include/fpdfapi/fpdf_parser.h"	7 #include "core/include/fpdfapi/fpdf_parser.h"

8	8

9 #include <algorithm>	9 #include <algorithm>

10 #include <memory>	10 #include <memory>

11 #include <set>	11 #include <set>

12 #include <utility>	12 #include <utility>

13 #include <vector>	13 #include <vector>

14	14

15 #include "core/include/fpdfapi/fpdf_module.h"	15 #include "core/include/fpdfapi/fpdf_module.h"

16 #include "core/include/fpdfapi/fpdf_page.h"	16 #include "core/include/fpdfapi/fpdf_page.h"

17 #include "core/include/fxcrt/fx_ext.h"	17 #include "core/include/fxcrt/fx_ext.h"

18 #include "core/include/fxcrt/fx_safe_types.h"	18 #include "core/include/fxcrt/fx_safe_types.h"

19 #include "core/src/fpdfapi/fpdf_page/pageint.h"	19 #include "core/src/fpdfapi/fpdf_page/pageint.h"

	20 #include "core/src/fpdfapi/fpdf_parser/cpdf_syntax_parser.h"

20 #include "core/src/fpdfapi/fpdf_parser/parser_int.h"	21 #include "core/src/fpdfapi/fpdf_parser/parser_int.h"

21 #include "third_party/base/stl_util.h"	22 #include "third_party/base/stl_util.h"

22	23

23 namespace {	24 namespace {

24	25

25 // A limit on the size of the xref table. Theoretical limits are higher, but	26 // A limit on the size of the xref table. Theoretical limits are higher, but

26 // this may be large enough in practice.	27 // this may be large enough in practice.

27 const int32_t kMaxXRefSize = 1048576;	28 const int32_t kMaxXRefSize = 1048576;

28	29

29 // A limit on the maximum object number in the xref table. Theoretical limits	30 // A limit on the maximum object number in the xref table. Theoretical limits

30 // are higher, but this may be large enough in practice.	31 // are higher, but this may be large enough in practice.

31 const FX_DWORD kMaxObjectNumber = 1048576;	32 const FX_DWORD kMaxObjectNumber = 1048576;

32	33

33 struct SearchTagRecord {

34 const char* m_pTag;

35 FX_DWORD m_Len;

36 FX_DWORD m_Offset;

37 };

38

39 int32_t GetHeaderOffset(IFX_FileRead* pFile) {	34 int32_t GetHeaderOffset(IFX_FileRead* pFile) {

40 // TODO(dsinclair): This is a complicated way of saying %PDF, simplify?	35 // TODO(dsinclair): This is a complicated way of saying %PDF, simplify?

41 const FX_DWORD tag = FXDWORD_FROM_LSBFIRST(0x46445025);	36 const FX_DWORD tag = FXDWORD_FROM_LSBFIRST(0x46445025);

42	37

43 const size_t kBufSize = 4;	38 const size_t kBufSize = 4;

44 uint8_t buf[kBufSize];	39 uint8_t buf[kBufSize];

45 int32_t offset = 0;	40 int32_t offset = 0;

46 while (offset <= 1024) {	41 while (offset <= 1024) {

47 if (!pFile->ReadBlock(buf, offset, kBufSize))	42 if (!pFile->ReadBlock(buf, offset, kBufSize))

48 return -1;	43 return -1;

(...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
91 }	86 }

92	87

93 CPDF_Parser::CPDF_Parser()	88 CPDF_Parser::CPDF_Parser()

94 : m_pDocument(nullptr),	89 : m_pDocument(nullptr),

95 m_bOwnFileRead(true),	90 m_bOwnFileRead(true),

96 m_FileVersion(0),	91 m_FileVersion(0),

97 m_pTrailer(nullptr),	92 m_pTrailer(nullptr),

98 m_pEncryptDict(nullptr),	93 m_pEncryptDict(nullptr),

99 m_pLinearized(nullptr),	94 m_pLinearized(nullptr),

100 m_dwFirstPageNo(0),	95 m_dwFirstPageNo(0),

101 m_dwXrefStartObjNum(0) {}	96 m_dwXrefStartObjNum(0) {

	97 m_pSyntax.reset(new CPDF_SyntaxParser);

	98 }

102	99

103 CPDF_Parser::~CPDF_Parser() {	100 CPDF_Parser::~CPDF_Parser() {

104 CloseParser();	101 CloseParser();

105 }	102 }

106	103

107 FX_DWORD CPDF_Parser::GetLastObjNum() const {	104 FX_DWORD CPDF_Parser::GetLastObjNum() const {

108 return m_ObjectInfo.empty() ? 0 : m_ObjectInfo.rbegin()->first;	105 return m_ObjectInfo.empty() ? 0 : m_ObjectInfo.rbegin()->first;

109 }	106 }

110	107

111 bool CPDF_Parser::IsValidObjectNumber(FX_DWORD objnum) const {	108 bool CPDF_Parser::IsValidObjectNumber(FX_DWORD objnum) const {

(...skipping 19 matching lines...) Expand all Loading...
131	128

132 bool CPDF_Parser::IsObjectFreeOrNull(FX_DWORD objnum) const {	129 bool CPDF_Parser::IsObjectFreeOrNull(FX_DWORD objnum) const {

133 uint8_t type = GetObjectType(objnum);	130 uint8_t type = GetObjectType(objnum);

134 return type == 0 || type == 255;	131 return type == 0 || type == 255;

135 }	132 }

136	133

137 void CPDF_Parser::SetEncryptDictionary(CPDF_Dictionary* pDict) {	134 void CPDF_Parser::SetEncryptDictionary(CPDF_Dictionary* pDict) {

138 m_pEncryptDict = pDict;	135 m_pEncryptDict = pDict;

139 }	136 }

140	137

	138 CPDF_CryptoHandler* CPDF_Parser::GetCryptoHandler() {

	139 return m_pSyntax->m_pCryptoHandler.get();

	140 }

	141

	142 IFX_FileRead* CPDF_Parser::GetFileAccess() const {

	143 return m_pSyntax->m_pFileAccess;

	144 }

	145

141 void CPDF_Parser::ShrinkObjectMap(FX_DWORD objnum) {	146 void CPDF_Parser::ShrinkObjectMap(FX_DWORD objnum) {

142 if (objnum == 0) {	147 if (objnum == 0) {

143 m_ObjectInfo.clear();	148 m_ObjectInfo.clear();

144 return;	149 return;

145 }	150 }

146	151

147 auto it = m_ObjectInfo.lower_bound(objnum);	152 auto it = m_ObjectInfo.lower_bound(objnum);

148 while (it != m_ObjectInfo.end()) {	153 while (it != m_ObjectInfo.end()) {

149 auto saved_it = it++;	154 auto saved_it = it++;

150 m_ObjectInfo.erase(saved_it);	155 m_ObjectInfo.erase(saved_it);

151 }	156 }

152	157

153 if (!pdfium::ContainsKey(m_ObjectInfo, objnum - 1))	158 if (!pdfium::ContainsKey(m_ObjectInfo, objnum - 1))

154 m_ObjectInfo[objnum - 1].pos = 0;	159 m_ObjectInfo[objnum - 1].pos = 0;

155 }	160 }

156	161

157 void CPDF_Parser::CloseParser() {	162 void CPDF_Parser::CloseParser() {

158 m_bVersionUpdated = FALSE;	163 m_bVersionUpdated = FALSE;

159 delete m_pDocument;	164 delete m_pDocument;

160 m_pDocument = nullptr;	165 m_pDocument = nullptr;

161	166

162 if (m_pTrailer) {	167 if (m_pTrailer) {

163 m_pTrailer->Release();	168 m_pTrailer->Release();

164 m_pTrailer = nullptr;	169 m_pTrailer = nullptr;

165 }	170 }

166 ReleaseEncryptHandler();	171 ReleaseEncryptHandler();

167 SetEncryptDictionary(nullptr);	172 SetEncryptDictionary(nullptr);

168	173

169 if (m_bOwnFileRead && m_Syntax.m_pFileAccess) {	174 if (m_bOwnFileRead && m_pSyntax->m_pFileAccess) {

170 m_Syntax.m_pFileAccess->Release();	175 m_pSyntax->m_pFileAccess->Release();

171 m_Syntax.m_pFileAccess = nullptr;	176 m_pSyntax->m_pFileAccess = nullptr;

172 }	177 }

173	178

174 m_ObjectStreamMap.clear();	179 m_ObjectStreamMap.clear();

175 m_ObjCache.clear();	180 m_ObjCache.clear();

176 m_SortedOffset.clear();	181 m_SortedOffset.clear();

177 m_ObjectInfo.clear();	182 m_ObjectInfo.clear();

178	183

179 int32_t iLen = m_Trailers.GetSize();	184 int32_t iLen = m_Trailers.GetSize();

180 for (int32_t i = 0; i < iLen; ++i) {	185 for (int32_t i = 0; i < iLen; ++i) {

181 if (CPDF_Dictionary* trailer = m_Trailers.GetAt(i))	186 if (CPDF_Dictionary* trailer = m_Trailers.GetAt(i))

(...skipping 13 matching lines...) Expand all Loading...
195 m_bXRefStream = FALSE;	200 m_bXRefStream = FALSE;

196 m_LastXRefOffset = 0;	201 m_LastXRefOffset = 0;

197 m_bOwnFileRead = true;	202 m_bOwnFileRead = true;

198	203

199 int32_t offset = GetHeaderOffset(pFileAccess);	204 int32_t offset = GetHeaderOffset(pFileAccess);

200 if (offset == -1) {	205 if (offset == -1) {

201 if (pFileAccess)	206 if (pFileAccess)

202 pFileAccess->Release();	207 pFileAccess->Release();

203 return FORMAT_ERROR;	208 return FORMAT_ERROR;

204 }	209 }

205 m_Syntax.InitParser(pFileAccess, offset);	210 m_pSyntax->InitParser(pFileAccess, offset);

206	211

207 uint8_t ch;	212 uint8_t ch;

208 if (!m_Syntax.GetCharAt(5, ch))	213 if (!m_pSyntax->GetCharAt(5, ch))

209 return FORMAT_ERROR;	214 return FORMAT_ERROR;

210 if (std::isdigit(ch))	215 if (std::isdigit(ch))

211 m_FileVersion = FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch)) * 10;	216 m_FileVersion = FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch)) * 10;

212	217

213 if (!m_Syntax.GetCharAt(7, ch))	218 if (!m_pSyntax->GetCharAt(7, ch))

214 return FORMAT_ERROR;	219 return FORMAT_ERROR;

215 if (std::isdigit(ch))	220 if (std::isdigit(ch))

216 m_FileVersion += FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch));	221 m_FileVersion += FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch));

217	222

218 if (m_Syntax.m_FileLen < m_Syntax.m_HeaderOffset + 9)	223 if (m_pSyntax->m_FileLen < m_pSyntax->m_HeaderOffset + 9)

219 return FORMAT_ERROR;	224 return FORMAT_ERROR;

220	225

221 m_Syntax.RestorePos(m_Syntax.m_FileLen - m_Syntax.m_HeaderOffset - 9);	226 m_pSyntax->RestorePos(m_pSyntax->m_FileLen - m_pSyntax->m_HeaderOffset - 9);

222 m_pDocument = new CPDF_Document(this);	227 m_pDocument = new CPDF_Document(this);

223	228

224 FX_BOOL bXRefRebuilt = FALSE;	229 FX_BOOL bXRefRebuilt = FALSE;

225 if (m_Syntax.SearchWord("startxref", TRUE, FALSE, 4096)) {	230 if (m_pSyntax->SearchWord("startxref", TRUE, FALSE, 4096)) {

226 m_SortedOffset.insert(m_Syntax.SavePos());	231 m_SortedOffset.insert(m_pSyntax->SavePos());

227 m_Syntax.GetKeyword();	232 m_pSyntax->GetKeyword();

228	233

229 bool bNumber;	234 bool bNumber;

230 CFX_ByteString xrefpos_str = m_Syntax.GetNextWord(&bNumber);	235 CFX_ByteString xrefpos_str = m_pSyntax->GetNextWord(&bNumber);

231 if (!bNumber)	236 if (!bNumber)

232 return FORMAT_ERROR;	237 return FORMAT_ERROR;

233	238

234 m_LastXRefOffset = (FX_FILESIZE)FXSYS_atoi64(xrefpos_str);	239 m_LastXRefOffset = (FX_FILESIZE)FXSYS_atoi64(xrefpos_str);

235 if (!LoadAllCrossRefV4(m_LastXRefOffset) &&	240 if (!LoadAllCrossRefV4(m_LastXRefOffset) &&

236 !LoadAllCrossRefV5(m_LastXRefOffset)) {	241 !LoadAllCrossRefV5(m_LastXRefOffset)) {

237 if (!RebuildCrossRef())	242 if (!RebuildCrossRef())

238 return FORMAT_ERROR;	243 return FORMAT_ERROR;

239	244

240 bXRefRebuilt = TRUE;	245 bXRefRebuilt = TRUE;

(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
273 return FORMAT_ERROR;	278 return FORMAT_ERROR;

274	279

275 eRet = SetEncryptHandler();	280 eRet = SetEncryptHandler();

276 if (eRet != SUCCESS)	281 if (eRet != SUCCESS)

277 return eRet;	282 return eRet;

278 }	283 }

279 if (m_pSecurityHandler && !m_pSecurityHandler->IsMetadataEncrypted()) {	284 if (m_pSecurityHandler && !m_pSecurityHandler->IsMetadataEncrypted()) {

280 CPDF_Reference* pMetadata =	285 CPDF_Reference* pMetadata =

281 ToReference(m_pDocument->GetRoot()->GetElement("Metadata"));	286 ToReference(m_pDocument->GetRoot()->GetElement("Metadata"));

282 if (pMetadata)	287 if (pMetadata)

283 m_Syntax.m_MetadataObjnum = pMetadata->GetRefObjNum();	288 m_pSyntax->m_MetadataObjnum = pMetadata->GetRefObjNum();

284 }	289 }

285 return SUCCESS;	290 return SUCCESS;

286 }	291 }

287 CPDF_Parser::Error CPDF_Parser::SetEncryptHandler() {	292 CPDF_Parser::Error CPDF_Parser::SetEncryptHandler() {

288 ReleaseEncryptHandler();	293 ReleaseEncryptHandler();

289 SetEncryptDictionary(nullptr);	294 SetEncryptDictionary(nullptr);

290	295

291 if (!m_pTrailer)	296 if (!m_pTrailer)

292 return FORMAT_ERROR;	297 return FORMAT_ERROR;

293	298

(...skipping 20 matching lines...) Expand all Loading...
314 return HANDLER_ERROR;	319 return HANDLER_ERROR;

315	320

316 if (!pSecurityHandler->OnInit(this, m_pEncryptDict))	321 if (!pSecurityHandler->OnInit(this, m_pEncryptDict))

317 return err;	322 return err;

318	323

319 m_pSecurityHandler = std::move(pSecurityHandler);	324 m_pSecurityHandler = std::move(pSecurityHandler);

320 std::unique_ptr<CPDF_CryptoHandler> pCryptoHandler(	325 std::unique_ptr<CPDF_CryptoHandler> pCryptoHandler(

321 m_pSecurityHandler->CreateCryptoHandler());	326 m_pSecurityHandler->CreateCryptoHandler());

322 if (!pCryptoHandler->Init(m_pEncryptDict, m_pSecurityHandler.get()))	327 if (!pCryptoHandler->Init(m_pEncryptDict, m_pSecurityHandler.get()))

323 return HANDLER_ERROR;	328 return HANDLER_ERROR;

324 m_Syntax.SetEncrypt(std::move(pCryptoHandler));	329 m_pSyntax->SetEncrypt(std::move(pCryptoHandler));

325 }	330 }

326 return SUCCESS;	331 return SUCCESS;

327 }	332 }

328	333

329 void CPDF_Parser::ReleaseEncryptHandler() {	334 void CPDF_Parser::ReleaseEncryptHandler() {

330 m_Syntax.m_pCryptoHandler.reset();	335 m_pSyntax->m_pCryptoHandler.reset();

331 m_pSecurityHandler.reset();	336 m_pSecurityHandler.reset();

332 }	337 }

333	338

334 FX_FILESIZE CPDF_Parser::GetObjectOffset(FX_DWORD objnum) const {	339 FX_FILESIZE CPDF_Parser::GetObjectOffset(FX_DWORD objnum) const {

335 if (!IsValidObjectNumber(objnum))	340 if (!IsValidObjectNumber(objnum))

336 return 0;	341 return 0;

337	342

338 if (GetObjectType(objnum) == 1)	343 if (GetObjectType(objnum) == 1)

339 return GetObjectPositionOrZero(objnum);	344 return GetObjectPositionOrZero(objnum);

340	345

(...skipping 106 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
447	452

448 for (size_t i = 1; i < CrossRefList.size(); ++i) {	453 for (size_t i = 1; i < CrossRefList.size(); ++i) {

449 if (!LoadCrossRefV4(CrossRefList[i], XRefStreamList[i], FALSE))	454 if (!LoadCrossRefV4(CrossRefList[i], XRefStreamList[i], FALSE))

450 return FALSE;	455 return FALSE;

451 }	456 }

452 return TRUE;	457 return TRUE;

453 }	458 }

454	459

455 FX_BOOL CPDF_Parser::LoadLinearizedCrossRefV4(FX_FILESIZE pos,	460 FX_BOOL CPDF_Parser::LoadLinearizedCrossRefV4(FX_FILESIZE pos,

456 FX_DWORD dwObjCount) {	461 FX_DWORD dwObjCount) {

457 FX_FILESIZE dwStartPos = pos - m_Syntax.m_HeaderOffset;	462 FX_FILESIZE dwStartPos = pos - m_pSyntax->m_HeaderOffset;

458	463

459 m_Syntax.RestorePos(dwStartPos);	464 m_pSyntax->RestorePos(dwStartPos);

460 m_SortedOffset.insert(pos);	465 m_SortedOffset.insert(pos);

461	466

462 FX_DWORD start_objnum = 0;	467 FX_DWORD start_objnum = 0;

463 FX_DWORD count = dwObjCount;	468 FX_DWORD count = dwObjCount;

464 FX_FILESIZE SavedPos = m_Syntax.SavePos();	469 FX_FILESIZE SavedPos = m_pSyntax->SavePos();

465	470

466 const int32_t recordsize = 20;	471 const int32_t recordsize = 20;

467 std::vector<char> buf(1024 * recordsize + 1);	472 std::vector<char> buf(1024 * recordsize + 1);

468 buf[1024 * recordsize] = '\0';	473 buf[1024 * recordsize] = '\0';

469	474

470 int32_t nBlocks = count / 1024 + 1;	475 int32_t nBlocks = count / 1024 + 1;

471 for (int32_t block = 0; block < nBlocks; block++) {	476 for (int32_t block = 0; block < nBlocks; block++) {

472 int32_t block_size = block == nBlocks - 1 ? count % 1024 : 1024;	477 int32_t block_size = block == nBlocks - 1 ? count % 1024 : 1024;

473 FX_DWORD dwReadSize = block_size * recordsize;	478 FX_DWORD dwReadSize = block_size * recordsize;

474 if ((FX_FILESIZE)(dwStartPos + dwReadSize) > m_Syntax.m_FileLen)	479 if ((FX_FILESIZE)(dwStartPos + dwReadSize) > m_pSyntax->m_FileLen)

475 return FALSE;	480 return FALSE;

476	481

477 if (!m_Syntax.ReadBlock(reinterpret_cast<uint8_t*>(buf.data()),	482 if (!m_pSyntax->ReadBlock(reinterpret_cast<uint8_t*>(buf.data()),

478 dwReadSize)) {	483 dwReadSize)) {

479 return FALSE;	484 return FALSE;

480 }	485 }

481	486

482 for (int32_t i = 0; i < block_size; i++) {	487 for (int32_t i = 0; i < block_size; i++) {

483 FX_DWORD objnum = start_objnum + block * 1024 + i;	488 FX_DWORD objnum = start_objnum + block * 1024 + i;

484 char* pEntry = &buf[i * recordsize];	489 char* pEntry = &buf[i * recordsize];

485 if (pEntry[17] == 'f') {	490 if (pEntry[17] == 'f') {

486 m_ObjectInfo[objnum].pos = 0;	491 m_ObjectInfo[objnum].pos = 0;

487 m_ObjectInfo[objnum].type = 0;	492 m_ObjectInfo[objnum].type = 0;

488 } else {	493 } else {

489 int32_t offset = FXSYS_atoi(pEntry);	494 int32_t offset = FXSYS_atoi(pEntry);

490 if (offset == 0) {	495 if (offset == 0) {

491 for (int32_t c = 0; c < 10; c++) {	496 for (int32_t c = 0; c < 10; c++) {

492 if (!std::isdigit(pEntry[c]))	497 if (!std::isdigit(pEntry[c]))

493 return FALSE;	498 return FALSE;

494 }	499 }

495 }	500 }

496	501

497 m_ObjectInfo[objnum].pos = offset;	502 m_ObjectInfo[objnum].pos = offset;

498 int32_t version = FXSYS_atoi(pEntry + 11);	503 int32_t version = FXSYS_atoi(pEntry + 11);

499 if (version >= 1)	504 if (version >= 1)

500 m_bVersionUpdated = TRUE;	505 m_bVersionUpdated = TRUE;

501	506

502 m_ObjectInfo[objnum].gennum = version;	507 m_ObjectInfo[objnum].gennum = version;

503 if (m_ObjectInfo[objnum].pos < m_Syntax.m_FileLen)	508 if (m_ObjectInfo[objnum].pos < m_pSyntax->m_FileLen)

504 m_SortedOffset.insert(m_ObjectInfo[objnum].pos);	509 m_SortedOffset.insert(m_ObjectInfo[objnum].pos);

505	510

506 m_ObjectInfo[objnum].type = 1;	511 m_ObjectInfo[objnum].type = 1;

507 }	512 }

508 }	513 }

509 }	514 }

510 m_Syntax.RestorePos(SavedPos + count * recordsize);	515 m_pSyntax->RestorePos(SavedPos + count * recordsize);

511 return TRUE;	516 return TRUE;

512 }	517 }

513	518

514 bool CPDF_Parser::LoadCrossRefV4(FX_FILESIZE pos,	519 bool CPDF_Parser::LoadCrossRefV4(FX_FILESIZE pos,

515 FX_FILESIZE streampos,	520 FX_FILESIZE streampos,

516 FX_BOOL bSkip) {	521 FX_BOOL bSkip) {

517 m_Syntax.RestorePos(pos);	522 m_pSyntax->RestorePos(pos);

518 if (m_Syntax.GetKeyword() != "xref")	523 if (m_pSyntax->GetKeyword() != "xref")

519 return false;	524 return false;

520	525

521 m_SortedOffset.insert(pos);	526 m_SortedOffset.insert(pos);

522 if (streampos)	527 if (streampos)

523 m_SortedOffset.insert(streampos);	528 m_SortedOffset.insert(streampos);

524	529

525 while (1) {	530 while (1) {

526 FX_FILESIZE SavedPos = m_Syntax.SavePos();	531 FX_FILESIZE SavedPos = m_pSyntax->SavePos();

527 bool bIsNumber;	532 bool bIsNumber;

528 CFX_ByteString word = m_Syntax.GetNextWord(&bIsNumber);	533 CFX_ByteString word = m_pSyntax->GetNextWord(&bIsNumber);

529 if (word.IsEmpty())	534 if (word.IsEmpty())

530 return false;	535 return false;

531	536

532 if (!bIsNumber) {	537 if (!bIsNumber) {

533 m_Syntax.RestorePos(SavedPos);	538 m_pSyntax->RestorePos(SavedPos);

534 break;	539 break;

535 }	540 }

536	541

537 FX_DWORD start_objnum = FXSYS_atoui(word);	542 FX_DWORD start_objnum = FXSYS_atoui(word);

538 if (start_objnum >= kMaxObjectNumber)	543 if (start_objnum >= kMaxObjectNumber)

539 return false;	544 return false;

540	545

541 FX_DWORD count = m_Syntax.GetDirectNum();	546 FX_DWORD count = m_pSyntax->GetDirectNum();

542 m_Syntax.ToNextWord();	547 m_pSyntax->ToNextWord();

543 SavedPos = m_Syntax.SavePos();	548 SavedPos = m_pSyntax->SavePos();

544 const int32_t recordsize = 20;	549 const int32_t recordsize = 20;

545	550

546 m_dwXrefStartObjNum = start_objnum;	551 m_dwXrefStartObjNum = start_objnum;

547 if (!bSkip) {	552 if (!bSkip) {

548 std::vector<char> buf(1024 * recordsize + 1);	553 std::vector<char> buf(1024 * recordsize + 1);

549 buf[1024 * recordsize] = '\0';	554 buf[1024 * recordsize] = '\0';

550	555

551 int32_t nBlocks = count / 1024 + 1;	556 int32_t nBlocks = count / 1024 + 1;

552 for (int32_t block = 0; block < nBlocks; block++) {	557 for (int32_t block = 0; block < nBlocks; block++) {

553 int32_t block_size = block == nBlocks - 1 ? count % 1024 : 1024;	558 int32_t block_size = block == nBlocks - 1 ? count % 1024 : 1024;

554 m_Syntax.ReadBlock(reinterpret_cast<uint8_t*>(buf.data()),	559 m_pSyntax->ReadBlock(reinterpret_cast<uint8_t*>(buf.data()),

555 block_size * recordsize);	560 block_size * recordsize);

556	561

557 for (int32_t i = 0; i < block_size; i++) {	562 for (int32_t i = 0; i < block_size; i++) {

558 FX_DWORD objnum = start_objnum + block * 1024 + i;	563 FX_DWORD objnum = start_objnum + block * 1024 + i;

559 char* pEntry = &buf[i * recordsize];	564 char* pEntry = &buf[i * recordsize];

560 if (pEntry[17] == 'f') {	565 if (pEntry[17] == 'f') {

561 m_ObjectInfo[objnum].pos = 0;	566 m_ObjectInfo[objnum].pos = 0;

562 m_ObjectInfo[objnum].type = 0;	567 m_ObjectInfo[objnum].type = 0;

563 } else {	568 } else {

564 FX_FILESIZE offset = (FX_FILESIZE)FXSYS_atoi64(pEntry);	569 FX_FILESIZE offset = (FX_FILESIZE)FXSYS_atoi64(pEntry);

565 if (offset == 0) {	570 if (offset == 0) {

566 for (int32_t c = 0; c < 10; c++) {	571 for (int32_t c = 0; c < 10; c++) {

567 if (!std::isdigit(pEntry[c]))	572 if (!std::isdigit(pEntry[c]))

568 return false;	573 return false;

569 }	574 }

570 }	575 }

571	576

572 m_ObjectInfo[objnum].pos = offset;	577 m_ObjectInfo[objnum].pos = offset;

573 int32_t version = FXSYS_atoi(pEntry + 11);	578 int32_t version = FXSYS_atoi(pEntry + 11);

574 if (version >= 1)	579 if (version >= 1)

575 m_bVersionUpdated = TRUE;	580 m_bVersionUpdated = TRUE;

576	581

577 m_ObjectInfo[objnum].gennum = version;	582 m_ObjectInfo[objnum].gennum = version;

578 if (m_ObjectInfo[objnum].pos < m_Syntax.m_FileLen)	583 if (m_ObjectInfo[objnum].pos < m_pSyntax->m_FileLen)

579 m_SortedOffset.insert(m_ObjectInfo[objnum].pos);	584 m_SortedOffset.insert(m_ObjectInfo[objnum].pos);

580	585

581 m_ObjectInfo[objnum].type = 1;	586 m_ObjectInfo[objnum].type = 1;

582 }	587 }

583 }	588 }

584 }	589 }

585 }	590 }

586 m_Syntax.RestorePos(SavedPos + count * recordsize);	591 m_pSyntax->RestorePos(SavedPos + count * recordsize);

587 }	592 }

588 return !streampos || LoadCrossRefV5(&streampos, FALSE);	593 return !streampos || LoadCrossRefV5(&streampos, FALSE);

589 }	594 }

590	595

591 FX_BOOL CPDF_Parser::LoadAllCrossRefV5(FX_FILESIZE xrefpos) {	596 FX_BOOL CPDF_Parser::LoadAllCrossRefV5(FX_FILESIZE xrefpos) {

592 if (!LoadCrossRefV5(&xrefpos, TRUE))	597 if (!LoadCrossRefV5(&xrefpos, TRUE))

593 return FALSE;	598 return FALSE;

594	599

595 std::set<FX_FILESIZE> seen_xrefpos;	600 std::set<FX_FILESIZE> seen_xrefpos;

596 while (xrefpos) {	601 while (xrefpos) {

(...skipping 21 matching lines...) Expand all Loading...
618 ParserState state = ParserState::kDefault;	623 ParserState state = ParserState::kDefault;

619	624

620 int32_t inside_index = 0;	625 int32_t inside_index = 0;

621 FX_DWORD objnum = 0;	626 FX_DWORD objnum = 0;

622 FX_DWORD gennum = 0;	627 FX_DWORD gennum = 0;

623 int32_t depth = 0;	628 int32_t depth = 0;

624	629

625 const FX_DWORD kBufferSize = 4096;	630 const FX_DWORD kBufferSize = 4096;

626 std::vector<uint8_t> buffer(kBufferSize);	631 std::vector<uint8_t> buffer(kBufferSize);

627	632

628 FX_FILESIZE pos = m_Syntax.m_HeaderOffset;	633 FX_FILESIZE pos = m_pSyntax->m_HeaderOffset;

629 FX_FILESIZE start_pos = 0;	634 FX_FILESIZE start_pos = 0;

630 FX_FILESIZE start_pos1 = 0;	635 FX_FILESIZE start_pos1 = 0;

631 FX_FILESIZE last_obj = -1;	636 FX_FILESIZE last_obj = -1;

632 FX_FILESIZE last_xref = -1;	637 FX_FILESIZE last_xref = -1;

633 FX_FILESIZE last_trailer = -1;	638 FX_FILESIZE last_trailer = -1;

634	639

635 while (pos < m_Syntax.m_FileLen) {	640 while (pos < m_pSyntax->m_FileLen) {

636 const FX_FILESIZE saved_pos = pos;	641 const FX_FILESIZE saved_pos = pos;

637 bool bOverFlow = false;	642 bool bOverFlow = false;

638 FX_DWORD size = std::min((FX_DWORD)(m_Syntax.m_FileLen - pos), kBufferSize);	643 FX_DWORD size =

639 if (!m_Syntax.m_pFileAccess->ReadBlock(buffer.data(), pos, size))	644 std::min((FX_DWORD)(m_pSyntax->m_FileLen - pos), kBufferSize);

	645 if (!m_pSyntax->m_pFileAccess->ReadBlock(buffer.data(), pos, size))

640 break;	646 break;

641	647

642 for (FX_DWORD i = 0; i < size; i++) {	648 for (FX_DWORD i = 0; i < size; i++) {

643 uint8_t byte = buffer[i];	649 uint8_t byte = buffer[i];

644 switch (state) {	650 switch (state) {

645 case ParserState::kDefault:	651 case ParserState::kDefault:

646 if (PDFCharIsWhitespace(byte)) {	652 if (PDFCharIsWhitespace(byte)) {

647 state = ParserState::kWhitespace;	653 state = ParserState::kWhitespace;

648 } else if (std::isdigit(byte)) {	654 } else if (std::isdigit(byte)) {

649 --i;	655 --i;

(...skipping 103 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
753 case 2:	759 case 2:

754 if (byte != 'j') {	760 if (byte != 'j') {

755 --i;	761 --i;

756 state = ParserState::kDefault;	762 state = ParserState::kDefault;

757 } else {	763 } else {

758 inside_index++;	764 inside_index++;

759 }	765 }

760 break;	766 break;

761 case 3:	767 case 3:

762 if (PDFCharIsWhitespace(byte) || PDFCharIsDelimiter(byte)) {	768 if (PDFCharIsWhitespace(byte) || PDFCharIsDelimiter(byte)) {

763 FX_FILESIZE obj_pos = start_pos - m_Syntax.m_HeaderOffset;	769 FX_FILESIZE obj_pos = start_pos - m_pSyntax->m_HeaderOffset;

764 m_SortedOffset.insert(obj_pos);	770 m_SortedOffset.insert(obj_pos);

765 last_obj = start_pos;	771 last_obj = start_pos;

766 FX_FILESIZE obj_end = 0;	772 FX_FILESIZE obj_end = 0;

767 CPDF_Object* pObject = ParseIndirectObjectAtByStrict(	773 CPDF_Object* pObject = ParseIndirectObjectAtByStrict(

768 m_pDocument, obj_pos, objnum, &obj_end);	774 m_pDocument, obj_pos, objnum, &obj_end);

769 if (CPDF_Stream* pStream = ToStream(pObject)) {	775 if (CPDF_Stream* pStream = ToStream(pObject)) {

770 if (CPDF_Dictionary* pDict = pStream->GetDict()) {	776 if (CPDF_Dictionary* pDict = pStream->GetDict()) {

771 if ((pDict->KeyExist("Type")) &&	777 if ((pDict->KeyExist("Type")) &&

772 (pDict->GetStringBy("Type") == "XRef" &&	778 (pDict->GetStringBy("Type") == "XRef" &&

773 pDict->KeyExist("Size"))) {	779 pDict->KeyExist("Size"))) {

774 CPDF_Object* pRoot = pDict->GetElement("Root");	780 CPDF_Object* pRoot = pDict->GetElement("Root");

775 if (pRoot && pRoot->GetDict() &&	781 if (pRoot && pRoot->GetDict() &&

776 pRoot->GetDict()->GetElement("Pages")) {	782 pRoot->GetDict()->GetElement("Pages")) {

777 if (m_pTrailer)	783 if (m_pTrailer)

778 m_pTrailer->Release();	784 m_pTrailer->Release();

779 m_pTrailer = ToDictionary(pDict->Clone());	785 m_pTrailer = ToDictionary(pDict->Clone());

780 }	786 }

781 }	787 }

782 }	788 }

783 }	789 }

784	790

785 FX_FILESIZE offset = 0;	791 FX_FILESIZE offset = 0;

786 m_Syntax.RestorePos(obj_pos);	792 m_pSyntax->RestorePos(obj_pos);

787 offset = m_Syntax.FindTag("obj", 0);	793 offset = m_pSyntax->FindTag("obj", 0);

788 if (offset == -1)	794 if (offset == -1)

789 offset = 0;	795 offset = 0;

790 else	796 else

791 offset += 3;	797 offset += 3;

792	798

793 FX_FILESIZE nLen = obj_end - obj_pos - offset;	799 FX_FILESIZE nLen = obj_end - obj_pos - offset;

794 if ((FX_DWORD)nLen > size - i) {	800 if ((FX_DWORD)nLen > size - i) {

795 pos = obj_end + m_Syntax.m_HeaderOffset;	801 pos = obj_end + m_pSyntax->m_HeaderOffset;

796 bOverFlow = true;	802 bOverFlow = true;

797 } else {	803 } else {

798 i += (FX_DWORD)nLen;	804 i += (FX_DWORD)nLen;

799 }	805 }

800	806

801 if (!m_ObjectInfo.empty() && IsValidObjectNumber(objnum) &&	807 if (!m_ObjectInfo.empty() && IsValidObjectNumber(objnum) &&

802 m_ObjectInfo[objnum].pos) {	808 m_ObjectInfo[objnum].pos) {

803 if (pObject) {	809 if (pObject) {

804 FX_DWORD oldgen = GetObjectGenNum(objnum);	810 FX_DWORD oldgen = GetObjectGenNum(objnum);

805 m_ObjectInfo[objnum].pos = obj_pos;	811 m_ObjectInfo[objnum].pos = obj_pos;

(...skipping 13 matching lines...) Expand all Loading...
819 --i;	825 --i;

820 state = ParserState::kDefault;	826 state = ParserState::kDefault;

821 break;	827 break;

822 }	828 }

823 break;	829 break;

824	830

825 case ParserState::kTrailer:	831 case ParserState::kTrailer:

826 if (inside_index == 7) {	832 if (inside_index == 7) {

827 if (PDFCharIsWhitespace(byte) || PDFCharIsDelimiter(byte)) {	833 if (PDFCharIsWhitespace(byte) || PDFCharIsDelimiter(byte)) {

828 last_trailer = pos + i - 7;	834 last_trailer = pos + i - 7;

829 m_Syntax.RestorePos(pos + i - m_Syntax.m_HeaderOffset);	835 m_pSyntax->RestorePos(pos + i - m_pSyntax->m_HeaderOffset);

830	836

831 CPDF_Object* pObj = m_Syntax.GetObject(m_pDocument, 0, 0, true);	837 CPDF_Object* pObj = m_pSyntax->GetObject(m_pDocument, 0, 0, true);

832 if (pObj) {	838 if (pObj) {

833 if (!pObj->IsDictionary() && !pObj->AsStream()) {	839 if (!pObj->IsDictionary() && !pObj->AsStream()) {

834 pObj->Release();	840 pObj->Release();

835 } else {	841 } else {

836 CPDF_Stream* pStream = pObj->AsStream();	842 CPDF_Stream* pStream = pObj->AsStream();

837 if (CPDF_Dictionary* pTrailer =	843 if (CPDF_Dictionary* pTrailer =

838 pStream ? pStream->GetDict() : pObj->AsDictionary()) {	844 pStream ? pStream->GetDict() : pObj->AsDictionary()) {

839 if (m_pTrailer) {	845 if (m_pTrailer) {

840 CPDF_Object* pRoot = pTrailer->GetElement("Root");	846 CPDF_Object* pRoot = pTrailer->GetElement("Root");

841 CPDF_Reference* pRef = ToReference(pRoot);	847 CPDF_Reference* pRef = ToReference(pRoot);

(...skipping 17 matching lines...) Expand all Loading...
859 }	865 }

860 pObj->Release();	866 pObj->Release();

861 } else {	867 } else {

862 if (pObj->IsStream()) {	868 if (pObj->IsStream()) {

863 m_pTrailer = ToDictionary(pTrailer->Clone());	869 m_pTrailer = ToDictionary(pTrailer->Clone());

864 pObj->Release();	870 pObj->Release();

865 } else {	871 } else {

866 m_pTrailer = pTrailer;	872 m_pTrailer = pTrailer;

867 }	873 }

868	874

869 FX_FILESIZE dwSavePos = m_Syntax.SavePos();	875 FX_FILESIZE dwSavePos = m_pSyntax->SavePos();

870 CFX_ByteString strWord = m_Syntax.GetKeyword();	876 CFX_ByteString strWord = m_pSyntax->GetKeyword();

871 if (!strWord.Compare("startxref")) {	877 if (!strWord.Compare("startxref")) {

872 bool bNumber;	878 bool bNumber;

873 CFX_ByteString bsOffset =	879 CFX_ByteString bsOffset =

874 m_Syntax.GetNextWord(&bNumber);	880 m_pSyntax->GetNextWord(&bNumber);

875 if (bNumber)	881 if (bNumber)

876 m_LastXRefOffset = FXSYS_atoi(bsOffset);	882 m_LastXRefOffset = FXSYS_atoi(bsOffset);

877 }	883 }

878 m_Syntax.RestorePos(dwSavePos);	884 m_pSyntax->RestorePos(dwSavePos);

879 }	885 }

880 } else {	886 } else {

881 pObj->Release();	887 pObj->Release();

882 }	888 }

883 }	889 }

884 }	890 }

885 }	891 }

886 --i;	892 --i;

887 state = ParserState::kDefault;	893 state = ParserState::kDefault;

888 } else if (byte == "trailer"[inside_index]) {	894 } else if (byte == "trailer"[inside_index]) {

(...skipping 71 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
960	966

961 // If the position has not changed at all in a loop iteration, then break	967 // If the position has not changed at all in a loop iteration, then break

962 // out to prevent infinite looping.	968 // out to prevent infinite looping.

963 if (pos == saved_pos)	969 if (pos == saved_pos)

964 break;	970 break;

965 }	971 }

966	972

967 if (last_xref != -1 && last_xref > last_obj)	973 if (last_xref != -1 && last_xref > last_obj)

968 last_trailer = last_xref;	974 last_trailer = last_xref;

969 else if (last_trailer == -1 || last_xref < last_obj)	975 else if (last_trailer == -1 || last_xref < last_obj)

970 last_trailer = m_Syntax.m_FileLen;	976 last_trailer = m_pSyntax->m_FileLen;

971	977

972 m_SortedOffset.insert(last_trailer - m_Syntax.m_HeaderOffset);	978 m_SortedOffset.insert(last_trailer - m_pSyntax->m_HeaderOffset);

973 return m_pTrailer && !m_ObjectInfo.empty();	979 return m_pTrailer && !m_ObjectInfo.empty();

974 }	980 }

975	981

976 FX_BOOL CPDF_Parser::LoadCrossRefV5(FX_FILESIZE* pos, FX_BOOL bMainXRef) {	982 FX_BOOL CPDF_Parser::LoadCrossRefV5(FX_FILESIZE* pos, FX_BOOL bMainXRef) {

977 CPDF_Object* pObject = ParseIndirectObjectAt(m_pDocument, *pos, 0);	983 CPDF_Object* pObject = ParseIndirectObjectAt(m_pDocument, *pos, 0);

978 if (!pObject)	984 if (!pObject)

979 return FALSE;	985 return FALSE;

980	986

981 if (m_pDocument) {	987 if (m_pDocument) {

982 FX_BOOL bInserted = FALSE;	988 FX_BOOL bInserted = FALSE;

(...skipping 176 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1159	1165

1160 FX_FILESIZE pos = m_ObjectInfo[objnum].pos;	1166 FX_FILESIZE pos = m_ObjectInfo[objnum].pos;

1161 auto it = m_SortedOffset.find(pos);	1167 auto it = m_SortedOffset.find(pos);

1162 if (it == m_SortedOffset.end())	1168 if (it == m_SortedOffset.end())

1163 return TRUE;	1169 return TRUE;

1164	1170

1165 if (++it == m_SortedOffset.end())	1171 if (++it == m_SortedOffset.end())

1166 return FALSE;	1172 return FALSE;

1167	1173

1168 FX_FILESIZE size = *it - pos;	1174 FX_FILESIZE size = *it - pos;

1169 FX_FILESIZE SavedPos = m_Syntax.SavePos();	1175 FX_FILESIZE SavedPos = m_pSyntax->SavePos();

1170 m_Syntax.RestorePos(pos);	1176 m_pSyntax->RestorePos(pos);

1171	1177

1172 const char kFormStream[] = "/Form\0stream";	1178 const char kFormStream[] = "/Form\0stream";

1173 const CFX_ByteStringC kFormStreamStr(kFormStream, sizeof(kFormStream) - 1);	1179 const CFX_ByteStringC kFormStreamStr(kFormStream, sizeof(kFormStream) - 1);

1174 bForm = m_Syntax.SearchMultiWord(kFormStreamStr, TRUE, size) == 0;	1180 bForm = m_pSyntax->SearchMultiWord(kFormStreamStr, TRUE, size) == 0;

1175 m_Syntax.RestorePos(SavedPos);	1181 m_pSyntax->RestorePos(SavedPos);

1176 return TRUE;	1182 return TRUE;

1177 }	1183 }

1178	1184

1179 CPDF_Object* CPDF_Parser::ParseIndirectObject(	1185 CPDF_Object* CPDF_Parser::ParseIndirectObject(

1180 CPDF_IndirectObjectHolder* pObjList,	1186 CPDF_IndirectObjectHolder* pObjList,

1181 FX_DWORD objnum) {	1187 FX_DWORD objnum) {

1182 if (!IsValidObjectNumber(objnum))	1188 if (!IsValidObjectNumber(objnum))

1183 return nullptr;	1189 return nullptr;

1184	1190

1185 // Prevent circular parsing the same object.	1191 // Prevent circular parsing the same object.

(...skipping 118 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1304 return;	1310 return;

1305 }	1311 }

1306	1312

1307 if (GetObjectType(objnum) != 1)	1313 if (GetObjectType(objnum) != 1)

1308 return;	1314 return;

1309	1315

1310 FX_FILESIZE pos = m_ObjectInfo[objnum].pos;	1316 FX_FILESIZE pos = m_ObjectInfo[objnum].pos;

1311 if (pos == 0)	1317 if (pos == 0)

1312 return;	1318 return;

1313	1319

1314 FX_FILESIZE SavedPos = m_Syntax.SavePos();	1320 FX_FILESIZE SavedPos = m_pSyntax->SavePos();

1315 m_Syntax.RestorePos(pos);	1321 m_pSyntax->RestorePos(pos);

1316	1322

1317 bool bIsNumber;	1323 bool bIsNumber;

1318 CFX_ByteString word = m_Syntax.GetNextWord(&bIsNumber);	1324 CFX_ByteString word = m_pSyntax->GetNextWord(&bIsNumber);

1319 if (!bIsNumber) {	1325 if (!bIsNumber) {

1320 m_Syntax.RestorePos(SavedPos);	1326 m_pSyntax->RestorePos(SavedPos);

1321 return;	1327 return;

1322 }	1328 }

1323	1329

1324 FX_DWORD parser_objnum = FXSYS_atoui(word);	1330 FX_DWORD parser_objnum = FXSYS_atoui(word);

1325 if (parser_objnum && parser_objnum != objnum) {	1331 if (parser_objnum && parser_objnum != objnum) {

1326 m_Syntax.RestorePos(SavedPos);	1332 m_pSyntax->RestorePos(SavedPos);

1327 return;	1333 return;

1328 }	1334 }

1329	1335

1330 word = m_Syntax.GetNextWord(&bIsNumber);	1336 word = m_pSyntax->GetNextWord(&bIsNumber);

1331 if (!bIsNumber) {	1337 if (!bIsNumber) {

1332 m_Syntax.RestorePos(SavedPos);	1338 m_pSyntax->RestorePos(SavedPos);

1333 return;	1339 return;

1334 }	1340 }

1335	1341

1336 if (m_Syntax.GetKeyword() != "obj") {	1342 if (m_pSyntax->GetKeyword() != "obj") {

1337 m_Syntax.RestorePos(SavedPos);	1343 m_pSyntax->RestorePos(SavedPos);

1338 return;	1344 return;

1339 }	1345 }

1340	1346

1341 auto it = m_SortedOffset.find(pos);	1347 auto it = m_SortedOffset.find(pos);

1342 if (it == m_SortedOffset.end() \|\| ++it == m_SortedOffset.end()) {	1348 if (it == m_SortedOffset.end() \|\| ++it == m_SortedOffset.end()) {

1343 m_Syntax.RestorePos(SavedPos);	1349 m_pSyntax->RestorePos(SavedPos);

1344 return;	1350 return;

1345 }	1351 }

1346	1352

1347 FX_FILESIZE nextoff = *it;	1353 FX_FILESIZE nextoff = *it;

1348 FX_BOOL bNextOffValid = FALSE;	1354 FX_BOOL bNextOffValid = FALSE;

1349 if (nextoff != pos) {	1355 if (nextoff != pos) {

1350 m_Syntax.RestorePos(nextoff);	1356 m_pSyntax->RestorePos(nextoff);

1351 word = m_Syntax.GetNextWord(&bIsNumber);	1357 word = m_pSyntax->GetNextWord(&bIsNumber);

1352 if (word == "xref") {	1358 if (word == "xref") {

1353 bNextOffValid = TRUE;	1359 bNextOffValid = TRUE;

1354 } else if (bIsNumber) {	1360 } else if (bIsNumber) {

1355 word = m_Syntax.GetNextWord(&bIsNumber);	1361 word = m_pSyntax->GetNextWord(&bIsNumber);

1356 if (bIsNumber && m_Syntax.GetKeyword() == "obj") {	1362 if (bIsNumber && m_pSyntax->GetKeyword() == "obj") {

1357 bNextOffValid = TRUE;	1363 bNextOffValid = TRUE;

1358 }	1364 }

1359 }	1365 }

1360 }	1366 }

1361	1367

1362 if (!bNextOffValid) {	1368 if (!bNextOffValid) {

1363 m_Syntax.RestorePos(pos);	1369 m_pSyntax->RestorePos(pos);

1364 while (1) {	1370 while (1) {

1365 if (m_Syntax.GetKeyword() == "endobj")	1371 if (m_pSyntax->GetKeyword() == "endobj")

1366 break;	1372 break;

1367	1373

1368 if (m_Syntax.SavePos() == m_Syntax.m_FileLen)	1374 if (m_pSyntax->SavePos() == m_pSyntax->m_FileLen)

1369 break;	1375 break;

1370 }	1376 }

1371 nextoff = m_Syntax.SavePos();	1377 nextoff = m_pSyntax->SavePos();

1372 }	1378 }

1373	1379

1374 size = (FX_DWORD)(nextoff - pos);	1380 size = (FX_DWORD)(nextoff - pos);

1375 pBuffer = FX_Alloc(uint8_t, size);	1381 pBuffer = FX_Alloc(uint8_t, size);

1376 m_Syntax.RestorePos(pos);	1382 m_pSyntax->RestorePos(pos);

1377 m_Syntax.ReadBlock(pBuffer, size);	1383 m_pSyntax->ReadBlock(pBuffer, size);

1378 m_Syntax.RestorePos(SavedPos);	1384 m_pSyntax->RestorePos(SavedPos);

1379 }	1385 }

1380	1386

1381 CPDF_Object* CPDF_Parser::ParseIndirectObjectAt(	1387 CPDF_Object* CPDF_Parser::ParseIndirectObjectAt(

1382 CPDF_IndirectObjectHolder* pObjList,	1388 CPDF_IndirectObjectHolder* pObjList,

1383 FX_FILESIZE pos,	1389 FX_FILESIZE pos,

1384 FX_DWORD objnum) {	1390 FX_DWORD objnum) {

1385 FX_FILESIZE SavedPos = m_Syntax.SavePos();	1391 FX_FILESIZE SavedPos = m_pSyntax->SavePos();

1386 m_Syntax.RestorePos(pos);	1392 m_pSyntax->RestorePos(pos);

1387 bool bIsNumber;	1393 bool bIsNumber;

1388 CFX_ByteString word = m_Syntax.GetNextWord(&bIsNumber);	1394 CFX_ByteString word = m_pSyntax->GetNextWord(&bIsNumber);

1389 if (!bIsNumber) {	1395 if (!bIsNumber) {

1390 m_Syntax.RestorePos(SavedPos);	1396 m_pSyntax->RestorePos(SavedPos);

1391 return nullptr;	1397 return nullptr;

1392 }	1398 }

1393	1399

1394 FX_FILESIZE objOffset = m_Syntax.SavePos();	1400 FX_FILESIZE objOffset = m_pSyntax->SavePos();

1395 objOffset -= word.GetLength();	1401 objOffset -= word.GetLength();

1396 FX_DWORD parser_objnum = FXSYS_atoui(word);	1402 FX_DWORD parser_objnum = FXSYS_atoui(word);

1397 if (objnum && parser_objnum != objnum) {	1403 if (objnum && parser_objnum != objnum) {

1398 m_Syntax.RestorePos(SavedPos);	1404 m_pSyntax->RestorePos(SavedPos);

1399 return nullptr;	1405 return nullptr;

1400 }	1406 }

1401	1407

1402 word = m_Syntax.GetNextWord(&bIsNumber);	1408 word = m_pSyntax->GetNextWord(&bIsNumber);

1403 if (!bIsNumber) {	1409 if (!bIsNumber) {

1404 m_Syntax.RestorePos(SavedPos);	1410 m_pSyntax->RestorePos(SavedPos);

1405 return nullptr;	1411 return nullptr;

1406 }	1412 }

1407	1413

1408 FX_DWORD parser_gennum = FXSYS_atoui(word);	1414 FX_DWORD parser_gennum = FXSYS_atoui(word);

1409 if (m_Syntax.GetKeyword() != "obj") {	1415 if (m_pSyntax->GetKeyword() != "obj") {

1410 m_Syntax.RestorePos(SavedPos);	1416 m_pSyntax->RestorePos(SavedPos);

1411 return nullptr;	1417 return nullptr;

1412 }	1418 }

1413	1419

1414 CPDF_Object* pObj = m_Syntax.GetObject(pObjList, objnum, parser_gennum, true);	1420 CPDF_Object* pObj =

1415 m_Syntax.SavePos();	1421 m_pSyntax->GetObject(pObjList, objnum, parser_gennum, true);

	1422 m_pSyntax->SavePos();

1416	1423

1417 CFX_ByteString bsWord = m_Syntax.GetKeyword();	1424 CFX_ByteString bsWord = m_pSyntax->GetKeyword();

1418 if (bsWord == "endobj")	1425 if (bsWord == "endobj")

1419 m_Syntax.SavePos();	1426 m_pSyntax->SavePos();

1420	1427

1421 m_Syntax.RestorePos(SavedPos);	1428 m_pSyntax->RestorePos(SavedPos);

1422 if (pObj) {	1429 if (pObj) {

1423 if (!objnum)	1430 if (!objnum)

1424 pObj->m_ObjNum = parser_objnum;	1431 pObj->m_ObjNum = parser_objnum;

1425 pObj->m_GenNum = parser_gennum;	1432 pObj->m_GenNum = parser_gennum;

1426 }	1433 }

1427 return pObj;	1434 return pObj;

1428 }	1435 }

1429	1436

1430 CPDF_Object* CPDF_Parser::ParseIndirectObjectAtByStrict(	1437 CPDF_Object* CPDF_Parser::ParseIndirectObjectAtByStrict(

1431 CPDF_IndirectObjectHolder* pObjList,	1438 CPDF_IndirectObjectHolder* pObjList,

1432 FX_FILESIZE pos,	1439 FX_FILESIZE pos,

1433 FX_DWORD objnum,	1440 FX_DWORD objnum,

1434 FX_FILESIZE* pResultPos) {	1441 FX_FILESIZE* pResultPos) {

1435 FX_FILESIZE SavedPos = m_Syntax.SavePos();	1442 FX_FILESIZE SavedPos = m_pSyntax->SavePos();

1436 m_Syntax.RestorePos(pos);	1443 m_pSyntax->RestorePos(pos);

1437	1444

1438 bool bIsNumber;	1445 bool bIsNumber;

1439 CFX_ByteString word = m_Syntax.GetNextWord(&bIsNumber);	1446 CFX_ByteString word = m_pSyntax->GetNextWord(&bIsNumber);

1440 if (!bIsNumber) {	1447 if (!bIsNumber) {

1441 m_Syntax.RestorePos(SavedPos);	1448 m_pSyntax->RestorePos(SavedPos);

1442 return nullptr;	1449 return nullptr;

1443 }	1450 }

1444	1451

1445 FX_DWORD parser_objnum = FXSYS_atoui(word);	1452 FX_DWORD parser_objnum = FXSYS_atoui(word);

1446 if (objnum && parser_objnum != objnum) {	1453 if (objnum && parser_objnum != objnum) {

1447 m_Syntax.RestorePos(SavedPos);	1454 m_pSyntax->RestorePos(SavedPos);

1448 return nullptr;	1455 return nullptr;

1449 }	1456 }

1450	1457

1451 word = m_Syntax.GetNextWord(&bIsNumber);	1458 word = m_pSyntax->GetNextWord(&bIsNumber);

1452 if (!bIsNumber) {	1459 if (!bIsNumber) {

1453 m_Syntax.RestorePos(SavedPos);	1460 m_pSyntax->RestorePos(SavedPos);

1454 return nullptr;	1461 return nullptr;

1455 }	1462 }

1456	1463

1457 FX_DWORD gennum = FXSYS_atoui(word);	1464 FX_DWORD gennum = FXSYS_atoui(word);

1458 if (m_Syntax.GetKeyword() != "obj") {	1465 if (m_pSyntax->GetKeyword() != "obj") {

1459 m_Syntax.RestorePos(SavedPos);	1466 m_pSyntax->RestorePos(SavedPos);

1460 return nullptr;	1467 return nullptr;

1461 }	1468 }

1462	1469

1463 CPDF_Object* pObj = m_Syntax.GetObjectByStrict(pObjList, objnum, gennum);	1470 CPDF_Object* pObj = m_pSyntax->GetObjectByStrict(pObjList, objnum, gennum);

1464 if (pResultPos)	1471 if (pResultPos)

1465 *pResultPos = m_Syntax.m_Pos;	1472 *pResultPos = m_pSyntax->m_Pos;

1466	1473

1467 m_Syntax.RestorePos(SavedPos);	1474 m_pSyntax->RestorePos(SavedPos);

1468 return pObj;	1475 return pObj;

1469 }	1476 }

1470	1477

1471 CPDF_Dictionary* CPDF_Parser::LoadTrailerV4() {	1478 CPDF_Dictionary* CPDF_Parser::LoadTrailerV4() {

1472 if (m_Syntax.GetKeyword() != "trailer")	1479 if (m_pSyntax->GetKeyword() != "trailer")

1473 return nullptr;	1480 return nullptr;

1474	1481

1475 std::unique_ptr<CPDF_Object, ReleaseDeleter<CPDF_Object>> pObj(	1482 std::unique_ptr<CPDF_Object, ReleaseDeleter<CPDF_Object>> pObj(

1476 m_Syntax.GetObject(m_pDocument, 0, 0, true));	1483 m_pSyntax->GetObject(m_pDocument, 0, 0, true));

1477 if (!ToDictionary(pObj.get()))	1484 if (!ToDictionary(pObj.get()))

1478 return nullptr;	1485 return nullptr;

1479 return pObj.release()->AsDictionary();	1486 return pObj.release()->AsDictionary();

1480 }	1487 }

1481	1488

1482 FX_DWORD CPDF_Parser::GetPermissions(FX_BOOL bCheckRevision) {	1489 FX_DWORD CPDF_Parser::GetPermissions(FX_BOOL bCheckRevision) {

1483 if (!m_pSecurityHandler)	1490 if (!m_pSecurityHandler)

1484 return (FX_DWORD)-1;	1491 return (FX_DWORD)-1;

1485	1492

1486 FX_DWORD dwPermission = m_pSecurityHandler->GetPermissions();	1493 FX_DWORD dwPermission = m_pSecurityHandler->GetPermissions();

1487 if (m_pEncryptDict && m_pEncryptDict->GetStringBy("Filter") == "Standard") {	1494 if (m_pEncryptDict && m_pEncryptDict->GetStringBy("Filter") == "Standard") {

1488 dwPermission &= 0xFFFFFFFC;	1495 dwPermission &= 0xFFFFFFFC;

1489 dwPermission \|= 0xFFFFF0C0;	1496 dwPermission \|= 0xFFFFF0C0;

1490 if (bCheckRevision && m_pEncryptDict->GetIntegerBy("R") == 2)	1497 if (bCheckRevision && m_pEncryptDict->GetIntegerBy("R") == 2)

1491 dwPermission &= 0xFFFFF0FF;	1498 dwPermission &= 0xFFFFF0FF;

1492 }	1499 }

1493 return dwPermission;	1500 return dwPermission;

1494 }	1501 }

1495	1502

1496 FX_BOOL CPDF_Parser::IsLinearizedFile(IFX_FileRead* pFileAccess,	1503 FX_BOOL CPDF_Parser::IsLinearizedFile(IFX_FileRead* pFileAccess,

1497 FX_DWORD offset) {	1504 FX_DWORD offset) {

1498 m_Syntax.InitParser(pFileAccess, offset);	1505 m_pSyntax->InitParser(pFileAccess, offset);

1499 m_Syntax.RestorePos(m_Syntax.m_HeaderOffset + 9);	1506 m_pSyntax->RestorePos(m_pSyntax->m_HeaderOffset + 9);

1500	1507

1501 FX_FILESIZE SavedPos = m_Syntax.SavePos();	1508 FX_FILESIZE SavedPos = m_pSyntax->SavePos();

1502 bool bIsNumber;	1509 bool bIsNumber;

1503 CFX_ByteString word = m_Syntax.GetNextWord(&bIsNumber);	1510 CFX_ByteString word = m_pSyntax->GetNextWord(&bIsNumber);

1504 if (!bIsNumber)	1511 if (!bIsNumber)

1505 return FALSE;	1512 return FALSE;

1506	1513

1507 FX_DWORD objnum = FXSYS_atoui(word);	1514 FX_DWORD objnum = FXSYS_atoui(word);

1508 word = m_Syntax.GetNextWord(&bIsNumber);	1515 word = m_pSyntax->GetNextWord(&bIsNumber);

1509 if (!bIsNumber)	1516 if (!bIsNumber)

1510 return FALSE;	1517 return FALSE;

1511	1518

1512 FX_DWORD gennum = FXSYS_atoui(word);	1519 FX_DWORD gennum = FXSYS_atoui(word);

1513 if (m_Syntax.GetKeyword() != "obj") {	1520 if (m_pSyntax->GetKeyword() != "obj") {

1514 m_Syntax.RestorePos(SavedPos);	1521 m_pSyntax->RestorePos(SavedPos);

1515 return FALSE;	1522 return FALSE;

1516 }	1523 }

1517	1524

1518 m_pLinearized = m_Syntax.GetObject(nullptr, objnum, gennum, true);	1525 m_pLinearized = m_pSyntax->GetObject(nullptr, objnum, gennum, true);

1519 if (!m_pLinearized)	1526 if (!m_pLinearized)

1520 return FALSE;	1527 return FALSE;

1521	1528

1522 CPDF_Dictionary* pDict = m_pLinearized->GetDict();	1529 CPDF_Dictionary* pDict = m_pLinearized->GetDict();

1523 if (pDict && pDict->GetElement("Linearized")) {	1530 if (pDict && pDict->GetElement("Linearized")) {

1524 m_Syntax.GetNextWord(nullptr);	1531 m_pSyntax->GetNextWord(nullptr);

1525	1532

1526 CPDF_Object* pLen = pDict->GetElement("L");	1533 CPDF_Object* pLen = pDict->GetElement("L");

1527 if (!pLen) {	1534 if (!pLen) {

1528 m_pLinearized->Release();	1535 m_pLinearized->Release();

1529 m_pLinearized = nullptr;	1536 m_pLinearized = nullptr;

1530 return FALSE;	1537 return FALSE;

1531 }	1538 }

1532	1539

1533 if (pLen->GetInteger() != (int)pFileAccess->GetSize())	1540 if (pLen->GetInteger() != (int)pFileAccess->GetSize())

1534 return FALSE;	1541 return FALSE;

(...skipping 15 matching lines...) Expand all Loading...
1550 CloseParser();	1557 CloseParser();

1551 m_bXRefStream = FALSE;	1558 m_bXRefStream = FALSE;

1552 m_LastXRefOffset = 0;	1559 m_LastXRefOffset = 0;

1553 m_bOwnFileRead = true;	1560 m_bOwnFileRead = true;

1554	1561

1555 int32_t offset = GetHeaderOffset(pFileAccess);	1562 int32_t offset = GetHeaderOffset(pFileAccess);

1556 if (offset == -1)	1563 if (offset == -1)

1557 return FORMAT_ERROR;	1564 return FORMAT_ERROR;

1558	1565

1559 if (!IsLinearizedFile(pFileAccess, offset)) {	1566 if (!IsLinearizedFile(pFileAccess, offset)) {

1560 m_Syntax.m_pFileAccess = nullptr;	1567 m_pSyntax->m_pFileAccess = nullptr;

1561 return StartParse(pFileAccess);	1568 return StartParse(pFileAccess);

1562 }	1569 }

1563	1570

1564 m_pDocument = new CPDF_Document(this);	1571 m_pDocument = new CPDF_Document(this);

1565 FX_FILESIZE dwFirstXRefOffset = m_Syntax.SavePos();	1572 FX_FILESIZE dwFirstXRefOffset = m_pSyntax->SavePos();

1566	1573

1567 FX_BOOL bXRefRebuilt = FALSE;	1574 FX_BOOL bXRefRebuilt = FALSE;

1568 FX_BOOL bLoadV4 = FALSE;	1575 FX_BOOL bLoadV4 = FALSE;

1569 if (!(bLoadV4 = LoadCrossRefV4(dwFirstXRefOffset, 0, FALSE)) &&	1576 if (!(bLoadV4 = LoadCrossRefV4(dwFirstXRefOffset, 0, FALSE)) &&

1570 !LoadCrossRefV5(&dwFirstXRefOffset, TRUE)) {	1577 !LoadCrossRefV5(&dwFirstXRefOffset, TRUE)) {

1571 if (!RebuildCrossRef())	1578 if (!RebuildCrossRef())

1572 return FORMAT_ERROR;	1579 return FORMAT_ERROR;

1573	1580

1574 bXRefRebuilt = TRUE;	1581 bXRefRebuilt = TRUE;

1575 m_LastXRefOffset = 0;	1582 m_LastXRefOffset = 0;

(...skipping 37 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1613 return FORMAT_ERROR;	1620 return FORMAT_ERROR;

1614	1621

1615 eRet = SetEncryptHandler();	1622 eRet = SetEncryptHandler();

1616 if (eRet != SUCCESS)	1623 if (eRet != SUCCESS)

1617 return eRet;	1624 return eRet;

1618 }	1625 }

1619	1626

1620 if (m_pSecurityHandler && m_pSecurityHandler->IsMetadataEncrypted()) {	1627 if (m_pSecurityHandler && m_pSecurityHandler->IsMetadataEncrypted()) {

1621 if (CPDF_Reference* pMetadata =	1628 if (CPDF_Reference* pMetadata =

1622 ToReference(m_pDocument->GetRoot()->GetElement("Metadata")))	1629 ToReference(m_pDocument->GetRoot()->GetElement("Metadata")))

1623 m_Syntax.m_MetadataObjnum = pMetadata->GetRefObjNum();	1630 m_pSyntax->m_MetadataObjnum = pMetadata->GetRefObjNum();

1624 }	1631 }

1625 return SUCCESS;	1632 return SUCCESS;

1626 }	1633 }

1627	1634

1628 FX_BOOL CPDF_Parser::LoadLinearizedAllCrossRefV5(FX_FILESIZE xrefpos) {	1635 FX_BOOL CPDF_Parser::LoadLinearizedAllCrossRefV5(FX_FILESIZE xrefpos) {

1629 if (!LoadCrossRefV5(&xrefpos, FALSE))	1636 if (!LoadCrossRefV5(&xrefpos, FALSE))

1630 return FALSE;	1637 return FALSE;

1631	1638

1632 std::set<FX_FILESIZE> seen_xrefpos;	1639 std::set<FX_FILESIZE> seen_xrefpos;

1633 while (xrefpos) {	1640 while (xrefpos) {

1634 seen_xrefpos.insert(xrefpos);	1641 seen_xrefpos.insert(xrefpos);

1635 if (!LoadCrossRefV5(&xrefpos, FALSE))	1642 if (!LoadCrossRefV5(&xrefpos, FALSE))

1636 return FALSE;	1643 return FALSE;

1637	1644

1638 // Check for circular references.	1645 // Check for circular references.

1639 if (pdfium::ContainsKey(seen_xrefpos, xrefpos))	1646 if (pdfium::ContainsKey(seen_xrefpos, xrefpos))

1640 return FALSE;	1647 return FALSE;

1641 }	1648 }

1642 m_ObjectStreamMap.clear();	1649 m_ObjectStreamMap.clear();

1643 m_bXRefStream = TRUE;	1650 m_bXRefStream = TRUE;

1644 return TRUE;	1651 return TRUE;

1645 }	1652 }

1646	1653

1647 CPDF_Parser::Error CPDF_Parser::LoadLinearizedMainXRefTable() {	1654 CPDF_Parser::Error CPDF_Parser::LoadLinearizedMainXRefTable() {

1648 FX_DWORD dwSaveMetadataObjnum = m_Syntax.m_MetadataObjnum;	1655 FX_DWORD dwSaveMetadataObjnum = m_pSyntax->m_MetadataObjnum;

1649 m_Syntax.m_MetadataObjnum = 0;	1656 m_pSyntax->m_MetadataObjnum = 0;

1650 if (m_pTrailer) {	1657 if (m_pTrailer) {

1651 m_pTrailer->Release();	1658 m_pTrailer->Release();

1652 m_pTrailer = nullptr;	1659 m_pTrailer = nullptr;

1653 }	1660 }

1654	1661

1655 m_Syntax.RestorePos(m_LastXRefOffset - m_Syntax.m_HeaderOffset);	1662 m_pSyntax->RestorePos(m_LastXRefOffset - m_pSyntax->m_HeaderOffset);

1656 uint8_t ch = 0;	1663 uint8_t ch = 0;

1657 FX_DWORD dwCount = 0;	1664 FX_DWORD dwCount = 0;

1658 m_Syntax.GetNextChar(ch);	1665 m_pSyntax->GetNextChar(ch);

1659 while (PDFCharIsWhitespace(ch)) {	1666 while (PDFCharIsWhitespace(ch)) {

1660 ++dwCount;	1667 ++dwCount;

1661 if (m_Syntax.m_FileLen >=	1668 if (m_pSyntax->m_FileLen >=

1662 (FX_FILESIZE)(m_Syntax.SavePos() + m_Syntax.m_HeaderOffset)) {	1669 (FX_FILESIZE)(m_pSyntax->SavePos() + m_pSyntax->m_HeaderOffset)) {

1663 break;	1670 break;

1664 }	1671 }

1665 m_Syntax.GetNextChar(ch);	1672 m_pSyntax->GetNextChar(ch);

1666 }	1673 }

1667 m_LastXRefOffset += dwCount;	1674 m_LastXRefOffset += dwCount;

1668 m_ObjectStreamMap.clear();	1675 m_ObjectStreamMap.clear();

1669 m_ObjCache.clear();	1676 m_ObjCache.clear();

1670	1677

1671 if (!LoadLinearizedAllCrossRefV4(m_LastXRefOffset, m_dwXrefStartObjNum) &&	1678 if (!LoadLinearizedAllCrossRefV4(m_LastXRefOffset, m_dwXrefStartObjNum) &&

1672 !LoadLinearizedAllCrossRefV5(m_LastXRefOffset)) {	1679 !LoadLinearizedAllCrossRefV5(m_LastXRefOffset)) {

1673 m_LastXRefOffset = 0;	1680 m_LastXRefOffset = 0;

1674 m_Syntax.m_MetadataObjnum = dwSaveMetadataObjnum;	1681 m_pSyntax->m_MetadataObjnum = dwSaveMetadataObjnum;

1675 return FORMAT_ERROR;	1682 return FORMAT_ERROR;

1676 }	1683 }

1677	1684

1678 m_Syntax.m_MetadataObjnum = dwSaveMetadataObjnum;	1685 m_pSyntax->m_MetadataObjnum = dwSaveMetadataObjnum;

1679 return SUCCESS;	1686 return SUCCESS;

1680 }	1687 }

1681	1688

1682 // static

1683 int CPDF_SyntaxParser::s_CurrentRecursionDepth = 0;

1684

1685 CPDF_SyntaxParser::CPDF_SyntaxParser()

1686 : m_MetadataObjnum(0),

1687 m_pFileAccess(nullptr),

1688 m_pFileBuf(nullptr),

1689 m_BufSize(CPDF_ModuleMgr::kFileBufSize) {}

1690

1691 CPDF_SyntaxParser::~CPDF_SyntaxParser() {

1692 FX_Free(m_pFileBuf);

1693 }

1694

1695 FX_BOOL CPDF_SyntaxParser::GetCharAt(FX_FILESIZE pos, uint8_t& ch) {

1696 CFX_AutoRestorer<FX_FILESIZE> save_pos(&m_Pos);

1697 m_Pos = pos;

1698 return GetNextChar(ch);

1699 }

1700

1701 FX_BOOL CPDF_SyntaxParser::GetNextChar(uint8_t& ch) {

1702 FX_FILESIZE pos = m_Pos + m_HeaderOffset;

1703 if (pos >= m_FileLen)

1704 return FALSE;

1705

1706 if (m_BufOffset >= pos \|\| (FX_FILESIZE)(m_BufOffset + m_BufSize) <= pos) {

1707 FX_FILESIZE read_pos = pos;

1708 FX_DWORD read_size = m_BufSize;

1709 if ((FX_FILESIZE)read_size > m_FileLen)

1710 read_size = (FX_DWORD)m_FileLen;

1711

1712 if ((FX_FILESIZE)(read_pos + read_size) > m_FileLen) {

1713 if (m_FileLen < (FX_FILESIZE)read_size) {

1714 read_pos = 0;

1715 read_size = (FX_DWORD)m_FileLen;

1716 } else {

1717 read_pos = m_FileLen - read_size;

1718 }

1719 }

1720

1721 if (!m_pFileAccess->ReadBlock(m_pFileBuf, read_pos, read_size))

1722 return FALSE;

1723

1724 m_BufOffset = read_pos;

1725 }

1726 ch = m_pFileBuf[pos - m_BufOffset];

1727 m_Pos++;

1728 return TRUE;

1729 }

1730

1731 FX_BOOL CPDF_SyntaxParser::GetCharAtBackward(FX_FILESIZE pos, uint8_t& ch) {

1732 pos += m_HeaderOffset;

1733 if (pos >= m_FileLen)

1734 return FALSE;

1735

1736 if (m_BufOffset >= pos \|\| (FX_FILESIZE)(m_BufOffset + m_BufSize) <= pos) {

1737 FX_FILESIZE read_pos;

1738 if (pos < (FX_FILESIZE)m_BufSize)

1739 read_pos = 0;

1740 else

1741 read_pos = pos - m_BufSize + 1;

1742

1743 FX_DWORD read_size = m_BufSize;

1744 if ((FX_FILESIZE)(read_pos + read_size) > m_FileLen) {

1745 if (m_FileLen < (FX_FILESIZE)read_size) {

1746 read_pos = 0;

1747 read_size = (FX_DWORD)m_FileLen;

1748 } else {

1749 read_pos = m_FileLen - read_size;

1750 }

1751 }

1752

1753 if (!m_pFileAccess->ReadBlock(m_pFileBuf, read_pos, read_size))

1754 return FALSE;

1755

1756 m_BufOffset = read_pos;

1757 }

1758 ch = m_pFileBuf[pos - m_BufOffset];

1759 return TRUE;

1760 }

1761

1762 FX_BOOL CPDF_SyntaxParser::ReadBlock(uint8_t* pBuf, FX_DWORD size) {

1763 if (!m_pFileAccess->ReadBlock(pBuf, m_Pos + m_HeaderOffset, size))

1764 return FALSE;

1765 m_Pos += size;

1766 return TRUE;

1767 }

1768

1769 void CPDF_SyntaxParser::GetNextWordInternal(bool* bIsNumber) {

1770 m_WordSize = 0;

1771 if (bIsNumber)

1772 *bIsNumber = true;

1773

1774 uint8_t ch;

1775 if (!GetNextChar(ch))

1776 return;

1777

1778 while (1) {

1779 while (PDFCharIsWhitespace(ch)) {

1780 if (!GetNextChar(ch))

1781 return;

1782 }

1783

1784 if (ch != '%')

1785 break;

1786

1787 while (1) {

1788 if (!GetNextChar(ch))

1789 return;

1790 if (PDFCharIsLineEnding(ch))

1791 break;

1792 }

1793 }

1794

1795 if (PDFCharIsDelimiter(ch)) {

1796 if (bIsNumber)

1797 *bIsNumber = false;

1798

1799 m_WordBuffer[m_WordSize++] = ch;

1800 if (ch == '/') {

1801 while (1) {

1802 if (!GetNextChar(ch))

1803 return;

1804

1805 if (!PDFCharIsOther(ch) && !PDFCharIsNumeric(ch)) {

1806 m_Pos--;

1807 return;

1808 }

1809

1810 if (m_WordSize < sizeof(m_WordBuffer) - 1)

1811 m_WordBuffer[m_WordSize++] = ch;

1812 }

1813 } else if (ch == '<') {

1814 if (!GetNextChar(ch))

1815 return;

1816

1817 if (ch == '<')

1818 m_WordBuffer[m_WordSize++] = ch;

1819 else

1820 m_Pos--;

1821 } else if (ch == '>') {

1822 if (!GetNextChar(ch))

1823 return;

1824

1825 if (ch == '>')

1826 m_WordBuffer[m_WordSize++] = ch;

1827 else

1828 m_Pos--;

1829 }

1830 return;

1831 }

1832

1833 while (1) {

1834 if (m_WordSize < sizeof(m_WordBuffer) - 1)

1835 m_WordBuffer[m_WordSize++] = ch;

1836

1837 if (!PDFCharIsNumeric(ch)) {

1838 if (bIsNumber)

1839 *bIsNumber = false;

1840 }

1841

1842 if (!GetNextChar(ch))

1843 return;

1844

1845 if (PDFCharIsDelimiter(ch) \|\| PDFCharIsWhitespace(ch)) {

1846 m_Pos--;

1847 break;

1848 }

1849 }

1850 }

1851

1852 CFX_ByteString CPDF_SyntaxParser::ReadString() {

1853 uint8_t ch;

1854 if (!GetNextChar(ch))

1855 return CFX_ByteString();

1856

1857 CFX_ByteTextBuf buf;

1858 int32_t parlevel = 0;

1859 int32_t status = 0;

1860 int32_t iEscCode = 0;

1861 while (1) {

1862 switch (status) {

1863 case 0:

1864 if (ch == ')') {

1865 if (parlevel == 0) {

1866 return buf.GetByteString();

1867 }

1868 parlevel--;

1869 buf.AppendChar(')');

1870 } else if (ch == '(') {

1871 parlevel++;

1872 buf.AppendChar('(');

1873 } else if (ch == '\\') {

1874 status = 1;

1875 } else {

1876 buf.AppendChar(ch);

1877 }

1878 break;

1879 case 1:

1880 if (ch >= '0' && ch <= '7') {

1881 iEscCode = FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch));

1882 status = 2;

1883 break;

1884 }

1885

1886 if (ch == 'n') {

1887 buf.AppendChar('\n');

1888 } else if (ch == 'r') {

1889 buf.AppendChar('\r');

1890 } else if (ch == 't') {

1891 buf.AppendChar('\t');

1892 } else if (ch == 'b') {

1893 buf.AppendChar('\b');

1894 } else if (ch == 'f') {

1895 buf.AppendChar('\f');

1896 } else if (ch == '\r') {

1897 status = 4;

1898 break;

1899 } else if (ch != '\n') {

1900 buf.AppendChar(ch);

1901 }

1902 status = 0;

1903 break;

1904 case 2:

1905 if (ch >= '0' && ch <= '7') {

1906 iEscCode =

1907 iEscCode * 8 + FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch));

1908 status = 3;

1909 } else {

1910 buf.AppendChar(iEscCode);

1911 status = 0;

1912 continue;

1913 }

1914 break;

1915 case 3:

1916 if (ch >= '0' && ch <= '7') {

1917 iEscCode =

1918 iEscCode * 8 + FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch));

1919 buf.AppendChar(iEscCode);

1920 status = 0;

1921 } else {

1922 buf.AppendChar(iEscCode);

1923 status = 0;

1924 continue;

1925 }

1926 break;

1927 case 4:

1928 status = 0;

1929 if (ch != '\n')

1930 continue;

1931 break;

1932 }

1933

1934 if (!GetNextChar(ch))

1935 break;

1936 }

1937

1938 GetNextChar(ch);

1939 return buf.GetByteString();

1940 }

1941

1942 CFX_ByteString CPDF_SyntaxParser::ReadHexString() {

1943 uint8_t ch;

1944 if (!GetNextChar(ch))

1945 return CFX_ByteString();

1946

1947 CFX_ByteTextBuf buf;

1948 bool bFirst = true;

1949 uint8_t code = 0;

1950 while (1) {

1951 if (ch == '>')

1952 break;

1953

1954 if (std::isxdigit(ch)) {

1955 int val = FXSYS_toHexDigit(ch);

1956 if (bFirst) {

1957 code = val * 16;

1958 } else {

1959 code += val;

1960 buf.AppendByte(code);

1961 }

1962 bFirst = !bFirst;

1963 }

1964

1965 if (!GetNextChar(ch))

1966 break;

1967 }

1968 if (!bFirst)

1969 buf.AppendByte(code);

1970

1971 return buf.GetByteString();

1972 }

1973

1974 void CPDF_SyntaxParser::ToNextLine() {

1975 uint8_t ch;

1976 while (GetNextChar(ch)) {

1977 if (ch == '\n')

1978 break;

1979

1980 if (ch == '\r') {

1981 GetNextChar(ch);

1982 if (ch != '\n')

1983 --m_Pos;

1984 break;

1985 }

1986 }

1987 }

1988

1989 void CPDF_SyntaxParser::ToNextWord() {

1990 uint8_t ch;

1991 if (!GetNextChar(ch))

1992 return;

1993

1994 while (1) {

1995 while (PDFCharIsWhitespace(ch)) {

1996 if (!GetNextChar(ch))

1997 return;

1998 }

1999

2000 if (ch != '%')

2001 break;

2002

2003 while (1) {

2004 if (!GetNextChar(ch))

2005 return;

2006 if (PDFCharIsLineEnding(ch))

2007 break;

2008 }

2009 }

2010 m_Pos--;

2011 }

2012

2013 CFX_ByteString CPDF_SyntaxParser::GetNextWord(bool* bIsNumber) {

2014 GetNextWordInternal(bIsNumber);

2015 return CFX_ByteString((const FX_CHAR*)m_WordBuffer, m_WordSize);

2016 }

2017

2018 CFX_ByteString CPDF_SyntaxParser::GetKeyword() {

2019 return GetNextWord(nullptr);

2020 }

2021

2022 CPDF_Object* CPDF_SyntaxParser::GetObject(CPDF_IndirectObjectHolder* pObjList,

2023 FX_DWORD objnum,

2024 FX_DWORD gennum,

2025 FX_BOOL bDecrypt) {

2026 CFX_AutoRestorer<int> restorer(&s_CurrentRecursionDepth);

2027 if (++s_CurrentRecursionDepth > kParserMaxRecursionDepth)

2028 return nullptr;

2029

2030 FX_FILESIZE SavedPos = m_Pos;

2031 bool bIsNumber;

2032 CFX_ByteString word = GetNextWord(&bIsNumber);

2033 if (word.GetLength() == 0)

2034 return nullptr;

2035

2036 if (bIsNumber) {

2037 FX_FILESIZE SavedPos = m_Pos;

2038 CFX_ByteString nextword = GetNextWord(&bIsNumber);

2039 if (bIsNumber) {

2040 CFX_ByteString nextword2 = GetNextWord(nullptr);

2041 if (nextword2 == "R") {

2042 FX_DWORD objnum = FXSYS_atoui(word);

2043 return new CPDF_Reference(pObjList, objnum);

2044 }

2045 }

2046 m_Pos = SavedPos;

2047 return new CPDF_Number(word);

2048 }

2049

2050 if (word == "true" \|\| word == "false")

2051 return new CPDF_Boolean(word == "true");

2052

2053 if (word == "null")

2054 return new CPDF_Null;

2055

2056 if (word == "(") {

2057 CFX_ByteString str = ReadString();

2058 if (m_pCryptoHandler && bDecrypt)

2059 m_pCryptoHandler->Decrypt(objnum, gennum, str);

2060 return new CPDF_String(str, FALSE);

2061 }

2062

2063 if (word == "<") {

2064 CFX_ByteString str = ReadHexString();

2065 if (m_pCryptoHandler && bDecrypt)

2066 m_pCryptoHandler->Decrypt(objnum, gennum, str);

2067

2068 return new CPDF_String(str, TRUE);

2069 }

2070

2071 if (word == "[") {

2072 CPDF_Array* pArray = new CPDF_Array;

2073 while (CPDF_Object* pObj = GetObject(pObjList, objnum, gennum, true))

2074 pArray->Add(pObj);

2075

2076 return pArray;

2077 }

2078

2079 if (word[0] == '/') {

2080 return new CPDF_Name(

2081 PDF_NameDecode(CFX_ByteStringC(m_WordBuffer + 1, m_WordSize - 1)));

2082 }

2083

2084 if (word == "<<") {

2085 int32_t nKeys = 0;

2086 FX_FILESIZE dwSignValuePos = 0;

2087

2088 std::unique_ptr<CPDF_Dictionary, ReleaseDeleter<CPDF_Dictionary>> pDict(

2089 new CPDF_Dictionary);

2090 while (1) {

2091 CFX_ByteString key = GetNextWord(nullptr);

2092 if (key.IsEmpty())

2093 return nullptr;

2094

2095 FX_FILESIZE SavedPos = m_Pos - key.GetLength();

2096 if (key == ">>")

2097 break;

2098

2099 if (key == "endobj") {

2100 m_Pos = SavedPos;

2101 break;

2102 }

2103

2104 if (key[0] != '/')

2105 continue;

2106

2107 ++nKeys;

2108 key = PDF_NameDecode(key);

2109 if (key.IsEmpty())

2110 continue;

2111

2112 if (key == "/Contents")

2113 dwSignValuePos = m_Pos;

2114

2115 CPDF_Object* pObj = GetObject(pObjList, objnum, gennum, true);

2116 if (!pObj)

2117 continue;

2118

2119 CFX_ByteStringC keyNoSlash(key.c_str() + 1, key.GetLength() - 1);

2120 pDict->SetAt(keyNoSlash, pObj);

2121 }

2122

2123 // Only when this is a signature dictionary and has contents, we reset the

2124 // contents to the un-decrypted form.

2125 if (IsSignatureDict(pDict.get()) && dwSignValuePos) {

2126 CFX_AutoRestorer<FX_FILESIZE> save_pos(&m_Pos);

2127 m_Pos = dwSignValuePos;

2128 pDict->SetAt("Contents", GetObject(pObjList, objnum, gennum, false));

2129 }

2130

2131 FX_FILESIZE SavedPos = m_Pos;

2132 CFX_ByteString nextword = GetNextWord(nullptr);

2133 if (nextword != "stream") {

2134 m_Pos = SavedPos;

2135 return pDict.release();

2136 }

2137 return ReadStream(pDict.release(), objnum, gennum);

2138 }

2139

2140 if (word == ">>")

2141 m_Pos = SavedPos;

2142

2143 return nullptr;

2144 }

2145

2146 CPDF_Object* CPDF_SyntaxParser::GetObjectByStrict(

2147 CPDF_IndirectObjectHolder* pObjList,

2148 FX_DWORD objnum,

2149 FX_DWORD gennum) {

2150 CFX_AutoRestorer<int> restorer(&s_CurrentRecursionDepth);

2151 if (++s_CurrentRecursionDepth > kParserMaxRecursionDepth)

2152 return nullptr;

2153

2154 FX_FILESIZE SavedPos = m_Pos;

2155 bool bIsNumber;

2156 CFX_ByteString word = GetNextWord(&bIsNumber);

2157 if (word.GetLength() == 0)

2158 return nullptr;

2159

2160 if (bIsNumber) {

2161 FX_FILESIZE SavedPos = m_Pos;

2162 CFX_ByteString nextword = GetNextWord(&bIsNumber);

2163 if (bIsNumber) {

2164 CFX_ByteString nextword2 = GetNextWord(nullptr);

2165 if (nextword2 == "R")

2166 return new CPDF_Reference(pObjList, FXSYS_atoui(word));

2167 }

2168 m_Pos = SavedPos;

2169 return new CPDF_Number(word);

2170 }

2171

2172 if (word == "true" \|\| word == "false")

2173 return new CPDF_Boolean(word == "true");

2174

2175 if (word == "null")

2176 return new CPDF_Null;

2177

2178 if (word == "(") {

2179 CFX_ByteString str = ReadString();

2180 if (m_pCryptoHandler)

2181 m_pCryptoHandler->Decrypt(objnum, gennum, str);

2182 return new CPDF_String(str, FALSE);

2183 }

2184

2185 if (word == "<") {

2186 CFX_ByteString str = ReadHexString();

2187 if (m_pCryptoHandler)

2188 m_pCryptoHandler->Decrypt(objnum, gennum, str);

2189 return new CPDF_String(str, TRUE);

2190 }

2191

2192 if (word == "[") {

2193 std::unique_ptr<CPDF_Array, ReleaseDeleter<CPDF_Array>> pArray(

2194 new CPDF_Array);

2195 while (CPDF_Object* pObj = GetObject(pObjList, objnum, gennum, true))

2196 pArray->Add(pObj);

2197

2198 return m_WordBuffer[0] == ']' ? pArray.release() : nullptr;

2199 }

2200

2201 if (word[0] == '/') {

2202 return new CPDF_Name(

2203 PDF_NameDecode(CFX_ByteStringC(m_WordBuffer + 1, m_WordSize - 1)));

2204 }

2205

2206 if (word == "<<") {

2207 std::unique_ptr<CPDF_Dictionary, ReleaseDeleter<CPDF_Dictionary>> pDict(

2208 new CPDF_Dictionary);

2209 while (1) {

2210 FX_FILESIZE SavedPos = m_Pos;

2211 CFX_ByteString key = GetNextWord(nullptr);

2212 if (key.IsEmpty())

2213 return nullptr;

2214

2215 if (key == ">>")

2216 break;

2217

2218 if (key == "endobj") {

2219 m_Pos = SavedPos;

2220 break;

2221 }

2222

2223 if (key[0] != '/')

2224 continue;

2225

2226 key = PDF_NameDecode(key);

2227 std::unique_ptr<CPDF_Object, ReleaseDeleter<CPDF_Object>> obj(

2228 GetObject(pObjList, objnum, gennum, true));

2229 if (!obj) {

2230 uint8_t ch;

2231 while (GetNextChar(ch) && ch != 0x0A && ch != 0x0D) {

2232 continue;

2233 }

2234 return nullptr;

2235 }

2236

2237 if (key.GetLength() > 1) {

2238 pDict->SetAt(CFX_ByteStringC(key.c_str() + 1, key.GetLength() - 1),

2239 obj.release());

2240 }

2241 }

2242

2243 FX_FILESIZE SavedPos = m_Pos;

2244 CFX_ByteString nextword = GetNextWord(nullptr);

2245 if (nextword != "stream") {

2246 m_Pos = SavedPos;

2247 return pDict.release();

2248 }

2249

2250 return ReadStream(pDict.release(), objnum, gennum);

2251 }

2252

2253 if (word == ">>")

2254 m_Pos = SavedPos;

2255

2256 return nullptr;

2257 }

2258

2259 unsigned int CPDF_SyntaxParser::ReadEOLMarkers(FX_FILESIZE pos) {

2260 unsigned char byte1 = 0;

2261 unsigned char byte2 = 0;

2262

2263 GetCharAt(pos, byte1);

2264 GetCharAt(pos + 1, byte2);

2265

2266 if (byte1 == '\r' && byte2 == '\n')

2267 return 2;

2268

2269 if (byte1 == '\r' \|\| byte1 == '\n')

2270 return 1;

2271

2272 return 0;

2273 }

2274

2275 CPDF_Stream* CPDF_SyntaxParser::ReadStream(CPDF_Dictionary* pDict,

2276 FX_DWORD objnum,

2277 FX_DWORD gennum) {

2278 CPDF_Object* pLenObj = pDict->GetElement("Length");

2279 FX_FILESIZE len = -1;

2280 CPDF_Reference* pLenObjRef = ToReference(pLenObj);

2281

2282 bool differingObjNum = !pLenObjRef \|\| (pLenObjRef->GetObjList() &&

2283 pLenObjRef->GetRefObjNum() != objnum);

2284 if (pLenObj && differingObjNum)

2285 len = pLenObj->GetInteger();

2286

2287 // Locate the start of stream.

2288 ToNextLine();

2289 FX_FILESIZE streamStartPos = m_Pos;

2290

2291 const CFX_ByteStringC kEndStreamStr("endstream");

2292 const CFX_ByteStringC kEndObjStr("endobj");

2293

2294 CPDF_CryptoHandler* pCryptoHandler =

2295 objnum == (FX_DWORD)m_MetadataObjnum ? nullptr : m_pCryptoHandler.get();

2296 if (!pCryptoHandler) {

2297 FX_BOOL bSearchForKeyword = TRUE;

2298 if (len >= 0) {

2299 pdfium::base::CheckedNumeric<FX_FILESIZE> pos = m_Pos;

2300 pos += len;

2301 if (pos.IsValid() && pos.ValueOrDie() < m_FileLen)

2302 m_Pos = pos.ValueOrDie();

2303

2304 m_Pos += ReadEOLMarkers(m_Pos);

2305 FXSYS_memset(m_WordBuffer, 0, kEndStreamStr.GetLength() + 1);

2306 GetNextWordInternal(nullptr);

2307 // Earlier version of PDF specification doesn't require EOL marker before

2308 // 'endstream' keyword. If keyword 'endstream' follows the bytes in

2309 // specified length, it signals the end of stream.

2310 if (FXSYS_memcmp(m_WordBuffer, kEndStreamStr.GetPtr(),

2311 kEndStreamStr.GetLength()) == 0) {

2312 bSearchForKeyword = FALSE;

2313 }

2314 }

2315

2316 if (bSearchForKeyword) {

2317 // If len is not available, len needs to be calculated

2318 // by searching the keywords "endstream" or "endobj".

2319 m_Pos = streamStartPos;

2320 FX_FILESIZE endStreamOffset = 0;

2321 while (endStreamOffset >= 0) {

2322 endStreamOffset = FindTag(kEndStreamStr, 0);

2323

2324 // Can't find "endstream".

2325 if (endStreamOffset < 0)

2326 break;

2327

2328 // Stop searching when "endstream" is found.

2329 if (IsWholeWord(m_Pos - kEndStreamStr.GetLength(), m_FileLen,

2330 kEndStreamStr, TRUE)) {

2331 endStreamOffset = m_Pos - streamStartPos - kEndStreamStr.GetLength();

2332 break;

2333 }

2334 }

2335

2336 m_Pos = streamStartPos;

2337 FX_FILESIZE endObjOffset = 0;

2338 while (endObjOffset >= 0) {

2339 endObjOffset = FindTag(kEndObjStr, 0);

2340

2341 // Can't find "endobj".

2342 if (endObjOffset < 0)

2343 break;

2344

2345 // Stop searching when "endobj" is found.

2346 if (IsWholeWord(m_Pos - kEndObjStr.GetLength(), m_FileLen, kEndObjStr,

2347 TRUE)) {

2348 endObjOffset = m_Pos - streamStartPos - kEndObjStr.GetLength();

2349 break;

2350 }

2351 }

2352

2353 // Can't find "endstream" or "endobj".

2354 if (endStreamOffset < 0 && endObjOffset < 0) {

2355 pDict->Release();

2356 return nullptr;

2357 }

2358

2359 if (endStreamOffset < 0 && endObjOffset >= 0) {

2360 // Correct the position of end stream.

2361 endStreamOffset = endObjOffset;

2362 } else if (endStreamOffset >= 0 && endObjOffset < 0) {

2363 // Correct the position of end obj.

2364 endObjOffset = endStreamOffset;

2365 } else if (endStreamOffset > endObjOffset) {

2366 endStreamOffset = endObjOffset;

2367 }

2368

2369 len = endStreamOffset;

2370 int numMarkers = ReadEOLMarkers(streamStartPos + endStreamOffset - 2);

2371 if (numMarkers == 2) {

2372 len -= 2;

2373 } else {

2374 numMarkers = ReadEOLMarkers(streamStartPos + endStreamOffset - 1);

2375 if (numMarkers == 1) {

2376 len -= 1;

2377 }

2378 }

2379

2380 if (len < 0) {

2381 pDict->Release();

2382 return nullptr;

2383 }

2384 pDict->SetAtInteger("Length", len);

2385 }

2386 m_Pos = streamStartPos;

2387 }

2388

2389 if (len < 0) {

2390 pDict->Release();

2391 return nullptr;

2392 }

2393

2394 uint8_t* pData = nullptr;

2395 if (len > 0) {

2396 pData = FX_Alloc(uint8_t, len);

2397 ReadBlock(pData, len);

2398 if (pCryptoHandler) {

2399 CFX_BinaryBuf dest_buf;

2400 dest_buf.EstimateSize(pCryptoHandler->DecryptGetSize(len));

2401

2402 void* context = pCryptoHandler->DecryptStart(objnum, gennum);

2403 pCryptoHandler->DecryptStream(context, pData, len, dest_buf);

2404 pCryptoHandler->DecryptFinish(context, dest_buf);

2405

2406 FX_Free(pData);

2407 pData = dest_buf.GetBuffer();

2408 len = dest_buf.GetSize();

2409 dest_buf.DetachBuffer();

2410 }

2411 }

2412

2413 CPDF_Stream* pStream = new CPDF_Stream(pData, len, pDict);

2414 streamStartPos = m_Pos;

2415 FXSYS_memset(m_WordBuffer, 0, kEndObjStr.GetLength() + 1);

2416

2417 GetNextWordInternal(nullptr);

2418

2419 int numMarkers = ReadEOLMarkers(m_Pos);

2420 if (m_WordSize == kEndObjStr.GetLength() && numMarkers != 0 &&

2421 FXSYS_memcmp(m_WordBuffer, kEndObjStr.GetPtr(), kEndObjStr.GetLength()) ==

2422 0) {

2423 m_Pos = streamStartPos;

2424 }

2425 return pStream;

2426 }

2427

2428 void CPDF_SyntaxParser::InitParser(IFX_FileRead* pFileAccess,

2429 FX_DWORD HeaderOffset) {

2430 FX_Free(m_pFileBuf);

2431

2432 m_pFileBuf = FX_Alloc(uint8_t, m_BufSize);

2433 m_HeaderOffset = HeaderOffset;

2434 m_FileLen = pFileAccess->GetSize();

2435 m_Pos = 0;

2436 m_pFileAccess = pFileAccess;

2437 m_BufOffset = 0;

2438 pFileAccess->ReadBlock(

2439 m_pFileBuf, 0,

2440 (size_t)((FX_FILESIZE)m_BufSize > m_FileLen ? m_FileLen : m_BufSize));

2441 }

2442

2443 uint32_t CPDF_SyntaxParser::GetDirectNum() {

2444 bool bIsNumber;

2445 GetNextWordInternal(&bIsNumber);

2446 if (!bIsNumber)

2447 return 0;

2448

2449 m_WordBuffer[m_WordSize] = 0;

2450 return FXSYS_atoui(reinterpret_cast<const FX_CHAR*>(m_WordBuffer));

2451 }

2452

2453 bool CPDF_SyntaxParser::IsWholeWord(FX_FILESIZE startpos,

2454 FX_FILESIZE limit,

2455 const CFX_ByteStringC& tag,

2456 FX_BOOL checkKeyword) {

2457 const FX_DWORD taglen = tag.GetLength();

2458

2459 bool bCheckLeft = !PDFCharIsDelimiter(tag[0]) && !PDFCharIsWhitespace(tag[0]);

2460 bool bCheckRight = !PDFCharIsDelimiter(tag[taglen - 1]) &&

2461 !PDFCharIsWhitespace(tag[taglen - 1]);

2462

2463 uint8_t ch;

2464 if (bCheckRight && startpos + (int32_t)taglen <= limit &&

2465 GetCharAt(startpos + (int32_t)taglen, ch)) {

2466 if (PDFCharIsNumeric(ch) \|\| PDFCharIsOther(ch) \|\|

2467 (checkKeyword && PDFCharIsDelimiter(ch))) {

2468 return false;

2469 }

2470 }

2471

2472 if (bCheckLeft && startpos > 0 && GetCharAt(startpos - 1, ch)) {

2473 if (PDFCharIsNumeric(ch) \|\| PDFCharIsOther(ch) \|\|

2474 (checkKeyword && PDFCharIsDelimiter(ch))) {

2475 return false;

2476 }

2477 }

2478 return true;

2479 }

2480

2481 // TODO(dsinclair): Split into a SearchWordForward and SearchWordBackwards

2482 // and drop the bool.

2483 FX_BOOL CPDF_SyntaxParser::SearchWord(const CFX_ByteStringC& tag,

2484 FX_BOOL bWholeWord,

2485 FX_BOOL bForward,

2486 FX_FILESIZE limit) {

2487 int32_t taglen = tag.GetLength();

2488 if (taglen == 0)

2489 return FALSE;

2490

2491 FX_FILESIZE pos = m_Pos;

2492 int32_t offset = 0;

2493 if (!bForward)

2494 offset = taglen - 1;

2495

2496 const uint8_t* tag_data = tag.GetPtr();

2497 uint8_t byte;

2498 while (1) {

2499 if (bForward) {

2500 if (limit && pos >= m_Pos + limit)

2501 return FALSE;

2502

2503 if (!GetCharAt(pos, byte))

2504 return FALSE;

2505

2506 } else {

2507 if (limit && pos <= m_Pos - limit)

2508 return FALSE;

2509

2510 if (!GetCharAtBackward(pos, byte))

2511 return FALSE;

2512 }

2513

2514 if (byte == tag_data[offset]) {

2515 if (bForward) {

2516 offset++;

2517 if (offset < taglen) {

2518 pos++;

2519 continue;

2520 }

2521 } else {

2522 offset--;

2523 if (offset >= 0) {

2524 pos--;

2525 continue;

2526 }

2527 }

2528

2529 FX_FILESIZE startpos = bForward ? pos - taglen + 1 : pos;

2530 if (!bWholeWord \|\| IsWholeWord(startpos, limit, tag, FALSE)) {

2531 m_Pos = startpos;

2532 return TRUE;

2533 }

2534 }

2535

2536 if (bForward) {

2537 offset = byte == tag_data[0] ? 1 : 0;

2538 pos++;

2539 } else {

2540 offset = byte == tag_data[taglen - 1] ? taglen - 2 : taglen - 1;

2541 pos--;

2542 }

2543

2544 if (pos < 0)

2545 return FALSE;

2546 }

2547

2548 return FALSE;

2549 }

2550

2551 int32_t CPDF_SyntaxParser::SearchMultiWord(const CFX_ByteStringC& tags,

2552 FX_BOOL bWholeWord,

2553 FX_FILESIZE limit) {

2554 int32_t ntags = 1;

2555 for (int i = 0; i < tags.GetLength(); ++i) {

2556 if (tags[i] == 0)

2557 ++ntags;

2558 }

2559

2560 std::vector<SearchTagRecord> patterns(ntags);

2561 FX_DWORD start = 0;

2562 FX_DWORD itag = 0;

2563 FX_DWORD max_len = 0;

2564 for (int i = 0; i <= tags.GetLength(); ++i) {

2565 if (tags[i] == 0) {

2566 FX_DWORD len = i - start;

2567 max_len = std::max(len, max_len);

2568 patterns[itag].m_pTag = tags.GetCStr() + start;

2569 patterns[itag].m_Len = len;

2570 patterns[itag].m_Offset = 0;

2571 start = i + 1;

2572 ++itag;

2573 }

2574 }

2575

2576 const FX_FILESIZE pos_limit = m_Pos + limit;

2577 for (FX_FILESIZE pos = m_Pos; !limit \|\| pos < pos_limit; ++pos) {

2578 uint8_t byte;

2579 if (!GetCharAt(pos, byte))

2580 break;

2581

2582 for (int i = 0; i < ntags; ++i) {

2583 SearchTagRecord& pat = patterns[i];

2584 if (pat.m_pTag[pat.m_Offset] != byte) {

2585 pat.m_Offset = (pat.m_pTag[0] == byte) ? 1 : 0;

2586 continue;

2587 }

2588

2589 ++pat.m_Offset;

2590 if (pat.m_Offset != pat.m_Len)

2591 continue;

2592

2593 if (!bWholeWord \|\|

2594 IsWholeWord(pos - pat.m_Len, limit,

2595 CFX_ByteStringC(pat.m_pTag, pat.m_Len), FALSE)) {

2596 return i;

2597 }

2598

2599 pat.m_Offset = (pat.m_pTag[0] == byte) ? 1 : 0;

2600 }

2601 }

2602 return -1;

2603 }

2604

2605 FX_FILESIZE CPDF_SyntaxParser::FindTag(const CFX_ByteStringC& tag,

2606 FX_FILESIZE limit) {

2607 int32_t taglen = tag.GetLength();

2608 int32_t match = 0;

2609 limit += m_Pos;

2610 FX_FILESIZE startpos = m_Pos;

2611

2612 while (1) {

2613 uint8_t ch;

2614 if (!GetNextChar(ch))

2615 return -1;

2616

2617 if (ch == tag[match]) {

2618 match++;

2619 if (match == taglen)

2620 return m_Pos - startpos - taglen;

2621 } else {

2622 match = ch == tag[0] ? 1 : 0;

2623 }

2624

2625 if (limit && m_Pos == limit)

2626 return -1;

2627 }

2628 return -1;

2629 }

2630

2631 void CPDF_SyntaxParser::SetEncrypt(

2632 std::unique_ptr<CPDF_CryptoHandler> pCryptoHandler) {

2633 m_pCryptoHandler = std::move(pCryptoHandler);

2634 }

2635	1689

2636 class CPDF_DataAvail final : public IPDF_DataAvail {	1690 class CPDF_DataAvail final : public IPDF_DataAvail {

2637 public:	1691 public:

2638 CPDF_DataAvail(IFX_FileAvail* pFileAvail,	1692 CPDF_DataAvail(IFX_FileAvail* pFileAvail,

2639 IFX_FileRead* pFileRead,	1693 IFX_FileRead* pFileRead,

2640 FX_BOOL bSupportHintTable);	1694 FX_BOOL bSupportHintTable);

2641 ~CPDF_DataAvail() override;	1695 ~CPDF_DataAvail() override;

2642	1696

2643 // IPDF_DataAvail:	1697 // IPDF_DataAvail:

2644 DocAvailStatus IsDocAvail(IFX_DownloadHints* pHints) override;	1698 DocAvailStatus IsDocAvail(IFX_DownloadHints* pHints) override;

(...skipping 460 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
3105 if (m_pFileAvail->IsDataAvail(0, (FX_DWORD)m_dwFileLen)) {	2159 if (m_pFileAvail->IsDataAvail(0, (FX_DWORD)m_dwFileLen)) {

3106 m_docStatus = PDF_DATAAVAIL_DONE;	2160 m_docStatus = PDF_DATAAVAIL_DONE;

3107 return TRUE;	2161 return TRUE;

3108 }	2162 }

3109	2163

3110 pHints->AddSegment(0, (FX_DWORD)m_dwFileLen);	2164 pHints->AddSegment(0, (FX_DWORD)m_dwFileLen);

3111 return FALSE;	2165 return FALSE;

3112 }	2166 }

3113	2167

3114 FX_BOOL CPDF_DataAvail::LoadAllXref(IFX_DownloadHints* pHints) {	2168 FX_BOOL CPDF_DataAvail::LoadAllXref(IFX_DownloadHints* pHints) {

3115 m_parser.m_Syntax.InitParser(m_pFileRead, (FX_DWORD)m_dwHeaderOffset);	2169 m_parser.m_pSyntax->InitParser(m_pFileRead, (FX_DWORD)m_dwHeaderOffset);

3116 m_parser.m_bOwnFileRead = false;	2170 m_parser.m_bOwnFileRead = false;

3117 if (!m_parser.LoadAllCrossRefV4(m_dwLastXRefOffset) &&	2171 if (!m_parser.LoadAllCrossRefV4(m_dwLastXRefOffset) &&

3118 !m_parser.LoadAllCrossRefV5(m_dwLastXRefOffset)) {	2172 !m_parser.LoadAllCrossRefV5(m_dwLastXRefOffset)) {

3119 m_docStatus = PDF_DATAAVAIL_LOADALLFILE;	2173 m_docStatus = PDF_DATAAVAIL_LOADALLFILE;

3120 return FALSE;	2174 return FALSE;

3121 }	2175 }

3122	2176

3123 m_dwRootObjNum = m_parser.GetRootObjNum();	2177 m_dwRootObjNum = m_parser.GetRootObjNum();

3124 m_dwInfoObjNum = m_parser.GetInfoObjNum();	2178 m_dwInfoObjNum = m_parser.GetInfoObjNum();

3125 m_pCurrentParser = &m_parser;	2179 m_pCurrentParser = &m_parser;

(...skipping 564 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
3690 (FX_DWORD)(m_Pos + 512 > m_dwFileLen ? m_dwFileLen - m_Pos : 512);	2744 (FX_DWORD)(m_Pos + 512 > m_dwFileLen ? m_dwFileLen - m_Pos : 512);

3691	2745

3692 if (m_pFileAvail->IsDataAvail(m_Pos, req_size)) {	2746 if (m_pFileAvail->IsDataAvail(m_Pos, req_size)) {

3693 int32_t iSize = (int32_t)(m_Pos + req_size - m_dwCurrentXRefSteam);	2747 int32_t iSize = (int32_t)(m_Pos + req_size - m_dwCurrentXRefSteam);

3694 CFX_BinaryBuf buf(iSize);	2748 CFX_BinaryBuf buf(iSize);

3695 uint8_t* pBuf = buf.GetBuffer();	2749 uint8_t* pBuf = buf.GetBuffer();

3696	2750

3697 m_pFileRead->ReadBlock(pBuf, m_dwCurrentXRefSteam, iSize);	2751 m_pFileRead->ReadBlock(pBuf, m_dwCurrentXRefSteam, iSize);

3698	2752

3699 ScopedFileStream file(FX_CreateMemoryStream(pBuf, (size_t)iSize, FALSE));	2753 ScopedFileStream file(FX_CreateMemoryStream(pBuf, (size_t)iSize, FALSE));

3700 m_parser.m_Syntax.InitParser(file.get(), 0);	2754 m_parser.m_pSyntax->InitParser(file.get(), 0);

3701	2755

3702 bool bNumber;	2756 bool bNumber;

3703 CFX_ByteString objnum = m_parser.m_Syntax.GetNextWord(&bNumber);	2757 CFX_ByteString objnum = m_parser.m_pSyntax->GetNextWord(&bNumber);

3704 if (!bNumber)	2758 if (!bNumber)

3705 return -1;	2759 return -1;

3706	2760

3707 FX_DWORD objNum = FXSYS_atoui(objnum);	2761 FX_DWORD objNum = FXSYS_atoui(objnum);

3708 CPDF_Object* pObj = m_parser.ParseIndirectObjectAt(nullptr, 0, objNum);	2762 CPDF_Object* pObj = m_parser.ParseIndirectObjectAt(nullptr, 0, objNum);

3709 if (!pObj) {	2763 if (!pObj) {

3710 m_Pos += m_parser.m_Syntax.SavePos();	2764 m_Pos += m_parser.m_pSyntax->SavePos();

3711 return 0;	2765 return 0;

3712 }	2766 }

3713	2767

3714 CPDF_Dictionary* pDict = pObj->GetDict();	2768 CPDF_Dictionary* pDict = pObj->GetDict();

3715 CPDF_Name* pName = ToName(pDict ? pDict->GetElement("Type") : nullptr);	2769 CPDF_Name* pName = ToName(pDict ? pDict->GetElement("Type") : nullptr);

3716 if (pName) {	2770 if (pName) {

3717 if (pName->GetString() == "XRef") {	2771 if (pName->GetString() == "XRef") {

3718 m_Pos += m_parser.m_Syntax.SavePos();	2772 m_Pos += m_parser.m_pSyntax->SavePos();

3719 xref_offset = pObj->GetDict()->GetIntegerBy("Prev");	2773 xref_offset = pObj->GetDict()->GetIntegerBy("Prev");

3720 pObj->Release();	2774 pObj->Release();

3721 return 1;	2775 return 1;

3722 }	2776 }

3723 }	2777 }

3724 pObj->Release();	2778 pObj->Release();

3725 return -1;	2779 return -1;

3726 }	2780 }

3727 pHints->AddSegment(m_Pos, req_size);	2781 pHints->AddSegment(m_Pos, req_size);

3728 return 0;	2782 return 0;

(...skipping 1303 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
5032 CPDF_Array* pRange = m_pLinearizedDict->GetArrayBy("H");	4086 CPDF_Array* pRange = m_pLinearizedDict->GetArrayBy("H");

5033 if (!pRange)	4087 if (!pRange)

5034 return -1;	4088 return -1;

5035	4089

5036 CPDF_Object* pStreamLen = pRange->GetElementValue(1);	4090 CPDF_Object* pStreamLen = pRange->GetElementValue(1);

5037 if (!pStreamLen)	4091 if (!pStreamLen)

5038 return -1;	4092 return -1;

5039	4093

5040 return pStreamLen->GetInteger();	4094 return pStreamLen->GetInteger();

5041 }	4095 }

OLD	NEW

« no previous file with comments | « core/src/fpdfapi/fpdf_parser/fpdf_parser_fdf.cpp ('k') | core/src/fpdfapi/fpdf_parser/fpdf_parser_parser_unittest.cpp » ('j') | no next file with comments »