core/fpdfapi/fpdf_parser/cpdf_syntax_parser.cpp - Issue 2392603004: Move core/fpdfapi/fpdf_parser to core/fpdfapi/parser

Side by Side Diff: core/fpdfapi/fpdf_parser/cpdf_syntax_parser.cpp

Issue 2392603004: Move core/fpdfapi/fpdf_parser to core/fpdfapi/parser (Closed)

Patch Set: Rebase to master Created 4 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
	(Empty)
1 // Copyright 2016 PDFium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.

4

5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com

6

7 #include "core/fpdfapi/fpdf_parser/cpdf_syntax_parser.h"

8

9 #include <vector>

10

11 #include "core/fpdfapi/cpdf_modulemgr.h"

12 #include "core/fpdfapi/fpdf_parser/cpdf_array.h"

13 #include "core/fpdfapi/fpdf_parser/cpdf_boolean.h"

14 #include "core/fpdfapi/fpdf_parser/cpdf_crypto_handler.h"

15 #include "core/fpdfapi/fpdf_parser/cpdf_dictionary.h"

16 #include "core/fpdfapi/fpdf_parser/cpdf_name.h"

17 #include "core/fpdfapi/fpdf_parser/cpdf_null.h"

18 #include "core/fpdfapi/fpdf_parser/cpdf_number.h"

19 #include "core/fpdfapi/fpdf_parser/cpdf_reference.h"

20 #include "core/fpdfapi/fpdf_parser/cpdf_stream.h"

21 #include "core/fpdfapi/fpdf_parser/cpdf_string.h"

22 #include "core/fpdfapi/fpdf_parser/fpdf_parser_decode.h"

23 #include "core/fpdfapi/fpdf_parser/fpdf_parser_utility.h"

24 #include "core/fxcrt/fx_ext.h"

25 #include "third_party/base/numerics/safe_math.h"

26

27 namespace {

28

// Per-pattern matching state used by CPDF_SyntaxParser::SearchMultiWord().
29 struct SearchTagRecord {

// The pattern itself: one nul-separated piece of the |tags| argument.
30 CFX_ByteStringC m_bsTag;

// Number of leading bytes of m_bsTag matched so far at the scan position.
31 FX_STRSIZE m_Offset;

32 };

33

34 } // namespace

35

// Nesting depth of the GetObject()/GetObjectForStrict() calls currently in
// progress. Both functions increment it on entry, compare it against
// kParserMaxRecursionDepth, and guard it with a CFX_AutoRestorer.
36 // static

37 int CPDF_SyntaxParser::s_CurrentRecursionDepth = 0;

38

// Default constructor: delegates to the pool-taking constructor with a
// default-constructed (presumably empty - confirm against CFX_WeakPtr) pool
// pointer.
39 CPDF_SyntaxParser::CPDF_SyntaxParser()

40 : CPDF_SyntaxParser(CFX_WeakPtr<CFX_ByteStringPool>()) {}

41

// Constructs a parser with no file attached yet; InitParser() must supply the
// file and allocate the read buffer. |pPool| is the string pool consulted by
// MaybeIntern().
// NOTE(review): m_Pos, m_HeaderOffset, m_FileLen and m_BufOffset are first
// assigned in InitParser(), and m_WordSize in GetNextWordInternal(); none is
// initialised here - confirm the class declaration gives them defaults.
42 CPDF_SyntaxParser::CPDF_SyntaxParser(

43 const CFX_WeakPtr<CFX_ByteStringPool>& pPool)

44 : m_MetadataObjnum(0),

45 m_pFileAccess(nullptr),

46 m_pFileBuf(nullptr),

47 m_BufSize(CPDF_ModuleMgr::kFileBufSize),

48 m_pPool(pPool) {}

49

// Releases the read buffer allocated by InitParser(). m_pFileBuf is nullptr
// if InitParser() was never called.
50 CPDF_SyntaxParser::~CPDF_SyntaxParser() {

51 FX_Free(m_pFileBuf);

52 }

53

54 FX_BOOL CPDF_SyntaxParser::GetCharAt(FX_FILESIZE pos, uint8_t& ch) {

55 CFX_AutoRestorer<FX_FILESIZE> save_pos(&m_Pos);

56 m_Pos = pos;

57 return GetNextChar(ch);

58 }

59

60 FX_BOOL CPDF_SyntaxParser::GetNextChar(uint8_t& ch) {

61 FX_FILESIZE pos = m_Pos + m_HeaderOffset;

62 if (pos >= m_FileLen)

63 return FALSE;

64

65 if (m_BufOffset >= pos \|\| (FX_FILESIZE)(m_BufOffset + m_BufSize) <= pos) {

66 FX_FILESIZE read_pos = pos;

67 uint32_t read_size = m_BufSize;

68 if ((FX_FILESIZE)read_size > m_FileLen)

69 read_size = (uint32_t)m_FileLen;

70

71 if ((FX_FILESIZE)(read_pos + read_size) > m_FileLen) {

72 if (m_FileLen < (FX_FILESIZE)read_size) {

73 read_pos = 0;

74 read_size = (uint32_t)m_FileLen;

75 } else {

76 read_pos = m_FileLen - read_size;

77 }

78 }

79

80 if (!m_pFileAccess->ReadBlock(m_pFileBuf, read_pos, read_size))

81 return FALSE;

82

83 m_BufOffset = read_pos;

84 }

85 ch = m_pFileBuf[pos - m_BufOffset];

86 m_Pos++;

87 return TRUE;

88 }

89

90 FX_BOOL CPDF_SyntaxParser::GetCharAtBackward(FX_FILESIZE pos, uint8_t& ch) {

91 pos += m_HeaderOffset;

92 if (pos >= m_FileLen)

93 return FALSE;

94

95 if (m_BufOffset >= pos \|\| (FX_FILESIZE)(m_BufOffset + m_BufSize) <= pos) {

96 FX_FILESIZE read_pos;

97 if (pos < (FX_FILESIZE)m_BufSize)

98 read_pos = 0;

99 else

100 read_pos = pos - m_BufSize + 1;

101

102 uint32_t read_size = m_BufSize;

103 if ((FX_FILESIZE)(read_pos + read_size) > m_FileLen) {

104 if (m_FileLen < (FX_FILESIZE)read_size) {

105 read_pos = 0;

106 read_size = (uint32_t)m_FileLen;

107 } else {

108 read_pos = m_FileLen - read_size;

109 }

110 }

111

112 if (!m_pFileAccess->ReadBlock(m_pFileBuf, read_pos, read_size))

113 return FALSE;

114

115 m_BufOffset = read_pos;

116 }

117 ch = m_pFileBuf[pos - m_BufOffset];

118 return TRUE;

119 }

120

121 FX_BOOL CPDF_SyntaxParser::ReadBlock(uint8_t* pBuf, uint32_t size) {

122 if (!m_pFileAccess->ReadBlock(pBuf, m_Pos + m_HeaderOffset, size))

123 return FALSE;

124 m_Pos += size;

125 return TRUE;

126 }

127

128 void CPDF_SyntaxParser::GetNextWordInternal(bool* bIsNumber) {

129 m_WordSize = 0;

130 if (bIsNumber)

131 *bIsNumber = true;

132

133 uint8_t ch;

134 if (!GetNextChar(ch))

135 return;

136

137 while (1) {

138 while (PDFCharIsWhitespace(ch)) {

139 if (!GetNextChar(ch))

140 return;

141 }

142

143 if (ch != '%')

144 break;

145

146 while (1) {

147 if (!GetNextChar(ch))

148 return;

149 if (PDFCharIsLineEnding(ch))

150 break;

151 }

152 }

153

154 if (PDFCharIsDelimiter(ch)) {

155 if (bIsNumber)

156 *bIsNumber = false;

157

158 m_WordBuffer[m_WordSize++] = ch;

159 if (ch == '/') {

160 while (1) {

161 if (!GetNextChar(ch))

162 return;

163

164 if (!PDFCharIsOther(ch) && !PDFCharIsNumeric(ch)) {

165 m_Pos--;

166 return;

167 }

168

169 if (m_WordSize < sizeof(m_WordBuffer) - 1)

170 m_WordBuffer[m_WordSize++] = ch;

171 }

172 } else if (ch == '<') {

173 if (!GetNextChar(ch))

174 return;

175

176 if (ch == '<')

177 m_WordBuffer[m_WordSize++] = ch;

178 else

179 m_Pos--;

180 } else if (ch == '>') {

181 if (!GetNextChar(ch))

182 return;

183

184 if (ch == '>')

185 m_WordBuffer[m_WordSize++] = ch;

186 else

187 m_Pos--;

188 }

189 return;

190 }

191

192 while (1) {

193 if (m_WordSize < sizeof(m_WordBuffer) - 1)

194 m_WordBuffer[m_WordSize++] = ch;

195

196 if (!PDFCharIsNumeric(ch)) {

197 if (bIsNumber)

198 *bIsNumber = false;

199 }

200

201 if (!GetNextChar(ch))

202 return;

203

204 if (PDFCharIsDelimiter(ch) \|\| PDFCharIsWhitespace(ch)) {

205 m_Pos--;

206 break;

207 }

208 }

209 }

210

211 CFX_ByteString CPDF_SyntaxParser::ReadString() {

212 uint8_t ch;

213 if (!GetNextChar(ch))

214 return CFX_ByteString();

215

216 CFX_ByteTextBuf buf;

217 int32_t parlevel = 0;

218 int32_t status = 0;

219 int32_t iEscCode = 0;

220 while (1) {

221 switch (status) {

222 case 0:

223 if (ch == ')') {

224 if (parlevel == 0) {

225 return buf.MakeString();

226 }

227 parlevel--;

228 buf.AppendChar(')');

229 } else if (ch == '(') {

230 parlevel++;

231 buf.AppendChar('(');

232 } else if (ch == '\\') {

233 status = 1;

234 } else {

235 buf.AppendChar(ch);

236 }

237 break;

238 case 1:

239 if (ch >= '0' && ch <= '7') {

240 iEscCode = FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch));

241 status = 2;

242 break;

243 }

244

245 if (ch == 'n') {

246 buf.AppendChar('\n');

247 } else if (ch == 'r') {

248 buf.AppendChar('\r');

249 } else if (ch == 't') {

250 buf.AppendChar('\t');

251 } else if (ch == 'b') {

252 buf.AppendChar('\b');

253 } else if (ch == 'f') {

254 buf.AppendChar('\f');

255 } else if (ch == '\r') {

256 status = 4;

257 break;

258 } else if (ch != '\n') {

259 buf.AppendChar(ch);

260 }

261 status = 0;

262 break;

263 case 2:

264 if (ch >= '0' && ch <= '7') {

265 iEscCode =

266 iEscCode * 8 + FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch));

267 status = 3;

268 } else {

269 buf.AppendChar(iEscCode);

270 status = 0;

271 continue;

272 }

273 break;

274 case 3:

275 if (ch >= '0' && ch <= '7') {

276 iEscCode =

277 iEscCode * 8 + FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch));

278 buf.AppendChar(iEscCode);

279 status = 0;

280 } else {

281 buf.AppendChar(iEscCode);

282 status = 0;

283 continue;

284 }

285 break;

286 case 4:

287 status = 0;

288 if (ch != '\n')

289 continue;

290 break;

291 }

292

293 if (!GetNextChar(ch))

294 break;

295 }

296

297 GetNextChar(ch);

298 return buf.MakeString();

299 }

300

301 CFX_ByteString CPDF_SyntaxParser::ReadHexString() {

302 uint8_t ch;

303 if (!GetNextChar(ch))

304 return CFX_ByteString();

305

306 CFX_ByteTextBuf buf;

307 bool bFirst = true;

308 uint8_t code = 0;

309 while (1) {

310 if (ch == '>')

311 break;

312

313 if (std::isxdigit(ch)) {

314 int val = FXSYS_toHexDigit(ch);

315 if (bFirst) {

316 code = val * 16;

317 } else {

318 code += val;

319 buf.AppendByte(code);

320 }

321 bFirst = !bFirst;

322 }

323

324 if (!GetNextChar(ch))

325 break;

326 }

327 if (!bFirst)

328 buf.AppendByte(code);

329

330 return buf.MakeString();

331 }

332

333 void CPDF_SyntaxParser::ToNextLine() {

334 uint8_t ch;

335 while (GetNextChar(ch)) {

336 if (ch == '\n')

337 break;

338

339 if (ch == '\r') {

340 GetNextChar(ch);

341 if (ch != '\n')

342 --m_Pos;

343 break;

344 }

345 }

346 }

347

348 void CPDF_SyntaxParser::ToNextWord() {

349 uint8_t ch;

350 if (!GetNextChar(ch))

351 return;

352

353 while (1) {

354 while (PDFCharIsWhitespace(ch)) {

355 if (!GetNextChar(ch))

356 return;

357 }

358

359 if (ch != '%')

360 break;

361

362 while (1) {

363 if (!GetNextChar(ch))

364 return;

365 if (PDFCharIsLineEnding(ch))

366 break;

367 }

368 }

369 m_Pos--;

370 }

371

372 CFX_ByteString CPDF_SyntaxParser::GetNextWord(bool* bIsNumber) {

373 GetNextWordInternal(bIsNumber);

374 return CFX_ByteString((const FX_CHAR*)m_WordBuffer, m_WordSize);

375 }

376

// Returns the next token without reporting whether it is numeric
// (GetNextWord() is called with a null |bIsNumber|).
377 CFX_ByteString CPDF_SyntaxParser::GetKeyword() {

378 return GetNextWord(nullptr);

379 }

380

381 CPDF_Object* CPDF_SyntaxParser::GetObject(CPDF_IndirectObjectHolder* pObjList,

382 uint32_t objnum,

383 uint32_t gennum,

384 FX_BOOL bDecrypt) {

385 CFX_AutoRestorer<int> restorer(&s_CurrentRecursionDepth);

386 if (++s_CurrentRecursionDepth > kParserMaxRecursionDepth)

387 return nullptr;

388

389 FX_FILESIZE SavedObjPos = m_Pos;

390 bool bIsNumber;

391 CFX_ByteString word = GetNextWord(&bIsNumber);

392 if (word.GetLength() == 0)

393 return nullptr;

394

395 if (bIsNumber) {

396 FX_FILESIZE SavedPos = m_Pos;

397 CFX_ByteString nextword = GetNextWord(&bIsNumber);

398 if (bIsNumber) {

399 CFX_ByteString nextword2 = GetNextWord(nullptr);

400 if (nextword2 == "R")

401 return new CPDF_Reference(pObjList, FXSYS_atoui(word.c_str()));

402 }

403 m_Pos = SavedPos;

404 return new CPDF_Number(word.AsStringC());

405 }

406

407 if (word == "true" \|\| word == "false")

408 return new CPDF_Boolean(word == "true");

409

410 if (word == "null")

411 return new CPDF_Null;

412

413 if (word == "(") {

414 CFX_ByteString str = ReadString();

415 if (m_pCryptoHandler && bDecrypt)

416 m_pCryptoHandler->Decrypt(objnum, gennum, str);

417 return new CPDF_String(MaybeIntern(str), FALSE);

418 }

419

420 if (word == "<") {

421 CFX_ByteString str = ReadHexString();

422 if (m_pCryptoHandler && bDecrypt)

423 m_pCryptoHandler->Decrypt(objnum, gennum, str);

424 return new CPDF_String(MaybeIntern(str), TRUE);

425 }

426

427 if (word == "[") {

428 CPDF_Array* pArray = new CPDF_Array;

429 while (CPDF_Object* pObj = GetObject(pObjList, objnum, gennum, true))

430 pArray->Add(pObj);

431

432 return pArray;

433 }

434

435 if (word[0] == '/') {

436 return new CPDF_Name(MaybeIntern(

437 PDF_NameDecode(CFX_ByteStringC(m_WordBuffer + 1, m_WordSize - 1))));

438 }

439

440 if (word == "<<") {

441 int32_t nKeys = 0;

442 FX_FILESIZE dwSignValuePos = 0;

443

444 std::unique_ptr<CPDF_Dictionary, ReleaseDeleter<CPDF_Dictionary>> pDict(

445 new CPDF_Dictionary(m_pPool));

446 while (1) {

447 CFX_ByteString key = GetNextWord(nullptr);

448 if (key.IsEmpty())

449 return nullptr;

450

451 FX_FILESIZE SavedPos = m_Pos - key.GetLength();

452 if (key == ">>")

453 break;

454

455 if (key == "endobj") {

456 m_Pos = SavedPos;

457 break;

458 }

459

460 if (key[0] != '/')

461 continue;

462

463 ++nKeys;

464 key = PDF_NameDecode(key);

465 if (key.IsEmpty())

466 continue;

467

468 if (key == "/Contents")

469 dwSignValuePos = m_Pos;

470

471 CPDF_Object* pObj = GetObject(pObjList, objnum, gennum, true);

472 if (!pObj)

473 continue;

474

475 CFX_ByteString keyNoSlash(key.raw_str() + 1, key.GetLength() - 1);

476 pDict->SetFor(keyNoSlash, pObj);

477 }

478

479 // Only when this is a signature dictionary and has contents, we reset the

480 // contents to the un-decrypted form.

481 if (pDict->IsSignatureDict() && dwSignValuePos) {

482 CFX_AutoRestorer<FX_FILESIZE> save_pos(&m_Pos);

483 m_Pos = dwSignValuePos;

484 pDict->SetFor("Contents", GetObject(pObjList, objnum, gennum, false));

485 }

486

487 FX_FILESIZE SavedPos = m_Pos;

488 CFX_ByteString nextword = GetNextWord(nullptr);

489 if (nextword != "stream") {

490 m_Pos = SavedPos;

491 return pDict.release();

492 }

493 return ReadStream(pDict.release(), objnum, gennum);

494 }

495

496 if (word == ">>")

497 m_Pos = SavedObjPos;

498

499 return nullptr;

500 }

501

502 CPDF_Object* CPDF_SyntaxParser::GetObjectForStrict(

503 CPDF_IndirectObjectHolder* pObjList,

504 uint32_t objnum,

505 uint32_t gennum) {

506 CFX_AutoRestorer<int> restorer(&s_CurrentRecursionDepth);

507 if (++s_CurrentRecursionDepth > kParserMaxRecursionDepth)

508 return nullptr;

509

510 FX_FILESIZE SavedObjPos = m_Pos;

511 bool bIsNumber;

512 CFX_ByteString word = GetNextWord(&bIsNumber);

513 if (word.GetLength() == 0)

514 return nullptr;

515

516 if (bIsNumber) {

517 FX_FILESIZE SavedPos = m_Pos;

518 CFX_ByteString nextword = GetNextWord(&bIsNumber);

519 if (bIsNumber) {

520 CFX_ByteString nextword2 = GetNextWord(nullptr);

521 if (nextword2 == "R")

522 return new CPDF_Reference(pObjList, FXSYS_atoui(word.c_str()));

523 }

524 m_Pos = SavedPos;

525 return new CPDF_Number(word.AsStringC());

526 }

527

528 if (word == "true" \|\| word == "false")

529 return new CPDF_Boolean(word == "true");

530

531 if (word == "null")

532 return new CPDF_Null;

533

534 if (word == "(") {

535 CFX_ByteString str = ReadString();

536 if (m_pCryptoHandler)

537 m_pCryptoHandler->Decrypt(objnum, gennum, str);

538 return new CPDF_String(MaybeIntern(str), FALSE);

539 }

540

541 if (word == "<") {

542 CFX_ByteString str = ReadHexString();

543 if (m_pCryptoHandler)

544 m_pCryptoHandler->Decrypt(objnum, gennum, str);

545 return new CPDF_String(MaybeIntern(str), TRUE);

546 }

547

548 if (word == "[") {

549 std::unique_ptr<CPDF_Array, ReleaseDeleter<CPDF_Array>> pArray(

550 new CPDF_Array);

551 while (CPDF_Object* pObj = GetObject(pObjList, objnum, gennum, true))

552 pArray->Add(pObj);

553

554 return m_WordBuffer[0] == ']' ? pArray.release() : nullptr;

555 }

556

557 if (word[0] == '/') {

558 return new CPDF_Name(MaybeIntern(

559 PDF_NameDecode(CFX_ByteStringC(m_WordBuffer + 1, m_WordSize - 1))));

560 }

561

562 if (word == "<<") {

563 std::unique_ptr<CPDF_Dictionary, ReleaseDeleter<CPDF_Dictionary>> pDict(

564 new CPDF_Dictionary(m_pPool));

565 while (1) {

566 FX_FILESIZE SavedPos = m_Pos;

567 CFX_ByteString key = GetNextWord(nullptr);

568 if (key.IsEmpty())

569 return nullptr;

570

571 if (key == ">>")

572 break;

573

574 if (key == "endobj") {

575 m_Pos = SavedPos;

576 break;

577 }

578

579 if (key[0] != '/')

580 continue;

581

582 key = PDF_NameDecode(key);

583 std::unique_ptr<CPDF_Object, ReleaseDeleter<CPDF_Object>> obj(

584 GetObject(pObjList, objnum, gennum, true));

585 if (!obj) {

586 uint8_t ch;

587 while (GetNextChar(ch) && ch != 0x0A && ch != 0x0D) {

588 continue;

589 }

590 return nullptr;

591 }

592

593 if (key.GetLength() > 1) {

594 pDict->SetFor(CFX_ByteString(key.c_str() + 1, key.GetLength() - 1),

595 obj.release());

596 }

597 }

598

599 FX_FILESIZE SavedPos = m_Pos;

600 CFX_ByteString nextword = GetNextWord(nullptr);

601 if (nextword != "stream") {

602 m_Pos = SavedPos;

603 return pDict.release();

604 }

605

606 return ReadStream(pDict.release(), objnum, gennum);

607 }

608

609 if (word == ">>")

610 m_Pos = SavedObjPos;

611

612 return nullptr;

613 }

614

615 unsigned int CPDF_SyntaxParser::ReadEOLMarkers(FX_FILESIZE pos) {

616 unsigned char byte1 = 0;

617 unsigned char byte2 = 0;

618

619 GetCharAt(pos, byte1);

620 GetCharAt(pos + 1, byte2);

621

622 if (byte1 == '\r' && byte2 == '\n')

623 return 2;

624

625 if (byte1 == '\r' \|\| byte1 == '\n')

626 return 1;

627

628 return 0;

629 }

630

// Builds a CPDF_Stream from the dictionary |pDict| and the stream data that
// follows the "stream" keyword the caller has just consumed.
//
// |objnum| / |gennum| identify the enclosing indirect object and are passed to
// the crypto handler for decryption.
//
// On every failure path |pDict| is Release()d and nullptr is returned; on
// success |pDict| is handed to the CPDF_Stream constructor.
//
// The data length comes from /Length when that looks usable; otherwise (and
// only when no decryption applies) it is recovered by searching for the
// "endstream" / "endobj" keywords, and /Length is rewritten to match.
631 CPDF_Stream* CPDF_SyntaxParser::ReadStream(CPDF_Dictionary* pDict,

632 uint32_t objnum,

633 uint32_t gennum) {

634 CPDF_Object* pLenObj = pDict->GetObjectFor("Length");

// -1 means "length not known".
635 FX_FILESIZE len = -1;

636 CPDF_Reference* pLenObjRef = ToReference(pLenObj);

637

// /Length is used when it is not a reference, or when it is a reference that
// has an object list and refers to an object other than |objnum| itself.
638 bool differingObjNum = !pLenObjRef \|\| (pLenObjRef->GetObjList() &&

639 pLenObjRef->GetRefObjNum() != objnum);

640 if (pLenObj && differingObjNum)

641 len = pLenObj->GetInteger();

642

643 // Locate the start of stream.

644 ToNextLine();

645 FX_FILESIZE streamStartPos = m_Pos;

646

647 const CFX_ByteStringC kEndStreamStr("endstream");

648 const CFX_ByteStringC kEndObjStr("endobj");

649

// The object whose number equals m_MetadataObjnum is read without a crypto
// handler, i.e. it is not decrypted.
650 CPDF_CryptoHandler* pCryptoHandler =

651 objnum == (uint32_t)m_MetadataObjnum ? nullptr : m_pCryptoHandler.get();

// The length is verified / recovered only when no decryption applies.
// NOTE(review): with a crypto handler, |len| from /Length is used as-is and
// is not checked against the file size before FX_Alloc() below - confirm
// this is acceptable for untrusted input.
652 if (!pCryptoHandler) {

653 FX_BOOL bSearchForKeyword = TRUE;

654 if (len >= 0) {

// Jump |len| bytes ahead (overflow-checked, and only if that stays inside
// the file) and see whether "endstream" is found there.
655 pdfium::base::CheckedNumeric<FX_FILESIZE> pos = m_Pos;

656 pos += len;

657 if (pos.IsValid() && pos.ValueOrDie() < m_FileLen)

658 m_Pos = pos.ValueOrDie();

659

660 m_Pos += ReadEOLMarkers(m_Pos);

// Clear the start of the word buffer so a stale "endstream" left over from
// an earlier token cannot satisfy the comparison below.
661 FXSYS_memset(m_WordBuffer, 0, kEndStreamStr.GetLength() + 1);

662 GetNextWordInternal(nullptr);

663 // Earlier version of PDF specification doesn't require EOL marker before

664 // 'endstream' keyword. If keyword 'endstream' follows the bytes in

665 // specified length, it signals the end of stream.

666 if (FXSYS_memcmp(m_WordBuffer, kEndStreamStr.raw_str(),

667 kEndStreamStr.GetLength()) == 0) {

668 bSearchForKeyword = FALSE;

669 }

670 }

671

672 if (bSearchForKeyword) {

673 // If len is not available, len needs to be calculated

674 // by searching the keywords "endstream" or "endobj".

675 m_Pos = streamStartPos;

// After this loop endStreamOffset is the distance from streamStartPos to the
// first whole-word "endstream", or negative if there is none.
676 FX_FILESIZE endStreamOffset = 0;

677 while (endStreamOffset >= 0) {

678 endStreamOffset = FindTag(kEndStreamStr, 0);

679

680 // Can't find "endstream".

681 if (endStreamOffset < 0)

682 break;

683

684 // Stop searching when "endstream" is found.

685 if (IsWholeWord(m_Pos - kEndStreamStr.GetLength(), m_FileLen,

686 kEndStreamStr, TRUE)) {

687 endStreamOffset = m_Pos - streamStartPos - kEndStreamStr.GetLength();

688 break;

689 }

690 }

691

// Same search, restarted from the stream start, for "endobj".
692 m_Pos = streamStartPos;

693 FX_FILESIZE endObjOffset = 0;

694 while (endObjOffset >= 0) {

695 endObjOffset = FindTag(kEndObjStr, 0);

696

697 // Can't find "endobj".

698 if (endObjOffset < 0)

699 break;

700

701 // Stop searching when "endobj" is found.

702 if (IsWholeWord(m_Pos - kEndObjStr.GetLength(), m_FileLen, kEndObjStr,

703 TRUE)) {

704 endObjOffset = m_Pos - streamStartPos - kEndObjStr.GetLength();

705 break;

706 }

707 }

708

709 // Can't find "endstream" or "endobj".

710 if (endStreamOffset < 0 && endObjOffset < 0) {

711 pDict->Release();

712 return nullptr;

713 }

714

// The stream data ends at whichever keyword comes first.
715 if (endStreamOffset < 0 && endObjOffset >= 0) {

716 // Correct the position of end stream.

717 endStreamOffset = endObjOffset;

718 } else if (endStreamOffset >= 0 && endObjOffset < 0) {

719 // Correct the position of end obj.

720 endObjOffset = endStreamOffset;

721 } else if (endStreamOffset > endObjOffset) {

722 endStreamOffset = endObjOffset;

723 }

724

// An end-of-line marker directly before the keyword is not counted as stream
// data: strip CR LF (2 bytes) or a single CR / LF (1 byte).
725 len = endStreamOffset;

726 int numMarkers = ReadEOLMarkers(streamStartPos + endStreamOffset - 2);

727 if (numMarkers == 2) {

728 len -= 2;

729 } else {

730 numMarkers = ReadEOLMarkers(streamStartPos + endStreamOffset - 1);

731 if (numMarkers == 1) {

732 len -= 1;

733 }

734 }

735

736 if (len < 0) {

737 pDict->Release();

738 return nullptr;

739 }

// Record the recovered length in the dictionary.
740 pDict->SetIntegerFor("Length", len);

741 }

742 m_Pos = streamStartPos;

743 }

744

745 if (len < 0) {

746 pDict->Release();

747 return nullptr;

748 }

749

750 uint8_t* pData = nullptr;

751 if (len > 0) {

752 pData = FX_Alloc(uint8_t, len);

// NOTE(review): the result of ReadBlock() is ignored, so after a failed read
// the stream is built from whatever the freshly allocated buffer holds.
753 ReadBlock(pData, len);

754 if (pCryptoHandler) {

755 CFX_BinaryBuf dest_buf;

756 dest_buf.EstimateSize(pCryptoHandler->DecryptGetSize(len));

757

758 void* context = pCryptoHandler->DecryptStart(objnum, gennum);

759 pCryptoHandler->DecryptStream(context, pData, len, dest_buf);

760 pCryptoHandler->DecryptFinish(context, dest_buf);

761

// Replace the raw bytes with the decrypted ones. DetachBuffer() presumably
// leaves ownership of the buffer with pData - confirm against CFX_BinaryBuf.
762 FX_Free(pData);

763 pData = dest_buf.GetBuffer();

764 len = dest_buf.GetSize();

765 dest_buf.DetachBuffer();

766 }

767 }

768

769 CPDF_Stream* pStream = new CPDF_Stream(pData, len, pDict);

// streamStartPos is reused here as "position right after the stream data".
770 streamStartPos = m_Pos;

771 FXSYS_memset(m_WordBuffer, 0, kEndObjStr.GetLength() + 1);

772

773 GetNextWordInternal(nullptr);

774

// If the token after the data is exactly "endobj" followed by an end-of-line
// marker, rewind to just after the data so that "endobj" is not consumed
// here; in every other case the token stays consumed.
775 int numMarkers = ReadEOLMarkers(m_Pos);

776 if (m_WordSize == static_cast<unsigned int>(kEndObjStr.GetLength()) &&

777 numMarkers != 0 &&

778 FXSYS_memcmp(m_WordBuffer, kEndObjStr.raw_str(),

779 kEndObjStr.GetLength()) == 0) {

780 m_Pos = streamStartPos;

781 }

782 return pStream;

783 }

784

785 void CPDF_SyntaxParser::InitParser(IFX_FileRead* pFileAccess,

786 uint32_t HeaderOffset) {

787 FX_Free(m_pFileBuf);

788

789 m_pFileBuf = FX_Alloc(uint8_t, m_BufSize);

790 m_HeaderOffset = HeaderOffset;

791 m_FileLen = pFileAccess->GetSize();

792 m_Pos = 0;

793 m_pFileAccess = pFileAccess;

794 m_BufOffset = 0;

795 pFileAccess->ReadBlock(

796 m_pFileBuf, 0,

797 (size_t)((FX_FILESIZE)m_BufSize > m_FileLen ? m_FileLen : m_BufSize));

798 }

799

800 uint32_t CPDF_SyntaxParser::GetDirectNum() {

801 bool bIsNumber;

802 GetNextWordInternal(&bIsNumber);

803 if (!bIsNumber)

804 return 0;

805

806 m_WordBuffer[m_WordSize] = 0;

807 return FXSYS_atoui(reinterpret_cast<const FX_CHAR*>(m_WordBuffer));

808 }

809

810 bool CPDF_SyntaxParser::IsWholeWord(FX_FILESIZE startpos,

811 FX_FILESIZE limit,

812 const CFX_ByteStringC& tag,

813 FX_BOOL checkKeyword) {

814 const uint32_t taglen = tag.GetLength();

815

816 bool bCheckLeft = !PDFCharIsDelimiter(tag[0]) && !PDFCharIsWhitespace(tag[0]);

817 bool bCheckRight = !PDFCharIsDelimiter(tag[taglen - 1]) &&

818 !PDFCharIsWhitespace(tag[taglen - 1]);

819

820 uint8_t ch;

821 if (bCheckRight && startpos + (int32_t)taglen <= limit &&

822 GetCharAt(startpos + (int32_t)taglen, ch)) {

823 if (PDFCharIsNumeric(ch) \|\| PDFCharIsOther(ch) \|\|

824 (checkKeyword && PDFCharIsDelimiter(ch))) {

825 return false;

826 }

827 }

828

829 if (bCheckLeft && startpos > 0 && GetCharAt(startpos - 1, ch)) {

830 if (PDFCharIsNumeric(ch) \|\| PDFCharIsOther(ch) \|\|

831 (checkKeyword && PDFCharIsDelimiter(ch))) {

832 return false;

833 }

834 }

835 return true;

836 }

837

838 // TODO(dsinclair): Split into a SearchWordForward and SearchWordBackwards

839 // and drop the bool.

840 FX_BOOL CPDF_SyntaxParser::SearchWord(const CFX_ByteStringC& tag,

841 FX_BOOL bWholeWord,

842 FX_BOOL bForward,

843 FX_FILESIZE limit) {

844 int32_t taglen = tag.GetLength();

845 if (taglen == 0)

846 return FALSE;

847

848 FX_FILESIZE pos = m_Pos;

849 int32_t offset = 0;

850 if (!bForward)

851 offset = taglen - 1;

852

853 const uint8_t* tag_data = tag.raw_str();

854 uint8_t byte;

855 while (1) {

856 if (bForward) {

857 if (limit && pos >= m_Pos + limit)

858 return FALSE;

859

860 if (!GetCharAt(pos, byte))

861 return FALSE;

862

863 } else {

864 if (limit && pos <= m_Pos - limit)

865 return FALSE;

866

867 if (!GetCharAtBackward(pos, byte))

868 return FALSE;

869 }

870

871 if (byte == tag_data[offset]) {

872 if (bForward) {

873 offset++;

874 if (offset < taglen) {

875 pos++;

876 continue;

877 }

878 } else {

879 offset--;

880 if (offset >= 0) {

881 pos--;

882 continue;

883 }

884 }

885

886 FX_FILESIZE startpos = bForward ? pos - taglen + 1 : pos;

887 if (!bWholeWord \|\| IsWholeWord(startpos, limit, tag, FALSE)) {

888 m_Pos = startpos;

889 return TRUE;

890 }

891 }

892

893 if (bForward) {

894 offset = byte == tag_data[0] ? 1 : 0;

895 pos++;

896 } else {

897 offset = byte == tag_data[taglen - 1] ? taglen - 2 : taglen - 1;

898 pos--;

899 }

900

901 if (pos < 0)

902 return FALSE;

903 }

904

905 return FALSE;

906 }

907

908 int32_t CPDF_SyntaxParser::SearchMultiWord(const CFX_ByteStringC& tags,

909 FX_BOOL bWholeWord,

910 FX_FILESIZE limit) {

911 int32_t ntags = 1;

912 for (int i = 0; i < tags.GetLength(); ++i) {

913 if (tags[i] == 0)

914 ++ntags;

915 }

916

917 // Ensure that the input byte string happens to be nul-terminated. This

918 // need not be the case, but the loop below uses this guarantee to put

919 // the last pattern into the vector.

920 ASSERT(tags[tags.GetLength()] == 0);

921 std::vector<SearchTagRecord> patterns(ntags);

922 uint32_t start = 0;

923 uint32_t itag = 0;

924 uint32_t max_len = 0;

925 for (int i = 0; i <= tags.GetLength(); ++i) {

926 if (tags[i] == 0) {

927 uint32_t len = i - start;

928 max_len = std::max(len, max_len);

929 patterns[itag].m_bsTag = tags.Mid(start, len);

930 patterns[itag].m_Offset = 0;

931 start = i + 1;

932 ++itag;

933 }

934 }

935

936 const FX_FILESIZE pos_limit = m_Pos + limit;

937 for (FX_FILESIZE pos = m_Pos; !limit \|\| pos < pos_limit; ++pos) {

938 uint8_t byte;

939 if (!GetCharAt(pos, byte))

940 break;

941

942 for (int i = 0; i < ntags; ++i) {

943 SearchTagRecord& pat = patterns[i];

944 if (pat.m_bsTag[pat.m_Offset] != byte) {

945 pat.m_Offset = (pat.m_bsTag[0] == byte) ? 1 : 0;

946 continue;

947 }

948

949 ++pat.m_Offset;

950 if (pat.m_Offset != pat.m_bsTag.GetLength())

951 continue;

952

953 if (!bWholeWord \|\| IsWholeWord(pos - pat.m_bsTag.GetLength(), limit,

954 pat.m_bsTag, FALSE)) {

955 return i;

956 }

957

958 pat.m_Offset = (pat.m_bsTag[0] == byte) ? 1 : 0;

959 }

960 }

961 return -1;

962 }

963

964 FX_FILESIZE CPDF_SyntaxParser::FindTag(const CFX_ByteStringC& tag,

965 FX_FILESIZE limit) {

966 int32_t taglen = tag.GetLength();

967 int32_t match = 0;

968 limit += m_Pos;

969 FX_FILESIZE startpos = m_Pos;

970

971 while (1) {

972 uint8_t ch;

973 if (!GetNextChar(ch))

974 return -1;

975

976 if (ch == tag[match]) {

977 match++;

978 if (match == taglen)

979 return m_Pos - startpos - taglen;

980 } else {

981 match = ch == tag[0] ? 1 : 0;

982 }

983

984 if (limit && m_Pos == limit)

985 return -1;

986 }

987 return -1;

988 }

989

// Installs |pCryptoHandler| as the handler used to decrypt strings and
// streams, taking ownership of it and replacing any handler set before.
990 void CPDF_SyntaxParser::SetEncrypt(

991 std::unique_ptr<CPDF_CryptoHandler> pCryptoHandler) {

992 m_pCryptoHandler = std::move(pCryptoHandler);

993 }

994

995 CFX_ByteString CPDF_SyntaxParser::MaybeIntern(const CFX_ByteString& str) {

996 return m_pPool ? m_pPool->Intern(str) : str;

997 }

OLD	NEW

« no previous file with comments | « core/fpdfapi/fpdf_parser/cpdf_syntax_parser.h ('k') | core/fpdfapi/fpdf_parser/cpdf_syntax_parser_unittest.cpp » ('j') | no next file with comments »