core/src/fpdfapi/fpdf_parser/cpdf_syntax_parser.cpp - Issue 1775023003: Re-land "Split CPDF_SyntaxParser into its own named .cpp/.h files."

Side by Side Diff: core/src/fpdfapi/fpdf_parser/cpdf_syntax_parser.cpp

Issue 1775023003: Re-land "Split CPDF_SyntaxParser into its own named .cpp/.h files." (Closed) Base URL: https://pdfium.googlesource.com/pdfium.git@master

Patch Set: Include <vector>. Created 4 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
(Empty)
	1 // Copyright 2016 PDFium Authors. All rights reserved.

	2 // Use of this source code is governed by a BSD-style license that can be

	3 // found in the LICENSE file.

	4

	5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com

	6

	7 #include "core/src/fpdfapi/fpdf_parser/cpdf_syntax_parser.h"

	8

	9 #include <vector>

	10

	11 #include "core/include/fpdfapi/fpdf_module.h"

	12 #include "core/include/fpdfapi/fpdf_parser.h"

	13 #include "core/include/fxcrt/fx_ext.h"

	14 #include "third_party/base/numerics/safe_math.h"

	15

	16 namespace {

	17

	18 struct SearchTagRecord {

	19 const char* m_pTag;

	20 FX_DWORD m_Len;

	21 FX_DWORD m_Offset;

	22 };

	23

	24 } // namespace

	25

	26 // static

	27 int CPDF_SyntaxParser::s_CurrentRecursionDepth = 0;

	28

	29 CPDF_SyntaxParser::CPDF_SyntaxParser()

	30 : m_MetadataObjnum(0),

	31 m_pFileAccess(nullptr),

	32 m_pFileBuf(nullptr),

	33 m_BufSize(CPDF_ModuleMgr::kFileBufSize) {}

	34

	35 CPDF_SyntaxParser::~CPDF_SyntaxParser() {

	36 FX_Free(m_pFileBuf);

	37 }

	38

	39 FX_BOOL CPDF_SyntaxParser::GetCharAt(FX_FILESIZE pos, uint8_t& ch) {

	40 CFX_AutoRestorer<FX_FILESIZE> save_pos(&m_Pos);

	41 m_Pos = pos;

	42 return GetNextChar(ch);

	43 }

	44

	45 FX_BOOL CPDF_SyntaxParser::GetNextChar(uint8_t& ch) {

	46 FX_FILESIZE pos = m_Pos + m_HeaderOffset;

	47 if (pos >= m_FileLen)

	48 return FALSE;

	49

	50 if (m_BufOffset >= pos \|\| (FX_FILESIZE)(m_BufOffset + m_BufSize) <= pos) {

	51 FX_FILESIZE read_pos = pos;

	52 FX_DWORD read_size = m_BufSize;

	53 if ((FX_FILESIZE)read_size > m_FileLen)

	54 read_size = (FX_DWORD)m_FileLen;

	55

	56 if ((FX_FILESIZE)(read_pos + read_size) > m_FileLen) {

	57 if (m_FileLen < (FX_FILESIZE)read_size) {

	58 read_pos = 0;

	59 read_size = (FX_DWORD)m_FileLen;

	60 } else {

	61 read_pos = m_FileLen - read_size;

	62 }

	63 }

	64

	65 if (!m_pFileAccess->ReadBlock(m_pFileBuf, read_pos, read_size))

	66 return FALSE;

	67

	68 m_BufOffset = read_pos;

	69 }

	70 ch = m_pFileBuf[pos - m_BufOffset];

	71 m_Pos++;

	72 return TRUE;

	73 }

	74

	75 FX_BOOL CPDF_SyntaxParser::GetCharAtBackward(FX_FILESIZE pos, uint8_t& ch) {

	76 pos += m_HeaderOffset;

	77 if (pos >= m_FileLen)

	78 return FALSE;

	79

	80 if (m_BufOffset >= pos \|\| (FX_FILESIZE)(m_BufOffset + m_BufSize) <= pos) {

	81 FX_FILESIZE read_pos;

	82 if (pos < (FX_FILESIZE)m_BufSize)

	83 read_pos = 0;

	84 else

	85 read_pos = pos - m_BufSize + 1;

	86

	87 FX_DWORD read_size = m_BufSize;

	88 if ((FX_FILESIZE)(read_pos + read_size) > m_FileLen) {

	89 if (m_FileLen < (FX_FILESIZE)read_size) {

	90 read_pos = 0;

	91 read_size = (FX_DWORD)m_FileLen;

	92 } else {

	93 read_pos = m_FileLen - read_size;

	94 }

	95 }

	96

	97 if (!m_pFileAccess->ReadBlock(m_pFileBuf, read_pos, read_size))

	98 return FALSE;

	99

	100 m_BufOffset = read_pos;

	101 }

	102 ch = m_pFileBuf[pos - m_BufOffset];

	103 return TRUE;

	104 }

	105

	106 FX_BOOL CPDF_SyntaxParser::ReadBlock(uint8_t* pBuf, FX_DWORD size) {

	107 if (!m_pFileAccess->ReadBlock(pBuf, m_Pos + m_HeaderOffset, size))

	108 return FALSE;

	109 m_Pos += size;

	110 return TRUE;

	111 }

	112

	113 void CPDF_SyntaxParser::GetNextWordInternal(bool* bIsNumber) {

	114 m_WordSize = 0;

	115 if (bIsNumber)

	116 *bIsNumber = true;

	117

	118 uint8_t ch;

	119 if (!GetNextChar(ch))

	120 return;

	121

	122 while (1) {

	123 while (PDFCharIsWhitespace(ch)) {

	124 if (!GetNextChar(ch))

	125 return;

	126 }

	127

	128 if (ch != '%')

	129 break;

	130

	131 while (1) {

	132 if (!GetNextChar(ch))

	133 return;

	134 if (PDFCharIsLineEnding(ch))

	135 break;

	136 }

	137 }

	138

	139 if (PDFCharIsDelimiter(ch)) {

	140 if (bIsNumber)

	141 *bIsNumber = false;

	142

	143 m_WordBuffer[m_WordSize++] = ch;

	144 if (ch == '/') {

	145 while (1) {

	146 if (!GetNextChar(ch))

	147 return;

	148

	149 if (!PDFCharIsOther(ch) && !PDFCharIsNumeric(ch)) {

	150 m_Pos--;

	151 return;

	152 }

	153

	154 if (m_WordSize < sizeof(m_WordBuffer) - 1)

	155 m_WordBuffer[m_WordSize++] = ch;

	156 }

	157 } else if (ch == '<') {

	158 if (!GetNextChar(ch))

	159 return;

	160

	161 if (ch == '<')

	162 m_WordBuffer[m_WordSize++] = ch;

	163 else

	164 m_Pos--;

	165 } else if (ch == '>') {

	166 if (!GetNextChar(ch))

	167 return;

	168

	169 if (ch == '>')

	170 m_WordBuffer[m_WordSize++] = ch;

	171 else

	172 m_Pos--;

	173 }

	174 return;

	175 }

	176

	177 while (1) {

	178 if (m_WordSize < sizeof(m_WordBuffer) - 1)

	179 m_WordBuffer[m_WordSize++] = ch;

	180

	181 if (!PDFCharIsNumeric(ch)) {

	182 if (bIsNumber)

	183 *bIsNumber = false;

	184 }

	185

	186 if (!GetNextChar(ch))

	187 return;

	188

	189 if (PDFCharIsDelimiter(ch) \|\| PDFCharIsWhitespace(ch)) {

	190 m_Pos--;

	191 break;

	192 }

	193 }

	194 }

	195

	196 CFX_ByteString CPDF_SyntaxParser::ReadString() {

	197 uint8_t ch;

	198 if (!GetNextChar(ch))

	199 return CFX_ByteString();

	200

	201 CFX_ByteTextBuf buf;

	202 int32_t parlevel = 0;

	203 int32_t status = 0;

	204 int32_t iEscCode = 0;

	205 while (1) {

	206 switch (status) {

	207 case 0:

	208 if (ch == ')') {

	209 if (parlevel == 0) {

	210 return buf.GetByteString();

	211 }

	212 parlevel--;

	213 buf.AppendChar(')');

	214 } else if (ch == '(') {

	215 parlevel++;

	216 buf.AppendChar('(');

	217 } else if (ch == '\\') {

	218 status = 1;

	219 } else {

	220 buf.AppendChar(ch);

	221 }

	222 break;

	223 case 1:

	224 if (ch >= '0' && ch <= '7') {

	225 iEscCode = FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch));

	226 status = 2;

	227 break;

	228 }

	229

	230 if (ch == 'n') {

	231 buf.AppendChar('\n');

	232 } else if (ch == 'r') {

	233 buf.AppendChar('\r');

	234 } else if (ch == 't') {

	235 buf.AppendChar('\t');

	236 } else if (ch == 'b') {

	237 buf.AppendChar('\b');

	238 } else if (ch == 'f') {

	239 buf.AppendChar('\f');

	240 } else if (ch == '\r') {

	241 status = 4;

	242 break;

	243 } else if (ch != '\n') {

	244 buf.AppendChar(ch);

	245 }

	246 status = 0;

	247 break;

	248 case 2:

	249 if (ch >= '0' && ch <= '7') {

	250 iEscCode =

	251 iEscCode * 8 + FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch));

	252 status = 3;

	253 } else {

	254 buf.AppendChar(iEscCode);

	255 status = 0;

	256 continue;

	257 }

	258 break;

	259 case 3:

	260 if (ch >= '0' && ch <= '7') {

	261 iEscCode =

	262 iEscCode * 8 + FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch));

	263 buf.AppendChar(iEscCode);

	264 status = 0;

	265 } else {

	266 buf.AppendChar(iEscCode);

	267 status = 0;

	268 continue;

	269 }

	270 break;

	271 case 4:

	272 status = 0;

	273 if (ch != '\n')

	274 continue;

	275 break;

	276 }

	277

	278 if (!GetNextChar(ch))

	279 break;

	280 }

	281

	282 GetNextChar(ch);

	283 return buf.GetByteString();

	284 }

	285

	286 CFX_ByteString CPDF_SyntaxParser::ReadHexString() {

	287 uint8_t ch;

	288 if (!GetNextChar(ch))

	289 return CFX_ByteString();

	290

	291 CFX_ByteTextBuf buf;

	292 bool bFirst = true;

	293 uint8_t code = 0;

	294 while (1) {

	295 if (ch == '>')

	296 break;

	297

	298 if (std::isxdigit(ch)) {

	299 int val = FXSYS_toHexDigit(ch);

	300 if (bFirst) {

	301 code = val * 16;

	302 } else {

	303 code += val;

	304 buf.AppendByte(code);

	305 }

	306 bFirst = !bFirst;

	307 }

	308

	309 if (!GetNextChar(ch))

	310 break;

	311 }

	312 if (!bFirst)

	313 buf.AppendByte(code);

	314

	315 return buf.GetByteString();

	316 }

	317

	318 void CPDF_SyntaxParser::ToNextLine() {

	319 uint8_t ch;

	320 while (GetNextChar(ch)) {

	321 if (ch == '\n')

	322 break;

	323

	324 if (ch == '\r') {

	325 GetNextChar(ch);

	326 if (ch != '\n')

	327 --m_Pos;

	328 break;

	329 }

	330 }

	331 }

	332

	333 void CPDF_SyntaxParser::ToNextWord() {

	334 uint8_t ch;

	335 if (!GetNextChar(ch))

	336 return;

	337

	338 while (1) {

	339 while (PDFCharIsWhitespace(ch)) {

	340 if (!GetNextChar(ch))

	341 return;

	342 }

	343

	344 if (ch != '%')

	345 break;

	346

	347 while (1) {

	348 if (!GetNextChar(ch))

	349 return;

	350 if (PDFCharIsLineEnding(ch))

	351 break;

	352 }

	353 }

	354 m_Pos--;

	355 }

	356

	357 CFX_ByteString CPDF_SyntaxParser::GetNextWord(bool* bIsNumber) {

	358 GetNextWordInternal(bIsNumber);

	359 return CFX_ByteString((const FX_CHAR*)m_WordBuffer, m_WordSize);

	360 }

	361

	362 CFX_ByteString CPDF_SyntaxParser::GetKeyword() {

	363 return GetNextWord(nullptr);

	364 }

	365

	366 CPDF_Object* CPDF_SyntaxParser::GetObject(CPDF_IndirectObjectHolder* pObjList,

	367 FX_DWORD objnum,

	368 FX_DWORD gennum,

	369 FX_BOOL bDecrypt) {

	370 CFX_AutoRestorer<int> restorer(&s_CurrentRecursionDepth);

	371 if (++s_CurrentRecursionDepth > kParserMaxRecursionDepth)

	372 return nullptr;

	373

	374 FX_FILESIZE SavedPos = m_Pos;

	375 bool bIsNumber;

	376 CFX_ByteString word = GetNextWord(&bIsNumber);

	377 if (word.GetLength() == 0)

	378 return nullptr;

	379

	380 if (bIsNumber) {

	381 FX_FILESIZE SavedPos = m_Pos;

	382 CFX_ByteString nextword = GetNextWord(&bIsNumber);

	383 if (bIsNumber) {

	384 CFX_ByteString nextword2 = GetNextWord(nullptr);

	385 if (nextword2 == "R") {

	386 FX_DWORD objnum = FXSYS_atoui(word);

	387 return new CPDF_Reference(pObjList, objnum);

	388 }

	389 }

	390 m_Pos = SavedPos;

	391 return new CPDF_Number(word);

	392 }

	393

	394 if (word == "true" \|\| word == "false")

	395 return new CPDF_Boolean(word == "true");

	396

	397 if (word == "null")

	398 return new CPDF_Null;

	399

	400 if (word == "(") {

	401 CFX_ByteString str = ReadString();

	402 if (m_pCryptoHandler && bDecrypt)

	403 m_pCryptoHandler->Decrypt(objnum, gennum, str);

	404 return new CPDF_String(str, FALSE);

	405 }

	406

	407 if (word == "<") {

	408 CFX_ByteString str = ReadHexString();

	409 if (m_pCryptoHandler && bDecrypt)

	410 m_pCryptoHandler->Decrypt(objnum, gennum, str);

	411

	412 return new CPDF_String(str, TRUE);

	413 }

	414

	415 if (word == "[") {

	416 CPDF_Array* pArray = new CPDF_Array;

	417 while (CPDF_Object* pObj = GetObject(pObjList, objnum, gennum, true))

	418 pArray->Add(pObj);

	419

	420 return pArray;

	421 }

	422

	423 if (word[0] == '/') {

	424 return new CPDF_Name(

	425 PDF_NameDecode(CFX_ByteStringC(m_WordBuffer + 1, m_WordSize - 1)));

	426 }

	427

	428 if (word == "<<") {

	429 int32_t nKeys = 0;

	430 FX_FILESIZE dwSignValuePos = 0;

	431

	432 std::unique_ptr<CPDF_Dictionary, ReleaseDeleter<CPDF_Dictionary>> pDict(

	433 new CPDF_Dictionary);

	434 while (1) {

	435 CFX_ByteString key = GetNextWord(nullptr);

	436 if (key.IsEmpty())

	437 return nullptr;

	438

	439 FX_FILESIZE SavedPos = m_Pos - key.GetLength();

	440 if (key == ">>")

	441 break;

	442

	443 if (key == "endobj") {

	444 m_Pos = SavedPos;

	445 break;

	446 }

	447

	448 if (key[0] != '/')

	449 continue;

	450

	451 ++nKeys;

	452 key = PDF_NameDecode(key);

	453 if (key.IsEmpty())

	454 continue;

	455

	456 if (key == "/Contents")

	457 dwSignValuePos = m_Pos;

	458

	459 CPDF_Object* pObj = GetObject(pObjList, objnum, gennum, true);

	460 if (!pObj)

	461 continue;

	462

	463 CFX_ByteStringC keyNoSlash(key.c_str() + 1, key.GetLength() - 1);

	464 pDict->SetAt(keyNoSlash, pObj);

	465 }

	466

	467 // Only when this is a signature dictionary and has contents, we reset the

	468 // contents to the un-decrypted form.

	469 if (IsSignatureDict(pDict.get()) && dwSignValuePos) {

	470 CFX_AutoRestorer<FX_FILESIZE> save_pos(&m_Pos);

	471 m_Pos = dwSignValuePos;

	472 pDict->SetAt("Contents", GetObject(pObjList, objnum, gennum, false));

	473 }

	474

	475 FX_FILESIZE SavedPos = m_Pos;

	476 CFX_ByteString nextword = GetNextWord(nullptr);

	477 if (nextword != "stream") {

	478 m_Pos = SavedPos;

	479 return pDict.release();

	480 }

	481 return ReadStream(pDict.release(), objnum, gennum);

	482 }

	483

	484 if (word == ">>")

	485 m_Pos = SavedPos;

	486

	487 return nullptr;

	488 }

	489

	490 CPDF_Object* CPDF_SyntaxParser::GetObjectByStrict(

	491 CPDF_IndirectObjectHolder* pObjList,

	492 FX_DWORD objnum,

	493 FX_DWORD gennum) {

	494 CFX_AutoRestorer<int> restorer(&s_CurrentRecursionDepth);

	495 if (++s_CurrentRecursionDepth > kParserMaxRecursionDepth)

	496 return nullptr;

	497

	498 FX_FILESIZE SavedPos = m_Pos;

	499 bool bIsNumber;

	500 CFX_ByteString word = GetNextWord(&bIsNumber);

	501 if (word.GetLength() == 0)

	502 return nullptr;

	503

	504 if (bIsNumber) {

	505 FX_FILESIZE SavedPos = m_Pos;

	506 CFX_ByteString nextword = GetNextWord(&bIsNumber);

	507 if (bIsNumber) {

	508 CFX_ByteString nextword2 = GetNextWord(nullptr);

	509 if (nextword2 == "R")

	510 return new CPDF_Reference(pObjList, FXSYS_atoui(word));

	511 }

	512 m_Pos = SavedPos;

	513 return new CPDF_Number(word);

	514 }

	515

	516 if (word == "true" \|\| word == "false")

	517 return new CPDF_Boolean(word == "true");

	518

	519 if (word == "null")

	520 return new CPDF_Null;

	521

	522 if (word == "(") {

	523 CFX_ByteString str = ReadString();

	524 if (m_pCryptoHandler)

	525 m_pCryptoHandler->Decrypt(objnum, gennum, str);

	526 return new CPDF_String(str, FALSE);

	527 }

	528

	529 if (word == "<") {

	530 CFX_ByteString str = ReadHexString();

	531 if (m_pCryptoHandler)

	532 m_pCryptoHandler->Decrypt(objnum, gennum, str);

	533 return new CPDF_String(str, TRUE);

	534 }

	535

	536 if (word == "[") {

	537 std::unique_ptr<CPDF_Array, ReleaseDeleter<CPDF_Array>> pArray(

	538 new CPDF_Array);

	539 while (CPDF_Object* pObj = GetObject(pObjList, objnum, gennum, true))

	540 pArray->Add(pObj);

	541

	542 return m_WordBuffer[0] == ']' ? pArray.release() : nullptr;

	543 }

	544

	545 if (word[0] == '/') {

	546 return new CPDF_Name(

	547 PDF_NameDecode(CFX_ByteStringC(m_WordBuffer + 1, m_WordSize - 1)));

	548 }

	549

	550 if (word == "<<") {

	551 std::unique_ptr<CPDF_Dictionary, ReleaseDeleter<CPDF_Dictionary>> pDict(

	552 new CPDF_Dictionary);

	553 while (1) {

	554 FX_FILESIZE SavedPos = m_Pos;

	555 CFX_ByteString key = GetNextWord(nullptr);

	556 if (key.IsEmpty())

	557 return nullptr;

	558

	559 if (key == ">>")

	560 break;

	561

	562 if (key == "endobj") {

	563 m_Pos = SavedPos;

	564 break;

	565 }

	566

	567 if (key[0] != '/')

	568 continue;

	569

	570 key = PDF_NameDecode(key);

	571 std::unique_ptr<CPDF_Object, ReleaseDeleter<CPDF_Object>> obj(

	572 GetObject(pObjList, objnum, gennum, true));

	573 if (!obj) {

	574 uint8_t ch;

	575 while (GetNextChar(ch) && ch != 0x0A && ch != 0x0D) {

	576 continue;

	577 }

	578 return nullptr;

	579 }

	580

	581 if (key.GetLength() > 1) {

	582 pDict->SetAt(CFX_ByteStringC(key.c_str() + 1, key.GetLength() - 1),

	583 obj.release());

	584 }

	585 }

	586

	587 FX_FILESIZE SavedPos = m_Pos;

	588 CFX_ByteString nextword = GetNextWord(nullptr);

	589 if (nextword != "stream") {

	590 m_Pos = SavedPos;

	591 return pDict.release();

	592 }

	593

	594 return ReadStream(pDict.release(), objnum, gennum);

	595 }

	596

	597 if (word == ">>")

	598 m_Pos = SavedPos;

	599

	600 return nullptr;

	601 }

	602

	603 unsigned int CPDF_SyntaxParser::ReadEOLMarkers(FX_FILESIZE pos) {

	604 unsigned char byte1 = 0;

	605 unsigned char byte2 = 0;

	606

	607 GetCharAt(pos, byte1);

	608 GetCharAt(pos + 1, byte2);

	609

	610 if (byte1 == '\r' && byte2 == '\n')

	611 return 2;

	612

	613 if (byte1 == '\r' \|\| byte1 == '\n')

	614 return 1;

	615

	616 return 0;

	617 }

	618

	619 CPDF_Stream* CPDF_SyntaxParser::ReadStream(CPDF_Dictionary* pDict,

	620 FX_DWORD objnum,

	621 FX_DWORD gennum) {

	622 CPDF_Object* pLenObj = pDict->GetElement("Length");

	623 FX_FILESIZE len = -1;

	624 CPDF_Reference* pLenObjRef = ToReference(pLenObj);

	625

	626 bool differingObjNum = !pLenObjRef \|\| (pLenObjRef->GetObjList() &&

	627 pLenObjRef->GetRefObjNum() != objnum);

	628 if (pLenObj && differingObjNum)

	629 len = pLenObj->GetInteger();

	630

	631 // Locate the start of stream.

	632 ToNextLine();

	633 FX_FILESIZE streamStartPos = m_Pos;

	634

	635 const CFX_ByteStringC kEndStreamStr("endstream");

	636 const CFX_ByteStringC kEndObjStr("endobj");

	637

	638 CPDF_CryptoHandler* pCryptoHandler =

	639 objnum == (FX_DWORD)m_MetadataObjnum ? nullptr : m_pCryptoHandler.get();

	640 if (!pCryptoHandler) {

	641 FX_BOOL bSearchForKeyword = TRUE;

	642 if (len >= 0) {

	643 pdfium::base::CheckedNumeric<FX_FILESIZE> pos = m_Pos;

	644 pos += len;

	645 if (pos.IsValid() && pos.ValueOrDie() < m_FileLen)

	646 m_Pos = pos.ValueOrDie();

	647

	648 m_Pos += ReadEOLMarkers(m_Pos);

	649 FXSYS_memset(m_WordBuffer, 0, kEndStreamStr.GetLength() + 1);

	650 GetNextWordInternal(nullptr);

	651 // Earlier version of PDF specification doesn't require EOL marker before

	652 // 'endstream' keyword. If keyword 'endstream' follows the bytes in

	653 // specified length, it signals the end of stream.

	654 if (FXSYS_memcmp(m_WordBuffer, kEndStreamStr.GetPtr(),

	655 kEndStreamStr.GetLength()) == 0) {

	656 bSearchForKeyword = FALSE;

	657 }

	658 }

	659

	660 if (bSearchForKeyword) {

	661 // If len is not available, len needs to be calculated

	662 // by searching the keywords "endstream" or "endobj".

	663 m_Pos = streamStartPos;

	664 FX_FILESIZE endStreamOffset = 0;

	665 while (endStreamOffset >= 0) {

	666 endStreamOffset = FindTag(kEndStreamStr, 0);

	667

	668 // Can't find "endstream".

	669 if (endStreamOffset < 0)

	670 break;

	671

	672 // Stop searching when "endstream" is found.

	673 if (IsWholeWord(m_Pos - kEndStreamStr.GetLength(), m_FileLen,

	674 kEndStreamStr, TRUE)) {

	675 endStreamOffset = m_Pos - streamStartPos - kEndStreamStr.GetLength();

	676 break;

	677 }

	678 }

	679

	680 m_Pos = streamStartPos;

	681 FX_FILESIZE endObjOffset = 0;

	682 while (endObjOffset >= 0) {

	683 endObjOffset = FindTag(kEndObjStr, 0);

	684

	685 // Can't find "endobj".

	686 if (endObjOffset < 0)

	687 break;

	688

	689 // Stop searching when "endobj" is found.

	690 if (IsWholeWord(m_Pos - kEndObjStr.GetLength(), m_FileLen, kEndObjStr,

	691 TRUE)) {

	692 endObjOffset = m_Pos - streamStartPos - kEndObjStr.GetLength();

	693 break;

	694 }

	695 }

	696

	697 // Can't find "endstream" or "endobj".

	698 if (endStreamOffset < 0 && endObjOffset < 0) {

	699 pDict->Release();

	700 return nullptr;

	701 }

	702

	703 if (endStreamOffset < 0 && endObjOffset >= 0) {

	704 // Correct the position of end stream.

	705 endStreamOffset = endObjOffset;

	706 } else if (endStreamOffset >= 0 && endObjOffset < 0) {

	707 // Correct the position of end obj.

	708 endObjOffset = endStreamOffset;

	709 } else if (endStreamOffset > endObjOffset) {

	710 endStreamOffset = endObjOffset;

	711 }

	712

	713 len = endStreamOffset;

	714 int numMarkers = ReadEOLMarkers(streamStartPos + endStreamOffset - 2);

	715 if (numMarkers == 2) {

	716 len -= 2;

	717 } else {

	718 numMarkers = ReadEOLMarkers(streamStartPos + endStreamOffset - 1);

	719 if (numMarkers == 1) {

	720 len -= 1;

	721 }

	722 }

	723

	724 if (len < 0) {

	725 pDict->Release();

	726 return nullptr;

	727 }

	728 pDict->SetAtInteger("Length", len);

	729 }

	730 m_Pos = streamStartPos;

	731 }

	732

	733 if (len < 0) {

	734 pDict->Release();

	735 return nullptr;

	736 }

	737

	738 uint8_t* pData = nullptr;

	739 if (len > 0) {

	740 pData = FX_Alloc(uint8_t, len);

	741 ReadBlock(pData, len);

	742 if (pCryptoHandler) {

	743 CFX_BinaryBuf dest_buf;

	744 dest_buf.EstimateSize(pCryptoHandler->DecryptGetSize(len));

	745

	746 void* context = pCryptoHandler->DecryptStart(objnum, gennum);

	747 pCryptoHandler->DecryptStream(context, pData, len, dest_buf);

	748 pCryptoHandler->DecryptFinish(context, dest_buf);

	749

	750 FX_Free(pData);

	751 pData = dest_buf.GetBuffer();

	752 len = dest_buf.GetSize();

	753 dest_buf.DetachBuffer();

	754 }

	755 }

	756

	757 CPDF_Stream* pStream = new CPDF_Stream(pData, len, pDict);

	758 streamStartPos = m_Pos;

	759 FXSYS_memset(m_WordBuffer, 0, kEndObjStr.GetLength() + 1);

	760

	761 GetNextWordInternal(nullptr);

	762

	763 int numMarkers = ReadEOLMarkers(m_Pos);

	764 if (m_WordSize == kEndObjStr.GetLength() && numMarkers != 0 &&

	765 FXSYS_memcmp(m_WordBuffer, kEndObjStr.GetPtr(), kEndObjStr.GetLength()) ==

	766 0) {

	767 m_Pos = streamStartPos;

	768 }

	769 return pStream;

	770 }

	771

	772 void CPDF_SyntaxParser::InitParser(IFX_FileRead* pFileAccess,

	773 FX_DWORD HeaderOffset) {

	774 FX_Free(m_pFileBuf);

	775

	776 m_pFileBuf = FX_Alloc(uint8_t, m_BufSize);

	777 m_HeaderOffset = HeaderOffset;

	778 m_FileLen = pFileAccess->GetSize();

	779 m_Pos = 0;

	780 m_pFileAccess = pFileAccess;

	781 m_BufOffset = 0;

	782 pFileAccess->ReadBlock(

	783 m_pFileBuf, 0,

	784 (size_t)((FX_FILESIZE)m_BufSize > m_FileLen ? m_FileLen : m_BufSize));

	785 }

	786

	787 uint32_t CPDF_SyntaxParser::GetDirectNum() {

	788 bool bIsNumber;

	789 GetNextWordInternal(&bIsNumber);

	790 if (!bIsNumber)

	791 return 0;

	792

	793 m_WordBuffer[m_WordSize] = 0;

	794 return FXSYS_atoui(reinterpret_cast<const FX_CHAR*>(m_WordBuffer));

	795 }

	796

	797 bool CPDF_SyntaxParser::IsWholeWord(FX_FILESIZE startpos,

	798 FX_FILESIZE limit,

	799 const CFX_ByteStringC& tag,

	800 FX_BOOL checkKeyword) {

	801 const FX_DWORD taglen = tag.GetLength();

	802

	803 bool bCheckLeft = !PDFCharIsDelimiter(tag[0]) && !PDFCharIsWhitespace(tag[0]);

	804 bool bCheckRight = !PDFCharIsDelimiter(tag[taglen - 1]) &&

	805 !PDFCharIsWhitespace(tag[taglen - 1]);

	806

	807 uint8_t ch;

	808 if (bCheckRight && startpos + (int32_t)taglen <= limit &&

	809 GetCharAt(startpos + (int32_t)taglen, ch)) {

	810 if (PDFCharIsNumeric(ch) \|\| PDFCharIsOther(ch) \|\|

	811 (checkKeyword && PDFCharIsDelimiter(ch))) {

	812 return false;

	813 }

	814 }

	815

	816 if (bCheckLeft && startpos > 0 && GetCharAt(startpos - 1, ch)) {

	817 if (PDFCharIsNumeric(ch) \|\| PDFCharIsOther(ch) \|\|

	818 (checkKeyword && PDFCharIsDelimiter(ch))) {

	819 return false;

	820 }

	821 }

	822 return true;

	823 }

	824

	825 // TODO(dsinclair): Split into a SearchWordForward and SearchWordBackwards

	826 // and drop the bool.

	827 FX_BOOL CPDF_SyntaxParser::SearchWord(const CFX_ByteStringC& tag,

	828 FX_BOOL bWholeWord,

	829 FX_BOOL bForward,

	830 FX_FILESIZE limit) {

	831 int32_t taglen = tag.GetLength();

	832 if (taglen == 0)

	833 return FALSE;

	834

	835 FX_FILESIZE pos = m_Pos;

	836 int32_t offset = 0;

	837 if (!bForward)

	838 offset = taglen - 1;

	839

	840 const uint8_t* tag_data = tag.GetPtr();

	841 uint8_t byte;

	842 while (1) {

	843 if (bForward) {

	844 if (limit && pos >= m_Pos + limit)

	845 return FALSE;

	846

	847 if (!GetCharAt(pos, byte))

	848 return FALSE;

	849

	850 } else {

	851 if (limit && pos <= m_Pos - limit)

	852 return FALSE;

	853

	854 if (!GetCharAtBackward(pos, byte))

	855 return FALSE;

	856 }

	857

	858 if (byte == tag_data[offset]) {

	859 if (bForward) {

	860 offset++;

	861 if (offset < taglen) {

	862 pos++;

	863 continue;

	864 }

	865 } else {

	866 offset--;

	867 if (offset >= 0) {

	868 pos--;

	869 continue;

	870 }

	871 }

	872

	873 FX_FILESIZE startpos = bForward ? pos - taglen + 1 : pos;

	874 if (!bWholeWord \|\| IsWholeWord(startpos, limit, tag, FALSE)) {

	875 m_Pos = startpos;

	876 return TRUE;

	877 }

	878 }

	879

	880 if (bForward) {

	881 offset = byte == tag_data[0] ? 1 : 0;

	882 pos++;

	883 } else {

	884 offset = byte == tag_data[taglen - 1] ? taglen - 2 : taglen - 1;

	885 pos--;

	886 }

	887

	888 if (pos < 0)

	889 return FALSE;

	890 }

	891

	892 return FALSE;

	893 }

	894

	895 int32_t CPDF_SyntaxParser::SearchMultiWord(const CFX_ByteStringC& tags,

	896 FX_BOOL bWholeWord,

	897 FX_FILESIZE limit) {

	898 int32_t ntags = 1;

	899 for (int i = 0; i < tags.GetLength(); ++i) {

	900 if (tags[i] == 0)

	901 ++ntags;

	902 }

	903

	904 std::vector<SearchTagRecord> patterns(ntags);

	905 FX_DWORD start = 0;

	906 FX_DWORD itag = 0;

	907 FX_DWORD max_len = 0;

	908 for (int i = 0; i <= tags.GetLength(); ++i) {

	909 if (tags[i] == 0) {

	910 FX_DWORD len = i - start;

	911 max_len = std::max(len, max_len);

	912 patterns[itag].m_pTag = tags.GetCStr() + start;

	913 patterns[itag].m_Len = len;

	914 patterns[itag].m_Offset = 0;

	915 start = i + 1;

	916 ++itag;

	917 }

	918 }

	919

	920 const FX_FILESIZE pos_limit = m_Pos + limit;

	921 for (FX_FILESIZE pos = m_Pos; !limit \|\| pos < pos_limit; ++pos) {

	922 uint8_t byte;

	923 if (!GetCharAt(pos, byte))

	924 break;

	925

	926 for (int i = 0; i < ntags; ++i) {

	927 SearchTagRecord& pat = patterns[i];

	928 if (pat.m_pTag[pat.m_Offset] != byte) {

	929 pat.m_Offset = (pat.m_pTag[0] == byte) ? 1 : 0;

	930 continue;

	931 }

	932

	933 ++pat.m_Offset;

	934 if (pat.m_Offset != pat.m_Len)

	935 continue;

	936

	937 if (!bWholeWord \|\|

	938 IsWholeWord(pos - pat.m_Len, limit,

	939 CFX_ByteStringC(pat.m_pTag, pat.m_Len), FALSE)) {

	940 return i;

	941 }

	942

	943 pat.m_Offset = (pat.m_pTag[0] == byte) ? 1 : 0;

	944 }

	945 }

	946 return -1;

	947 }

	948

	949 FX_FILESIZE CPDF_SyntaxParser::FindTag(const CFX_ByteStringC& tag,

	950 FX_FILESIZE limit) {

	951 int32_t taglen = tag.GetLength();

	952 int32_t match = 0;

	953 limit += m_Pos;

	954 FX_FILESIZE startpos = m_Pos;

	955

	956 while (1) {

	957 uint8_t ch;

	958 if (!GetNextChar(ch))

	959 return -1;

	960

	961 if (ch == tag[match]) {

	962 match++;

	963 if (match == taglen)

	964 return m_Pos - startpos - taglen;

	965 } else {

	966 match = ch == tag[0] ? 1 : 0;

	967 }

	968

	969 if (limit && m_Pos == limit)

	970 return -1;

	971 }

	972 return -1;

	973 }

	974

	975 void CPDF_SyntaxParser::SetEncrypt(

	976 std::unique_ptr<CPDF_CryptoHandler> pCryptoHandler) {

	977 m_pCryptoHandler = std::move(pCryptoHandler);

	978 }

OLD	NEW

« no previous file with comments | « core/src/fpdfapi/fpdf_parser/cpdf_syntax_parser.h ('k') | core/src/fpdfapi/fpdf_parser/fpdf_parser_fdf.cpp » ('j') | no next file with comments »