core/src/fpdfapi/fpdf_parser/cpdf_syntax_parser.cpp - Issue 1774753002: Split CPDF_SyntaxParser into its own named .cpp/.h files.

Side by Side Diff: core/src/fpdfapi/fpdf_parser/cpdf_syntax_parser.cpp

Issue 1774753002: Split CPDF_SyntaxParser into its own named .cpp/.h files. (Closed) Base URL: https://pdfium.googlesource.com/pdfium.git@master

Patch Set: Stray file. Created 4 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

« core/src/fpdfapi/fpdf_parser/cpdf_syntax_parser.h ('K') | « core/src/fpdfapi/fpdf_parser/cpdf_syntax_parser.h ('k') | core/src/fpdfapi/fpdf_parser/fpdf_parser_fdf.cpp » ('j') | core/src/fpdfapi/fpdf_parser/fpdf_parser_parser.cpp » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
(Empty)
	1 // Copyright 2016 PDFium Authors. All rights reserved.
	dsinclair (2016/03/08 01:15:05): nit: 2016?
	Tom Sepez (2016/03/08 19:35:41): Done.
	2 // Use of this source code is governed by a BSD-style license that can be

	3 // found in the LICENSE file.

	4

	5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com

	6

	7 #include "core/src/fpdfapi/fpdf_parser/cpdf_syntax_parser.h"

	8

	9 #include "core/include/fpdfapi/fpdf_module.h"

	10 #include "core/include/fpdfapi/fpdf_parser.h"

	11 #include "core/include/fxcrt/fx_ext.h"

	12 #include "third_party/base/numerics/safe_math.h"

	13

	14 struct SearchTagRecord {
	dsinclair 2016/03/08 01:15:05 Can this go in namespace {}? Can this go in namespace {}? Tom Sepez 2016/03/08 19:35:42 Done. Show quoted text On 2016/03/08 01:15:05, dsinclair wrote: > Can this go in namespace {}? Done.
	15 const char* m_pTag;

	16 FX_DWORD m_Len;

	17 FX_DWORD m_Offset;

	18 };

	19

	20 // static

	// Nesting depth of GetObject()/GetObjectByStrict() calls. Both increment it on
	// entry (restoring it on exit) and refuse to parse once it exceeds
	// kParserMaxRecursionDepth.
	21 int CPDF_SyntaxParser::s_CurrentRecursionDepth = 0;

	22

	// Creates a parser that is not attached to any file yet: the file accessor and
	// the read buffer start out null and are set up by InitParser().
	23 CPDF_SyntaxParser::CPDF_SyntaxParser()

	24 : m_MetadataObjnum(0),

	25 m_pFileAccess(nullptr),

	26 m_pFileBuf(nullptr),

	// Only the capacity is recorded here; the buffer is allocated in InitParser().
	27 m_BufSize(CPDF_ModuleMgr::kFileBufSize) {}

	28

	// Frees the read buffer allocated by InitParser(). The file accessor passed to
	// InitParser() is not released here.
	29 CPDF_SyntaxParser::~CPDF_SyntaxParser() {

	30 FX_Free(m_pFileBuf);

	31 }

	32

	33 FX_BOOL CPDF_SyntaxParser::GetCharAt(FX_FILESIZE pos, uint8_t& ch) {

	34 CFX_AutoRestorer<FX_FILESIZE> save_pos(&m_Pos);

	35 m_Pos = pos;

	36 return GetNextChar(ch);

	37 }

	38

	39 FX_BOOL CPDF_SyntaxParser::GetNextChar(uint8_t& ch) {

	40 FX_FILESIZE pos = m_Pos + m_HeaderOffset;

	41 if (pos >= m_FileLen)

	42 return FALSE;

	43

	44 if (m_BufOffset >= pos \|\| (FX_FILESIZE)(m_BufOffset + m_BufSize) <= pos) {

	45 FX_FILESIZE read_pos = pos;

	46 FX_DWORD read_size = m_BufSize;

	47 if ((FX_FILESIZE)read_size > m_FileLen)

	48 read_size = (FX_DWORD)m_FileLen;

	49

	50 if ((FX_FILESIZE)(read_pos + read_size) > m_FileLen) {

	51 if (m_FileLen < (FX_FILESIZE)read_size) {

	52 read_pos = 0;

	53 read_size = (FX_DWORD)m_FileLen;

	54 } else {

	55 read_pos = m_FileLen - read_size;

	56 }

	57 }

	58

	59 if (!m_pFileAccess->ReadBlock(m_pFileBuf, read_pos, read_size))

	60 return FALSE;

	61

	62 m_BufOffset = read_pos;

	63 }

	64 ch = m_pFileBuf[pos - m_BufOffset];

	65 m_Pos++;

	66 return TRUE;

	67 }

	68

	69 FX_BOOL CPDF_SyntaxParser::GetCharAtBackward(FX_FILESIZE pos, uint8_t& ch) {

	70 pos += m_HeaderOffset;

	71 if (pos >= m_FileLen)

	72 return FALSE;

	73

	74 if (m_BufOffset >= pos \|\| (FX_FILESIZE)(m_BufOffset + m_BufSize) <= pos) {

	75 FX_FILESIZE read_pos;

	76 if (pos < (FX_FILESIZE)m_BufSize)

	77 read_pos = 0;

	78 else

	79 read_pos = pos - m_BufSize + 1;

	80

	81 FX_DWORD read_size = m_BufSize;

	82 if ((FX_FILESIZE)(read_pos + read_size) > m_FileLen) {

	83 if (m_FileLen < (FX_FILESIZE)read_size) {

	84 read_pos = 0;

	85 read_size = (FX_DWORD)m_FileLen;

	86 } else {

	87 read_pos = m_FileLen - read_size;

	88 }

	89 }

	90

	91 if (!m_pFileAccess->ReadBlock(m_pFileBuf, read_pos, read_size))

	92 return FALSE;

	93

	94 m_BufOffset = read_pos;

	95 }

	96 ch = m_pFileBuf[pos - m_BufOffset];

	97 return TRUE;

	98 }

	99

	100 FX_BOOL CPDF_SyntaxParser::ReadBlock(uint8_t* pBuf, FX_DWORD size) {

	101 if (!m_pFileAccess->ReadBlock(pBuf, m_Pos + m_HeaderOffset, size))

	102 return FALSE;

	103 m_Pos += size;

	104 return TRUE;

	105 }

	106

	107 void CPDF_SyntaxParser::GetNextWordInternal(bool* bIsNumber) {

	108 m_WordSize = 0;

	109 if (bIsNumber)

	110 *bIsNumber = true;

	111

	112 uint8_t ch;

	113 if (!GetNextChar(ch))

	114 return;

	115

	116 while (1) {

	117 while (PDFCharIsWhitespace(ch)) {

	118 if (!GetNextChar(ch))

	119 return;

	120 }

	121

	122 if (ch != '%')

	123 break;

	124

	125 while (1) {

	126 if (!GetNextChar(ch))

	127 return;

	128 if (PDFCharIsLineEnding(ch))

	129 break;

	130 }

	131 }

	132

	133 if (PDFCharIsDelimiter(ch)) {

	134 if (bIsNumber)

	135 *bIsNumber = false;

	136

	137 m_WordBuffer[m_WordSize++] = ch;

	138 if (ch == '/') {

	139 while (1) {

	140 if (!GetNextChar(ch))

	141 return;

	142

	143 if (!PDFCharIsOther(ch) && !PDFCharIsNumeric(ch)) {

	144 m_Pos--;

	145 return;

	146 }

	147

	148 if (m_WordSize < sizeof(m_WordBuffer) - 1)

	149 m_WordBuffer[m_WordSize++] = ch;

	150 }

	151 } else if (ch == '<') {

	152 if (!GetNextChar(ch))

	153 return;

	154

	155 if (ch == '<')

	156 m_WordBuffer[m_WordSize++] = ch;

	157 else

	158 m_Pos--;

	159 } else if (ch == '>') {

	160 if (!GetNextChar(ch))

	161 return;

	162

	163 if (ch == '>')

	164 m_WordBuffer[m_WordSize++] = ch;

	165 else

	166 m_Pos--;

	167 }

	168 return;

	169 }

	170

	171 while (1) {

	172 if (m_WordSize < sizeof(m_WordBuffer) - 1)

	173 m_WordBuffer[m_WordSize++] = ch;

	174

	175 if (!PDFCharIsNumeric(ch)) {

	176 if (bIsNumber)

	177 *bIsNumber = false;

	178 }

	179

	180 if (!GetNextChar(ch))

	181 return;

	182

	183 if (PDFCharIsDelimiter(ch) \|\| PDFCharIsWhitespace(ch)) {

	184 m_Pos--;

	185 break;

	186 }

	187 }

	188 }

	189

	190 CFX_ByteString CPDF_SyntaxParser::ReadString() {

	191 uint8_t ch;

	192 if (!GetNextChar(ch))

	193 return CFX_ByteString();

	194

	195 CFX_ByteTextBuf buf;

	196 int32_t parlevel = 0;

	197 int32_t status = 0;

	198 int32_t iEscCode = 0;

	199 while (1) {

	200 switch (status) {

	201 case 0:

	202 if (ch == ')') {

	203 if (parlevel == 0) {

	204 return buf.GetByteString();

	205 }

	206 parlevel--;

	207 buf.AppendChar(')');

	208 } else if (ch == '(') {

	209 parlevel++;

	210 buf.AppendChar('(');

	211 } else if (ch == '\\') {

	212 status = 1;

	213 } else {

	214 buf.AppendChar(ch);

	215 }

	216 break;

	217 case 1:

	218 if (ch >= '0' && ch <= '7') {

	219 iEscCode = FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch));

	220 status = 2;

	221 break;

	222 }

	223

	224 if (ch == 'n') {

	225 buf.AppendChar('\n');

	226 } else if (ch == 'r') {

	227 buf.AppendChar('\r');

	228 } else if (ch == 't') {

	229 buf.AppendChar('\t');

	230 } else if (ch == 'b') {

	231 buf.AppendChar('\b');

	232 } else if (ch == 'f') {

	233 buf.AppendChar('\f');

	234 } else if (ch == '\r') {

	235 status = 4;

	236 break;

	237 } else if (ch != '\n') {

	238 buf.AppendChar(ch);

	239 }

	240 status = 0;

	241 break;

	242 case 2:

	243 if (ch >= '0' && ch <= '7') {

	244 iEscCode =

	245 iEscCode * 8 + FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch));

	246 status = 3;

	247 } else {

	248 buf.AppendChar(iEscCode);

	249 status = 0;

	250 continue;

	251 }

	252 break;

	253 case 3:

	254 if (ch >= '0' && ch <= '7') {

	255 iEscCode =

	256 iEscCode * 8 + FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch));

	257 buf.AppendChar(iEscCode);

	258 status = 0;

	259 } else {

	260 buf.AppendChar(iEscCode);

	261 status = 0;

	262 continue;

	263 }

	264 break;

	265 case 4:

	266 status = 0;

	267 if (ch != '\n')

	268 continue;

	269 break;

	270 }

	271

	272 if (!GetNextChar(ch))

	273 break;

	274 }

	275

	276 GetNextChar(ch);

	277 return buf.GetByteString();

	278 }

	279

	280 CFX_ByteString CPDF_SyntaxParser::ReadHexString() {

	281 uint8_t ch;

	282 if (!GetNextChar(ch))

	283 return CFX_ByteString();

	284

	285 CFX_ByteTextBuf buf;

	286 bool bFirst = true;

	287 uint8_t code = 0;

	288 while (1) {

	289 if (ch == '>')

	290 break;

	291

	292 if (std::isxdigit(ch)) {

	293 int val = FXSYS_toHexDigit(ch);

	294 if (bFirst) {

	295 code = val * 16;

	296 } else {

	297 code += val;

	298 buf.AppendByte(code);

	299 }

	300 bFirst = !bFirst;

	301 }

	302

	303 if (!GetNextChar(ch))

	304 break;

	305 }

	306 if (!bFirst)

	307 buf.AppendByte(code);

	308

	309 return buf.GetByteString();

	310 }

	311

	312 void CPDF_SyntaxParser::ToNextLine() {

	313 uint8_t ch;

	314 while (GetNextChar(ch)) {

	315 if (ch == '\n')

	316 break;

	317

	318 if (ch == '\r') {

	319 GetNextChar(ch);

	320 if (ch != '\n')

	321 --m_Pos;

	322 break;

	323 }

	324 }

	325 }

	326

	327 void CPDF_SyntaxParser::ToNextWord() {

	328 uint8_t ch;

	329 if (!GetNextChar(ch))

	330 return;

	331

	332 while (1) {

	333 while (PDFCharIsWhitespace(ch)) {

	334 if (!GetNextChar(ch))

	335 return;

	336 }

	337

	338 if (ch != '%')

	339 break;

	340

	341 while (1) {

	342 if (!GetNextChar(ch))

	343 return;

	344 if (PDFCharIsLineEnding(ch))

	345 break;

	346 }

	347 }

	348 m_Pos--;

	349 }

	350

	351 CFX_ByteString CPDF_SyntaxParser::GetNextWord(bool* bIsNumber) {

	352 GetNextWordInternal(bIsNumber);

	353 return CFX_ByteString((const FX_CHAR*)m_WordBuffer, m_WordSize);

	354 }

	355

	// Returns the next token, without reporting whether it is numeric.
	356 CFX_ByteString CPDF_SyntaxParser::GetKeyword() {

	357 return GetNextWord(nullptr);

	358 }

	359

	360 CPDF_Object* CPDF_SyntaxParser::GetObject(CPDF_IndirectObjectHolder* pObjList,

	361 FX_DWORD objnum,

	362 FX_DWORD gennum,

	363 FX_BOOL bDecrypt) {

	364 CFX_AutoRestorer<int> restorer(&s_CurrentRecursionDepth);

	365 if (++s_CurrentRecursionDepth > kParserMaxRecursionDepth)

	366 return nullptr;

	367

	368 FX_FILESIZE SavedPos = m_Pos;

	369 bool bIsNumber;

	370 CFX_ByteString word = GetNextWord(&bIsNumber);

	371 if (word.GetLength() == 0)

	372 return nullptr;

	373

	374 if (bIsNumber) {

	375 FX_FILESIZE SavedPos = m_Pos;

	376 CFX_ByteString nextword = GetNextWord(&bIsNumber);

	377 if (bIsNumber) {

	378 CFX_ByteString nextword2 = GetNextWord(nullptr);

	379 if (nextword2 == "R") {

	380 FX_DWORD objnum = FXSYS_atoui(word);

	381 return new CPDF_Reference(pObjList, objnum);

	382 }

	383 }

	384 m_Pos = SavedPos;

	385 return new CPDF_Number(word);

	386 }

	387

	388 if (word == "true" \|\| word == "false")

	389 return new CPDF_Boolean(word == "true");

	390

	391 if (word == "null")

	392 return new CPDF_Null;

	393

	394 if (word == "(") {

	395 CFX_ByteString str = ReadString();

	396 if (m_pCryptoHandler && bDecrypt)

	397 m_pCryptoHandler->Decrypt(objnum, gennum, str);

	398 return new CPDF_String(str, FALSE);

	399 }

	400

	401 if (word == "<") {

	402 CFX_ByteString str = ReadHexString();

	403 if (m_pCryptoHandler && bDecrypt)

	404 m_pCryptoHandler->Decrypt(objnum, gennum, str);

	405

	406 return new CPDF_String(str, TRUE);

	407 }

	408

	409 if (word == "[") {

	410 CPDF_Array* pArray = new CPDF_Array;

	411 while (CPDF_Object* pObj = GetObject(pObjList, objnum, gennum, true))

	412 pArray->Add(pObj);

	413

	414 return pArray;

	415 }

	416

	417 if (word[0] == '/') {

	418 return new CPDF_Name(

	419 PDF_NameDecode(CFX_ByteStringC(m_WordBuffer + 1, m_WordSize - 1)));

	420 }

	421

	422 if (word == "<<") {

	423 int32_t nKeys = 0;

	424 FX_FILESIZE dwSignValuePos = 0;

	425

	426 std::unique_ptr<CPDF_Dictionary, ReleaseDeleter<CPDF_Dictionary>> pDict(

	427 new CPDF_Dictionary);

	428 while (1) {

	429 CFX_ByteString key = GetNextWord(nullptr);

	430 if (key.IsEmpty())

	431 return nullptr;

	432

	433 FX_FILESIZE SavedPos = m_Pos - key.GetLength();

	434 if (key == ">>")

	435 break;

	436

	437 if (key == "endobj") {

	438 m_Pos = SavedPos;

	439 break;

	440 }

	441

	442 if (key[0] != '/')

	443 continue;

	444

	445 ++nKeys;

	446 key = PDF_NameDecode(key);

	447 if (key.IsEmpty())

	448 continue;

	449

	450 if (key == "/Contents")

	451 dwSignValuePos = m_Pos;

	452

	453 CPDF_Object* pObj = GetObject(pObjList, objnum, gennum, true);

	454 if (!pObj)

	455 continue;

	456

	457 CFX_ByteStringC keyNoSlash(key.c_str() + 1, key.GetLength() - 1);

	458 pDict->SetAt(keyNoSlash, pObj);

	459 }

	460

	461 // Only when this is a signature dictionary and has contents, we reset the

	462 // contents to the un-decrypted form.

	463 if (IsSignatureDict(pDict.get()) && dwSignValuePos) {

	464 CFX_AutoRestorer<FX_FILESIZE> save_pos(&m_Pos);

	465 m_Pos = dwSignValuePos;

	466 pDict->SetAt("Contents", GetObject(pObjList, objnum, gennum, false));

	467 }

	468

	469 FX_FILESIZE SavedPos = m_Pos;

	470 CFX_ByteString nextword = GetNextWord(nullptr);

	471 if (nextword != "stream") {

	472 m_Pos = SavedPos;

	473 return pDict.release();

	474 }

	475 return ReadStream(pDict.release(), objnum, gennum);

	476 }

	477

	478 if (word == ">>")

	479 m_Pos = SavedPos;

	480

	481 return nullptr;

	482 }

	483

	484 CPDF_Object* CPDF_SyntaxParser::GetObjectByStrict(

	485 CPDF_IndirectObjectHolder* pObjList,

	486 FX_DWORD objnum,

	487 FX_DWORD gennum) {

	488 CFX_AutoRestorer<int> restorer(&s_CurrentRecursionDepth);

	489 if (++s_CurrentRecursionDepth > kParserMaxRecursionDepth)

	490 return nullptr;

	491

	492 FX_FILESIZE SavedPos = m_Pos;

	493 bool bIsNumber;

	494 CFX_ByteString word = GetNextWord(&bIsNumber);

	495 if (word.GetLength() == 0)

	496 return nullptr;

	497

	498 if (bIsNumber) {

	499 FX_FILESIZE SavedPos = m_Pos;

	500 CFX_ByteString nextword = GetNextWord(&bIsNumber);

	501 if (bIsNumber) {

	502 CFX_ByteString nextword2 = GetNextWord(nullptr);

	503 if (nextword2 == "R")

	504 return new CPDF_Reference(pObjList, FXSYS_atoui(word));

	505 }

	506 m_Pos = SavedPos;

	507 return new CPDF_Number(word);

	508 }

	509

	510 if (word == "true" \|\| word == "false")

	511 return new CPDF_Boolean(word == "true");

	512

	513 if (word == "null")

	514 return new CPDF_Null;

	515

	516 if (word == "(") {

	517 CFX_ByteString str = ReadString();

	518 if (m_pCryptoHandler)

	519 m_pCryptoHandler->Decrypt(objnum, gennum, str);

	520 return new CPDF_String(str, FALSE);

	521 }

	522

	523 if (word == "<") {

	524 CFX_ByteString str = ReadHexString();

	525 if (m_pCryptoHandler)

	526 m_pCryptoHandler->Decrypt(objnum, gennum, str);

	527 return new CPDF_String(str, TRUE);

	528 }

	529

	530 if (word == "[") {

	531 std::unique_ptr<CPDF_Array, ReleaseDeleter<CPDF_Array>> pArray(

	532 new CPDF_Array);

	533 while (CPDF_Object* pObj = GetObject(pObjList, objnum, gennum, true))

	534 pArray->Add(pObj);

	535

	536 return m_WordBuffer[0] == ']' ? pArray.release() : nullptr;

	537 }

	538

	539 if (word[0] == '/') {

	540 return new CPDF_Name(

	541 PDF_NameDecode(CFX_ByteStringC(m_WordBuffer + 1, m_WordSize - 1)));

	542 }

	543

	544 if (word == "<<") {

	545 std::unique_ptr<CPDF_Dictionary, ReleaseDeleter<CPDF_Dictionary>> pDict(

	546 new CPDF_Dictionary);

	547 while (1) {

	548 FX_FILESIZE SavedPos = m_Pos;

	549 CFX_ByteString key = GetNextWord(nullptr);

	550 if (key.IsEmpty())

	551 return nullptr;

	552

	553 if (key == ">>")

	554 break;

	555

	556 if (key == "endobj") {

	557 m_Pos = SavedPos;

	558 break;

	559 }

	560

	561 if (key[0] != '/')

	562 continue;

	563

	564 key = PDF_NameDecode(key);

	565 std::unique_ptr<CPDF_Object, ReleaseDeleter<CPDF_Object>> obj(

	566 GetObject(pObjList, objnum, gennum, true));

	567 if (!obj) {

	568 uint8_t ch;

	569 while (GetNextChar(ch) && ch != 0x0A && ch != 0x0D) {

	570 continue;

	571 }

	572 return nullptr;

	573 }

	574

	575 if (key.GetLength() > 1) {

	576 pDict->SetAt(CFX_ByteStringC(key.c_str() + 1, key.GetLength() - 1),

	577 obj.release());

	578 }

	579 }

	580

	581 FX_FILESIZE SavedPos = m_Pos;

	582 CFX_ByteString nextword = GetNextWord(nullptr);

	583 if (nextword != "stream") {

	584 m_Pos = SavedPos;

	585 return pDict.release();

	586 }

	587

	588 return ReadStream(pDict.release(), objnum, gennum);

	589 }

	590

	591 if (word == ">>")

	592 m_Pos = SavedPos;

	593

	594 return nullptr;

	595 }

	596

	597 unsigned int CPDF_SyntaxParser::ReadEOLMarkers(FX_FILESIZE pos) {

	598 unsigned char byte1 = 0;

	599 unsigned char byte2 = 0;

	600

	601 GetCharAt(pos, byte1);

	602 GetCharAt(pos + 1, byte2);

	603

	604 if (byte1 == '\r' && byte2 == '\n')

	605 return 2;

	606

	607 if (byte1 == '\r' \|\| byte1 == '\n')

	608 return 1;

	609

	610 return 0;

	611 }

	612

	613 CPDF_Stream* CPDF_SyntaxParser::ReadStream(CPDF_Dictionary* pDict,

	614 FX_DWORD objnum,

	615 FX_DWORD gennum) {

	616 CPDF_Object* pLenObj = pDict->GetElement("Length");

	617 FX_FILESIZE len = -1;

	618 CPDF_Reference* pLenObjRef = ToReference(pLenObj);

	619

	620 bool differingObjNum = !pLenObjRef \|\| (pLenObjRef->GetObjList() &&

	621 pLenObjRef->GetRefObjNum() != objnum);

	622 if (pLenObj && differingObjNum)

	623 len = pLenObj->GetInteger();

	624

	625 // Locate the start of stream.

	626 ToNextLine();

	627 FX_FILESIZE streamStartPos = m_Pos;

	628

	629 const CFX_ByteStringC kEndStreamStr("endstream");

	630 const CFX_ByteStringC kEndObjStr("endobj");

	631

	632 CPDF_CryptoHandler* pCryptoHandler =

	633 objnum == (FX_DWORD)m_MetadataObjnum ? nullptr : m_pCryptoHandler.get();

	634 if (!pCryptoHandler) {

	635 FX_BOOL bSearchForKeyword = TRUE;

	636 if (len >= 0) {

	637 pdfium::base::CheckedNumeric<FX_FILESIZE> pos = m_Pos;

	638 pos += len;

	639 if (pos.IsValid() && pos.ValueOrDie() < m_FileLen)

	640 m_Pos = pos.ValueOrDie();

	641

	642 m_Pos += ReadEOLMarkers(m_Pos);

	643 FXSYS_memset(m_WordBuffer, 0, kEndStreamStr.GetLength() + 1);

	644 GetNextWordInternal(nullptr);

	645 // Earlier version of PDF specification doesn't require EOL marker before

	646 // 'endstream' keyword. If keyword 'endstream' follows the bytes in

	647 // specified length, it signals the end of stream.

	648 if (FXSYS_memcmp(m_WordBuffer, kEndStreamStr.GetPtr(),

	649 kEndStreamStr.GetLength()) == 0) {

	650 bSearchForKeyword = FALSE;

	651 }

	652 }

	653

	654 if (bSearchForKeyword) {

	655 // If len is not available, len needs to be calculated

	656 // by searching the keywords "endstream" or "endobj".

	657 m_Pos = streamStartPos;

	658 FX_FILESIZE endStreamOffset = 0;

	659 while (endStreamOffset >= 0) {

	660 endStreamOffset = FindTag(kEndStreamStr, 0);

	661

	662 // Can't find "endstream".

	663 if (endStreamOffset < 0)

	664 break;

	665

	666 // Stop searching when "endstream" is found.

	667 if (IsWholeWord(m_Pos - kEndStreamStr.GetLength(), m_FileLen,

	668 kEndStreamStr, TRUE)) {

	669 endStreamOffset = m_Pos - streamStartPos - kEndStreamStr.GetLength();

	670 break;

	671 }

	672 }

	673

	674 m_Pos = streamStartPos;

	675 FX_FILESIZE endObjOffset = 0;

	676 while (endObjOffset >= 0) {

	677 endObjOffset = FindTag(kEndObjStr, 0);

	678

	679 // Can't find "endobj".

	680 if (endObjOffset < 0)

	681 break;

	682

	683 // Stop searching when "endobj" is found.

	684 if (IsWholeWord(m_Pos - kEndObjStr.GetLength(), m_FileLen, kEndObjStr,

	685 TRUE)) {

	686 endObjOffset = m_Pos - streamStartPos - kEndObjStr.GetLength();

	687 break;

	688 }

	689 }

	690

	691 // Can't find "endstream" or "endobj".

	692 if (endStreamOffset < 0 && endObjOffset < 0) {

	693 pDict->Release();

	694 return nullptr;

	695 }

	696

	697 if (endStreamOffset < 0 && endObjOffset >= 0) {

	698 // Correct the position of end stream.

	699 endStreamOffset = endObjOffset;

	700 } else if (endStreamOffset >= 0 && endObjOffset < 0) {

	701 // Correct the position of end obj.

	702 endObjOffset = endStreamOffset;

	703 } else if (endStreamOffset > endObjOffset) {

	704 endStreamOffset = endObjOffset;

	705 }

	706

	707 len = endStreamOffset;

	708 int numMarkers = ReadEOLMarkers(streamStartPos + endStreamOffset - 2);

	709 if (numMarkers == 2) {

	710 len -= 2;

	711 } else {

	712 numMarkers = ReadEOLMarkers(streamStartPos + endStreamOffset - 1);

	713 if (numMarkers == 1) {

	714 len -= 1;

	715 }

	716 }

	717

	718 if (len < 0) {

	719 pDict->Release();

	720 return nullptr;

	721 }

	722 pDict->SetAtInteger("Length", len);

	723 }

	724 m_Pos = streamStartPos;

	725 }

	726

	727 if (len < 0) {

	728 pDict->Release();

	729 return nullptr;

	730 }

	731

	732 uint8_t* pData = nullptr;

	733 if (len > 0) {

	734 pData = FX_Alloc(uint8_t, len);

	735 ReadBlock(pData, len);

	736 if (pCryptoHandler) {

	737 CFX_BinaryBuf dest_buf;

	738 dest_buf.EstimateSize(pCryptoHandler->DecryptGetSize(len));

	739

	740 void* context = pCryptoHandler->DecryptStart(objnum, gennum);

	741 pCryptoHandler->DecryptStream(context, pData, len, dest_buf);

	742 pCryptoHandler->DecryptFinish(context, dest_buf);

	743

	744 FX_Free(pData);

	745 pData = dest_buf.GetBuffer();

	746 len = dest_buf.GetSize();

	747 dest_buf.DetachBuffer();

	748 }

	749 }

	750

	751 CPDF_Stream* pStream = new CPDF_Stream(pData, len, pDict);

	752 streamStartPos = m_Pos;

	753 FXSYS_memset(m_WordBuffer, 0, kEndObjStr.GetLength() + 1);

	754

	755 GetNextWordInternal(nullptr);

	756

	757 int numMarkers = ReadEOLMarkers(m_Pos);

	758 if (m_WordSize == kEndObjStr.GetLength() && numMarkers != 0 &&

	759 FXSYS_memcmp(m_WordBuffer, kEndObjStr.GetPtr(), kEndObjStr.GetLength()) ==

	760 0) {

	761 m_Pos = streamStartPos;

	762 }

	763 return pStream;

	764 }

	765

	766 void CPDF_SyntaxParser::InitParser(IFX_FileRead* pFileAccess,

	767 FX_DWORD HeaderOffset) {

	768 FX_Free(m_pFileBuf);

	769

	770 m_pFileBuf = FX_Alloc(uint8_t, m_BufSize);

	771 m_HeaderOffset = HeaderOffset;

	772 m_FileLen = pFileAccess->GetSize();

	773 m_Pos = 0;

	774 m_pFileAccess = pFileAccess;

	775 m_BufOffset = 0;

	776 pFileAccess->ReadBlock(

	777 m_pFileBuf, 0,

	778 (size_t)((FX_FILESIZE)m_BufSize > m_FileLen ? m_FileLen : m_BufSize));

	779 }

	780

	781 uint32_t CPDF_SyntaxParser::GetDirectNum() {

	782 bool bIsNumber;

	783 GetNextWordInternal(&bIsNumber);

	784 if (!bIsNumber)

	785 return 0;

	786

	787 m_WordBuffer[m_WordSize] = 0;

	788 return FXSYS_atoui(reinterpret_cast<const FX_CHAR*>(m_WordBuffer));

	789 }

	790

	791 bool CPDF_SyntaxParser::IsWholeWord(FX_FILESIZE startpos,

	792 FX_FILESIZE limit,

	793 const CFX_ByteStringC& tag,

	794 FX_BOOL checkKeyword) {

	795 const FX_DWORD taglen = tag.GetLength();

	796

	797 bool bCheckLeft = !PDFCharIsDelimiter(tag[0]) && !PDFCharIsWhitespace(tag[0]);

	798 bool bCheckRight = !PDFCharIsDelimiter(tag[taglen - 1]) &&

	799 !PDFCharIsWhitespace(tag[taglen - 1]);

	800

	801 uint8_t ch;

	802 if (bCheckRight && startpos + (int32_t)taglen <= limit &&

	803 GetCharAt(startpos + (int32_t)taglen, ch)) {

	804 if (PDFCharIsNumeric(ch) \|\| PDFCharIsOther(ch) \|\|

	805 (checkKeyword && PDFCharIsDelimiter(ch))) {

	806 return false;

	807 }

	808 }

	809

	810 if (bCheckLeft && startpos > 0 && GetCharAt(startpos - 1, ch)) {

	811 if (PDFCharIsNumeric(ch) \|\| PDFCharIsOther(ch) \|\|

	812 (checkKeyword && PDFCharIsDelimiter(ch))) {

	813 return false;

	814 }

	815 }

	816 return true;

	817 }

	818

	819 // TODO(dsinclair): Split into a SearchWordForward and SearchWordBackwards

	820 // and drop the bool.

	821 FX_BOOL CPDF_SyntaxParser::SearchWord(const CFX_ByteStringC& tag,

	822 FX_BOOL bWholeWord,

	823 FX_BOOL bForward,

	824 FX_FILESIZE limit) {

	825 int32_t taglen = tag.GetLength();

	826 if (taglen == 0)

	827 return FALSE;

	828

	829 FX_FILESIZE pos = m_Pos;

	830 int32_t offset = 0;

	831 if (!bForward)

	832 offset = taglen - 1;

	833

	834 const uint8_t* tag_data = tag.GetPtr();

	835 uint8_t byte;

	836 while (1) {

	837 if (bForward) {

	838 if (limit && pos >= m_Pos + limit)

	839 return FALSE;

	840

	841 if (!GetCharAt(pos, byte))

	842 return FALSE;

	843

	844 } else {

	845 if (limit && pos <= m_Pos - limit)

	846 return FALSE;

	847

	848 if (!GetCharAtBackward(pos, byte))

	849 return FALSE;

	850 }

	851

	852 if (byte == tag_data[offset]) {

	853 if (bForward) {

	854 offset++;

	855 if (offset < taglen) {

	856 pos++;

	857 continue;

	858 }

	859 } else {

	860 offset--;

	861 if (offset >= 0) {

	862 pos--;

	863 continue;

	864 }

	865 }

	866

	867 FX_FILESIZE startpos = bForward ? pos - taglen + 1 : pos;

	868 if (!bWholeWord \|\| IsWholeWord(startpos, limit, tag, FALSE)) {

	869 m_Pos = startpos;

	870 return TRUE;

	871 }

	872 }

	873

	874 if (bForward) {

	875 offset = byte == tag_data[0] ? 1 : 0;

	876 pos++;

	877 } else {

	878 offset = byte == tag_data[taglen - 1] ? taglen - 2 : taglen - 1;

	879 pos--;

	880 }

	881

	882 if (pos < 0)

	883 return FALSE;

	884 }

	885

	886 return FALSE;

	887 }

	888

	889 int32_t CPDF_SyntaxParser::SearchMultiWord(const CFX_ByteStringC& tags,

	890 FX_BOOL bWholeWord,

	891 FX_FILESIZE limit) {

	892 int32_t ntags = 1;

	893 for (int i = 0; i < tags.GetLength(); ++i) {

	894 if (tags[i] == 0)

	895 ++ntags;

	896 }

	897

	898 std::vector<SearchTagRecord> patterns(ntags);

	899 FX_DWORD start = 0;

	900 FX_DWORD itag = 0;

	901 FX_DWORD max_len = 0;

	902 for (int i = 0; i <= tags.GetLength(); ++i) {

	903 if (tags[i] == 0) {

	904 FX_DWORD len = i - start;

	905 max_len = std::max(len, max_len);

	906 patterns[itag].m_pTag = tags.GetCStr() + start;

	907 patterns[itag].m_Len = len;

	908 patterns[itag].m_Offset = 0;

	909 start = i + 1;

	910 ++itag;

	911 }

	912 }

	913

	914 const FX_FILESIZE pos_limit = m_Pos + limit;

	915 for (FX_FILESIZE pos = m_Pos; !limit \|\| pos < pos_limit; ++pos) {

	916 uint8_t byte;

	917 if (!GetCharAt(pos, byte))

	918 break;

	919

	920 for (int i = 0; i < ntags; ++i) {

	921 SearchTagRecord& pat = patterns[i];

	922 if (pat.m_pTag[pat.m_Offset] != byte) {

	923 pat.m_Offset = (pat.m_pTag[0] == byte) ? 1 : 0;

	924 continue;

	925 }

	926

	927 ++pat.m_Offset;

	928 if (pat.m_Offset != pat.m_Len)

	929 continue;

	930

	931 if (!bWholeWord \|\|

	932 IsWholeWord(pos - pat.m_Len, limit,

	933 CFX_ByteStringC(pat.m_pTag, pat.m_Len), FALSE)) {

	934 return i;

	935 }

	936

	937 pat.m_Offset = (pat.m_pTag[0] == byte) ? 1 : 0;

	938 }

	939 }

	940 return -1;

	941 }

	942

	943 FX_FILESIZE CPDF_SyntaxParser::FindTag(const CFX_ByteStringC& tag,

	944 FX_FILESIZE limit) {

	945 int32_t taglen = tag.GetLength();

	946 int32_t match = 0;

	947 limit += m_Pos;

	948 FX_FILESIZE startpos = m_Pos;

	949

	950 while (1) {

	951 uint8_t ch;

	952 if (!GetNextChar(ch))

	953 return -1;

	954

	955 if (ch == tag[match]) {

	956 match++;

	957 if (match == taglen)

	958 return m_Pos - startpos - taglen;

	959 } else {

	960 match = ch == tag[0] ? 1 : 0;

	961 }

	962

	963 if (limit && m_Pos == limit)

	964 return -1;

	965 }

	966 return -1;

	967 }

	968

	// Installs |pCryptoHandler| as the handler used to decrypt strings and stream
	// data, taking ownership of it and replacing any handler set earlier. Passing
	// an empty pointer leaves the parser without a handler.
	969 void CPDF_SyntaxParser::SetEncrypt(

	970 std::unique_ptr<CPDF_CryptoHandler> pCryptoHandler) {

	971 m_pCryptoHandler = std::move(pCryptoHandler);

	972 }

OLD	NEW