core/src/fpdfapi/fpdf_parser/cpdf_syntax_parser.cpp - Issue 1774753002: Split CPDF_SyntaxParser into its own named .cpp/.h files.

Side by Side Diff: core/src/fpdfapi/fpdf_parser/cpdf_syntax_parser.cpp

Issue 1774753002: Split CPDF_SyntaxParser into its own named .cpp/.h files. (Closed) Base URL: https://pdfium.googlesource.com/pdfium.git@master

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
(Empty)
	1 // Copyright 2016 PDFium Authors. All rights reserved.

	2 // Use of this source code is governed by a BSD-style license that can be

	3 // found in the LICENSE file.

	4

	5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com

	6

	7 #include "core/src/fpdfapi/fpdf_parser/cpdf_syntax_parser.h"

	8

	9 #include "core/include/fpdfapi/fpdf_module.h"

	10 #include "core/include/fpdfapi/fpdf_parser.h"

	11 #include "core/include/fxcrt/fx_ext.h"

	12 #include "third_party/base/numerics/safe_math.h"

	13

	14 namespace {

	15

	16 struct SearchTagRecord {

	17 const char* m_pTag;

	18 FX_DWORD m_Len;

	19 FX_DWORD m_Offset;

	20 };

	21

	22 } // namespace

	23

	24 // static

	25 int CPDF_SyntaxParser::s_CurrentRecursionDepth = 0;

	26

	27 CPDF_SyntaxParser::CPDF_SyntaxParser()

	28 : m_MetadataObjnum(0),

	29 m_pFileAccess(nullptr),

	30 m_pFileBuf(nullptr),

	31 m_BufSize(CPDF_ModuleMgr::kFileBufSize) {}

	32

	33 CPDF_SyntaxParser::~CPDF_SyntaxParser() {

	34 FX_Free(m_pFileBuf);

	35 }

	36

	37 FX_BOOL CPDF_SyntaxParser::GetCharAt(FX_FILESIZE pos, uint8_t& ch) {

	38 CFX_AutoRestorer<FX_FILESIZE> save_pos(&m_Pos);

	39 m_Pos = pos;

	40 return GetNextChar(ch);

	41 }

	42

	43 FX_BOOL CPDF_SyntaxParser::GetNextChar(uint8_t& ch) {

	44 FX_FILESIZE pos = m_Pos + m_HeaderOffset;

	45 if (pos >= m_FileLen)

	46 return FALSE;

	47

	48 if (m_BufOffset >= pos \|\| (FX_FILESIZE)(m_BufOffset + m_BufSize) <= pos) {

	49 FX_FILESIZE read_pos = pos;

	50 FX_DWORD read_size = m_BufSize;

	51 if ((FX_FILESIZE)read_size > m_FileLen)

	52 read_size = (FX_DWORD)m_FileLen;

	53

	54 if ((FX_FILESIZE)(read_pos + read_size) > m_FileLen) {

	55 if (m_FileLen < (FX_FILESIZE)read_size) {

	56 read_pos = 0;

	57 read_size = (FX_DWORD)m_FileLen;

	58 } else {

	59 read_pos = m_FileLen - read_size;

	60 }

	61 }

	62

	63 if (!m_pFileAccess->ReadBlock(m_pFileBuf, read_pos, read_size))

	64 return FALSE;

	65

	66 m_BufOffset = read_pos;

	67 }

	68 ch = m_pFileBuf[pos - m_BufOffset];

	69 m_Pos++;

	70 return TRUE;

	71 }

	72

	73 FX_BOOL CPDF_SyntaxParser::GetCharAtBackward(FX_FILESIZE pos, uint8_t& ch) {

	74 pos += m_HeaderOffset;

	75 if (pos >= m_FileLen)

	76 return FALSE;

	77

	78 if (m_BufOffset >= pos \|\| (FX_FILESIZE)(m_BufOffset + m_BufSize) <= pos) {

	79 FX_FILESIZE read_pos;

	80 if (pos < (FX_FILESIZE)m_BufSize)

	81 read_pos = 0;

	82 else

	83 read_pos = pos - m_BufSize + 1;

	84

	85 FX_DWORD read_size = m_BufSize;

	86 if ((FX_FILESIZE)(read_pos + read_size) > m_FileLen) {

	87 if (m_FileLen < (FX_FILESIZE)read_size) {

	88 read_pos = 0;

	89 read_size = (FX_DWORD)m_FileLen;

	90 } else {

	91 read_pos = m_FileLen - read_size;

	92 }

	93 }

	94

	95 if (!m_pFileAccess->ReadBlock(m_pFileBuf, read_pos, read_size))

	96 return FALSE;

	97

	98 m_BufOffset = read_pos;

	99 }

	100 ch = m_pFileBuf[pos - m_BufOffset];

	101 return TRUE;

	102 }

	103

	104 FX_BOOL CPDF_SyntaxParser::ReadBlock(uint8_t* pBuf, FX_DWORD size) {

	105 if (!m_pFileAccess->ReadBlock(pBuf, m_Pos + m_HeaderOffset, size))

	106 return FALSE;

	107 m_Pos += size;

	108 return TRUE;

	109 }

	110

	111 void CPDF_SyntaxParser::GetNextWordInternal(bool* bIsNumber) {

	112 m_WordSize = 0;

	113 if (bIsNumber)

	114 *bIsNumber = true;

	115

	116 uint8_t ch;

	117 if (!GetNextChar(ch))

	118 return;

	119

	120 while (1) {

	121 while (PDFCharIsWhitespace(ch)) {

	122 if (!GetNextChar(ch))

	123 return;

	124 }

	125

	126 if (ch != '%')

	127 break;

	128

	129 while (1) {

	130 if (!GetNextChar(ch))

	131 return;

	132 if (PDFCharIsLineEnding(ch))

	133 break;

	134 }

	135 }

	136

	137 if (PDFCharIsDelimiter(ch)) {

	138 if (bIsNumber)

	139 *bIsNumber = false;

	140

	141 m_WordBuffer[m_WordSize++] = ch;

	142 if (ch == '/') {

	143 while (1) {

	144 if (!GetNextChar(ch))

	145 return;

	146

	147 if (!PDFCharIsOther(ch) && !PDFCharIsNumeric(ch)) {

	148 m_Pos--;

	149 return;

	150 }

	151

	152 if (m_WordSize < sizeof(m_WordBuffer) - 1)

	153 m_WordBuffer[m_WordSize++] = ch;

	154 }

	155 } else if (ch == '<') {

	156 if (!GetNextChar(ch))

	157 return;

	158

	159 if (ch == '<')

	160 m_WordBuffer[m_WordSize++] = ch;

	161 else

	162 m_Pos--;

	163 } else if (ch == '>') {

	164 if (!GetNextChar(ch))

	165 return;

	166

	167 if (ch == '>')

	168 m_WordBuffer[m_WordSize++] = ch;

	169 else

	170 m_Pos--;

	171 }

	172 return;

	173 }

	174

	175 while (1) {

	176 if (m_WordSize < sizeof(m_WordBuffer) - 1)

	177 m_WordBuffer[m_WordSize++] = ch;

	178

	179 if (!PDFCharIsNumeric(ch)) {

	180 if (bIsNumber)

	181 *bIsNumber = false;

	182 }

	183

	184 if (!GetNextChar(ch))

	185 return;

	186

	187 if (PDFCharIsDelimiter(ch) \|\| PDFCharIsWhitespace(ch)) {

	188 m_Pos--;

	189 break;

	190 }

	191 }

	192 }

	193

	194 CFX_ByteString CPDF_SyntaxParser::ReadString() {

	195 uint8_t ch;

	196 if (!GetNextChar(ch))

	197 return CFX_ByteString();

	198

	199 CFX_ByteTextBuf buf;

	200 int32_t parlevel = 0;

	201 int32_t status = 0;

	202 int32_t iEscCode = 0;

	203 while (1) {

	204 switch (status) {

	205 case 0:

	206 if (ch == ')') {

	207 if (parlevel == 0) {

	208 return buf.GetByteString();

	209 }

	210 parlevel--;

	211 buf.AppendChar(')');

	212 } else if (ch == '(') {

	213 parlevel++;

	214 buf.AppendChar('(');

	215 } else if (ch == '\\') {

	216 status = 1;

	217 } else {

	218 buf.AppendChar(ch);

	219 }

	220 break;

	221 case 1:

	222 if (ch >= '0' && ch <= '7') {

	223 iEscCode = FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch));

	224 status = 2;

	225 break;

	226 }

	227

	228 if (ch == 'n') {

	229 buf.AppendChar('\n');

	230 } else if (ch == 'r') {

	231 buf.AppendChar('\r');

	232 } else if (ch == 't') {

	233 buf.AppendChar('\t');

	234 } else if (ch == 'b') {

	235 buf.AppendChar('\b');

	236 } else if (ch == 'f') {

	237 buf.AppendChar('\f');

	238 } else if (ch == '\r') {

	239 status = 4;

	240 break;

	241 } else if (ch != '\n') {

	242 buf.AppendChar(ch);

	243 }

	244 status = 0;

	245 break;

	246 case 2:

	247 if (ch >= '0' && ch <= '7') {

	248 iEscCode =

	249 iEscCode * 8 + FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch));

	250 status = 3;

	251 } else {

	252 buf.AppendChar(iEscCode);

	253 status = 0;

	254 continue;

	255 }

	256 break;

	257 case 3:

	258 if (ch >= '0' && ch <= '7') {

	259 iEscCode =

	260 iEscCode * 8 + FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch));

	261 buf.AppendChar(iEscCode);

	262 status = 0;

	263 } else {

	264 buf.AppendChar(iEscCode);

	265 status = 0;

	266 continue;

	267 }

	268 break;

	269 case 4:

	270 status = 0;

	271 if (ch != '\n')

	272 continue;

	273 break;

	274 }

	275

	276 if (!GetNextChar(ch))

	277 break;

	278 }

	279

	280 GetNextChar(ch);

	281 return buf.GetByteString();

	282 }

	283

	284 CFX_ByteString CPDF_SyntaxParser::ReadHexString() {

	285 uint8_t ch;

	286 if (!GetNextChar(ch))

	287 return CFX_ByteString();

	288

	289 CFX_ByteTextBuf buf;

	290 bool bFirst = true;

	291 uint8_t code = 0;

	292 while (1) {

	293 if (ch == '>')

	294 break;

	295

	296 if (std::isxdigit(ch)) {

	297 int val = FXSYS_toHexDigit(ch);

	298 if (bFirst) {

	299 code = val * 16;

	300 } else {

	301 code += val;

	302 buf.AppendByte(code);

	303 }

	304 bFirst = !bFirst;

	305 }

	306

	307 if (!GetNextChar(ch))

	308 break;

	309 }

	310 if (!bFirst)

	311 buf.AppendByte(code);

	312

	313 return buf.GetByteString();

	314 }

	315

	316 void CPDF_SyntaxParser::ToNextLine() {

	317 uint8_t ch;

	318 while (GetNextChar(ch)) {

	319 if (ch == '\n')

	320 break;

	321

	322 if (ch == '\r') {

	323 GetNextChar(ch);

	324 if (ch != '\n')

	325 --m_Pos;

	326 break;

	327 }

	328 }

	329 }

	330

	331 void CPDF_SyntaxParser::ToNextWord() {

	332 uint8_t ch;

	333 if (!GetNextChar(ch))

	334 return;

	335

	336 while (1) {

	337 while (PDFCharIsWhitespace(ch)) {

	338 if (!GetNextChar(ch))

	339 return;

	340 }

	341

	342 if (ch != '%')

	343 break;

	344

	345 while (1) {

	346 if (!GetNextChar(ch))

	347 return;

	348 if (PDFCharIsLineEnding(ch))

	349 break;

	350 }

	351 }

	352 m_Pos--;

	353 }

	354

	355 CFX_ByteString CPDF_SyntaxParser::GetNextWord(bool* bIsNumber) {

	356 GetNextWordInternal(bIsNumber);

	357 return CFX_ByteString((const FX_CHAR*)m_WordBuffer, m_WordSize);

	358 }

	359

	360 CFX_ByteString CPDF_SyntaxParser::GetKeyword() {

	361 return GetNextWord(nullptr);

	362 }

	363

	364 CPDF_Object* CPDF_SyntaxParser::GetObject(CPDF_IndirectObjectHolder* pObjList,

	365 FX_DWORD objnum,

	366 FX_DWORD gennum,

	367 FX_BOOL bDecrypt) {

	368 CFX_AutoRestorer<int> restorer(&s_CurrentRecursionDepth);

	369 if (++s_CurrentRecursionDepth > kParserMaxRecursionDepth)

	370 return nullptr;

	371

	372 FX_FILESIZE SavedPos = m_Pos;

	373 bool bIsNumber;

	374 CFX_ByteString word = GetNextWord(&bIsNumber);

	375 if (word.GetLength() == 0)

	376 return nullptr;

	377

	378 if (bIsNumber) {

	379 FX_FILESIZE SavedPos = m_Pos;

	380 CFX_ByteString nextword = GetNextWord(&bIsNumber);

	381 if (bIsNumber) {

	382 CFX_ByteString nextword2 = GetNextWord(nullptr);

	383 if (nextword2 == "R") {

	384 FX_DWORD objnum = FXSYS_atoui(word);

	385 return new CPDF_Reference(pObjList, objnum);

	386 }

	387 }

	388 m_Pos = SavedPos;

	389 return new CPDF_Number(word);

	390 }

	391

	392 if (word == "true" \|\| word == "false")

	393 return new CPDF_Boolean(word == "true");

	394

	395 if (word == "null")

	396 return new CPDF_Null;

	397

	398 if (word == "(") {

	399 CFX_ByteString str = ReadString();

	400 if (m_pCryptoHandler && bDecrypt)

	401 m_pCryptoHandler->Decrypt(objnum, gennum, str);

	402 return new CPDF_String(str, FALSE);

	403 }

	404

	405 if (word == "<") {

	406 CFX_ByteString str = ReadHexString();

	407 if (m_pCryptoHandler && bDecrypt)

	408 m_pCryptoHandler->Decrypt(objnum, gennum, str);

	409

	410 return new CPDF_String(str, TRUE);

	411 }

	412

	413 if (word == "[") {

	414 CPDF_Array* pArray = new CPDF_Array;

	415 while (CPDF_Object* pObj = GetObject(pObjList, objnum, gennum, true))

	416 pArray->Add(pObj);

	417

	418 return pArray;

	419 }

	420

	421 if (word[0] == '/') {

	422 return new CPDF_Name(

	423 PDF_NameDecode(CFX_ByteStringC(m_WordBuffer + 1, m_WordSize - 1)));

	424 }

	425

	426 if (word == "<<") {

	427 int32_t nKeys = 0;

	428 FX_FILESIZE dwSignValuePos = 0;

	429

	430 std::unique_ptr<CPDF_Dictionary, ReleaseDeleter<CPDF_Dictionary>> pDict(

	431 new CPDF_Dictionary);

	432 while (1) {

	433 CFX_ByteString key = GetNextWord(nullptr);

	434 if (key.IsEmpty())

	435 return nullptr;

	436

	437 FX_FILESIZE SavedPos = m_Pos - key.GetLength();

	438 if (key == ">>")

	439 break;

	440

	441 if (key == "endobj") {

	442 m_Pos = SavedPos;

	443 break;

	444 }

	445

	446 if (key[0] != '/')

	447 continue;

	448

	449 ++nKeys;

	450 key = PDF_NameDecode(key);

	451 if (key.IsEmpty())

	452 continue;

	453

	454 if (key == "/Contents")

	455 dwSignValuePos = m_Pos;

	456

	457 CPDF_Object* pObj = GetObject(pObjList, objnum, gennum, true);

	458 if (!pObj)

	459 continue;

	460

	461 CFX_ByteStringC keyNoSlash(key.c_str() + 1, key.GetLength() - 1);

	462 pDict->SetAt(keyNoSlash, pObj);

	463 }

	464

	465 // Only when this is a signature dictionary and has contents, we reset the

	466 // contents to the un-decrypted form.

	467 if (IsSignatureDict(pDict.get()) && dwSignValuePos) {

	468 CFX_AutoRestorer<FX_FILESIZE> save_pos(&m_Pos);

	469 m_Pos = dwSignValuePos;

	470 pDict->SetAt("Contents", GetObject(pObjList, objnum, gennum, false));

	471 }

	472

	473 FX_FILESIZE SavedPos = m_Pos;

	474 CFX_ByteString nextword = GetNextWord(nullptr);

	475 if (nextword != "stream") {

	476 m_Pos = SavedPos;

	477 return pDict.release();

	478 }

	479 return ReadStream(pDict.release(), objnum, gennum);

	480 }

	481

	482 if (word == ">>")

	483 m_Pos = SavedPos;

	484

	485 return nullptr;

	486 }

	487

	488 CPDF_Object* CPDF_SyntaxParser::GetObjectByStrict(

	489 CPDF_IndirectObjectHolder* pObjList,

	490 FX_DWORD objnum,

	491 FX_DWORD gennum) {

	492 CFX_AutoRestorer<int> restorer(&s_CurrentRecursionDepth);

	493 if (++s_CurrentRecursionDepth > kParserMaxRecursionDepth)

	494 return nullptr;

	495

	496 FX_FILESIZE SavedPos = m_Pos;

	497 bool bIsNumber;

	498 CFX_ByteString word = GetNextWord(&bIsNumber);

	499 if (word.GetLength() == 0)

	500 return nullptr;

	501

	502 if (bIsNumber) {

	503 FX_FILESIZE SavedPos = m_Pos;

	504 CFX_ByteString nextword = GetNextWord(&bIsNumber);

	505 if (bIsNumber) {

	506 CFX_ByteString nextword2 = GetNextWord(nullptr);

	507 if (nextword2 == "R")

	508 return new CPDF_Reference(pObjList, FXSYS_atoui(word));

	509 }

	510 m_Pos = SavedPos;

	511 return new CPDF_Number(word);

	512 }

	513

	514 if (word == "true" \|\| word == "false")

	515 return new CPDF_Boolean(word == "true");

	516

	517 if (word == "null")

	518 return new CPDF_Null;

	519

	520 if (word == "(") {

	521 CFX_ByteString str = ReadString();

	522 if (m_pCryptoHandler)

	523 m_pCryptoHandler->Decrypt(objnum, gennum, str);

	524 return new CPDF_String(str, FALSE);

	525 }

	526

	527 if (word == "<") {

	528 CFX_ByteString str = ReadHexString();

	529 if (m_pCryptoHandler)

	530 m_pCryptoHandler->Decrypt(objnum, gennum, str);

	531 return new CPDF_String(str, TRUE);

	532 }

	533

	534 if (word == "[") {

	535 std::unique_ptr<CPDF_Array, ReleaseDeleter<CPDF_Array>> pArray(

	536 new CPDF_Array);

	537 while (CPDF_Object* pObj = GetObject(pObjList, objnum, gennum, true))

	538 pArray->Add(pObj);

	539

	540 return m_WordBuffer[0] == ']' ? pArray.release() : nullptr;

	541 }

	542

	543 if (word[0] == '/') {

	544 return new CPDF_Name(

	545 PDF_NameDecode(CFX_ByteStringC(m_WordBuffer + 1, m_WordSize - 1)));

	546 }

	547

	548 if (word == "<<") {

	549 std::unique_ptr<CPDF_Dictionary, ReleaseDeleter<CPDF_Dictionary>> pDict(

	550 new CPDF_Dictionary);

	551 while (1) {

	552 FX_FILESIZE SavedPos = m_Pos;

	553 CFX_ByteString key = GetNextWord(nullptr);

	554 if (key.IsEmpty())

	555 return nullptr;

	556

	557 if (key == ">>")

	558 break;

	559

	560 if (key == "endobj") {

	561 m_Pos = SavedPos;

	562 break;

	563 }

	564

	565 if (key[0] != '/')

	566 continue;

	567

	568 key = PDF_NameDecode(key);

	569 std::unique_ptr<CPDF_Object, ReleaseDeleter<CPDF_Object>> obj(

	570 GetObject(pObjList, objnum, gennum, true));

	571 if (!obj) {

	572 uint8_t ch;

	573 while (GetNextChar(ch) && ch != 0x0A && ch != 0x0D) {

	574 continue;

	575 }

	576 return nullptr;

	577 }

	578

	579 if (key.GetLength() > 1) {

	580 pDict->SetAt(CFX_ByteStringC(key.c_str() + 1, key.GetLength() - 1),

	581 obj.release());

	582 }

	583 }

	584

	585 FX_FILESIZE SavedPos = m_Pos;

	586 CFX_ByteString nextword = GetNextWord(nullptr);

	587 if (nextword != "stream") {

	588 m_Pos = SavedPos;

	589 return pDict.release();

	590 }

	591

	592 return ReadStream(pDict.release(), objnum, gennum);

	593 }

	594

	595 if (word == ">>")

	596 m_Pos = SavedPos;

	597

	598 return nullptr;

	599 }

	600

	601 unsigned int CPDF_SyntaxParser::ReadEOLMarkers(FX_FILESIZE pos) {

	602 unsigned char byte1 = 0;

	603 unsigned char byte2 = 0;

	604

	605 GetCharAt(pos, byte1);

	606 GetCharAt(pos + 1, byte2);

	607

	608 if (byte1 == '\r' && byte2 == '\n')

	609 return 2;

	610

	611 if (byte1 == '\r' \|\| byte1 == '\n')

	612 return 1;

	613

	614 return 0;

	615 }

	616

	617 CPDF_Stream* CPDF_SyntaxParser::ReadStream(CPDF_Dictionary* pDict,

	618 FX_DWORD objnum,

	619 FX_DWORD gennum) {

	620 CPDF_Object* pLenObj = pDict->GetElement("Length");

	621 FX_FILESIZE len = -1;

	622 CPDF_Reference* pLenObjRef = ToReference(pLenObj);

	623

	624 bool differingObjNum = !pLenObjRef \|\| (pLenObjRef->GetObjList() &&

	625 pLenObjRef->GetRefObjNum() != objnum);

	626 if (pLenObj && differingObjNum)

	627 len = pLenObj->GetInteger();

	628

	629 // Locate the start of stream.

	630 ToNextLine();

	631 FX_FILESIZE streamStartPos = m_Pos;

	632

	633 const CFX_ByteStringC kEndStreamStr("endstream");

	634 const CFX_ByteStringC kEndObjStr("endobj");

	635

	636 CPDF_CryptoHandler* pCryptoHandler =

	637 objnum == (FX_DWORD)m_MetadataObjnum ? nullptr : m_pCryptoHandler.get();

	638 if (!pCryptoHandler) {

	639 FX_BOOL bSearchForKeyword = TRUE;

	640 if (len >= 0) {

	641 pdfium::base::CheckedNumeric<FX_FILESIZE> pos = m_Pos;

	642 pos += len;

	643 if (pos.IsValid() && pos.ValueOrDie() < m_FileLen)

	644 m_Pos = pos.ValueOrDie();

	645

	646 m_Pos += ReadEOLMarkers(m_Pos);

	647 FXSYS_memset(m_WordBuffer, 0, kEndStreamStr.GetLength() + 1);

	648 GetNextWordInternal(nullptr);

	649 // Earlier version of PDF specification doesn't require EOL marker before

	650 // 'endstream' keyword. If keyword 'endstream' follows the bytes in

	651 // specified length, it signals the end of stream.

	652 if (FXSYS_memcmp(m_WordBuffer, kEndStreamStr.GetPtr(),

	653 kEndStreamStr.GetLength()) == 0) {

	654 bSearchForKeyword = FALSE;

	655 }

	656 }

	657

	658 if (bSearchForKeyword) {

	659 // If len is not available, len needs to be calculated

	660 // by searching the keywords "endstream" or "endobj".

	661 m_Pos = streamStartPos;

	662 FX_FILESIZE endStreamOffset = 0;

	663 while (endStreamOffset >= 0) {

	664 endStreamOffset = FindTag(kEndStreamStr, 0);

	665

	666 // Can't find "endstream".

	667 if (endStreamOffset < 0)

	668 break;

	669

	670 // Stop searching when "endstream" is found.

	671 if (IsWholeWord(m_Pos - kEndStreamStr.GetLength(), m_FileLen,

	672 kEndStreamStr, TRUE)) {

	673 endStreamOffset = m_Pos - streamStartPos - kEndStreamStr.GetLength();

	674 break;

	675 }

	676 }

	677

	678 m_Pos = streamStartPos;

	679 FX_FILESIZE endObjOffset = 0;

	680 while (endObjOffset >= 0) {

	681 endObjOffset = FindTag(kEndObjStr, 0);

	682

	683 // Can't find "endobj".

	684 if (endObjOffset < 0)

	685 break;

	686

	687 // Stop searching when "endobj" is found.

	688 if (IsWholeWord(m_Pos - kEndObjStr.GetLength(), m_FileLen, kEndObjStr,

	689 TRUE)) {

	690 endObjOffset = m_Pos - streamStartPos - kEndObjStr.GetLength();

	691 break;

	692 }

	693 }

	694

	695 // Can't find "endstream" or "endobj".

	696 if (endStreamOffset < 0 && endObjOffset < 0) {

	697 pDict->Release();

	698 return nullptr;

	699 }

	700

	701 if (endStreamOffset < 0 && endObjOffset >= 0) {

	702 // Correct the position of end stream.

	703 endStreamOffset = endObjOffset;

	704 } else if (endStreamOffset >= 0 && endObjOffset < 0) {

	705 // Correct the position of end obj.

	706 endObjOffset = endStreamOffset;

	707 } else if (endStreamOffset > endObjOffset) {

	708 endStreamOffset = endObjOffset;

	709 }

	710

	711 len = endStreamOffset;

	712 int numMarkers = ReadEOLMarkers(streamStartPos + endStreamOffset - 2);

	713 if (numMarkers == 2) {

	714 len -= 2;

	715 } else {

	716 numMarkers = ReadEOLMarkers(streamStartPos + endStreamOffset - 1);

	717 if (numMarkers == 1) {

	718 len -= 1;

	719 }

	720 }

	721

	722 if (len < 0) {

	723 pDict->Release();

	724 return nullptr;

	725 }

	726 pDict->SetAtInteger("Length", len);

	727 }

	728 m_Pos = streamStartPos;

	729 }

	730

	731 if (len < 0) {

	732 pDict->Release();

	733 return nullptr;

	734 }

	735

	736 uint8_t* pData = nullptr;

	737 if (len > 0) {

	738 pData = FX_Alloc(uint8_t, len);

	739 ReadBlock(pData, len);

	740 if (pCryptoHandler) {

	741 CFX_BinaryBuf dest_buf;

	742 dest_buf.EstimateSize(pCryptoHandler->DecryptGetSize(len));

	743

	744 void* context = pCryptoHandler->DecryptStart(objnum, gennum);

	745 pCryptoHandler->DecryptStream(context, pData, len, dest_buf);

	746 pCryptoHandler->DecryptFinish(context, dest_buf);

	747

	748 FX_Free(pData);

	749 pData = dest_buf.GetBuffer();

	750 len = dest_buf.GetSize();

	751 dest_buf.DetachBuffer();

	752 }

	753 }

	754

	755 CPDF_Stream* pStream = new CPDF_Stream(pData, len, pDict);

	756 streamStartPos = m_Pos;

	757 FXSYS_memset(m_WordBuffer, 0, kEndObjStr.GetLength() + 1);

	758

	759 GetNextWordInternal(nullptr);

	760

	761 int numMarkers = ReadEOLMarkers(m_Pos);

	762 if (m_WordSize == kEndObjStr.GetLength() && numMarkers != 0 &&

	763 FXSYS_memcmp(m_WordBuffer, kEndObjStr.GetPtr(), kEndObjStr.GetLength()) ==

	764 0) {

	765 m_Pos = streamStartPos;

	766 }

	767 return pStream;

	768 }

	769

	770 void CPDF_SyntaxParser::InitParser(IFX_FileRead* pFileAccess,

	771 FX_DWORD HeaderOffset) {

	772 FX_Free(m_pFileBuf);

	773

	774 m_pFileBuf = FX_Alloc(uint8_t, m_BufSize);

	775 m_HeaderOffset = HeaderOffset;

	776 m_FileLen = pFileAccess->GetSize();

	777 m_Pos = 0;

	778 m_pFileAccess = pFileAccess;

	779 m_BufOffset = 0;

	780 pFileAccess->ReadBlock(

	781 m_pFileBuf, 0,

	782 (size_t)((FX_FILESIZE)m_BufSize > m_FileLen ? m_FileLen : m_BufSize));

	783 }

	784

	785 uint32_t CPDF_SyntaxParser::GetDirectNum() {

	786 bool bIsNumber;

	787 GetNextWordInternal(&bIsNumber);

	788 if (!bIsNumber)

	789 return 0;

	790

	791 m_WordBuffer[m_WordSize] = 0;

	792 return FXSYS_atoui(reinterpret_cast<const FX_CHAR*>(m_WordBuffer));

	793 }

	794

	795 bool CPDF_SyntaxParser::IsWholeWord(FX_FILESIZE startpos,

	796 FX_FILESIZE limit,

	797 const CFX_ByteStringC& tag,

	798 FX_BOOL checkKeyword) {

	799 const FX_DWORD taglen = tag.GetLength();

	800

	801 bool bCheckLeft = !PDFCharIsDelimiter(tag[0]) && !PDFCharIsWhitespace(tag[0]);

	802 bool bCheckRight = !PDFCharIsDelimiter(tag[taglen - 1]) &&

	803 !PDFCharIsWhitespace(tag[taglen - 1]);

	804

	805 uint8_t ch;

	806 if (bCheckRight && startpos + (int32_t)taglen <= limit &&

	807 GetCharAt(startpos + (int32_t)taglen, ch)) {

	808 if (PDFCharIsNumeric(ch) \|\| PDFCharIsOther(ch) \|\|

	809 (checkKeyword && PDFCharIsDelimiter(ch))) {

	810 return false;

	811 }

	812 }

	813

	814 if (bCheckLeft && startpos > 0 && GetCharAt(startpos - 1, ch)) {

	815 if (PDFCharIsNumeric(ch) \|\| PDFCharIsOther(ch) \|\|

	816 (checkKeyword && PDFCharIsDelimiter(ch))) {

	817 return false;

	818 }

	819 }

	820 return true;

	821 }

	822

	823 // TODO(dsinclair): Split into a SearchWordForward and SearchWordBackwards

	824 // and drop the bool.

	825 FX_BOOL CPDF_SyntaxParser::SearchWord(const CFX_ByteStringC& tag,

	826 FX_BOOL bWholeWord,

	827 FX_BOOL bForward,

	828 FX_FILESIZE limit) {

	829 int32_t taglen = tag.GetLength();

	830 if (taglen == 0)

	831 return FALSE;

	832

	833 FX_FILESIZE pos = m_Pos;

	834 int32_t offset = 0;

	835 if (!bForward)

	836 offset = taglen - 1;

	837

	838 const uint8_t* tag_data = tag.GetPtr();

	839 uint8_t byte;

	840 while (1) {

	841 if (bForward) {

	842 if (limit && pos >= m_Pos + limit)

	843 return FALSE;

	844

	845 if (!GetCharAt(pos, byte))

	846 return FALSE;

	847

	848 } else {

	849 if (limit && pos <= m_Pos - limit)

	850 return FALSE;

	851

	852 if (!GetCharAtBackward(pos, byte))

	853 return FALSE;

	854 }

	855

	856 if (byte == tag_data[offset]) {

	857 if (bForward) {

	858 offset++;

	859 if (offset < taglen) {

	860 pos++;

	861 continue;

	862 }

	863 } else {

	864 offset--;

	865 if (offset >= 0) {

	866 pos--;

	867 continue;

	868 }

	869 }

	870

	871 FX_FILESIZE startpos = bForward ? pos - taglen + 1 : pos;

	872 if (!bWholeWord \|\| IsWholeWord(startpos, limit, tag, FALSE)) {

	873 m_Pos = startpos;

	874 return TRUE;

	875 }

	876 }

	877

	878 if (bForward) {

	879 offset = byte == tag_data[0] ? 1 : 0;

	880 pos++;

	881 } else {

	882 offset = byte == tag_data[taglen - 1] ? taglen - 2 : taglen - 1;

	883 pos--;

	884 }

	885

	886 if (pos < 0)

	887 return FALSE;

	888 }

	889

	890 return FALSE;

	891 }

	892

	893 int32_t CPDF_SyntaxParser::SearchMultiWord(const CFX_ByteStringC& tags,

	894 FX_BOOL bWholeWord,

	895 FX_FILESIZE limit) {

	896 int32_t ntags = 1;

	897 for (int i = 0; i < tags.GetLength(); ++i) {

	898 if (tags[i] == 0)

	899 ++ntags;

	900 }

	901

	902 std::vector<SearchTagRecord> patterns(ntags);

	903 FX_DWORD start = 0;

	904 FX_DWORD itag = 0;

	905 FX_DWORD max_len = 0;

	906 for (int i = 0; i <= tags.GetLength(); ++i) {

	907 if (tags[i] == 0) {

	908 FX_DWORD len = i - start;

	909 max_len = std::max(len, max_len);

	910 patterns[itag].m_pTag = tags.GetCStr() + start;

	911 patterns[itag].m_Len = len;

	912 patterns[itag].m_Offset = 0;

	913 start = i + 1;

	914 ++itag;

	915 }

	916 }

	917

	918 const FX_FILESIZE pos_limit = m_Pos + limit;

	919 for (FX_FILESIZE pos = m_Pos; !limit \|\| pos < pos_limit; ++pos) {

	920 uint8_t byte;

	921 if (!GetCharAt(pos, byte))

	922 break;

	923

	924 for (int i = 0; i < ntags; ++i) {

	925 SearchTagRecord& pat = patterns[i];

	926 if (pat.m_pTag[pat.m_Offset] != byte) {

	927 pat.m_Offset = (pat.m_pTag[0] == byte) ? 1 : 0;

	928 continue;

	929 }

	930

	931 ++pat.m_Offset;

	932 if (pat.m_Offset != pat.m_Len)

	933 continue;

	934

	935 if (!bWholeWord \|\|

	936 IsWholeWord(pos - pat.m_Len, limit,

	937 CFX_ByteStringC(pat.m_pTag, pat.m_Len), FALSE)) {

	938 return i;

	939 }

	940

	941 pat.m_Offset = (pat.m_pTag[0] == byte) ? 1 : 0;

	942 }

	943 }

	944 return -1;

	945 }

	946

	947 FX_FILESIZE CPDF_SyntaxParser::FindTag(const CFX_ByteStringC& tag,

	948 FX_FILESIZE limit) {

	949 int32_t taglen = tag.GetLength();

	950 int32_t match = 0;

	951 limit += m_Pos;

	952 FX_FILESIZE startpos = m_Pos;

	953

	954 while (1) {

	955 uint8_t ch;

	956 if (!GetNextChar(ch))

	957 return -1;

	958

	959 if (ch == tag[match]) {

	960 match++;

	961 if (match == taglen)

	962 return m_Pos - startpos - taglen;

	963 } else {

	964 match = ch == tag[0] ? 1 : 0;

	965 }

	966

	967 if (limit && m_Pos == limit)

	968 return -1;

	969 }

	970 return -1;

	971 }

	972

	973 void CPDF_SyntaxParser::SetEncrypt(

	974 std::unique_ptr<CPDF_CryptoHandler> pCryptoHandler) {

	975 m_pCryptoHandler = std::move(pCryptoHandler);

	976 }

OLD	NEW

« no previous file with comments | « core/src/fpdfapi/fpdf_parser/cpdf_syntax_parser.h ('k') | core/src/fpdfapi/fpdf_parser/fpdf_parser_fdf.cpp » ('j') | no next file with comments »