core/src/fpdfapi/fpdf_parser/cpdf_syntax_parser.cpp - Issue 1774753002: Split CPDF_SyntaxParser into its own named .cpp/.h files.

Unified Diff: core/src/fpdfapi/fpdf_parser/cpdf_syntax_parser.cpp

Issue 1774753002: Split CPDF_SyntaxParser into its own named .cpp/.h files. (Closed) Base URL: https://pdfium.googlesource.com/pdfium.git@master

Patch Set: Stray file. Created 4 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

« core/src/fpdfapi/fpdf_parser/cpdf_syntax_parser.h ('K') | « core/src/fpdfapi/fpdf_parser/cpdf_syntax_parser.h ('k') | core/src/fpdfapi/fpdf_parser/fpdf_parser_fdf.cpp » ('j') | core/src/fpdfapi/fpdf_parser/fpdf_parser_parser.cpp » ('J')
Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

Index: core/src/fpdfapi/fpdf_parser/cpdf_syntax_parser.cpp

diff --git a/core/src/fpdfapi/fpdf_parser/cpdf_syntax_parser.cpp b/core/src/fpdfapi/fpdf_parser/cpdf_syntax_parser.cpp

new file mode 100644

index 0000000000000000000000000000000000000000..2f8c748be14ce53993fdaab8a2360046aa667fb1

--- /dev/null

+++ b/core/src/fpdfapi/fpdf_parser/cpdf_syntax_parser.cpp

@@ -0,0 +1,972 @@

dsinclair 2016/03/08 01:15:05 nit: 2016?

Tom Sepez 2016/03/08 19:35:41 Done.

+// Use of this source code is governed by a BSD-style license that can be

+// found in the LICENSE file.

+#include "core/src/fpdfapi/fpdf_parser/cpdf_syntax_parser.h"

+#include "core/include/fpdfapi/fpdf_module.h"

+#include "core/include/fpdfapi/fpdf_parser.h"

+#include "core/include/fxcrt/fx_ext.h"

+#include "third_party/base/numerics/safe_math.h"

+struct SearchTagRecord {  // Per-tag matching state used by CPDF_SyntaxParser::SearchMultiWord().

dsinclair 2016/03/08 01:15:05 Can this go in namespace {}?

Tom Sepez 2016/03/08 19:35:42 Done.

+ const char* m_pTag;  // First byte of this tag inside the caller's NUL-separated tag list.

+ FX_DWORD m_Len;  // Number of bytes in the tag.

+ FX_DWORD m_Offset;  // How many leading bytes of the tag are currently matched.

+};

+// static

+int CPDF_SyntaxParser::s_CurrentRecursionDepth = 0;  // Nesting depth of GetObject()/GetObjectByStrict(); checked against kParserMaxRecursionDepth.

+CPDF_SyntaxParser::CPDF_SyntaxParser()  // Creates a parser with no file attached; InitParser() must supply one.

+ : m_MetadataObjnum(0),

+ m_pFileAccess(nullptr),

+ m_pFileBuf(nullptr),  // Allocated later by InitParser().

+ m_BufSize(CPDF_ModuleMgr::kFileBufSize) {}  // Size of the read window used by GetNextChar()/GetCharAtBackward().

+CPDF_SyntaxParser::~CPDF_SyntaxParser() {  // Frees the read buffer allocated by InitParser().

+ FX_Free(m_pFileBuf);

+FX_BOOL CPDF_SyntaxParser::GetCharAt(FX_FILESIZE pos, uint8_t& ch) {  // Reads the byte at |pos| into |ch|; returns whatever GetNextChar() returns.

+ CFX_AutoRestorer<FX_FILESIZE> save_pos(&m_Pos);  // Presumably puts m_Pos back on scope exit (helper defined elsewhere) - TODO confirm.

+ m_Pos = pos;

+ return GetNextChar(ch);

+FX_BOOL CPDF_SyntaxParser::GetNextChar(uint8_t& ch) {  // Reads the byte at m_Pos into |ch| and advances m_Pos; FALSE at end of file or on a failed read.

+ FX_FILESIZE pos = m_Pos + m_HeaderOffset;  // m_Pos is relative to the header offset; |pos| is the absolute file position.

+ if (pos >= m_FileLen)

+ return FALSE;  // |ch| and m_Pos are left untouched.

+ if (m_BufOffset >= pos || (FX_FILESIZE)(m_BufOffset + m_BufSize) <= pos) {  // Refill unless m_BufOffset < pos < m_BufOffset + m_BufSize.

+ FX_FILESIZE read_pos = pos;  // By default the new window starts at the requested byte.

+ FX_DWORD read_size = m_BufSize;

+ if ((FX_FILESIZE)read_size > m_FileLen)

+ read_size = (FX_DWORD)m_FileLen;  // Never request more bytes than the file holds.

+ if ((FX_FILESIZE)(read_pos + read_size) > m_FileLen) {  // Window would run past the end of the file.

+ if (m_FileLen < (FX_FILESIZE)read_size) {

+ read_pos = 0;

+ read_size = (FX_DWORD)m_FileLen;

+ } else {

+ read_pos = m_FileLen - read_size;  // Slide the window back so it ends exactly at m_FileLen.

+ }

+ if (!m_pFileAccess->ReadBlock(m_pFileBuf, read_pos, read_size))

+ return FALSE;

+ m_BufOffset = read_pos;

+ }

+ ch = m_pFileBuf[pos - m_BufOffset];  // Index relative to the start of the buffered window.

+ m_Pos++;

+ return TRUE;

+FX_BOOL CPDF_SyntaxParser::GetCharAtBackward(FX_FILESIZE pos, uint8_t& ch) {  // Reads the byte at |pos| into |ch| without modifying m_Pos; buffering favours backward scans.

+ pos += m_HeaderOffset;  // Convert to an absolute file position.

+ if (pos >= m_FileLen)

+ return FALSE;

+ if (m_BufOffset >= pos || (FX_FILESIZE)(m_BufOffset + m_BufSize) <= pos) {  // Refill unless m_BufOffset < pos < m_BufOffset + m_BufSize.

+ FX_FILESIZE read_pos;

+ if (pos < (FX_FILESIZE)m_BufSize)

+ read_pos = 0;

+ else

+ read_pos = pos - m_BufSize + 1;  // Place |pos| at the last slot of the window so preceding bytes are buffered too.

+ FX_DWORD read_size = m_BufSize;

+ if ((FX_FILESIZE)(read_pos + read_size) > m_FileLen) {  // Window would run past the end of the file.

+ if (m_FileLen < (FX_FILESIZE)read_size) {

+ read_pos = 0;

+ read_size = (FX_DWORD)m_FileLen;

+ } else {

+ read_pos = m_FileLen - read_size;

+ }

+ if (!m_pFileAccess->ReadBlock(m_pFileBuf, read_pos, read_size))

+ return FALSE;

+ m_BufOffset = read_pos;

+ }

+ ch = m_pFileBuf[pos - m_BufOffset];

+ return TRUE;

+FX_BOOL CPDF_SyntaxParser::ReadBlock(uint8_t* pBuf, FX_DWORD size) {  // Reads |size| bytes at m_Pos straight from m_pFileAccess into |pBuf| (m_pFileBuf is not used).

+ if (!m_pFileAccess->ReadBlock(pBuf, m_Pos + m_HeaderOffset, size))

+ return FALSE;  // m_Pos is not advanced on failure.

+ m_Pos += size;

+ return TRUE;

+void CPDF_SyntaxParser::GetNextWordInternal(bool* bIsNumber) {  // Scans the next token into m_WordBuffer/m_WordSize; |bIsNumber| is optional.

+ m_WordSize = 0;

+ if (bIsNumber)

+ *bIsNumber = true;  // Stays true unless a delimiter or a non-numeric byte is stored below.

+ uint8_t ch;

+ if (!GetNextChar(ch))

+ return;

+ while (1) {  // Skip any mix of whitespace and '%' comments in front of the token.

+ while (PDFCharIsWhitespace(ch)) {

+ if (!GetNextChar(ch))

+ return;

+ }

+ if (ch != '%')

+ break;

+ while (1) {  // Consume the comment up to a line-ending byte.

+ if (!GetNextChar(ch))

+ return;

+ if (PDFCharIsLineEnding(ch))

+ break;

+ }

+ if (PDFCharIsDelimiter(ch)) {  // Delimiter tokens: a "/name", "<<", ">>" or a single delimiter byte.

+ if (bIsNumber)

+ *bIsNumber = false;

+ m_WordBuffer[m_WordSize++] = ch;

+ if (ch == '/') {

+ while (1) {  // Collect name bytes; the leading '/' stays in the buffer.

+ if (!GetNextChar(ch))

+ return;

+ if (!PDFCharIsOther(ch) && !PDFCharIsNumeric(ch)) {

+ m_Pos--;  // Un-read the byte that ended the name.

+ return;

+ }

+ if (m_WordSize < sizeof(m_WordBuffer) - 1)  // Bytes that do not fit are dropped, but scanning continues.

+ m_WordBuffer[m_WordSize++] = ch;

+ }

+ } else if (ch == '<') {

+ if (!GetNextChar(ch))

+ return;

+ if (ch == '<')

+ m_WordBuffer[m_WordSize++] = ch;  // Token is "<<".

+ else

+ m_Pos--;  // Token is a lone "<"; leave the next byte unread.

+ } else if (ch == '>') {

+ if (!GetNextChar(ch))

+ return;

+ if (ch == '>')

+ m_WordBuffer[m_WordSize++] = ch;  // Token is ">>".

+ else

+ m_Pos--;  // Token is a lone ">"; leave the next byte unread.

+ }

+ return;

+ }

+ while (1) {  // Regular token: accumulate until a delimiter or whitespace byte.

+ if (m_WordSize < sizeof(m_WordBuffer) - 1)

+ m_WordBuffer[m_WordSize++] = ch;

+ if (!PDFCharIsNumeric(ch)) {

+ if (bIsNumber)

+ *bIsNumber = false;

+ }

+ if (!GetNextChar(ch))

+ return;

+ if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) {

+ m_Pos--;  // Leave the terminating byte for the next read.

+ break;

+ }

+CFX_ByteString CPDF_SyntaxParser::ReadString() {  // Reads a literal string body up to its closing ')' and decodes backslash escapes.

+ uint8_t ch;

+ if (!GetNextChar(ch))

+ return CFX_ByteString();

+ CFX_ByteTextBuf buf;

+ int32_t parlevel = 0;  // Depth of nested, unescaped parentheses inside the string.

+ int32_t status = 0;  // 0: plain text, 1: just after a backslash, 2 and 3: inside an octal escape, 4: after backslash + CR.

+ int32_t iEscCode = 0;  // Octal escape value accumulated so far.

+ while (1) {

+ switch (status) {

+ case 0:

+ if (ch == ')') {

+ if (parlevel == 0) {

+ return buf.GetByteString();  // Matching close parenthesis: string complete.

+ }

+ parlevel--;

+ buf.AppendChar(')');

+ } else if (ch == '(') {

+ parlevel++;

+ buf.AppendChar('(');

+ } else if (ch == '\\') {

+ status = 1;

+ } else {

+ buf.AppendChar(ch);

+ }

+ break;

+ case 1:

+ if (ch >= '0' && ch <= '7') {

+ iEscCode = FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch));  // First octal digit.

+ status = 2;

+ break;

+ }

+ if (ch == 'n') {

+ buf.AppendChar('\n');

+ } else if (ch == 'r') {

+ buf.AppendChar('\r');

+ } else if (ch == 't') {

+ buf.AppendChar('\t');

+ } else if (ch == 'b') {

+ buf.AppendChar('\b');

+ } else if (ch == 'f') {

+ buf.AppendChar('\f');

+ } else if (ch == '\r') {

+ status = 4;  // Backslash + CR: check for a following LF before resuming.

+ break;

+ } else if (ch != '\n') {  // Backslash + LF appends nothing; any other byte is kept literally.

+ buf.AppendChar(ch);

+ }

+ status = 0;

+ break;

+ case 2:

+ if (ch >= '0' && ch <= '7') {

+ iEscCode =

+ iEscCode * 8 + FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch));

+ status = 3;

+ } else {

+ buf.AppendChar(iEscCode);  // One-digit escape ended by a non-octal byte.

+ status = 0;

+ continue;  // Re-process |ch| in state 0 without reading another byte.

+ }

+ break;

+ case 3:

+ if (ch >= '0' && ch <= '7') {

+ iEscCode =

+ iEscCode * 8 + FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch));

+ buf.AppendChar(iEscCode);  // Third digit completes the escape.

+ status = 0;

+ } else {

+ buf.AppendChar(iEscCode);  // Two-digit escape ended by a non-octal byte.

+ status = 0;

+ continue;  // Re-process |ch| in state 0 without reading another byte.

+ }

+ break;

+ case 4:

+ status = 0;

+ if (ch != '\n')

+ continue;  // Not an LF: handle this byte as plain text; an LF is swallowed.

+ break;

+ }

+ if (!GetNextChar(ch))

+ break;  // Leaves the while loop: input ended before the closing parenthesis.

+ }

+ GetNextChar(ch);  // Reached only after the read above failed; result is ignored.

+ return buf.GetByteString();  // Returns whatever was decoded so far.

+CFX_ByteString CPDF_SyntaxParser::ReadHexString() {  // Reads hex digits up to '>' (or end of input) and packs each pair into one byte.

+ uint8_t ch;

+ if (!GetNextChar(ch))

+ return CFX_ByteString();

+ CFX_ByteTextBuf buf;

+ bool bFirst = true;  // True when the next hex digit is the high nibble of a byte.

+ uint8_t code = 0;

+ while (1) {

+ if (ch == '>')

+ break;

+ if (std::isxdigit(ch)) {  // Non-hex bytes are skipped.

+ int val = FXSYS_toHexDigit(ch);

+ if (bFirst) {

+ code = val * 16;

+ } else {

+ code += val;

+ buf.AppendByte(code);

+ }

+ bFirst = !bFirst;

+ }

+ if (!GetNextChar(ch))

+ break;

+ }

+ if (!bFirst)

+ buf.AppendByte(code);  // Odd digit count: the last digit is emitted as a high nibble with a zero low nibble.

+ return buf.GetByteString();

+void CPDF_SyntaxParser::ToNextLine() {  // Advances m_Pos past the next line ending (LF, CR, or CR LF), or to end of input.

+ uint8_t ch;

+ while (GetNextChar(ch)) {

+ if (ch == '\n')

+ break;

+ if (ch == '\r') {

+ GetNextChar(ch);  // NOTE(review): result unchecked; on failure |ch| is still CR and m_Pos did not advance - confirm the decrement below is intended then.

+ if (ch != '\n')

+ --m_Pos;  // The byte after CR is not LF: un-read it.

+ break;

+ }

+void CPDF_SyntaxParser::ToNextWord() {  // Skips whitespace and '%' comments so m_Pos ends on the first byte of the next token.

+ uint8_t ch;

+ if (!GetNextChar(ch))

+ return;

+ while (1) {

+ while (PDFCharIsWhitespace(ch)) {

+ if (!GetNextChar(ch))

+ return;

+ }

+ if (ch != '%')

+ break;

+ while (1) {  // Consume the comment up to a line-ending byte.

+ if (!GetNextChar(ch))

+ return;

+ if (PDFCharIsLineEnding(ch))

+ break;

+ }

+ m_Pos--;  // Un-read the first byte of the token.

+CFX_ByteString CPDF_SyntaxParser::GetNextWord(bool* bIsNumber) {  // Scans the next token and returns a copy of it; |bIsNumber| is passed through to GetNextWordInternal().

+ GetNextWordInternal(bIsNumber);

+ return CFX_ByteString((const FX_CHAR*)m_WordBuffer, m_WordSize);

+CFX_ByteString CPDF_SyntaxParser::GetKeyword() {  // Returns the next token without reporting whether it is numeric.

+ return GetNextWord(nullptr);

+CPDF_Object* CPDF_SyntaxParser::GetObject(CPDF_IndirectObjectHolder* pObjList,  // Parses one object at m_Pos; nullptr on end of input, unrecognised token, or excessive nesting.

+ FX_DWORD objnum,

+ FX_DWORD gennum,

+ FX_BOOL bDecrypt) {  // |bDecrypt| gates decryption of string objects read directly by this call.

+ CFX_AutoRestorer<int> restorer(&s_CurrentRecursionDepth);  // Presumably resets the depth counter when this call returns - TODO confirm.

+ if (++s_CurrentRecursionDepth > kParserMaxRecursionDepth)

+ return nullptr;

+ FX_FILESIZE SavedPos = m_Pos;  // Position before the first token; used to un-read a ">>" at the end.

+ bool bIsNumber;

+ CFX_ByteString word = GetNextWord(&bIsNumber);

+ if (word.GetLength() == 0)

+ return nullptr;

+ if (bIsNumber) {  // Either a plain number or the start of a "num num R" reference.

+ FX_FILESIZE SavedPos = m_Pos;  // Position just after the first number.

+ CFX_ByteString nextword = GetNextWord(&bIsNumber);

+ if (bIsNumber) {

+ CFX_ByteString nextword2 = GetNextWord(nullptr);

+ if (nextword2 == "R") {

+ FX_DWORD objnum = FXSYS_atoui(word);  // Shadows the parameter; only the first number is used for the reference.

+ return new CPDF_Reference(pObjList, objnum);

+ }

+ m_Pos = SavedPos;  // Not a reference: un-read the look-ahead tokens.

+ return new CPDF_Number(word);

+ }

+ if (word == "true" || word == "false")

+ return new CPDF_Boolean(word == "true");

+ if (word == "null")

+ return new CPDF_Null;

+ if (word == "(") {

+ CFX_ByteString str = ReadString();

+ if (m_pCryptoHandler && bDecrypt)

+ m_pCryptoHandler->Decrypt(objnum, gennum, str);

+ return new CPDF_String(str, FALSE);

+ }

+ if (word == "<") {

+ CFX_ByteString str = ReadHexString();

+ if (m_pCryptoHandler && bDecrypt)

+ m_pCryptoHandler->Decrypt(objnum, gennum, str);

+ return new CPDF_String(str, TRUE);

+ }

+ if (word == "[") {

+ CPDF_Array* pArray = new CPDF_Array;

+ while (CPDF_Object* pObj = GetObject(pObjList, objnum, gennum, true))  // Stops at the first token that yields nullptr, e.g. "]", which no branch here handles.

+ pArray->Add(pObj);

+ return pArray;

+ }

+ if (word[0] == '/') {

+ return new CPDF_Name(

+ PDF_NameDecode(CFX_ByteStringC(m_WordBuffer + 1, m_WordSize - 1)));  // Skip the leading '/'.

+ }

+ if (word == "<<") {

+ int32_t nKeys = 0;  // NOTE(review): incremented below but not otherwise used in this function.

+ FX_FILESIZE dwSignValuePos = 0;  // Position of the /Contents value, or 0 if none was seen.

+ std::unique_ptr<CPDF_Dictionary, ReleaseDeleter<CPDF_Dictionary>> pDict(

+ new CPDF_Dictionary);

+ while (1) {

+ CFX_ByteString key = GetNextWord(nullptr);

+ if (key.IsEmpty())

+ return nullptr;

+ FX_FILESIZE SavedPos = m_Pos - key.GetLength();  // Position of the key token's first byte.

+ if (key == ">>")

+ break;

+ if (key == "endobj") {  // Dictionary was never closed: stop and leave "endobj" unread.

+ m_Pos = SavedPos;

+ break;

+ }

+ if (key[0] != '/')

+ continue;  // Skip tokens that are not name keys.

+ ++nKeys;

+ key = PDF_NameDecode(key);

+ if (key.IsEmpty())

+ continue;

+ if (key == "/Contents")

+ dwSignValuePos = m_Pos;  // Remember where the value starts so it can be re-read without decryption.

+ CPDF_Object* pObj = GetObject(pObjList, objnum, gennum, true);

+ if (!pObj)

+ continue;  // A key whose value could not be parsed is dropped.

+ CFX_ByteStringC keyNoSlash(key.c_str() + 1, key.GetLength() - 1);

+ pDict->SetAt(keyNoSlash, pObj);

+ }

+ // Only when this is a signature dictionary and has contents, we reset the

+ // contents to the un-decrypted form.

+ if (IsSignatureDict(pDict.get()) && dwSignValuePos) {

+ CFX_AutoRestorer<FX_FILESIZE> save_pos(&m_Pos);

+ m_Pos = dwSignValuePos;

+ pDict->SetAt("Contents", GetObject(pObjList, objnum, gennum, false));

+ }

+ FX_FILESIZE SavedPos = m_Pos;

+ CFX_ByteString nextword = GetNextWord(nullptr);

+ if (nextword != "stream") {

+ m_Pos = SavedPos;  // Plain dictionary: un-read the look-ahead token.

+ return pDict.release();

+ }

+ return ReadStream(pDict.release(), objnum, gennum);  // Dictionary followed by "stream": hand it to ReadStream().

+ }

+ if (word == ">>")

+ m_Pos = SavedPos;  // Rewind so the ">>" token is left unread.

+ return nullptr;

+CPDF_Object* CPDF_SyntaxParser::GetObjectByStrict(  // Like GetObject() but rejects unterminated arrays and dictionaries with unparsable values.

+ CPDF_IndirectObjectHolder* pObjList,

+ FX_DWORD objnum,

+ FX_DWORD gennum) {

+ CFX_AutoRestorer<int> restorer(&s_CurrentRecursionDepth);  // Presumably resets the depth counter when this call returns - TODO confirm.

+ if (++s_CurrentRecursionDepth > kParserMaxRecursionDepth)

+ return nullptr;

+ FX_FILESIZE SavedPos = m_Pos;  // Position before the first token; used to un-read a ">>" at the end.

+ bool bIsNumber;

+ CFX_ByteString word = GetNextWord(&bIsNumber);

+ if (word.GetLength() == 0)

+ return nullptr;

+ if (bIsNumber) {  // Either a plain number or the start of a "num num R" reference.

+ FX_FILESIZE SavedPos = m_Pos;  // Position just after the first number.

+ CFX_ByteString nextword = GetNextWord(&bIsNumber);

+ if (bIsNumber) {

+ CFX_ByteString nextword2 = GetNextWord(nullptr);

+ if (nextword2 == "R")

+ return new CPDF_Reference(pObjList, FXSYS_atoui(word));

+ }

+ m_Pos = SavedPos;  // Not a reference: un-read the look-ahead tokens.

+ return new CPDF_Number(word);

+ }

+ if (word == "true" || word == "false")

+ return new CPDF_Boolean(word == "true");

+ if (word == "null")

+ return new CPDF_Null;

+ if (word == "(") {

+ CFX_ByteString str = ReadString();

+ if (m_pCryptoHandler)  // Unlike GetObject(), there is no flag to suppress decryption here.

+ m_pCryptoHandler->Decrypt(objnum, gennum, str);

+ return new CPDF_String(str, FALSE);

+ }

+ if (word == "<") {

+ CFX_ByteString str = ReadHexString();

+ if (m_pCryptoHandler)

+ m_pCryptoHandler->Decrypt(objnum, gennum, str);

+ return new CPDF_String(str, TRUE);

+ }

+ if (word == "[") {

+ std::unique_ptr<CPDF_Array, ReleaseDeleter<CPDF_Array>> pArray(

+ new CPDF_Array);

+ while (CPDF_Object* pObj = GetObject(pObjList, objnum, gennum, true))  // NOTE(review): elements are parsed with GetObject(), not GetObjectByStrict().

+ pArray->Add(pObj);

+ return m_WordBuffer[0] == ']' ? pArray.release() : nullptr;  // m_WordBuffer holds the last token scanned; anything but "]" rejects the array.

+ }

+ if (word[0] == '/') {

+ return new CPDF_Name(

+ PDF_NameDecode(CFX_ByteStringC(m_WordBuffer + 1, m_WordSize - 1)));  // Skip the leading '/'.

+ }

+ if (word == "<<") {

+ std::unique_ptr<CPDF_Dictionary, ReleaseDeleter<CPDF_Dictionary>> pDict(

+ new CPDF_Dictionary);

+ while (1) {

+ FX_FILESIZE SavedPos = m_Pos;  // Position before the key token.

+ CFX_ByteString key = GetNextWord(nullptr);

+ if (key.IsEmpty())

+ return nullptr;

+ if (key == ">>")

+ break;

+ if (key == "endobj") {  // Dictionary was never closed: stop and leave "endobj" unread.

+ m_Pos = SavedPos;

+ break;

+ }

+ if (key[0] != '/')

+ continue;  // Skip tokens that are not name keys.

+ key = PDF_NameDecode(key);

+ std::unique_ptr<CPDF_Object, ReleaseDeleter<CPDF_Object>> obj(

+ GetObject(pObjList, objnum, gennum, true));  // NOTE(review): values are parsed with GetObject(), not GetObjectByStrict().

+ if (!obj) {  // Value failed to parse: skip to the next LF or CR byte and reject the whole dictionary.

+ uint8_t ch;

+ while (GetNextChar(ch) && ch != 0x0A && ch != 0x0D) {

+ continue;

+ }

+ return nullptr;

+ }

+ if (key.GetLength() > 1) {  // A bare "/" key is ignored.

+ pDict->SetAt(CFX_ByteStringC(key.c_str() + 1, key.GetLength() - 1),

+ obj.release());

+ }

+ FX_FILESIZE SavedPos = m_Pos;

+ CFX_ByteString nextword = GetNextWord(nullptr);

+ if (nextword != "stream") {

+ m_Pos = SavedPos;  // Plain dictionary: un-read the look-ahead token.

+ return pDict.release();

+ }

+ return ReadStream(pDict.release(), objnum, gennum);  // Dictionary followed by "stream": hand it to ReadStream().

+ }

+ if (word == ">>")

+ m_Pos = SavedPos;  // Rewind so the ">>" token is left unread.

+ return nullptr;

+unsigned int CPDF_SyntaxParser::ReadEOLMarkers(FX_FILESIZE pos) {  // Returns the byte length of the line ending at |pos|: 2 for CR LF, 1 for a lone CR or LF, else 0.

+ unsigned char byte1 = 0;  // Stays 0 if the read below fails, which yields a result of 0.

+ unsigned char byte2 = 0;

+ GetCharAt(pos, byte1);

+ GetCharAt(pos + 1, byte2);

+ if (byte1 == '\r' && byte2 == '\n')

+ return 2;

+ if (byte1 == '\r' || byte1 == '\n')

+ return 1;

+ return 0;

+CPDF_Stream* CPDF_SyntaxParser::ReadStream(CPDF_Dictionary* pDict,  // Builds a stream object from |pDict| and the data after "stream"; releases |pDict| and returns nullptr when no length can be found.

+ FX_DWORD objnum,

+ FX_DWORD gennum) {

+ CPDF_Object* pLenObj = pDict->GetElement("Length");

+ FX_FILESIZE len = -1;  // -1 means the length is not known yet.

+ CPDF_Reference* pLenObjRef = ToReference(pLenObj);

+ bool differingObjNum = !pLenObjRef || (pLenObjRef->GetObjList() &&  // True for a direct /Length, or a reference to an object other than this stream.

+ pLenObjRef->GetRefObjNum() != objnum);

+ if (pLenObj && differingObjNum)

+ len = pLenObj->GetInteger();

+ // Locate the start of stream.

+ ToNextLine();

+ FX_FILESIZE streamStartPos = m_Pos;

+ const CFX_ByteStringC kEndStreamStr("endstream");

+ const CFX_ByteStringC kEndObjStr("endobj");

+ CPDF_CryptoHandler* pCryptoHandler =

+ objnum == (FX_DWORD)m_MetadataObjnum ? nullptr : m_pCryptoHandler.get();  // No handler is used for the object numbered m_MetadataObjnum.

+ if (!pCryptoHandler) {  // The /Length check and keyword search below run only when no crypto handler applies.

+ FX_BOOL bSearchForKeyword = TRUE;

+ if (len >= 0) {

+ pdfium::base::CheckedNumeric<FX_FILESIZE> pos = m_Pos;  // Overflow-checked m_Pos + len.

+ pos += len;

+ if (pos.IsValid() && pos.ValueOrDie() < m_FileLen)

+ m_Pos = pos.ValueOrDie();

+ m_Pos += ReadEOLMarkers(m_Pos);  // Step over a line ending between the data and the keyword.

+ FXSYS_memset(m_WordBuffer, 0, kEndStreamStr.GetLength() + 1);

+ GetNextWordInternal(nullptr);

+ // Earlier version of PDF specification doesn't require EOL marker before

+ // 'endstream' keyword. If keyword 'endstream' follows the bytes in

+ // specified length, it signals the end of stream.

+ if (FXSYS_memcmp(m_WordBuffer, kEndStreamStr.GetPtr(),

+ kEndStreamStr.GetLength()) == 0) {

+ bSearchForKeyword = FALSE;  // /Length is trusted.

+ }

+ if (bSearchForKeyword) {

+ // If len is not available, len needs to be calculated

+ // by searching the keywords "endstream" or "endobj".

+ m_Pos = streamStartPos;

+ FX_FILESIZE endStreamOffset = 0;

+ while (endStreamOffset >= 0) {

+ endStreamOffset = FindTag(kEndStreamStr, 0);

+ // Can't find "endstream".

+ if (endStreamOffset < 0)

+ break;

+ // Stop searching when "endstream" is found.

+ if (IsWholeWord(m_Pos - kEndStreamStr.GetLength(), m_FileLen,

+ kEndStreamStr, TRUE)) {

+ endStreamOffset = m_Pos - streamStartPos - kEndStreamStr.GetLength();  // Offset of the keyword relative to the start of the stream data.

+ break;

+ }

+ m_Pos = streamStartPos;  // Restart from the data start for the "endobj" search.

+ FX_FILESIZE endObjOffset = 0;

+ while (endObjOffset >= 0) {

+ endObjOffset = FindTag(kEndObjStr, 0);

+ // Can't find "endobj".

+ if (endObjOffset < 0)

+ break;

+ // Stop searching when "endobj" is found.

+ if (IsWholeWord(m_Pos - kEndObjStr.GetLength(), m_FileLen, kEndObjStr,

+ TRUE)) {

+ endObjOffset = m_Pos - streamStartPos - kEndObjStr.GetLength();  // Offset of the keyword relative to the start of the stream data.

+ break;

+ }

+ // Can't find "endstream" or "endobj".

+ if (endStreamOffset < 0 && endObjOffset < 0) {

+ pDict->Release();

+ return nullptr;

+ }

+ if (endStreamOffset < 0 && endObjOffset >= 0) {

+ // Correct the position of end stream.

+ endStreamOffset = endObjOffset;

+ } else if (endStreamOffset >= 0 && endObjOffset < 0) {

+ // Correct the position of end obj.

+ endObjOffset = endStreamOffset;

+ } else if (endStreamOffset > endObjOffset) {  // Both found: use whichever keyword comes first.

+ endStreamOffset = endObjOffset;

+ }

+ len = endStreamOffset;

+ int numMarkers = ReadEOLMarkers(streamStartPos + endStreamOffset - 2);  // Exclude a CR LF that directly precedes the keyword from the data length.

+ if (numMarkers == 2) {

+ len -= 2;

+ } else {

+ numMarkers = ReadEOLMarkers(streamStartPos + endStreamOffset - 1);  // Otherwise exclude a single CR or LF.

+ if (numMarkers == 1) {

+ len -= 1;

+ }

+ if (len < 0) {

+ pDict->Release();

+ return nullptr;

+ }

+ pDict->SetAtInteger("Length", len);  // Store the recovered length in the dictionary.

+ }

+ m_Pos = streamStartPos;  // Rewind to the data start before reading it.

+ }

+ if (len < 0) {

+ pDict->Release();

+ return nullptr;

+ }

+ uint8_t* pData = nullptr;

+ if (len > 0) {

+ pData = FX_Alloc(uint8_t, len);

+ ReadBlock(pData, len);  // NOTE(review): the return value is not checked.

+ if (pCryptoHandler) {

+ CFX_BinaryBuf dest_buf;

+ dest_buf.EstimateSize(pCryptoHandler->DecryptGetSize(len));

+ void* context = pCryptoHandler->DecryptStart(objnum, gennum);

+ pCryptoHandler->DecryptStream(context, pData, len, dest_buf);

+ pCryptoHandler->DecryptFinish(context, dest_buf);

+ FX_Free(pData);  // The raw bytes are replaced by the decrypted output below.

+ pData = dest_buf.GetBuffer();

+ len = dest_buf.GetSize();

+ dest_buf.DetachBuffer();  // Presumably hands buffer ownership to |pData| - TODO confirm.

+ }

+ CPDF_Stream* pStream = new CPDF_Stream(pData, len, pDict);

+ streamStartPos = m_Pos;  // Reused here as the position just after the stream data.

+ FXSYS_memset(m_WordBuffer, 0, kEndObjStr.GetLength() + 1);

+ GetNextWordInternal(nullptr);

+ int numMarkers = ReadEOLMarkers(m_Pos);

+ if (m_WordSize == kEndObjStr.GetLength() && numMarkers != 0 &&  // Next token is "endobj" followed by a line ending.

+ FXSYS_memcmp(m_WordBuffer, kEndObjStr.GetPtr(), kEndObjStr.GetLength()) ==

+ 0) {

+ m_Pos = streamStartPos;  // Un-read "endobj"; any other token stays consumed.

+ }

+ return pStream;

+void CPDF_SyntaxParser::InitParser(IFX_FileRead* pFileAccess,  // Attaches |pFileAccess|, resets the read position and preloads the read buffer.

+ FX_DWORD HeaderOffset) {  // |HeaderOffset| is added to m_Pos by the readers to get an absolute file position.

+ FX_Free(m_pFileBuf);  // Drop any buffer left over from a previous call.

+ m_pFileBuf = FX_Alloc(uint8_t, m_BufSize);

+ m_HeaderOffset = HeaderOffset;

+ m_FileLen = pFileAccess->GetSize();

+ m_Pos = 0;

+ m_pFileAccess = pFileAccess;

+ m_BufOffset = 0;

+ pFileAccess->ReadBlock(  // NOTE(review): the return value is not checked.

+ m_pFileBuf, 0,

+ (size_t)((FX_FILESIZE)m_BufSize > m_FileLen ? m_FileLen : m_BufSize));  // Preload min(m_BufSize, m_FileLen) bytes from offset 0.

+uint32_t CPDF_SyntaxParser::GetDirectNum() {  // Reads the next token and returns it as an unsigned integer, or 0 if it is not numeric.

+ bool bIsNumber;

+ GetNextWordInternal(&bIsNumber);

+ if (!bIsNumber)

+ return 0;

+ m_WordBuffer[m_WordSize] = 0;  // NUL-terminate the token before handing it to FXSYS_atoui().

+ return FXSYS_atoui(reinterpret_cast<const FX_CHAR*>(m_WordBuffer));

+bool CPDF_SyntaxParser::IsWholeWord(FX_FILESIZE startpos,  // True if the |tag| occurrence at |startpos| is not joined to neighbouring word bytes.

+ FX_FILESIZE limit,  // The right-hand neighbour is inspected only when startpos + taglen <= limit.

+ const CFX_ByteStringC& tag,

+ FX_BOOL checkKeyword) {  // When set, an adjacent delimiter byte also disqualifies the match.

+ const FX_DWORD taglen = tag.GetLength();

+ bool bCheckLeft = !PDFCharIsDelimiter(tag[0]) && !PDFCharIsWhitespace(tag[0]);  // A side is checked only if the tag's own edge byte is a regular character.

+ bool bCheckRight = !PDFCharIsDelimiter(tag[taglen - 1]) &&  // NOTE(review): assumes |tag| is non-empty.

+ !PDFCharIsWhitespace(tag[taglen - 1]);

+ uint8_t ch;

+ if (bCheckRight && startpos + (int32_t)taglen <= limit &&

+ GetCharAt(startpos + (int32_t)taglen, ch)) {  // Byte immediately after the tag.

+ if (PDFCharIsNumeric(ch) || PDFCharIsOther(ch) ||

+ (checkKeyword && PDFCharIsDelimiter(ch))) {

+ return false;

+ }

+ if (bCheckLeft && startpos > 0 && GetCharAt(startpos - 1, ch)) {  // Byte immediately before the tag.

+ if (PDFCharIsNumeric(ch) || PDFCharIsOther(ch) ||

+ (checkKeyword && PDFCharIsDelimiter(ch))) {

+ return false;

+ }

+ return true;

+// TODO(dsinclair): Split into a SearchWordForward and SearchWordBackwards

+// and drop the bool.

+FX_BOOL CPDF_SyntaxParser::SearchWord(const CFX_ByteStringC& tag,  // Searches for |tag| starting at m_Pos; on success sets m_Pos to the match start and returns TRUE.

+ FX_BOOL bWholeWord,  // When set, a match must also pass IsWholeWord().

+ FX_BOOL bForward,  // Scan direction: forward through the file when set, backward otherwise.

+ FX_FILESIZE limit) {  // Maximum distance from m_Pos to scan; 0 means no limit.

+ int32_t taglen = tag.GetLength();

+ if (taglen == 0)

+ return FALSE;

+ FX_FILESIZE pos = m_Pos;

+ int32_t offset = 0;  // Index into the tag of the next byte expected.

+ if (!bForward)

+ offset = taglen - 1;  // A backward search matches the tag from its last byte.

+ const uint8_t* tag_data = tag.GetPtr();

+ uint8_t byte;

+ while (1) {

+ if (bForward) {

+ if (limit && pos >= m_Pos + limit)

+ return FALSE;

+ if (!GetCharAt(pos, byte))

+ return FALSE;

+ } else {

+ if (limit && pos <= m_Pos - limit)

+ return FALSE;

+ if (!GetCharAtBackward(pos, byte))

+ return FALSE;

+ }

+ if (byte == tag_data[offset]) {

+ if (bForward) {

+ offset++;

+ if (offset < taglen) {

+ pos++;

+ continue;  // Partial match: keep comparing.

+ }

+ } else {

+ offset--;

+ if (offset >= 0) {

+ pos--;

+ continue;  // Partial match: keep comparing.

+ }

+ FX_FILESIZE startpos = bForward ? pos - taglen + 1 : pos;  // All tag bytes matched: compute the position of the first one.

+ if (!bWholeWord || IsWholeWord(startpos, limit, tag, FALSE)) {  // NOTE(review): |limit| is a distance here but IsWholeWord() compares it with positions - confirm.

+ m_Pos = startpos;

+ return TRUE;

+ }

+ if (bForward) {

+ offset = byte == tag_data[0] ? 1 : 0;  // Restart matching, counting this byte if it begins the tag.

+ pos++;

+ } else {

+ offset = byte == tag_data[taglen - 1] ? taglen - 2 : taglen - 1;  // Restart matching, counting this byte if it ends the tag.

+ pos--;

+ }

+ if (pos < 0)

+ return FALSE;  // A backward scan ran off the start.

+ }

+ return FALSE;

+int32_t CPDF_SyntaxParser::SearchMultiWord(const CFX_ByteStringC& tags,  // Scans forward from m_Pos for any tag in a NUL-separated list; returns the matching tag's index or -1.

+ FX_BOOL bWholeWord,  // When set, a match must also pass IsWholeWord().

+ FX_FILESIZE limit) {  // Maximum number of bytes to scan; 0 means no limit.

+ int32_t ntags = 1;

+ for (int i = 0; i < tags.GetLength(); ++i) {  // One tag plus one more per embedded NUL separator.

+ if (tags[i] == 0)

+ ++ntags;

+ }

+ std::vector<SearchTagRecord> patterns(ntags);

+ FX_DWORD start = 0;  // Index in |tags| where the current tag begins.

+ FX_DWORD itag = 0;

+ FX_DWORD max_len = 0;  // NOTE(review): computed but not used later in this function.

+ for (int i = 0; i <= tags.GetLength(); ++i) {  // Reads tags[GetLength()] to close the last tag; assumes a NUL terminator is there - TODO confirm.

+ if (tags[i] == 0) {

+ FX_DWORD len = i - start;

+ max_len = std::max(len, max_len);

+ patterns[itag].m_pTag = tags.GetCStr() + start;

+ patterns[itag].m_Len = len;

+ patterns[itag].m_Offset = 0;

+ start = i + 1;

+ ++itag;

+ }

+ const FX_FILESIZE pos_limit = m_Pos + limit;

+ for (FX_FILESIZE pos = m_Pos; !limit || pos < pos_limit; ++pos) {

+ uint8_t byte;

+ if (!GetCharAt(pos, byte))

+ break;

+ for (int i = 0; i < ntags; ++i) {  // Advance every tag's match state with this byte.

+ SearchTagRecord& pat = patterns[i];

+ if (pat.m_pTag[pat.m_Offset] != byte) {

+ pat.m_Offset = (pat.m_pTag[0] == byte) ? 1 : 0;  // Mismatch: restart, counting this byte if it begins the tag.

+ continue;

+ }

+ ++pat.m_Offset;

+ if (pat.m_Offset != pat.m_Len)

+ continue;

+ if (!bWholeWord ||

+ IsWholeWord(pos - pat.m_Len, limit,  // NOTE(review): |pos| is the last matched byte, so the match starts at pos - m_Len + 1 - confirm.

+ CFX_ByteStringC(pat.m_pTag, pat.m_Len), FALSE)) {

+ return i;

+ }

+ pat.m_Offset = (pat.m_pTag[0] == byte) ? 1 : 0;  // Match rejected by the whole-word test: restart this tag.

+ }

+ return -1;

+FX_FILESIZE CPDF_SyntaxParser::FindTag(const CFX_ByteStringC& tag,  // Scans forward from m_Pos for |tag|; returns the match start as an offset from the initial m_Pos, or -1.

+ FX_FILESIZE limit) {  // Distance from m_Pos at which to give up; callers in this file pass 0.

+ int32_t taglen = tag.GetLength();

+ int32_t match = 0;  // Number of leading tag bytes matched so far.

+ limit += m_Pos;  // NOTE(review): with limit == 0 and m_Pos != 0 this becomes non-zero; the stop check below is then never equal once a byte is read - confirm intended.

+ FX_FILESIZE startpos = m_Pos;

+ while (1) {

+ uint8_t ch;

+ if (!GetNextChar(ch))

+ return -1;

+ if (ch == tag[match]) {

+ match++;

+ if (match == taglen)

+ return m_Pos - startpos - taglen;  // m_Pos is left just past the matched tag.

+ } else {

+ match = ch == tag[0] ? 1 : 0;  // Restart, counting this byte if it begins the tag.

+ }

+ if (limit && m_Pos == limit)

+ return -1;

+ }

+ return -1;

+void CPDF_SyntaxParser::SetEncrypt(  // Installs the crypto handler used by GetObject(), GetObjectByStrict() and ReadStream().

+ std::unique_ptr<CPDF_CryptoHandler> pCryptoHandler) {

+ m_pCryptoHandler = std::move(pCryptoHandler);  // Ownership moves into the parser.