| OLD | NEW |
| (Empty) |
| 1 // Copyright 2016 PDFium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com | |
| 6 | |
| 7 #include "core/fpdfapi/fpdf_parser/cpdf_syntax_parser.h" | |
| 8 | |
| 9 #include <vector> | |
| 10 | |
| 11 #include "core/fpdfapi/cpdf_modulemgr.h" | |
| 12 #include "core/fpdfapi/fpdf_parser/cpdf_array.h" | |
| 13 #include "core/fpdfapi/fpdf_parser/cpdf_boolean.h" | |
| 14 #include "core/fpdfapi/fpdf_parser/cpdf_crypto_handler.h" | |
| 15 #include "core/fpdfapi/fpdf_parser/cpdf_dictionary.h" | |
| 16 #include "core/fpdfapi/fpdf_parser/cpdf_name.h" | |
| 17 #include "core/fpdfapi/fpdf_parser/cpdf_null.h" | |
| 18 #include "core/fpdfapi/fpdf_parser/cpdf_number.h" | |
| 19 #include "core/fpdfapi/fpdf_parser/cpdf_reference.h" | |
| 20 #include "core/fpdfapi/fpdf_parser/cpdf_stream.h" | |
| 21 #include "core/fpdfapi/fpdf_parser/cpdf_string.h" | |
| 22 #include "core/fpdfapi/fpdf_parser/fpdf_parser_decode.h" | |
| 23 #include "core/fpdfapi/fpdf_parser/fpdf_parser_utility.h" | |
| 24 #include "core/fxcrt/fx_ext.h" | |
| 25 #include "third_party/base/numerics/safe_math.h" | |
| 26 | |
| 27 namespace { | |
| 28 | |
| 29 struct SearchTagRecord { | |
| 30 CFX_ByteStringC m_bsTag; | |
| 31 FX_STRSIZE m_Offset; | |
| 32 }; | |
| 33 | |
| 34 } // namespace | |
| 35 | |
| 36 // static | |
| 37 int CPDF_SyntaxParser::s_CurrentRecursionDepth = 0; | |
| 38 | |
| 39 CPDF_SyntaxParser::CPDF_SyntaxParser() | |
| 40 : CPDF_SyntaxParser(CFX_WeakPtr<CFX_ByteStringPool>()) {} | |
| 41 | |
| 42 CPDF_SyntaxParser::CPDF_SyntaxParser( | |
| 43 const CFX_WeakPtr<CFX_ByteStringPool>& pPool) | |
| 44 : m_MetadataObjnum(0), | |
| 45 m_pFileAccess(nullptr), | |
| 46 m_pFileBuf(nullptr), | |
| 47 m_BufSize(CPDF_ModuleMgr::kFileBufSize), | |
| 48 m_pPool(pPool) {} | |
| 49 | |
| 50 CPDF_SyntaxParser::~CPDF_SyntaxParser() { | |
| 51 FX_Free(m_pFileBuf); | |
| 52 } | |
| 53 | |
| 54 FX_BOOL CPDF_SyntaxParser::GetCharAt(FX_FILESIZE pos, uint8_t& ch) { | |
| 55 CFX_AutoRestorer<FX_FILESIZE> save_pos(&m_Pos); | |
| 56 m_Pos = pos; | |
| 57 return GetNextChar(ch); | |
| 58 } | |
| 59 | |
| 60 FX_BOOL CPDF_SyntaxParser::GetNextChar(uint8_t& ch) { | |
| 61 FX_FILESIZE pos = m_Pos + m_HeaderOffset; | |
| 62 if (pos >= m_FileLen) | |
| 63 return FALSE; | |
| 64 | |
| 65 if (m_BufOffset >= pos || (FX_FILESIZE)(m_BufOffset + m_BufSize) <= pos) { | |
| 66 FX_FILESIZE read_pos = pos; | |
| 67 uint32_t read_size = m_BufSize; | |
| 68 if ((FX_FILESIZE)read_size > m_FileLen) | |
| 69 read_size = (uint32_t)m_FileLen; | |
| 70 | |
| 71 if ((FX_FILESIZE)(read_pos + read_size) > m_FileLen) { | |
| 72 if (m_FileLen < (FX_FILESIZE)read_size) { | |
| 73 read_pos = 0; | |
| 74 read_size = (uint32_t)m_FileLen; | |
| 75 } else { | |
| 76 read_pos = m_FileLen - read_size; | |
| 77 } | |
| 78 } | |
| 79 | |
| 80 if (!m_pFileAccess->ReadBlock(m_pFileBuf, read_pos, read_size)) | |
| 81 return FALSE; | |
| 82 | |
| 83 m_BufOffset = read_pos; | |
| 84 } | |
| 85 ch = m_pFileBuf[pos - m_BufOffset]; | |
| 86 m_Pos++; | |
| 87 return TRUE; | |
| 88 } | |
| 89 | |
| 90 FX_BOOL CPDF_SyntaxParser::GetCharAtBackward(FX_FILESIZE pos, uint8_t& ch) { | |
| 91 pos += m_HeaderOffset; | |
| 92 if (pos >= m_FileLen) | |
| 93 return FALSE; | |
| 94 | |
| 95 if (m_BufOffset >= pos || (FX_FILESIZE)(m_BufOffset + m_BufSize) <= pos) { | |
| 96 FX_FILESIZE read_pos; | |
| 97 if (pos < (FX_FILESIZE)m_BufSize) | |
| 98 read_pos = 0; | |
| 99 else | |
| 100 read_pos = pos - m_BufSize + 1; | |
| 101 | |
| 102 uint32_t read_size = m_BufSize; | |
| 103 if ((FX_FILESIZE)(read_pos + read_size) > m_FileLen) { | |
| 104 if (m_FileLen < (FX_FILESIZE)read_size) { | |
| 105 read_pos = 0; | |
| 106 read_size = (uint32_t)m_FileLen; | |
| 107 } else { | |
| 108 read_pos = m_FileLen - read_size; | |
| 109 } | |
| 110 } | |
| 111 | |
| 112 if (!m_pFileAccess->ReadBlock(m_pFileBuf, read_pos, read_size)) | |
| 113 return FALSE; | |
| 114 | |
| 115 m_BufOffset = read_pos; | |
| 116 } | |
| 117 ch = m_pFileBuf[pos - m_BufOffset]; | |
| 118 return TRUE; | |
| 119 } | |
| 120 | |
| 121 FX_BOOL CPDF_SyntaxParser::ReadBlock(uint8_t* pBuf, uint32_t size) { | |
| 122 if (!m_pFileAccess->ReadBlock(pBuf, m_Pos + m_HeaderOffset, size)) | |
| 123 return FALSE; | |
| 124 m_Pos += size; | |
| 125 return TRUE; | |
| 126 } | |
| 127 | |
| 128 void CPDF_SyntaxParser::GetNextWordInternal(bool* bIsNumber) { | |
| 129 m_WordSize = 0; | |
| 130 if (bIsNumber) | |
| 131 *bIsNumber = true; | |
| 132 | |
| 133 uint8_t ch; | |
| 134 if (!GetNextChar(ch)) | |
| 135 return; | |
| 136 | |
| 137 while (1) { | |
| 138 while (PDFCharIsWhitespace(ch)) { | |
| 139 if (!GetNextChar(ch)) | |
| 140 return; | |
| 141 } | |
| 142 | |
| 143 if (ch != '%') | |
| 144 break; | |
| 145 | |
| 146 while (1) { | |
| 147 if (!GetNextChar(ch)) | |
| 148 return; | |
| 149 if (PDFCharIsLineEnding(ch)) | |
| 150 break; | |
| 151 } | |
| 152 } | |
| 153 | |
| 154 if (PDFCharIsDelimiter(ch)) { | |
| 155 if (bIsNumber) | |
| 156 *bIsNumber = false; | |
| 157 | |
| 158 m_WordBuffer[m_WordSize++] = ch; | |
| 159 if (ch == '/') { | |
| 160 while (1) { | |
| 161 if (!GetNextChar(ch)) | |
| 162 return; | |
| 163 | |
| 164 if (!PDFCharIsOther(ch) && !PDFCharIsNumeric(ch)) { | |
| 165 m_Pos--; | |
| 166 return; | |
| 167 } | |
| 168 | |
| 169 if (m_WordSize < sizeof(m_WordBuffer) - 1) | |
| 170 m_WordBuffer[m_WordSize++] = ch; | |
| 171 } | |
| 172 } else if (ch == '<') { | |
| 173 if (!GetNextChar(ch)) | |
| 174 return; | |
| 175 | |
| 176 if (ch == '<') | |
| 177 m_WordBuffer[m_WordSize++] = ch; | |
| 178 else | |
| 179 m_Pos--; | |
| 180 } else if (ch == '>') { | |
| 181 if (!GetNextChar(ch)) | |
| 182 return; | |
| 183 | |
| 184 if (ch == '>') | |
| 185 m_WordBuffer[m_WordSize++] = ch; | |
| 186 else | |
| 187 m_Pos--; | |
| 188 } | |
| 189 return; | |
| 190 } | |
| 191 | |
| 192 while (1) { | |
| 193 if (m_WordSize < sizeof(m_WordBuffer) - 1) | |
| 194 m_WordBuffer[m_WordSize++] = ch; | |
| 195 | |
| 196 if (!PDFCharIsNumeric(ch)) { | |
| 197 if (bIsNumber) | |
| 198 *bIsNumber = false; | |
| 199 } | |
| 200 | |
| 201 if (!GetNextChar(ch)) | |
| 202 return; | |
| 203 | |
| 204 if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) { | |
| 205 m_Pos--; | |
| 206 break; | |
| 207 } | |
| 208 } | |
| 209 } | |
| 210 | |
| 211 CFX_ByteString CPDF_SyntaxParser::ReadString() { | |
| 212 uint8_t ch; | |
| 213 if (!GetNextChar(ch)) | |
| 214 return CFX_ByteString(); | |
| 215 | |
| 216 CFX_ByteTextBuf buf; | |
| 217 int32_t parlevel = 0; | |
| 218 int32_t status = 0; | |
| 219 int32_t iEscCode = 0; | |
| 220 while (1) { | |
| 221 switch (status) { | |
| 222 case 0: | |
| 223 if (ch == ')') { | |
| 224 if (parlevel == 0) { | |
| 225 return buf.MakeString(); | |
| 226 } | |
| 227 parlevel--; | |
| 228 buf.AppendChar(')'); | |
| 229 } else if (ch == '(') { | |
| 230 parlevel++; | |
| 231 buf.AppendChar('('); | |
| 232 } else if (ch == '\\') { | |
| 233 status = 1; | |
| 234 } else { | |
| 235 buf.AppendChar(ch); | |
| 236 } | |
| 237 break; | |
| 238 case 1: | |
| 239 if (ch >= '0' && ch <= '7') { | |
| 240 iEscCode = FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch)); | |
| 241 status = 2; | |
| 242 break; | |
| 243 } | |
| 244 | |
| 245 if (ch == 'n') { | |
| 246 buf.AppendChar('\n'); | |
| 247 } else if (ch == 'r') { | |
| 248 buf.AppendChar('\r'); | |
| 249 } else if (ch == 't') { | |
| 250 buf.AppendChar('\t'); | |
| 251 } else if (ch == 'b') { | |
| 252 buf.AppendChar('\b'); | |
| 253 } else if (ch == 'f') { | |
| 254 buf.AppendChar('\f'); | |
| 255 } else if (ch == '\r') { | |
| 256 status = 4; | |
| 257 break; | |
| 258 } else if (ch != '\n') { | |
| 259 buf.AppendChar(ch); | |
| 260 } | |
| 261 status = 0; | |
| 262 break; | |
| 263 case 2: | |
| 264 if (ch >= '0' && ch <= '7') { | |
| 265 iEscCode = | |
| 266 iEscCode * 8 + FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch)); | |
| 267 status = 3; | |
| 268 } else { | |
| 269 buf.AppendChar(iEscCode); | |
| 270 status = 0; | |
| 271 continue; | |
| 272 } | |
| 273 break; | |
| 274 case 3: | |
| 275 if (ch >= '0' && ch <= '7') { | |
| 276 iEscCode = | |
| 277 iEscCode * 8 + FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch)); | |
| 278 buf.AppendChar(iEscCode); | |
| 279 status = 0; | |
| 280 } else { | |
| 281 buf.AppendChar(iEscCode); | |
| 282 status = 0; | |
| 283 continue; | |
| 284 } | |
| 285 break; | |
| 286 case 4: | |
| 287 status = 0; | |
| 288 if (ch != '\n') | |
| 289 continue; | |
| 290 break; | |
| 291 } | |
| 292 | |
| 293 if (!GetNextChar(ch)) | |
| 294 break; | |
| 295 } | |
| 296 | |
| 297 GetNextChar(ch); | |
| 298 return buf.MakeString(); | |
| 299 } | |
| 300 | |
| 301 CFX_ByteString CPDF_SyntaxParser::ReadHexString() { | |
| 302 uint8_t ch; | |
| 303 if (!GetNextChar(ch)) | |
| 304 return CFX_ByteString(); | |
| 305 | |
| 306 CFX_ByteTextBuf buf; | |
| 307 bool bFirst = true; | |
| 308 uint8_t code = 0; | |
| 309 while (1) { | |
| 310 if (ch == '>') | |
| 311 break; | |
| 312 | |
| 313 if (std::isxdigit(ch)) { | |
| 314 int val = FXSYS_toHexDigit(ch); | |
| 315 if (bFirst) { | |
| 316 code = val * 16; | |
| 317 } else { | |
| 318 code += val; | |
| 319 buf.AppendByte(code); | |
| 320 } | |
| 321 bFirst = !bFirst; | |
| 322 } | |
| 323 | |
| 324 if (!GetNextChar(ch)) | |
| 325 break; | |
| 326 } | |
| 327 if (!bFirst) | |
| 328 buf.AppendByte(code); | |
| 329 | |
| 330 return buf.MakeString(); | |
| 331 } | |
| 332 | |
| 333 void CPDF_SyntaxParser::ToNextLine() { | |
| 334 uint8_t ch; | |
| 335 while (GetNextChar(ch)) { | |
| 336 if (ch == '\n') | |
| 337 break; | |
| 338 | |
| 339 if (ch == '\r') { | |
| 340 GetNextChar(ch); | |
| 341 if (ch != '\n') | |
| 342 --m_Pos; | |
| 343 break; | |
| 344 } | |
| 345 } | |
| 346 } | |
| 347 | |
| 348 void CPDF_SyntaxParser::ToNextWord() { | |
| 349 uint8_t ch; | |
| 350 if (!GetNextChar(ch)) | |
| 351 return; | |
| 352 | |
| 353 while (1) { | |
| 354 while (PDFCharIsWhitespace(ch)) { | |
| 355 if (!GetNextChar(ch)) | |
| 356 return; | |
| 357 } | |
| 358 | |
| 359 if (ch != '%') | |
| 360 break; | |
| 361 | |
| 362 while (1) { | |
| 363 if (!GetNextChar(ch)) | |
| 364 return; | |
| 365 if (PDFCharIsLineEnding(ch)) | |
| 366 break; | |
| 367 } | |
| 368 } | |
| 369 m_Pos--; | |
| 370 } | |
| 371 | |
| 372 CFX_ByteString CPDF_SyntaxParser::GetNextWord(bool* bIsNumber) { | |
| 373 GetNextWordInternal(bIsNumber); | |
| 374 return CFX_ByteString((const FX_CHAR*)m_WordBuffer, m_WordSize); | |
| 375 } | |
| 376 | |
| 377 CFX_ByteString CPDF_SyntaxParser::GetKeyword() { | |
| 378 return GetNextWord(nullptr); | |
| 379 } | |
| 380 | |
| 381 CPDF_Object* CPDF_SyntaxParser::GetObject(CPDF_IndirectObjectHolder* pObjList, | |
| 382 uint32_t objnum, | |
| 383 uint32_t gennum, | |
| 384 FX_BOOL bDecrypt) { | |
| 385 CFX_AutoRestorer<int> restorer(&s_CurrentRecursionDepth); | |
| 386 if (++s_CurrentRecursionDepth > kParserMaxRecursionDepth) | |
| 387 return nullptr; | |
| 388 | |
| 389 FX_FILESIZE SavedObjPos = m_Pos; | |
| 390 bool bIsNumber; | |
| 391 CFX_ByteString word = GetNextWord(&bIsNumber); | |
| 392 if (word.GetLength() == 0) | |
| 393 return nullptr; | |
| 394 | |
| 395 if (bIsNumber) { | |
| 396 FX_FILESIZE SavedPos = m_Pos; | |
| 397 CFX_ByteString nextword = GetNextWord(&bIsNumber); | |
| 398 if (bIsNumber) { | |
| 399 CFX_ByteString nextword2 = GetNextWord(nullptr); | |
| 400 if (nextword2 == "R") | |
| 401 return new CPDF_Reference(pObjList, FXSYS_atoui(word.c_str())); | |
| 402 } | |
| 403 m_Pos = SavedPos; | |
| 404 return new CPDF_Number(word.AsStringC()); | |
| 405 } | |
| 406 | |
| 407 if (word == "true" || word == "false") | |
| 408 return new CPDF_Boolean(word == "true"); | |
| 409 | |
| 410 if (word == "null") | |
| 411 return new CPDF_Null; | |
| 412 | |
| 413 if (word == "(") { | |
| 414 CFX_ByteString str = ReadString(); | |
| 415 if (m_pCryptoHandler && bDecrypt) | |
| 416 m_pCryptoHandler->Decrypt(objnum, gennum, str); | |
| 417 return new CPDF_String(MaybeIntern(str), FALSE); | |
| 418 } | |
| 419 | |
| 420 if (word == "<") { | |
| 421 CFX_ByteString str = ReadHexString(); | |
| 422 if (m_pCryptoHandler && bDecrypt) | |
| 423 m_pCryptoHandler->Decrypt(objnum, gennum, str); | |
| 424 return new CPDF_String(MaybeIntern(str), TRUE); | |
| 425 } | |
| 426 | |
| 427 if (word == "[") { | |
| 428 CPDF_Array* pArray = new CPDF_Array; | |
| 429 while (CPDF_Object* pObj = GetObject(pObjList, objnum, gennum, true)) | |
| 430 pArray->Add(pObj); | |
| 431 | |
| 432 return pArray; | |
| 433 } | |
| 434 | |
| 435 if (word[0] == '/') { | |
| 436 return new CPDF_Name(MaybeIntern( | |
| 437 PDF_NameDecode(CFX_ByteStringC(m_WordBuffer + 1, m_WordSize - 1)))); | |
| 438 } | |
| 439 | |
| 440 if (word == "<<") { | |
| 441 int32_t nKeys = 0; | |
| 442 FX_FILESIZE dwSignValuePos = 0; | |
| 443 | |
| 444 std::unique_ptr<CPDF_Dictionary, ReleaseDeleter<CPDF_Dictionary>> pDict( | |
| 445 new CPDF_Dictionary(m_pPool)); | |
| 446 while (1) { | |
| 447 CFX_ByteString key = GetNextWord(nullptr); | |
| 448 if (key.IsEmpty()) | |
| 449 return nullptr; | |
| 450 | |
| 451 FX_FILESIZE SavedPos = m_Pos - key.GetLength(); | |
| 452 if (key == ">>") | |
| 453 break; | |
| 454 | |
| 455 if (key == "endobj") { | |
| 456 m_Pos = SavedPos; | |
| 457 break; | |
| 458 } | |
| 459 | |
| 460 if (key[0] != '/') | |
| 461 continue; | |
| 462 | |
| 463 ++nKeys; | |
| 464 key = PDF_NameDecode(key); | |
| 465 if (key.IsEmpty()) | |
| 466 continue; | |
| 467 | |
| 468 if (key == "/Contents") | |
| 469 dwSignValuePos = m_Pos; | |
| 470 | |
| 471 CPDF_Object* pObj = GetObject(pObjList, objnum, gennum, true); | |
| 472 if (!pObj) | |
| 473 continue; | |
| 474 | |
| 475 CFX_ByteString keyNoSlash(key.raw_str() + 1, key.GetLength() - 1); | |
| 476 pDict->SetFor(keyNoSlash, pObj); | |
| 477 } | |
| 478 | |
| 479 // Only when this is a signature dictionary and has contents, we reset the | |
| 480 // contents to the un-decrypted form. | |
| 481 if (pDict->IsSignatureDict() && dwSignValuePos) { | |
| 482 CFX_AutoRestorer<FX_FILESIZE> save_pos(&m_Pos); | |
| 483 m_Pos = dwSignValuePos; | |
| 484 pDict->SetFor("Contents", GetObject(pObjList, objnum, gennum, false)); | |
| 485 } | |
| 486 | |
| 487 FX_FILESIZE SavedPos = m_Pos; | |
| 488 CFX_ByteString nextword = GetNextWord(nullptr); | |
| 489 if (nextword != "stream") { | |
| 490 m_Pos = SavedPos; | |
| 491 return pDict.release(); | |
| 492 } | |
| 493 return ReadStream(pDict.release(), objnum, gennum); | |
| 494 } | |
| 495 | |
| 496 if (word == ">>") | |
| 497 m_Pos = SavedObjPos; | |
| 498 | |
| 499 return nullptr; | |
| 500 } | |
| 501 | |
| 502 CPDF_Object* CPDF_SyntaxParser::GetObjectForStrict( | |
| 503 CPDF_IndirectObjectHolder* pObjList, | |
| 504 uint32_t objnum, | |
| 505 uint32_t gennum) { | |
| 506 CFX_AutoRestorer<int> restorer(&s_CurrentRecursionDepth); | |
| 507 if (++s_CurrentRecursionDepth > kParserMaxRecursionDepth) | |
| 508 return nullptr; | |
| 509 | |
| 510 FX_FILESIZE SavedObjPos = m_Pos; | |
| 511 bool bIsNumber; | |
| 512 CFX_ByteString word = GetNextWord(&bIsNumber); | |
| 513 if (word.GetLength() == 0) | |
| 514 return nullptr; | |
| 515 | |
| 516 if (bIsNumber) { | |
| 517 FX_FILESIZE SavedPos = m_Pos; | |
| 518 CFX_ByteString nextword = GetNextWord(&bIsNumber); | |
| 519 if (bIsNumber) { | |
| 520 CFX_ByteString nextword2 = GetNextWord(nullptr); | |
| 521 if (nextword2 == "R") | |
| 522 return new CPDF_Reference(pObjList, FXSYS_atoui(word.c_str())); | |
| 523 } | |
| 524 m_Pos = SavedPos; | |
| 525 return new CPDF_Number(word.AsStringC()); | |
| 526 } | |
| 527 | |
| 528 if (word == "true" || word == "false") | |
| 529 return new CPDF_Boolean(word == "true"); | |
| 530 | |
| 531 if (word == "null") | |
| 532 return new CPDF_Null; | |
| 533 | |
| 534 if (word == "(") { | |
| 535 CFX_ByteString str = ReadString(); | |
| 536 if (m_pCryptoHandler) | |
| 537 m_pCryptoHandler->Decrypt(objnum, gennum, str); | |
| 538 return new CPDF_String(MaybeIntern(str), FALSE); | |
| 539 } | |
| 540 | |
| 541 if (word == "<") { | |
| 542 CFX_ByteString str = ReadHexString(); | |
| 543 if (m_pCryptoHandler) | |
| 544 m_pCryptoHandler->Decrypt(objnum, gennum, str); | |
| 545 return new CPDF_String(MaybeIntern(str), TRUE); | |
| 546 } | |
| 547 | |
| 548 if (word == "[") { | |
| 549 std::unique_ptr<CPDF_Array, ReleaseDeleter<CPDF_Array>> pArray( | |
| 550 new CPDF_Array); | |
| 551 while (CPDF_Object* pObj = GetObject(pObjList, objnum, gennum, true)) | |
| 552 pArray->Add(pObj); | |
| 553 | |
| 554 return m_WordBuffer[0] == ']' ? pArray.release() : nullptr; | |
| 555 } | |
| 556 | |
| 557 if (word[0] == '/') { | |
| 558 return new CPDF_Name(MaybeIntern( | |
| 559 PDF_NameDecode(CFX_ByteStringC(m_WordBuffer + 1, m_WordSize - 1)))); | |
| 560 } | |
| 561 | |
| 562 if (word == "<<") { | |
| 563 std::unique_ptr<CPDF_Dictionary, ReleaseDeleter<CPDF_Dictionary>> pDict( | |
| 564 new CPDF_Dictionary(m_pPool)); | |
| 565 while (1) { | |
| 566 FX_FILESIZE SavedPos = m_Pos; | |
| 567 CFX_ByteString key = GetNextWord(nullptr); | |
| 568 if (key.IsEmpty()) | |
| 569 return nullptr; | |
| 570 | |
| 571 if (key == ">>") | |
| 572 break; | |
| 573 | |
| 574 if (key == "endobj") { | |
| 575 m_Pos = SavedPos; | |
| 576 break; | |
| 577 } | |
| 578 | |
| 579 if (key[0] != '/') | |
| 580 continue; | |
| 581 | |
| 582 key = PDF_NameDecode(key); | |
| 583 std::unique_ptr<CPDF_Object, ReleaseDeleter<CPDF_Object>> obj( | |
| 584 GetObject(pObjList, objnum, gennum, true)); | |
| 585 if (!obj) { | |
| 586 uint8_t ch; | |
| 587 while (GetNextChar(ch) && ch != 0x0A && ch != 0x0D) { | |
| 588 continue; | |
| 589 } | |
| 590 return nullptr; | |
| 591 } | |
| 592 | |
| 593 if (key.GetLength() > 1) { | |
| 594 pDict->SetFor(CFX_ByteString(key.c_str() + 1, key.GetLength() - 1), | |
| 595 obj.release()); | |
| 596 } | |
| 597 } | |
| 598 | |
| 599 FX_FILESIZE SavedPos = m_Pos; | |
| 600 CFX_ByteString nextword = GetNextWord(nullptr); | |
| 601 if (nextword != "stream") { | |
| 602 m_Pos = SavedPos; | |
| 603 return pDict.release(); | |
| 604 } | |
| 605 | |
| 606 return ReadStream(pDict.release(), objnum, gennum); | |
| 607 } | |
| 608 | |
| 609 if (word == ">>") | |
| 610 m_Pos = SavedObjPos; | |
| 611 | |
| 612 return nullptr; | |
| 613 } | |
| 614 | |
| 615 unsigned int CPDF_SyntaxParser::ReadEOLMarkers(FX_FILESIZE pos) { | |
| 616 unsigned char byte1 = 0; | |
| 617 unsigned char byte2 = 0; | |
| 618 | |
| 619 GetCharAt(pos, byte1); | |
| 620 GetCharAt(pos + 1, byte2); | |
| 621 | |
| 622 if (byte1 == '\r' && byte2 == '\n') | |
| 623 return 2; | |
| 624 | |
| 625 if (byte1 == '\r' || byte1 == '\n') | |
| 626 return 1; | |
| 627 | |
| 628 return 0; | |
| 629 } | |
| 630 | |
| 631 CPDF_Stream* CPDF_SyntaxParser::ReadStream(CPDF_Dictionary* pDict, | |
| 632 uint32_t objnum, | |
| 633 uint32_t gennum) { | |
| 634 CPDF_Object* pLenObj = pDict->GetObjectFor("Length"); | |
| 635 FX_FILESIZE len = -1; | |
| 636 CPDF_Reference* pLenObjRef = ToReference(pLenObj); | |
| 637 | |
| 638 bool differingObjNum = !pLenObjRef || (pLenObjRef->GetObjList() && | |
| 639 pLenObjRef->GetRefObjNum() != objnum); | |
| 640 if (pLenObj && differingObjNum) | |
| 641 len = pLenObj->GetInteger(); | |
| 642 | |
| 643 // Locate the start of stream. | |
| 644 ToNextLine(); | |
| 645 FX_FILESIZE streamStartPos = m_Pos; | |
| 646 | |
| 647 const CFX_ByteStringC kEndStreamStr("endstream"); | |
| 648 const CFX_ByteStringC kEndObjStr("endobj"); | |
| 649 | |
| 650 CPDF_CryptoHandler* pCryptoHandler = | |
| 651 objnum == (uint32_t)m_MetadataObjnum ? nullptr : m_pCryptoHandler.get(); | |
| 652 if (!pCryptoHandler) { | |
| 653 FX_BOOL bSearchForKeyword = TRUE; | |
| 654 if (len >= 0) { | |
| 655 pdfium::base::CheckedNumeric<FX_FILESIZE> pos = m_Pos; | |
| 656 pos += len; | |
| 657 if (pos.IsValid() && pos.ValueOrDie() < m_FileLen) | |
| 658 m_Pos = pos.ValueOrDie(); | |
| 659 | |
| 660 m_Pos += ReadEOLMarkers(m_Pos); | |
| 661 FXSYS_memset(m_WordBuffer, 0, kEndStreamStr.GetLength() + 1); | |
| 662 GetNextWordInternal(nullptr); | |
| 663 // Earlier version of PDF specification doesn't require EOL marker before | |
| 664 // 'endstream' keyword. If keyword 'endstream' follows the bytes in | |
| 665 // specified length, it signals the end of stream. | |
| 666 if (FXSYS_memcmp(m_WordBuffer, kEndStreamStr.raw_str(), | |
| 667 kEndStreamStr.GetLength()) == 0) { | |
| 668 bSearchForKeyword = FALSE; | |
| 669 } | |
| 670 } | |
| 671 | |
| 672 if (bSearchForKeyword) { | |
| 673 // If len is not available, len needs to be calculated | |
| 674 // by searching the keywords "endstream" or "endobj". | |
| 675 m_Pos = streamStartPos; | |
| 676 FX_FILESIZE endStreamOffset = 0; | |
| 677 while (endStreamOffset >= 0) { | |
| 678 endStreamOffset = FindTag(kEndStreamStr, 0); | |
| 679 | |
| 680 // Can't find "endstream". | |
| 681 if (endStreamOffset < 0) | |
| 682 break; | |
| 683 | |
| 684 // Stop searching when "endstream" is found. | |
| 685 if (IsWholeWord(m_Pos - kEndStreamStr.GetLength(), m_FileLen, | |
| 686 kEndStreamStr, TRUE)) { | |
| 687 endStreamOffset = m_Pos - streamStartPos - kEndStreamStr.GetLength(); | |
| 688 break; | |
| 689 } | |
| 690 } | |
| 691 | |
| 692 m_Pos = streamStartPos; | |
| 693 FX_FILESIZE endObjOffset = 0; | |
| 694 while (endObjOffset >= 0) { | |
| 695 endObjOffset = FindTag(kEndObjStr, 0); | |
| 696 | |
| 697 // Can't find "endobj". | |
| 698 if (endObjOffset < 0) | |
| 699 break; | |
| 700 | |
| 701 // Stop searching when "endobj" is found. | |
| 702 if (IsWholeWord(m_Pos - kEndObjStr.GetLength(), m_FileLen, kEndObjStr, | |
| 703 TRUE)) { | |
| 704 endObjOffset = m_Pos - streamStartPos - kEndObjStr.GetLength(); | |
| 705 break; | |
| 706 } | |
| 707 } | |
| 708 | |
| 709 // Can't find "endstream" or "endobj". | |
| 710 if (endStreamOffset < 0 && endObjOffset < 0) { | |
| 711 pDict->Release(); | |
| 712 return nullptr; | |
| 713 } | |
| 714 | |
| 715 if (endStreamOffset < 0 && endObjOffset >= 0) { | |
| 716 // Correct the position of end stream. | |
| 717 endStreamOffset = endObjOffset; | |
| 718 } else if (endStreamOffset >= 0 && endObjOffset < 0) { | |
| 719 // Correct the position of end obj. | |
| 720 endObjOffset = endStreamOffset; | |
| 721 } else if (endStreamOffset > endObjOffset) { | |
| 722 endStreamOffset = endObjOffset; | |
| 723 } | |
| 724 | |
| 725 len = endStreamOffset; | |
| 726 int numMarkers = ReadEOLMarkers(streamStartPos + endStreamOffset - 2); | |
| 727 if (numMarkers == 2) { | |
| 728 len -= 2; | |
| 729 } else { | |
| 730 numMarkers = ReadEOLMarkers(streamStartPos + endStreamOffset - 1); | |
| 731 if (numMarkers == 1) { | |
| 732 len -= 1; | |
| 733 } | |
| 734 } | |
| 735 | |
| 736 if (len < 0) { | |
| 737 pDict->Release(); | |
| 738 return nullptr; | |
| 739 } | |
| 740 pDict->SetIntegerFor("Length", len); | |
| 741 } | |
| 742 m_Pos = streamStartPos; | |
| 743 } | |
| 744 | |
| 745 if (len < 0) { | |
| 746 pDict->Release(); | |
| 747 return nullptr; | |
| 748 } | |
| 749 | |
| 750 uint8_t* pData = nullptr; | |
| 751 if (len > 0) { | |
| 752 pData = FX_Alloc(uint8_t, len); | |
| 753 ReadBlock(pData, len); | |
| 754 if (pCryptoHandler) { | |
| 755 CFX_BinaryBuf dest_buf; | |
| 756 dest_buf.EstimateSize(pCryptoHandler->DecryptGetSize(len)); | |
| 757 | |
| 758 void* context = pCryptoHandler->DecryptStart(objnum, gennum); | |
| 759 pCryptoHandler->DecryptStream(context, pData, len, dest_buf); | |
| 760 pCryptoHandler->DecryptFinish(context, dest_buf); | |
| 761 | |
| 762 FX_Free(pData); | |
| 763 pData = dest_buf.GetBuffer(); | |
| 764 len = dest_buf.GetSize(); | |
| 765 dest_buf.DetachBuffer(); | |
| 766 } | |
| 767 } | |
| 768 | |
| 769 CPDF_Stream* pStream = new CPDF_Stream(pData, len, pDict); | |
| 770 streamStartPos = m_Pos; | |
| 771 FXSYS_memset(m_WordBuffer, 0, kEndObjStr.GetLength() + 1); | |
| 772 | |
| 773 GetNextWordInternal(nullptr); | |
| 774 | |
| 775 int numMarkers = ReadEOLMarkers(m_Pos); | |
| 776 if (m_WordSize == static_cast<unsigned int>(kEndObjStr.GetLength()) && | |
| 777 numMarkers != 0 && | |
| 778 FXSYS_memcmp(m_WordBuffer, kEndObjStr.raw_str(), | |
| 779 kEndObjStr.GetLength()) == 0) { | |
| 780 m_Pos = streamStartPos; | |
| 781 } | |
| 782 return pStream; | |
| 783 } | |
| 784 | |
| 785 void CPDF_SyntaxParser::InitParser(IFX_FileRead* pFileAccess, | |
| 786 uint32_t HeaderOffset) { | |
| 787 FX_Free(m_pFileBuf); | |
| 788 | |
| 789 m_pFileBuf = FX_Alloc(uint8_t, m_BufSize); | |
| 790 m_HeaderOffset = HeaderOffset; | |
| 791 m_FileLen = pFileAccess->GetSize(); | |
| 792 m_Pos = 0; | |
| 793 m_pFileAccess = pFileAccess; | |
| 794 m_BufOffset = 0; | |
| 795 pFileAccess->ReadBlock( | |
| 796 m_pFileBuf, 0, | |
| 797 (size_t)((FX_FILESIZE)m_BufSize > m_FileLen ? m_FileLen : m_BufSize)); | |
| 798 } | |
| 799 | |
| 800 uint32_t CPDF_SyntaxParser::GetDirectNum() { | |
| 801 bool bIsNumber; | |
| 802 GetNextWordInternal(&bIsNumber); | |
| 803 if (!bIsNumber) | |
| 804 return 0; | |
| 805 | |
| 806 m_WordBuffer[m_WordSize] = 0; | |
| 807 return FXSYS_atoui(reinterpret_cast<const FX_CHAR*>(m_WordBuffer)); | |
| 808 } | |
| 809 | |
| 810 bool CPDF_SyntaxParser::IsWholeWord(FX_FILESIZE startpos, | |
| 811 FX_FILESIZE limit, | |
| 812 const CFX_ByteStringC& tag, | |
| 813 FX_BOOL checkKeyword) { | |
| 814 const uint32_t taglen = tag.GetLength(); | |
| 815 | |
| 816 bool bCheckLeft = !PDFCharIsDelimiter(tag[0]) && !PDFCharIsWhitespace(tag[0]); | |
| 817 bool bCheckRight = !PDFCharIsDelimiter(tag[taglen - 1]) && | |
| 818 !PDFCharIsWhitespace(tag[taglen - 1]); | |
| 819 | |
| 820 uint8_t ch; | |
| 821 if (bCheckRight && startpos + (int32_t)taglen <= limit && | |
| 822 GetCharAt(startpos + (int32_t)taglen, ch)) { | |
| 823 if (PDFCharIsNumeric(ch) || PDFCharIsOther(ch) || | |
| 824 (checkKeyword && PDFCharIsDelimiter(ch))) { | |
| 825 return false; | |
| 826 } | |
| 827 } | |
| 828 | |
| 829 if (bCheckLeft && startpos > 0 && GetCharAt(startpos - 1, ch)) { | |
| 830 if (PDFCharIsNumeric(ch) || PDFCharIsOther(ch) || | |
| 831 (checkKeyword && PDFCharIsDelimiter(ch))) { | |
| 832 return false; | |
| 833 } | |
| 834 } | |
| 835 return true; | |
| 836 } | |
| 837 | |
| 838 // TODO(dsinclair): Split into a SearchWordForward and SearchWordBackwards | |
| 839 // and drop the bool. | |
| 840 FX_BOOL CPDF_SyntaxParser::SearchWord(const CFX_ByteStringC& tag, | |
| 841 FX_BOOL bWholeWord, | |
| 842 FX_BOOL bForward, | |
| 843 FX_FILESIZE limit) { | |
| 844 int32_t taglen = tag.GetLength(); | |
| 845 if (taglen == 0) | |
| 846 return FALSE; | |
| 847 | |
| 848 FX_FILESIZE pos = m_Pos; | |
| 849 int32_t offset = 0; | |
| 850 if (!bForward) | |
| 851 offset = taglen - 1; | |
| 852 | |
| 853 const uint8_t* tag_data = tag.raw_str(); | |
| 854 uint8_t byte; | |
| 855 while (1) { | |
| 856 if (bForward) { | |
| 857 if (limit && pos >= m_Pos + limit) | |
| 858 return FALSE; | |
| 859 | |
| 860 if (!GetCharAt(pos, byte)) | |
| 861 return FALSE; | |
| 862 | |
| 863 } else { | |
| 864 if (limit && pos <= m_Pos - limit) | |
| 865 return FALSE; | |
| 866 | |
| 867 if (!GetCharAtBackward(pos, byte)) | |
| 868 return FALSE; | |
| 869 } | |
| 870 | |
| 871 if (byte == tag_data[offset]) { | |
| 872 if (bForward) { | |
| 873 offset++; | |
| 874 if (offset < taglen) { | |
| 875 pos++; | |
| 876 continue; | |
| 877 } | |
| 878 } else { | |
| 879 offset--; | |
| 880 if (offset >= 0) { | |
| 881 pos--; | |
| 882 continue; | |
| 883 } | |
| 884 } | |
| 885 | |
| 886 FX_FILESIZE startpos = bForward ? pos - taglen + 1 : pos; | |
| 887 if (!bWholeWord || IsWholeWord(startpos, limit, tag, FALSE)) { | |
| 888 m_Pos = startpos; | |
| 889 return TRUE; | |
| 890 } | |
| 891 } | |
| 892 | |
| 893 if (bForward) { | |
| 894 offset = byte == tag_data[0] ? 1 : 0; | |
| 895 pos++; | |
| 896 } else { | |
| 897 offset = byte == tag_data[taglen - 1] ? taglen - 2 : taglen - 1; | |
| 898 pos--; | |
| 899 } | |
| 900 | |
| 901 if (pos < 0) | |
| 902 return FALSE; | |
| 903 } | |
| 904 | |
| 905 return FALSE; | |
| 906 } | |
| 907 | |
| 908 int32_t CPDF_SyntaxParser::SearchMultiWord(const CFX_ByteStringC& tags, | |
| 909 FX_BOOL bWholeWord, | |
| 910 FX_FILESIZE limit) { | |
| 911 int32_t ntags = 1; | |
| 912 for (int i = 0; i < tags.GetLength(); ++i) { | |
| 913 if (tags[i] == 0) | |
| 914 ++ntags; | |
| 915 } | |
| 916 | |
| 917 // Ensure that the input byte string happens to be nul-terminated. This | |
| 918 // need not be the case, but the loop below uses this guarantee to put | |
| 919 // the last pattern into the vector. | |
| 920 ASSERT(tags[tags.GetLength()] == 0); | |
| 921 std::vector<SearchTagRecord> patterns(ntags); | |
| 922 uint32_t start = 0; | |
| 923 uint32_t itag = 0; | |
| 924 uint32_t max_len = 0; | |
| 925 for (int i = 0; i <= tags.GetLength(); ++i) { | |
| 926 if (tags[i] == 0) { | |
| 927 uint32_t len = i - start; | |
| 928 max_len = std::max(len, max_len); | |
| 929 patterns[itag].m_bsTag = tags.Mid(start, len); | |
| 930 patterns[itag].m_Offset = 0; | |
| 931 start = i + 1; | |
| 932 ++itag; | |
| 933 } | |
| 934 } | |
| 935 | |
| 936 const FX_FILESIZE pos_limit = m_Pos + limit; | |
| 937 for (FX_FILESIZE pos = m_Pos; !limit || pos < pos_limit; ++pos) { | |
| 938 uint8_t byte; | |
| 939 if (!GetCharAt(pos, byte)) | |
| 940 break; | |
| 941 | |
| 942 for (int i = 0; i < ntags; ++i) { | |
| 943 SearchTagRecord& pat = patterns[i]; | |
| 944 if (pat.m_bsTag[pat.m_Offset] != byte) { | |
| 945 pat.m_Offset = (pat.m_bsTag[0] == byte) ? 1 : 0; | |
| 946 continue; | |
| 947 } | |
| 948 | |
| 949 ++pat.m_Offset; | |
| 950 if (pat.m_Offset != pat.m_bsTag.GetLength()) | |
| 951 continue; | |
| 952 | |
| 953 if (!bWholeWord || IsWholeWord(pos - pat.m_bsTag.GetLength(), limit, | |
| 954 pat.m_bsTag, FALSE)) { | |
| 955 return i; | |
| 956 } | |
| 957 | |
| 958 pat.m_Offset = (pat.m_bsTag[0] == byte) ? 1 : 0; | |
| 959 } | |
| 960 } | |
| 961 return -1; | |
| 962 } | |
| 963 | |
| 964 FX_FILESIZE CPDF_SyntaxParser::FindTag(const CFX_ByteStringC& tag, | |
| 965 FX_FILESIZE limit) { | |
| 966 int32_t taglen = tag.GetLength(); | |
| 967 int32_t match = 0; | |
| 968 limit += m_Pos; | |
| 969 FX_FILESIZE startpos = m_Pos; | |
| 970 | |
| 971 while (1) { | |
| 972 uint8_t ch; | |
| 973 if (!GetNextChar(ch)) | |
| 974 return -1; | |
| 975 | |
| 976 if (ch == tag[match]) { | |
| 977 match++; | |
| 978 if (match == taglen) | |
| 979 return m_Pos - startpos - taglen; | |
| 980 } else { | |
| 981 match = ch == tag[0] ? 1 : 0; | |
| 982 } | |
| 983 | |
| 984 if (limit && m_Pos == limit) | |
| 985 return -1; | |
| 986 } | |
| 987 return -1; | |
| 988 } | |
| 989 | |
| 990 void CPDF_SyntaxParser::SetEncrypt( | |
| 991 std::unique_ptr<CPDF_CryptoHandler> pCryptoHandler) { | |
| 992 m_pCryptoHandler = std::move(pCryptoHandler); | |
| 993 } | |
| 994 | |
| 995 CFX_ByteString CPDF_SyntaxParser::MaybeIntern(const CFX_ByteString& str) { | |
| 996 return m_pPool ? m_pPool->Intern(str) : str; | |
| 997 } | |
| OLD | NEW |