OLD | NEW |
(Empty) | |
| 1 // Copyright 2016 PDFium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. |
| 4 |
| 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com |
| 6 |
| 7 #include "core/src/fpdfapi/fpdf_parser/cpdf_syntax_parser.h" |
| 8 |
| 9 #include <vector> |
| 10 |
| 11 #include "core/include/fpdfapi/fpdf_module.h" |
| 12 #include "core/include/fpdfapi/fpdf_parser.h" |
| 13 #include "core/include/fxcrt/fx_ext.h" |
| 14 #include "third_party/base/numerics/safe_math.h" |
| 15 |
| 16 namespace { |
| 17 |
| 18 struct SearchTagRecord { |
| 19 const char* m_pTag; |
| 20 FX_DWORD m_Len; |
| 21 FX_DWORD m_Offset; |
| 22 }; |
| 23 |
| 24 } // namespace |
| 25 |
| 26 // static |
| 27 int CPDF_SyntaxParser::s_CurrentRecursionDepth = 0; |
| 28 |
| 29 CPDF_SyntaxParser::CPDF_SyntaxParser() |
| 30 : m_MetadataObjnum(0), |
| 31 m_pFileAccess(nullptr), |
| 32 m_pFileBuf(nullptr), |
| 33 m_BufSize(CPDF_ModuleMgr::kFileBufSize) {} |
| 34 |
| 35 CPDF_SyntaxParser::~CPDF_SyntaxParser() { |
| 36 FX_Free(m_pFileBuf); |
| 37 } |
| 38 |
| 39 FX_BOOL CPDF_SyntaxParser::GetCharAt(FX_FILESIZE pos, uint8_t& ch) { |
| 40 CFX_AutoRestorer<FX_FILESIZE> save_pos(&m_Pos); |
| 41 m_Pos = pos; |
| 42 return GetNextChar(ch); |
| 43 } |
| 44 |
| 45 FX_BOOL CPDF_SyntaxParser::GetNextChar(uint8_t& ch) { |
| 46 FX_FILESIZE pos = m_Pos + m_HeaderOffset; |
| 47 if (pos >= m_FileLen) |
| 48 return FALSE; |
| 49 |
| 50 if (m_BufOffset >= pos || (FX_FILESIZE)(m_BufOffset + m_BufSize) <= pos) { |
| 51 FX_FILESIZE read_pos = pos; |
| 52 FX_DWORD read_size = m_BufSize; |
| 53 if ((FX_FILESIZE)read_size > m_FileLen) |
| 54 read_size = (FX_DWORD)m_FileLen; |
| 55 |
| 56 if ((FX_FILESIZE)(read_pos + read_size) > m_FileLen) { |
| 57 if (m_FileLen < (FX_FILESIZE)read_size) { |
| 58 read_pos = 0; |
| 59 read_size = (FX_DWORD)m_FileLen; |
| 60 } else { |
| 61 read_pos = m_FileLen - read_size; |
| 62 } |
| 63 } |
| 64 |
| 65 if (!m_pFileAccess->ReadBlock(m_pFileBuf, read_pos, read_size)) |
| 66 return FALSE; |
| 67 |
| 68 m_BufOffset = read_pos; |
| 69 } |
| 70 ch = m_pFileBuf[pos - m_BufOffset]; |
| 71 m_Pos++; |
| 72 return TRUE; |
| 73 } |
| 74 |
| 75 FX_BOOL CPDF_SyntaxParser::GetCharAtBackward(FX_FILESIZE pos, uint8_t& ch) { |
| 76 pos += m_HeaderOffset; |
| 77 if (pos >= m_FileLen) |
| 78 return FALSE; |
| 79 |
| 80 if (m_BufOffset >= pos || (FX_FILESIZE)(m_BufOffset + m_BufSize) <= pos) { |
| 81 FX_FILESIZE read_pos; |
| 82 if (pos < (FX_FILESIZE)m_BufSize) |
| 83 read_pos = 0; |
| 84 else |
| 85 read_pos = pos - m_BufSize + 1; |
| 86 |
| 87 FX_DWORD read_size = m_BufSize; |
| 88 if ((FX_FILESIZE)(read_pos + read_size) > m_FileLen) { |
| 89 if (m_FileLen < (FX_FILESIZE)read_size) { |
| 90 read_pos = 0; |
| 91 read_size = (FX_DWORD)m_FileLen; |
| 92 } else { |
| 93 read_pos = m_FileLen - read_size; |
| 94 } |
| 95 } |
| 96 |
| 97 if (!m_pFileAccess->ReadBlock(m_pFileBuf, read_pos, read_size)) |
| 98 return FALSE; |
| 99 |
| 100 m_BufOffset = read_pos; |
| 101 } |
| 102 ch = m_pFileBuf[pos - m_BufOffset]; |
| 103 return TRUE; |
| 104 } |
| 105 |
| 106 FX_BOOL CPDF_SyntaxParser::ReadBlock(uint8_t* pBuf, FX_DWORD size) { |
| 107 if (!m_pFileAccess->ReadBlock(pBuf, m_Pos + m_HeaderOffset, size)) |
| 108 return FALSE; |
| 109 m_Pos += size; |
| 110 return TRUE; |
| 111 } |
| 112 |
| 113 void CPDF_SyntaxParser::GetNextWordInternal(bool* bIsNumber) { |
| 114 m_WordSize = 0; |
| 115 if (bIsNumber) |
| 116 *bIsNumber = true; |
| 117 |
| 118 uint8_t ch; |
| 119 if (!GetNextChar(ch)) |
| 120 return; |
| 121 |
| 122 while (1) { |
| 123 while (PDFCharIsWhitespace(ch)) { |
| 124 if (!GetNextChar(ch)) |
| 125 return; |
| 126 } |
| 127 |
| 128 if (ch != '%') |
| 129 break; |
| 130 |
| 131 while (1) { |
| 132 if (!GetNextChar(ch)) |
| 133 return; |
| 134 if (PDFCharIsLineEnding(ch)) |
| 135 break; |
| 136 } |
| 137 } |
| 138 |
| 139 if (PDFCharIsDelimiter(ch)) { |
| 140 if (bIsNumber) |
| 141 *bIsNumber = false; |
| 142 |
| 143 m_WordBuffer[m_WordSize++] = ch; |
| 144 if (ch == '/') { |
| 145 while (1) { |
| 146 if (!GetNextChar(ch)) |
| 147 return; |
| 148 |
| 149 if (!PDFCharIsOther(ch) && !PDFCharIsNumeric(ch)) { |
| 150 m_Pos--; |
| 151 return; |
| 152 } |
| 153 |
| 154 if (m_WordSize < sizeof(m_WordBuffer) - 1) |
| 155 m_WordBuffer[m_WordSize++] = ch; |
| 156 } |
| 157 } else if (ch == '<') { |
| 158 if (!GetNextChar(ch)) |
| 159 return; |
| 160 |
| 161 if (ch == '<') |
| 162 m_WordBuffer[m_WordSize++] = ch; |
| 163 else |
| 164 m_Pos--; |
| 165 } else if (ch == '>') { |
| 166 if (!GetNextChar(ch)) |
| 167 return; |
| 168 |
| 169 if (ch == '>') |
| 170 m_WordBuffer[m_WordSize++] = ch; |
| 171 else |
| 172 m_Pos--; |
| 173 } |
| 174 return; |
| 175 } |
| 176 |
| 177 while (1) { |
| 178 if (m_WordSize < sizeof(m_WordBuffer) - 1) |
| 179 m_WordBuffer[m_WordSize++] = ch; |
| 180 |
| 181 if (!PDFCharIsNumeric(ch)) { |
| 182 if (bIsNumber) |
| 183 *bIsNumber = false; |
| 184 } |
| 185 |
| 186 if (!GetNextChar(ch)) |
| 187 return; |
| 188 |
| 189 if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) { |
| 190 m_Pos--; |
| 191 break; |
| 192 } |
| 193 } |
| 194 } |
| 195 |
| 196 CFX_ByteString CPDF_SyntaxParser::ReadString() { |
| 197 uint8_t ch; |
| 198 if (!GetNextChar(ch)) |
| 199 return CFX_ByteString(); |
| 200 |
| 201 CFX_ByteTextBuf buf; |
| 202 int32_t parlevel = 0; |
| 203 int32_t status = 0; |
| 204 int32_t iEscCode = 0; |
| 205 while (1) { |
| 206 switch (status) { |
| 207 case 0: |
| 208 if (ch == ')') { |
| 209 if (parlevel == 0) { |
| 210 return buf.GetByteString(); |
| 211 } |
| 212 parlevel--; |
| 213 buf.AppendChar(')'); |
| 214 } else if (ch == '(') { |
| 215 parlevel++; |
| 216 buf.AppendChar('('); |
| 217 } else if (ch == '\\') { |
| 218 status = 1; |
| 219 } else { |
| 220 buf.AppendChar(ch); |
| 221 } |
| 222 break; |
| 223 case 1: |
| 224 if (ch >= '0' && ch <= '7') { |
| 225 iEscCode = FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch)); |
| 226 status = 2; |
| 227 break; |
| 228 } |
| 229 |
| 230 if (ch == 'n') { |
| 231 buf.AppendChar('\n'); |
| 232 } else if (ch == 'r') { |
| 233 buf.AppendChar('\r'); |
| 234 } else if (ch == 't') { |
| 235 buf.AppendChar('\t'); |
| 236 } else if (ch == 'b') { |
| 237 buf.AppendChar('\b'); |
| 238 } else if (ch == 'f') { |
| 239 buf.AppendChar('\f'); |
| 240 } else if (ch == '\r') { |
| 241 status = 4; |
| 242 break; |
| 243 } else if (ch != '\n') { |
| 244 buf.AppendChar(ch); |
| 245 } |
| 246 status = 0; |
| 247 break; |
| 248 case 2: |
| 249 if (ch >= '0' && ch <= '7') { |
| 250 iEscCode = |
| 251 iEscCode * 8 + FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch)); |
| 252 status = 3; |
| 253 } else { |
| 254 buf.AppendChar(iEscCode); |
| 255 status = 0; |
| 256 continue; |
| 257 } |
| 258 break; |
| 259 case 3: |
| 260 if (ch >= '0' && ch <= '7') { |
| 261 iEscCode = |
| 262 iEscCode * 8 + FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch)); |
| 263 buf.AppendChar(iEscCode); |
| 264 status = 0; |
| 265 } else { |
| 266 buf.AppendChar(iEscCode); |
| 267 status = 0; |
| 268 continue; |
| 269 } |
| 270 break; |
| 271 case 4: |
| 272 status = 0; |
| 273 if (ch != '\n') |
| 274 continue; |
| 275 break; |
| 276 } |
| 277 |
| 278 if (!GetNextChar(ch)) |
| 279 break; |
| 280 } |
| 281 |
| 282 GetNextChar(ch); |
| 283 return buf.GetByteString(); |
| 284 } |
| 285 |
| 286 CFX_ByteString CPDF_SyntaxParser::ReadHexString() { |
| 287 uint8_t ch; |
| 288 if (!GetNextChar(ch)) |
| 289 return CFX_ByteString(); |
| 290 |
| 291 CFX_ByteTextBuf buf; |
| 292 bool bFirst = true; |
| 293 uint8_t code = 0; |
| 294 while (1) { |
| 295 if (ch == '>') |
| 296 break; |
| 297 |
| 298 if (std::isxdigit(ch)) { |
| 299 int val = FXSYS_toHexDigit(ch); |
| 300 if (bFirst) { |
| 301 code = val * 16; |
| 302 } else { |
| 303 code += val; |
| 304 buf.AppendByte(code); |
| 305 } |
| 306 bFirst = !bFirst; |
| 307 } |
| 308 |
| 309 if (!GetNextChar(ch)) |
| 310 break; |
| 311 } |
| 312 if (!bFirst) |
| 313 buf.AppendByte(code); |
| 314 |
| 315 return buf.GetByteString(); |
| 316 } |
| 317 |
| 318 void CPDF_SyntaxParser::ToNextLine() { |
| 319 uint8_t ch; |
| 320 while (GetNextChar(ch)) { |
| 321 if (ch == '\n') |
| 322 break; |
| 323 |
| 324 if (ch == '\r') { |
| 325 GetNextChar(ch); |
| 326 if (ch != '\n') |
| 327 --m_Pos; |
| 328 break; |
| 329 } |
| 330 } |
| 331 } |
| 332 |
| 333 void CPDF_SyntaxParser::ToNextWord() { |
| 334 uint8_t ch; |
| 335 if (!GetNextChar(ch)) |
| 336 return; |
| 337 |
| 338 while (1) { |
| 339 while (PDFCharIsWhitespace(ch)) { |
| 340 if (!GetNextChar(ch)) |
| 341 return; |
| 342 } |
| 343 |
| 344 if (ch != '%') |
| 345 break; |
| 346 |
| 347 while (1) { |
| 348 if (!GetNextChar(ch)) |
| 349 return; |
| 350 if (PDFCharIsLineEnding(ch)) |
| 351 break; |
| 352 } |
| 353 } |
| 354 m_Pos--; |
| 355 } |
| 356 |
| 357 CFX_ByteString CPDF_SyntaxParser::GetNextWord(bool* bIsNumber) { |
| 358 GetNextWordInternal(bIsNumber); |
| 359 return CFX_ByteString((const FX_CHAR*)m_WordBuffer, m_WordSize); |
| 360 } |
| 361 |
| 362 CFX_ByteString CPDF_SyntaxParser::GetKeyword() { |
| 363 return GetNextWord(nullptr); |
| 364 } |
| 365 |
| 366 CPDF_Object* CPDF_SyntaxParser::GetObject(CPDF_IndirectObjectHolder* pObjList, |
| 367 FX_DWORD objnum, |
| 368 FX_DWORD gennum, |
| 369 FX_BOOL bDecrypt) { |
| 370 CFX_AutoRestorer<int> restorer(&s_CurrentRecursionDepth); |
| 371 if (++s_CurrentRecursionDepth > kParserMaxRecursionDepth) |
| 372 return nullptr; |
| 373 |
| 374 FX_FILESIZE SavedPos = m_Pos; |
| 375 bool bIsNumber; |
| 376 CFX_ByteString word = GetNextWord(&bIsNumber); |
| 377 if (word.GetLength() == 0) |
| 378 return nullptr; |
| 379 |
| 380 if (bIsNumber) { |
| 381 FX_FILESIZE SavedPos = m_Pos; |
| 382 CFX_ByteString nextword = GetNextWord(&bIsNumber); |
| 383 if (bIsNumber) { |
| 384 CFX_ByteString nextword2 = GetNextWord(nullptr); |
| 385 if (nextword2 == "R") { |
| 386 FX_DWORD objnum = FXSYS_atoui(word); |
| 387 return new CPDF_Reference(pObjList, objnum); |
| 388 } |
| 389 } |
| 390 m_Pos = SavedPos; |
| 391 return new CPDF_Number(word); |
| 392 } |
| 393 |
| 394 if (word == "true" || word == "false") |
| 395 return new CPDF_Boolean(word == "true"); |
| 396 |
| 397 if (word == "null") |
| 398 return new CPDF_Null; |
| 399 |
| 400 if (word == "(") { |
| 401 CFX_ByteString str = ReadString(); |
| 402 if (m_pCryptoHandler && bDecrypt) |
| 403 m_pCryptoHandler->Decrypt(objnum, gennum, str); |
| 404 return new CPDF_String(str, FALSE); |
| 405 } |
| 406 |
| 407 if (word == "<") { |
| 408 CFX_ByteString str = ReadHexString(); |
| 409 if (m_pCryptoHandler && bDecrypt) |
| 410 m_pCryptoHandler->Decrypt(objnum, gennum, str); |
| 411 |
| 412 return new CPDF_String(str, TRUE); |
| 413 } |
| 414 |
| 415 if (word == "[") { |
| 416 CPDF_Array* pArray = new CPDF_Array; |
| 417 while (CPDF_Object* pObj = GetObject(pObjList, objnum, gennum, true)) |
| 418 pArray->Add(pObj); |
| 419 |
| 420 return pArray; |
| 421 } |
| 422 |
| 423 if (word[0] == '/') { |
| 424 return new CPDF_Name( |
| 425 PDF_NameDecode(CFX_ByteStringC(m_WordBuffer + 1, m_WordSize - 1))); |
| 426 } |
| 427 |
| 428 if (word == "<<") { |
| 429 int32_t nKeys = 0; |
| 430 FX_FILESIZE dwSignValuePos = 0; |
| 431 |
| 432 std::unique_ptr<CPDF_Dictionary, ReleaseDeleter<CPDF_Dictionary>> pDict( |
| 433 new CPDF_Dictionary); |
| 434 while (1) { |
| 435 CFX_ByteString key = GetNextWord(nullptr); |
| 436 if (key.IsEmpty()) |
| 437 return nullptr; |
| 438 |
| 439 FX_FILESIZE SavedPos = m_Pos - key.GetLength(); |
| 440 if (key == ">>") |
| 441 break; |
| 442 |
| 443 if (key == "endobj") { |
| 444 m_Pos = SavedPos; |
| 445 break; |
| 446 } |
| 447 |
| 448 if (key[0] != '/') |
| 449 continue; |
| 450 |
| 451 ++nKeys; |
| 452 key = PDF_NameDecode(key); |
| 453 if (key.IsEmpty()) |
| 454 continue; |
| 455 |
| 456 if (key == "/Contents") |
| 457 dwSignValuePos = m_Pos; |
| 458 |
| 459 CPDF_Object* pObj = GetObject(pObjList, objnum, gennum, true); |
| 460 if (!pObj) |
| 461 continue; |
| 462 |
| 463 CFX_ByteStringC keyNoSlash(key.c_str() + 1, key.GetLength() - 1); |
| 464 pDict->SetAt(keyNoSlash, pObj); |
| 465 } |
| 466 |
| 467 // Only when this is a signature dictionary and has contents, we reset the |
| 468 // contents to the un-decrypted form. |
| 469 if (IsSignatureDict(pDict.get()) && dwSignValuePos) { |
| 470 CFX_AutoRestorer<FX_FILESIZE> save_pos(&m_Pos); |
| 471 m_Pos = dwSignValuePos; |
| 472 pDict->SetAt("Contents", GetObject(pObjList, objnum, gennum, false)); |
| 473 } |
| 474 |
| 475 FX_FILESIZE SavedPos = m_Pos; |
| 476 CFX_ByteString nextword = GetNextWord(nullptr); |
| 477 if (nextword != "stream") { |
| 478 m_Pos = SavedPos; |
| 479 return pDict.release(); |
| 480 } |
| 481 return ReadStream(pDict.release(), objnum, gennum); |
| 482 } |
| 483 |
| 484 if (word == ">>") |
| 485 m_Pos = SavedPos; |
| 486 |
| 487 return nullptr; |
| 488 } |
| 489 |
| 490 CPDF_Object* CPDF_SyntaxParser::GetObjectByStrict( |
| 491 CPDF_IndirectObjectHolder* pObjList, |
| 492 FX_DWORD objnum, |
| 493 FX_DWORD gennum) { |
| 494 CFX_AutoRestorer<int> restorer(&s_CurrentRecursionDepth); |
| 495 if (++s_CurrentRecursionDepth > kParserMaxRecursionDepth) |
| 496 return nullptr; |
| 497 |
| 498 FX_FILESIZE SavedPos = m_Pos; |
| 499 bool bIsNumber; |
| 500 CFX_ByteString word = GetNextWord(&bIsNumber); |
| 501 if (word.GetLength() == 0) |
| 502 return nullptr; |
| 503 |
| 504 if (bIsNumber) { |
| 505 FX_FILESIZE SavedPos = m_Pos; |
| 506 CFX_ByteString nextword = GetNextWord(&bIsNumber); |
| 507 if (bIsNumber) { |
| 508 CFX_ByteString nextword2 = GetNextWord(nullptr); |
| 509 if (nextword2 == "R") |
| 510 return new CPDF_Reference(pObjList, FXSYS_atoui(word)); |
| 511 } |
| 512 m_Pos = SavedPos; |
| 513 return new CPDF_Number(word); |
| 514 } |
| 515 |
| 516 if (word == "true" || word == "false") |
| 517 return new CPDF_Boolean(word == "true"); |
| 518 |
| 519 if (word == "null") |
| 520 return new CPDF_Null; |
| 521 |
| 522 if (word == "(") { |
| 523 CFX_ByteString str = ReadString(); |
| 524 if (m_pCryptoHandler) |
| 525 m_pCryptoHandler->Decrypt(objnum, gennum, str); |
| 526 return new CPDF_String(str, FALSE); |
| 527 } |
| 528 |
| 529 if (word == "<") { |
| 530 CFX_ByteString str = ReadHexString(); |
| 531 if (m_pCryptoHandler) |
| 532 m_pCryptoHandler->Decrypt(objnum, gennum, str); |
| 533 return new CPDF_String(str, TRUE); |
| 534 } |
| 535 |
| 536 if (word == "[") { |
| 537 std::unique_ptr<CPDF_Array, ReleaseDeleter<CPDF_Array>> pArray( |
| 538 new CPDF_Array); |
| 539 while (CPDF_Object* pObj = GetObject(pObjList, objnum, gennum, true)) |
| 540 pArray->Add(pObj); |
| 541 |
| 542 return m_WordBuffer[0] == ']' ? pArray.release() : nullptr; |
| 543 } |
| 544 |
| 545 if (word[0] == '/') { |
| 546 return new CPDF_Name( |
| 547 PDF_NameDecode(CFX_ByteStringC(m_WordBuffer + 1, m_WordSize - 1))); |
| 548 } |
| 549 |
| 550 if (word == "<<") { |
| 551 std::unique_ptr<CPDF_Dictionary, ReleaseDeleter<CPDF_Dictionary>> pDict( |
| 552 new CPDF_Dictionary); |
| 553 while (1) { |
| 554 FX_FILESIZE SavedPos = m_Pos; |
| 555 CFX_ByteString key = GetNextWord(nullptr); |
| 556 if (key.IsEmpty()) |
| 557 return nullptr; |
| 558 |
| 559 if (key == ">>") |
| 560 break; |
| 561 |
| 562 if (key == "endobj") { |
| 563 m_Pos = SavedPos; |
| 564 break; |
| 565 } |
| 566 |
| 567 if (key[0] != '/') |
| 568 continue; |
| 569 |
| 570 key = PDF_NameDecode(key); |
| 571 std::unique_ptr<CPDF_Object, ReleaseDeleter<CPDF_Object>> obj( |
| 572 GetObject(pObjList, objnum, gennum, true)); |
| 573 if (!obj) { |
| 574 uint8_t ch; |
| 575 while (GetNextChar(ch) && ch != 0x0A && ch != 0x0D) { |
| 576 continue; |
| 577 } |
| 578 return nullptr; |
| 579 } |
| 580 |
| 581 if (key.GetLength() > 1) { |
| 582 pDict->SetAt(CFX_ByteStringC(key.c_str() + 1, key.GetLength() - 1), |
| 583 obj.release()); |
| 584 } |
| 585 } |
| 586 |
| 587 FX_FILESIZE SavedPos = m_Pos; |
| 588 CFX_ByteString nextword = GetNextWord(nullptr); |
| 589 if (nextword != "stream") { |
| 590 m_Pos = SavedPos; |
| 591 return pDict.release(); |
| 592 } |
| 593 |
| 594 return ReadStream(pDict.release(), objnum, gennum); |
| 595 } |
| 596 |
| 597 if (word == ">>") |
| 598 m_Pos = SavedPos; |
| 599 |
| 600 return nullptr; |
| 601 } |
| 602 |
| 603 unsigned int CPDF_SyntaxParser::ReadEOLMarkers(FX_FILESIZE pos) { |
| 604 unsigned char byte1 = 0; |
| 605 unsigned char byte2 = 0; |
| 606 |
| 607 GetCharAt(pos, byte1); |
| 608 GetCharAt(pos + 1, byte2); |
| 609 |
| 610 if (byte1 == '\r' && byte2 == '\n') |
| 611 return 2; |
| 612 |
| 613 if (byte1 == '\r' || byte1 == '\n') |
| 614 return 1; |
| 615 |
| 616 return 0; |
| 617 } |
| 618 |
| 619 CPDF_Stream* CPDF_SyntaxParser::ReadStream(CPDF_Dictionary* pDict, |
| 620 FX_DWORD objnum, |
| 621 FX_DWORD gennum) { |
| 622 CPDF_Object* pLenObj = pDict->GetElement("Length"); |
| 623 FX_FILESIZE len = -1; |
| 624 CPDF_Reference* pLenObjRef = ToReference(pLenObj); |
| 625 |
| 626 bool differingObjNum = !pLenObjRef || (pLenObjRef->GetObjList() && |
| 627 pLenObjRef->GetRefObjNum() != objnum); |
| 628 if (pLenObj && differingObjNum) |
| 629 len = pLenObj->GetInteger(); |
| 630 |
| 631 // Locate the start of stream. |
| 632 ToNextLine(); |
| 633 FX_FILESIZE streamStartPos = m_Pos; |
| 634 |
| 635 const CFX_ByteStringC kEndStreamStr("endstream"); |
| 636 const CFX_ByteStringC kEndObjStr("endobj"); |
| 637 |
| 638 CPDF_CryptoHandler* pCryptoHandler = |
| 639 objnum == (FX_DWORD)m_MetadataObjnum ? nullptr : m_pCryptoHandler.get(); |
| 640 if (!pCryptoHandler) { |
| 641 FX_BOOL bSearchForKeyword = TRUE; |
| 642 if (len >= 0) { |
| 643 pdfium::base::CheckedNumeric<FX_FILESIZE> pos = m_Pos; |
| 644 pos += len; |
| 645 if (pos.IsValid() && pos.ValueOrDie() < m_FileLen) |
| 646 m_Pos = pos.ValueOrDie(); |
| 647 |
| 648 m_Pos += ReadEOLMarkers(m_Pos); |
| 649 FXSYS_memset(m_WordBuffer, 0, kEndStreamStr.GetLength() + 1); |
| 650 GetNextWordInternal(nullptr); |
| 651 // Earlier version of PDF specification doesn't require EOL marker before |
| 652 // 'endstream' keyword. If keyword 'endstream' follows the bytes in |
| 653 // specified length, it signals the end of stream. |
| 654 if (FXSYS_memcmp(m_WordBuffer, kEndStreamStr.GetPtr(), |
| 655 kEndStreamStr.GetLength()) == 0) { |
| 656 bSearchForKeyword = FALSE; |
| 657 } |
| 658 } |
| 659 |
| 660 if (bSearchForKeyword) { |
| 661 // If len is not available, len needs to be calculated |
| 662 // by searching the keywords "endstream" or "endobj". |
| 663 m_Pos = streamStartPos; |
| 664 FX_FILESIZE endStreamOffset = 0; |
| 665 while (endStreamOffset >= 0) { |
| 666 endStreamOffset = FindTag(kEndStreamStr, 0); |
| 667 |
| 668 // Can't find "endstream". |
| 669 if (endStreamOffset < 0) |
| 670 break; |
| 671 |
| 672 // Stop searching when "endstream" is found. |
| 673 if (IsWholeWord(m_Pos - kEndStreamStr.GetLength(), m_FileLen, |
| 674 kEndStreamStr, TRUE)) { |
| 675 endStreamOffset = m_Pos - streamStartPos - kEndStreamStr.GetLength(); |
| 676 break; |
| 677 } |
| 678 } |
| 679 |
| 680 m_Pos = streamStartPos; |
| 681 FX_FILESIZE endObjOffset = 0; |
| 682 while (endObjOffset >= 0) { |
| 683 endObjOffset = FindTag(kEndObjStr, 0); |
| 684 |
| 685 // Can't find "endobj". |
| 686 if (endObjOffset < 0) |
| 687 break; |
| 688 |
| 689 // Stop searching when "endobj" is found. |
| 690 if (IsWholeWord(m_Pos - kEndObjStr.GetLength(), m_FileLen, kEndObjStr, |
| 691 TRUE)) { |
| 692 endObjOffset = m_Pos - streamStartPos - kEndObjStr.GetLength(); |
| 693 break; |
| 694 } |
| 695 } |
| 696 |
| 697 // Can't find "endstream" or "endobj". |
| 698 if (endStreamOffset < 0 && endObjOffset < 0) { |
| 699 pDict->Release(); |
| 700 return nullptr; |
| 701 } |
| 702 |
| 703 if (endStreamOffset < 0 && endObjOffset >= 0) { |
| 704 // Correct the position of end stream. |
| 705 endStreamOffset = endObjOffset; |
| 706 } else if (endStreamOffset >= 0 && endObjOffset < 0) { |
| 707 // Correct the position of end obj. |
| 708 endObjOffset = endStreamOffset; |
| 709 } else if (endStreamOffset > endObjOffset) { |
| 710 endStreamOffset = endObjOffset; |
| 711 } |
| 712 |
| 713 len = endStreamOffset; |
| 714 int numMarkers = ReadEOLMarkers(streamStartPos + endStreamOffset - 2); |
| 715 if (numMarkers == 2) { |
| 716 len -= 2; |
| 717 } else { |
| 718 numMarkers = ReadEOLMarkers(streamStartPos + endStreamOffset - 1); |
| 719 if (numMarkers == 1) { |
| 720 len -= 1; |
| 721 } |
| 722 } |
| 723 |
| 724 if (len < 0) { |
| 725 pDict->Release(); |
| 726 return nullptr; |
| 727 } |
| 728 pDict->SetAtInteger("Length", len); |
| 729 } |
| 730 m_Pos = streamStartPos; |
| 731 } |
| 732 |
| 733 if (len < 0) { |
| 734 pDict->Release(); |
| 735 return nullptr; |
| 736 } |
| 737 |
| 738 uint8_t* pData = nullptr; |
| 739 if (len > 0) { |
| 740 pData = FX_Alloc(uint8_t, len); |
| 741 ReadBlock(pData, len); |
| 742 if (pCryptoHandler) { |
| 743 CFX_BinaryBuf dest_buf; |
| 744 dest_buf.EstimateSize(pCryptoHandler->DecryptGetSize(len)); |
| 745 |
| 746 void* context = pCryptoHandler->DecryptStart(objnum, gennum); |
| 747 pCryptoHandler->DecryptStream(context, pData, len, dest_buf); |
| 748 pCryptoHandler->DecryptFinish(context, dest_buf); |
| 749 |
| 750 FX_Free(pData); |
| 751 pData = dest_buf.GetBuffer(); |
| 752 len = dest_buf.GetSize(); |
| 753 dest_buf.DetachBuffer(); |
| 754 } |
| 755 } |
| 756 |
| 757 CPDF_Stream* pStream = new CPDF_Stream(pData, len, pDict); |
| 758 streamStartPos = m_Pos; |
| 759 FXSYS_memset(m_WordBuffer, 0, kEndObjStr.GetLength() + 1); |
| 760 |
| 761 GetNextWordInternal(nullptr); |
| 762 |
| 763 int numMarkers = ReadEOLMarkers(m_Pos); |
| 764 if (m_WordSize == kEndObjStr.GetLength() && numMarkers != 0 && |
| 765 FXSYS_memcmp(m_WordBuffer, kEndObjStr.GetPtr(), kEndObjStr.GetLength()) == |
| 766 0) { |
| 767 m_Pos = streamStartPos; |
| 768 } |
| 769 return pStream; |
| 770 } |
| 771 |
| 772 void CPDF_SyntaxParser::InitParser(IFX_FileRead* pFileAccess, |
| 773 FX_DWORD HeaderOffset) { |
| 774 FX_Free(m_pFileBuf); |
| 775 |
| 776 m_pFileBuf = FX_Alloc(uint8_t, m_BufSize); |
| 777 m_HeaderOffset = HeaderOffset; |
| 778 m_FileLen = pFileAccess->GetSize(); |
| 779 m_Pos = 0; |
| 780 m_pFileAccess = pFileAccess; |
| 781 m_BufOffset = 0; |
| 782 pFileAccess->ReadBlock( |
| 783 m_pFileBuf, 0, |
| 784 (size_t)((FX_FILESIZE)m_BufSize > m_FileLen ? m_FileLen : m_BufSize)); |
| 785 } |
| 786 |
| 787 uint32_t CPDF_SyntaxParser::GetDirectNum() { |
| 788 bool bIsNumber; |
| 789 GetNextWordInternal(&bIsNumber); |
| 790 if (!bIsNumber) |
| 791 return 0; |
| 792 |
| 793 m_WordBuffer[m_WordSize] = 0; |
| 794 return FXSYS_atoui(reinterpret_cast<const FX_CHAR*>(m_WordBuffer)); |
| 795 } |
| 796 |
| 797 bool CPDF_SyntaxParser::IsWholeWord(FX_FILESIZE startpos, |
| 798 FX_FILESIZE limit, |
| 799 const CFX_ByteStringC& tag, |
| 800 FX_BOOL checkKeyword) { |
| 801 const FX_DWORD taglen = tag.GetLength(); |
| 802 |
| 803 bool bCheckLeft = !PDFCharIsDelimiter(tag[0]) && !PDFCharIsWhitespace(tag[0]); |
| 804 bool bCheckRight = !PDFCharIsDelimiter(tag[taglen - 1]) && |
| 805 !PDFCharIsWhitespace(tag[taglen - 1]); |
| 806 |
| 807 uint8_t ch; |
| 808 if (bCheckRight && startpos + (int32_t)taglen <= limit && |
| 809 GetCharAt(startpos + (int32_t)taglen, ch)) { |
| 810 if (PDFCharIsNumeric(ch) || PDFCharIsOther(ch) || |
| 811 (checkKeyword && PDFCharIsDelimiter(ch))) { |
| 812 return false; |
| 813 } |
| 814 } |
| 815 |
| 816 if (bCheckLeft && startpos > 0 && GetCharAt(startpos - 1, ch)) { |
| 817 if (PDFCharIsNumeric(ch) || PDFCharIsOther(ch) || |
| 818 (checkKeyword && PDFCharIsDelimiter(ch))) { |
| 819 return false; |
| 820 } |
| 821 } |
| 822 return true; |
| 823 } |
| 824 |
| 825 // TODO(dsinclair): Split into a SearchWordForward and SearchWordBackwards |
| 826 // and drop the bool. |
| 827 FX_BOOL CPDF_SyntaxParser::SearchWord(const CFX_ByteStringC& tag, |
| 828 FX_BOOL bWholeWord, |
| 829 FX_BOOL bForward, |
| 830 FX_FILESIZE limit) { |
| 831 int32_t taglen = tag.GetLength(); |
| 832 if (taglen == 0) |
| 833 return FALSE; |
| 834 |
| 835 FX_FILESIZE pos = m_Pos; |
| 836 int32_t offset = 0; |
| 837 if (!bForward) |
| 838 offset = taglen - 1; |
| 839 |
| 840 const uint8_t* tag_data = tag.GetPtr(); |
| 841 uint8_t byte; |
| 842 while (1) { |
| 843 if (bForward) { |
| 844 if (limit && pos >= m_Pos + limit) |
| 845 return FALSE; |
| 846 |
| 847 if (!GetCharAt(pos, byte)) |
| 848 return FALSE; |
| 849 |
| 850 } else { |
| 851 if (limit && pos <= m_Pos - limit) |
| 852 return FALSE; |
| 853 |
| 854 if (!GetCharAtBackward(pos, byte)) |
| 855 return FALSE; |
| 856 } |
| 857 |
| 858 if (byte == tag_data[offset]) { |
| 859 if (bForward) { |
| 860 offset++; |
| 861 if (offset < taglen) { |
| 862 pos++; |
| 863 continue; |
| 864 } |
| 865 } else { |
| 866 offset--; |
| 867 if (offset >= 0) { |
| 868 pos--; |
| 869 continue; |
| 870 } |
| 871 } |
| 872 |
| 873 FX_FILESIZE startpos = bForward ? pos - taglen + 1 : pos; |
| 874 if (!bWholeWord || IsWholeWord(startpos, limit, tag, FALSE)) { |
| 875 m_Pos = startpos; |
| 876 return TRUE; |
| 877 } |
| 878 } |
| 879 |
| 880 if (bForward) { |
| 881 offset = byte == tag_data[0] ? 1 : 0; |
| 882 pos++; |
| 883 } else { |
| 884 offset = byte == tag_data[taglen - 1] ? taglen - 2 : taglen - 1; |
| 885 pos--; |
| 886 } |
| 887 |
| 888 if (pos < 0) |
| 889 return FALSE; |
| 890 } |
| 891 |
| 892 return FALSE; |
| 893 } |
| 894 |
| 895 int32_t CPDF_SyntaxParser::SearchMultiWord(const CFX_ByteStringC& tags, |
| 896 FX_BOOL bWholeWord, |
| 897 FX_FILESIZE limit) { |
| 898 int32_t ntags = 1; |
| 899 for (int i = 0; i < tags.GetLength(); ++i) { |
| 900 if (tags[i] == 0) |
| 901 ++ntags; |
| 902 } |
| 903 |
| 904 std::vector<SearchTagRecord> patterns(ntags); |
| 905 FX_DWORD start = 0; |
| 906 FX_DWORD itag = 0; |
| 907 FX_DWORD max_len = 0; |
| 908 for (int i = 0; i <= tags.GetLength(); ++i) { |
| 909 if (tags[i] == 0) { |
| 910 FX_DWORD len = i - start; |
| 911 max_len = std::max(len, max_len); |
| 912 patterns[itag].m_pTag = tags.GetCStr() + start; |
| 913 patterns[itag].m_Len = len; |
| 914 patterns[itag].m_Offset = 0; |
| 915 start = i + 1; |
| 916 ++itag; |
| 917 } |
| 918 } |
| 919 |
| 920 const FX_FILESIZE pos_limit = m_Pos + limit; |
| 921 for (FX_FILESIZE pos = m_Pos; !limit || pos < pos_limit; ++pos) { |
| 922 uint8_t byte; |
| 923 if (!GetCharAt(pos, byte)) |
| 924 break; |
| 925 |
| 926 for (int i = 0; i < ntags; ++i) { |
| 927 SearchTagRecord& pat = patterns[i]; |
| 928 if (pat.m_pTag[pat.m_Offset] != byte) { |
| 929 pat.m_Offset = (pat.m_pTag[0] == byte) ? 1 : 0; |
| 930 continue; |
| 931 } |
| 932 |
| 933 ++pat.m_Offset; |
| 934 if (pat.m_Offset != pat.m_Len) |
| 935 continue; |
| 936 |
| 937 if (!bWholeWord || |
| 938 IsWholeWord(pos - pat.m_Len, limit, |
| 939 CFX_ByteStringC(pat.m_pTag, pat.m_Len), FALSE)) { |
| 940 return i; |
| 941 } |
| 942 |
| 943 pat.m_Offset = (pat.m_pTag[0] == byte) ? 1 : 0; |
| 944 } |
| 945 } |
| 946 return -1; |
| 947 } |
| 948 |
| 949 FX_FILESIZE CPDF_SyntaxParser::FindTag(const CFX_ByteStringC& tag, |
| 950 FX_FILESIZE limit) { |
| 951 int32_t taglen = tag.GetLength(); |
| 952 int32_t match = 0; |
| 953 limit += m_Pos; |
| 954 FX_FILESIZE startpos = m_Pos; |
| 955 |
| 956 while (1) { |
| 957 uint8_t ch; |
| 958 if (!GetNextChar(ch)) |
| 959 return -1; |
| 960 |
| 961 if (ch == tag[match]) { |
| 962 match++; |
| 963 if (match == taglen) |
| 964 return m_Pos - startpos - taglen; |
| 965 } else { |
| 966 match = ch == tag[0] ? 1 : 0; |
| 967 } |
| 968 |
| 969 if (limit && m_Pos == limit) |
| 970 return -1; |
| 971 } |
| 972 return -1; |
| 973 } |
| 974 |
| 975 void CPDF_SyntaxParser::SetEncrypt( |
| 976 std::unique_ptr<CPDF_CryptoHandler> pCryptoHandler) { |
| 977 m_pCryptoHandler = std::move(pCryptoHandler); |
| 978 } |
OLD | NEW |