OLD | NEW |
(Empty) | |
| 1 // Copyright 2016 PDFium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. |
| 4 |
| 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com |
| 6 |
| 7 #include "core/src/fpdfapi/fpdf_parser/cpdf_syntax_parser.h" |
| 8 |
| 9 #include "core/include/fpdfapi/fpdf_module.h" |
| 10 #include "core/include/fpdfapi/fpdf_parser.h" |
| 11 #include "core/include/fxcrt/fx_ext.h" |
| 12 #include "third_party/base/numerics/safe_math.h" |
| 13 |
| 14 namespace { |
| 15 |
| 16 struct SearchTagRecord { |
| 17 const char* m_pTag; |
| 18 FX_DWORD m_Len; |
| 19 FX_DWORD m_Offset; |
| 20 }; |
| 21 |
| 22 } // namespace |
| 23 |
| 24 // static |
| 25 int CPDF_SyntaxParser::s_CurrentRecursionDepth = 0; |
| 26 |
| 27 CPDF_SyntaxParser::CPDF_SyntaxParser() |
| 28 : m_MetadataObjnum(0), |
| 29 m_pFileAccess(nullptr), |
| 30 m_pFileBuf(nullptr), |
| 31 m_BufSize(CPDF_ModuleMgr::kFileBufSize) {} |
| 32 |
| 33 CPDF_SyntaxParser::~CPDF_SyntaxParser() { |
| 34 FX_Free(m_pFileBuf); |
| 35 } |
| 36 |
| 37 FX_BOOL CPDF_SyntaxParser::GetCharAt(FX_FILESIZE pos, uint8_t& ch) { |
| 38 CFX_AutoRestorer<FX_FILESIZE> save_pos(&m_Pos); |
| 39 m_Pos = pos; |
| 40 return GetNextChar(ch); |
| 41 } |
| 42 |
| 43 FX_BOOL CPDF_SyntaxParser::GetNextChar(uint8_t& ch) { |
| 44 FX_FILESIZE pos = m_Pos + m_HeaderOffset; |
| 45 if (pos >= m_FileLen) |
| 46 return FALSE; |
| 47 |
| 48 if (m_BufOffset >= pos || (FX_FILESIZE)(m_BufOffset + m_BufSize) <= pos) { |
| 49 FX_FILESIZE read_pos = pos; |
| 50 FX_DWORD read_size = m_BufSize; |
| 51 if ((FX_FILESIZE)read_size > m_FileLen) |
| 52 read_size = (FX_DWORD)m_FileLen; |
| 53 |
| 54 if ((FX_FILESIZE)(read_pos + read_size) > m_FileLen) { |
| 55 if (m_FileLen < (FX_FILESIZE)read_size) { |
| 56 read_pos = 0; |
| 57 read_size = (FX_DWORD)m_FileLen; |
| 58 } else { |
| 59 read_pos = m_FileLen - read_size; |
| 60 } |
| 61 } |
| 62 |
| 63 if (!m_pFileAccess->ReadBlock(m_pFileBuf, read_pos, read_size)) |
| 64 return FALSE; |
| 65 |
| 66 m_BufOffset = read_pos; |
| 67 } |
| 68 ch = m_pFileBuf[pos - m_BufOffset]; |
| 69 m_Pos++; |
| 70 return TRUE; |
| 71 } |
| 72 |
| 73 FX_BOOL CPDF_SyntaxParser::GetCharAtBackward(FX_FILESIZE pos, uint8_t& ch) { |
| 74 pos += m_HeaderOffset; |
| 75 if (pos >= m_FileLen) |
| 76 return FALSE; |
| 77 |
| 78 if (m_BufOffset >= pos || (FX_FILESIZE)(m_BufOffset + m_BufSize) <= pos) { |
| 79 FX_FILESIZE read_pos; |
| 80 if (pos < (FX_FILESIZE)m_BufSize) |
| 81 read_pos = 0; |
| 82 else |
| 83 read_pos = pos - m_BufSize + 1; |
| 84 |
| 85 FX_DWORD read_size = m_BufSize; |
| 86 if ((FX_FILESIZE)(read_pos + read_size) > m_FileLen) { |
| 87 if (m_FileLen < (FX_FILESIZE)read_size) { |
| 88 read_pos = 0; |
| 89 read_size = (FX_DWORD)m_FileLen; |
| 90 } else { |
| 91 read_pos = m_FileLen - read_size; |
| 92 } |
| 93 } |
| 94 |
| 95 if (!m_pFileAccess->ReadBlock(m_pFileBuf, read_pos, read_size)) |
| 96 return FALSE; |
| 97 |
| 98 m_BufOffset = read_pos; |
| 99 } |
| 100 ch = m_pFileBuf[pos - m_BufOffset]; |
| 101 return TRUE; |
| 102 } |
| 103 |
| 104 FX_BOOL CPDF_SyntaxParser::ReadBlock(uint8_t* pBuf, FX_DWORD size) { |
| 105 if (!m_pFileAccess->ReadBlock(pBuf, m_Pos + m_HeaderOffset, size)) |
| 106 return FALSE; |
| 107 m_Pos += size; |
| 108 return TRUE; |
| 109 } |
| 110 |
| 111 void CPDF_SyntaxParser::GetNextWordInternal(bool* bIsNumber) { |
| 112 m_WordSize = 0; |
| 113 if (bIsNumber) |
| 114 *bIsNumber = true; |
| 115 |
| 116 uint8_t ch; |
| 117 if (!GetNextChar(ch)) |
| 118 return; |
| 119 |
| 120 while (1) { |
| 121 while (PDFCharIsWhitespace(ch)) { |
| 122 if (!GetNextChar(ch)) |
| 123 return; |
| 124 } |
| 125 |
| 126 if (ch != '%') |
| 127 break; |
| 128 |
| 129 while (1) { |
| 130 if (!GetNextChar(ch)) |
| 131 return; |
| 132 if (PDFCharIsLineEnding(ch)) |
| 133 break; |
| 134 } |
| 135 } |
| 136 |
| 137 if (PDFCharIsDelimiter(ch)) { |
| 138 if (bIsNumber) |
| 139 *bIsNumber = false; |
| 140 |
| 141 m_WordBuffer[m_WordSize++] = ch; |
| 142 if (ch == '/') { |
| 143 while (1) { |
| 144 if (!GetNextChar(ch)) |
| 145 return; |
| 146 |
| 147 if (!PDFCharIsOther(ch) && !PDFCharIsNumeric(ch)) { |
| 148 m_Pos--; |
| 149 return; |
| 150 } |
| 151 |
| 152 if (m_WordSize < sizeof(m_WordBuffer) - 1) |
| 153 m_WordBuffer[m_WordSize++] = ch; |
| 154 } |
| 155 } else if (ch == '<') { |
| 156 if (!GetNextChar(ch)) |
| 157 return; |
| 158 |
| 159 if (ch == '<') |
| 160 m_WordBuffer[m_WordSize++] = ch; |
| 161 else |
| 162 m_Pos--; |
| 163 } else if (ch == '>') { |
| 164 if (!GetNextChar(ch)) |
| 165 return; |
| 166 |
| 167 if (ch == '>') |
| 168 m_WordBuffer[m_WordSize++] = ch; |
| 169 else |
| 170 m_Pos--; |
| 171 } |
| 172 return; |
| 173 } |
| 174 |
| 175 while (1) { |
| 176 if (m_WordSize < sizeof(m_WordBuffer) - 1) |
| 177 m_WordBuffer[m_WordSize++] = ch; |
| 178 |
| 179 if (!PDFCharIsNumeric(ch)) { |
| 180 if (bIsNumber) |
| 181 *bIsNumber = false; |
| 182 } |
| 183 |
| 184 if (!GetNextChar(ch)) |
| 185 return; |
| 186 |
| 187 if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) { |
| 188 m_Pos--; |
| 189 break; |
| 190 } |
| 191 } |
| 192 } |
| 193 |
| 194 CFX_ByteString CPDF_SyntaxParser::ReadString() { |
| 195 uint8_t ch; |
| 196 if (!GetNextChar(ch)) |
| 197 return CFX_ByteString(); |
| 198 |
| 199 CFX_ByteTextBuf buf; |
| 200 int32_t parlevel = 0; |
| 201 int32_t status = 0; |
| 202 int32_t iEscCode = 0; |
| 203 while (1) { |
| 204 switch (status) { |
| 205 case 0: |
| 206 if (ch == ')') { |
| 207 if (parlevel == 0) { |
| 208 return buf.GetByteString(); |
| 209 } |
| 210 parlevel--; |
| 211 buf.AppendChar(')'); |
| 212 } else if (ch == '(') { |
| 213 parlevel++; |
| 214 buf.AppendChar('('); |
| 215 } else if (ch == '\\') { |
| 216 status = 1; |
| 217 } else { |
| 218 buf.AppendChar(ch); |
| 219 } |
| 220 break; |
| 221 case 1: |
| 222 if (ch >= '0' && ch <= '7') { |
| 223 iEscCode = FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch)); |
| 224 status = 2; |
| 225 break; |
| 226 } |
| 227 |
| 228 if (ch == 'n') { |
| 229 buf.AppendChar('\n'); |
| 230 } else if (ch == 'r') { |
| 231 buf.AppendChar('\r'); |
| 232 } else if (ch == 't') { |
| 233 buf.AppendChar('\t'); |
| 234 } else if (ch == 'b') { |
| 235 buf.AppendChar('\b'); |
| 236 } else if (ch == 'f') { |
| 237 buf.AppendChar('\f'); |
| 238 } else if (ch == '\r') { |
| 239 status = 4; |
| 240 break; |
| 241 } else if (ch != '\n') { |
| 242 buf.AppendChar(ch); |
| 243 } |
| 244 status = 0; |
| 245 break; |
| 246 case 2: |
| 247 if (ch >= '0' && ch <= '7') { |
| 248 iEscCode = |
| 249 iEscCode * 8 + FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch)); |
| 250 status = 3; |
| 251 } else { |
| 252 buf.AppendChar(iEscCode); |
| 253 status = 0; |
| 254 continue; |
| 255 } |
| 256 break; |
| 257 case 3: |
| 258 if (ch >= '0' && ch <= '7') { |
| 259 iEscCode = |
| 260 iEscCode * 8 + FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch)); |
| 261 buf.AppendChar(iEscCode); |
| 262 status = 0; |
| 263 } else { |
| 264 buf.AppendChar(iEscCode); |
| 265 status = 0; |
| 266 continue; |
| 267 } |
| 268 break; |
| 269 case 4: |
| 270 status = 0; |
| 271 if (ch != '\n') |
| 272 continue; |
| 273 break; |
| 274 } |
| 275 |
| 276 if (!GetNextChar(ch)) |
| 277 break; |
| 278 } |
| 279 |
| 280 GetNextChar(ch); |
| 281 return buf.GetByteString(); |
| 282 } |
| 283 |
| 284 CFX_ByteString CPDF_SyntaxParser::ReadHexString() { |
| 285 uint8_t ch; |
| 286 if (!GetNextChar(ch)) |
| 287 return CFX_ByteString(); |
| 288 |
| 289 CFX_ByteTextBuf buf; |
| 290 bool bFirst = true; |
| 291 uint8_t code = 0; |
| 292 while (1) { |
| 293 if (ch == '>') |
| 294 break; |
| 295 |
| 296 if (std::isxdigit(ch)) { |
| 297 int val = FXSYS_toHexDigit(ch); |
| 298 if (bFirst) { |
| 299 code = val * 16; |
| 300 } else { |
| 301 code += val; |
| 302 buf.AppendByte(code); |
| 303 } |
| 304 bFirst = !bFirst; |
| 305 } |
| 306 |
| 307 if (!GetNextChar(ch)) |
| 308 break; |
| 309 } |
| 310 if (!bFirst) |
| 311 buf.AppendByte(code); |
| 312 |
| 313 return buf.GetByteString(); |
| 314 } |
| 315 |
| 316 void CPDF_SyntaxParser::ToNextLine() { |
| 317 uint8_t ch; |
| 318 while (GetNextChar(ch)) { |
| 319 if (ch == '\n') |
| 320 break; |
| 321 |
| 322 if (ch == '\r') { |
| 323 GetNextChar(ch); |
| 324 if (ch != '\n') |
| 325 --m_Pos; |
| 326 break; |
| 327 } |
| 328 } |
| 329 } |
| 330 |
| 331 void CPDF_SyntaxParser::ToNextWord() { |
| 332 uint8_t ch; |
| 333 if (!GetNextChar(ch)) |
| 334 return; |
| 335 |
| 336 while (1) { |
| 337 while (PDFCharIsWhitespace(ch)) { |
| 338 if (!GetNextChar(ch)) |
| 339 return; |
| 340 } |
| 341 |
| 342 if (ch != '%') |
| 343 break; |
| 344 |
| 345 while (1) { |
| 346 if (!GetNextChar(ch)) |
| 347 return; |
| 348 if (PDFCharIsLineEnding(ch)) |
| 349 break; |
| 350 } |
| 351 } |
| 352 m_Pos--; |
| 353 } |
| 354 |
| 355 CFX_ByteString CPDF_SyntaxParser::GetNextWord(bool* bIsNumber) { |
| 356 GetNextWordInternal(bIsNumber); |
| 357 return CFX_ByteString((const FX_CHAR*)m_WordBuffer, m_WordSize); |
| 358 } |
| 359 |
| 360 CFX_ByteString CPDF_SyntaxParser::GetKeyword() { |
| 361 return GetNextWord(nullptr); |
| 362 } |
| 363 |
// Parses the object that starts at the current position and returns it as a
// newly allocated object, or nullptr when no object can be produced (end of
// file, unrecognised token such as "]" or ">>", or recursion deeper than
// kParserMaxRecursionDepth).
//
// |pObjList| is handed to any CPDF_Reference that is created. |objnum| and
// |gennum| identify the enclosing indirect object; they are passed to the
// crypto handler and to ReadStream(). String objects are decrypted only when
// a crypto handler is set and |bDecrypt| is true. Array elements and
// dictionary values are always parsed with decryption enabled.
| 364 CPDF_Object* CPDF_SyntaxParser::GetObject(CPDF_IndirectObjectHolder* pObjList, |
| 365 FX_DWORD objnum, |
| 366 FX_DWORD gennum, |
| 367 FX_BOOL bDecrypt) { |
// The restorer is expected to undo the increment below when this call returns.
| 368 CFX_AutoRestorer<int> restorer(&s_CurrentRecursionDepth); |
| 369 if (++s_CurrentRecursionDepth > kParserMaxRecursionDepth) |
| 370 return nullptr; |
| 371 |
| 372 FX_FILESIZE SavedPos = m_Pos; |
| 373 bool bIsNumber; |
| 374 CFX_ByteString word = GetNextWord(&bIsNumber); |
| 375 if (word.GetLength() == 0) |
| 376 return nullptr; |
| 377 |
// A number may be the first token of an indirect reference "N G R". Look
// ahead two tokens; if that is not what follows, rewind to just after the
// first number and return it as a plain number.
| 378 if (bIsNumber) { |
// Shadows the outer SavedPos on purpose: this one marks the end of |word|.
| 379 FX_FILESIZE SavedPos = m_Pos; |
| 380 CFX_ByteString nextword = GetNextWord(&bIsNumber); |
| 381 if (bIsNumber) { |
| 382 CFX_ByteString nextword2 = GetNextWord(nullptr); |
| 383 if (nextword2 == "R") { |
// This local shadows the |objnum| parameter; it is the referenced object's
// number. The generation number (|nextword|) is not used.
| 384 FX_DWORD objnum = FXSYS_atoui(word); |
| 385 return new CPDF_Reference(pObjList, objnum); |
| 386 } |
| 387 } |
| 388 m_Pos = SavedPos; |
| 389 return new CPDF_Number(word); |
| 390 } |
| 391 |
| 392 if (word == "true" || word == "false") |
| 393 return new CPDF_Boolean(word == "true"); |
| 394 |
| 395 if (word == "null") |
| 396 return new CPDF_Null; |
| 397 |
// Literal string.
| 398 if (word == "(") { |
| 399 CFX_ByteString str = ReadString(); |
| 400 if (m_pCryptoHandler && bDecrypt) |
| 401 m_pCryptoHandler->Decrypt(objnum, gennum, str); |
| 402 return new CPDF_String(str, FALSE); |
| 403 } |
| 404 |
// Hexadecimal string ("<<" is handled further down as a dictionary).
| 405 if (word == "<") { |
| 406 CFX_ByteString str = ReadHexString(); |
| 407 if (m_pCryptoHandler && bDecrypt) |
| 408 m_pCryptoHandler->Decrypt(objnum, gennum, str); |
| 409 |
| 410 return new CPDF_String(str, TRUE); |
| 411 } |
| 412 |
// Array: elements are collected until a nested call returns nullptr. That
// happens on "]" but also on any other token the nested call cannot parse, so
// a malformed array is returned with the elements read so far.
| 413 if (word == "[") { |
| 414 CPDF_Array* pArray = new CPDF_Array; |
| 415 while (CPDF_Object* pObj = GetObject(pObjList, objnum, gennum, true)) |
| 416 pArray->Add(pObj); |
| 417 |
| 418 return pArray; |
| 419 } |
| 420 |
// Name: decode directly from the word buffer, skipping the leading '/'.
| 421 if (word[0] == '/') { |
| 422 return new CPDF_Name( |
| 423 PDF_NameDecode(CFX_ByteStringC(m_WordBuffer + 1, m_WordSize - 1))); |
| 424 } |
| 425 |
// Dictionary, possibly followed by a stream.
| 426 if (word == "<<") { |
// NOTE(review): nKeys is incremented below but never read in this function.
| 427 int32_t nKeys = 0; |
// Position just after a "/Contents" key, or 0 when no such key was seen.
| 428 FX_FILESIZE dwSignValuePos = 0; |
| 429 |
| 430 std::unique_ptr<CPDF_Dictionary, ReleaseDeleter<CPDF_Dictionary>> pDict( |
| 431 new CPDF_Dictionary); |
| 432 while (1) { |
| 433 CFX_ByteString key = GetNextWord(nullptr); |
// End of file inside the dictionary: give up on the whole dictionary.
| 434 if (key.IsEmpty()) |
| 435 return nullptr; |
| 436 |
// Start of |key|, assuming the whole token fit into the word buffer.
| 437 FX_FILESIZE SavedPos = m_Pos - key.GetLength(); |
| 438 if (key == ">>") |
| 439 break; |
| 440 |
// An "endobj" without a closing ">>" ends the dictionary; rewind so that the
// caller sees the keyword.
| 441 if (key == "endobj") { |
| 442 m_Pos = SavedPos; |
| 443 break; |
| 444 } |
| 445 |
// Anything that is not a name cannot be a key; skip it.
| 446 if (key[0] != '/') |
| 447 continue; |
| 448 |
| 449 ++nKeys; |
| 450 key = PDF_NameDecode(key); |
| 451 if (key.IsEmpty()) |
| 452 continue; |
| 453 |
| 454 if (key == "/Contents") |
| 455 dwSignValuePos = m_Pos; |
| 456 |
// A value that fails to parse is dropped together with its key.
| 457 CPDF_Object* pObj = GetObject(pObjList, objnum, gennum, true); |
| 458 if (!pObj) |
| 459 continue; |
| 460 |
// The dictionary stores keys without the leading '/'.
| 461 CFX_ByteStringC keyNoSlash(key.c_str() + 1, key.GetLength() - 1); |
| 462 pDict->SetAt(keyNoSlash, pObj); |
| 463 } |
| 464 |
| 465 // Only when this is a signature dictionary and has contents, we reset the |
| 466 // contents to the un-decrypted form. |
| 467 if (IsSignatureDict(pDict.get()) && dwSignValuePos) { |
// Re-parse the value at the remembered position with decryption off; the
// restorer is expected to put m_Pos back afterwards.
| 468 CFX_AutoRestorer<FX_FILESIZE> save_pos(&m_Pos); |
| 469 m_Pos = dwSignValuePos; |
| 470 pDict->SetAt("Contents", GetObject(pObjList, objnum, gennum, false)); |
| 471 } |
| 472 |
// A "stream" keyword right after the dictionary turns it into a stream
// object; otherwise un-read the token and return the dictionary.
| 473 FX_FILESIZE SavedPos = m_Pos; |
| 474 CFX_ByteString nextword = GetNextWord(nullptr); |
| 475 if (nextword != "stream") { |
| 476 m_Pos = SavedPos; |
| 477 return pDict.release(); |
| 478 } |
// Ownership of the dictionary passes to ReadStream().
| 479 return ReadStream(pDict.release(), objnum, gennum); |
| 480 } |
| 481 |
// A stray ">>" belongs to an enclosing dictionary: un-read it so the caller's
// key loop can see it.
| 482 if (word == ">>") |
| 483 m_Pos = SavedPos; |
| 484 |
| 485 return nullptr; |
| 486 } |
| 487 |
// Stricter variant of GetObject(): parses the object at the current position
// and returns it, or nullptr on failure.
//
// Differences from GetObject() visible in this function:
//  - strings are decrypted whenever a crypto handler is set (there is no
//    bDecrypt switch);
//  - an array is returned only if the token that ended it starts with ']';
//  - a dictionary value that fails to parse aborts the whole dictionary;
//  - there is no special handling of signature dictionaries.
// Nested array elements and dictionary values are still parsed with the
// non-strict GetObject().
| 488 CPDF_Object* CPDF_SyntaxParser::GetObjectByStrict( |
| 489 CPDF_IndirectObjectHolder* pObjList, |
| 490 FX_DWORD objnum, |
| 491 FX_DWORD gennum) { |
// The restorer is expected to undo the increment below when this call returns.
| 492 CFX_AutoRestorer<int> restorer(&s_CurrentRecursionDepth); |
| 493 if (++s_CurrentRecursionDepth > kParserMaxRecursionDepth) |
| 494 return nullptr; |
| 495 |
| 496 FX_FILESIZE SavedPos = m_Pos; |
| 497 bool bIsNumber; |
| 498 CFX_ByteString word = GetNextWord(&bIsNumber); |
| 499 if (word.GetLength() == 0) |
| 500 return nullptr; |
| 501 |
// Number, or the first token of an indirect reference "N G R". If the two
// following tokens do not complete a reference, rewind to just after the
// first number.
| 502 if (bIsNumber) { |
| 503 FX_FILESIZE SavedPos = m_Pos; |
| 504 CFX_ByteString nextword = GetNextWord(&bIsNumber); |
| 505 if (bIsNumber) { |
| 506 CFX_ByteString nextword2 = GetNextWord(nullptr); |
| 507 if (nextword2 == "R") |
| 508 return new CPDF_Reference(pObjList, FXSYS_atoui(word)); |
| 509 } |
| 510 m_Pos = SavedPos; |
| 511 return new CPDF_Number(word); |
| 512 } |
| 513 |
| 514 if (word == "true" || word == "false") |
| 515 return new CPDF_Boolean(word == "true"); |
| 516 |
| 517 if (word == "null") |
| 518 return new CPDF_Null; |
| 519 |
// Literal string.
| 520 if (word == "(") { |
| 521 CFX_ByteString str = ReadString(); |
| 522 if (m_pCryptoHandler) |
| 523 m_pCryptoHandler->Decrypt(objnum, gennum, str); |
| 524 return new CPDF_String(str, FALSE); |
| 525 } |
| 526 |
// Hexadecimal string.
| 527 if (word == "<") { |
| 528 CFX_ByteString str = ReadHexString(); |
| 529 if (m_pCryptoHandler) |
| 530 m_pCryptoHandler->Decrypt(objnum, gennum, str); |
| 531 return new CPDF_String(str, TRUE); |
| 532 } |
| 533 |
// Array: held in a smart pointer so that it is cleaned up when the array
// turns out not to be terminated by ']'.
| 534 if (word == "[") { |
| 535 std::unique_ptr<CPDF_Array, ReleaseDeleter<CPDF_Array>> pArray( |
| 536 new CPDF_Array); |
| 537 while (CPDF_Object* pObj = GetObject(pObjList, objnum, gennum, true)) |
| 538 pArray->Add(pObj); |
| 539 |
// m_WordBuffer still holds the token on which the last GetObject() call gave
// up; accept the array only if that token begins with ']'.
| 540 return m_WordBuffer[0] == ']' ? pArray.release() : nullptr; |
| 541 } |
| 542 |
// Name: decode directly from the word buffer, skipping the leading '/'.
| 543 if (word[0] == '/') { |
| 544 return new CPDF_Name( |
| 545 PDF_NameDecode(CFX_ByteStringC(m_WordBuffer + 1, m_WordSize - 1))); |
| 546 } |
| 547 |
// Dictionary, possibly followed by a stream.
| 548 if (word == "<<") { |
| 549 std::unique_ptr<CPDF_Dictionary, ReleaseDeleter<CPDF_Dictionary>> pDict( |
| 550 new CPDF_Dictionary); |
| 551 while (1) { |
// Position before the key token (and before any whitespace preceding it).
| 552 FX_FILESIZE SavedPos = m_Pos; |
| 553 CFX_ByteString key = GetNextWord(nullptr); |
// End of file inside the dictionary.
| 554 if (key.IsEmpty()) |
| 555 return nullptr; |
| 556 |
| 557 if (key == ">>") |
| 558 break; |
| 559 |
// "endobj" without a closing ">>" ends the dictionary; un-read the keyword.
| 560 if (key == "endobj") { |
| 561 m_Pos = SavedPos; |
| 562 break; |
| 563 } |
| 564 |
// Tokens that are not names cannot be keys; skip them.
| 565 if (key[0] != '/') |
| 566 continue; |
| 567 |
| 568 key = PDF_NameDecode(key); |
| 569 std::unique_ptr<CPDF_Object, ReleaseDeleter<CPDF_Object>> obj( |
| 570 GetObject(pObjList, objnum, gennum, true)); |
// Strict mode: a value that cannot be parsed fails the whole dictionary. The
// rest of the current line is consumed before bailing out.
| 571 if (!obj) { |
| 572 uint8_t ch; |
| 573 while (GetNextChar(ch) && ch != 0x0A && ch != 0x0D) { |
| 574 continue; |
| 575 } |
| 576 return nullptr; |
| 577 } |
| 578 |
// Store under the key without its leading '/'. A key that is only "/" is
// ignored and its value is released by |obj|.
| 579 if (key.GetLength() > 1) { |
| 580 pDict->SetAt(CFX_ByteStringC(key.c_str() + 1, key.GetLength() - 1), |
| 581 obj.release()); |
| 582 } |
| 583 } |
| 584 |
// A "stream" keyword right after the dictionary turns it into a stream
// object; otherwise un-read the token and return the dictionary.
| 585 FX_FILESIZE SavedPos = m_Pos; |
| 586 CFX_ByteString nextword = GetNextWord(nullptr); |
| 587 if (nextword != "stream") { |
| 588 m_Pos = SavedPos; |
| 589 return pDict.release(); |
| 590 } |
| 591 |
// Ownership of the dictionary passes to ReadStream().
| 592 return ReadStream(pDict.release(), objnum, gennum); |
| 593 } |
| 594 |
// A stray ">>" belongs to an enclosing dictionary: un-read it.
| 595 if (word == ">>") |
| 596 m_Pos = SavedPos; |
| 597 |
| 598 return nullptr; |
| 599 } |
| 600 |
| 601 unsigned int CPDF_SyntaxParser::ReadEOLMarkers(FX_FILESIZE pos) { |
| 602 unsigned char byte1 = 0; |
| 603 unsigned char byte2 = 0; |
| 604 |
| 605 GetCharAt(pos, byte1); |
| 606 GetCharAt(pos + 1, byte2); |
| 607 |
| 608 if (byte1 == '\r' && byte2 == '\n') |
| 609 return 2; |
| 610 |
| 611 if (byte1 == '\r' || byte1 == '\n') |
| 612 return 1; |
| 613 |
| 614 return 0; |
| 615 } |
| 616 |
// Reads the data of a stream whose dictionary |pDict| has been parsed and
// whose "stream" keyword has just been consumed, and returns a new
// CPDF_Stream built from |pDict| and the data.
//
// This function takes over |pDict|: it is released on every failure path and
// handed to the CPDF_Stream on success. |objnum| / |gennum| identify the
// enclosing indirect object and are used for decryption. Returns nullptr when
// no usable length can be determined.
| 617 CPDF_Stream* CPDF_SyntaxParser::ReadStream(CPDF_Dictionary* pDict, |
| 618 FX_DWORD objnum, |
| 619 FX_DWORD gennum) { |
| 620 CPDF_Object* pLenObj = pDict->GetElement("Length"); |
| 621 FX_FILESIZE len = -1; |
| 622 CPDF_Reference* pLenObjRef = ToReference(pLenObj); |
| 623 |
// Take the declared length only when /Length is a direct value, or a
// reference that has an object list and does not point back at this very
// object. Otherwise |len| stays -1.
| 624 bool differingObjNum = !pLenObjRef || (pLenObjRef->GetObjList() && |
| 625 pLenObjRef->GetRefObjNum() != objnum); |
| 626 if (pLenObj && differingObjNum) |
| 627 len = pLenObj->GetInteger(); |
| 628 |
| 629 // Locate the start of stream. |
| 630 ToNextLine(); |
| 631 FX_FILESIZE streamStartPos = m_Pos; |
| 632 |
| 633 const CFX_ByteStringC kEndStreamStr("endstream"); |
| 634 const CFX_ByteStringC kEndObjStr("endobj"); |
| 635 |
// The object whose number equals m_MetadataObjnum is read without a crypto
// handler even when one is installed.
| 636 CPDF_CryptoHandler* pCryptoHandler = |
| 637 objnum == (FX_DWORD)m_MetadataObjnum ? nullptr : m_pCryptoHandler.get(); |
// Without a crypto handler the declared length is verified against the file
// and recomputed if it does not check out. With a crypto handler the declared
// length is used as is.
| 638 if (!pCryptoHandler) { |
| 639 FX_BOOL bSearchForKeyword = TRUE; |
| 640 if (len >= 0) { |
// Jump to where the data should end, guarding the addition against overflow
// and against running past the end of the file.
| 641 pdfium::base::CheckedNumeric<FX_FILESIZE> pos = m_Pos; |
| 642 pos += len; |
| 643 if (pos.IsValid() && pos.ValueOrDie() < m_FileLen) |
| 644 m_Pos = pos.ValueOrDie(); |
| 645 |
| 646 m_Pos += ReadEOLMarkers(m_Pos); |
// Clear the part of the word buffer that is compared below so that bytes left
// over from an earlier token cannot produce a false match.
| 647 FXSYS_memset(m_WordBuffer, 0, kEndStreamStr.GetLength() + 1); |
| 648 GetNextWordInternal(nullptr); |
| 649 // Earlier version of PDF specification doesn't require EOL marker before |
| 650 // 'endstream' keyword. If keyword 'endstream' follows the bytes in |
| 651 // specified length, it signals the end of stream. |
| 652 if (FXSYS_memcmp(m_WordBuffer, kEndStreamStr.GetPtr(), |
| 653 kEndStreamStr.GetLength()) == 0) { |
| 654 bSearchForKeyword = FALSE; |
| 655 } |
| 656 } |
| 657 |
| 658 if (bSearchForKeyword) { |
| 659 // If len is not available, len needs to be calculated |
| 660 // by searching the keywords "endstream" or "endobj". |
| 661 m_Pos = streamStartPos; |
// After the loop: offset of the first whole-word "endstream" relative to
// streamStartPos, or negative when there is none.
| 662 FX_FILESIZE endStreamOffset = 0; |
| 663 while (endStreamOffset >= 0) { |
| 664 endStreamOffset = FindTag(kEndStreamStr, 0); |
| 665 |
| 666 // Can't find "endstream". |
| 667 if (endStreamOffset < 0) |
| 668 break; |
| 669 |
| 670 // Stop searching when "endstream" is found. |
| 671 if (IsWholeWord(m_Pos - kEndStreamStr.GetLength(), m_FileLen, |
| 672 kEndStreamStr, TRUE)) { |
| 673 endStreamOffset = m_Pos - streamStartPos - kEndStreamStr.GetLength(); |
| 674 break; |
| 675 } |
| 676 } |
| 677 |
// Same search for "endobj", again starting from the beginning of the data.
| 678 m_Pos = streamStartPos; |
| 679 FX_FILESIZE endObjOffset = 0; |
| 680 while (endObjOffset >= 0) { |
| 681 endObjOffset = FindTag(kEndObjStr, 0); |
| 682 |
| 683 // Can't find "endobj". |
| 684 if (endObjOffset < 0) |
| 685 break; |
| 686 |
| 687 // Stop searching when "endobj" is found. |
| 688 if (IsWholeWord(m_Pos - kEndObjStr.GetLength(), m_FileLen, kEndObjStr, |
| 689 TRUE)) { |
| 690 endObjOffset = m_Pos - streamStartPos - kEndObjStr.GetLength(); |
| 691 break; |
| 692 } |
| 693 } |
| 694 |
| 695 // Can't find "endstream" or "endobj". |
| 696 if (endStreamOffset < 0 && endObjOffset < 0) { |
| 697 pDict->Release(); |
| 698 return nullptr; |
| 699 } |
| 700 |
// Use whichever keyword comes first as the end of the data.
| 701 if (endStreamOffset < 0 && endObjOffset >= 0) { |
| 702 // Correct the position of end stream. |
| 703 endStreamOffset = endObjOffset; |
| 704 } else if (endStreamOffset >= 0 && endObjOffset < 0) { |
| 705 // Correct the position of end obj. |
| 706 endObjOffset = endStreamOffset; |
| 707 } else if (endStreamOffset > endObjOffset) { |
| 708 endStreamOffset = endObjOffset; |
| 709 } |
| 710 |
// Drop the EOL marker ("\r\n", or a single '\r' / '\n') that sits directly in
// front of the keyword; it is not part of the data.
| 711 len = endStreamOffset; |
| 712 int numMarkers = ReadEOLMarkers(streamStartPos + endStreamOffset - 2); |
| 713 if (numMarkers == 2) { |
| 714 len -= 2; |
| 715 } else { |
| 716 numMarkers = ReadEOLMarkers(streamStartPos + endStreamOffset - 1); |
| 717 if (numMarkers == 1) { |
| 718 len -= 1; |
| 719 } |
| 720 } |
| 721 |
// |len| can become negative when the keyword follows the start of the data
// immediately, because the bytes inspected above then lie before
// streamStartPos.
| 722 if (len < 0) { |
| 723 pDict->Release(); |
| 724 return nullptr; |
| 725 } |
// Record the recomputed length in the dictionary.
| 726 pDict->SetAtInteger("Length", len); |
| 727 } |
| 728 m_Pos = streamStartPos; |
| 729 } |
| 730 |
// Still negative here means: crypto handler present and no usable /Length.
| 731 if (len < 0) { |
| 732 pDict->Release(); |
| 733 return nullptr; |
| 734 } |
| 735 |
| 736 uint8_t* pData = nullptr; |
| 737 if (len > 0) { |
| 738 pData = FX_Alloc(uint8_t, len); |
// NOTE(review): the result of ReadBlock() is not checked; on failure |pData|
// keeps whatever FX_Alloc() returned.
| 739 ReadBlock(pData, len); |
| 740 if (pCryptoHandler) { |
| 741 CFX_BinaryBuf dest_buf; |
| 742 dest_buf.EstimateSize(pCryptoHandler->DecryptGetSize(len)); |
| 743 |
| 744 void* context = pCryptoHandler->DecryptStart(objnum, gennum); |
| 745 pCryptoHandler->DecryptStream(context, pData, len, dest_buf); |
| 746 pCryptoHandler->DecryptFinish(context, dest_buf); |
| 747 |
// Swap the encrypted bytes for the decrypted ones; DetachBuffer() presumably
// stops |dest_buf| from freeing the memory now referenced by |pData|.
| 748 FX_Free(pData); |
| 749 pData = dest_buf.GetBuffer(); |
| 750 len = dest_buf.GetSize(); |
| 751 dest_buf.DetachBuffer(); |
| 752 } |
| 753 } |
| 754 |
| 755 CPDF_Stream* pStream = new CPDF_Stream(pData, len, pDict); |
// |streamStartPos| is reused from here on as "position right after the data".
| 756 streamStartPos = m_Pos; |
| 757 FXSYS_memset(m_WordBuffer, 0, kEndObjStr.GetLength() + 1); |
| 758 |
// Consume the token after the data. If it is exactly "endobj" followed by an
// EOL marker, un-read it; any other token stays consumed.
| 759 GetNextWordInternal(nullptr); |
| 760 |
| 761 int numMarkers = ReadEOLMarkers(m_Pos); |
| 762 if (m_WordSize == kEndObjStr.GetLength() && numMarkers != 0 && |
| 763 FXSYS_memcmp(m_WordBuffer, kEndObjStr.GetPtr(), kEndObjStr.GetLength()) == |
| 764 0) { |
| 765 m_Pos = streamStartPos; |
| 766 } |
| 767 return pStream; |
| 768 } |
| 769 |
| 770 void CPDF_SyntaxParser::InitParser(IFX_FileRead* pFileAccess, |
| 771 FX_DWORD HeaderOffset) { |
| 772 FX_Free(m_pFileBuf); |
| 773 |
| 774 m_pFileBuf = FX_Alloc(uint8_t, m_BufSize); |
| 775 m_HeaderOffset = HeaderOffset; |
| 776 m_FileLen = pFileAccess->GetSize(); |
| 777 m_Pos = 0; |
| 778 m_pFileAccess = pFileAccess; |
| 779 m_BufOffset = 0; |
| 780 pFileAccess->ReadBlock( |
| 781 m_pFileBuf, 0, |
| 782 (size_t)((FX_FILESIZE)m_BufSize > m_FileLen ? m_FileLen : m_BufSize)); |
| 783 } |
| 784 |
| 785 uint32_t CPDF_SyntaxParser::GetDirectNum() { |
| 786 bool bIsNumber; |
| 787 GetNextWordInternal(&bIsNumber); |
| 788 if (!bIsNumber) |
| 789 return 0; |
| 790 |
| 791 m_WordBuffer[m_WordSize] = 0; |
| 792 return FXSYS_atoui(reinterpret_cast<const FX_CHAR*>(m_WordBuffer)); |
| 793 } |
| 794 |
| 795 bool CPDF_SyntaxParser::IsWholeWord(FX_FILESIZE startpos, |
| 796 FX_FILESIZE limit, |
| 797 const CFX_ByteStringC& tag, |
| 798 FX_BOOL checkKeyword) { |
| 799 const FX_DWORD taglen = tag.GetLength(); |
| 800 |
| 801 bool bCheckLeft = !PDFCharIsDelimiter(tag[0]) && !PDFCharIsWhitespace(tag[0]); |
| 802 bool bCheckRight = !PDFCharIsDelimiter(tag[taglen - 1]) && |
| 803 !PDFCharIsWhitespace(tag[taglen - 1]); |
| 804 |
| 805 uint8_t ch; |
| 806 if (bCheckRight && startpos + (int32_t)taglen <= limit && |
| 807 GetCharAt(startpos + (int32_t)taglen, ch)) { |
| 808 if (PDFCharIsNumeric(ch) || PDFCharIsOther(ch) || |
| 809 (checkKeyword && PDFCharIsDelimiter(ch))) { |
| 810 return false; |
| 811 } |
| 812 } |
| 813 |
| 814 if (bCheckLeft && startpos > 0 && GetCharAt(startpos - 1, ch)) { |
| 815 if (PDFCharIsNumeric(ch) || PDFCharIsOther(ch) || |
| 816 (checkKeyword && PDFCharIsDelimiter(ch))) { |
| 817 return false; |
| 818 } |
| 819 } |
| 820 return true; |
| 821 } |
| 822 |
| 823 // TODO(dsinclair): Split into a SearchWordForward and SearchWordBackwards |
| 824 // and drop the bool. |
| 825 FX_BOOL CPDF_SyntaxParser::SearchWord(const CFX_ByteStringC& tag, |
| 826 FX_BOOL bWholeWord, |
| 827 FX_BOOL bForward, |
| 828 FX_FILESIZE limit) { |
| 829 int32_t taglen = tag.GetLength(); |
| 830 if (taglen == 0) |
| 831 return FALSE; |
| 832 |
| 833 FX_FILESIZE pos = m_Pos; |
| 834 int32_t offset = 0; |
| 835 if (!bForward) |
| 836 offset = taglen - 1; |
| 837 |
| 838 const uint8_t* tag_data = tag.GetPtr(); |
| 839 uint8_t byte; |
| 840 while (1) { |
| 841 if (bForward) { |
| 842 if (limit && pos >= m_Pos + limit) |
| 843 return FALSE; |
| 844 |
| 845 if (!GetCharAt(pos, byte)) |
| 846 return FALSE; |
| 847 |
| 848 } else { |
| 849 if (limit && pos <= m_Pos - limit) |
| 850 return FALSE; |
| 851 |
| 852 if (!GetCharAtBackward(pos, byte)) |
| 853 return FALSE; |
| 854 } |
| 855 |
| 856 if (byte == tag_data[offset]) { |
| 857 if (bForward) { |
| 858 offset++; |
| 859 if (offset < taglen) { |
| 860 pos++; |
| 861 continue; |
| 862 } |
| 863 } else { |
| 864 offset--; |
| 865 if (offset >= 0) { |
| 866 pos--; |
| 867 continue; |
| 868 } |
| 869 } |
| 870 |
| 871 FX_FILESIZE startpos = bForward ? pos - taglen + 1 : pos; |
| 872 if (!bWholeWord || IsWholeWord(startpos, limit, tag, FALSE)) { |
| 873 m_Pos = startpos; |
| 874 return TRUE; |
| 875 } |
| 876 } |
| 877 |
| 878 if (bForward) { |
| 879 offset = byte == tag_data[0] ? 1 : 0; |
| 880 pos++; |
| 881 } else { |
| 882 offset = byte == tag_data[taglen - 1] ? taglen - 2 : taglen - 1; |
| 883 pos--; |
| 884 } |
| 885 |
| 886 if (pos < 0) |
| 887 return FALSE; |
| 888 } |
| 889 |
| 890 return FALSE; |
| 891 } |
| 892 |
| 893 int32_t CPDF_SyntaxParser::SearchMultiWord(const CFX_ByteStringC& tags, |
| 894 FX_BOOL bWholeWord, |
| 895 FX_FILESIZE limit) { |
| 896 int32_t ntags = 1; |
| 897 for (int i = 0; i < tags.GetLength(); ++i) { |
| 898 if (tags[i] == 0) |
| 899 ++ntags; |
| 900 } |
| 901 |
| 902 std::vector<SearchTagRecord> patterns(ntags); |
| 903 FX_DWORD start = 0; |
| 904 FX_DWORD itag = 0; |
| 905 FX_DWORD max_len = 0; |
| 906 for (int i = 0; i <= tags.GetLength(); ++i) { |
| 907 if (tags[i] == 0) { |
| 908 FX_DWORD len = i - start; |
| 909 max_len = std::max(len, max_len); |
| 910 patterns[itag].m_pTag = tags.GetCStr() + start; |
| 911 patterns[itag].m_Len = len; |
| 912 patterns[itag].m_Offset = 0; |
| 913 start = i + 1; |
| 914 ++itag; |
| 915 } |
| 916 } |
| 917 |
| 918 const FX_FILESIZE pos_limit = m_Pos + limit; |
| 919 for (FX_FILESIZE pos = m_Pos; !limit || pos < pos_limit; ++pos) { |
| 920 uint8_t byte; |
| 921 if (!GetCharAt(pos, byte)) |
| 922 break; |
| 923 |
| 924 for (int i = 0; i < ntags; ++i) { |
| 925 SearchTagRecord& pat = patterns[i]; |
| 926 if (pat.m_pTag[pat.m_Offset] != byte) { |
| 927 pat.m_Offset = (pat.m_pTag[0] == byte) ? 1 : 0; |
| 928 continue; |
| 929 } |
| 930 |
| 931 ++pat.m_Offset; |
| 932 if (pat.m_Offset != pat.m_Len) |
| 933 continue; |
| 934 |
| 935 if (!bWholeWord || |
| 936 IsWholeWord(pos - pat.m_Len, limit, |
| 937 CFX_ByteStringC(pat.m_pTag, pat.m_Len), FALSE)) { |
| 938 return i; |
| 939 } |
| 940 |
| 941 pat.m_Offset = (pat.m_pTag[0] == byte) ? 1 : 0; |
| 942 } |
| 943 } |
| 944 return -1; |
| 945 } |
| 946 |
| 947 FX_FILESIZE CPDF_SyntaxParser::FindTag(const CFX_ByteStringC& tag, |
| 948 FX_FILESIZE limit) { |
| 949 int32_t taglen = tag.GetLength(); |
| 950 int32_t match = 0; |
| 951 limit += m_Pos; |
| 952 FX_FILESIZE startpos = m_Pos; |
| 953 |
| 954 while (1) { |
| 955 uint8_t ch; |
| 956 if (!GetNextChar(ch)) |
| 957 return -1; |
| 958 |
| 959 if (ch == tag[match]) { |
| 960 match++; |
| 961 if (match == taglen) |
| 962 return m_Pos - startpos - taglen; |
| 963 } else { |
| 964 match = ch == tag[0] ? 1 : 0; |
| 965 } |
| 966 |
| 967 if (limit && m_Pos == limit) |
| 968 return -1; |
| 969 } |
| 970 return -1; |
| 971 } |
| 972 |
| 973 void CPDF_SyntaxParser::SetEncrypt( |
| 974 std::unique_ptr<CPDF_CryptoHandler> pCryptoHandler) { |
| 975 m_pCryptoHandler = std::move(pCryptoHandler); |
| 976 } |
OLD | NEW |