Chromium Code Reviews| OLD | NEW |
|---|---|
| (Empty) | |
| 1 // Copyright 2016 PDFium Authors. All rights reserved. | |
|
dsinclair
2016/03/08 01:15:05
nit: 2016?
Tom Sepez
2016/03/08 19:35:41
Done.
| |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com | |
| 6 | |
| 7 #include "core/src/fpdfapi/fpdf_parser/cpdf_syntax_parser.h" | |
| 8 | |
| 9 #include "core/include/fpdfapi/fpdf_module.h" | |
| 10 #include "core/include/fpdfapi/fpdf_parser.h" | |
| 11 #include "core/include/fxcrt/fx_ext.h" | |
| 12 #include "third_party/base/numerics/safe_math.h" | |
| 13 | |
| 14 struct SearchTagRecord { | |
|
dsinclair
2016/03/08 01:15:05
Can this go in namespace {}?
Tom Sepez
2016/03/08 19:35:42
Done.
| |
| 15 const char* m_pTag; | |
| 16 FX_DWORD m_Len; | |
| 17 FX_DWORD m_Offset; | |
| 18 }; | |
| 19 | |
// static
// Nesting depth of GetObject() / GetObjectByStrict() calls currently in
// progress. Both functions increment it on entry and bail out once it exceeds
// kParserMaxRecursionDepth.
int CPDF_SyntaxParser::s_CurrentRecursionDepth = 0;
| 22 | |
// Builds a parser with no file attached and no read buffer allocated; only
// the buffer size is fixed here (CPDF_ModuleMgr::kFileBufSize). InitParser()
// attaches the file and allocates the buffer.
CPDF_SyntaxParser::CPDF_SyntaxParser()
    : m_MetadataObjnum(0),
      m_pFileAccess(nullptr),
      m_pFileBuf(nullptr),
      m_BufSize(CPDF_ModuleMgr::kFileBufSize) {}
| 28 | |
// Frees the read buffer. The file access object is not released here.
CPDF_SyntaxParser::~CPDF_SyntaxParser() {
  FX_Free(m_pFileBuf);
}
| 32 | |
| 33 FX_BOOL CPDF_SyntaxParser::GetCharAt(FX_FILESIZE pos, uint8_t& ch) { | |
| 34 CFX_AutoRestorer<FX_FILESIZE> save_pos(&m_Pos); | |
| 35 m_Pos = pos; | |
| 36 return GetNextChar(ch); | |
| 37 } | |
| 38 | |
| 39 FX_BOOL CPDF_SyntaxParser::GetNextChar(uint8_t& ch) { | |
| 40 FX_FILESIZE pos = m_Pos + m_HeaderOffset; | |
| 41 if (pos >= m_FileLen) | |
| 42 return FALSE; | |
| 43 | |
| 44 if (m_BufOffset >= pos || (FX_FILESIZE)(m_BufOffset + m_BufSize) <= pos) { | |
| 45 FX_FILESIZE read_pos = pos; | |
| 46 FX_DWORD read_size = m_BufSize; | |
| 47 if ((FX_FILESIZE)read_size > m_FileLen) | |
| 48 read_size = (FX_DWORD)m_FileLen; | |
| 49 | |
| 50 if ((FX_FILESIZE)(read_pos + read_size) > m_FileLen) { | |
| 51 if (m_FileLen < (FX_FILESIZE)read_size) { | |
| 52 read_pos = 0; | |
| 53 read_size = (FX_DWORD)m_FileLen; | |
| 54 } else { | |
| 55 read_pos = m_FileLen - read_size; | |
| 56 } | |
| 57 } | |
| 58 | |
| 59 if (!m_pFileAccess->ReadBlock(m_pFileBuf, read_pos, read_size)) | |
| 60 return FALSE; | |
| 61 | |
| 62 m_BufOffset = read_pos; | |
| 63 } | |
| 64 ch = m_pFileBuf[pos - m_BufOffset]; | |
| 65 m_Pos++; | |
| 66 return TRUE; | |
| 67 } | |
| 68 | |
| 69 FX_BOOL CPDF_SyntaxParser::GetCharAtBackward(FX_FILESIZE pos, uint8_t& ch) { | |
| 70 pos += m_HeaderOffset; | |
| 71 if (pos >= m_FileLen) | |
| 72 return FALSE; | |
| 73 | |
| 74 if (m_BufOffset >= pos || (FX_FILESIZE)(m_BufOffset + m_BufSize) <= pos) { | |
| 75 FX_FILESIZE read_pos; | |
| 76 if (pos < (FX_FILESIZE)m_BufSize) | |
| 77 read_pos = 0; | |
| 78 else | |
| 79 read_pos = pos - m_BufSize + 1; | |
| 80 | |
| 81 FX_DWORD read_size = m_BufSize; | |
| 82 if ((FX_FILESIZE)(read_pos + read_size) > m_FileLen) { | |
| 83 if (m_FileLen < (FX_FILESIZE)read_size) { | |
| 84 read_pos = 0; | |
| 85 read_size = (FX_DWORD)m_FileLen; | |
| 86 } else { | |
| 87 read_pos = m_FileLen - read_size; | |
| 88 } | |
| 89 } | |
| 90 | |
| 91 if (!m_pFileAccess->ReadBlock(m_pFileBuf, read_pos, read_size)) | |
| 92 return FALSE; | |
| 93 | |
| 94 m_BufOffset = read_pos; | |
| 95 } | |
| 96 ch = m_pFileBuf[pos - m_BufOffset]; | |
| 97 return TRUE; | |
| 98 } | |
| 99 | |
| 100 FX_BOOL CPDF_SyntaxParser::ReadBlock(uint8_t* pBuf, FX_DWORD size) { | |
| 101 if (!m_pFileAccess->ReadBlock(pBuf, m_Pos + m_HeaderOffset, size)) | |
| 102 return FALSE; | |
| 103 m_Pos += size; | |
| 104 return TRUE; | |
| 105 } | |
| 106 | |
// Reads the next token into m_WordBuffer / m_WordSize, skipping leading
// whitespace and '%' comments.
//
// |bIsNumber| (optional) is set to true when every byte of the token is a
// numeric character, false otherwise (always false for delimiter tokens).
//
// m_WordBuffer is not NUL-terminated here. When the end of input is hit the
// function returns with whatever has been collected so far (possibly nothing).
void CPDF_SyntaxParser::GetNextWordInternal(bool* bIsNumber) {
  m_WordSize = 0;
  // Assume a number until a non-numeric byte proves otherwise.
  if (bIsNumber)
    *bIsNumber = true;

  uint8_t ch;
  if (!GetNextChar(ch))
    return;

  // Skip any mix of whitespace and comments preceding the token.
  while (1) {
    while (PDFCharIsWhitespace(ch)) {
      if (!GetNextChar(ch))
        return;
    }

    if (ch != '%')
      break;

    // A comment runs up to the next line-ending byte.
    while (1) {
      if (!GetNextChar(ch))
        return;
      if (PDFCharIsLineEnding(ch))
        break;
    }
  }

  if (PDFCharIsDelimiter(ch)) {
    if (bIsNumber)
      *bIsNumber = false;

    m_WordBuffer[m_WordSize++] = ch;
    if (ch == '/') {
      // Name token: keep consuming regular/numeric bytes. Bytes that do not
      // fit in the buffer are consumed but dropped.
      while (1) {
        if (!GetNextChar(ch))
          return;

        if (!PDFCharIsOther(ch) && !PDFCharIsNumeric(ch)) {
          // Un-read the byte that terminated the name.
          m_Pos--;
          return;
        }

        if (m_WordSize < sizeof(m_WordBuffer) - 1)
          m_WordBuffer[m_WordSize++] = ch;
      }
    } else if (ch == '<') {
      // "<<" is a single token; a lone '<' leaves the next byte unread.
      if (!GetNextChar(ch))
        return;

      if (ch == '<')
        m_WordBuffer[m_WordSize++] = ch;
      else
        m_Pos--;
    } else if (ch == '>') {
      // ">>" is a single token; a lone '>' leaves the next byte unread.
      if (!GetNextChar(ch))
        return;

      if (ch == '>')
        m_WordBuffer[m_WordSize++] = ch;
      else
        m_Pos--;
    }
    // Every other delimiter is a one-byte token.
    return;
  }

  // Regular token: collect bytes until a delimiter or whitespace shows up.
  while (1) {
    // Overlong tokens are truncated to the buffer capacity but still consumed.
    if (m_WordSize < sizeof(m_WordBuffer) - 1)
      m_WordBuffer[m_WordSize++] = ch;

    if (!PDFCharIsNumeric(ch)) {
      if (bIsNumber)
        *bIsNumber = false;
    }

    if (!GetNextChar(ch))
      return;

    if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) {
      // Un-read the terminating byte so the next call sees it.
      m_Pos--;
      break;
    }
  }
}
| 189 | |
// Reads the body of a literal string; the caller has already consumed the
// opening '('. Handles nested parentheses and backslash escapes, and returns
// the decoded bytes once the matching ')' is found. If the input ends first,
// the bytes decoded so far are returned.
//
// |status| drives a small state machine:
//   0 - ordinary text
//   1 - just saw a backslash
//   2 - one octal digit of an escape collected
//   3 - two octal digits of an escape collected
//   4 - just saw backslash + CR (a following LF is swallowed)
// A `continue` inside the switch skips the read at the bottom of the loop, so
// the current byte is examined again under the new state.
CFX_ByteString CPDF_SyntaxParser::ReadString() {
  uint8_t ch;
  if (!GetNextChar(ch))
    return CFX_ByteString();

  CFX_ByteTextBuf buf;
  int32_t parlevel = 0;
  int32_t status = 0;
  int32_t iEscCode = 0;
  while (1) {
    switch (status) {
      case 0:
        if (ch == ')') {
          // An unbalanced ')' closes the string.
          if (parlevel == 0) {
            return buf.GetByteString();
          }
          parlevel--;
          buf.AppendChar(')');
        } else if (ch == '(') {
          parlevel++;
          buf.AppendChar('(');
        } else if (ch == '\\') {
          status = 1;
        } else {
          buf.AppendChar(ch);
        }
        break;
      case 1:
        if (ch >= '0' && ch <= '7') {
          // Start of an octal escape (up to three digits).
          iEscCode = FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch));
          status = 2;
          break;
        }

        if (ch == 'n') {
          buf.AppendChar('\n');
        } else if (ch == 'r') {
          buf.AppendChar('\r');
        } else if (ch == 't') {
          buf.AppendChar('\t');
        } else if (ch == 'b') {
          buf.AppendChar('\b');
        } else if (ch == 'f') {
          buf.AppendChar('\f');
        } else if (ch == '\r') {
          // Backslash + CR: produces no output; check for a trailing LF next.
          status = 4;
          break;
        } else if (ch != '\n') {
          // Any other escaped byte stands for itself; backslash + LF produces
          // no output.
          buf.AppendChar(ch);
        }
        status = 0;
        break;
      case 2:
        if (ch >= '0' && ch <= '7') {
          iEscCode =
              iEscCode * 8 + FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch));
          status = 3;
        } else {
          // Escape ended after one digit; re-process this byte as plain text.
          buf.AppendChar(iEscCode);
          status = 0;
          continue;
        }
        break;
      case 3:
        if (ch >= '0' && ch <= '7') {
          iEscCode =
              iEscCode * 8 + FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch));
          buf.AppendChar(iEscCode);
          status = 0;
        } else {
          // Escape ended after two digits; re-process this byte as plain text.
          buf.AppendChar(iEscCode);
          status = 0;
          continue;
        }
        break;
      case 4:
        status = 0;
        // Anything other than LF is re-processed as plain text.
        if (ch != '\n')
          continue;
        break;
    }

    if (!GetNextChar(ch))
      break;
  }

  // Reached only after GetNextChar() failed above; the result of this extra
  // read is ignored.
  GetNextChar(ch);
  return buf.GetByteString();
}
| 279 | |
| 280 CFX_ByteString CPDF_SyntaxParser::ReadHexString() { | |
| 281 uint8_t ch; | |
| 282 if (!GetNextChar(ch)) | |
| 283 return CFX_ByteString(); | |
| 284 | |
| 285 CFX_ByteTextBuf buf; | |
| 286 bool bFirst = true; | |
| 287 uint8_t code = 0; | |
| 288 while (1) { | |
| 289 if (ch == '>') | |
| 290 break; | |
| 291 | |
| 292 if (std::isxdigit(ch)) { | |
| 293 int val = FXSYS_toHexDigit(ch); | |
| 294 if (bFirst) { | |
| 295 code = val * 16; | |
| 296 } else { | |
| 297 code += val; | |
| 298 buf.AppendByte(code); | |
| 299 } | |
| 300 bFirst = !bFirst; | |
| 301 } | |
| 302 | |
| 303 if (!GetNextChar(ch)) | |
| 304 break; | |
| 305 } | |
| 306 if (!bFirst) | |
| 307 buf.AppendByte(code); | |
| 308 | |
| 309 return buf.GetByteString(); | |
| 310 } | |
| 311 | |
| 312 void CPDF_SyntaxParser::ToNextLine() { | |
| 313 uint8_t ch; | |
| 314 while (GetNextChar(ch)) { | |
| 315 if (ch == '\n') | |
| 316 break; | |
| 317 | |
| 318 if (ch == '\r') { | |
| 319 GetNextChar(ch); | |
| 320 if (ch != '\n') | |
| 321 --m_Pos; | |
| 322 break; | |
| 323 } | |
| 324 } | |
| 325 } | |
| 326 | |
| 327 void CPDF_SyntaxParser::ToNextWord() { | |
| 328 uint8_t ch; | |
| 329 if (!GetNextChar(ch)) | |
| 330 return; | |
| 331 | |
| 332 while (1) { | |
| 333 while (PDFCharIsWhitespace(ch)) { | |
| 334 if (!GetNextChar(ch)) | |
| 335 return; | |
| 336 } | |
| 337 | |
| 338 if (ch != '%') | |
| 339 break; | |
| 340 | |
| 341 while (1) { | |
| 342 if (!GetNextChar(ch)) | |
| 343 return; | |
| 344 if (PDFCharIsLineEnding(ch)) | |
| 345 break; | |
| 346 } | |
| 347 } | |
| 348 m_Pos--; | |
| 349 } | |
| 350 | |
| 351 CFX_ByteString CPDF_SyntaxParser::GetNextWord(bool* bIsNumber) { | |
| 352 GetNextWordInternal(bIsNumber); | |
| 353 return CFX_ByteString((const FX_CHAR*)m_WordBuffer, m_WordSize); | |
| 354 } | |
| 355 | |
| 356 CFX_ByteString CPDF_SyntaxParser::GetKeyword() { | |
| 357 return GetNextWord(nullptr); | |
| 358 } | |
| 359 | |
| 360 CPDF_Object* CPDF_SyntaxParser::GetObject(CPDF_IndirectObjectHolder* pObjList, | |
| 361 FX_DWORD objnum, | |
| 362 FX_DWORD gennum, | |
| 363 FX_BOOL bDecrypt) { | |
| 364 CFX_AutoRestorer<int> restorer(&s_CurrentRecursionDepth); | |
| 365 if (++s_CurrentRecursionDepth > kParserMaxRecursionDepth) | |
| 366 return nullptr; | |
| 367 | |
| 368 FX_FILESIZE SavedPos = m_Pos; | |
| 369 bool bIsNumber; | |
| 370 CFX_ByteString word = GetNextWord(&bIsNumber); | |
| 371 if (word.GetLength() == 0) | |
| 372 return nullptr; | |
| 373 | |
| 374 if (bIsNumber) { | |
| 375 FX_FILESIZE SavedPos = m_Pos; | |
| 376 CFX_ByteString nextword = GetNextWord(&bIsNumber); | |
| 377 if (bIsNumber) { | |
| 378 CFX_ByteString nextword2 = GetNextWord(nullptr); | |
| 379 if (nextword2 == "R") { | |
| 380 FX_DWORD objnum = FXSYS_atoui(word); | |
| 381 return new CPDF_Reference(pObjList, objnum); | |
| 382 } | |
| 383 } | |
| 384 m_Pos = SavedPos; | |
| 385 return new CPDF_Number(word); | |
| 386 } | |
| 387 | |
| 388 if (word == "true" || word == "false") | |
| 389 return new CPDF_Boolean(word == "true"); | |
| 390 | |
| 391 if (word == "null") | |
| 392 return new CPDF_Null; | |
| 393 | |
| 394 if (word == "(") { | |
| 395 CFX_ByteString str = ReadString(); | |
| 396 if (m_pCryptoHandler && bDecrypt) | |
| 397 m_pCryptoHandler->Decrypt(objnum, gennum, str); | |
| 398 return new CPDF_String(str, FALSE); | |
| 399 } | |
| 400 | |
| 401 if (word == "<") { | |
| 402 CFX_ByteString str = ReadHexString(); | |
| 403 if (m_pCryptoHandler && bDecrypt) | |
| 404 m_pCryptoHandler->Decrypt(objnum, gennum, str); | |
| 405 | |
| 406 return new CPDF_String(str, TRUE); | |
| 407 } | |
| 408 | |
| 409 if (word == "[") { | |
| 410 CPDF_Array* pArray = new CPDF_Array; | |
| 411 while (CPDF_Object* pObj = GetObject(pObjList, objnum, gennum, true)) | |
| 412 pArray->Add(pObj); | |
| 413 | |
| 414 return pArray; | |
| 415 } | |
| 416 | |
| 417 if (word[0] == '/') { | |
| 418 return new CPDF_Name( | |
| 419 PDF_NameDecode(CFX_ByteStringC(m_WordBuffer + 1, m_WordSize - 1))); | |
| 420 } | |
| 421 | |
| 422 if (word == "<<") { | |
| 423 int32_t nKeys = 0; | |
| 424 FX_FILESIZE dwSignValuePos = 0; | |
| 425 | |
| 426 std::unique_ptr<CPDF_Dictionary, ReleaseDeleter<CPDF_Dictionary>> pDict( | |
| 427 new CPDF_Dictionary); | |
| 428 while (1) { | |
| 429 CFX_ByteString key = GetNextWord(nullptr); | |
| 430 if (key.IsEmpty()) | |
| 431 return nullptr; | |
| 432 | |
| 433 FX_FILESIZE SavedPos = m_Pos - key.GetLength(); | |
| 434 if (key == ">>") | |
| 435 break; | |
| 436 | |
| 437 if (key == "endobj") { | |
| 438 m_Pos = SavedPos; | |
| 439 break; | |
| 440 } | |
| 441 | |
| 442 if (key[0] != '/') | |
| 443 continue; | |
| 444 | |
| 445 ++nKeys; | |
| 446 key = PDF_NameDecode(key); | |
| 447 if (key.IsEmpty()) | |
| 448 continue; | |
| 449 | |
| 450 if (key == "/Contents") | |
| 451 dwSignValuePos = m_Pos; | |
| 452 | |
| 453 CPDF_Object* pObj = GetObject(pObjList, objnum, gennum, true); | |
| 454 if (!pObj) | |
| 455 continue; | |
| 456 | |
| 457 CFX_ByteStringC keyNoSlash(key.c_str() + 1, key.GetLength() - 1); | |
| 458 pDict->SetAt(keyNoSlash, pObj); | |
| 459 } | |
| 460 | |
| 461 // Only when this is a signature dictionary and has contents, we reset the | |
| 462 // contents to the un-decrypted form. | |
| 463 if (IsSignatureDict(pDict.get()) && dwSignValuePos) { | |
| 464 CFX_AutoRestorer<FX_FILESIZE> save_pos(&m_Pos); | |
| 465 m_Pos = dwSignValuePos; | |
| 466 pDict->SetAt("Contents", GetObject(pObjList, objnum, gennum, false)); | |
| 467 } | |
| 468 | |
| 469 FX_FILESIZE SavedPos = m_Pos; | |
| 470 CFX_ByteString nextword = GetNextWord(nullptr); | |
| 471 if (nextword != "stream") { | |
| 472 m_Pos = SavedPos; | |
| 473 return pDict.release(); | |
| 474 } | |
| 475 return ReadStream(pDict.release(), objnum, gennum); | |
| 476 } | |
| 477 | |
| 478 if (word == ">>") | |
| 479 m_Pos = SavedPos; | |
| 480 | |
| 481 return nullptr; | |
| 482 } | |
| 483 | |
| 484 CPDF_Object* CPDF_SyntaxParser::GetObjectByStrict( | |
| 485 CPDF_IndirectObjectHolder* pObjList, | |
| 486 FX_DWORD objnum, | |
| 487 FX_DWORD gennum) { | |
| 488 CFX_AutoRestorer<int> restorer(&s_CurrentRecursionDepth); | |
| 489 if (++s_CurrentRecursionDepth > kParserMaxRecursionDepth) | |
| 490 return nullptr; | |
| 491 | |
| 492 FX_FILESIZE SavedPos = m_Pos; | |
| 493 bool bIsNumber; | |
| 494 CFX_ByteString word = GetNextWord(&bIsNumber); | |
| 495 if (word.GetLength() == 0) | |
| 496 return nullptr; | |
| 497 | |
| 498 if (bIsNumber) { | |
| 499 FX_FILESIZE SavedPos = m_Pos; | |
| 500 CFX_ByteString nextword = GetNextWord(&bIsNumber); | |
| 501 if (bIsNumber) { | |
| 502 CFX_ByteString nextword2 = GetNextWord(nullptr); | |
| 503 if (nextword2 == "R") | |
| 504 return new CPDF_Reference(pObjList, FXSYS_atoui(word)); | |
| 505 } | |
| 506 m_Pos = SavedPos; | |
| 507 return new CPDF_Number(word); | |
| 508 } | |
| 509 | |
| 510 if (word == "true" || word == "false") | |
| 511 return new CPDF_Boolean(word == "true"); | |
| 512 | |
| 513 if (word == "null") | |
| 514 return new CPDF_Null; | |
| 515 | |
| 516 if (word == "(") { | |
| 517 CFX_ByteString str = ReadString(); | |
| 518 if (m_pCryptoHandler) | |
| 519 m_pCryptoHandler->Decrypt(objnum, gennum, str); | |
| 520 return new CPDF_String(str, FALSE); | |
| 521 } | |
| 522 | |
| 523 if (word == "<") { | |
| 524 CFX_ByteString str = ReadHexString(); | |
| 525 if (m_pCryptoHandler) | |
| 526 m_pCryptoHandler->Decrypt(objnum, gennum, str); | |
| 527 return new CPDF_String(str, TRUE); | |
| 528 } | |
| 529 | |
| 530 if (word == "[") { | |
| 531 std::unique_ptr<CPDF_Array, ReleaseDeleter<CPDF_Array>> pArray( | |
| 532 new CPDF_Array); | |
| 533 while (CPDF_Object* pObj = GetObject(pObjList, objnum, gennum, true)) | |
| 534 pArray->Add(pObj); | |
| 535 | |
| 536 return m_WordBuffer[0] == ']' ? pArray.release() : nullptr; | |
| 537 } | |
| 538 | |
| 539 if (word[0] == '/') { | |
| 540 return new CPDF_Name( | |
| 541 PDF_NameDecode(CFX_ByteStringC(m_WordBuffer + 1, m_WordSize - 1))); | |
| 542 } | |
| 543 | |
| 544 if (word == "<<") { | |
| 545 std::unique_ptr<CPDF_Dictionary, ReleaseDeleter<CPDF_Dictionary>> pDict( | |
| 546 new CPDF_Dictionary); | |
| 547 while (1) { | |
| 548 FX_FILESIZE SavedPos = m_Pos; | |
| 549 CFX_ByteString key = GetNextWord(nullptr); | |
| 550 if (key.IsEmpty()) | |
| 551 return nullptr; | |
| 552 | |
| 553 if (key == ">>") | |
| 554 break; | |
| 555 | |
| 556 if (key == "endobj") { | |
| 557 m_Pos = SavedPos; | |
| 558 break; | |
| 559 } | |
| 560 | |
| 561 if (key[0] != '/') | |
| 562 continue; | |
| 563 | |
| 564 key = PDF_NameDecode(key); | |
| 565 std::unique_ptr<CPDF_Object, ReleaseDeleter<CPDF_Object>> obj( | |
| 566 GetObject(pObjList, objnum, gennum, true)); | |
| 567 if (!obj) { | |
| 568 uint8_t ch; | |
| 569 while (GetNextChar(ch) && ch != 0x0A && ch != 0x0D) { | |
| 570 continue; | |
| 571 } | |
| 572 return nullptr; | |
| 573 } | |
| 574 | |
| 575 if (key.GetLength() > 1) { | |
| 576 pDict->SetAt(CFX_ByteStringC(key.c_str() + 1, key.GetLength() - 1), | |
| 577 obj.release()); | |
| 578 } | |
| 579 } | |
| 580 | |
| 581 FX_FILESIZE SavedPos = m_Pos; | |
| 582 CFX_ByteString nextword = GetNextWord(nullptr); | |
| 583 if (nextword != "stream") { | |
| 584 m_Pos = SavedPos; | |
| 585 return pDict.release(); | |
| 586 } | |
| 587 | |
| 588 return ReadStream(pDict.release(), objnum, gennum); | |
| 589 } | |
| 590 | |
| 591 if (word == ">>") | |
| 592 m_Pos = SavedPos; | |
| 593 | |
| 594 return nullptr; | |
| 595 } | |
| 596 | |
| 597 unsigned int CPDF_SyntaxParser::ReadEOLMarkers(FX_FILESIZE pos) { | |
| 598 unsigned char byte1 = 0; | |
| 599 unsigned char byte2 = 0; | |
| 600 | |
| 601 GetCharAt(pos, byte1); | |
| 602 GetCharAt(pos + 1, byte2); | |
| 603 | |
| 604 if (byte1 == '\r' && byte2 == '\n') | |
| 605 return 2; | |
| 606 | |
| 607 if (byte1 == '\r' || byte1 == '\n') | |
| 608 return 1; | |
| 609 | |
| 610 return 0; | |
| 611 } | |
| 612 | |
// Reads the data of a stream object whose dictionary |pDict| and "stream"
// keyword have already been parsed, and returns the resulting CPDF_Stream.
//
// The data length is taken from the dictionary's "Length" entry when that can
// be used; when no crypto handler applies, the length is verified against the
// position of the "endstream" keyword and, if that fails, recomputed by
// searching for "endstream" / "endobj". The recomputed value is written back
// to |pDict|.
//
// |objnum| / |gennum| identify the object for decryption.
// On failure |pDict| is released and nullptr is returned. On success |pDict|
// is handed to the CPDF_Stream constructor (presumably transferring
// ownership - confirm against CPDF_Stream).
CPDF_Stream* CPDF_SyntaxParser::ReadStream(CPDF_Dictionary* pDict,
                                           FX_DWORD objnum,
                                           FX_DWORD gennum) {
  CPDF_Object* pLenObj = pDict->GetElement("Length");
  FX_FILESIZE len = -1;
  CPDF_Reference* pLenObjRef = ToReference(pLenObj);

  // Use "Length" when it is a direct value, or a reference that has an object
  // list and does not point back at this very object.
  bool differingObjNum = !pLenObjRef || (pLenObjRef->GetObjList() &&
                                         pLenObjRef->GetRefObjNum() != objnum);
  if (pLenObj && differingObjNum)
    len = pLenObj->GetInteger();

  // Locate the start of stream.
  ToNextLine();
  FX_FILESIZE streamStartPos = m_Pos;

  const CFX_ByteStringC kEndStreamStr("endstream");
  const CFX_ByteStringC kEndObjStr("endobj");

  // The object recorded as metadata is read without the crypto handler.
  CPDF_CryptoHandler* pCryptoHandler =
      objnum == (FX_DWORD)m_MetadataObjnum ? nullptr : m_pCryptoHandler.get();
  if (!pCryptoHandler) {
    FX_BOOL bSearchForKeyword = TRUE;
    if (len >= 0) {
      // Jump over |len| bytes, guarding against overflow and against running
      // past the end of the file.
      pdfium::base::CheckedNumeric<FX_FILESIZE> pos = m_Pos;
      pos += len;
      if (pos.IsValid() && pos.ValueOrDie() < m_FileLen)
        m_Pos = pos.ValueOrDie();

      m_Pos += ReadEOLMarkers(m_Pos);
      // Clear the part of the word buffer that is compared below.
      FXSYS_memset(m_WordBuffer, 0, kEndStreamStr.GetLength() + 1);
      GetNextWordInternal(nullptr);
      // Earlier version of PDF specification doesn't require EOL marker before
      // 'endstream' keyword. If keyword 'endstream' follows the bytes in
      // specified length, it signals the end of stream.
      if (FXSYS_memcmp(m_WordBuffer, kEndStreamStr.GetPtr(),
                       kEndStreamStr.GetLength()) == 0) {
        bSearchForKeyword = FALSE;
      }
    }

    if (bSearchForKeyword) {
      // If len is not available, len needs to be calculated
      // by searching the keywords "endstream" or "endobj".
      m_Pos = streamStartPos;
      FX_FILESIZE endStreamOffset = 0;
      while (endStreamOffset >= 0) {
        endStreamOffset = FindTag(kEndStreamStr, 0);

        // Can't find "endstream".
        if (endStreamOffset < 0)
          break;

        // Stop searching when "endstream" is found.
        if (IsWholeWord(m_Pos - kEndStreamStr.GetLength(), m_FileLen,
                        kEndStreamStr, TRUE)) {
          // Convert to an offset measured from the start of the stream data.
          endStreamOffset = m_Pos - streamStartPos - kEndStreamStr.GetLength();
          break;
        }
      }

      m_Pos = streamStartPos;
      FX_FILESIZE endObjOffset = 0;
      while (endObjOffset >= 0) {
        endObjOffset = FindTag(kEndObjStr, 0);

        // Can't find "endobj".
        if (endObjOffset < 0)
          break;

        // Stop searching when "endobj" is found.
        if (IsWholeWord(m_Pos - kEndObjStr.GetLength(), m_FileLen, kEndObjStr,
                        TRUE)) {
          // Convert to an offset measured from the start of the stream data.
          endObjOffset = m_Pos - streamStartPos - kEndObjStr.GetLength();
          break;
        }
      }

      // Can't find "endstream" or "endobj".
      if (endStreamOffset < 0 && endObjOffset < 0) {
        pDict->Release();
        return nullptr;
      }

      // Whichever keyword comes first marks the end of the data.
      if (endStreamOffset < 0 && endObjOffset >= 0) {
        // Correct the position of end stream.
        endStreamOffset = endObjOffset;
      } else if (endStreamOffset >= 0 && endObjOffset < 0) {
        // Correct the position of end obj.
        endObjOffset = endStreamOffset;
      } else if (endStreamOffset > endObjOffset) {
        endStreamOffset = endObjOffset;
      }

      // Exclude an EOL marker sitting directly before the keyword from the
      // data length.
      len = endStreamOffset;
      int numMarkers = ReadEOLMarkers(streamStartPos + endStreamOffset - 2);
      if (numMarkers == 2) {
        len -= 2;
      } else {
        numMarkers = ReadEOLMarkers(streamStartPos + endStreamOffset - 1);
        if (numMarkers == 1) {
          len -= 1;
        }
      }

      if (len < 0) {
        pDict->Release();
        return nullptr;
      }
      pDict->SetAtInteger("Length", len);
    }
    m_Pos = streamStartPos;
  }

  // Still no usable length (only possible on the crypto-handler path).
  if (len < 0) {
    pDict->Release();
    return nullptr;
  }

  uint8_t* pData = nullptr;
  if (len > 0) {
    pData = FX_Alloc(uint8_t, len);
    // NOTE(review): the result of ReadBlock() is not checked here.
    ReadBlock(pData, len);
    if (pCryptoHandler) {
      CFX_BinaryBuf dest_buf;
      dest_buf.EstimateSize(pCryptoHandler->DecryptGetSize(len));

      void* context = pCryptoHandler->DecryptStart(objnum, gennum);
      pCryptoHandler->DecryptStream(context, pData, len, dest_buf);
      pCryptoHandler->DecryptFinish(context, dest_buf);

      // Swap the encrypted bytes for the decrypted buffer, which is detached
      // from |dest_buf| so that it is not freed with it.
      FX_Free(pData);
      pData = dest_buf.GetBuffer();
      len = dest_buf.GetSize();
      dest_buf.DetachBuffer();
    }
  }

  CPDF_Stream* pStream = new CPDF_Stream(pData, len, pDict);
  // From here on |streamStartPos| is reused as "position after the data".
  streamStartPos = m_Pos;
  FXSYS_memset(m_WordBuffer, 0, kEndObjStr.GetLength() + 1);

  GetNextWordInternal(nullptr);

  // If the word after the data is "endobj" followed by an EOL marker, rewind
  // so that it stays unread; any other word remains consumed.
  int numMarkers = ReadEOLMarkers(m_Pos);
  if (m_WordSize == kEndObjStr.GetLength() && numMarkers != 0 &&
      FXSYS_memcmp(m_WordBuffer, kEndObjStr.GetPtr(), kEndObjStr.GetLength()) ==
          0) {
    m_Pos = streamStartPos;
  }
  return pStream;
}
| 765 | |
| 766 void CPDF_SyntaxParser::InitParser(IFX_FileRead* pFileAccess, | |
| 767 FX_DWORD HeaderOffset) { | |
| 768 FX_Free(m_pFileBuf); | |
| 769 | |
| 770 m_pFileBuf = FX_Alloc(uint8_t, m_BufSize); | |
| 771 m_HeaderOffset = HeaderOffset; | |
| 772 m_FileLen = pFileAccess->GetSize(); | |
| 773 m_Pos = 0; | |
| 774 m_pFileAccess = pFileAccess; | |
| 775 m_BufOffset = 0; | |
| 776 pFileAccess->ReadBlock( | |
| 777 m_pFileBuf, 0, | |
| 778 (size_t)((FX_FILESIZE)m_BufSize > m_FileLen ? m_FileLen : m_BufSize)); | |
| 779 } | |
| 780 | |
| 781 uint32_t CPDF_SyntaxParser::GetDirectNum() { | |
| 782 bool bIsNumber; | |
| 783 GetNextWordInternal(&bIsNumber); | |
| 784 if (!bIsNumber) | |
| 785 return 0; | |
| 786 | |
| 787 m_WordBuffer[m_WordSize] = 0; | |
| 788 return FXSYS_atoui(reinterpret_cast<const FX_CHAR*>(m_WordBuffer)); | |
| 789 } | |
| 790 | |
| 791 bool CPDF_SyntaxParser::IsWholeWord(FX_FILESIZE startpos, | |
| 792 FX_FILESIZE limit, | |
| 793 const CFX_ByteStringC& tag, | |
| 794 FX_BOOL checkKeyword) { | |
| 795 const FX_DWORD taglen = tag.GetLength(); | |
| 796 | |
| 797 bool bCheckLeft = !PDFCharIsDelimiter(tag[0]) && !PDFCharIsWhitespace(tag[0]); | |
| 798 bool bCheckRight = !PDFCharIsDelimiter(tag[taglen - 1]) && | |
| 799 !PDFCharIsWhitespace(tag[taglen - 1]); | |
| 800 | |
| 801 uint8_t ch; | |
| 802 if (bCheckRight && startpos + (int32_t)taglen <= limit && | |
| 803 GetCharAt(startpos + (int32_t)taglen, ch)) { | |
| 804 if (PDFCharIsNumeric(ch) || PDFCharIsOther(ch) || | |
| 805 (checkKeyword && PDFCharIsDelimiter(ch))) { | |
| 806 return false; | |
| 807 } | |
| 808 } | |
| 809 | |
| 810 if (bCheckLeft && startpos > 0 && GetCharAt(startpos - 1, ch)) { | |
| 811 if (PDFCharIsNumeric(ch) || PDFCharIsOther(ch) || | |
| 812 (checkKeyword && PDFCharIsDelimiter(ch))) { | |
| 813 return false; | |
| 814 } | |
| 815 } | |
| 816 return true; | |
| 817 } | |
| 818 | |
| 819 // TODO(dsinclair): Split into a SearchWordForward and SearchWordBackwards | |
| 820 // and drop the bool. | |
| 821 FX_BOOL CPDF_SyntaxParser::SearchWord(const CFX_ByteStringC& tag, | |
| 822 FX_BOOL bWholeWord, | |
| 823 FX_BOOL bForward, | |
| 824 FX_FILESIZE limit) { | |
| 825 int32_t taglen = tag.GetLength(); | |
| 826 if (taglen == 0) | |
| 827 return FALSE; | |
| 828 | |
| 829 FX_FILESIZE pos = m_Pos; | |
| 830 int32_t offset = 0; | |
| 831 if (!bForward) | |
| 832 offset = taglen - 1; | |
| 833 | |
| 834 const uint8_t* tag_data = tag.GetPtr(); | |
| 835 uint8_t byte; | |
| 836 while (1) { | |
| 837 if (bForward) { | |
| 838 if (limit && pos >= m_Pos + limit) | |
| 839 return FALSE; | |
| 840 | |
| 841 if (!GetCharAt(pos, byte)) | |
| 842 return FALSE; | |
| 843 | |
| 844 } else { | |
| 845 if (limit && pos <= m_Pos - limit) | |
| 846 return FALSE; | |
| 847 | |
| 848 if (!GetCharAtBackward(pos, byte)) | |
| 849 return FALSE; | |
| 850 } | |
| 851 | |
| 852 if (byte == tag_data[offset]) { | |
| 853 if (bForward) { | |
| 854 offset++; | |
| 855 if (offset < taglen) { | |
| 856 pos++; | |
| 857 continue; | |
| 858 } | |
| 859 } else { | |
| 860 offset--; | |
| 861 if (offset >= 0) { | |
| 862 pos--; | |
| 863 continue; | |
| 864 } | |
| 865 } | |
| 866 | |
| 867 FX_FILESIZE startpos = bForward ? pos - taglen + 1 : pos; | |
| 868 if (!bWholeWord || IsWholeWord(startpos, limit, tag, FALSE)) { | |
| 869 m_Pos = startpos; | |
| 870 return TRUE; | |
| 871 } | |
| 872 } | |
| 873 | |
| 874 if (bForward) { | |
| 875 offset = byte == tag_data[0] ? 1 : 0; | |
| 876 pos++; | |
| 877 } else { | |
| 878 offset = byte == tag_data[taglen - 1] ? taglen - 2 : taglen - 1; | |
| 879 pos--; | |
| 880 } | |
| 881 | |
| 882 if (pos < 0) | |
| 883 return FALSE; | |
| 884 } | |
| 885 | |
| 886 return FALSE; | |
| 887 } | |
| 888 | |
| 889 int32_t CPDF_SyntaxParser::SearchMultiWord(const CFX_ByteStringC& tags, | |
| 890 FX_BOOL bWholeWord, | |
| 891 FX_FILESIZE limit) { | |
| 892 int32_t ntags = 1; | |
| 893 for (int i = 0; i < tags.GetLength(); ++i) { | |
| 894 if (tags[i] == 0) | |
| 895 ++ntags; | |
| 896 } | |
| 897 | |
| 898 std::vector<SearchTagRecord> patterns(ntags); | |
| 899 FX_DWORD start = 0; | |
| 900 FX_DWORD itag = 0; | |
| 901 FX_DWORD max_len = 0; | |
| 902 for (int i = 0; i <= tags.GetLength(); ++i) { | |
| 903 if (tags[i] == 0) { | |
| 904 FX_DWORD len = i - start; | |
| 905 max_len = std::max(len, max_len); | |
| 906 patterns[itag].m_pTag = tags.GetCStr() + start; | |
| 907 patterns[itag].m_Len = len; | |
| 908 patterns[itag].m_Offset = 0; | |
| 909 start = i + 1; | |
| 910 ++itag; | |
| 911 } | |
| 912 } | |
| 913 | |
| 914 const FX_FILESIZE pos_limit = m_Pos + limit; | |
| 915 for (FX_FILESIZE pos = m_Pos; !limit || pos < pos_limit; ++pos) { | |
| 916 uint8_t byte; | |
| 917 if (!GetCharAt(pos, byte)) | |
| 918 break; | |
| 919 | |
| 920 for (int i = 0; i < ntags; ++i) { | |
| 921 SearchTagRecord& pat = patterns[i]; | |
| 922 if (pat.m_pTag[pat.m_Offset] != byte) { | |
| 923 pat.m_Offset = (pat.m_pTag[0] == byte) ? 1 : 0; | |
| 924 continue; | |
| 925 } | |
| 926 | |
| 927 ++pat.m_Offset; | |
| 928 if (pat.m_Offset != pat.m_Len) | |
| 929 continue; | |
| 930 | |
| 931 if (!bWholeWord || | |
| 932 IsWholeWord(pos - pat.m_Len, limit, | |
| 933 CFX_ByteStringC(pat.m_pTag, pat.m_Len), FALSE)) { | |
| 934 return i; | |
| 935 } | |
| 936 | |
| 937 pat.m_Offset = (pat.m_pTag[0] == byte) ? 1 : 0; | |
| 938 } | |
| 939 } | |
| 940 return -1; | |
| 941 } | |
| 942 | |
| 943 FX_FILESIZE CPDF_SyntaxParser::FindTag(const CFX_ByteStringC& tag, | |
| 944 FX_FILESIZE limit) { | |
| 945 int32_t taglen = tag.GetLength(); | |
| 946 int32_t match = 0; | |
| 947 limit += m_Pos; | |
| 948 FX_FILESIZE startpos = m_Pos; | |
| 949 | |
| 950 while (1) { | |
| 951 uint8_t ch; | |
| 952 if (!GetNextChar(ch)) | |
| 953 return -1; | |
| 954 | |
| 955 if (ch == tag[match]) { | |
| 956 match++; | |
| 957 if (match == taglen) | |
| 958 return m_Pos - startpos - taglen; | |
| 959 } else { | |
| 960 match = ch == tag[0] ? 1 : 0; | |
| 961 } | |
| 962 | |
| 963 if (limit && m_Pos == limit) | |
| 964 return -1; | |
| 965 } | |
| 966 return -1; | |
| 967 } | |
| 968 | |
// Installs |pCryptoHandler| as the handler used to decrypt strings and stream
// data, taking ownership of it. Passing an empty pointer disables decryption:
// every use of m_pCryptoHandler in this file is guarded by a null check.
void CPDF_SyntaxParser::SetEncrypt(
    std::unique_ptr<CPDF_CryptoHandler> pCryptoHandler) {
  m_pCryptoHandler = std::move(pCryptoHandler);
}
| OLD | NEW |