| OLD | NEW |
| 1 // Copyright 2014 PDFium Authors. All rights reserved. | 1 // Copyright 2014 PDFium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com | 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com |
| 6 | 6 |
| 7 #include "core/src/fpdfapi/fpdf_parser/fpdf_parser_utility.h" |
| 8 |
| 7 #include "core/include/fpdfapi/fpdf_parser.h" | 9 #include "core/include/fpdfapi/fpdf_parser.h" |
| 8 | |
| 9 #include "core/include/fxcrt/fx_ext.h" | 10 #include "core/include/fxcrt/fx_ext.h" |
| 10 | 11 |
| 11 // Indexed by 8-bit character code, contains either: | 12 // Indexed by 8-bit character code, contains either: |
| 12 // 'W' - for whitespace: NUL, TAB, CR, LF, FF, SPACE, 0x80, 0xff | 13 // 'W' - for whitespace: NUL, TAB, CR, LF, FF, SPACE, 0x80, 0xff |
| 13 // 'N' - for numeric: 0123456789+-. | 14 // 'N' - for numeric: 0123456789+-. |
| 14 // 'D' - for delimiter: %()/<>[]{} | 15 // 'D' - for delimiter: %()/<>[]{} |
| 15 // 'R' - otherwise. | 16 // 'R' - otherwise. |
| 16 const char PDF_CharType[256] = { | 17 const char PDF_CharType[256] = { |
| 17 // NUL SOH STX ETX EOT ENQ ACK BEL BS HT LF VT FF CR SO | 18 // NUL SOH STX ETX EOT ENQ ACK BEL BS HT LF VT FF CR SO |
| 18 // SI | 19 // SI |
| (...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 53 'W', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', | 54 'W', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', |
| 54 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', | 55 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', |
| 55 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', | 56 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', |
| 56 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', | 57 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', |
| 57 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', | 58 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', |
| 58 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', | 59 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', |
| 59 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', | 60 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', |
| 60 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', | 61 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', |
| 61 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'W'}; | 62 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'W'}; |
| 62 | 63 |
| 63 CPDF_SimpleParser::CPDF_SimpleParser(const uint8_t* pData, FX_DWORD dwSize) { | 64 int32_t GetHeaderOffset(IFX_FileRead* pFile) { |
| 64 m_pData = pData; | 65 // TODO(dsinclair): This is a complicated way of saying %PDF, simplify? |
| 65 m_dwSize = dwSize; | 66 const FX_DWORD tag = FXDWORD_FROM_LSBFIRST(0x46445025); |
| 66 m_dwCurPos = 0; | 67 |
| 68 const size_t kBufSize = 4; |
| 69 uint8_t buf[kBufSize]; |
| 70 int32_t offset = 0; |
| 71 while (offset <= 1024) { |
| 72 if (!pFile->ReadBlock(buf, offset, kBufSize)) |
| 73 return -1; |
| 74 |
| 75 if (*(FX_DWORD*)buf == tag) |
| 76 return offset; |
| 77 |
| 78 ++offset; |
| 79 } |
| 80 return -1; |
| 67 } | 81 } |
| 68 | 82 |
| 69 CPDF_SimpleParser::CPDF_SimpleParser(const CFX_ByteStringC& str) { | 83 int32_t GetDirectInteger(CPDF_Dictionary* pDict, const CFX_ByteStringC& key) { |
| 70 m_pData = str.GetPtr(); | 84 CPDF_Number* pObj = ToNumber(pDict->GetElement(key)); |
| 71 m_dwSize = str.GetLength(); | 85 return pObj ? pObj->GetInteger() : 0; |
| 72 m_dwCurPos = 0; | |
| 73 } | |
| 74 | |
| 75 void CPDF_SimpleParser::ParseWord(const uint8_t*& pStart, FX_DWORD& dwSize) { | |
| 76 pStart = NULL; | |
| 77 dwSize = 0; | |
| 78 uint8_t ch; | |
| 79 while (1) { | |
| 80 if (m_dwSize <= m_dwCurPos) | |
| 81 return; | |
| 82 ch = m_pData[m_dwCurPos++]; | |
| 83 while (PDFCharIsWhitespace(ch)) { | |
| 84 if (m_dwSize <= m_dwCurPos) | |
| 85 return; | |
| 86 ch = m_pData[m_dwCurPos++]; | |
| 87 } | |
| 88 | |
| 89 if (ch != '%') | |
| 90 break; | |
| 91 | |
| 92 while (1) { | |
| 93 if (m_dwSize <= m_dwCurPos) | |
| 94 return; | |
| 95 ch = m_pData[m_dwCurPos++]; | |
| 96 if (ch == '\r' || ch == '\n') | |
| 97 break; | |
| 98 } | |
| 99 } | |
| 100 | |
| 101 FX_DWORD start_pos = m_dwCurPos - 1; | |
| 102 pStart = m_pData + start_pos; | |
| 103 if (PDFCharIsDelimiter(ch)) { | |
| 104 if (ch == '/') { | |
| 105 while (1) { | |
| 106 if (m_dwSize <= m_dwCurPos) | |
| 107 return; | |
| 108 ch = m_pData[m_dwCurPos++]; | |
| 109 if (!PDFCharIsOther(ch) && !PDFCharIsNumeric(ch)) { | |
| 110 m_dwCurPos--; | |
| 111 dwSize = m_dwCurPos - start_pos; | |
| 112 return; | |
| 113 } | |
| 114 } | |
| 115 } else { | |
| 116 dwSize = 1; | |
| 117 if (ch == '<') { | |
| 118 if (m_dwSize <= m_dwCurPos) | |
| 119 return; | |
| 120 ch = m_pData[m_dwCurPos++]; | |
| 121 if (ch == '<') | |
| 122 dwSize = 2; | |
| 123 else | |
| 124 m_dwCurPos--; | |
| 125 } else if (ch == '>') { | |
| 126 if (m_dwSize <= m_dwCurPos) | |
| 127 return; | |
| 128 ch = m_pData[m_dwCurPos++]; | |
| 129 if (ch == '>') | |
| 130 dwSize = 2; | |
| 131 else | |
| 132 m_dwCurPos--; | |
| 133 } | |
| 134 } | |
| 135 return; | |
| 136 } | |
| 137 | |
| 138 dwSize = 1; | |
| 139 while (1) { | |
| 140 if (m_dwSize <= m_dwCurPos) | |
| 141 return; | |
| 142 ch = m_pData[m_dwCurPos++]; | |
| 143 | |
| 144 if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) { | |
| 145 m_dwCurPos--; | |
| 146 break; | |
| 147 } | |
| 148 dwSize++; | |
| 149 } | |
| 150 } | |
| 151 | |
| 152 CFX_ByteStringC CPDF_SimpleParser::GetWord() { | |
| 153 const uint8_t* pStart; | |
| 154 FX_DWORD dwSize; | |
| 155 ParseWord(pStart, dwSize); | |
| 156 if (dwSize == 1 && pStart[0] == '<') { | |
| 157 while (m_dwCurPos < m_dwSize && m_pData[m_dwCurPos] != '>') { | |
| 158 m_dwCurPos++; | |
| 159 } | |
| 160 if (m_dwCurPos < m_dwSize) { | |
| 161 m_dwCurPos++; | |
| 162 } | |
| 163 return CFX_ByteStringC(pStart, | |
| 164 (FX_STRSIZE)(m_dwCurPos - (pStart - m_pData))); | |
| 165 } | |
| 166 if (dwSize == 1 && pStart[0] == '(') { | |
| 167 int level = 1; | |
| 168 while (m_dwCurPos < m_dwSize) { | |
| 169 if (m_pData[m_dwCurPos] == ')') { | |
| 170 level--; | |
| 171 if (level == 0) { | |
| 172 break; | |
| 173 } | |
| 174 } | |
| 175 if (m_pData[m_dwCurPos] == '\\') { | |
| 176 if (m_dwSize <= m_dwCurPos) { | |
| 177 break; | |
| 178 } | |
| 179 m_dwCurPos++; | |
| 180 } else if (m_pData[m_dwCurPos] == '(') { | |
| 181 level++; | |
| 182 } | |
| 183 if (m_dwSize <= m_dwCurPos) { | |
| 184 break; | |
| 185 } | |
| 186 m_dwCurPos++; | |
| 187 } | |
| 188 if (m_dwCurPos < m_dwSize) { | |
| 189 m_dwCurPos++; | |
| 190 } | |
| 191 return CFX_ByteStringC(pStart, | |
| 192 (FX_STRSIZE)(m_dwCurPos - (pStart - m_pData))); | |
| 193 } | |
| 194 return CFX_ByteStringC(pStart, dwSize); | |
| 195 } | |
| 196 | |
| 197 bool CPDF_SimpleParser::FindTagParamFromStart(const CFX_ByteStringC& token, | |
| 198 int nParams) { | |
| 199 nParams++; | |
| 200 FX_DWORD* pBuf = FX_Alloc(FX_DWORD, nParams); | |
| 201 int buf_index = 0; | |
| 202 int buf_count = 0; | |
| 203 m_dwCurPos = 0; | |
| 204 while (1) { | |
| 205 pBuf[buf_index++] = m_dwCurPos; | |
| 206 if (buf_index == nParams) { | |
| 207 buf_index = 0; | |
| 208 } | |
| 209 buf_count++; | |
| 210 if (buf_count > nParams) { | |
| 211 buf_count = nParams; | |
| 212 } | |
| 213 CFX_ByteStringC word = GetWord(); | |
| 214 if (word.IsEmpty()) { | |
| 215 FX_Free(pBuf); | |
| 216 return false; | |
| 217 } | |
| 218 if (word == token) { | |
| 219 if (buf_count < nParams) { | |
| 220 continue; | |
| 221 } | |
| 222 m_dwCurPos = pBuf[buf_index]; | |
| 223 FX_Free(pBuf); | |
| 224 return true; | |
| 225 } | |
| 226 } | |
| 227 return false; | |
| 228 } | 86 } |
| 229 | 87 |
| 230 CFX_ByteString PDF_NameDecode(const CFX_ByteStringC& bstr) { | 88 CFX_ByteString PDF_NameDecode(const CFX_ByteStringC& bstr) { |
| 231 int size = bstr.GetLength(); | 89 int size = bstr.GetLength(); |
| 232 const FX_CHAR* pSrc = bstr.GetCStr(); | 90 const FX_CHAR* pSrc = bstr.GetCStr(); |
| 233 if (!FXSYS_memchr(pSrc, '#', size)) { | 91 if (!FXSYS_memchr(pSrc, '#', size)) { |
| 234 return bstr; | 92 return bstr; |
| 235 } | 93 } |
| 236 CFX_ByteString result; | 94 CFX_ByteString result; |
| 237 FX_CHAR* pDestStart = result.GetBuffer(size); | 95 FX_CHAR* pDestStart = result.GetBuffer(size); |
| (...skipping 158 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 396 if (pFound) { | 254 if (pFound) { |
| 397 return pFound; | 255 return pFound; |
| 398 } | 256 } |
| 399 } | 257 } |
| 400 return NULL; | 258 return NULL; |
| 401 } | 259 } |
| 402 | 260 |
| 403 CPDF_Object* CPDF_NumberTree::LookupValue(int num) { | 261 CPDF_Object* CPDF_NumberTree::LookupValue(int num) { |
| 404 return SearchNumberNode(m_pRoot, num); | 262 return SearchNumberNode(m_pRoot, num); |
| 405 } | 263 } |
| OLD | NEW |