OLD | NEW |
1 // Copyright 2014 PDFium Authors. All rights reserved. | 1 // Copyright 2014 PDFium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com | 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com |
6 | 6 |
| 7 #include "core/src/fpdfapi/fpdf_parser/fpdf_parser_utility.h" |
| 8 |
7 #include "core/include/fpdfapi/fpdf_parser.h" | 9 #include "core/include/fpdfapi/fpdf_parser.h" |
8 | |
9 #include "core/include/fxcrt/fx_ext.h" | 10 #include "core/include/fxcrt/fx_ext.h" |
10 | 11 |
11 // Indexed by 8-bit character code, contains either: | 12 // Indexed by 8-bit character code, contains either: |
12 // 'W' - for whitespace: NUL, TAB, CR, LF, FF, SPACE, 0x80, 0xff | 13 // 'W' - for whitespace: NUL, TAB, CR, LF, FF, SPACE, 0x80, 0xff |
13 // 'N' - for numeric: 0123456789+-. | 14 // 'N' - for numeric: 0123456789+-. |
14 // 'D' - for delimiter: %()/<>[]{} | 15 // 'D' - for delimiter: %()/<>[]{} |
15 // 'R' - otherwise. | 16 // 'R' - otherwise. |
16 const char PDF_CharType[256] = { | 17 const char PDF_CharType[256] = { |
17 // NUL SOH STX ETX EOT ENQ ACK BEL BS HT LF VT FF CR SO | 18 // NUL SOH STX ETX EOT ENQ ACK BEL BS HT LF VT FF CR SO |
18 // SI | 19 // SI |
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
53 'W', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', | 54 'W', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', |
54 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', | 55 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', |
55 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', | 56 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', |
56 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', | 57 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', |
57 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', | 58 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', |
58 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', | 59 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', |
59 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', | 60 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', |
60 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', | 61 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', |
61 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'W'}; | 62 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'W'}; |
62 | 63 |
63 CPDF_SimpleParser::CPDF_SimpleParser(const uint8_t* pData, FX_DWORD dwSize) { | 64 int32_t GetHeaderOffset(IFX_FileRead* pFile) { |
64 m_pData = pData; | 65 // TODO(dsinclair): This is a complicated way of saying %PDF, simplify? |
65 m_dwSize = dwSize; | 66 const FX_DWORD tag = FXDWORD_FROM_LSBFIRST(0x46445025); |
66 m_dwCurPos = 0; | 67 |
| 68 const size_t kBufSize = 4; |
| 69 uint8_t buf[kBufSize]; |
| 70 int32_t offset = 0; |
| 71 while (offset <= 1024) { |
| 72 if (!pFile->ReadBlock(buf, offset, kBufSize)) |
| 73 return -1; |
| 74 |
| 75 if (*(FX_DWORD*)buf == tag) |
| 76 return offset; |
| 77 |
| 78 ++offset; |
| 79 } |
| 80 return -1; |
67 } | 81 } |
68 | 82 |
69 CPDF_SimpleParser::CPDF_SimpleParser(const CFX_ByteStringC& str) { | 83 int32_t GetDirectInteger(CPDF_Dictionary* pDict, const CFX_ByteStringC& key) { |
70 m_pData = str.GetPtr(); | 84 CPDF_Number* pObj = ToNumber(pDict->GetElement(key)); |
71 m_dwSize = str.GetLength(); | 85 return pObj ? pObj->GetInteger() : 0; |
72 m_dwCurPos = 0; | |
73 } | |
74 | |
75 void CPDF_SimpleParser::ParseWord(const uint8_t*& pStart, FX_DWORD& dwSize) { | |
76 pStart = NULL; | |
77 dwSize = 0; | |
78 uint8_t ch; | |
79 while (1) { | |
80 if (m_dwSize <= m_dwCurPos) | |
81 return; | |
82 ch = m_pData[m_dwCurPos++]; | |
83 while (PDFCharIsWhitespace(ch)) { | |
84 if (m_dwSize <= m_dwCurPos) | |
85 return; | |
86 ch = m_pData[m_dwCurPos++]; | |
87 } | |
88 | |
89 if (ch != '%') | |
90 break; | |
91 | |
92 while (1) { | |
93 if (m_dwSize <= m_dwCurPos) | |
94 return; | |
95 ch = m_pData[m_dwCurPos++]; | |
96 if (ch == '\r' || ch == '\n') | |
97 break; | |
98 } | |
99 } | |
100 | |
101 FX_DWORD start_pos = m_dwCurPos - 1; | |
102 pStart = m_pData + start_pos; | |
103 if (PDFCharIsDelimiter(ch)) { | |
104 if (ch == '/') { | |
105 while (1) { | |
106 if (m_dwSize <= m_dwCurPos) | |
107 return; | |
108 ch = m_pData[m_dwCurPos++]; | |
109 if (!PDFCharIsOther(ch) && !PDFCharIsNumeric(ch)) { | |
110 m_dwCurPos--; | |
111 dwSize = m_dwCurPos - start_pos; | |
112 return; | |
113 } | |
114 } | |
115 } else { | |
116 dwSize = 1; | |
117 if (ch == '<') { | |
118 if (m_dwSize <= m_dwCurPos) | |
119 return; | |
120 ch = m_pData[m_dwCurPos++]; | |
121 if (ch == '<') | |
122 dwSize = 2; | |
123 else | |
124 m_dwCurPos--; | |
125 } else if (ch == '>') { | |
126 if (m_dwSize <= m_dwCurPos) | |
127 return; | |
128 ch = m_pData[m_dwCurPos++]; | |
129 if (ch == '>') | |
130 dwSize = 2; | |
131 else | |
132 m_dwCurPos--; | |
133 } | |
134 } | |
135 return; | |
136 } | |
137 | |
138 dwSize = 1; | |
139 while (1) { | |
140 if (m_dwSize <= m_dwCurPos) | |
141 return; | |
142 ch = m_pData[m_dwCurPos++]; | |
143 | |
144 if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) { | |
145 m_dwCurPos--; | |
146 break; | |
147 } | |
148 dwSize++; | |
149 } | |
150 } | |
151 | |
152 CFX_ByteStringC CPDF_SimpleParser::GetWord() { | |
153 const uint8_t* pStart; | |
154 FX_DWORD dwSize; | |
155 ParseWord(pStart, dwSize); | |
156 if (dwSize == 1 && pStart[0] == '<') { | |
157 while (m_dwCurPos < m_dwSize && m_pData[m_dwCurPos] != '>') { | |
158 m_dwCurPos++; | |
159 } | |
160 if (m_dwCurPos < m_dwSize) { | |
161 m_dwCurPos++; | |
162 } | |
163 return CFX_ByteStringC(pStart, | |
164 (FX_STRSIZE)(m_dwCurPos - (pStart - m_pData))); | |
165 } | |
166 if (dwSize == 1 && pStart[0] == '(') { | |
167 int level = 1; | |
168 while (m_dwCurPos < m_dwSize) { | |
169 if (m_pData[m_dwCurPos] == ')') { | |
170 level--; | |
171 if (level == 0) { | |
172 break; | |
173 } | |
174 } | |
175 if (m_pData[m_dwCurPos] == '\\') { | |
176 if (m_dwSize <= m_dwCurPos) { | |
177 break; | |
178 } | |
179 m_dwCurPos++; | |
180 } else if (m_pData[m_dwCurPos] == '(') { | |
181 level++; | |
182 } | |
183 if (m_dwSize <= m_dwCurPos) { | |
184 break; | |
185 } | |
186 m_dwCurPos++; | |
187 } | |
188 if (m_dwCurPos < m_dwSize) { | |
189 m_dwCurPos++; | |
190 } | |
191 return CFX_ByteStringC(pStart, | |
192 (FX_STRSIZE)(m_dwCurPos - (pStart - m_pData))); | |
193 } | |
194 return CFX_ByteStringC(pStart, dwSize); | |
195 } | |
196 | |
197 bool CPDF_SimpleParser::FindTagParamFromStart(const CFX_ByteStringC& token, | |
198 int nParams) { | |
199 nParams++; | |
200 FX_DWORD* pBuf = FX_Alloc(FX_DWORD, nParams); | |
201 int buf_index = 0; | |
202 int buf_count = 0; | |
203 m_dwCurPos = 0; | |
204 while (1) { | |
205 pBuf[buf_index++] = m_dwCurPos; | |
206 if (buf_index == nParams) { | |
207 buf_index = 0; | |
208 } | |
209 buf_count++; | |
210 if (buf_count > nParams) { | |
211 buf_count = nParams; | |
212 } | |
213 CFX_ByteStringC word = GetWord(); | |
214 if (word.IsEmpty()) { | |
215 FX_Free(pBuf); | |
216 return false; | |
217 } | |
218 if (word == token) { | |
219 if (buf_count < nParams) { | |
220 continue; | |
221 } | |
222 m_dwCurPos = pBuf[buf_index]; | |
223 FX_Free(pBuf); | |
224 return true; | |
225 } | |
226 } | |
227 return false; | |
228 } | 86 } |
229 | 87 |
230 CFX_ByteString PDF_NameDecode(const CFX_ByteStringC& bstr) { | 88 CFX_ByteString PDF_NameDecode(const CFX_ByteStringC& bstr) { |
231 int size = bstr.GetLength(); | 89 int size = bstr.GetLength(); |
232 const FX_CHAR* pSrc = bstr.GetCStr(); | 90 const FX_CHAR* pSrc = bstr.GetCStr(); |
233 if (!FXSYS_memchr(pSrc, '#', size)) { | 91 if (!FXSYS_memchr(pSrc, '#', size)) { |
234 return bstr; | 92 return bstr; |
235 } | 93 } |
236 CFX_ByteString result; | 94 CFX_ByteString result; |
237 FX_CHAR* pDestStart = result.GetBuffer(size); | 95 FX_CHAR* pDestStart = result.GetBuffer(size); |
(...skipping 158 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
396 if (pFound) { | 254 if (pFound) { |
397 return pFound; | 255 return pFound; |
398 } | 256 } |
399 } | 257 } |
400 return NULL; | 258 return NULL; |
401 } | 259 } |
402 | 260 |
403 CPDF_Object* CPDF_NumberTree::LookupValue(int num) { | 261 CPDF_Object* CPDF_NumberTree::LookupValue(int num) { |
404 return SearchNumberNode(m_pRoot, num); | 262 return SearchNumberNode(m_pRoot, num); |
405 } | 263 } |
OLD | NEW |