Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(169)

Side by Side Diff: core/src/fpdfapi/fpdf_parser/fpdf_parser_utility.cpp

Issue 1773103003: Split off CPDF_Parser and CPDF_SimpleParser into .h/.cpp files (Closed) Base URL: https://pdfium.googlesource.com/pdfium.git@master
Patch Set: Then address C#3. Created 4 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2014 PDFium Authors. All rights reserved. 1 // Copyright 2014 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 6
7 #include "core/src/fpdfapi/fpdf_parser/fpdf_parser_utility.h"
8
7 #include "core/include/fpdfapi/fpdf_parser.h" 9 #include "core/include/fpdfapi/fpdf_parser.h"
8
9 #include "core/include/fxcrt/fx_ext.h" 10 #include "core/include/fxcrt/fx_ext.h"
10 11
11 // Indexed by 8-bit character code, contains either: 12 // Indexed by 8-bit character code, contains either:
12 // 'W' - for whitespace: NUL, TAB, CR, LF, FF, SPACE, 0x80, 0xff 13 // 'W' - for whitespace: NUL, TAB, CR, LF, FF, SPACE, 0x80, 0xff
13 // 'N' - for numeric: 0123456789+-. 14 // 'N' - for numeric: 0123456789+-.
14 // 'D' - for delimiter: %()/<>[]{} 15 // 'D' - for delimiter: %()/<>[]{}
15 // 'R' - otherwise. 16 // 'R' - otherwise.
16 const char PDF_CharType[256] = { 17 const char PDF_CharType[256] = {
17 // NUL SOH STX ETX EOT ENQ ACK BEL BS HT LF VT FF CR SO 18 // NUL SOH STX ETX EOT ENQ ACK BEL BS HT LF VT FF CR SO
18 // SI 19 // SI
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after
53 'W', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 54 'W', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
54 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 55 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
55 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 56 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
56 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 57 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
57 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 58 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
58 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 59 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
59 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 60 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
60 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 61 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
61 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'W'}; 62 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'W'};
62 63
63 CPDF_SimpleParser::CPDF_SimpleParser(const uint8_t* pData, FX_DWORD dwSize) { 64 int32_t GetHeaderOffset(IFX_FileRead* pFile) {
64 m_pData = pData; 65 // TODO(dsinclair): This is a complicated way of saying %PDF, simplify?
65 m_dwSize = dwSize; 66 const FX_DWORD tag = FXDWORD_FROM_LSBFIRST(0x46445025);
66 m_dwCurPos = 0; 67
68 const size_t kBufSize = 4;
69 uint8_t buf[kBufSize];
70 int32_t offset = 0;
71 while (offset <= 1024) {
72 if (!pFile->ReadBlock(buf, offset, kBufSize))
73 return -1;
74
75 if (*(FX_DWORD*)buf == tag)
76 return offset;
77
78 ++offset;
79 }
80 return -1;
67 } 81 }
68 82
69 CPDF_SimpleParser::CPDF_SimpleParser(const CFX_ByteStringC& str) { 83 int32_t GetDirectInteger(CPDF_Dictionary* pDict, const CFX_ByteStringC& key) {
70 m_pData = str.GetPtr(); 84 CPDF_Number* pObj = ToNumber(pDict->GetElement(key));
71 m_dwSize = str.GetLength(); 85 return pObj ? pObj->GetInteger() : 0;
72 m_dwCurPos = 0;
73 }
74
75 void CPDF_SimpleParser::ParseWord(const uint8_t*& pStart, FX_DWORD& dwSize) {
76 pStart = NULL;
77 dwSize = 0;
78 uint8_t ch;
79 while (1) {
80 if (m_dwSize <= m_dwCurPos)
81 return;
82 ch = m_pData[m_dwCurPos++];
83 while (PDFCharIsWhitespace(ch)) {
84 if (m_dwSize <= m_dwCurPos)
85 return;
86 ch = m_pData[m_dwCurPos++];
87 }
88
89 if (ch != '%')
90 break;
91
92 while (1) {
93 if (m_dwSize <= m_dwCurPos)
94 return;
95 ch = m_pData[m_dwCurPos++];
96 if (ch == '\r' || ch == '\n')
97 break;
98 }
99 }
100
101 FX_DWORD start_pos = m_dwCurPos - 1;
102 pStart = m_pData + start_pos;
103 if (PDFCharIsDelimiter(ch)) {
104 if (ch == '/') {
105 while (1) {
106 if (m_dwSize <= m_dwCurPos)
107 return;
108 ch = m_pData[m_dwCurPos++];
109 if (!PDFCharIsOther(ch) && !PDFCharIsNumeric(ch)) {
110 m_dwCurPos--;
111 dwSize = m_dwCurPos - start_pos;
112 return;
113 }
114 }
115 } else {
116 dwSize = 1;
117 if (ch == '<') {
118 if (m_dwSize <= m_dwCurPos)
119 return;
120 ch = m_pData[m_dwCurPos++];
121 if (ch == '<')
122 dwSize = 2;
123 else
124 m_dwCurPos--;
125 } else if (ch == '>') {
126 if (m_dwSize <= m_dwCurPos)
127 return;
128 ch = m_pData[m_dwCurPos++];
129 if (ch == '>')
130 dwSize = 2;
131 else
132 m_dwCurPos--;
133 }
134 }
135 return;
136 }
137
138 dwSize = 1;
139 while (1) {
140 if (m_dwSize <= m_dwCurPos)
141 return;
142 ch = m_pData[m_dwCurPos++];
143
144 if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) {
145 m_dwCurPos--;
146 break;
147 }
148 dwSize++;
149 }
150 }
151
152 CFX_ByteStringC CPDF_SimpleParser::GetWord() {
153 const uint8_t* pStart;
154 FX_DWORD dwSize;
155 ParseWord(pStart, dwSize);
156 if (dwSize == 1 && pStart[0] == '<') {
157 while (m_dwCurPos < m_dwSize && m_pData[m_dwCurPos] != '>') {
158 m_dwCurPos++;
159 }
160 if (m_dwCurPos < m_dwSize) {
161 m_dwCurPos++;
162 }
163 return CFX_ByteStringC(pStart,
164 (FX_STRSIZE)(m_dwCurPos - (pStart - m_pData)));
165 }
166 if (dwSize == 1 && pStart[0] == '(') {
167 int level = 1;
168 while (m_dwCurPos < m_dwSize) {
169 if (m_pData[m_dwCurPos] == ')') {
170 level--;
171 if (level == 0) {
172 break;
173 }
174 }
175 if (m_pData[m_dwCurPos] == '\\') {
176 if (m_dwSize <= m_dwCurPos) {
177 break;
178 }
179 m_dwCurPos++;
180 } else if (m_pData[m_dwCurPos] == '(') {
181 level++;
182 }
183 if (m_dwSize <= m_dwCurPos) {
184 break;
185 }
186 m_dwCurPos++;
187 }
188 if (m_dwCurPos < m_dwSize) {
189 m_dwCurPos++;
190 }
191 return CFX_ByteStringC(pStart,
192 (FX_STRSIZE)(m_dwCurPos - (pStart - m_pData)));
193 }
194 return CFX_ByteStringC(pStart, dwSize);
195 }
196
197 bool CPDF_SimpleParser::FindTagParamFromStart(const CFX_ByteStringC& token,
198 int nParams) {
199 nParams++;
200 FX_DWORD* pBuf = FX_Alloc(FX_DWORD, nParams);
201 int buf_index = 0;
202 int buf_count = 0;
203 m_dwCurPos = 0;
204 while (1) {
205 pBuf[buf_index++] = m_dwCurPos;
206 if (buf_index == nParams) {
207 buf_index = 0;
208 }
209 buf_count++;
210 if (buf_count > nParams) {
211 buf_count = nParams;
212 }
213 CFX_ByteStringC word = GetWord();
214 if (word.IsEmpty()) {
215 FX_Free(pBuf);
216 return false;
217 }
218 if (word == token) {
219 if (buf_count < nParams) {
220 continue;
221 }
222 m_dwCurPos = pBuf[buf_index];
223 FX_Free(pBuf);
224 return true;
225 }
226 }
227 return false;
228 } 86 }
229 87
230 CFX_ByteString PDF_NameDecode(const CFX_ByteStringC& bstr) { 88 CFX_ByteString PDF_NameDecode(const CFX_ByteStringC& bstr) {
231 int size = bstr.GetLength(); 89 int size = bstr.GetLength();
232 const FX_CHAR* pSrc = bstr.GetCStr(); 90 const FX_CHAR* pSrc = bstr.GetCStr();
233 if (!FXSYS_memchr(pSrc, '#', size)) { 91 if (!FXSYS_memchr(pSrc, '#', size)) {
234 return bstr; 92 return bstr;
235 } 93 }
236 CFX_ByteString result; 94 CFX_ByteString result;
237 FX_CHAR* pDestStart = result.GetBuffer(size); 95 FX_CHAR* pDestStart = result.GetBuffer(size);
(...skipping 158 matching lines...) Expand 10 before | Expand all | Expand 10 after
396 if (pFound) { 254 if (pFound) {
397 return pFound; 255 return pFound;
398 } 256 }
399 } 257 }
400 return NULL; 258 return NULL;
401 } 259 }
402 260
403 CPDF_Object* CPDF_NumberTree::LookupValue(int num) { 261 CPDF_Object* CPDF_NumberTree::LookupValue(int num) {
404 return SearchNumberNode(m_pRoot, num); 262 return SearchNumberNode(m_pRoot, num);
405 } 263 }
OLDNEW
« no previous file with comments | « core/src/fpdfapi/fpdf_parser/fpdf_parser_utility.h ('k') | core/src/fpdfapi/fpdf_parser/fpdf_parser_utility_unittest.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698