OLD | NEW |
| (Empty) |
1 // Copyright 2014 PDFium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com | |
6 | |
7 #include "xfa/src/fxfa/fm2js/xfa_lexer.h" | |
8 | |
9 #include "core/include/fxcrt/fx_ext.h" | |
10 | |
11 namespace { | |
12 | |
13 struct XFA_FMDChar { | |
14 static const FX_WCHAR* inc(const FX_WCHAR*& p) { | |
15 ++p; | |
16 return p; | |
17 } | |
18 static const FX_WCHAR* dec(const FX_WCHAR*& p) { | |
19 --p; | |
20 return p; | |
21 } | |
22 static uint16_t get(const FX_WCHAR* p) { return *p; } | |
23 static FX_BOOL isWhiteSpace(const FX_WCHAR* p) { | |
24 return (*p) == 0x09 || (*p) == 0x0b || (*p) == 0x0c || (*p) == 0x20; | |
25 } | |
26 static FX_BOOL isLineTerminator(const FX_WCHAR* p) { | |
27 return *p == 0x0A || *p == 0x0D; | |
28 } | |
29 static FX_BOOL isBinary(const FX_WCHAR* p) { | |
30 return (*p) >= '0' && (*p) <= '1'; | |
31 } | |
32 static FX_BOOL isOctal(const FX_WCHAR* p) { | |
33 return (*p) >= '0' && (*p) <= '7'; | |
34 } | |
35 static FX_BOOL isDigital(const FX_WCHAR* p) { | |
36 return (*p) >= '0' && (*p) <= '9'; | |
37 } | |
38 static FX_BOOL isHex(const FX_WCHAR* p) { | |
39 return isDigital(p) || ((*p) >= 'a' && (*p) <= 'f') || | |
40 ((*p) >= 'A' && (*p) <= 'F'); | |
41 } | |
42 static FX_BOOL isAlpha(const FX_WCHAR* p) { | |
43 return ((*p) <= 'z' && (*p) >= 'a') || ((*p) <= 'Z' && (*p) >= 'A'); | |
44 } | |
45 static FX_BOOL isAvalid(const FX_WCHAR* p, FX_BOOL flag = 0); | |
46 static FX_BOOL string2number(const FX_WCHAR* s, | |
47 FX_DOUBLE* pValue, | |
48 const FX_WCHAR*& pEnd); | |
49 static FX_BOOL isUnicodeAlpha(uint16_t ch); | |
50 }; | |
51 | |
52 inline FX_BOOL XFA_FMDChar::isAvalid(const FX_WCHAR* p, FX_BOOL flag) { | |
53 if (*p == 0) { | |
54 return 1; | |
55 } | |
56 if ((*p <= 0x0A && *p >= 0x09) || *p == 0x0D || | |
57 (*p <= 0xd7ff && *p >= 0x20) || (*p <= 0xfffd && *p >= 0xe000)) { | |
58 return 1; | |
59 } | |
60 if (!flag) { | |
61 if (*p == 0x0B || *p == 0x0C) { | |
62 return 1; | |
63 } | |
64 } | |
65 return 0; | |
66 } | |
67 | |
68 inline FX_BOOL XFA_FMDChar::string2number(const FX_WCHAR* s, | |
69 FX_DOUBLE* pValue, | |
70 const FX_WCHAR*& pEnd) { | |
71 if (s) { | |
72 *pValue = wcstod((wchar_t*)s, (wchar_t**)&pEnd); | |
73 } | |
74 return 0; | |
75 } | |
76 | |
77 inline FX_BOOL XFA_FMDChar::isUnicodeAlpha(uint16_t ch) { | |
78 if (ch == 0 || ch == 0x0A || ch == 0x0D || ch == 0x09 || ch == 0x0B || | |
79 ch == 0x0C || ch == 0x20 || ch == '.' || ch == ';' || ch == '"' || | |
80 ch == '=' || ch == '<' || ch == '>' || ch == ',' || ch == '(' || | |
81 ch == ')' || ch == ']' || ch == '[' || ch == '&' || ch == '|' || | |
82 ch == '+' || ch == '-' || ch == '*' || ch == '/') { | |
83 return FALSE; | |
84 } | |
85 return TRUE; | |
86 } | |
87 | |
// Token table. Each entry is {token type, hash, spelling}; the fields are
// read elsewhere in this file as m_type, m_uHash and m_keyword.
//
// Ordering matters in two ways:
//  * XFA_FM_KeywordToString() indexes this array directly with a token
//    value, so each entry's position presumably equals the numeric value of
//    its XFA_FM_TOKEN enumerator -- TODO confirm against the enum declaration.
//  * IsKeyword() binary-searches the entries from KEYWORD_START through
//    KEYWORD_END by hash, so the hash column must stay in ascending order
//    over that range (as listed here it is ascending over the whole table).
const XFA_FMKeyword keyWords[] = {
    // Single-character operators and punctuation.
    {TOKand, 0x00000026, L"&"},
    {TOKlparen, 0x00000028, L"("},
    {TOKrparen, 0x00000029, L")"},
    {TOKmul, 0x0000002a, L"*"},
    {TOKplus, 0x0000002b, L"+"},
    {TOKcomma, 0x0000002c, L","},
    {TOKminus, 0x0000002d, L"-"},
    {TOKdot, 0x0000002e, L"."},
    {TOKdiv, 0x0000002f, L"/"},
    {TOKlt, 0x0000003c, L"<"},
    {TOKassign, 0x0000003d, L"="},
    {TOKgt, 0x0000003e, L">"},
    {TOKlbracket, 0x0000005b, L"["},
    {TOKrbracket, 0x0000005d, L"]"},
    {TOKor, 0x0000007c, L"|"},
    // Two-character operators.
    {TOKdotscream, 0x0000ec11, L".#"},
    {TOKdotstar, 0x0000ec18, L".*"},
    {TOKdotdot, 0x0000ec1c, L".."},
    {TOKle, 0x000133f9, L"<="},
    {TOKne, 0x000133fa, L"<>"},
    {TOKeq, 0x0001391a, L"=="},
    {TOKge, 0x00013e3b, L">="},
    // Word keywords: the range searched by IsKeyword().
    {TOKdo, 0x00020153, L"do"},
    {TOKkseq, 0x00020676, L"eq"},
    {TOKksge, 0x000210ac, L"ge"},
    {TOKksgt, 0x000210bb, L"gt"},
    {TOKif, 0x00021aef, L"if"},
    {TOKin, 0x00021af7, L"in"},
    {TOKksle, 0x00022a51, L"le"},
    {TOKkslt, 0x00022a60, L"lt"},
    {TOKksne, 0x00023493, L"ne"},
    {TOKksor, 0x000239c1, L"or"},
    {TOKnull, 0x052931bb, L"null"},
    {TOKbreak, 0x05518c25, L"break"},
    {TOKksand, 0x09f9db33, L"and"},
    {TOKend, 0x0a631437, L"end"},
    {TOKeof, 0x0a63195a, L"eof"},
    {TOKfor, 0x0a7d67a7, L"for"},
    {TOKnan, 0x0b4f91dd, L"nan"},
    {TOKksnot, 0x0b4fd9b1, L"not"},
    {TOKvar, 0x0c2203e9, L"var"},
    {TOKthen, 0x2d5738cf, L"then"},
    {TOKelse, 0x45f65ee9, L"else"},
    {TOKexit, 0x4731d6ba, L"exit"},
    {TOKdownto, 0x4caadc3b, L"downto"},
    {TOKreturn, 0x4db8bd60, L"return"},
    {TOKinfinity, 0x5c0a010a, L"infinity"},
    {TOKendwhile, 0x5c64bff0, L"endwhile"},
    {TOKforeach, 0x67e31f38, L"foreach"},
    {TOKendfunc, 0x68f984a3, L"endfunc"},
    {TOKelseif, 0x78253218, L"elseif"},
    {TOKwhile, 0x84229259, L"while"},
    {TOKendfor, 0x8ab49d7e, L"endfor"},
    {TOKthrow, 0x8db05c94, L"throw"},
    {TOKstep, 0xa7a7887c, L"step"},
    {TOKupto, 0xb5155328, L"upto"},
    {TOKcontinue, 0xc0340685, L"continue"},
    {TOKfunc, 0xcdce60ec, L"func"},
    {TOKendif, 0xe0e8fee6, L"endif"},
};

// First and last tokens of the word-keyword range. They are used both as
// token-value bounds (XFA_FM_KeywordToString) and as array-index bounds
// (IsKeyword).
const XFA_FM_TOKEN KEYWORD_START = TOKdo;
const XFA_FM_TOKEN KEYWORD_END = TOKendif;
152 | |
153 } // namespace | |
154 | |
155 const FX_WCHAR* XFA_FM_KeywordToString(XFA_FM_TOKEN op) { | |
156 if (op < KEYWORD_START || op > KEYWORD_END) | |
157 return L""; | |
158 return keyWords[op].m_keyword; | |
159 } | |
160 | |
161 CXFA_FMToken::CXFA_FMToken() : m_type(TOKreserver), m_uLinenum(1) {} | |
162 | |
163 CXFA_FMToken::CXFA_FMToken(FX_DWORD uLineNum) | |
164 : m_type(TOKreserver), m_uLinenum(uLineNum) {} | |
165 | |
166 CXFA_FMLexer::CXFA_FMLexer(const CFX_WideStringC& wsFormCalc, | |
167 CXFA_FMErrorInfo* pErrorInfo) | |
168 : m_ptr(wsFormCalc.GetPtr()), m_uCurrentLine(1), m_pErrorInfo(pErrorInfo) {} | |
169 | |
170 CXFA_FMToken* CXFA_FMLexer::NextToken() { | |
171 m_pToken.reset(Scan()); | |
172 return m_pToken.get(); | |
173 } | |
174 | |
175 CXFA_FMToken* CXFA_FMLexer::Scan() { | |
176 uint16_t ch = 0; | |
177 CXFA_FMToken* p = new CXFA_FMToken(m_uCurrentLine); | |
178 if (!XFA_FMDChar::isAvalid(m_ptr)) { | |
179 ch = XFA_FMDChar::get(m_ptr); | |
180 Error(FMERR_UNSUPPORTED_CHAR, ch); | |
181 return p; | |
182 } | |
183 int iRet = 0; | |
184 while (1) { | |
185 if (!XFA_FMDChar::isAvalid(m_ptr)) { | |
186 ch = XFA_FMDChar::get(m_ptr); | |
187 Error(FMERR_UNSUPPORTED_CHAR, ch); | |
188 return p; | |
189 } | |
190 ch = XFA_FMDChar::get(m_ptr); | |
191 switch (ch) { | |
192 case 0: | |
193 p->m_type = TOKeof; | |
194 return p; | |
195 case 0x0A: | |
196 ++m_uCurrentLine; | |
197 p->m_uLinenum = m_uCurrentLine; | |
198 XFA_FMDChar::inc(m_ptr); | |
199 break; | |
200 case 0x0D: | |
201 XFA_FMDChar::inc(m_ptr); | |
202 break; | |
203 case ';': { | |
204 const FX_WCHAR* pTemp = 0; | |
205 Comment(m_ptr, pTemp); | |
206 m_ptr = pTemp; | |
207 } break; | |
208 case '"': { | |
209 const FX_WCHAR* pTemp = 0; | |
210 p->m_type = TOKstring; | |
211 iRet = String(p, m_ptr, pTemp); | |
212 m_ptr = pTemp; | |
213 } | |
214 return p; | |
215 case '0': | |
216 case '1': | |
217 case '2': | |
218 case '3': | |
219 case '4': | |
220 case '5': | |
221 case '6': | |
222 case '7': | |
223 case '8': | |
224 case '9': { | |
225 p->m_type = TOKnumber; | |
226 const FX_WCHAR* pTemp = 0; | |
227 iRet = Number(p, m_ptr, pTemp); | |
228 m_ptr = pTemp; | |
229 if (iRet) { | |
230 Error(FMERR_BAD_SUFFIX_NUMBER); | |
231 return p; | |
232 } | |
233 } | |
234 return p; | |
235 case '=': | |
236 XFA_FMDChar::inc(m_ptr); | |
237 if (XFA_FMDChar::isAvalid(m_ptr)) { | |
238 ch = XFA_FMDChar::get(m_ptr); | |
239 if (ch == '=') { | |
240 p->m_type = TOKeq; | |
241 XFA_FMDChar::inc(m_ptr); | |
242 return p; | |
243 } else { | |
244 p->m_type = TOKassign; | |
245 return p; | |
246 } | |
247 } else { | |
248 ch = XFA_FMDChar::get(m_ptr); | |
249 Error(FMERR_UNSUPPORTED_CHAR, ch); | |
250 return p; | |
251 } | |
252 break; | |
253 case '<': | |
254 XFA_FMDChar::inc(m_ptr); | |
255 if (XFA_FMDChar::isAvalid(m_ptr)) { | |
256 ch = XFA_FMDChar::get(m_ptr); | |
257 if (ch == '=') { | |
258 p->m_type = TOKle; | |
259 XFA_FMDChar::inc(m_ptr); | |
260 return p; | |
261 } else if (ch == '>') { | |
262 p->m_type = TOKne; | |
263 XFA_FMDChar::inc(m_ptr); | |
264 return p; | |
265 } else { | |
266 p->m_type = TOKlt; | |
267 return p; | |
268 } | |
269 } else { | |
270 ch = XFA_FMDChar::get(m_ptr); | |
271 Error(FMERR_UNSUPPORTED_CHAR, ch); | |
272 return p; | |
273 } | |
274 break; | |
275 case '>': | |
276 XFA_FMDChar::inc(m_ptr); | |
277 if (XFA_FMDChar::isAvalid(m_ptr)) { | |
278 ch = XFA_FMDChar::get(m_ptr); | |
279 if (ch == '=') { | |
280 p->m_type = TOKge; | |
281 XFA_FMDChar::inc(m_ptr); | |
282 return p; | |
283 } else { | |
284 p->m_type = TOKgt; | |
285 return p; | |
286 } | |
287 } else { | |
288 ch = XFA_FMDChar::get(m_ptr); | |
289 Error(FMERR_UNSUPPORTED_CHAR, ch); | |
290 return p; | |
291 } | |
292 break; | |
293 case ',': | |
294 p->m_type = TOKcomma; | |
295 XFA_FMDChar::inc(m_ptr); | |
296 return p; | |
297 case '(': | |
298 p->m_type = TOKlparen; | |
299 XFA_FMDChar::inc(m_ptr); | |
300 return p; | |
301 case ')': | |
302 p->m_type = TOKrparen; | |
303 XFA_FMDChar::inc(m_ptr); | |
304 return p; | |
305 case '[': | |
306 p->m_type = TOKlbracket; | |
307 XFA_FMDChar::inc(m_ptr); | |
308 return p; | |
309 case ']': | |
310 p->m_type = TOKrbracket; | |
311 XFA_FMDChar::inc(m_ptr); | |
312 return p; | |
313 case '&': | |
314 XFA_FMDChar::inc(m_ptr); | |
315 p->m_type = TOKand; | |
316 return p; | |
317 case '|': | |
318 XFA_FMDChar::inc(m_ptr); | |
319 p->m_type = TOKor; | |
320 return p; | |
321 case '+': | |
322 XFA_FMDChar::inc(m_ptr); | |
323 p->m_type = TOKplus; | |
324 return p; | |
325 case '-': | |
326 XFA_FMDChar::inc(m_ptr); | |
327 p->m_type = TOKminus; | |
328 return p; | |
329 case '*': | |
330 XFA_FMDChar::inc(m_ptr); | |
331 p->m_type = TOKmul; | |
332 return p; | |
333 case '/': | |
334 XFA_FMDChar::inc(m_ptr); | |
335 if (XFA_FMDChar::isAvalid(m_ptr)) { | |
336 ch = XFA_FMDChar::get(m_ptr); | |
337 if (ch == '/') { | |
338 const FX_WCHAR* pTemp = 0; | |
339 Comment(m_ptr, pTemp); | |
340 m_ptr = pTemp; | |
341 break; | |
342 } else { | |
343 p->m_type = TOKdiv; | |
344 return p; | |
345 } | |
346 } else { | |
347 ch = XFA_FMDChar::get(m_ptr); | |
348 Error(FMERR_UNSUPPORTED_CHAR, ch); | |
349 return p; | |
350 } | |
351 break; | |
352 case '.': | |
353 XFA_FMDChar::inc(m_ptr); | |
354 if (XFA_FMDChar::isAvalid(m_ptr)) { | |
355 ch = XFA_FMDChar::get(m_ptr); | |
356 if (ch == '.') { | |
357 p->m_type = TOKdotdot; | |
358 XFA_FMDChar::inc(m_ptr); | |
359 return p; | |
360 } else if (ch == '*') { | |
361 p->m_type = TOKdotstar; | |
362 XFA_FMDChar::inc(m_ptr); | |
363 return p; | |
364 } else if (ch == '#') { | |
365 p->m_type = TOKdotscream; | |
366 XFA_FMDChar::inc(m_ptr); | |
367 return p; | |
368 } else if (ch <= '9' && ch >= '0') { | |
369 p->m_type = TOKnumber; | |
370 const FX_WCHAR* pTemp = 0; | |
371 XFA_FMDChar::dec(m_ptr); | |
372 iRet = Number(p, m_ptr, pTemp); | |
373 m_ptr = pTemp; | |
374 if (iRet) { | |
375 Error(FMERR_BAD_SUFFIX_NUMBER); | |
376 } | |
377 return p; | |
378 } else { | |
379 p->m_type = TOKdot; | |
380 return p; | |
381 } | |
382 } else { | |
383 ch = XFA_FMDChar::get(m_ptr); | |
384 Error(FMERR_UNSUPPORTED_CHAR, ch); | |
385 return p; | |
386 } | |
387 case 0x09: | |
388 case 0x0B: | |
389 case 0x0C: | |
390 case 0x20: | |
391 XFA_FMDChar::inc(m_ptr); | |
392 break; | |
393 default: { | |
394 const FX_WCHAR* pTemp = 0; | |
395 iRet = Identifiers(p, m_ptr, pTemp); | |
396 m_ptr = pTemp; | |
397 if (iRet) { | |
398 return p; | |
399 } | |
400 p->m_type = IsKeyword(p->m_wstring); | |
401 } | |
402 return p; | |
403 } | |
404 } | |
405 } | |
406 | |
407 FX_DWORD CXFA_FMLexer::Number(CXFA_FMToken* t, | |
408 const FX_WCHAR* p, | |
409 const FX_WCHAR*& pEnd) { | |
410 FX_DOUBLE number = 0; | |
411 if (XFA_FMDChar::string2number(p, &number, pEnd)) { | |
412 return 1; | |
413 } | |
414 if (pEnd && XFA_FMDChar::isAlpha(pEnd)) { | |
415 return 1; | |
416 } | |
417 t->m_wstring = CFX_WideStringC(p, (pEnd - p)); | |
418 return 0; | |
419 } | |
420 | |
421 FX_DWORD CXFA_FMLexer::String(CXFA_FMToken* t, | |
422 const FX_WCHAR* p, | |
423 const FX_WCHAR*& pEnd) { | |
424 const FX_WCHAR* pStart = p; | |
425 uint16_t ch = 0; | |
426 XFA_FMDChar::inc(p); | |
427 ch = XFA_FMDChar::get(p); | |
428 while (ch) { | |
429 if (!XFA_FMDChar::isAvalid(p)) { | |
430 ch = XFA_FMDChar::get(p); | |
431 pEnd = p; | |
432 t->m_wstring = CFX_WideStringC(pStart, (pEnd - pStart)); | |
433 Error(FMERR_UNSUPPORTED_CHAR, ch); | |
434 return 1; | |
435 } | |
436 if (ch == '"') { | |
437 XFA_FMDChar::inc(p); | |
438 if (!XFA_FMDChar::isAvalid(p)) { | |
439 ch = XFA_FMDChar::get(p); | |
440 pEnd = p; | |
441 t->m_wstring = CFX_WideStringC(pStart, (pEnd - pStart)); | |
442 Error(FMERR_UNSUPPORTED_CHAR, ch); | |
443 return 1; | |
444 } | |
445 ch = XFA_FMDChar::get(p); | |
446 if (ch == '"') { | |
447 goto NEXT; | |
448 } else { | |
449 break; | |
450 } | |
451 } | |
452 NEXT: | |
453 XFA_FMDChar::inc(p); | |
454 ch = XFA_FMDChar::get(p); | |
455 } | |
456 pEnd = p; | |
457 t->m_wstring = CFX_WideStringC(pStart, (pEnd - pStart)); | |
458 return 0; | |
459 } | |
460 | |
461 FX_DWORD CXFA_FMLexer::Identifiers(CXFA_FMToken* t, | |
462 const FX_WCHAR* p, | |
463 const FX_WCHAR*& pEnd) { | |
464 const FX_WCHAR* pStart = p; | |
465 uint16_t ch = 0; | |
466 ch = XFA_FMDChar::get(p); | |
467 XFA_FMDChar::inc(p); | |
468 if (!XFA_FMDChar::isAvalid(p)) { | |
469 pEnd = p; | |
470 t->m_wstring = CFX_WideStringC(pStart, (pEnd - pStart)); | |
471 Error(FMERR_UNSUPPORTED_CHAR, ch); | |
472 return 1; | |
473 } | |
474 ch = XFA_FMDChar::get(p); | |
475 while (ch) { | |
476 if (!XFA_FMDChar::isAvalid(p)) { | |
477 pEnd = p; | |
478 t->m_wstring = CFX_WideStringC(pStart, (pEnd - pStart)); | |
479 Error(FMERR_UNSUPPORTED_CHAR, ch); | |
480 return 1; | |
481 } | |
482 ch = XFA_FMDChar::get(p); | |
483 if (XFA_FMDChar::isUnicodeAlpha(ch)) { | |
484 XFA_FMDChar::inc(p); | |
485 } else { | |
486 pEnd = p; | |
487 t->m_wstring = CFX_WideStringC(pStart, (pEnd - pStart)); | |
488 return 0; | |
489 } | |
490 } | |
491 pEnd = p; | |
492 t->m_wstring = CFX_WideStringC(pStart, (pEnd - pStart)); | |
493 return 0; | |
494 } | |
495 | |
496 void CXFA_FMLexer::Comment(const FX_WCHAR* p, const FX_WCHAR*& pEnd) { | |
497 unsigned ch = 0; | |
498 XFA_FMDChar::inc(p); | |
499 ch = XFA_FMDChar::get(p); | |
500 while (ch) { | |
501 if (ch == 0x0D) { | |
502 XFA_FMDChar::inc(p); | |
503 pEnd = p; | |
504 return; | |
505 } | |
506 if (ch == 0x0A) { | |
507 ++m_uCurrentLine; | |
508 XFA_FMDChar::inc(p); | |
509 pEnd = p; | |
510 return; | |
511 } | |
512 XFA_FMDChar::inc(p); | |
513 ch = XFA_FMDChar::get(p); | |
514 } | |
515 pEnd = p; | |
516 } | |
517 | |
518 XFA_FM_TOKEN CXFA_FMLexer::IsKeyword(const CFX_WideStringC& str) { | |
519 int32_t iLength = str.GetLength(); | |
520 uint32_t uHash = FX_HashCode_String_GetW(str.GetPtr(), iLength, TRUE); | |
521 int32_t iStart = KEYWORD_START, iEnd = KEYWORD_END; | |
522 int32_t iMid = (iStart + iEnd) / 2; | |
523 XFA_FMKeyword keyword; | |
524 do { | |
525 iMid = (iStart + iEnd) / 2; | |
526 keyword = keyWords[iMid]; | |
527 if (uHash == keyword.m_uHash) { | |
528 return keyword.m_type; | |
529 } else if (uHash < keyword.m_uHash) { | |
530 iEnd = iMid - 1; | |
531 } else { | |
532 iStart = iMid + 1; | |
533 } | |
534 } while (iStart <= iEnd); | |
535 return TOKidentifier; | |
536 } | |
537 | |
538 void CXFA_FMLexer::Error(XFA_FM_ERRMSG msg, ...) { | |
539 m_pErrorInfo->linenum = m_uCurrentLine; | |
540 const FX_WCHAR* lpMessageInfo = XFA_FM_ErrorMsg(msg); | |
541 va_list ap; | |
542 va_start(ap, msg); | |
543 m_pErrorInfo->message.FormatV(lpMessageInfo, ap); | |
544 va_end(ap); | |
545 } | |
546 | |
547 FX_BOOL CXFA_FMLexer::HasError() const { | |
548 if (m_pErrorInfo->message.IsEmpty()) { | |
549 return FALSE; | |
550 } | |
551 return TRUE; | |
552 } | |
OLD | NEW |