Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(87)

Side by Side Diff: xfa/src/fxfa/fm2js/xfa_lexer.cpp

Issue 1803723002: Move xfa/src up to xfa/. (Closed) Base URL: https://pdfium.googlesource.com/pdfium.git@master
Patch Set: Rebase to master Created 4 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « xfa/src/fxfa/fm2js/xfa_lexer.h ('k') | xfa/src/fxfa/fm2js/xfa_program.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 // Copyright 2014 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7 #include "xfa/src/fxfa/fm2js/xfa_lexer.h"
8
9 #include "core/include/fxcrt/fx_ext.h"
10
11 namespace {
12
13 struct XFA_FMDChar {
14 static const FX_WCHAR* inc(const FX_WCHAR*& p) {
15 ++p;
16 return p;
17 }
18 static const FX_WCHAR* dec(const FX_WCHAR*& p) {
19 --p;
20 return p;
21 }
22 static uint16_t get(const FX_WCHAR* p) { return *p; }
23 static FX_BOOL isWhiteSpace(const FX_WCHAR* p) {
24 return (*p) == 0x09 || (*p) == 0x0b || (*p) == 0x0c || (*p) == 0x20;
25 }
26 static FX_BOOL isLineTerminator(const FX_WCHAR* p) {
27 return *p == 0x0A || *p == 0x0D;
28 }
29 static FX_BOOL isBinary(const FX_WCHAR* p) {
30 return (*p) >= '0' && (*p) <= '1';
31 }
32 static FX_BOOL isOctal(const FX_WCHAR* p) {
33 return (*p) >= '0' && (*p) <= '7';
34 }
35 static FX_BOOL isDigital(const FX_WCHAR* p) {
36 return (*p) >= '0' && (*p) <= '9';
37 }
38 static FX_BOOL isHex(const FX_WCHAR* p) {
39 return isDigital(p) || ((*p) >= 'a' && (*p) <= 'f') ||
40 ((*p) >= 'A' && (*p) <= 'F');
41 }
42 static FX_BOOL isAlpha(const FX_WCHAR* p) {
43 return ((*p) <= 'z' && (*p) >= 'a') || ((*p) <= 'Z' && (*p) >= 'A');
44 }
45 static FX_BOOL isAvalid(const FX_WCHAR* p, FX_BOOL flag = 0);
46 static FX_BOOL string2number(const FX_WCHAR* s,
47 FX_DOUBLE* pValue,
48 const FX_WCHAR*& pEnd);
49 static FX_BOOL isUnicodeAlpha(uint16_t ch);
50 };
51
52 inline FX_BOOL XFA_FMDChar::isAvalid(const FX_WCHAR* p, FX_BOOL flag) {
53 if (*p == 0) {
54 return 1;
55 }
56 if ((*p <= 0x0A && *p >= 0x09) || *p == 0x0D ||
57 (*p <= 0xd7ff && *p >= 0x20) || (*p <= 0xfffd && *p >= 0xe000)) {
58 return 1;
59 }
60 if (!flag) {
61 if (*p == 0x0B || *p == 0x0C) {
62 return 1;
63 }
64 }
65 return 0;
66 }
67
68 inline FX_BOOL XFA_FMDChar::string2number(const FX_WCHAR* s,
69 FX_DOUBLE* pValue,
70 const FX_WCHAR*& pEnd) {
71 if (s) {
72 *pValue = wcstod((wchar_t*)s, (wchar_t**)&pEnd);
73 }
74 return 0;
75 }
76
77 inline FX_BOOL XFA_FMDChar::isUnicodeAlpha(uint16_t ch) {
78 if (ch == 0 || ch == 0x0A || ch == 0x0D || ch == 0x09 || ch == 0x0B ||
79 ch == 0x0C || ch == 0x20 || ch == '.' || ch == ';' || ch == '"' ||
80 ch == '=' || ch == '<' || ch == '>' || ch == ',' || ch == '(' ||
81 ch == ')' || ch == ']' || ch == '[' || ch == '&' || ch == '|' ||
82 ch == '+' || ch == '-' || ch == '*' || ch == '/') {
83 return FALSE;
84 }
85 return TRUE;
86 }
87
// Table of operator and keyword tokens. Each entry is initialised with a
// token type, a hash value and the token's spelling.
//
// Entries are listed in ascending order of the hash value; IsKeyword()
// binary-searches the table on that field, so the ordering must be kept when
// adding entries.
//
// NOTE(review): XFA_FM_KeywordToString() and IsKeyword() both use
// XFA_FM_TOKEN values directly as indices into this table, so the entry order
// presumably mirrors the declaration order of XFA_FM_TOKEN - confirm against
// xfa_lexer.h before reordering.
// NOTE(review): the hash values presumably come from
// FX_HashCode_String_GetW() applied to the spelling with case ignored, as
// IsKeyword() computes - confirm.
88 const XFA_FMKeyword keyWords[] = {
89 {TOKand, 0x00000026, L"&"},
90 {TOKlparen, 0x00000028, L"("},
91 {TOKrparen, 0x00000029, L")"},
92 {TOKmul, 0x0000002a, L"*"},
93 {TOKplus, 0x0000002b, L"+"},
94 {TOKcomma, 0x0000002c, L","},
95 {TOKminus, 0x0000002d, L"-"},
96 {TOKdot, 0x0000002e, L"."},
97 {TOKdiv, 0x0000002f, L"/"},
98 {TOKlt, 0x0000003c, L"<"},
99 {TOKassign, 0x0000003d, L"="},
100 {TOKgt, 0x0000003e, L">"},
101 {TOKlbracket, 0x0000005b, L"["},
102 {TOKrbracket, 0x0000005d, L"]"},
103 {TOKor, 0x0000007c, L"|"},
104 {TOKdotscream, 0x0000ec11, L".#"},
105 {TOKdotstar, 0x0000ec18, L".*"},
106 {TOKdotdot, 0x0000ec1c, L".."},
107 {TOKle, 0x000133f9, L"<="},
108 {TOKne, 0x000133fa, L"<>"},
109 {TOKeq, 0x0001391a, L"=="},
110 {TOKge, 0x00013e3b, L">="},
111 {TOKdo, 0x00020153, L"do"},
112 {TOKkseq, 0x00020676, L"eq"},
113 {TOKksge, 0x000210ac, L"ge"},
114 {TOKksgt, 0x000210bb, L"gt"},
115 {TOKif, 0x00021aef, L"if"},
116 {TOKin, 0x00021af7, L"in"},
117 {TOKksle, 0x00022a51, L"le"},
118 {TOKkslt, 0x00022a60, L"lt"},
119 {TOKksne, 0x00023493, L"ne"},
120 {TOKksor, 0x000239c1, L"or"},
121 {TOKnull, 0x052931bb, L"null"},
122 {TOKbreak, 0x05518c25, L"break"},
123 {TOKksand, 0x09f9db33, L"and"},
124 {TOKend, 0x0a631437, L"end"},
125 {TOKeof, 0x0a63195a, L"eof"},
126 {TOKfor, 0x0a7d67a7, L"for"},
127 {TOKnan, 0x0b4f91dd, L"nan"},
128 {TOKksnot, 0x0b4fd9b1, L"not"},
129 {TOKvar, 0x0c2203e9, L"var"},
130 {TOKthen, 0x2d5738cf, L"then"},
131 {TOKelse, 0x45f65ee9, L"else"},
132 {TOKexit, 0x4731d6ba, L"exit"},
133 {TOKdownto, 0x4caadc3b, L"downto"},
134 {TOKreturn, 0x4db8bd60, L"return"},
135 {TOKinfinity, 0x5c0a010a, L"infinity"},
136 {TOKendwhile, 0x5c64bff0, L"endwhile"},
137 {TOKforeach, 0x67e31f38, L"foreach"},
138 {TOKendfunc, 0x68f984a3, L"endfunc"},
139 {TOKelseif, 0x78253218, L"elseif"},
140 {TOKwhile, 0x84229259, L"while"},
141 {TOKendfor, 0x8ab49d7e, L"endfor"},
142 {TOKthrow, 0x8db05c94, L"throw"},
143 {TOKstep, 0xa7a7887c, L"step"},
144 {TOKupto, 0xb5155328, L"upto"},
145 {TOKcontinue, 0xc0340685, L"continue"},
146 {TOKfunc, 0xcdce60ec, L"func"},
147 {TOKendif, 0xe0e8fee6, L"endif"},
148 };
149
// First and last token values of the word-keyword range (the "do" entry
// through the "endif" entry above). IsKeyword() uses them as the initial
// bounds of its search and XFA_FM_KeywordToString() as its validity range.
150 const XFA_FM_TOKEN KEYWORD_START = TOKdo;
151 const XFA_FM_TOKEN KEYWORD_END = TOKendif;
152
153 } // namespace
154
155 const FX_WCHAR* XFA_FM_KeywordToString(XFA_FM_TOKEN op) {
156 if (op < KEYWORD_START || op > KEYWORD_END)
157 return L"";
158 return keyWords[op].m_keyword;
159 }
160
161 CXFA_FMToken::CXFA_FMToken() : m_type(TOKreserver), m_uLinenum(1) {}
162
163 CXFA_FMToken::CXFA_FMToken(FX_DWORD uLineNum)
164 : m_type(TOKreserver), m_uLinenum(uLineNum) {}
165
166 CXFA_FMLexer::CXFA_FMLexer(const CFX_WideStringC& wsFormCalc,
167 CXFA_FMErrorInfo* pErrorInfo)
168 : m_ptr(wsFormCalc.GetPtr()), m_uCurrentLine(1), m_pErrorInfo(pErrorInfo) {}
169
170 CXFA_FMToken* CXFA_FMLexer::NextToken() {
171 m_pToken.reset(Scan());
172 return m_pToken.get();
173 }
174
175 CXFA_FMToken* CXFA_FMLexer::Scan() {
176 uint16_t ch = 0;
177 CXFA_FMToken* p = new CXFA_FMToken(m_uCurrentLine);
178 if (!XFA_FMDChar::isAvalid(m_ptr)) {
179 ch = XFA_FMDChar::get(m_ptr);
180 Error(FMERR_UNSUPPORTED_CHAR, ch);
181 return p;
182 }
183 int iRet = 0;
184 while (1) {
185 if (!XFA_FMDChar::isAvalid(m_ptr)) {
186 ch = XFA_FMDChar::get(m_ptr);
187 Error(FMERR_UNSUPPORTED_CHAR, ch);
188 return p;
189 }
190 ch = XFA_FMDChar::get(m_ptr);
191 switch (ch) {
192 case 0:
193 p->m_type = TOKeof;
194 return p;
195 case 0x0A:
196 ++m_uCurrentLine;
197 p->m_uLinenum = m_uCurrentLine;
198 XFA_FMDChar::inc(m_ptr);
199 break;
200 case 0x0D:
201 XFA_FMDChar::inc(m_ptr);
202 break;
203 case ';': {
204 const FX_WCHAR* pTemp = 0;
205 Comment(m_ptr, pTemp);
206 m_ptr = pTemp;
207 } break;
208 case '"': {
209 const FX_WCHAR* pTemp = 0;
210 p->m_type = TOKstring;
211 iRet = String(p, m_ptr, pTemp);
212 m_ptr = pTemp;
213 }
214 return p;
215 case '0':
216 case '1':
217 case '2':
218 case '3':
219 case '4':
220 case '5':
221 case '6':
222 case '7':
223 case '8':
224 case '9': {
225 p->m_type = TOKnumber;
226 const FX_WCHAR* pTemp = 0;
227 iRet = Number(p, m_ptr, pTemp);
228 m_ptr = pTemp;
229 if (iRet) {
230 Error(FMERR_BAD_SUFFIX_NUMBER);
231 return p;
232 }
233 }
234 return p;
235 case '=':
236 XFA_FMDChar::inc(m_ptr);
237 if (XFA_FMDChar::isAvalid(m_ptr)) {
238 ch = XFA_FMDChar::get(m_ptr);
239 if (ch == '=') {
240 p->m_type = TOKeq;
241 XFA_FMDChar::inc(m_ptr);
242 return p;
243 } else {
244 p->m_type = TOKassign;
245 return p;
246 }
247 } else {
248 ch = XFA_FMDChar::get(m_ptr);
249 Error(FMERR_UNSUPPORTED_CHAR, ch);
250 return p;
251 }
252 break;
253 case '<':
254 XFA_FMDChar::inc(m_ptr);
255 if (XFA_FMDChar::isAvalid(m_ptr)) {
256 ch = XFA_FMDChar::get(m_ptr);
257 if (ch == '=') {
258 p->m_type = TOKle;
259 XFA_FMDChar::inc(m_ptr);
260 return p;
261 } else if (ch == '>') {
262 p->m_type = TOKne;
263 XFA_FMDChar::inc(m_ptr);
264 return p;
265 } else {
266 p->m_type = TOKlt;
267 return p;
268 }
269 } else {
270 ch = XFA_FMDChar::get(m_ptr);
271 Error(FMERR_UNSUPPORTED_CHAR, ch);
272 return p;
273 }
274 break;
275 case '>':
276 XFA_FMDChar::inc(m_ptr);
277 if (XFA_FMDChar::isAvalid(m_ptr)) {
278 ch = XFA_FMDChar::get(m_ptr);
279 if (ch == '=') {
280 p->m_type = TOKge;
281 XFA_FMDChar::inc(m_ptr);
282 return p;
283 } else {
284 p->m_type = TOKgt;
285 return p;
286 }
287 } else {
288 ch = XFA_FMDChar::get(m_ptr);
289 Error(FMERR_UNSUPPORTED_CHAR, ch);
290 return p;
291 }
292 break;
293 case ',':
294 p->m_type = TOKcomma;
295 XFA_FMDChar::inc(m_ptr);
296 return p;
297 case '(':
298 p->m_type = TOKlparen;
299 XFA_FMDChar::inc(m_ptr);
300 return p;
301 case ')':
302 p->m_type = TOKrparen;
303 XFA_FMDChar::inc(m_ptr);
304 return p;
305 case '[':
306 p->m_type = TOKlbracket;
307 XFA_FMDChar::inc(m_ptr);
308 return p;
309 case ']':
310 p->m_type = TOKrbracket;
311 XFA_FMDChar::inc(m_ptr);
312 return p;
313 case '&':
314 XFA_FMDChar::inc(m_ptr);
315 p->m_type = TOKand;
316 return p;
317 case '|':
318 XFA_FMDChar::inc(m_ptr);
319 p->m_type = TOKor;
320 return p;
321 case '+':
322 XFA_FMDChar::inc(m_ptr);
323 p->m_type = TOKplus;
324 return p;
325 case '-':
326 XFA_FMDChar::inc(m_ptr);
327 p->m_type = TOKminus;
328 return p;
329 case '*':
330 XFA_FMDChar::inc(m_ptr);
331 p->m_type = TOKmul;
332 return p;
333 case '/':
334 XFA_FMDChar::inc(m_ptr);
335 if (XFA_FMDChar::isAvalid(m_ptr)) {
336 ch = XFA_FMDChar::get(m_ptr);
337 if (ch == '/') {
338 const FX_WCHAR* pTemp = 0;
339 Comment(m_ptr, pTemp);
340 m_ptr = pTemp;
341 break;
342 } else {
343 p->m_type = TOKdiv;
344 return p;
345 }
346 } else {
347 ch = XFA_FMDChar::get(m_ptr);
348 Error(FMERR_UNSUPPORTED_CHAR, ch);
349 return p;
350 }
351 break;
352 case '.':
353 XFA_FMDChar::inc(m_ptr);
354 if (XFA_FMDChar::isAvalid(m_ptr)) {
355 ch = XFA_FMDChar::get(m_ptr);
356 if (ch == '.') {
357 p->m_type = TOKdotdot;
358 XFA_FMDChar::inc(m_ptr);
359 return p;
360 } else if (ch == '*') {
361 p->m_type = TOKdotstar;
362 XFA_FMDChar::inc(m_ptr);
363 return p;
364 } else if (ch == '#') {
365 p->m_type = TOKdotscream;
366 XFA_FMDChar::inc(m_ptr);
367 return p;
368 } else if (ch <= '9' && ch >= '0') {
369 p->m_type = TOKnumber;
370 const FX_WCHAR* pTemp = 0;
371 XFA_FMDChar::dec(m_ptr);
372 iRet = Number(p, m_ptr, pTemp);
373 m_ptr = pTemp;
374 if (iRet) {
375 Error(FMERR_BAD_SUFFIX_NUMBER);
376 }
377 return p;
378 } else {
379 p->m_type = TOKdot;
380 return p;
381 }
382 } else {
383 ch = XFA_FMDChar::get(m_ptr);
384 Error(FMERR_UNSUPPORTED_CHAR, ch);
385 return p;
386 }
387 case 0x09:
388 case 0x0B:
389 case 0x0C:
390 case 0x20:
391 XFA_FMDChar::inc(m_ptr);
392 break;
393 default: {
394 const FX_WCHAR* pTemp = 0;
395 iRet = Identifiers(p, m_ptr, pTemp);
396 m_ptr = pTemp;
397 if (iRet) {
398 return p;
399 }
400 p->m_type = IsKeyword(p->m_wstring);
401 }
402 return p;
403 }
404 }
405 }
406
407 FX_DWORD CXFA_FMLexer::Number(CXFA_FMToken* t,
408 const FX_WCHAR* p,
409 const FX_WCHAR*& pEnd) {
410 FX_DOUBLE number = 0;
411 if (XFA_FMDChar::string2number(p, &number, pEnd)) {
412 return 1;
413 }
414 if (pEnd && XFA_FMDChar::isAlpha(pEnd)) {
415 return 1;
416 }
417 t->m_wstring = CFX_WideStringC(p, (pEnd - p));
418 return 0;
419 }
420
421 FX_DWORD CXFA_FMLexer::String(CXFA_FMToken* t,
422 const FX_WCHAR* p,
423 const FX_WCHAR*& pEnd) {
424 const FX_WCHAR* pStart = p;
425 uint16_t ch = 0;
426 XFA_FMDChar::inc(p);
427 ch = XFA_FMDChar::get(p);
428 while (ch) {
429 if (!XFA_FMDChar::isAvalid(p)) {
430 ch = XFA_FMDChar::get(p);
431 pEnd = p;
432 t->m_wstring = CFX_WideStringC(pStart, (pEnd - pStart));
433 Error(FMERR_UNSUPPORTED_CHAR, ch);
434 return 1;
435 }
436 if (ch == '"') {
437 XFA_FMDChar::inc(p);
438 if (!XFA_FMDChar::isAvalid(p)) {
439 ch = XFA_FMDChar::get(p);
440 pEnd = p;
441 t->m_wstring = CFX_WideStringC(pStart, (pEnd - pStart));
442 Error(FMERR_UNSUPPORTED_CHAR, ch);
443 return 1;
444 }
445 ch = XFA_FMDChar::get(p);
446 if (ch == '"') {
447 goto NEXT;
448 } else {
449 break;
450 }
451 }
452 NEXT:
453 XFA_FMDChar::inc(p);
454 ch = XFA_FMDChar::get(p);
455 }
456 pEnd = p;
457 t->m_wstring = CFX_WideStringC(pStart, (pEnd - pStart));
458 return 0;
459 }
460
461 FX_DWORD CXFA_FMLexer::Identifiers(CXFA_FMToken* t,
462 const FX_WCHAR* p,
463 const FX_WCHAR*& pEnd) {
464 const FX_WCHAR* pStart = p;
465 uint16_t ch = 0;
466 ch = XFA_FMDChar::get(p);
467 XFA_FMDChar::inc(p);
468 if (!XFA_FMDChar::isAvalid(p)) {
469 pEnd = p;
470 t->m_wstring = CFX_WideStringC(pStart, (pEnd - pStart));
471 Error(FMERR_UNSUPPORTED_CHAR, ch);
472 return 1;
473 }
474 ch = XFA_FMDChar::get(p);
475 while (ch) {
476 if (!XFA_FMDChar::isAvalid(p)) {
477 pEnd = p;
478 t->m_wstring = CFX_WideStringC(pStart, (pEnd - pStart));
479 Error(FMERR_UNSUPPORTED_CHAR, ch);
480 return 1;
481 }
482 ch = XFA_FMDChar::get(p);
483 if (XFA_FMDChar::isUnicodeAlpha(ch)) {
484 XFA_FMDChar::inc(p);
485 } else {
486 pEnd = p;
487 t->m_wstring = CFX_WideStringC(pStart, (pEnd - pStart));
488 return 0;
489 }
490 }
491 pEnd = p;
492 t->m_wstring = CFX_WideStringC(pStart, (pEnd - pStart));
493 return 0;
494 }
495
496 void CXFA_FMLexer::Comment(const FX_WCHAR* p, const FX_WCHAR*& pEnd) {
497 unsigned ch = 0;
498 XFA_FMDChar::inc(p);
499 ch = XFA_FMDChar::get(p);
500 while (ch) {
501 if (ch == 0x0D) {
502 XFA_FMDChar::inc(p);
503 pEnd = p;
504 return;
505 }
506 if (ch == 0x0A) {
507 ++m_uCurrentLine;
508 XFA_FMDChar::inc(p);
509 pEnd = p;
510 return;
511 }
512 XFA_FMDChar::inc(p);
513 ch = XFA_FMDChar::get(p);
514 }
515 pEnd = p;
516 }
517
518 XFA_FM_TOKEN CXFA_FMLexer::IsKeyword(const CFX_WideStringC& str) {
519 int32_t iLength = str.GetLength();
520 uint32_t uHash = FX_HashCode_String_GetW(str.GetPtr(), iLength, TRUE);
521 int32_t iStart = KEYWORD_START, iEnd = KEYWORD_END;
522 int32_t iMid = (iStart + iEnd) / 2;
523 XFA_FMKeyword keyword;
524 do {
525 iMid = (iStart + iEnd) / 2;
526 keyword = keyWords[iMid];
527 if (uHash == keyword.m_uHash) {
528 return keyword.m_type;
529 } else if (uHash < keyword.m_uHash) {
530 iEnd = iMid - 1;
531 } else {
532 iStart = iMid + 1;
533 }
534 } while (iStart <= iEnd);
535 return TOKidentifier;
536 }
537
538 void CXFA_FMLexer::Error(XFA_FM_ERRMSG msg, ...) {
539 m_pErrorInfo->linenum = m_uCurrentLine;
540 const FX_WCHAR* lpMessageInfo = XFA_FM_ErrorMsg(msg);
541 va_list ap;
542 va_start(ap, msg);
543 m_pErrorInfo->message.FormatV(lpMessageInfo, ap);
544 va_end(ap);
545 }
546
547 FX_BOOL CXFA_FMLexer::HasError() const {
548 if (m_pErrorInfo->message.IsEmpty()) {
549 return FALSE;
550 }
551 return TRUE;
552 }
OLDNEW
« no previous file with comments | « xfa/src/fxfa/fm2js/xfa_lexer.h ('k') | xfa/src/fxfa/fm2js/xfa_program.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698