| OLD | NEW |
| 1 #include <fcntl.h> | 1 #include <fcntl.h> |
| 2 #include <stdio.h> | 2 #include <stdio.h> |
| 3 #include <stddef.h> | 3 #include <stddef.h> |
| 4 #include <stdlib.h> | 4 #include <stdlib.h> |
| 5 #include <string.h> | 5 #include <string.h> |
| 6 | 6 |
| 7 |
| 8 /* |
| 9 TODO: |
| 10 - SpiderMonkey compatibility hack: " --> something" is treated as a single line comment. |
| 11 - An identifier cannot start immediately after a number. |
| 12 |
| 13 */ |
| 14 |
| 15 |
| 7 /*!types:re2c */ | 16 /*!types:re2c */ |
| 8 | 17 |
| 9 #if defined(WIN32) | 18 #if defined(WIN32) |
| 10 | 19 |
| 11 typedef signed char int8_t; | 20 typedef signed char int8_t; |
| 12 typedef signed short int16_t; | 21 typedef signed short int16_t; |
| 13 typedef signed int int32_t; | 22 typedef signed int int32_t; |
| 14 | 23 |
| 15 typedef unsigned char uint8_t; | 24 typedef unsigned char uint8_t; |
| 16 typedef unsigned short uint16_t; | 25 typedef unsigned short uint16_t; |
| (...skipping 15 matching lines...) Expand all Loading... |
| 32 #define PUSH_TOKEN(T) { \ | 41 #define PUSH_TOKEN(T) { \ |
| 33 printf("got token %s (%d)\n", tokenNames[T], T); \ | 42 printf("got token %s (%d)\n", tokenNames[T], T); \ |
| 34 SKIP(); } | 43 SKIP(); } |
| 35 #define PUSH_STRING() { \ | 44 #define PUSH_STRING() { \ |
| 36 printf("got string\n"); \ | 45 printf("got string\n"); \ |
| 37 size_t tokenSize = cursor-start; \ | 46 size_t tokenSize = cursor-start; \ |
| 38 fwrite(start, tokenSize, 1, stdout); \ | 47 fwrite(start, tokenSize, 1, stdout); \ |
| 39 printf("\n"); \ | 48 printf("\n"); \ |
| 40 SKIP(); } | 49 SKIP(); } |
| 41 #define PUSH_NUMBER() { \ | 50 #define PUSH_NUMBER() { \ |
| 42 --cursor; \ | |
| 43 printf("got number\n"); \ | 51 printf("got number\n"); \ |
| 44 size_t tokenSize = cursor-start; \ | 52 size_t tokenSize = cursor-start; \ |
| 45 fwrite(start, tokenSize, 1, stdout); \ | 53 fwrite(start, tokenSize, 1, stdout); \ |
| 46 printf("\n"); \ | 54 printf("\n"); \ |
| 47 SKIP(); } | 55 SKIP(); } |
| 48 #define PUSH_IDENTIFIER() { \ | 56 #define PUSH_IDENTIFIER() { \ |
| 49 --cursor; \ | 57 --cursor; \ |
| 50 printf("got identifier: "); \ | 58 printf("got identifier: "); \ |
| 51 size_t tokenSize = cursor-start; \ | 59 size_t tokenSize = cursor-start; \ |
| 52 fwrite(start, tokenSize, 1, stdout); \ | 60 fwrite(start, tokenSize, 1, stdout); \ |
| (...skipping 229 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 282 re2c:yych:conversion = 0; | 290 re2c:yych:conversion = 0; |
| 283 re2c:condenumprefix = ECondition; | 291 re2c:condenumprefix = ECondition; |
| 284 re2c:define:YYCONDTYPE = Condition; | 292 re2c:define:YYCONDTYPE = Condition; |
| 285 | 293 |
| 286 eof = "\000"; | 294 eof = "\000"; |
| 287 any = [\000-\377]; | 295 any = [\000-\377]; |
| 288 whitespace_char = [ \t\v\f\r]; | 296 whitespace_char = [ \t\v\f\r]; |
| 289 whitespace = whitespace_char+; | 297 whitespace = whitespace_char+; |
| 290 identifier_start = [$_\\a-zA-z]; | 298 identifier_start = [$_\\a-zA-z]; |
| 291 identifier_char = [$_\\a-zA-z0-9]; | 299 identifier_char = [$_\\a-zA-z0-9]; |
| 292 number_start = [0-9]; | |
| 293 number_char = [0-9\.e]; | |
| 294 line_terminator = [\n\r]+; | 300 line_terminator = [\n\r]+; |
| 301 digit = [0-9]; |
| 302 hex_digit = [0-9a-fA-F]; |
| 303 maybe_exponent = ('e' [-+]? digit+)?; |
| 304 |
| 305 <Normal> "|=" { PUSH_TOKEN(ASSIGN_BIT_OR); } |
| 306 <Normal> "^=" { PUSH_TOKEN(ASSIGN_BIT_XOR); } |
| 307 <Normal> "&=" { PUSH_TOKEN(ASSIGN_BIT_AND); } |
| 308 <Normal> "+=" { PUSH_TOKEN(ASSIGN_ADD); } |
| 309 <Normal> "-=" { PUSH_TOKEN(ASSIGN_SUB); } |
| 310 <Normal> "*=" { PUSH_TOKEN(ASSIGN_MUL); } |
| 311 <Normal> "/=" { PUSH_TOKEN(ASSIGN_DIV); } |
| 312 <Normal> "%=" { PUSH_TOKEN(ASSIGN_MOD); } |
| 313 |
| 314 <Normal> "===" { PUSH_TOKEN(EQ_STRICT); } |
| 315 <Normal> "==" { PUSH_TOKEN(EQ); } |
| 316 <Normal> "=" { PUSH_TOKEN(ASSIGN); } |
| 317 <Normal> "!==" { PUSH_TOKEN(NE_STRICT); } |
| 318 <Normal> "!=" { PUSH_TOKEN(NE); } |
| 319 <Normal> "!" { PUSH_TOKEN(NOT); } |
| 320 |
| 321 <Normal> "//" :=> SingleLineComment |
| 322 <Normal> "/*" :=> MultiLineComment |
| 323 <Normal> "<!--" :=> HtmlComment |
| 324 |
| 325 <Normal> ">>>=" { PUSH_TOKEN(ASSIGN_SHR); } |
| 326 <Normal> "<<=" { PUSH_TOKEN(ASSIGN_SHL); } |
| 327 <Normal> ">>=" { PUSH_TOKEN(ASSIGN_SAR); } |
| 328 <Normal> "<=" { PUSH_TOKEN(LTE); } |
| 329 <Normal> ">=" { PUSH_TOKEN(GTE); } |
| 330 <Normal> "<<" { PUSH_TOKEN(SHL); } |
| 331 <Normal> ">>" { PUSH_TOKEN(SAR); } |
| 332 <Normal> "<" { PUSH_TOKEN(LT); } |
| 333 <Normal> ">" { PUSH_TOKEN(GT); } |
| 334 |
| 335 <Normal> '0x' hex_digit+ { PUSH_NUMBER(); } |
| 336 <Normal> "." digit+ maybe_exponent { PUSH_NUMBER(); } |
| 337 <Normal> digit+ ("." digit+)? maybe_exponent { PUSH_NUMBER(); } |
| 295 | 338 |
| 296 <Normal> "(" { PUSH_TOKEN(LPAREN); } | 339 <Normal> "(" { PUSH_TOKEN(LPAREN); } |
| 297 <Normal> ")" { PUSH_TOKEN(RPAREN); } | 340 <Normal> ")" { PUSH_TOKEN(RPAREN); } |
| 298 <Normal> "[" { PUSH_TOKEN(LBRACK); } | 341 <Normal> "[" { PUSH_TOKEN(LBRACK); } |
| 299 <Normal> "]" { PUSH_TOKEN(RBRACK); } | 342 <Normal> "]" { PUSH_TOKEN(RBRACK); } |
| 300 <Normal> "{" { PUSH_TOKEN(LBRACE); } | 343 <Normal> "{" { PUSH_TOKEN(LBRACE); } |
| 301 <Normal> "}" { PUSH_TOKEN(RBRACE); } | 344 <Normal> "}" { PUSH_TOKEN(RBRACE); } |
| 302 <Normal> ":" { PUSH_TOKEN(COLON); } | 345 <Normal> ":" { PUSH_TOKEN(COLON); } |
| 303 <Normal> ";" { PUSH_TOKEN(SEMICOLON); } | 346 <Normal> ";" { PUSH_TOKEN(SEMICOLON); } |
| 304 <Normal> "." { PUSH_TOKEN(PERIOD); } | 347 <Normal> "." { PUSH_TOKEN(PERIOD); } |
| 305 <Normal> "?" { PUSH_TOKEN(CONDITIONAL); } | 348 <Normal> "?" { PUSH_TOKEN(CONDITIONAL); } |
| 306 <Normal> "++" { PUSH_TOKEN(INC); } | 349 <Normal> "++" { PUSH_TOKEN(INC); } |
| 307 <Normal> "--" { PUSH_TOKEN(DEC); } | 350 <Normal> "--" { PUSH_TOKEN(DEC); } |
| 308 | 351 |
| 309 <Normal> "|=" { PUSH_TOKEN(ASSIGN_BIT_OR); } | |
| 310 <Normal> "^=" { PUSH_TOKEN(ASSIGN_BIT_XOR); } | |
| 311 <Normal> "&=" { PUSH_TOKEN(ASSIGN_BIT_AND); } | |
| 312 <Normal> "<<=" { PUSH_TOKEN(ASSIGN_SHL); } | |
| 313 <Normal> ">>=" { PUSH_TOKEN(ASSIGN_SAR); } | |
| 314 <Normal> ">>>=" { PUSH_TOKEN(ASSIGN_SHR); } | |
| 315 <Normal> "+=" { PUSH_TOKEN(ASSIGN_ADD); } | |
| 316 <Normal> "-=" { PUSH_TOKEN(ASSIGN_SUB); } | |
| 317 <Normal> "*=" { PUSH_TOKEN(ASSIGN_MUL); } | |
| 318 <Normal> "/=" { PUSH_TOKEN(ASSIGN_DIV); } | |
| 319 <Normal> "%=" { PUSH_TOKEN(ASSIGN_MOD); } | |
| 320 | |
| 321 <Normal> "," { PUSH_TOKEN(COMMA); } | |
| 322 <Normal> "||" { PUSH_TOKEN(OR); } | 352 <Normal> "||" { PUSH_TOKEN(OR); } |
| 323 <Normal> "&&" { PUSH_TOKEN(AND); } | 353 <Normal> "&&" { PUSH_TOKEN(AND); } |
| 354 |
| 324 <Normal> "|" { PUSH_TOKEN(BIT_OR); } | 355 <Normal> "|" { PUSH_TOKEN(BIT_OR); } |
| 325 <Normal> "^" { PUSH_TOKEN(BIT_XOR); } | 356 <Normal> "^" { PUSH_TOKEN(BIT_XOR); } |
| 326 <Normal> "&" { PUSH_TOKEN(BIT_AND); } | 357 <Normal> "&" { PUSH_TOKEN(BIT_AND); } |
| 327 <Normal> "<<" { PUSH_TOKEN(SHL); } | |
| 328 <Normal> ">>" { PUSH_TOKEN(SAR); } | |
| 329 <Normal> "+" { PUSH_TOKEN(ADD); } | 358 <Normal> "+" { PUSH_TOKEN(ADD); } |
| 330 <Normal> "-" { PUSH_TOKEN(SUB); } | 359 <Normal> "-" { PUSH_TOKEN(SUB); } |
| 331 <Normal> "*" { PUSH_TOKEN(MUL); } | 360 <Normal> "*" { PUSH_TOKEN(MUL); } |
| 332 <Normal> "/" { PUSH_TOKEN(DIV); } | 361 <Normal> "/" { PUSH_TOKEN(DIV); } |
| 333 <Normal> "%" { PUSH_TOKEN(MOD); } | 362 <Normal> "%" { PUSH_TOKEN(MOD); } |
| 334 | |
| 335 <Normal> "===" { PUSH_TOKEN(EQ_STRICT); } | |
| 336 <Normal> "==" { PUSH_TOKEN(EQ); } | |
| 337 <Normal> "!==" { PUSH_TOKEN(NE_STRICT); } | |
| 338 <Normal> "!=" { PUSH_TOKEN(NE); } | |
| 339 <Normal> "<=" { PUSH_TOKEN(LTE); } | |
| 340 <Normal> ">=" { PUSH_TOKEN(GTE); } | |
| 341 <Normal> "<" { PUSH_TOKEN(LT); } | |
| 342 <Normal> ">" { PUSH_TOKEN(GT); } | |
| 343 | |
| 344 <Normal> "=" { PUSH_TOKEN(ASSIGN); } | |
| 345 | |
| 346 <Normal> "!" { PUSH_TOKEN(NOT); } | |
| 347 <Normal> "~" { PUSH_TOKEN(BIT_NOT); } | 363 <Normal> "~" { PUSH_TOKEN(BIT_NOT); } |
| 364 <Normal> "," { PUSH_TOKEN(COMMA); } |
| 348 | 365 |
| 349 <Normal> line_terminator+ { PUSH_LINE_TERMINATOR(); } | 366 <Normal> line_terminator+ { PUSH_LINE_TERMINATOR(); } |
| 350 <Normal> whitespace { SKIP();} | 367 <Normal> whitespace { SKIP(); } |
| 351 | |
| 352 <Normal> "//" :=> SingleLineComment | |
| 353 <Normal> "/*" :=> MultiLineComment | |
| 354 <Normal> "<!--" :=> HtmlComment | |
| 355 | 368 |
| 356 <Normal> ["] :=> DoubleQuoteString | 369 <Normal> ["] :=> DoubleQuoteString |
| 357 <Normal> ['] :=> SingleQuoteString | 370 <Normal> ['] :=> SingleQuoteString |
| 358 | 371 |
| 359 <Normal> identifier_start :=> Identifier | 372 <Normal> identifier_start :=> Identifier |
| 360 <Normal> number_start :=> Number | |
| 361 | 373 |
| 362 <Normal> eof { PUSH_EOS(); return 1; } | 374 <Normal> eof { PUSH_EOS(); return 1; } |
| 363 <Normal> any { TERMINATE_ILLEGAL(); } | 375 <Normal> any { TERMINATE_ILLEGAL(); } |
| 364 | 376 |
| 365 <DoubleQuoteString> "\\\"" { goto yy0; } | 377 <DoubleQuoteString> "\\\"" { goto yy0; } |
| 366 <DoubleQuoteString> '"' { PUSH_STRING();} | 378 <DoubleQuoteString> '"' { PUSH_STRING();} |
| 367 <DoubleQuoteString> any { goto yy0; } | 379 <DoubleQuoteString> any { goto yy0; } |
| 368 | 380 |
| 369 <SingleQuoteString> "\\'" { goto yy0; } | 381 <SingleQuoteString> "\\'" { goto yy0; } |
| 370 <SingleQuoteString> "'" { PUSH_STRING();} | 382 <SingleQuoteString> "'" { PUSH_STRING();} |
| 371 <SingleQuoteString> any { goto yy0; } | 383 <SingleQuoteString> any { goto yy0; } |
| 372 | 384 |
| 373 <Identifier> identifier_char+ { goto yy0; } | 385 <Identifier> identifier_char+ { goto yy0; } |
| 374 <Identifier> any { PUSH_IDENTIFIER(); } | 386 <Identifier> any { PUSH_IDENTIFIER(); } |
| 375 | 387 |
| 376 <SingleLineComment> line_terminator { PUSH_LINE_TERMINATOR();} | 388 <SingleLineComment> line_terminator { PUSH_LINE_TERMINATOR();} |
| 377 <SingleLineComment> eof { PUSH_LINE_TERMINATOR();} | 389 <SingleLineComment> eof { PUSH_LINE_TERMINATOR();} |
| 378 <SingleLineComment> any { goto yy0; } | 390 <SingleLineComment> any { goto yy0; } |
| 379 | 391 |
| 380 <MultiLineComment> [*][//] { PUSH_LINE_TERMINATOR();} | 392 <MultiLineComment> [*][//] { PUSH_LINE_TERMINATOR();} |
| 381 <MultiLineComment> eof { TERMINATE_ILLEGAL(); } | 393 <MultiLineComment> eof { TERMINATE_ILLEGAL(); } |
| 382 <MultiLineComment> any { goto yy0; } | 394 <MultiLineComment> any { goto yy0; } |
| 383 | 395 |
| 384 <HtmlComment> eof { TERMINATE_ILLEGAL(); } | 396 <HtmlComment> eof { TERMINATE_ILLEGAL(); } |
| 385 <HtmlComment> "-->" { PUSH_LINE_TERMINATOR();} | 397 <HtmlComment> "-->" { PUSH_LINE_TERMINATOR();} |
| 386 <HtmlComment> any { goto yy0; } | 398 <HtmlComment> any { goto yy0; } |
| 387 | |
| 388 <Number> number_char+ { goto yy0; } | |
| 389 <Number> any { PUSH_NUMBER(); } | |
| 390 | |
| 391 */ | 399 */ |
| 392 | 400 |
| 393 fill: | 401 fill: |
| 394 ssize_t unfinishedSize = cursor-start; | 402 ssize_t unfinishedSize = cursor-start; |
| 395 printf( | 403 printf( |
| 396 "scanner needs a refill. Exiting for now with:\n" | 404 "scanner needs a refill. Exiting for now with:\n" |
| 397 " saved fill state = %d\n" | 405 " saved fill state = %d\n" |
| 398 " unfinished token size = %ld\n", | 406 " unfinished token size = %ld\n", |
| 399 state, | 407 state, |
| 400 unfinishedSize | 408 unfinishedSize |
| (...skipping 62 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 463 return 1; | 471 return 1; |
| 464 } | 472 } |
| 465 if(n<batchSize) break; | 473 if(n<batchSize) break; |
| 466 } | 474 } |
| 467 scanner.push(0, -1); | 475 scanner.push(0, -1); |
| 468 close(input); | 476 close(input); |
| 469 | 477 |
| 470 // Done | 478 // Done |
| 471 return 0; | 479 return 0; |
| 472 } | 480 } |
| OLD | NEW |