OLD | NEW |
1 // Copyright 2009 the V8 project authors. All rights reserved. | 1 // Copyright 2009 the V8 project authors. All rights reserved. |
2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
4 // met: | 4 // met: |
5 // | 5 // |
6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
(...skipping 42 matching lines...)
53 * so that rsi+rdi points to the current character. | 53 * so that rsi+rdi points to the current character. |
54 * - rbp : frame pointer. Used to access arguments, local variables and | 54 * - rbp : frame pointer. Used to access arguments, local variables and |
55 * RegExp registers. | 55 * RegExp registers. |
56 * - rsp : points to tip of C stack. | 56 * - rsp : points to tip of C stack. |
57 * - rcx : points to tip of backtrack stack. The backtrack stack contains | 57 * - rcx : points to tip of backtrack stack. The backtrack stack contains |
58 * only 32-bit values. Most are offsets from some base (e.g., character | 58 * only 32-bit values. Most are offsets from some base (e.g., character |
59 * positions from end of string or code location from Code* pointer). | 59 * positions from end of string or code location from Code* pointer). |
60 * - r8 : code object pointer. Used to convert between absolute and | 60 * - r8 : code object pointer. Used to convert between absolute and |
61 * code-object-relative addresses. | 61 * code-object-relative addresses. |
62 * | 62 * |
63 * The registers rax, rbx, rcx, r9 and r11 are free to use for computations. | 63 * The registers rax, rbx, r9 and r11 are free to use for computations. |
64 * If changed to use r12+, they should be saved as callee-save registers. | 64 * If changed to use r12+, they should be saved as callee-save registers. |
65 * | 65 * |
66 * Each call to a C++ method should retain these registers. | 66 * Each call to a C++ method should retain these registers. |
67 * | 67 * |
68 * The stack will have the following content, in some order, indexable from the | 68 * The stack will have the following content, in some order, indexable from the |
69 * frame pointer (see, e.g., kStackHighEnd): | 69 * frame pointer (see, e.g., kStackHighEnd): |
70 * - stack_area_base (High end of the memory area to use as | 70 * - stack_area_base (High end of the memory area to use as |
71 * backtracking stack) | 71 * backtracking stack) |
72 * - at_start (if 1, start at start of string, if 0, don't) | 72 * - at_start (if 1, start at start of string, if 0, don't) |
73 * - int* capture_array (int[num_saved_registers_], for output). | 73 * - int* capture_array (int[num_saved_registers_], for output). |
(...skipping 409 matching lines...)
483 Label* on_not_equal) { | 483 Label* on_not_equal) { |
484 ASSERT(minus < String::kMaxUC16CharCode); | 484 ASSERT(minus < String::kMaxUC16CharCode); |
485 __ lea(rax, Operand(current_character(), -minus)); | 485 __ lea(rax, Operand(current_character(), -minus)); |
486 __ and_(rax, Immediate(mask)); | 486 __ and_(rax, Immediate(mask)); |
487 __ cmpl(rax, Immediate(c)); | 487 __ cmpl(rax, Immediate(c)); |
488 BranchOrBacktrack(not_equal, on_not_equal); | 488 BranchOrBacktrack(not_equal, on_not_equal); |
489 } | 489 } |
490 | 490 |
491 | 491 |
492 bool RegExpMacroAssemblerX64::CheckSpecialCharacterClass(uc16 type, | 492 bool RegExpMacroAssemblerX64::CheckSpecialCharacterClass(uc16 type, |
493 int cp_offset, | |
494 bool check_offset, | |
495 Label* on_no_match) { | 493 Label* on_no_match) { |
496 // Range checks (c in min..max) are generally implemented by an unsigned | 494 // Range checks (c in min..max) are generally implemented by an unsigned |
497 // (c - min) <= (max - min) check | 495 // (c - min) <= (max - min) check, using the sequence: |
| 496 // lea(rax, Operand(current_character(), -min)) or sub(rax, Immediate(min)) |
| 497 // cmp(rax, Immediate(max - min)) |
498 switch (type) { | 498 switch (type) { |
499 case 's': | 499 case 's': |
500 // Match space-characters | 500 // Match space-characters |
501 if (mode_ == ASCII) { | 501 if (mode_ == ASCII) { |
502 // ASCII space characters are '\t'..'\r' and ' '. | 502 // ASCII space characters are '\t'..'\r' and ' '. |
503 if (check_offset) { | |
504 LoadCurrentCharacter(cp_offset, on_no_match); | |
505 } else { | |
506 LoadCurrentCharacterUnchecked(cp_offset, 1); | |
507 } | |
508 Label success; | 503 Label success; |
509 __ cmpl(current_character(), Immediate(' ')); | 504 __ cmpl(current_character(), Immediate(' ')); |
510 __ j(equal, &success); | 505 __ j(equal, &success); |
511 // Check range 0x09..0x0d | 506 // Check range 0x09..0x0d |
512 __ subl(current_character(), Immediate('\t')); | 507 __ lea(rax, Operand(current_character(), -'\t')); |
513 __ cmpl(current_character(), Immediate('\r' - '\t')); | 508 __ cmpl(rax, Immediate('\r' - '\t')); |
514 BranchOrBacktrack(above, on_no_match); | 509 BranchOrBacktrack(above, on_no_match); |
515 __ bind(&success); | 510 __ bind(&success); |
516 return true; | 511 return true; |
517 } | 512 } |
518 return false; | 513 return false; |
519 case 'S': | 514 case 'S': |
520 // Match non-space characters. | 515 // Match non-space characters. |
521 if (check_offset) { | |
522 LoadCurrentCharacter(cp_offset, on_no_match, 1); | |
523 } else { | |
524 LoadCurrentCharacterUnchecked(cp_offset, 1); | |
525 } | |
526 if (mode_ == ASCII) { | 516 if (mode_ == ASCII) { |
527 // ASCII space characters are '\t'..'\r' and ' '. | 517 // ASCII space characters are '\t'..'\r' and ' '. |
528 __ cmpl(current_character(), Immediate(' ')); | 518 __ cmpl(current_character(), Immediate(' ')); |
529 BranchOrBacktrack(equal, on_no_match); | 519 BranchOrBacktrack(equal, on_no_match); |
530 __ subl(current_character(), Immediate('\t')); | 520 __ lea(rax, Operand(current_character(), -'\t')); |
531 __ cmpl(current_character(), Immediate('\r' - '\t')); | 521 __ cmpl(rax, Immediate('\r' - '\t')); |
532 BranchOrBacktrack(below_equal, on_no_match); | 522 BranchOrBacktrack(below_equal, on_no_match); |
533 return true; | 523 return true; |
534 } | 524 } |
535 return false; | 525 return false; |
536 case 'd': | 526 case 'd': |
537 // Match ASCII digits ('0'..'9') | 527 // Match ASCII digits ('0'..'9') |
538 if (check_offset) { | 528 __ lea(rax, Operand(current_character(), -'0')); |
539 LoadCurrentCharacter(cp_offset, on_no_match, 1); | 529 __ cmpl(rax, Immediate('9' - '0')); |
540 } else { | |
541 LoadCurrentCharacterUnchecked(cp_offset, 1); | |
542 } | |
543 __ subl(current_character(), Immediate('0')); | |
544 __ cmpl(current_character(), Immediate('9' - '0')); | |
545 BranchOrBacktrack(above, on_no_match); | 530 BranchOrBacktrack(above, on_no_match); |
546 return true; | 531 return true; |
547 case 'D': | 532 case 'D': |
548 // Match non ASCII-digits | 533 // Match non ASCII-digits |
549 if (check_offset) { | 534 __ lea(rax, Operand(current_character(), -'0')); |
550 LoadCurrentCharacter(cp_offset, on_no_match, 1); | 535 __ cmpl(rax, Immediate('9' - '0')); |
551 } else { | |
552 LoadCurrentCharacterUnchecked(cp_offset, 1); | |
553 } | |
554 __ subl(current_character(), Immediate('0')); | |
555 __ cmpl(current_character(), Immediate('9' - '0')); | |
556 BranchOrBacktrack(below_equal, on_no_match); | 536 BranchOrBacktrack(below_equal, on_no_match); |
557 return true; | 537 return true; |
558 case '.': { | 538 case '.': { |
559 // Match non-newlines (not 0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029) | 539 // Match non-newlines (not 0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029) |
560 if (check_offset) { | 540 __ movl(rax, current_character()); |
561 LoadCurrentCharacter(cp_offset, on_no_match, 1); | 541 __ xor_(rax, Immediate(0x01)); |
562 } else { | |
563 LoadCurrentCharacterUnchecked(cp_offset, 1); | |
564 } | |
565 __ xor_(current_character(), Immediate(0x01)); | |
566 // See if current character is '\n'^1 or '\r'^1, i.e., 0x0b or 0x0c | 542 // See if current character is '\n'^1 or '\r'^1, i.e., 0x0b or 0x0c |
567 __ subl(current_character(), Immediate(0x0b)); | 543 __ subl(rax, Immediate(0x0b)); |
568 __ cmpl(current_character(), Immediate(0x0c - 0x0b)); | 544 __ cmpl(rax, Immediate(0x0c - 0x0b)); |
569 BranchOrBacktrack(below_equal, on_no_match); | 545 BranchOrBacktrack(below_equal, on_no_match); |
570 if (mode_ == UC16) { | 546 if (mode_ == UC16) { |
571 // Compare original value to 0x2028 and 0x2029, using the already | 547 // Compare original value to 0x2028 and 0x2029, using the already |
572 // computed ((current_char ^ 0x01) - 0x0b). I.e., check for | 548 // computed ((current_char ^ 0x01) - 0x0b). I.e., check for |
573 // 0x201d (0x2028 - 0x0b) or 0x201e. | 549 // 0x201d (0x2028 - 0x0b) or 0x201e. |
574 __ subl(current_character(), Immediate(0x2028 - 0x0b)); | 550 __ subl(rax, Immediate(0x2028 - 0x0b)); |
575 __ cmpl(current_character(), Immediate(1)); | 551 __ cmpl(rax, Immediate(0x2029 - 0x2028)); |
576 BranchOrBacktrack(below_equal, on_no_match); | 552 BranchOrBacktrack(below_equal, on_no_match); |
577 } | 553 } |
578 return true; | 554 return true; |
579 } | 555 } |
| 556 case 'n': { |
| 557 // Match newlines (0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029) |
| 558 __ movl(rax, current_character()); |
| 559 __ xor_(rax, Immediate(0x01)); |
| 560 // See if current character is '\n'^1 or '\r'^1, i.e., 0x0b or 0x0c |
| 561 __ subl(rax, Immediate(0x0b)); |
| 562 __ cmpl(rax, Immediate(0x0c - 0x0b)); |
| 563 if (mode_ == ASCII) { |
| 564 BranchOrBacktrack(above, on_no_match); |
| 565 } else { |
| 566 Label done; |
| 567 BranchOrBacktrack(below_equal, &done); |
| 568 // Compare original value to 0x2028 and 0x2029, using the already |
| 569 // computed ((current_char ^ 0x01) - 0x0b). I.e., check for |
| 570 // 0x201d (0x2028 - 0x0b) or 0x201e. |
| 571 __ subl(rax, Immediate(0x2028 - 0x0b)); |
| 572 __ cmpl(rax, Immediate(0x2029 - 0x2028)); |
| 573 BranchOrBacktrack(above, on_no_match); |
| 574 __ bind(&done); |
| 575 } |
| 576 return true; |
| 577 } |
| 578 case 'w': { |
| 579 Label done, check_digits; |
| 580 __ cmpl(current_character(), Immediate('9')); |
| 581 __ j(less_equal, &check_digits); |
| 582 __ cmpl(current_character(), Immediate('_')); |
| 583 __ j(equal, &done); |
| 584 // Convert to lower case if letter. |
| 585 __ movl(rax, current_character()); |
| 586 __ orl(rax, Immediate(0x20)); |
| 587 // check rax in range ['a'..'z']. |
| 588 __ subl(rax, Immediate('a')); |
| 589 __ cmpl(rax, Immediate('z' - 'a')); |
| 590 BranchOrBacktrack(above, on_no_match); |
| 591 __ jmp(&done); |
| 592 __ bind(&check_digits); |
| 593 // Check current character in range ['0'..'9']. |
| 594 __ cmpl(current_character(), Immediate('0')); |
| 595 BranchOrBacktrack(below, on_no_match); |
| 596 __ bind(&done); |
| 597 |
| 598 return true; |
| 599 } |
| 600 case 'W': { |
| 601 Label done, check_digits; |
| 602 __ cmpl(current_character(), Immediate('9')); |
| 603 __ j(less_equal, &check_digits); |
| 604 __ cmpl(current_character(), Immediate('_')); |
| 605 BranchOrBacktrack(equal, on_no_match); |
| 606 // Convert to lower case if letter. |
| 607 __ movl(rax, current_character()); |
| 608 __ orl(rax, Immediate(0x20)); |
| 609 // check current character in range ['a'..'z'], nondestructively. |
| 610 __ subl(rax, Immediate('a')); |
| 611 __ cmpl(rax, Immediate('z' - 'a')); |
| 612 BranchOrBacktrack(below_equal, on_no_match); |
| 613 __ jmp(&done); |
| 614 __ bind(&check_digits); |
| 615 // Check current character in range ['0'..'9']. |
| 616 __ cmpl(current_character(), Immediate('0')); |
| 617 BranchOrBacktrack(above_equal, on_no_match); |
| 618 __ bind(&done); |
| 619 |
| 620 return true; |
| 621 } |
580 case '*': | 622 case '*': |
581 // Match any character. | 623 // Match any character. |
582 if (check_offset) { | |
583 CheckPosition(cp_offset, on_no_match); | |
584 } | |
585 return true; | 624 return true; |
586 // No custom implementation (yet): w, W, s(UC16), S(UC16). | 625 // No custom implementation (yet): s(UC16), S(UC16). |
587 default: | 626 default: |
588 return false; | 627 return false; |
589 } | 628 } |
590 } | 629 } |
591 | 630 |
592 | 631 |
593 void RegExpMacroAssemblerX64::Fail() { | 632 void RegExpMacroAssemblerX64::Fail() { |
594 ASSERT(FAILURE == 0); // Return value for failure is zero. | 633 ASSERT(FAILURE == 0); // Return value for failure is zero. |
595 __ xor_(rax, rax); // zero rax. | 634 __ xor_(rax, rax); // zero rax. |
596 __ jmp(&exit_label_); | 635 __ jmp(&exit_label_); |
(...skipping 686 matching lines...)
1283 Operand(rsi, rdi, times_1, cp_offset * sizeof(uc16))); | 1322 Operand(rsi, rdi, times_1, cp_offset * sizeof(uc16))); |
1284 } | 1323 } |
1285 } | 1324 } |
1286 } | 1325 } |
1287 | 1326 |
1288 #undef __ | 1327 #undef __ |
1289 | 1328 |
1290 #endif // V8_NATIVE_REGEXP | 1329 #endif // V8_NATIVE_REGEXP |
1291 | 1330 |
1292 }} // namespace v8::internal | 1331 }} // namespace v8::internal |
OLD | NEW |