Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(223)

Side by Side Diff: src/x64/regexp-macro-assembler-x64.cc

Issue 507051: Attempt to make \b\w+ faster. Slight performance increase on, e.g., string unpacking. (Closed)
Patch Set: Addressed review comments. Created 10 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/x64/regexp-macro-assembler-x64.h ('k') | test/cctest/test-regexp.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2009 the V8 project authors. All rights reserved. 1 // Copyright 2009 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after
53 * so that rsi+rdi points to the current character. 53 * so that rsi+rdi points to the current character.
54 * - rbp : frame pointer. Used to access arguments, local variables and 54 * - rbp : frame pointer. Used to access arguments, local variables and
55 * RegExp registers. 55 * RegExp registers.
56 * - rsp : points to tip of C stack. 56 * - rsp : points to tip of C stack.
57 * - rcx : points to tip of backtrack stack. The backtrack stack contains 57 * - rcx : points to tip of backtrack stack. The backtrack stack contains
58 * only 32-bit values. Most are offsets from some base (e.g., character 58 * only 32-bit values. Most are offsets from some base (e.g., character
59 * positions from end of string or code location from Code* pointer). 59 * positions from end of string or code location from Code* pointer).
60 * - r8 : code object pointer. Used to convert between absolute and 60 * - r8 : code object pointer. Used to convert between absolute and
61 * code-object-relative addresses. 61 * code-object-relative addresses.
62 * 62 *
63 * The registers rax, rbx, rcx, r9 and r11 are free to use for computations. 63 * The registers rax, rbx, r9 and r11 are free to use for computations.
64 * If changed to use r12+, they should be saved as callee-save registers. 64 * If changed to use r12+, they should be saved as callee-save registers.
65 * 65 *
66 * Each call to a C++ method should retain these registers. 66 * Each call to a C++ method should retain these registers.
67 * 67 *
68 * The stack will have the following content, in some order, indexable from the 68 * The stack will have the following content, in some order, indexable from the
69 * frame pointer (see, e.g., kStackHighEnd): 69 * frame pointer (see, e.g., kStackHighEnd):
70 * - stack_area_base (High end of the memory area to use as 70 * - stack_area_base (High end of the memory area to use as
71 * backtracking stack) 71 * backtracking stack)
72 * - at_start (if 1, start at start of string, if 0, don't) 72 * - at_start (if 1, start at start of string, if 0, don't)
73 * - int* capture_array (int[num_saved_registers_], for output). 73 * - int* capture_array (int[num_saved_registers_], for output).
(...skipping 409 matching lines...) Expand 10 before | Expand all | Expand 10 after
483 Label* on_not_equal) { 483 Label* on_not_equal) {
484 ASSERT(minus < String::kMaxUC16CharCode); 484 ASSERT(minus < String::kMaxUC16CharCode);
485 __ lea(rax, Operand(current_character(), -minus)); 485 __ lea(rax, Operand(current_character(), -minus));
486 __ and_(rax, Immediate(mask)); 486 __ and_(rax, Immediate(mask));
487 __ cmpl(rax, Immediate(c)); 487 __ cmpl(rax, Immediate(c));
488 BranchOrBacktrack(not_equal, on_not_equal); 488 BranchOrBacktrack(not_equal, on_not_equal);
489 } 489 }
490 490
491 491
492 bool RegExpMacroAssemblerX64::CheckSpecialCharacterClass(uc16 type, 492 bool RegExpMacroAssemblerX64::CheckSpecialCharacterClass(uc16 type,
493 int cp_offset,
494 bool check_offset,
495 Label* on_no_match) { 493 Label* on_no_match) {
496 // Range checks (c in min..max) are generally implemented by an unsigned 494 // Range checks (c in min..max) are generally implemented by an unsigned
497 // (c - min) <= (max - min) check 495 // (c - min) <= (max - min) check, using the sequence:
496 // lea(rax, Operand(current_character(), -min)) or sub(rax, Immediate(min))
497 // cmp(rax, Immediate(max - min))
498 switch (type) { 498 switch (type) {
499 case 's': 499 case 's':
500 // Match space-characters 500 // Match space-characters
501 if (mode_ == ASCII) { 501 if (mode_ == ASCII) {
502 // ASCII space characters are '\t'..'\r' and ' '. 502 // ASCII space characters are '\t'..'\r' and ' '.
503 if (check_offset) {
504 LoadCurrentCharacter(cp_offset, on_no_match);
505 } else {
506 LoadCurrentCharacterUnchecked(cp_offset, 1);
507 }
508 Label success; 503 Label success;
509 __ cmpl(current_character(), Immediate(' ')); 504 __ cmpl(current_character(), Immediate(' '));
510 __ j(equal, &success); 505 __ j(equal, &success);
511 // Check range 0x09..0x0d 506 // Check range 0x09..0x0d
512 __ subl(current_character(), Immediate('\t')); 507 __ lea(rax, Operand(current_character(), -'\t'));
513 __ cmpl(current_character(), Immediate('\r' - '\t')); 508 __ cmpl(rax, Immediate('\r' - '\t'));
514 BranchOrBacktrack(above, on_no_match); 509 BranchOrBacktrack(above, on_no_match);
515 __ bind(&success); 510 __ bind(&success);
516 return true; 511 return true;
517 } 512 }
518 return false; 513 return false;
519 case 'S': 514 case 'S':
520 // Match non-space characters. 515 // Match non-space characters.
521 if (check_offset) {
522 LoadCurrentCharacter(cp_offset, on_no_match, 1);
523 } else {
524 LoadCurrentCharacterUnchecked(cp_offset, 1);
525 }
526 if (mode_ == ASCII) { 516 if (mode_ == ASCII) {
527 // ASCII space characters are '\t'..'\r' and ' '. 517 // ASCII space characters are '\t'..'\r' and ' '.
528 __ cmpl(current_character(), Immediate(' ')); 518 __ cmpl(current_character(), Immediate(' '));
529 BranchOrBacktrack(equal, on_no_match); 519 BranchOrBacktrack(equal, on_no_match);
530 __ subl(current_character(), Immediate('\t')); 520 __ lea(rax, Operand(current_character(), -'\t'));
531 __ cmpl(current_character(), Immediate('\r' - '\t')); 521 __ cmpl(rax, Immediate('\r' - '\t'));
532 BranchOrBacktrack(below_equal, on_no_match); 522 BranchOrBacktrack(below_equal, on_no_match);
533 return true; 523 return true;
534 } 524 }
535 return false; 525 return false;
536 case 'd': 526 case 'd':
537 // Match ASCII digits ('0'..'9') 527 // Match ASCII digits ('0'..'9')
538 if (check_offset) { 528 __ lea(rax, Operand(current_character(), -'0'));
539 LoadCurrentCharacter(cp_offset, on_no_match, 1); 529 __ cmpl(rax, Immediate('9' - '0'));
540 } else {
541 LoadCurrentCharacterUnchecked(cp_offset, 1);
542 }
543 __ subl(current_character(), Immediate('0'));
544 __ cmpl(current_character(), Immediate('9' - '0'));
545 BranchOrBacktrack(above, on_no_match); 530 BranchOrBacktrack(above, on_no_match);
546 return true; 531 return true;
547 case 'D': 532 case 'D':
548 // Match non ASCII-digits 533 // Match non ASCII-digits
549 if (check_offset) { 534 __ lea(rax, Operand(current_character(), -'0'));
550 LoadCurrentCharacter(cp_offset, on_no_match, 1); 535 __ cmpl(rax, Immediate('9' - '0'));
551 } else {
552 LoadCurrentCharacterUnchecked(cp_offset, 1);
553 }
554 __ subl(current_character(), Immediate('0'));
555 __ cmpl(current_character(), Immediate('9' - '0'));
556 BranchOrBacktrack(below_equal, on_no_match); 536 BranchOrBacktrack(below_equal, on_no_match);
557 return true; 537 return true;
558 case '.': { 538 case '.': {
559 // Match non-newlines (not 0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029) 539 // Match non-newlines (not 0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029)
560 if (check_offset) { 540 __ movl(rax, current_character());
561 LoadCurrentCharacter(cp_offset, on_no_match, 1); 541 __ xor_(rax, Immediate(0x01));
562 } else {
563 LoadCurrentCharacterUnchecked(cp_offset, 1);
564 }
565 __ xor_(current_character(), Immediate(0x01));
566 // See if current character is '\n'^1 or '\r'^1, i.e., 0x0b or 0x0c 542 // See if current character is '\n'^1 or '\r'^1, i.e., 0x0b or 0x0c
567 __ subl(current_character(), Immediate(0x0b)); 543 __ subl(rax, Immediate(0x0b));
568 __ cmpl(current_character(), Immediate(0x0c - 0x0b)); 544 __ cmpl(rax, Immediate(0x0c - 0x0b));
569 BranchOrBacktrack(below_equal, on_no_match); 545 BranchOrBacktrack(below_equal, on_no_match);
570 if (mode_ == UC16) { 546 if (mode_ == UC16) {
571 // Compare original value to 0x2028 and 0x2029, using the already 547 // Compare original value to 0x2028 and 0x2029, using the already
572 // computed (current_char ^ 0x01 - 0x0b). I.e., check for 548 // computed (current_char ^ 0x01 - 0x0b). I.e., check for
573 // 0x201d (0x2028 - 0x0b) or 0x201e. 549 // 0x201d (0x2028 - 0x0b) or 0x201e.
574 __ subl(current_character(), Immediate(0x2028 - 0x0b)); 550 __ subl(rax, Immediate(0x2028 - 0x0b));
575 __ cmpl(current_character(), Immediate(1)); 551 __ cmpl(rax, Immediate(0x2029 - 0x2028));
576 BranchOrBacktrack(below_equal, on_no_match); 552 BranchOrBacktrack(below_equal, on_no_match);
577 } 553 }
578 return true; 554 return true;
579 } 555 }
556 case 'n': {
557 // Match newlines (0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029)
558 __ movl(rax, current_character());
559 __ xor_(rax, Immediate(0x01));
560 // See if current character is '\n'^1 or '\r'^1, i.e., 0x0b or 0x0c
561 __ subl(rax, Immediate(0x0b));
562 __ cmpl(rax, Immediate(0x0c - 0x0b));
563 if (mode_ == ASCII) {
564 BranchOrBacktrack(above, on_no_match);
565 } else {
566 Label done;
567 BranchOrBacktrack(below_equal, &done);
568 // Compare original value to 0x2028 and 0x2029, using the already
569 // computed (current_char ^ 0x01 - 0x0b). I.e., check for
570 // 0x201d (0x2028 - 0x0b) or 0x201e.
571 __ subl(rax, Immediate(0x2028 - 0x0b));
572 __ cmpl(rax, Immediate(0x2029 - 0x2028));
573 BranchOrBacktrack(above, on_no_match);
574 __ bind(&done);
575 }
576 return true;
577 }
578 case 'w': {
579 Label done, check_digits;
580 __ cmpl(current_character(), Immediate('9'));
581 __ j(less_equal, &check_digits);
582 __ cmpl(current_character(), Immediate('_'));
583 __ j(equal, &done);
584 // Convert to lower case if letter.
585 __ movl(rax, current_character());
586 __ orl(rax, Immediate(0x20));
587 // check rax in range ['a'..'z'].
588 __ subl(rax, Immediate('a'));
589 __ cmpl(rax, Immediate('z' - 'a'));
590 BranchOrBacktrack(above, on_no_match);
591 __ jmp(&done);
592 __ bind(&check_digits);
593 // Check current character in range ['0'..'9'].
594 __ cmpl(current_character(), Immediate('0'));
595 BranchOrBacktrack(below, on_no_match);
596 __ bind(&done);
597
598 return true;
599 }
600 case 'W': {
601 Label done, check_digits;
602 __ cmpl(current_character(), Immediate('9'));
603 __ j(less_equal, &check_digits);
604 __ cmpl(current_character(), Immediate('_'));
605 BranchOrBacktrack(equal, on_no_match);
606 // Convert to lower case if letter.
607 __ movl(rax, current_character());
608 __ orl(rax, Immediate(0x20));
609 // check current character in range ['a'..'z'], nondestructively.
610 __ subl(rax, Immediate('a'));
611 __ cmpl(rax, Immediate('z' - 'a'));
612 BranchOrBacktrack(below_equal, on_no_match);
613 __ jmp(&done);
614 __ bind(&check_digits);
615 // Check current character in range ['0'..'9'].
616 __ cmpl(current_character(), Immediate('0'));
617 BranchOrBacktrack(above_equal, on_no_match);
618 __ bind(&done);
619
620 return true;
621 }
580 case '*': 622 case '*':
581 // Match any character. 623 // Match any character.
582 if (check_offset) {
583 CheckPosition(cp_offset, on_no_match);
584 }
585 return true; 624 return true;
586 // No custom implementation (yet): w, W, s(UC16), S(UC16). 625 // No custom implementation (yet): s(UC16), S(UC16).
587 default: 626 default:
588 return false; 627 return false;
589 } 628 }
590 } 629 }
591 630
592 631
593 void RegExpMacroAssemblerX64::Fail() { 632 void RegExpMacroAssemblerX64::Fail() {
594 ASSERT(FAILURE == 0); // Return value for failure is zero. 633 ASSERT(FAILURE == 0); // Return value for failure is zero.
595 __ xor_(rax, rax); // zero rax. 634 __ xor_(rax, rax); // zero rax.
596 __ jmp(&exit_label_); 635 __ jmp(&exit_label_);
(...skipping 686 matching lines...) Expand 10 before | Expand all | Expand 10 after
1283 Operand(rsi, rdi, times_1, cp_offset * sizeof(uc16))); 1322 Operand(rsi, rdi, times_1, cp_offset * sizeof(uc16)));
1284 } 1323 }
1285 } 1324 }
1286 } 1325 }
1287 1326
1288 #undef __ 1327 #undef __
1289 1328
1290 #endif // V8_NATIVE_REGEXP 1329 #endif // V8_NATIVE_REGEXP
1291 1330
1292 }} // namespace v8::internal 1331 }} // namespace v8::internal
OLDNEW
« no previous file with comments | « src/x64/regexp-macro-assembler-x64.h ('k') | test/cctest/test-regexp.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698