src/x64/regexp-macro-assembler-x64.cc - Issue 507051: Attempt to make \b\w+ faster. Slight performance increase on, e.g., string unpacking.

Side by Side Diff: src/x64/regexp-macro-assembler-x64.cc

Issue 507051: Attempt to make \b\w+ faster. Slight performance increase on, e.g., string unpacking. (Closed)

Patch Set: Addressed review comments. Created 10 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 // Copyright 2009 the V8 project authors. All rights reserved.	1 // Copyright 2009 the V8 project authors. All rights reserved.

2 // Redistribution and use in source and binary forms, with or without	2 // Redistribution and use in source and binary forms, with or without

3 // modification, are permitted provided that the following conditions are	3 // modification, are permitted provided that the following conditions are

4 // met:	4 // met:

5 //	5 //

6 // * Redistributions of source code must retain the above copyright	6 // * Redistributions of source code must retain the above copyright

7 // notice, this list of conditions and the following disclaimer.	7 // notice, this list of conditions and the following disclaimer.

8 // * Redistributions in binary form must reproduce the above	8 // * Redistributions in binary form must reproduce the above

9 // copyright notice, this list of conditions and the following	9 // copyright notice, this list of conditions and the following

10 // disclaimer in the documentation and/or other materials provided	10 // disclaimer in the documentation and/or other materials provided

(...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
53 * so that rsi+rdi points to the current character.	53 * so that rsi+rdi points to the current character.

54 * - rbp : frame pointer. Used to access arguments, local variables and	54 * - rbp : frame pointer. Used to access arguments, local variables and

55 * RegExp registers.	55 * RegExp registers.

56 * - rsp : points to tip of C stack.	56 * - rsp : points to tip of C stack.

57 * - rcx : points to tip of backtrack stack. The backtrack stack contains	57 * - rcx : points to tip of backtrack stack. The backtrack stack contains

58 * only 32-bit values. Most are offsets from some base (e.g., character	58 * only 32-bit values. Most are offsets from some base (e.g., character

59 * positions from end of string or code location from Code* pointer).	59 * positions from end of string or code location from Code* pointer).

60 * - r8 : code object pointer. Used to convert between absolute and	60 * - r8 : code object pointer. Used to convert between absolute and

61 * code-object-relative addresses.	61 * code-object-relative addresses.

62 *	62 *

63 * The registers rax, rbx, rcx, r9 and r11 are free to use for computations.	63 * The registers rax, rbx, r9 and r11 are free to use for computations.

64 * If changed to use r12+, they should be saved as callee-save registers.	64 * If changed to use r12+, they should be saved as callee-save registers.

65 *	65 *

66 * Each call to a C++ method should retain these registers.	66 * Each call to a C++ method should retain these registers.

67 *	67 *

68 * The stack will have the following content, in some order, indexable from the	68 * The stack will have the following content, in some order, indexable from the

69 * frame pointer (see, e.g., kStackHighEnd):	69 * frame pointer (see, e.g., kStackHighEnd):

70 * - stack_area_base (High end of the memory area to use as	70 * - stack_area_base (High end of the memory area to use as

71 * backtracking stack)	71 * backtracking stack)

72 * - at_start (if 1, start at start of string, if 0, don't)	72 * - at_start (if 1, start at start of string, if 0, don't)

73 * - int* capture_array (int[num_saved_registers_], for output).	73 * - int* capture_array (int[num_saved_registers_], for output).

(...skipping 409 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
483 Label* on_not_equal) {	483 Label* on_not_equal) {

484 ASSERT(minus < String::kMaxUC16CharCode);	484 ASSERT(minus < String::kMaxUC16CharCode);

485 __ lea(rax, Operand(current_character(), -minus));	485 __ lea(rax, Operand(current_character(), -minus));

486 __ and_(rax, Immediate(mask));	486 __ and_(rax, Immediate(mask));

487 __ cmpl(rax, Immediate(c));	487 __ cmpl(rax, Immediate(c));

488 BranchOrBacktrack(not_equal, on_not_equal);	488 BranchOrBacktrack(not_equal, on_not_equal);

489 }	489 }

490	490

491	491

492 bool RegExpMacroAssemblerX64::CheckSpecialCharacterClass(uc16 type,	492 bool RegExpMacroAssemblerX64::CheckSpecialCharacterClass(uc16 type,

493 int cp_offset,

494 bool check_offset,

495 Label* on_no_match) {	493 Label* on_no_match) {

496 // Range checks (c in min..max) are generally implemented by an unsigned	494 // Range checks (c in min..max) are generally implemented by an unsigned

497 // (c - min) <= (max - min) check	495 // (c - min) <= (max - min) check, using the sequence:

	496 // lea(rax, Operand(current_character(), -min)) or sub(rax, Immediate(min))

	497 // cmp(rax, Immediate(max - min))

498 switch (type) {	498 switch (type) {

499 case 's':	499 case 's':

500 // Match space-characters	500 // Match space-characters

501 if (mode_ == ASCII) {	501 if (mode_ == ASCII) {

502 // ASCII space characters are '\t'..'\r' and ' '.	502 // ASCII space characters are '\t'..'\r' and ' '.

503 if (check_offset) {

504 LoadCurrentCharacter(cp_offset, on_no_match);

505 } else {

506 LoadCurrentCharacterUnchecked(cp_offset, 1);

507 }

508 Label success;	503 Label success;

509 __ cmpl(current_character(), Immediate(' '));	504 __ cmpl(current_character(), Immediate(' '));

510 __ j(equal, &success);	505 __ j(equal, &success);

511 // Check range 0x09..0x0d	506 // Check range 0x09..0x0d

512 __ subl(current_character(), Immediate('\t'));	507 __ lea(rax, Operand(current_character(), -'\t'));

513 __ cmpl(current_character(), Immediate('\r' - '\t'));	508 __ cmpl(rax, Immediate('\r' - '\t'));

514 BranchOrBacktrack(above, on_no_match);	509 BranchOrBacktrack(above, on_no_match);

515 __ bind(&success);	510 __ bind(&success);

516 return true;	511 return true;

517 }	512 }

518 return false;	513 return false;

519 case 'S':	514 case 'S':

520 // Match non-space characters.	515 // Match non-space characters.

521 if (check_offset) {

522 LoadCurrentCharacter(cp_offset, on_no_match, 1);

523 } else {

524 LoadCurrentCharacterUnchecked(cp_offset, 1);

525 }

526 if (mode_ == ASCII) {	516 if (mode_ == ASCII) {

527 // ASCII space characters are '\t'..'\r' and ' '.	517 // ASCII space characters are '\t'..'\r' and ' '.

528 __ cmpl(current_character(), Immediate(' '));	518 __ cmpl(current_character(), Immediate(' '));

529 BranchOrBacktrack(equal, on_no_match);	519 BranchOrBacktrack(equal, on_no_match);

530 __ subl(current_character(), Immediate('\t'));	520 __ lea(rax, Operand(current_character(), -'\t'));

531 __ cmpl(current_character(), Immediate('\r' - '\t'));	521 __ cmpl(rax, Immediate('\r' - '\t'));

532 BranchOrBacktrack(below_equal, on_no_match);	522 BranchOrBacktrack(below_equal, on_no_match);

533 return true;	523 return true;

534 }	524 }

535 return false;	525 return false;

536 case 'd':	526 case 'd':

537 // Match ASCII digits ('0'..'9')	527 // Match ASCII digits ('0'..'9')

538 if (check_offset) {	528 __ lea(rax, Operand(current_character(), -'0'));

539 LoadCurrentCharacter(cp_offset, on_no_match, 1);	529 __ cmpl(rax, Immediate('9' - '0'));

540 } else {

541 LoadCurrentCharacterUnchecked(cp_offset, 1);

542 }

543 __ subl(current_character(), Immediate('0'));

544 __ cmpl(current_character(), Immediate('9' - '0'));

545 BranchOrBacktrack(above, on_no_match);	530 BranchOrBacktrack(above, on_no_match);

546 return true;	531 return true;

547 case 'D':	532 case 'D':

548 // Match non ASCII-digits	533 // Match non ASCII-digits

549 if (check_offset) {	534 __ lea(rax, Operand(current_character(), -'0'));

550 LoadCurrentCharacter(cp_offset, on_no_match, 1);	535 __ cmpl(rax, Immediate('9' - '0'));

551 } else {

552 LoadCurrentCharacterUnchecked(cp_offset, 1);

553 }

554 __ subl(current_character(), Immediate('0'));

555 __ cmpl(current_character(), Immediate('9' - '0'));

556 BranchOrBacktrack(below_equal, on_no_match);	536 BranchOrBacktrack(below_equal, on_no_match);

557 return true;	537 return true;

558 case '.': {	538 case '.': {

559 // Match non-newlines (not 0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029)	539 // Match non-newlines (not 0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029)

560 if (check_offset) {	540 __ movl(rax, current_character());

561 LoadCurrentCharacter(cp_offset, on_no_match, 1);	541 __ xor_(rax, Immediate(0x01));

562 } else {

563 LoadCurrentCharacterUnchecked(cp_offset, 1);

564 }

565 __ xor_(current_character(), Immediate(0x01));

566 // See if current character is '\n'^1 or '\r'^1, i.e., 0x0b or 0x0c	542 // See if current character is '\n'^1 or '\r'^1, i.e., 0x0b or 0x0c

567 __ subl(current_character(), Immediate(0x0b));	543 __ subl(rax, Immediate(0x0b));

568 __ cmpl(current_character(), Immediate(0x0c - 0x0b));	544 __ cmpl(rax, Immediate(0x0c - 0x0b));

569 BranchOrBacktrack(below_equal, on_no_match);	545 BranchOrBacktrack(below_equal, on_no_match);

570 if (mode_ == UC16) {	546 if (mode_ == UC16) {

571 // Compare original value to 0x2028 and 0x2029, using the already	547 // Compare original value to 0x2028 and 0x2029, using the already

572 // computed (current_char ^ 0x01 - 0x0b). I.e., check for	548 // computed (current_char ^ 0x01 - 0x0b). I.e., check for

573 // 0x201d (0x2028 - 0x0b) or 0x201e.	549 // 0x201d (0x2028 - 0x0b) or 0x201e.

574 __ subl(current_character(), Immediate(0x2028 - 0x0b));	550 __ subl(rax, Immediate(0x2028 - 0x0b));

575 __ cmpl(current_character(), Immediate(1));	551 __ cmpl(rax, Immediate(0x2029 - 0x2028));

576 BranchOrBacktrack(below_equal, on_no_match);	552 BranchOrBacktrack(below_equal, on_no_match);

577 }	553 }

578 return true;	554 return true;

579 }	555 }

	556 case 'n': {

	557 // Match newlines (0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029)

	558 __ movl(rax, current_character());

	559 __ xor_(rax, Immediate(0x01));

	560 // See if current character is '\n'^1 or '\r'^1, i.e., 0x0b or 0x0c

	561 __ subl(rax, Immediate(0x0b));

	562 __ cmpl(rax, Immediate(0x0c - 0x0b));

	563 if (mode_ == ASCII) {

	564 BranchOrBacktrack(above, on_no_match);

	565 } else {

	566 Label done;

	567 BranchOrBacktrack(below_equal, &done);

	568 // Compare original value to 0x2028 and 0x2029, using the already

	569 // computed (current_char ^ 0x01 - 0x0b). I.e., check for

	570 // 0x201d (0x2028 - 0x0b) or 0x201e.

	571 __ subl(rax, Immediate(0x2028 - 0x0b));

	572 __ cmpl(rax, Immediate(0x2029 - 0x2028));

	573 BranchOrBacktrack(above, on_no_match);

	574 __ bind(&done);

	575 }

	576 return true;

	577 }

	578 case 'w': {

	579 Label done, check_digits;

	580 __ cmpl(current_character(), Immediate('9'));

	581 __ j(less_equal, &check_digits);

	582 __ cmpl(current_character(), Immediate('_'));

	583 __ j(equal, &done);

	584 // Convert to lower case if letter.

	585 __ movl(rax, current_character());

	586 __ orl(rax, Immediate(0x20));

	587 // check rax in range ['a'..'z'].

	588 __ subl(rax, Immediate('a'));

	589 __ cmpl(rax, Immediate('z' - 'a'));

	590 BranchOrBacktrack(above, on_no_match);

	591 __ jmp(&done);

	592 __ bind(&check_digits);

	593 // Check current character in range ['0'..'9'].

	594 __ cmpl(current_character(), Immediate('0'));

	595 BranchOrBacktrack(below, on_no_match);

	596 __ bind(&done);

	597

	598 return true;

	599 }

	600 case 'W': {

	601 Label done, check_digits;

	602 __ cmpl(current_character(), Immediate('9'));

	603 __ j(less_equal, &check_digits);

	604 __ cmpl(current_character(), Immediate('_'));

	605 BranchOrBacktrack(equal, on_no_match);

	606 // Convert to lower case if letter.

	607 __ movl(rax, current_character());

	608 __ orl(rax, Immediate(0x20));

	609 // check current character in range ['a'..'z'], nondestructively.

	610 __ subl(rax, Immediate('a'));

	611 __ cmpl(rax, Immediate('z' - 'a'));

	612 BranchOrBacktrack(below_equal, on_no_match);

	613 __ jmp(&done);

	614 __ bind(&check_digits);

	615 // Check current character in range ['0'..'9'].

	616 __ cmpl(current_character(), Immediate('0'));

	617 BranchOrBacktrack(above_equal, on_no_match);

	618 __ bind(&done);

	619

	620 return true;

	621 }

580 case '*':	622 case '*':

581 // Match any character.	623 // Match any character.

582 if (check_offset) {

583 CheckPosition(cp_offset, on_no_match);

584 }

585 return true;	624 return true;

586 // No custom implementation (yet): w, W, s(UC16), S(UC16).	625 // No custom implementation (yet): s(UC16), S(UC16).

587 default:	626 default:

588 return false;	627 return false;

589 }	628 }

590 }	629 }

591	630

592	631

593 void RegExpMacroAssemblerX64::Fail() {	632 void RegExpMacroAssemblerX64::Fail() {

594 ASSERT(FAILURE == 0); // Return value for failure is zero.	633 ASSERT(FAILURE == 0); // Return value for failure is zero.

595 __ xor_(rax, rax); // zero rax.	634 __ xor_(rax, rax); // zero rax.

596 __ jmp(&exit_label_);	635 __ jmp(&exit_label_);

(...skipping 686 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
1283 Operand(rsi, rdi, times_1, cp_offset * sizeof(uc16)));	1322 Operand(rsi, rdi, times_1, cp_offset * sizeof(uc16)));

1284 }	1323 }

1285 }	1324 }

1286 }	1325 }

1287	1326

1288 #undef __	1327 #undef __

1289	1328

1290 #endif // V8_NATIVE_REGEXP	1329 #endif // V8_NATIVE_REGEXP

1291	1330

1292 }} // namespace v8::internal	1331 }} // namespace v8::internal

OLD	NEW

« no previous file with comments | « src/x64/regexp-macro-assembler-x64.h ('k') | test/cctest/test-regexp.cc » ('j') | no next file with comments »