Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(467)

Unified Diff: src/x64/regexp-macro-assembler-x64.cc

Issue 507051: Attempt to make \b\w+ faster. Slight performance increase on, e.g., string unpacking. (Closed)
Patch Set: Addressed review comments. Created 10 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « src/x64/regexp-macro-assembler-x64.h ('k') | test/cctest/test-regexp.cc » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: src/x64/regexp-macro-assembler-x64.cc
diff --git a/src/x64/regexp-macro-assembler-x64.cc b/src/x64/regexp-macro-assembler-x64.cc
index 639f5e95b618df123630b4f252d207eaff960682..02f428aaab30735dd857dc52160d26e4dfb001f9 100644
--- a/src/x64/regexp-macro-assembler-x64.cc
+++ b/src/x64/regexp-macro-assembler-x64.cc
@@ -60,7 +60,7 @@ namespace internal {
* - r8 : code object pointer. Used to convert between absolute and
* code-object-relative addresses.
*
- * The registers rax, rbx, rcx, r9 and r11 are free to use for computations.
+ * The registers rax, rbx, r9 and r11 are free to use for computations.
* If changed to use r12+, they should be saved as callee-save registers.
*
* Each call to a C++ method should retain these registers.
@@ -490,27 +490,22 @@ void RegExpMacroAssemblerX64::CheckNotCharacterAfterMinusAnd(
bool RegExpMacroAssemblerX64::CheckSpecialCharacterClass(uc16 type,
- int cp_offset,
- bool check_offset,
Label* on_no_match) {
// Range checks (c in min..max) are generally implemented by an unsigned
- // (c - min) <= (max - min) check
+ // (c - min) <= (max - min) check, using the sequence:
+ // lea(rax, Operand(current_character(), -min)) or sub(rax, Immediate(min))
+ // cmp(rax, Immediate(max - min))
switch (type) {
case 's':
// Match space-characters
if (mode_ == ASCII) {
// ASCII space characters are '\t'..'\r' and ' '.
- if (check_offset) {
- LoadCurrentCharacter(cp_offset, on_no_match);
- } else {
- LoadCurrentCharacterUnchecked(cp_offset, 1);
- }
Label success;
__ cmpl(current_character(), Immediate(' '));
__ j(equal, &success);
// Check range 0x09..0x0d
- __ subl(current_character(), Immediate('\t'));
- __ cmpl(current_character(), Immediate('\r' - '\t'));
+ __ lea(rax, Operand(current_character(), -'\t'));
+ __ cmpl(rax, Immediate('\r' - '\t'));
BranchOrBacktrack(above, on_no_match);
__ bind(&success);
return true;
@@ -518,72 +513,116 @@ bool RegExpMacroAssemblerX64::CheckSpecialCharacterClass(uc16 type,
return false;
case 'S':
// Match non-space characters.
- if (check_offset) {
- LoadCurrentCharacter(cp_offset, on_no_match, 1);
- } else {
- LoadCurrentCharacterUnchecked(cp_offset, 1);
- }
if (mode_ == ASCII) {
// ASCII space characters are '\t'..'\r' and ' '.
__ cmpl(current_character(), Immediate(' '));
BranchOrBacktrack(equal, on_no_match);
- __ subl(current_character(), Immediate('\t'));
- __ cmpl(current_character(), Immediate('\r' - '\t'));
+ __ lea(rax, Operand(current_character(), -'\t'));
+ __ cmpl(rax, Immediate('\r' - '\t'));
BranchOrBacktrack(below_equal, on_no_match);
return true;
}
return false;
case 'd':
// Match ASCII digits ('0'..'9')
- if (check_offset) {
- LoadCurrentCharacter(cp_offset, on_no_match, 1);
- } else {
- LoadCurrentCharacterUnchecked(cp_offset, 1);
- }
- __ subl(current_character(), Immediate('0'));
- __ cmpl(current_character(), Immediate('9' - '0'));
+ __ lea(rax, Operand(current_character(), -'0'));
+ __ cmpl(rax, Immediate('9' - '0'));
BranchOrBacktrack(above, on_no_match);
return true;
case 'D':
// Match non ASCII-digits
- if (check_offset) {
- LoadCurrentCharacter(cp_offset, on_no_match, 1);
- } else {
- LoadCurrentCharacterUnchecked(cp_offset, 1);
- }
- __ subl(current_character(), Immediate('0'));
- __ cmpl(current_character(), Immediate('9' - '0'));
+ __ lea(rax, Operand(current_character(), -'0'));
+ __ cmpl(rax, Immediate('9' - '0'));
BranchOrBacktrack(below_equal, on_no_match);
return true;
case '.': {
// Match non-newlines (not 0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029)
- if (check_offset) {
- LoadCurrentCharacter(cp_offset, on_no_match, 1);
- } else {
- LoadCurrentCharacterUnchecked(cp_offset, 1);
- }
- __ xor_(current_character(), Immediate(0x01));
+ __ movl(rax, current_character());
+ __ xor_(rax, Immediate(0x01));
// See if current character is '\n'^1 or '\r'^1, i.e., 0x0b or 0x0c
- __ subl(current_character(), Immediate(0x0b));
- __ cmpl(current_character(), Immediate(0x0c - 0x0b));
+ __ subl(rax, Immediate(0x0b));
+ __ cmpl(rax, Immediate(0x0c - 0x0b));
BranchOrBacktrack(below_equal, on_no_match);
if (mode_ == UC16) {
// Compare original value to 0x2028 and 0x2029, using the already
// computed (current_char ^ 0x01 - 0x0b). I.e., check for
// 0x201d (0x2028 - 0x0b) or 0x201e.
- __ subl(current_character(), Immediate(0x2028 - 0x0b));
- __ cmpl(current_character(), Immediate(1));
+ __ subl(rax, Immediate(0x2028 - 0x0b));
+ __ cmpl(rax, Immediate(0x2029 - 0x2028));
BranchOrBacktrack(below_equal, on_no_match);
}
return true;
}
+ case 'n': {
+ // Match newlines (0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029)
+ __ movl(rax, current_character());
+ __ xor_(rax, Immediate(0x01));
+ // See if current character is '\n'^1 or '\r'^1, i.e., 0x0b or 0x0c
+ __ subl(rax, Immediate(0x0b));
+ __ cmpl(rax, Immediate(0x0c - 0x0b));
+ if (mode_ == ASCII) {
+ BranchOrBacktrack(above, on_no_match);
+ } else {
+ Label done;
+ BranchOrBacktrack(below_equal, &done);
+ // Compare original value to 0x2028 and 0x2029, using the already
+ // computed (current_char ^ 0x01 - 0x0b). I.e., check for
+ // 0x201d (0x2028 - 0x0b) or 0x201e.
+ __ subl(rax, Immediate(0x2028 - 0x0b));
+ __ cmpl(rax, Immediate(0x2029 - 0x2028));
+ BranchOrBacktrack(above, on_no_match);
+ __ bind(&done);
+ }
+ return true;
+ }
+ case 'w': {
+ Label done, check_digits;
+ __ cmpl(current_character(), Immediate('9'));
+ __ j(less_equal, &check_digits);
+ __ cmpl(current_character(), Immediate('_'));
+ __ j(equal, &done);
+ // Convert to lower case if letter.
+ __ movl(rax, current_character());
+ __ orl(rax, Immediate(0x20));
+ // check rax in range ['a'..'z'].
+ __ subl(rax, Immediate('a'));
+ __ cmpl(rax, Immediate('z' - 'a'));
+ BranchOrBacktrack(above, on_no_match);
+ __ jmp(&done);
+ __ bind(&check_digits);
+ // Check current character in range ['0'..'9'].
+ __ cmpl(current_character(), Immediate('0'));
+ BranchOrBacktrack(below, on_no_match);
+ __ bind(&done);
+
+ return true;
+ }
+ case 'W': {
+ Label done, check_digits;
+ __ cmpl(current_character(), Immediate('9'));
+ __ j(less_equal, &check_digits);
+ __ cmpl(current_character(), Immediate('_'));
+ BranchOrBacktrack(equal, on_no_match);
+ // Convert to lower case if letter.
+ __ movl(rax, current_character());
+ __ orl(rax, Immediate(0x20));
+ // check current character in range ['a'..'z'], nondestructively.
+ __ subl(rax, Immediate('a'));
+ __ cmpl(rax, Immediate('z' - 'a'));
+ BranchOrBacktrack(below_equal, on_no_match);
+ __ jmp(&done);
+ __ bind(&check_digits);
+ // Check current character in range ['0'..'9'].
+ __ cmpl(current_character(), Immediate('0'));
+ BranchOrBacktrack(above_equal, on_no_match);
+ __ bind(&done);
+
+ return true;
+ }
case '*':
// Match any character.
- if (check_offset) {
- CheckPosition(cp_offset, on_no_match);
- }
return true;
- // No custom implementation (yet): w, W, s(UC16), S(UC16).
+ // No custom implementation (yet): s(UC16), S(UC16).
default:
return false;
}
« no previous file with comments | « src/x64/regexp-macro-assembler-x64.h ('k') | test/cctest/test-regexp.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698