Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(587)

Unified Diff: src/ia32/regexp-macro-assembler-ia32.cc

Issue 507051: Attempt to make \b\w+ faster. Slight performance increase on, e.g., string unpacking. (Closed)
Patch Set: Addressed review comments. Created 10 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « src/ia32/regexp-macro-assembler-ia32.h ('k') | src/jsregexp.h » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: src/ia32/regexp-macro-assembler-ia32.cc
diff --git a/src/ia32/regexp-macro-assembler-ia32.cc b/src/ia32/regexp-macro-assembler-ia32.cc
index 2e13d8aeed6e7ae768960bd667e084535d3f8a1c..283eae15f4dcdce710e009777b6b1a8affdbf05b 100644
--- a/src/ia32/regexp-macro-assembler-ia32.cc
+++ b/src/ia32/regexp-macro-assembler-ia32.cc
@@ -471,8 +471,6 @@ void RegExpMacroAssemblerIA32::CheckNotCharacterAfterMinusAnd(
bool RegExpMacroAssemblerIA32::CheckSpecialCharacterClass(uc16 type,
- int cp_offset,
- bool check_offset,
Label* on_no_match) {
// Range checks (c in min..max) are generally implemented by an unsigned
// (c - min) <= (max - min) check
@@ -481,17 +479,12 @@ bool RegExpMacroAssemblerIA32::CheckSpecialCharacterClass(uc16 type,
// Match space-characters
if (mode_ == ASCII) {
// ASCII space characters are '\t'..'\r' and ' '.
- if (check_offset) {
- LoadCurrentCharacter(cp_offset, on_no_match);
- } else {
- LoadCurrentCharacterUnchecked(cp_offset, 1);
- }
Label success;
__ cmp(current_character(), ' ');
__ j(equal, &success);
// Check range 0x09..0x0d
- __ sub(Operand(current_character()), Immediate('\t'));
- __ cmp(current_character(), '\r' - '\t');
+ __ lea(eax, Operand(current_character(), -'\t'));
+ __ cmp(eax, '\r' - '\t');
BranchOrBacktrack(above, on_no_match);
__ bind(&success);
return true;
@@ -499,72 +492,118 @@ bool RegExpMacroAssemblerIA32::CheckSpecialCharacterClass(uc16 type,
return false;
case 'S':
// Match non-space characters.
- if (check_offset) {
- LoadCurrentCharacter(cp_offset, on_no_match, 1);
- } else {
- LoadCurrentCharacterUnchecked(cp_offset, 1);
- }
if (mode_ == ASCII) {
// ASCII space characters are '\t'..'\r' and ' '.
__ cmp(current_character(), ' ');
BranchOrBacktrack(equal, on_no_match);
- __ sub(Operand(current_character()), Immediate('\t'));
- __ cmp(current_character(), '\r' - '\t');
+ __ lea(eax, Operand(current_character(), -'\t'));
+ __ cmp(eax, '\r' - '\t');
BranchOrBacktrack(below_equal, on_no_match);
return true;
}
return false;
case 'd':
// Match ASCII digits ('0'..'9')
- if (check_offset) {
- LoadCurrentCharacter(cp_offset, on_no_match, 1);
- } else {
- LoadCurrentCharacterUnchecked(cp_offset, 1);
- }
- __ sub(Operand(current_character()), Immediate('0'));
- __ cmp(current_character(), '9' - '0');
+ __ lea(eax, Operand(current_character(), -'0'));
+ __ cmp(eax, '9' - '0');
BranchOrBacktrack(above, on_no_match);
return true;
case 'D':
// Match non-digits (any character outside ASCII '0'..'9')
- if (check_offset) {
- LoadCurrentCharacter(cp_offset, on_no_match, 1);
- } else {
- LoadCurrentCharacterUnchecked(cp_offset, 1);
- }
- __ sub(Operand(current_character()), Immediate('0'));
- __ cmp(current_character(), '9' - '0');
+ __ lea(eax, Operand(current_character(), -'0'));
+ __ cmp(eax, '9' - '0');
BranchOrBacktrack(below_equal, on_no_match);
return true;
case '.': {
// Match non-newlines (not 0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029)
- if (check_offset) {
- LoadCurrentCharacter(cp_offset, on_no_match, 1);
- } else {
- LoadCurrentCharacterUnchecked(cp_offset, 1);
- }
- __ xor_(Operand(current_character()), Immediate(0x01));
+ __ mov(Operand(eax), current_character());
+ __ xor_(Operand(eax), Immediate(0x01));
// See if current character is '\n'^1 or '\r'^1, i.e., 0x0b or 0x0c
- __ sub(Operand(current_character()), Immediate(0x0b));
- __ cmp(current_character(), 0x0c - 0x0b);
+ __ sub(Operand(eax), Immediate(0x0b));
+ __ cmp(eax, 0x0c - 0x0b);
BranchOrBacktrack(below_equal, on_no_match);
if (mode_ == UC16) {
// Compare original value to 0x2028 and 0x2029, using the already
// computed ((current_char ^ 0x01) - 0x0b). I.e., check for
// 0x201d (0x2028 - 0x0b) or 0x201e.
- __ sub(Operand(current_character()), Immediate(0x2028 - 0x0b));
- __ cmp(current_character(), 1);
+ __ sub(Operand(eax), Immediate(0x2028 - 0x0b));
+ __ cmp(eax, 0x2029 - 0x2028);
BranchOrBacktrack(below_equal, on_no_match);
}
return true;
}
+ case 'w': {
+ Label done, check_digits;
+ __ cmp(Operand(current_character()), Immediate('9'));
+ __ j(less_equal, &check_digits);
+ __ cmp(Operand(current_character()), Immediate('_'));
+ __ j(equal, &done);
+ // Convert to lower case if letter.
+ __ mov(Operand(eax), current_character());
+ __ or_(eax, 0x20);
+ // Check the lower-cased copy in eax against range ['a'..'z'], leaving current_character() unmodified.
+ __ sub(Operand(eax), Immediate('a'));
+ __ cmp(Operand(eax), Immediate('z' - 'a'));
+ BranchOrBacktrack(above, on_no_match);
+ __ jmp(&done);
+ __ bind(&check_digits);
+ // Check current character in range ['0'..'9'].
+ __ cmp(Operand(current_character()), Immediate('0'));
+ BranchOrBacktrack(below, on_no_match);
+ __ bind(&done);
+
+ return true;
+ }
+ case 'W': {
+ Label done, check_digits;
+ __ cmp(Operand(current_character()), Immediate('9'));
+ __ j(less_equal, &check_digits);
+ __ cmp(Operand(current_character()), Immediate('_'));
+ BranchOrBacktrack(equal, on_no_match);
+ // Convert to lower case if letter.
+ __ mov(Operand(eax), current_character());
+ __ or_(eax, 0x20);
+ // Check the lower-cased copy in eax against range ['a'..'z'], leaving current_character() unmodified.
+ __ sub(Operand(eax), Immediate('a'));
+ __ cmp(Operand(eax), Immediate('z' - 'a'));
+ BranchOrBacktrack(below_equal, on_no_match);
+ __ jmp(&done);
+ __ bind(&check_digits);
+ // Check current character in range ['0'..'9'].
+ __ cmp(Operand(current_character()), Immediate('0'));
+ BranchOrBacktrack(above_equal, on_no_match);
+ __ bind(&done);
+ return true;
+ }
+ // Non-standard classes (with no syntactic shorthand) used internally.
case '*':
// Match any character.
- if (check_offset) {
- CheckPosition(cp_offset, on_no_match);
+ return true;
+ case 'n': {
+ // Match newlines (0x0a('\n'), 0x0d('\r'), 0x2028 or 0x2029).
+ // The opposite of '.'.
+ __ mov(Operand(eax), current_character());
+ __ xor_(Operand(eax), Immediate(0x01));
+ // See if current character is '\n'^1 or '\r'^1, i.e., 0x0b or 0x0c
+ __ sub(Operand(eax), Immediate(0x0b));
+ __ cmp(eax, 0x0c - 0x0b);
+ if (mode_ == ASCII) {
+ BranchOrBacktrack(above, on_no_match);
+ } else {
+ Label done;
+ BranchOrBacktrack(below_equal, &done);
+ ASSERT_EQ(UC16, mode_);
+ // Compare original value to 0x2028 and 0x2029, using the already
+ // computed ((current_char ^ 0x01) - 0x0b). I.e., check for
+ // 0x201d (0x2028 - 0x0b) or 0x201e.
+ __ sub(Operand(eax), Immediate(0x2028 - 0x0b));
+ __ cmp(eax, 1);
+ BranchOrBacktrack(above, on_no_match);
+ __ bind(&done);
}
return true;
- // No custom implementation (yet): w, W, s(UC16), S(UC16).
+ }
+ // No custom implementation (yet): s(UC16), S(UC16).
default:
return false;
}
« no previous file with comments | « src/ia32/regexp-macro-assembler-ia32.h ('k') | src/jsregexp.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698