Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(550)

Unified Diff: src/arm/regexp-macro-assembler-arm.cc

Issue 507051: Attempt to make \b\w+ faster. Slight performance increase on, e.g., string unpacking. (Closed)
Patch Set: Addressed review comments. Created 10 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « src/arm/regexp-macro-assembler-arm.h ('k') | src/ast.h » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: src/arm/regexp-macro-assembler-arm.cc
diff --git a/src/arm/regexp-macro-assembler-arm.cc b/src/arm/regexp-macro-assembler-arm.cc
index 24b6a9c81a8b2dd6e09040a2cfe65075c32437a9..5b17dc18ad5a8d8018dcff5c96274f1c15e9139d 100644
--- a/src/arm/regexp-macro-assembler-arm.cc
+++ b/src/arm/regexp-macro-assembler-arm.cc
@@ -459,8 +459,6 @@ void RegExpMacroAssemblerARM::CheckNotCharacterAfterMinusAnd(
bool RegExpMacroAssemblerARM::CheckSpecialCharacterClass(uc16 type,
- int cp_offset,
- bool check_offset,
Label* on_no_match) {
// Range checks (c in min..max) are generally implemented by an unsigned
// (c - min) <= (max - min) check
@@ -469,11 +467,6 @@ bool RegExpMacroAssemblerARM::CheckSpecialCharacterClass(uc16 type,
// Match space-characters
if (mode_ == ASCII) {
// ASCII space characters are '\t'..'\r' and ' '.
- if (check_offset) {
- LoadCurrentCharacter(cp_offset, on_no_match);
- } else {
- LoadCurrentCharacterUnchecked(cp_offset, 1);
- }
Label success;
__ cmp(current_character(), Operand(' '));
__ b(eq, &success);
@@ -487,11 +480,6 @@ bool RegExpMacroAssemblerARM::CheckSpecialCharacterClass(uc16 type,
return false;
case 'S':
// Match non-space characters.
- if (check_offset) {
- LoadCurrentCharacter(cp_offset, on_no_match, 1);
- } else {
- LoadCurrentCharacterUnchecked(cp_offset, 1);
- }
if (mode_ == ASCII) {
// ASCII space characters are '\t'..'\r' and ' '.
__ cmp(current_character(), Operand(' '));
@@ -504,33 +492,18 @@ bool RegExpMacroAssemblerARM::CheckSpecialCharacterClass(uc16 type,
return false;
case 'd':
// Match ASCII digits ('0'..'9')
- if (check_offset) {
- LoadCurrentCharacter(cp_offset, on_no_match, 1);
- } else {
- LoadCurrentCharacterUnchecked(cp_offset, 1);
- }
__ sub(r0, current_character(), Operand('0'));
__ cmp(current_character(), Operand('9' - '0'));
BranchOrBacktrack(hi, on_no_match);
return true;
case 'D':
// Match non ASCII-digits
- if (check_offset) {
- LoadCurrentCharacter(cp_offset, on_no_match, 1);
- } else {
- LoadCurrentCharacterUnchecked(cp_offset, 1);
- }
__ sub(r0, current_character(), Operand('0'));
__ cmp(r0, Operand('9' - '0'));
BranchOrBacktrack(ls, on_no_match);
return true;
case '.': {
// Match non-newlines (not 0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029)
- if (check_offset) {
- LoadCurrentCharacter(cp_offset, on_no_match, 1);
- } else {
- LoadCurrentCharacterUnchecked(cp_offset, 1);
- }
__ eor(r0, current_character(), Operand(0x01));
// See if current character is '\n'^1 or '\r'^1, i.e., 0x0b or 0x0c
__ sub(r0, r0, Operand(0x0b));
@@ -546,13 +519,71 @@ bool RegExpMacroAssemblerARM::CheckSpecialCharacterClass(uc16 type,
}
return true;
}
+ case 'n': {
+ // Match newlines (0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029)
+ __ eor(r0, current_character(), Operand(0x01));
+ // See if current character is '\n'^1 or '\r'^1, i.e., 0x0b or 0x0c
+ __ sub(r0, r0, Operand(0x0b));
+ __ cmp(r0, Operand(0x0c - 0x0b));
+ if (mode_ == ASCII) {
+ BranchOrBacktrack(hi, on_no_match);
+ } else {
+ Label done;
+ __ b(ls, &done);
+ // Compare original value to 0x2028 and 0x2029, using the already
+ // computed (current_char ^ 0x01 - 0x0b). I.e., check for
+ // 0x201d (0x2028 - 0x0b) or 0x201e.
+ __ sub(r0, r0, Operand(0x2028 - 0x0b));
+ __ cmp(r0, Operand(1));
+ BranchOrBacktrack(hi, on_no_match);
+ __ bind(&done);
+ }
+ return true;
+ }
+ case 'w': {
+ // Match word character (0-9, A-Z, a-z and _).
+ Label digits, done;
+ __ cmp(current_character(), Operand('9'));
+ __ b(ls, &digits);
+ __ cmp(current_character(), Operand('_'));
+ __ b(eq, &done);
+ __ orr(r0, current_character(), Operand(0x20));
+ __ sub(r0, r0, Operand('a'));
+ __ cmp(r0, Operand('z' - 'a'));
+ BranchOrBacktrack(hi, on_no_match);
+ __ jmp(&done);
+
+ __ bind(&digits);
+ __ cmp(current_character(), Operand('0'));
+ BranchOrBacktrack(lo, on_no_match);
+ __ bind(&done);
+
+ return true;
+ }
+ case 'W': {
+ // Match non-word character (not 0-9, A-Z, a-z and _).
+ Label digits, done;
+ __ cmp(current_character(), Operand('9'));
+ __ b(ls, &digits);
+ __ cmp(current_character(), Operand('_'));
+ BranchOrBacktrack(eq, on_no_match);
+ __ orr(r0, current_character(), Operand(0x20));
+ __ sub(r0, r0, Operand('a'));
+ __ cmp(r0, Operand('z' - 'a'));
+ BranchOrBacktrack(ls, on_no_match);
+ __ jmp(&done);
+
+ __ bind(&digits);
+ __ cmp(current_character(), Operand('0'));
+ BranchOrBacktrack(hs, on_no_match);
+ __ bind(&done);
+
+ return true;
+ }
case '*':
// Match any character.
- if (check_offset) {
- CheckPosition(cp_offset, on_no_match);
- }
return true;
- // No custom implementation (yet): w, W, s(UC16), S(UC16).
+ // No custom implementation (yet): s(UC16), S(UC16).
default:
return false;
}
« no previous file with comments | « src/arm/regexp-macro-assembler-arm.h ('k') | src/ast.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698