| Index: src/ia32/regexp-macro-assembler-ia32.cc
|
| diff --git a/src/ia32/regexp-macro-assembler-ia32.cc b/src/ia32/regexp-macro-assembler-ia32.cc
|
| index 2e13d8aeed6e7ae768960bd667e084535d3f8a1c..283eae15f4dcdce710e009777b6b1a8affdbf05b 100644
|
| --- a/src/ia32/regexp-macro-assembler-ia32.cc
|
| +++ b/src/ia32/regexp-macro-assembler-ia32.cc
|
| @@ -471,8 +471,6 @@ void RegExpMacroAssemblerIA32::CheckNotCharacterAfterMinusAnd(
|
|
|
|
|
| bool RegExpMacroAssemblerIA32::CheckSpecialCharacterClass(uc16 type,
|
| - int cp_offset,
|
| - bool check_offset,
|
| Label* on_no_match) {
|
| // Range checks (c in min..max) are generally implemented by an unsigned
|
| // (c - min) <= (max - min) check
|
| @@ -481,17 +479,12 @@ bool RegExpMacroAssemblerIA32::CheckSpecialCharacterClass(uc16 type,
|
| // Match space-characters
|
| if (mode_ == ASCII) {
|
| // ASCII space characters are '\t'..'\r' and ' '.
|
| - if (check_offset) {
|
| - LoadCurrentCharacter(cp_offset, on_no_match);
|
| - } else {
|
| - LoadCurrentCharacterUnchecked(cp_offset, 1);
|
| - }
|
| Label success;
|
| __ cmp(current_character(), ' ');
|
| __ j(equal, &success);
|
| // Check range 0x09..0x0d
|
| - __ sub(Operand(current_character()), Immediate('\t'));
|
| - __ cmp(current_character(), '\r' - '\t');
|
| + __ lea(eax, Operand(current_character(), -'\t'));
|
| + __ cmp(eax, '\r' - '\t');
|
| BranchOrBacktrack(above, on_no_match);
|
| __ bind(&success);
|
| return true;
|
| @@ -499,72 +492,118 @@ bool RegExpMacroAssemblerIA32::CheckSpecialCharacterClass(uc16 type,
|
| return false;
|
| case 'S':
|
| // Match non-space characters.
|
| - if (check_offset) {
|
| - LoadCurrentCharacter(cp_offset, on_no_match, 1);
|
| - } else {
|
| - LoadCurrentCharacterUnchecked(cp_offset, 1);
|
| - }
|
| if (mode_ == ASCII) {
|
| // ASCII space characters are '\t'..'\r' and ' '.
|
| __ cmp(current_character(), ' ');
|
| BranchOrBacktrack(equal, on_no_match);
|
| - __ sub(Operand(current_character()), Immediate('\t'));
|
| - __ cmp(current_character(), '\r' - '\t');
|
| + __ lea(eax, Operand(current_character(), -'\t'));
|
| + __ cmp(eax, '\r' - '\t');
|
| BranchOrBacktrack(below_equal, on_no_match);
|
| return true;
|
| }
|
| return false;
|
| case 'd':
|
| // Match ASCII digits ('0'..'9')
|
| - if (check_offset) {
|
| - LoadCurrentCharacter(cp_offset, on_no_match, 1);
|
| - } else {
|
| - LoadCurrentCharacterUnchecked(cp_offset, 1);
|
| - }
|
| - __ sub(Operand(current_character()), Immediate('0'));
|
| - __ cmp(current_character(), '9' - '0');
|
| + __ lea(eax, Operand(current_character(), -'0'));
|
| + __ cmp(eax, '9' - '0');
|
| BranchOrBacktrack(above, on_no_match);
|
| return true;
|
| case 'D':
|
| // Match non ASCII-digits
|
| - if (check_offset) {
|
| - LoadCurrentCharacter(cp_offset, on_no_match, 1);
|
| - } else {
|
| - LoadCurrentCharacterUnchecked(cp_offset, 1);
|
| - }
|
| - __ sub(Operand(current_character()), Immediate('0'));
|
| - __ cmp(current_character(), '9' - '0');
|
| + __ lea(eax, Operand(current_character(), -'0'));
|
| + __ cmp(eax, '9' - '0');
|
| BranchOrBacktrack(below_equal, on_no_match);
|
| return true;
|
| case '.': {
|
| // Match non-newlines (not 0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029)
|
| - if (check_offset) {
|
| - LoadCurrentCharacter(cp_offset, on_no_match, 1);
|
| - } else {
|
| - LoadCurrentCharacterUnchecked(cp_offset, 1);
|
| - }
|
| - __ xor_(Operand(current_character()), Immediate(0x01));
|
| + __ mov(Operand(eax), current_character());
|
| + __ xor_(Operand(eax), Immediate(0x01));
|
| // See if current character is '\n'^1 or '\r'^1, i.e., 0x0b or 0x0c
|
| - __ sub(Operand(current_character()), Immediate(0x0b));
|
| - __ cmp(current_character(), 0x0c - 0x0b);
|
| + __ sub(Operand(eax), Immediate(0x0b));
|
| + __ cmp(eax, 0x0c - 0x0b);
|
| BranchOrBacktrack(below_equal, on_no_match);
|
| if (mode_ == UC16) {
|
| // Compare original value to 0x2028 and 0x2029, using the already
|
| // computed (current_char ^ 0x01 - 0x0b). I.e., check for
|
| // 0x201d (0x2028 - 0x0b) or 0x201e.
|
| - __ sub(Operand(current_character()), Immediate(0x2028 - 0x0b));
|
| - __ cmp(current_character(), 1);
|
| + __ sub(Operand(eax), Immediate(0x2028 - 0x0b));
|
| + __ cmp(eax, 0x2029 - 0x2028);
|
| BranchOrBacktrack(below_equal, on_no_match);
|
| }
|
| return true;
|
| }
|
| + case 'w': {
|
| + Label done, check_digits;
|
| + __ cmp(Operand(current_character()), Immediate('9'));
|
| + __ j(less_equal, &check_digits);
|
| + __ cmp(Operand(current_character()), Immediate('_'));
|
| + __ j(equal, &done);
|
| + // Convert to lower case if letter.
|
| + __ mov(Operand(eax), current_character());
|
| + __ or_(eax, 0x20);
|
| + // check current character in range ['a'..'z'], nondestructively.
|
| + __ sub(Operand(eax), Immediate('a'));
|
| + __ cmp(Operand(eax), Immediate('z' - 'a'));
|
| + BranchOrBacktrack(above, on_no_match);
|
| + __ jmp(&done);
|
| + __ bind(&check_digits);
|
| + // Check current character in range ['0'..'9'].
|
| + __ cmp(Operand(current_character()), Immediate('0'));
|
| + BranchOrBacktrack(below, on_no_match);
|
| + __ bind(&done);
|
| +
|
| + return true;
|
| + }
|
| + case 'W': {
|
| + Label done, check_digits;
|
| + __ cmp(Operand(current_character()), Immediate('9'));
|
| + __ j(less_equal, &check_digits);
|
| + __ cmp(Operand(current_character()), Immediate('_'));
|
| + BranchOrBacktrack(equal, on_no_match);
|
| + // Convert to lower case if letter.
|
| + __ mov(Operand(eax), current_character());
|
| + __ or_(eax, 0x20);
|
| + // check current character in range ['a'..'z'], nondestructively.
|
| + __ sub(Operand(eax), Immediate('a'));
|
| + __ cmp(Operand(eax), Immediate('z' - 'a'));
|
| + BranchOrBacktrack(below_equal, on_no_match);
|
| + __ jmp(&done);
|
| + __ bind(&check_digits);
|
| + // Check current character in range ['0'..'9'].
|
| + __ cmp(Operand(current_character()), Immediate('0'));
|
| + BranchOrBacktrack(above_equal, on_no_match);
|
| + __ bind(&done);
|
| + return true;
|
| + }
|
| + // Non-standard classes (with no syntactic shorthand) used internally.
|
| case '*':
|
| // Match any character.
|
| - if (check_offset) {
|
| - CheckPosition(cp_offset, on_no_match);
|
| + return true;
|
| + case 'n': {
|
| + // Match newlines (0x0a('\n'), 0x0d('\r'), 0x2028 or 0x2029).
|
| + // The opposite of '.'.
|
| + __ mov(Operand(eax), current_character());
|
| + __ xor_(Operand(eax), Immediate(0x01));
|
| + // See if current character is '\n'^1 or '\r'^1, i.e., 0x0b or 0x0c
|
| + __ sub(Operand(eax), Immediate(0x0b));
|
| + __ cmp(eax, 0x0c - 0x0b);
|
| + if (mode_ == ASCII) {
|
| + BranchOrBacktrack(above, on_no_match);
|
| + } else {
|
| + Label done;
|
| + BranchOrBacktrack(below_equal, &done);
|
| + ASSERT_EQ(UC16, mode_);
|
| + // Compare original value to 0x2028 and 0x2029, using the already
|
| + // computed (current_char ^ 0x01 - 0x0b). I.e., check for
|
| + // 0x201d (0x2028 - 0x0b) or 0x201e.
|
| + __ sub(Operand(eax), Immediate(0x2028 - 0x0b));
|
| + __ cmp(eax, 1);
|
| + BranchOrBacktrack(above, on_no_match);
|
| + __ bind(&done);
|
| }
|
| return true;
|
| - // No custom implementation (yet): w, W, s(UC16), S(UC16).
|
| + }
|
| + // No custom implementation (yet): s(UC16), S(UC16).
|
| default:
|
| return false;
|
| }
|
|
|