Index: src/x64/regexp-macro-assembler-x64.cc |
diff --git a/src/x64/regexp-macro-assembler-x64.cc b/src/x64/regexp-macro-assembler-x64.cc |
index 50b4120a5d396ce73bbef7d1a3fb45699535f8ec..d9b75b1a41f3940cd11f3059d0c9c4cec6246d14 100644 |
--- a/src/x64/regexp-macro-assembler-x64.cc |
+++ b/src/x64/regexp-macro-assembler-x64.cc |
@@ -188,8 +188,8 @@ void RegExpMacroAssemblerX64::CheckCharacterGT(uc16 limit, Label* on_greater) { |
void RegExpMacroAssemblerX64::CheckAtStart(Label* on_at_start) { |
Label not_at_start; |
// Did we start the match at the start of the string at all? |
- __ cmpb(Operand(rbp, kAtStart), Immediate(0)); |
- BranchOrBacktrack(equal, ¬_at_start); |
+ __ cmpb(Operand(rbp, kStartIndex), Immediate(0)); |
+ BranchOrBacktrack(not_equal, ¬_at_start); |
// If we did, are we still at the start of the input? |
__ lea(rax, Operand(rsi, rdi, times_1, 0)); |
__ cmpq(rax, Operand(rbp, kInputStart)); |
@@ -200,8 +200,8 @@ void RegExpMacroAssemblerX64::CheckAtStart(Label* on_at_start) { |
void RegExpMacroAssemblerX64::CheckNotAtStart(Label* on_not_at_start) { |
// Did we start the match at the start of the string at all? |
- __ cmpb(Operand(rbp, kAtStart), Immediate(0)); |
- BranchOrBacktrack(equal, on_not_at_start); |
+ __ cmpb(Operand(rbp, kStartIndex), Immediate(0)); |
+ BranchOrBacktrack(not_equal, on_not_at_start); |
// If we did, are we still at the start of the input? |
__ lea(rax, Operand(rsi, rdi, times_1, 0)); |
__ cmpq(rax, Operand(rbp, kInputStart)); |
@@ -219,6 +219,15 @@ void RegExpMacroAssemblerX64::CheckCharacters(Vector<const uc16> str, |
int cp_offset, |
Label* on_failure, |
bool check_end_of_string) { |
+#ifdef DEBUG |
+ // If input is ASCII, don't even bother calling here if the string to |
+ // match contains a non-ascii character. |
+ if (mode_ == ASCII) { |
+ for (int i = 0; i < str.length(); i++) { |
+ ASSERT(str[i] <= String::kMaxAsciiCharCodeU); |
+ } |
+ } |
+#endif |
int byte_length = str.length() * char_size(); |
int byte_offset = cp_offset * char_size(); |
if (check_end_of_string) { |
@@ -232,16 +241,71 @@ void RegExpMacroAssemblerX64::CheckCharacters(Vector<const uc16> str, |
on_failure = &backtrack_label_; |
} |
- // TODO(lrn): Test multiple characters at a time by loading 4 or 8 bytes |
- // at a time. |
- for (int i = 0; i < str.length(); i++) { |
+ // Do one character test first to minimize loading for the case that |
+ // we don't match at all (loading more than one character introduces that |
+ // chance of reading unaligned and reading across cache boundaries). |
+ // If the first character matches, expect a larger chance of matching the |
+ // string, and start loading more characters at a time. |
+ if (mode_ == ASCII) { |
+ __ cmpb(Operand(rsi, rdi, times_1, byte_offset), |
+ Immediate(static_cast<int8_t>(str[0]))); |
+ } else { |
+ // Don't use 16-bit immediate. The size changing prefix throws off |
+ // pre-decoding. |
+ __ movzxwl(rax, |
+ Operand(rsi, rdi, times_1, byte_offset)); |
+ __ cmpl(rax, Immediate(static_cast<int32_t>(str[0]))); |
+ } |
+ BranchOrBacktrack(not_equal, on_failure); |
+ |
+ __ lea(rbx, Operand(rsi, rdi, times_1, 0)); |
+ for (int i = 1, n = str.length(); i < n; ) { |
if (mode_ == ASCII) { |
- __ cmpb(Operand(rsi, rdi, times_1, byte_offset + i), |
- Immediate(static_cast<int8_t>(str[i]))); |
+ if (i + 8 <= n) { |
+ uint64_t combined_chars = |
+ (static_cast<uint64_t>(str[i + 0]) << 0) || |
+ (static_cast<uint64_t>(str[i + 1]) << 8) || |
+ (static_cast<uint64_t>(str[i + 2]) << 16) || |
+ (static_cast<uint64_t>(str[i + 3]) << 24) || |
+ (static_cast<uint64_t>(str[i + 4]) << 32) || |
+ (static_cast<uint64_t>(str[i + 5]) << 40) || |
+ (static_cast<uint64_t>(str[i + 6]) << 48) || |
+ (static_cast<uint64_t>(str[i + 7]) << 56); |
+ __ movq(rax, combined_chars, RelocInfo::NONE); |
+ __ cmpq(rax, Operand(rbx, byte_offset + i)); |
+ i += 8; |
+ } else if (i + 4 <= n) { |
+ uint32_t combined_chars = |
+ (static_cast<uint32_t>(str[i + 0]) << 0) || |
+ (static_cast<uint32_t>(str[i + 1]) << 8) || |
+ (static_cast<uint32_t>(str[i + 2]) << 16) || |
+ (static_cast<uint32_t>(str[i + 3]) << 24); |
+ __ cmpl(Operand(rbx, byte_offset + i), Immediate(combined_chars)); |
+ i += 4; |
+ } else { |
+ __ cmpb(Operand(rbx, byte_offset + i), |
+ Immediate(static_cast<int8_t>(str[i]))); |
+ i++; |
+ } |
} else { |
ASSERT(mode_ == UC16); |
- __ cmpw(Operand(rsi, rdi, times_1, byte_offset + i * sizeof(uc16)), |
- Immediate(str[i])); |
+ if (i + 4 <= n) { |
+ uint64_t combined_chars = *reinterpret_cast<const uint64_t*>(&str[i]); |
+ __ movq(rax, combined_chars, RelocInfo::NONE); |
+ __ cmpq(rax, |
+ Operand(rsi, rdi, times_1, byte_offset + i * sizeof(uc16))); |
+ i += 4; |
+ } else if (i + 2 <= n) { |
+ uint32_t combined_chars = *reinterpret_cast<const uint32_t*>(&str[i]); |
+ __ cmpl(Operand(rsi, rdi, times_1, byte_offset + i * sizeof(uc16)), |
+ Immediate(combined_chars)); |
+ i += 2; |
+ } else { |
+ __ movzxwl(rax, |
+ Operand(rsi, rdi, times_1, byte_offset + i * sizeof(uc16))); |
+ __ cmpl(rax, Immediate(str[i])); |
+ i++; |
+ } |
} |
BranchOrBacktrack(not_equal, on_failure); |
} |
@@ -671,7 +735,6 @@ Handle<Object> RegExpMacroAssemblerX64::GetCode(Handle<String> source) { |
__ push(rbx); // Callee-save |
#endif |
- __ push(Immediate(0)); // Make room for "input start - 1" constant. |
__ push(Immediate(0)); // Make room for "at start" constant. |
// Check if we have space on the stack for registers. |
@@ -724,14 +787,6 @@ Handle<Object> RegExpMacroAssemblerX64::GetCode(Handle<String> source) { |
// position registers. |
__ movq(Operand(rbp, kInputStartMinusOne), rax); |
- // Determine whether the start index is zero, that is at the start of the |
- // string, and store that value in a local variable. |
- __ movq(rbx, Operand(rbp, kStartIndex)); |
- __ xor_(rcx, rcx); // setcc only operates on cl (lower byte of rcx). |
- __ testq(rbx, rbx); |
- __ setcc(zero, rcx); // 1 if 0 (start of string), 0 if positive. |
- __ movq(Operand(rbp, kAtStart), rcx); |
- |
if (num_saved_registers_ > 0) { |
// Fill saved registers with initial value = start offset - 1 |
// Fill in stack push order, to avoid accessing across an unwritten |
@@ -761,8 +816,8 @@ Handle<Object> RegExpMacroAssemblerX64::GetCode(Handle<String> source) { |
__ Move(code_object_pointer(), masm_->CodeObject()); |
// Load previous char as initial value of current-character. |
Label at_start; |
- __ cmpb(Operand(rbp, kAtStart), Immediate(0)); |
- __ j(not_equal, &at_start); |
+ __ cmpb(Operand(rbp, kStartIndex), Immediate(0)); |
+ __ j(equal, &at_start); |
LoadCurrentCharacterUnchecked(-1, 1); // Load previous char. |
__ jmp(&start_label_); |
__ bind(&at_start); |