| OLD | NEW |
| 1 // Copyright 2011 the V8 project authors. All rights reserved. | 1 // Copyright 2011 the V8 project authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 // A simple interpreter for the Irregexp byte code. | 5 // A simple interpreter for the Irregexp byte code. |
| 6 | 6 |
| 7 #ifdef V8_INTERPRETED_REGEXP | 7 #ifdef V8_INTERPRETED_REGEXP |
| 8 | 8 |
| 9 #include "src/regexp/interpreter-irregexp.h" | 9 #include "src/regexp/interpreter-irregexp.h" |
| 10 | 10 |
| 11 #include "src/ast/ast.h" | 11 #include "src/ast/ast.h" |
| 12 #include "src/regexp/bytecodes-irregexp.h" | 12 #include "src/regexp/bytecodes-irregexp.h" |
| 13 #include "src/regexp/jsregexp.h" | 13 #include "src/regexp/jsregexp.h" |
| 14 #include "src/regexp/regexp-macro-assembler.h" | 14 #include "src/regexp/regexp-macro-assembler.h" |
| 15 #include "src/unicode.h" | 15 #include "src/unicode.h" |
| 16 #include "src/utils.h" | 16 #include "src/utils.h" |
| 17 | 17 |
| 18 #ifdef V8_I18N_SUPPORT |
| 19 #include "unicode/uchar.h" |
| 20 #endif // V8_I18N_SUPPORT |
| 21 |
| 18 namespace v8 { | 22 namespace v8 { |
| 19 namespace internal { | 23 namespace internal { |
| 20 | 24 |
| 21 typedef unibrow::Mapping<unibrow::Ecma262Canonicalize> Canonicalize; | 25 typedef unibrow::Mapping<unibrow::Ecma262Canonicalize> Canonicalize; |
| 22 | 26 |
| 23 static bool BackRefMatchesNoCase(Canonicalize* interp_canonicalize, | 27 static bool BackRefMatchesNoCase(Isolate* isolate, int from, int current, |
| 24 int from, | 28 int len, Vector<const uc16> subject, |
| 25 int current, | 29 bool unicode) { |
| 26 int len, | 30 Address offset_a = |
| 27 Vector<const uc16> subject) { | 31 reinterpret_cast<Address>(const_cast<uc16*>(&subject.at(from))); |
| 28 for (int i = 0; i < len; i++) { | 32 Address offset_b = |
| 29 unibrow::uchar old_char = subject[from++]; | 33 reinterpret_cast<Address>(const_cast<uc16*>(&subject.at(current))); |
| 30 unibrow::uchar new_char = subject[current++]; | 34 size_t length = len * kUC16Size; |
| 31 if (old_char == new_char) continue; | 35 return RegExpMacroAssembler::CaseInsensitiveCompareUC16( |
| 32 unibrow::uchar old_string[1] = { old_char }; | 36 offset_a, offset_b, length, unicode ? nullptr : isolate) == 1; |
| 33 unibrow::uchar new_string[1] = { new_char }; | |
| 34 interp_canonicalize->get(old_char, '\0', old_string); | |
| 35 interp_canonicalize->get(new_char, '\0', new_string); | |
| 36 if (old_string[0] != new_string[0]) { | |
| 37 return false; | |
| 38 } | |
| 39 } | |
| 40 return true; | |
| 41 } | 37 } |
| 42 | 38 |
| 43 | 39 |
| 44 static bool BackRefMatchesNoCase(Canonicalize* interp_canonicalize, | 40 static bool BackRefMatchesNoCase(Isolate* isolate, int from, int current, |
| 45 int from, | 41 int len, Vector<const uint8_t> subject, |
| 46 int current, | 42 bool unicode) { |
| 47 int len, | 43 // For Latin1 characters the unicode flag makes no difference. |
| 48 Vector<const uint8_t> subject) { | |
| 49 for (int i = 0; i < len; i++) { | 44 for (int i = 0; i < len; i++) { |
| 50 unsigned int old_char = subject[from++]; | 45 unsigned int old_char = subject[from++]; |
| 51 unsigned int new_char = subject[current++]; | 46 unsigned int new_char = subject[current++]; |
| 52 if (old_char == new_char) continue; | 47 if (old_char == new_char) continue; |
| 53 // Convert both characters to lower case. | 48 // Convert both characters to lower case. |
| 54 old_char |= 0x20; | 49 old_char |= 0x20; |
| 55 new_char |= 0x20; | 50 new_char |= 0x20; |
| 56 if (old_char != new_char) return false; | 51 if (old_char != new_char) return false; |
| 57 // Not letters in the ASCII range and Latin-1 range. | 52 // Not letters in the ASCII range and Latin-1 range. |
| 58 if (!(old_char - 'a' <= 'z' - 'a') && | 53 if (!(old_char - 'a' <= 'z' - 'a') && |
| (...skipping 457 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 516 if (current - len < 0 || | 511 if (current - len < 0 || |
| 517 CompareChars(&subject[from], &subject[current - len], len) != 0) { | 512 CompareChars(&subject[from], &subject[current - len], len) != 0) { |
| 518 pc = code_base + Load32Aligned(pc + 4); | 513 pc = code_base + Load32Aligned(pc + 4); |
| 519 break; | 514 break; |
| 520 } | 515 } |
| 521 current -= len; | 516 current -= len; |
| 522 } | 517 } |
| 523 pc += BC_CHECK_NOT_BACK_REF_BACKWARD_LENGTH; | 518 pc += BC_CHECK_NOT_BACK_REF_BACKWARD_LENGTH; |
| 524 break; | 519 break; |
| 525 } | 520 } |
| 521 BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_UNICODE) |
| 526 BYTECODE(CHECK_NOT_BACK_REF_NO_CASE) { | 522 BYTECODE(CHECK_NOT_BACK_REF_NO_CASE) { |
| 523 bool unicode = |
| 524 (insn & BYTECODE_MASK) == BC_CHECK_NOT_BACK_REF_NO_CASE_UNICODE; |
| 527 int from = registers[insn >> BYTECODE_SHIFT]; | 525 int from = registers[insn >> BYTECODE_SHIFT]; |
| 528 int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from; | 526 int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from; |
| 529 if (from >= 0 && len > 0) { | 527 if (from >= 0 && len > 0) { |
| 530 if (current + len > subject.length() || | 528 if (current + len > subject.length() || |
| 531 !BackRefMatchesNoCase(isolate->interp_canonicalize_mapping(), | 529 !BackRefMatchesNoCase(isolate, from, current, len, subject, |
| 532 from, current, len, subject)) { | 530 unicode)) { |
| 533 pc = code_base + Load32Aligned(pc + 4); | 531 pc = code_base + Load32Aligned(pc + 4); |
| 534 break; | 532 break; |
| 535 } | 533 } |
| 536 current += len; | 534 current += len; |
| 537 } | 535 } |
| 538 pc += BC_CHECK_NOT_BACK_REF_NO_CASE_LENGTH; | 536 pc += BC_CHECK_NOT_BACK_REF_NO_CASE_LENGTH; |
| 539 break; | 537 break; |
| 540 } | 538 } |
| 539 BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_UNICODE_BACKWARD) |
| 541 BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_BACKWARD) { | 540 BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_BACKWARD) { |
| 541 bool unicode = (insn & BYTECODE_MASK) == |
| 542 BC_CHECK_NOT_BACK_REF_NO_CASE_UNICODE_BACKWARD; |
| 542 int from = registers[insn >> BYTECODE_SHIFT]; | 543 int from = registers[insn >> BYTECODE_SHIFT]; |
| 543 int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from; | 544 int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from; |
| 544 if (from >= 0 && len > 0) { | 545 if (from >= 0 && len > 0) { |
| 545 if (current - len < 0 || | 546 if (current - len < 0 || |
| 546 !BackRefMatchesNoCase(isolate->interp_canonicalize_mapping(), | 547 !BackRefMatchesNoCase(isolate, from, current - len, len, subject, |
| 547 from, current - len, len, subject)) { | 548 unicode)) { |
| 548 pc = code_base + Load32Aligned(pc + 4); | 549 pc = code_base + Load32Aligned(pc + 4); |
| 549 break; | 550 break; |
| 550 } | 551 } |
| 551 current -= len; | 552 current -= len; |
| 552 } | 553 } |
| 553 pc += BC_CHECK_NOT_BACK_REF_NO_CASE_BACKWARD_LENGTH; | 554 pc += BC_CHECK_NOT_BACK_REF_NO_CASE_BACKWARD_LENGTH; |
| 554 break; | 555 break; |
| 555 } | 556 } |
| 556 BYTECODE(CHECK_AT_START) | 557 BYTECODE(CHECK_AT_START) |
| 557 if (current == 0) { | 558 if (current == 0) { |
| (...skipping 57 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 615 registers, | 616 registers, |
| 616 start_position, | 617 start_position, |
| 617 previous_char); | 618 previous_char); |
| 618 } | 619 } |
| 619 } | 620 } |
| 620 | 621 |
| 621 } // namespace internal | 622 } // namespace internal |
| 622 } // namespace v8 | 623 } // namespace v8 |
| 623 | 624 |
| 624 #endif // V8_INTERPRETED_REGEXP | 625 #endif // V8_INTERPRETED_REGEXP |
| OLD | NEW |