OLD | NEW |
1 // Copyright 2011 the V8 project authors. All rights reserved. | 1 // Copyright 2011 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 // A simple interpreter for the Irregexp byte code. | 5 // A simple interpreter for the Irregexp byte code. |
6 | 6 |
7 #ifdef V8_INTERPRETED_REGEXP | 7 #ifdef V8_INTERPRETED_REGEXP |
8 | 8 |
9 #include "src/regexp/interpreter-irregexp.h" | 9 #include "src/regexp/interpreter-irregexp.h" |
10 | 10 |
11 #include "src/ast/ast.h" | 11 #include "src/ast/ast.h" |
12 #include "src/regexp/bytecodes-irregexp.h" | 12 #include "src/regexp/bytecodes-irregexp.h" |
13 #include "src/regexp/jsregexp.h" | 13 #include "src/regexp/jsregexp.h" |
14 #include "src/regexp/regexp-macro-assembler.h" | 14 #include "src/regexp/regexp-macro-assembler.h" |
15 #include "src/unicode.h" | 15 #include "src/unicode.h" |
16 #include "src/utils.h" | 16 #include "src/utils.h" |
17 | 17 |
| 18 #ifdef V8_I18N_SUPPORT |
| 19 #include "unicode/uchar.h" |
| 20 #endif // V8_I18N_SUPPORT |
| 21 |
18 namespace v8 { | 22 namespace v8 { |
19 namespace internal { | 23 namespace internal { |
20 | 24 |
21 typedef unibrow::Mapping<unibrow::Ecma262Canonicalize> Canonicalize; | 25 typedef unibrow::Mapping<unibrow::Ecma262Canonicalize> Canonicalize; |
22 | 26 |
23 static bool BackRefMatchesNoCase(Canonicalize* interp_canonicalize, | 27 static bool BackRefMatchesNoCase(Isolate* isolate, int from, int current, |
24 int from, | 28 int len, Vector<const uc16> subject, |
25 int current, | 29 bool unicode) { |
26 int len, | 30 Address offset_a = |
27 Vector<const uc16> subject) { | 31 reinterpret_cast<Address>(const_cast<uc16*>(&subject.at(from))); |
28 for (int i = 0; i < len; i++) { | 32 Address offset_b = |
29 unibrow::uchar old_char = subject[from++]; | 33 reinterpret_cast<Address>(const_cast<uc16*>(&subject.at(current))); |
30 unibrow::uchar new_char = subject[current++]; | 34 size_t length = len * kUC16Size; |
31 if (old_char == new_char) continue; | 35 return RegExpMacroAssembler::CaseInsensitiveCompareUC16( |
32 unibrow::uchar old_string[1] = { old_char }; | 36 offset_a, offset_b, length, unicode ? nullptr : isolate) == 1; |
33 unibrow::uchar new_string[1] = { new_char }; | |
34 interp_canonicalize->get(old_char, '\0', old_string); | |
35 interp_canonicalize->get(new_char, '\0', new_string); | |
36 if (old_string[0] != new_string[0]) { | |
37 return false; | |
38 } | |
39 } | |
40 return true; | |
41 } | 37 } |
42 | 38 |
43 | 39 |
44 static bool BackRefMatchesNoCase(Canonicalize* interp_canonicalize, | 40 static bool BackRefMatchesNoCase(Isolate* isolate, int from, int current, |
45 int from, | 41 int len, Vector<const uint8_t> subject, |
46 int current, | 42 bool unicode) { |
47 int len, | 43 // For Latin1 characters the unicode flag makes no difference. |
48 Vector<const uint8_t> subject) { | |
49 for (int i = 0; i < len; i++) { | 44 for (int i = 0; i < len; i++) { |
50 unsigned int old_char = subject[from++]; | 45 unsigned int old_char = subject[from++]; |
51 unsigned int new_char = subject[current++]; | 46 unsigned int new_char = subject[current++]; |
52 if (old_char == new_char) continue; | 47 if (old_char == new_char) continue; |
53 // Convert both characters to lower case. | 48 // Convert both characters to lower case. |
54 old_char |= 0x20; | 49 old_char |= 0x20; |
55 new_char |= 0x20; | 50 new_char |= 0x20; |
56 if (old_char != new_char) return false; | 51 if (old_char != new_char) return false; |
57 // Not letters in the ASCII range and Latin-1 range. | 52 // Not letters in the ASCII range and Latin-1 range. |
58 if (!(old_char - 'a' <= 'z' - 'a') && | 53 if (!(old_char - 'a' <= 'z' - 'a') && |
(...skipping 457 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
516 if (current - len < 0 || | 511 if (current - len < 0 || |
517 CompareChars(&subject[from], &subject[current - len], len) != 0) { | 512 CompareChars(&subject[from], &subject[current - len], len) != 0) { |
518 pc = code_base + Load32Aligned(pc + 4); | 513 pc = code_base + Load32Aligned(pc + 4); |
519 break; | 514 break; |
520 } | 515 } |
521 current -= len; | 516 current -= len; |
522 } | 517 } |
523 pc += BC_CHECK_NOT_BACK_REF_BACKWARD_LENGTH; | 518 pc += BC_CHECK_NOT_BACK_REF_BACKWARD_LENGTH; |
524 break; | 519 break; |
525 } | 520 } |
| 521 BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_UNICODE) |
526 BYTECODE(CHECK_NOT_BACK_REF_NO_CASE) { | 522 BYTECODE(CHECK_NOT_BACK_REF_NO_CASE) { |
| 523 bool unicode = |
| 524 (insn & BYTECODE_MASK) == BC_CHECK_NOT_BACK_REF_NO_CASE_UNICODE; |
527 int from = registers[insn >> BYTECODE_SHIFT]; | 525 int from = registers[insn >> BYTECODE_SHIFT]; |
528 int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from; | 526 int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from; |
529 if (from >= 0 && len > 0) { | 527 if (from >= 0 && len > 0) { |
530 if (current + len > subject.length() || | 528 if (current + len > subject.length() || |
531 !BackRefMatchesNoCase(isolate->interp_canonicalize_mapping(), | 529 !BackRefMatchesNoCase(isolate, from, current, len, subject, |
532 from, current, len, subject)) { | 530 unicode)) { |
533 pc = code_base + Load32Aligned(pc + 4); | 531 pc = code_base + Load32Aligned(pc + 4); |
534 break; | 532 break; |
535 } | 533 } |
536 current += len; | 534 current += len; |
537 } | 535 } |
538 pc += BC_CHECK_NOT_BACK_REF_NO_CASE_LENGTH; | 536 pc += BC_CHECK_NOT_BACK_REF_NO_CASE_LENGTH; |
539 break; | 537 break; |
540 } | 538 } |
| 539 BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_UNICODE_BACKWARD) |
541 BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_BACKWARD) { | 540 BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_BACKWARD) { |
| 541 bool unicode = (insn & BYTECODE_MASK) == |
| 542 BC_CHECK_NOT_BACK_REF_NO_CASE_UNICODE_BACKWARD; |
542 int from = registers[insn >> BYTECODE_SHIFT]; | 543 int from = registers[insn >> BYTECODE_SHIFT]; |
543 int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from; | 544 int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from; |
544 if (from >= 0 && len > 0) { | 545 if (from >= 0 && len > 0) { |
545 if (current - len < 0 || | 546 if (current - len < 0 || |
546 !BackRefMatchesNoCase(isolate->interp_canonicalize_mapping(), | 547 !BackRefMatchesNoCase(isolate, from, current - len, len, subject, |
547 from, current - len, len, subject)) { | 548 unicode)) { |
548 pc = code_base + Load32Aligned(pc + 4); | 549 pc = code_base + Load32Aligned(pc + 4); |
549 break; | 550 break; |
550 } | 551 } |
551 current -= len; | 552 current -= len; |
552 } | 553 } |
553 pc += BC_CHECK_NOT_BACK_REF_NO_CASE_BACKWARD_LENGTH; | 554 pc += BC_CHECK_NOT_BACK_REF_NO_CASE_BACKWARD_LENGTH; |
554 break; | 555 break; |
555 } | 556 } |
556 BYTECODE(CHECK_AT_START) | 557 BYTECODE(CHECK_AT_START) |
557 if (current == 0) { | 558 if (current == 0) { |
(...skipping 57 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
615 registers, | 616 registers, |
616 start_position, | 617 start_position, |
617 previous_char); | 618 previous_char); |
618 } | 619 } |
619 } | 620 } |
620 | 621 |
621 } // namespace internal | 622 } // namespace internal |
622 } // namespace v8 | 623 } // namespace v8 |
623 | 624 |
624 #endif // V8_INTERPRETED_REGEXP | 625 #endif // V8_INTERPRETED_REGEXP |
OLD | NEW |