OLD | NEW |
1 // Copyright (c) 2015, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2015, the Dart project authors. Please see the AUTHORS file |
2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
4 | 4 |
5 // A simple interpreter for the Irregexp byte code. | 5 // A simple interpreter for the Irregexp byte code. |
6 | 6 |
7 #include "vm/regexp_interpreter.h" | 7 #include "vm/regexp_interpreter.h" |
8 | 8 |
| 9 #include "vm/object.h" |
| 10 #include "vm/regexp_assembler.h" |
9 #include "vm/regexp_bytecodes.h" | 11 #include "vm/regexp_bytecodes.h" |
10 #include "vm/regexp_assembler.h" | 12 #include "vm/unibrow-inl.h" |
11 #include "vm/object.h" | 13 #include "vm/unibrow.h" |
12 #include "vm/unicode.h" | 14 #include "vm/unicode.h" |
13 #include "vm/unibrow.h" | |
14 #include "vm/unibrow-inl.h" | |
15 | 15 |
16 namespace dart { | 16 namespace dart { |
17 | 17 |
18 DEFINE_FLAG(bool, trace_regexp_bytecodes, false, "trace_regexp_bytecodes"); | 18 DEFINE_FLAG(bool, trace_regexp_bytecodes, false, "trace_regexp_bytecodes"); |
19 | 19 |
20 typedef unibrow::Mapping<unibrow::Ecma262Canonicalize> Canonicalize; | 20 typedef unibrow::Mapping<unibrow::Ecma262Canonicalize> Canonicalize; |
21 | 21 |
22 template <typename Char> | 22 template <typename Char> |
23 static bool BackRefMatchesNoCase(Canonicalize* interp_canonicalize, | 23 static bool BackRefMatchesNoCase(Canonicalize* interp_canonicalize, |
24 intptr_t from, | 24 intptr_t from, |
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
63 if (old_char != new_char) return false; | 63 if (old_char != new_char) return false; |
64 // Not letters in the ASCII range and Latin-1 range. | 64 // Not letters in the ASCII range and Latin-1 range. |
65 if (!(old_char - 'a' <= 'z' - 'a') && | 65 if (!(old_char - 'a' <= 'z' - 'a') && |
66 !(old_char - 224 <= 254 - 224 && old_char != 247)) { | 66 !(old_char - 224 <= 254 - 224 && old_char != 247)) { |
67 return false; | 67 return false; |
68 } | 68 } |
69 } | 69 } |
70 return true; | 70 return true; |
71 } | 71 } |
72 | 72 |
73 | |
74 #ifdef DEBUG | 73 #ifdef DEBUG |
75 static void TraceInterpreter(const uint8_t* code_base, | 74 static void TraceInterpreter(const uint8_t* code_base, |
76 const uint8_t* pc, | 75 const uint8_t* pc, |
77 int stack_depth, | 76 int stack_depth, |
78 int current_position, | 77 int current_position, |
79 uint32_t current_char, | 78 uint32_t current_char, |
80 int bytecode_length, | 79 int bytecode_length, |
81 const char* bytecode_name) { | 80 const char* bytecode_name) { |
82 if (FLAG_trace_regexp_bytecodes) { | 81 if (FLAG_trace_regexp_bytecodes) { |
83 bool printable = (current_char < 127 && current_char >= 32); | 82 bool printable = (current_char < 127 && current_char >= 32); |
(...skipping 12 matching lines...) Expand all Loading... |
96 if (b < 127 && b >= 32) { | 95 if (b < 127 && b >= 32) { |
97 OS::Print("%c", b); | 96 OS::Print("%c", b); |
98 } else { | 97 } else { |
99 OS::Print("."); | 98 OS::Print("."); |
100 } | 99 } |
101 } | 100 } |
102 OS::Print("\n"); | 101 OS::Print("\n"); |
103 } | 102 } |
104 } | 103 } |
105 | 104 |
106 | |
107 #define BYTECODE(name) \ | 105 #define BYTECODE(name) \ |
108 case BC_##name: \ | 106 case BC_##name: \ |
109 TraceInterpreter(code_base, pc, \ | 107 TraceInterpreter(code_base, pc, \ |
110 static_cast<int>(backtrack_sp - backtrack_stack_base), \ | 108 static_cast<int>(backtrack_sp - backtrack_stack_base), \ |
111 current, current_char, BC_##name##_LENGTH, #name); | 109 current, current_char, BC_##name##_LENGTH, #name); |
112 #else | 110 #else |
113 #define BYTECODE(name) case BC_##name: | 111 #define BYTECODE(name) case BC_##name: |
114 #endif | 112 #endif |
115 | 113 |
116 | |
117 static int32_t Load32Aligned(const uint8_t* pc) { | 114 static int32_t Load32Aligned(const uint8_t* pc) { |
118 ASSERT((reinterpret_cast<intptr_t>(pc) & 3) == 0); | 115 ASSERT((reinterpret_cast<intptr_t>(pc) & 3) == 0); |
119 return *reinterpret_cast<const int32_t*>(pc); | 116 return *reinterpret_cast<const int32_t*>(pc); |
120 } | 117 } |
121 | 118 |
122 | |
123 static int32_t Load16Aligned(const uint8_t* pc) { | 119 static int32_t Load16Aligned(const uint8_t* pc) { |
124 ASSERT((reinterpret_cast<intptr_t>(pc) & 1) == 0); | 120 ASSERT((reinterpret_cast<intptr_t>(pc) & 1) == 0); |
125 return *reinterpret_cast<const uint16_t*>(pc); | 121 return *reinterpret_cast<const uint16_t*>(pc); |
126 } | 122 } |
127 | 123 |
128 | |
129 // A simple abstraction over the backtracking stack used by the interpreter. | 124 // A simple abstraction over the backtracking stack used by the interpreter. |
130 // This backtracking stack does not grow automatically, but it ensures that the | 125 // This backtracking stack does not grow automatically, but it ensures that the |
131 // the memory held by the stack is released or remembered in a cache if the | 126 // the memory held by the stack is released or remembered in a cache if the |
132 // matching terminates. | 127 // matching terminates. |
133 class BacktrackStack { | 128 class BacktrackStack { |
134 public: | 129 public: |
135 explicit BacktrackStack(Zone* zone) { | 130 explicit BacktrackStack(Zone* zone) { |
136 data_ = zone->Alloc<intptr_t>(kBacktrackStackSize); | 131 data_ = zone->Alloc<intptr_t>(kBacktrackStackSize); |
137 } | 132 } |
138 | 133 |
139 intptr_t* data() const { return data_; } | 134 intptr_t* data() const { return data_; } |
140 | 135 |
141 intptr_t max_size() const { return kBacktrackStackSize; } | 136 intptr_t max_size() const { return kBacktrackStackSize; } |
142 | 137 |
143 private: | 138 private: |
144 static const intptr_t kBacktrackStackSize = 10000; | 139 static const intptr_t kBacktrackStackSize = 10000; |
145 | 140 |
146 intptr_t* data_; | 141 intptr_t* data_; |
147 | 142 |
148 DISALLOW_COPY_AND_ASSIGN(BacktrackStack); | 143 DISALLOW_COPY_AND_ASSIGN(BacktrackStack); |
149 }; | 144 }; |
150 | 145 |
151 | |
152 template <typename Char> | 146 template <typename Char> |
153 static IrregexpInterpreter::IrregexpResult RawMatch(const uint8_t* code_base, | 147 static IrregexpInterpreter::IrregexpResult RawMatch(const uint8_t* code_base, |
154 const String& subject, | 148 const String& subject, |
155 int32_t* registers, | 149 int32_t* registers, |
156 intptr_t current, | 150 intptr_t current, |
157 uint32_t current_char, | 151 uint32_t current_char, |
158 Zone* zone) { | 152 Zone* zone) { |
159 const uint8_t* pc = code_base; | 153 const uint8_t* pc = code_base; |
160 // BacktrackStack ensures that the memory allocated for the backtracking stack | 154 // BacktrackStack ensures that the memory allocated for the backtracking stack |
161 // is returned to the system or cached if there is no stack being cached at | 155 // is returned to the system or cached if there is no stack being cached at |
(...skipping 401 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
563 pc += BC_SET_CURRENT_POSITION_FROM_END_LENGTH; | 557 pc += BC_SET_CURRENT_POSITION_FROM_END_LENGTH; |
564 break; | 558 break; |
565 } | 559 } |
566 default: | 560 default: |
567 UNREACHABLE(); | 561 UNREACHABLE(); |
568 break; | 562 break; |
569 } | 563 } |
570 } | 564 } |
571 } | 565 } |
572 | 566 |
573 | |
574 IrregexpInterpreter::IrregexpResult IrregexpInterpreter::Match( | 567 IrregexpInterpreter::IrregexpResult IrregexpInterpreter::Match( |
575 const TypedData& bytecode, | 568 const TypedData& bytecode, |
576 const String& subject, | 569 const String& subject, |
577 int32_t* registers, | 570 int32_t* registers, |
578 intptr_t start_position, | 571 intptr_t start_position, |
579 Zone* zone) { | 572 Zone* zone) { |
580 NoSafepointScope no_safepoint; | 573 NoSafepointScope no_safepoint; |
581 const uint8_t* code_base = reinterpret_cast<uint8_t*>(bytecode.DataAddr(0)); | 574 const uint8_t* code_base = reinterpret_cast<uint8_t*>(bytecode.DataAddr(0)); |
582 | 575 |
583 uint16_t previous_char = '\n'; | 576 uint16_t previous_char = '\n'; |
584 if (start_position != 0) { | 577 if (start_position != 0) { |
585 previous_char = subject.CharAt(start_position - 1); | 578 previous_char = subject.CharAt(start_position - 1); |
586 } | 579 } |
587 | 580 |
588 if (subject.IsOneByteString() || subject.IsExternalOneByteString()) { | 581 if (subject.IsOneByteString() || subject.IsExternalOneByteString()) { |
589 return RawMatch<uint8_t>(code_base, subject, registers, start_position, | 582 return RawMatch<uint8_t>(code_base, subject, registers, start_position, |
590 previous_char, zone); | 583 previous_char, zone); |
591 } else if (subject.IsTwoByteString() || subject.IsExternalTwoByteString()) { | 584 } else if (subject.IsTwoByteString() || subject.IsExternalTwoByteString()) { |
592 return RawMatch<uint16_t>(code_base, subject, registers, start_position, | 585 return RawMatch<uint16_t>(code_base, subject, registers, start_position, |
593 previous_char, zone); | 586 previous_char, zone); |
594 } else { | 587 } else { |
595 UNREACHABLE(); | 588 UNREACHABLE(); |
596 return IrregexpInterpreter::RE_FAILURE; | 589 return IrregexpInterpreter::RE_FAILURE; |
597 } | 590 } |
598 } | 591 } |
599 | 592 |
600 } // namespace dart | 593 } // namespace dart |
OLD | NEW |