OLD | NEW |
1 // Copyright 2011 the V8 project authors. All rights reserved. | 1 // Copyright 2011 the V8 project authors. All rights reserved. |
2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
4 // met: | 4 // met: |
5 // | 5 // |
6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
(...skipping 15 matching lines...) Expand all Loading... |
26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
27 | 27 |
28 // A simple interpreter for the Irregexp byte code. | 28 // A simple interpreter for the Irregexp byte code. |
29 | 29 |
30 | 30 |
31 #include "v8.h" | 31 #include "v8.h" |
32 #include "unicode.h" | 32 #include "unicode.h" |
33 #include "utils.h" | 33 #include "utils.h" |
34 #include "ast.h" | 34 #include "ast.h" |
35 #include "bytecodes-irregexp.h" | 35 #include "bytecodes-irregexp.h" |
| 36 #include "jsregexp.h" |
36 #include "interpreter-irregexp.h" | 37 #include "interpreter-irregexp.h" |
37 | 38 |
38 | |
39 namespace v8 { | 39 namespace v8 { |
40 namespace internal { | 40 namespace internal { |
41 | 41 |
42 | 42 |
43 typedef unibrow::Mapping<unibrow::Ecma262Canonicalize> Canonicalize; | 43 typedef unibrow::Mapping<unibrow::Ecma262Canonicalize> Canonicalize; |
44 | 44 |
45 static bool BackRefMatchesNoCase(Canonicalize* interp_canonicalize, | 45 static bool BackRefMatchesNoCase(Canonicalize* interp_canonicalize, |
46 int from, | 46 int from, |
47 int current, | 47 int current, |
48 int len, | 48 int len, |
(...skipping 131 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
180 static const int kBacktrackStackSize = 10000; | 180 static const int kBacktrackStackSize = 10000; |
181 | 181 |
182 int* data_; | 182 int* data_; |
183 Isolate* isolate_; | 183 Isolate* isolate_; |
184 | 184 |
185 DISALLOW_COPY_AND_ASSIGN(BacktrackStack); | 185 DISALLOW_COPY_AND_ASSIGN(BacktrackStack); |
186 }; | 186 }; |
187 | 187 |
188 | 188 |
189 template <typename Char> | 189 template <typename Char> |
190 static bool RawMatch(Isolate* isolate, | 190 static RegExpImpl::IrregexpResult RawMatch(Isolate* isolate, |
191 const byte* code_base, | 191 const byte* code_base, |
192 Vector<const Char> subject, | 192 Vector<const Char> subject, |
193 int* registers, | 193 int* registers, |
194 int current, | 194 int current, |
195 uint32_t current_char) { | 195 uint32_t current_char) { |
196 const byte* pc = code_base; | 196 const byte* pc = code_base; |
197 // BacktrackStack ensures that the memory allocated for the backtracking stack | 197 // BacktrackStack ensures that the memory allocated for the backtracking stack |
198 // is returned to the system or cached if there is no stack being cached at | 198 // is returned to the system or cached if there is no stack being cached at |
199 // the moment. | 199 // the moment. |
200 BacktrackStack backtrack_stack(isolate); | 200 BacktrackStack backtrack_stack(isolate); |
201 int* backtrack_stack_base = backtrack_stack.data(); | 201 int* backtrack_stack_base = backtrack_stack.data(); |
202 int* backtrack_sp = backtrack_stack_base; | 202 int* backtrack_sp = backtrack_stack_base; |
203 int backtrack_stack_space = backtrack_stack.max_size(); | 203 int backtrack_stack_space = backtrack_stack.max_size(); |
204 #ifdef DEBUG | 204 #ifdef DEBUG |
205 if (FLAG_trace_regexp_bytecodes) { | 205 if (FLAG_trace_regexp_bytecodes) { |
206 PrintF("\n\nStart bytecode interpreter\n\n"); | 206 PrintF("\n\nStart bytecode interpreter\n\n"); |
207 } | 207 } |
208 #endif | 208 #endif |
209 while (true) { | 209 while (true) { |
210 int32_t insn = Load32Aligned(pc); | 210 int32_t insn = Load32Aligned(pc); |
211 switch (insn & BYTECODE_MASK) { | 211 switch (insn & BYTECODE_MASK) { |
212 BYTECODE(BREAK) | 212 BYTECODE(BREAK) |
213 UNREACHABLE(); | 213 UNREACHABLE(); |
214 return false; | 214 return RegExpImpl::RE_FAILURE; |
215 BYTECODE(PUSH_CP) | 215 BYTECODE(PUSH_CP) |
216 if (--backtrack_stack_space < 0) { | 216 if (--backtrack_stack_space < 0) { |
217 return false; // No match on backtrack stack overflow. | 217 return RegExpImpl::RE_EXCEPTION; |
218 } | 218 } |
219 *backtrack_sp++ = current; | 219 *backtrack_sp++ = current; |
220 pc += BC_PUSH_CP_LENGTH; | 220 pc += BC_PUSH_CP_LENGTH; |
221 break; | 221 break; |
222 BYTECODE(PUSH_BT) | 222 BYTECODE(PUSH_BT) |
223 if (--backtrack_stack_space < 0) { | 223 if (--backtrack_stack_space < 0) { |
224 return false; // No match on backtrack stack overflow. | 224 return RegExpImpl::RE_EXCEPTION; |
225 } | 225 } |
226 *backtrack_sp++ = Load32Aligned(pc + 4); | 226 *backtrack_sp++ = Load32Aligned(pc + 4); |
227 pc += BC_PUSH_BT_LENGTH; | 227 pc += BC_PUSH_BT_LENGTH; |
228 break; | 228 break; |
229 BYTECODE(PUSH_REGISTER) | 229 BYTECODE(PUSH_REGISTER) |
230 if (--backtrack_stack_space < 0) { | 230 if (--backtrack_stack_space < 0) { |
231 return false; // No match on backtrack stack overflow. | 231 return RegExpImpl::RE_EXCEPTION; |
232 } | 232 } |
233 *backtrack_sp++ = registers[insn >> BYTECODE_SHIFT]; | 233 *backtrack_sp++ = registers[insn >> BYTECODE_SHIFT]; |
234 pc += BC_PUSH_REGISTER_LENGTH; | 234 pc += BC_PUSH_REGISTER_LENGTH; |
235 break; | 235 break; |
236 BYTECODE(SET_REGISTER) | 236 BYTECODE(SET_REGISTER) |
237 registers[insn >> BYTECODE_SHIFT] = Load32Aligned(pc + 4); | 237 registers[insn >> BYTECODE_SHIFT] = Load32Aligned(pc + 4); |
238 pc += BC_SET_REGISTER_LENGTH; | 238 pc += BC_SET_REGISTER_LENGTH; |
239 break; | 239 break; |
240 BYTECODE(ADVANCE_REGISTER) | 240 BYTECODE(ADVANCE_REGISTER) |
241 registers[insn >> BYTECODE_SHIFT] += Load32Aligned(pc + 4); | 241 registers[insn >> BYTECODE_SHIFT] += Load32Aligned(pc + 4); |
(...skipping 29 matching lines...) Expand all Loading... |
271 --backtrack_sp; | 271 --backtrack_sp; |
272 pc = code_base + *backtrack_sp; | 272 pc = code_base + *backtrack_sp; |
273 break; | 273 break; |
274 BYTECODE(POP_REGISTER) | 274 BYTECODE(POP_REGISTER) |
275 backtrack_stack_space++; | 275 backtrack_stack_space++; |
276 --backtrack_sp; | 276 --backtrack_sp; |
277 registers[insn >> BYTECODE_SHIFT] = *backtrack_sp; | 277 registers[insn >> BYTECODE_SHIFT] = *backtrack_sp; |
278 pc += BC_POP_REGISTER_LENGTH; | 278 pc += BC_POP_REGISTER_LENGTH; |
279 break; | 279 break; |
280 BYTECODE(FAIL) | 280 BYTECODE(FAIL) |
281 return false; | 281 return RegExpImpl::RE_FAILURE; |
282 BYTECODE(SUCCEED) | 282 BYTECODE(SUCCEED) |
283 return true; | 283 return RegExpImpl::RE_SUCCESS; |
284 BYTECODE(ADVANCE_CP) | 284 BYTECODE(ADVANCE_CP) |
285 current += insn >> BYTECODE_SHIFT; | 285 current += insn >> BYTECODE_SHIFT; |
286 pc += BC_ADVANCE_CP_LENGTH; | 286 pc += BC_ADVANCE_CP_LENGTH; |
287 break; | 287 break; |
288 BYTECODE(GOTO) | 288 BYTECODE(GOTO) |
289 pc = code_base + Load32Aligned(pc + 4); | 289 pc = code_base + Load32Aligned(pc + 4); |
290 break; | 290 break; |
291 BYTECODE(ADVANCE_CP_AND_GOTO) | 291 BYTECODE(ADVANCE_CP_AND_GOTO) |
292 current += insn >> BYTECODE_SHIFT; | 292 current += insn >> BYTECODE_SHIFT; |
293 pc = code_base + Load32Aligned(pc + 4); | 293 pc = code_base + Load32Aligned(pc + 4); |
(...skipping 324 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
618 break; | 618 break; |
619 } | 619 } |
620 default: | 620 default: |
621 UNREACHABLE(); | 621 UNREACHABLE(); |
622 break; | 622 break; |
623 } | 623 } |
624 } | 624 } |
625 } | 625 } |
626 | 626 |
627 | 627 |
// IrregexpInterpreter::Match: entry point that runs the bytecode held in
// code_array against subject, starting at start_position, by delegating to
// the RawMatch template for the subject's character width.
//
// The OLD column returns bool; the NEW column returns
// RegExpImpl::IrregexpResult and simply forwards whatever RawMatch returns.
// In the visible part of RawMatch (NEW column) that is RE_SUCCESS on the
// SUCCEED bytecode, RE_FAILURE on FAIL, and RE_EXCEPTION where the OLD column
// returned false with the comment "No match on backtrack stack overflow".
//
// Parameters:
//   isolate        - passed through to RawMatch.
//   code_array     - byte array whose data start address is used as the
//                    bytecode base.
//   subject        - string to match; asserted to be flat.
//   registers      - passed through to RawMatch unchanged.
//   start_position - index in subject at which matching starts.
628 bool IrregexpInterpreter::Match(Isolate* isolate, | 628 RegExpImpl::IrregexpResult IrregexpInterpreter::Match( |
629 Handle<ByteArray> code_array, | 629 Isolate* isolate, |
630 Handle<String> subject, | 630 Handle<ByteArray> code_array, |
631 int* registers, | 631 Handle<String> subject, |
632 int start_position) { | 632 int* registers, |
| 633 int start_position) { |
633 ASSERT(subject->IsFlat()); | 634 ASSERT(subject->IsFlat()); |
634 | 635 |
// NOTE(review): presumably guards the raw code/subject pointers taken below
// against being invalidated by an allocation - confirm against
// AssertNoAllocation's definition.
635 AssertNoAllocation a; | 636 AssertNoAllocation a; |
636 const byte* code_base = code_array->GetDataStartAddress(); | 637 const byte* code_base = code_array->GetDataStartAddress(); |
// Character "before" the start position: defaults to '\n' when starting at
// index 0, otherwise it is overwritten below with subject[start_position - 1].
// NOTE(review): the '\n' default presumably makes position 0 look like the
// start of a line to the bytecode - confirm against RawMatch's use of
// current_char.
637 uc16 previous_char = '\n'; | 638 uc16 previous_char = '\n'; |
638 String::FlatContent subject_content = subject->GetFlatContent(); | 639 String::FlatContent subject_content = subject->GetFlatContent(); |
// Dispatch on the flat content's representation so RawMatch is instantiated
// with the matching character type (char for ASCII, uc16 for two-byte).
639 if (subject_content.IsAscii()) { | 640 if (subject_content.IsAscii()) { |
640 Vector<const char> subject_vector = subject_content.ToAsciiVector(); | 641 Vector<const char> subject_vector = subject_content.ToAsciiVector(); |
641 if (start_position != 0) previous_char = subject_vector[start_position - 1]; | 642 if (start_position != 0) previous_char = subject_vector[start_position - 1]; |
642 return RawMatch(isolate, | 643 return RawMatch(isolate, |
643 code_base, | 644 code_base, |
644 subject_vector, | 645 subject_vector, |
645 registers, | 646 registers, |
646 start_position, | 647 start_position, |
647 previous_char); | 648 previous_char); |
648 } else { | 649 } else { |
// Anything that is not ASCII is asserted to be two-byte content.
649 ASSERT(subject_content.IsTwoByte()); | 650 ASSERT(subject_content.IsTwoByte()); |
650 Vector<const uc16> subject_vector = subject_content.ToUC16Vector(); | 651 Vector<const uc16> subject_vector = subject_content.ToUC16Vector(); |
651 if (start_position != 0) previous_char = subject_vector[start_position - 1]; | 652 if (start_position != 0) previous_char = subject_vector[start_position - 1]; |
652 return RawMatch(isolate, | 653 return RawMatch(isolate, |
653 code_base, | 654 code_base, |
654 subject_vector, | 655 subject_vector, |
655 registers, | 656 registers, |
656 start_position, | 657 start_position, |
657 previous_char); | 658 previous_char); |
658 } | 659 } |
659 } | 660 } |
660 | 661 |
661 } } // namespace v8::internal | 662 } } // namespace v8::internal |
OLD | NEW |