| OLD | NEW |
| 1 // Copyright 2011 the V8 project authors. All rights reserved. | 1 // Copyright 2011 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 15 matching lines...) Expand all Loading... |
| 26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 27 | 27 |
| 28 // A simple interpreter for the Irregexp byte code. | 28 // A simple interpreter for the Irregexp byte code. |
| 29 | 29 |
| 30 | 30 |
| 31 #include "v8.h" | 31 #include "v8.h" |
| 32 #include "unicode.h" | 32 #include "unicode.h" |
| 33 #include "utils.h" | 33 #include "utils.h" |
| 34 #include "ast.h" | 34 #include "ast.h" |
| 35 #include "bytecodes-irregexp.h" | 35 #include "bytecodes-irregexp.h" |
| 36 #include "jsregexp.h" |
| 36 #include "interpreter-irregexp.h" | 37 #include "interpreter-irregexp.h" |
| 37 | 38 |
| 38 | |
| 39 namespace v8 { | 39 namespace v8 { |
| 40 namespace internal { | 40 namespace internal { |
| 41 | 41 |
| 42 | 42 |
| 43 typedef unibrow::Mapping<unibrow::Ecma262Canonicalize> Canonicalize; | 43 typedef unibrow::Mapping<unibrow::Ecma262Canonicalize> Canonicalize; |
| 44 | 44 |
| 45 static bool BackRefMatchesNoCase(Canonicalize* interp_canonicalize, | 45 static bool BackRefMatchesNoCase(Canonicalize* interp_canonicalize, |
| 46 int from, | 46 int from, |
| 47 int current, | 47 int current, |
| 48 int len, | 48 int len, |
| (...skipping 131 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 180 static const int kBacktrackStackSize = 10000; | 180 static const int kBacktrackStackSize = 10000; |
| 181 | 181 |
| 182 int* data_; | 182 int* data_; |
| 183 Isolate* isolate_; | 183 Isolate* isolate_; |
| 184 | 184 |
| 185 DISALLOW_COPY_AND_ASSIGN(BacktrackStack); | 185 DISALLOW_COPY_AND_ASSIGN(BacktrackStack); |
| 186 }; | 186 }; |
| 187 | 187 |
| 188 | 188 |
// Reviewer note on RawMatch (left column = OLD, right column = NEW):
// Interprets the regexp bytecode that starts at code_base. Each pass of the
// loop loads one 32-bit instruction word from pc and dispatches on
// (insn & BYTECODE_MASK).
// The change shown here replaces the bool result with
// RegExpImpl::IrregexpResult: FAIL returns RE_FAILURE, SUCCEED returns
// RE_SUCCESS, and running out of backtrack stack space in PUSH_CP, PUSH_BT or
// PUSH_REGISTER returns RE_EXCEPTION where the old code returned false.
// Rows reading "(...skipping N ...)" are elisions made by the diff viewer; the
// handlers hidden behind them are not described here.
| 189 template <typename Char> | 189 template <typename Char> |
| 190 static bool RawMatch(Isolate* isolate, | 190 static RegExpImpl::IrregexpResult RawMatch(Isolate* isolate, |
| 191 const byte* code_base, | 191 const byte* code_base, |
| 192 Vector<const Char> subject, | 192 Vector<const Char> subject, |
| 193 int* registers, | 193 int* registers, |
| 194 int current, | 194 int current, |
| 195 uint32_t current_char) { | 195 uint32_t current_char) { |
| 196 const byte* pc = code_base; | 196 const byte* pc = code_base; |
| 197 // BacktrackStack ensures that the memory allocated for the backtracking stack | 197 // BacktrackStack ensures that the memory allocated for the backtracking stack |
| 198 // is returned to the system or cached if there is no stack being cached at | 198 // is returned to the system or cached if there is no stack being cached at |
| 199 // the moment. | 199 // the moment. |
| 200 BacktrackStack backtrack_stack(isolate); | 200 BacktrackStack backtrack_stack(isolate); |
| 201 int* backtrack_stack_base = backtrack_stack.data(); | 201 int* backtrack_stack_base = backtrack_stack.data(); |
| 202 int* backtrack_sp = backtrack_stack_base; | 202 int* backtrack_sp = backtrack_stack_base; |
| 203 int backtrack_stack_space = backtrack_stack.max_size(); | 203 int backtrack_stack_space = backtrack_stack.max_size(); |
| 204 #ifdef DEBUG | 204 #ifdef DEBUG |
| 205 if (FLAG_trace_regexp_bytecodes) { | 205 if (FLAG_trace_regexp_bytecodes) { |
| 206 PrintF("\n\nStart bytecode interpreter\n\n"); | 206 PrintF("\n\nStart bytecode interpreter\n\n"); |
| 207 } | 207 } |
| 208 #endif | 208 #endif |
| 209 while (true) { | 209 while (true) { |
| 210 int32_t insn = Load32Aligned(pc); | 210 int32_t insn = Load32Aligned(pc); |
| 211 switch (insn & BYTECODE_MASK) { | 211 switch (insn & BYTECODE_MASK) { |
| 212 BYTECODE(BREAK) | 212 BYTECODE(BREAK) |
| 213 UNREACHABLE(); | 213 UNREACHABLE(); |
| 214 return false; | 214 return RegExpImpl::RE_FAILURE; |
// The three PUSH_* handlers below first claim a slot by decrementing
// backtrack_stack_space (initialised from backtrack_stack.max_size()); a
// negative result means no slot is left, and the handler returns before
// writing through backtrack_sp.
| 215 BYTECODE(PUSH_CP) | 215 BYTECODE(PUSH_CP) |
| 216 if (--backtrack_stack_space < 0) { | 216 if (--backtrack_stack_space < 0) { |
| 217 return false; // No match on backtrack stack overflow. | 217 return RegExpImpl::RE_EXCEPTION; |
| 218 } | 218 } |
| 219 *backtrack_sp++ = current; | 219 *backtrack_sp++ = current; |
| 220 pc += BC_PUSH_CP_LENGTH; | 220 pc += BC_PUSH_CP_LENGTH; |
| 221 break; | 221 break; |
| 222 BYTECODE(PUSH_BT) | 222 BYTECODE(PUSH_BT) |
| 223 if (--backtrack_stack_space < 0) { | 223 if (--backtrack_stack_space < 0) { |
| 224 return false; // No match on backtrack stack overflow. | 224 return RegExpImpl::RE_EXCEPTION; |
| 225 } | 225 } |
| 226 *backtrack_sp++ = Load32Aligned(pc + 4); | 226 *backtrack_sp++ = Load32Aligned(pc + 4); |
| 227 pc += BC_PUSH_BT_LENGTH; | 227 pc += BC_PUSH_BT_LENGTH; |
| 228 break; | 228 break; |
| 229 BYTECODE(PUSH_REGISTER) | 229 BYTECODE(PUSH_REGISTER) |
| 230 if (--backtrack_stack_space < 0) { | 230 if (--backtrack_stack_space < 0) { |
| 231 return false; // No match on backtrack stack overflow. | 231 return RegExpImpl::RE_EXCEPTION; |
| 232 } | 232 } |
| 233 *backtrack_sp++ = registers[insn >> BYTECODE_SHIFT]; | 233 *backtrack_sp++ = registers[insn >> BYTECODE_SHIFT]; |
| 234 pc += BC_PUSH_REGISTER_LENGTH; | 234 pc += BC_PUSH_REGISTER_LENGTH; |
| 235 break; | 235 break; |
| 236 BYTECODE(SET_REGISTER) | 236 BYTECODE(SET_REGISTER) |
| 237 registers[insn >> BYTECODE_SHIFT] = Load32Aligned(pc + 4); | 237 registers[insn >> BYTECODE_SHIFT] = Load32Aligned(pc + 4); |
| 238 pc += BC_SET_REGISTER_LENGTH; | 238 pc += BC_SET_REGISTER_LENGTH; |
| 239 break; | 239 break; |
| 240 BYTECODE(ADVANCE_REGISTER) | 240 BYTECODE(ADVANCE_REGISTER) |
| 241 registers[insn >> BYTECODE_SHIFT] += Load32Aligned(pc + 4); | 241 registers[insn >> BYTECODE_SHIFT] += Load32Aligned(pc + 4); |
| (...skipping 29 matching lines...) Expand all Loading... |
| 271 --backtrack_sp; | 271 --backtrack_sp; |
| 272 pc = code_base + *backtrack_sp; | 272 pc = code_base + *backtrack_sp; |
| 273 break; | 273 break; |
// POP_REGISTER hands its slot back (backtrack_stack_space++) before reading
// the saved value into the register selected by the instruction operand.
| 274 BYTECODE(POP_REGISTER) | 274 BYTECODE(POP_REGISTER) |
| 275 backtrack_stack_space++; | 275 backtrack_stack_space++; |
| 276 --backtrack_sp; | 276 --backtrack_sp; |
| 277 registers[insn >> BYTECODE_SHIFT] = *backtrack_sp; | 277 registers[insn >> BYTECODE_SHIFT] = *backtrack_sp; |
| 278 pc += BC_POP_REGISTER_LENGTH; | 278 pc += BC_POP_REGISTER_LENGTH; |
| 279 break; | 279 break; |
| 280 BYTECODE(FAIL) | 280 BYTECODE(FAIL) |
| 281 return false; | 281 return RegExpImpl::RE_FAILURE; |
| 282 BYTECODE(SUCCEED) | 282 BYTECODE(SUCCEED) |
| 283 return true; | 283 return RegExpImpl::RE_SUCCESS; |
| 284 BYTECODE(ADVANCE_CP) | 284 BYTECODE(ADVANCE_CP) |
| 285 current += insn >> BYTECODE_SHIFT; | 285 current += insn >> BYTECODE_SHIFT; |
| 286 pc += BC_ADVANCE_CP_LENGTH; | 286 pc += BC_ADVANCE_CP_LENGTH; |
| 287 break; | 287 break; |
| 288 BYTECODE(GOTO) | 288 BYTECODE(GOTO) |
| 289 pc = code_base + Load32Aligned(pc + 4); | 289 pc = code_base + Load32Aligned(pc + 4); |
| 290 break; | 290 break; |
| 291 BYTECODE(ADVANCE_CP_AND_GOTO) | 291 BYTECODE(ADVANCE_CP_AND_GOTO) |
| 292 current += insn >> BYTECODE_SHIFT; | 292 current += insn >> BYTECODE_SHIFT; |
| 293 pc = code_base + Load32Aligned(pc + 4); | 293 pc = code_base + Load32Aligned(pc + 4); |
| (...skipping 324 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 618 break; | 618 break; |
| 619 } | 619 } |
| 620 default: | 620 default: |
| 621 UNREACHABLE(); | 621 UNREACHABLE(); |
| 622 break; | 622 break; |
| 623 } | 623 } |
| 624 } | 624 } |
| 625 } | 625 } |
| 626 | 626 |
| 627 | 627 |
// Reviewer note on IrregexpInterpreter::Match (left = OLD, right = NEW):
// Runs the bytecode held in code_array against subject, beginning at
// start_position, and returns whatever RawMatch returns (bool in the OLD
// column, RegExpImpl::IrregexpResult in the NEW column).
// The subject must already be flat (asserted). RawMatch is instantiated for
// char when the flat content is ASCII and for uc16 otherwise.
// The value passed to RawMatch as current_char is the character just before
// start_position, or '\n' when start_position is 0.
| 628 bool IrregexpInterpreter::Match(Isolate* isolate, | 628 RegExpImpl::IrregexpResult IrregexpInterpreter::Match( |
| 629 Handle<ByteArray> code_array, | 629 Isolate* isolate, |
| 630 Handle<String> subject, | 630 Handle<ByteArray> code_array, |
| 631 int* registers, | 631 Handle<String> subject, |
| 632 int start_position) { | 632 int* registers, |
| 633 int start_position) { |
| 633 ASSERT(subject->IsFlat()); | 634 ASSERT(subject->IsFlat()); |
| 634 | 635 |
// NOTE(review): presumably forbids heap allocation for the rest of the
// function so the raw code_base / subject_vector pointers stay valid; confirm
// against the AssertNoAllocation definition.
| 635 AssertNoAllocation a; | 636 AssertNoAllocation a; |
| 636 const byte* code_base = code_array->GetDataStartAddress(); | 637 const byte* code_base = code_array->GetDataStartAddress(); |
| 637 uc16 previous_char = '\n'; | 638 uc16 previous_char = '\n'; |
| 638 String::FlatContent subject_content = subject->GetFlatContent(); | 639 String::FlatContent subject_content = subject->GetFlatContent(); |
| 639 if (subject_content.IsAscii()) { | 640 if (subject_content.IsAscii()) { |
| 640 Vector<const char> subject_vector = subject_content.ToAsciiVector(); | 641 Vector<const char> subject_vector = subject_content.ToAsciiVector(); |
| 641 if (start_position != 0) previous_char = subject_vector[start_position - 1]; | 642 if (start_position != 0) previous_char = subject_vector[start_position - 1]; |
| 642 return RawMatch(isolate, | 643 return RawMatch(isolate, |
| 643 code_base, | 644 code_base, |
| 644 subject_vector, | 645 subject_vector, |
| 645 registers, | 646 registers, |
| 646 start_position, | 647 start_position, |
| 647 previous_char); | 648 previous_char); |
| 648 } else { | 649 } else { |
| 649 ASSERT(subject_content.IsTwoByte()); | 650 ASSERT(subject_content.IsTwoByte()); |
| 650 Vector<const uc16> subject_vector = subject_content.ToUC16Vector(); | 651 Vector<const uc16> subject_vector = subject_content.ToUC16Vector(); |
| 651 if (start_position != 0) previous_char = subject_vector[start_position - 1]; | 652 if (start_position != 0) previous_char = subject_vector[start_position - 1]; |
| 652 return RawMatch(isolate, | 653 return RawMatch(isolate, |
| 653 code_base, | 654 code_base, |
| 654 subject_vector, | 655 subject_vector, |
| 655 registers, | 656 registers, |
| 656 start_position, | 657 start_position, |
| 657 previous_char); | 658 previous_char); |
| 658 } | 659 } |
| 659 } | 660 } |
| 660 | 661 |
| 661 } } // namespace v8::internal | 662 } } // namespace v8::internal |
| OLD | NEW |