| OLD | NEW |
| 1 // Copyright (c) 2015, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2015, the Dart project authors. Please see the AUTHORS file |
| 2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
| 3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
| 4 | 4 |
| 5 // A simple interpreter for the Irregexp byte code. | 5 // A simple interpreter for the Irregexp byte code. |
| 6 | 6 |
| 7 #include "vm/regexp_interpreter.h" | 7 #include "vm/regexp_interpreter.h" |
| 8 | 8 |
| 9 #include "vm/object.h" |
| 10 #include "vm/regexp_assembler.h" |
| 9 #include "vm/regexp_bytecodes.h" | 11 #include "vm/regexp_bytecodes.h" |
| 10 #include "vm/regexp_assembler.h" | 12 #include "vm/unibrow-inl.h" |
| 11 #include "vm/object.h" | 13 #include "vm/unibrow.h" |
| 12 #include "vm/unicode.h" | 14 #include "vm/unicode.h" |
| 13 #include "vm/unibrow.h" | |
| 14 #include "vm/unibrow-inl.h" | |
| 15 | 15 |
| 16 namespace dart { | 16 namespace dart { |
| 17 | 17 |
| 18 DEFINE_FLAG(bool, trace_regexp_bytecodes, false, "trace_regexp_bytecodes"); | 18 DEFINE_FLAG(bool, trace_regexp_bytecodes, false, "trace_regexp_bytecodes"); |
| 19 | 19 |
| 20 typedef unibrow::Mapping<unibrow::Ecma262Canonicalize> Canonicalize; | 20 typedef unibrow::Mapping<unibrow::Ecma262Canonicalize> Canonicalize; |
| 21 | 21 |
| 22 template <typename Char> | 22 template <typename Char> |
| 23 static bool BackRefMatchesNoCase(Canonicalize* interp_canonicalize, | 23 static bool BackRefMatchesNoCase(Canonicalize* interp_canonicalize, |
| 24 intptr_t from, | 24 intptr_t from, |
| (...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 63 if (old_char != new_char) return false; | 63 if (old_char != new_char) return false; |
| 64 // Not letters in the ASCII range and Latin-1 range. | 64 // Not letters in the ASCII range and Latin-1 range. |
| 65 if (!(old_char - 'a' <= 'z' - 'a') && | 65 if (!(old_char - 'a' <= 'z' - 'a') && |
| 66 !(old_char - 224 <= 254 - 224 && old_char != 247)) { | 66 !(old_char - 224 <= 254 - 224 && old_char != 247)) { |
| 67 return false; | 67 return false; |
| 68 } | 68 } |
| 69 } | 69 } |
| 70 return true; | 70 return true; |
| 71 } | 71 } |
| 72 | 72 |
| 73 | |
| 74 #ifdef DEBUG | 73 #ifdef DEBUG |
| 75 static void TraceInterpreter(const uint8_t* code_base, | 74 static void TraceInterpreter(const uint8_t* code_base, |
| 76 const uint8_t* pc, | 75 const uint8_t* pc, |
| 77 int stack_depth, | 76 int stack_depth, |
| 78 int current_position, | 77 int current_position, |
| 79 uint32_t current_char, | 78 uint32_t current_char, |
| 80 int bytecode_length, | 79 int bytecode_length, |
| 81 const char* bytecode_name) { | 80 const char* bytecode_name) { |
| 82 if (FLAG_trace_regexp_bytecodes) { | 81 if (FLAG_trace_regexp_bytecodes) { |
| 83 bool printable = (current_char < 127 && current_char >= 32); | 82 bool printable = (current_char < 127 && current_char >= 32); |
| (...skipping 12 matching lines...) Expand all Loading... |
| 96 if (b < 127 && b >= 32) { | 95 if (b < 127 && b >= 32) { |
| 97 OS::Print("%c", b); | 96 OS::Print("%c", b); |
| 98 } else { | 97 } else { |
| 99 OS::Print("."); | 98 OS::Print("."); |
| 100 } | 99 } |
| 101 } | 100 } |
| 102 OS::Print("\n"); | 101 OS::Print("\n"); |
| 103 } | 102 } |
| 104 } | 103 } |
| 105 | 104 |
| 106 | |
| 107 #define BYTECODE(name) \ | 105 #define BYTECODE(name) \ |
| 108 case BC_##name: \ | 106 case BC_##name: \ |
| 109 TraceInterpreter(code_base, pc, \ | 107 TraceInterpreter(code_base, pc, \ |
| 110 static_cast<int>(backtrack_sp - backtrack_stack_base), \ | 108 static_cast<int>(backtrack_sp - backtrack_stack_base), \ |
| 111 current, current_char, BC_##name##_LENGTH, #name); | 109 current, current_char, BC_##name##_LENGTH, #name); |
| 112 #else | 110 #else |
| 113 #define BYTECODE(name) case BC_##name: | 111 #define BYTECODE(name) case BC_##name: |
| 114 #endif | 112 #endif |
| 115 | 113 |
| 116 | |
| 117 static int32_t Load32Aligned(const uint8_t* pc) { | 114 static int32_t Load32Aligned(const uint8_t* pc) { |
| 118 ASSERT((reinterpret_cast<intptr_t>(pc) & 3) == 0); | 115 ASSERT((reinterpret_cast<intptr_t>(pc) & 3) == 0); |
| 119 return *reinterpret_cast<const int32_t*>(pc); | 116 return *reinterpret_cast<const int32_t*>(pc); |
| 120 } | 117 } |
| 121 | 118 |
| 122 | |
| 123 static int32_t Load16Aligned(const uint8_t* pc) { | 119 static int32_t Load16Aligned(const uint8_t* pc) { |
| 124 ASSERT((reinterpret_cast<intptr_t>(pc) & 1) == 0); | 120 ASSERT((reinterpret_cast<intptr_t>(pc) & 1) == 0); |
| 125 return *reinterpret_cast<const uint16_t*>(pc); | 121 return *reinterpret_cast<const uint16_t*>(pc); |
| 126 } | 122 } |
| 127 | 123 |
| 128 | |
| 129 // A simple abstraction over the backtracking stack used by the interpreter. | 124 // A simple abstraction over the backtracking stack used by the interpreter. |
| 130 // This backtracking stack does not grow automatically, but it ensures that the | 125 // This backtracking stack does not grow automatically, but it ensures that the |
| 131 // the memory held by the stack is released or remembered in a cache if the | 126 // the memory held by the stack is released or remembered in a cache if the |
| 132 // matching terminates. | 127 // matching terminates. |
| 133 class BacktrackStack { | 128 class BacktrackStack { |
| 134 public: | 129 public: |
| 135 explicit BacktrackStack(Zone* zone) { | 130 explicit BacktrackStack(Zone* zone) { |
| 136 data_ = zone->Alloc<intptr_t>(kBacktrackStackSize); | 131 data_ = zone->Alloc<intptr_t>(kBacktrackStackSize); |
| 137 } | 132 } |
| 138 | 133 |
| 139 intptr_t* data() const { return data_; } | 134 intptr_t* data() const { return data_; } |
| 140 | 135 |
| 141 intptr_t max_size() const { return kBacktrackStackSize; } | 136 intptr_t max_size() const { return kBacktrackStackSize; } |
| 142 | 137 |
| 143 private: | 138 private: |
| 144 static const intptr_t kBacktrackStackSize = 10000; | 139 static const intptr_t kBacktrackStackSize = 10000; |
| 145 | 140 |
| 146 intptr_t* data_; | 141 intptr_t* data_; |
| 147 | 142 |
| 148 DISALLOW_COPY_AND_ASSIGN(BacktrackStack); | 143 DISALLOW_COPY_AND_ASSIGN(BacktrackStack); |
| 149 }; | 144 }; |
| 150 | 145 |
| 151 | |
| 152 template <typename Char> | 146 template <typename Char> |
| 153 static IrregexpInterpreter::IrregexpResult RawMatch(const uint8_t* code_base, | 147 static IrregexpInterpreter::IrregexpResult RawMatch(const uint8_t* code_base, |
| 154 const String& subject, | 148 const String& subject, |
| 155 int32_t* registers, | 149 int32_t* registers, |
| 156 intptr_t current, | 150 intptr_t current, |
| 157 uint32_t current_char, | 151 uint32_t current_char, |
| 158 Zone* zone) { | 152 Zone* zone) { |
| 159 const uint8_t* pc = code_base; | 153 const uint8_t* pc = code_base; |
| 160 // BacktrackStack ensures that the memory allocated for the backtracking stack | 154 // BacktrackStack ensures that the memory allocated for the backtracking stack |
| 161 // is returned to the system or cached if there is no stack being cached at | 155 // is returned to the system or cached if there is no stack being cached at |
| (...skipping 401 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 563 pc += BC_SET_CURRENT_POSITION_FROM_END_LENGTH; | 557 pc += BC_SET_CURRENT_POSITION_FROM_END_LENGTH; |
| 564 break; | 558 break; |
| 565 } | 559 } |
| 566 default: | 560 default: |
| 567 UNREACHABLE(); | 561 UNREACHABLE(); |
| 568 break; | 562 break; |
| 569 } | 563 } |
| 570 } | 564 } |
| 571 } | 565 } |
| 572 | 566 |
| 573 | |
| 574 IrregexpInterpreter::IrregexpResult IrregexpInterpreter::Match( | 567 IrregexpInterpreter::IrregexpResult IrregexpInterpreter::Match( |
| 575 const TypedData& bytecode, | 568 const TypedData& bytecode, |
| 576 const String& subject, | 569 const String& subject, |
| 577 int32_t* registers, | 570 int32_t* registers, |
| 578 intptr_t start_position, | 571 intptr_t start_position, |
| 579 Zone* zone) { | 572 Zone* zone) { |
| 580 NoSafepointScope no_safepoint; | 573 NoSafepointScope no_safepoint; |
| 581 const uint8_t* code_base = reinterpret_cast<uint8_t*>(bytecode.DataAddr(0)); | 574 const uint8_t* code_base = reinterpret_cast<uint8_t*>(bytecode.DataAddr(0)); |
| 582 | 575 |
| 583 uint16_t previous_char = '\n'; | 576 uint16_t previous_char = '\n'; |
| 584 if (start_position != 0) { | 577 if (start_position != 0) { |
| 585 previous_char = subject.CharAt(start_position - 1); | 578 previous_char = subject.CharAt(start_position - 1); |
| 586 } | 579 } |
| 587 | 580 |
| 588 if (subject.IsOneByteString() || subject.IsExternalOneByteString()) { | 581 if (subject.IsOneByteString() || subject.IsExternalOneByteString()) { |
| 589 return RawMatch<uint8_t>(code_base, subject, registers, start_position, | 582 return RawMatch<uint8_t>(code_base, subject, registers, start_position, |
| 590 previous_char, zone); | 583 previous_char, zone); |
| 591 } else if (subject.IsTwoByteString() || subject.IsExternalTwoByteString()) { | 584 } else if (subject.IsTwoByteString() || subject.IsExternalTwoByteString()) { |
| 592 return RawMatch<uint16_t>(code_base, subject, registers, start_position, | 585 return RawMatch<uint16_t>(code_base, subject, registers, start_position, |
| 593 previous_char, zone); | 586 previous_char, zone); |
| 594 } else { | 587 } else { |
| 595 UNREACHABLE(); | 588 UNREACHABLE(); |
| 596 return IrregexpInterpreter::RE_FAILURE; | 589 return IrregexpInterpreter::RE_FAILURE; |
| 597 } | 590 } |
| 598 } | 591 } |
| 599 | 592 |
| 600 } // namespace dart | 593 } // namespace dart |
| OLD | NEW |