Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(408)

Side by Side Diff: runtime/vm/regexp.cc

Issue 539153002: Port and integrate the irregexp engine from V8 (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: Updated to current version Created 6 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file 1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a 2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file. 3 // BSD-style license that can be found in the LICENSE file.
4 4
5 #include "vm/regexp.h" 5 #include "vm/regexp.h"
6 6
7 #include "vm/dart_entry.h"
8 #include "vm/regexp_assembler.h"
7 #include "vm/regexp_ast.h" 9 #include "vm/regexp_ast.h"
10 #include "vm/unibrow-inl.h"
8 #include "vm/unicode.h" 11 #include "vm/unicode.h"
9 #include "vm/symbols.h" 12 #include "vm/symbols.h"
10 13
11 #define I isolate() 14 #define I isolate()
12 #define CI compiler->isolate() 15 #define CI compiler->isolate()
13 16
14 namespace dart { 17 namespace dart {
15 18
19 DECLARE_FLAG(bool, trace_irregexp);
16 20
17 #define DEFINE_ACCEPT(Type) \ 21 #define DEFINE_ACCEPT(Type) \
18 void Type##Node::Accept(NodeVisitor* visitor) { \ 22 void Type##Node::Accept(NodeVisitor* visitor) { \
19 visitor->Visit##Type(this); \ 23 visitor->Visit##Type(this); \
20 } 24 }
21 FOR_EACH_NODE_TYPE(DEFINE_ACCEPT) 25 FOR_EACH_NODE_TYPE(DEFINE_ACCEPT)
22 #undef DEFINE_ACCEPT 26 #undef DEFINE_ACCEPT
23 27
24 28
25 // Default to generating optimized regexp code. 29 // Default to generating optimized regexp code.
(...skipping 12 matching lines...) Expand all
38 static const intptr_t kSpaceRanges[] = { '\t', '\r' + 1, ' ', ' ' + 1, 42 static const intptr_t kSpaceRanges[] = { '\t', '\r' + 1, ' ', ' ' + 1,
39 0x00A0, 0x00A1, 0x1680, 0x1681, 0x180E, 0x180F, 0x2000, 0x200B, 43 0x00A0, 0x00A1, 0x1680, 0x1681, 0x180E, 0x180F, 0x2000, 0x200B,
40 0x2028, 0x202A, 0x202F, 0x2030, 0x205F, 0x2060, 0x3000, 0x3001, 44 0x2028, 0x202A, 0x202F, 0x2030, 0x205F, 0x2060, 0x3000, 0x3001,
41 0xFEFF, 0xFF00, 0x10000 }; 45 0xFEFF, 0xFF00, 0x10000 };
42 static const intptr_t kSpaceRangeCount = ARRAY_SIZE(kSpaceRanges); 46 static const intptr_t kSpaceRangeCount = ARRAY_SIZE(kSpaceRanges);
43 static const intptr_t kWordRanges[] = { 47 static const intptr_t kWordRanges[] = {
44 '0', '9' + 1, 'A', 'Z' + 1, '_', '_' + 1, 'a', 'z' + 1, 0x10000 }; 48 '0', '9' + 1, 'A', 'Z' + 1, '_', '_' + 1, 'a', 'z' + 1, 0x10000 };
45 static const intptr_t kWordRangeCount = ARRAY_SIZE(kWordRanges); 49 static const intptr_t kWordRangeCount = ARRAY_SIZE(kWordRanges);
46 static const intptr_t kDigitRanges[] = { '0', '9' + 1, 0x10000 }; 50 static const intptr_t kDigitRanges[] = { '0', '9' + 1, 0x10000 };
47 static const intptr_t kDigitRangeCount = ARRAY_SIZE(kDigitRanges); 51 static const intptr_t kDigitRangeCount = ARRAY_SIZE(kDigitRanges);
52 static const intptr_t kSurrogateRanges[] = { 0xd800, 0xe000, 0x10000 };
53 static const intptr_t kSurrogateRangeCount = ARRAY_SIZE(kSurrogateRanges);
48 static const intptr_t kLineTerminatorRanges[] = { 54 static const intptr_t kLineTerminatorRanges[] = {
49 0x000A, 0x000B, 0x000D, 0x000E, 0x2028, 0x202A, 0x10000 }; 55 0x000A, 0x000B, 0x000D, 0x000E, 0x2028, 0x202A, 0x10000 };
50 static const intptr_t kLineTerminatorRangeCount = 56 static const intptr_t kLineTerminatorRangeCount =
51 ARRAY_SIZE(kLineTerminatorRanges); 57 ARRAY_SIZE(kLineTerminatorRanges);
52 58
53 59
54 static inline void PrintUtf16(uint16_t c) { 60 static inline void PrintUtf16(uint16_t c) {
55 const char* format = (0x20 <= c && c <= 0x7F) ? 61 const char* format = (0x20 <= c && c <= 0x7F) ?
56 "%c" : (c <= 0xff) ? "\\x%02x" : "\\u%04x"; 62 "%c" : (c <= 0xff) ? "\\x%02x" : "\\u%04x";
57 OS::Print(format, c); 63 OS::Print(format, c);
(...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after
98 info->visited = true; 104 info->visited = true;
99 } 105 }
100 ~VisitMarker() { 106 ~VisitMarker() {
101 info_->visited = false; 107 info_->visited = false;
102 } 108 }
103 private: 109 private:
104 NodeInfo* info_; 110 NodeInfo* info_;
105 }; 111 };
106 112
107 113
114 class FrequencyCollator {
115 public:
116 FrequencyCollator() : total_samples_(0) {
117 for (intptr_t i = 0; i < RegExpMacroAssembler::kTableSize; i++) {
118 frequencies_[i] = CharacterFrequency(i);
119 }
120 }
121
122 void CountCharacter(intptr_t character) {
123 intptr_t index = (character & RegExpMacroAssembler::kTableMask);
124 frequencies_[index].Increment();
125 total_samples_++;
126 }
127
128 // Does not measure in percent, but rather per-128 (the table size from the
129 // regexp macro assembler).
130 intptr_t Frequency(intptr_t in_character) {
131 ASSERT((in_character & RegExpMacroAssembler::kTableMask) == in_character);
132 if (total_samples_ < 1) return 1; // Division by zero.
133 intptr_t freq_in_per128 =
134 (frequencies_[in_character].counter() * 128) / total_samples_;
135 return freq_in_per128;
136 }
137
138 private:
139 class CharacterFrequency {
140 public:
141 CharacterFrequency() : counter_(0), character_(-1) { }
142 explicit CharacterFrequency(intptr_t character)
143 : counter_(0), character_(character) { }
144
145 void Increment() { counter_++; }
146 intptr_t counter() { return counter_; }
147 intptr_t character() { return character_; }
148
149 private:
150 intptr_t counter_;
151 intptr_t character_;
152 };
153
154
155 private:
156 CharacterFrequency frequencies_[RegExpMacroAssembler::kTableSize];
157 intptr_t total_samples_;
158 };
159
160
108 class RegExpCompiler { 161 class RegExpCompiler {
109 public: 162 public:
110 RegExpCompiler(intptr_t capture_count, bool ignore_case, bool is_ascii); 163 RegExpCompiler(intptr_t capture_count,
164 bool ignore_case,
165 intptr_t specialization_cid);
111 166
112 intptr_t AllocateRegister() { 167 intptr_t AllocateRegister() {
113 return next_register_++; 168 return next_register_++;
114 } 169 }
115 170
116 RegExpEngine::CompilationResult Assemble(RegExpMacroAssembler* assembler, 171 RegExpEngine::CompilationResult Assemble(IRRegExpMacroAssembler* assembler,
117 RegExpNode* start, 172 RegExpNode* start,
118 intptr_t capture_count, 173 intptr_t capture_count,
119 const String& pattern); 174 const String& pattern);
120 175
176 inline void AddWork(RegExpNode* node) { work_list_->Add(node); }
177
121 static const intptr_t kImplementationOffset = 0; 178 static const intptr_t kImplementationOffset = 0;
122 static const intptr_t kNumberOfRegistersOffset = 0; 179 static const intptr_t kNumberOfRegistersOffset = 0;
123 static const intptr_t kCodeOffset = 1; 180 static const intptr_t kCodeOffset = 1;
124 181
125 RegExpMacroAssembler* macro_assembler() { return macro_assembler_; } 182 IRRegExpMacroAssembler* macro_assembler() { return macro_assembler_; }
126 EndNode* accept() { return accept_; } 183 EndNode* accept() { return accept_; }
127 184
128 static const intptr_t kMaxRecursion = 100; 185 static const intptr_t kMaxRecursion = 100;
129 inline intptr_t recursion_depth() { return recursion_depth_; } 186 inline intptr_t recursion_depth() { return recursion_depth_; }
130 inline void IncrementRecursionDepth() { recursion_depth_++; } 187 inline void IncrementRecursionDepth() { recursion_depth_++; }
131 inline void DecrementRecursionDepth() { recursion_depth_--; } 188 inline void DecrementRecursionDepth() { recursion_depth_--; }
132 189
133 void SetRegExpTooBig() { reg_exp_too_big_ = true; } 190 void SetRegExpTooBig() { reg_exp_too_big_ = true; }
134 191
135 inline bool ignore_case() { return ignore_case_; } 192 inline bool ignore_case() { return ignore_case_; }
136 inline bool ascii() { return ascii_; } 193 inline bool ascii() const {
194 return (specialization_cid_ == kOneByteStringCid ||
195 specialization_cid_ == kExternalOneByteStringCid);
196 }
197 inline intptr_t specialization_cid() { return specialization_cid_; }
198 FrequencyCollator* frequency_collator() { return &frequency_collator_; }
137 199
138 intptr_t current_expansion_factor() { return current_expansion_factor_; } 200 intptr_t current_expansion_factor() { return current_expansion_factor_; }
139 void set_current_expansion_factor(intptr_t value) { 201 void set_current_expansion_factor(intptr_t value) {
140 current_expansion_factor_ = value; 202 current_expansion_factor_ = value;
141 } 203 }
142 204
143 Isolate* isolate() const { return isolate_; } 205 Isolate* isolate() const { return isolate_; }
144 206
145 static const intptr_t kNoRegister = -1; 207 static const intptr_t kNoRegister = -1;
146 208
147 private: 209 private:
148 EndNode* accept_; 210 EndNode* accept_;
149 intptr_t next_register_; 211 intptr_t next_register_;
212 ZoneGrowableArray<RegExpNode*>* work_list_;
150 intptr_t recursion_depth_; 213 intptr_t recursion_depth_;
151 RegExpMacroAssembler* macro_assembler_; 214 IRRegExpMacroAssembler* macro_assembler_;
152 bool ignore_case_; 215 bool ignore_case_;
153 bool ascii_; 216 intptr_t specialization_cid_;
154 bool reg_exp_too_big_; 217 bool reg_exp_too_big_;
155 intptr_t current_expansion_factor_; 218 intptr_t current_expansion_factor_;
219 FrequencyCollator frequency_collator_;
156 Isolate* isolate_; 220 Isolate* isolate_;
157 }; 221 };
158 222
159 223
224 class RecursionCheck {
225 public:
226 explicit RecursionCheck(RegExpCompiler* compiler) : compiler_(compiler) {
227 compiler->IncrementRecursionDepth();
228 }
229 ~RecursionCheck() { compiler_->DecrementRecursionDepth(); }
230 private:
231 RegExpCompiler* compiler_;
232 };
233
234
160 // Scoped object to keep track of how much we unroll quantifier loops in the 235 // Scoped object to keep track of how much we unroll quantifier loops in the
161 // regexp graph generator. 236 // regexp graph generator.
162 class RegExpExpansionLimiter { 237 class RegExpExpansionLimiter {
163 public: 238 public:
164 static const intptr_t kMaxExpansionFactor = 6; 239 static const intptr_t kMaxExpansionFactor = 6;
165 RegExpExpansionLimiter(RegExpCompiler* compiler, intptr_t factor) 240 RegExpExpansionLimiter(RegExpCompiler* compiler, intptr_t factor)
166 : compiler_(compiler), 241 : compiler_(compiler),
167 saved_expansion_factor_(compiler->current_expansion_factor()), 242 saved_expansion_factor_(compiler->current_expansion_factor()),
168 ok_to_expand_(saved_expansion_factor_ <= kMaxExpansionFactor) { 243 ok_to_expand_(saved_expansion_factor_ <= kMaxExpansionFactor) {
169 ASSERT(factor > 0); 244 ASSERT(factor > 0);
(...skipping 534 matching lines...) Expand 10 before | Expand all | Expand 10 after
704 // we don't need to check it. 779 // we don't need to check it.
705 if (neg_replacement == NULL) return set_replacement(replacement); 780 if (neg_replacement == NULL) return set_replacement(replacement);
706 alternatives_->At(0).set_node(neg_replacement); 781 alternatives_->At(0).set_node(neg_replacement);
707 return set_replacement(this); 782 return set_replacement(this);
708 } 783 }
709 784
710 785
711 // Code emission --------------------------------------------------------------- 786 // Code emission ---------------------------------------------------------------
712 787
713 788
714 #define DEFINE_EMIT(Type) \ 789 void ChoiceNode::GenerateGuard(RegExpMacroAssembler* macro_assembler,
715 void Type##Node::Emit(RegExpCompiler* compiler, Trace* trace) { \ 790 Guard* guard,
716 UNIMPLEMENTED(); \ 791 Trace* trace) {
717 } 792 switch (guard->op()) {
718 FOR_EACH_NODE_TYPE(DEFINE_EMIT) 793 case Guard::LT:
719 DEFINE_EMIT(LoopChoice) 794 ASSERT(!trace->mentions_reg(guard->reg()));
720 void NegativeSubmatchSuccess::Emit(RegExpCompiler* compiler, 795 macro_assembler->IfRegisterGE(guard->reg(),
721 Trace* trace) { 796 guard->value(),
797 trace->backtrack());
798 break;
799 case Guard::GEQ:
800 ASSERT(!trace->mentions_reg(guard->reg()));
801 macro_assembler->IfRegisterLT(guard->reg(),
802 guard->value(),
803 trace->backtrack());
804 break;
805 }
806 }
807
808
809 void NegativeSubmatchSuccess::Emit(RegExpCompiler* compiler, Trace* trace) {
810 RegExpMacroAssembler* assembler = compiler->macro_assembler();
811
812 // Omit flushing the trace. We discard the entire stack frame anyway.
813
814 if (!label()->IsBound()) {
815 // We are completely independent of the trace, since we ignore it,
816 // so this code can be used as the generic version.
817 assembler->BindBlock(label());
818 }
819
820 // Throw away everything on the backtrack stack since the start
821 // of the negative submatch and restore the character position.
822 assembler->ReadCurrentPositionFromRegister(current_position_register_);
823 assembler->ReadStackPointerFromRegister(stack_pointer_register_);
824 if (clear_capture_count_ > 0) {
825 // Clear any captures that might have been performed during the success
826 // of the body of the negative look-ahead.
827 intptr_t clear_capture_end =
828 clear_capture_start_ + clear_capture_count_ - 1;
829 assembler->ClearRegisters(clear_capture_start_, clear_capture_end);
830 }
831 // Now that we have unwound the stack we find at the top of the stack the
832 // backtrack that the BeginSubmatch node got.
833 assembler->Backtrack();
834 }
835
836
837 bool Trace::GetStoredPosition(intptr_t reg, intptr_t* cp_offset) {
838 ASSERT(*cp_offset == 0);
839 for (DeferredAction* action = actions_;
840 action != NULL;
841 action = action->next()) {
842 if (action->Mentions(reg)) {
843 if (action->action_type() == ActionNode::STORE_POSITION) {
844 *cp_offset = static_cast<DeferredCapture*>(action)->cp_offset();
845 return true;
846 } else {
847 return false;
848 }
849 }
850 }
851 return false;
852 }
853
854
855 // This is called as we come into a loop choice node and some other tricky
856 // nodes. It normalizes the state of the code generator to ensure we can
857 // generate generic code.
858 intptr_t Trace::FindAffectedRegisters(OutSet* affected_registers,
859 Isolate* isolate) {
860 intptr_t max_register = RegExpCompiler::kNoRegister;
861 for (DeferredAction* action = actions_;
862 action != NULL;
863 action = action->next()) {
864 if (action->action_type() == ActionNode::CLEAR_CAPTURES) {
865 Interval range = static_cast<DeferredClearCaptures*>(action)->range();
866 for (intptr_t i = range.from(); i <= range.to(); i++)
867 affected_registers->Set(i, isolate);
868 if (range.to() > max_register) max_register = range.to();
869 } else {
870 affected_registers->Set(action->reg(), isolate);
871 if (action->reg() > max_register) max_register = action->reg();
872 }
873 }
874 return max_register;
875 }
876
877
878 bool Trace::DeferredAction::Mentions(intptr_t that) {
879 if (action_type() == ActionNode::CLEAR_CAPTURES) {
880 Interval range = static_cast<DeferredClearCaptures*>(this)->range();
881 return range.Contains(that);
882 } else {
883 return reg() == that;
884 }
885 }
886
887
888 bool Trace::mentions_reg(intptr_t reg) {
889 for (DeferredAction* action = actions_;
890 action != NULL;
891 action = action->next()) {
892 if (action->Mentions(reg))
893 return true;
894 }
895 return false;
896 }
897
898
899 void Trace::RestoreAffectedRegisters(RegExpMacroAssembler* assembler,
900 intptr_t max_register,
901 const OutSet& registers_to_pop,
902 const OutSet& registers_to_clear) {
903 for (intptr_t reg = max_register; reg >= 0; reg--) {
904 if (registers_to_pop.Get(reg)) {
905 assembler->PopRegister(reg);
906 } else if (registers_to_clear.Get(reg)) {
907 intptr_t clear_to = reg;
908 while (reg > 0 && registers_to_clear.Get(reg - 1)) {
909 reg--;
910 }
911 assembler->ClearRegisters(reg, clear_to);
912 }
913 }
914 }
915
916
917 void Trace::PerformDeferredActions(RegExpMacroAssembler* assembler,
918 intptr_t max_register,
919 const OutSet& affected_registers,
920 OutSet* registers_to_pop,
921 OutSet* registers_to_clear,
922 Isolate* isolate) {
923 // The "+1" is to avoid a push_limit of zero if stack_limit_slack() is 1.
924 const intptr_t push_limit = (assembler->stack_limit_slack() + 1) / 2;
925
926 // Count pushes performed to force a stack limit check occasionally.
927 intptr_t pushes = 0;
928
929 for (intptr_t reg = 0; reg <= max_register; reg++) {
930 if (!affected_registers.Get(reg)) {
931 continue;
932 }
933
934 // The chronologically first deferred action in the trace
935 // is used to infer the action needed to restore a register
936 // to its previous state (or not, if it's safe to ignore it).
937 enum DeferredActionUndoType { IGNORE, RESTORE, CLEAR };
938 DeferredActionUndoType undo_action = IGNORE;
939
940 intptr_t value = 0;
941 bool absolute = false;
942 bool clear = false;
943 intptr_t store_position = -1;
944 // This is a little tricky because we are scanning the actions in reverse
945 // historical order (newest first).
946 for (DeferredAction* action = actions_;
947 action != NULL;
948 action = action->next()) {
949 if (action->Mentions(reg)) {
950 switch (action->action_type()) {
951 case ActionNode::SET_REGISTER: {
952 Trace::DeferredSetRegister* psr =
953 static_cast<Trace::DeferredSetRegister*>(action);
954 if (!absolute) {
955 value += psr->value();
956 absolute = true;
957 }
958 // SET_REGISTER is currently only used for newly introduced loop
959 // counters. They can have a significant previous value if they
960 // occour in a loop. TODO(lrn): Propagate this information, so
961 // we can set undo_action to IGNORE if we know there is no value to
962 // restore.
963 undo_action = RESTORE;
964 ASSERT(store_position == -1);
965 ASSERT(!clear);
966 break;
967 }
968 case ActionNode::INCREMENT_REGISTER:
969 if (!absolute) {
970 value++;
971 }
972 ASSERT(store_position == -1);
973 ASSERT(!clear);
974 undo_action = RESTORE;
975 break;
976 case ActionNode::STORE_POSITION: {
977 Trace::DeferredCapture* pc =
978 static_cast<Trace::DeferredCapture*>(action);
979 if (!clear && store_position == -1) {
980 store_position = pc->cp_offset();
981 }
982
983 // For captures we know that stores and clears alternate.
984 // Other register, are never cleared, and if the occur
985 // inside a loop, they might be assigned more than once.
986 if (reg <= 1) {
987 // Registers zero and one, aka "capture zero", is
988 // always set correctly if we succeed. There is no
989 // need to undo a setting on backtrack, because we
990 // will set it again or fail.
991 undo_action = IGNORE;
992 } else {
993 undo_action = pc->is_capture() ? CLEAR : RESTORE;
994 }
995 ASSERT(!absolute);
996 ASSERT(value == 0);
997 break;
998 }
999 case ActionNode::CLEAR_CAPTURES: {
1000 // Since we're scanning in reverse order, if we've already
1001 // set the position we have to ignore historically earlier
1002 // clearing operations.
1003 if (store_position == -1) {
1004 clear = true;
1005 }
1006 undo_action = RESTORE;
1007 ASSERT(!absolute);
1008 ASSERT(value == 0);
1009 break;
1010 }
1011 default:
1012 UNREACHABLE();
1013 break;
1014 }
1015 }
1016 }
1017 // Prepare for the undo-action (e.g., push if it's going to be popped).
1018 if (undo_action == RESTORE) {
1019 pushes++;
1020 RegExpMacroAssembler::StackCheckFlag stack_check =
1021 RegExpMacroAssembler::kNoStackLimitCheck;
1022 if (pushes == push_limit) {
1023 stack_check = RegExpMacroAssembler::kCheckStackLimit;
1024 pushes = 0;
1025 }
1026
1027 assembler->PushRegister(reg, stack_check);
1028 registers_to_pop->Set(reg, isolate);
1029 } else if (undo_action == CLEAR) {
1030 registers_to_clear->Set(reg, isolate);
1031 }
1032 // Perform the chronologically last action (or accumulated increment)
1033 // for the register.
1034 if (store_position != -1) {
1035 assembler->WriteCurrentPositionToRegister(reg, store_position);
1036 } else if (clear) {
1037 assembler->ClearRegisters(reg, reg);
1038 } else if (absolute) {
1039 assembler->SetRegister(reg, value);
1040 } else if (value != 0) {
1041 assembler->AdvanceRegister(reg, value);
1042 }
1043 }
1044 }
1045
1046
1047 void Trace::Flush(RegExpCompiler* compiler, RegExpNode* successor) {
1048 RegExpMacroAssembler* assembler = compiler->macro_assembler();
1049
1050 ASSERT(!is_trivial());
1051
1052 if (actions_ == NULL && backtrack() == NULL) {
1053 // Here we just have some deferred cp advances to fix and we are back to
1054 // a normal situation. We may also have to forget some information gained
1055 // through a quick check that was already performed.
1056 if (cp_offset_ != 0) assembler->AdvanceCurrentPosition(cp_offset_);
1057 // Create a new trivial state and generate the node with that.
1058 Trace new_state;
1059 successor->Emit(compiler, &new_state);
1060 return;
1061 }
1062
1063 // Generate deferred actions here along with code to undo them again.
1064 OutSet affected_registers;
1065
1066 if (backtrack() != NULL) {
1067 // Here we have a concrete backtrack location. These are set up by choice
1068 // nodes and so they indicate that we have a deferred save of the current
1069 // position which we may need to emit here.
1070 assembler->PushCurrentPosition();
1071 }
1072
1073 intptr_t max_register = FindAffectedRegisters(&affected_registers, CI);
1074 OutSet registers_to_pop;
1075 OutSet registers_to_clear;
1076 PerformDeferredActions(assembler,
1077 max_register,
1078 affected_registers,
1079 &registers_to_pop,
1080 &registers_to_clear,
1081 CI);
1082 if (cp_offset_ != 0) {
1083 assembler->AdvanceCurrentPosition(cp_offset_);
1084 }
1085
1086 // Create a new trivial state and generate the node with that.
1087 BlockLabel undo;
1088 assembler->PushBacktrack(&undo);
1089 Trace new_state;
1090 successor->Emit(compiler, &new_state);
1091
1092 // On backtrack we need to restore state.
1093 assembler->BindBlock(&undo);
1094 RestoreAffectedRegisters(assembler,
1095 max_register,
1096 registers_to_pop,
1097 registers_to_clear);
1098 if (backtrack() == NULL) {
1099 assembler->Backtrack();
1100 } else {
1101 assembler->PopCurrentPosition();
1102 assembler->Jump(backtrack());
1103 }
1104 }
1105
1106
1107 void Trace::InvalidateCurrentCharacter() {
1108 characters_preloaded_ = 0;
1109 }
1110
1111
1112 void Trace::AdvanceCurrentPositionInTrace(intptr_t by,
1113 RegExpCompiler* compiler) {
1114 ASSERT(by > 0);
1115 // We don't have an instruction for shifting the current character register
1116 // down or for using a shifted value for anything so lets just forget that
1117 // we preloaded any characters into it.
1118 characters_preloaded_ = 0;
1119 // Adjust the offsets of the quick check performed information. This
1120 // information is used to find out what we already determined about the
1121 // characters by means of mask and compare.
1122 quick_check_performed_.Advance(by, compiler->ascii());
1123 cp_offset_ += by;
1124 if (cp_offset_ > RegExpMacroAssembler::kMaxCPOffset) {
1125 compiler->SetRegExpTooBig();
1126 cp_offset_ = 0;
1127 }
1128 bound_checked_up_to_ = Utils::Maximum(static_cast<intptr_t>(0),
1129 bound_checked_up_to_ - by);
1130 }
1131
1132
1133 void EndNode::Emit(RegExpCompiler* compiler, Trace* trace) {
1134 if (!trace->is_trivial()) {
1135 trace->Flush(compiler, this);
1136 return;
1137 }
1138 RegExpMacroAssembler* assembler = compiler->macro_assembler();
1139 if (!label()->IsBound()) {
1140 assembler->BindBlock(label());
1141 }
1142 switch (action_) {
1143 case ACCEPT:
1144 assembler->Succeed();
1145 return;
1146 case BACKTRACK:
1147 assembler->Jump(trace->backtrack());
1148 return;
1149 case NEGATIVE_SUBMATCH_SUCCESS:
1150 // This case is handled in a different virtual method.
1151 UNREACHABLE();
1152 }
722 UNIMPLEMENTED(); 1153 UNIMPLEMENTED();
723 } 1154 }
724 #undef DEFINE_EMIT 1155
725 1156
726 1157 bool QuickCheckDetails::Rationalize(bool asc) {
727 #define DEFINE_BMINFO(Type) \ 1158 bool found_useful_op = false;
728 void Type##Node::FillInBMInfo(intptr_t initial_offset, \ 1159 uint32_t char_mask;
729 intptr_t budget, \ 1160 if (asc) {
730 BoyerMooreLookahead* bm, \ 1161 char_mask = Symbols::kMaxOneCharCodeSymbol;
731 bool not_at_start) { \ 1162 } else {
732 UNIMPLEMENTED(); \ 1163 char_mask = Utf16::kMaxCodeUnit;
733 } 1164 }
734 FOR_EACH_NODE_TYPE(DEFINE_BMINFO) 1165 mask_ = 0;
735 DEFINE_BMINFO(LoopChoice) 1166 value_ = 0;
736 #undef DEFINE_BMINFO 1167 intptr_t char_shift = 0;
1168 for (intptr_t i = 0; i < characters_; i++) {
1169 Position* pos = &positions_[i];
1170 if ((pos->mask & Symbols::kMaxOneCharCodeSymbol) != 0) {
1171 found_useful_op = true;
1172 }
1173 mask_ |= (pos->mask & char_mask) << char_shift;
1174 value_ |= (pos->value & char_mask) << char_shift;
1175 char_shift += asc ? 8 : 16;
1176 }
1177 return found_useful_op;
1178 }
1179
1180
1181 bool RegExpNode::EmitQuickCheck(RegExpCompiler* compiler,
1182 Trace* trace,
1183 bool preload_has_checked_bounds,
1184 BlockLabel* on_possible_success,
1185 QuickCheckDetails* details,
1186 bool fall_through_on_failure) {
1187 if (details->characters() == 0) return false;
1188 GetQuickCheckDetails(
1189 details, compiler, 0, trace->at_start() == Trace::FALSE_VALUE);
1190 if (details->cannot_match()) return false;
1191 if (!details->Rationalize(compiler->ascii())) return false;
1192 ASSERT(details->characters() == 1 ||
1193 compiler->macro_assembler()->CanReadUnaligned());
1194 uint32_t mask = details->mask();
1195 uint32_t value = details->value();
1196
1197 RegExpMacroAssembler* assembler = compiler->macro_assembler();
1198
1199 if (trace->characters_preloaded() != details->characters()) {
1200 assembler->LoadCurrentCharacter(trace->cp_offset(),
1201 trace->backtrack(),
1202 !preload_has_checked_bounds,
1203 details->characters());
1204 }
1205
1206
1207 bool need_mask = true;
1208
1209 if (details->characters() == 1) {
1210 // If number of characters preloaded is 1 then we used a byte or 16 bit
1211 // load so the value is already masked down.
1212 uint32_t char_mask;
1213 if (compiler->ascii()) {
1214 char_mask = Symbols::kMaxOneCharCodeSymbol;
1215 } else {
1216 char_mask = Utf16::kMaxCodeUnit;
1217 }
1218 if ((mask & char_mask) == char_mask) need_mask = false;
1219 mask &= char_mask;
1220 } else {
1221 // For 2-character preloads in ASCII mode or 1-character preloads in
1222 // TWO_BYTE mode we also use a 16 bit load with zero extend.
1223 if (details->characters() == 2 && compiler->ascii()) {
1224 if ((mask & 0xffff) == 0xffff) need_mask = false;
1225 } else if (details->characters() == 1 && !compiler->ascii()) {
1226 if ((mask & 0xffff) == 0xffff) need_mask = false;
1227 } else {
1228 if (mask == 0xffffffff) need_mask = false;
1229 }
1230 }
1231
1232 if (fall_through_on_failure) {
1233 if (need_mask) {
1234 assembler->CheckCharacterAfterAnd(value, mask, on_possible_success);
1235 } else {
1236 assembler->CheckCharacter(value, on_possible_success);
1237 }
1238 } else {
1239 if (need_mask) {
1240 assembler->CheckNotCharacterAfterAnd(value, mask, trace->backtrack());
1241 } else {
1242 assembler->CheckNotCharacter(value, trace->backtrack());
1243 }
1244 }
1245 return true;
1246 }
1247
1248
1249 // Emit the code to check for a ^ in multiline mode (1-character lookbehind
1250 // that matches newline or the start of input).
1251 static void EmitHat(RegExpCompiler* compiler,
1252 RegExpNode* on_success,
1253 Trace* trace) {
1254 RegExpMacroAssembler* assembler = compiler->macro_assembler();
1255 // We will be loading the previous character into the current character
1256 // register.
1257 Trace new_trace(*trace);
1258 new_trace.InvalidateCurrentCharacter();
1259
1260 BlockLabel ok;
1261 if (new_trace.cp_offset() == 0) {
1262 // The start of input counts as a newline in this context, so skip to
1263 // ok if we are at the start.
1264 assembler->CheckAtStart(&ok);
1265 }
1266 // We already checked that we are not at the start of input so it must be
1267 // OK to load the previous character.
1268 assembler->LoadCurrentCharacter(new_trace.cp_offset() -1,
1269 new_trace.backtrack(),
1270 false);
1271 if (!assembler->CheckSpecialCharacterClass('n',
1272 new_trace.backtrack())) {
1273 // Newline means \n, \r, 0x2028 or 0x2029.
1274 if (!compiler->ascii()) {
1275 assembler->CheckCharacterAfterAnd(0x2028, 0xfffe, &ok);
1276 }
1277 assembler->CheckCharacter('\n', &ok);
1278 assembler->CheckNotCharacter('\r', new_trace.backtrack());
1279 }
1280 assembler->BindBlock(&ok);
1281 on_success->Emit(compiler, &new_trace);
1282 }
1283
1284
1285 void AssertionNode::Emit(RegExpCompiler* compiler, Trace* trace) {
1286 RegExpMacroAssembler* assembler = compiler->macro_assembler();
1287 switch (assertion_type_) {
1288 case AT_END: {
1289 BlockLabel ok;
1290 assembler->CheckPosition(trace->cp_offset(), &ok);
1291 assembler->Jump(trace->backtrack());
1292 assembler->BindBlock(&ok);
1293 break;
1294 }
1295 case AT_START: {
1296 if (trace->at_start() == Trace::FALSE_VALUE) {
1297 assembler->Jump(trace->backtrack());
1298 return;
1299 }
1300 if (trace->at_start() == Trace::UNKNOWN) {
1301 assembler->CheckNotAtStart(trace->backtrack());
1302 Trace at_start_trace = *trace;
1303 at_start_trace.set_at_start(true);
1304 on_success()->Emit(compiler, &at_start_trace);
1305 return;
1306 }
1307 }
1308 break;
1309 case AFTER_NEWLINE:
1310 EmitHat(compiler, on_success(), trace);
1311 return;
1312 case AT_BOUNDARY:
1313 case AT_NON_BOUNDARY: {
1314 EmitBoundaryCheck(compiler, trace);
1315 return;
1316 }
1317 }
1318 on_success()->Emit(compiler, trace);
1319 }
1320
1321
1322 RegExpNode::LimitResult RegExpNode::LimitVersions(RegExpCompiler* compiler,
1323 Trace* trace) {
1324 // If we are generating a greedy loop then don't stop and don't reuse code.
1325 if (trace->stop_node() != NULL) {
1326 return CONTINUE;
1327 }
1328
1329 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
1330 if (trace->is_trivial()) {
1331 if (label_.IsBound()) {
1332 // We are being asked to generate a generic version, but that's already
1333 // been done so just go to it.
1334 macro_assembler->Jump(&label_);
1335 return DONE;
1336 }
1337 if (compiler->recursion_depth() >= RegExpCompiler::kMaxRecursion) {
1338 // To avoid too deep recursion we push the node to the work queue and just
1339 // generate a goto here.
1340 compiler->AddWork(this);
1341 macro_assembler->Jump(&label_);
1342 return DONE;
1343 }
1344 // Generate generic version of the node and bind the label for later use.
1345 macro_assembler->BindBlock(&label_);
1346 return CONTINUE;
1347 }
1348
1349 // We are being asked to make a non-generic version. Keep track of how many
1350 // non-generic versions we generate so as not to overdo it.
1351 trace_count_++;
1352 if (kRegexpOptimization &&
1353 trace_count_ < kMaxCopiesCodeGenerated &&
1354 compiler->recursion_depth() <= RegExpCompiler::kMaxRecursion) {
1355 return CONTINUE;
1356 }
1357
1358 // If we get here code has been generated for this node too many times or
1359 // recursion is too deep. Time to switch to a generic version. The code for
1360 // generic versions above can handle deep recursion properly.
1361 trace->Flush(compiler, this);
1362 return DONE;
1363 }
1364
1365
1366 // This generates the code to match a text node. A text node can contain
1367 // straight character sequences (possibly to be matched in a case-independent
1368 // way) and character classes. For efficiency we do not do this in a single
1369 // pass from left to right. Instead we pass over the text node several times,
1370 // emitting code for some character positions every time. See the comment on
1371 // TextEmitPass for details.
1372 void TextNode::Emit(RegExpCompiler* compiler, Trace* trace) {
1373 LimitResult limit_result = LimitVersions(compiler, trace);
1374 if (limit_result == DONE) return;
1375 ASSERT(limit_result == CONTINUE);
1376
1377 if (trace->cp_offset() + Length() > RegExpMacroAssembler::kMaxCPOffset) {
1378 compiler->SetRegExpTooBig();
1379 return;
1380 }
1381
1382 if (compiler->ascii()) {
1383 intptr_t dummy = 0;
1384 TextEmitPass(compiler, NON_ASCII_MATCH, false, trace, false, &dummy);
1385 }
1386
1387 bool first_elt_done = false;
1388 intptr_t bound_checked_to = trace->cp_offset() - 1;
1389 bound_checked_to += trace->bound_checked_up_to();
1390
1391 // If a character is preloaded into the current character register then
1392 // check that now.
1393 if (trace->characters_preloaded() == 1) {
1394 for (intptr_t pass = kFirstRealPass; pass <= kLastPass; pass++) {
1395 if (!SkipPass(pass, compiler->ignore_case())) {
1396 TextEmitPass(compiler,
1397 static_cast<TextEmitPassType>(pass),
1398 true,
1399 trace,
1400 false,
1401 &bound_checked_to);
1402 }
1403 }
1404 first_elt_done = true;
1405 }
1406
1407 for (intptr_t pass = kFirstRealPass; pass <= kLastPass; pass++) {
1408 if (!SkipPass(pass, compiler->ignore_case())) {
1409 TextEmitPass(compiler,
1410 static_cast<TextEmitPassType>(pass),
1411 false,
1412 trace,
1413 first_elt_done,
1414 &bound_checked_to);
1415 }
1416 }
1417
1418 Trace successor_trace(*trace);
1419 successor_trace.set_at_start(false);
1420 successor_trace.AdvanceCurrentPositionInTrace(Length(), compiler);
1421 RecursionCheck rc(compiler);
1422 on_success()->Emit(compiler, &successor_trace);
1423 }
1424
1425
1426 void LoopChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
1427 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
1428 if (trace->stop_node() == this) {
1429 intptr_t text_length =
1430 GreedyLoopTextLengthForAlternative(&((*alternatives_)[0]));
1431 ASSERT(text_length != kNodeIsTooComplexForGreedyLoops);
1432 // Update the counter-based backtracking info on the stack. This is an
1433 // optimization for greedy loops (see below).
1434 ASSERT(trace->cp_offset() == text_length);
1435 macro_assembler->AdvanceCurrentPosition(text_length);
1436 macro_assembler->Jump(trace->loop_label());
1437 return;
1438 }
1439 ASSERT(trace->stop_node() == NULL);
1440 if (!trace->is_trivial()) {
1441 trace->Flush(compiler, this);
1442 return;
1443 }
1444 ChoiceNode::Emit(compiler, trace);
1445 }
1446
1447
1448 // This class is used when generating the alternatives in a choice node. It
1449 // records the way the alternative is being code generated.
1450 class AlternativeGeneration {
1451 public:
1452 AlternativeGeneration()
1453 : possible_success(),
1454 expects_preload(false),
1455 after(),
1456 quick_check_details() { }
1457 BlockLabel possible_success;
1458 bool expects_preload;
1459 BlockLabel after;
1460 QuickCheckDetails quick_check_details;
1461 };
1462
1463
1464 // Creates a list of AlternativeGenerations. If the list has a reasonable
1465 // size then it is on the stack, otherwise the excess is on the heap.
1466 class AlternativeGenerationList {
1467 public:
1468 explicit AlternativeGenerationList(intptr_t count)
1469 : alt_gens_(count) {
1470 for (intptr_t i = 0; i < count && i < kAFew; i++) {
1471 alt_gens_.Add(a_few_alt_gens_ + i);
1472 }
1473 for (intptr_t i = kAFew; i < count; i++) {
1474 alt_gens_.Add(new AlternativeGeneration());
1475 }
1476 }
1477 ~AlternativeGenerationList() {
1478 for (intptr_t i = kAFew; i < alt_gens_.length(); i++) {
1479 delete alt_gens_[i];
1480 alt_gens_[i] = NULL;
1481 }
1482 }
1483
1484 AlternativeGeneration* at(intptr_t i) {
1485 return alt_gens_[i];
1486 }
1487
1488 private:
1489 static const intptr_t kAFew = 10;
1490 GrowableArray<AlternativeGeneration*> alt_gens_;
1491 AlternativeGeneration a_few_alt_gens_[kAFew];
1492 };
1493
1494
1495 void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
1496 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
1497 intptr_t choice_count = alternatives_->length();
1498
1499 #ifdef DEBUG
1500 for (intptr_t i = 0; i < choice_count - 1; i++) {
1501 GuardedAlternative alternative = alternatives_->At(i);
1502 ZoneGrowableArray<Guard*>* guards = alternative.guards();
1503 intptr_t guard_count = (guards == NULL) ? 0 : guards->length();
1504 for (intptr_t j = 0; j < guard_count; j++) {
1505 ASSERT(!trace->mentions_reg(guards->At(j)->reg()));
1506 }
1507 }
1508 #endif
1509
1510 LimitResult limit_result = LimitVersions(compiler, trace);
1511 if (limit_result == DONE) return;
1512 ASSERT(limit_result == CONTINUE);
1513
1514 intptr_t new_flush_budget = trace->flush_budget() / choice_count;
1515 if (trace->flush_budget() == 0 && trace->actions() != NULL) {
1516 trace->Flush(compiler, this);
1517 return;
1518 }
1519
1520 RecursionCheck rc(compiler);
1521
1522 Trace* current_trace = trace;
1523
1524 intptr_t text_length =
1525 GreedyLoopTextLengthForAlternative(&((*alternatives_)[0]));
1526 bool greedy_loop = false;
1527 BlockLabel greedy_loop_label;
1528 Trace counter_backtrack_trace;
1529 counter_backtrack_trace.set_backtrack(&greedy_loop_label);
1530 if (not_at_start()) counter_backtrack_trace.set_at_start(false);
1531
1532 if (choice_count > 1 && text_length != kNodeIsTooComplexForGreedyLoops) {
1533 // Here we have special handling for greedy loops containing only text nodes
1534 // and other simple nodes. These are handled by pushing the current
1535 // position on the stack and then incrementing the current position each
1536 // time around the switch. On backtrack we decrement the current position
1537 // and check it against the pushed value. This avoids pushing backtrack
1538 // information for each iteration of the loop, which could take up a lot of
1539 // space.
1540 greedy_loop = true;
1541 ASSERT(trace->stop_node() == NULL);
1542 macro_assembler->PushCurrentPosition();
1543 current_trace = &counter_backtrack_trace;
1544 BlockLabel greedy_match_failed;
1545 Trace greedy_match_trace;
1546 if (not_at_start()) greedy_match_trace.set_at_start(false);
1547 greedy_match_trace.set_backtrack(&greedy_match_failed);
1548 BlockLabel loop_label;
1549 macro_assembler->BindBlock(&loop_label);
1550 greedy_match_trace.set_stop_node(this);
1551 greedy_match_trace.set_loop_label(&loop_label);
1552 alternatives_->At(0).node()->Emit(compiler, &greedy_match_trace);
1553 macro_assembler->BindBlock(&greedy_match_failed);
1554 }
1555
1556 BlockLabel second_choice; // For use in greedy matches.
1557 macro_assembler->BindBlock(&second_choice);
1558
1559 intptr_t first_normal_choice = greedy_loop ? 1 : 0;
1560
1561 bool not_at_start = current_trace->at_start() == Trace::FALSE_VALUE;
1562 const intptr_t kEatsAtLeastNotYetInitialized = -1;
1563 intptr_t eats_at_least = kEatsAtLeastNotYetInitialized;
1564
1565 bool skip_was_emitted = false;
1566
1567 if (!greedy_loop && choice_count == 2) {
1568 GuardedAlternative alt1 = alternatives_->At(1);
1569 if (alt1.guards() == NULL || alt1.guards()->length() == 0) {
1570 RegExpNode* eats_anything_node = alt1.node();
1571 if (eats_anything_node->GetSuccessorOfOmnivorousTextNode(compiler) ==
1572 this) {
1573 // At this point we know that we are at a non-greedy loop that will eat
1574 // any character one at a time. Any non-anchored regexp has such a
1575 // loop prepended to it in order to find where it starts. We look for
1576 // a pattern of the form ...abc... where we can look 6 characters ahead
1577 // and step forwards 3 if the character is not one of abc. Abc need
1578 // not be atoms, they can be any reasonably limited character class or
1579 // small alternation.
1580 ASSERT(trace->is_trivial()); // This is the case on LoopChoiceNodes.
1581 BoyerMooreLookahead* lookahead = bm_info(not_at_start);
1582 if (lookahead == NULL) {
1583 eats_at_least = Utils::Minimum(kMaxLookaheadForBoyerMoore,
1584 EatsAtLeast(kMaxLookaheadForBoyerMoore,
1585 kRecursionBudget,
1586 not_at_start));
1587 if (eats_at_least >= 1) {
1588 BoyerMooreLookahead* bm =
1589 new(I) BoyerMooreLookahead(eats_at_least, compiler, I);
1590 GuardedAlternative alt0 = alternatives_->At(0);
1591 alt0.node()->FillInBMInfo(0, kRecursionBudget, bm, not_at_start);
1592 skip_was_emitted = bm->EmitSkipInstructions(macro_assembler);
1593 }
1594 } else {
1595 skip_was_emitted = lookahead->EmitSkipInstructions(macro_assembler);
1596 }
1597 }
1598 }
1599 }
1600
1601 if (eats_at_least == kEatsAtLeastNotYetInitialized) {
1602 // Save some time by looking at most one machine word ahead.
1603 eats_at_least =
1604 EatsAtLeast(compiler->ascii() ? 4 : 2, kRecursionBudget, not_at_start);
1605 }
1606 intptr_t preload_characters =
1607 CalculatePreloadCharacters(compiler, eats_at_least);
1608
1609 bool preload_is_current = !skip_was_emitted &&
1610 (current_trace->characters_preloaded() == preload_characters);
1611 bool preload_has_checked_bounds = preload_is_current;
1612
1613 AlternativeGenerationList alt_gens(choice_count);
1614
1615 // For now we just call all choices one after the other. The idea ultimately
1616 // is to use the Dispatch table to try only the relevant ones.
1617 for (intptr_t i = first_normal_choice; i < choice_count; i++) {
1618 GuardedAlternative alternative = alternatives_->At(i);
1619 AlternativeGeneration* alt_gen = alt_gens.at(i);
1620 alt_gen->quick_check_details.set_characters(preload_characters);
1621 ZoneGrowableArray<Guard*>* guards = alternative.guards();
1622 intptr_t guard_count = (guards == NULL) ? 0 : guards->length();
1623 Trace new_trace(*current_trace);
1624 new_trace.set_characters_preloaded(preload_is_current ?
1625 preload_characters :
1626 0);
1627 if (preload_has_checked_bounds) {
1628 new_trace.set_bound_checked_up_to(preload_characters);
1629 }
1630 new_trace.quick_check_performed()->Clear();
1631 if (not_at_start_) new_trace.set_at_start(Trace::FALSE_VALUE);
1632 alt_gen->expects_preload = preload_is_current;
1633 bool generate_full_check_inline = false;
1634 if (kRegexpOptimization &&
1635 try_to_emit_quick_check_for_alternative(i) &&
1636 alternative.node()->EmitQuickCheck(compiler,
1637 &new_trace,
1638 preload_has_checked_bounds,
1639 &alt_gen->possible_success,
1640 &alt_gen->quick_check_details,
1641 i < choice_count - 1)) {
1642 // Quick check was generated for this choice.
1643 preload_is_current = true;
1644 preload_has_checked_bounds = true;
1645 // On the last choice in the ChoiceNode we generated the quick
1646 // check to fall through on possible success. So now we need to
1647 // generate the full check inline.
1648 if (i == choice_count - 1) {
1649 macro_assembler->BindBlock(&alt_gen->possible_success);
1650 new_trace.set_quick_check_performed(&alt_gen->quick_check_details);
1651 new_trace.set_characters_preloaded(preload_characters);
1652 new_trace.set_bound_checked_up_to(preload_characters);
1653 generate_full_check_inline = true;
1654 }
1655 } else if (alt_gen->quick_check_details.cannot_match()) {
1656 if (i == choice_count - 1 && !greedy_loop) {
1657 macro_assembler->GoTo(trace->backtrack());
1658 }
1659 continue;
1660 } else {
1661 // No quick check was generated. Put the full code here.
1662 // If this is not the first choice then there could be slow checks from
1663 // previous cases that go here when they fail. There's no reason to
1664 // insist that they preload characters since the slow check we are about
1665 // to generate probably can't use it.
1666 if (i != first_normal_choice) {
1667 alt_gen->expects_preload = false;
1668 new_trace.InvalidateCurrentCharacter();
1669 }
1670 if (i < choice_count - 1) {
1671 new_trace.set_backtrack(&alt_gen->after);
1672 }
1673 generate_full_check_inline = true;
1674 }
1675 if (generate_full_check_inline) {
1676 if (new_trace.actions() != NULL) {
1677 new_trace.set_flush_budget(new_flush_budget);
1678 }
1679 for (intptr_t j = 0; j < guard_count; j++) {
1680 GenerateGuard(macro_assembler, guards->At(j), &new_trace);
1681 }
1682 alternative.node()->Emit(compiler, &new_trace);
1683 preload_is_current = false;
1684 }
1685 macro_assembler->BindBlock(&alt_gen->after);
1686 }
1687 if (greedy_loop) {
1688 macro_assembler->BindBlock(&greedy_loop_label);
1689 // If we have unwound to the bottom then backtrack.
1690 macro_assembler->CheckGreedyLoop(trace->backtrack());
1691 // Otherwise try the second priority at an earlier position.
1692 macro_assembler->AdvanceCurrentPosition(-text_length);
1693 macro_assembler->Jump(&second_choice);
1694 }
1695
1696 // At this point we need to generate slow checks for the alternatives where
1697 // the quick check was inlined. We can recognize these because the associated
1698 // label was bound.
1699 for (intptr_t i = first_normal_choice; i < choice_count - 1; i++) {
1700 AlternativeGeneration* alt_gen = alt_gens.at(i);
1701 Trace new_trace(*current_trace);
1702 // If there are actions to be flushed we have to limit how many times
1703 // they are flushed. Take the budget of the parent trace and distribute
1704 // it fairly amongst the children.
1705 if (new_trace.actions() != NULL) {
1706 new_trace.set_flush_budget(new_flush_budget);
1707 }
1708 EmitOutOfLineContinuation(compiler,
1709 &new_trace,
1710 alternatives_->At(i),
1711 alt_gen,
1712 preload_characters,
1713 alt_gens.at(i + 1)->expects_preload);
1714 }
1715 }
1716
1717
1718 void ChoiceNode::EmitOutOfLineContinuation(RegExpCompiler* compiler,
1719 Trace* trace,
1720 GuardedAlternative alternative,
1721 AlternativeGeneration* alt_gen,
1722 intptr_t preload_characters,
1723 bool next_expects_preload) {
1724 if (!alt_gen->possible_success.IsLinked()) return;
1725
1726 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
1727 macro_assembler->BindBlock(&alt_gen->possible_success);
1728 Trace out_of_line_trace(*trace);
1729 out_of_line_trace.set_characters_preloaded(preload_characters);
1730 out_of_line_trace.set_quick_check_performed(&alt_gen->quick_check_details);
1731 if (not_at_start_) out_of_line_trace.set_at_start(Trace::FALSE_VALUE);
1732 ZoneGrowableArray<Guard*>* guards = alternative.guards();
1733 intptr_t guard_count = (guards == NULL) ? 0 : guards->length();
1734 if (next_expects_preload) {
1735 BlockLabel reload_current_char;
1736 out_of_line_trace.set_backtrack(&reload_current_char);
1737 for (intptr_t j = 0; j < guard_count; j++) {
1738 GenerateGuard(macro_assembler, guards->At(j), &out_of_line_trace);
1739 }
1740 alternative.node()->Emit(compiler, &out_of_line_trace);
1741 macro_assembler->BindBlock(&reload_current_char);
1742 // Reload the current character, since the next quick check expects that.
1743 // We don't need to check bounds here because we only get into this
1744 // code through a quick check which already did the checked load.
1745 macro_assembler->LoadCurrentCharacter(trace->cp_offset(),
1746 NULL,
1747 false,
1748 preload_characters);
1749 macro_assembler->Jump(&(alt_gen->after));
1750 } else {
1751 out_of_line_trace.set_backtrack(&(alt_gen->after));
1752 for (intptr_t j = 0; j < guard_count; j++) {
1753 GenerateGuard(macro_assembler, guards->At(j), &out_of_line_trace);
1754 }
1755 alternative.node()->Emit(compiler, &out_of_line_trace);
1756 }
1757 }
1758
1759
1760 void ActionNode::Emit(RegExpCompiler* compiler, Trace* trace) {
1761 RegExpMacroAssembler* assembler = compiler->macro_assembler();
1762 LimitResult limit_result = LimitVersions(compiler, trace);
1763 if (limit_result == DONE) return;
1764 ASSERT(limit_result == CONTINUE);
1765
1766 RecursionCheck rc(compiler);
1767
1768 switch (action_type_) {
1769 case STORE_POSITION: {
1770 Trace::DeferredCapture
1771 new_capture(data_.u_position_register.reg,
1772 data_.u_position_register.is_capture,
1773 trace);
1774 Trace new_trace = *trace;
1775 new_trace.add_action(&new_capture);
1776 on_success()->Emit(compiler, &new_trace);
1777 break;
1778 }
1779 case INCREMENT_REGISTER: {
1780 Trace::DeferredIncrementRegister
1781 new_increment(data_.u_increment_register.reg);
1782 Trace new_trace = *trace;
1783 new_trace.add_action(&new_increment);
1784 on_success()->Emit(compiler, &new_trace);
1785 break;
1786 }
1787 case SET_REGISTER: {
1788 Trace::DeferredSetRegister
1789 new_set(data_.u_store_register.reg, data_.u_store_register.value);
1790 Trace new_trace = *trace;
1791 new_trace.add_action(&new_set);
1792 on_success()->Emit(compiler, &new_trace);
1793 break;
1794 }
1795 case CLEAR_CAPTURES: {
1796 Trace::DeferredClearCaptures
1797 new_capture(Interval(data_.u_clear_captures.range_from,
1798 data_.u_clear_captures.range_to));
1799 Trace new_trace = *trace;
1800 new_trace.add_action(&new_capture);
1801 on_success()->Emit(compiler, &new_trace);
1802 break;
1803 }
1804 case BEGIN_SUBMATCH:
1805 if (!trace->is_trivial()) {
1806 trace->Flush(compiler, this);
1807 } else {
1808 assembler->WriteCurrentPositionToRegister(
1809 data_.u_submatch.current_position_register, 0);
1810 assembler->WriteStackPointerToRegister(
1811 data_.u_submatch.stack_pointer_register);
1812 on_success()->Emit(compiler, trace);
1813 }
1814 break;
1815 case EMPTY_MATCH_CHECK: {
1816 intptr_t start_pos_reg = data_.u_empty_match_check.start_register;
1817 intptr_t stored_pos = 0;
1818 intptr_t rep_reg = data_.u_empty_match_check.repetition_register;
1819 bool has_minimum = (rep_reg != RegExpCompiler::kNoRegister);
1820 bool know_dist = trace->GetStoredPosition(start_pos_reg, &stored_pos);
1821 if (know_dist && !has_minimum && stored_pos == trace->cp_offset()) {
1822 // If we know we haven't advanced and there is no minimum we
1823 // can just backtrack immediately.
1824 assembler->GoTo(trace->backtrack());
1825 } else if (know_dist && stored_pos < trace->cp_offset()) {
1826 // If we know we've advanced we can generate the continuation
1827 // immediately.
1828 on_success()->Emit(compiler, trace);
1829 } else if (!trace->is_trivial()) {
1830 trace->Flush(compiler, this);
1831 } else {
1832 BlockLabel skip_empty_check;
1833 // If we have a minimum number of repetitions we check the current
1834 // number first and skip the empty check if it's not enough.
1835 if (has_minimum) {
1836 intptr_t limit = data_.u_empty_match_check.repetition_limit;
1837 assembler->IfRegisterLT(rep_reg, limit, &skip_empty_check);
1838 }
1839 // If the match is empty we bail out, otherwise we fall through
1840 // to the on-success continuation.
1841 assembler->IfRegisterEqPos(data_.u_empty_match_check.start_register,
1842 trace->backtrack());
1843 assembler->BindBlock(&skip_empty_check);
1844 on_success()->Emit(compiler, trace);
1845 }
1846 break;
1847 }
1848 case POSITIVE_SUBMATCH_SUCCESS: {
1849 if (!trace->is_trivial()) {
1850 trace->Flush(compiler, this);
1851 return;
1852 }
1853 assembler->ReadCurrentPositionFromRegister(
1854 data_.u_submatch.current_position_register);
1855 assembler->ReadStackPointerFromRegister(
1856 data_.u_submatch.stack_pointer_register);
1857 intptr_t clear_register_count = data_.u_submatch.clear_register_count;
1858 if (clear_register_count == 0) {
1859 on_success()->Emit(compiler, trace);
1860 return;
1861 }
1862 intptr_t clear_registers_from = data_.u_submatch.clear_register_from;
1863 BlockLabel clear_registers_backtrack;
1864 Trace new_trace = *trace;
1865 new_trace.set_backtrack(&clear_registers_backtrack);
1866 on_success()->Emit(compiler, &new_trace);
1867
1868 assembler->BindBlock(&clear_registers_backtrack);
1869 intptr_t clear_registers_to =
1870 clear_registers_from + clear_register_count - 1;
1871 assembler->ClearRegisters(clear_registers_from, clear_registers_to);
1872
1873 ASSERT(trace->backtrack() == NULL);
1874 assembler->Backtrack();
1875 return;
1876 }
1877 default:
1878 UNREACHABLE();
1879 }
1880 }
1881
1882
1883 void BackReferenceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
1884 RegExpMacroAssembler* assembler = compiler->macro_assembler();
1885 if (!trace->is_trivial()) {
1886 trace->Flush(compiler, this);
1887 return;
1888 }
1889
1890 LimitResult limit_result = LimitVersions(compiler, trace);
1891 if (limit_result == DONE) return;
1892 ASSERT(limit_result == CONTINUE);
1893
1894 RecursionCheck rc(compiler);
1895
1896 ASSERT(start_reg_ + 1 == end_reg_);
1897 if (compiler->ignore_case()) {
1898 assembler->CheckNotBackReferenceIgnoreCase(start_reg_,
1899 trace->backtrack());
1900 } else {
1901 assembler->CheckNotBackReference(start_reg_, trace->backtrack());
1902 }
1903 on_success()->Emit(compiler, trace);
1904 }
1905
1906
1907 void ActionNode::FillInBMInfo(intptr_t offset,
1908 intptr_t budget,
1909 BoyerMooreLookahead* bm,
1910 bool not_at_start) {
1911 if (action_type_ == BEGIN_SUBMATCH) {
1912 bm->SetRest(offset);
1913 } else if (action_type_ != POSITIVE_SUBMATCH_SUCCESS) {
1914 on_success()->FillInBMInfo(offset, budget - 1, bm, not_at_start);
1915 }
1916 SaveBMInfo(bm, not_at_start, offset);
1917 }
1918
1919
1920 void AssertionNode::FillInBMInfo(intptr_t offset,
1921 intptr_t budget,
1922 BoyerMooreLookahead* bm,
1923 bool not_at_start) {
1924 // Match the behaviour of EatsAtLeast on this node.
1925 if (assertion_type() == AT_START && not_at_start) return;
1926 on_success()->FillInBMInfo(offset, budget - 1, bm, not_at_start);
1927 SaveBMInfo(bm, not_at_start, offset);
1928 }
1929
1930
1931 void BackReferenceNode::FillInBMInfo(intptr_t offset,
1932 intptr_t budget,
1933 BoyerMooreLookahead* bm,
1934 bool not_at_start) {
1935 // Working out the set of characters that a backreference can match is too
1936 // hard, so we just say that any character can match.
1937 bm->SetRest(offset);
1938 SaveBMInfo(bm, not_at_start, offset);
1939 }
1940
1941
1942 // Returns the number of characters in the equivalence class, omitting those
1943 // that cannot occur in the source string because it is ASCII.
1944 static intptr_t GetCaseIndependentLetters(uint16_t character,
1945 bool ascii_subject,
1946 int32_t* letters) {
1947 unibrow::Mapping<unibrow::Ecma262UnCanonicalize> jsregexp_uncanonicalize;
1948 intptr_t length = jsregexp_uncanonicalize.get(character, '\0', letters);
1949 // Unibrow returns 0 or 1 for characters where case independence is
1950 // trivial.
1951 if (length == 0) {
1952 letters[0] = character;
1953 length = 1;
1954 }
1955 if (!ascii_subject || character <= Symbols::kMaxOneCharCodeSymbol) {
1956 return length;
1957 }
1958 // The standard requires that non-ASCII characters cannot have ASCII
1959 // character codes in their equivalence class.
1960 return 0;
1961 }
1962
1963
1964 void ChoiceNode::FillInBMInfo(intptr_t offset,
1965 intptr_t budget,
1966 BoyerMooreLookahead* bm,
1967 bool not_at_start) {
1968 ZoneGrowableArray<GuardedAlternative>* alts = alternatives();
1969 budget = (budget - 1) / alts->length();
1970 for (intptr_t i = 0; i < alts->length(); i++) {
1971 GuardedAlternative& alt = (*alts)[i];
1972 if (alt.guards() != NULL && alt.guards()->length() != 0) {
1973 bm->SetRest(offset); // Give up trying to fill in info.
1974 SaveBMInfo(bm, not_at_start, offset);
1975 return;
1976 }
1977 alt.node()->FillInBMInfo(offset, budget, bm, not_at_start);
1978 }
1979 SaveBMInfo(bm, not_at_start, offset);
1980 }
1981
1982
1983 void EndNode::FillInBMInfo(intptr_t offset,
1984 intptr_t budget,
1985 BoyerMooreLookahead* bm,
1986 bool not_at_start) {
1987 // Returning 0 from EatsAtLeast should ensure we never get here.
1988 UNREACHABLE();
1989 }
1990
1991
1992 void LoopChoiceNode::FillInBMInfo(intptr_t offset,
1993 intptr_t budget,
1994 BoyerMooreLookahead* bm,
1995 bool not_at_start) {
1996 if (body_can_be_zero_length_ || budget <= 0) {
1997 bm->SetRest(offset);
1998 SaveBMInfo(bm, not_at_start, offset);
1999 return;
2000 }
2001 ChoiceNode::FillInBMInfo(offset, budget - 1, bm, not_at_start);
2002 SaveBMInfo(bm, not_at_start, offset);
2003 }
2004
2005
2006 void TextNode::FillInBMInfo(intptr_t initial_offset,
2007 intptr_t budget,
2008 BoyerMooreLookahead* bm,
2009 bool not_at_start) {
2010 if (initial_offset >= bm->length()) return;
2011 intptr_t offset = initial_offset;
2012 intptr_t max_char = bm->max_char();
2013 for (intptr_t i = 0; i < elements()->length(); i++) {
2014 if (offset >= bm->length()) {
2015 if (initial_offset == 0) set_bm_info(not_at_start, bm);
2016 return;
2017 }
2018 TextElement text = elements()->At(i);
2019 if (text.text_type() == TextElement::ATOM) {
2020 RegExpAtom* atom = text.atom();
2021 for (intptr_t j = 0; j < atom->length(); j++, offset++) {
2022 if (offset >= bm->length()) {
2023 if (initial_offset == 0) set_bm_info(not_at_start, bm);
2024 return;
2025 }
2026 uint16_t character = atom->data()->At(j);
2027 if (bm->compiler()->ignore_case()) {
2028 int32_t chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
2029 intptr_t length = GetCaseIndependentLetters(
2030 character,
2031 bm->max_char() == Symbols::kMaxOneCharCodeSymbol,
2032 chars);
2033 for (intptr_t j = 0; j < length; j++) {
2034 bm->Set(offset, chars[j]);
2035 }
2036 } else {
2037 if (character <= max_char) bm->Set(offset, character);
2038 }
2039 }
2040 } else {
2041 ASSERT(text.text_type() == TextElement::CHAR_CLASS);
2042 RegExpCharacterClass* char_class = text.char_class();
2043 ZoneGrowableArray<CharacterRange>* ranges = char_class->ranges();
2044 if (char_class->is_negated()) {
2045 bm->SetAll(offset);
2046 } else {
2047 for (intptr_t k = 0; k < ranges->length(); k++) {
2048 CharacterRange& range = (*ranges)[k];
2049 if (range.from() > max_char) continue;
2050 intptr_t to = Utils::Minimum(max_char,
2051 static_cast<intptr_t>(range.to()));
2052 bm->SetInterval(offset, Interval(range.from(), to));
2053 }
2054 }
2055 offset++;
2056 }
2057 }
2058 if (offset >= bm->length()) {
2059 if (initial_offset == 0) set_bm_info(not_at_start, bm);
2060 return;
2061 }
2062 on_success()->FillInBMInfo(offset,
2063 budget - 1,
2064 bm,
2065 true); // Not at start after a text node.
2066 if (initial_offset == 0) set_bm_info(not_at_start, bm);
2067 }
2068
2069
2070 // Check for [0-9A-Z_a-z].
2071 static void EmitWordCheck(RegExpMacroAssembler* assembler,
2072 BlockLabel* word,
2073 BlockLabel* non_word,
2074 bool fall_through_on_word) {
2075 if (assembler->CheckSpecialCharacterClass(
2076 fall_through_on_word ? 'w' : 'W',
2077 fall_through_on_word ? non_word : word)) {
2078 // Optimized implementation available.
2079 return;
2080 }
2081 assembler->CheckCharacterGT('z', non_word);
2082 assembler->CheckCharacterLT('0', non_word);
2083 assembler->CheckCharacterGT('a' - 1, word);
2084 assembler->CheckCharacterLT('9' + 1, word);
2085 assembler->CheckCharacterLT('A', non_word);
2086 assembler->CheckCharacterLT('Z' + 1, word);
2087 if (fall_through_on_word) {
2088 assembler->CheckNotCharacter('_', non_word);
2089 } else {
2090 assembler->CheckCharacter('_', word);
2091 }
2092 }
737 2093
738 2094
739 // Emit the code to handle \b and \B (word-boundary or non-word-boundary). 2095 // Emit the code to handle \b and \B (word-boundary or non-word-boundary).
740 void AssertionNode::EmitBoundaryCheck(RegExpCompiler* compiler, Trace* trace) { 2096 void AssertionNode::EmitBoundaryCheck(RegExpCompiler* compiler, Trace* trace) {
741 UNIMPLEMENTED(); 2097 RegExpMacroAssembler* assembler = compiler->macro_assembler();
2098 Trace::TriBool next_is_word_character = Trace::UNKNOWN;
2099 bool not_at_start = (trace->at_start() == Trace::FALSE_VALUE);
2100 BoyerMooreLookahead* lookahead = bm_info(not_at_start);
2101 if (lookahead == NULL) {
2102 intptr_t eats_at_least =
2103 Utils::Minimum(kMaxLookaheadForBoyerMoore,
2104 EatsAtLeast(kMaxLookaheadForBoyerMoore,
2105 kRecursionBudget,
2106 not_at_start));
2107 if (eats_at_least >= 1) {
2108 BoyerMooreLookahead* bm =
2109 new(I) BoyerMooreLookahead(eats_at_least, compiler, I);
2110 FillInBMInfo(0, kRecursionBudget, bm, not_at_start);
2111 if (bm->at(0)->is_non_word())
2112 next_is_word_character = Trace::FALSE_VALUE;
2113 if (bm->at(0)->is_word()) next_is_word_character = Trace::TRUE_VALUE;
2114 }
2115 } else {
2116 if (lookahead->at(0)->is_non_word())
2117 next_is_word_character = Trace::FALSE_VALUE;
2118 if (lookahead->at(0)->is_word())
2119 next_is_word_character = Trace::TRUE_VALUE;
2120 }
2121 bool at_boundary = (assertion_type_ == AssertionNode::AT_BOUNDARY);
2122 if (next_is_word_character == Trace::UNKNOWN) {
2123 BlockLabel before_non_word;
2124 BlockLabel before_word;
2125 if (trace->characters_preloaded() != 1) {
2126 assembler->LoadCurrentCharacter(trace->cp_offset(), &before_non_word);
2127 }
2128 // Fall through on non-word.
2129 EmitWordCheck(assembler, &before_word, &before_non_word, false);
2130 // Next character is not a word character.
2131 assembler->BindBlock(&before_non_word);
2132 BlockLabel ok;
2133 // Backtrack on \B (non-boundary check) if previous is a word,
2134 // since we know next *is not* a word and this would be a boundary.
2135 BacktrackIfPrevious(compiler, trace, at_boundary ? kIsNonWord : kIsWord);
2136
2137 if (!assembler->IsClosed()) {
2138 assembler->Jump(&ok);
2139 }
2140
2141 assembler->BindBlock(&before_word);
2142 BacktrackIfPrevious(compiler, trace, at_boundary ? kIsWord : kIsNonWord);
2143 assembler->BindBlock(&ok);
2144 } else if (next_is_word_character == Trace::TRUE_VALUE) {
2145 BacktrackIfPrevious(compiler, trace, at_boundary ? kIsWord : kIsNonWord);
2146 } else {
2147 ASSERT(next_is_word_character == Trace::FALSE_VALUE);
2148 BacktrackIfPrevious(compiler, trace, at_boundary ? kIsNonWord : kIsWord);
2149 }
742 } 2150 }
743 2151
744 2152
745 void AssertionNode::BacktrackIfPrevious( 2153 void AssertionNode::BacktrackIfPrevious(
746 RegExpCompiler* compiler, 2154 RegExpCompiler* compiler,
747 Trace* trace, 2155 Trace* trace,
748 AssertionNode::IfPrevious backtrack_if_previous) { 2156 AssertionNode::IfPrevious backtrack_if_previous) {
749 UNIMPLEMENTED(); 2157 RegExpMacroAssembler* assembler = compiler->macro_assembler();
750 } 2158 Trace new_trace(*trace);
751 2159 new_trace.InvalidateCurrentCharacter();
752 2160
2161 BlockLabel fall_through, dummy;
2162
2163 BlockLabel* non_word = backtrack_if_previous == kIsNonWord ?
2164 new_trace.backtrack() :
2165 &fall_through;
2166 BlockLabel* word = backtrack_if_previous == kIsNonWord ?
2167 &fall_through :
2168 new_trace.backtrack();
2169
2170 if (new_trace.cp_offset() == 0) {
2171 // The start of input counts as a non-word character, so the question is
2172 // decided if we are at the start.
2173 assembler->CheckAtStart(non_word);
2174 }
2175 // We already checked that we are not at the start of input so it must be
2176 // OK to load the previous character.
2177 assembler->LoadCurrentCharacter(new_trace.cp_offset() - 1, &dummy, false);
2178 EmitWordCheck(assembler, word, non_word, backtrack_if_previous == kIsNonWord);
2179
2180 assembler->BindBlock(&fall_through);
2181 on_success()->Emit(compiler, &new_trace);
2182 }
2183
2184
2185 static bool DeterminedAlready(QuickCheckDetails* quick_check, intptr_t offset) {
2186 if (quick_check == NULL) return false;
2187 if (offset >= quick_check->characters()) return false;
2188 return quick_check->positions(offset)->determines_perfectly;
2189 }
2190
2191
2192 static void UpdateBoundsCheck(intptr_t index, intptr_t* checked_up_to) {
2193 if (index > *checked_up_to) {
2194 *checked_up_to = index;
2195 }
2196 }
2197
2198
2199 typedef bool EmitCharacterFunction(Isolate* isolate,
2200 RegExpCompiler* compiler,
2201 uint16_t c,
2202 BlockLabel* on_failure,
2203 intptr_t cp_offset,
2204 bool check,
2205 bool preloaded);
2206
2207
2208 static inline bool EmitSimpleCharacter(Isolate* isolate,
2209 RegExpCompiler* compiler,
2210 uint16_t c,
2211 BlockLabel* on_failure,
2212 intptr_t cp_offset,
2213 bool check,
2214 bool preloaded) {
2215 RegExpMacroAssembler* assembler = compiler->macro_assembler();
2216 bool bound_checked = false;
2217 if (!preloaded) {
2218 assembler->LoadCurrentCharacter(
2219 cp_offset,
2220 on_failure,
2221 check);
2222 bound_checked = true;
2223 }
2224 assembler->CheckNotCharacter(c, on_failure);
2225 return bound_checked;
2226 }
2227
2228
2229 static bool ShortCutEmitCharacterPair(RegExpMacroAssembler* macro_assembler,
2230 bool ascii,
2231 uint16_t c1,
2232 uint16_t c2,
2233 BlockLabel* on_failure) {
2234 uint16_t char_mask;
2235 if (ascii) {
2236 char_mask = Symbols::kMaxOneCharCodeSymbol;
2237 } else {
2238 char_mask = Utf16::kMaxCodeUnit;
2239 }
2240 uint16_t exor = c1 ^ c2;
2241 // Check whether exor has only one bit set.
2242 if (((exor - 1) & exor) == 0) {
2243 // If c1 and c2 differ only by one bit.
2244 // Ecma262UnCanonicalize always gives the highest number last.
2245 ASSERT(c2 > c1);
2246 uint16_t mask = char_mask ^ exor;
2247 macro_assembler->CheckNotCharacterAfterAnd(c1, mask, on_failure);
2248 return true;
2249 }
2250 ASSERT(c2 > c1);
2251 uint16_t diff = c2 - c1;
2252 if (((diff - 1) & diff) == 0 && c1 >= diff) {
2253 // If the characters differ by 2^n but don't differ by one bit then
2254 // subtract the difference from the found character, then do the or
2255 // trick. We avoid the theoretical case where negative numbers are
2256 // involved in order to simplify code generation.
2257 uint16_t mask = char_mask ^ diff;
2258 macro_assembler->CheckNotCharacterAfterMinusAnd(c1 - diff,
2259 diff,
2260 mask,
2261 on_failure);
2262 return true;
2263 }
2264 return false;
2265 }
2266
2267 // Only emits letters (things that have case). Only used for case independent
2268 // matches.
2269 static inline bool EmitAtomLetter(Isolate* isolate,
2270 RegExpCompiler* compiler,
2271 uint16_t c,
2272 BlockLabel* on_failure,
2273 intptr_t cp_offset,
2274 bool check,
2275 bool preloaded) {
2276 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
2277 bool ascii = compiler->ascii();
2278 int32_t chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
2279 intptr_t length = GetCaseIndependentLetters(c, ascii, chars);
2280 if (length <= 1) return false;
2281 // We may not need to check against the end of the input string
2282 // if this character lies before a character that matched.
2283 if (!preloaded) {
2284 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check);
2285 }
2286 BlockLabel ok;
2287 ASSERT(unibrow::Ecma262UnCanonicalize::kMaxWidth == 4);
2288 switch (length) {
2289 case 2: {
2290 if (ShortCutEmitCharacterPair(macro_assembler,
2291 ascii,
2292 chars[0],
2293 chars[1],
2294 on_failure)) {
2295 } else {
2296 macro_assembler->CheckCharacter(chars[0], &ok);
2297 macro_assembler->CheckNotCharacter(chars[1], on_failure);
2298 macro_assembler->BindBlock(&ok);
2299 }
2300 break;
2301 }
2302 case 4:
2303 macro_assembler->CheckCharacter(chars[3], &ok);
2304 // Fall through!
2305 case 3:
2306 macro_assembler->CheckCharacter(chars[0], &ok);
2307 macro_assembler->CheckCharacter(chars[1], &ok);
2308 macro_assembler->CheckNotCharacter(chars[2], on_failure);
2309 macro_assembler->BindBlock(&ok);
2310 break;
2311 default:
2312 UNREACHABLE();
2313 break;
2314 }
2315 return true;
2316 }
2317
2318
2319 // Only emits non-letters (things that don't have case). Only used for case
2320 // independent matches.
2321 static inline bool EmitAtomNonLetter(Isolate* isolate,
2322 RegExpCompiler* compiler,
2323 uint16_t c,
2324 BlockLabel* on_failure,
2325 intptr_t cp_offset,
2326 bool check,
2327 bool preloaded) {
2328 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
2329 bool ascii = compiler->ascii();
2330 int32_t chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
2331 intptr_t length = GetCaseIndependentLetters(c, ascii, chars);
2332 if (length < 1) {
2333 // This can't match. Must be an ASCII subject and a non-ASCII character.
2334 // We do not need to do anything since the ASCII pass already handled this.
2335 return false; // Bounds not checked.
2336 }
2337 bool checked = false;
2338 // We handle the length > 1 case in a later pass.
2339 if (length == 1) {
2340 if (ascii && c > Symbols::kMaxOneCharCodeSymbol) {
2341 // Can't match - see above.
2342 return false; // Bounds not checked.
2343 }
2344 if (!preloaded) {
2345 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check);
2346 checked = check;
2347 }
2348 macro_assembler->CheckNotCharacter(c, on_failure);
2349 }
2350 return checked;
2351 }
2352
2353
2354 static void EmitBoundaryTest(RegExpMacroAssembler* masm,
2355 intptr_t border,
2356 BlockLabel* fall_through,
2357 BlockLabel* above_or_equal,
2358 BlockLabel* below) {
2359 if (below != fall_through) {
2360 masm->CheckCharacterLT(border, below);
2361 if (above_or_equal != fall_through) masm->Jump(above_or_equal);
2362 } else {
2363 masm->CheckCharacterGT(border - 1, above_or_equal);
2364 }
2365 }
2366
2367
2368 static void EmitDoubleBoundaryTest(RegExpMacroAssembler* masm,
2369 intptr_t first,
2370 intptr_t last,
2371 BlockLabel* fall_through,
2372 BlockLabel* in_range,
2373 BlockLabel* out_of_range) {
2374 if (in_range == fall_through) {
2375 if (first == last) {
2376 masm->CheckNotCharacter(first, out_of_range);
2377 } else {
2378 masm->CheckCharacterNotInRange(first, last, out_of_range);
2379 }
2380 } else {
2381 if (first == last) {
2382 masm->CheckCharacter(first, in_range);
2383 } else {
2384 masm->CheckCharacterInRange(first, last, in_range);
2385 }
2386 if (out_of_range != fall_through) masm->Jump(out_of_range);
2387 }
2388 }
2389
2390
2391 static void CutOutRange(RegExpMacroAssembler* masm,
2392 ZoneGrowableArray<int>* ranges,
2393 intptr_t start_index,
2394 intptr_t end_index,
2395 intptr_t cut_index,
2396 BlockLabel* even_label,
2397 BlockLabel* odd_label) {
2398 bool odd = (((cut_index - start_index) & 1) == 1);
2399 BlockLabel* in_range_label = odd ? odd_label : even_label;
2400 BlockLabel dummy;
2401 EmitDoubleBoundaryTest(masm,
2402 ranges->At(cut_index),
2403 ranges->At(cut_index + 1) - 1,
2404 &dummy,
2405 in_range_label,
2406 &dummy);
2407 ASSERT(!dummy.IsLinked());
2408 // Cut out the single range by rewriting the array. This creates a new
2409 // range that is a merger of the two ranges on either side of the one we
2410 // are cutting out. The oddity of the labels is preserved.
2411 for (intptr_t j = cut_index; j > start_index; j--) {
2412 (*ranges)[j] = ranges->At(j - 1);
2413 }
2414 for (intptr_t j = cut_index + 1; j < end_index; j++) {
2415 (*ranges)[j] = ranges->At(j + 1);
2416 }
2417 }
2418
2419
2420 // even_label is for ranges[i] to ranges[i + 1] where i - start_index is even.
2421 // odd_label is for ranges[i] to ranges[i + 1] where i - start_index is odd.
2422 static void EmitUseLookupTable(
2423 RegExpMacroAssembler* masm,
2424 ZoneGrowableArray<int>* ranges,
2425 intptr_t start_index,
2426 intptr_t end_index,
2427 intptr_t min_char,
2428 BlockLabel* fall_through,
2429 BlockLabel* even_label,
2430 BlockLabel* odd_label) {
2431 static const intptr_t kSize = RegExpMacroAssembler::kTableSize;
2432 static const intptr_t kMask = RegExpMacroAssembler::kTableMask;
2433
2434 intptr_t base = (min_char & ~kMask);
2435
2436 // Assert that everything is on one kTableSize page.
2437 for (intptr_t i = start_index; i <= end_index; i++) {
2438 ASSERT((ranges->At(i) & ~kMask) == base);
2439 }
2440 ASSERT(start_index == 0 || (ranges->At(start_index - 1) & ~kMask) <= base);
2441
2442 char templ[kSize];
2443 BlockLabel* on_bit_set;
2444 BlockLabel* on_bit_clear;
2445 intptr_t bit;
2446 if (even_label == fall_through) {
2447 on_bit_set = odd_label;
2448 on_bit_clear = even_label;
2449 bit = 1;
2450 } else {
2451 on_bit_set = even_label;
2452 on_bit_clear = odd_label;
2453 bit = 0;
2454 }
2455 for (intptr_t i = 0; i < (ranges->At(start_index) & kMask) && i < kSize;
2456 i++) {
2457 templ[i] = bit;
2458 }
2459 intptr_t j = 0;
2460 bit ^= 1;
2461 for (intptr_t i = start_index; i < end_index; i++) {
2462 for (j = (ranges->At(i) & kMask); j < (ranges->At(i + 1) & kMask); j++) {
2463 templ[j] = bit;
2464 }
2465 bit ^= 1;
2466 }
2467 for (intptr_t i = j; i < kSize; i++) {
2468 templ[i] = bit;
2469 }
2470 // TODO(erikcorry): Cache these.
2471 TypedData& ba = TypedData::ZoneHandle(
2472 masm->isolate(),
2473 TypedData::New(kTypedDataUint8ArrayCid, kSize, Heap::kOld));
2474 for (intptr_t i = 0; i < kSize; i++) {
2475 ba.SetUint8(i, templ[i]);
2476 }
2477 masm->CheckBitInTable(ba, on_bit_set);
2478 if (on_bit_clear != fall_through) masm->Jump(on_bit_clear);
2479 }
2480
2481
2482 // Unicode case. Split the search space into kSize spaces that are handled
2483 // with recursion.
2484 static void SplitSearchSpace(ZoneGrowableArray<int>* ranges,
2485 intptr_t start_index,
2486 intptr_t end_index,
2487 intptr_t* new_start_index,
2488 intptr_t* new_end_index,
2489 intptr_t* border) {
2490 static const intptr_t kSize = RegExpMacroAssembler::kTableSize;
2491 static const intptr_t kMask = RegExpMacroAssembler::kTableMask;
2492
2493 intptr_t first = ranges->At(start_index);
2494 intptr_t last = ranges->At(end_index) - 1;
2495
2496 *new_start_index = start_index;
2497 *border = (ranges->At(start_index) & ~kMask) + kSize;
2498 while (*new_start_index < end_index) {
2499 if (ranges->At(*new_start_index) > *border) break;
2500 (*new_start_index)++;
2501 }
2502 // new_start_index is the index of the first edge that is beyond the
2503 // current kSize space.
2504
2505 // For very large search spaces we do a binary chop search of the non-ASCII
2506 // space instead of just going to the end of the current kSize space. The
2507 // heuristics are complicated a little by the fact that any 128-character
2508 // encoding space can be quickly tested with a table lookup, so we don't
2509 // wish to do binary chop search at a smaller granularity than that. A
2510 // 128-character space can take up a lot of space in the ranges array if,
2511 // for example, we only want to match every second character (eg. the lower
2512 // case characters on some Unicode pages).
2513 intptr_t binary_chop_index = (end_index + start_index) / 2;
2514 // The first test ensures that we get to the code that handles the ASCII
2515 // range with a single not-taken branch, speeding up this important
2516 // character range (even non-ASCII charset-based text has spaces and
2517 // punctuation).
2518 if (*border - 1 > Symbols::kMaxOneCharCodeSymbol && // ASCII case.
2519 end_index - start_index > (*new_start_index - start_index) * 2 &&
2520 last - first > kSize * 2 &&
2521 binary_chop_index > *new_start_index &&
2522 ranges->At(binary_chop_index) >= first + 2 * kSize) {
2523 intptr_t scan_forward_for_section_border = binary_chop_index;;
2524 intptr_t new_border = (ranges->At(binary_chop_index) | kMask) + 1;
2525
2526 while (scan_forward_for_section_border < end_index) {
2527 if (ranges->At(scan_forward_for_section_border) > new_border) {
2528 *new_start_index = scan_forward_for_section_border;
2529 *border = new_border;
2530 break;
2531 }
2532 scan_forward_for_section_border++;
2533 }
2534 }
2535
2536 ASSERT(*new_start_index > start_index);
2537 *new_end_index = *new_start_index - 1;
2538 if (ranges->At(*new_end_index) == *border) {
2539 (*new_end_index)--;
2540 }
2541 if (*border >= ranges->At(end_index)) {
2542 *border = ranges->At(end_index);
2543 *new_start_index = end_index; // Won't be used.
2544 *new_end_index = end_index - 1;
2545 }
2546 }
2547
2548
2549 // Gets a series of segment boundaries representing a character class. If the
2550 // character is in the range between an even and an odd boundary (counting from
2551 // start_index) then go to even_label, otherwise go to odd_label. We already
2552 // know that the character is in the range of min_char to max_char inclusive.
2553 // Either label can be NULL indicating backtracking. Either label can also be
2554 // equal to the fall_through label.
2555 static void GenerateBranches(RegExpMacroAssembler* masm,
2556 ZoneGrowableArray<int>* ranges,
2557 intptr_t start_index,
2558 intptr_t end_index,
2559 uint16_t min_char,
2560 uint16_t max_char,
2561 BlockLabel* fall_through,
2562 BlockLabel* even_label,
2563 BlockLabel* odd_label) {
2564 intptr_t first = ranges->At(start_index);
2565 intptr_t last = ranges->At(end_index) - 1;
2566
2567 ASSERT(min_char < first);
2568
2569 // Just need to test if the character is before or on-or-after
2570 // a particular character.
2571 if (start_index == end_index) {
2572 EmitBoundaryTest(masm, first, fall_through, even_label, odd_label);
2573 return;
2574 }
2575
2576 // Another almost trivial case: There is one interval in the middle that is
2577 // different from the end intervals.
2578 if (start_index + 1 == end_index) {
2579 EmitDoubleBoundaryTest(
2580 masm, first, last, fall_through, even_label, odd_label);
2581 return;
2582 }
2583
2584 // It's not worth using table lookup if there are very few intervals in the
2585 // character class.
2586 if (end_index - start_index <= 6) {
2587 // It is faster to test for individual characters, so we look for those
2588 // first, then try arbitrary ranges in the second round.
2589 static intptr_t kNoCutIndex = -1;
2590 intptr_t cut = kNoCutIndex;
2591 for (intptr_t i = start_index; i < end_index; i++) {
2592 if (ranges->At(i) == ranges->At(i + 1) - 1) {
2593 cut = i;
2594 break;
2595 }
2596 }
2597 if (cut == kNoCutIndex) cut = start_index;
2598 CutOutRange(
2599 masm, ranges, start_index, end_index, cut, even_label, odd_label);
2600 ASSERT(end_index - start_index >= 2);
2601 GenerateBranches(masm,
2602 ranges,
2603 start_index + 1,
2604 end_index - 1,
2605 min_char,
2606 max_char,
2607 fall_through,
2608 even_label,
2609 odd_label);
2610 return;
2611 }
2612
2613 // If there are a lot of intervals in the regexp, then we will use tables to
2614 // determine whether the character is inside or outside the character class.
2615 static const intptr_t kBits = RegExpMacroAssembler::kTableSizeBits;
2616
2617 if ((max_char >> kBits) == (min_char >> kBits)) {
2618 EmitUseLookupTable(masm,
2619 ranges,
2620 start_index,
2621 end_index,
2622 min_char,
2623 fall_through,
2624 even_label,
2625 odd_label);
2626 return;
2627 }
2628
2629 if ((min_char >> kBits) != (first >> kBits)) {
2630 masm->CheckCharacterLT(first, odd_label);
2631 GenerateBranches(masm,
2632 ranges,
2633 start_index + 1,
2634 end_index,
2635 first,
2636 max_char,
2637 fall_through,
2638 odd_label,
2639 even_label);
2640 return;
2641 }
2642
2643 intptr_t new_start_index = 0;
2644 intptr_t new_end_index = 0;
2645 intptr_t border = 0;
2646
2647 SplitSearchSpace(ranges,
2648 start_index,
2649 end_index,
2650 &new_start_index,
2651 &new_end_index,
2652 &border);
2653
2654 BlockLabel handle_rest;
2655 BlockLabel* above = &handle_rest;
2656 if (border == last + 1) {
2657 // We didn't find any section that started after the limit, so everything
2658 // above the border is one of the terminal labels.
2659 above = (end_index & 1) != (start_index & 1) ? odd_label : even_label;
2660 ASSERT(new_end_index == end_index - 1);
2661 }
2662
2663 ASSERT(start_index <= new_end_index);
2664 ASSERT(new_start_index <= end_index);
2665 ASSERT(start_index < new_start_index);
2666 ASSERT(new_end_index < end_index);
2667 ASSERT(new_end_index + 1 == new_start_index ||
2668 (new_end_index + 2 == new_start_index &&
2669 border == ranges->At(new_end_index + 1)));
2670 ASSERT(min_char < border - 1);
2671 ASSERT(border < max_char);
2672 ASSERT(ranges->At(new_end_index) < border);
2673 ASSERT(border < ranges->At(new_start_index) ||
2674 (border == ranges->At(new_start_index) &&
2675 new_start_index == end_index &&
2676 new_end_index == end_index - 1 &&
2677 border == last + 1));
2678 ASSERT(new_start_index == 0 || border >= ranges->At(new_start_index - 1));
2679
2680 masm->CheckCharacterGT(border - 1, above);
2681 BlockLabel dummy;
2682 GenerateBranches(masm,
2683 ranges,
2684 start_index,
2685 new_end_index,
2686 min_char,
2687 border - 1,
2688 &dummy,
2689 even_label,
2690 odd_label);
2691
2692 if (handle_rest.IsLinked()) {
2693 masm->BindBlock(&handle_rest);
2694 bool flip = (new_start_index & 1) != (start_index & 1);
2695 GenerateBranches(masm,
2696 ranges,
2697 new_start_index,
2698 end_index,
2699 border,
2700 max_char,
2701 &dummy,
2702 flip ? odd_label : even_label,
2703 flip ? even_label : odd_label);
2704 }
2705 }
2706
2707
2708 static void EmitCharClass(RegExpMacroAssembler* macro_assembler,
2709 RegExpCharacterClass* cc,
2710 bool ascii,
2711 BlockLabel* on_failure,
2712 intptr_t cp_offset,
2713 bool check_offset,
2714 bool preloaded,
2715 Isolate* isolate) {
2716 ZoneGrowableArray<CharacterRange>* ranges = cc->ranges();
2717 if (!CharacterRange::IsCanonical(ranges)) {
2718 CharacterRange::Canonicalize(ranges);
2719 }
2720
2721 intptr_t max_char;
2722 if (ascii) {
2723 max_char = Symbols::kMaxOneCharCodeSymbol;
2724 } else {
2725 max_char = Utf16::kMaxCodeUnit;
2726 }
2727
2728 intptr_t range_count = ranges->length();
2729
2730 intptr_t last_valid_range = range_count - 1;
2731 while (last_valid_range >= 0) {
2732 CharacterRange& range = (*ranges)[last_valid_range];
2733 if (range.from() <= max_char) {
2734 break;
2735 }
2736 last_valid_range--;
2737 }
2738
2739 if (last_valid_range < 0) {
2740 if (!cc->is_negated()) {
2741 macro_assembler->GoTo(on_failure);
2742 }
2743 if (check_offset) {
2744 macro_assembler->CheckPosition(cp_offset, on_failure);
2745 }
2746 return;
2747 }
2748
2749 if (last_valid_range == 0 &&
2750 ranges->At(0).IsEverything(max_char)) {
2751 if (cc->is_negated()) {
2752 macro_assembler->GoTo(on_failure);
2753 } else {
2754 // This is a common case hit by non-anchored expressions.
2755 if (check_offset) {
2756 macro_assembler->CheckPosition(cp_offset, on_failure);
2757 }
2758 }
2759 return;
2760 }
2761 if (last_valid_range == 0 &&
2762 !cc->is_negated() &&
2763 ranges->At(0).IsEverything(max_char)) {
2764 // This is a common case hit by non-anchored expressions.
2765 if (check_offset) {
2766 macro_assembler->CheckPosition(cp_offset, on_failure);
2767 }
2768 return;
2769 }
2770
2771 if (!preloaded) {
2772 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check_offset);
2773 }
2774
2775 if (cc->is_standard() &&
2776 macro_assembler->CheckSpecialCharacterClass(cc->standard_type(),
2777 on_failure)) {
2778 return;
2779 }
2780
2781
2782 // A new list with ascending entries. Each entry is a code unit
2783 // where there is a boundary between code units that are part of
2784 // the class and code units that are not. Normally we insert an
2785 // entry at zero which goes to the failure label, but if there
2786 // was already one there we fall through for success on that entry.
2787 // Subsequent entries have alternating meaning (success/failure).
2788 ZoneGrowableArray<int>* range_boundaries =
2789 new(isolate) ZoneGrowableArray<int>(last_valid_range);
2790
2791 bool zeroth_entry_is_failure = !cc->is_negated();
2792
2793 for (intptr_t i = 0; i <= last_valid_range; i++) {
2794 CharacterRange& range = (*ranges)[i];
2795 if (range.from() == 0) {
2796 ASSERT(i == 0);
2797 zeroth_entry_is_failure = !zeroth_entry_is_failure;
2798 } else {
2799 range_boundaries->Add(range.from());
2800 }
2801 range_boundaries->Add(range.to() + 1);
2802 }
2803 intptr_t end_index = range_boundaries->length() - 1;
2804 if (range_boundaries->At(end_index) > max_char) {
2805 end_index--;
2806 }
2807
2808 BlockLabel fall_through;
2809 GenerateBranches(macro_assembler,
2810 range_boundaries,
2811 0, // start_index.
2812 end_index,
2813 0, // min_char.
2814 max_char,
2815 &fall_through,
2816 zeroth_entry_is_failure ? &fall_through : on_failure,
2817 zeroth_entry_is_failure ? on_failure : &fall_through);
2818 macro_assembler->BindBlock(&fall_through);
2819 }
2820
2821
753 // We call this repeatedly to generate code for each pass over the text node. 2822 // We call this repeatedly to generate code for each pass over the text node.
754 // The passes are in increasing order of difficulty because we hope one 2823 // The passes are in increasing order of difficulty because we hope one
755 // of the first passes will fail in which case we are saved the work of the 2824 // of the first passes will fail in which case we are saved the work of the
756 // later passes. for example for the case independent regexp /%[asdfghjkl]a/ 2825 // later passes. for example for the case independent regexp /%[asdfghjkl]a/
757 // we will check the '%' in the first pass, the case independent 'a' in the 2826 // we will check the '%' in the first pass, the case independent 'a' in the
758 // second pass and the character class in the last pass. 2827 // second pass and the character class in the last pass.
759 // 2828 //
760 // The passes are done from right to left, so for example to test for /bar/ 2829 // The passes are done from right to left, so for example to test for /bar/
761 // we will first test for an 'r' with offset 2, then an 'a' with offset 1 2830 // we will first test for an 'r' with offset 2, then an 'a' with offset 1
762 // and then a 'b' with offset 0. This means we can avoid the end-of-input 2831 // and then a 'b' with offset 0. This means we can avoid the end-of-input
(...skipping 15 matching lines...) Expand all
778 // loading characters, which means we do not need to recheck the bounds 2847 // loading characters, which means we do not need to recheck the bounds
779 // up to the limit the quick check already checked. In addition the quick 2848 // up to the limit the quick check already checked. In addition the quick
780 // check can have involved a mask and compare operation which may simplify 2849 // check can have involved a mask and compare operation which may simplify
781 // or obviate the need for further checks at some character positions. 2850 // or obviate the need for further checks at some character positions.
782 void TextNode::TextEmitPass(RegExpCompiler* compiler, 2851 void TextNode::TextEmitPass(RegExpCompiler* compiler,
783 TextEmitPassType pass, 2852 TextEmitPassType pass,
784 bool preloaded, 2853 bool preloaded,
785 Trace* trace, 2854 Trace* trace,
786 bool first_element_checked, 2855 bool first_element_checked,
787 intptr_t* checked_up_to) { 2856 intptr_t* checked_up_to) {
788 UNIMPLEMENTED(); 2857 RegExpMacroAssembler* assembler = compiler->macro_assembler();
2858 bool ascii = compiler->ascii();
2859 BlockLabel* backtrack = trace->backtrack();
2860 QuickCheckDetails* quick_check = trace->quick_check_performed();
2861 intptr_t element_count = elms_->length();
2862 for (intptr_t i = preloaded ? 0 : element_count - 1; i >= 0; i--) {
2863 TextElement elm = elms_->At(i);
2864 intptr_t cp_offset = trace->cp_offset() + elm.cp_offset();
2865 if (elm.text_type() == TextElement::ATOM) {
2866 ZoneGrowableArray<uint16_t>* quarks = elm.atom()->data();
2867 for (intptr_t j = preloaded ? 0 : quarks->length() - 1; j >= 0; j--) {
2868 if (first_element_checked && i == 0 && j == 0) continue;
2869 if (DeterminedAlready(quick_check, elm.cp_offset() + j)) continue;
2870 EmitCharacterFunction* emit_function = NULL;
2871 switch (pass) {
2872 case NON_ASCII_MATCH:
2873 ASSERT(ascii);
2874 if (quarks->At(j) > Symbols::kMaxOneCharCodeSymbol) {
2875 assembler->GoTo(backtrack);
2876 return;
2877 }
2878 break;
2879 case NON_LETTER_CHARACTER_MATCH:
2880 emit_function = &EmitAtomNonLetter;
2881 break;
2882 case SIMPLE_CHARACTER_MATCH:
2883 emit_function = &EmitSimpleCharacter;
2884 break;
2885 case CASE_CHARACTER_MATCH:
2886 emit_function = &EmitAtomLetter;
2887 break;
2888 default:
2889 break;
2890 }
2891 if (emit_function != NULL) {
2892 bool bound_checked = emit_function(I,
2893 compiler,
2894 quarks->At(j),
2895 backtrack,
2896 cp_offset + j,
2897 *checked_up_to < cp_offset + j,
2898 preloaded);
2899 if (bound_checked) UpdateBoundsCheck(cp_offset + j, checked_up_to);
2900 }
2901 }
2902 } else {
2903 ASSERT(elm.text_type() == TextElement::CHAR_CLASS);
2904 if (pass == CHARACTER_CLASS_MATCH) {
2905 if (first_element_checked && i == 0) continue;
2906 if (DeterminedAlready(quick_check, elm.cp_offset())) continue;
2907 RegExpCharacterClass* cc = elm.char_class();
2908 EmitCharClass(assembler,
2909 cc,
2910 ascii,
2911 backtrack,
2912 cp_offset,
2913 *checked_up_to < cp_offset,
2914 preloaded,
2915 I);
2916 UpdateBoundsCheck(cp_offset, checked_up_to);
2917 }
2918 }
2919 }
789 } 2920 }
790 2921
791 2922
792 intptr_t ChoiceNode::CalculatePreloadCharacters(RegExpCompiler* compiler, 2923 intptr_t ChoiceNode::CalculatePreloadCharacters(RegExpCompiler* compiler,
793 intptr_t eats_at_least) { 2924 intptr_t eats_at_least) {
794 UNIMPLEMENTED(); 2925 intptr_t preload_characters = Utils::Minimum(static_cast<intptr_t>(4),
795 return NULL; 2926 eats_at_least);
2927 if (compiler->macro_assembler()->CanReadUnaligned()) {
2928 bool ascii = compiler->ascii();
2929 if (ascii) {
2930 if (preload_characters > 4) preload_characters = 4;
2931 // We can't preload 3 characters because there is no machine instruction
2932 // to do that. We can't just load 4 because we could be reading
2933 // beyond the end of the string, which could cause a memory fault.
2934 if (preload_characters == 3) preload_characters = 2;
2935 } else {
2936 if (preload_characters > 2) preload_characters = 2;
2937 }
2938 } else {
2939 if (preload_characters > 1) preload_characters = 1;
2940 }
2941 return preload_characters;
796 } 2942 }
797 2943
798 2944
799 void ChoiceNode::EmitOutOfLineContinuation(RegExpCompiler* compiler,
800 Trace* trace,
801 GuardedAlternative alternative,
802 AlternativeGeneration* alt_gen,
803 intptr_t preload_characters,
804 bool next_expects_preload) {
805 UNIMPLEMENTED();
806 }
807
808
809 intptr_t TextNode::EatsAtLeast(intptr_t still_to_find, 2945 intptr_t TextNode::EatsAtLeast(intptr_t still_to_find,
810 intptr_t budget, 2946 intptr_t budget,
811 bool not_at_start) { 2947 bool not_at_start) {
812 intptr_t answer = Length(); 2948 intptr_t answer = Length();
813 if (answer >= still_to_find) return answer; 2949 if (answer >= still_to_find) return answer;
814 if (budget <= 0) return answer; 2950 if (budget <= 0) return answer;
815 // We are not at start after this node so we set the last argument to 'true'. 2951 // We are not at start after this node so we set the last argument to 'true'.
816 return answer + on_success()->EatsAtLeast(still_to_find - answer, 2952 return answer + on_success()->EatsAtLeast(still_to_find - answer,
817 budget - 1, 2953 budget - 1,
818 true); 2954 true);
(...skipping 71 matching lines...) Expand 10 before | Expand all | Expand 10 after
890 intptr_t budget, 3026 intptr_t budget,
891 bool not_at_start) { 3027 bool not_at_start) {
892 if (budget <= 0) return 0; 3028 if (budget <= 0) return 0;
893 // Alternative 0 is the negative lookahead, alternative 1 is what comes 3029 // Alternative 0 is the negative lookahead, alternative 1 is what comes
894 // afterwards. 3030 // afterwards.
895 RegExpNode* node = alternatives_->At(1).node(); 3031 RegExpNode* node = alternatives_->At(1).node();
896 return node->EatsAtLeast(still_to_find, budget - 1, not_at_start); 3032 return node->EatsAtLeast(still_to_find, budget - 1, not_at_start);
897 } 3033 }
898 3034
899 3035
3036 // Takes the left-most 1-bit and smears it out, setting all bits to its right.
3037 static inline uint32_t SmearBitsRight(uint32_t v) {
3038 v |= v >> 1;
3039 v |= v >> 2;
3040 v |= v >> 4;
3041 v |= v >> 8;
3042 v |= v >> 16;
3043 return v;
3044 }
3045
3046
900 // Here is the meat of GetQuickCheckDetails (see also the comment on the 3047 // Here is the meat of GetQuickCheckDetails (see also the comment on the
901 // super-class in the .h file). 3048 // super-class in the .h file).
902 // 3049 //
903 // We iterate along the text object, building up for each character a 3050 // We iterate along the text object, building up for each character a
904 // mask and value that can be used to test for a quick failure to match. 3051 // mask and value that can be used to test for a quick failure to match.
905 // The masks and values for the positions will be combined into a single 3052 // The masks and values for the positions will be combined into a single
906 // machine word for the current character width in order to be used in 3053 // machine word for the current character width in order to be used in
907 // generating a quick check. 3054 // generating a quick check.
908 void TextNode::GetQuickCheckDetails(QuickCheckDetails* details, 3055 void TextNode::GetQuickCheckDetails(QuickCheckDetails* details,
909 RegExpCompiler* compiler, 3056 RegExpCompiler* compiler,
910 intptr_t characters_filled_in, 3057 intptr_t characters_filled_in,
911 bool not_at_start) { 3058 bool not_at_start) {
912 UNIMPLEMENTED(); 3059 ASSERT(characters_filled_in < details->characters());
3060 intptr_t characters = details->characters();
3061 intptr_t char_mask;
3062 if (compiler->ascii()) {
3063 char_mask = Symbols::kMaxOneCharCodeSymbol;
3064 } else {
3065 char_mask = Utf16::kMaxCodeUnit;
3066 }
3067 for (intptr_t k = 0; k < elms_->length(); k++) {
3068 TextElement elm = elms_->At(k);
3069 if (elm.text_type() == TextElement::ATOM) {
3070 ZoneGrowableArray<uint16_t>* quarks = elm.atom()->data();
3071 for (intptr_t i = 0; i < characters && i < quarks->length(); i++) {
3072 QuickCheckDetails::Position* pos =
3073 details->positions(characters_filled_in);
3074 uint16_t c = quarks->At(i);
3075 if (c > char_mask) {
3076 // If we expect a non-ASCII character from an ASCII string,
3077 // there is no way we can match. Not even case independent
3078 // matching can turn an ASCII character into non-ASCII or
3079 // vice versa.
3080 details->set_cannot_match();
3081 pos->determines_perfectly = false;
3082 return;
3083 }
3084 if (compiler->ignore_case()) {
3085 int32_t chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
3086 intptr_t length =
3087 GetCaseIndependentLetters(c, compiler->ascii(), chars);
3088 ASSERT(length != 0); // Can only happen if c > char_mask (see above).
3089 if (length == 1) {
3090 // This letter has no case equivalents, so it's nice and simple
3091 // and the mask-compare will determine definitely whether we have
3092 // a match at this character position.
3093 pos->mask = char_mask;
3094 pos->value = c;
3095 pos->determines_perfectly = true;
3096 } else {
3097 uint32_t common_bits = char_mask;
3098 uint32_t bits = chars[0];
3099 for (intptr_t j = 1; j < length; j++) {
3100 uint32_t differing_bits = ((chars[j] & common_bits) ^ bits);
3101 common_bits ^= differing_bits;
3102 bits &= common_bits;
3103 }
3104 // If length is 2 and common bits has only one zero in it then
3105 // our mask and compare instruction will determine definitely
3106 // whether we have a match at this character position. Otherwise
3107 // it can only be an approximate check.
3108 uint32_t one_zero = (common_bits | ~char_mask);
3109 if (length == 2 && ((~one_zero) & ((~one_zero) - 1)) == 0) {
3110 pos->determines_perfectly = true;
3111 }
3112 pos->mask = common_bits;
3113 pos->value = bits;
3114 }
3115 } else {
3116 // Don't ignore case. Nice simple case where the mask-compare will
3117 // determine definitely whether we have a match at this character
3118 // position.
3119 pos->mask = char_mask;
3120 pos->value = c;
3121 pos->determines_perfectly = true;
3122 }
3123 characters_filled_in++;
3124 ASSERT(characters_filled_in <= details->characters());
3125 if (characters_filled_in == details->characters()) {
3126 return;
3127 }
3128 }
3129 } else {
3130 QuickCheckDetails::Position* pos =
3131 details->positions(characters_filled_in);
3132 RegExpCharacterClass* tree = elm.char_class();
3133 ZoneGrowableArray<CharacterRange>* ranges = tree->ranges();
3134 if (tree->is_negated()) {
3135 // A quick check uses multi-character mask and compare. There is no
3136 // useful way to incorporate a negative char class into this scheme
3137 // so we just conservatively create a mask and value that will always
3138 // succeed.
3139 pos->mask = 0;
3140 pos->value = 0;
3141 } else {
3142 intptr_t first_range = 0;
3143 while (ranges->At(first_range).from() > char_mask) {
3144 first_range++;
3145 if (first_range == ranges->length()) {
3146 details->set_cannot_match();
3147 pos->determines_perfectly = false;
3148 return;
3149 }
3150 }
3151 CharacterRange range = ranges->At(first_range);
3152 uint16_t from = range.from();
3153 uint16_t to = range.to();
3154 if (to > char_mask) {
3155 to = char_mask;
3156 }
3157 uint32_t differing_bits = (from ^ to);
3158 // A mask and compare is only perfect if the differing bits form a
3159 // number like 00011111 with one single block of trailing 1s.
3160 if ((differing_bits & (differing_bits + 1)) == 0 &&
3161 from + differing_bits == to) {
3162 pos->determines_perfectly = true;
3163 }
3164 uint32_t common_bits = ~SmearBitsRight(differing_bits);
3165 uint32_t bits = (from & common_bits);
3166 for (intptr_t i = first_range + 1; i < ranges->length(); i++) {
3167 CharacterRange range = ranges->At(i);
3168 uint16_t from = range.from();
3169 uint16_t to = range.to();
3170 if (from > char_mask) continue;
3171 if (to > char_mask) to = char_mask;
3172 // Here we are combining more ranges into the mask and compare
3173 // value. With each new range the mask becomes more sparse and
3174 // so the chances of a false positive rise. A character class
3175 // with multiple ranges is assumed never to be equivalent to a
3176 // mask and compare operation.
3177 pos->determines_perfectly = false;
3178 uint32_t new_common_bits = (from ^ to);
3179 new_common_bits = ~SmearBitsRight(new_common_bits);
3180 common_bits &= new_common_bits;
3181 bits &= new_common_bits;
3182 uint32_t differing_bits = (from & common_bits) ^ bits;
3183 common_bits ^= differing_bits;
3184 bits &= common_bits;
3185 }
3186 pos->mask = common_bits;
3187 pos->value = bits;
3188 }
3189 characters_filled_in++;
3190 ASSERT(characters_filled_in <= details->characters());
3191 if (characters_filled_in == details->characters()) {
3192 return;
3193 }
3194 }
3195 }
3196 ASSERT(characters_filled_in != details->characters());
3197 if (!details->cannot_match()) {
3198 on_success()-> GetQuickCheckDetails(details,
3199 compiler,
3200 characters_filled_in,
3201 true);
3202 }
913 } 3203 }
914 3204
915 3205
916 void LoopChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details, 3206 void LoopChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details,
917 RegExpCompiler* compiler, 3207 RegExpCompiler* compiler,
918 intptr_t characters_filled_in, 3208 intptr_t characters_filled_in,
919 bool not_at_start) { 3209 bool not_at_start) {
920 UNIMPLEMENTED(); 3210 if (body_can_be_zero_length_ || info()->visited) return;
3211 VisitMarker marker(info());
3212 return ChoiceNode::GetQuickCheckDetails(details,
3213 compiler,
3214 characters_filled_in,
3215 not_at_start);
921 } 3216 }
922 3217
923 3218
924 intptr_t TextNode::Length() { 3219 intptr_t TextNode::Length() {
925 TextElement elm = elms_->Last(); 3220 TextElement elm = elms_->Last();
926 ASSERT(elm.cp_offset() >= 0); 3221 ASSERT(elm.cp_offset() >= 0);
927 return elm.cp_offset() + elm.length(); 3222 return elm.cp_offset() + elm.length();
928 } 3223 }
929 3224
930 3225
(...skipping 518 matching lines...) Expand 10 before | Expand all | Expand 10 after
1449 Isolate* isolate) { 3744 Isolate* isolate) {
1450 uint16_t bottom = from(); 3745 uint16_t bottom = from();
1451 uint16_t top = to(); 3746 uint16_t top = to();
1452 if (is_ascii && !RangeContainsLatin1Equivalents(*this)) { 3747 if (is_ascii && !RangeContainsLatin1Equivalents(*this)) {
1453 if (bottom > Symbols::kMaxOneCharCodeSymbol) return; 3748 if (bottom > Symbols::kMaxOneCharCodeSymbol) return;
1454 if (top > Symbols::kMaxOneCharCodeSymbol) { 3749 if (top > Symbols::kMaxOneCharCodeSymbol) {
1455 top = Symbols::kMaxOneCharCodeSymbol; 3750 top = Symbols::kMaxOneCharCodeSymbol;
1456 } 3751 }
1457 } 3752 }
1458 3753
1459 UNIMPLEMENTED(); 3754 unibrow::Mapping<unibrow::Ecma262UnCanonicalize> jsregexp_uncanonicalize;
3755 unibrow::Mapping<unibrow::CanonicalizationRange> jsregexp_canonrange;
3756 int32_t chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
3757 if (top == bottom) {
3758 // If this is a singleton we just expand the one character.
3759 intptr_t length = jsregexp_uncanonicalize.get(bottom, '\0', chars); // NOLIN T
3760 for (intptr_t i = 0; i < length; i++) {
3761 uint32_t chr = chars[i];
3762 if (chr != bottom) {
3763 ranges->Add(CharacterRange::Singleton(chars[i]));
3764 }
3765 }
3766 } else {
3767 // If this is a range we expand the characters block by block,
3768 // expanding contiguous subranges (blocks) one at a time.
3769 // The approach is as follows. For a given start character we
3770 // look up the remainder of the block that contains it (represented
3771 // by the end point), for instance we find 'z' if the character
3772 // is 'c'. A block is characterized by the property
3773 // that all characters uncanonicalize in the same way, except that
3774 // each entry in the result is incremented by the distance from the first
3775 // element. So a-z is a block because 'a' uncanonicalizes to ['a', 'A'] and // NOLINT
3776 // the k'th letter uncanonicalizes to ['a' + k, 'A' + k].
3777 // Once we've found the end point we look up its uncanonicalization
3778 // and produce a range for each element. For instance for [c-f]
3779 // we look up ['z', 'Z'] and produce [c-f] and [C-F]. We then only
3780 // add a range if it is not already contained in the input, so [c-f]
3781 // will be skipped but [C-F] will be added. If this range is not
3782 // completely contained in a block we do this for all the blocks
3783 // covered by the range (handling characters that is not in a block
3784 // as a "singleton block").
3785 int32_t range[unibrow::Ecma262UnCanonicalize::kMaxWidth];
3786 intptr_t pos = bottom;
3787 while (pos <= top) {
3788 intptr_t length = jsregexp_canonrange.get(pos, '\0', range);
3789 uint16_t block_end;
3790 if (length == 0) {
3791 block_end = pos;
3792 } else {
3793 ASSERT(length == 1);
3794 block_end = range[0];
3795 }
3796 intptr_t end = (block_end > top) ? top : block_end;
3797 length = jsregexp_uncanonicalize.get(block_end, '\0', range); // NOLINT
3798 for (intptr_t i = 0; i < length; i++) {
3799 uint32_t c = range[i];
3800 uint16_t range_from = c - (block_end - pos);
3801 uint16_t range_to = c - (block_end - end);
3802 if (!(bottom <= range_from && range_to <= top)) {
3803 ranges->Add(CharacterRange(range_from, range_to));
3804 }
3805 }
3806 pos = end + 1;
3807 }
3808 }
3809 }
1460 3810
1461 // unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; 3811
1462 // if (top == bottom) { 3812 // -------------------------------------------------------------------
1463 // // If this is a singleton we just expand the one character. 3813 // Splay tree
1464 // intptr_t length = isolate->jsregexp_uncanonicalize()->get(bottom, '\0', ch ars); // NOLINT 3814
1465 // for (intptr_t i = 0; i < length; i++) { 3815
1466 // uint32_t chr = chars[i]; 3816 // Workaround for the fact that ZoneGrowableArray does not have contains().
1467 // if (chr != bottom) { 3817 static bool ArrayContains(ZoneGrowableArray<unsigned>* array,
1468 // ranges->Add(CharacterRange::Singleton(chars[i]), zone); 3818 unsigned value) {
1469 // } 3819 for (intptr_t i = 0; i < array->length(); i++) {
1470 // } 3820 if (array->At(i) == value) {
1471 // } else { 3821 return true;
1472 // // If this is a range we expand the characters block by block, 3822 }
1473 // // expanding contiguous subranges (blocks) one at a time. 3823 }
1474 // // The approach is as follows. For a given start character we 3824 return false;
1475 // // look up the remainder of the block that contains it (represented 3825 }
1476 // // by the end point), for instance we find 'z' if the character 3826
1477 // // is 'c'. A block is characterized by the property 3827
1478 // // that all characters uncanonicalize in the same way, except that 3828 void OutSet::Set(unsigned value, Isolate* isolate) {
1479 // // each entry in the result is incremented by the distance from the first 3829 if (value < kFirstLimit) {
1480 // // element. So a-z is a block because 'a' uncanonicalizes to ['a', 'A'] a nd // NOLINT 3830 first_ |= (1 << value);
1481 // // the k'th letter uncanonicalizes to ['a' + k, 'A' + k]. 3831 } else {
1482 // // Once we've found the end point we look up its uncanonicalization 3832 if (remaining_ == NULL)
1483 // // and produce a range for each element. For instance for [c-f] 3833 remaining_ = new(isolate) ZoneGrowableArray<unsigned>(1);
1484 // // we look up ['z', 'Z'] and produce [c-f] and [C-F]. We then only 3834
1485 // // add a range if it is not already contained in the input, so [c-f] 3835 bool remaining_contains_value = ArrayContains(remaining_, value);
1486 // // will be skipped but [C-F] will be added. If this range is not 3836 if (remaining_->is_empty() || !remaining_contains_value) {
1487 // // completely contained in a block we do this for all the blocks 3837 remaining_->Add(value);
1488 // // covered by the range (handling characters that is not in a block 3838 }
1489 // // as a "singleton block"). 3839 }
1490 // unibrow::uchar range[unibrow::Ecma262UnCanonicalize::kMaxWidth]; 3840 }
1491 // intptr_t pos = bottom; 3841
1492 // while (pos <= top) { 3842
1493 // intptr_t length = isolate->jsregexp_canonrange()->get(pos, '\0', range); 3843 bool OutSet::Get(unsigned value) const {
1494 // uint16_t block_end; 3844 if (value < kFirstLimit) {
1495 // if (length == 0) { 3845 return (first_ & (1 << value)) != 0;
1496 // block_end = pos; 3846 } else if (remaining_ == NULL) {
1497 // } else { 3847 return false;
1498 // ASSERT(length == 1); 3848 } else {
1499 // block_end = range[0]; 3849 return ArrayContains(remaining_, value);
1500 // } 3850 }
1501 // intptr_t end = (block_end > top) ? top : block_end;
1502 // length = isolate->jsregexp_uncanonicalize()->get(block_end, '\0', range) ; // NOLINT
1503 // for (intptr_t i = 0; i < length; i++) {
1504 // uint32_t c = range[i];
1505 // uint16_t range_from = c - (block_end - pos);
1506 // uint16_t range_to = c - (block_end - end);
1507 // if (!(bottom <= range_from && range_to <= top)) {
1508 // ranges->Add(CharacterRange(range_from, range_to));
1509 // }
1510 // }
1511 // pos = end + 1;
1512 // }
1513 // }
1514 } 3851 }
1515 3852
1516 void AssertionNode::GetQuickCheckDetails(QuickCheckDetails* details, 3853 void AssertionNode::GetQuickCheckDetails(QuickCheckDetails* details,
1517 RegExpCompiler* compiler, 3854 RegExpCompiler* compiler,
1518 intptr_t filled_in, 3855 intptr_t filled_in,
1519 bool not_at_start) { 3856 bool not_at_start) {
1520 if (assertion_type_ == AT_START && not_at_start) { 3857 if (assertion_type_ == AT_START && not_at_start) {
1521 details->set_cannot_match(); 3858 details->set_cannot_match();
1522 return; 3859 return;
1523 } 3860 }
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after
1557 intptr_t LoopChoiceNode::EatsAtLeast(intptr_t still_to_find, 3894 intptr_t LoopChoiceNode::EatsAtLeast(intptr_t still_to_find,
1558 intptr_t budget, 3895 intptr_t budget,
1559 bool not_at_start) { 3896 bool not_at_start) {
1560 return EatsAtLeastHelper(still_to_find, 3897 return EatsAtLeastHelper(still_to_find,
1561 budget - 1, 3898 budget - 1,
1562 loop_node_, 3899 loop_node_,
1563 not_at_start); 3900 not_at_start);
1564 } 3901 }
1565 3902
1566 3903
1567 class RecursionCheck {
1568 public:
1569 explicit RecursionCheck(RegExpCompiler* compiler) : compiler_(compiler) {
1570 compiler->IncrementRecursionDepth();
1571 }
1572 ~RecursionCheck() { compiler_->DecrementRecursionDepth(); }
1573 private:
1574 RegExpCompiler* compiler_;
1575 };
1576
1577
1578 DispatchTable* ChoiceNode::GetTable(bool ignore_case) {
1579 UNIMPLEMENTED();
1580 return NULL;
1581 }
1582
1583
1584 void ChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details, 3904 void ChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details,
1585 RegExpCompiler* compiler, 3905 RegExpCompiler* compiler,
1586 intptr_t characters_filled_in, 3906 intptr_t characters_filled_in,
1587 bool not_at_start) { 3907 bool not_at_start) {
1588 not_at_start = (not_at_start || not_at_start_); 3908 not_at_start = (not_at_start || not_at_start_);
1589 intptr_t choice_count = alternatives_->length(); 3909 intptr_t choice_count = alternatives_->length();
1590 ASSERT(choice_count > 0); 3910 ASSERT(choice_count > 0);
1591 alternatives_->At(0).node()->GetQuickCheckDetails(details, 3911 alternatives_->At(0).node()->GetQuickCheckDetails(details,
1592 compiler, 3912 compiler,
1593 characters_filled_in, 3913 characters_filled_in,
(...skipping 102 matching lines...) Expand 10 before | Expand all | Expand 10 after
1696 RegExpCompiler* compiler, 4016 RegExpCompiler* compiler,
1697 intptr_t filled_in, 4017 intptr_t filled_in,
1698 bool not_at_start) { 4018 bool not_at_start) {
1699 // Alternative 0 is the negative lookahead, alternative 1 is what comes 4019 // Alternative 0 is the negative lookahead, alternative 1 is what comes
1700 // afterwards. 4020 // afterwards.
1701 RegExpNode* node = alternatives_->At(1).node(); 4021 RegExpNode* node = alternatives_->At(1).node();
1702 return node->GetQuickCheckDetails(details, compiler, filled_in, not_at_start); 4022 return node->GetQuickCheckDetails(details, compiler, filled_in, not_at_start);
1703 } 4023 }
1704 4024
1705 4025
4026 static RegExpEngine::CompilationResult IrregexpRegExpTooBig() {
4027 return RegExpEngine::CompilationResult("RegExp too big");
4028 }
4029
4030
1706 // Attempts to compile the regexp using an Irregexp code generator. Returns 4031 // Attempts to compile the regexp using an Irregexp code generator. Returns
1707 // a fixed array or a null handle depending on whether it succeeded. 4032 // a fixed array or a null handle depending on whether it succeeded.
1708 RegExpCompiler::RegExpCompiler(intptr_t capture_count, bool ignore_case, 4033 RegExpCompiler::RegExpCompiler(intptr_t capture_count, bool ignore_case,
1709 bool ascii) 4034 intptr_t specialization_cid)
1710 : next_register_(2 * (capture_count + 1)), 4035 : next_register_(2 * (capture_count + 1)),
4036 work_list_(NULL),
1711 recursion_depth_(0), 4037 recursion_depth_(0),
1712 ignore_case_(ignore_case), 4038 ignore_case_(ignore_case),
1713 ascii_(ascii), 4039 specialization_cid_(specialization_cid),
1714 reg_exp_too_big_(false), 4040 reg_exp_too_big_(false),
1715 current_expansion_factor_(1), 4041 current_expansion_factor_(1),
1716 isolate_(Isolate::Current()) { 4042 isolate_(Isolate::Current()) {
1717 accept_ = new(I) EndNode(EndNode::ACCEPT, I); 4043 accept_ = new(I) EndNode(EndNode::ACCEPT, I);
1718 } 4044 }
1719 4045
1720 4046
1721 RegExpEngine::CompilationResult RegExpCompiler::Assemble( 4047 RegExpEngine::CompilationResult RegExpCompiler::Assemble(
1722 RegExpMacroAssembler* macro_assembler, 4048 IRRegExpMacroAssembler* macro_assembler,
1723 RegExpNode* start, 4049 RegExpNode* start,
1724 intptr_t capture_count, 4050 intptr_t capture_count,
1725 const String& pattern) { 4051 const String& pattern) {
1726 UNIMPLEMENTED(); 4052 static const bool use_slow_safe_regexp_compiler = false;
1727 return RegExpEngine::CompilationResult(""); 4053
4054 macro_assembler->set_slow_safe(use_slow_safe_regexp_compiler);
4055 macro_assembler_ = macro_assembler;
4056
4057 ZoneGrowableArray<RegExpNode*> work_list(0);
4058 work_list_ = &work_list;
4059 BlockLabel fail;
4060 macro_assembler_->PushBacktrack(&fail);
4061 Trace new_trace;
4062 start->Emit(this, &new_trace);
4063 macro_assembler_->BindBlock(&fail);
4064 macro_assembler_->Fail();
4065 while (!work_list.is_empty()) {
4066 work_list.RemoveLast()->Emit(this, &new_trace);
4067 }
4068 if (reg_exp_too_big_) return IrregexpRegExpTooBig();
4069
4070 macro_assembler->FinalizeIndirectGotos();
4071
4072 return RegExpEngine::CompilationResult(macro_assembler,
4073 macro_assembler->graph_entry(),
4074 macro_assembler->num_blocks(),
4075 macro_assembler->num_stack_locals());
1728 } 4076 }
1729 4077
1730 4078
1731 RegExpEngine::CompilationResult RegExpEngine::Compile( 4079 RegExpEngine::CompilationResult RegExpEngine::Compile(
1732 RegExpCompileData* data, 4080 RegExpCompileData* data,
1733 bool ignore_case, 4081 const ParsedFunction* parsed_function,
1734 bool is_global, 4082 ZoneGrowableArray<const ICData*>* ic_data_array) {
1735 bool is_multiline,
1736 const String& pattern,
1737 const String& sample_subject,
1738 bool is_ascii) {
1739 Isolate* isolate = Isolate::Current(); 4083 Isolate* isolate = Isolate::Current();
1740 4084
1741 RegExpCompiler compiler(data->capture_count, ignore_case, is_ascii); 4085 const Function& function = parsed_function->function();
4086 const intptr_t specialization_cid = function.regexp_specialization();
4087 const bool is_ascii = (specialization_cid == kOneByteStringCid ||
4088 specialization_cid == kExternalOneByteStringCid);
4089 JSRegExp& regexp = JSRegExp::Handle(isolate, function.regexp());
4090 const String& pattern = String::Handle(isolate, regexp.pattern());
4091 const String& sample_subject =
4092 String::Handle(isolate, regexp.sample_subject(specialization_cid));
1742 4093
1743 // TODO(jgruber): Character frequency sampling. 4094 ASSERT(!regexp.IsNull());
4095 ASSERT(!pattern.IsNull());
4096 ASSERT(!sample_subject.IsNull());
4097
4098 const bool ignore_case = regexp.is_ignore_case();
4099 const bool is_global = regexp.is_global();
4100
4101 RegExpCompiler compiler(data->capture_count, ignore_case, specialization_cid);
4102
4103 // Sample some characters from the middle of the string.
4104 static const intptr_t kSampleSize = 128;
4105
4106 intptr_t chars_sampled = 0;
4107 intptr_t half_way = (sample_subject.Length() - kSampleSize) / 2;
4108 for (intptr_t i = Utils::Maximum(static_cast<intptr_t>(0), half_way);
4109 i < sample_subject.Length() && chars_sampled < kSampleSize;
4110 i++, chars_sampled++) {
4111 compiler.frequency_collator()->CountCharacter(sample_subject.CharAt(i));
4112 }
1744 4113
1745 // Wrap the body of the regexp in capture #0. 4114 // Wrap the body of the regexp in capture #0.
1746 RegExpNode* captured_body = RegExpCapture::ToNode(data->tree, 4115 RegExpNode* captured_body = RegExpCapture::ToNode(data->tree,
1747 0, 4116 0,
1748 &compiler, 4117 &compiler,
1749 compiler.accept()); 4118 compiler.accept());
1750 4119
1751 RegExpNode* node = captured_body; 4120 RegExpNode* node = captured_body;
1752 bool is_start_anchored = data->tree->IsAnchoredAtStart(); 4121 bool is_start_anchored = data->tree->IsAnchoredAtStart();
4122 bool is_end_anchored = data->tree->IsAnchoredAtEnd();
4123 intptr_t max_length = data->tree->max_match();
1753 if (!is_start_anchored) { 4124 if (!is_start_anchored) {
1754 // Add a .*? at the beginning, outside the body capture, unless 4125 // Add a .*? at the beginning, outside the body capture, unless
1755 // this expression is anchored at the beginning. 4126 // this expression is anchored at the beginning.
1756 RegExpNode* loop_node = 4127 RegExpNode* loop_node =
1757 RegExpQuantifier::ToNode(0, 4128 RegExpQuantifier::ToNode(0,
1758 RegExpTree::kInfinity, 4129 RegExpTree::kInfinity,
1759 false, 4130 false,
1760 new(isolate) RegExpCharacterClass('*'), 4131 new(isolate) RegExpCharacterClass('*'),
1761 &compiler, 4132 &compiler,
1762 captured_body, 4133 captured_body,
(...skipping 23 matching lines...) Expand all
1786 4157
1787 if (node == NULL) node = new(isolate) EndNode(EndNode::BACKTRACK, isolate); 4158 if (node == NULL) node = new(isolate) EndNode(EndNode::BACKTRACK, isolate);
1788 data->node = node; 4159 data->node = node;
1789 Analysis analysis(ignore_case, is_ascii); 4160 Analysis analysis(ignore_case, is_ascii);
1790 analysis.EnsureAnalyzed(node); 4161 analysis.EnsureAnalyzed(node);
1791 if (analysis.has_failed()) { 4162 if (analysis.has_failed()) {
1792 const char* error_message = analysis.error_message(); 4163 const char* error_message = analysis.error_message();
1793 return CompilationResult(error_message); 4164 return CompilationResult(error_message);
1794 } 4165 }
1795 4166
1796 // TODO(jgruber): Assemble native code. 4167 // Native regexp implementation.
1797 4168
1798 return RegExpEngine::CompilationResult(""); 4169 IRRegExpMacroAssembler* macro_assembler =
1799 } 4170 new(isolate) IRRegExpMacroAssembler(specialization_cid,
1800 4171 data->capture_count,
1801 4172 parsed_function,
4173 ic_data_array,
4174 isolate);
4175
4176 // Inserted here, instead of in Assembler, because it depends on information
4177 // in the AST that isn't replicated in the Node structure.
4178 static const intptr_t kMaxBacksearchLimit = 1024;
4179 if (is_end_anchored &&
4180 !is_start_anchored &&
4181 max_length < kMaxBacksearchLimit) {
4182 macro_assembler->SetCurrentPositionFromEnd(max_length);
4183 }
4184
4185 if (is_global) {
4186 macro_assembler->set_global_mode(
4187 (data->tree->min_match() > 0)
4188 ? RegExpMacroAssembler::GLOBAL_NO_ZERO_LENGTH_CHECK
4189 : RegExpMacroAssembler::GLOBAL);
4190 }
4191
4192 RegExpEngine::CompilationResult result =
4193 compiler.Assemble(macro_assembler,
4194 node,
4195 data->capture_count,
4196 pattern);
4197
4198 if (FLAG_trace_irregexp) {
4199 macro_assembler->PrintBlocks();
4200 }
4201
4202 return result;
4203 }
4204
4205
4206 static void CreateSpecializedFunction(Isolate* isolate,
4207 JSRegExp* regexp,
4208 intptr_t specialization_cid,
4209 const Object& owner) {
4210 const intptr_t kParamCount = RegExpMacroAssembler::kParamCount;
4211
4212 Function& fn = Function::Handle(isolate,
4213 Function::New(String::Handle(isolate, Symbols::New("RegExp")),
4214 RawFunction::kIrregexpFunction,
4215 true, // Static.
4216 false, // Not const.
4217 false, // Not abstract.
4218 false, // Not external.
4219 false, // Not native.
4220 owner,
4221 0)); // Requires a non-negative token position.
4222
4223 fn.set_num_fixed_parameters(kParamCount);
4224 fn.set_parameter_types(
4225 Array::Handle(isolate, Array::New(kParamCount, Heap::kOld)));
4226 fn.set_parameter_names(
4227 Array::Handle(isolate, Array::New(kParamCount, Heap::kOld)));
4228 fn.SetParameterTypeAt(0, Type::Handle(isolate, Type::DynamicType()));
4229 fn.SetParameterNameAt(0, Symbols::string_param_());
4230 fn.SetParameterTypeAt(1, Type::Handle(isolate, Type::DynamicType()));
4231 fn.SetParameterNameAt(1, Symbols::start_index_param_());
4232 fn.set_result_type(Type::Handle(isolate, Type::ArrayType()));
4233
4234 // Cache the result.
4235 regexp->set_function(specialization_cid, fn);
4236
4237 fn.set_regexp(*regexp);
4238 fn.set_regexp_specialization(specialization_cid);
4239
4240 // The function is compiled lazily during the first call.
4241 }
4242
4243
4244 RawJSRegExp* RegExpEngine::New(Isolate* isolate,
4245 const String& pattern,
4246 bool multi_line,
4247 bool ignore_case) {
4248 JSRegExp& regexp = JSRegExp::Handle(JSRegExp::New(0));
4249
4250 regexp.set_pattern(pattern);
4251
4252 if (multi_line) regexp.set_is_multi_line();
4253 if (ignore_case) regexp.set_is_ignore_case();
4254
4255 // TODO(jgruber): We might want to use normal string searching algorithms
4256 // for simple patterns.
4257 regexp.set_is_complex();
4258 regexp.set_is_global(); // All dart regexps are global.
4259
4260 const Library& lib = Library::Handle(isolate, Library::CoreLibrary());
4261 const Class& owner = Class::Handle(isolate,
4262 lib.LookupClass(String::Handle(isolate, Symbols::New("RegExp"))));
4263
4264 CreateSpecializedFunction(isolate, &regexp, kOneByteStringCid, owner);
4265 CreateSpecializedFunction(isolate, &regexp, kTwoByteStringCid, owner);
4266 CreateSpecializedFunction(isolate, &regexp, kExternalOneByteStringCid, owner);
4267 CreateSpecializedFunction(isolate, &regexp, kExternalTwoByteStringCid, owner);
4268
4269 return regexp.raw();
4270 }
4271
4272
4273 void BoyerMoorePositionInfo::Set(intptr_t character) {
4274 SetInterval(Interval(character, character));
4275 }
4276
4277
4278 ContainedInLattice AddRange(ContainedInLattice containment,
4279 const intptr_t* ranges,
4280 intptr_t ranges_length,
4281 Interval new_range) {
4282 ASSERT((ranges_length & 1) == 1);
4283 ASSERT(ranges[ranges_length - 1] == Utf16::kMaxCodeUnit + 1);
4284 if (containment == kLatticeUnknown) return containment;
4285 bool inside = false;
4286 intptr_t last = 0;
4287 for (intptr_t i = 0; i < ranges_length;
4288 inside = !inside, last = ranges[i], i++) {
4289 // Consider the range from last to ranges[i].
4290 // We haven't got to the new range yet.
4291 if (ranges[i] <= new_range.from()) continue;
4292 // New range is wholly inside last-ranges[i]. Note that new_range.to() is
4293 // inclusive, but the values in ranges are not.
4294 if (last <= new_range.from() && new_range.to() < ranges[i]) {
4295 return Combine(containment, inside ? kLatticeIn : kLatticeOut);
4296 }
4297 return kLatticeUnknown;
4298 }
4299 return containment;
4300 }
4301
4302
4303 void BoyerMoorePositionInfo::SetInterval(const Interval& interval) {
4304 s_ = AddRange(s_, kSpaceRanges, kSpaceRangeCount, interval);
4305 w_ = AddRange(w_, kWordRanges, kWordRangeCount, interval);
4306 d_ = AddRange(d_, kDigitRanges, kDigitRangeCount, interval);
4307 surrogate_ =
4308 AddRange(surrogate_, kSurrogateRanges, kSurrogateRangeCount, interval);
4309 if (interval.to() - interval.from() >= kMapSize - 1) {
4310 if (map_count_ != kMapSize) {
4311 map_count_ = kMapSize;
4312 for (intptr_t i = 0; i < kMapSize; i++) (*map_)[i] = true;
4313 }
4314 return;
4315 }
4316 for (intptr_t i = interval.from(); i <= interval.to(); i++) {
4317 intptr_t mod_character = (i & kMask);
4318 if (!map_->At(mod_character)) {
4319 map_count_++;
4320 (*map_)[mod_character] = true;
4321 }
4322 if (map_count_ == kMapSize) return;
4323 }
4324 }
4325
4326
4327 void BoyerMoorePositionInfo::SetAll() {
4328 s_ = w_ = d_ = kLatticeUnknown;
4329 if (map_count_ != kMapSize) {
4330 map_count_ = kMapSize;
4331 for (intptr_t i = 0; i < kMapSize; i++) (*map_)[i] = true;
4332 }
4333 }
4334
4335
4336 BoyerMooreLookahead::BoyerMooreLookahead(
4337 intptr_t length, RegExpCompiler* compiler, Isolate* isolate)
4338 : length_(length),
4339 compiler_(compiler) {
4340 if (compiler->ascii()) {
4341 max_char_ = Symbols::kMaxOneCharCodeSymbol;
4342 } else {
4343 max_char_ = Utf16::kMaxCodeUnit;
4344 }
4345 bitmaps_ = new(isolate) ZoneGrowableArray<BoyerMoorePositionInfo*>(length);
4346 for (intptr_t i = 0; i < length; i++) {
4347 bitmaps_->Add(new(isolate) BoyerMoorePositionInfo(isolate));
4348 }
4349 }
4350
4351
4352 // Take all the characters that will not prevent a successful match if they
4353 // occur in the subject string in the range between min_lookahead and
4354 // max_lookahead (inclusive) measured from the current position. If the
4355 // character at max_lookahead offset is not one of these characters, then we
4356 // can safely skip forwards by the number of characters in the range.
4357 intptr_t BoyerMooreLookahead::GetSkipTable(intptr_t min_lookahead,
4358 intptr_t max_lookahead,
4359 TypedData* boolean_skip_table) {
4360 const intptr_t kSize = RegExpMacroAssembler::kTableSize;
4361
4362 const intptr_t kSkipArrayEntry = 0;
4363 const intptr_t kDontSkipArrayEntry = 1;
4364
4365 for (intptr_t i = 0; i < kSize; i++) {
4366 boolean_skip_table->SetUint8(i, kSkipArrayEntry);
4367 }
4368 intptr_t skip = max_lookahead + 1 - min_lookahead;
4369
4370 for (intptr_t i = max_lookahead; i >= min_lookahead; i--) {
4371 BoyerMoorePositionInfo* map = bitmaps_->At(i);
4372 for (intptr_t j = 0; j < kSize; j++) {
4373 if (map->at(j)) {
4374 boolean_skip_table->SetUint8(j, kDontSkipArrayEntry);
4375 }
4376 }
4377 }
4378
4379 return skip;
4380 }
4381
4382
4383 // Find the longest range of lookahead that has the fewest number of different
4384 // characters that can occur at a given position. Since we are optimizing two
4385 // different parameters at once this is a tradeoff.
4386 bool BoyerMooreLookahead::FindWorthwhileInterval(intptr_t* from, intptr_t* to) {
4387 intptr_t biggest_points = 0;
4388 // If more than 32 characters out of 128 can occur it is unlikely that we can
4389 // be lucky enough to step forwards much of the time.
4390 const intptr_t kMaxMax = 32;
4391 for (intptr_t max_number_of_chars = 4;
4392 max_number_of_chars < kMaxMax;
4393 max_number_of_chars *= 2) {
4394 biggest_points =
4395 FindBestInterval(max_number_of_chars, biggest_points, from, to);
4396 }
4397 if (biggest_points == 0) return false;
4398 return true;
4399 }
4400
4401
4402 // Find the highest-points range between 0 and length_ where the character
4403 // information is not too vague. 'Too vague' means that there are more than
4404 // max_number_of_chars that can occur at this position. Calculates the number
4405 // of points as the product of width-of-the-range and
4406 // probability-of-finding-one-of-the-characters, where the probability is
4407 // calculated using the frequency distribution of the sample subject string.
4408 intptr_t BoyerMooreLookahead::FindBestInterval(
4409 intptr_t max_number_of_chars,
4410 intptr_t old_biggest_points,
4411 intptr_t* from,
4412 intptr_t* to) {
4413 intptr_t biggest_points = old_biggest_points;
4414 static const intptr_t kSize = RegExpMacroAssembler::kTableSize;
4415 for (intptr_t i = 0; i < length_; ) {
4416 while (i < length_ && Count(i) > max_number_of_chars) i++;
4417 if (i == length_) break;
4418 intptr_t remembered_from = i;
4419 bool union_map[kSize];
4420 for (intptr_t j = 0; j < kSize; j++) union_map[j] = false;
4421 while (i < length_ && Count(i) <= max_number_of_chars) {
4422 BoyerMoorePositionInfo* map = bitmaps_->At(i);
4423 for (intptr_t j = 0; j < kSize; j++) union_map[j] |= map->at(j);
4424 i++;
4425 }
4426 intptr_t frequency = 0;
4427 for (intptr_t j = 0; j < kSize; j++) {
4428 if (union_map[j]) {
4429 // Add 1 to the frequency to give a small per-character boost for
4430 // the cases where our sampling is not good enough and many
4431 // characters have a frequency of zero. This means the frequency
4432 // can theoretically be up to 2*kSize though we treat it mostly as
4433 // a fraction of kSize.
4434 frequency += compiler_->frequency_collator()->Frequency(j) + 1;
4435 }
4436 }
4437 // We use the probability of skipping times the distance we are skipping to
4438 // judge the effectiveness of this. Actually we have a cut-off: By
4439 // dividing by 2 we switch off the skipping if the probability of skipping
4440 // is less than 50%. This is because the multibyte mask-and-compare
4441 // skipping in quickcheck is more likely to do well on this case.
4442 bool in_quickcheck_range = ((i - remembered_from < 4) ||
4443 (compiler_->ascii() ? remembered_from <= 4 : remembered_from <= 2));
4444 // Called 'probability' but it is only a rough estimate and can actually
4445 // be outside the 0-kSize range.
4446 intptr_t probability =
4447 (in_quickcheck_range ? kSize / 2 : kSize) - frequency;
4448 intptr_t points = (i - remembered_from) * probability;
4449 if (points > biggest_points) {
4450 *from = remembered_from;
4451 *to = i - 1;
4452 biggest_points = points;
4453 }
4454 }
4455 return biggest_points;
4456 }
4457
4458
4459 // See comment above on the implementation of GetSkipTable.
4460 bool BoyerMooreLookahead::EmitSkipInstructions(RegExpMacroAssembler* masm) {
4461 const intptr_t kSize = RegExpMacroAssembler::kTableSize;
4462
4463 intptr_t min_lookahead = 0;
4464 intptr_t max_lookahead = 0;
4465
4466 if (!FindWorthwhileInterval(&min_lookahead, &max_lookahead)) return false;
4467
4468 bool found_single_character = false;
4469 intptr_t single_character = 0;
4470 for (intptr_t i = max_lookahead; i >= min_lookahead; i--) {
4471 BoyerMoorePositionInfo* map = bitmaps_->At(i);
4472 if (map->map_count() > 1 ||
4473 (found_single_character && map->map_count() != 0)) {
4474 found_single_character = false;
4475 break;
4476 }
4477 for (intptr_t j = 0; j < kSize; j++) {
4478 if (map->at(j)) {
4479 found_single_character = true;
4480 single_character = j;
4481 break;
4482 }
4483 }
4484 }
4485
4486 intptr_t lookahead_width = max_lookahead + 1 - min_lookahead;
4487
4488 if (found_single_character && lookahead_width == 1 && max_lookahead < 3) {
4489 // The mask-compare can probably handle this better.
4490 return false;
4491 }
4492
4493 if (found_single_character) {
4494 BlockLabel cont, again;
4495 masm->BindBlock(&again);
4496 masm->LoadCurrentCharacter(max_lookahead, &cont, true);
4497 if (max_char_ > kSize) {
4498 masm->CheckCharacterAfterAnd(single_character,
4499 RegExpMacroAssembler::kTableMask,
4500 &cont);
4501 } else {
4502 masm->CheckCharacter(single_character, &cont);
4503 }
4504 masm->AdvanceCurrentPosition(lookahead_width);
4505 masm->GoTo(&again);
4506 masm->BindBlock(&cont);
4507 return true;
4508 }
4509
4510 TypedData& boolean_skip_table = TypedData::ZoneHandle(
4511 compiler_->isolate(),
4512 TypedData::New(kTypedDataUint8ArrayCid, kSize, Heap::kOld));
4513 intptr_t skip_distance = GetSkipTable(
4514 min_lookahead, max_lookahead, &boolean_skip_table);
4515 ASSERT(skip_distance != 0);
4516
4517 BlockLabel cont, again;
4518
4519 masm->BindBlock(&again);
4520 masm->LoadCurrentCharacter(max_lookahead, &cont, true);
4521 masm->CheckBitInTable(boolean_skip_table, &cont);
4522 masm->AdvanceCurrentPosition(skip_distance);
4523 masm->Jump(&again);
4524 masm->BindBlock(&cont);
4525
4526 return true;
4527 }
4528
4529
1802 // ------------------------------------------------------------------- 4530 // -------------------------------------------------------------------
1803 // Analysis 4531 // Analysis
1804 4532
1805 4533
1806 void Analysis::EnsureAnalyzed(RegExpNode* that) { 4534 void Analysis::EnsureAnalyzed(RegExpNode* that) {
1807 if (that->info()->been_analyzed || that->info()->being_analyzed) 4535 if (that->info()->been_analyzed || that->info()->being_analyzed)
1808 return; 4536 return;
1809 that->info()->being_analyzed = true; 4537 that->info()->being_analyzed = true;
1810 that->Accept(this); 4538 that->Accept(this);
1811 that->info()->being_analyzed = false; 4539 that->info()->being_analyzed = false;
(...skipping 168 matching lines...) Expand 10 before | Expand all | Expand 10 after
1980 4708
1981 4709
1982 void DotPrinter::PrintAttributes(RegExpNode* that) { 4710 void DotPrinter::PrintAttributes(RegExpNode* that) {
1983 OS::Print(" a%p [shape=Mrecord, color=grey, fontcolor=grey, " 4711 OS::Print(" a%p [shape=Mrecord, color=grey, fontcolor=grey, "
1984 "margin=0.1, fontsize=10, label=\"{", that); 4712 "margin=0.1, fontsize=10, label=\"{", that);
1985 AttributePrinter printer; 4713 AttributePrinter printer;
1986 NodeInfo* info = that->info(); 4714 NodeInfo* info = that->info();
1987 printer.PrintBit("NI", info->follows_newline_interest); 4715 printer.PrintBit("NI", info->follows_newline_interest);
1988 printer.PrintBit("WI", info->follows_word_interest); 4716 printer.PrintBit("WI", info->follows_word_interest);
1989 printer.PrintBit("SI", info->follows_start_interest); 4717 printer.PrintBit("SI", info->follows_start_interest);
1990 Label* label = that->label(); 4718 BlockLabel* label = that->label();
1991 if (label->IsBound()) 4719 if (label->IsBound())
1992 printer.PrintPositive("@", label->Position()); 4720 printer.PrintPositive("@", label->Position());
1993 OS::Print("}\"];\n" 4721 OS::Print("}\"];\n"
1994 " a%p -> n%p [style=dashed, color=grey, arrowhead=none];\n", 4722 " a%p -> n%p [style=dashed, color=grey, arrowhead=none];\n",
1995 that, that); 4723 that, that);
1996 } 4724 }
1997 4725
1998 4726
1999 static const bool kPrintDispatchTable = false; 4727 static const bool kPrintDispatchTable = false;
2000 void DotPrinter::VisitChoice(ChoiceNode* that) { 4728 void DotPrinter::VisitChoice(ChoiceNode* that) {
(...skipping 141 matching lines...) Expand 10 before | Expand all | Expand 10 after
2142 RegExpNode* node, 4870 RegExpNode* node,
2143 bool ignore_case) { 4871 bool ignore_case) {
2144 DotPrinter printer(ignore_case); 4872 DotPrinter printer(ignore_case);
2145 printer.PrintNode(label, node); 4873 printer.PrintNode(label, node);
2146 } 4874 }
2147 4875
2148 4876
2149 #endif // DEBUG 4877 #endif // DEBUG
2150 4878
2151 } // namespace dart 4879 } // namespace dart
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698