OLD | NEW |
1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file |
2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
4 | 4 |
5 #include "vm/regexp_assembler.h" | 5 #include "vm/regexp_assembler.h" |
6 | 6 |
7 #include "vm/bit_vector.h" | |
8 #include "vm/compiler.h" | |
9 #include "vm/dart_entry.h" | |
10 #include "vm/flow_graph_builder.h" | |
11 #include "vm/il_printer.h" | |
12 #include "vm/object_store.h" | |
13 #include "vm/regexp.h" | 7 #include "vm/regexp.h" |
14 #include "vm/resolver.h" | |
15 #include "vm/stack_frame.h" | |
16 #include "vm/unibrow-inl.h" | |
17 #include "vm/unicode.h" | |
18 | |
19 #define Z zone() | |
20 | |
21 // Debugging output macros. TAG() is called at the head of each interesting | |
22 // function and prints its name during execution if irregexp tracing is enabled. | |
23 #define TAG() if (FLAG_trace_irregexp) { TAG_(); } | |
24 #define TAG_() \ | |
25 Print(PushArgument( \ | |
26 Bind(new(Z) ConstantInstr(String::ZoneHandle(Z, String::Concat( \ | |
27 String::Handle(String::New("TAG: ")), \ | |
28 String::Handle(String::New(__FUNCTION__)), Heap::kOld)))))); | |
29 | |
30 #define PRINT(arg) if (FLAG_trace_irregexp) { Print(arg); } | |
31 | 8 |
32 namespace dart { | 9 namespace dart { |
33 | 10 |
34 DEFINE_FLAG(bool, trace_irregexp, false, "Trace irregexps"); | |
35 | |
36 | |
37 static const intptr_t kInvalidTryIndex = CatchClauseNode::kInvalidTryIndex; | |
38 static const intptr_t kNoSourcePos = Scanner::kNoSourcePos; | |
39 static const intptr_t kMinStackSize = 512; | |
40 | |
41 | |
42 void PrintUtf16(uint16_t c) { | |
43 const char* format = (0x20 <= c && c <= 0x7F) ? | |
44 "%c" : (c <= 0xff) ? "\\x%02x" : "\\u%04x"; | |
45 OS::Print(format, c); | |
46 } | |
47 | |
48 | |
49 /* | |
50 * This assembler uses the following main local variables: | |
51 * - stack_: A pointer to a growable list which we use as an all-purpose stack | |
52 * storing backtracking offsets, positions & stored register values. | |
53 * - current_character_: Stores the currently loaded characters (possibly more | |
54 * than one). | |
55 * - current_position_: The current position within the string, stored as a | |
56 * negative offset from the end of the string (i.e. the | |
57 * position corresponding to str[0] is -str.length). | |
58 * Note that current_position_ is *not* byte-based, unlike | |
59 * original V8 code. | |
60 * | |
61 * Results are returned though an array of capture indices, stored at | |
62 * matches_param_. A null array specifies a failure to match. The match indices | |
63 * [start_inclusive, end_exclusive] for capture group i are stored at positions | |
64 * matches_param_[i * 2] and matches_param_[i * 2 + 1], respectively. Match | |
65 * indices of -1 denote non-matched groups. Note that we store these indices | |
66 * as a negative offset from the end of the string in registers_array_ | |
67 * during processing, and convert them to standard indexes when copying them | |
68 * to matches_param_ on successful match. | |
69 */ | |
70 | |
71 RegExpMacroAssembler::RegExpMacroAssembler(Zone* zone) | 11 RegExpMacroAssembler::RegExpMacroAssembler(Zone* zone) |
72 : slow_safe_compiler_(false), | 12 : slow_safe_compiler_(false), |
73 global_mode_(NOT_GLOBAL), | 13 global_mode_(NOT_GLOBAL), |
74 zone_(zone) { | 14 zone_(zone) { |
75 } | 15 } |
76 | 16 |
77 | 17 |
78 RegExpMacroAssembler::~RegExpMacroAssembler() { | 18 RegExpMacroAssembler::~RegExpMacroAssembler() { |
79 } | 19 } |
80 | 20 |
81 | |
82 IRRegExpMacroAssembler::IRRegExpMacroAssembler( | |
83 intptr_t specialization_cid, | |
84 intptr_t capture_count, | |
85 const ParsedFunction* parsed_function, | |
86 const ZoneGrowableArray<const ICData*>& ic_data_array, | |
87 Zone* zone) | |
88 : RegExpMacroAssembler(zone), | |
89 specialization_cid_(specialization_cid), | |
90 parsed_function_(parsed_function), | |
91 ic_data_array_(ic_data_array), | |
92 current_instruction_(NULL), | |
93 stack_(NULL), | |
94 stack_pointer_(NULL), | |
95 current_character_(NULL), | |
96 current_position_(NULL), | |
97 string_param_(NULL), | |
98 string_param_length_(NULL), | |
99 start_index_param_(NULL), | |
100 registers_count_(0), | |
101 saved_registers_count_((capture_count + 1) * 2), | |
102 stack_array_cell_(Array::ZoneHandle(zone, Array::New(1, Heap::kOld))), | |
103 // The registers array is allocated at a fixed size after assembly. | |
104 registers_array_(TypedData::ZoneHandle(zone, TypedData::null())) { | |
105 switch (specialization_cid) { | |
106 case kOneByteStringCid: | |
107 case kExternalOneByteStringCid: mode_ = ASCII; break; | |
108 case kTwoByteStringCid: | |
109 case kExternalTwoByteStringCid: mode_ = UC16; break; | |
110 default: UNREACHABLE(); | |
111 } | |
112 | |
113 InitializeLocals(); | |
114 | |
115 // Allocate an initial stack backing of the minimum stack size. The stack | |
116 // backing is indirectly referred to so we can reuse it on subsequent matches | |
117 // even in the case where the backing has been enlarged and thus reallocated. | |
118 stack_array_cell_.SetAt(0, TypedData::Handle(zone, | |
119 TypedData::New(kTypedDataInt32ArrayCid, kMinStackSize / 4, Heap::kOld))); | |
120 | |
121 // Create and generate all preset blocks. | |
122 entry_block_ = | |
123 new(zone) GraphEntryInstr( | |
124 *parsed_function_, | |
125 new(zone) TargetEntryInstr(block_id_.Alloc(), kInvalidTryIndex), | |
126 Isolate::kNoDeoptId); | |
127 start_block_ = | |
128 new(zone) JoinEntryInstr(block_id_.Alloc(), kInvalidTryIndex); | |
129 success_block_ = | |
130 new(zone) JoinEntryInstr(block_id_.Alloc(), kInvalidTryIndex); | |
131 backtrack_block_ = | |
132 new(zone) JoinEntryInstr(block_id_.Alloc(), kInvalidTryIndex); | |
133 exit_block_ = | |
134 new(zone) JoinEntryInstr(block_id_.Alloc(), kInvalidTryIndex); | |
135 | |
136 GenerateEntryBlock(); | |
137 GenerateSuccessBlock(); | |
138 GenerateExitBlock(); | |
139 | |
140 blocks_.Add(entry_block_); | |
141 blocks_.Add(entry_block_->normal_entry()); | |
142 blocks_.Add(start_block_); | |
143 blocks_.Add(success_block_); | |
144 blocks_.Add(backtrack_block_); | |
145 blocks_.Add(exit_block_); | |
146 | |
147 // Begin emission at the start_block_. | |
148 set_current_instruction(start_block_); | |
149 } | |
150 | |
151 | |
152 IRRegExpMacroAssembler::~IRRegExpMacroAssembler() { } | |
153 | |
154 | |
155 void IRRegExpMacroAssembler::InitializeLocals() { | |
156 // All generated functions are expected to have a current-context variable. | |
157 // This variable is unused in irregexp functions. | |
158 parsed_function_->current_context_var()->set_index(GetNextLocalIndex()); | |
159 | |
160 // Create local variables and parameters. | |
161 stack_ = Local(Symbols::stack()); | |
162 stack_pointer_ = Local(Symbols::stack_pointer()); | |
163 registers_ = Local(Symbols::position_registers()); | |
164 current_character_ = Local(Symbols::current_character()); | |
165 current_position_ = Local(Symbols::current_position()); | |
166 string_param_length_ = Local(Symbols::string_param_length()); | |
167 capture_length_ = Local(Symbols::capture_length()); | |
168 match_start_index_ = Local(Symbols::match_start_index()); | |
169 capture_start_index_ = Local(Symbols::capture_start_index()); | |
170 match_end_index_ = Local(Symbols::match_end_index()); | |
171 char_in_capture_ = Local(Symbols::char_in_capture()); | |
172 char_in_match_ = Local(Symbols::char_in_match()); | |
173 index_temp_ = Local(Symbols::index_temp()); | |
174 result_ = Local(Symbols::result()); | |
175 | |
176 string_param_ = Parameter(Symbols::string_param(), 0); | |
177 start_index_param_ = Parameter(Symbols::start_index_param(), 1); | |
178 } | |
179 | |
180 | |
181 void IRRegExpMacroAssembler::GenerateEntryBlock() { | |
182 set_current_instruction(entry_block_->normal_entry()); | |
183 TAG(); | |
184 | |
185 // Store string.length. | |
186 PushArgumentInstr* string_push = PushLocal(string_param_); | |
187 | |
188 StoreLocal( | |
189 string_param_length_, | |
190 Bind(InstanceCall( | |
191 InstanceCallDescriptor( | |
192 String::ZoneHandle(Field::GetterSymbol(Symbols::Length()))), | |
193 string_push))); | |
194 | |
195 // Store (start_index - string.length) as the current position (since it's a | |
196 // negative offset from the end of the string). | |
197 PushArgumentInstr* start_index_push = PushLocal(start_index_param_); | |
198 PushArgumentInstr* length_push = PushLocal(string_param_length_); | |
199 | |
200 StoreLocal(current_position_, Bind(Sub(start_index_push, length_push))); | |
201 | |
202 // Generate a local list variable to represent "registers" and | |
203 // initialize capture registers (others remain garbage). | |
204 StoreLocal(registers_, Bind(new(Z) ConstantInstr(registers_array_))); | |
205 ClearRegisters(0, saved_registers_count_ - 1); | |
206 | |
207 // Generate a local list variable to represent the backtracking stack. | |
208 PushArgumentInstr* stack_cell_push = | |
209 PushArgument(Bind(new(Z) ConstantInstr(stack_array_cell_))); | |
210 StoreLocal(stack_, Bind(InstanceCall( | |
211 InstanceCallDescriptor::FromToken(Token::kINDEX), | |
212 stack_cell_push, | |
213 PushArgument(Bind(Uint64Constant(0)))))); | |
214 StoreLocal(stack_pointer_, Bind(Int64Constant(-1))); | |
215 | |
216 // Jump to the start block. | |
217 current_instruction_->Goto(start_block_); | |
218 } | |
219 | |
220 | |
221 void IRRegExpMacroAssembler::GenerateBacktrackBlock() { | |
222 set_current_instruction(backtrack_block_); | |
223 TAG(); | |
224 CheckPreemption(); | |
225 | |
226 const intptr_t entries_count = entry_block_->indirect_entries().length(); | |
227 | |
228 TypedData& offsets = TypedData::ZoneHandle(Z, | |
229 TypedData::New(kTypedDataInt32ArrayCid, entries_count, Heap::kOld)); | |
230 | |
231 PushArgumentInstr* block_offsets_push = | |
232 PushArgument(Bind(new(Z) ConstantInstr(offsets))); | |
233 PushArgumentInstr* block_id_push = PushArgument(Bind(PopStack())); | |
234 | |
235 Value* offset_value = | |
236 Bind(InstanceCall(InstanceCallDescriptor::FromToken(Token::kINDEX), | |
237 block_offsets_push, | |
238 block_id_push)); | |
239 | |
240 backtrack_goto_ = new(Z) IndirectGotoInstr(&offsets, offset_value); | |
241 CloseBlockWith(backtrack_goto_); | |
242 | |
243 // Add an edge from the "indirect" goto to each of the targets. | |
244 for (intptr_t j = 0; j < entries_count; j++) { | |
245 backtrack_goto_->AddSuccessor( | |
246 TargetWithJoinGoto(entry_block_->indirect_entries().At(j))); | |
247 } | |
248 } | |
249 | |
250 | |
251 void IRRegExpMacroAssembler::GenerateSuccessBlock() { | |
252 set_current_instruction(success_block_); | |
253 TAG(); | |
254 | |
255 Value* type = Bind(new(Z) ConstantInstr( | |
256 TypeArguments::ZoneHandle(Z, TypeArguments::null()))); | |
257 Value* length = Bind(Uint64Constant(saved_registers_count_)); | |
258 Value* array = Bind(new(Z) CreateArrayInstr(kNoSourcePos, type, length)); | |
259 StoreLocal(result_, array); | |
260 | |
261 // Store captured offsets in the `matches` parameter. | |
262 for (intptr_t i = 0; i < saved_registers_count_; i++) { | |
263 PushArgumentInstr* matches_push = PushLocal(result_); | |
264 PushArgumentInstr* index_push = PushArgument(Bind(Uint64Constant(i))); | |
265 | |
266 // Convert negative offsets from the end of the string to string indices. | |
267 // TODO(zerny): use positive offsets from the get-go. | |
268 PushArgumentInstr* offset_push = PushArgument(LoadRegister(i)); | |
269 PushArgumentInstr* len_push = PushLocal(string_param_length_); | |
270 PushArgumentInstr* value_push = | |
271 PushArgument(Bind(Add(offset_push, len_push))); | |
272 | |
273 Do(InstanceCall(InstanceCallDescriptor::FromToken(Token::kASSIGN_INDEX), | |
274 matches_push, | |
275 index_push, | |
276 value_push)); | |
277 } | |
278 | |
279 // Print the result if tracing. | |
280 PRINT(PushLocal(result_)); | |
281 | |
282 // Return true on success. | |
283 AppendInstruction(new(Z) ReturnInstr(kNoSourcePos, Bind(LoadLocal(result_)))); | |
284 } | |
285 | |
286 | |
287 void IRRegExpMacroAssembler::GenerateExitBlock() { | |
288 set_current_instruction(exit_block_); | |
289 TAG(); | |
290 | |
291 // Return false on failure. | |
292 AppendInstruction(new(Z) ReturnInstr(kNoSourcePos, Bind(LoadLocal(result_)))); | |
293 } | |
294 | |
295 | |
296 void IRRegExpMacroAssembler::FinalizeRegistersArray() { | |
297 ASSERT(registers_count_ >= saved_registers_count_); | |
298 registers_array_ = | |
299 TypedData::New(kTypedDataInt32ArrayCid, registers_count_, Heap::kOld); | |
300 } | |
301 | |
302 | |
303 #if defined(TARGET_ARCH_ARM64) || \ | |
304 defined(TARGET_ARCH_ARM) || \ | |
305 defined(TARGET_ARCH_MIPS) | |
306 // Disabling unaligned accesses forces the regexp engine to load characters one | |
307 // by one instead of up to 4 at once, along with the associated performance hit. | |
308 // TODO(zerny): Be less conservative about disabling unaligned accesses. | |
309 // For instance, ARMv6 supports unaligned accesses. Once it is enabled here, | |
310 // update LoadCodeUnitsInstr methods for the appropriate architectures. | |
311 static const bool kEnableUnalignedAccesses = false; | |
312 #else | |
313 static const bool kEnableUnalignedAccesses = true; | |
314 #endif | |
315 bool IRRegExpMacroAssembler::CanReadUnaligned() { | |
316 return kEnableUnalignedAccesses && !slow_safe(); | |
317 } | |
318 | |
319 | |
320 RawArray* IRRegExpMacroAssembler::Execute( | |
321 const Function& function, | |
322 const String& input, | |
323 const Smi& start_offset, | |
324 Zone* zone) { | |
325 // Create the argument list. | |
326 const Array& args = Array::Handle(Array::New(2)); | |
327 args.SetAt(0, input); | |
328 args.SetAt(1, start_offset); | |
329 | |
330 // And finally call the generated code. | |
331 | |
332 const Object& retval = | |
333 Object::Handle(zone, DartEntry::InvokeFunction(function, args)); | |
334 if (retval.IsError()) { | |
335 const Error& error = Error::Cast(retval); | |
336 OS::Print("%s\n", error.ToErrorCString()); | |
337 // Should never happen. | |
338 UNREACHABLE(); | |
339 } | |
340 | |
341 if (retval.IsNull()) { | |
342 return Array::null(); | |
343 } | |
344 | |
345 ASSERT(retval.IsArray()); | |
346 return Array::Cast(retval).raw(); | |
347 } | |
348 | |
349 | |
350 RawBool* IRRegExpMacroAssembler::CaseInsensitiveCompareUC16( | |
351 RawString* str_raw, | |
352 RawSmi* lhs_index_raw, | |
353 RawSmi* rhs_index_raw, | |
354 RawSmi* length_raw) { | |
355 const String& str = String::Handle(str_raw); | |
356 const Smi& lhs_index = Smi::Handle(lhs_index_raw); | |
357 const Smi& rhs_index = Smi::Handle(rhs_index_raw); | |
358 const Smi& length = Smi::Handle(length_raw); | |
359 | |
360 // TODO(zerny): Optimize as single instance. V8 has this as an | |
361 // isolate member. | |
362 unibrow::Mapping<unibrow::Ecma262Canonicalize> canonicalize; | |
363 | |
364 for (intptr_t i = 0; i < length.Value(); i++) { | |
365 int32_t c1 = str.CharAt(lhs_index.Value() + i); | |
366 int32_t c2 = str.CharAt(rhs_index.Value() + i); | |
367 if (c1 != c2) { | |
368 int32_t s1[1] = { c1 }; | |
369 canonicalize.get(c1, '\0', s1); | |
370 if (s1[0] != c2) { | |
371 int32_t s2[1] = { c2 }; | |
372 canonicalize.get(c2, '\0', s2); | |
373 if (s1[0] != s2[0]) { | |
374 return Bool::False().raw(); | |
375 } | |
376 } | |
377 } | |
378 } | |
379 return Bool::True().raw(); | |
380 } | |
381 | |
382 | |
383 LocalVariable* IRRegExpMacroAssembler::Parameter(const String& name, | |
384 intptr_t index) const { | |
385 const Type& local_type = Type::ZoneHandle(Z, Type::DynamicType()); | |
386 LocalVariable* local = | |
387 new(Z) LocalVariable(kNoSourcePos, name, local_type); | |
388 | |
389 intptr_t param_frame_index = kParamEndSlotFromFp + kParamCount - index; | |
390 local->set_index(param_frame_index); | |
391 | |
392 return local; | |
393 } | |
394 | |
395 | |
396 LocalVariable* IRRegExpMacroAssembler::Local(const String& name) { | |
397 const Type& local_type = Type::ZoneHandle(Z, Type::DynamicType()); | |
398 LocalVariable* local = | |
399 new(Z) LocalVariable(kNoSourcePos, name, local_type); | |
400 local->set_index(GetNextLocalIndex()); | |
401 | |
402 return local; | |
403 } | |
404 | |
405 | |
406 ConstantInstr* IRRegExpMacroAssembler::Int64Constant(int64_t value) const { | |
407 return new(Z) ConstantInstr( | |
408 Integer::ZoneHandle(Z, Integer::New(value, Heap::kOld))); | |
409 } | |
410 | |
411 | |
412 ConstantInstr* IRRegExpMacroAssembler::Uint64Constant(uint64_t value) const { | |
413 return new(Z) ConstantInstr( | |
414 Integer::ZoneHandle(Z, Integer::NewFromUint64(value, Heap::kOld))); | |
415 } | |
416 | |
417 | |
418 ConstantInstr* IRRegExpMacroAssembler::BoolConstant(bool value) const { | |
419 return new(Z) ConstantInstr(value ? Bool::True() : Bool::False()); | |
420 } | |
421 | |
422 | |
423 ConstantInstr* IRRegExpMacroAssembler::StringConstant(const char* value) const { | |
424 return new(Z) ConstantInstr( | |
425 String::ZoneHandle(Z, String::New(value, Heap::kOld))); | |
426 } | |
427 | |
428 | |
429 ConstantInstr* IRRegExpMacroAssembler::WordCharacterMapConstant() const { | |
430 const Library& lib = Library::Handle(Z, Library::CoreLibrary()); | |
431 const Class& regexp_class = Class::Handle(Z, | |
432 lib.LookupClassAllowPrivate(Symbols::JSSyntaxRegExp())); | |
433 const Field& word_character_field = Field::ZoneHandle(Z, | |
434 regexp_class.LookupStaticField(Symbols::_wordCharacterMap())); | |
435 ASSERT(!word_character_field.IsNull()); | |
436 | |
437 if (word_character_field.IsUninitialized()) { | |
438 word_character_field.EvaluateInitializer(); | |
439 } | |
440 ASSERT(!word_character_field.IsUninitialized()); | |
441 | |
442 return new(Z) ConstantInstr( | |
443 Instance::ZoneHandle(Z, word_character_field.value())); | |
444 } | |
445 | |
446 | |
447 ComparisonInstr* IRRegExpMacroAssembler::Comparison( | |
448 ComparisonKind kind, PushArgumentInstr* lhs, PushArgumentInstr* rhs) { | |
449 Token::Kind strict_comparison = Token::kEQ_STRICT; | |
450 Token::Kind intermediate_operator = Token::kILLEGAL; | |
451 switch (kind) { | |
452 case kEQ: | |
453 intermediate_operator = Token::kEQ; | |
454 break; | |
455 case kNE: | |
456 intermediate_operator = Token::kEQ; | |
457 strict_comparison = Token::kNE_STRICT; | |
458 break; | |
459 case kLT: | |
460 intermediate_operator = Token::kLT; | |
461 break; | |
462 case kGT: | |
463 intermediate_operator = Token::kGT; | |
464 break; | |
465 case kLTE: | |
466 intermediate_operator = Token::kLTE; | |
467 break; | |
468 case kGTE: | |
469 intermediate_operator = Token::kGTE; | |
470 break; | |
471 default: | |
472 UNREACHABLE(); | |
473 } | |
474 | |
475 ASSERT(intermediate_operator != Token::kILLEGAL); | |
476 | |
477 Value* lhs_value = | |
478 Bind(InstanceCall( | |
479 InstanceCallDescriptor::FromToken(intermediate_operator), | |
480 lhs, | |
481 rhs)); | |
482 Value* rhs_value = Bind(BoolConstant(true)); | |
483 | |
484 return new(Z) StrictCompareInstr( | |
485 kNoSourcePos, strict_comparison, lhs_value, rhs_value, true); | |
486 } | |
487 | |
488 ComparisonInstr* IRRegExpMacroAssembler::Comparison( | |
489 ComparisonKind kind, Definition* lhs, Definition* rhs) { | |
490 PushArgumentInstr* lhs_push = PushArgument(Bind(lhs)); | |
491 PushArgumentInstr* rhs_push = PushArgument(Bind(rhs)); | |
492 return Comparison(kind, lhs_push, rhs_push); | |
493 } | |
494 | |
495 | |
496 StaticCallInstr* IRRegExpMacroAssembler::StaticCall( | |
497 const Function& function) const { | |
498 ZoneGrowableArray<PushArgumentInstr*>* arguments = | |
499 new(Z) ZoneGrowableArray<PushArgumentInstr*>(0); | |
500 return StaticCall(function, arguments); | |
501 } | |
502 | |
503 | |
504 StaticCallInstr* IRRegExpMacroAssembler::StaticCall( | |
505 const Function& function, | |
506 PushArgumentInstr* arg1) const { | |
507 ZoneGrowableArray<PushArgumentInstr*>* arguments = | |
508 new(Z) ZoneGrowableArray<PushArgumentInstr*>(1); | |
509 arguments->Add(arg1); | |
510 | |
511 return StaticCall(function, arguments); | |
512 } | |
513 | |
514 | |
515 StaticCallInstr* IRRegExpMacroAssembler::StaticCall( | |
516 const Function& function, | |
517 PushArgumentInstr* arg1, | |
518 PushArgumentInstr* arg2) const { | |
519 ZoneGrowableArray<PushArgumentInstr*>* arguments = | |
520 new(Z) ZoneGrowableArray<PushArgumentInstr*>(2); | |
521 arguments->Add(arg1); | |
522 arguments->Add(arg2); | |
523 | |
524 return StaticCall(function, arguments); | |
525 } | |
526 | |
527 | |
528 StaticCallInstr* IRRegExpMacroAssembler::StaticCall( | |
529 const Function& function, | |
530 ZoneGrowableArray<PushArgumentInstr*>* arguments) const { | |
531 return new(Z) StaticCallInstr(kNoSourcePos, | |
532 function, | |
533 Object::null_array(), | |
534 arguments, | |
535 ic_data_array_); | |
536 } | |
537 | |
538 | |
539 InstanceCallInstr* IRRegExpMacroAssembler::InstanceCall( | |
540 const InstanceCallDescriptor& desc, | |
541 PushArgumentInstr* arg1) const { | |
542 ZoneGrowableArray<PushArgumentInstr*>* arguments = | |
543 new(Z) ZoneGrowableArray<PushArgumentInstr*>(1); | |
544 arguments->Add(arg1); | |
545 | |
546 return InstanceCall(desc, arguments); | |
547 } | |
548 | |
549 | |
550 InstanceCallInstr* IRRegExpMacroAssembler::InstanceCall( | |
551 const InstanceCallDescriptor& desc, | |
552 PushArgumentInstr* arg1, | |
553 PushArgumentInstr* arg2) const { | |
554 ZoneGrowableArray<PushArgumentInstr*>* arguments = | |
555 new(Z) ZoneGrowableArray<PushArgumentInstr*>(2); | |
556 arguments->Add(arg1); | |
557 arguments->Add(arg2); | |
558 | |
559 return InstanceCall(desc, arguments); | |
560 } | |
561 | |
562 | |
563 InstanceCallInstr* IRRegExpMacroAssembler::InstanceCall( | |
564 const InstanceCallDescriptor& desc, | |
565 PushArgumentInstr* arg1, | |
566 PushArgumentInstr* arg2, | |
567 PushArgumentInstr* arg3) const { | |
568 ZoneGrowableArray<PushArgumentInstr*>* arguments = | |
569 new(Z) ZoneGrowableArray<PushArgumentInstr*>(3); | |
570 arguments->Add(arg1); | |
571 arguments->Add(arg2); | |
572 arguments->Add(arg3); | |
573 | |
574 return InstanceCall(desc, arguments); | |
575 } | |
576 | |
577 | |
578 InstanceCallInstr* IRRegExpMacroAssembler::InstanceCall( | |
579 const InstanceCallDescriptor& desc, | |
580 ZoneGrowableArray<PushArgumentInstr*> *arguments) const { | |
581 return | |
582 new(Z) InstanceCallInstr(kNoSourcePos, | |
583 desc.name, | |
584 desc.token_kind, | |
585 arguments, | |
586 Object::null_array(), | |
587 desc.checked_argument_count, | |
588 ic_data_array_); | |
589 } | |
590 | |
591 | |
592 LoadLocalInstr* IRRegExpMacroAssembler::LoadLocal(LocalVariable* local) const { | |
593 return new(Z) LoadLocalInstr(*local); | |
594 } | |
595 | |
596 | |
597 void IRRegExpMacroAssembler::StoreLocal(LocalVariable* local, | |
598 Value* value) { | |
599 Do(new(Z) StoreLocalInstr(*local, value)); | |
600 } | |
601 | |
602 | |
603 void IRRegExpMacroAssembler::set_current_instruction(Instruction* instruction) { | |
604 current_instruction_ = instruction; | |
605 } | |
606 | |
607 | |
608 Value* IRRegExpMacroAssembler::Bind(Definition* definition) { | |
609 AppendInstruction(definition); | |
610 definition->set_temp_index(temp_id_.Alloc()); | |
611 | |
612 return new(Z) Value(definition); | |
613 } | |
614 | |
615 | |
616 void IRRegExpMacroAssembler::Do(Definition* definition) { | |
617 AppendInstruction(definition); | |
618 } | |
619 | |
620 | |
621 Value* IRRegExpMacroAssembler::BindLoadLocal(const LocalVariable& local) { | |
622 if (local.IsConst()) { | |
623 return Bind(new(Z) ConstantInstr(*local.ConstValue())); | |
624 } | |
625 ASSERT(!local.is_captured()); | |
626 return Bind(new(Z) LoadLocalInstr(local)); | |
627 } | |
628 | |
629 | |
630 // In some cases, the V8 irregexp engine generates unreachable code by emitting | |
631 // a jmp not followed by a bind. We cannot do the same, since it is impossible | |
632 // to append to a block following a jmp. In such cases, assume that we are doing | |
633 // the correct thing, but output a warning when tracing. | |
634 #define HANDLE_DEAD_CODE_EMISSION() \ | |
635 if (current_instruction_ == NULL) { \ | |
636 if (FLAG_trace_irregexp) { \ | |
637 OS::Print("WARNING: Attempting to append to a closed assembler. " \ | |
638 "This could be either a bug or generation of dead code " \ | |
639 "inherited from V8.\n"); \ | |
640 } \ | |
641 BlockLabel dummy; \ | |
642 BindBlock(&dummy); \ | |
643 } | |
644 | |
645 void IRRegExpMacroAssembler::AppendInstruction(Instruction* instruction) { | |
646 HANDLE_DEAD_CODE_EMISSION(); | |
647 | |
648 ASSERT(current_instruction_ != NULL); | |
649 ASSERT(current_instruction_->next() == NULL); | |
650 | |
651 temp_id_.Dealloc(instruction->InputCount()); | |
652 arg_id_.Dealloc(instruction->ArgumentCount()); | |
653 | |
654 current_instruction_->LinkTo(instruction); | |
655 set_current_instruction(instruction); | |
656 } | |
657 | |
658 | |
659 void IRRegExpMacroAssembler::CloseBlockWith(Instruction* instruction) { | |
660 HANDLE_DEAD_CODE_EMISSION(); | |
661 | |
662 ASSERT(current_instruction_ != NULL); | |
663 ASSERT(current_instruction_->next() == NULL); | |
664 | |
665 temp_id_.Dealloc(instruction->InputCount()); | |
666 arg_id_.Dealloc(instruction->ArgumentCount()); | |
667 | |
668 current_instruction_->LinkTo(instruction); | |
669 set_current_instruction(NULL); | |
670 } | |
671 | |
672 | |
673 void IRRegExpMacroAssembler::GoTo(BlockLabel* to) { | |
674 if (to == NULL) { | |
675 Backtrack(); | |
676 } else { | |
677 to->SetLinked(); | |
678 GoTo(to->block()); | |
679 } | |
680 } | |
681 | |
682 | |
683 // Closes the current block with a goto, and unsets current_instruction_. | |
684 // BindBlock() must be called before emission can continue. | |
685 void IRRegExpMacroAssembler::GoTo(JoinEntryInstr* to) { | |
686 HANDLE_DEAD_CODE_EMISSION(); | |
687 | |
688 ASSERT(current_instruction_ != NULL); | |
689 ASSERT(current_instruction_->next() == NULL); | |
690 current_instruction_->Goto(to); | |
691 set_current_instruction(NULL); | |
692 } | |
693 | |
694 | |
695 PushArgumentInstr* IRRegExpMacroAssembler::PushArgument(Value* value) { | |
696 arg_id_.Alloc(); | |
697 PushArgumentInstr* push = new(Z) PushArgumentInstr(value); | |
698 // Do *not* use Do() for push argument instructions. | |
699 AppendInstruction(push); | |
700 return push; | |
701 } | |
702 | |
703 | |
704 PushArgumentInstr* IRRegExpMacroAssembler::PushLocal(LocalVariable* local) { | |
705 return PushArgument(Bind(LoadLocal(local))); | |
706 } | |
707 | |
708 | |
709 void IRRegExpMacroAssembler::Print(const char* str) { | |
710 Print(PushArgument( | |
711 Bind(new(Z) ConstantInstr( | |
712 String::ZoneHandle(Z, String::New(str, Heap::kOld)))))); | |
713 } | |
714 | |
715 | |
716 void IRRegExpMacroAssembler::Print(PushArgumentInstr* argument) { | |
717 const Library& lib = Library::Handle(Library::CoreLibrary()); | |
718 const Function& print_fn = Function::ZoneHandle( | |
719 Z, lib.LookupFunctionAllowPrivate(Symbols::print())); | |
720 Do(StaticCall(print_fn, argument)); | |
721 } | |
722 | |
723 | |
724 void IRRegExpMacroAssembler::PrintBlocks() { | |
725 for (intptr_t i = 0; i < blocks_.length(); i++) { | |
726 FlowGraphPrinter::PrintBlock(blocks_[i], false); | |
727 } | |
728 } | |
729 | |
730 | |
731 intptr_t IRRegExpMacroAssembler::stack_limit_slack() { | |
732 return 32; | |
733 } | |
734 | |
735 | |
736 void IRRegExpMacroAssembler::AdvanceCurrentPosition(intptr_t by) { | |
737 TAG(); | |
738 if (by != 0) { | |
739 PushArgumentInstr* cur_pos_push = PushLocal(current_position_); | |
740 PushArgumentInstr* by_push = PushArgument(Bind(Int64Constant(by))); | |
741 | |
742 Value* new_pos_value = Bind(Add(cur_pos_push, by_push)); | |
743 StoreLocal(current_position_, new_pos_value); | |
744 } | |
745 } | |
746 | |
747 | |
748 void IRRegExpMacroAssembler::AdvanceRegister(intptr_t reg, intptr_t by) { | |
749 TAG(); | |
750 ASSERT(reg >= 0); | |
751 ASSERT(reg < registers_count_); | |
752 | |
753 if (by != 0) { | |
754 PushArgumentInstr* registers_push = PushLocal(registers_); | |
755 PushArgumentInstr* index_push = PushRegisterIndex(reg); | |
756 PushArgumentInstr* reg_push = PushArgument(LoadRegister(reg)); | |
757 PushArgumentInstr* by_push = PushArgument(Bind(Int64Constant(by))); | |
758 PushArgumentInstr* value_push = PushArgument(Bind(Add(reg_push, by_push))); | |
759 StoreRegister(registers_push, index_push, value_push); | |
760 } | |
761 } | |
762 | |
763 | |
764 void IRRegExpMacroAssembler::Backtrack() { | |
765 TAG(); | |
766 GoTo(backtrack_block_); | |
767 } | |
768 | |
769 | |
770 // A BindBlock is analogous to assigning a label to a basic block. | |
771 // If the BlockLabel does not yet contain a block, it is created. | |
772 // If there is a current instruction, append a goto to the bound block. | |
773 void IRRegExpMacroAssembler::BindBlock(BlockLabel* label) { | |
774 ASSERT(!label->IsBound()); | |
775 ASSERT(label->block()->next() == NULL); | |
776 | |
777 label->SetBound(block_id_.Alloc()); | |
778 blocks_.Add(label->block()); | |
779 | |
780 if (current_instruction_ != NULL) { | |
781 GoTo(label); | |
782 } | |
783 set_current_instruction(label->block()); | |
784 | |
785 // Print the id of the current block if tracing. | |
786 PRINT(PushArgument(Bind(Uint64Constant(label->block()->block_id())))); | |
787 } | |
788 | |
789 | |
790 intptr_t IRRegExpMacroAssembler::GetNextLocalIndex() { | |
791 intptr_t id = local_id_.Alloc(); | |
792 return kFirstLocalSlotFromFp - id; | |
793 } | |
794 | |
795 | |
796 Value* IRRegExpMacroAssembler::LoadRegister(intptr_t index) { | |
797 PushArgumentInstr* registers_push = PushLocal(registers_); | |
798 PushArgumentInstr* index_push = PushRegisterIndex(index); | |
799 return Bind(InstanceCall(InstanceCallDescriptor::FromToken(Token::kINDEX), | |
800 registers_push, | |
801 index_push)); | |
802 } | |
803 | |
804 void IRRegExpMacroAssembler::StoreRegister(intptr_t index, intptr_t value) { | |
805 PushArgumentInstr* registers_push = PushLocal(registers_); | |
806 PushArgumentInstr* index_push = PushRegisterIndex(index); | |
807 PushArgumentInstr* value_push = PushArgument(Bind(Uint64Constant(value))); | |
808 StoreRegister(registers_push, index_push, value_push); | |
809 } | |
810 | |
811 | |
812 void IRRegExpMacroAssembler::StoreRegister(PushArgumentInstr* registers, | |
813 PushArgumentInstr* index, | |
814 PushArgumentInstr* value) { | |
815 TAG(); | |
816 Do(InstanceCall(InstanceCallDescriptor::FromToken(Token::kASSIGN_INDEX), | |
817 registers, | |
818 index, | |
819 value)); | |
820 } | |
821 | |
822 PushArgumentInstr* IRRegExpMacroAssembler::PushRegisterIndex(intptr_t index) { | |
823 if (registers_count_ <= index) { | |
824 registers_count_ = index + 1; | |
825 } | |
826 return PushArgument(Bind(Uint64Constant(index))); | |
827 } | |
828 | |
829 | |
830 void IRRegExpMacroAssembler::CheckCharacter(uint32_t c, BlockLabel* on_equal) { | |
831 TAG(); | |
832 Definition* cur_char_def = LoadLocal(current_character_); | |
833 Definition* char_def = Uint64Constant(c); | |
834 | |
835 BranchOrBacktrack(Comparison(kEQ, cur_char_def, char_def), on_equal); | |
836 } | |
837 | |
838 | |
839 void IRRegExpMacroAssembler::CheckCharacterGT(uint16_t limit, | |
840 BlockLabel* on_greater) { | |
841 TAG(); | |
842 BranchOrBacktrack(Comparison(kGT, | |
843 LoadLocal(current_character_), | |
844 Uint64Constant(limit)), | |
845 on_greater); | |
846 } | |
847 | |
848 | |
849 void IRRegExpMacroAssembler::CheckAtStart(BlockLabel* on_at_start) { | |
850 TAG(); | |
851 | |
852 BlockLabel not_at_start; | |
853 | |
854 // Did we start the match at the start of the string at all? | |
855 BranchOrBacktrack(Comparison(kNE, | |
856 LoadLocal(start_index_param_), | |
857 Uint64Constant(0)), | |
858 ¬_at_start); | |
859 | |
860 // If we did, are we still at the start of the input, i.e. is | |
861 // (offset == string_length * -1)? | |
862 Definition* neg_len_def = | |
863 InstanceCall(InstanceCallDescriptor::FromToken(Token::kNEGATE), | |
864 PushLocal(string_param_length_)); | |
865 Definition* offset_def = LoadLocal(current_position_); | |
866 BranchOrBacktrack(Comparison(kEQ, neg_len_def, offset_def), | |
867 on_at_start); | |
868 | |
869 BindBlock(¬_at_start); | |
870 } | |
871 | |
872 | |
873 void IRRegExpMacroAssembler::CheckNotAtStart(BlockLabel* on_not_at_start) { | |
874 TAG(); | |
875 | |
876 // Did we start the match at the start of the string at all? | |
877 BranchOrBacktrack(Comparison(kNE, | |
878 LoadLocal(start_index_param_), | |
879 Uint64Constant(0)), | |
880 on_not_at_start); | |
881 | |
882 // If we did, are we still at the start of the input, i.e. is | |
883 // (offset == string_length * -1)? | |
884 Definition* neg_len_def = | |
885 InstanceCall(InstanceCallDescriptor::FromToken(Token::kNEGATE), | |
886 PushLocal(string_param_length_)); | |
887 Definition* offset_def = LoadLocal(current_position_); | |
888 BranchOrBacktrack(Comparison(kNE, neg_len_def, offset_def), | |
889 on_not_at_start); | |
890 } | |
891 | |
892 | |
893 void IRRegExpMacroAssembler::CheckCharacterLT(uint16_t limit, | |
894 BlockLabel* on_less) { | |
895 TAG(); | |
896 BranchOrBacktrack(Comparison(kLT, | |
897 LoadLocal(current_character_), | |
898 Uint64Constant(limit)), | |
899 on_less); | |
900 } | |
901 | |
902 | |
903 void IRRegExpMacroAssembler::CheckGreedyLoop(BlockLabel* on_equal) { | |
904 TAG(); | |
905 | |
906 BlockLabel fallthrough; | |
907 | |
908 Definition* head = PeekStack(); | |
909 Definition* cur_pos_def = LoadLocal(current_position_); | |
910 BranchOrBacktrack(Comparison(kNE, head, cur_pos_def), | |
911 &fallthrough); | |
912 | |
913 // Pop, throwing away the value. | |
914 Do(PopStack()); | |
915 | |
916 BranchOrBacktrack(NULL, on_equal); | |
917 | |
918 BindBlock(&fallthrough); | |
919 } | |
920 | |
921 | |
922 void IRRegExpMacroAssembler::CheckNotBackReferenceIgnoreCase( | |
923 intptr_t start_reg, | |
924 BlockLabel* on_no_match) { | |
925 TAG(); | |
926 ASSERT(start_reg + 1 <= registers_count_); | |
927 | |
928 BlockLabel fallthrough; | |
929 | |
930 PushArgumentInstr* end_push = PushArgument(LoadRegister(start_reg + 1)); | |
931 PushArgumentInstr* start_push = PushArgument(LoadRegister(start_reg)); | |
932 StoreLocal(capture_length_, Bind(Sub(end_push, start_push))); | |
933 | |
934 // The length of a capture should not be negative. This can only happen | |
935 // if the end of the capture is unrecorded, or at a point earlier than | |
936 // the start of the capture. | |
937 // BranchOrBacktrack(less, on_no_match); | |
938 | |
939 BranchOrBacktrack(Comparison(kLT, | |
940 LoadLocal(capture_length_), | |
941 Uint64Constant(0)), | |
942 on_no_match); | |
943 | |
944 // If length is zero, either the capture is empty or it is completely | |
945 // uncaptured. In either case succeed immediately. | |
946 BranchOrBacktrack(Comparison(kEQ, | |
947 LoadLocal(capture_length_), | |
948 Uint64Constant(0)), | |
949 &fallthrough); | |
950 | |
951 | |
952 // Check that there are sufficient characters left in the input. | |
953 PushArgumentInstr* pos_push = PushLocal(current_position_); | |
954 PushArgumentInstr* len_push = PushLocal(capture_length_); | |
955 BranchOrBacktrack( | |
956 Comparison(kGT, | |
957 InstanceCall(InstanceCallDescriptor::FromToken(Token::kADD), | |
958 pos_push, | |
959 len_push), | |
960 Uint64Constant(0)), | |
961 on_no_match); | |
962 | |
963 pos_push = PushLocal(current_position_); | |
964 len_push = PushLocal(string_param_length_); | |
965 StoreLocal(match_start_index_, Bind(Add(pos_push, len_push))); | |
966 | |
967 pos_push = PushArgument(LoadRegister(start_reg)); | |
968 len_push = PushLocal(string_param_length_); | |
969 StoreLocal(capture_start_index_, Bind(Add(pos_push, len_push))); | |
970 | |
971 pos_push = PushLocal(match_start_index_); | |
972 len_push = PushLocal(capture_length_); | |
973 StoreLocal(match_end_index_, Bind(Add(pos_push, len_push))); | |
974 | |
975 BlockLabel success; | |
976 if (mode_ == ASCII) { | |
977 BlockLabel loop_increment; | |
978 BlockLabel loop; | |
979 BindBlock(&loop); | |
980 | |
981 StoreLocal(char_in_capture_, CharacterAt(capture_start_index_)); | |
982 StoreLocal(char_in_match_, CharacterAt(match_start_index_)); | |
983 | |
984 BranchOrBacktrack(Comparison(kEQ, | |
985 LoadLocal(char_in_capture_), | |
986 LoadLocal(char_in_match_)), | |
987 &loop_increment); | |
988 | |
989 // Mismatch, try case-insensitive match (converting letters to lower-case). | |
990 PushArgumentInstr* match_char_push = PushLocal(char_in_match_); | |
991 PushArgumentInstr* mask_push = PushArgument(Bind(Uint64Constant(0x20))); | |
992 StoreLocal(char_in_match_, | |
993 Bind(InstanceCall( | |
994 InstanceCallDescriptor::FromToken(Token::kBIT_OR), | |
995 match_char_push, | |
996 mask_push))); | |
997 | |
998 BlockLabel convert_capture; | |
999 BlockLabel on_not_in_range; | |
1000 BranchOrBacktrack(Comparison(kLT, | |
1001 LoadLocal(char_in_match_), | |
1002 Uint64Constant('a')), | |
1003 &on_not_in_range); | |
1004 BranchOrBacktrack(Comparison(kGT, | |
1005 LoadLocal(char_in_match_), | |
1006 Uint64Constant('z')), | |
1007 &on_not_in_range); | |
1008 GoTo(&convert_capture); | |
1009 BindBlock(&on_not_in_range); | |
1010 | |
1011 // Latin-1: Check for values in range [224,254] but not 247. | |
1012 BranchOrBacktrack(Comparison(kLT, | |
1013 LoadLocal(char_in_match_), | |
1014 Uint64Constant(224)), | |
1015 on_no_match); | |
1016 BranchOrBacktrack(Comparison(kGT, | |
1017 LoadLocal(char_in_match_), | |
1018 Uint64Constant(254)), | |
1019 on_no_match); | |
1020 | |
1021 BranchOrBacktrack(Comparison(kEQ, | |
1022 LoadLocal(char_in_match_), | |
1023 Uint64Constant(247)), | |
1024 on_no_match); | |
1025 | |
1026 // Also convert capture character. | |
1027 BindBlock(&convert_capture); | |
1028 | |
1029 PushArgumentInstr* capture_char_push = PushLocal(char_in_capture_); | |
1030 mask_push = PushArgument(Bind(Uint64Constant(0x20))); | |
1031 StoreLocal(char_in_capture_, | |
1032 Bind(InstanceCall( | |
1033 InstanceCallDescriptor::FromToken(Token::kBIT_OR), | |
1034 capture_char_push, | |
1035 mask_push))); | |
1036 | |
1037 BranchOrBacktrack(Comparison(kNE, | |
1038 LoadLocal(char_in_match_), | |
1039 LoadLocal(char_in_capture_)), | |
1040 on_no_match); | |
1041 | |
1042 BindBlock(&loop_increment); | |
1043 | |
1044 // Increment indexes into capture and match strings. | |
1045 PushArgumentInstr* index_push = PushLocal(capture_start_index_); | |
1046 PushArgumentInstr* inc_push = PushArgument(Bind(Uint64Constant(1))); | |
1047 StoreLocal(capture_start_index_, Bind(Add(index_push, inc_push))); | |
1048 | |
1049 index_push = PushLocal(match_start_index_); | |
1050 inc_push = PushArgument(Bind(Uint64Constant(1))); | |
1051 StoreLocal(match_start_index_, Bind(Add(index_push, inc_push))); | |
1052 | |
1053 // Compare to end of match, and loop if not done. | |
1054 BranchOrBacktrack(Comparison(kLT, | |
1055 LoadLocal(match_start_index_), | |
1056 LoadLocal(match_end_index_)), | |
1057 &loop); | |
1058 } else { | |
1059 ASSERT(mode_ == UC16); | |
1060 | |
1061 Value* string_value = Bind(LoadLocal(string_param_)); | |
1062 Value* lhs_index_value = Bind(LoadLocal(match_start_index_)); | |
1063 Value* rhs_index_value = Bind(LoadLocal(capture_start_index_)); | |
1064 Value* length_value = Bind(LoadLocal(capture_length_)); | |
1065 | |
1066 Definition* is_match_def = | |
1067 new(Z) CaseInsensitiveCompareUC16Instr( | |
1068 string_value, | |
1069 lhs_index_value, | |
1070 rhs_index_value, | |
1071 length_value, | |
1072 specialization_cid_); | |
1073 | |
1074 BranchOrBacktrack(Comparison(kNE, is_match_def, BoolConstant(true)), | |
1075 on_no_match); | |
1076 } | |
1077 | |
1078 BindBlock(&success); | |
1079 | |
1080 // Move current character position to position after match. | |
1081 PushArgumentInstr* match_end_push = PushLocal(match_end_index_); | |
1082 len_push = PushLocal(string_param_length_); | |
1083 StoreLocal(current_position_, Bind(Sub(match_end_push, len_push))); | |
1084 | |
1085 BindBlock(&fallthrough); | |
1086 } | |
1087 | |
1088 | |
1089 void IRRegExpMacroAssembler::CheckNotBackReference( | |
1090 intptr_t start_reg, | |
1091 BlockLabel* on_no_match) { | |
1092 TAG(); | |
1093 ASSERT(start_reg + 1 <= registers_count_); | |
1094 | |
1095 BlockLabel fallthrough; | |
1096 BlockLabel success; | |
1097 | |
1098 // Find length of back-referenced capture. | |
1099 PushArgumentInstr* end_push = PushArgument(LoadRegister(start_reg + 1)); | |
1100 PushArgumentInstr* start_push = PushArgument(LoadRegister(start_reg)); | |
1101 StoreLocal(capture_length_, Bind(Sub(end_push, start_push))); | |
1102 | |
1103 // Fail on partial or illegal capture (start of capture after end of capture). | |
1104 BranchOrBacktrack(Comparison(kLT, | |
1105 LoadLocal(capture_length_), | |
1106 Uint64Constant(0)), | |
1107 on_no_match); | |
1108 | |
1109 // Succeed on empty capture (including no capture) | |
1110 BranchOrBacktrack(Comparison(kEQ, | |
1111 LoadLocal(capture_length_), | |
1112 Uint64Constant(0)), | |
1113 &fallthrough); | |
1114 | |
1115 // Check that there are sufficient characters left in the input. | |
1116 PushArgumentInstr* pos_push = PushLocal(current_position_); | |
1117 PushArgumentInstr* len_push = PushLocal(capture_length_); | |
1118 BranchOrBacktrack( | |
1119 Comparison(kGT, | |
1120 InstanceCall(InstanceCallDescriptor::FromToken(Token::kADD), | |
1121 pos_push, | |
1122 len_push), | |
1123 Uint64Constant(0)), | |
1124 on_no_match); | |
1125 | |
1126 // Compute pointers to match string and capture string. | |
1127 pos_push = PushLocal(current_position_); | |
1128 len_push = PushLocal(string_param_length_); | |
1129 StoreLocal(match_start_index_, Bind(Add(pos_push, len_push))); | |
1130 | |
1131 pos_push = PushArgument(LoadRegister(start_reg)); | |
1132 len_push = PushLocal(string_param_length_); | |
1133 StoreLocal(capture_start_index_, Bind(Add(pos_push, len_push))); | |
1134 | |
1135 pos_push = PushLocal(match_start_index_); | |
1136 len_push = PushLocal(capture_length_); | |
1137 StoreLocal(match_end_index_, Bind(Add(pos_push, len_push))); | |
1138 | |
1139 BlockLabel loop; | |
1140 BindBlock(&loop); | |
1141 | |
1142 StoreLocal(char_in_capture_, CharacterAt(capture_start_index_)); | |
1143 StoreLocal(char_in_match_, CharacterAt(match_start_index_)); | |
1144 | |
1145 BranchOrBacktrack(Comparison(kNE, | |
1146 LoadLocal(char_in_capture_), | |
1147 LoadLocal(char_in_match_)), | |
1148 on_no_match); | |
1149 | |
1150 // Increment indexes into capture and match strings. | |
1151 PushArgumentInstr* index_push = PushLocal(capture_start_index_); | |
1152 PushArgumentInstr* inc_push = PushArgument(Bind(Uint64Constant(1))); | |
1153 StoreLocal(capture_start_index_, Bind(Add(index_push, inc_push))); | |
1154 | |
1155 index_push = PushLocal(match_start_index_); | |
1156 inc_push = PushArgument(Bind(Uint64Constant(1))); | |
1157 StoreLocal(match_start_index_, Bind(Add(index_push, inc_push))); | |
1158 | |
1159 // Check if we have reached end of match area. | |
1160 BranchOrBacktrack(Comparison(kLT, | |
1161 LoadLocal(match_start_index_), | |
1162 LoadLocal(match_end_index_)), | |
1163 &loop); | |
1164 | |
1165 BindBlock(&success); | |
1166 | |
1167 // Move current character position to position after match. | |
1168 PushArgumentInstr* match_end_push = PushLocal(match_end_index_); | |
1169 len_push = PushLocal(string_param_length_); | |
1170 StoreLocal(current_position_, Bind(Sub(match_end_push, len_push))); | |
1171 | |
1172 BindBlock(&fallthrough); | |
1173 } | |
1174 | |
1175 | |
1176 void IRRegExpMacroAssembler::CheckNotCharacter(uint32_t c, | |
1177 BlockLabel* on_not_equal) { | |
1178 TAG(); | |
1179 BranchOrBacktrack(Comparison(kNE, | |
1180 LoadLocal(current_character_), | |
1181 Uint64Constant(c)), | |
1182 on_not_equal); | |
1183 } | |
1184 | |
1185 | |
1186 void IRRegExpMacroAssembler::CheckCharacterAfterAnd(uint32_t c, | |
1187 uint32_t mask, | |
1188 BlockLabel* on_equal) { | |
1189 TAG(); | |
1190 | |
1191 Definition* actual_def = LoadLocal(current_character_); | |
1192 Definition* expected_def = Uint64Constant(c); | |
1193 | |
1194 PushArgumentInstr* actual_push = PushArgument(Bind(actual_def)); | |
1195 PushArgumentInstr* mask_push = PushArgument(Bind(Uint64Constant(mask))); | |
1196 actual_def = InstanceCall(InstanceCallDescriptor::FromToken(Token::kBIT_AND), | |
1197 actual_push, | |
1198 mask_push); | |
1199 | |
1200 BranchOrBacktrack(Comparison(kEQ, actual_def, expected_def), on_equal); | |
1201 } | |
1202 | |
1203 | |
1204 void IRRegExpMacroAssembler::CheckNotCharacterAfterAnd( | |
1205 uint32_t c, | |
1206 uint32_t mask, | |
1207 BlockLabel* on_not_equal) { | |
1208 TAG(); | |
1209 | |
1210 Definition* actual_def = LoadLocal(current_character_); | |
1211 Definition* expected_def = Uint64Constant(c); | |
1212 | |
1213 PushArgumentInstr* actual_push = PushArgument(Bind(actual_def)); | |
1214 PushArgumentInstr* mask_push = PushArgument(Bind(Uint64Constant(mask))); | |
1215 actual_def = InstanceCall(InstanceCallDescriptor::FromToken(Token::kBIT_AND), | |
1216 actual_push, | |
1217 mask_push); | |
1218 | |
1219 BranchOrBacktrack(Comparison(kNE, actual_def, expected_def), on_not_equal); | |
1220 } | |
1221 | |
1222 | |
1223 void IRRegExpMacroAssembler::CheckNotCharacterAfterMinusAnd( | |
1224 uint16_t c, | |
1225 uint16_t minus, | |
1226 uint16_t mask, | |
1227 BlockLabel* on_not_equal) { | |
1228 TAG(); | |
1229 ASSERT(minus < Utf16::kMaxCodeUnit); // NOLINT | |
1230 | |
1231 Definition* actual_def = LoadLocal(current_character_); | |
1232 Definition* expected_def = Uint64Constant(c); | |
1233 | |
1234 PushArgumentInstr* actual_push = PushArgument(Bind(actual_def)); | |
1235 PushArgumentInstr* minus_push = PushArgument(Bind(Uint64Constant(minus))); | |
1236 | |
1237 actual_push = PushArgument(Bind(Sub(actual_push, minus_push))); | |
1238 PushArgumentInstr* mask_push = PushArgument(Bind(Uint64Constant(mask))); | |
1239 actual_def = InstanceCall(InstanceCallDescriptor::FromToken(Token::kBIT_AND), | |
1240 actual_push, | |
1241 mask_push); | |
1242 | |
1243 BranchOrBacktrack(Comparison(kNE, actual_def, expected_def), on_not_equal); | |
1244 } | |
1245 | |
1246 | |
1247 void IRRegExpMacroAssembler::CheckCharacterInRange( | |
1248 uint16_t from, | |
1249 uint16_t to, | |
1250 BlockLabel* on_in_range) { | |
1251 TAG(); | |
1252 ASSERT(from <= to); | |
1253 | |
1254 // TODO(zerny): All range comparisons could be done cheaper with unsigned | |
1255 // compares. This pattern repeats in various places. | |
1256 | |
1257 BlockLabel on_not_in_range; | |
1258 BranchOrBacktrack(Comparison(kLT, | |
1259 LoadLocal(current_character_), | |
1260 Uint64Constant(from)), | |
1261 &on_not_in_range); | |
1262 BranchOrBacktrack(Comparison(kGT, | |
1263 LoadLocal(current_character_), | |
1264 Uint64Constant(to)), | |
1265 &on_not_in_range); | |
1266 BranchOrBacktrack(NULL, on_in_range); | |
1267 | |
1268 BindBlock(&on_not_in_range); | |
1269 } | |
1270 | |
1271 | |
1272 void IRRegExpMacroAssembler::CheckCharacterNotInRange( | |
1273 uint16_t from, | |
1274 uint16_t to, | |
1275 BlockLabel* on_not_in_range) { | |
1276 TAG(); | |
1277 ASSERT(from <= to); | |
1278 | |
1279 BranchOrBacktrack(Comparison(kLT, | |
1280 LoadLocal(current_character_), | |
1281 Uint64Constant(from)), | |
1282 on_not_in_range); | |
1283 | |
1284 BranchOrBacktrack(Comparison(kGT, | |
1285 LoadLocal(current_character_), | |
1286 Uint64Constant(to)), | |
1287 on_not_in_range); | |
1288 } | |
1289 | |
1290 | |
1291 void IRRegExpMacroAssembler::CheckBitInTable( | |
1292 const TypedData& table, | |
1293 BlockLabel* on_bit_set) { | |
1294 TAG(); | |
1295 | |
1296 PushArgumentInstr* table_push = | |
1297 PushArgument(Bind(new(Z) ConstantInstr(table))); | |
1298 PushArgumentInstr* index_push = PushLocal(current_character_); | |
1299 | |
1300 if (mode_ != ASCII || kTableMask != Symbols::kMaxOneCharCodeSymbol) { | |
1301 PushArgumentInstr* mask_push = | |
1302 PushArgument(Bind(Uint64Constant(kTableSize - 1))); | |
1303 index_push = PushArgument( | |
1304 Bind(InstanceCall(InstanceCallDescriptor::FromToken(Token::kBIT_AND), | |
1305 index_push, | |
1306 mask_push))); | |
1307 } | |
1308 | |
1309 Definition* byte_def = | |
1310 InstanceCall(InstanceCallDescriptor::FromToken(Token::kINDEX), | |
1311 table_push, | |
1312 index_push); | |
1313 Definition* zero_def = Int64Constant(0); | |
1314 | |
1315 BranchOrBacktrack(Comparison(kNE, byte_def, zero_def), on_bit_set); | |
1316 } | |
1317 | |
1318 | |
1319 bool IRRegExpMacroAssembler::CheckSpecialCharacterClass( | |
1320 uint16_t type, | |
1321 BlockLabel* on_no_match) { | |
1322 TAG(); | |
1323 | |
1324 // Range checks (c in min..max) are generally implemented by an unsigned | |
1325 // (c - min) <= (max - min) check | |
1326 switch (type) { | |
1327 case 's': | |
1328 // Match space-characters | |
1329 if (mode_ == ASCII) { | |
1330 // One byte space characters are '\t'..'\r', ' ' and \u00a0. | |
1331 BlockLabel success; | |
1332 // Space (' '). | |
1333 BranchOrBacktrack(Comparison(kEQ, | |
1334 LoadLocal(current_character_), | |
1335 Uint64Constant(' ')), | |
1336 &success); | |
1337 // Check range 0x09..0x0d. | |
1338 CheckCharacterInRange('\t', '\r', &success); | |
1339 // \u00a0 (NBSP). | |
1340 BranchOrBacktrack(Comparison(kNE, | |
1341 LoadLocal(current_character_), | |
1342 Uint64Constant(0x00a0)), | |
1343 on_no_match); | |
1344 BindBlock(&success); | |
1345 return true; | |
1346 } | |
1347 return false; | |
1348 case 'S': | |
1349 // The emitted code for generic character classes is good enough. | |
1350 return false; | |
1351 case 'd': | |
1352 // Match ASCII digits ('0'..'9') | |
1353 CheckCharacterNotInRange('0', '9', on_no_match); | |
1354 return true; | |
1355 case 'D': | |
1356 // Match non ASCII-digits | |
1357 CheckCharacterInRange('0', '9', on_no_match); | |
1358 return true; | |
1359 case '.': { | |
1360 // Match non-newlines (not 0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029) | |
1361 BranchOrBacktrack(Comparison(kEQ, | |
1362 LoadLocal(current_character_), | |
1363 Uint64Constant('\n')), | |
1364 on_no_match); | |
1365 BranchOrBacktrack(Comparison(kEQ, | |
1366 LoadLocal(current_character_), | |
1367 Uint64Constant('\r')), | |
1368 on_no_match); | |
1369 if (mode_ == UC16) { | |
1370 BranchOrBacktrack(Comparison(kEQ, | |
1371 LoadLocal(current_character_), | |
1372 Uint64Constant(0x2028)), | |
1373 on_no_match); | |
1374 BranchOrBacktrack(Comparison(kEQ, | |
1375 LoadLocal(current_character_), | |
1376 Uint64Constant(0x2029)), | |
1377 on_no_match); | |
1378 } | |
1379 return true; | |
1380 } | |
1381 case 'w': { | |
1382 if (mode_ != ASCII) { | |
1383 // Table is 128 entries, so all ASCII characters can be tested. | |
1384 BranchOrBacktrack(Comparison(kGT, | |
1385 LoadLocal(current_character_), | |
1386 Uint64Constant('z')), | |
1387 on_no_match); | |
1388 } | |
1389 | |
1390 PushArgumentInstr* table_push = | |
1391 PushArgument(Bind(WordCharacterMapConstant())); | |
1392 PushArgumentInstr* index_push = PushLocal(current_character_); | |
1393 | |
1394 Definition* byte_def = | |
1395 InstanceCall(InstanceCallDescriptor::FromToken(Token::kINDEX), | |
1396 table_push, | |
1397 index_push); | |
1398 Definition* zero_def = Int64Constant(0); | |
1399 | |
1400 BranchOrBacktrack(Comparison(kEQ, byte_def, zero_def), on_no_match); | |
1401 | |
1402 return true; | |
1403 } | |
1404 case 'W': { | |
1405 BlockLabel done; | |
1406 if (mode_ != ASCII) { | |
1407 // Table is 128 entries, so all ASCII characters can be tested. | |
1408 BranchOrBacktrack(Comparison(kGT, | |
1409 LoadLocal(current_character_), | |
1410 Uint64Constant('z')), | |
1411 &done); | |
1412 } | |
1413 | |
1414 // TODO(zerny): Refactor to use CheckBitInTable if possible. | |
1415 | |
1416 PushArgumentInstr* table_push = | |
1417 PushArgument(Bind(WordCharacterMapConstant())); | |
1418 PushArgumentInstr* index_push = PushLocal(current_character_); | |
1419 | |
1420 Definition* byte_def = | |
1421 InstanceCall(InstanceCallDescriptor::FromToken(Token::kINDEX), | |
1422 table_push, | |
1423 index_push); | |
1424 Definition* zero_def = Int64Constant(0); | |
1425 | |
1426 BranchOrBacktrack(Comparison(kNE, byte_def, zero_def), on_no_match); | |
1427 | |
1428 if (mode_ != ASCII) { | |
1429 BindBlock(&done); | |
1430 } | |
1431 return true; | |
1432 } | |
1433 // Non-standard classes (with no syntactic shorthand) used internally. | |
1434 case '*': | |
1435 // Match any character. | |
1436 return true; | |
1437 case 'n': { | |
1438 // Match newlines (0x0a('\n'), 0x0d('\r'), 0x2028 or 0x2029). | |
1439 // The opposite of '.'. | |
1440 BlockLabel success; | |
1441 BranchOrBacktrack(Comparison(kEQ, | |
1442 LoadLocal(current_character_), | |
1443 Uint64Constant('\n')), | |
1444 &success); | |
1445 BranchOrBacktrack(Comparison(kEQ, | |
1446 LoadLocal(current_character_), | |
1447 Uint64Constant('\r')), | |
1448 &success); | |
1449 if (mode_ == UC16) { | |
1450 BranchOrBacktrack(Comparison(kEQ, | |
1451 LoadLocal(current_character_), | |
1452 Uint64Constant(0x2028)), | |
1453 &success); | |
1454 BranchOrBacktrack(Comparison(kEQ, | |
1455 LoadLocal(current_character_), | |
1456 Uint64Constant(0x2029)), | |
1457 &success); | |
1458 } | |
1459 BranchOrBacktrack(NULL, on_no_match); | |
1460 BindBlock(&success); | |
1461 return true; | |
1462 } | |
1463 // No custom implementation (yet): s(uint16_t), S(uint16_t). | |
1464 default: | |
1465 return false; | |
1466 } | |
1467 } | |
1468 | |
1469 | |
1470 void IRRegExpMacroAssembler::Fail() { | |
1471 TAG(); | |
1472 ASSERT(FAILURE == 0); // Return value for failure is zero. | |
1473 if (!global()) { | |
1474 UNREACHABLE(); // Dart regexps are always global. | |
1475 } | |
1476 GoTo(exit_block_); | |
1477 } | |
1478 | |
1479 | |
1480 void IRRegExpMacroAssembler::IfRegisterGE(intptr_t reg, | |
1481 intptr_t comparand, | |
1482 BlockLabel* if_ge) { | |
1483 TAG(); | |
1484 PushArgumentInstr* reg_push = PushArgument(LoadRegister(reg)); | |
1485 PushArgumentInstr* pos = PushArgument(Bind(Int64Constant(comparand))); | |
1486 BranchOrBacktrack(Comparison(kGTE, reg_push, pos), if_ge); | |
1487 } | |
1488 | |
1489 | |
1490 void IRRegExpMacroAssembler::IfRegisterLT(intptr_t reg, | |
1491 intptr_t comparand, | |
1492 BlockLabel* if_lt) { | |
1493 TAG(); | |
1494 PushArgumentInstr* reg_push = PushArgument(LoadRegister(reg)); | |
1495 PushArgumentInstr* pos = PushArgument(Bind(Int64Constant(comparand))); | |
1496 BranchOrBacktrack(Comparison(kLT, reg_push, pos), if_lt); | |
1497 } | |
1498 | |
1499 | |
1500 void IRRegExpMacroAssembler::IfRegisterEqPos(intptr_t reg, | |
1501 BlockLabel* if_eq) { | |
1502 TAG(); | |
1503 PushArgumentInstr* reg_push = PushArgument(LoadRegister(reg)); | |
1504 PushArgumentInstr* pos = PushArgument(Bind(LoadLocal(current_position_))); | |
1505 BranchOrBacktrack(Comparison(kEQ, reg_push, pos), if_eq); | |
1506 } | |
1507 | |
1508 | |
1509 RegExpMacroAssembler::IrregexpImplementation | |
1510 IRRegExpMacroAssembler::Implementation() { | |
1511 return kIRImplementation; | |
1512 } | |
1513 | |
1514 | |
1515 void IRRegExpMacroAssembler::LoadCurrentCharacter(intptr_t cp_offset, | |
1516 BlockLabel* on_end_of_input, | |
1517 bool check_bounds, | |
1518 intptr_t characters) { | |
1519 TAG(); | |
1520 ASSERT(cp_offset >= -1); // ^ and \b can look behind one character. | |
1521 ASSERT(cp_offset < (1<<30)); // Be sane! (And ensure negation works) | |
1522 if (check_bounds) { | |
1523 CheckPosition(cp_offset + characters - 1, on_end_of_input); | |
1524 } | |
1525 LoadCurrentCharacterUnchecked(cp_offset, characters); | |
1526 } | |
1527 | |
1528 | |
1529 void IRRegExpMacroAssembler::PopCurrentPosition() { | |
1530 TAG(); | |
1531 StoreLocal(current_position_, Bind(PopStack())); | |
1532 } | |
1533 | |
1534 | |
1535 void IRRegExpMacroAssembler::PopRegister(intptr_t reg) { | |
1536 TAG(); | |
1537 ASSERT(reg < registers_count_); | |
1538 PushArgumentInstr* registers_push = PushLocal(registers_); | |
1539 PushArgumentInstr* index_push = PushRegisterIndex(reg); | |
1540 PushArgumentInstr* pop_push = PushArgument(Bind(PopStack())); | |
1541 StoreRegister(registers_push, index_push, pop_push); | |
1542 } | |
1543 | |
1544 | |
1545 void IRRegExpMacroAssembler::PushStack(Definition *definition) { | |
1546 PushArgumentInstr* stack_push = PushLocal(stack_); | |
1547 PushArgumentInstr* stack_pointer_push = PushLocal(stack_pointer_); | |
1548 StoreLocal(stack_pointer_, | |
1549 Bind(Add(stack_pointer_push, | |
1550 PushArgument(Bind(Uint64Constant(1)))))); | |
1551 stack_pointer_push = PushLocal(stack_pointer_); | |
1552 // TODO(zerny): bind value and push could break stack discipline. | |
1553 PushArgumentInstr* value_push = PushArgument(Bind(definition)); | |
1554 Do(InstanceCall(InstanceCallDescriptor::FromToken(Token::kASSIGN_INDEX), | |
1555 stack_push, | |
1556 stack_pointer_push, | |
1557 value_push)); | |
1558 } | |
1559 | |
1560 | |
1561 Definition* IRRegExpMacroAssembler::PopStack() { | |
1562 PushArgumentInstr* stack_push = PushLocal(stack_); | |
1563 PushArgumentInstr* stack_pointer_push1 = PushLocal(stack_pointer_); | |
1564 PushArgumentInstr* stack_pointer_push2 = PushLocal(stack_pointer_); | |
1565 StoreLocal(stack_pointer_, | |
1566 Bind(Sub(stack_pointer_push2, | |
1567 PushArgument(Bind(Uint64Constant(1)))))); | |
1568 return InstanceCall(InstanceCallDescriptor::FromToken(Token::kINDEX), | |
1569 stack_push, | |
1570 stack_pointer_push1); | |
1571 } | |
1572 | |
1573 | |
1574 Definition* IRRegExpMacroAssembler::PeekStack() { | |
1575 PushArgumentInstr* stack_push = PushLocal(stack_); | |
1576 PushArgumentInstr* stack_pointer_push = PushLocal(stack_pointer_); | |
1577 return InstanceCall(InstanceCallDescriptor::FromToken(Token::kINDEX), | |
1578 stack_push, | |
1579 stack_pointer_push); | |
1580 } | |
1581 | |
1582 | |
1583 // Pushes the location corresponding to label to the backtracking stack. | |
1584 void IRRegExpMacroAssembler::PushBacktrack(BlockLabel* label) { | |
1585 TAG(); | |
1586 | |
1587 // Ensure that targets of indirect jumps are never accessed through a | |
1588 // normal control flow instructions by creating a new block for each backtrack | |
1589 // target. | |
1590 IndirectEntryInstr* indirect_target = IndirectWithJoinGoto(label->block()); | |
1591 | |
1592 // Add a fake edge from the graph entry for data flow analysis. | |
1593 entry_block_->AddIndirectEntry(indirect_target); | |
1594 | |
1595 ConstantInstr* offset = Uint64Constant(indirect_target->indirect_id()); | |
1596 PushStack(offset); | |
1597 CheckStackLimit(); | |
1598 } | |
1599 | |
1600 | |
1601 void IRRegExpMacroAssembler::PushCurrentPosition() { | |
1602 TAG(); | |
1603 PushStack(LoadLocal(current_position_)); | |
1604 } | |
1605 | |
1606 | |
1607 void IRRegExpMacroAssembler::PushRegister(intptr_t reg) { | |
1608 TAG(); | |
1609 // TODO(zerny): Refactor PushStack so it can be reused here. | |
1610 PushArgumentInstr* stack_push = PushLocal(stack_); | |
1611 PushArgumentInstr* stack_pointer_push = PushLocal(stack_pointer_); | |
1612 StoreLocal(stack_pointer_, | |
1613 Bind(Add(stack_pointer_push, | |
1614 PushArgument(Bind(Uint64Constant(1)))))); | |
1615 stack_pointer_push = PushLocal(stack_pointer_); | |
1616 // TODO(zerny): bind value and push could break stack discipline. | |
1617 PushArgumentInstr* value_push = PushArgument(LoadRegister(reg)); | |
1618 Do(InstanceCall(InstanceCallDescriptor::FromToken(Token::kASSIGN_INDEX), | |
1619 stack_push, | |
1620 stack_pointer_push, | |
1621 value_push)); | |
1622 CheckStackLimit(); | |
1623 } | |
1624 | |
1625 | |
1626 // Checks that (stack.capacity - stack_limit_slack) > stack_pointer. | |
1627 // This ensures that up to stack_limit_slack stack pushes can be | |
1628 // done without exhausting the stack space. If the check fails the | |
1629 // stack will be grown. | |
1630 void IRRegExpMacroAssembler::CheckStackLimit() { | |
1631 TAG(); | |
1632 PushArgumentInstr* stack_push = PushLocal(stack_); | |
1633 PushArgumentInstr* length_push = PushArgument(Bind(InstanceCall( | |
1634 InstanceCallDescriptor( | |
1635 String::ZoneHandle(Field::GetterSymbol(Symbols::Length()))), | |
1636 stack_push))); | |
1637 PushArgumentInstr* capacity_push = PushArgument(Bind(Sub( | |
1638 length_push, | |
1639 PushArgument(Bind(Uint64Constant(stack_limit_slack())))))); | |
1640 PushArgumentInstr* stack_pointer_push = PushLocal(stack_pointer_); | |
1641 BranchInstr* branch = new(Z) BranchInstr( | |
1642 Comparison(kGT, capacity_push, stack_pointer_push)); | |
1643 CloseBlockWith(branch); | |
1644 | |
1645 BlockLabel grow_stack; | |
1646 BlockLabel fallthrough; | |
1647 *branch->true_successor_address() = | |
1648 TargetWithJoinGoto(fallthrough.block()); | |
1649 *branch->false_successor_address() = | |
1650 TargetWithJoinGoto(grow_stack.block()); | |
1651 | |
1652 BindBlock(&grow_stack); | |
1653 GrowStack(); | |
1654 | |
1655 BindBlock(&fallthrough); | |
1656 } | |
1657 | |
1658 | |
1659 void IRRegExpMacroAssembler::GrowStack() { | |
1660 TAG(); | |
1661 Value* cell = Bind(new(Z) ConstantInstr(stack_array_cell_)); | |
1662 StoreLocal(stack_, Bind(new(Z) GrowRegExpStackInstr(cell))); | |
1663 } | |
1664 | |
1665 | |
1666 void IRRegExpMacroAssembler::ReadCurrentPositionFromRegister(intptr_t reg) { | |
1667 TAG(); | |
1668 StoreLocal(current_position_, LoadRegister(reg)); | |
1669 } | |
1670 | |
1671 // Resets the tip of the stack to the value stored in reg. | |
1672 void IRRegExpMacroAssembler::ReadStackPointerFromRegister(intptr_t reg) { | |
1673 TAG(); | |
1674 ASSERT(reg < registers_count_); | |
1675 StoreLocal(stack_pointer_, LoadRegister(reg)); | |
1676 } | |
1677 | |
1678 void IRRegExpMacroAssembler::SetCurrentPositionFromEnd(intptr_t by) { | |
1679 TAG(); | |
1680 | |
1681 BlockLabel after_position; | |
1682 | |
1683 Definition* cur_pos_def = LoadLocal(current_position_); | |
1684 Definition* by_value_def = Int64Constant(-by); | |
1685 | |
1686 BranchOrBacktrack(Comparison(kGTE, cur_pos_def, by_value_def), | |
1687 &after_position); | |
1688 | |
1689 StoreLocal(current_position_, Bind(Int64Constant(-by))); | |
1690 | |
1691 // On RegExp code entry (where this operation is used), the character before | |
1692 // the current position is expected to be already loaded. | |
1693 // We have advanced the position, so it's safe to read backwards. | |
1694 LoadCurrentCharacterUnchecked(-1, 1); | |
1695 | |
1696 BindBlock(&after_position); | |
1697 } | |
1698 | |
1699 | |
1700 void IRRegExpMacroAssembler::SetRegister(intptr_t reg, intptr_t to) { | |
1701 TAG(); | |
1702 // Reserved for positions! | |
1703 ASSERT(reg >= saved_registers_count_); | |
1704 StoreRegister(reg, to); | |
1705 } | |
1706 | |
1707 | |
1708 bool IRRegExpMacroAssembler::Succeed() { | |
1709 TAG(); | |
1710 GoTo(success_block_); | |
1711 return global(); | |
1712 } | |
1713 | |
1714 | |
1715 void IRRegExpMacroAssembler::WriteCurrentPositionToRegister( | |
1716 intptr_t reg, intptr_t cp_offset) { | |
1717 TAG(); | |
1718 | |
1719 PushArgumentInstr* registers_push = PushLocal(registers_); | |
1720 PushArgumentInstr* index_push = PushRegisterIndex(reg); | |
1721 PushArgumentInstr* pos_push = PushLocal(current_position_); | |
1722 PushArgumentInstr* off_push = PushArgument(Bind(Int64Constant(cp_offset))); | |
1723 PushArgumentInstr* neg_off_push = PushArgument(Bind(Add(pos_push, off_push))); | |
1724 // Push the negative offset; these are converted to positive string positions | |
1725 // within the success block. | |
1726 StoreRegister(registers_push, index_push, neg_off_push); | |
1727 } | |
1728 | |
1729 | |
1730 void IRRegExpMacroAssembler::ClearRegisters( | |
1731 intptr_t reg_from, intptr_t reg_to) { | |
1732 TAG(); | |
1733 | |
1734 ASSERT(reg_from <= reg_to); | |
1735 | |
1736 // In order to clear registers to a final result value of -1, set them to | |
1737 // (-1 - string length), the offset of -1 from the end of the string. | |
1738 | |
1739 for (intptr_t reg = reg_from; reg <= reg_to; reg++) { | |
1740 PushArgumentInstr* registers_push = PushLocal(registers_); | |
1741 PushArgumentInstr* index_push = PushRegisterIndex(reg); | |
1742 PushArgumentInstr* minus_one_push = | |
1743 PushArgument(Bind(Int64Constant(-1))); | |
1744 PushArgumentInstr* length_push = PushLocal(string_param_length_); | |
1745 PushArgumentInstr* value_push = | |
1746 PushArgument(Bind(Sub(minus_one_push, length_push))); | |
1747 StoreRegister(registers_push, index_push, value_push); | |
1748 } | |
1749 } | |
1750 | |
1751 | |
1752 void IRRegExpMacroAssembler::WriteStackPointerToRegister(intptr_t reg) { | |
1753 TAG(); | |
1754 | |
1755 PushArgumentInstr* registers_push = PushLocal(registers_); | |
1756 PushArgumentInstr* index_push = PushRegisterIndex(reg); | |
1757 PushArgumentInstr* tip_push = PushLocal(stack_pointer_); | |
1758 StoreRegister(registers_push, index_push, tip_push); | |
1759 } | |
1760 | |
1761 | |
1762 // Private methods: | |
1763 | |
1764 | |
1765 void IRRegExpMacroAssembler::CheckPosition(intptr_t cp_offset, | |
1766 BlockLabel* on_outside_input) { | |
1767 TAG(); | |
1768 Definition* curpos_def = LoadLocal(current_position_); | |
1769 Definition* cp_off_def = Int64Constant(-cp_offset); | |
1770 | |
1771 // If (current_position_ < -cp_offset), we are in bounds. | |
1772 // Remember, current_position_ is a negative offset from the string end. | |
1773 | |
1774 BranchOrBacktrack(Comparison(kGTE, curpos_def, cp_off_def), | |
1775 on_outside_input); | |
1776 } | |
1777 | |
1778 | |
1779 void IRRegExpMacroAssembler::BranchOrBacktrack( | |
1780 ComparisonInstr* comparison, | |
1781 BlockLabel* true_successor) { | |
1782 if (comparison == NULL) { // No condition | |
1783 if (true_successor == NULL) { | |
1784 Backtrack(); | |
1785 return; | |
1786 } | |
1787 GoTo(true_successor); | |
1788 return; | |
1789 } | |
1790 | |
1791 // If no successor block has been passed in, backtrack. | |
1792 JoinEntryInstr* true_successor_block = backtrack_block_; | |
1793 if (true_successor != NULL) { | |
1794 true_successor->SetLinked(); | |
1795 true_successor_block = true_successor->block(); | |
1796 } | |
1797 ASSERT(true_successor_block != NULL); | |
1798 | |
1799 // If the condition is not true, fall through to a new block. | |
1800 BlockLabel fallthrough; | |
1801 | |
1802 BranchInstr* branch = new(Z) BranchInstr(comparison); | |
1803 *branch->true_successor_address() = | |
1804 TargetWithJoinGoto(true_successor_block); | |
1805 *branch->false_successor_address() = | |
1806 TargetWithJoinGoto(fallthrough.block()); | |
1807 | |
1808 CloseBlockWith(branch); | |
1809 BindBlock(&fallthrough); | |
1810 } | |
1811 | |
1812 | |
1813 TargetEntryInstr* IRRegExpMacroAssembler::TargetWithJoinGoto( | |
1814 JoinEntryInstr* dst) { | |
1815 TargetEntryInstr* target = new(Z) TargetEntryInstr( | |
1816 block_id_.Alloc(), kInvalidTryIndex); | |
1817 blocks_.Add(target); | |
1818 | |
1819 target->AppendInstruction(new(Z) GotoInstr(dst)); | |
1820 | |
1821 return target; | |
1822 } | |
1823 | |
1824 | |
1825 IndirectEntryInstr* IRRegExpMacroAssembler::IndirectWithJoinGoto( | |
1826 JoinEntryInstr* dst) { | |
1827 IndirectEntryInstr* target = new(Z) IndirectEntryInstr( | |
1828 block_id_.Alloc(), indirect_id_.Alloc(), kInvalidTryIndex); | |
1829 blocks_.Add(target); | |
1830 | |
1831 target->AppendInstruction(new(Z) GotoInstr(dst)); | |
1832 | |
1833 return target; | |
1834 } | |
1835 | |
1836 | |
1837 void IRRegExpMacroAssembler::CheckPreemption() { | |
1838 TAG(); | |
1839 AppendInstruction(new(Z) CheckStackOverflowInstr(kNoSourcePos, 0)); | |
1840 } | |
1841 | |
1842 | |
1843 Definition* IRRegExpMacroAssembler::Add( | |
1844 PushArgumentInstr* lhs, | |
1845 PushArgumentInstr* rhs) { | |
1846 return InstanceCall(InstanceCallDescriptor::FromToken(Token::kADD), lhs, rhs); | |
1847 } | |
1848 | |
1849 | |
1850 Definition* IRRegExpMacroAssembler::Sub( | |
1851 PushArgumentInstr* lhs, | |
1852 PushArgumentInstr* rhs) { | |
1853 return InstanceCall(InstanceCallDescriptor::FromToken(Token::kSUB), lhs, rhs); | |
1854 } | |
1855 | |
1856 | |
1857 void IRRegExpMacroAssembler::LoadCurrentCharacterUnchecked( | |
1858 intptr_t cp_offset, intptr_t characters) { | |
1859 TAG(); | |
1860 | |
1861 ASSERT(characters == 1 || CanReadUnaligned()); | |
1862 if (mode_ == ASCII) { | |
1863 ASSERT(characters == 1 || characters == 2 || characters == 4); | |
1864 } else { | |
1865 ASSERT(mode_ == UC16); | |
1866 ASSERT(characters == 1 || characters == 2); | |
1867 } | |
1868 | |
1869 // Calculate the addressed string index as: | |
1870 // cp_offset + current_position_ + string_param_length_ | |
1871 // TODO(zerny): Avoid generating 'add' instance-calls here. | |
1872 PushArgumentInstr* off_arg = | |
1873 PushArgument(Bind(Int64Constant(cp_offset))); | |
1874 PushArgumentInstr* pos_arg = | |
1875 PushArgument(BindLoadLocal(*current_position_)); | |
1876 PushArgumentInstr* off_pos_arg = | |
1877 PushArgument(Bind(Add(off_arg, pos_arg))); | |
1878 PushArgumentInstr* len_arg = | |
1879 PushArgument(BindLoadLocal(*string_param_length_)); | |
1880 // Index is stored in a temporary local so that we can later load it safely. | |
1881 StoreLocal(index_temp_, Bind(Add(off_pos_arg, len_arg))); | |
1882 | |
1883 // Load and store the code units. | |
1884 Value* code_unit_value = LoadCodeUnitsAt(index_temp_, characters); | |
1885 StoreLocal(current_character_, code_unit_value); | |
1886 PRINT(PushLocal(current_character_)); | |
1887 } | |
1888 | |
1889 | |
1890 Value* IRRegExpMacroAssembler::CharacterAt(LocalVariable* index) { | |
1891 return LoadCodeUnitsAt(index, 1); | |
1892 } | |
1893 | |
1894 | |
1895 Value* IRRegExpMacroAssembler::LoadCodeUnitsAt(LocalVariable* index, | |
1896 intptr_t characters) { | |
1897 // Bind the pattern as the load receiver. | |
1898 Value* pattern_val = BindLoadLocal(*string_param_); | |
1899 if (RawObject::IsExternalStringClassId(specialization_cid_)) { | |
1900 // The data of an external string is stored through two indirections. | |
1901 intptr_t external_offset = 0; | |
1902 intptr_t data_offset = 0; | |
1903 if (specialization_cid_ == kExternalOneByteStringCid) { | |
1904 external_offset = ExternalOneByteString::external_data_offset(); | |
1905 data_offset = RawExternalOneByteString::ExternalData::data_offset(); | |
1906 } else if (specialization_cid_ == kExternalTwoByteStringCid) { | |
1907 external_offset = ExternalTwoByteString::external_data_offset(); | |
1908 data_offset = RawExternalTwoByteString::ExternalData::data_offset(); | |
1909 } else { | |
1910 UNREACHABLE(); | |
1911 } | |
1912 // This pushes untagged values on the stack which are immediately consumed: | |
1913 // the first value is consumed to obtain the second value which is consumed | |
1914 // by LoadCodeUnitsAtInstr below. | |
1915 Value* external_val = | |
1916 Bind(new(Z) LoadUntaggedInstr(pattern_val, external_offset)); | |
1917 pattern_val = | |
1918 Bind(new(Z) LoadUntaggedInstr(external_val, data_offset)); | |
1919 } | |
1920 | |
1921 // Here pattern_val might be untagged so this must not trigger a GC. | |
1922 Value* index_val = BindLoadLocal(*index); | |
1923 | |
1924 return Bind(new(Z) LoadCodeUnitsInstr( | |
1925 pattern_val, | |
1926 index_val, | |
1927 characters, | |
1928 specialization_cid_, | |
1929 Scanner::kNoSourcePos)); | |
1930 } | |
1931 | |
1932 | |
1933 #undef __ | |
1934 | |
1935 } // namespace dart | 21 } // namespace dart |
OLD | NEW |