Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(308)

Side by Side Diff: runtime/vm/regexp_assembler.cc

Issue 539153002: Port and integrate the irregexp engine from V8 (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: Explicitly null IC-Data, whitespace fixes in tests. Created 6 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file.
4
5 #include "vm/regexp_assembler.h"
6
7 #include "vm/bit_vector.h"
8 #include "vm/compiler.h"
9 #include "vm/dart_entry.h"
10 #include "vm/flow_graph_builder.h"
11 #include "vm/il_printer.h"
12 #include "vm/object_store.h"
13 #include "vm/regexp.h"
14 #include "vm/resolver.h"
15 #include "vm/stack_frame.h"
16 #include "vm/unibrow-inl.h"
17 #include "vm/unicode.h"
18
19 #define I isolate()
20
// Debugging output macros. TAG() is called at the head of each interesting
// function and prints its name during execution if irregexp tracing is enabled.
// TAG() is wrapped in do { ... } while (0) so that `TAG();` expands to exactly
// one statement and stays safe as the body of an unbraced if/else.
#define TAG() \
  do { \
    if (FLAG_trace_irregexp) { \
      TAG_(); \
    } \
  } while (0)
// Emits IL that prints "TAG: " followed by the enclosing function's name.
#define TAG_() \
  Print(PushArgument( \
    Bind(new(I) ConstantInstr(String::ZoneHandle(I, String::Concat( \
        String::Handle(String::New("TAG: ")), \
        String::Handle(String::New(__FUNCTION__)), Heap::kOld))))));
29
// Emits IL that prints `arg`, but only when irregexp tracing is enabled; `arg`
// is not evaluated otherwise. Wrapped in do { ... } while (0) so that
// `PRINT(x);` is a single statement and is safe inside unbraced if/else.
#define PRINT(arg) \
  do { \
    if (FLAG_trace_irregexp) { \
      Print(arg); \
    } \
  } while (0)
31
32 namespace dart {
33
// Flag consulted by the TAG(), PRINT() and dead-code-warning macros in this
// file; it defaults to false.
34 DEFINE_FLAG(bool, trace_irregexp, false, "Trace irregexps");
35
// Passed as the try index when the preset block entries are created below.
36 static const intptr_t kInvalidTryIndex = -1;
// Passed as the token position for the instructions and local variables
// created below.
37 static const intptr_t kNoTokenPos = -1;
38
39 /*
40 * This assembler uses the following main local variables:
41 * - stack_: A pointer to a growable list which we use as an all-purpose stack
42 * storing backtracking offsets, positions & stored register values.
43 * - current_character_: Stores the currently loaded characters (possibly more
44 * than one).
45 * - current_position_: The current position within the string, stored as a
46 * negative offset from the end of the string (i.e. the
47 * position corresponding to str[0] is -str.length).
48 * Note that current_position_ is *not* byte-based, unlike
49 * original V8 code.
50 *
51 * Results are returned through an array of capture indices, stored at
52 * matches_param_. A null array specifies a failure to match. The match indices
53 * [start_inclusive, end_exclusive] for capture group i are stored at positions
54 * matches_param_[i * 2] and matches_param_[i * 2 + 1], respectively. Match
55 * indices of -1 denote non-matched groups. Note that we store these indices
56 * as a negative offset from the end of the string in position_registers_
57 * during processing, and convert them to standard indexes when copying them
58 * to matches_param_ on successful match.
59 */
60
61 RegExpMacroAssembler::RegExpMacroAssembler(Isolate* isolate)
62 : slow_safe_compiler_(false),
63 global_mode_(NOT_GLOBAL),
64 isolate_(isolate) {
65 }
66
67
68 RegExpMacroAssembler::~RegExpMacroAssembler() {
69 }
70
71
72 IRRegExpMacroAssembler::IRRegExpMacroAssembler(
73 intptr_t specialization_cid,
74 intptr_t capture_count,
75 const ParsedFunction* parsed_function,
76 ZoneGrowableArray<const ICData*>* ic_data_array,
77 Isolate* isolate)
78 : RegExpMacroAssembler(isolate),
79 specialization_cid_(specialization_cid),
80 parsed_function_(parsed_function),
81 ic_data_array_(ic_data_array),
82 current_instruction_(NULL),
83 stack_(NULL),
84 current_character_(NULL),
85 current_position_(NULL),
86 string_param_(NULL),
87 string_param_length_(NULL),
88 start_index_param_(NULL),
89 position_registers_count_((capture_count + 1) * 2),
90 stack_array_(GrowableObjectArray::ZoneHandle(
91 isolate, GrowableObjectArray::New(16, Heap::kOld))) {
92 switch (specialization_cid) {
93 case kOneByteStringCid:
94 case kExternalOneByteStringCid: mode_ = ASCII; break;
95 case kTwoByteStringCid:
96 case kExternalTwoByteStringCid: mode_ = UC16; break;
97 default: UNREACHABLE();
98 }
99
100 InitializeLocals();
101
102 // Create and generate all preset blocks.
103 entry_block_ =
104 new(isolate) GraphEntryInstr(
105 parsed_function_,
106 new(isolate) TargetEntryInstr(block_id.Alloc(), kInvalidTryIndex),
107 Isolate::kNoDeoptId);
108 start_block_ =
109 new(isolate) JoinEntryInstr(block_id.Alloc(), kInvalidTryIndex);
110 success_block_ =
111 new(isolate) JoinEntryInstr(block_id.Alloc(), kInvalidTryIndex);
112 backtrack_block_ =
113 new(isolate) JoinEntryInstr(block_id.Alloc(), kInvalidTryIndex);
114 exit_block_ =
115 new(isolate) JoinEntryInstr(block_id.Alloc(), kInvalidTryIndex);
116
117 GenerateEntryBlock();
118 GenerateSuccessBlock();
119 GenerateBacktrackBlock();
120 GenerateExitBlock();
121
122 blocks_.Add(entry_block_);
123 blocks_.Add(entry_block_->normal_entry());
124 blocks_.Add(start_block_);
125 blocks_.Add(success_block_);
126 blocks_.Add(backtrack_block_);
127 blocks_.Add(exit_block_);
128
129 // Begin emission at the start_block_.
130 set_current_instruction(start_block_);
131 }
132
133
134 IRRegExpMacroAssembler::~IRRegExpMacroAssembler() { }
135
136
137 void IRRegExpMacroAssembler::InitializeLocals() {
138 // Create local variables and parameters.
139 stack_ = Local(Symbols::stack_());
140 current_character_ = Local(Symbols::current_character_());
141 current_position_ = Local(Symbols::current_position_());
142 string_param_length_ = Local(Symbols::string_param_length_());
143 capture_length_ = Local(Symbols::capture_length_());
144 match_start_index_ = Local(Symbols::match_start_index_());
145 capture_start_index_ = Local(Symbols::capture_start_index_());
146 match_end_index_ = Local(Symbols::match_end_index_());
147 char_in_capture_ = Local(Symbols::char_in_capture_());
148 char_in_match_ = Local(Symbols::char_in_match_());
149 result_ = Local(Symbols::result_());
150
151 string_param_ = Parameter(Symbols::string_param_(), 0);
152 start_index_param_ = Parameter(Symbols::start_index_param_(), 1);
153
154 // Reserve space for all captured group positions. Note that more might
155 // be created on the fly for internal use.
156 for (intptr_t i = 0; i < position_registers_count_; i++) {
157 position_register(i);
158 }
159 }
160
161
162 void IRRegExpMacroAssembler::GenerateEntryBlock() {
163 set_current_instruction(entry_block_->normal_entry());
164 TAG();
165
166 // Generate a local list variable which we will use as a backtracking stack.
167
168 StoreLocal(stack_, Bind(new(I) ConstantInstr(stack_array_)));
169 Do(InstanceCall(InstanceCallDescriptor(Symbols::clear()), PushLocal(stack_)));
170
171 // Store string.length.
172 PushArgumentInstr* string_push = PushLocal(string_param_);
173
174 StoreLocal(string_param_length_,
175 Bind(InstanceCall(InstanceCallDescriptor(
176 String::ZoneHandle(
177 Field::GetterSymbol(Symbols::Length()))),
178 string_push)));
179
180 // Initialize all capture registers.
181 ClearRegisters(0, position_registers_count_ - 1);
182
183 // Store (start_index - string.length) as the current position (since it's a
184 // negative offset from the end of the string).
185 PushArgumentInstr* start_index_push = PushLocal(start_index_param_);
186 PushArgumentInstr* length_push = PushLocal(string_param_length_);
187
188 StoreLocal(current_position_, Sub(start_index_push, length_push));
189
190 // Jump to the start block.
191 current_instruction_->Goto(start_block_);
192 }
193
194
195 void IRRegExpMacroAssembler::GenerateBacktrackBlock() {
196 set_current_instruction(backtrack_block_);
197 TAG();
198 Backtrack();
199 }
200
201
202 void IRRegExpMacroAssembler::GenerateSuccessBlock() {
203 set_current_instruction(success_block_);
204 TAG();
205
206 Definition* type_args_null_def = new(I) ConstantInstr(
207 TypeArguments::ZoneHandle(I, TypeArguments::null()));
208 PushArgumentInstr* type_arg_push = PushArgument(Bind(type_args_null_def));
209 PushArgumentInstr* length_push =
210 PushArgument(Bind(Uint64Constant(position_registers_count_)));
211
212 const Library& lib = Library::Handle(Library::CoreLibrary());
213 const Class& list_class = Class::Handle(
214 lib.LookupCoreClass(Symbols::List()));
215 const Function& list_ctor =
216 Function::ZoneHandle(I, list_class.LookupFactory(Symbols::ListFactory()));
217
218 // TODO(jgruber): Use CreateArrayInstr and StoreIndexed instead.
219 StoreLocal(result_, Bind(StaticCall(list_ctor, type_arg_push, length_push)));
220
221 // Store captured offsets in the `matches` parameter.
222 // TODO(jgruber): Eliminate position_register locals and access `matches`
223 // directly.
224 for (intptr_t i = 0; i < position_registers_count_; i++) {
225 PushArgumentInstr* matches_push = PushLocal(result_);
226 PushArgumentInstr* index_push = PushArgument(Bind(Uint64Constant(i)));
227
228 // Convert negative offsets from the end of the string to string indices.
229 PushArgumentInstr* offset_push = PushLocal(position_register(i));
230 PushArgumentInstr* len_push = PushLocal(string_param_length_);
231 PushArgumentInstr* value_push = PushArgument(Add(offset_push, len_push));
232
233 Do(InstanceCall(InstanceCallDescriptor::FromToken(Token::kASSIGN_INDEX),
234 matches_push,
235 index_push,
236 value_push));
237 }
238
239 // Print the result if tracing.
240 PRINT(PushLocal(result_));
241
242 // Return true on success.
243 AppendInstruction(new(I) ReturnInstr(kNoTokenPos, Bind(LoadLocal(result_))));
244 }
245
246
247 void IRRegExpMacroAssembler::GenerateExitBlock() {
248 set_current_instruction(exit_block_);
249 TAG();
250
251 // Return false on failure.
252 AppendInstruction(new(I) ReturnInstr(kNoTokenPos, Bind(LoadLocal(result_))));
253 }
254
255
// Compile-time switch for unaligned multi-character loads: enabled on every
// target except ARM64, ARM and MIPS.
#if !defined(TARGET_ARCH_ARM64) && \
    !defined(TARGET_ARCH_ARM) && \
    !defined(TARGET_ARCH_MIPS)
static const bool kEnableUnalignedAccesses = true;
#else
// Disabling unaligned accesses forces the regexp engine to load characters one
// by one instead of up to 4 at once, along with the associated performance hit.
// TODO(jgruber): Be less conservative about disabling unaligned accesses.
// For instance, ARMv6 supports unaligned accesses. Once it is enabled here,
// update LoadCodeUnitsInstr methods for the appropriate architectures.
static const bool kEnableUnalignedAccesses = false;
#endif
268 bool IRRegExpMacroAssembler::CanReadUnaligned() {
269 return kEnableUnalignedAccesses && !slow_safe();
270 }
271
272
273 RawArray* IRRegExpMacroAssembler::Execute(
274 const Function& function,
275 const String& input,
276 const Smi& start_offset,
277 Isolate* isolate) {
278 // Create the argument list.
279 const Array& args = Array::Handle(Array::New(2));
280 args.SetAt(0, input);
281 args.SetAt(1, start_offset);
282
283 // And finally call the generated code.
284
285 const Object& retval =
286 Object::Handle(isolate, DartEntry::InvokeFunction(function, args));
287 if (retval.IsError()) {
288 const Error& error = Error::Cast(retval);
289 OS::Print("%s\n", error.ToErrorCString());
290 // Should never happen.
291 UNREACHABLE();
292 }
293
294 if (retval.IsNull()) {
295 return Array::null();
296 }
297
298 ASSERT(retval.IsArray());
299 return Array::Cast(retval).raw();
300 }
301
302
303 RawBool* IRRegExpMacroAssembler::CaseInsensitiveCompareUC16(
304 RawString* str_raw,
305 RawSmi* lhs_index_raw,
306 RawSmi* rhs_index_raw,
307 RawSmi* length_raw) {
308 const String& str = String::Handle(str_raw);
309 const Smi& lhs_index = Smi::Handle(lhs_index_raw);
310 const Smi& rhs_index = Smi::Handle(rhs_index_raw);
311 const Smi& length = Smi::Handle(length_raw);
312
313 // TODO(jgruber): Optimize as single instance. V8 has this as an
314 // isolate member.
315 unibrow::Mapping<unibrow::Ecma262Canonicalize> canonicalize;
316
317 for (intptr_t i = 0; i < length.Value(); i++) {
318 int32_t c1 = str.CharAt(lhs_index.Value() + i);
319 int32_t c2 = str.CharAt(rhs_index.Value() + i);
320 if (c1 != c2) {
321 int32_t s1[1] = { c1 };
322 canonicalize.get(c1, '\0', s1);
323 if (s1[0] != c2) {
324 int32_t s2[1] = { c2 };
325 canonicalize.get(c2, '\0', s2);
326 if (s1[0] != s2[0]) {
327 return Bool::False().raw();
328 }
329 }
330 }
331 }
332 return Bool::True().raw();
333 }
334
335
336 LocalVariable* IRRegExpMacroAssembler::Parameter(const String& name,
337 intptr_t index) const {
338 const Type& local_type = Type::ZoneHandle(I, Type::DynamicType());
339 LocalVariable* local =
340 new(I) LocalVariable(kNoTokenPos, name, local_type);
341
342 intptr_t param_frame_index = kParamEndSlotFromFp + kParamCount - index;
343 local->set_index(param_frame_index);
344
345 return local;
346 }
347
348
349 LocalVariable* IRRegExpMacroAssembler::Local(const String& name) {
350 const Type& local_type = Type::ZoneHandle(I, Type::DynamicType());
351 LocalVariable* local =
352 new(I) LocalVariable(kNoTokenPos, name, local_type);
353 local->set_index(GetNextLocalIndex());
354
355 return local;
356 }
357
358
359 ConstantInstr* IRRegExpMacroAssembler::Int64Constant(int64_t value) const {
360 return new(I) ConstantInstr(
361 Integer::ZoneHandle(I, Integer::New(value, Heap::kOld)));
362 }
363
364
365 ConstantInstr* IRRegExpMacroAssembler::Uint64Constant(uint64_t value) const {
366 return new(I) ConstantInstr(
367 Integer::ZoneHandle(I, Integer::NewFromUint64(value, Heap::kOld)));
368 }
369
370
371 ConstantInstr* IRRegExpMacroAssembler::BoolConstant(bool value) const {
372 return new(I) ConstantInstr(value ? Bool::True() : Bool::False());
373 }
374
375
376 ConstantInstr* IRRegExpMacroAssembler::StringConstant(const char* value) const {
377 return new(I) ConstantInstr(
378 String::ZoneHandle(I, String::New(value, Heap::kOld)));
379 }
380
381
382 ConstantInstr* IRRegExpMacroAssembler::WordCharacterMapConstant() const {
383 const Library& lib = Library::Handle(I, Library::CoreLibrary());
384 const Class& regexp_class = Class::Handle(I,
385 lib.LookupClassAllowPrivate(Symbols::JSSyntaxRegExp()));
386 const Field& word_character_field = Field::ZoneHandle(I,
387 regexp_class.LookupStaticField(Symbols::_wordCharacterMap()));
388 ASSERT(!word_character_field.IsNull());
389
390 if (word_character_field.IsUninitialized()) {
391 word_character_field.EvaluateInitializer();
392 }
393 ASSERT(!word_character_field.IsUninitialized());
394
395 return new(I) ConstantInstr(
396 Array::Cast(Instance::ZoneHandle(I, word_character_field.value())));
Florian Schneider 2014/10/07 12:49:37 No need for Array::Cast here.
jgruber1 2014/10/07 15:00:25 Done.
397 }
398
399
400 ComparisonInstr* IRRegExpMacroAssembler::Comparison(
401 ComparisonKind kind, Definition* lhs, Definition* rhs) {
402 Token::Kind strict_comparison = Token::kEQ_STRICT;
403 Token::Kind intermediate_operator = Token::kILLEGAL;
404 switch (kind) {
405 case kEQ:
406 intermediate_operator = Token::kEQ;
407 break;
408 case kNE:
409 intermediate_operator = Token::kEQ;
410 strict_comparison = Token::kNE_STRICT;
411 break;
412 case kLT:
413 intermediate_operator = Token::kLT;
414 break;
415 case kGT:
416 intermediate_operator = Token::kGT;
417 break;
418 case kLTE:
419 intermediate_operator = Token::kLTE;
420 break;
421 case kGTE:
422 intermediate_operator = Token::kGTE;
423 break;
424 default:
425 UNREACHABLE();
426 }
427
428 ASSERT(intermediate_operator != Token::kILLEGAL);
429
430 PushArgumentInstr* lhs_push = PushArgument(Bind(lhs));
431 PushArgumentInstr* rhs_push = PushArgument(Bind(rhs));
432
433 Value* lhs_value =
434 Bind(InstanceCall(
435 InstanceCallDescriptor::FromToken(intermediate_operator),
436 lhs_push,
437 rhs_push));
438 Value* rhs_value = Bind(BoolConstant(true));
439
440 return new(I) StrictCompareInstr(kNoTokenPos, strict_comparison,
441 lhs_value, rhs_value, true);
442 }
443
444
445 StaticCallInstr* IRRegExpMacroAssembler::StaticCall(
446 const Function& function) const {
447 ZoneGrowableArray<PushArgumentInstr*>* arguments =
448 new(I) ZoneGrowableArray<PushArgumentInstr*>(0);
449 return StaticCall(function, arguments);
450 }
451
452
453 StaticCallInstr* IRRegExpMacroAssembler::StaticCall(
454 const Function& function,
455 PushArgumentInstr* arg1) const {
456 ZoneGrowableArray<PushArgumentInstr*>* arguments =
457 new(I) ZoneGrowableArray<PushArgumentInstr*>(1);
458 arguments->Add(arg1);
459
460 return StaticCall(function, arguments);
461 }
462
463
464 StaticCallInstr* IRRegExpMacroAssembler::StaticCall(
465 const Function& function,
466 PushArgumentInstr* arg1,
467 PushArgumentInstr* arg2) const {
468 ZoneGrowableArray<PushArgumentInstr*>* arguments =
469 new(I) ZoneGrowableArray<PushArgumentInstr*>(2);
470 arguments->Add(arg1);
471 arguments->Add(arg2);
472
473 return StaticCall(function, arguments);
474 }
475
476
477 StaticCallInstr* IRRegExpMacroAssembler::StaticCall(
478 const Function& function,
479 ZoneGrowableArray<PushArgumentInstr*>* arguments) const {
480 return new(I) StaticCallInstr(kNoTokenPos,
481 function,
482 Object::null_array(),
483 arguments,
484 *ic_data_array_);
485 }
486
487
488 InstanceCallInstr* IRRegExpMacroAssembler::InstanceCall(
489 const InstanceCallDescriptor& desc,
490 PushArgumentInstr* arg1) const {
491 ZoneGrowableArray<PushArgumentInstr*>* arguments =
492 new(I) ZoneGrowableArray<PushArgumentInstr*>(1);
493 arguments->Add(arg1);
494
495 return InstanceCall(desc, arguments);
496 }
497
498
499 InstanceCallInstr* IRRegExpMacroAssembler::InstanceCall(
500 const InstanceCallDescriptor& desc,
501 PushArgumentInstr* arg1,
502 PushArgumentInstr* arg2) const {
503 ZoneGrowableArray<PushArgumentInstr*>* arguments =
504 new(I) ZoneGrowableArray<PushArgumentInstr*>(2);
505 arguments->Add(arg1);
506 arguments->Add(arg2);
507
508 return InstanceCall(desc, arguments);
509 }
510
511
512 InstanceCallInstr* IRRegExpMacroAssembler::InstanceCall(
513 const InstanceCallDescriptor& desc,
514 PushArgumentInstr* arg1,
515 PushArgumentInstr* arg2,
516 PushArgumentInstr* arg3) const {
517 ZoneGrowableArray<PushArgumentInstr*>* arguments =
518 new(I) ZoneGrowableArray<PushArgumentInstr*>(3);
519 arguments->Add(arg1);
520 arguments->Add(arg2);
521 arguments->Add(arg3);
522
523 return InstanceCall(desc, arguments);
524 }
525
526
527 InstanceCallInstr* IRRegExpMacroAssembler::InstanceCall(
528 const InstanceCallDescriptor& desc,
529 ZoneGrowableArray<PushArgumentInstr*> *arguments) const {
530 return
531 new(I) InstanceCallInstr(kNoTokenPos,
532 desc.name,
533 desc.token_kind,
534 arguments,
535 Object::null_array(),
536 desc.checked_argument_count,
537 *ic_data_array_);
538 }
539
540
541 LoadLocalInstr* IRRegExpMacroAssembler::LoadLocal(LocalVariable* local) const {
542 return new(I) LoadLocalInstr(*local);
543 }
544
545
546 void IRRegExpMacroAssembler::StoreLocal(LocalVariable* local,
547 Value* value) {
548 Do(new(I) StoreLocalInstr(*local, value));
549 }
550
551
552 void IRRegExpMacroAssembler::set_current_instruction(Instruction* instruction) {
553 current_instruction_ = instruction;
554 }
555
556
557 Value* IRRegExpMacroAssembler::Bind(Definition* definition) {
558 AppendInstruction(definition);
559 definition->set_temp_index(temp_id.Alloc());
560
561 return new(I) Value(definition);
562 }
563
564
565 void IRRegExpMacroAssembler::Do(Definition* definition) {
566 AppendInstruction(definition);
567 }
568
// In some cases, the V8 irregexp engine generates unreachable code by emitting
// a jmp not followed by a bind. We cannot do the same, since it is impossible
// to append to a block following a jmp. In such cases, assume that we are doing
// the correct thing, but output a warning when tracing.
//
// When the assembler is closed (current_instruction_ == NULL) the macro binds
// a fresh dummy block so emission can continue. It is wrapped in
// do { ... } while (0) so that `HANDLE_DEAD_CODE_EMISSION();` is exactly one
// statement and is safe as the body of an unbraced if/else.
#define HANDLE_DEAD_CODE_EMISSION() \
  do { \
    if (current_instruction_ == NULL) { \
      if (FLAG_trace_irregexp) { \
        OS::Print("WARNING: Attempting to append to a closed assembler. " \
                  "This could be either a bug or generation of dead code " \
                  "inherited from V8.\n"); \
      } \
      BlockLabel dummy; \
      BindBlock(&dummy); \
    } \
  } while (0)
583
584 void IRRegExpMacroAssembler::AppendInstruction(Instruction* instruction) {
585 HANDLE_DEAD_CODE_EMISSION();
586
587 ASSERT(current_instruction_ != NULL);
588 ASSERT(current_instruction_->next() == NULL);
589
590 temp_id.Dealloc(instruction->InputCount());
591 arg_id.Dealloc(instruction->ArgumentCount());
592
593 current_instruction_->LinkTo(instruction);
594 set_current_instruction(instruction);
595 }
596
597
598 void IRRegExpMacroAssembler::CloseBlockWith(Instruction* instruction) {
599 HANDLE_DEAD_CODE_EMISSION();
600
601 ASSERT(current_instruction_ != NULL);
602 ASSERT(current_instruction_->next() == NULL);
603
604 temp_id.Dealloc(instruction->InputCount());
605 arg_id.Dealloc(instruction->ArgumentCount());
606
607 current_instruction_->LinkTo(instruction);
608 set_current_instruction(NULL);
609 }
610
611
612 void IRRegExpMacroAssembler::GoTo(BlockLabel* to) {
613 if (to == NULL) {
614 Backtrack();
615 } else {
616 to->SetLinked();
617 GoTo(to->block());
618 }
619 }
620
621
622 // Closes the current block with a goto, and unsets current_instruction_.
623 // BindBlock() must be called before emission can continue.
624 void IRRegExpMacroAssembler::GoTo(JoinEntryInstr* to) {
625 HANDLE_DEAD_CODE_EMISSION();
626
627 ASSERT(current_instruction_ != NULL);
628 ASSERT(current_instruction_->next() == NULL);
629 current_instruction_->Goto(to);
630 set_current_instruction(NULL);
631 }
632
633
634 PushArgumentInstr* IRRegExpMacroAssembler::PushArgument(Value* value) {
635 arg_id.Alloc();
636 PushArgumentInstr* push = new(I) PushArgumentInstr(value);
637 // Do *not* use Do() for push argument instructions.
638 AppendInstruction(push);
639 return push;
640 }
641
642
643 PushArgumentInstr* IRRegExpMacroAssembler::PushLocal(LocalVariable* local) {
644 return PushArgument(Bind(LoadLocal(local)));
645 }
646
647
648 void IRRegExpMacroAssembler::Print(const char* str) {
649 Print(PushArgument(
650 Bind(new(I) ConstantInstr(
651 String::ZoneHandle(I, String::New(str, Heap::kOld))))));
652 }
653
654
655 void IRRegExpMacroAssembler::Print(PushArgumentInstr* argument) {
656 const Library& lib = Library::Handle(Library::CoreLibrary());
657 const Function& print_fn = Function::ZoneHandle(
658 I, lib.LookupFunctionAllowPrivate(Symbols::print()));
659 Do(StaticCall(print_fn, argument));
660 }
661
662
663 void IRRegExpMacroAssembler::PrintBlocks() {
664 for (intptr_t i = 0; i < blocks_.length(); i++) {
665 FlowGraphPrinter::PrintBlock(blocks_[i], false);
666 }
667 }
668
669
670 intptr_t IRRegExpMacroAssembler::stack_limit_slack() {
671 return 32;
672 }
673
674
675 void IRRegExpMacroAssembler::AdvanceCurrentPosition(intptr_t by) {
676 TAG();
677 if (by != 0) {
678 PushArgumentInstr* cur_pos_push = PushLocal(current_position_);
679 PushArgumentInstr* by_push = PushArgument(Bind(Int64Constant(by)));
680
681 Value* new_pos_value = Add(cur_pos_push, by_push);
682 StoreLocal(current_position_, new_pos_value);
683 }
684 }
685
686
687 void IRRegExpMacroAssembler::AdvanceRegister(intptr_t reg, intptr_t by) {
688 TAG();
689 ASSERT(reg >= 0);
690 ASSERT(reg < position_registers_.length());
691
692 if (by != 0) {
693 PushArgumentInstr* reg_push = PushLocal(position_register(reg));
694 PushArgumentInstr* by_push = PushArgument(Bind(Int64Constant(by)));
695 StoreLocal(position_register(reg), Add(reg_push, by_push));
696 }
697 }
698
699
700 void IRRegExpMacroAssembler::Backtrack() {
701 TAG();
702 CheckPreemption();
703
704 GrowableObjectArray& offsets = GrowableObjectArray::ZoneHandle(
705 I, GrowableObjectArray::New(Heap::kOld));
706
707 PushArgumentInstr* block_offsets_push =
708 PushArgument(Bind(new(I) ConstantInstr(offsets)));
709 PushArgumentInstr* block_id_push = PushArgument(PopStack());
710
711 Value* offset_value =
712 Bind(InstanceCall(InstanceCallDescriptor::FromToken(Token::kINDEX),
713 block_offsets_push,
714 block_id_push));
715
716 IndirectGotoInstr* igoto = new(I) IndirectGotoInstr(&offsets, offset_value);
717 CloseBlockWith(igoto);
718 igotos_.Add(igoto);
719 }
720
721
722 // A BindBlock is analogous to assigning a label to a basic block.
723 // If the BlockLabel does not yet contain a block, it is created.
724 // If there is a current instruction, append a goto to the bound block.
725 void IRRegExpMacroAssembler::BindBlock(BlockLabel* label) {
726 ASSERT(!label->IsBound());
727 ASSERT(label->block()->next() == NULL);
728
729 label->SetBound(block_id.Alloc());
730 blocks_.Add(label->block());
731
732 if (current_instruction_ != NULL) {
733 GoTo(label);
734 }
735 set_current_instruction(label->block());
736
737 // Print the id of the current block if tracing.
738 PRINT(PushArgument(Bind(Uint64Constant(label->block()->block_id()))));
739 }
740
741
742 intptr_t IRRegExpMacroAssembler::GetNextLocalIndex() {
743 intptr_t id = local_id.Alloc();
744 return kFirstLocalSlotFromFp - id;
745 }
746
747
748 LocalVariable* IRRegExpMacroAssembler::position_register(intptr_t index) {
749 // Create position registers as needed.
750 for (intptr_t i = position_registers_.length(); i < index + 1; i++) {
751 position_registers_.Add(Local(Symbols::position_registers_()));
752 }
753
754 return position_registers_[index];
755 }
756
757
758 // TODO(jgruber): Move the offset table outside to avoid having to keep
759 // the assembler around until after code generation; both function or regexp
760 // would work.
761 void IRRegExpMacroAssembler::FinalizeBlockOffsetTable() {
762 for (intptr_t i = 0; i < igotos_.length(); i++) {
763 IndirectGotoInstr* igoto = igotos_[i];
764 igoto->SetOffsetCount(I, indirect_id.Count());
765
766 for (intptr_t j = 0; j < igoto->SuccessorCount(); j++) {
767 TargetEntryInstr* target = igoto->SuccessorAt(j);
768
769 // Optimizations might have modified the immediate target block, but
770 // it must end with a goto to the indirect entry.
771 Instruction* instr = target;
772 while (instr != NULL && !instr->IsGoto()) {
773 instr = instr->next();
774 }
775 ASSERT(instr->IsGoto());
776
777 IndirectEntryInstr* ientry =
778 instr->AsGoto()->successor()->AsIndirectEntry();
779 ASSERT(ientry != NULL);
780
781 // The intermediate block was possibly compacted, check both it and the
782 // final indirect entry for a valid offset. If neither are valid, then
783 // the indirect entry is unreachable.
784 intptr_t offset =
785 (target->offset() > 0) ? target->offset() : ientry->offset();
786 if (offset > 0) {
787 intptr_t adjusted_offset =
788 offset - Assembler::EntryPointToPcMarkerOffset();
789 igoto->SetOffsetAt(I, ientry->indirect_id(), adjusted_offset);
790 }
791 }
792 }
793 }
794
795 void IRRegExpMacroAssembler::FinalizeIndirectGotos() {
796 for (intptr_t i = 0; i < igotos_.length(); i++) {
797 for (intptr_t j = 0; j < entry_block_->indirect_entries().length(); j++) {
798 igotos_.At(i)->AddSuccessor(
799 TargetWithJoinGoto(entry_block_->indirect_entries().At(j)));
800 }
801 }
802 }
803
804
805 void IRRegExpMacroAssembler::CheckCharacter(uint32_t c, BlockLabel* on_equal) {
806 TAG();
807 Definition* cur_char_def = LoadLocal(current_character_);
808 Definition* char_def = Uint64Constant(c);
809
810 BranchOrBacktrack(Comparison(kEQ, cur_char_def, char_def),
811 on_equal);
812 }
813
814
815 void IRRegExpMacroAssembler::CheckCharacterGT(uint16_t limit,
816 BlockLabel* on_greater) {
817 TAG();
818 BranchOrBacktrack(Comparison(kGT,
819 LoadLocal(current_character_),
820 Uint64Constant(limit)),
821 on_greater);
822 }
823
824
825 void IRRegExpMacroAssembler::CheckAtStart(BlockLabel* on_at_start) {
826 TAG();
827
828 BlockLabel not_at_start;
829
830 // Did we start the match at the start of the string at all?
831 BranchOrBacktrack(Comparison(kNE,
832 LoadLocal(start_index_param_),
833 Uint64Constant(0)),
834 &not_at_start);
835
836 // If we did, are we still at the start of the input, i.e. is
837 // (offset == string_length * -1)?
838 Definition* neg_len_def =
839 InstanceCall(InstanceCallDescriptor::FromToken(Token::kNEGATE),
840 PushLocal(string_param_length_));
841 Definition* offset_def = LoadLocal(current_position_);
842 BranchOrBacktrack(Comparison(kEQ, neg_len_def, offset_def),
843 on_at_start);
844
845 BindBlock(&not_at_start);
846 }
847
848
849 void IRRegExpMacroAssembler::CheckNotAtStart(BlockLabel* on_not_at_start) {
850 TAG();
851
852 // Did we start the match at the start of the string at all?
853 BranchOrBacktrack(Comparison(kNE,
854 LoadLocal(start_index_param_),
855 Uint64Constant(0)),
856 on_not_at_start);
857
858 // If we did, are we still at the start of the input, i.e. is
859 // (offset == string_length * -1)?
860 Definition* neg_len_def =
861 InstanceCall(InstanceCallDescriptor::FromToken(Token::kNEGATE),
862 PushLocal(string_param_length_));
863 Definition* offset_def = LoadLocal(current_position_);
864 BranchOrBacktrack(Comparison(kNE, neg_len_def, offset_def),
865 on_not_at_start);
866 }
867
868
869 void IRRegExpMacroAssembler::CheckCharacterLT(uint16_t limit,
870 BlockLabel* on_less) {
871 TAG();
872 BranchOrBacktrack(Comparison(kLT,
873 LoadLocal(current_character_),
874 Uint64Constant(limit)),
875 on_less);
876 }
877
878
879 void IRRegExpMacroAssembler::CheckGreedyLoop(BlockLabel* on_equal) {
880 TAG();
881
882 BlockLabel fallthrough;
883
884 PushArgumentInstr* stack_push = PushLocal(stack_);
885 Definition* stack_tip_def = InstanceCall(
886 InstanceCallDescriptor(String::ZoneHandle(
887 I, Field::GetterSymbol(Symbols::last()))),
888 stack_push);
889 Definition* cur_pos_def = LoadLocal(current_position_);
890
891 BranchOrBacktrack(Comparison(kNE, stack_tip_def, cur_pos_def),
892 &fallthrough);
893
894 // Pop, throwing away the value.
895 stack_push = PushLocal(stack_);
896 Do(InstanceCall(InstanceCallDescriptor(Symbols::removeLast()),
897 stack_push));
898
899 BranchOrBacktrack(NULL, on_equal);
900
901 BindBlock(&fallthrough);
902 }
903
904
905 void IRRegExpMacroAssembler::CheckNotBackReferenceIgnoreCase(
906 intptr_t start_reg,
907 BlockLabel* on_no_match) {
908 TAG();
909 ASSERT(start_reg + 1 <= position_registers_.length());
910
911 BlockLabel fallthrough;
912
913 PushArgumentInstr* end_push = PushLocal(position_register(start_reg + 1));
914 PushArgumentInstr* start_push = PushLocal(position_register(start_reg));
915 StoreLocal(capture_length_, Sub(end_push, start_push));
916
917 // The length of a capture should not be negative. This can only happen
918 // if the end of the capture is unrecorded, or at a point earlier than
919 // the start of the capture.
920 // BranchOrBacktrack(less, on_no_match);
921
922 BranchOrBacktrack(Comparison(kLT,
923 LoadLocal(capture_length_),
924 Uint64Constant(0)),
925 on_no_match);
926
927 // If length is zero, either the capture is empty or it is completely
928 // uncaptured. In either case succeed immediately.
929 BranchOrBacktrack(Comparison(kEQ,
930 LoadLocal(capture_length_),
931 Uint64Constant(0)),
932 &fallthrough);
933
934
935 // Check that there are sufficient characters left in the input.
936 PushArgumentInstr* pos_push = PushLocal(current_position_);
937 PushArgumentInstr* len_push = PushLocal(capture_length_);
938 BranchOrBacktrack(
939 Comparison(kGT,
940 InstanceCall(InstanceCallDescriptor::FromToken(Token::kADD),
Florian Schneider 2014/10/07 12:49:36 Use helper IRRegExpMacroAssembler::Add here? Also
jgruber1 2014/10/07 15:00:25 As discussed in person, it is necessary to keep th
941 pos_push,
942 len_push),
943 Uint64Constant(0)),
944 on_no_match);
945
946 pos_push = PushLocal(current_position_);
947 len_push = PushLocal(string_param_length_);
948 StoreLocal(match_start_index_, Add(pos_push, len_push));
949
950 pos_push = PushLocal(position_register(start_reg));
951 len_push = PushLocal(string_param_length_);
952 StoreLocal(capture_start_index_, Add(pos_push, len_push));
953
954 pos_push = PushLocal(match_start_index_);
955 len_push = PushLocal(capture_length_);
956 StoreLocal(match_end_index_, Add(pos_push, len_push));
957
958 BlockLabel success;
959 if (mode_ == ASCII) {
960 BlockLabel loop_increment;
961 BlockLabel loop;
962 BindBlock(&loop);
963
964 StoreLocal(char_in_capture_, CharacterAt(LoadLocal(capture_start_index_)));
965 StoreLocal(char_in_match_, CharacterAt(LoadLocal(match_start_index_)));
966
967 BranchOrBacktrack(Comparison(kEQ,
968 LoadLocal(char_in_capture_),
969 LoadLocal(char_in_match_)),
970 &loop_increment);
971
972 // Mismatch, try case-insensitive match (converting letters to lower-case).
973 PushArgumentInstr* match_char_push = PushLocal(char_in_match_);
974 PushArgumentInstr* mask_push = PushArgument(Bind(Uint64Constant(0x20)));
975 StoreLocal(char_in_match_,
976 Bind(InstanceCall(
977 InstanceCallDescriptor::FromToken(Token::kBIT_OR),
978 match_char_push,
979 mask_push)));
980
981 BlockLabel convert_capture;
982 BlockLabel on_not_in_range;
983 BranchOrBacktrack(Comparison(kLT,
984 LoadLocal(char_in_match_),
985 Uint64Constant('a')),
986 &on_not_in_range);
987 BranchOrBacktrack(Comparison(kGT,
988 LoadLocal(char_in_match_),
989 Uint64Constant('z')),
990 &on_not_in_range);
991 GoTo(&convert_capture);
992 BindBlock(&on_not_in_range);
993
994 // Latin-1: Check for values in range [224,254] but not 247.
995 BranchOrBacktrack(Comparison(kLT,
996 LoadLocal(char_in_match_),
997 Uint64Constant(224)),
998 on_no_match);
999 BranchOrBacktrack(Comparison(kGT,
1000 LoadLocal(char_in_match_),
1001 Uint64Constant(254)),
1002 on_no_match);
1003
1004 BranchOrBacktrack(Comparison(kEQ,
1005 LoadLocal(char_in_match_),
1006 Uint64Constant(247)),
1007 on_no_match);
1008
1009 // Also convert capture character.
1010 BindBlock(&convert_capture);
1011
1012 PushArgumentInstr* capture_char_push = PushLocal(char_in_capture_);
1013 mask_push = PushArgument(Bind(Uint64Constant(0x20)));
1014 StoreLocal(char_in_capture_,
1015 Bind(InstanceCall(
1016 InstanceCallDescriptor::FromToken(Token::kBIT_OR),
1017 capture_char_push,
1018 mask_push)));
1019
1020 BranchOrBacktrack(Comparison(kNE,
1021 LoadLocal(char_in_match_),
1022 LoadLocal(char_in_capture_)),
1023 on_no_match);
1024
1025 BindBlock(&loop_increment);
1026
1027 // Increment pointers into match and capture strings.
1028 StoreLocal(capture_start_index_, Add(
1029 PushLocal(capture_start_index_),
1030 PushArgument(Bind(Uint64Constant(1)))));
1031 StoreLocal(match_start_index_, Add(
1032 PushLocal(match_start_index_),
1033 PushArgument(Bind(Uint64Constant(1)))));
1034
1035 // Compare to end of match, and loop if not done.
1036 BranchOrBacktrack(Comparison(kLT,
1037 LoadLocal(match_start_index_),
1038 LoadLocal(match_end_index_)),
1039 &loop);
1040 } else {
1041 ASSERT(mode_ == UC16);
1042
1043 Value* string_value = Bind(LoadLocal(string_param_));
1044 Value* lhs_index_value = Bind(LoadLocal(match_start_index_));
1045 Value* rhs_index_value = Bind(LoadLocal(capture_start_index_));
1046 Value* length_value = Bind(LoadLocal(capture_length_));
1047
1048 Definition* is_match_def =
1049 new(I) CaseInsensitiveCompareUC16Instr(
1050 string_value,
1051 lhs_index_value,
1052 rhs_index_value,
1053 length_value,
1054 specialization_cid_);
1055
1056 BranchOrBacktrack(Comparison(kNE, is_match_def, BoolConstant(true)),
1057 on_no_match);
1058 }
1059
1060 BindBlock(&success);
1061
1062 // Move current character position to position after match.
1063 PushArgumentInstr* match_end_push = PushLocal(match_end_index_);
1064 len_push = PushLocal(string_param_length_);
1065 StoreLocal(current_position_, Sub(match_end_push, len_push));
1066
1067 BindBlock(&fallthrough);
1068 }
1069
1070
1071 void IRRegExpMacroAssembler::CheckNotBackReference(
1072 intptr_t start_reg,
1073 BlockLabel* on_no_match) {
1074 TAG();
1075 ASSERT(start_reg + 1 <= position_registers_.length());
1076
1077 BlockLabel fallthrough;
1078 BlockLabel success;
1079
1080 // Find length of back-referenced capture.
1081 PushArgumentInstr* end_push = PushLocal(position_register(start_reg + 1));
1082 PushArgumentInstr* start_push = PushLocal(position_register(start_reg));
1083 StoreLocal(capture_length_, Sub(end_push, start_push));
1084
1085 // Fail on partial or illegal capture (start of capture after end of capture).
1086 BranchOrBacktrack(Comparison(kLT,
1087 LoadLocal(capture_length_),
1088 Uint64Constant(0)),
1089 on_no_match);
1090
1091 // Succeed on empty capture (including no capture)
1092 BranchOrBacktrack(Comparison(kEQ,
1093 LoadLocal(capture_length_),
1094 Uint64Constant(0)),
1095 &fallthrough);
1096
1097 // Check that there are sufficient characters left in the input.
1098 PushArgumentInstr* pos_push = PushLocal(current_position_);
1099 PushArgumentInstr* len_push = PushLocal(capture_length_);
1100 BranchOrBacktrack(
1101 Comparison(kGT,
1102 InstanceCall(InstanceCallDescriptor::FromToken(Token::kADD),
1103 pos_push,
1104 len_push),
1105 Uint64Constant(0)),
1106 on_no_match);
1107
1108 // Compute pointers to match string and capture string.
1109 pos_push = PushLocal(current_position_);
1110 len_push = PushLocal(string_param_length_);
1111 StoreLocal(match_start_index_, Add(pos_push, len_push));
1112
1113 pos_push = PushLocal(position_register(start_reg));
1114 len_push = PushLocal(string_param_length_);
1115 StoreLocal(capture_start_index_, Add(pos_push, len_push));
1116
1117 pos_push = PushLocal(match_start_index_);
1118 len_push = PushLocal(capture_length_);
1119 StoreLocal(match_end_index_, Add(pos_push, len_push));
1120
1121 BlockLabel loop;
1122 BindBlock(&loop);
1123
1124 StoreLocal(char_in_capture_, CharacterAt(LoadLocal(capture_start_index_)));
1125 StoreLocal(char_in_match_, CharacterAt(LoadLocal(match_start_index_)));
1126
1127 BranchOrBacktrack(Comparison(kNE,
1128 LoadLocal(char_in_capture_),
1129 LoadLocal(char_in_match_)),
1130 on_no_match);
1131
1132 // Increment pointers into capture and match string.
1133 StoreLocal(capture_start_index_, Add(
1134 PushLocal(capture_start_index_),
1135 PushArgument(Bind(Uint64Constant(1)))));
1136 StoreLocal(match_start_index_, Add(
1137 PushLocal(match_start_index_),
1138 PushArgument(Bind(Uint64Constant(1)))));
1139
1140 // Check if we have reached end of match area.
1141 BranchOrBacktrack(Comparison(kLT,
1142 LoadLocal(match_start_index_),
1143 LoadLocal(match_end_index_)),
1144 &loop);
1145
1146 BindBlock(&success);
1147
1148 // Move current character position to position after match.
1149 PushArgumentInstr* match_end_push = PushLocal(match_end_index_);
1150 len_push = PushLocal(string_param_length_);
1151 StoreLocal(current_position_, Sub(match_end_push, len_push));
1152
1153 BindBlock(&fallthrough);
1154 }
1155
1156
1157 void IRRegExpMacroAssembler::CheckNotCharacter(uint32_t c,
1158 BlockLabel* on_not_equal) {
1159 TAG();
1160 BranchOrBacktrack(Comparison(kNE,
1161 LoadLocal(current_character_),
1162 Uint64Constant(c)),
1163 on_not_equal);
1164 }
1165
1166
1167 void IRRegExpMacroAssembler::CheckCharacterAfterAnd(uint32_t c,
1168 uint32_t mask,
1169 BlockLabel* on_equal) {
1170 TAG();
1171
1172 Definition* actual_def = LoadLocal(current_character_);
1173 Definition* expected_def = Uint64Constant(c);
1174
1175 PushArgumentInstr* actual_push = PushArgument(Bind(actual_def));
1176 PushArgumentInstr* mask_push = PushArgument(Bind(Uint64Constant(mask)));
1177 actual_def = InstanceCall(InstanceCallDescriptor::FromToken(Token::kBIT_AND),
1178 actual_push,
1179 mask_push);
1180
1181 BranchOrBacktrack(Comparison(kEQ, actual_def, expected_def), on_equal);
1182 }
1183
1184
1185 void IRRegExpMacroAssembler::CheckNotCharacterAfterAnd(
1186 uint32_t c,
1187 uint32_t mask,
1188 BlockLabel* on_not_equal) {
1189 TAG();
1190
1191 Definition* actual_def = LoadLocal(current_character_);
1192 Definition* expected_def = Uint64Constant(c);
1193
1194 PushArgumentInstr* actual_push = PushArgument(Bind(actual_def));
1195 PushArgumentInstr* mask_push = PushArgument(Bind(Uint64Constant(mask)));
1196 actual_def = InstanceCall(InstanceCallDescriptor::FromToken(Token::kBIT_AND),
1197 actual_push,
1198 mask_push);
1199
1200 BranchOrBacktrack(Comparison(kNE, actual_def, expected_def), on_not_equal);
1201 }
1202
1203
1204 void IRRegExpMacroAssembler::CheckNotCharacterAfterMinusAnd(
1205 uint16_t c,
1206 uint16_t minus,
1207 uint16_t mask,
1208 BlockLabel* on_not_equal) {
1209 TAG();
1210 ASSERT(minus < Utf16::kMaxCodeUnit); // NOLINT
1211
1212 Definition* actual_def = LoadLocal(current_character_);
1213 Definition* expected_def = Uint64Constant(c);
1214
1215 PushArgumentInstr* actual_push = PushArgument(Bind(actual_def));
1216 PushArgumentInstr* minus_push = PushArgument(Bind(Uint64Constant(minus)));
1217
1218 actual_push = PushArgument(Sub(actual_push, minus_push));
1219 PushArgumentInstr* mask_push = PushArgument(Bind(Uint64Constant(mask)));
1220 actual_def = InstanceCall(InstanceCallDescriptor::FromToken(Token::kBIT_AND),
1221 actual_push,
1222 mask_push);
1223
1224 BranchOrBacktrack(Comparison(kNE, actual_def, expected_def), on_not_equal);
1225 }
1226
1227
1228 void IRRegExpMacroAssembler::CheckCharacterInRange(
1229 uint16_t from,
1230 uint16_t to,
1231 BlockLabel* on_in_range) {
1232 TAG();
1233 ASSERT(from <= to);
1234
1235 // TODO(jgruber): All range comparisons could be done cheaper with unsigned
1236 // compares. This pattern repeats in various places.
1237
1238 BlockLabel on_not_in_range;
1239 BranchOrBacktrack(Comparison(kLT,
1240 LoadLocal(current_character_),
1241 Uint64Constant(from)),
1242 &on_not_in_range);
1243 BranchOrBacktrack(Comparison(kGT,
1244 LoadLocal(current_character_),
1245 Uint64Constant(to)),
1246 &on_not_in_range);
1247 BranchOrBacktrack(NULL, on_in_range);
1248
1249 BindBlock(&on_not_in_range);
1250 }
1251
1252
1253 void IRRegExpMacroAssembler::CheckCharacterNotInRange(
1254 uint16_t from,
1255 uint16_t to,
1256 BlockLabel* on_not_in_range) {
1257 TAG();
1258 ASSERT(from <= to);
1259
1260 BranchOrBacktrack(Comparison(kLT,
1261 LoadLocal(current_character_),
1262 Uint64Constant(from)),
1263 on_not_in_range);
1264
1265 BranchOrBacktrack(Comparison(kGT,
1266 LoadLocal(current_character_),
1267 Uint64Constant(to)),
1268 on_not_in_range);
1269 }
1270
1271
1272 void IRRegExpMacroAssembler::CheckBitInTable(
1273 const TypedData& table,
1274 BlockLabel* on_bit_set) {
1275 TAG();
1276
1277 PushArgumentInstr* table_push =
1278 PushArgument(Bind(new(I) ConstantInstr(table)));
1279 PushArgumentInstr* index_push = PushLocal(current_character_);
1280
1281 if (mode_ != ASCII || kTableMask != Symbols::kMaxOneCharCodeSymbol) {
1282 PushArgumentInstr* mask_push =
1283 PushArgument(Bind(Uint64Constant(kTableSize - 1)));
1284 index_push = PushArgument(
1285 Bind(InstanceCall(InstanceCallDescriptor::FromToken(Token::kBIT_AND),
1286 index_push,
1287 mask_push)));
1288 }
1289
1290 Definition* byte_def =
1291 InstanceCall(InstanceCallDescriptor::FromToken(Token::kINDEX),
1292 table_push,
1293 index_push);
1294 Definition* zero_def = Int64Constant(0);
1295
1296 BranchOrBacktrack(Comparison(kNE, byte_def, zero_def), on_bit_set);
1297 }
1298
1299
1300 bool IRRegExpMacroAssembler::CheckSpecialCharacterClass(
1301 uint16_t type,
1302 BlockLabel* on_no_match) {
1303 TAG();
1304
1305 // Range checks (c in min..max) are generally implemented by an unsigned
1306 // (c - min) <= (max - min) check
1307 switch (type) {
1308 case 's':
1309 // Match space-characters
1310 if (mode_ == ASCII) {
1311 // One byte space characters are '\t'..'\r', ' ' and \u00a0.
1312 BlockLabel success;
1313 // Space (' ').
1314 BranchOrBacktrack(Comparison(kEQ,
1315 LoadLocal(current_character_),
1316 Uint64Constant(' ')),
1317 &success);
1318 // Check range 0x09..0x0d.
1319 CheckCharacterInRange('\t', '\r', &success);
1320 // \u00a0 (NBSP).
1321 BranchOrBacktrack(Comparison(kNE,
1322 LoadLocal(current_character_),
1323 Uint64Constant(0x00a0)),
1324 on_no_match);
1325 BindBlock(&success);
1326 return true;
1327 }
1328 return false;
1329 case 'S':
1330 // The emitted code for generic character classes is good enough.
1331 return false;
1332 case 'd':
1333 // Match ASCII digits ('0'..'9')
1334 CheckCharacterNotInRange('0', '9', on_no_match);
1335 return true;
1336 case 'D':
1337 // Match non ASCII-digits
1338 CheckCharacterInRange('0', '9', on_no_match);
1339 return true;
1340 case '.': {
1341 // Match non-newlines (not 0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029)
1342 BranchOrBacktrack(Comparison(kEQ,
1343 LoadLocal(current_character_),
1344 Uint64Constant('\n')),
1345 on_no_match);
1346 BranchOrBacktrack(Comparison(kEQ,
1347 LoadLocal(current_character_),
1348 Uint64Constant('\r')),
1349 on_no_match);
1350 if (mode_ == UC16) {
1351 BranchOrBacktrack(Comparison(kEQ,
1352 LoadLocal(current_character_),
1353 Uint64Constant(0x2028)),
1354 on_no_match);
1355 BranchOrBacktrack(Comparison(kEQ,
1356 LoadLocal(current_character_),
1357 Uint64Constant(0x2029)),
1358 on_no_match);
1359 }
1360 return true;
1361 }
1362 case 'w': {
1363 if (mode_ != ASCII) {
1364 // Table is 128 entries, so all ASCII characters can be tested.
1365 BranchOrBacktrack(Comparison(kGT,
1366 LoadLocal(current_character_),
1367 Uint64Constant('z')),
1368 on_no_match);
1369 }
1370
1371 PushArgumentInstr* table_push =
1372 PushArgument(Bind(WordCharacterMapConstant()));
1373 PushArgumentInstr* index_push = PushLocal(current_character_);
1374
1375 Definition* byte_def =
1376 InstanceCall(InstanceCallDescriptor::FromToken(Token::kINDEX),
1377 table_push,
1378 index_push);
1379 Definition* zero_def = Int64Constant(0);
1380
1381 BranchOrBacktrack(Comparison(kEQ, byte_def, zero_def), on_no_match);
1382
1383 return true;
1384 }
1385 case 'W': {
1386 BlockLabel done;
1387 if (mode_ != ASCII) {
1388 // Table is 128 entries, so all ASCII characters can be tested.
1389 BranchOrBacktrack(Comparison(kGT,
1390 LoadLocal(current_character_),
1391 Uint64Constant('z')),
1392 &done);
1393 }
1394
1395 // TODO(jgruber): Refactor to use CheckBitInTable if possible.
1396
1397 PushArgumentInstr* table_push =
1398 PushArgument(Bind(WordCharacterMapConstant()));
1399 PushArgumentInstr* index_push = PushLocal(current_character_);
1400
1401 Definition* byte_def =
1402 InstanceCall(InstanceCallDescriptor::FromToken(Token::kINDEX),
1403 table_push,
1404 index_push);
1405 Definition* zero_def = Int64Constant(0);
1406
1407 BranchOrBacktrack(Comparison(kNE, byte_def, zero_def), on_no_match);
1408
1409 if (mode_ != ASCII) {
1410 BindBlock(&done);
1411 }
1412 return true;
1413 }
1414 // Non-standard classes (with no syntactic shorthand) used internally.
1415 case '*':
1416 // Match any character.
1417 return true;
1418 case 'n': {
1419 // Match newlines (0x0a('\n'), 0x0d('\r'), 0x2028 or 0x2029).
1420 // The opposite of '.'.
1421 BlockLabel success;
1422 BranchOrBacktrack(Comparison(kEQ,
1423 LoadLocal(current_character_),
1424 Uint64Constant('\n')),
1425 &success);
1426 BranchOrBacktrack(Comparison(kEQ,
1427 LoadLocal(current_character_),
1428 Uint64Constant('\r')),
1429 &success);
1430 if (mode_ == UC16) {
1431 BranchOrBacktrack(Comparison(kEQ,
1432 LoadLocal(current_character_),
1433 Uint64Constant(0x2028)),
1434 &success);
1435 BranchOrBacktrack(Comparison(kEQ,
1436 LoadLocal(current_character_),
1437 Uint64Constant(0x2029)),
1438 &success);
1439 }
1440 BranchOrBacktrack(NULL, on_no_match);
1441 BindBlock(&success);
1442 return true;
1443 }
1444 // No custom implementation (yet): s(uint16_t), S(uint16_t).
1445 default:
1446 return false;
1447 }
1448 }
1449
1450
1451 void IRRegExpMacroAssembler::Fail() {
1452 TAG();
1453 ASSERT(FAILURE == 0); // Return value for failure is zero.
1454 if (!global()) {
1455 UNREACHABLE(); // Dart regexps are always global.
1456 }
1457 GoTo(exit_block_);
1458 }
1459
1460
1461 void IRRegExpMacroAssembler::IfRegisterGE(intptr_t reg,
1462 intptr_t comparand,
1463 BlockLabel* if_ge) {
1464 TAG();
1465 BranchOrBacktrack(Comparison(kGTE,
1466 LoadLocal(position_register(reg)),
1467 Int64Constant(comparand)),
1468 if_ge);
1469 }
1470
1471
1472 void IRRegExpMacroAssembler::IfRegisterLT(intptr_t reg,
1473 intptr_t comparand,
1474 BlockLabel* if_lt) {
1475 TAG();
1476 BranchOrBacktrack(Comparison(kLT,
1477 LoadLocal(position_register(reg)),
1478 Int64Constant(comparand)),
1479 if_lt);
1480 }
1481
1482
1483 void IRRegExpMacroAssembler::IfRegisterEqPos(intptr_t reg,
1484 BlockLabel* if_eq) {
1485 TAG();
1486 BranchOrBacktrack(Comparison(kEQ,
1487 LoadLocal(position_register(reg)),
1488 LoadLocal(current_position_)),
1489 if_eq);
1490 }
1491
1492
1493 RegExpMacroAssembler::IrregexpImplementation
1494 IRRegExpMacroAssembler::Implementation() {
1495 return kIRImplementation;
1496 }
1497
1498
1499 void IRRegExpMacroAssembler::LoadCurrentCharacter(intptr_t cp_offset,
1500 BlockLabel* on_end_of_input,
1501 bool check_bounds,
1502 intptr_t characters) {
1503 TAG();
1504 ASSERT(cp_offset >= -1); // ^ and \b can look behind one character.
1505 ASSERT(cp_offset < (1<<30)); // Be sane! (And ensure negation works)
1506 if (check_bounds) {
1507 CheckPosition(cp_offset + characters - 1, on_end_of_input);
1508 }
1509 LoadCurrentCharacterUnchecked(cp_offset, characters);
1510 }
1511
1512
1513 void IRRegExpMacroAssembler::PopCurrentPosition() {
1514 TAG();
1515 StoreLocal(current_position_, PopStack());
1516 }
1517
1518
1519 void IRRegExpMacroAssembler::PopRegister(intptr_t register_index) {
1520 TAG();
1521 ASSERT(register_index < position_registers_.length());
1522 StoreLocal(position_register(register_index), PopStack());
1523 }
1524
1525
1526 void IRRegExpMacroAssembler::PushStack(Definition *definition) {
1527 PushArgumentInstr* stack_push = PushLocal(stack_);
1528 PushArgumentInstr* value_push = PushArgument(Bind(definition));
1529 Do(InstanceCall(InstanceCallDescriptor(Symbols::add()),
1530 stack_push,
1531 value_push));
1532 }
1533
1534
1535 Value* IRRegExpMacroAssembler::PopStack() {
1536 PushArgumentInstr* stack_push = PushLocal(stack_);
1537 return Bind(InstanceCall(InstanceCallDescriptor(Symbols::removeLast()),
1538 stack_push));
1539 }
1540
1541
1542 // Pushes the location corresponding to label to the backtracking stack.
1543 void IRRegExpMacroAssembler::PushBacktrack(BlockLabel* label) {
1544 TAG();
1545
1546 // Ensure that targets of indirect jumps are never accessed through a
1547 // normal control flow instructions by creating a new block for each backtrack
1548 // target.
1549 IndirectEntryInstr* indirect_target = IndirectWithJoinGoto(label->block());
1550
1551 // Add a fake edge from the graph entry for data flow analysis.
1552 entry_block_->AddIndirectEntry(indirect_target);
1553
1554 ConstantInstr* offset = Uint64Constant(indirect_target->indirect_id());
1555 PushStack(offset);
1556 }
1557
1558
1559 void IRRegExpMacroAssembler::PushCurrentPosition() {
1560 TAG();
1561 PushStack(LoadLocal(current_position_));
1562 }
1563
1564
1565 void IRRegExpMacroAssembler::PushRegister(intptr_t register_index) {
1566 TAG();
1567 PushStack(LoadLocal(position_register(register_index)));
1568 }
1569
1570
1571 void IRRegExpMacroAssembler::ReadCurrentPositionFromRegister(intptr_t reg) {
1572 TAG();
1573 StoreLocal(current_position_, Bind(LoadLocal(position_register(reg))));
1574 }
1575
1576 // Resets the size of the stack to the value stored in reg.
1577 void IRRegExpMacroAssembler::ReadStackPointerFromRegister(intptr_t reg) {
1578 TAG();
1579 ASSERT(reg < position_registers_.length());
1580
1581 PushArgumentInstr* stack_push = PushLocal(stack_);
1582 PushArgumentInstr* length_push = PushLocal(position_register(reg));
1583
1584 Do(InstanceCall(InstanceCallDescriptor(
1585 String::ZoneHandle(
1586 I, Field::SetterSymbol(Symbols::Length()))),
1587 stack_push,
1588 length_push));
1589 }
1590
1591 void IRRegExpMacroAssembler::SetCurrentPositionFromEnd(intptr_t by) {
1592 TAG();
1593
1594 BlockLabel after_position;
1595
1596 Definition* cur_pos_def = LoadLocal(current_position_);
1597 Definition* by_value_def = Int64Constant(-by);
1598
1599 BranchOrBacktrack(Comparison(kGTE, cur_pos_def, by_value_def),
1600 &after_position);
1601
1602 StoreLocal(current_position_, Bind(Int64Constant(-by)));
1603
1604 // On RegExp code entry (where this operation is used), the character before
1605 // the current position is expected to be already loaded.
1606 // We have advanced the position, so it's safe to read backwards.
1607 LoadCurrentCharacterUnchecked(-1, 1);
1608
1609 BindBlock(&after_position);
1610 }
1611
1612
1613 void IRRegExpMacroAssembler::SetRegister(intptr_t register_index, intptr_t to) {
1614 TAG();
1615 // Reserved for positions!
1616 ASSERT(register_index >= position_registers_count_);
1617 StoreLocal(position_register(register_index), Bind(Int64Constant(to)));
1618 }
1619
1620
1621 bool IRRegExpMacroAssembler::Succeed() {
1622 TAG();
1623 GoTo(success_block_);
1624 return global();
1625 }
1626
1627
1628 void IRRegExpMacroAssembler::WriteCurrentPositionToRegister(
1629 intptr_t reg, intptr_t cp_offset) {
1630 TAG();
1631
1632 PushArgumentInstr* pos_push = PushLocal(current_position_);
1633 PushArgumentInstr* off_push =
1634 PushArgument(Bind(Int64Constant(cp_offset)));
1635
1636 // Push the negative offset; these are converted to positive string positions
1637 // within the success block.
1638 StoreLocal(position_register(reg), Add(pos_push, off_push));
1639 }
1640
1641
1642 void IRRegExpMacroAssembler::ClearRegisters(
1643 intptr_t reg_from, intptr_t reg_to) {
1644 TAG();
1645
1646 ASSERT(reg_from <= reg_to);
1647 ASSERT(reg_to < position_registers_.length());
1648
1649 // In order to clear registers to a final result value of -1, set them to
1650 // (-1 - string length), the offset of -1 from the end of the string.
1651
1652 for (intptr_t reg = reg_from; reg <= reg_to; reg++) {
1653 PushArgumentInstr* minus_one_push =
1654 PushArgument(Bind(Int64Constant(-1)));
1655 PushArgumentInstr* length_push = PushLocal(string_param_length_);
1656
1657 StoreLocal(position_register(reg), Sub(minus_one_push, length_push));
1658 }
1659 }
1660
1661
1662 void IRRegExpMacroAssembler::WriteStackPointerToRegister(intptr_t reg) {
1663 TAG();
1664
1665 PushArgumentInstr* stack_push = PushLocal(stack_);
1666 Value* length_value =
1667 Bind(InstanceCall(InstanceCallDescriptor(
1668 String::ZoneHandle(
1669 I, Field::GetterSymbol(Symbols::Length()))),
1670 stack_push));
1671
1672 StoreLocal(position_register(reg), length_value);
1673 }
1674
1675
1676 // Private methods:
1677
1678
1679 void IRRegExpMacroAssembler::CheckPosition(intptr_t cp_offset,
1680 BlockLabel* on_outside_input) {
1681 TAG();
1682 Definition* curpos_def = LoadLocal(current_position_);
1683 Definition* cp_off_def = Int64Constant(-cp_offset);
1684
1685 // If (current_position_ < -cp_offset), we are in bounds.
1686 // Remember, current_position_ is a negative offset from the string end.
1687
1688 BranchOrBacktrack(Comparison(kGTE, curpos_def, cp_off_def),
1689 on_outside_input);
1690 }
1691
1692
1693 void IRRegExpMacroAssembler::BranchOrBacktrack(
1694 ComparisonInstr* comparison,
1695 BlockLabel* true_successor) {
1696 if (comparison == NULL) { // No condition
1697 if (true_successor == NULL) {
1698 Backtrack();
1699 return;
1700 }
1701 GoTo(true_successor);
1702 return;
1703 }
1704
1705 // If no successor block has been passed in, backtrack.
1706 JoinEntryInstr* true_successor_block = backtrack_block_;
1707 if (true_successor != NULL) {
1708 true_successor->SetLinked();
1709 true_successor_block = true_successor->block();
1710 }
1711 ASSERT(true_successor_block != NULL);
1712
1713 // If the condition is not true, fall through to a new block.
1714 BlockLabel fallthrough;
1715
1716 BranchInstr* branch = new(I) BranchInstr(comparison);
1717 *branch->true_successor_address() =
1718 TargetWithJoinGoto(true_successor_block);
1719 *branch->false_successor_address() =
1720 TargetWithJoinGoto(fallthrough.block());
1721
1722 CloseBlockWith(branch);
1723 BindBlock(&fallthrough);
1724 }
1725
1726
1727 TargetEntryInstr* IRRegExpMacroAssembler::TargetWithJoinGoto(
1728 JoinEntryInstr* dst) {
1729 TargetEntryInstr* target = new(I) TargetEntryInstr(
1730 block_id.Alloc(), kInvalidTryIndex);
1731 blocks_.Add(target);
1732
1733 target->AppendInstruction(new(I) GotoInstr(dst));
1734
1735 return target;
1736 }
1737
1738
1739 IndirectEntryInstr* IRRegExpMacroAssembler::IndirectWithJoinGoto(
1740 JoinEntryInstr* dst) {
1741 IndirectEntryInstr* target = new(I) IndirectEntryInstr(
1742 block_id.Alloc(), indirect_id.Alloc(), kInvalidTryIndex);
1743 blocks_.Add(target);
1744
1745 target->AppendInstruction(new(I) GotoInstr(dst));
1746
1747 return target;
1748 }
1749
1750
1751 void IRRegExpMacroAssembler::CheckPreemption() {
1752 TAG();
1753 AppendInstruction(new(I) CheckStackOverflowInstr(kNoTokenPos, 0));
1754 }
1755
1756
1757 Value* IRRegExpMacroAssembler::Add(
1758 PushArgumentInstr* lhs,
1759 PushArgumentInstr* rhs) {
1760 return Bind(InstanceCall(
Florian Schneider 2014/10/07 12:49:36 I'd rather not perform Bind() here, and just retur
jgruber1 2014/10/07 15:00:25 Done.
1761 InstanceCallDescriptor::FromToken(Token::kADD), lhs, rhs));
1762 }
1763
1764
1765 Value* IRRegExpMacroAssembler::Sub(
1766 PushArgumentInstr* lhs,
1767 PushArgumentInstr* rhs) {
1768 return Bind(InstanceCall(
Florian Schneider 2014/10/07 12:49:37 Also here better not to call Bind() inside.
jgruber1 2014/10/07 15:00:25 Done.
1769 InstanceCallDescriptor::FromToken(Token::kSUB), lhs, rhs));
1770 }
1771
1772
1773 static const String& codeUnitsAtName(intptr_t characters) {
1774 switch (characters) {
1775 case 1: return Library::PrivateCoreLibName(Symbols::_oneCodeUnitAt());
1776 case 2: return Library::PrivateCoreLibName(Symbols::_twoCodeUnitsAt());
1777 case 4: return Library::PrivateCoreLibName(Symbols::_fourCodeUnitsAt());
1778 }
1779 UNREACHABLE();
1780 return String::Handle();
1781 }
1782
1783
1784 void IRRegExpMacroAssembler::LoadCurrentCharacterUnchecked(
1785 intptr_t cp_offset, intptr_t characters) {
1786 TAG();
1787
1788 if (mode_ == ASCII) {
1789 ASSERT(characters == 1 || characters == 2 || characters == 4);
1790 } else {
1791 ASSERT(mode_ == UC16);
1792 ASSERT(characters == 1 || characters == 2);
1793 }
1794
1795 // LoadLocal pattern_param_
1796 // PushArgument()
1797 PushArgumentInstr* pattern_push = PushLocal(string_param_);
1798
1799 // Calculate the addressed string index as
1800 // cp_offset + current_position_ + string_param_length_
1801 PushArgumentInstr* cp_offset_push =
1802 PushArgument(Bind(Int64Constant(cp_offset)));
1803 PushArgumentInstr* cur_pos_push = PushLocal(current_position_);
1804
1805 PushArgumentInstr* partial_sum_push =
1806 PushArgument(Add(cp_offset_push, cur_pos_push));
1807 PushArgumentInstr* length_push = PushLocal(string_param_length_);
1808
1809 PushArgumentInstr* pos_push =
1810 PushArgument(Add(partial_sum_push, length_push));
1811
1812 // InstanceCall(codeUnitAt, t0, t0)
1813 const String& name = codeUnitsAtName(characters);
1814 Value* code_unit_value =
1815 Bind(InstanceCall(InstanceCallDescriptor(name),
1816 pattern_push,
1817 pos_push));
1818
1819 // StoreLocal(current_character_)
1820 StoreLocal(current_character_, code_unit_value);
1821
1822 PRINT(PushLocal(current_character_));
1823 }
1824
1825
1826 Value* IRRegExpMacroAssembler::CharacterAt(Definition* index) {
1827 PushArgumentInstr* pattern_push = PushLocal(string_param_);
1828 PushArgumentInstr* index_push = PushArgument(Bind(index));
1829
1830 return Bind(InstanceCall(InstanceCallDescriptor(codeUnitsAtName(1)),
1831 pattern_push,
1832 index_push));
1833 }
1834
1835
1836 #undef __
1837
1838 } // namespace dart
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698