Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(602)

Side by Side Diff: runtime/vm/regexp_assembler.cc

Issue 539153002: Port and integrate the irregexp engine from V8 (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: Updated to current version Created 6 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file.
4
5 #include "vm/regexp_assembler.h"
6
7 #include "vm/bit_vector.h"
8 #include "vm/compiler.h"
9 #include "vm/dart_entry.h"
10 #include "vm/flow_graph_builder.h"
11 #include "vm/il_printer.h"
12 #include "vm/object_store.h"
13 #include "vm/regexp.h"
14 #include "vm/resolver.h"
15 #include "vm/stack_frame.h"
16 #include "vm/unibrow-inl.h"
17 #include "vm/unicode.h"
18
// Shorthand for the isolate accessor of the enclosing object; only usable
// where an isolate() function is in scope.
#define I isolate()

// Debugging output macros. TAG() is called at the head of each interesting
// function and prints its name during execution if irregexp tracing is enabled.
// PRINT(arg) emits a print of the given pushed argument under the same flag.
//
// TAG() and PRINT() are wrapped in do { } while (0) so that each expands to
// exactly one statement. A bare `if (...) { ... }` expansion followed by the
// caller's `;` breaks (or silently re-associates) an enclosing unbraced
// if/else.
#define TAG() do { if (FLAG_trace_irregexp) { TAG_(); } } while (0)
#define TAG_() \
  Print(PushArgument( \
    Bind(new(I) ConstantInstr(String::ZoneHandle(I, String::Concat( \
        String::Handle(String::New("TAG: ")), \
        String::Handle(String::New(__FUNCTION__)), Heap::kOld))))));

#define PRINT(arg) do { if (FLAG_trace_irregexp) { Print(arg); } } while (0)
31
32 namespace dart {
33
// Flag consulted by the TAG() and PRINT() macros; defaults to false.
DEFINE_FLAG(bool, trace_irregexp, false, "Trace irregexps");

// Try index handed to every block entry instruction created in this file.
static const intptr_t kInvalidTryIndex = -1;
// Token position handed to the instructions and local variables created in
// this file.
static const intptr_t kNoTokenPos = -1;
38
39 /*
40 * This assembler uses the following main local variables:
41 * - stack_: A pointer to a growable list which we use as an all-purpose stack
42 * storing backtracking offsets, positions & stored register values.
43 * - current_character_: Stores the currently loaded characters (possibly more
44 * than one).
45 * - current_position_: The current position within the string, stored as a
46 * negative offset from the end of the string (i.e. the
47 * position corresponding to str[0] is -str.length).
48 * Note that current_position_ is *not* byte-based, unlike
49 * original V8 code.
50 *
51 * Results are returned through an array of capture indices, stored at
52 * matches_param_. A null array specifies a failure to match. The match indices
53 * [start_inclusive, end_exclusive] for capture group i are stored at positions
54 * matches_param_[i * 2] and matches_param_[i * 2 + 1], respectively. Match
55 * indices of -1 denote non-matched groups. Note that we store these indices
56 * as a negative offset from the end of the string in position_registers_
57 * during processing, and convert them to standard indexes when copying them
58 * to matches_param_ on successful match.
59 */
60
61 RegExpMacroAssembler::RegExpMacroAssembler(Isolate* isolate)
62 : slow_safe_compiler_(false),
63 global_mode_(NOT_GLOBAL),
64 isolate_(isolate) {
65 }
66
67
68 RegExpMacroAssembler::~RegExpMacroAssembler() {
69 }
70
71
72 IRRegExpMacroAssembler::IRRegExpMacroAssembler(
73 intptr_t specialization_cid,
74 intptr_t capture_count,
75 const ParsedFunction* parsed_function,
76 ZoneGrowableArray<const ICData*>* ic_data_array,
77 Isolate* isolate)
78 : RegExpMacroAssembler(isolate),
79 specialization_cid_(specialization_cid),
80 parsed_function_(parsed_function),
81 ic_data_array_(ic_data_array),
82 current_instruction_(NULL),
83 stack_(NULL),
84 current_character_(NULL),
85 current_position_(NULL),
86 string_param_(NULL),
87 string_param_length_(NULL),
88 start_index_param_(NULL),
89 position_registers_count_((capture_count + 1) * 2),
90 stack_array_(GrowableObjectArray::ZoneHandle(
91 isolate, GrowableObjectArray::New(16, Heap::kOld))) {
92 switch (specialization_cid) {
93 case kOneByteStringCid:
94 case kExternalOneByteStringCid: mode_ = ASCII; break;
95 case kTwoByteStringCid:
96 case kExternalTwoByteStringCid: mode_ = UC16; break;
97 default: UNREACHABLE();
98 }
99
100 InitializeLocals();
101
102 // Create and generate all preset blocks.
103 entry_block_ =
104 new(isolate) GraphEntryInstr(
105 parsed_function_,
106 new(isolate) TargetEntryInstr(block_id.Alloc(), kInvalidTryIndex),
107 Isolate::kNoDeoptId);
108 start_block_ =
109 new(isolate) JoinEntryInstr(block_id.Alloc(), kInvalidTryIndex);
110 success_block_ =
111 new(isolate) JoinEntryInstr(block_id.Alloc(), kInvalidTryIndex);
112 backtrack_block_ =
113 new(isolate) JoinEntryInstr(block_id.Alloc(), kInvalidTryIndex);
114 exit_block_ =
115 new(isolate) JoinEntryInstr(block_id.Alloc(), kInvalidTryIndex);
116
117 GenerateEntryBlock();
118 GenerateSuccessBlock();
119 GenerateBacktrackBlock();
120 GenerateExitBlock();
121
122 blocks_.Add(entry_block_);
123 blocks_.Add(entry_block_->normal_entry());
124 blocks_.Add(start_block_);
125 blocks_.Add(success_block_);
126 blocks_.Add(backtrack_block_);
127 blocks_.Add(exit_block_);
128
129 // Begin emission at the start_block_.
130 set_current_instruction(start_block_);
131 }
132
133
134 IRRegExpMacroAssembler::~IRRegExpMacroAssembler() { }
135
136
137 void IRRegExpMacroAssembler::InitializeLocals() {
138 // Create local variables and parameters.
139 stack_ = Local(Symbols::stack_());
140 current_character_ = Local(Symbols::current_character_());
141 current_position_ = Local(Symbols::current_position_());
142 string_param_length_ = Local(Symbols::string_param_length_());
143 capture_length_ = Local(Symbols::capture_length_());
144 match_start_index_ = Local(Symbols::match_start_index_());
145 capture_start_index_ = Local(Symbols::capture_start_index_());
146 match_end_index_ = Local(Symbols::match_end_index_());
147 char_in_capture_ = Local(Symbols::char_in_capture_());
148 char_in_match_ = Local(Symbols::char_in_match_());
149 result_ = Local(Symbols::result_());
150
151 string_param_ = Parameter(Symbols::string_param_(), 0);
152 start_index_param_ = Parameter(Symbols::start_index_param_(), 1);
153
154 // Reserve space for all captured group positions. Note that more might
155 // be created on the fly for internal use.
156 for (intptr_t i = 0; i < position_registers_count_; i++) {
157 position_register(i);
158 }
159 }
160
161
162 void IRRegExpMacroAssembler::GenerateEntryBlock() {
163 set_current_instruction(entry_block_->normal_entry());
164 TAG();
165
166 // Generate a local list variable which we will use as a backtracking stack.
167
168 StoreLocal(stack_, Bind(new(I) ConstantInstr(stack_array_)));
169 Do(InstanceCall(InstanceCallDescriptor(Symbols::clear()),
170 PushArgument(Bind(LoadLocal(stack_)))));
171
172 // Store string.length.
173 PushArgumentInstr* string_push = PushArgument(Bind(LoadLocal(string_param_)));
174
175 StoreLocal(string_param_length_,
176 Bind(InstanceCall(InstanceCallDescriptor(
177 String::ZoneHandle(
178 Field::GetterSymbol(Symbols::Length()))),
179 string_push)));
180
181 // Initialize all capture registers.
182 ClearRegisters(0, position_registers_count_ - 1);
183
184 // Store (start_index - string.length) as the current position (since it's a
185 // negative offset from the end of the string).
186 PushArgumentInstr* start_index_push =
187 PushArgument(Bind(LoadLocal(start_index_param_)));
188 PushArgumentInstr* length_push =
189 PushArgument(Bind(LoadLocal(string_param_length_)));
190
191 StoreLocal(current_position_, Sub(start_index_push, length_push));
192
193 // Jump to the start block.
194 current_instruction_->Goto(start_block_);
195 }
196
197
198 void IRRegExpMacroAssembler::GenerateBacktrackBlock() {
199 set_current_instruction(backtrack_block_);
200 TAG();
201 Backtrack();
202 }
203
204
205 void IRRegExpMacroAssembler::GenerateSuccessBlock() {
206 set_current_instruction(success_block_);
207 TAG();
208
209 Definition* type_args_null_def = new(I) ConstantInstr(
210 TypeArguments::ZoneHandle(I, TypeArguments::null()));
211 PushArgumentInstr* type_arg_push = PushArgument(Bind(type_args_null_def));
212 PushArgumentInstr* length_push =
213 PushArgument(Bind(Uint64Constant(position_registers_count_)));
214
215 const Library& lib = Library::Handle(Library::CoreLibrary());
216 const Class& list_class = Class::Handle(
217 lib.LookupCoreClass(Symbols::List()));
218 const Function& list_ctor =
219 Function::ZoneHandle(I, list_class.LookupFactory(Symbols::ListFactory()));
220
221 StoreLocal(result_, Bind(StaticCall(list_ctor, type_arg_push, length_push)));
222
223 // Store captured offsets in the `matches` parameter.
224 for (intptr_t i = 0; i < position_registers_count_; i++) {
225 PushArgumentInstr* matches_push =
226 PushArgument(Bind(LoadLocal(result_)));
227 PushArgumentInstr* index_push = PushArgument(Bind(Uint64Constant(i)));
228
229 // Convert negative offsets from the end of the string to string indices.
230 PushArgumentInstr* offset_push =
231 PushArgument(Bind(LoadLocal(position_register(i))));
232 PushArgumentInstr* len_push =
233 PushArgument(Bind(LoadLocal(string_param_length_)));
234 PushArgumentInstr* value_push = PushArgument(Add(offset_push, len_push));
235
236 Do(InstanceCall(InstanceCallDescriptor(Token::kASSIGN_INDEX),
237 matches_push,
238 index_push,
239 value_push));
240 }
241
242 // Print the result if tracing.
243 PRINT(PushArgument(Bind(LoadLocal(result_))));
244
245 // Return true on success.
246 AppendInstruction(new(I) ReturnInstr(kNoTokenPos, Bind(LoadLocal(result_))));
247 }
248
249
250 void IRRegExpMacroAssembler::GenerateExitBlock() {
251 set_current_instruction(exit_block_);
252 TAG();
253
254 // Return false on failure.
255 AppendInstruction(new(I) ReturnInstr(kNoTokenPos, Bind(LoadLocal(result_))));
256 }
257
258
259 #if defined(TARGET_ARCH_ARM64) || \
260 defined(TARGET_ARCH_ARM) || \
261 defined(TARGET_ARCH_MIPS)
262 // TODO(jgruber): Some ARM CPUs support unaligned accesses.
263 static const bool kEnableUnalignedAccesses = false;
264 #else
265 static const bool kEnableUnalignedAccesses = true;
266 #endif
267 bool IRRegExpMacroAssembler::CanReadUnaligned() {
268 return kEnableUnalignedAccesses && !slow_safe();
269 }
270
271
272 RawArray* IRRegExpMacroAssembler::Execute(
273 const Function& function,
274 const String& input,
275 const Smi& start_offset,
276 Isolate* isolate) {
277 // Create the argument list.
278 const Array& args = Array::Handle(Array::New(2));
279 args.SetAt(0, input);
280 args.SetAt(1, start_offset);
281
282 // And finally call the generated code.
283
284 const Object& retval =
285 Object::Handle(isolate, DartEntry::InvokeFunction(function, args));
286 if (retval.IsError()) {
287 const Error& error = Error::Cast(retval);
288 OS::Print("%s\n", error.ToErrorCString());
289 // Should never happen.
290 UNREACHABLE();
291 }
292
293 if (retval.IsNull()) {
294 return Array::null();
295 }
296
297 ASSERT(retval.IsArray());
298 return Array::Cast(retval).raw();
299 }
300
301
302 RawBool* IRRegExpMacroAssembler::CaseInsensitiveCompareUC16(
303 RawString* str_raw,
304 RawSmi* lhs_index_raw,
305 RawSmi* rhs_index_raw,
306 RawSmi* length_raw) {
307 const String& str = String::Handle(str_raw);
308 const Smi& lhs_index = Smi::Handle(lhs_index_raw);
309 const Smi& rhs_index = Smi::Handle(rhs_index_raw);
310 const Smi& length = Smi::Handle(length_raw);
311
312 // TODO(jgruber): Optimize as single instance. V8 has this as an
313 // isolate member.
314 unibrow::Mapping<unibrow::Ecma262Canonicalize> canonicalize;
315 // This function is not allowed to cause a garbage collection.
316 // A GC might move the calling generated code and invalidate the
317 // return address on the stack.
318
319 for (intptr_t i = 0; i < length.Value(); i++) {
320 int32_t c1 = str.CharAt(lhs_index.Value() + i);
321 int32_t c2 = str.CharAt(rhs_index.Value() + i);
322 if (c1 != c2) {
323 int32_t s1[1] = { c1 };
324 canonicalize.get(c1, '\0', s1);
325 if (s1[0] != c2) {
326 int32_t s2[1] = { c2 };
327 canonicalize.get(c2, '\0', s2);
328 if (s1[0] != s2[0]) {
329 return Bool::False().raw();
330 }
331 }
332 }
333 }
334 return Bool::True().raw();
335 }
336
337
338 LocalVariable* IRRegExpMacroAssembler::Parameter(const String& name,
339 intptr_t index) const {
340 const Type& local_type = Type::ZoneHandle(I, Type::DynamicType());
341 LocalVariable* local =
342 new(I) LocalVariable(kNoTokenPos, name, local_type);
343
344 intptr_t param_frame_index = kParamEndSlotFromFp + kParamCount - index;
345 local->set_index(param_frame_index);
346
347 return local;
348 }
349
350
351 LocalVariable* IRRegExpMacroAssembler::Local(const String& name) {
352 const Type& local_type = Type::ZoneHandle(I, Type::DynamicType());
353 LocalVariable* local =
354 new(I) LocalVariable(kNoTokenPos, name, local_type);
355 local->set_index(GetNextLocalIndex());
356
357 return local;
358 }
359
360
361 ConstantInstr* IRRegExpMacroAssembler::Int64Constant(int64_t value) const {
362 return new(I) ConstantInstr(
363 Integer::ZoneHandle(I, Integer::New(value, Heap::kOld)));
364 }
365
366
367 ConstantInstr* IRRegExpMacroAssembler::Uint64Constant(uint64_t value) const {
368 return new(I) ConstantInstr(
369 Integer::ZoneHandle(I, Integer::NewFromUint64(value, Heap::kOld)));
370 }
371
372
373 ConstantInstr* IRRegExpMacroAssembler::BoolConstant(bool value) const {
374 return new(I) ConstantInstr(value ? Bool::True() : Bool::False());
375 }
376
377
378 ConstantInstr* IRRegExpMacroAssembler::StringConstant(const char* value) const {
379 return new(I) ConstantInstr(
380 String::ZoneHandle(I, String::New(value, Heap::kOld)));
381 }
382
383
384 ConstantInstr* IRRegExpMacroAssembler::WordCharacterMapConstant() const {
385 const Library& lib = Library::Handle(I, Library::CoreLibrary());
386 const Class& regexp_class = Class::Handle(I,
387 lib.LookupClassAllowPrivate(Symbols::JSSyntaxRegExp()));
388 const Field& word_character_field = Field::ZoneHandle(I,
389 regexp_class.LookupStaticField(Symbols::_wordCharacterMap()));
390 ASSERT(!word_character_field.IsNull());
391
392 if (word_character_field.IsUninitialized()) {
393 word_character_field.EvaluateInitializer();
394 }
395 ASSERT(!word_character_field.IsUninitialized());
396
397 return new(I) ConstantInstr(
398 Array::Cast(Instance::ZoneHandle(I, word_character_field.value())));
399 }
400
401
402 ComparisonInstr* IRRegExpMacroAssembler::Comparison(
403 ComparisonKind kind, Definition* lhs, Definition* rhs) {
404 Token::Kind strict_comparison = Token::kEQ_STRICT;
405 Token::Kind intermediate_operator = Token::kILLEGAL;
406 switch (kind) {
407 case kEQ:
408 intermediate_operator = Token::kEQ;
409 break;
410 case kNE:
411 intermediate_operator = Token::kEQ;
412 strict_comparison = Token::kNE_STRICT;
413 break;
414 case kLT:
415 intermediate_operator = Token::kLT;
416 break;
417 case kGT:
418 intermediate_operator = Token::kGT;
419 break;
420 case kLTE:
421 intermediate_operator = Token::kLTE;
422 break;
423 case kGTE:
424 intermediate_operator = Token::kGTE;
425 break;
426 default:
427 UNREACHABLE();
428 }
429
430 ASSERT(intermediate_operator != Token::kILLEGAL);
431
432 PushArgumentInstr* lhs_push = PushArgument(Bind(lhs));
433 PushArgumentInstr* rhs_push = PushArgument(Bind(rhs));
434
435 Value* lhs_value =
436 Bind(InstanceCall(InstanceCallDescriptor(intermediate_operator),
437 lhs_push,
438 rhs_push));
439 Value* rhs_value = Bind(BoolConstant(true));
440
441 return new(I) StrictCompareInstr(kNoTokenPos, strict_comparison,
442 lhs_value, rhs_value, true);
443 }
444
445
446 StaticCallInstr* IRRegExpMacroAssembler::StaticCall(
447 const Function& function) const {
448 ZoneGrowableArray<PushArgumentInstr*>* arguments =
449 new(I) ZoneGrowableArray<PushArgumentInstr*>(0);
450 return StaticCall(function, arguments);
451 }
452
453
454 StaticCallInstr* IRRegExpMacroAssembler::StaticCall(
455 const Function& function,
456 PushArgumentInstr* arg1) const {
457 ZoneGrowableArray<PushArgumentInstr*>* arguments =
458 new(I) ZoneGrowableArray<PushArgumentInstr*>(1);
459 arguments->Add(arg1);
460
461 return StaticCall(function, arguments);
462 }
463
464
465 StaticCallInstr* IRRegExpMacroAssembler::StaticCall(
466 const Function& function,
467 PushArgumentInstr* arg1,
468 PushArgumentInstr* arg2) const {
469 ZoneGrowableArray<PushArgumentInstr*>* arguments =
470 new(I) ZoneGrowableArray<PushArgumentInstr*>(2);
471 arguments->Add(arg1);
472 arguments->Add(arg2);
473
474 return StaticCall(function, arguments);
475 }
476
477
478 StaticCallInstr* IRRegExpMacroAssembler::StaticCall(
479 const Function& function,
480 ZoneGrowableArray<PushArgumentInstr*>* arguments) const {
481 return new(I) StaticCallInstr(kNoTokenPos,
482 function,
483 Object::null_array(),
484 arguments,
485 *ic_data_array_);
486 }
487
488
489 InstanceCallInstr* IRRegExpMacroAssembler::InstanceCall(
490 const InstanceCallDescriptor& desc,
491 PushArgumentInstr* arg1) const {
492 ZoneGrowableArray<PushArgumentInstr*>* arguments =
493 new(I) ZoneGrowableArray<PushArgumentInstr*>(1);
494 arguments->Add(arg1);
495
496 return InstanceCall(desc, arguments);
497 }
498
499
500 InstanceCallInstr* IRRegExpMacroAssembler::InstanceCall(
501 const InstanceCallDescriptor& desc,
502 PushArgumentInstr* arg1,
503 PushArgumentInstr* arg2) const {
504 ZoneGrowableArray<PushArgumentInstr*>* arguments =
505 new(I) ZoneGrowableArray<PushArgumentInstr*>(2);
506 arguments->Add(arg1);
507 arguments->Add(arg2);
508
509 return InstanceCall(desc, arguments);
510 }
511
512
513 InstanceCallInstr* IRRegExpMacroAssembler::InstanceCall(
514 const InstanceCallDescriptor& desc,
515 PushArgumentInstr* arg1,
516 PushArgumentInstr* arg2,
517 PushArgumentInstr* arg3) const {
518 ZoneGrowableArray<PushArgumentInstr*>* arguments =
519 new(I) ZoneGrowableArray<PushArgumentInstr*>(3);
520 arguments->Add(arg1);
521 arguments->Add(arg2);
522 arguments->Add(arg3);
523
524 return InstanceCall(desc, arguments);
525 }
526
527
528 InstanceCallInstr* IRRegExpMacroAssembler::InstanceCall(
529 const InstanceCallDescriptor& desc,
530 ZoneGrowableArray<PushArgumentInstr*> *arguments) const {
531 return
532 new(I) InstanceCallInstr(kNoTokenPos,
533 *desc.name,
534 desc.token_kind,
535 arguments,
536 Object::null_array(),
537 desc.checked_argument_count,
538 *ic_data_array_);
539 }
540
541
542 LoadLocalInstr* IRRegExpMacroAssembler::LoadLocal(LocalVariable* local) const {
543 return new(I) LoadLocalInstr(*local);
544 }
545
546
547 void IRRegExpMacroAssembler::StoreLocal(LocalVariable* local,
548 Value* value) {
549 Do(new(I) StoreLocalInstr(*local, value));
550 }
551
552
553 void IRRegExpMacroAssembler::set_current_instruction(Instruction* instruction) {
554 current_instruction_ = instruction;
555 }
556
557
558 Value* IRRegExpMacroAssembler::Bind(Definition* definition) {
559 AppendInstruction(definition);
560 definition->set_temp_index(temp_id.Alloc());
561
562 return new(I) Value(definition);
563 }
564
565
566 void IRRegExpMacroAssembler::Do(Definition* definition) {
567 AppendInstruction(definition);
568 }
569
570
571 void IRRegExpMacroAssembler::AppendInstruction(Instruction* instruction) {
572 ASSERT(current_instruction_ != NULL);
573 ASSERT(current_instruction_->next() == NULL);
574
575 temp_id.Dealloc(instruction->InputCount());
576 arg_id.Dealloc(instruction->ArgumentCount());
577
578 current_instruction_->LinkTo(instruction);
579 set_current_instruction(instruction);
580 }
581
582
583 void IRRegExpMacroAssembler::CloseBlockWith(Instruction* instruction) {
584 ASSERT(current_instruction_ != NULL);
585 ASSERT(current_instruction_->next() == NULL);
586
587 temp_id.Dealloc(instruction->InputCount());
588 arg_id.Dealloc(instruction->ArgumentCount());
589
590 current_instruction_->LinkTo(instruction);
591 set_current_instruction(NULL);
592 }
593
594
595 // Jumps to the target block and sets it as the target for continued emission.
596 void IRRegExpMacroAssembler::GoTo(BlockLabel* to) {
597 ASSERT(current_instruction_ != NULL);
598 ASSERT(current_instruction_->next() == NULL);
599 ASSERT(to->block()->next() == NULL);
600 to->SetLinked();
601 current_instruction_->Goto(to->block());
602 set_current_instruction(to->block());
603 }
604
605
606 void IRRegExpMacroAssembler::Jump(BlockLabel* to) {
607 if (to == NULL) {
608 Backtrack();
609 } else {
610 to->SetLinked();
611 Jump(to->block());
612 }
613 }
614
615
616 // Closes the current block with a goto, and unsets current_instruction_.
617 // BindBlock() must be called before emission can continue.
618 void IRRegExpMacroAssembler::Jump(JoinEntryInstr* to) {
619 ASSERT(current_instruction_ != NULL);
620 ASSERT(current_instruction_->next() == NULL);
621 current_instruction_->Goto(to);
622 set_current_instruction(NULL);
623 }
624
625
626 PushArgumentInstr* IRRegExpMacroAssembler::PushArgument(Value* value) {
627 arg_id.Alloc();
628 PushArgumentInstr* push = new(I) PushArgumentInstr(value);
629 // Do *not* use Do() for push argument instructions.
630 AppendInstruction(push);
631 return push;
632 }
633
634
635 void IRRegExpMacroAssembler::Print(const char* str) {
636 Print(PushArgument(
637 Bind(new(I) ConstantInstr(
638 String::ZoneHandle(I, String::New(str, Heap::kOld))))));
639 }
640
641
642 void IRRegExpMacroAssembler::Print(PushArgumentInstr* argument) {
643 const Library& lib = Library::Handle(Library::CoreLibrary());
644 const Function& print_fn = Function::ZoneHandle(
645 I, lib.LookupFunctionAllowPrivate(Symbols::print()));
646 Do(StaticCall(print_fn, argument));
647 }
648
649
650 void IRRegExpMacroAssembler::PrintBlocks() {
651 for (intptr_t i = 0; i < blocks_.length(); i++) {
652 FlowGraphPrinter::PrintBlock(blocks_[i], false);
653 }
654 }
655
656
657 intptr_t IRRegExpMacroAssembler::stack_limit_slack() {
658 return 32;
659 }
660
661
662 void IRRegExpMacroAssembler::AdvanceCurrentPosition(intptr_t by) {
663 TAG();
664 if (by != 0) {
665 PushArgumentInstr* cur_pos_push =
666 PushArgument(Bind(LoadLocal(current_position_)));
667
668 PushArgumentInstr* by_push =
669 PushArgument(Bind(Int64Constant(by)));
670
671 Value* new_pos_value = Add(cur_pos_push, by_push);
672 StoreLocal(current_position_, new_pos_value);
673 }
674 }
675
676
677 void IRRegExpMacroAssembler::AdvanceRegister(intptr_t reg, intptr_t by) {
678 TAG();
679 ASSERT(reg >= 0);
680 ASSERT(reg < position_registers_.length());
681
682 if (by != 0) {
683 PushArgumentInstr* reg_push =
684 PushArgument(Bind(LoadLocal(position_register(reg))));
685 PushArgumentInstr* by_push = PushArgument(Bind(Int64Constant(by)));
686 StoreLocal(position_register(reg), Add(reg_push, by_push));
687 }
688 }
689
690
691 void IRRegExpMacroAssembler::Backtrack() {
692 TAG();
693 CheckPreemption();
694
695 GrowableObjectArray& offsets = GrowableObjectArray::ZoneHandle(
696 I, GrowableObjectArray::New(Heap::kOld));
697
698 PushArgumentInstr* block_offsets_push =
699 PushArgument(Bind(new(I) ConstantInstr(offsets)));
700 PushArgumentInstr* block_id_push = PushArgument(PopStack());
701
702 Value* offset_value =
703 Bind(InstanceCall(InstanceCallDescriptor(Token::kINDEX),
704 block_offsets_push,
705 block_id_push));
706
707 IndirectGotoInstr* igoto = new(I) IndirectGotoInstr(&offsets, offset_value);
708 CloseBlockWith(igoto);
709 igotos_.Add(igoto);
710 }
711
712
713 // A BindBlock is analogous to assigning a label to a basic block.
714 // If the BlockLabel does not yet contain a block, it is created.
715 // If there is a current instruction, append a goto to the bound block.
716 void IRRegExpMacroAssembler::BindBlock(BlockLabel* label) {
717 ASSERT(!label->IsBound());
718 ASSERT(label->block()->next() == NULL);
719
720 label->SetBound(block_id.Alloc());
721 blocks_.Add(label->block());
722
723 if (current_instruction_ == NULL) {
724 set_current_instruction(label->block());
725 } else {
726 GoTo(label);
727 }
728
729 // Print the id of the current block if tracing.
730 PRINT(PushArgument(Bind(Uint64Constant(label->block()->block_id()))));
731 }
732
733
734 intptr_t IRRegExpMacroAssembler::GetNextLocalIndex() {
735 intptr_t id = local_id.Alloc();
736 return kFirstLocalSlotFromFp - id;
737 }
738
739
740 LocalVariable* IRRegExpMacroAssembler::position_register(intptr_t index) {
741 ASSERT(index < 10 * 10 * 10 * 10 * 10); // 10^5.
742 const char name_prefix[] = "pos";
743 char name[sizeof(name_prefix) + 5];
744
745 // Create position registers as needed.
746 for (intptr_t i = position_registers_.length(); i < index + 1; i++) {
747 OS::SNPrint(name, sizeof(name), "%s%05" Pd "", name_prefix, i);
748 position_registers_.Add(Local(String::Handle(I, Symbols::New(name))));
749 }
750
751 return position_registers_[index];
752 }
753
754
755 // TODO(jgruber): Move the offset table outside to avoid having to keep
756 // the assembler around until after code generation; both function or regexp
757 // would work.
758 void IRRegExpMacroAssembler::FinalizeBlockOffsetTable() {
759 for (intptr_t i = 0; i < igotos_.length(); i++) {
760 IndirectGotoInstr* igoto = igotos_[i];
761
762 for (intptr_t j = 0; j < igoto->SuccessorCount(); j++) {
763 TargetEntryInstr* target = igoto->SuccessorAt(j);
764 ASSERT(target->next()->IsGoto());
765 IndirectEntryInstr* ientry =
766 target->next()->AsGoto()->successor()->AsIndirectEntry();
767 ASSERT(ientry != NULL);
768
769 // The intermediate block was possibly compacted, check both it and the
770 // final indirect entry for a valid offset. If neither are valid, then
771 // the indirect entry is unreachable.
772 intptr_t offset =
773 (target->offset() > 0) ? target->offset() : ientry->offset();
774 if (offset > 0) {
775 igoto->SetOffsetAt(I, ientry->block_id(), offset);
776 }
777 }
778 }
779 }
780
781 void IRRegExpMacroAssembler::FinalizeIndirectGotos() {
782 for (intptr_t i = 0; i < igotos_.length(); i++) {
783 for (intptr_t j = 0; j < entry_block_->indirect_entries().length(); j++) {
784 igotos_.At(i)->AddSuccessor(
785 TargetWithJoinGoto(entry_block_->indirect_entries().At(j)));
786 }
787 }
788 }
789
790
791 void IRRegExpMacroAssembler::CheckCharacter(uint32_t c, BlockLabel* on_equal) {
792 TAG();
793 Definition* cur_char_def = LoadLocal(current_character_);
794 Definition* char_def = Uint64Constant(c);
795
796 BranchOrBacktrack(Comparison(kEQ, cur_char_def, char_def),
797 on_equal);
798 }
799
800
801 void IRRegExpMacroAssembler::CheckCharacterGT(uint16_t limit,
802 BlockLabel* on_greater) {
803 TAG();
804 BranchOrBacktrack(Comparison(kGT,
805 LoadLocal(current_character_),
806 Uint64Constant(limit)),
807 on_greater);
808 }
809
810
811 void IRRegExpMacroAssembler::CheckAtStart(BlockLabel* on_at_start) {
812 TAG();
813
814 BlockLabel not_at_start;
815
816 // Did we start the match at the start of the string at all?
817 BranchOrBacktrack(Comparison(kNE,
818 LoadLocal(start_index_param_),
819 Uint64Constant(0)),
820 &not_at_start);
821
822 // If we did, are we still at the start of the input, i.e. is
823 // (offset == string_length * -1)?
824 Definition* neg_len_def =
825 InstanceCall(InstanceCallDescriptor(Token::kNEGATE),
826 PushArgument(Bind(LoadLocal(string_param_length_))));
827 Definition* offset_def = LoadLocal(current_position_);
828 BranchOrBacktrack(Comparison(kEQ, neg_len_def, offset_def),
829 on_at_start);
830
831 BindBlock(&not_at_start);
832 }
833
834
835 void IRRegExpMacroAssembler::CheckNotAtStart(BlockLabel* on_not_at_start) {
836 TAG();
837
838 // Did we start the match at the start of the string at all?
839 BranchOrBacktrack(Comparison(kNE,
840 LoadLocal(start_index_param_),
841 Uint64Constant(0)),
842 on_not_at_start);
843
844 // If we did, are we still at the start of the input, i.e. is
845 // (offset == string_length * -1)?
846 Definition* neg_len_def =
847 InstanceCall(InstanceCallDescriptor(Token::kNEGATE),
848 PushArgument(Bind(LoadLocal(string_param_length_))));
849 Definition* offset_def = LoadLocal(current_position_);
850 BranchOrBacktrack(Comparison(kNE, neg_len_def, offset_def),
851 on_not_at_start);
852 }
853
854
855 void IRRegExpMacroAssembler::CheckCharacterLT(uint16_t limit,
856 BlockLabel* on_less) {
857 TAG();
858 BranchOrBacktrack(Comparison(kLT,
859 LoadLocal(current_character_),
860 Uint64Constant(limit)),
861 on_less);
862 }
863
864
865 void IRRegExpMacroAssembler::CheckGreedyLoop(BlockLabel* on_equal) {
866 TAG();
867
868 BlockLabel fallthrough;
869
870 PushArgumentInstr* stack_push = PushArgument(Bind(LoadLocal(stack_)));
871 Definition* stack_tip_def = InstanceCall(
872 InstanceCallDescriptor(String::ZoneHandle(
873 I, Field::GetterSymbol(Symbols::last()))),
874 stack_push);
875 Definition* cur_pos_def = LoadLocal(current_position_);
876
877 BranchOrBacktrack(Comparison(kNE, stack_tip_def, cur_pos_def),
878 &fallthrough);
879
880 // Pop, throwing away the value.
881 stack_push = PushArgument(Bind(LoadLocal(stack_)));
882 Do(InstanceCall(InstanceCallDescriptor(Symbols::removeLast()),
883 stack_push));
884
885 BranchOrBacktrack(NULL, on_equal);
886
887 BindBlock(&fallthrough);
888 }
889
890
891 void IRRegExpMacroAssembler::CheckNotBackReferenceIgnoreCase(
892 intptr_t start_reg,
893 BlockLabel* on_no_match) {
894 TAG();
895 ASSERT(start_reg + 1 <= position_registers_.length());
896
897 BlockLabel fallthrough;
898
899 PushArgumentInstr* end_push =
900 PushArgument(Bind(LoadLocal(position_register(start_reg + 1))));
901 PushArgumentInstr* start_push =
902 PushArgument(Bind(LoadLocal(position_register(start_reg))));
903 StoreLocal(capture_length_, Sub(end_push, start_push));
904
905 // The length of a capture should not be negative. This can only happen
906 // if the end of the capture is unrecorded, or at a point earlier than
907 // the start of the capture.
908 // BranchOrBacktrack(less, on_no_match);
909
910 BranchOrBacktrack(Comparison(kLT,
911 LoadLocal(capture_length_),
912 Uint64Constant(0)),
913 on_no_match);
914
915 // If length is zero, either the capture is empty or it is completely
916 // uncaptured. In either case succeed immediately.
917 BranchOrBacktrack(Comparison(kEQ,
918 LoadLocal(capture_length_),
919 Uint64Constant(0)),
920 &fallthrough);
921
922
923 // Check that there are sufficient characters left in the input.
924 PushArgumentInstr* pos_push =
925 PushArgument(Bind(LoadLocal(current_position_)));
926 PushArgumentInstr* len_push = PushArgument(Bind(LoadLocal(capture_length_)));
927 BranchOrBacktrack(Comparison(kGT,
928 InstanceCall(InstanceCallDescriptor(Token::kADD),
929 pos_push,
930 len_push),
931 Uint64Constant(0)),
932 on_no_match);
933
934 pos_push = PushArgument(Bind(LoadLocal(current_position_)));
935 len_push = PushArgument(Bind(LoadLocal(string_param_length_)));
936 StoreLocal(match_start_index_, Add(pos_push, len_push));
937
938 pos_push = PushArgument(Bind(LoadLocal(position_register(start_reg))));
939 len_push = PushArgument(Bind(LoadLocal(string_param_length_)));
940 StoreLocal(capture_start_index_, Add(pos_push, len_push));
941
942 pos_push = PushArgument(Bind(LoadLocal(match_start_index_)));
943 len_push = PushArgument(Bind(LoadLocal(capture_length_)));
944 StoreLocal(match_end_index_, Add(pos_push, len_push));
945
946 BlockLabel success;
947 if (mode_ == ASCII) {
948 BlockLabel loop_increment;
949 BlockLabel loop;
950 BindBlock(&loop);
951
952 StoreLocal(char_in_capture_, CharacterAt(LoadLocal(capture_start_index_)));
953 StoreLocal(char_in_match_, CharacterAt(LoadLocal(match_start_index_)));
954
955 BranchOrBacktrack(Comparison(kEQ,
956 LoadLocal(char_in_capture_),
957 LoadLocal(char_in_match_)),
958 &loop_increment);
959
960 // Mismatch, try case-insensitive match (converting letters to lower-case).
961 PushArgumentInstr* match_char_push =
962 PushArgument(Bind(LoadLocal(char_in_match_)));
963 PushArgumentInstr* mask_push = PushArgument(Bind(Uint64Constant(0x20)));
964 StoreLocal(char_in_match_,
965 Bind(InstanceCall(InstanceCallDescriptor(Token::kBIT_OR),
966 match_char_push,
967 mask_push)));
968
969 BlockLabel convert_capture;
970 BlockLabel on_not_in_range;
971 BranchOrBacktrack(Comparison(kLT,
972 LoadLocal(char_in_match_),
973 Uint64Constant('a')),
974 &on_not_in_range);
975 BranchOrBacktrack(Comparison(kGT,
976 LoadLocal(char_in_match_),
977 Uint64Constant('z')),
978 &on_not_in_range);
979 Jump(&convert_capture);
980 BindBlock(&on_not_in_range);
981
982 // Latin-1: Check for values in range [224,254] but not 247.
983 BranchOrBacktrack(Comparison(kLT,
984 LoadLocal(char_in_match_),
985 Uint64Constant(224)),
986 on_no_match);
987 BranchOrBacktrack(Comparison(kGT,
988 LoadLocal(char_in_match_),
989 Uint64Constant(254)),
990 on_no_match);
991
992 BranchOrBacktrack(Comparison(kEQ,
993 LoadLocal(char_in_match_),
994 Uint64Constant(247)),
995 on_no_match);
996
997 // Also convert capture character.
998 BindBlock(&convert_capture);
999
1000 PushArgumentInstr* capture_char_push =
1001 PushArgument(Bind(LoadLocal(char_in_capture_)));
1002 mask_push = PushArgument(Bind(Uint64Constant(0x20)));
1003 StoreLocal(char_in_capture_,
1004 Bind(InstanceCall(InstanceCallDescriptor(Token::kBIT_OR),
1005 capture_char_push,
1006 mask_push)));
1007
1008 BranchOrBacktrack(Comparison(kNE,
1009 LoadLocal(char_in_match_),
1010 LoadLocal(char_in_capture_)),
1011 on_no_match);
1012
1013 BindBlock(&loop_increment);
1014
1015 // Increment pointers into match and capture strings.
1016 StoreLocal(capture_start_index_, Add(
1017 PushArgument(Bind(LoadLocal(capture_start_index_))),
1018 PushArgument(Bind(Uint64Constant(1)))));
1019 StoreLocal(match_start_index_, Add(
1020 PushArgument(Bind(LoadLocal(match_start_index_))),
1021 PushArgument(Bind(Uint64Constant(1)))));
1022
1023 // Compare to end of match, and loop if not done.
1024 BranchOrBacktrack(Comparison(kLT,
1025 LoadLocal(match_start_index_),
1026 LoadLocal(match_end_index_)),
1027 &loop);
1028 } else {
1029 ASSERT(mode_ == UC16);
1030
1031 Value* string_value = Bind(LoadLocal(string_param_));
1032 Value* lhs_index_value = Bind(LoadLocal(match_start_index_));
1033 Value* rhs_index_value = Bind(LoadLocal(capture_start_index_));
1034 Value* length_value = Bind(LoadLocal(capture_length_));
1035
1036 Definition* is_match_def =
1037 new(I) CaseInsensitiveCompareUC16Instr(
1038 string_value,
1039 lhs_index_value,
1040 rhs_index_value,
1041 length_value,
1042 specialization_cid_,
1043 Isolate::kNoDeoptId);
1044
1045 BranchOrBacktrack(Comparison(kNE, is_match_def, BoolConstant(true)),
1046 on_no_match);
1047 }
1048
1049 BindBlock(&success);
1050
1051 // Move current character position to position after match.
1052 PushArgumentInstr* match_end_push =
1053 PushArgument(Bind(LoadLocal(match_end_index_)));
1054 len_push = PushArgument(Bind(LoadLocal(string_param_length_)));
1055 StoreLocal(current_position_, Sub(match_end_push, len_push));
1056
1057 BindBlock(&fallthrough);
1058 }
1059
1060
1061 void IRRegExpMacroAssembler::CheckNotBackReference(
1062 intptr_t start_reg,
1063 BlockLabel* on_no_match) {
1064 TAG();
1065 ASSERT(start_reg + 1 <= position_registers_.length());
1066
1067 BlockLabel fallthrough;
1068 BlockLabel success;
1069
1070 // Find length of back-referenced capture.
1071 PushArgumentInstr* end_push =
1072 PushArgument(Bind(LoadLocal(position_register(start_reg + 1))));
1073 PushArgumentInstr* start_push =
1074 PushArgument(Bind(LoadLocal(position_register(start_reg))));
1075 StoreLocal(capture_length_, Sub(end_push, start_push));
1076
1077 // Fail on partial or illegal capture (start of capture after end of capture).
1078 BranchOrBacktrack(Comparison(kLT,
1079 LoadLocal(capture_length_),
1080 Uint64Constant(0)),
1081 on_no_match);
1082
1083 // Succeed on empty capture (including no capture)
1084 BranchOrBacktrack(Comparison(kEQ,
1085 LoadLocal(capture_length_),
1086 Uint64Constant(0)),
1087 &fallthrough);
1088
1089 // Check that there are sufficient characters left in the input.
1090 PushArgumentInstr* pos_push =
1091 PushArgument(Bind(LoadLocal(current_position_)));
1092 PushArgumentInstr* len_push = PushArgument(Bind(LoadLocal(capture_length_)));
1093 BranchOrBacktrack(Comparison(kGT,
1094 InstanceCall(InstanceCallDescriptor(Token::kADD),
1095 pos_push,
1096 len_push),
1097 Uint64Constant(0)),
1098 on_no_match);
1099
1100 // Compute pointers to match string and capture string.
1101 pos_push = PushArgument(Bind(LoadLocal(current_position_)));
1102 len_push = PushArgument(Bind(LoadLocal(string_param_length_)));
1103 StoreLocal(match_start_index_, Add(pos_push, len_push));
1104
1105 pos_push = PushArgument(Bind(LoadLocal(position_register(start_reg))));
1106 len_push = PushArgument(Bind(LoadLocal(string_param_length_)));
1107 StoreLocal(capture_start_index_, Add(pos_push, len_push));
1108
1109 pos_push = PushArgument(Bind(LoadLocal(match_start_index_)));
1110 len_push = PushArgument(Bind(LoadLocal(capture_length_)));
1111 StoreLocal(match_end_index_, Add(pos_push, len_push));
1112
1113 BlockLabel loop;
1114 BindBlock(&loop);
1115
1116 StoreLocal(char_in_capture_, CharacterAt(LoadLocal(capture_start_index_)));
1117 StoreLocal(char_in_match_, CharacterAt(LoadLocal(match_start_index_)));
1118
1119 BranchOrBacktrack(Comparison(kNE,
1120 LoadLocal(char_in_capture_),
1121 LoadLocal(char_in_match_)),
1122 on_no_match);
1123
1124 // Increment pointers into capture and match string.
1125 StoreLocal(capture_start_index_, Add(
1126 PushArgument(Bind(LoadLocal(capture_start_index_))),
1127 PushArgument(Bind(Uint64Constant(1)))));
1128 StoreLocal(match_start_index_, Add(
1129 PushArgument(Bind(LoadLocal(match_start_index_))),
1130 PushArgument(Bind(Uint64Constant(1)))));
1131
1132 // Check if we have reached end of match area.
1133 BranchOrBacktrack(Comparison(kLT,
1134 LoadLocal(match_start_index_),
1135 LoadLocal(match_end_index_)),
1136 &loop);
1137
1138 BindBlock(&success);
1139
1140 // Move current character position to position after match.
1141 PushArgumentInstr* match_end_push =
1142 PushArgument(Bind(LoadLocal(match_end_index_)));
1143 len_push = PushArgument(Bind(LoadLocal(string_param_length_)));
1144 StoreLocal(current_position_, Sub(match_end_push, len_push));
1145
1146 BindBlock(&fallthrough);
1147 }
1148
1149
1150 void IRRegExpMacroAssembler::CheckNotCharacter(uint32_t c,
1151 BlockLabel* on_not_equal) {
1152 TAG();
1153 BranchOrBacktrack(Comparison(kNE,
1154 LoadLocal(current_character_),
1155 Uint64Constant(c)),
1156 on_not_equal);
1157 }
1158
1159
1160 void IRRegExpMacroAssembler::CheckCharacterAfterAnd(uint32_t c,
1161 uint32_t mask,
1162 BlockLabel* on_equal) {
1163 TAG();
1164
1165 Definition* actual_def = LoadLocal(current_character_);
1166 Definition* expected_def = Uint64Constant(c);
1167
1168 PushArgumentInstr* actual_push = PushArgument(Bind(actual_def));
1169 PushArgumentInstr* mask_push = PushArgument(Bind(Uint64Constant(mask)));
1170 actual_def = InstanceCall(InstanceCallDescriptor(Token::kBIT_AND),
1171 actual_push,
1172 mask_push);
1173
1174 BranchOrBacktrack(Comparison(kEQ, actual_def, expected_def), on_equal);
1175 }
1176
1177
1178 void IRRegExpMacroAssembler::CheckNotCharacterAfterAnd(
1179 uint32_t c,
1180 uint32_t mask,
1181 BlockLabel* on_not_equal) {
1182 TAG();
1183
1184 Definition* actual_def = LoadLocal(current_character_);
1185 Definition* expected_def = Uint64Constant(c);
1186
1187 PushArgumentInstr* actual_push = PushArgument(Bind(actual_def));
1188 PushArgumentInstr* mask_push = PushArgument(Bind(Uint64Constant(mask)));
1189 actual_def = InstanceCall(InstanceCallDescriptor(Token::kBIT_AND),
1190 actual_push,
1191 mask_push);
1192
1193 BranchOrBacktrack(Comparison(kNE, actual_def, expected_def), on_not_equal);
1194 }
1195
1196
1197 void IRRegExpMacroAssembler::CheckNotCharacterAfterMinusAnd(
1198 uint16_t c,
1199 uint16_t minus,
1200 uint16_t mask,
1201 BlockLabel* on_not_equal) {
1202 TAG();
1203 ASSERT(minus < Utf16::kMaxCodeUnit); // NOLINT
1204
1205 Definition* actual_def = LoadLocal(current_character_);
1206 Definition* expected_def = Uint64Constant(c);
1207
1208 PushArgumentInstr* actual_push = PushArgument(Bind(actual_def));
1209 PushArgumentInstr* minus_push = PushArgument(Bind(Uint64Constant(minus)));
1210
1211 actual_push = PushArgument(Sub(actual_push, minus_push));
1212 PushArgumentInstr* mask_push = PushArgument(Bind(Uint64Constant(mask)));
1213 actual_def = InstanceCall(InstanceCallDescriptor(Token::kBIT_AND),
1214 actual_push,
1215 mask_push);
1216
1217 BranchOrBacktrack(Comparison(kNE, actual_def, expected_def), on_not_equal);
1218 }
1219
1220
1221 void IRRegExpMacroAssembler::CheckCharacterInRange(
1222 uint16_t from,
1223 uint16_t to,
1224 BlockLabel* on_in_range) {
1225 TAG();
1226 ASSERT(from <= to);
1227
1228 // TODO(jgruber): All range comparisons could be done cheaper with unsigned
1229 // compares. This pattern repeats in various places.
1230
1231 BlockLabel on_not_in_range;
1232 BranchOrBacktrack(Comparison(kLT,
1233 LoadLocal(current_character_),
1234 Uint64Constant(from)),
1235 &on_not_in_range);
1236 BranchOrBacktrack(Comparison(kGT,
1237 LoadLocal(current_character_),
1238 Uint64Constant(to)),
1239 &on_not_in_range);
1240 BranchOrBacktrack(NULL, on_in_range);
1241
1242 BindBlock(&on_not_in_range);
1243 }
1244
1245
1246 void IRRegExpMacroAssembler::CheckCharacterNotInRange(
1247 uint16_t from,
1248 uint16_t to,
1249 BlockLabel* on_not_in_range) {
1250 TAG();
1251 ASSERT(from <= to);
1252
1253 BranchOrBacktrack(Comparison(kLT,
1254 LoadLocal(current_character_),
1255 Uint64Constant(from)),
1256 on_not_in_range);
1257
1258 BranchOrBacktrack(Comparison(kGT,
1259 LoadLocal(current_character_),
1260 Uint64Constant(to)),
1261 on_not_in_range);
1262 }
1263
1264
1265 void IRRegExpMacroAssembler::CheckBitInTable(
1266 const TypedData& table,
1267 BlockLabel* on_bit_set) {
1268 TAG();
1269
1270 PushArgumentInstr* table_push =
1271 PushArgument(Bind(new(I) ConstantInstr(table)));
1272 PushArgumentInstr* index_push =
1273 PushArgument(Bind(LoadLocal(current_character_)));
1274
1275 if (mode_ != ASCII || kTableMask != Symbols::kMaxOneCharCodeSymbol) {
1276 PushArgumentInstr* mask_push =
1277 PushArgument(Bind(Uint64Constant(kTableSize - 1)));
1278 index_push = PushArgument(
1279 Bind(InstanceCall(InstanceCallDescriptor(Token::kBIT_AND),
1280 index_push,
1281 mask_push)));
1282 }
1283
1284 Definition* byte_def = InstanceCall(InstanceCallDescriptor(Token::kINDEX),
1285 table_push,
1286 index_push);
1287 Definition* zero_def = Int64Constant(0);
1288
1289 BranchOrBacktrack(Comparison(kNE, byte_def, zero_def), on_bit_set);
1290 }
1291
1292
1293 bool IRRegExpMacroAssembler::CheckSpecialCharacterClass(
1294 uint16_t type,
1295 BlockLabel* on_no_match) {
1296 TAG();
1297
1298 // Range checks (c in min..max) are generally implemented by an unsigned
1299 // (c - min) <= (max - min) check
1300 switch (type) {
1301 case 's':
1302 // Match space-characters
1303 if (mode_ == ASCII) {
1304 // One byte space characters are '\t'..'\r', ' ' and \u00a0.
1305 BlockLabel success;
1306 // Space (' ').
1307 BranchOrBacktrack(Comparison(kEQ,
1308 LoadLocal(current_character_),
1309 Uint64Constant(' ')),
1310 &success);
1311 // Check range 0x09..0x0d.
1312 CheckCharacterInRange('\t', '\r', &success);
1313 // \u00a0 (NBSP).
1314 BranchOrBacktrack(Comparison(kNE,
1315 LoadLocal(current_character_),
1316 Uint64Constant(0x00a0)),
1317 on_no_match);
1318 BindBlock(&success);
1319 return true;
1320 }
1321 return false;
1322 case 'S':
1323 // The emitted code for generic character classes is good enough.
1324 return false;
1325 case 'd':
1326 // Match ASCII digits ('0'..'9')
1327 CheckCharacterNotInRange('0', '9', on_no_match);
1328 return true;
1329 case 'D':
1330 // Match non ASCII-digits
1331 CheckCharacterInRange('0', '9', on_no_match);
1332 return true;
1333 case '.': {
1334 // Match non-newlines (not 0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029)
1335 BranchOrBacktrack(Comparison(kEQ,
1336 LoadLocal(current_character_),
1337 Uint64Constant('\n')),
1338 on_no_match);
1339 BranchOrBacktrack(Comparison(kEQ,
1340 LoadLocal(current_character_),
1341 Uint64Constant('\r')),
1342 on_no_match);
1343 if (mode_ == UC16) {
1344 BranchOrBacktrack(Comparison(kEQ,
1345 LoadLocal(current_character_),
1346 Uint64Constant(0x2028)),
1347 on_no_match);
1348 BranchOrBacktrack(Comparison(kEQ,
1349 LoadLocal(current_character_),
1350 Uint64Constant(0x2029)),
1351 on_no_match);
1352 }
1353 return true;
1354 }
1355 case 'w': {
1356 if (mode_ != ASCII) {
1357 // Table is 128 entries, so all ASCII characters can be tested.
1358 BranchOrBacktrack(Comparison(kGT,
1359 LoadLocal(current_character_),
1360 Uint64Constant('z')),
1361 on_no_match);
1362 }
1363
1364 PushArgumentInstr* table_push =
1365 PushArgument(Bind(WordCharacterMapConstant()));
1366 PushArgumentInstr* index_push =
1367 PushArgument(Bind(LoadLocal(current_character_)));
1368
1369 Definition* byte_def = InstanceCall(InstanceCallDescriptor(Token::kINDEX),
1370 table_push,
1371 index_push);
1372 Definition* zero_def = Int64Constant(0);
1373
1374 BranchOrBacktrack(Comparison(kEQ, byte_def, zero_def), on_no_match);
1375
1376 return true;
1377 }
1378 case 'W': {
1379 BlockLabel done;
1380 if (mode_ != ASCII) {
1381 // Table is 128 entries, so all ASCII characters can be tested.
1382 BranchOrBacktrack(Comparison(kGT,
1383 LoadLocal(current_character_),
1384 Uint64Constant('z')),
1385 &done);
1386 }
1387
1388 // TODO(jgruber): Refactor to use CheckBitInTable if possible.
1389
1390 PushArgumentInstr* table_push =
1391 PushArgument(Bind(WordCharacterMapConstant()));
1392 PushArgumentInstr* index_push =
1393 PushArgument(Bind(LoadLocal(current_character_)));
1394
1395 Definition* byte_def = InstanceCall(InstanceCallDescriptor(Token::kINDEX),
1396 table_push,
1397 index_push);
1398 Definition* zero_def = Int64Constant(0);
1399
1400 BranchOrBacktrack(Comparison(kNE, byte_def, zero_def), on_no_match);
1401
1402 if (mode_ != ASCII) {
1403 BindBlock(&done);
1404 }
1405 return true;
1406 }
1407 // Non-standard classes (with no syntactic shorthand) used internally.
1408 case '*':
1409 // Match any character.
1410 return true;
1411 case 'n': {
1412 // Match newlines (0x0a('\n'), 0x0d('\r'), 0x2028 or 0x2029).
1413 // The opposite of '.'.
1414 BlockLabel success;
1415 BranchOrBacktrack(Comparison(kEQ,
1416 LoadLocal(current_character_),
1417 Uint64Constant('\n')),
1418 &success);
1419 BranchOrBacktrack(Comparison(kEQ,
1420 LoadLocal(current_character_),
1421 Uint64Constant('\r')),
1422 &success);
1423 if (mode_ == UC16) {
1424 BranchOrBacktrack(Comparison(kEQ,
1425 LoadLocal(current_character_),
1426 Uint64Constant(0x2028)),
1427 &success);
1428 BranchOrBacktrack(Comparison(kEQ,
1429 LoadLocal(current_character_),
1430 Uint64Constant(0x2029)),
1431 &success);
1432 }
1433 BranchOrBacktrack(NULL, on_no_match);
1434 BindBlock(&success);
1435 return true;
1436 }
1437 // No custom implementation (yet): s(uint16_t), S(uint16_t).
1438 default:
1439 return false;
1440 }
1441 }
1442
1443
1444 void IRRegExpMacroAssembler::Fail() {
1445 TAG();
1446 ASSERT(FAILURE == 0); // Return value for failure is zero.
1447 if (!global()) {
1448 UNREACHABLE(); // Dart regexps are always global.
1449 }
1450 Jump(exit_block_);
1451 }
1452
1453
1454 void IRRegExpMacroAssembler::IfRegisterGE(intptr_t reg,
1455 intptr_t comparand,
1456 BlockLabel* if_ge) {
1457 TAG();
1458 BranchOrBacktrack(Comparison(kGTE,
1459 LoadLocal(position_register(reg)),
1460 Int64Constant(comparand)),
1461 if_ge);
1462 }
1463
1464
1465 void IRRegExpMacroAssembler::IfRegisterLT(intptr_t reg,
1466 intptr_t comparand,
1467 BlockLabel* if_lt) {
1468 TAG();
1469 BranchOrBacktrack(Comparison(kLT,
1470 LoadLocal(position_register(reg)),
1471 Int64Constant(comparand)),
1472 if_lt);
1473 }
1474
1475
1476 void IRRegExpMacroAssembler::IfRegisterEqPos(intptr_t reg,
1477 BlockLabel* if_eq) {
1478 TAG();
1479 BranchOrBacktrack(Comparison(kEQ,
1480 LoadLocal(position_register(reg)),
1481 LoadLocal(current_position_)),
1482 if_eq);
1483 }
1484
1485
1486 RegExpMacroAssembler::IrregexpImplementation
1487 IRRegExpMacroAssembler::Implementation() {
1488 return kIRImplementation;
1489 }
1490
1491
1492 void IRRegExpMacroAssembler::LoadCurrentCharacter(intptr_t cp_offset,
1493 BlockLabel* on_end_of_input,
1494 bool check_bounds,
1495 intptr_t characters) {
1496 TAG();
1497 ASSERT(cp_offset >= -1); // ^ and \b can look behind one character.
1498 ASSERT(cp_offset < (1<<30)); // Be sane! (And ensure negation works)
1499 if (check_bounds) {
1500 CheckPosition(cp_offset + characters - 1, on_end_of_input);
1501 }
1502 LoadCurrentCharacterUnchecked(cp_offset, characters);
1503 }
1504
1505
1506 void IRRegExpMacroAssembler::PopCurrentPosition() {
1507 TAG();
1508 StoreLocal(current_position_, PopStack());
1509 }
1510
1511
1512 void IRRegExpMacroAssembler::PopRegister(intptr_t register_index) {
1513 TAG();
1514 ASSERT(register_index < position_registers_.length());
1515 StoreLocal(position_register(register_index), PopStack());
1516 }
1517
1518
1519 void IRRegExpMacroAssembler::PushStack(Definition *definition) {
1520 PushArgumentInstr* stack_push = PushArgument(Bind(LoadLocal(stack_)));
1521 PushArgumentInstr* value_push = PushArgument(Bind(definition));
1522 Do(InstanceCall(InstanceCallDescriptor(Symbols::add()),
1523 stack_push,
1524 value_push));
1525 }
1526
1527
1528 Value* IRRegExpMacroAssembler::PopStack() {
1529 PushArgumentInstr* stack_push = PushArgument(Bind(LoadLocal(stack_)));
1530 return Bind(InstanceCall(InstanceCallDescriptor(Symbols::removeLast()),
1531 stack_push));
1532 }
1533
1534
1535 // Pushes the location corresponding to label to the backtracking stack.
1536 void IRRegExpMacroAssembler::PushBacktrack(BlockLabel* label) {
1537 TAG();
1538
1539 // Ensure that targets of indirect jumps are never accessed through a
1540 // normal control flow instructions by creating a new block for each backtrack
1541 // target.
1542 IndirectEntryInstr* indirect_target = IndirectWithJoinGoto(label->block());
1543
1544 // Add a fake edge from the graph entry for data flow analysis.
1545 entry_block_->AddIndirectEntry(indirect_target);
1546
1547 ConstantInstr* offset = Uint64Constant(indirect_target->block_id());
1548 PushStack(offset);
1549 }
1550
1551
1552 void IRRegExpMacroAssembler::PushCurrentPosition() {
1553 TAG();
1554 PushStack(LoadLocal(current_position_));
1555 }
1556
1557
1558 void IRRegExpMacroAssembler::PushRegister(intptr_t register_index,
1559 StackCheckFlag check_stack_limit) {
1560 TAG();
1561 PushStack(LoadLocal(position_register(register_index)));
1562 }
1563
1564
1565 void IRRegExpMacroAssembler::ReadCurrentPositionFromRegister(intptr_t reg) {
1566 TAG();
1567 StoreLocal(current_position_, Bind(LoadLocal(position_register(reg))));
1568 }
1569
1570 // Resets the size of the stack to the value stored in reg.
1571 void IRRegExpMacroAssembler::ReadStackPointerFromRegister(intptr_t reg) {
1572 TAG();
1573 ASSERT(reg < position_registers_.length());
1574
1575 PushArgumentInstr* stack_push = PushArgument(Bind(LoadLocal(stack_)));
1576 PushArgumentInstr* length_push =
1577 PushArgument(Bind(LoadLocal(position_register(reg))));
1578
1579 Do(InstanceCall(InstanceCallDescriptor(
1580 String::ZoneHandle(
1581 I, Field::SetterSymbol(Symbols::Length()))),
1582 stack_push,
1583 length_push));
1584 }
1585
1586 void IRRegExpMacroAssembler::SetCurrentPositionFromEnd(intptr_t by) {
1587 TAG();
1588
1589 BlockLabel after_position;
1590
1591 Definition* cur_pos_def = LoadLocal(current_position_);
1592 Definition* by_value_def = Int64Constant(-by);
1593
1594 BranchOrBacktrack(Comparison(kGTE, cur_pos_def, by_value_def),
1595 &after_position);
1596
1597 StoreLocal(current_position_, Bind(Int64Constant(-by)));
1598
1599 // On RegExp code entry (where this operation is used), the character before
1600 // the current position is expected to be already loaded.
1601 // We have advanced the position, so it's safe to read backwards.
1602 LoadCurrentCharacterUnchecked(-1, 1);
1603
1604 BindBlock(&after_position);
1605 }
1606
1607
1608 void IRRegExpMacroAssembler::SetRegister(intptr_t register_index, intptr_t to) {
1609 TAG();
1610 // Reserved for positions!
1611 ASSERT(register_index >= position_registers_count_);
1612 StoreLocal(position_register(register_index), Bind(Int64Constant(to)));
1613 }
1614
1615
1616 bool IRRegExpMacroAssembler::Succeed() {
1617 TAG();
1618 Jump(success_block_);
1619 return global();
1620 }
1621
1622
1623 void IRRegExpMacroAssembler::WriteCurrentPositionToRegister(
1624 intptr_t reg, intptr_t cp_offset) {
1625 TAG();
1626
1627 PushArgumentInstr* pos_push =
1628 PushArgument(Bind(LoadLocal(current_position_)));
1629 PushArgumentInstr* off_push =
1630 PushArgument(Bind(Int64Constant(cp_offset)));
1631
1632 // Push the negative offset; these are converted to positive string positions
1633 // within the success block.
1634 StoreLocal(position_register(reg), Add(pos_push, off_push));
1635 }
1636
1637
1638 void IRRegExpMacroAssembler::ClearRegisters(
1639 intptr_t reg_from, intptr_t reg_to) {
1640 TAG();
1641
1642 ASSERT(reg_from <= reg_to);
1643 ASSERT(reg_to < position_registers_.length());
1644
1645 // In order to clear registers to a final result value of -1, set them to
1646 // (-1 - string length), the offset of -1 from the end of the string.
1647
1648 for (intptr_t reg = reg_from; reg <= reg_to; reg++) {
1649 PushArgumentInstr* minus_one_push =
1650 PushArgument(Bind(Int64Constant(-1)));
1651 PushArgumentInstr* length_push =
1652 PushArgument(Bind(LoadLocal(string_param_length_)));
1653
1654 StoreLocal(position_register(reg), Sub(minus_one_push, length_push));
1655 }
1656 }
1657
1658
1659 void IRRegExpMacroAssembler::WriteStackPointerToRegister(intptr_t reg) {
1660 TAG();
1661
1662 PushArgumentInstr* stack_push = PushArgument(Bind(LoadLocal(stack_)));
1663 Value* length_value =
1664 Bind(InstanceCall(InstanceCallDescriptor(
1665 String::ZoneHandle(
1666 I, Field::GetterSymbol(Symbols::Length()))),
1667 stack_push));
1668
1669 StoreLocal(position_register(reg), length_value);
1670 }
1671
1672
1673 // Private methods:
1674
1675
1676 void IRRegExpMacroAssembler::CheckPosition(intptr_t cp_offset,
1677 BlockLabel* on_outside_input) {
1678 TAG();
1679 Definition* curpos_def = LoadLocal(current_position_);
1680 Definition* cp_off_def = Int64Constant(-cp_offset);
1681
1682 // If (current_position_ < -cp_offset), we are in bounds.
1683 // Remember, current_position_ is a negative offset from the string end.
1684
1685 BranchOrBacktrack(Comparison(kGTE, curpos_def, cp_off_def),
1686 on_outside_input);
1687 }
1688
1689
1690 void IRRegExpMacroAssembler::BranchOrBacktrack(
1691 ComparisonInstr* comparison,
1692 BlockLabel* true_successor) {
1693 if (comparison == NULL) { // No condition
1694 if (true_successor == NULL) {
1695 Backtrack();
1696 return;
1697 }
1698 Jump(true_successor);
1699 return;
1700 }
1701
1702 // If no successor block has been passed in, backtrack.
1703 JoinEntryInstr* true_successor_block = backtrack_block_;
1704 if (true_successor != NULL) {
1705 true_successor->SetLinked();
1706 true_successor_block = true_successor->block();
1707 }
1708 ASSERT(true_successor_block != NULL);
1709
1710 // If the condition is not true, fall through to a new block.
1711 BlockLabel fallthrough;
1712
1713 BranchInstr* branch = new(I) BranchInstr(comparison);
1714 *branch->true_successor_address() =
1715 TargetWithJoinGoto(true_successor_block);
1716 *branch->false_successor_address() =
1717 TargetWithJoinGoto(fallthrough.block());
1718
1719 CloseBlockWith(branch);
1720 BindBlock(&fallthrough);
1721 }
1722
1723
1724 TargetEntryInstr* IRRegExpMacroAssembler::TargetWithJoinGoto(
1725 JoinEntryInstr* dst) {
1726 TargetEntryInstr* target = new(I) TargetEntryInstr(
1727 block_id.Alloc(), kInvalidTryIndex);
1728 blocks_.Add(target);
1729
1730 target->AppendInstruction(new(I) GotoInstr(dst));
1731
1732 return target;
1733 }
1734
1735
1736 IndirectEntryInstr* IRRegExpMacroAssembler::IndirectWithJoinGoto(
1737 JoinEntryInstr* dst) {
1738 IndirectEntryInstr* target = new(I) IndirectEntryInstr(
1739 block_id.Alloc(), kInvalidTryIndex);
1740 blocks_.Add(target);
1741
1742 target->AppendInstruction(new(I) GotoInstr(dst));
1743
1744 return target;
1745 }
1746
1747
1748 void IRRegExpMacroAssembler::CheckPreemption() {
1749 TAG();
1750 AppendInstruction(new(I) CheckStackOverflowInstr(kNoTokenPos, 0));
1751 }
1752
1753
1754 Value* IRRegExpMacroAssembler::Add(
1755 PushArgumentInstr* lhs,
1756 PushArgumentInstr* rhs) {
1757 return Bind(InstanceCall(InstanceCallDescriptor(Token::kADD), lhs, rhs));
1758 }
1759
1760
1761 Value* IRRegExpMacroAssembler::Sub(
1762 PushArgumentInstr* lhs,
1763 PushArgumentInstr* rhs) {
1764 return Bind(InstanceCall(InstanceCallDescriptor(Token::kSUB), lhs, rhs));
1765 }
1766
1767
1768 static const String& codeUnitsAtName(intptr_t characters) {
1769 switch (characters) {
1770 case 1: return Library::PrivateCoreLibName(Symbols::_oneCodeUnitAt());
1771 case 2: return Library::PrivateCoreLibName(Symbols::_twoCodeUnitsAt());
1772 case 4: return Library::PrivateCoreLibName(Symbols::_fourCodeUnitsAt());
1773 }
1774 UNREACHABLE();
1775 return String::Handle();
1776 }
1777
1778
1779 void IRRegExpMacroAssembler::LoadCurrentCharacterUnchecked(
1780 intptr_t cp_offset, intptr_t characters) {
1781 TAG();
1782
1783 if (mode_ == ASCII) {
1784 ASSERT(characters == 1 || characters == 2 || characters == 4);
1785 } else {
1786 ASSERT(mode_ == UC16);
1787 ASSERT(characters == 1 || characters == 2);
1788 }
1789
1790 // LoadLocal pattern_param_
1791 // PushArgument()
1792 PushArgumentInstr* pattern_push =
1793 PushArgument(Bind(LoadLocal(string_param_)));
1794
1795 // Calculate the addressed string index as
1796 // cp_offset + current_position_ + string_param_length_
1797 PushArgumentInstr* cp_offset_push =
1798 PushArgument(Bind(Int64Constant(cp_offset)));
1799 PushArgumentInstr* cur_pos_push =
1800 PushArgument(Bind(LoadLocal(current_position_)));
1801
1802 PushArgumentInstr* partial_sum_push =
1803 PushArgument(Add(cp_offset_push, cur_pos_push));
1804 PushArgumentInstr* length_push =
1805 PushArgument(Bind(LoadLocal(string_param_length_)));
1806
1807 PushArgumentInstr* pos_push =
1808 PushArgument(Add(partial_sum_push, length_push));
1809
1810 // InstanceCall(codeUnitAt, t0, t0)
1811 const String& name = codeUnitsAtName(characters);
1812 Value* code_unit_value =
1813 Bind(InstanceCall(InstanceCallDescriptor(name),
1814 pattern_push,
1815 pos_push));
1816
1817 // StoreLocal(current_character_)
1818 StoreLocal(current_character_, code_unit_value);
1819
1820 PRINT(PushArgument(Bind(LoadLocal(current_character_))));
1821 }
1822
1823
1824 Value* IRRegExpMacroAssembler::CharacterAt(Definition* index) {
1825 PushArgumentInstr* pattern_push =
1826 PushArgument(Bind(LoadLocal(string_param_)));
1827 PushArgumentInstr* index_push = PushArgument(Bind(index));
1828
1829 return Bind(InstanceCall(InstanceCallDescriptor(codeUnitsAtName(1)),
1830 pattern_push,
1831 index_push));
1832 }
1833
1834
1835 #undef __
1836
1837 } // namespace dart
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698