Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(152)

Side by Side Diff: runtime/vm/regexp_assembler.cc

Issue 683433003: Integrate the Irregexp Regular Expression Engine. (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: rebase, enable tests, remove *CodeUnitsAt Created 6 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file.
4
5 #include "vm/regexp_assembler.h"
6
7 #include "vm/bit_vector.h"
8 #include "vm/compiler.h"
9 #include "vm/dart_entry.h"
10 #include "vm/flow_graph_builder.h"
11 #include "vm/il_printer.h"
12 #include "vm/object_store.h"
13 #include "vm/regexp.h"
14 #include "vm/resolver.h"
15 #include "vm/stack_frame.h"
16 #include "vm/unibrow-inl.h"
17 #include "vm/unicode.h"
18
// Shorthand for the current isolate; expands to a call to isolate() at the
// use site.
#define I isolate()

// Debugging output macros. TAG() is called at the head of each interesting
// function and prints its name during execution if irregexp tracing is enabled.
//
// TAG() and PRINT() are wrapped in do { } while (0) so that each expands to
// exactly one statement. A bare `if (...) { ... }` expansion followed by the
// caller's `;` breaks (or silently re-associates) an enclosing unbraced
// if/else.
#define TAG()                                                                  \
  do {                                                                         \
    if (FLAG_trace_irregexp) { TAG_(); }                                       \
  } while (0)
#define TAG_()                                                                 \
  Print(PushArgument(                                                          \
    Bind(new(I) ConstantInstr(String::ZoneHandle(I, String::Concat(            \
        String::Handle(String::New("TAG: ")),                                  \
        String::Handle(String::New(__FUNCTION__)), Heap::kOld))))));

// Emits a call printing `arg` into the generated code, but only when irregexp
// tracing is enabled.
#define PRINT(arg)                                                             \
  do {                                                                         \
    if (FLAG_trace_irregexp) { Print(arg); }                                   \
  } while (0)
31
32 namespace dart {
33
// Runtime flag consulted by the TAG()/PRINT() macros and by the dead-code
// warning in this file; off by default.
34 DEFINE_FLAG(bool, trace_irregexp, false, "Trace irregexps");
35
36
// Try index passed to every block entry created by this assembler.
37 static const intptr_t kInvalidTryIndex = -1;
// Token position passed to the instructions and local variables built here.
38 static const intptr_t kNoTokenPos = -1;
39
40
41 void PrintUtf16(uint16_t c) {
42 const char* format = (0x20 <= c && c <= 0x7F) ?
43 "%c" : (c <= 0xff) ? "\\x%02x" : "\\u%04x";
44 OS::Print(format, c);
45 }
46
47
/*
 * This assembler uses the following main local variables:
 * - stack_: A pointer to a growable list which we use as an all-purpose stack
 *           storing backtracking offsets, positions & stored register values.
 * - current_character_: Stores the currently loaded characters (possibly more
 *                       than one).
 * - current_position_: The current position within the string, stored as a
 *                      negative offset from the end of the string (i.e. the
 *                      position corresponding to str[0] is -str.length).
 *                      Note that current_position_ is *not* byte-based, unlike
 *                      the original V8 code.
 *
 * Results are returned through an array of capture indices, stored at
 * matches_param_. A null array specifies a failure to match. The match indices
 * [start_inclusive, end_exclusive] for capture group i are stored at positions
 * matches_param_[i * 2] and matches_param_[i * 2 + 1], respectively. Match
 * indices of -1 denote non-matched groups. Note that we store these indices
 * as negative offsets from the end of the string in position_registers_
 * during processing, and convert them to standard indices when copying them
 * to matches_param_ on a successful match.
 */
69
70 RegExpMacroAssembler::RegExpMacroAssembler(Isolate* isolate)
71 : slow_safe_compiler_(false),
72 global_mode_(NOT_GLOBAL),
73 isolate_(isolate) {
74 }
75
76
77 RegExpMacroAssembler::~RegExpMacroAssembler() {
78 }
79
80
81 IRRegExpMacroAssembler::IRRegExpMacroAssembler(
82 intptr_t specialization_cid,
83 intptr_t capture_count,
84 const ParsedFunction* parsed_function,
85 const ZoneGrowableArray<const ICData*>& ic_data_array,
86 Isolate* isolate)
87 : RegExpMacroAssembler(isolate),
88 specialization_cid_(specialization_cid),
89 parsed_function_(parsed_function),
90 ic_data_array_(ic_data_array),
91 current_instruction_(NULL),
92 stack_(NULL),
93 current_character_(NULL),
94 current_position_(NULL),
95 string_param_(NULL),
96 string_param_length_(NULL),
97 start_index_param_(NULL),
98 position_registers_count_((capture_count + 1) * 2),
99 stack_array_(GrowableObjectArray::ZoneHandle(
100 isolate, GrowableObjectArray::New(16, Heap::kOld))) {
101 switch (specialization_cid) {
102 case kOneByteStringCid:
103 case kExternalOneByteStringCid: mode_ = ASCII; break;
104 case kTwoByteStringCid:
105 case kExternalTwoByteStringCid: mode_ = UC16; break;
106 default: UNREACHABLE();
107 }
108
109 InitializeLocals();
110
111 // Create and generate all preset blocks.
112 entry_block_ =
113 new(isolate) GraphEntryInstr(
114 parsed_function_,
115 new(isolate) TargetEntryInstr(block_id_.Alloc(), kInvalidTryIndex),
116 Isolate::kNoDeoptId);
117 start_block_ =
118 new(isolate) JoinEntryInstr(block_id_.Alloc(), kInvalidTryIndex);
119 success_block_ =
120 new(isolate) JoinEntryInstr(block_id_.Alloc(), kInvalidTryIndex);
121 backtrack_block_ =
122 new(isolate) JoinEntryInstr(block_id_.Alloc(), kInvalidTryIndex);
123 exit_block_ =
124 new(isolate) JoinEntryInstr(block_id_.Alloc(), kInvalidTryIndex);
125
126 GenerateEntryBlock();
127 GenerateSuccessBlock();
128 GenerateBacktrackBlock();
129 GenerateExitBlock();
130
131 blocks_.Add(entry_block_);
132 blocks_.Add(entry_block_->normal_entry());
133 blocks_.Add(start_block_);
134 blocks_.Add(success_block_);
135 blocks_.Add(backtrack_block_);
136 blocks_.Add(exit_block_);
137
138 // Begin emission at the start_block_.
139 set_current_instruction(start_block_);
140 }
141
142
143 IRRegExpMacroAssembler::~IRRegExpMacroAssembler() { }
144
145
146 void IRRegExpMacroAssembler::InitializeLocals() {
147 // Create local variables and parameters.
148 stack_ = Local(Symbols::stack_());
149 current_character_ = Local(Symbols::current_character_());
150 current_position_ = Local(Symbols::current_position_());
151 string_param_length_ = Local(Symbols::string_param_length_());
152 capture_length_ = Local(Symbols::capture_length_());
153 match_start_index_ = Local(Symbols::match_start_index_());
154 capture_start_index_ = Local(Symbols::capture_start_index_());
155 match_end_index_ = Local(Symbols::match_end_index_());
156 char_in_capture_ = Local(Symbols::char_in_capture_());
157 char_in_match_ = Local(Symbols::char_in_match_());
158 result_ = Local(Symbols::result_());
159
160 string_param_ = Parameter(Symbols::string_param_(), 0);
161 start_index_param_ = Parameter(Symbols::start_index_param_(), 1);
162
163 // Reserve space for all captured group positions. Note that more might
164 // be created on the fly for internal use.
165 for (intptr_t i = 0; i < position_registers_count_; i++) {
166 position_register(i);
167 }
168 }
169
170
171 void IRRegExpMacroAssembler::GenerateEntryBlock() {
172 set_current_instruction(entry_block_->normal_entry());
173 TAG();
174
175 // Generate a local list variable which we will use as a backtracking stack.
176
177 StoreLocal(stack_, Bind(new(I) ConstantInstr(stack_array_)));
178 Do(InstanceCall(InstanceCallDescriptor(Symbols::clear()), PushLocal(stack_)));
179
180 // Store string.length.
181 PushArgumentInstr* string_push = PushLocal(string_param_);
182
183 StoreLocal(string_param_length_,
184 Bind(InstanceCall(InstanceCallDescriptor(
185 String::ZoneHandle(
186 Field::GetterSymbol(Symbols::Length()))),
187 string_push)));
188
189 // Initialize all capture registers.
190 ClearRegisters(0, position_registers_count_ - 1);
191
192 // Store (start_index - string.length) as the current position (since it's a
193 // negative offset from the end of the string).
194 PushArgumentInstr* start_index_push = PushLocal(start_index_param_);
195 PushArgumentInstr* length_push = PushLocal(string_param_length_);
196
197 StoreLocal(current_position_, Bind(Sub(start_index_push, length_push)));
198
199 // Jump to the start block.
200 current_instruction_->Goto(start_block_);
201 }
202
203
204 void IRRegExpMacroAssembler::GenerateBacktrackBlock() {
205 set_current_instruction(backtrack_block_);
206 TAG();
207 Backtrack();
208 }
209
210
211 void IRRegExpMacroAssembler::GenerateSuccessBlock() {
212 set_current_instruction(success_block_);
213 TAG();
214
215 Definition* type_args_null_def = new(I) ConstantInstr(
216 TypeArguments::ZoneHandle(I, TypeArguments::null()));
217 PushArgumentInstr* type_arg_push = PushArgument(Bind(type_args_null_def));
218 PushArgumentInstr* length_push =
219 PushArgument(Bind(Uint64Constant(position_registers_count_)));
220
221 const Library& lib = Library::Handle(Library::CoreLibrary());
222 const Class& list_class = Class::Handle(
223 lib.LookupCoreClass(Symbols::List()));
224 const Function& list_ctor =
225 Function::ZoneHandle(I, list_class.LookupFactory(Symbols::ListFactory()));
226
227 // TODO(zerny): Use CreateArrayInstr and StoreIndexed instead.
228 StoreLocal(result_, Bind(StaticCall(list_ctor, type_arg_push, length_push)));
229
230 // Store captured offsets in the `matches` parameter.
231 // TODO(zerny): Eliminate position_register locals and access `matches`
232 // directly.
233 for (intptr_t i = 0; i < position_registers_count_; i++) {
234 PushArgumentInstr* matches_push = PushLocal(result_);
235 PushArgumentInstr* index_push = PushArgument(Bind(Uint64Constant(i)));
236
237 // Convert negative offsets from the end of the string to string indices.
238 PushArgumentInstr* offset_push = PushLocal(position_register(i));
239 PushArgumentInstr* len_push = PushLocal(string_param_length_);
240 PushArgumentInstr* value_push =
241 PushArgument(Bind(Add(offset_push, len_push)));
242
243 Do(InstanceCall(InstanceCallDescriptor::FromToken(Token::kASSIGN_INDEX),
244 matches_push,
245 index_push,
246 value_push));
247 }
248
249 // Print the result if tracing.
250 PRINT(PushLocal(result_));
251
252 // Return true on success.
253 AppendInstruction(new(I) ReturnInstr(kNoTokenPos, Bind(LoadLocal(result_))));
254 }
255
256
257 void IRRegExpMacroAssembler::GenerateExitBlock() {
258 set_current_instruction(exit_block_);
259 TAG();
260
261 // Return false on failure.
262 AppendInstruction(new(I) ReturnInstr(kNoTokenPos, Bind(LoadLocal(result_))));
263 }
264
265
266 #if defined(TARGET_ARCH_ARM64) || \
267 defined(TARGET_ARCH_ARM) || \
268 defined(TARGET_ARCH_MIPS)
269 // Disabling unaligned accesses forces the regexp engine to load characters one
270 // by one instead of up to 4 at once, along with the associated performance hit.
271 // TODO(zerny): Be less conservative about disabling unaligned accesses.
272 // For instance, ARMv6 supports unaligned accesses. Once it is enabled here,
273 // update LoadCodeUnitsInstr methods for the appropriate architectures.
274 static const bool kEnableUnalignedAccesses = false;
275 #else
276 static const bool kEnableUnalignedAccesses = true;
277 #endif
278 bool IRRegExpMacroAssembler::CanReadUnaligned() {
279 return kEnableUnalignedAccesses && !slow_safe();
280 }
281
282
283 RawArray* IRRegExpMacroAssembler::Execute(
284 const Function& function,
285 const String& input,
286 const Smi& start_offset,
287 Isolate* isolate) {
288 // Create the argument list.
289 const Array& args = Array::Handle(Array::New(2));
290 args.SetAt(0, input);
291 args.SetAt(1, start_offset);
292
293 // And finally call the generated code.
294
295 const Object& retval =
296 Object::Handle(isolate, DartEntry::InvokeFunction(function, args));
297 if (retval.IsError()) {
298 const Error& error = Error::Cast(retval);
299 OS::Print("%s\n", error.ToErrorCString());
300 // Should never happen.
301 UNREACHABLE();
302 }
303
304 if (retval.IsNull()) {
305 return Array::null();
306 }
307
308 ASSERT(retval.IsArray());
309 return Array::Cast(retval).raw();
310 }
311
312
313 RawBool* IRRegExpMacroAssembler::CaseInsensitiveCompareUC16(
314 RawString* str_raw,
315 RawSmi* lhs_index_raw,
316 RawSmi* rhs_index_raw,
317 RawSmi* length_raw) {
318 const String& str = String::Handle(str_raw);
319 const Smi& lhs_index = Smi::Handle(lhs_index_raw);
320 const Smi& rhs_index = Smi::Handle(rhs_index_raw);
321 const Smi& length = Smi::Handle(length_raw);
322
323 // TODO(zerny): Optimize as single instance. V8 has this as an
324 // isolate member.
325 unibrow::Mapping<unibrow::Ecma262Canonicalize> canonicalize;
326
327 for (intptr_t i = 0; i < length.Value(); i++) {
328 int32_t c1 = str.CharAt(lhs_index.Value() + i);
329 int32_t c2 = str.CharAt(rhs_index.Value() + i);
330 if (c1 != c2) {
331 int32_t s1[1] = { c1 };
332 canonicalize.get(c1, '\0', s1);
333 if (s1[0] != c2) {
334 int32_t s2[1] = { c2 };
335 canonicalize.get(c2, '\0', s2);
336 if (s1[0] != s2[0]) {
337 return Bool::False().raw();
338 }
339 }
340 }
341 }
342 return Bool::True().raw();
343 }
344
345
346 LocalVariable* IRRegExpMacroAssembler::Parameter(const String& name,
347 intptr_t index) const {
348 const Type& local_type = Type::ZoneHandle(I, Type::DynamicType());
349 LocalVariable* local =
350 new(I) LocalVariable(kNoTokenPos, name, local_type);
351
352 intptr_t param_frame_index = kParamEndSlotFromFp + kParamCount - index;
353 local->set_index(param_frame_index);
354
355 return local;
356 }
357
358
359 LocalVariable* IRRegExpMacroAssembler::Local(const String& name) {
360 const Type& local_type = Type::ZoneHandle(I, Type::DynamicType());
361 LocalVariable* local =
362 new(I) LocalVariable(kNoTokenPos, name, local_type);
363 local->set_index(GetNextLocalIndex());
364
365 return local;
366 }
367
368
369 ConstantInstr* IRRegExpMacroAssembler::Int64Constant(int64_t value) const {
370 return new(I) ConstantInstr(
371 Integer::ZoneHandle(I, Integer::New(value, Heap::kOld)));
372 }
373
374
375 ConstantInstr* IRRegExpMacroAssembler::Uint64Constant(uint64_t value) const {
376 return new(I) ConstantInstr(
377 Integer::ZoneHandle(I, Integer::NewFromUint64(value, Heap::kOld)));
378 }
379
380
381 ConstantInstr* IRRegExpMacroAssembler::BoolConstant(bool value) const {
382 return new(I) ConstantInstr(value ? Bool::True() : Bool::False());
383 }
384
385
386 ConstantInstr* IRRegExpMacroAssembler::StringConstant(const char* value) const {
387 return new(I) ConstantInstr(
388 String::ZoneHandle(I, String::New(value, Heap::kOld)));
389 }
390
391
392 ConstantInstr* IRRegExpMacroAssembler::WordCharacterMapConstant() const {
393 const Library& lib = Library::Handle(I, Library::CoreLibrary());
394 const Class& regexp_class = Class::Handle(I,
395 lib.LookupClassAllowPrivate(Symbols::JSSyntaxRegExp()));
396 const Field& word_character_field = Field::ZoneHandle(I,
397 regexp_class.LookupStaticField(Symbols::_wordCharacterMap()));
398 ASSERT(!word_character_field.IsNull());
399
400 if (word_character_field.IsUninitialized()) {
401 word_character_field.EvaluateInitializer();
402 }
403 ASSERT(!word_character_field.IsUninitialized());
404
405 return new(I) ConstantInstr(
406 Instance::ZoneHandle(I, word_character_field.value()));
407 }
408
409
410 ComparisonInstr* IRRegExpMacroAssembler::Comparison(
411 ComparisonKind kind, Definition* lhs, Definition* rhs) {
412 Token::Kind strict_comparison = Token::kEQ_STRICT;
413 Token::Kind intermediate_operator = Token::kILLEGAL;
414 switch (kind) {
415 case kEQ:
416 intermediate_operator = Token::kEQ;
417 break;
418 case kNE:
419 intermediate_operator = Token::kEQ;
420 strict_comparison = Token::kNE_STRICT;
421 break;
422 case kLT:
423 intermediate_operator = Token::kLT;
424 break;
425 case kGT:
426 intermediate_operator = Token::kGT;
427 break;
428 case kLTE:
429 intermediate_operator = Token::kLTE;
430 break;
431 case kGTE:
432 intermediate_operator = Token::kGTE;
433 break;
434 default:
435 UNREACHABLE();
436 }
437
438 ASSERT(intermediate_operator != Token::kILLEGAL);
439
440 PushArgumentInstr* lhs_push = PushArgument(Bind(lhs));
441 PushArgumentInstr* rhs_push = PushArgument(Bind(rhs));
442
443 Value* lhs_value =
444 Bind(InstanceCall(
445 InstanceCallDescriptor::FromToken(intermediate_operator),
446 lhs_push,
447 rhs_push));
448 Value* rhs_value = Bind(BoolConstant(true));
449
450 return new(I) StrictCompareInstr(kNoTokenPos, strict_comparison,
451 lhs_value, rhs_value, true);
452 }
453
454
455 StaticCallInstr* IRRegExpMacroAssembler::StaticCall(
456 const Function& function) const {
457 ZoneGrowableArray<PushArgumentInstr*>* arguments =
458 new(I) ZoneGrowableArray<PushArgumentInstr*>(0);
459 return StaticCall(function, arguments);
460 }
461
462
463 StaticCallInstr* IRRegExpMacroAssembler::StaticCall(
464 const Function& function,
465 PushArgumentInstr* arg1) const {
466 ZoneGrowableArray<PushArgumentInstr*>* arguments =
467 new(I) ZoneGrowableArray<PushArgumentInstr*>(1);
468 arguments->Add(arg1);
469
470 return StaticCall(function, arguments);
471 }
472
473
474 StaticCallInstr* IRRegExpMacroAssembler::StaticCall(
475 const Function& function,
476 PushArgumentInstr* arg1,
477 PushArgumentInstr* arg2) const {
478 ZoneGrowableArray<PushArgumentInstr*>* arguments =
479 new(I) ZoneGrowableArray<PushArgumentInstr*>(2);
480 arguments->Add(arg1);
481 arguments->Add(arg2);
482
483 return StaticCall(function, arguments);
484 }
485
486
487 StaticCallInstr* IRRegExpMacroAssembler::StaticCall(
488 const Function& function,
489 ZoneGrowableArray<PushArgumentInstr*>* arguments) const {
490 return new(I) StaticCallInstr(kNoTokenPos,
491 function,
492 Object::null_array(),
493 arguments,
494 ic_data_array_);
495 }
496
497
498 InstanceCallInstr* IRRegExpMacroAssembler::InstanceCall(
499 const InstanceCallDescriptor& desc,
500 PushArgumentInstr* arg1) const {
501 ZoneGrowableArray<PushArgumentInstr*>* arguments =
502 new(I) ZoneGrowableArray<PushArgumentInstr*>(1);
503 arguments->Add(arg1);
504
505 return InstanceCall(desc, arguments);
506 }
507
508
509 InstanceCallInstr* IRRegExpMacroAssembler::InstanceCall(
510 const InstanceCallDescriptor& desc,
511 PushArgumentInstr* arg1,
512 PushArgumentInstr* arg2) const {
513 ZoneGrowableArray<PushArgumentInstr*>* arguments =
514 new(I) ZoneGrowableArray<PushArgumentInstr*>(2);
515 arguments->Add(arg1);
516 arguments->Add(arg2);
517
518 return InstanceCall(desc, arguments);
519 }
520
521
522 InstanceCallInstr* IRRegExpMacroAssembler::InstanceCall(
523 const InstanceCallDescriptor& desc,
524 PushArgumentInstr* arg1,
525 PushArgumentInstr* arg2,
526 PushArgumentInstr* arg3) const {
527 ZoneGrowableArray<PushArgumentInstr*>* arguments =
528 new(I) ZoneGrowableArray<PushArgumentInstr*>(3);
529 arguments->Add(arg1);
530 arguments->Add(arg2);
531 arguments->Add(arg3);
532
533 return InstanceCall(desc, arguments);
534 }
535
536
537 InstanceCallInstr* IRRegExpMacroAssembler::InstanceCall(
538 const InstanceCallDescriptor& desc,
539 ZoneGrowableArray<PushArgumentInstr*> *arguments) const {
540 return
541 new(I) InstanceCallInstr(kNoTokenPos,
542 desc.name,
543 desc.token_kind,
544 arguments,
545 Object::null_array(),
546 desc.checked_argument_count,
547 ic_data_array_);
548 }
549
550
551 LoadLocalInstr* IRRegExpMacroAssembler::LoadLocal(LocalVariable* local) const {
552 return new(I) LoadLocalInstr(*local);
553 }
554
555
556 void IRRegExpMacroAssembler::StoreLocal(LocalVariable* local,
557 Value* value) {
558 Do(new(I) StoreLocalInstr(*local, value));
559 }
560
561
562 void IRRegExpMacroAssembler::set_current_instruction(Instruction* instruction) {
563 current_instruction_ = instruction;
564 }
565
566
567 Value* IRRegExpMacroAssembler::Bind(Definition* definition) {
568 AppendInstruction(definition);
569 definition->set_temp_index(temp_id_.Alloc());
570
571 return new(I) Value(definition);
572 }
573
574
575 void IRRegExpMacroAssembler::Do(Definition* definition) {
576 AppendInstruction(definition);
577 }
578
579
580 Value* IRRegExpMacroAssembler::BindLoadLocal(const LocalVariable& local) {
581 if (local.IsConst()) {
582 return Bind(new(I) ConstantInstr(*local.ConstValue()));
583 }
584 ASSERT(!local.is_captured());
585 return Bind(new(I) LoadLocalInstr(local));
586 }
587
588
// In some cases, the V8 irregexp engine generates unreachable code by emitting
// a jmp not followed by a bind. We cannot do the same, since it is impossible
// to append to a block following a jmp. In such cases, assume that we are doing
// the correct thing, but output a warning when tracing.
//
// When the assembler is closed (current_instruction_ == NULL) this binds a
// fresh dummy block so that emission can continue. The body is wrapped in
// do { } while (0) so the macro expands to exactly one statement and stays
// safe inside unbraced if/else bodies.
#define HANDLE_DEAD_CODE_EMISSION()                                            \
  do {                                                                         \
    if (current_instruction_ == NULL) {                                        \
      if (FLAG_trace_irregexp) {                                               \
        OS::Print("WARNING: Attempting to append to a closed assembler. "      \
                  "This could be either a bug or generation of dead code "     \
                  "inherited from V8.\n");                                     \
      }                                                                        \
      BlockLabel dummy;                                                        \
      BindBlock(&dummy);                                                       \
    }                                                                          \
  } while (0)
603
604 void IRRegExpMacroAssembler::AppendInstruction(Instruction* instruction) {
605 HANDLE_DEAD_CODE_EMISSION();
606
607 ASSERT(current_instruction_ != NULL);
608 ASSERT(current_instruction_->next() == NULL);
609
610 temp_id_.Dealloc(instruction->InputCount());
611 arg_id_.Dealloc(instruction->ArgumentCount());
612
613 current_instruction_->LinkTo(instruction);
614 set_current_instruction(instruction);
615 }
616
617
618 void IRRegExpMacroAssembler::CloseBlockWith(Instruction* instruction) {
619 HANDLE_DEAD_CODE_EMISSION();
620
621 ASSERT(current_instruction_ != NULL);
622 ASSERT(current_instruction_->next() == NULL);
623
624 temp_id_.Dealloc(instruction->InputCount());
625 arg_id_.Dealloc(instruction->ArgumentCount());
626
627 current_instruction_->LinkTo(instruction);
628 set_current_instruction(NULL);
629 }
630
631
632 void IRRegExpMacroAssembler::GoTo(BlockLabel* to) {
633 if (to == NULL) {
634 Backtrack();
635 } else {
636 to->SetLinked();
637 GoTo(to->block());
638 }
639 }
640
641
642 // Closes the current block with a goto, and unsets current_instruction_.
643 // BindBlock() must be called before emission can continue.
644 void IRRegExpMacroAssembler::GoTo(JoinEntryInstr* to) {
645 HANDLE_DEAD_CODE_EMISSION();
646
647 ASSERT(current_instruction_ != NULL);
648 ASSERT(current_instruction_->next() == NULL);
649 current_instruction_->Goto(to);
650 set_current_instruction(NULL);
651 }
652
653
654 PushArgumentInstr* IRRegExpMacroAssembler::PushArgument(Value* value) {
655 arg_id_.Alloc();
656 PushArgumentInstr* push = new(I) PushArgumentInstr(value);
657 // Do *not* use Do() for push argument instructions.
658 AppendInstruction(push);
659 return push;
660 }
661
662
663 PushArgumentInstr* IRRegExpMacroAssembler::PushLocal(LocalVariable* local) {
664 return PushArgument(Bind(LoadLocal(local)));
665 }
666
667
668 void IRRegExpMacroAssembler::Print(const char* str) {
669 Print(PushArgument(
670 Bind(new(I) ConstantInstr(
671 String::ZoneHandle(I, String::New(str, Heap::kOld))))));
672 }
673
674
675 void IRRegExpMacroAssembler::Print(PushArgumentInstr* argument) {
676 const Library& lib = Library::Handle(Library::CoreLibrary());
677 const Function& print_fn = Function::ZoneHandle(
678 I, lib.LookupFunctionAllowPrivate(Symbols::print()));
679 Do(StaticCall(print_fn, argument));
680 }
681
682
683 void IRRegExpMacroAssembler::PrintBlocks() {
684 for (intptr_t i = 0; i < blocks_.length(); i++) {
685 FlowGraphPrinter::PrintBlock(blocks_[i], false);
686 }
687 }
688
689
690 intptr_t IRRegExpMacroAssembler::stack_limit_slack() {
691 return 32;
692 }
693
694
695 void IRRegExpMacroAssembler::AdvanceCurrentPosition(intptr_t by) {
696 TAG();
697 if (by != 0) {
698 PushArgumentInstr* cur_pos_push = PushLocal(current_position_);
699 PushArgumentInstr* by_push = PushArgument(Bind(Int64Constant(by)));
700
701 Value* new_pos_value = Bind(Add(cur_pos_push, by_push));
702 StoreLocal(current_position_, new_pos_value);
703 }
704 }
705
706
707 void IRRegExpMacroAssembler::AdvanceRegister(intptr_t reg, intptr_t by) {
708 TAG();
709 ASSERT(reg >= 0);
710 ASSERT(reg < position_registers_.length());
711
712 if (by != 0) {
713 PushArgumentInstr* reg_push = PushLocal(position_register(reg));
714 PushArgumentInstr* by_push = PushArgument(Bind(Int64Constant(by)));
715 StoreLocal(position_register(reg), Bind(Add(reg_push, by_push)));
716 }
717 }
718
719
720 void IRRegExpMacroAssembler::Backtrack() {
721 TAG();
722 CheckPreemption();
723
724 GrowableObjectArray& offsets = GrowableObjectArray::ZoneHandle(
725 I, GrowableObjectArray::New(Heap::kOld));
726
727 PushArgumentInstr* block_offsets_push =
728 PushArgument(Bind(new(I) ConstantInstr(offsets)));
729 PushArgumentInstr* block_id_push = PushArgument(PopStack());
730
731 Value* offset_value =
732 Bind(InstanceCall(InstanceCallDescriptor::FromToken(Token::kINDEX),
733 block_offsets_push,
734 block_id_push));
735
736 IndirectGotoInstr* igoto = new(I) IndirectGotoInstr(&offsets, offset_value);
737 CloseBlockWith(igoto);
738 igotos_.Add(igoto);
739 }
740
741
742 // A BindBlock is analogous to assigning a label to a basic block.
743 // If the BlockLabel does not yet contain a block, it is created.
744 // If there is a current instruction, append a goto to the bound block.
745 void IRRegExpMacroAssembler::BindBlock(BlockLabel* label) {
746 ASSERT(!label->IsBound());
747 ASSERT(label->block()->next() == NULL);
748
749 label->SetBound(block_id_.Alloc());
750 blocks_.Add(label->block());
751
752 if (current_instruction_ != NULL) {
753 GoTo(label);
754 }
755 set_current_instruction(label->block());
756
757 // Print the id of the current block if tracing.
758 PRINT(PushArgument(Bind(Uint64Constant(label->block()->block_id()))));
759 }
760
761
762 intptr_t IRRegExpMacroAssembler::GetNextLocalIndex() {
763 intptr_t id = local_id_.Alloc();
764 return kFirstLocalSlotFromFp - id;
765 }
766
767
768 LocalVariable* IRRegExpMacroAssembler::position_register(intptr_t index) {
769 // Create position registers as needed.
770 for (intptr_t i = position_registers_.length(); i < index + 1; i++) {
771 position_registers_.Add(Local(Symbols::position_registers_()));
772 }
773
774 return position_registers_[index];
775 }
776
777
778 // TODO(zerny): Move the offset table outside to avoid having to keep
779 // the assembler around until after code generation; both function or regexp
780 // would work.
781 void IRRegExpMacroAssembler::FinalizeBlockOffsetTable() {
782 for (intptr_t i = 0; i < igotos_.length(); i++) {
783 IndirectGotoInstr* igoto = igotos_[i];
784 igoto->SetOffsetCount(I, indirect_id_.Count());
785
786 for (intptr_t j = 0; j < igoto->SuccessorCount(); j++) {
787 TargetEntryInstr* target = igoto->SuccessorAt(j);
788
789 // Optimizations might have modified the immediate target block, but
790 // it must end with a goto to the indirect entry.
791 Instruction* instr = target;
792 while (instr != NULL && !instr->IsGoto()) {
793 instr = instr->next();
794 }
795 ASSERT(instr->IsGoto());
796
797 IndirectEntryInstr* ientry =
798 instr->AsGoto()->successor()->AsIndirectEntry();
799 ASSERT(ientry != NULL);
800
801 // The intermediate block was possibly compacted, check both it and the
802 // final indirect entry for a valid offset. If neither are valid, then
803 // the indirect entry is unreachable.
804 intptr_t offset =
805 (target->offset() > 0) ? target->offset() : ientry->offset();
806 if (offset > 0) {
807 intptr_t adjusted_offset =
808 offset - Assembler::EntryPointToPcMarkerOffset();
809 igoto->SetOffsetAt(I, ientry->indirect_id(), adjusted_offset);
810 }
811 }
812 }
813 }
814
815 void IRRegExpMacroAssembler::FinalizeIndirectGotos() {
816 for (intptr_t i = 0; i < igotos_.length(); i++) {
817 for (intptr_t j = 0; j < entry_block_->indirect_entries().length(); j++) {
818 igotos_.At(i)->AddSuccessor(
819 TargetWithJoinGoto(entry_block_->indirect_entries().At(j)));
820 }
821 }
822 }
823
824
825 void IRRegExpMacroAssembler::CheckCharacter(uint32_t c, BlockLabel* on_equal) {
826 TAG();
827 Definition* cur_char_def = LoadLocal(current_character_);
828 Definition* char_def = Uint64Constant(c);
829
830 BranchOrBacktrack(Comparison(kEQ, cur_char_def, char_def),
831 on_equal);
832 }
833
834
835 void IRRegExpMacroAssembler::CheckCharacterGT(uint16_t limit,
836 BlockLabel* on_greater) {
837 TAG();
838 BranchOrBacktrack(Comparison(kGT,
839 LoadLocal(current_character_),
840 Uint64Constant(limit)),
841 on_greater);
842 }
843
844
845 void IRRegExpMacroAssembler::CheckAtStart(BlockLabel* on_at_start) {
846 TAG();
847
848 BlockLabel not_at_start;
849
850 // Did we start the match at the start of the string at all?
851 BranchOrBacktrack(Comparison(kNE,
852 LoadLocal(start_index_param_),
853 Uint64Constant(0)),
854 &not_at_start);
855
856 // If we did, are we still at the start of the input, i.e. is
857 // (offset == string_length * -1)?
858 Definition* neg_len_def =
859 InstanceCall(InstanceCallDescriptor::FromToken(Token::kNEGATE),
860 PushLocal(string_param_length_));
861 Definition* offset_def = LoadLocal(current_position_);
862 BranchOrBacktrack(Comparison(kEQ, neg_len_def, offset_def),
863 on_at_start);
864
865 BindBlock(&not_at_start);
866 }
867
868
869 void IRRegExpMacroAssembler::CheckNotAtStart(BlockLabel* on_not_at_start) {
870 TAG();
871
872 // Did we start the match at the start of the string at all?
873 BranchOrBacktrack(Comparison(kNE,
874 LoadLocal(start_index_param_),
875 Uint64Constant(0)),
876 on_not_at_start);
877
878 // If we did, are we still at the start of the input, i.e. is
879 // (offset == string_length * -1)?
880 Definition* neg_len_def =
881 InstanceCall(InstanceCallDescriptor::FromToken(Token::kNEGATE),
882 PushLocal(string_param_length_));
883 Definition* offset_def = LoadLocal(current_position_);
884 BranchOrBacktrack(Comparison(kNE, neg_len_def, offset_def),
885 on_not_at_start);
886 }
887
888
889 void IRRegExpMacroAssembler::CheckCharacterLT(uint16_t limit,
890 BlockLabel* on_less) {
891 TAG();
892 BranchOrBacktrack(Comparison(kLT,
893 LoadLocal(current_character_),
894 Uint64Constant(limit)),
895 on_less);
896 }
897
898
899 void IRRegExpMacroAssembler::CheckGreedyLoop(BlockLabel* on_equal) {
900 TAG();
901
902 BlockLabel fallthrough;
903
904 PushArgumentInstr* stack_push = PushLocal(stack_);
905 Definition* stack_tip_def = InstanceCall(
906 InstanceCallDescriptor(String::ZoneHandle(
907 I, Field::GetterSymbol(Symbols::last()))),
908 stack_push);
909 Definition* cur_pos_def = LoadLocal(current_position_);
910
911 BranchOrBacktrack(Comparison(kNE, stack_tip_def, cur_pos_def),
912 &fallthrough);
913
914 // Pop, throwing away the value.
915 stack_push = PushLocal(stack_);
916 Do(InstanceCall(InstanceCallDescriptor(Symbols::removeLast()),
917 stack_push));
918
919 BranchOrBacktrack(NULL, on_equal);
920
921 BindBlock(&fallthrough);
922 }
923
924
925 void IRRegExpMacroAssembler::CheckNotBackReferenceIgnoreCase(
926 intptr_t start_reg,
927 BlockLabel* on_no_match) {
928 TAG();
929 ASSERT(start_reg + 1 <= position_registers_.length());
930
931 BlockLabel fallthrough;
932
933 PushArgumentInstr* end_push = PushLocal(position_register(start_reg + 1));
934 PushArgumentInstr* start_push = PushLocal(position_register(start_reg));
935 StoreLocal(capture_length_, Bind(Sub(end_push, start_push)));
936
937 // The length of a capture should not be negative. This can only happen
938 // if the end of the capture is unrecorded, or at a point earlier than
939 // the start of the capture.
940 // BranchOrBacktrack(less, on_no_match);
941
942 BranchOrBacktrack(Comparison(kLT,
943 LoadLocal(capture_length_),
944 Uint64Constant(0)),
945 on_no_match);
946
947 // If length is zero, either the capture is empty or it is completely
948 // uncaptured. In either case succeed immediately.
949 BranchOrBacktrack(Comparison(kEQ,
950 LoadLocal(capture_length_),
951 Uint64Constant(0)),
952 &fallthrough);
953
954
955 // Check that there are sufficient characters left in the input.
956 PushArgumentInstr* pos_push = PushLocal(current_position_);
957 PushArgumentInstr* len_push = PushLocal(capture_length_);
958 BranchOrBacktrack(
959 Comparison(kGT,
960 InstanceCall(InstanceCallDescriptor::FromToken(Token::kADD),
961 pos_push,
962 len_push),
963 Uint64Constant(0)),
964 on_no_match);
965
966 pos_push = PushLocal(current_position_);
967 len_push = PushLocal(string_param_length_);
968 StoreLocal(match_start_index_, Bind(Add(pos_push, len_push)));
969
970 pos_push = PushLocal(position_register(start_reg));
971 len_push = PushLocal(string_param_length_);
972 StoreLocal(capture_start_index_, Bind(Add(pos_push, len_push)));
973
974 pos_push = PushLocal(match_start_index_);
975 len_push = PushLocal(capture_length_);
976 StoreLocal(match_end_index_, Bind(Add(pos_push, len_push)));
977
978 BlockLabel success;
979 if (mode_ == ASCII) {
980 BlockLabel loop_increment;
981 BlockLabel loop;
982 BindBlock(&loop);
983
984 StoreLocal(char_in_capture_, CharacterAt(LoadLocal(capture_start_index_)));
985 StoreLocal(char_in_match_, CharacterAt(LoadLocal(match_start_index_)));
986
987 BranchOrBacktrack(Comparison(kEQ,
988 LoadLocal(char_in_capture_),
989 LoadLocal(char_in_match_)),
990 &loop_increment);
991
992 // Mismatch, try case-insensitive match (converting letters to lower-case).
993 PushArgumentInstr* match_char_push = PushLocal(char_in_match_);
994 PushArgumentInstr* mask_push = PushArgument(Bind(Uint64Constant(0x20)));
995 StoreLocal(char_in_match_,
996 Bind(InstanceCall(
997 InstanceCallDescriptor::FromToken(Token::kBIT_OR),
998 match_char_push,
999 mask_push)));
1000
1001 BlockLabel convert_capture;
1002 BlockLabel on_not_in_range;
1003 BranchOrBacktrack(Comparison(kLT,
1004 LoadLocal(char_in_match_),
1005 Uint64Constant('a')),
1006 &on_not_in_range);
1007 BranchOrBacktrack(Comparison(kGT,
1008 LoadLocal(char_in_match_),
1009 Uint64Constant('z')),
1010 &on_not_in_range);
1011 GoTo(&convert_capture);
1012 BindBlock(&on_not_in_range);
1013
1014 // Latin-1: Check for values in range [224,254] but not 247.
1015 BranchOrBacktrack(Comparison(kLT,
1016 LoadLocal(char_in_match_),
1017 Uint64Constant(224)),
1018 on_no_match);
1019 BranchOrBacktrack(Comparison(kGT,
1020 LoadLocal(char_in_match_),
1021 Uint64Constant(254)),
1022 on_no_match);
1023
1024 BranchOrBacktrack(Comparison(kEQ,
1025 LoadLocal(char_in_match_),
1026 Uint64Constant(247)),
1027 on_no_match);
1028
1029 // Also convert capture character.
1030 BindBlock(&convert_capture);
1031
1032 PushArgumentInstr* capture_char_push = PushLocal(char_in_capture_);
1033 mask_push = PushArgument(Bind(Uint64Constant(0x20)));
1034 StoreLocal(char_in_capture_,
1035 Bind(InstanceCall(
1036 InstanceCallDescriptor::FromToken(Token::kBIT_OR),
1037 capture_char_push,
1038 mask_push)));
1039
1040 BranchOrBacktrack(Comparison(kNE,
1041 LoadLocal(char_in_match_),
1042 LoadLocal(char_in_capture_)),
1043 on_no_match);
1044
1045 BindBlock(&loop_increment);
1046
1047 // Increment pointers into match and capture strings.
1048 StoreLocal(capture_start_index_, Bind(Add(
1049 PushLocal(capture_start_index_),
1050 PushArgument(Bind(Uint64Constant(1))))));
1051 StoreLocal(match_start_index_, Bind(Add(
1052 PushLocal(match_start_index_),
1053 PushArgument(Bind(Uint64Constant(1))))));
1054
1055 // Compare to end of match, and loop if not done.
1056 BranchOrBacktrack(Comparison(kLT,
1057 LoadLocal(match_start_index_),
1058 LoadLocal(match_end_index_)),
1059 &loop);
1060 } else {
1061 ASSERT(mode_ == UC16);
1062
1063 Value* string_value = Bind(LoadLocal(string_param_));
1064 Value* lhs_index_value = Bind(LoadLocal(match_start_index_));
1065 Value* rhs_index_value = Bind(LoadLocal(capture_start_index_));
1066 Value* length_value = Bind(LoadLocal(capture_length_));
1067
1068 Definition* is_match_def =
1069 new(I) CaseInsensitiveCompareUC16Instr(
1070 string_value,
1071 lhs_index_value,
1072 rhs_index_value,
1073 length_value,
1074 specialization_cid_);
1075
1076 BranchOrBacktrack(Comparison(kNE, is_match_def, BoolConstant(true)),
1077 on_no_match);
1078 }
1079
1080 BindBlock(&success);
1081
1082 // Move current character position to position after match.
1083 PushArgumentInstr* match_end_push = PushLocal(match_end_index_);
1084 len_push = PushLocal(string_param_length_);
1085 StoreLocal(current_position_, Bind(Sub(match_end_push, len_push)));
1086
1087 BindBlock(&fallthrough);
1088 }
1089
1090
1091 void IRRegExpMacroAssembler::CheckNotBackReference(
1092 intptr_t start_reg,
1093 BlockLabel* on_no_match) {
1094 TAG();
1095 ASSERT(start_reg + 1 <= position_registers_.length());
1096
1097 BlockLabel fallthrough;
1098 BlockLabel success;
1099
1100 // Find length of back-referenced capture.
1101 PushArgumentInstr* end_push = PushLocal(position_register(start_reg + 1));
1102 PushArgumentInstr* start_push = PushLocal(position_register(start_reg));
1103 StoreLocal(capture_length_, Bind(Sub(end_push, start_push)));
1104
1105 // Fail on partial or illegal capture (start of capture after end of capture).
1106 BranchOrBacktrack(Comparison(kLT,
1107 LoadLocal(capture_length_),
1108 Uint64Constant(0)),
1109 on_no_match);
1110
1111 // Succeed on empty capture (including no capture)
1112 BranchOrBacktrack(Comparison(kEQ,
1113 LoadLocal(capture_length_),
1114 Uint64Constant(0)),
1115 &fallthrough);
1116
1117 // Check that there are sufficient characters left in the input.
1118 PushArgumentInstr* pos_push = PushLocal(current_position_);
1119 PushArgumentInstr* len_push = PushLocal(capture_length_);
1120 BranchOrBacktrack(
1121 Comparison(kGT,
1122 InstanceCall(InstanceCallDescriptor::FromToken(Token::kADD),
1123 pos_push,
1124 len_push),
1125 Uint64Constant(0)),
1126 on_no_match);
1127
1128 // Compute pointers to match string and capture string.
1129 pos_push = PushLocal(current_position_);
1130 len_push = PushLocal(string_param_length_);
1131 StoreLocal(match_start_index_, Bind(Add(pos_push, len_push)));
1132
1133 pos_push = PushLocal(position_register(start_reg));
1134 len_push = PushLocal(string_param_length_);
1135 StoreLocal(capture_start_index_, Bind(Add(pos_push, len_push)));
1136
1137 pos_push = PushLocal(match_start_index_);
1138 len_push = PushLocal(capture_length_);
1139 StoreLocal(match_end_index_, Bind(Add(pos_push, len_push)));
1140
1141 BlockLabel loop;
1142 BindBlock(&loop);
1143
1144 StoreLocal(char_in_capture_, CharacterAt(LoadLocal(capture_start_index_)));
1145 StoreLocal(char_in_match_, CharacterAt(LoadLocal(match_start_index_)));
1146
1147 BranchOrBacktrack(Comparison(kNE,
1148 LoadLocal(char_in_capture_),
1149 LoadLocal(char_in_match_)),
1150 on_no_match);
1151
1152 // Increment pointers into capture and match string.
1153 StoreLocal(capture_start_index_, Bind(Add(
1154 PushLocal(capture_start_index_),
1155 PushArgument(Bind(Uint64Constant(1))))));
1156 StoreLocal(match_start_index_, Bind(Add(
1157 PushLocal(match_start_index_),
1158 PushArgument(Bind(Uint64Constant(1))))));
1159
1160 // Check if we have reached end of match area.
1161 BranchOrBacktrack(Comparison(kLT,
1162 LoadLocal(match_start_index_),
1163 LoadLocal(match_end_index_)),
1164 &loop);
1165
1166 BindBlock(&success);
1167
1168 // Move current character position to position after match.
1169 PushArgumentInstr* match_end_push = PushLocal(match_end_index_);
1170 len_push = PushLocal(string_param_length_);
1171 StoreLocal(current_position_, Bind(Sub(match_end_push, len_push)));
1172
1173 BindBlock(&fallthrough);
1174 }
1175
1176
1177 void IRRegExpMacroAssembler::CheckNotCharacter(uint32_t c,
1178 BlockLabel* on_not_equal) {
1179 TAG();
1180 BranchOrBacktrack(Comparison(kNE,
1181 LoadLocal(current_character_),
1182 Uint64Constant(c)),
1183 on_not_equal);
1184 }
1185
1186
1187 void IRRegExpMacroAssembler::CheckCharacterAfterAnd(uint32_t c,
1188 uint32_t mask,
1189 BlockLabel* on_equal) {
1190 TAG();
1191
1192 Definition* actual_def = LoadLocal(current_character_);
1193 Definition* expected_def = Uint64Constant(c);
1194
1195 PushArgumentInstr* actual_push = PushArgument(Bind(actual_def));
1196 PushArgumentInstr* mask_push = PushArgument(Bind(Uint64Constant(mask)));
1197 actual_def = InstanceCall(InstanceCallDescriptor::FromToken(Token::kBIT_AND),
1198 actual_push,
1199 mask_push);
1200
1201 BranchOrBacktrack(Comparison(kEQ, actual_def, expected_def), on_equal);
1202 }
1203
1204
1205 void IRRegExpMacroAssembler::CheckNotCharacterAfterAnd(
1206 uint32_t c,
1207 uint32_t mask,
1208 BlockLabel* on_not_equal) {
1209 TAG();
1210
1211 Definition* actual_def = LoadLocal(current_character_);
1212 Definition* expected_def = Uint64Constant(c);
1213
1214 PushArgumentInstr* actual_push = PushArgument(Bind(actual_def));
1215 PushArgumentInstr* mask_push = PushArgument(Bind(Uint64Constant(mask)));
1216 actual_def = InstanceCall(InstanceCallDescriptor::FromToken(Token::kBIT_AND),
1217 actual_push,
1218 mask_push);
1219
1220 BranchOrBacktrack(Comparison(kNE, actual_def, expected_def), on_not_equal);
1221 }
1222
1223
1224 void IRRegExpMacroAssembler::CheckNotCharacterAfterMinusAnd(
1225 uint16_t c,
1226 uint16_t minus,
1227 uint16_t mask,
1228 BlockLabel* on_not_equal) {
1229 TAG();
1230 ASSERT(minus < Utf16::kMaxCodeUnit); // NOLINT
1231
1232 Definition* actual_def = LoadLocal(current_character_);
1233 Definition* expected_def = Uint64Constant(c);
1234
1235 PushArgumentInstr* actual_push = PushArgument(Bind(actual_def));
1236 PushArgumentInstr* minus_push = PushArgument(Bind(Uint64Constant(minus)));
1237
1238 actual_push = PushArgument(Bind(Sub(actual_push, minus_push)));
1239 PushArgumentInstr* mask_push = PushArgument(Bind(Uint64Constant(mask)));
1240 actual_def = InstanceCall(InstanceCallDescriptor::FromToken(Token::kBIT_AND),
1241 actual_push,
1242 mask_push);
1243
1244 BranchOrBacktrack(Comparison(kNE, actual_def, expected_def), on_not_equal);
1245 }
1246
1247
1248 void IRRegExpMacroAssembler::CheckCharacterInRange(
1249 uint16_t from,
1250 uint16_t to,
1251 BlockLabel* on_in_range) {
1252 TAG();
1253 ASSERT(from <= to);
1254
1255 // TODO(zerny): All range comparisons could be done cheaper with unsigned
1256 // compares. This pattern repeats in various places.
1257
1258 BlockLabel on_not_in_range;
1259 BranchOrBacktrack(Comparison(kLT,
1260 LoadLocal(current_character_),
1261 Uint64Constant(from)),
1262 &on_not_in_range);
1263 BranchOrBacktrack(Comparison(kGT,
1264 LoadLocal(current_character_),
1265 Uint64Constant(to)),
1266 &on_not_in_range);
1267 BranchOrBacktrack(NULL, on_in_range);
1268
1269 BindBlock(&on_not_in_range);
1270 }
1271
1272
1273 void IRRegExpMacroAssembler::CheckCharacterNotInRange(
1274 uint16_t from,
1275 uint16_t to,
1276 BlockLabel* on_not_in_range) {
1277 TAG();
1278 ASSERT(from <= to);
1279
1280 BranchOrBacktrack(Comparison(kLT,
1281 LoadLocal(current_character_),
1282 Uint64Constant(from)),
1283 on_not_in_range);
1284
1285 BranchOrBacktrack(Comparison(kGT,
1286 LoadLocal(current_character_),
1287 Uint64Constant(to)),
1288 on_not_in_range);
1289 }
1290
1291
1292 void IRRegExpMacroAssembler::CheckBitInTable(
1293 const TypedData& table,
1294 BlockLabel* on_bit_set) {
1295 TAG();
1296
1297 PushArgumentInstr* table_push =
1298 PushArgument(Bind(new(I) ConstantInstr(table)));
1299 PushArgumentInstr* index_push = PushLocal(current_character_);
1300
1301 if (mode_ != ASCII || kTableMask != Symbols::kMaxOneCharCodeSymbol) {
1302 PushArgumentInstr* mask_push =
1303 PushArgument(Bind(Uint64Constant(kTableSize - 1)));
1304 index_push = PushArgument(
1305 Bind(InstanceCall(InstanceCallDescriptor::FromToken(Token::kBIT_AND),
1306 index_push,
1307 mask_push)));
1308 }
1309
1310 Definition* byte_def =
1311 InstanceCall(InstanceCallDescriptor::FromToken(Token::kINDEX),
1312 table_push,
1313 index_push);
1314 Definition* zero_def = Int64Constant(0);
1315
1316 BranchOrBacktrack(Comparison(kNE, byte_def, zero_def), on_bit_set);
1317 }
1318
1319
1320 bool IRRegExpMacroAssembler::CheckSpecialCharacterClass(
1321 uint16_t type,
1322 BlockLabel* on_no_match) {
1323 TAG();
1324
1325 // Range checks (c in min..max) are generally implemented by an unsigned
1326 // (c - min) <= (max - min) check
1327 switch (type) {
1328 case 's':
1329 // Match space-characters
1330 if (mode_ == ASCII) {
1331 // One byte space characters are '\t'..'\r', ' ' and \u00a0.
1332 BlockLabel success;
1333 // Space (' ').
1334 BranchOrBacktrack(Comparison(kEQ,
1335 LoadLocal(current_character_),
1336 Uint64Constant(' ')),
1337 &success);
1338 // Check range 0x09..0x0d.
1339 CheckCharacterInRange('\t', '\r', &success);
1340 // \u00a0 (NBSP).
1341 BranchOrBacktrack(Comparison(kNE,
1342 LoadLocal(current_character_),
1343 Uint64Constant(0x00a0)),
1344 on_no_match);
1345 BindBlock(&success);
1346 return true;
1347 }
1348 return false;
1349 case 'S':
1350 // The emitted code for generic character classes is good enough.
1351 return false;
1352 case 'd':
1353 // Match ASCII digits ('0'..'9')
1354 CheckCharacterNotInRange('0', '9', on_no_match);
1355 return true;
1356 case 'D':
1357 // Match non ASCII-digits
1358 CheckCharacterInRange('0', '9', on_no_match);
1359 return true;
1360 case '.': {
1361 // Match non-newlines (not 0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029)
1362 BranchOrBacktrack(Comparison(kEQ,
1363 LoadLocal(current_character_),
1364 Uint64Constant('\n')),
1365 on_no_match);
1366 BranchOrBacktrack(Comparison(kEQ,
1367 LoadLocal(current_character_),
1368 Uint64Constant('\r')),
1369 on_no_match);
1370 if (mode_ == UC16) {
1371 BranchOrBacktrack(Comparison(kEQ,
1372 LoadLocal(current_character_),
1373 Uint64Constant(0x2028)),
1374 on_no_match);
1375 BranchOrBacktrack(Comparison(kEQ,
1376 LoadLocal(current_character_),
1377 Uint64Constant(0x2029)),
1378 on_no_match);
1379 }
1380 return true;
1381 }
1382 case 'w': {
1383 if (mode_ != ASCII) {
1384 // Table is 128 entries, so all ASCII characters can be tested.
1385 BranchOrBacktrack(Comparison(kGT,
1386 LoadLocal(current_character_),
1387 Uint64Constant('z')),
1388 on_no_match);
1389 }
1390
1391 PushArgumentInstr* table_push =
1392 PushArgument(Bind(WordCharacterMapConstant()));
1393 PushArgumentInstr* index_push = PushLocal(current_character_);
1394
1395 Definition* byte_def =
1396 InstanceCall(InstanceCallDescriptor::FromToken(Token::kINDEX),
1397 table_push,
1398 index_push);
1399 Definition* zero_def = Int64Constant(0);
1400
1401 BranchOrBacktrack(Comparison(kEQ, byte_def, zero_def), on_no_match);
1402
1403 return true;
1404 }
1405 case 'W': {
1406 BlockLabel done;
1407 if (mode_ != ASCII) {
1408 // Table is 128 entries, so all ASCII characters can be tested.
1409 BranchOrBacktrack(Comparison(kGT,
1410 LoadLocal(current_character_),
1411 Uint64Constant('z')),
1412 &done);
1413 }
1414
1415 // TODO(zerny): Refactor to use CheckBitInTable if possible.
1416
1417 PushArgumentInstr* table_push =
1418 PushArgument(Bind(WordCharacterMapConstant()));
1419 PushArgumentInstr* index_push = PushLocal(current_character_);
1420
1421 Definition* byte_def =
1422 InstanceCall(InstanceCallDescriptor::FromToken(Token::kINDEX),
1423 table_push,
1424 index_push);
1425 Definition* zero_def = Int64Constant(0);
1426
1427 BranchOrBacktrack(Comparison(kNE, byte_def, zero_def), on_no_match);
1428
1429 if (mode_ != ASCII) {
1430 BindBlock(&done);
1431 }
1432 return true;
1433 }
1434 // Non-standard classes (with no syntactic shorthand) used internally.
1435 case '*':
1436 // Match any character.
1437 return true;
1438 case 'n': {
1439 // Match newlines (0x0a('\n'), 0x0d('\r'), 0x2028 or 0x2029).
1440 // The opposite of '.'.
1441 BlockLabel success;
1442 BranchOrBacktrack(Comparison(kEQ,
1443 LoadLocal(current_character_),
1444 Uint64Constant('\n')),
1445 &success);
1446 BranchOrBacktrack(Comparison(kEQ,
1447 LoadLocal(current_character_),
1448 Uint64Constant('\r')),
1449 &success);
1450 if (mode_ == UC16) {
1451 BranchOrBacktrack(Comparison(kEQ,
1452 LoadLocal(current_character_),
1453 Uint64Constant(0x2028)),
1454 &success);
1455 BranchOrBacktrack(Comparison(kEQ,
1456 LoadLocal(current_character_),
1457 Uint64Constant(0x2029)),
1458 &success);
1459 }
1460 BranchOrBacktrack(NULL, on_no_match);
1461 BindBlock(&success);
1462 return true;
1463 }
1464 // No custom implementation (yet): s(uint16_t), S(uint16_t).
1465 default:
1466 return false;
1467 }
1468 }
1469
1470
1471 void IRRegExpMacroAssembler::Fail() {
1472 TAG();
1473 ASSERT(FAILURE == 0); // Return value for failure is zero.
1474 if (!global()) {
1475 UNREACHABLE(); // Dart regexps are always global.
1476 }
1477 GoTo(exit_block_);
1478 }
1479
1480
1481 void IRRegExpMacroAssembler::IfRegisterGE(intptr_t reg,
1482 intptr_t comparand,
1483 BlockLabel* if_ge) {
1484 TAG();
1485 BranchOrBacktrack(Comparison(kGTE,
1486 LoadLocal(position_register(reg)),
1487 Int64Constant(comparand)),
1488 if_ge);
1489 }
1490
1491
1492 void IRRegExpMacroAssembler::IfRegisterLT(intptr_t reg,
1493 intptr_t comparand,
1494 BlockLabel* if_lt) {
1495 TAG();
1496 BranchOrBacktrack(Comparison(kLT,
1497 LoadLocal(position_register(reg)),
1498 Int64Constant(comparand)),
1499 if_lt);
1500 }
1501
1502
1503 void IRRegExpMacroAssembler::IfRegisterEqPos(intptr_t reg,
1504 BlockLabel* if_eq) {
1505 TAG();
1506 BranchOrBacktrack(Comparison(kEQ,
1507 LoadLocal(position_register(reg)),
1508 LoadLocal(current_position_)),
1509 if_eq);
1510 }
1511
1512
1513 RegExpMacroAssembler::IrregexpImplementation
1514 IRRegExpMacroAssembler::Implementation() {
1515 return kIRImplementation;
1516 }
1517
1518
1519 void IRRegExpMacroAssembler::LoadCurrentCharacter(intptr_t cp_offset,
1520 BlockLabel* on_end_of_input,
1521 bool check_bounds,
1522 intptr_t characters) {
1523 TAG();
1524 ASSERT(cp_offset >= -1); // ^ and \b can look behind one character.
1525 ASSERT(cp_offset < (1<<30)); // Be sane! (And ensure negation works)
1526 if (check_bounds) {
1527 CheckPosition(cp_offset + characters - 1, on_end_of_input);
1528 }
1529 LoadCurrentCharacterUnchecked(cp_offset, characters);
1530 }
1531
1532
1533 void IRRegExpMacroAssembler::PopCurrentPosition() {
1534 TAG();
1535 StoreLocal(current_position_, PopStack());
1536 }
1537
1538
1539 void IRRegExpMacroAssembler::PopRegister(intptr_t register_index) {
1540 TAG();
1541 ASSERT(register_index < position_registers_.length());
1542 StoreLocal(position_register(register_index), PopStack());
1543 }
1544
1545
1546 void IRRegExpMacroAssembler::PushStack(Definition *definition) {
1547 PushArgumentInstr* stack_push = PushLocal(stack_);
1548 PushArgumentInstr* value_push = PushArgument(Bind(definition));
1549 Do(InstanceCall(InstanceCallDescriptor(Symbols::add()),
1550 stack_push,
1551 value_push));
1552 }
1553
1554
1555 Value* IRRegExpMacroAssembler::PopStack() {
1556 PushArgumentInstr* stack_push = PushLocal(stack_);
1557 return Bind(InstanceCall(InstanceCallDescriptor(Symbols::removeLast()),
1558 stack_push));
1559 }
1560
1561
1562 // Pushes the location corresponding to label to the backtracking stack.
1563 void IRRegExpMacroAssembler::PushBacktrack(BlockLabel* label) {
1564 TAG();
1565
1566 // Ensure that targets of indirect jumps are never accessed through a
1567 // normal control flow instructions by creating a new block for each backtrack
1568 // target.
1569 IndirectEntryInstr* indirect_target = IndirectWithJoinGoto(label->block());
1570
1571 // Add a fake edge from the graph entry for data flow analysis.
1572 entry_block_->AddIndirectEntry(indirect_target);
1573
1574 ConstantInstr* offset = Uint64Constant(indirect_target->indirect_id());
1575 PushStack(offset);
1576 }
1577
1578
1579 void IRRegExpMacroAssembler::PushCurrentPosition() {
1580 TAG();
1581 PushStack(LoadLocal(current_position_));
1582 }
1583
1584
1585 void IRRegExpMacroAssembler::PushRegister(intptr_t register_index) {
1586 TAG();
1587 PushStack(LoadLocal(position_register(register_index)));
1588 }
1589
1590
1591 void IRRegExpMacroAssembler::ReadCurrentPositionFromRegister(intptr_t reg) {
1592 TAG();
1593 StoreLocal(current_position_, Bind(LoadLocal(position_register(reg))));
1594 }
1595
1596 // Resets the size of the stack to the value stored in reg.
1597 void IRRegExpMacroAssembler::ReadStackPointerFromRegister(intptr_t reg) {
1598 TAG();
1599 ASSERT(reg < position_registers_.length());
1600
1601 PushArgumentInstr* stack_push = PushLocal(stack_);
1602 PushArgumentInstr* length_push = PushLocal(position_register(reg));
1603
1604 Do(InstanceCall(InstanceCallDescriptor(
1605 String::ZoneHandle(
1606 I, Field::SetterSymbol(Symbols::Length()))),
1607 stack_push,
1608 length_push));
1609 }
1610
1611 void IRRegExpMacroAssembler::SetCurrentPositionFromEnd(intptr_t by) {
1612 TAG();
1613
1614 BlockLabel after_position;
1615
1616 Definition* cur_pos_def = LoadLocal(current_position_);
1617 Definition* by_value_def = Int64Constant(-by);
1618
1619 BranchOrBacktrack(Comparison(kGTE, cur_pos_def, by_value_def),
1620 &after_position);
1621
1622 StoreLocal(current_position_, Bind(Int64Constant(-by)));
1623
1624 // On RegExp code entry (where this operation is used), the character before
1625 // the current position is expected to be already loaded.
1626 // We have advanced the position, so it's safe to read backwards.
1627 LoadCurrentCharacterUnchecked(-1, 1);
1628
1629 BindBlock(&after_position);
1630 }
1631
1632
1633 void IRRegExpMacroAssembler::SetRegister(intptr_t register_index, intptr_t to) {
1634 TAG();
1635 // Reserved for positions!
1636 ASSERT(register_index >= position_registers_count_);
1637 StoreLocal(position_register(register_index), Bind(Int64Constant(to)));
1638 }
1639
1640
1641 bool IRRegExpMacroAssembler::Succeed() {
1642 TAG();
1643 GoTo(success_block_);
1644 return global();
1645 }
1646
1647
1648 void IRRegExpMacroAssembler::WriteCurrentPositionToRegister(
1649 intptr_t reg, intptr_t cp_offset) {
1650 TAG();
1651
1652 PushArgumentInstr* pos_push = PushLocal(current_position_);
1653 PushArgumentInstr* off_push =
1654 PushArgument(Bind(Int64Constant(cp_offset)));
1655
1656 // Push the negative offset; these are converted to positive string positions
1657 // within the success block.
1658 StoreLocal(position_register(reg), Bind(Add(pos_push, off_push)));
1659 }
1660
1661
1662 void IRRegExpMacroAssembler::ClearRegisters(
1663 intptr_t reg_from, intptr_t reg_to) {
1664 TAG();
1665
1666 ASSERT(reg_from <= reg_to);
1667 ASSERT(reg_to < position_registers_.length());
1668
1669 // In order to clear registers to a final result value of -1, set them to
1670 // (-1 - string length), the offset of -1 from the end of the string.
1671
1672 for (intptr_t reg = reg_from; reg <= reg_to; reg++) {
1673 PushArgumentInstr* minus_one_push =
1674 PushArgument(Bind(Int64Constant(-1)));
1675 PushArgumentInstr* length_push = PushLocal(string_param_length_);
1676
1677 StoreLocal(position_register(reg), Bind(Sub(minus_one_push, length_push)));
1678 }
1679 }
1680
1681
1682 void IRRegExpMacroAssembler::WriteStackPointerToRegister(intptr_t reg) {
1683 TAG();
1684
1685 PushArgumentInstr* stack_push = PushLocal(stack_);
1686 Value* length_value =
1687 Bind(InstanceCall(InstanceCallDescriptor(
1688 String::ZoneHandle(
1689 I, Field::GetterSymbol(Symbols::Length()))),
1690 stack_push));
1691
1692 StoreLocal(position_register(reg), length_value);
1693 }
1694
1695
1696 // Private methods:
1697
1698
1699 void IRRegExpMacroAssembler::CheckPosition(intptr_t cp_offset,
1700 BlockLabel* on_outside_input) {
1701 TAG();
1702 Definition* curpos_def = LoadLocal(current_position_);
1703 Definition* cp_off_def = Int64Constant(-cp_offset);
1704
1705 // If (current_position_ < -cp_offset), we are in bounds.
1706 // Remember, current_position_ is a negative offset from the string end.
1707
1708 BranchOrBacktrack(Comparison(kGTE, curpos_def, cp_off_def),
1709 on_outside_input);
1710 }
1711
1712
1713 void IRRegExpMacroAssembler::BranchOrBacktrack(
1714 ComparisonInstr* comparison,
1715 BlockLabel* true_successor) {
1716 if (comparison == NULL) { // No condition
1717 if (true_successor == NULL) {
1718 Backtrack();
1719 return;
1720 }
1721 GoTo(true_successor);
1722 return;
1723 }
1724
1725 // If no successor block has been passed in, backtrack.
1726 JoinEntryInstr* true_successor_block = backtrack_block_;
1727 if (true_successor != NULL) {
1728 true_successor->SetLinked();
1729 true_successor_block = true_successor->block();
1730 }
1731 ASSERT(true_successor_block != NULL);
1732
1733 // If the condition is not true, fall through to a new block.
1734 BlockLabel fallthrough;
1735
1736 BranchInstr* branch = new(I) BranchInstr(comparison);
1737 *branch->true_successor_address() =
1738 TargetWithJoinGoto(true_successor_block);
1739 *branch->false_successor_address() =
1740 TargetWithJoinGoto(fallthrough.block());
1741
1742 CloseBlockWith(branch);
1743 BindBlock(&fallthrough);
1744 }
1745
1746
1747 TargetEntryInstr* IRRegExpMacroAssembler::TargetWithJoinGoto(
1748 JoinEntryInstr* dst) {
1749 TargetEntryInstr* target = new(I) TargetEntryInstr(
1750 block_id_.Alloc(), kInvalidTryIndex);
1751 blocks_.Add(target);
1752
1753 target->AppendInstruction(new(I) GotoInstr(dst));
1754
1755 return target;
1756 }
1757
1758
1759 IndirectEntryInstr* IRRegExpMacroAssembler::IndirectWithJoinGoto(
1760 JoinEntryInstr* dst) {
1761 IndirectEntryInstr* target = new(I) IndirectEntryInstr(
1762 block_id_.Alloc(), indirect_id_.Alloc(), kInvalidTryIndex);
1763 blocks_.Add(target);
1764
1765 target->AppendInstruction(new(I) GotoInstr(dst));
1766
1767 return target;
1768 }
1769
1770
1771 void IRRegExpMacroAssembler::CheckPreemption() {
1772 TAG();
1773 AppendInstruction(new(I) CheckStackOverflowInstr(kNoTokenPos, 0));
1774 }
1775
1776
1777 Definition* IRRegExpMacroAssembler::Add(
1778 PushArgumentInstr* lhs,
1779 PushArgumentInstr* rhs) {
1780 return InstanceCall(InstanceCallDescriptor::FromToken(Token::kADD), lhs, rhs);
1781 }
1782
1783
1784 Definition* IRRegExpMacroAssembler::Sub(
1785 PushArgumentInstr* lhs,
1786 PushArgumentInstr* rhs) {
1787 return InstanceCall(InstanceCallDescriptor::FromToken(Token::kSUB), lhs, rhs);
1788 }
1789
1790
1791 void IRRegExpMacroAssembler::LoadCurrentCharacterUnchecked(
1792 intptr_t cp_offset, intptr_t characters) {
1793 TAG();
1794
1795 if (mode_ == ASCII) {
1796 ASSERT(characters == 1 || characters == 2 || characters == 4);
1797 } else {
1798 ASSERT(mode_ == UC16);
1799 ASSERT(characters == 1 || characters == 2);
1800 }
1801
1802 // Bind the pattern as the load receiver.
1803 Value* pattern = BindLoadLocal(*string_param_);
1804
1805 // Calculate the addressed string index as:
1806 // cp_offset + current_position_ + string_param_length_
1807 // TODO(zerny): Avoid generating 'add' instance-calls here.
1808 PushArgumentInstr* off_arg =
1809 PushArgument(Bind(Int64Constant(cp_offset)));
1810 PushArgumentInstr* pos_arg =
1811 PushArgument(BindLoadLocal(*current_position_));
1812 PushArgumentInstr* off_pos_arg =
1813 PushArgument(Bind(Add(off_arg, pos_arg)));
1814 PushArgumentInstr* len_arg =
1815 PushArgument(BindLoadLocal(*string_param_length_));
1816 Value* index = Bind(Add(off_pos_arg, len_arg));
1817
1818 // Load and store the code units.
1819 Value* code_unit_value = LoadCodeUnitsAt(pattern, index, characters);
1820 StoreLocal(current_character_, code_unit_value);
1821 PRINT(PushLocal(current_character_));
1822 }
1823
1824
1825 Value* IRRegExpMacroAssembler::CharacterAt(Definition* index) {
1826 Value* pattern_val = BindLoadLocal(*string_param_);
1827 Value* index_val = Bind(index);
1828 return LoadCodeUnitsAt(pattern_val, index_val, 1);
1829 }
1830
1831
1832 // Note: We can't replace pattern with a load-local of string_param_
1833 // because we need to maintain the stack discipline in unoptimized code.
1834 Value* IRRegExpMacroAssembler::LoadCodeUnitsAt(Value* pattern,
1835 Value* index,
1836 intptr_t characters) {
1837 return Bind(new(I) LoadCodeUnitsInstr(
1838 pattern,
1839 index,
1840 characters,
1841 specialization_cid_,
1842 Scanner::kNoSourcePos));
1843 }
1844
1845
1846 #undef __
1847
1848 } // namespace dart
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698