Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(775)

Side by Side Diff: runtime/vm/regexp_assembler.cc

Issue 683433003: Integrate the Irregexp Regular Expression Engine. (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: more comments Created 6 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « runtime/vm/regexp_assembler.h ('k') | runtime/vm/regexp_ast.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file 1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a 2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file. 3 // BSD-style license that can be found in the LICENSE file.
4 4
5 #include "vm/regexp_assembler.h" 5 #include "vm/regexp_assembler.h"
6 6
7 // SNIP 7 #include "vm/bit_vector.h"
8 #include "vm/compiler.h"
9 #include "vm/dart_entry.h"
10 #include "vm/flow_graph_builder.h"
11 #include "vm/il_printer.h"
12 #include "vm/object_store.h"
13 #include "vm/regexp.h"
14 #include "vm/resolver.h"
15 #include "vm/stack_frame.h"
16 #include "vm/unibrow-inl.h"
17 #include "vm/unicode.h"
18
// Shorthand: expands to a call of isolate() in whatever scope it is used.
#define I isolate()

// Debugging output macros. TAG() is called at the head of each interesting
// function and prints its name during execution if irregexp tracing is enabled.
// PRINT(arg) forwards arg to Print() under the same condition.
//
// Both statement-like macros are wrapped in do { } while (0) so that each
// expands to exactly one statement; a bare `if (...) { ... }` expansion would
// capture a following `else` when used inside an unbraced if/else.
#define TAG()                                                                  \
  do {                                                                         \
    if (FLAG_trace_irregexp) {                                                 \
      TAG_();                                                                  \
    }                                                                          \
  } while (0)

// Unconditional worker for TAG(): emits a Print of "TAG: <function name>".
// The trailing semicolon is kept so existing expansions stay unchanged.
#define TAG_()                                                                 \
  Print(PushArgument(                                                          \
    Bind(new(I) ConstantInstr(String::ZoneHandle(I, String::Concat(            \
        String::Handle(String::New("TAG: ")),                                  \
        String::Handle(String::New(__FUNCTION__)), Heap::kOld))))));

#define PRINT(arg)                                                             \
  do {                                                                         \
    if (FLAG_trace_irregexp) {                                                 \
      Print(arg);                                                              \
    }                                                                          \
  } while (0)
8 31
9 namespace dart { 32 namespace dart {
10 33
11 RegExpMacroAssembler::RegExpMacroAssembler(Zone* zone) 34 DEFINE_FLAG(bool, trace_irregexp, false, "Trace irregexps");
35
36
37 static const intptr_t kInvalidTryIndex = CatchClauseNode::kInvalidTryIndex;
38 static const intptr_t kNoSourcePos = Scanner::kNoSourcePos;
39
40
41 void PrintUtf16(uint16_t c) {
42 const char* format = (0x20 <= c && c <= 0x7F) ?
43 "%c" : (c <= 0xff) ? "\\x%02x" : "\\u%04x";
44 OS::Print(format, c);
45 }
46
47
48 /*
49 * This assembler uses the following main local variables:
50 * - stack_: A pointer to a growable list which we use as an all-purpose stack
51 * storing backtracking offsets, positions & stored register values.
52 * - current_character_: Stores the currently loaded characters (possibly more
53 * than one).
54 * - current_position_: The current position within the string, stored as a
55 * negative offset from the end of the string (i.e. the
56 * position corresponding to str[0] is -str.length).
57 * Note that current_position_ is *not* byte-based, unlike
58 * original V8 code.
59 *
60 * Results are returned through an array of capture indices, stored at
61 * matches_param_. A null array specifies a failure to match. The match indices
62 * [start_inclusive, end_exclusive] for capture group i are stored at positions
63 * matches_param_[i * 2] and matches_param_[i * 2 + 1], respectively. Match
64 * indices of -1 denote non-matched groups. Note that we store these indices
65 * as a negative offset from the end of the string in position_registers_
66 * during processing, and convert them to standard indexes when copying them
67 * to matches_param_ on successful match.
68 */
69
70 RegExpMacroAssembler::RegExpMacroAssembler(Isolate* isolate)
12 : slow_safe_compiler_(false), 71 : slow_safe_compiler_(false),
13 global_mode_(NOT_GLOBAL), 72 global_mode_(NOT_GLOBAL),
14 zone_(zone) { 73 isolate_(isolate) {
15 } 74 }
16 75
17 76
18 RegExpMacroAssembler::~RegExpMacroAssembler() { 77 RegExpMacroAssembler::~RegExpMacroAssembler() {
19 } 78 }
20 79
21 // SNIP 80
81 IRRegExpMacroAssembler::IRRegExpMacroAssembler(
82 intptr_t specialization_cid,
83 intptr_t capture_count,
84 const ParsedFunction* parsed_function,
85 const ZoneGrowableArray<const ICData*>& ic_data_array,
86 Isolate* isolate)
87 : RegExpMacroAssembler(isolate),
88 specialization_cid_(specialization_cid),
89 parsed_function_(parsed_function),
90 ic_data_array_(ic_data_array),
91 current_instruction_(NULL),
92 stack_(NULL),
93 current_character_(NULL),
94 current_position_(NULL),
95 string_param_(NULL),
96 string_param_length_(NULL),
97 start_index_param_(NULL),
98 position_registers_count_((capture_count + 1) * 2),
99 stack_array_(GrowableObjectArray::ZoneHandle(
100 isolate, GrowableObjectArray::New(16, Heap::kOld))) {
101 switch (specialization_cid) {
102 case kOneByteStringCid:
103 case kExternalOneByteStringCid: mode_ = ASCII; break;
104 case kTwoByteStringCid:
105 case kExternalTwoByteStringCid: mode_ = UC16; break;
106 default: UNREACHABLE();
107 }
108
109 InitializeLocals();
110
111 // Create and generate all preset blocks.
112 entry_block_ =
113 new(isolate) GraphEntryInstr(
114 parsed_function_,
115 new(isolate) TargetEntryInstr(block_id_.Alloc(), kInvalidTryIndex),
116 Isolate::kNoDeoptId);
117 start_block_ =
118 new(isolate) JoinEntryInstr(block_id_.Alloc(), kInvalidTryIndex);
119 success_block_ =
120 new(isolate) JoinEntryInstr(block_id_.Alloc(), kInvalidTryIndex);
121 backtrack_block_ =
122 new(isolate) JoinEntryInstr(block_id_.Alloc(), kInvalidTryIndex);
123 exit_block_ =
124 new(isolate) JoinEntryInstr(block_id_.Alloc(), kInvalidTryIndex);
125
126 GenerateEntryBlock();
127 GenerateSuccessBlock();
128 GenerateBacktrackBlock();
129 GenerateExitBlock();
130
131 blocks_.Add(entry_block_);
132 blocks_.Add(entry_block_->normal_entry());
133 blocks_.Add(start_block_);
134 blocks_.Add(success_block_);
135 blocks_.Add(backtrack_block_);
136 blocks_.Add(exit_block_);
137
138 // Begin emission at the start_block_.
139 set_current_instruction(start_block_);
140 }
141
142
143 IRRegExpMacroAssembler::~IRRegExpMacroAssembler() { }
144
145
146 void IRRegExpMacroAssembler::InitializeLocals() {
147 // All generated functions are expected to have a current-context variable.
148 // This variable is unused in irregexp functions.
149 parsed_function_->current_context_var()->set_index(GetNextLocalIndex());
150
151 // Create local variables and parameters.
152 stack_ = Local(Symbols::stack());
153 current_character_ = Local(Symbols::current_character());
154 current_position_ = Local(Symbols::current_position());
155 string_param_length_ = Local(Symbols::string_param_length());
156 capture_length_ = Local(Symbols::capture_length());
157 match_start_index_ = Local(Symbols::match_start_index());
158 capture_start_index_ = Local(Symbols::capture_start_index());
159 match_end_index_ = Local(Symbols::match_end_index());
160 char_in_capture_ = Local(Symbols::char_in_capture());
161 char_in_match_ = Local(Symbols::char_in_match());
162 result_ = Local(Symbols::result());
163
164 string_param_ = Parameter(Symbols::string_param(), 0);
165 start_index_param_ = Parameter(Symbols::start_index_param(), 1);
166
167 // Reserve space for all captured group positions. Note that more might
168 // be created on the fly for internal use.
169 for (intptr_t i = 0; i < position_registers_count_; i++) {
170 position_register(i);
171 }
172 }
173
174
175 void IRRegExpMacroAssembler::GenerateEntryBlock() {
176 set_current_instruction(entry_block_->normal_entry());
177 TAG();
178
179 // Generate a local list variable which we will use as a backtracking stack.
180
181 StoreLocal(stack_, Bind(new(I) ConstantInstr(stack_array_)));
182 Do(InstanceCall(InstanceCallDescriptor(Symbols::clear()), PushLocal(stack_)));
183
184 // Store string.length.
185 PushArgumentInstr* string_push = PushLocal(string_param_);
186
187 StoreLocal(
188 string_param_length_,
189 Bind(InstanceCall(
190 InstanceCallDescriptor(
191 String::ZoneHandle(Field::GetterSymbol(Symbols::Length()))),
192 string_push)));
193
194 // Initialize all capture registers.
195 ClearRegisters(0, position_registers_count_ - 1);
196
197 // Store (start_index - string.length) as the current position (since it's a
198 // negative offset from the end of the string).
199 PushArgumentInstr* start_index_push = PushLocal(start_index_param_);
200 PushArgumentInstr* length_push = PushLocal(string_param_length_);
201
202 StoreLocal(current_position_, Bind(Sub(start_index_push, length_push)));
203
204 // Jump to the start block.
205 current_instruction_->Goto(start_block_);
206 }
207
208
209 void IRRegExpMacroAssembler::GenerateBacktrackBlock() {
210 set_current_instruction(backtrack_block_);
211 TAG();
212 Backtrack();
213 }
214
215
216 void IRRegExpMacroAssembler::GenerateSuccessBlock() {
217 set_current_instruction(success_block_);
218 TAG();
219
220 Definition* type_args_null_def = new(I) ConstantInstr(
221 TypeArguments::ZoneHandle(I, TypeArguments::null()));
222 PushArgumentInstr* type_arg_push = PushArgument(Bind(type_args_null_def));
223 PushArgumentInstr* length_push =
224 PushArgument(Bind(Uint64Constant(position_registers_count_)));
225
226 const Library& lib = Library::Handle(Library::CoreLibrary());
227 const Class& list_class = Class::Handle(
228 lib.LookupCoreClass(Symbols::List()));
229 const Function& list_ctor =
230 Function::ZoneHandle(I, list_class.LookupFactory(Symbols::ListFactory()));
231
232 // TODO(zerny): Use CreateArrayInstr and StoreIndexed instead.
233 StoreLocal(result_, Bind(StaticCall(list_ctor, type_arg_push, length_push)));
234
235 // Store captured offsets in the `matches` parameter.
236 // TODO(zerny): Eliminate position_register locals and access `matches`
237 // directly.
238 for (intptr_t i = 0; i < position_registers_count_; i++) {
239 PushArgumentInstr* matches_push = PushLocal(result_);
240 PushArgumentInstr* index_push = PushArgument(Bind(Uint64Constant(i)));
241
242 // Convert negative offsets from the end of the string to string indices.
243 PushArgumentInstr* offset_push = PushLocal(position_register(i));
244 PushArgumentInstr* len_push = PushLocal(string_param_length_);
245 PushArgumentInstr* value_push =
246 PushArgument(Bind(Add(offset_push, len_push)));
247
248 Do(InstanceCall(InstanceCallDescriptor::FromToken(Token::kASSIGN_INDEX),
249 matches_push,
250 index_push,
251 value_push));
252 }
253
254 // Print the result if tracing.
255 PRINT(PushLocal(result_));
256
257 // Return true on success.
258 AppendInstruction(new(I) ReturnInstr(kNoSourcePos, Bind(LoadLocal(result_))));
259 }
260
261
262 void IRRegExpMacroAssembler::GenerateExitBlock() {
263 set_current_instruction(exit_block_);
264 TAG();
265
266 // Return false on failure.
267 AppendInstruction(new(I) ReturnInstr(kNoSourcePos, Bind(LoadLocal(result_))));
268 }
269
270
271 #if defined(TARGET_ARCH_ARM64) || \
272 defined(TARGET_ARCH_ARM) || \
273 defined(TARGET_ARCH_MIPS)
274 // Disabling unaligned accesses forces the regexp engine to load characters one
275 // by one instead of up to 4 at once, along with the associated performance hit.
276 // TODO(zerny): Be less conservative about disabling unaligned accesses.
277 // For instance, ARMv6 supports unaligned accesses. Once it is enabled here,
278 // update LoadCodeUnitsInstr methods for the appropriate architectures.
279 static const bool kEnableUnalignedAccesses = false;
280 #else
281 static const bool kEnableUnalignedAccesses = true;
282 #endif
283 bool IRRegExpMacroAssembler::CanReadUnaligned() {
284 return kEnableUnalignedAccesses && !slow_safe();
285 }
286
287
288 RawArray* IRRegExpMacroAssembler::Execute(
289 const Function& function,
290 const String& input,
291 const Smi& start_offset,
292 Isolate* isolate) {
293 // Create the argument list.
294 const Array& args = Array::Handle(Array::New(2));
295 args.SetAt(0, input);
296 args.SetAt(1, start_offset);
297
298 // And finally call the generated code.
299
300 const Object& retval =
301 Object::Handle(isolate, DartEntry::InvokeFunction(function, args));
302 if (retval.IsError()) {
303 const Error& error = Error::Cast(retval);
304 OS::Print("%s\n", error.ToErrorCString());
305 // Should never happen.
306 UNREACHABLE();
307 }
308
309 if (retval.IsNull()) {
310 return Array::null();
311 }
312
313 ASSERT(retval.IsArray());
314 return Array::Cast(retval).raw();
315 }
316
317
318 RawBool* IRRegExpMacroAssembler::CaseInsensitiveCompareUC16(
319 RawString* str_raw,
320 RawSmi* lhs_index_raw,
321 RawSmi* rhs_index_raw,
322 RawSmi* length_raw) {
323 const String& str = String::Handle(str_raw);
324 const Smi& lhs_index = Smi::Handle(lhs_index_raw);
325 const Smi& rhs_index = Smi::Handle(rhs_index_raw);
326 const Smi& length = Smi::Handle(length_raw);
327
328 // TODO(zerny): Optimize as single instance. V8 has this as an
329 // isolate member.
330 unibrow::Mapping<unibrow::Ecma262Canonicalize> canonicalize;
331
332 for (intptr_t i = 0; i < length.Value(); i++) {
333 int32_t c1 = str.CharAt(lhs_index.Value() + i);
334 int32_t c2 = str.CharAt(rhs_index.Value() + i);
335 if (c1 != c2) {
336 int32_t s1[1] = { c1 };
337 canonicalize.get(c1, '\0', s1);
338 if (s1[0] != c2) {
339 int32_t s2[1] = { c2 };
340 canonicalize.get(c2, '\0', s2);
341 if (s1[0] != s2[0]) {
342 return Bool::False().raw();
343 }
344 }
345 }
346 }
347 return Bool::True().raw();
348 }
349
350
351 LocalVariable* IRRegExpMacroAssembler::Parameter(const String& name,
352 intptr_t index) const {
353 const Type& local_type = Type::ZoneHandle(I, Type::DynamicType());
354 LocalVariable* local =
355 new(I) LocalVariable(kNoSourcePos, name, local_type);
356
357 intptr_t param_frame_index = kParamEndSlotFromFp + kParamCount - index;
358 local->set_index(param_frame_index);
359
360 return local;
361 }
362
363
364 LocalVariable* IRRegExpMacroAssembler::Local(const String& name) {
365 const Type& local_type = Type::ZoneHandle(I, Type::DynamicType());
366 LocalVariable* local =
367 new(I) LocalVariable(kNoSourcePos, name, local_type);
368 local->set_index(GetNextLocalIndex());
369
370 return local;
371 }
372
373
374 ConstantInstr* IRRegExpMacroAssembler::Int64Constant(int64_t value) const {
375 return new(I) ConstantInstr(
376 Integer::ZoneHandle(I, Integer::New(value, Heap::kOld)));
377 }
378
379
380 ConstantInstr* IRRegExpMacroAssembler::Uint64Constant(uint64_t value) const {
381 return new(I) ConstantInstr(
382 Integer::ZoneHandle(I, Integer::NewFromUint64(value, Heap::kOld)));
383 }
384
385
386 ConstantInstr* IRRegExpMacroAssembler::BoolConstant(bool value) const {
387 return new(I) ConstantInstr(value ? Bool::True() : Bool::False());
388 }
389
390
391 ConstantInstr* IRRegExpMacroAssembler::StringConstant(const char* value) const {
392 return new(I) ConstantInstr(
393 String::ZoneHandle(I, String::New(value, Heap::kOld)));
394 }
395
396
397 ConstantInstr* IRRegExpMacroAssembler::WordCharacterMapConstant() const {
398 const Library& lib = Library::Handle(I, Library::CoreLibrary());
399 const Class& regexp_class = Class::Handle(I,
400 lib.LookupClassAllowPrivate(Symbols::JSSyntaxRegExp()));
401 const Field& word_character_field = Field::ZoneHandle(I,
402 regexp_class.LookupStaticField(Symbols::_wordCharacterMap()));
403 ASSERT(!word_character_field.IsNull());
404
405 if (word_character_field.IsUninitialized()) {
406 word_character_field.EvaluateInitializer();
407 }
408 ASSERT(!word_character_field.IsUninitialized());
409
410 return new(I) ConstantInstr(
411 Instance::ZoneHandle(I, word_character_field.value()));
412 }
413
414
415 ComparisonInstr* IRRegExpMacroAssembler::Comparison(
416 ComparisonKind kind, Definition* lhs, Definition* rhs) {
417 Token::Kind strict_comparison = Token::kEQ_STRICT;
418 Token::Kind intermediate_operator = Token::kILLEGAL;
419 switch (kind) {
420 case kEQ:
421 intermediate_operator = Token::kEQ;
422 break;
423 case kNE:
424 intermediate_operator = Token::kEQ;
425 strict_comparison = Token::kNE_STRICT;
426 break;
427 case kLT:
428 intermediate_operator = Token::kLT;
429 break;
430 case kGT:
431 intermediate_operator = Token::kGT;
432 break;
433 case kLTE:
434 intermediate_operator = Token::kLTE;
435 break;
436 case kGTE:
437 intermediate_operator = Token::kGTE;
438 break;
439 default:
440 UNREACHABLE();
441 }
442
443 ASSERT(intermediate_operator != Token::kILLEGAL);
444
445 PushArgumentInstr* lhs_push = PushArgument(Bind(lhs));
446 PushArgumentInstr* rhs_push = PushArgument(Bind(rhs));
447
448 Value* lhs_value =
449 Bind(InstanceCall(
450 InstanceCallDescriptor::FromToken(intermediate_operator),
451 lhs_push,
452 rhs_push));
453 Value* rhs_value = Bind(BoolConstant(true));
454
455 return new(I) StrictCompareInstr(
456 kNoSourcePos, strict_comparison, lhs_value, rhs_value, true);
457 }
458
459
460 StaticCallInstr* IRRegExpMacroAssembler::StaticCall(
461 const Function& function) const {
462 ZoneGrowableArray<PushArgumentInstr*>* arguments =
463 new(I) ZoneGrowableArray<PushArgumentInstr*>(0);
464 return StaticCall(function, arguments);
465 }
466
467
468 StaticCallInstr* IRRegExpMacroAssembler::StaticCall(
469 const Function& function,
470 PushArgumentInstr* arg1) const {
471 ZoneGrowableArray<PushArgumentInstr*>* arguments =
472 new(I) ZoneGrowableArray<PushArgumentInstr*>(1);
473 arguments->Add(arg1);
474
475 return StaticCall(function, arguments);
476 }
477
478
479 StaticCallInstr* IRRegExpMacroAssembler::StaticCall(
480 const Function& function,
481 PushArgumentInstr* arg1,
482 PushArgumentInstr* arg2) const {
483 ZoneGrowableArray<PushArgumentInstr*>* arguments =
484 new(I) ZoneGrowableArray<PushArgumentInstr*>(2);
485 arguments->Add(arg1);
486 arguments->Add(arg2);
487
488 return StaticCall(function, arguments);
489 }
490
491
492 StaticCallInstr* IRRegExpMacroAssembler::StaticCall(
493 const Function& function,
494 ZoneGrowableArray<PushArgumentInstr*>* arguments) const {
495 return new(I) StaticCallInstr(kNoSourcePos,
496 function,
497 Object::null_array(),
498 arguments,
499 ic_data_array_);
500 }
501
502
503 InstanceCallInstr* IRRegExpMacroAssembler::InstanceCall(
504 const InstanceCallDescriptor& desc,
505 PushArgumentInstr* arg1) const {
506 ZoneGrowableArray<PushArgumentInstr*>* arguments =
507 new(I) ZoneGrowableArray<PushArgumentInstr*>(1);
508 arguments->Add(arg1);
509
510 return InstanceCall(desc, arguments);
511 }
512
513
514 InstanceCallInstr* IRRegExpMacroAssembler::InstanceCall(
515 const InstanceCallDescriptor& desc,
516 PushArgumentInstr* arg1,
517 PushArgumentInstr* arg2) const {
518 ZoneGrowableArray<PushArgumentInstr*>* arguments =
519 new(I) ZoneGrowableArray<PushArgumentInstr*>(2);
520 arguments->Add(arg1);
521 arguments->Add(arg2);
522
523 return InstanceCall(desc, arguments);
524 }
525
526
527 InstanceCallInstr* IRRegExpMacroAssembler::InstanceCall(
528 const InstanceCallDescriptor& desc,
529 PushArgumentInstr* arg1,
530 PushArgumentInstr* arg2,
531 PushArgumentInstr* arg3) const {
532 ZoneGrowableArray<PushArgumentInstr*>* arguments =
533 new(I) ZoneGrowableArray<PushArgumentInstr*>(3);
534 arguments->Add(arg1);
535 arguments->Add(arg2);
536 arguments->Add(arg3);
537
538 return InstanceCall(desc, arguments);
539 }
540
541
542 InstanceCallInstr* IRRegExpMacroAssembler::InstanceCall(
543 const InstanceCallDescriptor& desc,
544 ZoneGrowableArray<PushArgumentInstr*> *arguments) const {
545 return
546 new(I) InstanceCallInstr(kNoSourcePos,
547 desc.name,
548 desc.token_kind,
549 arguments,
550 Object::null_array(),
551 desc.checked_argument_count,
552 ic_data_array_);
553 }
554
555
556 LoadLocalInstr* IRRegExpMacroAssembler::LoadLocal(LocalVariable* local) const {
557 return new(I) LoadLocalInstr(*local);
558 }
559
560
561 void IRRegExpMacroAssembler::StoreLocal(LocalVariable* local,
562 Value* value) {
563 Do(new(I) StoreLocalInstr(*local, value));
564 }
565
566
567 void IRRegExpMacroAssembler::set_current_instruction(Instruction* instruction) {
568 current_instruction_ = instruction;
569 }
570
571
572 Value* IRRegExpMacroAssembler::Bind(Definition* definition) {
573 AppendInstruction(definition);
574 definition->set_temp_index(temp_id_.Alloc());
575
576 return new(I) Value(definition);
577 }
578
579
580 void IRRegExpMacroAssembler::Do(Definition* definition) {
581 AppendInstruction(definition);
582 }
583
584
585 Value* IRRegExpMacroAssembler::BindLoadLocal(const LocalVariable& local) {
586 if (local.IsConst()) {
587 return Bind(new(I) ConstantInstr(*local.ConstValue()));
588 }
589 ASSERT(!local.is_captured());
590 return Bind(new(I) LoadLocalInstr(local));
591 }
592
593
// In some cases, the V8 irregexp engine generates unreachable code by emitting
// a jmp not followed by a bind. We cannot do the same, since it is impossible
// to append to a block following a jmp. In such cases, assume that we are doing
// the correct thing, but output a warning when tracing.
//
// When there is no open block (current_instruction_ is NULL), a throwaway
// label is bound so that emission has somewhere to go. The body is wrapped in
// do { } while (0) so that the macro expands to exactly one statement and
// cannot capture a following `else`.
#define HANDLE_DEAD_CODE_EMISSION()                                            \
  do {                                                                         \
    if (current_instruction_ == NULL) {                                        \
      if (FLAG_trace_irregexp) {                                               \
        OS::Print("WARNING: Attempting to append to a closed assembler. "      \
                  "This could be either a bug or generation of dead code "     \
                  "inherited from V8.\n");                                     \
      }                                                                        \
      BlockLabel dummy;                                                        \
      BindBlock(&dummy);                                                       \
    }                                                                          \
  } while (0)
608
609 void IRRegExpMacroAssembler::AppendInstruction(Instruction* instruction) {
610 HANDLE_DEAD_CODE_EMISSION();
611
612 ASSERT(current_instruction_ != NULL);
613 ASSERT(current_instruction_->next() == NULL);
614
615 temp_id_.Dealloc(instruction->InputCount());
616 arg_id_.Dealloc(instruction->ArgumentCount());
617
618 current_instruction_->LinkTo(instruction);
619 set_current_instruction(instruction);
620 }
621
622
623 void IRRegExpMacroAssembler::CloseBlockWith(Instruction* instruction) {
624 HANDLE_DEAD_CODE_EMISSION();
625
626 ASSERT(current_instruction_ != NULL);
627 ASSERT(current_instruction_->next() == NULL);
628
629 temp_id_.Dealloc(instruction->InputCount());
630 arg_id_.Dealloc(instruction->ArgumentCount());
631
632 current_instruction_->LinkTo(instruction);
633 set_current_instruction(NULL);
634 }
635
636
637 void IRRegExpMacroAssembler::GoTo(BlockLabel* to) {
638 if (to == NULL) {
639 Backtrack();
640 } else {
641 to->SetLinked();
642 GoTo(to->block());
643 }
644 }
645
646
647 // Closes the current block with a goto, and unsets current_instruction_.
648 // BindBlock() must be called before emission can continue.
649 void IRRegExpMacroAssembler::GoTo(JoinEntryInstr* to) {
650 HANDLE_DEAD_CODE_EMISSION();
651
652 ASSERT(current_instruction_ != NULL);
653 ASSERT(current_instruction_->next() == NULL);
654 current_instruction_->Goto(to);
655 set_current_instruction(NULL);
656 }
657
658
659 PushArgumentInstr* IRRegExpMacroAssembler::PushArgument(Value* value) {
660 arg_id_.Alloc();
661 PushArgumentInstr* push = new(I) PushArgumentInstr(value);
662 // Do *not* use Do() for push argument instructions.
663 AppendInstruction(push);
664 return push;
665 }
666
667
668 PushArgumentInstr* IRRegExpMacroAssembler::PushLocal(LocalVariable* local) {
669 return PushArgument(Bind(LoadLocal(local)));
670 }
671
672
673 void IRRegExpMacroAssembler::Print(const char* str) {
674 Print(PushArgument(
675 Bind(new(I) ConstantInstr(
676 String::ZoneHandle(I, String::New(str, Heap::kOld))))));
677 }
678
679
680 void IRRegExpMacroAssembler::Print(PushArgumentInstr* argument) {
681 const Library& lib = Library::Handle(Library::CoreLibrary());
682 const Function& print_fn = Function::ZoneHandle(
683 I, lib.LookupFunctionAllowPrivate(Symbols::print()));
684 Do(StaticCall(print_fn, argument));
685 }
686
687
688 void IRRegExpMacroAssembler::PrintBlocks() {
689 for (intptr_t i = 0; i < blocks_.length(); i++) {
690 FlowGraphPrinter::PrintBlock(blocks_[i], false);
691 }
692 }
693
694
695 intptr_t IRRegExpMacroAssembler::stack_limit_slack() {
696 return 32;
697 }
698
699
700 void IRRegExpMacroAssembler::AdvanceCurrentPosition(intptr_t by) {
701 TAG();
702 if (by != 0) {
703 PushArgumentInstr* cur_pos_push = PushLocal(current_position_);
704 PushArgumentInstr* by_push = PushArgument(Bind(Int64Constant(by)));
705
706 Value* new_pos_value = Bind(Add(cur_pos_push, by_push));
707 StoreLocal(current_position_, new_pos_value);
708 }
709 }
710
711
712 void IRRegExpMacroAssembler::AdvanceRegister(intptr_t reg, intptr_t by) {
713 TAG();
714 ASSERT(reg >= 0);
715 ASSERT(reg < position_registers_.length());
716
717 if (by != 0) {
718 PushArgumentInstr* reg_push = PushLocal(position_register(reg));
719 PushArgumentInstr* by_push = PushArgument(Bind(Int64Constant(by)));
720 StoreLocal(position_register(reg), Bind(Add(reg_push, by_push)));
721 }
722 }
723
724
725 void IRRegExpMacroAssembler::Backtrack() {
726 TAG();
727 CheckPreemption();
728
729 GrowableObjectArray& offsets = GrowableObjectArray::ZoneHandle(
730 I, GrowableObjectArray::New(Heap::kOld));
731
732 PushArgumentInstr* block_offsets_push =
733 PushArgument(Bind(new(I) ConstantInstr(offsets)));
734 PushArgumentInstr* block_id_push = PushArgument(PopStack());
735
736 Value* offset_value =
737 Bind(InstanceCall(InstanceCallDescriptor::FromToken(Token::kINDEX),
738 block_offsets_push,
739 block_id_push));
740
741 IndirectGotoInstr* igoto = new(I) IndirectGotoInstr(&offsets, offset_value);
742 CloseBlockWith(igoto);
743 igotos_.Add(igoto);
744 }
745
746
747 // A BindBlock is analogous to assigning a label to a basic block.
748 // If the BlockLabel does not yet contain a block, it is created.
749 // If there is a current instruction, append a goto to the bound block.
750 void IRRegExpMacroAssembler::BindBlock(BlockLabel* label) {
751 ASSERT(!label->IsBound());
752 ASSERT(label->block()->next() == NULL);
753
754 label->SetBound(block_id_.Alloc());
755 blocks_.Add(label->block());
756
757 if (current_instruction_ != NULL) {
758 GoTo(label);
759 }
760 set_current_instruction(label->block());
761
762 // Print the id of the current block if tracing.
763 PRINT(PushArgument(Bind(Uint64Constant(label->block()->block_id()))));
764 }
765
766
767 intptr_t IRRegExpMacroAssembler::GetNextLocalIndex() {
768 intptr_t id = local_id_.Alloc();
769 return kFirstLocalSlotFromFp - id;
770 }
771
772
773 LocalVariable* IRRegExpMacroAssembler::position_register(intptr_t index) {
774 // Create position registers as needed.
775 for (intptr_t i = position_registers_.length(); i < index + 1; i++) {
776 position_registers_.Add(Local(Symbols::position_registers()));
777 }
778
779 return position_registers_[index];
780 }
781
782
783 // TODO(zerny): Move the offset table outside to avoid having to keep
784 // the assembler around until after code generation; both function or regexp
785 // would work.
786 void IRRegExpMacroAssembler::FinalizeBlockOffsetTable() {
787 for (intptr_t i = 0; i < igotos_.length(); i++) {
788 IndirectGotoInstr* igoto = igotos_[i];
789 igoto->SetOffsetCount(I, indirect_id_.Count());
790
791 for (intptr_t j = 0; j < igoto->SuccessorCount(); j++) {
792 TargetEntryInstr* target = igoto->SuccessorAt(j);
793
794 // Optimizations might have modified the immediate target block, but
795 // it must end with a goto to the indirect entry.
796 Instruction* instr = target;
797 while (instr != NULL && !instr->IsGoto()) {
798 instr = instr->next();
799 }
800 ASSERT(instr->IsGoto());
801
802 IndirectEntryInstr* ientry =
803 instr->AsGoto()->successor()->AsIndirectEntry();
804 ASSERT(ientry != NULL);
805
806 // The intermediate block was possibly compacted, check both it and the
807 // final indirect entry for a valid offset. If neither are valid, then
808 // the indirect entry is unreachable.
809 intptr_t offset =
810 (target->offset() > 0) ? target->offset() : ientry->offset();
811 if (offset > 0) {
812 intptr_t adjusted_offset =
813 offset - Assembler::EntryPointToPcMarkerOffset();
814 igoto->SetOffsetAt(I, ientry->indirect_id(), adjusted_offset);
815 }
816 }
817 }
818 }
819
820 void IRRegExpMacroAssembler::FinalizeIndirectGotos() {
821 for (intptr_t i = 0; i < igotos_.length(); i++) {
822 for (intptr_t j = 0; j < entry_block_->indirect_entries().length(); j++) {
823 igotos_.At(i)->AddSuccessor(
824 TargetWithJoinGoto(entry_block_->indirect_entries().At(j)));
825 }
826 }
827 }
828
829
830 void IRRegExpMacroAssembler::CheckCharacter(uint32_t c, BlockLabel* on_equal) {
831 TAG();
832 Definition* cur_char_def = LoadLocal(current_character_);
833 Definition* char_def = Uint64Constant(c);
834
835 BranchOrBacktrack(Comparison(kEQ, cur_char_def, char_def), on_equal);
836 }
837
838
839 void IRRegExpMacroAssembler::CheckCharacterGT(uint16_t limit,
840 BlockLabel* on_greater) {
841 TAG();
842 BranchOrBacktrack(Comparison(kGT,
843 LoadLocal(current_character_),
844 Uint64Constant(limit)),
845 on_greater);
846 }
847
848
849 void IRRegExpMacroAssembler::CheckAtStart(BlockLabel* on_at_start) {
850 TAG();
851
852 BlockLabel not_at_start;
853
854 // Did we start the match at the start of the string at all?
855 BranchOrBacktrack(Comparison(kNE,
856 LoadLocal(start_index_param_),
857 Uint64Constant(0)),
858 &not_at_start);
859
860 // If we did, are we still at the start of the input, i.e. is
861 // (offset == string_length * -1)?
862 Definition* neg_len_def =
863 InstanceCall(InstanceCallDescriptor::FromToken(Token::kNEGATE),
864 PushLocal(string_param_length_));
865 Definition* offset_def = LoadLocal(current_position_);
866 BranchOrBacktrack(Comparison(kEQ, neg_len_def, offset_def),
867 on_at_start);
868
869 BindBlock(&not_at_start);
870 }
871
872
873 void IRRegExpMacroAssembler::CheckNotAtStart(BlockLabel* on_not_at_start) {
874 TAG();
875
876 // Did we start the match at the start of the string at all?
877 BranchOrBacktrack(Comparison(kNE,
878 LoadLocal(start_index_param_),
879 Uint64Constant(0)),
880 on_not_at_start);
881
882 // If we did, are we still at the start of the input, i.e. is
883 // (offset == string_length * -1)?
884 Definition* neg_len_def =
885 InstanceCall(InstanceCallDescriptor::FromToken(Token::kNEGATE),
886 PushLocal(string_param_length_));
887 Definition* offset_def = LoadLocal(current_position_);
888 BranchOrBacktrack(Comparison(kNE, neg_len_def, offset_def),
889 on_not_at_start);
890 }
891
892
893 void IRRegExpMacroAssembler::CheckCharacterLT(uint16_t limit,
894 BlockLabel* on_less) {
895 TAG();
896 BranchOrBacktrack(Comparison(kLT,
897 LoadLocal(current_character_),
898 Uint64Constant(limit)),
899 on_less);
900 }
901
902
903 void IRRegExpMacroAssembler::CheckGreedyLoop(BlockLabel* on_equal) {
904 TAG();
905
906 BlockLabel fallthrough;
907
908 PushArgumentInstr* stack_push = PushLocal(stack_);
909 Definition* stack_tip_def = InstanceCall(
910 InstanceCallDescriptor(String::ZoneHandle(
911 I, Field::GetterSymbol(Symbols::last()))),
912 stack_push);
913 Definition* cur_pos_def = LoadLocal(current_position_);
914
915 BranchOrBacktrack(Comparison(kNE, stack_tip_def, cur_pos_def),
916 &fallthrough);
917
918 // Pop, throwing away the value.
919 stack_push = PushLocal(stack_);
920 Do(InstanceCall(InstanceCallDescriptor(Symbols::removeLast()),
921 stack_push));
922
923 BranchOrBacktrack(NULL, on_equal);
924
925 BindBlock(&fallthrough);
926 }
927
928
// Emits code for a case-insensitive back reference. The captured text is
// delimited by position registers |start_reg| (start) and |start_reg| + 1
// (end). The emitted code:
//   - branches to |on_no_match| if the capture length is negative, if not
//     enough input remains, or if the texts differ (ignoring case);
//   - succeeds without moving current_position_ if the capture is empty;
//   - otherwise moves current_position_ to just past the matched text.
// In ASCII mode the comparison is an explicit emitted loop; in UC16 mode it
// is delegated to a CaseInsensitiveCompareUC16Instr.
929 void IRRegExpMacroAssembler::CheckNotBackReferenceIgnoreCase(
930 intptr_t start_reg,
931 BlockLabel* on_no_match) {
932 TAG();
// NOTE(review): position_register(start_reg + 1) is read below; confirm that
// `<=` (rather than `<`) is the intended bound for this assertion.
933 ASSERT(start_reg + 1 <= position_registers_.length());
934
935 BlockLabel fallthrough;
936
// capture_length_ = end - start.
937 PushArgumentInstr* end_push = PushLocal(position_register(start_reg + 1));
938 PushArgumentInstr* start_push = PushLocal(position_register(start_reg));
939 StoreLocal(capture_length_, Bind(Sub(end_push, start_push)));
940
941 // The length of a capture should not be negative. This can only happen
942 // if the end of the capture is unrecorded, or at a point earlier than
943 // the start of the capture.
944 // In that case the back reference cannot match.
945
946 BranchOrBacktrack(Comparison(kLT,
947 LoadLocal(capture_length_),
948 Uint64Constant(0)),
949 on_no_match);
950
951 // If length is zero, either the capture is empty or it is completely
952 // uncaptured. In either case succeed immediately.
953 BranchOrBacktrack(Comparison(kEQ,
954 LoadLocal(capture_length_),
955 Uint64Constant(0)),
956 &fallthrough);
957
958
959 // Check that there are sufficient characters left in the input.
// current_position_ is a negative offset from the end of the string, so
// (current_position_ + capture_length_) > 0 would run past the end.
960 PushArgumentInstr* pos_push = PushLocal(current_position_);
961 PushArgumentInstr* len_push = PushLocal(capture_length_);
962 BranchOrBacktrack(
963 Comparison(kGT,
964 InstanceCall(InstanceCallDescriptor::FromToken(Token::kADD),
965 pos_push,
966 len_push),
967 Uint64Constant(0)),
968 on_no_match);
969
// Convert the negative offsets into string indices by adding the string
// length: match_start_index_ addresses the input at the current position.
970 pos_push = PushLocal(current_position_);
971 len_push = PushLocal(string_param_length_);
972 StoreLocal(match_start_index_, Bind(Add(pos_push, len_push)));
973
// capture_start_index_ addresses the first character of the capture.
974 pos_push = PushLocal(position_register(start_reg));
975 len_push = PushLocal(string_param_length_);
976 StoreLocal(capture_start_index_, Bind(Add(pos_push, len_push)));
977
// match_end_index_ = match_start_index_ + capture_length_.
978 pos_push = PushLocal(match_start_index_);
979 len_push = PushLocal(capture_length_);
980 StoreLocal(match_end_index_, Bind(Add(pos_push, len_push)));
981
// Note: no branch in this function targets `success`; it is only bound below,
// where both comparison strategies fall through.
982 BlockLabel success;
983 if (mode_ == ASCII) {
984 BlockLabel loop_increment;
985 BlockLabel loop;
986 BindBlock(&loop);
987
// Load one character from the capture and one from the input.
988 StoreLocal(char_in_capture_, CharacterAt(LoadLocal(capture_start_index_)));
989 StoreLocal(char_in_match_, CharacterAt(LoadLocal(match_start_index_)));
990
// Identical characters: go straight to the index increment.
991 BranchOrBacktrack(Comparison(kEQ,
992 LoadLocal(char_in_capture_),
993 LoadLocal(char_in_match_)),
994 &loop_increment);
995
996 // Mismatch, try case-insensitive match (converting letters to lower-case).
// Lower-casing is done by OR-ing in the 0x20 bit.
997 PushArgumentInstr* match_char_push = PushLocal(char_in_match_);
998 PushArgumentInstr* mask_push = PushArgument(Bind(Uint64Constant(0x20)));
999 StoreLocal(char_in_match_,
1000 Bind(InstanceCall(
1001 InstanceCallDescriptor::FromToken(Token::kBIT_OR),
1002 match_char_push,
1003 mask_push)));
1004
// The converted input character must be a letter: either in 'a'..'z' ...
1005 BlockLabel convert_capture;
1006 BlockLabel on_not_in_range;
1007 BranchOrBacktrack(Comparison(kLT,
1008 LoadLocal(char_in_match_),
1009 Uint64Constant('a')),
1010 &on_not_in_range);
1011 BranchOrBacktrack(Comparison(kGT,
1012 LoadLocal(char_in_match_),
1013 Uint64Constant('z')),
1014 &on_not_in_range);
1015 GoTo(&convert_capture);
1016 BindBlock(&on_not_in_range);
1017
// ... or a Latin-1 letter; anything else is a definite mismatch.
1018 // Latin-1: Check for values in range [224,254] but not 247.
1019 BranchOrBacktrack(Comparison(kLT,
1020 LoadLocal(char_in_match_),
1021 Uint64Constant(224)),
1022 on_no_match);
1023 BranchOrBacktrack(Comparison(kGT,
1024 LoadLocal(char_in_match_),
1025 Uint64Constant(254)),
1026 on_no_match);
1027
1028 BranchOrBacktrack(Comparison(kEQ,
1029 LoadLocal(char_in_match_),
1030 Uint64Constant(247)),
1031 on_no_match);
1032
1033 // Also convert capture character.
1034 BindBlock(&convert_capture);
1035
1036 PushArgumentInstr* capture_char_push = PushLocal(char_in_capture_);
1037 mask_push = PushArgument(Bind(Uint64Constant(0x20)));
1038 StoreLocal(char_in_capture_,
1039 Bind(InstanceCall(
1040 InstanceCallDescriptor::FromToken(Token::kBIT_OR),
1041 capture_char_push,
1042 mask_push)));
1043
// After converting both sides the characters must be equal.
1044 BranchOrBacktrack(Comparison(kNE,
1045 LoadLocal(char_in_match_),
1046 LoadLocal(char_in_capture_)),
1047 on_no_match);
1048
1049 BindBlock(&loop_increment);
1050
1051 // Increment indexes into capture and match strings.
1052 PushArgumentInstr* index_push = PushLocal(capture_start_index_);
1053 PushArgumentInstr* inc_push = PushArgument(Bind(Uint64Constant(1)));
1054 StoreLocal(capture_start_index_, Bind(Add(index_push, inc_push)));
1055
1056 index_push = PushLocal(match_start_index_);
1057 inc_push = PushArgument(Bind(Uint64Constant(1)));
1058 StoreLocal(match_start_index_, Bind(Add(index_push, inc_push)));
1059
1060 // Compare to end of match, and loop if not done.
1061 BranchOrBacktrack(Comparison(kLT,
1062 LoadLocal(match_start_index_),
1063 LoadLocal(match_end_index_)),
1064 &loop);
1065 } else {
1066 ASSERT(mode_ == UC16);
1067
// Hand the whole comparison to a single instruction that takes the string,
// both start indices and the length.
1068 Value* string_value = Bind(LoadLocal(string_param_));
1069 Value* lhs_index_value = Bind(LoadLocal(match_start_index_));
1070 Value* rhs_index_value = Bind(LoadLocal(capture_start_index_));
1071 Value* length_value = Bind(LoadLocal(capture_length_));
1072
1073 Definition* is_match_def =
1074 new(I) CaseInsensitiveCompareUC16Instr(
1075 string_value,
1076 lhs_index_value,
1077 rhs_index_value,
1078 length_value,
1079 specialization_cid_);
1080
1081 BranchOrBacktrack(Comparison(kNE, is_match_def, BoolConstant(true)),
1082 on_no_match);
1083 }
1084
1085 BindBlock(&success);
1086
1087 // Move current character position to position after match.
// Subtracting the string length turns the index back into a negative offset.
1088 PushArgumentInstr* match_end_push = PushLocal(match_end_index_);
1089 len_push = PushLocal(string_param_length_);
1090 StoreLocal(current_position_, Bind(Sub(match_end_push, len_push)));
1091
1092 BindBlock(&fallthrough);
1093 }
1094
1095
// Emits code for a case-sensitive back reference. The captured text is
// delimited by position registers |start_reg| (start) and |start_reg| + 1
// (end). The emitted code branches to |on_no_match| if the capture length is
// negative, if not enough input remains, or if any character differs. An
// empty capture succeeds without consuming input; otherwise
// current_position_ is moved to just past the matched text.
1096 void IRRegExpMacroAssembler::CheckNotBackReference(
1097 intptr_t start_reg,
1098 BlockLabel* on_no_match) {
1099 TAG();
// NOTE(review): position_register(start_reg + 1) is read below; confirm that
// `<=` (rather than `<`) is the intended bound for this assertion.
1100 ASSERT(start_reg + 1 <= position_registers_.length());
1101
1102 BlockLabel fallthrough;
// Note: no branch in this function targets `success`; it is only bound below.
1103 BlockLabel success;
1104
1105 // Find length of back-referenced capture.
1106 PushArgumentInstr* end_push = PushLocal(position_register(start_reg + 1));
1107 PushArgumentInstr* start_push = PushLocal(position_register(start_reg));
1108 StoreLocal(capture_length_, Bind(Sub(end_push, start_push)));
1109
1110 // Fail on partial or illegal capture (start of capture after end of capture).
1111 BranchOrBacktrack(Comparison(kLT,
1112 LoadLocal(capture_length_),
1113 Uint64Constant(0)),
1114 on_no_match);
1115
1116 // Succeed on empty capture (including no capture)
1117 BranchOrBacktrack(Comparison(kEQ,
1118 LoadLocal(capture_length_),
1119 Uint64Constant(0)),
1120 &fallthrough);
1121
1122 // Check that there are sufficient characters left in the input.
// current_position_ is a negative offset from the end of the string, so
// (current_position_ + capture_length_) > 0 would run past the end.
1123 PushArgumentInstr* pos_push = PushLocal(current_position_);
1124 PushArgumentInstr* len_push = PushLocal(capture_length_);
1125 BranchOrBacktrack(
1126 Comparison(kGT,
1127 InstanceCall(InstanceCallDescriptor::FromToken(Token::kADD),
1128 pos_push,
1129 len_push),
1130 Uint64Constant(0)),
1131 on_no_match);
1132
1133 // Compute pointers to match string and capture string.
// These are string indices: negative offsets plus the string length.
1134 pos_push = PushLocal(current_position_);
1135 len_push = PushLocal(string_param_length_);
1136 StoreLocal(match_start_index_, Bind(Add(pos_push, len_push)));
1137
1138 pos_push = PushLocal(position_register(start_reg));
1139 len_push = PushLocal(string_param_length_);
1140 StoreLocal(capture_start_index_, Bind(Add(pos_push, len_push)));
1141
1142 pos_push = PushLocal(match_start_index_);
1143 len_push = PushLocal(capture_length_);
1144 StoreLocal(match_end_index_, Bind(Add(pos_push, len_push)));
1145
// Character-by-character comparison loop.
1146 BlockLabel loop;
1147 BindBlock(&loop);
1148
1149 StoreLocal(char_in_capture_, CharacterAt(LoadLocal(capture_start_index_)));
1150 StoreLocal(char_in_match_, CharacterAt(LoadLocal(match_start_index_)));
1151
1152 BranchOrBacktrack(Comparison(kNE,
1153 LoadLocal(char_in_capture_),
1154 LoadLocal(char_in_match_)),
1155 on_no_match);
1156
1157 // Increment indexes into capture and match strings.
1158 PushArgumentInstr* index_push = PushLocal(capture_start_index_);
1159 PushArgumentInstr* inc_push = PushArgument(Bind(Uint64Constant(1)));
1160 StoreLocal(capture_start_index_, Bind(Add(index_push, inc_push)));
1161
1162 index_push = PushLocal(match_start_index_);
1163 inc_push = PushArgument(Bind(Uint64Constant(1)));
1164 StoreLocal(match_start_index_, Bind(Add(index_push, inc_push)));
1165
1166 // Check if we have reached end of match area.
// Loop again while match_start_index_ < match_end_index_.
1167 BranchOrBacktrack(Comparison(kLT,
1168 LoadLocal(match_start_index_),
1169 LoadLocal(match_end_index_)),
1170 &loop);
1171
1172 BindBlock(&success);
1173
1174 // Move current character position to position after match.
// Subtracting the string length turns the index back into a negative offset.
1175 PushArgumentInstr* match_end_push = PushLocal(match_end_index_);
1176 len_push = PushLocal(string_param_length_);
1177 StoreLocal(current_position_, Bind(Sub(match_end_push, len_push)));
1178
1179 BindBlock(&fallthrough);
1180 }
1181
1182
// Emits a conditional branch to |on_not_equal| (backtrack if NULL) that is
// taken when current_character_ is not equal to |c|.
1183 void IRRegExpMacroAssembler::CheckNotCharacter(uint32_t c,
1184 BlockLabel* on_not_equal) {
1185 TAG();
1186 BranchOrBacktrack(Comparison(kNE,
1187 LoadLocal(current_character_),
1188 Uint64Constant(c)),
1189 on_not_equal);
1190 }
1191
1192
// Emits a conditional branch to |on_equal| (backtrack if NULL) that is taken
// when (current_character_ & |mask|) equals |c|.
1193 void IRRegExpMacroAssembler::CheckCharacterAfterAnd(uint32_t c,
1194 uint32_t mask,
1195 BlockLabel* on_equal) {
1196 TAG();
1197
1198 Definition* actual_def = LoadLocal(current_character_);
1199 Definition* expected_def = Uint64Constant(c);
1200
// actual_def is rebound to the masked character.
1201 PushArgumentInstr* actual_push = PushArgument(Bind(actual_def));
1202 PushArgumentInstr* mask_push = PushArgument(Bind(Uint64Constant(mask)));
1203 actual_def = InstanceCall(InstanceCallDescriptor::FromToken(Token::kBIT_AND),
1204 actual_push,
1205 mask_push);
1206
1207 BranchOrBacktrack(Comparison(kEQ, actual_def, expected_def), on_equal);
1208 }
1209
1210
// Emits a conditional branch to |on_not_equal| (backtrack if NULL) that is
// taken when (current_character_ & |mask|) differs from |c|.
1211 void IRRegExpMacroAssembler::CheckNotCharacterAfterAnd(
1212 uint32_t c,
1213 uint32_t mask,
1214 BlockLabel* on_not_equal) {
1215 TAG();
1216
1217 Definition* actual_def = LoadLocal(current_character_);
1218 Definition* expected_def = Uint64Constant(c);
1219
// actual_def is rebound to the masked character.
1220 PushArgumentInstr* actual_push = PushArgument(Bind(actual_def));
1221 PushArgumentInstr* mask_push = PushArgument(Bind(Uint64Constant(mask)));
1222 actual_def = InstanceCall(InstanceCallDescriptor::FromToken(Token::kBIT_AND),
1223 actual_push,
1224 mask_push);
1225
1226 BranchOrBacktrack(Comparison(kNE, actual_def, expected_def), on_not_equal);
1227 }
1228
1229
// Emits a conditional branch to |on_not_equal| (backtrack if NULL) that is
// taken when ((current_character_ - |minus|) & |mask|) differs from |c|.
1230 void IRRegExpMacroAssembler::CheckNotCharacterAfterMinusAnd(
1231 uint16_t c,
1232 uint16_t minus,
1233 uint16_t mask,
1234 BlockLabel* on_not_equal) {
1235 TAG();
1236 ASSERT(minus < Utf16::kMaxCodeUnit); // NOLINT
1237
1238 Definition* actual_def = LoadLocal(current_character_);
1239 Definition* expected_def = Uint64Constant(c);
1240
1241 PushArgumentInstr* actual_push = PushArgument(Bind(actual_def));
1242 PushArgumentInstr* minus_push = PushArgument(Bind(Uint64Constant(minus)));
1243
// Subtract first, then mask the difference.
1244 actual_push = PushArgument(Bind(Sub(actual_push, minus_push)));
1245 PushArgumentInstr* mask_push = PushArgument(Bind(Uint64Constant(mask)));
1246 actual_def = InstanceCall(InstanceCallDescriptor::FromToken(Token::kBIT_AND),
1247 actual_push,
1248 mask_push);
1249
1250 BranchOrBacktrack(Comparison(kNE, actual_def, expected_def), on_not_equal);
1251 }
1252
1253
// Emits code that transfers control to |on_in_range| (backtrack if NULL)
// when |from| <= current_character_ <= |to|, and otherwise falls through.
// Requires |from| <= |to|.
1254 void IRRegExpMacroAssembler::CheckCharacterInRange(
1255 uint16_t from,
1256 uint16_t to,
1257 BlockLabel* on_in_range) {
1258 TAG();
1259 ASSERT(from <= to);
1260
1261 // TODO(zerny): All range comparisons could be done cheaper with unsigned
1262 // compares. This pattern repeats in various places.
1263
// Two compares skip to the local label when the character is outside the
// range; reaching the unconditional jump means it is inside.
1264 BlockLabel on_not_in_range;
1265 BranchOrBacktrack(Comparison(kLT,
1266 LoadLocal(current_character_),
1267 Uint64Constant(from)),
1268 &on_not_in_range);
1269 BranchOrBacktrack(Comparison(kGT,
1270 LoadLocal(current_character_),
1271 Uint64Constant(to)),
1272 &on_not_in_range);
1273 BranchOrBacktrack(NULL, on_in_range);
1274
1275 BindBlock(&on_not_in_range);
1276 }
1277
1278
// Emits code that branches to |on_not_in_range| (backtrack if NULL) when
// current_character_ is below |from| or above |to|, and otherwise falls
// through. Requires |from| <= |to|.
1279 void IRRegExpMacroAssembler::CheckCharacterNotInRange(
1280 uint16_t from,
1281 uint16_t to,
1282 BlockLabel* on_not_in_range) {
1283 TAG();
1284 ASSERT(from <= to);
1285
1286 BranchOrBacktrack(Comparison(kLT,
1287 LoadLocal(current_character_),
1288 Uint64Constant(from)),
1289 on_not_in_range);
1290
1291 BranchOrBacktrack(Comparison(kGT,
1292 LoadLocal(current_character_),
1293 Uint64Constant(to)),
1294 on_not_in_range);
1295 }
1296
1297
// Emits code that indexes |table| with current_character_ and branches to
// |on_bit_set| (backtrack if NULL) when the loaded entry is non-zero.
1298 void IRRegExpMacroAssembler::CheckBitInTable(
1299 const TypedData& table,
1300 BlockLabel* on_bit_set) {
1301 TAG();
1302
1303 PushArgumentInstr* table_push =
1304 PushArgument(Bind(new(I) ConstantInstr(table)));
1305 PushArgumentInstr* index_push = PushLocal(current_character_);
1306
// Mask the index with (kTableSize - 1) unless we are in ASCII mode and
// kTableMask equals Symbols::kMaxOneCharCodeSymbol.
1307 if (mode_ != ASCII || kTableMask != Symbols::kMaxOneCharCodeSymbol) {
1308 PushArgumentInstr* mask_push =
1309 PushArgument(Bind(Uint64Constant(kTableSize - 1)));
1310 index_push = PushArgument(
1311 Bind(InstanceCall(InstanceCallDescriptor::FromToken(Token::kBIT_AND),
1312 index_push,
1313 mask_push)));
1314 }
1315
// table[index] via the index operator.
1316 Definition* byte_def =
1317 InstanceCall(InstanceCallDescriptor::FromToken(Token::kINDEX),
1318 table_push,
1319 index_push);
1320 Definition* zero_def = Int64Constant(0);
1321
1322 BranchOrBacktrack(Comparison(kNE, byte_def, zero_def), on_bit_set);
1323 }
1324
1325
// Tries to emit specialized matching code for the character class identified
// by |type| ('s', 'S', 'd', 'D', '.', 'w', 'W', '*' or 'n'), tested against
// current_character_. On a non-matching character the emitted code branches
// to |on_no_match| (backtrack if NULL).
//
// Returns true if the class was handled here (for '*' no code is needed), and
// false if nothing was emitted; presumably the caller then falls back to the
// generic character-class code.
1326 bool IRRegExpMacroAssembler::CheckSpecialCharacterClass(
1327 uint16_t type,
1328 BlockLabel* on_no_match) {
1329 TAG();
1330
1331 // Range checks (c in min..max) are emitted as a pair of comparisons by
1332 // CheckCharacterInRange / CheckCharacterNotInRange.
1333 switch (type) {
1334 case 's':
1335 // Match space-characters
1336 if (mode_ == ASCII) {
1337 // One byte space characters are '\t'..'\r', ' ' and \u00a0.
1338 BlockLabel success;
1339 // Space (' ').
1340 BranchOrBacktrack(Comparison(kEQ,
1341 LoadLocal(current_character_),
1342 Uint64Constant(' ')),
1343 &success);
1344 // Check range 0x09..0x0d.
1345 CheckCharacterInRange('\t', '\r', &success);
1346 // \u00a0 (NBSP).
1347 BranchOrBacktrack(Comparison(kNE,
1348 LoadLocal(current_character_),
1349 Uint64Constant(0x00a0)),
1350 on_no_match);
1351 BindBlock(&success);
1352 return true;
1353 }
// No specialized code for 's' outside ASCII mode.
1354 return false;
1355 case 'S':
1356 // The emitted code for generic character classes is good enough.
1357 return false;
1358 case 'd':
1359 // Match ASCII digits ('0'..'9')
1360 CheckCharacterNotInRange('0', '9', on_no_match);
1361 return true;
1362 case 'D':
1363 // Match non ASCII-digits
1364 CheckCharacterInRange('0', '9', on_no_match);
1365 return true;
1366 case '.': {
1367 // Match non-newlines (not 0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029)
1368 BranchOrBacktrack(Comparison(kEQ,
1369 LoadLocal(current_character_),
1370 Uint64Constant('\n')),
1371 on_no_match);
1372 BranchOrBacktrack(Comparison(kEQ,
1373 LoadLocal(current_character_),
1374 Uint64Constant('\r')),
1375 on_no_match);
// 0x2028 and 0x2029 are only tested in UC16 mode.
1376 if (mode_ == UC16) {
1377 BranchOrBacktrack(Comparison(kEQ,
1378 LoadLocal(current_character_),
1379 Uint64Constant(0x2028)),
1380 on_no_match);
1381 BranchOrBacktrack(Comparison(kEQ,
1382 LoadLocal(current_character_),
1383 Uint64Constant(0x2029)),
1384 on_no_match);
1385 }
1386 return true;
1387 }
1388 case 'w': {
// Outside ASCII mode, anything above 'z' is rejected before the table lookup.
1389 if (mode_ != ASCII) {
1390 // Table is 128 entries, so all ASCII characters can be tested.
1391 BranchOrBacktrack(Comparison(kGT,
1392 LoadLocal(current_character_),
1393 Uint64Constant('z')),
1394 on_no_match);
1395 }
1396
1397 PushArgumentInstr* table_push =
1398 PushArgument(Bind(WordCharacterMapConstant()));
1399 PushArgumentInstr* index_push = PushLocal(current_character_);
1400
1401 Definition* byte_def =
1402 InstanceCall(InstanceCallDescriptor::FromToken(Token::kINDEX),
1403 table_push,
1404 index_push);
1405 Definition* zero_def = Int64Constant(0);
1406
// A zero table entry means "not a word character".
1407 BranchOrBacktrack(Comparison(kEQ, byte_def, zero_def), on_no_match);
1408
1409 return true;
1410 }
1411 case 'W': {
1412 BlockLabel done;
// Outside ASCII mode, anything above 'z' matches \W without a table lookup.
1413 if (mode_ != ASCII) {
1414 // Table is 128 entries, so all ASCII characters can be tested.
1415 BranchOrBacktrack(Comparison(kGT,
1416 LoadLocal(current_character_),
1417 Uint64Constant('z')),
1418 &done);
1419 }
1420
1421 // TODO(zerny): Refactor to use CheckBitInTable if possible.
1422
1423 PushArgumentInstr* table_push =
1424 PushArgument(Bind(WordCharacterMapConstant()));
1425 PushArgumentInstr* index_push = PushLocal(current_character_);
1426
1427 Definition* byte_def =
1428 InstanceCall(InstanceCallDescriptor::FromToken(Token::kINDEX),
1429 table_push,
1430 index_push);
1431 Definition* zero_def = Int64Constant(0);
1432
// A non-zero table entry means a word character, which fails \W.
1433 BranchOrBacktrack(Comparison(kNE, byte_def, zero_def), on_no_match);
1434
1435 if (mode_ != ASCII) {
1436 BindBlock(&done);
1437 }
1438 return true;
1439 }
1440 // Non-standard classes (with no syntactic shorthand) used internally.
1441 case '*':
1442 // Match any character.
1443 return true;
1444 case 'n': {
1445 // Match newlines (0x0a('\n'), 0x0d('\r'), 0x2028 or 0x2029).
1446 // The opposite of '.'.
1447 BlockLabel success;
1448 BranchOrBacktrack(Comparison(kEQ,
1449 LoadLocal(current_character_),
1450 Uint64Constant('\n')),
1451 &success);
1452 BranchOrBacktrack(Comparison(kEQ,
1453 LoadLocal(current_character_),
1454 Uint64Constant('\r')),
1455 &success);
1456 if (mode_ == UC16) {
1457 BranchOrBacktrack(Comparison(kEQ,
1458 LoadLocal(current_character_),
1459 Uint64Constant(0x2028)),
1460 &success);
1461 BranchOrBacktrack(Comparison(kEQ,
1462 LoadLocal(current_character_),
1463 Uint64Constant(0x2029)),
1464 &success);
1465 }
// None of the newline characters matched: unconditional jump to the failure
// target.
1466 BranchOrBacktrack(NULL, on_no_match);
1467 BindBlock(&success);
1468 return true;
1469 }
1470 // No custom implementation (yet): 's' outside ASCII mode, and 'S'.
1471 default:
1472 return false;
1473 }
1474 }
1475
1476
// Emits a jump to exit_block_, ending the match attempt with failure.
1477 void IRRegExpMacroAssembler::Fail() {
1478 TAG();
1479 ASSERT(FAILURE == 0); // Return value for failure is zero.
1480 if (!global()) {
1481 UNREACHABLE(); // Dart regexps are always global.
1482 }
1483 GoTo(exit_block_);
1484 }
1485
1486
// Emits a conditional branch to |if_ge| (backtrack if NULL) that is taken
// when the value of register |reg| is greater than or equal to |comparand|.
1487 void IRRegExpMacroAssembler::IfRegisterGE(intptr_t reg,
1488 intptr_t comparand,
1489 BlockLabel* if_ge) {
1490 TAG();
1491 BranchOrBacktrack(Comparison(kGTE,
1492 LoadLocal(position_register(reg)),
1493 Int64Constant(comparand)),
1494 if_ge);
1495 }
1496
1497
// Emits a conditional branch to |if_lt| (backtrack if NULL) that is taken
// when the value of register |reg| is less than |comparand|.
1498 void IRRegExpMacroAssembler::IfRegisterLT(intptr_t reg,
1499 intptr_t comparand,
1500 BlockLabel* if_lt) {
1501 TAG();
1502 BranchOrBacktrack(Comparison(kLT,
1503 LoadLocal(position_register(reg)),
1504 Int64Constant(comparand)),
1505 if_lt);
1506 }
1507
1508
// Emits a conditional branch to |if_eq| (backtrack if NULL) that is taken
// when the value of register |reg| equals current_position_.
1509 void IRRegExpMacroAssembler::IfRegisterEqPos(intptr_t reg,
1510 BlockLabel* if_eq) {
1511 TAG();
1512 BranchOrBacktrack(Comparison(kEQ,
1513 LoadLocal(position_register(reg)),
1514 LoadLocal(current_position_)),
1515 if_eq);
1516 }
1517
1518
// Identifies this assembler as the IR-generating implementation.
1519 RegExpMacroAssembler::IrregexpImplementation
1520 IRRegExpMacroAssembler::Implementation() {
1521 return kIRImplementation;
1522 }
1523
1524
// Emits code that loads |characters| code units, starting |cp_offset| from
// the current position, into current_character_. When |check_bounds| is true,
// a position check on the last code unit to be read is emitted first, and it
// branches to |on_end_of_input| if that position lies outside the input.
1525 void IRRegExpMacroAssembler::LoadCurrentCharacter(intptr_t cp_offset,
1526 BlockLabel* on_end_of_input,
1527 bool check_bounds,
1528 intptr_t characters) {
1529 TAG();
1530 ASSERT(cp_offset >= -1); // ^ and \b can look behind one character.
1531 ASSERT(cp_offset < (1<<30)); // Be sane! (And ensure negation works)
1532 if (check_bounds) {
// Checking the offset of the last code unit covers the whole read.
1533 CheckPosition(cp_offset + characters - 1, on_end_of_input);
1534 }
1535 LoadCurrentCharacterUnchecked(cp_offset, characters);
1536 }
1537
1538
// Emits code that pops the top of the backtrack stack into
// current_position_.
1539 void IRRegExpMacroAssembler::PopCurrentPosition() {
1540 TAG();
1541 StoreLocal(current_position_, PopStack());
1542 }
1543
1544
// Emits code that pops the top of the backtrack stack into register
// |register_index|.
1545 void IRRegExpMacroAssembler::PopRegister(intptr_t register_index) {
1546 TAG();
1547 ASSERT(register_index < position_registers_.length());
1548 StoreLocal(position_register(register_index), PopStack());
1549 }
1550
1551
// Emits a call to `add` on the backtrack stack object (stack_) with the value
// of |definition| as the argument. The call's result is discarded.
1552 void IRRegExpMacroAssembler::PushStack(Definition *definition) {
1553 PushArgumentInstr* stack_push = PushLocal(stack_);
1554 PushArgumentInstr* value_push = PushArgument(Bind(definition));
1555 Do(InstanceCall(InstanceCallDescriptor(Symbols::add()),
1556 stack_push,
1557 value_push));
1558 }
1559
1560
// Emits a call to `removeLast` on the backtrack stack object (stack_) and
// returns the bound result of that call, i.e. the popped value.
1561 Value* IRRegExpMacroAssembler::PopStack() {
1562 PushArgumentInstr* stack_push = PushLocal(stack_);
1563 return Bind(InstanceCall(InstanceCallDescriptor(Symbols::removeLast()),
1564 stack_push));
1565 }
1566
1567
1568 // Pushes the location corresponding to label to the backtracking stack.
// What is pushed is the indirect id of a freshly created indirect entry
// block that jumps to the label's block.
1569 void IRRegExpMacroAssembler::PushBacktrack(BlockLabel* label) {
1570 TAG();
1571
1572 // Ensure that targets of indirect jumps are never accessed through
1573 // normal control flow instructions by creating a new block for each
1574 // backtrack target.
1575 IndirectEntryInstr* indirect_target = IndirectWithJoinGoto(label->block());
1576
1577 // Add a fake edge from the graph entry for data flow analysis.
1578 entry_block_->AddIndirectEntry(indirect_target);
1579
1580 ConstantInstr* offset = Uint64Constant(indirect_target->indirect_id());
1581 PushStack(offset);
1582 }
1583
1584
// Emits code that pushes current_position_ onto the backtrack stack.
1585 void IRRegExpMacroAssembler::PushCurrentPosition() {
1586 TAG();
1587 PushStack(LoadLocal(current_position_));
1588 }
1589
1590
// Emits code that pushes the value of register |register_index| onto the
// backtrack stack.
1591 void IRRegExpMacroAssembler::PushRegister(intptr_t register_index) {
1592 TAG();
1593 PushStack(LoadLocal(position_register(register_index)));
1594 }
1595
1596
// Emits code that copies the value of register |reg| into current_position_.
1597 void IRRegExpMacroAssembler::ReadCurrentPositionFromRegister(intptr_t reg) {
1598 TAG();
1599 StoreLocal(current_position_, Bind(LoadLocal(position_register(reg))));
1600 }
1601
1602 // Resets the size of the stack to the value stored in reg.
// This is emitted as a call to the stack object's length setter.
1603 void IRRegExpMacroAssembler::ReadStackPointerFromRegister(intptr_t reg) {
1604 TAG();
1605 ASSERT(reg < position_registers_.length());
1606
1607 PushArgumentInstr* stack_push = PushLocal(stack_);
1608 PushArgumentInstr* length_push = PushLocal(position_register(reg));
1609
1610 Do(InstanceCall(
1611 InstanceCallDescriptor(
1612 String::ZoneHandle(I, Field::SetterSymbol(Symbols::Length()))),
1613 stack_push,
1614 length_push));
1615 }
1616
// Emits code that makes sure current_position_ is at least -|by|, i.e. no
// more than |by| characters before the end of the input (positions are
// negative offsets from the end of the string). If the position has to be
// moved, the character preceding the new position is reloaded into
// current_character_.
1617 void IRRegExpMacroAssembler::SetCurrentPositionFromEnd(intptr_t by) {
1618 TAG();
1619
1620 BlockLabel after_position;
1621
1622 Definition* cur_pos_def = LoadLocal(current_position_);
1623 Definition* by_value_def = Int64Constant(-by);
1624
// Already at or past -by: nothing to do.
1625 BranchOrBacktrack(Comparison(kGTE, cur_pos_def, by_value_def),
1626 &after_position);
1627
1628 StoreLocal(current_position_, Bind(Int64Constant(-by)));
1629
1630 // On RegExp code entry (where this operation is used), the character before
1631 // the current position is expected to be already loaded.
1632 // We have advanced the position, so it's safe to read backwards.
1633 LoadCurrentCharacterUnchecked(-1, 1);
1634
1635 BindBlock(&after_position);
1636 }
1637
1638
// Emits code that stores the constant |to| into register |register_index|.
// Registers below position_registers_count_ are reserved for positions and
// must not be set through this method.
1639 void IRRegExpMacroAssembler::SetRegister(intptr_t register_index, intptr_t to) {
1640 TAG();
1641 // Reserved for positions!
1642 ASSERT(register_index >= position_registers_count_);
1643 StoreLocal(position_register(register_index), Bind(Int64Constant(to)));
1644 }
1645
1646
// Emits a jump to success_block_ and returns the value of global().
1647 bool IRRegExpMacroAssembler::Succeed() {
1648 TAG();
1649 GoTo(success_block_);
1650 return global();
1651 }
1652
1653
// Emits code that stores (current_position_ + |cp_offset|) into register
// |reg|.
1654 void IRRegExpMacroAssembler::WriteCurrentPositionToRegister(
1655 intptr_t reg, intptr_t cp_offset) {
1656 TAG();
1657
1658 PushArgumentInstr* pos_push = PushLocal(current_position_);
1659 PushArgumentInstr* off_push =
1660 PushArgument(Bind(Int64Constant(cp_offset)));
1661
1662 // Store the negative offset; these are converted to positive string
1663 // positions within the success block.
1664 StoreLocal(position_register(reg), Bind(Add(pos_push, off_push)));
1665 }
1666
1667
// Emits code that resets every register in the inclusive range
// [|reg_from|, |reg_to|] to its "cleared" value.
1668 void IRRegExpMacroAssembler::ClearRegisters(
1669 intptr_t reg_from, intptr_t reg_to) {
1670 TAG();
1671
1672 ASSERT(reg_from <= reg_to);
1673 ASSERT(reg_to < position_registers_.length());
1674
1675 // In order to clear registers to a final result value of -1, set them to
1676 // (-1 - string length), the offset of -1 from the end of the string.
1677
// The subtraction is emitted once per register.
1678 for (intptr_t reg = reg_from; reg <= reg_to; reg++) {
1679 PushArgumentInstr* minus_one_push =
1680 PushArgument(Bind(Int64Constant(-1)));
1681 PushArgumentInstr* length_push = PushLocal(string_param_length_);
1682
1683 StoreLocal(position_register(reg), Bind(Sub(minus_one_push, length_push)));
1684 }
1685 }
1686
1687
// Emits code that stores the current length of the backtrack stack, read via
// the stack object's length getter, into register |reg|.
1688 void IRRegExpMacroAssembler::WriteStackPointerToRegister(intptr_t reg) {
1689 TAG();
1690
1691 PushArgumentInstr* stack_push = PushLocal(stack_);
1692 Value* length_value =
1693 Bind(InstanceCall(InstanceCallDescriptor(
1694 String::ZoneHandle(
1695 I, Field::GetterSymbol(Symbols::Length()))),
1696 stack_push));
1697
1698 StoreLocal(position_register(reg), length_value);
1699 }
1700
1701
1702 // Private methods:
1703
1704
// Emits a bounds check for the character at |cp_offset| from the current
// position: branches to |on_outside_input| (backtrack if NULL) when
// current_position_ >= -|cp_offset|.
1705 void IRRegExpMacroAssembler::CheckPosition(intptr_t cp_offset,
1706 BlockLabel* on_outside_input) {
1707 TAG();
1708 Definition* curpos_def = LoadLocal(current_position_);
1709 Definition* cp_off_def = Int64Constant(-cp_offset);
1710
1711 // If (current_position_ < -cp_offset), we are in bounds.
1712 // Remember, current_position_ is a negative offset from the string end.
1713
1714 BranchOrBacktrack(Comparison(kGTE, curpos_def, cp_off_def),
1715 on_outside_input);
1716 }
1717
1718
// Central branching helper.
//   - |comparison| == NULL: unconditional. Backtracks if |true_successor| is
//     also NULL, otherwise jumps to |true_successor|.
//   - |comparison| != NULL: closes the current block with a branch on
//     |comparison|. The true edge leads to |true_successor|, or to
//     backtrack_block_ if |true_successor| is NULL. The false edge leads to a
//     new fallthrough block, which is bound so that emission continues there.
// Both edges go through a fresh target block that holds only a goto to the
// join block.
1719 void IRRegExpMacroAssembler::BranchOrBacktrack(
1720 ComparisonInstr* comparison,
1721 BlockLabel* true_successor) {
1722 if (comparison == NULL) { // No condition
1723 if (true_successor == NULL) {
1724 Backtrack();
1725 return;
1726 }
1727 GoTo(true_successor);
1728 return;
1729 }
1730
1731 // If no successor block has been passed in, backtrack.
1732 JoinEntryInstr* true_successor_block = backtrack_block_;
1733 if (true_successor != NULL) {
1734 true_successor->SetLinked();
1735 true_successor_block = true_successor->block();
1736 }
1737 ASSERT(true_successor_block != NULL);
1738
1739 // If the condition is not true, fall through to a new block.
1740 BlockLabel fallthrough;
1741
1742 BranchInstr* branch = new(I) BranchInstr(comparison);
1743 *branch->true_successor_address() =
1744 TargetWithJoinGoto(true_successor_block);
1745 *branch->false_successor_address() =
1746 TargetWithJoinGoto(fallthrough.block());
1747
1748 CloseBlockWith(branch);
1749 BindBlock(&fallthrough);
1750 }
1751
1752
// Creates a new target entry block with a freshly allocated block id, records
// it in blocks_, and appends a goto to |dst| as its only instruction. Returns
// the new block.
1753 TargetEntryInstr* IRRegExpMacroAssembler::TargetWithJoinGoto(
1754 JoinEntryInstr* dst) {
1755 TargetEntryInstr* target = new(I) TargetEntryInstr(
1756 block_id_.Alloc(), kInvalidTryIndex);
1757 blocks_.Add(target);
1758
1759 target->AppendInstruction(new(I) GotoInstr(dst));
1760
1761 return target;
1762 }
1763
1764
// Creates a new indirect entry block with freshly allocated block and
// indirect ids, records it in blocks_, and appends a goto to |dst| as its
// only instruction. Returns the new block.
1765 IndirectEntryInstr* IRRegExpMacroAssembler::IndirectWithJoinGoto(
1766 JoinEntryInstr* dst) {
1767 IndirectEntryInstr* target = new(I) IndirectEntryInstr(
1768 block_id_.Alloc(), indirect_id_.Alloc(), kInvalidTryIndex);
1769 blocks_.Add(target);
1770
1771 target->AppendInstruction(new(I) GotoInstr(dst));
1772
1773 return target;
1774 }
1775
1776
// Appends a CheckStackOverflowInstr (no source position, second constructor
// argument 0) to the current block.
1777 void IRRegExpMacroAssembler::CheckPreemption() {
1778 TAG();
1779 AppendInstruction(new(I) CheckStackOverflowInstr(kNoSourcePos, 0));
1780 }
1781
1782
// Builds an instance call of the ADD operator on the two already-pushed
// arguments |lhs| and |rhs| and returns the call definition.
1783 Definition* IRRegExpMacroAssembler::Add(
1784 PushArgumentInstr* lhs,
1785 PushArgumentInstr* rhs) {
1786 return InstanceCall(InstanceCallDescriptor::FromToken(Token::kADD), lhs, rhs);
1787 }
1788
1789
// Builds an instance call of the SUB operator on the two already-pushed
// arguments |lhs| and |rhs| and returns the call definition.
1790 Definition* IRRegExpMacroAssembler::Sub(
1791 PushArgumentInstr* lhs,
1792 PushArgumentInstr* rhs) {
1793 return InstanceCall(InstanceCallDescriptor::FromToken(Token::kSUB), lhs, rhs);
1794 }
1795
1796
// Emits code that loads |characters| code units from the subject string,
// starting |cp_offset| from the current position, into current_character_.
// No bounds check is emitted. More than one code unit may only be requested
// when CanReadUnaligned() holds: 1, 2 or 4 in ASCII mode, 1 or 2 in UC16
// mode.
1797 void IRRegExpMacroAssembler::LoadCurrentCharacterUnchecked(
1798 intptr_t cp_offset, intptr_t characters) {
1799 TAG();
1800
1801 ASSERT(characters == 1 || CanReadUnaligned());
1802 if (mode_ == ASCII) {
1803 ASSERT(characters == 1 || characters == 2 || characters == 4);
1804 } else {
1805 ASSERT(mode_ == UC16);
1806 ASSERT(characters == 1 || characters == 2);
1807 }
1808
1809 // Bind the pattern as the load receiver.
1810 Value* pattern = BindLoadLocal(*string_param_);
1811
1812 // Calculate the addressed string index as:
1813 // cp_offset + current_position_ + string_param_length_
1814 // TODO(zerny): Avoid generating 'add' instance-calls here.
1815 PushArgumentInstr* off_arg =
1816 PushArgument(Bind(Int64Constant(cp_offset)));
1817 PushArgumentInstr* pos_arg =
1818 PushArgument(BindLoadLocal(*current_position_));
1819 PushArgumentInstr* off_pos_arg =
1820 PushArgument(Bind(Add(off_arg, pos_arg)));
1821 PushArgumentInstr* len_arg =
1822 PushArgument(BindLoadLocal(*string_param_length_));
1823 Value* index = Bind(Add(off_pos_arg, len_arg));
1824
1825 // Load and store the code units.
1826 Value* code_unit_value = LoadCodeUnitsAt(pattern, index, characters);
1827 StoreLocal(current_character_, code_unit_value);
// NOTE(review): PRINT is presumably one of the file's debugging output
// macros; its definition is not visible here.
1828 PRINT(PushLocal(current_character_));
1829 }
1830
1831
// Emits a load of the single code unit of the subject string (string_param_)
// at string index |index| and returns the bound value.
1832 Value* IRRegExpMacroAssembler::CharacterAt(Definition* index) {
1833 Value* pattern_val = BindLoadLocal(*string_param_);
1834 Value* index_val = Bind(index);
1835 return LoadCodeUnitsAt(pattern_val, index_val, 1);
1836 }
1837
1838
// Emits a LoadCodeUnitsInstr that reads |characters| code units from
// |pattern| starting at |index|, specialized for specialization_cid_, and
// returns the bound value.
1839 // Note: We can't replace pattern with a load-local of string_param_
1840 // because we need to maintain the stack discipline in unoptimized code.
1841 Value* IRRegExpMacroAssembler::LoadCodeUnitsAt(Value* pattern,
1842 Value* index,
1843 intptr_t characters) {
1844 return Bind(new(I) LoadCodeUnitsInstr(
1845 pattern,
1846 index,
1847 characters,
1848 specialization_cid_,
1849 Scanner::kNoSourcePos));
1850 }
1851
1852
1853 #undef __
22 1854
23 } // namespace dart 1855 } // namespace dart
OLDNEW
« no previous file with comments | « runtime/vm/regexp_assembler.h ('k') | runtime/vm/regexp_ast.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698