Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(21)

Side by Side Diff: runtime/vm/regexp_assembler.cc

Issue 539153002: Port and integrate the irregexp engine from V8 (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: Addressed Ivan's comments. Created 6 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « runtime/vm/regexp_assembler.h ('k') | runtime/vm/regexp_ast.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file.
4
5 #include "vm/regexp_assembler.h"
6
7 #include "vm/bit_vector.h"
8 #include "vm/compiler.h"
9 #include "vm/dart_entry.h"
10 #include "vm/flow_graph_builder.h"
11 #include "vm/il_printer.h"
12 #include "vm/object_store.h"
13 #include "vm/regexp.h"
14 #include "vm/resolver.h"
15 #include "vm/stack_frame.h"
16 #include "vm/unibrow-inl.h"
17 #include "vm/unicode.h"
18
// Shorthand that expands to a call to isolate(). The file passes it to
// placement new (new(I) ...) and to handle constructors.
#define I isolate()

// Debugging output macros. TAG() is called at the head of each interesting
// function and prints its name during execution if irregexp tracing is enabled.
//
// TAG() and PRINT() are wrapped in do { } while (0) so that each expands to
// exactly one statement. A bare `if (...) { ... }` followed by the caller's
// semicolon is two statements, which breaks (or silently re-binds) an `else`
// when the macro is used as the body of an unbraced `if`.
#define TAG()                                                                  \
  do {                                                                         \
    if (FLAG_trace_irregexp) {                                                 \
      TAG_();                                                                  \
    }                                                                          \
  } while (0)

// Emits a print of "TAG: " followed by the enclosing function's name. The
// trailing semicolon is part of the expansion (kept for existing callers).
#define TAG_()                                                                 \
  Print(PushArgument(                                                          \
    Bind(new(I) ConstantInstr(String::ZoneHandle(I, String::Concat(            \
        String::Handle(String::New("TAG: ")),                                  \
        String::Handle(String::New(__FUNCTION__)), Heap::kOld))))));

// Forwards |arg| to Print() only when irregexp tracing is enabled.
#define PRINT(arg)                                                             \
  do {                                                                         \
    if (FLAG_trace_irregexp) {                                                 \
      Print(arg);                                                              \
    }                                                                          \
  } while (0)
31
32 namespace dart {
33
34 DEFINE_FLAG(bool, trace_irregexp, false, "Trace irregexps");
35
36 static const intptr_t kInvalidTryIndex = -1;
37 static const intptr_t kNoTokenPos = -1;
38
/*
 * This assembler uses the following main local variables:
 * - stack_: A pointer to a growable list which we use as an all-purpose stack
 *           storing backtracking offsets, positions & stored register values.
 * - current_character_: Stores the currently loaded characters (possibly more
 *                       than one).
 * - current_position_: The current position within the string, stored as a
 *                      negative offset from the end of the string (i.e. the
 *                      position corresponding to str[0] is -str.length).
 *                      Note that current_position_ is *not* byte-based, unlike
 *                      original V8 code.
 *
 * Results are returned through an array of capture indices, stored at
 * matches_param_. A null array specifies a failure to match. The match indices
 * [start_inclusive, end_exclusive] for capture group i are stored at positions
 * matches_param_[i * 2] and matches_param_[i * 2 + 1], respectively. Match
 * indices of -1 denote non-matched groups. Note that we store these indices
 * as a negative offset from the end of the string in position_registers_
 * during processing, and convert them to standard indexes when copying them
 * to matches_param_ on successful match.
 */
60
61 RegExpMacroAssembler::RegExpMacroAssembler(Isolate* isolate)
62 : slow_safe_compiler_(false),
63 global_mode_(NOT_GLOBAL),
64 isolate_(isolate) {
65 }
66
67
68 RegExpMacroAssembler::~RegExpMacroAssembler() {
69 }
70
71
72 IRRegExpMacroAssembler::IRRegExpMacroAssembler(
73 intptr_t specialization_cid,
74 intptr_t capture_count,
75 const ParsedFunction* parsed_function,
76 const ZoneGrowableArray<const ICData*>& ic_data_array,
77 Isolate* isolate)
78 : RegExpMacroAssembler(isolate),
79 specialization_cid_(specialization_cid),
80 parsed_function_(parsed_function),
81 ic_data_array_(ic_data_array),
82 current_instruction_(NULL),
83 stack_(NULL),
84 current_character_(NULL),
85 current_position_(NULL),
86 string_param_(NULL),
87 string_param_length_(NULL),
88 start_index_param_(NULL),
89 position_registers_count_((capture_count + 1) * 2),
90 stack_array_(GrowableObjectArray::ZoneHandle(
91 isolate, GrowableObjectArray::New(16, Heap::kOld))) {
92 switch (specialization_cid) {
93 case kOneByteStringCid:
94 case kExternalOneByteStringCid: mode_ = ASCII; break;
95 case kTwoByteStringCid:
96 case kExternalTwoByteStringCid: mode_ = UC16; break;
97 default: UNREACHABLE();
98 }
99
100 InitializeLocals();
101
102 // Create and generate all preset blocks.
103 entry_block_ =
104 new(isolate) GraphEntryInstr(
105 parsed_function_,
106 new(isolate) TargetEntryInstr(block_id_.Alloc(), kInvalidTryIndex),
107 Isolate::kNoDeoptId);
108 start_block_ =
109 new(isolate) JoinEntryInstr(block_id_.Alloc(), kInvalidTryIndex);
110 success_block_ =
111 new(isolate) JoinEntryInstr(block_id_.Alloc(), kInvalidTryIndex);
112 backtrack_block_ =
113 new(isolate) JoinEntryInstr(block_id_.Alloc(), kInvalidTryIndex);
114 exit_block_ =
115 new(isolate) JoinEntryInstr(block_id_.Alloc(), kInvalidTryIndex);
116
117 GenerateEntryBlock();
118 GenerateSuccessBlock();
119 GenerateBacktrackBlock();
120 GenerateExitBlock();
121
122 blocks_.Add(entry_block_);
123 blocks_.Add(entry_block_->normal_entry());
124 blocks_.Add(start_block_);
125 blocks_.Add(success_block_);
126 blocks_.Add(backtrack_block_);
127 blocks_.Add(exit_block_);
128
129 // Begin emission at the start_block_.
130 set_current_instruction(start_block_);
131 }
132
133
134 IRRegExpMacroAssembler::~IRRegExpMacroAssembler() { }
135
136
137 void IRRegExpMacroAssembler::InitializeLocals() {
138 // Create local variables and parameters.
139 stack_ = Local(Symbols::stack_());
140 current_character_ = Local(Symbols::current_character_());
141 current_position_ = Local(Symbols::current_position_());
142 string_param_length_ = Local(Symbols::string_param_length_());
143 capture_length_ = Local(Symbols::capture_length_());
144 match_start_index_ = Local(Symbols::match_start_index_());
145 capture_start_index_ = Local(Symbols::capture_start_index_());
146 match_end_index_ = Local(Symbols::match_end_index_());
147 char_in_capture_ = Local(Symbols::char_in_capture_());
148 char_in_match_ = Local(Symbols::char_in_match_());
149 result_ = Local(Symbols::result_());
150
151 string_param_ = Parameter(Symbols::string_param_(), 0);
152 start_index_param_ = Parameter(Symbols::start_index_param_(), 1);
153
154 // Reserve space for all captured group positions. Note that more might
155 // be created on the fly for internal use.
156 for (intptr_t i = 0; i < position_registers_count_; i++) {
157 position_register(i);
158 }
159 }
160
161
162 void IRRegExpMacroAssembler::GenerateEntryBlock() {
163 set_current_instruction(entry_block_->normal_entry());
164 TAG();
165
166 // Generate a local list variable which we will use as a backtracking stack.
167
168 StoreLocal(stack_, Bind(new(I) ConstantInstr(stack_array_)));
169 Do(InstanceCall(InstanceCallDescriptor(Symbols::clear()), PushLocal(stack_)));
170
171 // Store string.length.
172 PushArgumentInstr* string_push = PushLocal(string_param_);
173
174 StoreLocal(string_param_length_,
175 Bind(InstanceCall(InstanceCallDescriptor(
176 String::ZoneHandle(
177 Field::GetterSymbol(Symbols::Length()))),
178 string_push)));
179
180 // Initialize all capture registers.
181 ClearRegisters(0, position_registers_count_ - 1);
182
183 // Store (start_index - string.length) as the current position (since it's a
184 // negative offset from the end of the string).
185 PushArgumentInstr* start_index_push = PushLocal(start_index_param_);
186 PushArgumentInstr* length_push = PushLocal(string_param_length_);
187
188 StoreLocal(current_position_, Bind(Sub(start_index_push, length_push)));
189
190 // Jump to the start block.
191 current_instruction_->Goto(start_block_);
192 }
193
194
195 void IRRegExpMacroAssembler::GenerateBacktrackBlock() {
196 set_current_instruction(backtrack_block_);
197 TAG();
198 Backtrack();
199 }
200
201
202 void IRRegExpMacroAssembler::GenerateSuccessBlock() {
203 set_current_instruction(success_block_);
204 TAG();
205
206 Definition* type_args_null_def = new(I) ConstantInstr(
207 TypeArguments::ZoneHandle(I, TypeArguments::null()));
208 PushArgumentInstr* type_arg_push = PushArgument(Bind(type_args_null_def));
209 PushArgumentInstr* length_push =
210 PushArgument(Bind(Uint64Constant(position_registers_count_)));
211
212 const Library& lib = Library::Handle(Library::CoreLibrary());
213 const Class& list_class = Class::Handle(
214 lib.LookupCoreClass(Symbols::List()));
215 const Function& list_ctor =
216 Function::ZoneHandle(I, list_class.LookupFactory(Symbols::ListFactory()));
217
218 // TODO(jgruber): Use CreateArrayInstr and StoreIndexed instead.
219 StoreLocal(result_, Bind(StaticCall(list_ctor, type_arg_push, length_push)));
220
221 // Store captured offsets in the `matches` parameter.
222 // TODO(jgruber): Eliminate position_register locals and access `matches`
223 // directly.
224 for (intptr_t i = 0; i < position_registers_count_; i++) {
225 PushArgumentInstr* matches_push = PushLocal(result_);
226 PushArgumentInstr* index_push = PushArgument(Bind(Uint64Constant(i)));
227
228 // Convert negative offsets from the end of the string to string indices.
229 PushArgumentInstr* offset_push = PushLocal(position_register(i));
230 PushArgumentInstr* len_push = PushLocal(string_param_length_);
231 PushArgumentInstr* value_push =
232 PushArgument(Bind(Add(offset_push, len_push)));
233
234 Do(InstanceCall(InstanceCallDescriptor::FromToken(Token::kASSIGN_INDEX),
235 matches_push,
236 index_push,
237 value_push));
238 }
239
240 // Print the result if tracing.
241 PRINT(PushLocal(result_));
242
243 // Return true on success.
244 AppendInstruction(new(I) ReturnInstr(kNoTokenPos, Bind(LoadLocal(result_))));
245 }
246
247
248 void IRRegExpMacroAssembler::GenerateExitBlock() {
249 set_current_instruction(exit_block_);
250 TAG();
251
252 // Return false on failure.
253 AppendInstruction(new(I) ReturnInstr(kNoTokenPos, Bind(LoadLocal(result_))));
254 }
255
256
257 #if defined(TARGET_ARCH_ARM64) || \
258 defined(TARGET_ARCH_ARM) || \
259 defined(TARGET_ARCH_MIPS)
260 // Disabling unaligned accesses forces the regexp engine to load characters one
261 // by one instead of up to 4 at once, along with the associated performance hit.
262 // TODO(jgruber): Be less conservative about disabling unaligned accesses.
263 // For instance, ARMv6 supports unaligned accesses. Once it is enabled here,
264 // update LoadCodeUnitsInstr methods for the appropriate architectures.
265 static const bool kEnableUnalignedAccesses = false;
266 #else
267 static const bool kEnableUnalignedAccesses = true;
268 #endif
269 bool IRRegExpMacroAssembler::CanReadUnaligned() {
270 return kEnableUnalignedAccesses && !slow_safe();
271 }
272
273
274 RawArray* IRRegExpMacroAssembler::Execute(
275 const Function& function,
276 const String& input,
277 const Smi& start_offset,
278 Isolate* isolate) {
279 // Create the argument list.
280 const Array& args = Array::Handle(Array::New(2));
281 args.SetAt(0, input);
282 args.SetAt(1, start_offset);
283
284 // And finally call the generated code.
285
286 const Object& retval =
287 Object::Handle(isolate, DartEntry::InvokeFunction(function, args));
288 if (retval.IsError()) {
289 const Error& error = Error::Cast(retval);
290 OS::Print("%s\n", error.ToErrorCString());
291 // Should never happen.
292 UNREACHABLE();
293 }
294
295 if (retval.IsNull()) {
296 return Array::null();
297 }
298
299 ASSERT(retval.IsArray());
300 return Array::Cast(retval).raw();
301 }
302
303
304 RawBool* IRRegExpMacroAssembler::CaseInsensitiveCompareUC16(
305 RawString* str_raw,
306 RawSmi* lhs_index_raw,
307 RawSmi* rhs_index_raw,
308 RawSmi* length_raw) {
309 const String& str = String::Handle(str_raw);
310 const Smi& lhs_index = Smi::Handle(lhs_index_raw);
311 const Smi& rhs_index = Smi::Handle(rhs_index_raw);
312 const Smi& length = Smi::Handle(length_raw);
313
314 // TODO(jgruber): Optimize as single instance. V8 has this as an
315 // isolate member.
316 unibrow::Mapping<unibrow::Ecma262Canonicalize> canonicalize;
317
318 for (intptr_t i = 0; i < length.Value(); i++) {
319 int32_t c1 = str.CharAt(lhs_index.Value() + i);
320 int32_t c2 = str.CharAt(rhs_index.Value() + i);
321 if (c1 != c2) {
322 int32_t s1[1] = { c1 };
323 canonicalize.get(c1, '\0', s1);
324 if (s1[0] != c2) {
325 int32_t s2[1] = { c2 };
326 canonicalize.get(c2, '\0', s2);
327 if (s1[0] != s2[0]) {
328 return Bool::False().raw();
329 }
330 }
331 }
332 }
333 return Bool::True().raw();
334 }
335
336
337 LocalVariable* IRRegExpMacroAssembler::Parameter(const String& name,
338 intptr_t index) const {
339 const Type& local_type = Type::ZoneHandle(I, Type::DynamicType());
340 LocalVariable* local =
341 new(I) LocalVariable(kNoTokenPos, name, local_type);
342
343 intptr_t param_frame_index = kParamEndSlotFromFp + kParamCount - index;
344 local->set_index(param_frame_index);
345
346 return local;
347 }
348
349
350 LocalVariable* IRRegExpMacroAssembler::Local(const String& name) {
351 const Type& local_type = Type::ZoneHandle(I, Type::DynamicType());
352 LocalVariable* local =
353 new(I) LocalVariable(kNoTokenPos, name, local_type);
354 local->set_index(GetNextLocalIndex());
355
356 return local;
357 }
358
359
360 ConstantInstr* IRRegExpMacroAssembler::Int64Constant(int64_t value) const {
361 return new(I) ConstantInstr(
362 Integer::ZoneHandle(I, Integer::New(value, Heap::kOld)));
363 }
364
365
366 ConstantInstr* IRRegExpMacroAssembler::Uint64Constant(uint64_t value) const {
367 return new(I) ConstantInstr(
368 Integer::ZoneHandle(I, Integer::NewFromUint64(value, Heap::kOld)));
369 }
370
371
372 ConstantInstr* IRRegExpMacroAssembler::BoolConstant(bool value) const {
373 return new(I) ConstantInstr(value ? Bool::True() : Bool::False());
374 }
375
376
377 ConstantInstr* IRRegExpMacroAssembler::StringConstant(const char* value) const {
378 return new(I) ConstantInstr(
379 String::ZoneHandle(I, String::New(value, Heap::kOld)));
380 }
381
382
383 ConstantInstr* IRRegExpMacroAssembler::WordCharacterMapConstant() const {
384 const Library& lib = Library::Handle(I, Library::CoreLibrary());
385 const Class& regexp_class = Class::Handle(I,
386 lib.LookupClassAllowPrivate(Symbols::JSSyntaxRegExp()));
387 const Field& word_character_field = Field::ZoneHandle(I,
388 regexp_class.LookupStaticField(Symbols::_wordCharacterMap()));
389 ASSERT(!word_character_field.IsNull());
390
391 if (word_character_field.IsUninitialized()) {
392 word_character_field.EvaluateInitializer();
393 }
394 ASSERT(!word_character_field.IsUninitialized());
395
396 return new(I) ConstantInstr(
397 Instance::ZoneHandle(I, word_character_field.value()));
398 }
399
400
401 ComparisonInstr* IRRegExpMacroAssembler::Comparison(
402 ComparisonKind kind, Definition* lhs, Definition* rhs) {
403 Token::Kind strict_comparison = Token::kEQ_STRICT;
404 Token::Kind intermediate_operator = Token::kILLEGAL;
405 switch (kind) {
406 case kEQ:
407 intermediate_operator = Token::kEQ;
408 break;
409 case kNE:
410 intermediate_operator = Token::kEQ;
411 strict_comparison = Token::kNE_STRICT;
412 break;
413 case kLT:
414 intermediate_operator = Token::kLT;
415 break;
416 case kGT:
417 intermediate_operator = Token::kGT;
418 break;
419 case kLTE:
420 intermediate_operator = Token::kLTE;
421 break;
422 case kGTE:
423 intermediate_operator = Token::kGTE;
424 break;
425 default:
426 UNREACHABLE();
427 }
428
429 ASSERT(intermediate_operator != Token::kILLEGAL);
430
431 PushArgumentInstr* lhs_push = PushArgument(Bind(lhs));
432 PushArgumentInstr* rhs_push = PushArgument(Bind(rhs));
433
434 Value* lhs_value =
435 Bind(InstanceCall(
436 InstanceCallDescriptor::FromToken(intermediate_operator),
437 lhs_push,
438 rhs_push));
439 Value* rhs_value = Bind(BoolConstant(true));
440
441 return new(I) StrictCompareInstr(kNoTokenPos, strict_comparison,
442 lhs_value, rhs_value, true);
443 }
444
445
446 StaticCallInstr* IRRegExpMacroAssembler::StaticCall(
447 const Function& function) const {
448 ZoneGrowableArray<PushArgumentInstr*>* arguments =
449 new(I) ZoneGrowableArray<PushArgumentInstr*>(0);
450 return StaticCall(function, arguments);
451 }
452
453
454 StaticCallInstr* IRRegExpMacroAssembler::StaticCall(
455 const Function& function,
456 PushArgumentInstr* arg1) const {
457 ZoneGrowableArray<PushArgumentInstr*>* arguments =
458 new(I) ZoneGrowableArray<PushArgumentInstr*>(1);
459 arguments->Add(arg1);
460
461 return StaticCall(function, arguments);
462 }
463
464
465 StaticCallInstr* IRRegExpMacroAssembler::StaticCall(
466 const Function& function,
467 PushArgumentInstr* arg1,
468 PushArgumentInstr* arg2) const {
469 ZoneGrowableArray<PushArgumentInstr*>* arguments =
470 new(I) ZoneGrowableArray<PushArgumentInstr*>(2);
471 arguments->Add(arg1);
472 arguments->Add(arg2);
473
474 return StaticCall(function, arguments);
475 }
476
477
478 StaticCallInstr* IRRegExpMacroAssembler::StaticCall(
479 const Function& function,
480 ZoneGrowableArray<PushArgumentInstr*>* arguments) const {
481 return new(I) StaticCallInstr(kNoTokenPos,
482 function,
483 Object::null_array(),
484 arguments,
485 ic_data_array_);
486 }
487
488
489 InstanceCallInstr* IRRegExpMacroAssembler::InstanceCall(
490 const InstanceCallDescriptor& desc,
491 PushArgumentInstr* arg1) const {
492 ZoneGrowableArray<PushArgumentInstr*>* arguments =
493 new(I) ZoneGrowableArray<PushArgumentInstr*>(1);
494 arguments->Add(arg1);
495
496 return InstanceCall(desc, arguments);
497 }
498
499
500 InstanceCallInstr* IRRegExpMacroAssembler::InstanceCall(
501 const InstanceCallDescriptor& desc,
502 PushArgumentInstr* arg1,
503 PushArgumentInstr* arg2) const {
504 ZoneGrowableArray<PushArgumentInstr*>* arguments =
505 new(I) ZoneGrowableArray<PushArgumentInstr*>(2);
506 arguments->Add(arg1);
507 arguments->Add(arg2);
508
509 return InstanceCall(desc, arguments);
510 }
511
512
513 InstanceCallInstr* IRRegExpMacroAssembler::InstanceCall(
514 const InstanceCallDescriptor& desc,
515 PushArgumentInstr* arg1,
516 PushArgumentInstr* arg2,
517 PushArgumentInstr* arg3) const {
518 ZoneGrowableArray<PushArgumentInstr*>* arguments =
519 new(I) ZoneGrowableArray<PushArgumentInstr*>(3);
520 arguments->Add(arg1);
521 arguments->Add(arg2);
522 arguments->Add(arg3);
523
524 return InstanceCall(desc, arguments);
525 }
526
527
528 InstanceCallInstr* IRRegExpMacroAssembler::InstanceCall(
529 const InstanceCallDescriptor& desc,
530 ZoneGrowableArray<PushArgumentInstr*> *arguments) const {
531 return
532 new(I) InstanceCallInstr(kNoTokenPos,
533 desc.name,
534 desc.token_kind,
535 arguments,
536 Object::null_array(),
537 desc.checked_argument_count,
538 ic_data_array_);
539 }
540
541
542 LoadLocalInstr* IRRegExpMacroAssembler::LoadLocal(LocalVariable* local) const {
543 return new(I) LoadLocalInstr(*local);
544 }
545
546
547 void IRRegExpMacroAssembler::StoreLocal(LocalVariable* local,
548 Value* value) {
549 Do(new(I) StoreLocalInstr(*local, value));
550 }
551
552
553 void IRRegExpMacroAssembler::set_current_instruction(Instruction* instruction) {
554 current_instruction_ = instruction;
555 }
556
557
558 Value* IRRegExpMacroAssembler::Bind(Definition* definition) {
559 AppendInstruction(definition);
560 definition->set_temp_index(temp_id_.Alloc());
561
562 return new(I) Value(definition);
563 }
564
565
566 void IRRegExpMacroAssembler::Do(Definition* definition) {
567 AppendInstruction(definition);
568 }
569
// In some cases, the V8 irregexp engine generates unreachable code by emitting
// a jmp not followed by a bind. We cannot do the same, since it is impossible
// to append to a block following a jmp. In such cases, assume that we are doing
// the correct thing, but output a warning when tracing.
//
// When the assembler is closed (current_instruction_ == NULL) this binds a
// throwaway block so that emission can continue into it.
//
// The body is wrapped in do { } while (0) so that the macro expands to exactly
// one statement and stays correct inside an unbraced if/else.
#define HANDLE_DEAD_CODE_EMISSION()                                            \
  do {                                                                         \
    if (current_instruction_ == NULL) {                                        \
      if (FLAG_trace_irregexp) {                                               \
        OS::Print("WARNING: Attempting to append to a closed assembler. "      \
                  "This could be either a bug or generation of dead code "     \
                  "inherited from V8.\n");                                     \
      }                                                                        \
      BlockLabel dummy;                                                        \
      BindBlock(&dummy);                                                       \
    }                                                                          \
  } while (0)
584
585 void IRRegExpMacroAssembler::AppendInstruction(Instruction* instruction) {
586 HANDLE_DEAD_CODE_EMISSION();
587
588 ASSERT(current_instruction_ != NULL);
589 ASSERT(current_instruction_->next() == NULL);
590
591 temp_id_.Dealloc(instruction->InputCount());
592 arg_id_.Dealloc(instruction->ArgumentCount());
593
594 current_instruction_->LinkTo(instruction);
595 set_current_instruction(instruction);
596 }
597
598
599 void IRRegExpMacroAssembler::CloseBlockWith(Instruction* instruction) {
600 HANDLE_DEAD_CODE_EMISSION();
601
602 ASSERT(current_instruction_ != NULL);
603 ASSERT(current_instruction_->next() == NULL);
604
605 temp_id_.Dealloc(instruction->InputCount());
606 arg_id_.Dealloc(instruction->ArgumentCount());
607
608 current_instruction_->LinkTo(instruction);
609 set_current_instruction(NULL);
610 }
611
612
613 void IRRegExpMacroAssembler::GoTo(BlockLabel* to) {
614 if (to == NULL) {
615 Backtrack();
616 } else {
617 to->SetLinked();
618 GoTo(to->block());
619 }
620 }
621
622
623 // Closes the current block with a goto, and unsets current_instruction_.
624 // BindBlock() must be called before emission can continue.
625 void IRRegExpMacroAssembler::GoTo(JoinEntryInstr* to) {
626 HANDLE_DEAD_CODE_EMISSION();
627
628 ASSERT(current_instruction_ != NULL);
629 ASSERT(current_instruction_->next() == NULL);
630 current_instruction_->Goto(to);
631 set_current_instruction(NULL);
632 }
633
634
635 PushArgumentInstr* IRRegExpMacroAssembler::PushArgument(Value* value) {
636 arg_id_.Alloc();
637 PushArgumentInstr* push = new(I) PushArgumentInstr(value);
638 // Do *not* use Do() for push argument instructions.
639 AppendInstruction(push);
640 return push;
641 }
642
643
644 PushArgumentInstr* IRRegExpMacroAssembler::PushLocal(LocalVariable* local) {
645 return PushArgument(Bind(LoadLocal(local)));
646 }
647
648
649 void IRRegExpMacroAssembler::Print(const char* str) {
650 Print(PushArgument(
651 Bind(new(I) ConstantInstr(
652 String::ZoneHandle(I, String::New(str, Heap::kOld))))));
653 }
654
655
656 void IRRegExpMacroAssembler::Print(PushArgumentInstr* argument) {
657 const Library& lib = Library::Handle(Library::CoreLibrary());
658 const Function& print_fn = Function::ZoneHandle(
659 I, lib.LookupFunctionAllowPrivate(Symbols::print()));
660 Do(StaticCall(print_fn, argument));
661 }
662
663
664 void IRRegExpMacroAssembler::PrintBlocks() {
665 for (intptr_t i = 0; i < blocks_.length(); i++) {
666 FlowGraphPrinter::PrintBlock(blocks_[i], false);
667 }
668 }
669
670
671 intptr_t IRRegExpMacroAssembler::stack_limit_slack() {
672 return 32;
673 }
674
675
676 void IRRegExpMacroAssembler::AdvanceCurrentPosition(intptr_t by) {
677 TAG();
678 if (by != 0) {
679 PushArgumentInstr* cur_pos_push = PushLocal(current_position_);
680 PushArgumentInstr* by_push = PushArgument(Bind(Int64Constant(by)));
681
682 Value* new_pos_value = Bind(Add(cur_pos_push, by_push));
683 StoreLocal(current_position_, new_pos_value);
684 }
685 }
686
687
688 void IRRegExpMacroAssembler::AdvanceRegister(intptr_t reg, intptr_t by) {
689 TAG();
690 ASSERT(reg >= 0);
691 ASSERT(reg < position_registers_.length());
692
693 if (by != 0) {
694 PushArgumentInstr* reg_push = PushLocal(position_register(reg));
695 PushArgumentInstr* by_push = PushArgument(Bind(Int64Constant(by)));
696 StoreLocal(position_register(reg), Bind(Add(reg_push, by_push)));
697 }
698 }
699
700
701 void IRRegExpMacroAssembler::Backtrack() {
702 TAG();
703 CheckPreemption();
704
705 GrowableObjectArray& offsets = GrowableObjectArray::ZoneHandle(
706 I, GrowableObjectArray::New(Heap::kOld));
707
708 PushArgumentInstr* block_offsets_push =
709 PushArgument(Bind(new(I) ConstantInstr(offsets)));
710 PushArgumentInstr* block_id_push = PushArgument(PopStack());
711
712 Value* offset_value =
713 Bind(InstanceCall(InstanceCallDescriptor::FromToken(Token::kINDEX),
714 block_offsets_push,
715 block_id_push));
716
717 IndirectGotoInstr* igoto = new(I) IndirectGotoInstr(&offsets, offset_value);
718 CloseBlockWith(igoto);
719 igotos_.Add(igoto);
720 }
721
722
723 // A BindBlock is analogous to assigning a label to a basic block.
724 // If the BlockLabel does not yet contain a block, it is created.
725 // If there is a current instruction, append a goto to the bound block.
726 void IRRegExpMacroAssembler::BindBlock(BlockLabel* label) {
727 ASSERT(!label->IsBound());
728 ASSERT(label->block()->next() == NULL);
729
730 label->SetBound(block_id_.Alloc());
731 blocks_.Add(label->block());
732
733 if (current_instruction_ != NULL) {
734 GoTo(label);
735 }
736 set_current_instruction(label->block());
737
738 // Print the id of the current block if tracing.
739 PRINT(PushArgument(Bind(Uint64Constant(label->block()->block_id()))));
740 }
741
742
743 intptr_t IRRegExpMacroAssembler::GetNextLocalIndex() {
744 intptr_t id = local_id_.Alloc();
745 return kFirstLocalSlotFromFp - id;
746 }
747
748
749 LocalVariable* IRRegExpMacroAssembler::position_register(intptr_t index) {
750 // Create position registers as needed.
751 for (intptr_t i = position_registers_.length(); i < index + 1; i++) {
752 position_registers_.Add(Local(Symbols::position_registers_()));
753 }
754
755 return position_registers_[index];
756 }
757
758
759 // TODO(jgruber): Move the offset table outside to avoid having to keep
760 // the assembler around until after code generation; both function or regexp
761 // would work.
762 void IRRegExpMacroAssembler::FinalizeBlockOffsetTable() {
763 for (intptr_t i = 0; i < igotos_.length(); i++) {
764 IndirectGotoInstr* igoto = igotos_[i];
765 igoto->SetOffsetCount(I, indirect_id_.Count());
766
767 for (intptr_t j = 0; j < igoto->SuccessorCount(); j++) {
768 TargetEntryInstr* target = igoto->SuccessorAt(j);
769
770 // Optimizations might have modified the immediate target block, but
771 // it must end with a goto to the indirect entry.
772 Instruction* instr = target;
773 while (instr != NULL && !instr->IsGoto()) {
774 instr = instr->next();
775 }
776 ASSERT(instr->IsGoto());
777
778 IndirectEntryInstr* ientry =
779 instr->AsGoto()->successor()->AsIndirectEntry();
780 ASSERT(ientry != NULL);
781
782 // The intermediate block was possibly compacted, check both it and the
783 // final indirect entry for a valid offset. If neither are valid, then
784 // the indirect entry is unreachable.
785 intptr_t offset =
786 (target->offset() > 0) ? target->offset() : ientry->offset();
787 if (offset > 0) {
788 intptr_t adjusted_offset =
789 offset - Assembler::EntryPointToPcMarkerOffset();
790 igoto->SetOffsetAt(I, ientry->indirect_id(), adjusted_offset);
791 }
792 }
793 }
794 }
795
796 void IRRegExpMacroAssembler::FinalizeIndirectGotos() {
797 for (intptr_t i = 0; i < igotos_.length(); i++) {
798 for (intptr_t j = 0; j < entry_block_->indirect_entries().length(); j++) {
799 igotos_.At(i)->AddSuccessor(
800 TargetWithJoinGoto(entry_block_->indirect_entries().At(j)));
801 }
802 }
803 }
804
805
806 void IRRegExpMacroAssembler::CheckCharacter(uint32_t c, BlockLabel* on_equal) {
807 TAG();
808 Definition* cur_char_def = LoadLocal(current_character_);
809 Definition* char_def = Uint64Constant(c);
810
811 BranchOrBacktrack(Comparison(kEQ, cur_char_def, char_def),
812 on_equal);
813 }
814
815
816 void IRRegExpMacroAssembler::CheckCharacterGT(uint16_t limit,
817 BlockLabel* on_greater) {
818 TAG();
819 BranchOrBacktrack(Comparison(kGT,
820 LoadLocal(current_character_),
821 Uint64Constant(limit)),
822 on_greater);
823 }
824
825
826 void IRRegExpMacroAssembler::CheckAtStart(BlockLabel* on_at_start) {
827 TAG();
828
829 BlockLabel not_at_start;
830
831 // Did we start the match at the start of the string at all?
832 BranchOrBacktrack(Comparison(kNE,
833 LoadLocal(start_index_param_),
834 Uint64Constant(0)),
835 &not_at_start);
836
837 // If we did, are we still at the start of the input, i.e. is
838 // (offset == string_length * -1)?
839 Definition* neg_len_def =
840 InstanceCall(InstanceCallDescriptor::FromToken(Token::kNEGATE),
841 PushLocal(string_param_length_));
842 Definition* offset_def = LoadLocal(current_position_);
843 BranchOrBacktrack(Comparison(kEQ, neg_len_def, offset_def),
844 on_at_start);
845
846 BindBlock(&not_at_start);
847 }
848
849
850 void IRRegExpMacroAssembler::CheckNotAtStart(BlockLabel* on_not_at_start) {
851 TAG();
852
853 // Did we start the match at the start of the string at all?
854 BranchOrBacktrack(Comparison(kNE,
855 LoadLocal(start_index_param_),
856 Uint64Constant(0)),
857 on_not_at_start);
858
859 // If we did, are we still at the start of the input, i.e. is
860 // (offset == string_length * -1)?
861 Definition* neg_len_def =
862 InstanceCall(InstanceCallDescriptor::FromToken(Token::kNEGATE),
863 PushLocal(string_param_length_));
864 Definition* offset_def = LoadLocal(current_position_);
865 BranchOrBacktrack(Comparison(kNE, neg_len_def, offset_def),
866 on_not_at_start);
867 }
868
869
870 void IRRegExpMacroAssembler::CheckCharacterLT(uint16_t limit,
871 BlockLabel* on_less) {
872 TAG();
873 BranchOrBacktrack(Comparison(kLT,
874 LoadLocal(current_character_),
875 Uint64Constant(limit)),
876 on_less);
877 }
878
879
880 void IRRegExpMacroAssembler::CheckGreedyLoop(BlockLabel* on_equal) {
881 TAG();
882
883 BlockLabel fallthrough;
884
885 PushArgumentInstr* stack_push = PushLocal(stack_);
886 Definition* stack_tip_def = InstanceCall(
887 InstanceCallDescriptor(String::ZoneHandle(
888 I, Field::GetterSymbol(Symbols::last()))),
889 stack_push);
890 Definition* cur_pos_def = LoadLocal(current_position_);
891
892 BranchOrBacktrack(Comparison(kNE, stack_tip_def, cur_pos_def),
893 &fallthrough);
894
895 // Pop, throwing away the value.
896 stack_push = PushLocal(stack_);
897 Do(InstanceCall(InstanceCallDescriptor(Symbols::removeLast()),
898 stack_push));
899
900 BranchOrBacktrack(NULL, on_equal);
901
902 BindBlock(&fallthrough);
903 }
904
905
906 void IRRegExpMacroAssembler::CheckNotBackReferenceIgnoreCase(
907 intptr_t start_reg,
908 BlockLabel* on_no_match) {
909 TAG();
910 ASSERT(start_reg + 1 <= position_registers_.length());
911
912 BlockLabel fallthrough;
913
914 PushArgumentInstr* end_push = PushLocal(position_register(start_reg + 1));
915 PushArgumentInstr* start_push = PushLocal(position_register(start_reg));
916 StoreLocal(capture_length_, Bind(Sub(end_push, start_push)));
917
918 // The length of a capture should not be negative. This can only happen
919 // if the end of the capture is unrecorded, or at a point earlier than
920 // the start of the capture.
921 // BranchOrBacktrack(less, on_no_match);
922
923 BranchOrBacktrack(Comparison(kLT,
924 LoadLocal(capture_length_),
925 Uint64Constant(0)),
926 on_no_match);
927
928 // If length is zero, either the capture is empty or it is completely
929 // uncaptured. In either case succeed immediately.
930 BranchOrBacktrack(Comparison(kEQ,
931 LoadLocal(capture_length_),
932 Uint64Constant(0)),
933 &fallthrough);
934
935
936 // Check that there are sufficient characters left in the input.
937 PushArgumentInstr* pos_push = PushLocal(current_position_);
938 PushArgumentInstr* len_push = PushLocal(capture_length_);
939 BranchOrBacktrack(
940 Comparison(kGT,
941 InstanceCall(InstanceCallDescriptor::FromToken(Token::kADD),
942 pos_push,
943 len_push),
944 Uint64Constant(0)),
945 on_no_match);
946
947 pos_push = PushLocal(current_position_);
948 len_push = PushLocal(string_param_length_);
949 StoreLocal(match_start_index_, Bind(Add(pos_push, len_push)));
950
951 pos_push = PushLocal(position_register(start_reg));
952 len_push = PushLocal(string_param_length_);
953 StoreLocal(capture_start_index_, Bind(Add(pos_push, len_push)));
954
955 pos_push = PushLocal(match_start_index_);
956 len_push = PushLocal(capture_length_);
957 StoreLocal(match_end_index_, Bind(Add(pos_push, len_push)));
958
959 BlockLabel success;
960 if (mode_ == ASCII) {
961 BlockLabel loop_increment;
962 BlockLabel loop;
963 BindBlock(&loop);
964
965 StoreLocal(char_in_capture_, CharacterAt(LoadLocal(capture_start_index_)));
966 StoreLocal(char_in_match_, CharacterAt(LoadLocal(match_start_index_)));
967
968 BranchOrBacktrack(Comparison(kEQ,
969 LoadLocal(char_in_capture_),
970 LoadLocal(char_in_match_)),
971 &loop_increment);
972
973 // Mismatch, try case-insensitive match (converting letters to lower-case).
974 PushArgumentInstr* match_char_push = PushLocal(char_in_match_);
975 PushArgumentInstr* mask_push = PushArgument(Bind(Uint64Constant(0x20)));
976 StoreLocal(char_in_match_,
977 Bind(InstanceCall(
978 InstanceCallDescriptor::FromToken(Token::kBIT_OR),
979 match_char_push,
980 mask_push)));
981
982 BlockLabel convert_capture;
983 BlockLabel on_not_in_range;
984 BranchOrBacktrack(Comparison(kLT,
985 LoadLocal(char_in_match_),
986 Uint64Constant('a')),
987 &on_not_in_range);
988 BranchOrBacktrack(Comparison(kGT,
989 LoadLocal(char_in_match_),
990 Uint64Constant('z')),
991 &on_not_in_range);
992 GoTo(&convert_capture);
993 BindBlock(&on_not_in_range);
994
995 // Latin-1: Check for values in range [224,254] but not 247.
996 BranchOrBacktrack(Comparison(kLT,
997 LoadLocal(char_in_match_),
998 Uint64Constant(224)),
999 on_no_match);
1000 BranchOrBacktrack(Comparison(kGT,
1001 LoadLocal(char_in_match_),
1002 Uint64Constant(254)),
1003 on_no_match);
1004
1005 BranchOrBacktrack(Comparison(kEQ,
1006 LoadLocal(char_in_match_),
1007 Uint64Constant(247)),
1008 on_no_match);
1009
1010 // Also convert capture character.
1011 BindBlock(&convert_capture);
1012
1013 PushArgumentInstr* capture_char_push = PushLocal(char_in_capture_);
1014 mask_push = PushArgument(Bind(Uint64Constant(0x20)));
1015 StoreLocal(char_in_capture_,
1016 Bind(InstanceCall(
1017 InstanceCallDescriptor::FromToken(Token::kBIT_OR),
1018 capture_char_push,
1019 mask_push)));
1020
1021 BranchOrBacktrack(Comparison(kNE,
1022 LoadLocal(char_in_match_),
1023 LoadLocal(char_in_capture_)),
1024 on_no_match);
1025
1026 BindBlock(&loop_increment);
1027
1028 // Increment pointers into match and capture strings.
1029 StoreLocal(capture_start_index_, Bind(Add(
1030 PushLocal(capture_start_index_),
1031 PushArgument(Bind(Uint64Constant(1))))));
1032 StoreLocal(match_start_index_, Bind(Add(
1033 PushLocal(match_start_index_),
1034 PushArgument(Bind(Uint64Constant(1))))));
1035
1036 // Compare to end of match, and loop if not done.
1037 BranchOrBacktrack(Comparison(kLT,
1038 LoadLocal(match_start_index_),
1039 LoadLocal(match_end_index_)),
1040 &loop);
1041 } else {
1042 ASSERT(mode_ == UC16);
1043
1044 Value* string_value = Bind(LoadLocal(string_param_));
1045 Value* lhs_index_value = Bind(LoadLocal(match_start_index_));
1046 Value* rhs_index_value = Bind(LoadLocal(capture_start_index_));
1047 Value* length_value = Bind(LoadLocal(capture_length_));
1048
1049 Definition* is_match_def =
1050 new(I) CaseInsensitiveCompareUC16Instr(
1051 string_value,
1052 lhs_index_value,
1053 rhs_index_value,
1054 length_value,
1055 specialization_cid_);
1056
1057 BranchOrBacktrack(Comparison(kNE, is_match_def, BoolConstant(true)),
1058 on_no_match);
1059 }
1060
1061 BindBlock(&success);
1062
1063 // Move current character position to position after match.
1064 PushArgumentInstr* match_end_push = PushLocal(match_end_index_);
1065 len_push = PushLocal(string_param_length_);
1066 StoreLocal(current_position_, Bind(Sub(match_end_push, len_push)));
1067
1068 BindBlock(&fallthrough);
1069 }
1070
1071
1072 void IRRegExpMacroAssembler::CheckNotBackReference(
1073 intptr_t start_reg,
1074 BlockLabel* on_no_match) {
1075 TAG();
1076 ASSERT(start_reg + 1 <= position_registers_.length());
1077
1078 BlockLabel fallthrough;
1079 BlockLabel success;
1080
1081 // Find length of back-referenced capture.
1082 PushArgumentInstr* end_push = PushLocal(position_register(start_reg + 1));
1083 PushArgumentInstr* start_push = PushLocal(position_register(start_reg));
1084 StoreLocal(capture_length_, Bind(Sub(end_push, start_push)));
1085
1086 // Fail on partial or illegal capture (start of capture after end of capture).
1087 BranchOrBacktrack(Comparison(kLT,
1088 LoadLocal(capture_length_),
1089 Uint64Constant(0)),
1090 on_no_match);
1091
1092 // Succeed on empty capture (including no capture)
1093 BranchOrBacktrack(Comparison(kEQ,
1094 LoadLocal(capture_length_),
1095 Uint64Constant(0)),
1096 &fallthrough);
1097
1098 // Check that there are sufficient characters left in the input.
1099 PushArgumentInstr* pos_push = PushLocal(current_position_);
1100 PushArgumentInstr* len_push = PushLocal(capture_length_);
1101 BranchOrBacktrack(
1102 Comparison(kGT,
1103 InstanceCall(InstanceCallDescriptor::FromToken(Token::kADD),
1104 pos_push,
1105 len_push),
1106 Uint64Constant(0)),
1107 on_no_match);
1108
1109 // Compute pointers to match string and capture string.
1110 pos_push = PushLocal(current_position_);
1111 len_push = PushLocal(string_param_length_);
1112 StoreLocal(match_start_index_, Bind(Add(pos_push, len_push)));
1113
1114 pos_push = PushLocal(position_register(start_reg));
1115 len_push = PushLocal(string_param_length_);
1116 StoreLocal(capture_start_index_, Bind(Add(pos_push, len_push)));
1117
1118 pos_push = PushLocal(match_start_index_);
1119 len_push = PushLocal(capture_length_);
1120 StoreLocal(match_end_index_, Bind(Add(pos_push, len_push)));
1121
1122 BlockLabel loop;
1123 BindBlock(&loop);
1124
1125 StoreLocal(char_in_capture_, CharacterAt(LoadLocal(capture_start_index_)));
1126 StoreLocal(char_in_match_, CharacterAt(LoadLocal(match_start_index_)));
1127
1128 BranchOrBacktrack(Comparison(kNE,
1129 LoadLocal(char_in_capture_),
1130 LoadLocal(char_in_match_)),
1131 on_no_match);
1132
1133 // Increment pointers into capture and match string.
1134 StoreLocal(capture_start_index_, Bind(Add(
1135 PushLocal(capture_start_index_),
1136 PushArgument(Bind(Uint64Constant(1))))));
1137 StoreLocal(match_start_index_, Bind(Add(
1138 PushLocal(match_start_index_),
1139 PushArgument(Bind(Uint64Constant(1))))));
1140
1141 // Check if we have reached end of match area.
1142 BranchOrBacktrack(Comparison(kLT,
1143 LoadLocal(match_start_index_),
1144 LoadLocal(match_end_index_)),
1145 &loop);
1146
1147 BindBlock(&success);
1148
1149 // Move current character position to position after match.
1150 PushArgumentInstr* match_end_push = PushLocal(match_end_index_);
1151 len_push = PushLocal(string_param_length_);
1152 StoreLocal(current_position_, Bind(Sub(match_end_push, len_push)));
1153
1154 BindBlock(&fallthrough);
1155 }
1156
1157
1158 void IRRegExpMacroAssembler::CheckNotCharacter(uint32_t c,
1159 BlockLabel* on_not_equal) {
1160 TAG();
1161 BranchOrBacktrack(Comparison(kNE,
1162 LoadLocal(current_character_),
1163 Uint64Constant(c)),
1164 on_not_equal);
1165 }
1166
1167
1168 void IRRegExpMacroAssembler::CheckCharacterAfterAnd(uint32_t c,
1169 uint32_t mask,
1170 BlockLabel* on_equal) {
1171 TAG();
1172
1173 Definition* actual_def = LoadLocal(current_character_);
1174 Definition* expected_def = Uint64Constant(c);
1175
1176 PushArgumentInstr* actual_push = PushArgument(Bind(actual_def));
1177 PushArgumentInstr* mask_push = PushArgument(Bind(Uint64Constant(mask)));
1178 actual_def = InstanceCall(InstanceCallDescriptor::FromToken(Token::kBIT_AND),
1179 actual_push,
1180 mask_push);
1181
1182 BranchOrBacktrack(Comparison(kEQ, actual_def, expected_def), on_equal);
1183 }
1184
1185
1186 void IRRegExpMacroAssembler::CheckNotCharacterAfterAnd(
1187 uint32_t c,
1188 uint32_t mask,
1189 BlockLabel* on_not_equal) {
1190 TAG();
1191
1192 Definition* actual_def = LoadLocal(current_character_);
1193 Definition* expected_def = Uint64Constant(c);
1194
1195 PushArgumentInstr* actual_push = PushArgument(Bind(actual_def));
1196 PushArgumentInstr* mask_push = PushArgument(Bind(Uint64Constant(mask)));
1197 actual_def = InstanceCall(InstanceCallDescriptor::FromToken(Token::kBIT_AND),
1198 actual_push,
1199 mask_push);
1200
1201 BranchOrBacktrack(Comparison(kNE, actual_def, expected_def), on_not_equal);
1202 }
1203
1204
1205 void IRRegExpMacroAssembler::CheckNotCharacterAfterMinusAnd(
1206 uint16_t c,
1207 uint16_t minus,
1208 uint16_t mask,
1209 BlockLabel* on_not_equal) {
1210 TAG();
1211 ASSERT(minus < Utf16::kMaxCodeUnit); // NOLINT
1212
1213 Definition* actual_def = LoadLocal(current_character_);
1214 Definition* expected_def = Uint64Constant(c);
1215
1216 PushArgumentInstr* actual_push = PushArgument(Bind(actual_def));
1217 PushArgumentInstr* minus_push = PushArgument(Bind(Uint64Constant(minus)));
1218
1219 actual_push = PushArgument(Bind(Sub(actual_push, minus_push)));
1220 PushArgumentInstr* mask_push = PushArgument(Bind(Uint64Constant(mask)));
1221 actual_def = InstanceCall(InstanceCallDescriptor::FromToken(Token::kBIT_AND),
1222 actual_push,
1223 mask_push);
1224
1225 BranchOrBacktrack(Comparison(kNE, actual_def, expected_def), on_not_equal);
1226 }
1227
1228
1229 void IRRegExpMacroAssembler::CheckCharacterInRange(
1230 uint16_t from,
1231 uint16_t to,
1232 BlockLabel* on_in_range) {
1233 TAG();
1234 ASSERT(from <= to);
1235
1236 // TODO(jgruber): All range comparisons could be done cheaper with unsigned
1237 // compares. This pattern repeats in various places.
1238
1239 BlockLabel on_not_in_range;
1240 BranchOrBacktrack(Comparison(kLT,
1241 LoadLocal(current_character_),
1242 Uint64Constant(from)),
1243 &on_not_in_range);
1244 BranchOrBacktrack(Comparison(kGT,
1245 LoadLocal(current_character_),
1246 Uint64Constant(to)),
1247 &on_not_in_range);
1248 BranchOrBacktrack(NULL, on_in_range);
1249
1250 BindBlock(&on_not_in_range);
1251 }
1252
1253
1254 void IRRegExpMacroAssembler::CheckCharacterNotInRange(
1255 uint16_t from,
1256 uint16_t to,
1257 BlockLabel* on_not_in_range) {
1258 TAG();
1259 ASSERT(from <= to);
1260
1261 BranchOrBacktrack(Comparison(kLT,
1262 LoadLocal(current_character_),
1263 Uint64Constant(from)),
1264 on_not_in_range);
1265
1266 BranchOrBacktrack(Comparison(kGT,
1267 LoadLocal(current_character_),
1268 Uint64Constant(to)),
1269 on_not_in_range);
1270 }
1271
1272
1273 void IRRegExpMacroAssembler::CheckBitInTable(
1274 const TypedData& table,
1275 BlockLabel* on_bit_set) {
1276 TAG();
1277
1278 PushArgumentInstr* table_push =
1279 PushArgument(Bind(new(I) ConstantInstr(table)));
1280 PushArgumentInstr* index_push = PushLocal(current_character_);
1281
1282 if (mode_ != ASCII || kTableMask != Symbols::kMaxOneCharCodeSymbol) {
1283 PushArgumentInstr* mask_push =
1284 PushArgument(Bind(Uint64Constant(kTableSize - 1)));
1285 index_push = PushArgument(
1286 Bind(InstanceCall(InstanceCallDescriptor::FromToken(Token::kBIT_AND),
1287 index_push,
1288 mask_push)));
1289 }
1290
1291 Definition* byte_def =
1292 InstanceCall(InstanceCallDescriptor::FromToken(Token::kINDEX),
1293 table_push,
1294 index_push);
1295 Definition* zero_def = Int64Constant(0);
1296
1297 BranchOrBacktrack(Comparison(kNE, byte_def, zero_def), on_bit_set);
1298 }
1299
1300
1301 bool IRRegExpMacroAssembler::CheckSpecialCharacterClass(
1302 uint16_t type,
1303 BlockLabel* on_no_match) {
1304 TAG();
1305
1306 // Range checks (c in min..max) are generally implemented by an unsigned
1307 // (c - min) <= (max - min) check
1308 switch (type) {
1309 case 's':
1310 // Match space-characters
1311 if (mode_ == ASCII) {
1312 // One byte space characters are '\t'..'\r', ' ' and \u00a0.
1313 BlockLabel success;
1314 // Space (' ').
1315 BranchOrBacktrack(Comparison(kEQ,
1316 LoadLocal(current_character_),
1317 Uint64Constant(' ')),
1318 &success);
1319 // Check range 0x09..0x0d.
1320 CheckCharacterInRange('\t', '\r', &success);
1321 // \u00a0 (NBSP).
1322 BranchOrBacktrack(Comparison(kNE,
1323 LoadLocal(current_character_),
1324 Uint64Constant(0x00a0)),
1325 on_no_match);
1326 BindBlock(&success);
1327 return true;
1328 }
1329 return false;
1330 case 'S':
1331 // The emitted code for generic character classes is good enough.
1332 return false;
1333 case 'd':
1334 // Match ASCII digits ('0'..'9')
1335 CheckCharacterNotInRange('0', '9', on_no_match);
1336 return true;
1337 case 'D':
1338 // Match non ASCII-digits
1339 CheckCharacterInRange('0', '9', on_no_match);
1340 return true;
1341 case '.': {
1342 // Match non-newlines (not 0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029)
1343 BranchOrBacktrack(Comparison(kEQ,
1344 LoadLocal(current_character_),
1345 Uint64Constant('\n')),
1346 on_no_match);
1347 BranchOrBacktrack(Comparison(kEQ,
1348 LoadLocal(current_character_),
1349 Uint64Constant('\r')),
1350 on_no_match);
1351 if (mode_ == UC16) {
1352 BranchOrBacktrack(Comparison(kEQ,
1353 LoadLocal(current_character_),
1354 Uint64Constant(0x2028)),
1355 on_no_match);
1356 BranchOrBacktrack(Comparison(kEQ,
1357 LoadLocal(current_character_),
1358 Uint64Constant(0x2029)),
1359 on_no_match);
1360 }
1361 return true;
1362 }
1363 case 'w': {
1364 if (mode_ != ASCII) {
1365 // Table is 128 entries, so all ASCII characters can be tested.
1366 BranchOrBacktrack(Comparison(kGT,
1367 LoadLocal(current_character_),
1368 Uint64Constant('z')),
1369 on_no_match);
1370 }
1371
1372 PushArgumentInstr* table_push =
1373 PushArgument(Bind(WordCharacterMapConstant()));
1374 PushArgumentInstr* index_push = PushLocal(current_character_);
1375
1376 Definition* byte_def =
1377 InstanceCall(InstanceCallDescriptor::FromToken(Token::kINDEX),
1378 table_push,
1379 index_push);
1380 Definition* zero_def = Int64Constant(0);
1381
1382 BranchOrBacktrack(Comparison(kEQ, byte_def, zero_def), on_no_match);
1383
1384 return true;
1385 }
1386 case 'W': {
1387 BlockLabel done;
1388 if (mode_ != ASCII) {
1389 // Table is 128 entries, so all ASCII characters can be tested.
1390 BranchOrBacktrack(Comparison(kGT,
1391 LoadLocal(current_character_),
1392 Uint64Constant('z')),
1393 &done);
1394 }
1395
1396 // TODO(jgruber): Refactor to use CheckBitInTable if possible.
1397
1398 PushArgumentInstr* table_push =
1399 PushArgument(Bind(WordCharacterMapConstant()));
1400 PushArgumentInstr* index_push = PushLocal(current_character_);
1401
1402 Definition* byte_def =
1403 InstanceCall(InstanceCallDescriptor::FromToken(Token::kINDEX),
1404 table_push,
1405 index_push);
1406 Definition* zero_def = Int64Constant(0);
1407
1408 BranchOrBacktrack(Comparison(kNE, byte_def, zero_def), on_no_match);
1409
1410 if (mode_ != ASCII) {
1411 BindBlock(&done);
1412 }
1413 return true;
1414 }
1415 // Non-standard classes (with no syntactic shorthand) used internally.
1416 case '*':
1417 // Match any character.
1418 return true;
1419 case 'n': {
1420 // Match newlines (0x0a('\n'), 0x0d('\r'), 0x2028 or 0x2029).
1421 // The opposite of '.'.
1422 BlockLabel success;
1423 BranchOrBacktrack(Comparison(kEQ,
1424 LoadLocal(current_character_),
1425 Uint64Constant('\n')),
1426 &success);
1427 BranchOrBacktrack(Comparison(kEQ,
1428 LoadLocal(current_character_),
1429 Uint64Constant('\r')),
1430 &success);
1431 if (mode_ == UC16) {
1432 BranchOrBacktrack(Comparison(kEQ,
1433 LoadLocal(current_character_),
1434 Uint64Constant(0x2028)),
1435 &success);
1436 BranchOrBacktrack(Comparison(kEQ,
1437 LoadLocal(current_character_),
1438 Uint64Constant(0x2029)),
1439 &success);
1440 }
1441 BranchOrBacktrack(NULL, on_no_match);
1442 BindBlock(&success);
1443 return true;
1444 }
1445 // No custom implementation (yet): s(uint16_t), S(uint16_t).
1446 default:
1447 return false;
1448 }
1449 }
1450
1451
1452 void IRRegExpMacroAssembler::Fail() {
1453 TAG();
1454 ASSERT(FAILURE == 0); // Return value for failure is zero.
1455 if (!global()) {
1456 UNREACHABLE(); // Dart regexps are always global.
1457 }
1458 GoTo(exit_block_);
1459 }
1460
1461
1462 void IRRegExpMacroAssembler::IfRegisterGE(intptr_t reg,
1463 intptr_t comparand,
1464 BlockLabel* if_ge) {
1465 TAG();
1466 BranchOrBacktrack(Comparison(kGTE,
1467 LoadLocal(position_register(reg)),
1468 Int64Constant(comparand)),
1469 if_ge);
1470 }
1471
1472
1473 void IRRegExpMacroAssembler::IfRegisterLT(intptr_t reg,
1474 intptr_t comparand,
1475 BlockLabel* if_lt) {
1476 TAG();
1477 BranchOrBacktrack(Comparison(kLT,
1478 LoadLocal(position_register(reg)),
1479 Int64Constant(comparand)),
1480 if_lt);
1481 }
1482
1483
1484 void IRRegExpMacroAssembler::IfRegisterEqPos(intptr_t reg,
1485 BlockLabel* if_eq) {
1486 TAG();
1487 BranchOrBacktrack(Comparison(kEQ,
1488 LoadLocal(position_register(reg)),
1489 LoadLocal(current_position_)),
1490 if_eq);
1491 }
1492
1493
1494 RegExpMacroAssembler::IrregexpImplementation
1495 IRRegExpMacroAssembler::Implementation() {
1496 return kIRImplementation;
1497 }
1498
1499
1500 void IRRegExpMacroAssembler::LoadCurrentCharacter(intptr_t cp_offset,
1501 BlockLabel* on_end_of_input,
1502 bool check_bounds,
1503 intptr_t characters) {
1504 TAG();
1505 ASSERT(cp_offset >= -1); // ^ and \b can look behind one character.
1506 ASSERT(cp_offset < (1<<30)); // Be sane! (And ensure negation works)
1507 if (check_bounds) {
1508 CheckPosition(cp_offset + characters - 1, on_end_of_input);
1509 }
1510 LoadCurrentCharacterUnchecked(cp_offset, characters);
1511 }
1512
1513
1514 void IRRegExpMacroAssembler::PopCurrentPosition() {
1515 TAG();
1516 StoreLocal(current_position_, PopStack());
1517 }
1518
1519
1520 void IRRegExpMacroAssembler::PopRegister(intptr_t register_index) {
1521 TAG();
1522 ASSERT(register_index < position_registers_.length());
1523 StoreLocal(position_register(register_index), PopStack());
1524 }
1525
1526
1527 void IRRegExpMacroAssembler::PushStack(Definition *definition) {
1528 PushArgumentInstr* stack_push = PushLocal(stack_);
1529 PushArgumentInstr* value_push = PushArgument(Bind(definition));
1530 Do(InstanceCall(InstanceCallDescriptor(Symbols::add()),
1531 stack_push,
1532 value_push));
1533 }
1534
1535
1536 Value* IRRegExpMacroAssembler::PopStack() {
1537 PushArgumentInstr* stack_push = PushLocal(stack_);
1538 return Bind(InstanceCall(InstanceCallDescriptor(Symbols::removeLast()),
1539 stack_push));
1540 }
1541
1542
1543 // Pushes the location corresponding to label to the backtracking stack.
1544 void IRRegExpMacroAssembler::PushBacktrack(BlockLabel* label) {
1545 TAG();
1546
1547 // Ensure that targets of indirect jumps are never accessed through a
1548 // normal control flow instructions by creating a new block for each backtrack
1549 // target.
1550 IndirectEntryInstr* indirect_target = IndirectWithJoinGoto(label->block());
1551
1552 // Add a fake edge from the graph entry for data flow analysis.
1553 entry_block_->AddIndirectEntry(indirect_target);
1554
1555 ConstantInstr* offset = Uint64Constant(indirect_target->indirect_id());
1556 PushStack(offset);
1557 }
1558
1559
1560 void IRRegExpMacroAssembler::PushCurrentPosition() {
1561 TAG();
1562 PushStack(LoadLocal(current_position_));
1563 }
1564
1565
1566 void IRRegExpMacroAssembler::PushRegister(intptr_t register_index) {
1567 TAG();
1568 PushStack(LoadLocal(position_register(register_index)));
1569 }
1570
1571
1572 void IRRegExpMacroAssembler::ReadCurrentPositionFromRegister(intptr_t reg) {
1573 TAG();
1574 StoreLocal(current_position_, Bind(LoadLocal(position_register(reg))));
1575 }
1576
1577 // Resets the size of the stack to the value stored in reg.
1578 void IRRegExpMacroAssembler::ReadStackPointerFromRegister(intptr_t reg) {
1579 TAG();
1580 ASSERT(reg < position_registers_.length());
1581
1582 PushArgumentInstr* stack_push = PushLocal(stack_);
1583 PushArgumentInstr* length_push = PushLocal(position_register(reg));
1584
1585 Do(InstanceCall(InstanceCallDescriptor(
1586 String::ZoneHandle(
1587 I, Field::SetterSymbol(Symbols::Length()))),
1588 stack_push,
1589 length_push));
1590 }
1591
1592 void IRRegExpMacroAssembler::SetCurrentPositionFromEnd(intptr_t by) {
1593 TAG();
1594
1595 BlockLabel after_position;
1596
1597 Definition* cur_pos_def = LoadLocal(current_position_);
1598 Definition* by_value_def = Int64Constant(-by);
1599
1600 BranchOrBacktrack(Comparison(kGTE, cur_pos_def, by_value_def),
1601 &after_position);
1602
1603 StoreLocal(current_position_, Bind(Int64Constant(-by)));
1604
1605 // On RegExp code entry (where this operation is used), the character before
1606 // the current position is expected to be already loaded.
1607 // We have advanced the position, so it's safe to read backwards.
1608 LoadCurrentCharacterUnchecked(-1, 1);
1609
1610 BindBlock(&after_position);
1611 }
1612
1613
1614 void IRRegExpMacroAssembler::SetRegister(intptr_t register_index, intptr_t to) {
1615 TAG();
1616 // Reserved for positions!
1617 ASSERT(register_index >= position_registers_count_);
1618 StoreLocal(position_register(register_index), Bind(Int64Constant(to)));
1619 }
1620
1621
1622 bool IRRegExpMacroAssembler::Succeed() {
1623 TAG();
1624 GoTo(success_block_);
1625 return global();
1626 }
1627
1628
1629 void IRRegExpMacroAssembler::WriteCurrentPositionToRegister(
1630 intptr_t reg, intptr_t cp_offset) {
1631 TAG();
1632
1633 PushArgumentInstr* pos_push = PushLocal(current_position_);
1634 PushArgumentInstr* off_push =
1635 PushArgument(Bind(Int64Constant(cp_offset)));
1636
1637 // Push the negative offset; these are converted to positive string positions
1638 // within the success block.
1639 StoreLocal(position_register(reg), Bind(Add(pos_push, off_push)));
1640 }
1641
1642
1643 void IRRegExpMacroAssembler::ClearRegisters(
1644 intptr_t reg_from, intptr_t reg_to) {
1645 TAG();
1646
1647 ASSERT(reg_from <= reg_to);
1648 ASSERT(reg_to < position_registers_.length());
1649
1650 // In order to clear registers to a final result value of -1, set them to
1651 // (-1 - string length), the offset of -1 from the end of the string.
1652
1653 for (intptr_t reg = reg_from; reg <= reg_to; reg++) {
1654 PushArgumentInstr* minus_one_push =
1655 PushArgument(Bind(Int64Constant(-1)));
1656 PushArgumentInstr* length_push = PushLocal(string_param_length_);
1657
1658 StoreLocal(position_register(reg), Bind(Sub(minus_one_push, length_push)));
1659 }
1660 }
1661
1662
1663 void IRRegExpMacroAssembler::WriteStackPointerToRegister(intptr_t reg) {
1664 TAG();
1665
1666 PushArgumentInstr* stack_push = PushLocal(stack_);
1667 Value* length_value =
1668 Bind(InstanceCall(InstanceCallDescriptor(
1669 String::ZoneHandle(
1670 I, Field::GetterSymbol(Symbols::Length()))),
1671 stack_push));
1672
1673 StoreLocal(position_register(reg), length_value);
1674 }
1675
1676
1677 // Private methods:
1678
1679
1680 void IRRegExpMacroAssembler::CheckPosition(intptr_t cp_offset,
1681 BlockLabel* on_outside_input) {
1682 TAG();
1683 Definition* curpos_def = LoadLocal(current_position_);
1684 Definition* cp_off_def = Int64Constant(-cp_offset);
1685
1686 // If (current_position_ < -cp_offset), we are in bounds.
1687 // Remember, current_position_ is a negative offset from the string end.
1688
1689 BranchOrBacktrack(Comparison(kGTE, curpos_def, cp_off_def),
1690 on_outside_input);
1691 }
1692
1693
1694 void IRRegExpMacroAssembler::BranchOrBacktrack(
1695 ComparisonInstr* comparison,
1696 BlockLabel* true_successor) {
1697 if (comparison == NULL) { // No condition
1698 if (true_successor == NULL) {
1699 Backtrack();
1700 return;
1701 }
1702 GoTo(true_successor);
1703 return;
1704 }
1705
1706 // If no successor block has been passed in, backtrack.
1707 JoinEntryInstr* true_successor_block = backtrack_block_;
1708 if (true_successor != NULL) {
1709 true_successor->SetLinked();
1710 true_successor_block = true_successor->block();
1711 }
1712 ASSERT(true_successor_block != NULL);
1713
1714 // If the condition is not true, fall through to a new block.
1715 BlockLabel fallthrough;
1716
1717 BranchInstr* branch = new(I) BranchInstr(comparison);
1718 *branch->true_successor_address() =
1719 TargetWithJoinGoto(true_successor_block);
1720 *branch->false_successor_address() =
1721 TargetWithJoinGoto(fallthrough.block());
1722
1723 CloseBlockWith(branch);
1724 BindBlock(&fallthrough);
1725 }
1726
1727
1728 TargetEntryInstr* IRRegExpMacroAssembler::TargetWithJoinGoto(
1729 JoinEntryInstr* dst) {
1730 TargetEntryInstr* target = new(I) TargetEntryInstr(
1731 block_id_.Alloc(), kInvalidTryIndex);
1732 blocks_.Add(target);
1733
1734 target->AppendInstruction(new(I) GotoInstr(dst));
1735
1736 return target;
1737 }
1738
1739
1740 IndirectEntryInstr* IRRegExpMacroAssembler::IndirectWithJoinGoto(
1741 JoinEntryInstr* dst) {
1742 IndirectEntryInstr* target = new(I) IndirectEntryInstr(
1743 block_id_.Alloc(), indirect_id_.Alloc(), kInvalidTryIndex);
1744 blocks_.Add(target);
1745
1746 target->AppendInstruction(new(I) GotoInstr(dst));
1747
1748 return target;
1749 }
1750
1751
1752 void IRRegExpMacroAssembler::CheckPreemption() {
1753 TAG();
1754 AppendInstruction(new(I) CheckStackOverflowInstr(kNoTokenPos, 0));
1755 }
1756
1757
1758 Definition* IRRegExpMacroAssembler::Add(
1759 PushArgumentInstr* lhs,
1760 PushArgumentInstr* rhs) {
1761 return InstanceCall(InstanceCallDescriptor::FromToken(Token::kADD), lhs, rhs);
1762 }
1763
1764
1765 Definition* IRRegExpMacroAssembler::Sub(
1766 PushArgumentInstr* lhs,
1767 PushArgumentInstr* rhs) {
1768 return InstanceCall(InstanceCallDescriptor::FromToken(Token::kSUB), lhs, rhs);
1769 }
1770
1771
1772 static const String& codeUnitsAtName(intptr_t characters) {
1773 switch (characters) {
1774 case 1: return Library::PrivateCoreLibName(Symbols::_oneCodeUnitAt());
1775 case 2: return Library::PrivateCoreLibName(Symbols::_twoCodeUnitsAt());
1776 case 4: return Library::PrivateCoreLibName(Symbols::_fourCodeUnitsAt());
1777 }
1778 UNREACHABLE();
1779 return String::Handle();
1780 }
1781
1782
1783 void IRRegExpMacroAssembler::LoadCurrentCharacterUnchecked(
1784 intptr_t cp_offset, intptr_t characters) {
1785 TAG();
1786
1787 if (mode_ == ASCII) {
1788 ASSERT(characters == 1 || characters == 2 || characters == 4);
1789 } else {
1790 ASSERT(mode_ == UC16);
1791 ASSERT(characters == 1 || characters == 2);
1792 }
1793
1794 // LoadLocal pattern_param_
1795 // PushArgument()
1796 PushArgumentInstr* pattern_push = PushLocal(string_param_);
1797
1798 // Calculate the addressed string index as
1799 // cp_offset + current_position_ + string_param_length_
1800 PushArgumentInstr* cp_offset_push =
1801 PushArgument(Bind(Int64Constant(cp_offset)));
1802 PushArgumentInstr* cur_pos_push = PushLocal(current_position_);
1803
1804 PushArgumentInstr* partial_sum_push =
1805 PushArgument(Bind(Add(cp_offset_push, cur_pos_push)));
1806 PushArgumentInstr* length_push = PushLocal(string_param_length_);
1807
1808 PushArgumentInstr* pos_push =
1809 PushArgument(Bind(Add(partial_sum_push, length_push)));
1810
1811 // InstanceCall(codeUnitAt, t0, t0)
1812 const String& name = codeUnitsAtName(characters);
1813 Value* code_unit_value =
1814 Bind(InstanceCall(InstanceCallDescriptor(name),
1815 pattern_push,
1816 pos_push));
1817
1818 // StoreLocal(current_character_)
1819 StoreLocal(current_character_, code_unit_value);
1820
1821 PRINT(PushLocal(current_character_));
1822 }
1823
1824
1825 Value* IRRegExpMacroAssembler::CharacterAt(Definition* index) {
1826 PushArgumentInstr* pattern_push = PushLocal(string_param_);
1827 PushArgumentInstr* index_push = PushArgument(Bind(index));
1828
1829 return Bind(InstanceCall(InstanceCallDescriptor(codeUnitsAtName(1)),
1830 pattern_push,
1831 index_push));
1832 }
1833
1834
1835 #undef __
1836
1837 } // namespace dart
OLDNEW
« no previous file with comments | « runtime/vm/regexp_assembler.h ('k') | runtime/vm/regexp_ast.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698