Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(187)

Side by Side Diff: runtime/vm/regexp_assembler.cc

Issue 683433003: Integrate the Irregexp Regular Expression Engine. (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: byte-order assert & context-var Created 6 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file 1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a 2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file. 3 // BSD-style license that can be found in the LICENSE file.
4 4
5 #include "vm/regexp_assembler.h" 5 #include "vm/regexp_assembler.h"
6 6
7 // SNIP 7 #include "vm/bit_vector.h"
8 #include "vm/compiler.h"
9 #include "vm/dart_entry.h"
10 #include "vm/flow_graph_builder.h"
11 #include "vm/il_printer.h"
12 #include "vm/object_store.h"
13 #include "vm/regexp.h"
14 #include "vm/resolver.h"
15 #include "vm/stack_frame.h"
16 #include "vm/unibrow-inl.h"
17 #include "vm/unicode.h"
18
19 #define I isolate()
20
21 // Debugging output macros. TAG() is called at the head of each interesting
22 // function and prints its name during execution if irregexp tracing is enabled.
23 #define TAG() if (FLAG_trace_irregexp) { TAG_(); }
24 #define TAG_() \
25 Print(PushArgument( \
26 Bind(new(I) ConstantInstr(String::ZoneHandle(I, String::Concat( \
27 String::Handle(String::New("TAG: ")), \
28 String::Handle(String::New(__FUNCTION__)), Heap::kOld))))));
29
30 #define PRINT(arg) if (FLAG_trace_irregexp) { Print(arg); }
8 31
9 namespace dart { 32 namespace dart {
10 33
11 RegExpMacroAssembler::RegExpMacroAssembler(Zone* zone) 34 DEFINE_FLAG(bool, trace_irregexp, false, "Trace irregexps");
35
36
37 static const intptr_t kInvalidTryIndex = -1;
38 static const intptr_t kNoTokenPos = -1;
39
40
41 void PrintUtf16(uint16_t c) {
42 const char* format = (0x20 <= c && c <= 0x7F) ?
43 "%c" : (c <= 0xff) ? "\\x%02x" : "\\u%04x";
44 OS::Print(format, c);
45 }
46
47
48 /*
49 * This assembler uses the following main local variables:
50 * - stack_: A pointer to a growable list which we use as an all-purpose stack
51 * storing backtracking offsets, positions & stored register values.
52 * - current_character_: Stores the currently loaded characters (possibly more
53 * than one).
54 * - current_position_: The current position within the string, stored as a
55 * negative offset from the end of the string (i.e. the
56 * position corresponding to str[0] is -str.length).
57 * Note that current_position_ is *not* byte-based, unlike
58 * original V8 code.
59 *
60 * Results are returned though an array of capture indices, stored at
61 * matches_param_. A null array specifies a failure to match. The match indices
62 * [start_inclusive, end_exclusive] for capture group i are stored at positions
63 * matches_param_[i * 2] and matches_param_[i * 2 + 1], respectively. Match
64 * indices of -1 denote non-matched groups. Note that we store these indices
65 * as a negative offset from the end of the string in position_registers_
66 * during processing, and convert them to standard indexes when copying them
67 * to matches_param_ on successful match.
68 */
69
70 RegExpMacroAssembler::RegExpMacroAssembler(Isolate* isolate)
12 : slow_safe_compiler_(false), 71 : slow_safe_compiler_(false),
13 global_mode_(NOT_GLOBAL), 72 global_mode_(NOT_GLOBAL),
14 zone_(zone) { 73 isolate_(isolate) {
15 } 74 }
16 75
17 76
18 RegExpMacroAssembler::~RegExpMacroAssembler() { 77 RegExpMacroAssembler::~RegExpMacroAssembler() {
19 } 78 }
20 79
21 // SNIP 80
81 IRRegExpMacroAssembler::IRRegExpMacroAssembler(
82 intptr_t specialization_cid,
83 intptr_t capture_count,
84 const ParsedFunction* parsed_function,
85 const ZoneGrowableArray<const ICData*>& ic_data_array,
86 Isolate* isolate)
87 : RegExpMacroAssembler(isolate),
88 specialization_cid_(specialization_cid),
89 parsed_function_(parsed_function),
90 ic_data_array_(ic_data_array),
91 current_instruction_(NULL),
92 stack_(NULL),
93 current_character_(NULL),
94 current_position_(NULL),
95 string_param_(NULL),
96 string_param_length_(NULL),
97 start_index_param_(NULL),
98 position_registers_count_((capture_count + 1) * 2),
99 stack_array_(GrowableObjectArray::ZoneHandle(
100 isolate, GrowableObjectArray::New(16, Heap::kOld))) {
101 switch (specialization_cid) {
102 case kOneByteStringCid:
103 case kExternalOneByteStringCid: mode_ = ASCII; break;
104 case kTwoByteStringCid:
105 case kExternalTwoByteStringCid: mode_ = UC16; break;
106 default: UNREACHABLE();
107 }
108
109 InitializeLocals();
110
111 // Create and generate all preset blocks.
112 entry_block_ =
113 new(isolate) GraphEntryInstr(
114 parsed_function_,
115 new(isolate) TargetEntryInstr(block_id_.Alloc(), kInvalidTryIndex),
116 Isolate::kNoDeoptId);
117 start_block_ =
118 new(isolate) JoinEntryInstr(block_id_.Alloc(), kInvalidTryIndex);
119 success_block_ =
120 new(isolate) JoinEntryInstr(block_id_.Alloc(), kInvalidTryIndex);
121 backtrack_block_ =
122 new(isolate) JoinEntryInstr(block_id_.Alloc(), kInvalidTryIndex);
123 exit_block_ =
124 new(isolate) JoinEntryInstr(block_id_.Alloc(), kInvalidTryIndex);
125
126 GenerateEntryBlock();
127 GenerateSuccessBlock();
128 GenerateBacktrackBlock();
129 GenerateExitBlock();
130
131 blocks_.Add(entry_block_);
132 blocks_.Add(entry_block_->normal_entry());
133 blocks_.Add(start_block_);
134 blocks_.Add(success_block_);
135 blocks_.Add(backtrack_block_);
136 blocks_.Add(exit_block_);
137
138 // Begin emission at the start_block_.
139 set_current_instruction(start_block_);
140 }
141
142
143 IRRegExpMacroAssembler::~IRRegExpMacroAssembler() { }
144
145
146 void IRRegExpMacroAssembler::InitializeLocals() {
147 // All generated functions are expected to have a current-context variable.
148 // This variable is unused in irregexp functions.
149 parsed_function_->current_context_var()->set_index(GetNextLocalIndex());
150
151 // Create local variables and parameters.
152 stack_ = Local(Symbols::stack_());
153 current_character_ = Local(Symbols::current_character_());
154 current_position_ = Local(Symbols::current_position_());
155 string_param_length_ = Local(Symbols::string_param_length_());
156 capture_length_ = Local(Symbols::capture_length_());
157 match_start_index_ = Local(Symbols::match_start_index_());
158 capture_start_index_ = Local(Symbols::capture_start_index_());
159 match_end_index_ = Local(Symbols::match_end_index_());
160 char_in_capture_ = Local(Symbols::char_in_capture_());
161 char_in_match_ = Local(Symbols::char_in_match_());
162 result_ = Local(Symbols::result_());
163
164 string_param_ = Parameter(Symbols::string_param_(), 0);
165 start_index_param_ = Parameter(Symbols::start_index_param_(), 1);
166
167 // Reserve space for all captured group positions. Note that more might
168 // be created on the fly for internal use.
169 for (intptr_t i = 0; i < position_registers_count_; i++) {
170 position_register(i);
171 }
172 }
173
174
175 void IRRegExpMacroAssembler::GenerateEntryBlock() {
176 set_current_instruction(entry_block_->normal_entry());
177 TAG();
178
179 // Generate a local list variable which we will use as a backtracking stack.
180
181 StoreLocal(stack_, Bind(new(I) ConstantInstr(stack_array_)));
182 Do(InstanceCall(InstanceCallDescriptor(Symbols::clear()), PushLocal(stack_)));
183
184 // Store string.length.
185 PushArgumentInstr* string_push = PushLocal(string_param_);
186
187 StoreLocal(string_param_length_,
188 Bind(InstanceCall(InstanceCallDescriptor(
189 String::ZoneHandle(
190 Field::GetterSymbol(Symbols::Length()))),
191 string_push)));
192
193 // Initialize all capture registers.
194 ClearRegisters(0, position_registers_count_ - 1);
195
196 // Store (start_index - string.length) as the current position (since it's a
197 // negative offset from the end of the string).
198 PushArgumentInstr* start_index_push = PushLocal(start_index_param_);
199 PushArgumentInstr* length_push = PushLocal(string_param_length_);
200
201 StoreLocal(current_position_, Bind(Sub(start_index_push, length_push)));
202
203 // Jump to the start block.
204 current_instruction_->Goto(start_block_);
205 }
206
207
208 void IRRegExpMacroAssembler::GenerateBacktrackBlock() {
209 set_current_instruction(backtrack_block_);
210 TAG();
211 Backtrack();
212 }
213
214
215 void IRRegExpMacroAssembler::GenerateSuccessBlock() {
216 set_current_instruction(success_block_);
217 TAG();
218
219 Definition* type_args_null_def = new(I) ConstantInstr(
220 TypeArguments::ZoneHandle(I, TypeArguments::null()));
221 PushArgumentInstr* type_arg_push = PushArgument(Bind(type_args_null_def));
222 PushArgumentInstr* length_push =
223 PushArgument(Bind(Uint64Constant(position_registers_count_)));
224
225 const Library& lib = Library::Handle(Library::CoreLibrary());
226 const Class& list_class = Class::Handle(
227 lib.LookupCoreClass(Symbols::List()));
228 const Function& list_ctor =
229 Function::ZoneHandle(I, list_class.LookupFactory(Symbols::ListFactory()));
230
231 // TODO(zerny): Use CreateArrayInstr and StoreIndexed instead.
232 StoreLocal(result_, Bind(StaticCall(list_ctor, type_arg_push, length_push)));
233
234 // Store captured offsets in the `matches` parameter.
235 // TODO(zerny): Eliminate position_register locals and access `matches`
236 // directly.
237 for (intptr_t i = 0; i < position_registers_count_; i++) {
238 PushArgumentInstr* matches_push = PushLocal(result_);
239 PushArgumentInstr* index_push = PushArgument(Bind(Uint64Constant(i)));
240
241 // Convert negative offsets from the end of the string to string indices.
242 PushArgumentInstr* offset_push = PushLocal(position_register(i));
243 PushArgumentInstr* len_push = PushLocal(string_param_length_);
244 PushArgumentInstr* value_push =
245 PushArgument(Bind(Add(offset_push, len_push)));
246
247 Do(InstanceCall(InstanceCallDescriptor::FromToken(Token::kASSIGN_INDEX),
248 matches_push,
249 index_push,
250 value_push));
251 }
252
253 // Print the result if tracing.
254 PRINT(PushLocal(result_));
255
256 // Return true on success.
257 AppendInstruction(new(I) ReturnInstr(kNoTokenPos, Bind(LoadLocal(result_))));
258 }
259
260
261 void IRRegExpMacroAssembler::GenerateExitBlock() {
262 set_current_instruction(exit_block_);
263 TAG();
264
265 // Return false on failure.
266 AppendInstruction(new(I) ReturnInstr(kNoTokenPos, Bind(LoadLocal(result_))));
267 }
268
269
270 #if defined(TARGET_ARCH_ARM64) || \
271 defined(TARGET_ARCH_ARM) || \
272 defined(TARGET_ARCH_MIPS)
273 // Disabling unaligned accesses forces the regexp engine to load characters one
274 // by one instead of up to 4 at once, along with the associated performance hit.
275 // TODO(zerny): Be less conservative about disabling unaligned accesses.
276 // For instance, ARMv6 supports unaligned accesses. Once it is enabled here,
277 // update LoadCodeUnitsInstr methods for the appropriate architectures.
278 static const bool kEnableUnalignedAccesses = false;
279 #else
280 static const bool kEnableUnalignedAccesses = true;
281 #endif
282 bool IRRegExpMacroAssembler::CanReadUnaligned() {
283 return kEnableUnalignedAccesses && !slow_safe();
284 }
285
286
287 RawArray* IRRegExpMacroAssembler::Execute(
288 const Function& function,
289 const String& input,
290 const Smi& start_offset,
291 Isolate* isolate) {
292 // Create the argument list.
293 const Array& args = Array::Handle(Array::New(2));
294 args.SetAt(0, input);
295 args.SetAt(1, start_offset);
296
297 // And finally call the generated code.
298
299 const Object& retval =
300 Object::Handle(isolate, DartEntry::InvokeFunction(function, args));
301 if (retval.IsError()) {
302 const Error& error = Error::Cast(retval);
303 OS::Print("%s\n", error.ToErrorCString());
304 // Should never happen.
305 UNREACHABLE();
306 }
307
308 if (retval.IsNull()) {
309 return Array::null();
310 }
311
312 ASSERT(retval.IsArray());
313 return Array::Cast(retval).raw();
314 }
315
316
317 RawBool* IRRegExpMacroAssembler::CaseInsensitiveCompareUC16(
318 RawString* str_raw,
319 RawSmi* lhs_index_raw,
320 RawSmi* rhs_index_raw,
321 RawSmi* length_raw) {
322 const String& str = String::Handle(str_raw);
323 const Smi& lhs_index = Smi::Handle(lhs_index_raw);
324 const Smi& rhs_index = Smi::Handle(rhs_index_raw);
325 const Smi& length = Smi::Handle(length_raw);
326
327 // TODO(zerny): Optimize as single instance. V8 has this as an
328 // isolate member.
329 unibrow::Mapping<unibrow::Ecma262Canonicalize> canonicalize;
330
331 for (intptr_t i = 0; i < length.Value(); i++) {
332 int32_t c1 = str.CharAt(lhs_index.Value() + i);
333 int32_t c2 = str.CharAt(rhs_index.Value() + i);
334 if (c1 != c2) {
335 int32_t s1[1] = { c1 };
336 canonicalize.get(c1, '\0', s1);
337 if (s1[0] != c2) {
338 int32_t s2[1] = { c2 };
339 canonicalize.get(c2, '\0', s2);
340 if (s1[0] != s2[0]) {
341 return Bool::False().raw();
342 }
343 }
344 }
345 }
346 return Bool::True().raw();
347 }
348
349
350 LocalVariable* IRRegExpMacroAssembler::Parameter(const String& name,
351 intptr_t index) const {
352 const Type& local_type = Type::ZoneHandle(I, Type::DynamicType());
353 LocalVariable* local =
354 new(I) LocalVariable(kNoTokenPos, name, local_type);
355
356 intptr_t param_frame_index = kParamEndSlotFromFp + kParamCount - index;
357 local->set_index(param_frame_index);
358
359 return local;
360 }
361
362
363 LocalVariable* IRRegExpMacroAssembler::Local(const String& name) {
364 const Type& local_type = Type::ZoneHandle(I, Type::DynamicType());
365 LocalVariable* local =
366 new(I) LocalVariable(kNoTokenPos, name, local_type);
367 local->set_index(GetNextLocalIndex());
368
369 return local;
370 }
371
372
373 ConstantInstr* IRRegExpMacroAssembler::Int64Constant(int64_t value) const {
374 return new(I) ConstantInstr(
375 Integer::ZoneHandle(I, Integer::New(value, Heap::kOld)));
376 }
377
378
379 ConstantInstr* IRRegExpMacroAssembler::Uint64Constant(uint64_t value) const {
380 return new(I) ConstantInstr(
381 Integer::ZoneHandle(I, Integer::NewFromUint64(value, Heap::kOld)));
382 }
383
384
385 ConstantInstr* IRRegExpMacroAssembler::BoolConstant(bool value) const {
386 return new(I) ConstantInstr(value ? Bool::True() : Bool::False());
387 }
388
389
390 ConstantInstr* IRRegExpMacroAssembler::StringConstant(const char* value) const {
391 return new(I) ConstantInstr(
392 String::ZoneHandle(I, String::New(value, Heap::kOld)));
393 }
394
395
396 ConstantInstr* IRRegExpMacroAssembler::WordCharacterMapConstant() const {
397 const Library& lib = Library::Handle(I, Library::CoreLibrary());
398 const Class& regexp_class = Class::Handle(I,
399 lib.LookupClassAllowPrivate(Symbols::JSSyntaxRegExp()));
400 const Field& word_character_field = Field::ZoneHandle(I,
401 regexp_class.LookupStaticField(Symbols::_wordCharacterMap()));
402 ASSERT(!word_character_field.IsNull());
403
404 if (word_character_field.IsUninitialized()) {
405 word_character_field.EvaluateInitializer();
406 }
407 ASSERT(!word_character_field.IsUninitialized());
408
409 return new(I) ConstantInstr(
410 Instance::ZoneHandle(I, word_character_field.value()));
411 }
412
413
414 ComparisonInstr* IRRegExpMacroAssembler::Comparison(
415 ComparisonKind kind, Definition* lhs, Definition* rhs) {
416 Token::Kind strict_comparison = Token::kEQ_STRICT;
417 Token::Kind intermediate_operator = Token::kILLEGAL;
418 switch (kind) {
419 case kEQ:
420 intermediate_operator = Token::kEQ;
421 break;
422 case kNE:
423 intermediate_operator = Token::kEQ;
424 strict_comparison = Token::kNE_STRICT;
425 break;
426 case kLT:
427 intermediate_operator = Token::kLT;
428 break;
429 case kGT:
430 intermediate_operator = Token::kGT;
431 break;
432 case kLTE:
433 intermediate_operator = Token::kLTE;
434 break;
435 case kGTE:
436 intermediate_operator = Token::kGTE;
437 break;
438 default:
439 UNREACHABLE();
440 }
441
442 ASSERT(intermediate_operator != Token::kILLEGAL);
443
444 PushArgumentInstr* lhs_push = PushArgument(Bind(lhs));
445 PushArgumentInstr* rhs_push = PushArgument(Bind(rhs));
446
447 Value* lhs_value =
448 Bind(InstanceCall(
449 InstanceCallDescriptor::FromToken(intermediate_operator),
450 lhs_push,
451 rhs_push));
452 Value* rhs_value = Bind(BoolConstant(true));
453
454 return new(I) StrictCompareInstr(kNoTokenPos, strict_comparison,
455 lhs_value, rhs_value, true);
456 }
457
458
459 StaticCallInstr* IRRegExpMacroAssembler::StaticCall(
460 const Function& function) const {
461 ZoneGrowableArray<PushArgumentInstr*>* arguments =
462 new(I) ZoneGrowableArray<PushArgumentInstr*>(0);
463 return StaticCall(function, arguments);
464 }
465
466
467 StaticCallInstr* IRRegExpMacroAssembler::StaticCall(
468 const Function& function,
469 PushArgumentInstr* arg1) const {
470 ZoneGrowableArray<PushArgumentInstr*>* arguments =
471 new(I) ZoneGrowableArray<PushArgumentInstr*>(1);
472 arguments->Add(arg1);
473
474 return StaticCall(function, arguments);
475 }
476
477
478 StaticCallInstr* IRRegExpMacroAssembler::StaticCall(
479 const Function& function,
480 PushArgumentInstr* arg1,
481 PushArgumentInstr* arg2) const {
482 ZoneGrowableArray<PushArgumentInstr*>* arguments =
483 new(I) ZoneGrowableArray<PushArgumentInstr*>(2);
484 arguments->Add(arg1);
485 arguments->Add(arg2);
486
487 return StaticCall(function, arguments);
488 }
489
490
491 StaticCallInstr* IRRegExpMacroAssembler::StaticCall(
492 const Function& function,
493 ZoneGrowableArray<PushArgumentInstr*>* arguments) const {
494 return new(I) StaticCallInstr(kNoTokenPos,
495 function,
496 Object::null_array(),
497 arguments,
498 ic_data_array_);
499 }
500
501
502 InstanceCallInstr* IRRegExpMacroAssembler::InstanceCall(
503 const InstanceCallDescriptor& desc,
504 PushArgumentInstr* arg1) const {
505 ZoneGrowableArray<PushArgumentInstr*>* arguments =
506 new(I) ZoneGrowableArray<PushArgumentInstr*>(1);
507 arguments->Add(arg1);
508
509 return InstanceCall(desc, arguments);
510 }
511
512
513 InstanceCallInstr* IRRegExpMacroAssembler::InstanceCall(
514 const InstanceCallDescriptor& desc,
515 PushArgumentInstr* arg1,
516 PushArgumentInstr* arg2) const {
517 ZoneGrowableArray<PushArgumentInstr*>* arguments =
518 new(I) ZoneGrowableArray<PushArgumentInstr*>(2);
519 arguments->Add(arg1);
520 arguments->Add(arg2);
521
522 return InstanceCall(desc, arguments);
523 }
524
525
526 InstanceCallInstr* IRRegExpMacroAssembler::InstanceCall(
527 const InstanceCallDescriptor& desc,
528 PushArgumentInstr* arg1,
529 PushArgumentInstr* arg2,
530 PushArgumentInstr* arg3) const {
531 ZoneGrowableArray<PushArgumentInstr*>* arguments =
532 new(I) ZoneGrowableArray<PushArgumentInstr*>(3);
533 arguments->Add(arg1);
534 arguments->Add(arg2);
535 arguments->Add(arg3);
536
537 return InstanceCall(desc, arguments);
538 }
539
540
541 InstanceCallInstr* IRRegExpMacroAssembler::InstanceCall(
542 const InstanceCallDescriptor& desc,
543 ZoneGrowableArray<PushArgumentInstr*> *arguments) const {
544 return
545 new(I) InstanceCallInstr(kNoTokenPos,
546 desc.name,
547 desc.token_kind,
548 arguments,
549 Object::null_array(),
550 desc.checked_argument_count,
551 ic_data_array_);
552 }
553
554
555 LoadLocalInstr* IRRegExpMacroAssembler::LoadLocal(LocalVariable* local) const {
556 return new(I) LoadLocalInstr(*local);
557 }
558
559
560 void IRRegExpMacroAssembler::StoreLocal(LocalVariable* local,
561 Value* value) {
562 Do(new(I) StoreLocalInstr(*local, value));
563 }
564
565
566 void IRRegExpMacroAssembler::set_current_instruction(Instruction* instruction) {
567 current_instruction_ = instruction;
568 }
569
570
571 Value* IRRegExpMacroAssembler::Bind(Definition* definition) {
572 AppendInstruction(definition);
573 definition->set_temp_index(temp_id_.Alloc());
574
575 return new(I) Value(definition);
576 }
577
578
579 void IRRegExpMacroAssembler::Do(Definition* definition) {
580 AppendInstruction(definition);
581 }
582
583
584 Value* IRRegExpMacroAssembler::BindLoadLocal(const LocalVariable& local) {
585 if (local.IsConst()) {
586 return Bind(new(I) ConstantInstr(*local.ConstValue()));
587 }
588 ASSERT(!local.is_captured());
589 return Bind(new(I) LoadLocalInstr(local));
590 }
591
592
593 // In some cases, the V8 irregexp engine generates unreachable code by emitting
594 // a jmp not followed by a bind. We cannot do the same, since it is impossible
595 // to append to a block following a jmp. In such cases, assume that we are doing
596 // the correct thing, but output a warning when tracing.
597 #define HANDLE_DEAD_CODE_EMISSION() \
598 if (current_instruction_ == NULL) { \
599 if (FLAG_trace_irregexp) { \
600 OS::Print("WARNING: Attempting to append to a closed assembler. " \
601 "This could be either a bug or generation of dead code " \
602 "inherited from V8.\n"); \
603 } \
604 BlockLabel dummy; \
605 BindBlock(&dummy); \
606 }
607
608 void IRRegExpMacroAssembler::AppendInstruction(Instruction* instruction) {
609 HANDLE_DEAD_CODE_EMISSION();
610
611 ASSERT(current_instruction_ != NULL);
612 ASSERT(current_instruction_->next() == NULL);
613
614 temp_id_.Dealloc(instruction->InputCount());
615 arg_id_.Dealloc(instruction->ArgumentCount());
616
617 current_instruction_->LinkTo(instruction);
618 set_current_instruction(instruction);
619 }
620
621
622 void IRRegExpMacroAssembler::CloseBlockWith(Instruction* instruction) {
623 HANDLE_DEAD_CODE_EMISSION();
624
625 ASSERT(current_instruction_ != NULL);
626 ASSERT(current_instruction_->next() == NULL);
627
628 temp_id_.Dealloc(instruction->InputCount());
629 arg_id_.Dealloc(instruction->ArgumentCount());
630
631 current_instruction_->LinkTo(instruction);
632 set_current_instruction(NULL);
633 }
634
635
636 void IRRegExpMacroAssembler::GoTo(BlockLabel* to) {
637 if (to == NULL) {
638 Backtrack();
639 } else {
640 to->SetLinked();
641 GoTo(to->block());
642 }
643 }
644
645
646 // Closes the current block with a goto, and unsets current_instruction_.
647 // BindBlock() must be called before emission can continue.
648 void IRRegExpMacroAssembler::GoTo(JoinEntryInstr* to) {
649 HANDLE_DEAD_CODE_EMISSION();
650
651 ASSERT(current_instruction_ != NULL);
652 ASSERT(current_instruction_->next() == NULL);
653 current_instruction_->Goto(to);
654 set_current_instruction(NULL);
655 }
656
657
658 PushArgumentInstr* IRRegExpMacroAssembler::PushArgument(Value* value) {
659 arg_id_.Alloc();
660 PushArgumentInstr* push = new(I) PushArgumentInstr(value);
661 // Do *not* use Do() for push argument instructions.
662 AppendInstruction(push);
663 return push;
664 }
665
666
667 PushArgumentInstr* IRRegExpMacroAssembler::PushLocal(LocalVariable* local) {
668 return PushArgument(Bind(LoadLocal(local)));
669 }
670
671
672 void IRRegExpMacroAssembler::Print(const char* str) {
673 Print(PushArgument(
674 Bind(new(I) ConstantInstr(
675 String::ZoneHandle(I, String::New(str, Heap::kOld))))));
676 }
677
678
679 void IRRegExpMacroAssembler::Print(PushArgumentInstr* argument) {
680 const Library& lib = Library::Handle(Library::CoreLibrary());
681 const Function& print_fn = Function::ZoneHandle(
682 I, lib.LookupFunctionAllowPrivate(Symbols::print()));
683 Do(StaticCall(print_fn, argument));
684 }
685
686
687 void IRRegExpMacroAssembler::PrintBlocks() {
688 for (intptr_t i = 0; i < blocks_.length(); i++) {
689 FlowGraphPrinter::PrintBlock(blocks_[i], false);
690 }
691 }
692
693
694 intptr_t IRRegExpMacroAssembler::stack_limit_slack() {
695 return 32;
696 }
697
698
699 void IRRegExpMacroAssembler::AdvanceCurrentPosition(intptr_t by) {
700 TAG();
701 if (by != 0) {
702 PushArgumentInstr* cur_pos_push = PushLocal(current_position_);
703 PushArgumentInstr* by_push = PushArgument(Bind(Int64Constant(by)));
704
705 Value* new_pos_value = Bind(Add(cur_pos_push, by_push));
706 StoreLocal(current_position_, new_pos_value);
707 }
708 }
709
710
711 void IRRegExpMacroAssembler::AdvanceRegister(intptr_t reg, intptr_t by) {
712 TAG();
713 ASSERT(reg >= 0);
714 ASSERT(reg < position_registers_.length());
715
716 if (by != 0) {
717 PushArgumentInstr* reg_push = PushLocal(position_register(reg));
718 PushArgumentInstr* by_push = PushArgument(Bind(Int64Constant(by)));
719 StoreLocal(position_register(reg), Bind(Add(reg_push, by_push)));
720 }
721 }
722
723
724 void IRRegExpMacroAssembler::Backtrack() {
725 TAG();
726 CheckPreemption();
727
728 GrowableObjectArray& offsets = GrowableObjectArray::ZoneHandle(
729 I, GrowableObjectArray::New(Heap::kOld));
730
731 PushArgumentInstr* block_offsets_push =
732 PushArgument(Bind(new(I) ConstantInstr(offsets)));
733 PushArgumentInstr* block_id_push = PushArgument(PopStack());
734
735 Value* offset_value =
736 Bind(InstanceCall(InstanceCallDescriptor::FromToken(Token::kINDEX),
737 block_offsets_push,
738 block_id_push));
739
740 IndirectGotoInstr* igoto = new(I) IndirectGotoInstr(&offsets, offset_value);
741 CloseBlockWith(igoto);
742 igotos_.Add(igoto);
743 }
744
745
746 // A BindBlock is analogous to assigning a label to a basic block.
747 // If the BlockLabel does not yet contain a block, it is created.
748 // If there is a current instruction, append a goto to the bound block.
749 void IRRegExpMacroAssembler::BindBlock(BlockLabel* label) {
750 ASSERT(!label->IsBound());
751 ASSERT(label->block()->next() == NULL);
752
753 label->SetBound(block_id_.Alloc());
754 blocks_.Add(label->block());
755
756 if (current_instruction_ != NULL) {
757 GoTo(label);
758 }
759 set_current_instruction(label->block());
760
761 // Print the id of the current block if tracing.
762 PRINT(PushArgument(Bind(Uint64Constant(label->block()->block_id()))));
763 }
764
765
766 intptr_t IRRegExpMacroAssembler::GetNextLocalIndex() {
767 intptr_t id = local_id_.Alloc();
768 return kFirstLocalSlotFromFp - id;
769 }
770
771
772 LocalVariable* IRRegExpMacroAssembler::position_register(intptr_t index) {
773 // Create position registers as needed.
774 for (intptr_t i = position_registers_.length(); i < index + 1; i++) {
775 position_registers_.Add(Local(Symbols::position_registers_()));
776 }
777
778 return position_registers_[index];
779 }
780
781
782 // TODO(zerny): Move the offset table outside to avoid having to keep
783 // the assembler around until after code generation; both function or regexp
784 // would work.
785 void IRRegExpMacroAssembler::FinalizeBlockOffsetTable() {
786 for (intptr_t i = 0; i < igotos_.length(); i++) {
787 IndirectGotoInstr* igoto = igotos_[i];
788 igoto->SetOffsetCount(I, indirect_id_.Count());
789
790 for (intptr_t j = 0; j < igoto->SuccessorCount(); j++) {
791 TargetEntryInstr* target = igoto->SuccessorAt(j);
792
793 // Optimizations might have modified the immediate target block, but
794 // it must end with a goto to the indirect entry.
795 Instruction* instr = target;
796 while (instr != NULL && !instr->IsGoto()) {
797 instr = instr->next();
798 }
799 ASSERT(instr->IsGoto());
800
801 IndirectEntryInstr* ientry =
802 instr->AsGoto()->successor()->AsIndirectEntry();
803 ASSERT(ientry != NULL);
804
805 // The intermediate block was possibly compacted, check both it and the
806 // final indirect entry for a valid offset. If neither are valid, then
807 // the indirect entry is unreachable.
808 intptr_t offset =
809 (target->offset() > 0) ? target->offset() : ientry->offset();
810 if (offset > 0) {
811 intptr_t adjusted_offset =
812 offset - Assembler::EntryPointToPcMarkerOffset();
813 igoto->SetOffsetAt(I, ientry->indirect_id(), adjusted_offset);
814 }
815 }
816 }
817 }
818
819 void IRRegExpMacroAssembler::FinalizeIndirectGotos() {
820 for (intptr_t i = 0; i < igotos_.length(); i++) {
821 for (intptr_t j = 0; j < entry_block_->indirect_entries().length(); j++) {
822 igotos_.At(i)->AddSuccessor(
823 TargetWithJoinGoto(entry_block_->indirect_entries().At(j)));
824 }
825 }
826 }
827
828
829 void IRRegExpMacroAssembler::CheckCharacter(uint32_t c, BlockLabel* on_equal) {
830 TAG();
831 Definition* cur_char_def = LoadLocal(current_character_);
832 Definition* char_def = Uint64Constant(c);
833
834 BranchOrBacktrack(Comparison(kEQ, cur_char_def, char_def),
835 on_equal);
836 }
837
838
839 void IRRegExpMacroAssembler::CheckCharacterGT(uint16_t limit,
840 BlockLabel* on_greater) {
841 TAG();
842 BranchOrBacktrack(Comparison(kGT,
843 LoadLocal(current_character_),
844 Uint64Constant(limit)),
845 on_greater);
846 }
847
848
849 void IRRegExpMacroAssembler::CheckAtStart(BlockLabel* on_at_start) {
850 TAG();
851
852 BlockLabel not_at_start;
853
854 // Did we start the match at the start of the string at all?
855 BranchOrBacktrack(Comparison(kNE,
856 LoadLocal(start_index_param_),
857 Uint64Constant(0)),
858 &not_at_start);
859
860 // If we did, are we still at the start of the input, i.e. is
861 // (offset == string_length * -1)?
862 Definition* neg_len_def =
863 InstanceCall(InstanceCallDescriptor::FromToken(Token::kNEGATE),
864 PushLocal(string_param_length_));
865 Definition* offset_def = LoadLocal(current_position_);
866 BranchOrBacktrack(Comparison(kEQ, neg_len_def, offset_def),
867 on_at_start);
868
869 BindBlock(&not_at_start);
870 }
871
872
873 void IRRegExpMacroAssembler::CheckNotAtStart(BlockLabel* on_not_at_start) {
874 TAG();
875
876 // Did we start the match at the start of the string at all?
877 BranchOrBacktrack(Comparison(kNE,
878 LoadLocal(start_index_param_),
879 Uint64Constant(0)),
880 on_not_at_start);
881
882 // If we did, are we still at the start of the input, i.e. is
883 // (offset == string_length * -1)?
884 Definition* neg_len_def =
885 InstanceCall(InstanceCallDescriptor::FromToken(Token::kNEGATE),
886 PushLocal(string_param_length_));
887 Definition* offset_def = LoadLocal(current_position_);
888 BranchOrBacktrack(Comparison(kNE, neg_len_def, offset_def),
889 on_not_at_start);
890 }
891
892
893 void IRRegExpMacroAssembler::CheckCharacterLT(uint16_t limit,
894 BlockLabel* on_less) {
895 TAG();
896 BranchOrBacktrack(Comparison(kLT,
897 LoadLocal(current_character_),
898 Uint64Constant(limit)),
899 on_less);
900 }
901
902
903 void IRRegExpMacroAssembler::CheckGreedyLoop(BlockLabel* on_equal) {
904 TAG();
905
906 BlockLabel fallthrough;
907
908 PushArgumentInstr* stack_push = PushLocal(stack_);
909 Definition* stack_tip_def = InstanceCall(
910 InstanceCallDescriptor(String::ZoneHandle(
911 I, Field::GetterSymbol(Symbols::last()))),
912 stack_push);
913 Definition* cur_pos_def = LoadLocal(current_position_);
914
915 BranchOrBacktrack(Comparison(kNE, stack_tip_def, cur_pos_def),
916 &fallthrough);
917
918 // Pop, throwing away the value.
919 stack_push = PushLocal(stack_);
920 Do(InstanceCall(InstanceCallDescriptor(Symbols::removeLast()),
921 stack_push));
922
923 BranchOrBacktrack(NULL, on_equal);
924
925 BindBlock(&fallthrough);
926 }
927
928
929 void IRRegExpMacroAssembler::CheckNotBackReferenceIgnoreCase(
930 intptr_t start_reg,
931 BlockLabel* on_no_match) {
932 TAG();
933 ASSERT(start_reg + 1 <= position_registers_.length());
934
935 BlockLabel fallthrough;
936
937 PushArgumentInstr* end_push = PushLocal(position_register(start_reg + 1));
938 PushArgumentInstr* start_push = PushLocal(position_register(start_reg));
939 StoreLocal(capture_length_, Bind(Sub(end_push, start_push)));
940
941 // The length of a capture should not be negative. This can only happen
942 // if the end of the capture is unrecorded, or at a point earlier than
943 // the start of the capture.
944 // BranchOrBacktrack(less, on_no_match);
945
946 BranchOrBacktrack(Comparison(kLT,
947 LoadLocal(capture_length_),
948 Uint64Constant(0)),
949 on_no_match);
950
951 // If length is zero, either the capture is empty or it is completely
952 // uncaptured. In either case succeed immediately.
953 BranchOrBacktrack(Comparison(kEQ,
954 LoadLocal(capture_length_),
955 Uint64Constant(0)),
956 &fallthrough);
957
958
959 // Check that there are sufficient characters left in the input.
960 PushArgumentInstr* pos_push = PushLocal(current_position_);
961 PushArgumentInstr* len_push = PushLocal(capture_length_);
962 BranchOrBacktrack(
963 Comparison(kGT,
964 InstanceCall(InstanceCallDescriptor::FromToken(Token::kADD),
965 pos_push,
966 len_push),
967 Uint64Constant(0)),
968 on_no_match);
969
970 pos_push = PushLocal(current_position_);
971 len_push = PushLocal(string_param_length_);
972 StoreLocal(match_start_index_, Bind(Add(pos_push, len_push)));
973
974 pos_push = PushLocal(position_register(start_reg));
975 len_push = PushLocal(string_param_length_);
976 StoreLocal(capture_start_index_, Bind(Add(pos_push, len_push)));
977
978 pos_push = PushLocal(match_start_index_);
979 len_push = PushLocal(capture_length_);
980 StoreLocal(match_end_index_, Bind(Add(pos_push, len_push)));
981
982 BlockLabel success;
983 if (mode_ == ASCII) {
984 BlockLabel loop_increment;
985 BlockLabel loop;
986 BindBlock(&loop);
987
988 StoreLocal(char_in_capture_, CharacterAt(LoadLocal(capture_start_index_)));
989 StoreLocal(char_in_match_, CharacterAt(LoadLocal(match_start_index_)));
990
991 BranchOrBacktrack(Comparison(kEQ,
992 LoadLocal(char_in_capture_),
993 LoadLocal(char_in_match_)),
994 &loop_increment);
995
996 // Mismatch, try case-insensitive match (converting letters to lower-case).
997 PushArgumentInstr* match_char_push = PushLocal(char_in_match_);
998 PushArgumentInstr* mask_push = PushArgument(Bind(Uint64Constant(0x20)));
999 StoreLocal(char_in_match_,
1000 Bind(InstanceCall(
1001 InstanceCallDescriptor::FromToken(Token::kBIT_OR),
1002 match_char_push,
1003 mask_push)));
1004
1005 BlockLabel convert_capture;
1006 BlockLabel on_not_in_range;
1007 BranchOrBacktrack(Comparison(kLT,
1008 LoadLocal(char_in_match_),
1009 Uint64Constant('a')),
1010 &on_not_in_range);
1011 BranchOrBacktrack(Comparison(kGT,
1012 LoadLocal(char_in_match_),
1013 Uint64Constant('z')),
1014 &on_not_in_range);
1015 GoTo(&convert_capture);
1016 BindBlock(&on_not_in_range);
1017
1018 // Latin-1: Check for values in range [224,254] but not 247.
1019 BranchOrBacktrack(Comparison(kLT,
1020 LoadLocal(char_in_match_),
1021 Uint64Constant(224)),
1022 on_no_match);
1023 BranchOrBacktrack(Comparison(kGT,
1024 LoadLocal(char_in_match_),
1025 Uint64Constant(254)),
1026 on_no_match);
1027
1028 BranchOrBacktrack(Comparison(kEQ,
1029 LoadLocal(char_in_match_),
1030 Uint64Constant(247)),
1031 on_no_match);
1032
1033 // Also convert capture character.
1034 BindBlock(&convert_capture);
1035
1036 PushArgumentInstr* capture_char_push = PushLocal(char_in_capture_);
1037 mask_push = PushArgument(Bind(Uint64Constant(0x20)));
1038 StoreLocal(char_in_capture_,
1039 Bind(InstanceCall(
1040 InstanceCallDescriptor::FromToken(Token::kBIT_OR),
1041 capture_char_push,
1042 mask_push)));
1043
1044 BranchOrBacktrack(Comparison(kNE,
1045 LoadLocal(char_in_match_),
1046 LoadLocal(char_in_capture_)),
1047 on_no_match);
1048
1049 BindBlock(&loop_increment);
1050
1051 // Increment pointers into match and capture strings.
1052 StoreLocal(capture_start_index_, Bind(Add(
1053 PushLocal(capture_start_index_),
1054 PushArgument(Bind(Uint64Constant(1))))));
1055 StoreLocal(match_start_index_, Bind(Add(
1056 PushLocal(match_start_index_),
1057 PushArgument(Bind(Uint64Constant(1))))));
1058
1059 // Compare to end of match, and loop if not done.
1060 BranchOrBacktrack(Comparison(kLT,
1061 LoadLocal(match_start_index_),
1062 LoadLocal(match_end_index_)),
1063 &loop);
1064 } else {
1065 ASSERT(mode_ == UC16);
1066
1067 Value* string_value = Bind(LoadLocal(string_param_));
1068 Value* lhs_index_value = Bind(LoadLocal(match_start_index_));
1069 Value* rhs_index_value = Bind(LoadLocal(capture_start_index_));
1070 Value* length_value = Bind(LoadLocal(capture_length_));
1071
1072 Definition* is_match_def =
1073 new(I) CaseInsensitiveCompareUC16Instr(
1074 string_value,
1075 lhs_index_value,
1076 rhs_index_value,
1077 length_value,
1078 specialization_cid_);
1079
1080 BranchOrBacktrack(Comparison(kNE, is_match_def, BoolConstant(true)),
1081 on_no_match);
1082 }
1083
1084 BindBlock(&success);
1085
1086 // Move current character position to position after match.
1087 PushArgumentInstr* match_end_push = PushLocal(match_end_index_);
1088 len_push = PushLocal(string_param_length_);
1089 StoreLocal(current_position_, Bind(Sub(match_end_push, len_push)));
1090
1091 BindBlock(&fallthrough);
1092 }
1093
1094
1095 void IRRegExpMacroAssembler::CheckNotBackReference(
1096 intptr_t start_reg,
1097 BlockLabel* on_no_match) {
1098 TAG();
1099 ASSERT(start_reg + 1 <= position_registers_.length());
1100
1101 BlockLabel fallthrough;
1102 BlockLabel success;
1103
1104 // Find length of back-referenced capture.
1105 PushArgumentInstr* end_push = PushLocal(position_register(start_reg + 1));
1106 PushArgumentInstr* start_push = PushLocal(position_register(start_reg));
1107 StoreLocal(capture_length_, Bind(Sub(end_push, start_push)));
1108
1109 // Fail on partial or illegal capture (start of capture after end of capture).
1110 BranchOrBacktrack(Comparison(kLT,
1111 LoadLocal(capture_length_),
1112 Uint64Constant(0)),
1113 on_no_match);
1114
1115 // Succeed on empty capture (including no capture)
1116 BranchOrBacktrack(Comparison(kEQ,
1117 LoadLocal(capture_length_),
1118 Uint64Constant(0)),
1119 &fallthrough);
1120
1121 // Check that there are sufficient characters left in the input.
1122 PushArgumentInstr* pos_push = PushLocal(current_position_);
1123 PushArgumentInstr* len_push = PushLocal(capture_length_);
1124 BranchOrBacktrack(
1125 Comparison(kGT,
1126 InstanceCall(InstanceCallDescriptor::FromToken(Token::kADD),
1127 pos_push,
1128 len_push),
1129 Uint64Constant(0)),
1130 on_no_match);
1131
1132 // Compute pointers to match string and capture string.
1133 pos_push = PushLocal(current_position_);
1134 len_push = PushLocal(string_param_length_);
1135 StoreLocal(match_start_index_, Bind(Add(pos_push, len_push)));
1136
1137 pos_push = PushLocal(position_register(start_reg));
1138 len_push = PushLocal(string_param_length_);
1139 StoreLocal(capture_start_index_, Bind(Add(pos_push, len_push)));
1140
1141 pos_push = PushLocal(match_start_index_);
1142 len_push = PushLocal(capture_length_);
1143 StoreLocal(match_end_index_, Bind(Add(pos_push, len_push)));
1144
1145 BlockLabel loop;
1146 BindBlock(&loop);
1147
1148 StoreLocal(char_in_capture_, CharacterAt(LoadLocal(capture_start_index_)));
1149 StoreLocal(char_in_match_, CharacterAt(LoadLocal(match_start_index_)));
1150
1151 BranchOrBacktrack(Comparison(kNE,
1152 LoadLocal(char_in_capture_),
1153 LoadLocal(char_in_match_)),
1154 on_no_match);
1155
1156 // Increment pointers into capture and match string.
1157 StoreLocal(capture_start_index_, Bind(Add(
1158 PushLocal(capture_start_index_),
1159 PushArgument(Bind(Uint64Constant(1))))));
1160 StoreLocal(match_start_index_, Bind(Add(
1161 PushLocal(match_start_index_),
1162 PushArgument(Bind(Uint64Constant(1))))));
1163
1164 // Check if we have reached end of match area.
1165 BranchOrBacktrack(Comparison(kLT,
1166 LoadLocal(match_start_index_),
1167 LoadLocal(match_end_index_)),
1168 &loop);
1169
1170 BindBlock(&success);
1171
1172 // Move current character position to position after match.
1173 PushArgumentInstr* match_end_push = PushLocal(match_end_index_);
1174 len_push = PushLocal(string_param_length_);
1175 StoreLocal(current_position_, Bind(Sub(match_end_push, len_push)));
1176
1177 BindBlock(&fallthrough);
1178 }
1179
1180
1181 void IRRegExpMacroAssembler::CheckNotCharacter(uint32_t c,
1182 BlockLabel* on_not_equal) {
1183 TAG();
1184 BranchOrBacktrack(Comparison(kNE,
1185 LoadLocal(current_character_),
1186 Uint64Constant(c)),
1187 on_not_equal);
1188 }
1189
1190
1191 void IRRegExpMacroAssembler::CheckCharacterAfterAnd(uint32_t c,
1192 uint32_t mask,
1193 BlockLabel* on_equal) {
1194 TAG();
1195
1196 Definition* actual_def = LoadLocal(current_character_);
1197 Definition* expected_def = Uint64Constant(c);
1198
1199 PushArgumentInstr* actual_push = PushArgument(Bind(actual_def));
1200 PushArgumentInstr* mask_push = PushArgument(Bind(Uint64Constant(mask)));
1201 actual_def = InstanceCall(InstanceCallDescriptor::FromToken(Token::kBIT_AND),
1202 actual_push,
1203 mask_push);
1204
1205 BranchOrBacktrack(Comparison(kEQ, actual_def, expected_def), on_equal);
1206 }
1207
1208
1209 void IRRegExpMacroAssembler::CheckNotCharacterAfterAnd(
1210 uint32_t c,
1211 uint32_t mask,
1212 BlockLabel* on_not_equal) {
1213 TAG();
1214
1215 Definition* actual_def = LoadLocal(current_character_);
1216 Definition* expected_def = Uint64Constant(c);
1217
1218 PushArgumentInstr* actual_push = PushArgument(Bind(actual_def));
1219 PushArgumentInstr* mask_push = PushArgument(Bind(Uint64Constant(mask)));
1220 actual_def = InstanceCall(InstanceCallDescriptor::FromToken(Token::kBIT_AND),
1221 actual_push,
1222 mask_push);
1223
1224 BranchOrBacktrack(Comparison(kNE, actual_def, expected_def), on_not_equal);
1225 }
1226
1227
1228 void IRRegExpMacroAssembler::CheckNotCharacterAfterMinusAnd(
1229 uint16_t c,
1230 uint16_t minus,
1231 uint16_t mask,
1232 BlockLabel* on_not_equal) {
1233 TAG();
1234 ASSERT(minus < Utf16::kMaxCodeUnit); // NOLINT
1235
1236 Definition* actual_def = LoadLocal(current_character_);
1237 Definition* expected_def = Uint64Constant(c);
1238
1239 PushArgumentInstr* actual_push = PushArgument(Bind(actual_def));
1240 PushArgumentInstr* minus_push = PushArgument(Bind(Uint64Constant(minus)));
1241
1242 actual_push = PushArgument(Bind(Sub(actual_push, minus_push)));
1243 PushArgumentInstr* mask_push = PushArgument(Bind(Uint64Constant(mask)));
1244 actual_def = InstanceCall(InstanceCallDescriptor::FromToken(Token::kBIT_AND),
1245 actual_push,
1246 mask_push);
1247
1248 BranchOrBacktrack(Comparison(kNE, actual_def, expected_def), on_not_equal);
1249 }
1250
1251
1252 void IRRegExpMacroAssembler::CheckCharacterInRange(
1253 uint16_t from,
1254 uint16_t to,
1255 BlockLabel* on_in_range) {
1256 TAG();
1257 ASSERT(from <= to);
1258
1259 // TODO(zerny): All range comparisons could be done cheaper with unsigned
1260 // compares. This pattern repeats in various places.
1261
1262 BlockLabel on_not_in_range;
1263 BranchOrBacktrack(Comparison(kLT,
1264 LoadLocal(current_character_),
1265 Uint64Constant(from)),
1266 &on_not_in_range);
1267 BranchOrBacktrack(Comparison(kGT,
1268 LoadLocal(current_character_),
1269 Uint64Constant(to)),
1270 &on_not_in_range);
1271 BranchOrBacktrack(NULL, on_in_range);
1272
1273 BindBlock(&on_not_in_range);
1274 }
1275
1276
1277 void IRRegExpMacroAssembler::CheckCharacterNotInRange(
1278 uint16_t from,
1279 uint16_t to,
1280 BlockLabel* on_not_in_range) {
1281 TAG();
1282 ASSERT(from <= to);
1283
1284 BranchOrBacktrack(Comparison(kLT,
1285 LoadLocal(current_character_),
1286 Uint64Constant(from)),
1287 on_not_in_range);
1288
1289 BranchOrBacktrack(Comparison(kGT,
1290 LoadLocal(current_character_),
1291 Uint64Constant(to)),
1292 on_not_in_range);
1293 }
1294
1295
1296 void IRRegExpMacroAssembler::CheckBitInTable(
1297 const TypedData& table,
1298 BlockLabel* on_bit_set) {
1299 TAG();
1300
1301 PushArgumentInstr* table_push =
1302 PushArgument(Bind(new(I) ConstantInstr(table)));
1303 PushArgumentInstr* index_push = PushLocal(current_character_);
1304
1305 if (mode_ != ASCII || kTableMask != Symbols::kMaxOneCharCodeSymbol) {
1306 PushArgumentInstr* mask_push =
1307 PushArgument(Bind(Uint64Constant(kTableSize - 1)));
1308 index_push = PushArgument(
1309 Bind(InstanceCall(InstanceCallDescriptor::FromToken(Token::kBIT_AND),
1310 index_push,
1311 mask_push)));
1312 }
1313
1314 Definition* byte_def =
1315 InstanceCall(InstanceCallDescriptor::FromToken(Token::kINDEX),
1316 table_push,
1317 index_push);
1318 Definition* zero_def = Int64Constant(0);
1319
1320 BranchOrBacktrack(Comparison(kNE, byte_def, zero_def), on_bit_set);
1321 }
1322
1323
1324 bool IRRegExpMacroAssembler::CheckSpecialCharacterClass(
1325 uint16_t type,
1326 BlockLabel* on_no_match) {
1327 TAG();
1328
1329 // Range checks (c in min..max) are generally implemented by an unsigned
1330 // (c - min) <= (max - min) check
1331 switch (type) {
1332 case 's':
1333 // Match space-characters
1334 if (mode_ == ASCII) {
1335 // One byte space characters are '\t'..'\r', ' ' and \u00a0.
1336 BlockLabel success;
1337 // Space (' ').
1338 BranchOrBacktrack(Comparison(kEQ,
1339 LoadLocal(current_character_),
1340 Uint64Constant(' ')),
1341 &success);
1342 // Check range 0x09..0x0d.
1343 CheckCharacterInRange('\t', '\r', &success);
1344 // \u00a0 (NBSP).
1345 BranchOrBacktrack(Comparison(kNE,
1346 LoadLocal(current_character_),
1347 Uint64Constant(0x00a0)),
1348 on_no_match);
1349 BindBlock(&success);
1350 return true;
1351 }
1352 return false;
1353 case 'S':
1354 // The emitted code for generic character classes is good enough.
1355 return false;
1356 case 'd':
1357 // Match ASCII digits ('0'..'9')
1358 CheckCharacterNotInRange('0', '9', on_no_match);
1359 return true;
1360 case 'D':
1361 // Match non ASCII-digits
1362 CheckCharacterInRange('0', '9', on_no_match);
1363 return true;
1364 case '.': {
1365 // Match non-newlines (not 0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029)
1366 BranchOrBacktrack(Comparison(kEQ,
1367 LoadLocal(current_character_),
1368 Uint64Constant('\n')),
1369 on_no_match);
1370 BranchOrBacktrack(Comparison(kEQ,
1371 LoadLocal(current_character_),
1372 Uint64Constant('\r')),
1373 on_no_match);
1374 if (mode_ == UC16) {
1375 BranchOrBacktrack(Comparison(kEQ,
1376 LoadLocal(current_character_),
1377 Uint64Constant(0x2028)),
1378 on_no_match);
1379 BranchOrBacktrack(Comparison(kEQ,
1380 LoadLocal(current_character_),
1381 Uint64Constant(0x2029)),
1382 on_no_match);
1383 }
1384 return true;
1385 }
1386 case 'w': {
1387 if (mode_ != ASCII) {
1388 // Table is 128 entries, so all ASCII characters can be tested.
1389 BranchOrBacktrack(Comparison(kGT,
1390 LoadLocal(current_character_),
1391 Uint64Constant('z')),
1392 on_no_match);
1393 }
1394
1395 PushArgumentInstr* table_push =
1396 PushArgument(Bind(WordCharacterMapConstant()));
1397 PushArgumentInstr* index_push = PushLocal(current_character_);
1398
1399 Definition* byte_def =
1400 InstanceCall(InstanceCallDescriptor::FromToken(Token::kINDEX),
1401 table_push,
1402 index_push);
1403 Definition* zero_def = Int64Constant(0);
1404
1405 BranchOrBacktrack(Comparison(kEQ, byte_def, zero_def), on_no_match);
1406
1407 return true;
1408 }
1409 case 'W': {
1410 BlockLabel done;
1411 if (mode_ != ASCII) {
1412 // Table is 128 entries, so all ASCII characters can be tested.
1413 BranchOrBacktrack(Comparison(kGT,
1414 LoadLocal(current_character_),
1415 Uint64Constant('z')),
1416 &done);
1417 }
1418
1419 // TODO(zerny): Refactor to use CheckBitInTable if possible.
1420
1421 PushArgumentInstr* table_push =
1422 PushArgument(Bind(WordCharacterMapConstant()));
1423 PushArgumentInstr* index_push = PushLocal(current_character_);
1424
1425 Definition* byte_def =
1426 InstanceCall(InstanceCallDescriptor::FromToken(Token::kINDEX),
1427 table_push,
1428 index_push);
1429 Definition* zero_def = Int64Constant(0);
1430
1431 BranchOrBacktrack(Comparison(kNE, byte_def, zero_def), on_no_match);
1432
1433 if (mode_ != ASCII) {
1434 BindBlock(&done);
1435 }
1436 return true;
1437 }
1438 // Non-standard classes (with no syntactic shorthand) used internally.
1439 case '*':
1440 // Match any character.
1441 return true;
1442 case 'n': {
1443 // Match newlines (0x0a('\n'), 0x0d('\r'), 0x2028 or 0x2029).
1444 // The opposite of '.'.
1445 BlockLabel success;
1446 BranchOrBacktrack(Comparison(kEQ,
1447 LoadLocal(current_character_),
1448 Uint64Constant('\n')),
1449 &success);
1450 BranchOrBacktrack(Comparison(kEQ,
1451 LoadLocal(current_character_),
1452 Uint64Constant('\r')),
1453 &success);
1454 if (mode_ == UC16) {
1455 BranchOrBacktrack(Comparison(kEQ,
1456 LoadLocal(current_character_),
1457 Uint64Constant(0x2028)),
1458 &success);
1459 BranchOrBacktrack(Comparison(kEQ,
1460 LoadLocal(current_character_),
1461 Uint64Constant(0x2029)),
1462 &success);
1463 }
1464 BranchOrBacktrack(NULL, on_no_match);
1465 BindBlock(&success);
1466 return true;
1467 }
1468 // No custom implementation (yet): s(uint16_t), S(uint16_t).
1469 default:
1470 return false;
1471 }
1472 }
1473
1474
1475 void IRRegExpMacroAssembler::Fail() {
1476 TAG();
1477 ASSERT(FAILURE == 0); // Return value for failure is zero.
1478 if (!global()) {
1479 UNREACHABLE(); // Dart regexps are always global.
1480 }
1481 GoTo(exit_block_);
1482 }
1483
1484
1485 void IRRegExpMacroAssembler::IfRegisterGE(intptr_t reg,
1486 intptr_t comparand,
1487 BlockLabel* if_ge) {
1488 TAG();
1489 BranchOrBacktrack(Comparison(kGTE,
1490 LoadLocal(position_register(reg)),
1491 Int64Constant(comparand)),
1492 if_ge);
1493 }
1494
1495
1496 void IRRegExpMacroAssembler::IfRegisterLT(intptr_t reg,
1497 intptr_t comparand,
1498 BlockLabel* if_lt) {
1499 TAG();
1500 BranchOrBacktrack(Comparison(kLT,
1501 LoadLocal(position_register(reg)),
1502 Int64Constant(comparand)),
1503 if_lt);
1504 }
1505
1506
1507 void IRRegExpMacroAssembler::IfRegisterEqPos(intptr_t reg,
1508 BlockLabel* if_eq) {
1509 TAG();
1510 BranchOrBacktrack(Comparison(kEQ,
1511 LoadLocal(position_register(reg)),
1512 LoadLocal(current_position_)),
1513 if_eq);
1514 }
1515
1516
1517 RegExpMacroAssembler::IrregexpImplementation
1518 IRRegExpMacroAssembler::Implementation() {
1519 return kIRImplementation;
1520 }
1521
1522
1523 void IRRegExpMacroAssembler::LoadCurrentCharacter(intptr_t cp_offset,
1524 BlockLabel* on_end_of_input,
1525 bool check_bounds,
1526 intptr_t characters) {
1527 TAG();
1528 ASSERT(cp_offset >= -1); // ^ and \b can look behind one character.
1529 ASSERT(cp_offset < (1<<30)); // Be sane! (And ensure negation works)
1530 if (check_bounds) {
1531 CheckPosition(cp_offset + characters - 1, on_end_of_input);
1532 }
1533 LoadCurrentCharacterUnchecked(cp_offset, characters);
1534 }
1535
1536
1537 void IRRegExpMacroAssembler::PopCurrentPosition() {
1538 TAG();
1539 StoreLocal(current_position_, PopStack());
1540 }
1541
1542
1543 void IRRegExpMacroAssembler::PopRegister(intptr_t register_index) {
1544 TAG();
1545 ASSERT(register_index < position_registers_.length());
1546 StoreLocal(position_register(register_index), PopStack());
1547 }
1548
1549
1550 void IRRegExpMacroAssembler::PushStack(Definition *definition) {
1551 PushArgumentInstr* stack_push = PushLocal(stack_);
1552 PushArgumentInstr* value_push = PushArgument(Bind(definition));
1553 Do(InstanceCall(InstanceCallDescriptor(Symbols::add()),
1554 stack_push,
1555 value_push));
1556 }
1557
1558
1559 Value* IRRegExpMacroAssembler::PopStack() {
1560 PushArgumentInstr* stack_push = PushLocal(stack_);
1561 return Bind(InstanceCall(InstanceCallDescriptor(Symbols::removeLast()),
1562 stack_push));
1563 }
1564
1565
1566 // Pushes the location corresponding to label to the backtracking stack.
1567 void IRRegExpMacroAssembler::PushBacktrack(BlockLabel* label) {
1568 TAG();
1569
1570 // Ensure that targets of indirect jumps are never accessed through a
1571 // normal control flow instructions by creating a new block for each backtrack
1572 // target.
1573 IndirectEntryInstr* indirect_target = IndirectWithJoinGoto(label->block());
1574
1575 // Add a fake edge from the graph entry for data flow analysis.
1576 entry_block_->AddIndirectEntry(indirect_target);
1577
1578 ConstantInstr* offset = Uint64Constant(indirect_target->indirect_id());
1579 PushStack(offset);
1580 }
1581
1582
1583 void IRRegExpMacroAssembler::PushCurrentPosition() {
1584 TAG();
1585 PushStack(LoadLocal(current_position_));
1586 }
1587
1588
1589 void IRRegExpMacroAssembler::PushRegister(intptr_t register_index) {
1590 TAG();
1591 PushStack(LoadLocal(position_register(register_index)));
1592 }
1593
1594
1595 void IRRegExpMacroAssembler::ReadCurrentPositionFromRegister(intptr_t reg) {
1596 TAG();
1597 StoreLocal(current_position_, Bind(LoadLocal(position_register(reg))));
1598 }
1599
1600 // Resets the size of the stack to the value stored in reg.
1601 void IRRegExpMacroAssembler::ReadStackPointerFromRegister(intptr_t reg) {
1602 TAG();
1603 ASSERT(reg < position_registers_.length());
1604
1605 PushArgumentInstr* stack_push = PushLocal(stack_);
1606 PushArgumentInstr* length_push = PushLocal(position_register(reg));
1607
1608 Do(InstanceCall(InstanceCallDescriptor(
1609 String::ZoneHandle(
1610 I, Field::SetterSymbol(Symbols::Length()))),
1611 stack_push,
1612 length_push));
1613 }
1614
1615 void IRRegExpMacroAssembler::SetCurrentPositionFromEnd(intptr_t by) {
1616 TAG();
1617
1618 BlockLabel after_position;
1619
1620 Definition* cur_pos_def = LoadLocal(current_position_);
1621 Definition* by_value_def = Int64Constant(-by);
1622
1623 BranchOrBacktrack(Comparison(kGTE, cur_pos_def, by_value_def),
1624 &after_position);
1625
1626 StoreLocal(current_position_, Bind(Int64Constant(-by)));
1627
1628 // On RegExp code entry (where this operation is used), the character before
1629 // the current position is expected to be already loaded.
1630 // We have advanced the position, so it's safe to read backwards.
1631 LoadCurrentCharacterUnchecked(-1, 1);
1632
1633 BindBlock(&after_position);
1634 }
1635
1636
1637 void IRRegExpMacroAssembler::SetRegister(intptr_t register_index, intptr_t to) {
1638 TAG();
1639 // Reserved for positions!
1640 ASSERT(register_index >= position_registers_count_);
1641 StoreLocal(position_register(register_index), Bind(Int64Constant(to)));
1642 }
1643
1644
1645 bool IRRegExpMacroAssembler::Succeed() {
1646 TAG();
1647 GoTo(success_block_);
1648 return global();
1649 }
1650
1651
1652 void IRRegExpMacroAssembler::WriteCurrentPositionToRegister(
1653 intptr_t reg, intptr_t cp_offset) {
1654 TAG();
1655
1656 PushArgumentInstr* pos_push = PushLocal(current_position_);
1657 PushArgumentInstr* off_push =
1658 PushArgument(Bind(Int64Constant(cp_offset)));
1659
1660 // Push the negative offset; these are converted to positive string positions
1661 // within the success block.
1662 StoreLocal(position_register(reg), Bind(Add(pos_push, off_push)));
1663 }
1664
1665
1666 void IRRegExpMacroAssembler::ClearRegisters(
1667 intptr_t reg_from, intptr_t reg_to) {
1668 TAG();
1669
1670 ASSERT(reg_from <= reg_to);
1671 ASSERT(reg_to < position_registers_.length());
1672
1673 // In order to clear registers to a final result value of -1, set them to
1674 // (-1 - string length), the offset of -1 from the end of the string.
1675
1676 for (intptr_t reg = reg_from; reg <= reg_to; reg++) {
1677 PushArgumentInstr* minus_one_push =
1678 PushArgument(Bind(Int64Constant(-1)));
1679 PushArgumentInstr* length_push = PushLocal(string_param_length_);
1680
1681 StoreLocal(position_register(reg), Bind(Sub(minus_one_push, length_push)));
1682 }
1683 }
1684
1685
1686 void IRRegExpMacroAssembler::WriteStackPointerToRegister(intptr_t reg) {
1687 TAG();
1688
1689 PushArgumentInstr* stack_push = PushLocal(stack_);
1690 Value* length_value =
1691 Bind(InstanceCall(InstanceCallDescriptor(
1692 String::ZoneHandle(
1693 I, Field::GetterSymbol(Symbols::Length()))),
1694 stack_push));
1695
1696 StoreLocal(position_register(reg), length_value);
1697 }
1698
1699
1700 // Private methods:
1701
1702
1703 void IRRegExpMacroAssembler::CheckPosition(intptr_t cp_offset,
1704 BlockLabel* on_outside_input) {
1705 TAG();
1706 Definition* curpos_def = LoadLocal(current_position_);
1707 Definition* cp_off_def = Int64Constant(-cp_offset);
1708
1709 // If (current_position_ < -cp_offset), we are in bounds.
1710 // Remember, current_position_ is a negative offset from the string end.
1711
1712 BranchOrBacktrack(Comparison(kGTE, curpos_def, cp_off_def),
1713 on_outside_input);
1714 }
1715
1716
1717 void IRRegExpMacroAssembler::BranchOrBacktrack(
1718 ComparisonInstr* comparison,
1719 BlockLabel* true_successor) {
1720 if (comparison == NULL) { // No condition
1721 if (true_successor == NULL) {
1722 Backtrack();
1723 return;
1724 }
1725 GoTo(true_successor);
1726 return;
1727 }
1728
1729 // If no successor block has been passed in, backtrack.
1730 JoinEntryInstr* true_successor_block = backtrack_block_;
1731 if (true_successor != NULL) {
1732 true_successor->SetLinked();
1733 true_successor_block = true_successor->block();
1734 }
1735 ASSERT(true_successor_block != NULL);
1736
1737 // If the condition is not true, fall through to a new block.
1738 BlockLabel fallthrough;
1739
1740 BranchInstr* branch = new(I) BranchInstr(comparison);
1741 *branch->true_successor_address() =
1742 TargetWithJoinGoto(true_successor_block);
1743 *branch->false_successor_address() =
1744 TargetWithJoinGoto(fallthrough.block());
1745
1746 CloseBlockWith(branch);
1747 BindBlock(&fallthrough);
1748 }
1749
1750
1751 TargetEntryInstr* IRRegExpMacroAssembler::TargetWithJoinGoto(
1752 JoinEntryInstr* dst) {
1753 TargetEntryInstr* target = new(I) TargetEntryInstr(
1754 block_id_.Alloc(), kInvalidTryIndex);
1755 blocks_.Add(target);
1756
1757 target->AppendInstruction(new(I) GotoInstr(dst));
1758
1759 return target;
1760 }
1761
1762
1763 IndirectEntryInstr* IRRegExpMacroAssembler::IndirectWithJoinGoto(
1764 JoinEntryInstr* dst) {
1765 IndirectEntryInstr* target = new(I) IndirectEntryInstr(
1766 block_id_.Alloc(), indirect_id_.Alloc(), kInvalidTryIndex);
1767 blocks_.Add(target);
1768
1769 target->AppendInstruction(new(I) GotoInstr(dst));
1770
1771 return target;
1772 }
1773
1774
1775 void IRRegExpMacroAssembler::CheckPreemption() {
1776 TAG();
1777 AppendInstruction(new(I) CheckStackOverflowInstr(kNoTokenPos, 0));
1778 }
1779
1780
1781 Definition* IRRegExpMacroAssembler::Add(
1782 PushArgumentInstr* lhs,
1783 PushArgumentInstr* rhs) {
1784 return InstanceCall(InstanceCallDescriptor::FromToken(Token::kADD), lhs, rhs);
1785 }
1786
1787
1788 Definition* IRRegExpMacroAssembler::Sub(
1789 PushArgumentInstr* lhs,
1790 PushArgumentInstr* rhs) {
1791 return InstanceCall(InstanceCallDescriptor::FromToken(Token::kSUB), lhs, rhs);
1792 }
1793
1794
1795 void IRRegExpMacroAssembler::LoadCurrentCharacterUnchecked(
1796 intptr_t cp_offset, intptr_t characters) {
1797 TAG();
1798
1799 ASSERT(characters == 1 || CanReadUnaligned());
1800 if (mode_ == ASCII) {
1801 ASSERT(characters == 1 || characters == 2 || characters == 4);
1802 } else {
1803 ASSERT(mode_ == UC16);
1804 ASSERT(characters == 1 || characters == 2);
1805 }
1806
1807 // Bind the pattern as the load receiver.
1808 Value* pattern = BindLoadLocal(*string_param_);
1809
1810 // Calculate the addressed string index as:
1811 // cp_offset + current_position_ + string_param_length_
1812 // TODO(zerny): Avoid generating 'add' instance-calls here.
1813 PushArgumentInstr* off_arg =
1814 PushArgument(Bind(Int64Constant(cp_offset)));
1815 PushArgumentInstr* pos_arg =
1816 PushArgument(BindLoadLocal(*current_position_));
1817 PushArgumentInstr* off_pos_arg =
1818 PushArgument(Bind(Add(off_arg, pos_arg)));
1819 PushArgumentInstr* len_arg =
1820 PushArgument(BindLoadLocal(*string_param_length_));
1821 Value* index = Bind(Add(off_pos_arg, len_arg));
1822
1823 // Load and store the code units.
1824 Value* code_unit_value = LoadCodeUnitsAt(pattern, index, characters);
1825 StoreLocal(current_character_, code_unit_value);
1826 PRINT(PushLocal(current_character_));
1827 }
1828
1829
1830 Value* IRRegExpMacroAssembler::CharacterAt(Definition* index) {
1831 Value* pattern_val = BindLoadLocal(*string_param_);
1832 Value* index_val = Bind(index);
1833 return LoadCodeUnitsAt(pattern_val, index_val, 1);
1834 }
1835
1836
1837 // Note: We can't replace pattern with a load-local of string_param_
1838 // because we need to maintain the stack discipline in unoptimized code.
1839 Value* IRRegExpMacroAssembler::LoadCodeUnitsAt(Value* pattern,
1840 Value* index,
1841 intptr_t characters) {
1842 return Bind(new(I) LoadCodeUnitsInstr(
1843 pattern,
1844 index,
1845 characters,
1846 specialization_cid_,
1847 Scanner::kNoSourcePos));
1848 }
1849
1850
1851 #undef __
22 1852
23 } // namespace dart 1853 } // namespace dart
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698