Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1086)

Side by Side Diff: runtime/vm/regexp_assembler.cc

Issue 539153002: Port and integrate the irregexp engine from V8 (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: Port remaining V8 regexp tests and fix exposed bugs. Created 6 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file.
4
5 #include "vm/regexp_assembler.h"
6
7 #include "vm/bit_vector.h"
8 #include "vm/compiler.h"
9 #include "vm/dart_entry.h"
10 #include "vm/flow_graph_builder.h"
11 #include "vm/il_printer.h"
12 #include "vm/object_store.h"
13 #include "vm/regexp.h"
14 #include "vm/resolver.h"
15 #include "vm/stack_frame.h"
16 #include "vm/unibrow-inl.h"
17 #include "vm/unicode.h"
18
// Shorthand used throughout this file; expands to a call to isolate().
19 #define I isolate()
20
21 // Debugging output macros. TAG() is called at the head of each interesting
22 // function and prints its name during execution if irregexp tracing is enabled.
// FLAG_trace_irregexp is tested at the point where the macro is expanded, so
// the Print() call is only emitted when tracing is enabled at that time.
23 #define TAG() if (FLAG_trace_irregexp) { TAG_(); }
// TAG_() builds the constant string "TAG: " + __FUNCTION__, pushes it as an
// argument and hands it to Print().
24 #define TAG_() \
25 Print(PushArgument( \
26 Bind(new(I) ConstantInstr(String::ZoneHandle(I, String::Concat( \
27 String::Handle(String::New("TAG: ")), \
28 String::Handle(String::New(__FUNCTION__)), Heap::kOld))))));
29
// PRINT(arg) forwards arg to Print() only when irregexp tracing is enabled.
30 #define PRINT(arg) if (FLAG_trace_irregexp) { Print(arg); }
31
32 namespace dart {
33
// Flag tested by the TAG(), PRINT() and dead-code warning macros in this file.
34 DEFINE_FLAG(bool, trace_irregexp, false, "Trace irregexps");
35
// Try index passed to every block entry instruction created in this file.
36 static const intptr_t kInvalidTryIndex = -1;
// Token position passed to the instructions and local variables created in
// this file.
37 static const intptr_t kNoTokenPos = -1;
38
39 /*
40 * This assembler uses the following main local variables:
41 * - stack_: A pointer to a growable list which we use as an all-purpose stack
42 * storing backtracking offsets, positions & stored register values.
43 * - current_character_: Stores the currently loaded characters (possibly more
44 * than one).
45 * - current_position_: The current position within the string, stored as a
46 * negative offset from the end of the string (i.e. the
47 * position corresponding to str[0] is -str.length).
48 * Note that current_position_ is *not* byte-based, unlike
49 * original V8 code.
50 *
51 * Results are returned through an array of capture indices, stored at
52 * matches_param_. A null array specifies a failure to match. The match indices
53 * [start_inclusive, end_exclusive] for capture group i are stored at positions
54 * matches_param_[i * 2] and matches_param_[i * 2 + 1], respectively. Match
55 * indices of -1 denote non-matched groups. Note that we store these indices
56 * as a negative offset from the end of the string in position_registers_
57 * during processing, and convert them to standard indexes when copying them
58 * to matches_param_ on successful match.
59 */
60
// Constructs the base assembler for the given isolate, with slow-safe
// compilation disabled and the global mode set to NOT_GLOBAL.
61 RegExpMacroAssembler::RegExpMacroAssembler(Isolate* isolate)
62 : slow_safe_compiler_(false),
63 global_mode_(NOT_GLOBAL),
64 isolate_(isolate) {
65 }
66
67
// Empty destructor: the body performs no cleanup.
68 RegExpMacroAssembler::~RegExpMacroAssembler() {
69 }
70
71
// Builds an assembler that emits flow-graph instructions for a regexp that is
// specialized to one string class id.
//   specialization_cid: one- or two-byte (external) string cid; selects mode_.
//   capture_count:      number of capture groups; (capture_count + 1) * 2
//                       position registers are reserved for them.
//   parsed_function:    passed on to the graph entry block.
//   ic_data_array:      stored and handed to every call instruction created
//                       by this assembler.
//   isolate:            used for all allocations made here.
// When the constructor returns, the entry, success, backtrack and exit blocks
// have been generated and emission continues at start_block_.
72 IRRegExpMacroAssembler::IRRegExpMacroAssembler(
73 intptr_t specialization_cid,
74 intptr_t capture_count,
75 const ParsedFunction* parsed_function,
76 ZoneGrowableArray<const ICData*>* ic_data_array,
77 Isolate* isolate)
78 : RegExpMacroAssembler(isolate),
79 specialization_cid_(specialization_cid),
80 parsed_function_(parsed_function),
81 ic_data_array_(ic_data_array),
82 current_instruction_(NULL),
83 stack_(NULL),
84 current_character_(NULL),
85 current_position_(NULL),
86 string_param_(NULL),
87 string_param_length_(NULL),
88 start_index_param_(NULL),
89 position_registers_count_((capture_count + 1) * 2),
90 stack_array_(GrowableObjectArray::ZoneHandle(
91 isolate, GrowableObjectArray::New(16, Heap::kOld))) {
// Pick the character mode from the string class; any other cid is a
// programming error.
92 switch (specialization_cid) {
93 case kOneByteStringCid:
94 case kExternalOneByteStringCid: mode_ = ASCII; break;
95 case kTwoByteStringCid:
96 case kExternalTwoByteStringCid: mode_ = UC16; break;
97 default: UNREACHABLE();
98 }
99
100 InitializeLocals();
101
102 // Create and generate all preset blocks.
103 entry_block_ =
104 new(isolate) GraphEntryInstr(
105 parsed_function_,
106 new(isolate) TargetEntryInstr(block_id.Alloc(), kInvalidTryIndex),
107 Isolate::kNoDeoptId);
108 start_block_ =
109 new(isolate) JoinEntryInstr(block_id.Alloc(), kInvalidTryIndex);
110 success_block_ =
111 new(isolate) JoinEntryInstr(block_id.Alloc(), kInvalidTryIndex);
112 backtrack_block_ =
113 new(isolate) JoinEntryInstr(block_id.Alloc(), kInvalidTryIndex);
114 exit_block_ =
115 new(isolate) JoinEntryInstr(block_id.Alloc(), kInvalidTryIndex);
116
// start_block_ is intentionally not generated here; it is where emission
// resumes once the constructor is done (see the end of this function).
117 GenerateEntryBlock();
118 GenerateSuccessBlock();
119 GenerateBacktrackBlock();
120 GenerateExitBlock();
121
// Record every preset block, including the graph entry's normal entry.
122 blocks_.Add(entry_block_);
123 blocks_.Add(entry_block_->normal_entry());
124 blocks_.Add(start_block_);
125 blocks_.Add(success_block_);
126 blocks_.Add(backtrack_block_);
127 blocks_.Add(exit_block_);
128
129 // Begin emission at the start_block_.
130 set_current_instruction(start_block_);
131 }
132
133
// Empty destructor: the body performs no cleanup.
134 IRRegExpMacroAssembler::~IRRegExpMacroAssembler() { }
135
136
// Creates the LocalVariable objects used by the generated code: the working
// locals, the two parameters (string at index 0, start index at index 1), and
// one position register per capture boundary.
137 void IRRegExpMacroAssembler::InitializeLocals() {
138 // Create local variables and parameters.
139 stack_ = Local(Symbols::stack_());
140 current_character_ = Local(Symbols::current_character_());
141 current_position_ = Local(Symbols::current_position_());
142 string_param_length_ = Local(Symbols::string_param_length_());
143 capture_length_ = Local(Symbols::capture_length_());
144 match_start_index_ = Local(Symbols::match_start_index_());
145 capture_start_index_ = Local(Symbols::capture_start_index_());
146 match_end_index_ = Local(Symbols::match_end_index_());
147 char_in_capture_ = Local(Symbols::char_in_capture_());
148 char_in_match_ = Local(Symbols::char_in_match_());
149 result_ = Local(Symbols::result_());
150
151 string_param_ = Parameter(Symbols::string_param_(), 0);
152 start_index_param_ = Parameter(Symbols::start_index_param_(), 1);
153
154 // Reserve space for all captured group positions. Note that more might
155 // be created on the fly for internal use.
// position_register(i) is called only for its side effect of creating
// register i; the returned pointer is ignored.
156 for (intptr_t i = 0; i < position_registers_count_; i++) {
157 position_register(i);
158 }
159 }
160
161
// Emits the prologue into the graph entry's normal entry block: sets up the
// backtracking stack, caches the subject string's length, resets the capture
// registers, computes the initial current position and jumps to start_block_.
162 void IRRegExpMacroAssembler::GenerateEntryBlock() {
163 set_current_instruction(entry_block_->normal_entry());
164 TAG();
165
166 // Generate a local list variable which we will use as a backtracking stack.
167
// The stack is the constant stack_array_ created in the constructor; it is
// cleared on every entry because the same array object is reused.
168 StoreLocal(stack_, Bind(new(I) ConstantInstr(stack_array_)));
169 Do(InstanceCall(InstanceCallDescriptor(Symbols::clear()), PushLocal(stack_)));
170
171 // Store string.length.
172 PushArgumentInstr* string_push = PushLocal(string_param_);
173
174 StoreLocal(string_param_length_,
175 Bind(InstanceCall(InstanceCallDescriptor(
176 String::ZoneHandle(
177 Field::GetterSymbol(Symbols::Length()))),
178 string_push)));
179
180 // Initialize all capture registers.
181 ClearRegisters(0, position_registers_count_ - 1);
182
183 // Store (start_index - string.length) as the current position (since it's a
184 // negative offset from the end of the string).
185 PushArgumentInstr* start_index_push = PushLocal(start_index_param_);
186 PushArgumentInstr* length_push = PushLocal(string_param_length_);
187
188 StoreLocal(current_position_, Sub(start_index_push, length_push));
189
190 // Jump to the start block.
191 current_instruction_->Goto(start_block_);
192 }
193
194
// Fills backtrack_block_ with the code emitted by Backtrack().
195 void IRRegExpMacroAssembler::GenerateBacktrackBlock() {
196 set_current_instruction(backtrack_block_);
197 TAG();
198 Backtrack();
199 }
200
201
// Fills success_block_: allocates a list of position_registers_count_
// elements through the core library's List factory, stores it in result_,
// copies every position register into it (converted from a negative
// end-relative offset to a string index by adding the string length) via the
// list's index-assignment operator, optionally prints it when tracing, and
// returns it.
// Note: the inline comments below mention a `matches` parameter and
// "Return true"; what the code actually fills and returns is the result_ list.
202 void IRRegExpMacroAssembler::GenerateSuccessBlock() {
203 set_current_instruction(success_block_);
204 TAG();
205
206 Definition* type_args_null_def = new(I) ConstantInstr(
207 TypeArguments::ZoneHandle(I, TypeArguments::null()));
208 PushArgumentInstr* type_arg_push = PushArgument(Bind(type_args_null_def));
209 PushArgumentInstr* length_push =
210 PushArgument(Bind(Uint64Constant(position_registers_count_)));
211
212 const Library& lib = Library::Handle(Library::CoreLibrary());
213 const Class& list_class = Class::Handle(
214 lib.LookupCoreClass(Symbols::List()));
215 const Function& list_ctor =
216 Function::ZoneHandle(I, list_class.LookupFactory(Symbols::ListFactory()));
217
218 StoreLocal(result_, Bind(StaticCall(list_ctor, type_arg_push, length_push)));
Florian Schneider 2014/10/01 17:04:14 I think it's fine to use the generic List construc
jgruber1 2014/10/03 18:59:53 Done.
219
220 // Store captured offsets in the `matches` parameter.
221 for (intptr_t i = 0; i < position_registers_count_; i++) {
222 PushArgumentInstr* matches_push = PushLocal(result_);
223 PushArgumentInstr* index_push = PushArgument(Bind(Uint64Constant(i)));
224
225 // Convert negative offsets from the end of the string to string indices.
226 PushArgumentInstr* offset_push = PushLocal(position_register(i));
227 PushArgumentInstr* len_push = PushLocal(string_param_length_);
228 PushArgumentInstr* value_push = PushArgument(Add(offset_push, len_push));
229
230 Do(InstanceCall(InstanceCallDescriptor(Token::kASSIGN_INDEX),
231 matches_push,
232 index_push,
233 value_push));
234 }
235
236 // Print the result if tracing.
237 PRINT(PushLocal(result_));
238
239 // Return true on success.
240 AppendInstruction(new(I) ReturnInstr(kNoTokenPos, Bind(LoadLocal(result_))));
241 }
242
243
// Fills exit_block_, the block taken when the match fails: it simply returns
// the current value of result_.
244 void IRRegExpMacroAssembler::GenerateExitBlock() {
245 set_current_instruction(exit_block_);
246 TAG();
247
248 // Return result_ as is on failure. This block never assigns it; presumably
// it is still null here, which Execute() reports as "no match" -- TODO confirm.
249 AppendInstruction(new(I) ReturnInstr(kNoTokenPos, Bind(LoadLocal(result_))));
250 }
251
252
// Compile-time switch for unaligned multi-character loads: off on ARM, ARM64
// and MIPS targets, on everywhere else.
253 #if defined(TARGET_ARCH_ARM64) || \
254 defined(TARGET_ARCH_ARM) || \
255 defined(TARGET_ARCH_MIPS)
256 // Disabling unaligned accesses forces the regexp engine to load characters one
257 // by one instead of up to 4 at once, along with the associated performance hit.
258 // TODO(jgruber): Be less conservative about disabling unaligned accesses.
259 // For instance, ARMv6 supports unaligned accesses.
260 static const bool kEnableUnalignedAccesses = false;
261 #else
262 static const bool kEnableUnalignedAccesses = true;
263 #endif
// Returns true only when the target allows unaligned accesses and the
// assembler is not in slow-safe mode.
264 bool IRRegExpMacroAssembler::CanReadUnaligned() {
265 return kEnableUnalignedAccesses && !slow_safe();
266 }
267
268
// Invokes the compiled regexp function on the given input.
//   function:     the generated matcher function to call.
//   input:        subject string, passed as argument 0.
//   start_offset: index at which matching starts, passed as argument 1.
//   isolate:      isolate in which the result handle is allocated.
// Returns Array::null() when the call returns null, otherwise the returned
// array. An error result is printed and treated as unreachable.
269 RawArray* IRRegExpMacroAssembler::Execute(
270 const Function& function,
271 const String& input,
272 const Smi& start_offset,
273 Isolate* isolate) {
274 // Create the argument list.
275 const Array& args = Array::Handle(Array::New(2));
276 args.SetAt(0, input);
277 args.SetAt(1, start_offset);
278
279 // And finally call the generated code.
280
281 const Object& retval =
282 Object::Handle(isolate, DartEntry::InvokeFunction(function, args));
283 if (retval.IsError()) {
284 const Error& error = Error::Cast(retval);
285 OS::Print("%s\n", error.ToErrorCString());
286 // Should never happen.
287 UNREACHABLE();
288 }
289
// A null return value means the generated code found no match.
290 if (retval.IsNull()) {
291 return Array::null();
292 }
293
294 ASSERT(retval.IsArray());
295 return Array::Cast(retval).raw();
296 }
297
298
// Compares two ranges of the same string case-insensitively.
//   str_raw:       the string holding both ranges.
//   lhs_index_raw: start index of the first range.
//   rhs_index_raw: start index of the second range.
//   length_raw:    number of characters to compare.
// For each position the characters are compared directly; on mismatch the
// first character is canonicalized with unibrow::Ecma262Canonicalize and
// compared again, and only if that still differs is the second character
// canonicalized as well. Returns Bool::False() at the first pair that differs
// even after canonicalization, Bool::True() otherwise (including length 0).
// No bounds checks are performed on the indices here.
299 RawBool* IRRegExpMacroAssembler::CaseInsensitiveCompareUC16(
300 RawString* str_raw,
301 RawSmi* lhs_index_raw,
302 RawSmi* rhs_index_raw,
303 RawSmi* length_raw) {
304 const String& str = String::Handle(str_raw);
Florian Schneider 2014/10/01 17:04:14 Is this performance-critical? If not, I'd make it
jgruber1 2014/10/03 18:59:53 Acknowledged.
305 const Smi& lhs_index = Smi::Handle(lhs_index_raw);
306 const Smi& rhs_index = Smi::Handle(rhs_index_raw);
307 const Smi& length = Smi::Handle(length_raw);
308
309 // TODO(jgruber): Optimize as single instance. V8 has this as an
310 // isolate member.
311 unibrow::Mapping<unibrow::Ecma262Canonicalize> canonicalize;
312 // This function is not allowed to cause a garbage collection.
Florian Schneider 2014/10/01 17:04:14 Add a NoGCScope to this function to assert that no
jgruber1 2014/10/03 18:59:53 Removing comment as discussed in person since code
313 // A GC might move the calling generated code and invalidate the
314 // return address on the stack.
315
316 for (intptr_t i = 0; i < length.Value(); i++) {
317 int32_t c1 = str.CharAt(lhs_index.Value() + i);
318 int32_t c2 = str.CharAt(rhs_index.Value() + i);
319 if (c1 != c2) {
320 int32_t s1[1] = { c1 };
321 canonicalize.get(c1, '\0', s1);
322 if (s1[0] != c2) {
323 int32_t s2[1] = { c2 };
324 canonicalize.get(c2, '\0', s2);
325 if (s1[0] != s2[0]) {
326 return Bool::False().raw();
327 }
328 }
329 }
330 }
331 return Bool::True().raw();
332 }
333
334
// Creates a dynamically typed LocalVariable describing the parameter with the
// given name and zero-based index, and assigns it its frame slot.
335 LocalVariable* IRRegExpMacroAssembler::Parameter(const String& name,
336 intptr_t index) const {
337 const Type& local_type = Type::ZoneHandle(I, Type::DynamicType());
338 LocalVariable* local =
339 new(I) LocalVariable(kNoTokenPos, name, local_type);
340
// The slot index decreases as the parameter index grows.
341 intptr_t param_frame_index = kParamEndSlotFromFp + kParamCount - index;
342 local->set_index(param_frame_index);
343
344 return local;
345 }
346
347
// Creates a dynamically typed LocalVariable with the given name and places it
// in the next free local slot (see GetNextLocalIndex()).
348 LocalVariable* IRRegExpMacroAssembler::Local(const String& name) {
349 const Type& local_type = Type::ZoneHandle(I, Type::DynamicType());
350 LocalVariable* local =
351 new(I) LocalVariable(kNoTokenPos, name, local_type);
352 local->set_index(GetNextLocalIndex());
353
354 return local;
355 }
356
357
// Returns a new, not yet appended ConstantInstr holding the signed integer
// value, allocated with Heap::kOld.
358 ConstantInstr* IRRegExpMacroAssembler::Int64Constant(int64_t value) const {
359 return new(I) ConstantInstr(
360 Integer::ZoneHandle(I, Integer::New(value, Heap::kOld)));
361 }
362
363
// Returns a new, not yet appended ConstantInstr holding the unsigned integer
// value, allocated with Heap::kOld.
364 ConstantInstr* IRRegExpMacroAssembler::Uint64Constant(uint64_t value) const {
365 return new(I) ConstantInstr(
366 Integer::ZoneHandle(I, Integer::NewFromUint64(value, Heap::kOld)));
367 }
368
369
// Returns a new, not yet appended ConstantInstr wrapping Bool::True() or
// Bool::False() according to value.
370 ConstantInstr* IRRegExpMacroAssembler::BoolConstant(bool value) const {
371 return new(I) ConstantInstr(value ? Bool::True() : Bool::False());
372 }
373
374
// Returns a new, not yet appended ConstantInstr holding a String created from
// the C string value, allocated with Heap::kOld.
375 ConstantInstr* IRRegExpMacroAssembler::StringConstant(const char* value) const {
376 return new(I) ConstantInstr(
377 String::ZoneHandle(I, String::New(value, Heap::kOld)));
378 }
379
380
// Returns a new ConstantInstr holding the value of the static field
// _wordCharacterMap of the core library's JSSyntaxRegExp class. The field's
// initializer is evaluated first if the field is still uninitialized.
381 ConstantInstr* IRRegExpMacroAssembler::WordCharacterMapConstant() const {
382 const Library& lib = Library::Handle(I, Library::CoreLibrary());
383 const Class& regexp_class = Class::Handle(I,
384 lib.LookupClassAllowPrivate(Symbols::JSSyntaxRegExp()));
385 const Field& word_character_field = Field::ZoneHandle(I,
386 regexp_class.LookupStaticField(Symbols::_wordCharacterMap()));
387 ASSERT(!word_character_field.IsNull());
388
389 if (word_character_field.IsUninitialized()) {
390 word_character_field.EvaluateInitializer();
391 }
392 ASSERT(!word_character_field.IsUninitialized());
393
// The field value is cast to Array without a runtime type check.
394 return new(I) ConstantInstr(
395 Array::Cast(Instance::ZoneHandle(I, word_character_field.value())));
396 }
397
398
// Builds a comparison of lhs and rhs for use as a branch condition.
// The comparison is emitted as an instance call of the matching operator on
// lhs, and the call's result is then strictly compared against the constant
// true. lhs and rhs are bound and pushed by this function; the returned
// StrictCompareInstr itself is not appended.
//   kind: one of kEQ, kNE, kLT, kGT, kLTE, kGTE; anything else is unreachable.
399 ComparisonInstr* IRRegExpMacroAssembler::Comparison(
400 ComparisonKind kind, Definition* lhs, Definition* rhs) {
401 Token::Kind strict_comparison = Token::kEQ_STRICT;
402 Token::Kind intermediate_operator = Token::kILLEGAL;
403 switch (kind) {
404 case kEQ:
405 intermediate_operator = Token::kEQ;
406 break;
407 case kNE:
// Not-equal is expressed as "(lhs == rhs) !== true".
408 intermediate_operator = Token::kEQ;
409 strict_comparison = Token::kNE_STRICT;
410 break;
411 case kLT:
412 intermediate_operator = Token::kLT;
413 break;
414 case kGT:
415 intermediate_operator = Token::kGT;
416 break;
417 case kLTE:
418 intermediate_operator = Token::kLTE;
419 break;
420 case kGTE:
421 intermediate_operator = Token::kGTE;
422 break;
423 default:
424 UNREACHABLE();
425 }
426
427 ASSERT(intermediate_operator != Token::kILLEGAL);
428
429 PushArgumentInstr* lhs_push = PushArgument(Bind(lhs));
430 PushArgumentInstr* rhs_push = PushArgument(Bind(rhs));
431
432 Value* lhs_value =
433 Bind(InstanceCall(InstanceCallDescriptor(intermediate_operator),
434 lhs_push,
435 rhs_push));
436 Value* rhs_value = Bind(BoolConstant(true));
437
438 return new(I) StrictCompareInstr(kNoTokenPos, strict_comparison,
439 lhs_value, rhs_value, true);
440 }
441
442
// Creates a static call to function with no arguments. The instruction is
// returned without being appended.
443 StaticCallInstr* IRRegExpMacroAssembler::StaticCall(
444 const Function& function) const {
445 ZoneGrowableArray<PushArgumentInstr*>* arguments =
446 new(I) ZoneGrowableArray<PushArgumentInstr*>(0);
447 return StaticCall(function, arguments);
448 }
449
450
// Creates a static call to function with the single pushed argument arg1.
// The instruction is returned without being appended.
451 StaticCallInstr* IRRegExpMacroAssembler::StaticCall(
452 const Function& function,
453 PushArgumentInstr* arg1) const {
454 ZoneGrowableArray<PushArgumentInstr*>* arguments =
455 new(I) ZoneGrowableArray<PushArgumentInstr*>(1);
456 arguments->Add(arg1);
457
458 return StaticCall(function, arguments);
459 }
460
461
// Creates a static call to function with the pushed arguments arg1 and arg2,
// in that order. The instruction is returned without being appended.
462 StaticCallInstr* IRRegExpMacroAssembler::StaticCall(
463 const Function& function,
464 PushArgumentInstr* arg1,
465 PushArgumentInstr* arg2) const {
466 ZoneGrowableArray<PushArgumentInstr*>* arguments =
467 new(I) ZoneGrowableArray<PushArgumentInstr*>(2);
468 arguments->Add(arg1);
469 arguments->Add(arg2);
470
471 return StaticCall(function, arguments);
472 }
473
474
// Common implementation of the StaticCall overloads: creates the
// StaticCallInstr for function with the given argument list, a null
// argument-names array and this assembler's ic_data_array_. The instruction
// is returned without being appended.
475 StaticCallInstr* IRRegExpMacroAssembler::StaticCall(
476 const Function& function,
477 ZoneGrowableArray<PushArgumentInstr*>* arguments) const {
478 return new(I) StaticCallInstr(kNoTokenPos,
479 function,
480 Object::null_array(),
481 arguments,
482 *ic_data_array_);
483 }
484
485
// Creates an instance call described by desc with the single pushed argument
// arg1. The instruction is returned without being appended.
486 InstanceCallInstr* IRRegExpMacroAssembler::InstanceCall(
487 const InstanceCallDescriptor& desc,
488 PushArgumentInstr* arg1) const {
489 ZoneGrowableArray<PushArgumentInstr*>* arguments =
490 new(I) ZoneGrowableArray<PushArgumentInstr*>(1);
491 arguments->Add(arg1);
492
493 return InstanceCall(desc, arguments);
494 }
495
496
// Creates an instance call described by desc with the pushed arguments arg1
// and arg2, in that order. The instruction is returned without being
// appended.
497 InstanceCallInstr* IRRegExpMacroAssembler::InstanceCall(
498 const InstanceCallDescriptor& desc,
499 PushArgumentInstr* arg1,
500 PushArgumentInstr* arg2) const {
501 ZoneGrowableArray<PushArgumentInstr*>* arguments =
502 new(I) ZoneGrowableArray<PushArgumentInstr*>(2);
503 arguments->Add(arg1);
504 arguments->Add(arg2);
505
506 return InstanceCall(desc, arguments);
507 }
508
509
// Creates an instance call described by desc with the pushed arguments arg1,
// arg2 and arg3, in that order. The instruction is returned without being
// appended.
510 InstanceCallInstr* IRRegExpMacroAssembler::InstanceCall(
511 const InstanceCallDescriptor& desc,
512 PushArgumentInstr* arg1,
513 PushArgumentInstr* arg2,
514 PushArgumentInstr* arg3) const {
515 ZoneGrowableArray<PushArgumentInstr*>* arguments =
516 new(I) ZoneGrowableArray<PushArgumentInstr*>(3);
517 arguments->Add(arg1);
518 arguments->Add(arg2);
519 arguments->Add(arg3);
520
521 return InstanceCall(desc, arguments);
522 }
523
524
// Common implementation of the InstanceCall overloads: creates the
// InstanceCallInstr from the descriptor's name, token kind and checked
// argument count, the given argument list, a null argument-names array and
// this assembler's ic_data_array_. The instruction is returned without being
// appended.
525 InstanceCallInstr* IRRegExpMacroAssembler::InstanceCall(
526 const InstanceCallDescriptor& desc,
527 ZoneGrowableArray<PushArgumentInstr*> *arguments) const {
528 return
529 new(I) InstanceCallInstr(kNoTokenPos,
530 *desc.name,
531 desc.token_kind,
532 arguments,
533 Object::null_array(),
534 desc.checked_argument_count,
535 *ic_data_array_);
536 }
537
538
// Returns a new, not yet appended instruction that loads the given local.
539 LoadLocalInstr* IRRegExpMacroAssembler::LoadLocal(LocalVariable* local) const {
540 return new(I) LoadLocalInstr(*local);
541 }
542
543
// Appends an instruction that stores value into the given local.
544 void IRRegExpMacroAssembler::StoreLocal(LocalVariable* local,
545 Value* value) {
546 Do(new(I) StoreLocalInstr(*local, value));
547 }
548
549
// Sets the instruction after which the next emitted instruction is linked.
// NULL marks the assembler as closed (see HANDLE_DEAD_CODE_EMISSION).
550 void IRRegExpMacroAssembler::set_current_instruction(Instruction* instruction) {
551 current_instruction_ = instruction;
552 }
553
554
// Appends definition to the current block, gives it a freshly allocated temp
// index, and returns a new Value referring to it for use as an input.
555 Value* IRRegExpMacroAssembler::Bind(Definition* definition) {
556 AppendInstruction(definition);
557 definition->set_temp_index(temp_id.Alloc());
558
559 return new(I) Value(definition);
560 }
561
562
// Appends definition to the current block without allocating a temp index
// for it (contrast with Bind()).
563 void IRRegExpMacroAssembler::Do(Definition* definition) {
564 AppendInstruction(definition);
565 }
566
567 // In some cases, the V8 irregexp engine generates unreachable code by emitting
568 // a jmp not followed by a bind. We cannot do the same, since it is impossible
569 // to append to a block following a jmp. In such cases, assume that we are doing
570 // the correct thing, but output a warning when tracing.
// When current_instruction_ is NULL, the macro binds a throwaway BlockLabel so
// that the following emission has a block to append to.
571 #define HANDLE_DEAD_CODE_EMISSION() \
572 if (current_instruction_ == NULL) { \
573 if (FLAG_trace_irregexp) { \
574 OS::Print("WARNING: Attempting to append to a closed assembler. " \
575 "This could be either a bug or generation of dead code " \
576 "inherited from V8.\n"); \
577 } \
578 BlockLabel dummy; \
579 BindBlock(&dummy); \
580 }
581
// Links instruction after the current instruction and makes it the new
// current instruction. If the assembler is closed, a dummy block is opened
// first.
582 void IRRegExpMacroAssembler::AppendInstruction(Instruction* instruction) {
583 HANDLE_DEAD_CODE_EMISSION();
584
585 ASSERT(current_instruction_ != NULL);
586 ASSERT(current_instruction_->next() == NULL);
587
// Give back one temp id per input and one argument id per argument of the
// instruction (presumably because the instruction consumes them).
588 temp_id.Dealloc(instruction->InputCount());
589 arg_id.Dealloc(instruction->ArgumentCount());
590
591 current_instruction_->LinkTo(instruction);
592 set_current_instruction(instruction);
593 }
594
595
// Same as AppendInstruction(), except that the assembler is left closed
// afterwards (current_instruction_ is NULL), so a block must be bound before
// emission can continue.
596 void IRRegExpMacroAssembler::CloseBlockWith(Instruction* instruction) {
597 HANDLE_DEAD_CODE_EMISSION();
598
599 ASSERT(current_instruction_ != NULL);
600 ASSERT(current_instruction_->next() == NULL);
601
602 temp_id.Dealloc(instruction->InputCount());
603 arg_id.Dealloc(instruction->ArgumentCount());
604
605 current_instruction_->LinkTo(instruction);
606 set_current_instruction(NULL);
607 }
608
609
// Jumps to the block of the given label and marks the label as linked.
// A NULL label means "backtrack".
610 void IRRegExpMacroAssembler::GoTo(BlockLabel* to) {
611 if (to == NULL) {
612 Backtrack();
613 } else {
614 to->SetLinked();
615 GoTo(to->block());
616 }
617 }
618
619
620 // Closes the current block with a goto, and unsets current_instruction_.
621 // BindBlock() must be called before emission can continue.
// If the assembler is already closed, a dummy block is opened first so the
// goto has a block to live in.
622 void IRRegExpMacroAssembler::GoTo(JoinEntryInstr* to) {
623 HANDLE_DEAD_CODE_EMISSION();
624
625 ASSERT(current_instruction_ != NULL);
626 ASSERT(current_instruction_->next() == NULL);
627 current_instruction_->Goto(to);
628 set_current_instruction(NULL);
629 }
630
631
// Allocates an argument id, then appends and returns a PushArgumentInstr for
// value.
632 PushArgumentInstr* IRRegExpMacroAssembler::PushArgument(Value* value) {
633 arg_id.Alloc();
634 PushArgumentInstr* push = new(I) PushArgumentInstr(value);
635 // Do *not* use Do() for push argument instructions.
636 AppendInstruction(push);
637 return push;
638 }
639
640
// Loads the given local and pushes the loaded value as a call argument.
641 PushArgumentInstr* IRRegExpMacroAssembler::PushLocal(LocalVariable* local) {
642 return PushArgument(Bind(LoadLocal(local)));
643 }
644
645
// Emits code that prints the C string str: the string is turned into a
// constant (allocated with Heap::kOld), pushed, and passed to
// Print(PushArgumentInstr*).
646 void IRRegExpMacroAssembler::Print(const char* str) {
647 Print(PushArgument(
648 Bind(new(I) ConstantInstr(
649 String::ZoneHandle(I, String::New(str, Heap::kOld))))));
650 }
651
652
// Emits a static call to the core library's print function with the already
// pushed argument.
653 void IRRegExpMacroAssembler::Print(PushArgumentInstr* argument) {
654 const Library& lib = Library::Handle(Library::CoreLibrary());
655 const Function& print_fn = Function::ZoneHandle(
656 I, lib.LookupFunctionAllowPrivate(Symbols::print()));
657 Do(StaticCall(print_fn, argument));
658 }
659
660
// Prints every block recorded in blocks_ through FlowGraphPrinter, in the
// order in which the blocks were added.
661 void IRRegExpMacroAssembler::PrintBlocks() {
662 for (intptr_t i = 0; i < blocks_.length(); i++) {
663 FlowGraphPrinter::PrintBlock(blocks_[i], false);
664 }
665 }
666
667
// Returns the fixed stack limit slack of this assembler, 32.
// NOTE(review): unit and consumers are not visible in this file -- confirm.
668 intptr_t IRRegExpMacroAssembler::stack_limit_slack() {
669 return 32;
670 }
671
672
// Emits current_position_ += by. Nothing is emitted (apart from the optional
// trace tag) when by is zero; by may be negative.
673 void IRRegExpMacroAssembler::AdvanceCurrentPosition(intptr_t by) {
674 TAG();
675 if (by != 0) {
676 PushArgumentInstr* cur_pos_push = PushLocal(current_position_);
677 PushArgumentInstr* by_push = PushArgument(Bind(Int64Constant(by)));
678
679 Value* new_pos_value = Add(cur_pos_push, by_push);
680 StoreLocal(current_position_, new_pos_value);
681 }
682 }
683
684
// Emits position_register(reg) += by. The register must already exist.
// Nothing is emitted (apart from the optional trace tag) when by is zero.
685 void IRRegExpMacroAssembler::AdvanceRegister(intptr_t reg, intptr_t by) {
686 TAG();
687 ASSERT(reg >= 0);
688 ASSERT(reg < position_registers_.length());
689
690 if (by != 0) {
691 PushArgumentInstr* reg_push = PushLocal(position_register(reg));
692 PushArgumentInstr* by_push = PushArgument(Bind(Int64Constant(by)));
693 StoreLocal(position_register(reg), Add(reg_push, by_push));
694 }
695 }
696
697
// Emits a backtrack: after a preemption check, the value on top of the
// backtracking stack is popped and used as an index into a per-site offsets
// array; the value found there drives an IndirectGotoInstr that closes the
// current block.
698 void IRRegExpMacroAssembler::Backtrack() {
699 TAG();
700 CheckPreemption();
701
// The offsets array starts out empty and is shared with the indirect goto
// below; presumably it is filled in by FinalizeBlockOffsetTable() -- confirm.
702 GrowableObjectArray& offsets = GrowableObjectArray::ZoneHandle(
703 I, GrowableObjectArray::New(Heap::kOld));
704
705 PushArgumentInstr* block_offsets_push =
706 PushArgument(Bind(new(I) ConstantInstr(offsets)));
707 PushArgumentInstr* block_id_push = PushArgument(PopStack());
708
709 Value* offset_value =
710 Bind(InstanceCall(InstanceCallDescriptor(Token::kINDEX),
711 block_offsets_push,
712 block_id_push));
713
714 IndirectGotoInstr* igoto = new(I) IndirectGotoInstr(&offsets, offset_value);
715 CloseBlockWith(igoto);
// Remember the goto so its successors and offsets can be finalized later.
716 igotos_.Add(igoto);
717 }
718
719
720 // A BindBlock is analogous to assigning a label to a basic block.
721 // If the BlockLabel does not yet contain a block, it is created.
722 // If there is a current instruction, append a goto to the bound block.
// The label must not be bound yet and its block must still be empty.
723 void IRRegExpMacroAssembler::BindBlock(BlockLabel* label) {
724 ASSERT(!label->IsBound());
725 ASSERT(label->block()->next() == NULL);
726
727 label->SetBound(block_id.Alloc());
728 blocks_.Add(label->block());
729
// Fall through from the open block (if any) into the newly bound one.
730 if (current_instruction_ != NULL) {
731 GoTo(label);
732 }
733 set_current_instruction(label->block());
734
735 // Print the id of the current block if tracing.
736 PRINT(PushArgument(Bind(Uint64Constant(label->block()->block_id()))));
737 }
738
739
// Allocates the next local id and converts it to a frame slot index; slots
// are numbered downwards from kFirstLocalSlotFromFp.
740 intptr_t IRRegExpMacroAssembler::GetNextLocalIndex() {
741 intptr_t id = local_id.Alloc();
742 return kFirstLocalSlotFromFp - id;
743 }
744
745
// Returns the local backing position register `index`, creating it and every
// missing register below it on first use. Registers are named "pos" followed
// by the zero-padded five-digit register number.
746 LocalVariable* IRRegExpMacroAssembler::position_register(intptr_t index) {
// The name buffer below has room for "pos", five digits and the terminating
// NUL, which is why the index is limited to five decimal digits.
747 ASSERT(index < 10 * 10 * 10 * 10 * 10); // 10^5.
Florian Schneider 2014/10/01 17:04:14 Where doesthis limit (10^5) come from? We should m
jgruber1 2014/10/03 18:59:53 I removed all of the name generation code since it
748 const char name_prefix[] = "pos";
749 char name[sizeof(name_prefix) + 5];
750
751 // Create position registers as needed.
752 for (intptr_t i = position_registers_.length(); i < index + 1; i++) {
753 OS::SNPrint(name, sizeof(name), "%s%05" Pd "", name_prefix, i);
754 position_registers_.Add(Local(String::Handle(I, Symbols::New(name))));
755 }
756
757 return position_registers_[index];
758 }
759
760
761 // TODO(jgruber): Move the offset table outside to avoid having to keep
762 // the assembler around until after code generation; both function or regexp
763 // would work.
// For every recorded indirect goto: sizes its offset table to the number of
// allocated indirect ids, then, for each successor, finds the indirect entry
// that the successor jumps to and records that entry's code offset under the
// entry's indirect id. Offsets are read from the blocks, so this presumably
// has to run after code generation (see the TODO above).
764 void IRRegExpMacroAssembler::FinalizeBlockOffsetTable() {
765 for (intptr_t i = 0; i < igotos_.length(); i++) {
766 IndirectGotoInstr* igoto = igotos_[i];
767 igoto->SetOffsetCount(I, indirect_id.Count());
768
769 for (intptr_t j = 0; j < igoto->SuccessorCount(); j++) {
770 TargetEntryInstr* target = igoto->SuccessorAt(j);
771
772 // Optimizations might have modified the immediate target block, but
773 // it must end with a goto to the indirect entry.
774 Instruction* instr = target;
775 while (instr != NULL && !instr->IsGoto()) {
776 instr = instr->next();
777 }
// NOTE(review): if the loop above found no goto, instr is NULL here and the
// assertion itself dereferences it.
778 ASSERT(instr->IsGoto());
779
780 IndirectEntryInstr* ientry =
781 instr->AsGoto()->successor()->AsIndirectEntry();
782 ASSERT(ientry != NULL);
783
784 // The intermediate block was possibly compacted, check both it and the
785 // final indirect entry for a valid offset. If neither are valid, then
786 // the indirect entry is unreachable.
787 intptr_t offset =
788 (target->offset() > 0) ? target->offset() : ientry->offset();
789 if (offset > 0) {
790 igoto->SetOffsetAt(I, ientry->indirect_id(), offset);
791 }
792 }
793 }
794 }
795
// Gives every recorded indirect goto one successor per indirect entry of the
// graph entry block. Each successor is the block produced by
// TargetWithJoinGoto() for that indirect entry.
796 void IRRegExpMacroAssembler::FinalizeIndirectGotos() {
797 for (intptr_t i = 0; i < igotos_.length(); i++) {
798 for (intptr_t j = 0; j < entry_block_->indirect_entries().length(); j++) {
799 igotos_.At(i)->AddSuccessor(
800 TargetWithJoinGoto(entry_block_->indirect_entries().At(j)));
801 }
802 }
803 }
804
805
// Emits: if current_character_ == c, go to on_equal (handled by
// BranchOrBacktrack()).
806 void IRRegExpMacroAssembler::CheckCharacter(uint32_t c, BlockLabel* on_equal) {
807 TAG();
808 Definition* cur_char_def = LoadLocal(current_character_);
809 Definition* char_def = Uint64Constant(c);
810
811 BranchOrBacktrack(Comparison(kEQ, cur_char_def, char_def),
812 on_equal);
813 }
814
815
// Emits: if current_character_ > limit, go to on_greater (handled by
// BranchOrBacktrack()).
816 void IRRegExpMacroAssembler::CheckCharacterGT(uint16_t limit,
817 BlockLabel* on_greater) {
818 TAG();
819 BranchOrBacktrack(Comparison(kGT,
820 LoadLocal(current_character_),
821 Uint64Constant(limit)),
822 on_greater);
823 }
824
825
// Emits: go to on_at_start if the current position is the start of the input.
// That requires both that matching began at index 0 and that the current
// position equals -string_length. Otherwise emission continues in the
// fall-through block bound at the end.
826 void IRRegExpMacroAssembler::CheckAtStart(BlockLabel* on_at_start) {
827 TAG();
828
829 BlockLabel not_at_start;
830
831 // Did we start the match at the start of the string at all?
832 BranchOrBacktrack(Comparison(kNE,
833 LoadLocal(start_index_param_),
834 Uint64Constant(0)),
835 &not_at_start);
836
837 // If we did, are we still at the start of the input, i.e. is
838 // (offset == string_length * -1)?
839 Definition* neg_len_def =
840 InstanceCall(InstanceCallDescriptor(Token::kNEGATE),
841 PushLocal(string_param_length_));
842 Definition* offset_def = LoadLocal(current_position_);
843 BranchOrBacktrack(Comparison(kEQ, neg_len_def, offset_def),
844 on_at_start);
845
846 BindBlock(&not_at_start);
847 }
848
849
// Emits: go to on_not_at_start unless the current position is the start of
// the input, i.e. if matching did not begin at index 0 or the current
// position differs from -string_length.
850 void IRRegExpMacroAssembler::CheckNotAtStart(BlockLabel* on_not_at_start) {
851 TAG();
852
853 // Did we start the match at the start of the string at all?
854 BranchOrBacktrack(Comparison(kNE,
855 LoadLocal(start_index_param_),
856 Uint64Constant(0)),
857 on_not_at_start);
858
859 // If we did, are we still at the start of the input, i.e. is
860 // (offset == string_length * -1)?
861 Definition* neg_len_def =
862 InstanceCall(InstanceCallDescriptor(Token::kNEGATE),
863 PushLocal(string_param_length_));
864 Definition* offset_def = LoadLocal(current_position_);
865 BranchOrBacktrack(Comparison(kNE, neg_len_def, offset_def),
866 on_not_at_start);
867 }
868
869
// Emits: if current_character_ < limit, go to on_less (handled by
// BranchOrBacktrack()).
870 void IRRegExpMacroAssembler::CheckCharacterLT(uint16_t limit,
871 BlockLabel* on_less) {
872 TAG();
873 BranchOrBacktrack(Comparison(kLT,
874 LoadLocal(current_character_),
875 Uint64Constant(limit)),
876 on_less);
877 }
878
879
// Emits: if the value on top of the backtracking stack equals the current
// position, pop it and continue at on_equal; otherwise leave the stack
// untouched and fall through.
880 void IRRegExpMacroAssembler::CheckGreedyLoop(BlockLabel* on_equal) {
881 TAG();
882
883 BlockLabel fallthrough;
884
// Read the stack top through the list's `last` getter, without popping.
885 PushArgumentInstr* stack_push = PushLocal(stack_);
886 Definition* stack_tip_def = InstanceCall(
887 InstanceCallDescriptor(String::ZoneHandle(
888 I, Field::GetterSymbol(Symbols::last()))),
889 stack_push);
890 Definition* cur_pos_def = LoadLocal(current_position_);
891
892 BranchOrBacktrack(Comparison(kNE, stack_tip_def, cur_pos_def),
893 &fallthrough);
894
895 // Pop, throwing away the value.
896 stack_push = PushLocal(stack_);
897 Do(InstanceCall(InstanceCallDescriptor(Symbols::removeLast()),
898 stack_push));
899
// A NULL comparison presumably makes this an unconditional jump -- confirm
// against BranchOrBacktrack().
900 BranchOrBacktrack(NULL, on_equal);
901
902 BindBlock(&fallthrough);
903 }
904
905
906 void IRRegExpMacroAssembler::CheckNotBackReferenceIgnoreCase(
907 intptr_t start_reg,
908 BlockLabel* on_no_match) {
909 TAG();
910 ASSERT(start_reg + 1 <= position_registers_.length());
911
912 BlockLabel fallthrough;
913
914 PushArgumentInstr* end_push = PushLocal(position_register(start_reg + 1));
915 PushArgumentInstr* start_push = PushLocal(position_register(start_reg));
916 StoreLocal(capture_length_, Sub(end_push, start_push));
917
918 // The length of a capture should not be negative. This can only happen
919 // if the end of the capture is unrecorded, or at a point earlier than
920 // the start of the capture.
921 // BranchOrBacktrack(less, on_no_match);
922
923 BranchOrBacktrack(Comparison(kLT,
924 LoadLocal(capture_length_),
925 Uint64Constant(0)),
926 on_no_match);
927
928 // If length is zero, either the capture is empty or it is completely
929 // uncaptured. In either case succeed immediately.
930 BranchOrBacktrack(Comparison(kEQ,
931 LoadLocal(capture_length_),
932 Uint64Constant(0)),
933 &fallthrough);
934
935
936 // Check that there are sufficient characters left in the input.
937 PushArgumentInstr* pos_push = PushLocal(current_position_);
938 PushArgumentInstr* len_push = PushLocal(capture_length_);
939 BranchOrBacktrack(Comparison(kGT,
940 InstanceCall(InstanceCallDescriptor(Token::kADD),
941 pos_push,
942 len_push),
943 Uint64Constant(0)),
944 on_no_match);
945
946 pos_push = PushLocal(current_position_);
947 len_push = PushLocal(string_param_length_);
948 StoreLocal(match_start_index_, Add(pos_push, len_push));
949
950 pos_push = PushLocal(position_register(start_reg));
951 len_push = PushLocal(string_param_length_);
952 StoreLocal(capture_start_index_, Add(pos_push, len_push));
953
954 pos_push = PushLocal(match_start_index_);
955 len_push = PushLocal(capture_length_);
956 StoreLocal(match_end_index_, Add(pos_push, len_push));
957
958 BlockLabel success;
959 if (mode_ == ASCII) {
960 BlockLabel loop_increment;
961 BlockLabel loop;
962 BindBlock(&loop);
963
964 StoreLocal(char_in_capture_, CharacterAt(LoadLocal(capture_start_index_)));
965 StoreLocal(char_in_match_, CharacterAt(LoadLocal(match_start_index_)));
966
967 BranchOrBacktrack(Comparison(kEQ,
968 LoadLocal(char_in_capture_),
969 LoadLocal(char_in_match_)),
970 &loop_increment);
971
972 // Mismatch, try case-insensitive match (converting letters to lower-case).
973 PushArgumentInstr* match_char_push = PushLocal(char_in_match_);
974 PushArgumentInstr* mask_push = PushArgument(Bind(Uint64Constant(0x20)));
975 StoreLocal(char_in_match_,
976 Bind(InstanceCall(InstanceCallDescriptor(Token::kBIT_OR),
977 match_char_push,
978 mask_push)));
979
980 BlockLabel convert_capture;
981 BlockLabel on_not_in_range;
982 BranchOrBacktrack(Comparison(kLT,
983 LoadLocal(char_in_match_),
984 Uint64Constant('a')),
985 &on_not_in_range);
986 BranchOrBacktrack(Comparison(kGT,
987 LoadLocal(char_in_match_),
988 Uint64Constant('z')),
989 &on_not_in_range);
990 GoTo(&convert_capture);
991 BindBlock(&on_not_in_range);
992
993 // Latin-1: Check for values in range [224,254] but not 247.
994 BranchOrBacktrack(Comparison(kLT,
995 LoadLocal(char_in_match_),
996 Uint64Constant(224)),
997 on_no_match);
998 BranchOrBacktrack(Comparison(kGT,
999 LoadLocal(char_in_match_),
1000 Uint64Constant(254)),
1001 on_no_match);
1002
1003 BranchOrBacktrack(Comparison(kEQ,
1004 LoadLocal(char_in_match_),
1005 Uint64Constant(247)),
1006 on_no_match);
1007
1008 // Also convert capture character.
1009 BindBlock(&convert_capture);
1010
1011 PushArgumentInstr* capture_char_push = PushLocal(char_in_capture_);
1012 mask_push = PushArgument(Bind(Uint64Constant(0x20)));
1013 StoreLocal(char_in_capture_,
1014 Bind(InstanceCall(InstanceCallDescriptor(Token::kBIT_OR),
1015 capture_char_push,
1016 mask_push)));
1017
1018 BranchOrBacktrack(Comparison(kNE,
1019 LoadLocal(char_in_match_),
1020 LoadLocal(char_in_capture_)),
1021 on_no_match);
1022
1023 BindBlock(&loop_increment);
1024
1025 // Increment pointers into match and capture strings.
1026 StoreLocal(capture_start_index_, Add(
1027 PushLocal(capture_start_index_),
1028 PushArgument(Bind(Uint64Constant(1)))));
1029 StoreLocal(match_start_index_, Add(
1030 PushLocal(match_start_index_),
1031 PushArgument(Bind(Uint64Constant(1)))));
1032
1033 // Compare to end of match, and loop if not done.
1034 BranchOrBacktrack(Comparison(kLT,
1035 LoadLocal(match_start_index_),
1036 LoadLocal(match_end_index_)),
1037 &loop);
1038 } else {
1039 ASSERT(mode_ == UC16);
1040
1041 Value* string_value = Bind(LoadLocal(string_param_));
1042 Value* lhs_index_value = Bind(LoadLocal(match_start_index_));
1043 Value* rhs_index_value = Bind(LoadLocal(capture_start_index_));
1044 Value* length_value = Bind(LoadLocal(capture_length_));
1045
1046 Definition* is_match_def =
1047 new(I) CaseInsensitiveCompareUC16Instr(
1048 string_value,
1049 lhs_index_value,
1050 rhs_index_value,
1051 length_value,
1052 specialization_cid_,
1053 Isolate::kNoDeoptId);
1054
1055 BranchOrBacktrack(Comparison(kNE, is_match_def, BoolConstant(true)),
1056 on_no_match);
1057 }
1058
1059 BindBlock(&success);
1060
1061 // Move current character position to position after match.
1062 PushArgumentInstr* match_end_push = PushLocal(match_end_index_);
1063 len_push = PushLocal(string_param_length_);
1064 StoreLocal(current_position_, Sub(match_end_push, len_push));
1065
1066 BindBlock(&fallthrough);
1067 }
1068
1069
1070 void IRRegExpMacroAssembler::CheckNotBackReference(
1071 intptr_t start_reg,
1072 BlockLabel* on_no_match) {
1073 TAG();
1074 ASSERT(start_reg + 1 <= position_registers_.length());
1075
1076 BlockLabel fallthrough;
1077 BlockLabel success;
1078
1079 // Find length of back-referenced capture.
1080 PushArgumentInstr* end_push = PushLocal(position_register(start_reg + 1));
1081 PushArgumentInstr* start_push = PushLocal(position_register(start_reg));
1082 StoreLocal(capture_length_, Sub(end_push, start_push));
1083
1084 // Fail on partial or illegal capture (start of capture after end of capture).
1085 BranchOrBacktrack(Comparison(kLT,
1086 LoadLocal(capture_length_),
1087 Uint64Constant(0)),
1088 on_no_match);
1089
1090 // Succeed on empty capture (including no capture)
1091 BranchOrBacktrack(Comparison(kEQ,
1092 LoadLocal(capture_length_),
1093 Uint64Constant(0)),
1094 &fallthrough);
1095
1096 // Check that there are sufficient characters left in the input.
1097 PushArgumentInstr* pos_push = PushLocal(current_position_);
1098 PushArgumentInstr* len_push = PushLocal(capture_length_);
1099 BranchOrBacktrack(Comparison(kGT,
1100 InstanceCall(InstanceCallDescriptor(Token::kADD),
1101 pos_push,
1102 len_push),
1103 Uint64Constant(0)),
1104 on_no_match);
1105
1106 // Compute pointers to match string and capture string.
1107 pos_push = PushLocal(current_position_);
1108 len_push = PushLocal(string_param_length_);
1109 StoreLocal(match_start_index_, Add(pos_push, len_push));
1110
1111 pos_push = PushLocal(position_register(start_reg));
1112 len_push = PushLocal(string_param_length_);
1113 StoreLocal(capture_start_index_, Add(pos_push, len_push));
1114
1115 pos_push = PushLocal(match_start_index_);
1116 len_push = PushLocal(capture_length_);
1117 StoreLocal(match_end_index_, Add(pos_push, len_push));
1118
1119 BlockLabel loop;
1120 BindBlock(&loop);
1121
1122 StoreLocal(char_in_capture_, CharacterAt(LoadLocal(capture_start_index_)));
1123 StoreLocal(char_in_match_, CharacterAt(LoadLocal(match_start_index_)));
1124
1125 BranchOrBacktrack(Comparison(kNE,
1126 LoadLocal(char_in_capture_),
1127 LoadLocal(char_in_match_)),
1128 on_no_match);
1129
1130 // Increment pointers into capture and match string.
1131 StoreLocal(capture_start_index_, Add(
1132 PushLocal(capture_start_index_),
1133 PushArgument(Bind(Uint64Constant(1)))));
1134 StoreLocal(match_start_index_, Add(
1135 PushLocal(match_start_index_),
1136 PushArgument(Bind(Uint64Constant(1)))));
1137
1138 // Check if we have reached end of match area.
1139 BranchOrBacktrack(Comparison(kLT,
1140 LoadLocal(match_start_index_),
1141 LoadLocal(match_end_index_)),
1142 &loop);
1143
1144 BindBlock(&success);
1145
1146 // Move current character position to position after match.
1147 PushArgumentInstr* match_end_push = PushLocal(match_end_index_);
1148 len_push = PushLocal(string_param_length_);
1149 StoreLocal(current_position_, Sub(match_end_push, len_push));
1150
1151 BindBlock(&fallthrough);
1152 }
1153
1154
1155 void IRRegExpMacroAssembler::CheckNotCharacter(uint32_t c,
1156 BlockLabel* on_not_equal) {
1157 TAG();
1158 BranchOrBacktrack(Comparison(kNE,
1159 LoadLocal(current_character_),
1160 Uint64Constant(c)),
1161 on_not_equal);
1162 }
1163
1164
1165 void IRRegExpMacroAssembler::CheckCharacterAfterAnd(uint32_t c,
1166 uint32_t mask,
1167 BlockLabel* on_equal) {
1168 TAG();
1169
1170 Definition* actual_def = LoadLocal(current_character_);
1171 Definition* expected_def = Uint64Constant(c);
1172
1173 PushArgumentInstr* actual_push = PushArgument(Bind(actual_def));
1174 PushArgumentInstr* mask_push = PushArgument(Bind(Uint64Constant(mask)));
1175 actual_def = InstanceCall(InstanceCallDescriptor(Token::kBIT_AND),
1176 actual_push,
1177 mask_push);
1178
1179 BranchOrBacktrack(Comparison(kEQ, actual_def, expected_def), on_equal);
1180 }
1181
1182
1183 void IRRegExpMacroAssembler::CheckNotCharacterAfterAnd(
1184 uint32_t c,
1185 uint32_t mask,
1186 BlockLabel* on_not_equal) {
1187 TAG();
1188
1189 Definition* actual_def = LoadLocal(current_character_);
1190 Definition* expected_def = Uint64Constant(c);
1191
1192 PushArgumentInstr* actual_push = PushArgument(Bind(actual_def));
1193 PushArgumentInstr* mask_push = PushArgument(Bind(Uint64Constant(mask)));
1194 actual_def = InstanceCall(InstanceCallDescriptor(Token::kBIT_AND),
1195 actual_push,
1196 mask_push);
1197
1198 BranchOrBacktrack(Comparison(kNE, actual_def, expected_def), on_not_equal);
1199 }
1200
1201
1202 void IRRegExpMacroAssembler::CheckNotCharacterAfterMinusAnd(
1203 uint16_t c,
1204 uint16_t minus,
1205 uint16_t mask,
1206 BlockLabel* on_not_equal) {
1207 TAG();
1208 ASSERT(minus < Utf16::kMaxCodeUnit); // NOLINT
1209
1210 Definition* actual_def = LoadLocal(current_character_);
1211 Definition* expected_def = Uint64Constant(c);
1212
1213 PushArgumentInstr* actual_push = PushArgument(Bind(actual_def));
1214 PushArgumentInstr* minus_push = PushArgument(Bind(Uint64Constant(minus)));
1215
1216 actual_push = PushArgument(Sub(actual_push, minus_push));
1217 PushArgumentInstr* mask_push = PushArgument(Bind(Uint64Constant(mask)));
1218 actual_def = InstanceCall(InstanceCallDescriptor(Token::kBIT_AND),
1219 actual_push,
1220 mask_push);
1221
1222 BranchOrBacktrack(Comparison(kNE, actual_def, expected_def), on_not_equal);
1223 }
1224
1225
1226 void IRRegExpMacroAssembler::CheckCharacterInRange(
1227 uint16_t from,
1228 uint16_t to,
1229 BlockLabel* on_in_range) {
1230 TAG();
1231 ASSERT(from <= to);
1232
1233 // TODO(jgruber): All range comparisons could be done cheaper with unsigned
1234 // compares. This pattern repeats in various places.
1235
1236 BlockLabel on_not_in_range;
1237 BranchOrBacktrack(Comparison(kLT,
1238 LoadLocal(current_character_),
1239 Uint64Constant(from)),
1240 &on_not_in_range);
1241 BranchOrBacktrack(Comparison(kGT,
1242 LoadLocal(current_character_),
1243 Uint64Constant(to)),
1244 &on_not_in_range);
1245 BranchOrBacktrack(NULL, on_in_range);
1246
1247 BindBlock(&on_not_in_range);
1248 }
1249
1250
1251 void IRRegExpMacroAssembler::CheckCharacterNotInRange(
1252 uint16_t from,
1253 uint16_t to,
1254 BlockLabel* on_not_in_range) {
1255 TAG();
1256 ASSERT(from <= to);
1257
1258 BranchOrBacktrack(Comparison(kLT,
1259 LoadLocal(current_character_),
1260 Uint64Constant(from)),
1261 on_not_in_range);
1262
1263 BranchOrBacktrack(Comparison(kGT,
1264 LoadLocal(current_character_),
1265 Uint64Constant(to)),
1266 on_not_in_range);
1267 }
1268
1269
1270 void IRRegExpMacroAssembler::CheckBitInTable(
1271 const TypedData& table,
1272 BlockLabel* on_bit_set) {
1273 TAG();
1274
1275 PushArgumentInstr* table_push =
1276 PushArgument(Bind(new(I) ConstantInstr(table)));
1277 PushArgumentInstr* index_push = PushLocal(current_character_);
1278
1279 if (mode_ != ASCII || kTableMask != Symbols::kMaxOneCharCodeSymbol) {
1280 PushArgumentInstr* mask_push =
1281 PushArgument(Bind(Uint64Constant(kTableSize - 1)));
1282 index_push = PushArgument(
1283 Bind(InstanceCall(InstanceCallDescriptor(Token::kBIT_AND),
1284 index_push,
1285 mask_push)));
1286 }
1287
1288 Definition* byte_def = InstanceCall(InstanceCallDescriptor(Token::kINDEX),
1289 table_push,
1290 index_push);
1291 Definition* zero_def = Int64Constant(0);
1292
1293 BranchOrBacktrack(Comparison(kNE, byte_def, zero_def), on_bit_set);
1294 }
1295
1296
1297 bool IRRegExpMacroAssembler::CheckSpecialCharacterClass(
1298 uint16_t type,
1299 BlockLabel* on_no_match) {
1300 TAG();
1301
1302 // Range checks (c in min..max) are generally implemented by an unsigned
1303 // (c - min) <= (max - min) check
1304 switch (type) {
1305 case 's':
1306 // Match space-characters
1307 if (mode_ == ASCII) {
1308 // One byte space characters are '\t'..'\r', ' ' and \u00a0.
1309 BlockLabel success;
1310 // Space (' ').
1311 BranchOrBacktrack(Comparison(kEQ,
1312 LoadLocal(current_character_),
1313 Uint64Constant(' ')),
1314 &success);
1315 // Check range 0x09..0x0d.
1316 CheckCharacterInRange('\t', '\r', &success);
1317 // \u00a0 (NBSP).
1318 BranchOrBacktrack(Comparison(kNE,
1319 LoadLocal(current_character_),
1320 Uint64Constant(0x00a0)),
1321 on_no_match);
1322 BindBlock(&success);
1323 return true;
1324 }
1325 return false;
1326 case 'S':
1327 // The emitted code for generic character classes is good enough.
1328 return false;
1329 case 'd':
1330 // Match ASCII digits ('0'..'9')
1331 CheckCharacterNotInRange('0', '9', on_no_match);
1332 return true;
1333 case 'D':
1334 // Match non ASCII-digits
1335 CheckCharacterInRange('0', '9', on_no_match);
1336 return true;
1337 case '.': {
1338 // Match non-newlines (not 0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029)
1339 BranchOrBacktrack(Comparison(kEQ,
1340 LoadLocal(current_character_),
1341 Uint64Constant('\n')),
1342 on_no_match);
1343 BranchOrBacktrack(Comparison(kEQ,
1344 LoadLocal(current_character_),
1345 Uint64Constant('\r')),
1346 on_no_match);
1347 if (mode_ == UC16) {
1348 BranchOrBacktrack(Comparison(kEQ,
1349 LoadLocal(current_character_),
1350 Uint64Constant(0x2028)),
1351 on_no_match);
1352 BranchOrBacktrack(Comparison(kEQ,
1353 LoadLocal(current_character_),
1354 Uint64Constant(0x2029)),
1355 on_no_match);
1356 }
1357 return true;
1358 }
1359 case 'w': {
1360 if (mode_ != ASCII) {
1361 // Table is 128 entries, so all ASCII characters can be tested.
1362 BranchOrBacktrack(Comparison(kGT,
1363 LoadLocal(current_character_),
1364 Uint64Constant('z')),
1365 on_no_match);
1366 }
1367
1368 PushArgumentInstr* table_push =
1369 PushArgument(Bind(WordCharacterMapConstant()));
1370 PushArgumentInstr* index_push = PushLocal(current_character_);
1371
1372 Definition* byte_def = InstanceCall(InstanceCallDescriptor(Token::kINDEX),
1373 table_push,
1374 index_push);
1375 Definition* zero_def = Int64Constant(0);
1376
1377 BranchOrBacktrack(Comparison(kEQ, byte_def, zero_def), on_no_match);
1378
1379 return true;
1380 }
1381 case 'W': {
1382 BlockLabel done;
1383 if (mode_ != ASCII) {
1384 // Table is 128 entries, so all ASCII characters can be tested.
1385 BranchOrBacktrack(Comparison(kGT,
1386 LoadLocal(current_character_),
1387 Uint64Constant('z')),
1388 &done);
1389 }
1390
1391 // TODO(jgruber): Refactor to use CheckBitInTable if possible.
1392
1393 PushArgumentInstr* table_push =
1394 PushArgument(Bind(WordCharacterMapConstant()));
1395 PushArgumentInstr* index_push = PushLocal(current_character_);
1396
1397 Definition* byte_def = InstanceCall(InstanceCallDescriptor(Token::kINDEX),
1398 table_push,
1399 index_push);
1400 Definition* zero_def = Int64Constant(0);
1401
1402 BranchOrBacktrack(Comparison(kNE, byte_def, zero_def), on_no_match);
1403
1404 if (mode_ != ASCII) {
1405 BindBlock(&done);
1406 }
1407 return true;
1408 }
1409 // Non-standard classes (with no syntactic shorthand) used internally.
1410 case '*':
1411 // Match any character.
1412 return true;
1413 case 'n': {
1414 // Match newlines (0x0a('\n'), 0x0d('\r'), 0x2028 or 0x2029).
1415 // The opposite of '.'.
1416 BlockLabel success;
1417 BranchOrBacktrack(Comparison(kEQ,
1418 LoadLocal(current_character_),
1419 Uint64Constant('\n')),
1420 &success);
1421 BranchOrBacktrack(Comparison(kEQ,
1422 LoadLocal(current_character_),
1423 Uint64Constant('\r')),
1424 &success);
1425 if (mode_ == UC16) {
1426 BranchOrBacktrack(Comparison(kEQ,
1427 LoadLocal(current_character_),
1428 Uint64Constant(0x2028)),
1429 &success);
1430 BranchOrBacktrack(Comparison(kEQ,
1431 LoadLocal(current_character_),
1432 Uint64Constant(0x2029)),
1433 &success);
1434 }
1435 BranchOrBacktrack(NULL, on_no_match);
1436 BindBlock(&success);
1437 return true;
1438 }
1439 // No custom implementation (yet): s(uint16_t), S(uint16_t).
1440 default:
1441 return false;
1442 }
1443 }
1444
1445
1446 void IRRegExpMacroAssembler::Fail() {
1447 TAG();
1448 ASSERT(FAILURE == 0); // Return value for failure is zero.
1449 if (!global()) {
1450 UNREACHABLE(); // Dart regexps are always global.
1451 }
1452 GoTo(exit_block_);
1453 }
1454
1455
1456 void IRRegExpMacroAssembler::IfRegisterGE(intptr_t reg,
1457 intptr_t comparand,
1458 BlockLabel* if_ge) {
1459 TAG();
1460 BranchOrBacktrack(Comparison(kGTE,
1461 LoadLocal(position_register(reg)),
1462 Int64Constant(comparand)),
1463 if_ge);
1464 }
1465
1466
1467 void IRRegExpMacroAssembler::IfRegisterLT(intptr_t reg,
1468 intptr_t comparand,
1469 BlockLabel* if_lt) {
1470 TAG();
1471 BranchOrBacktrack(Comparison(kLT,
1472 LoadLocal(position_register(reg)),
1473 Int64Constant(comparand)),
1474 if_lt);
1475 }
1476
1477
1478 void IRRegExpMacroAssembler::IfRegisterEqPos(intptr_t reg,
1479 BlockLabel* if_eq) {
1480 TAG();
1481 BranchOrBacktrack(Comparison(kEQ,
1482 LoadLocal(position_register(reg)),
1483 LoadLocal(current_position_)),
1484 if_eq);
1485 }
1486
1487
1488 RegExpMacroAssembler::IrregexpImplementation
1489 IRRegExpMacroAssembler::Implementation() {
1490 return kIRImplementation;
1491 }
1492
1493
1494 void IRRegExpMacroAssembler::LoadCurrentCharacter(intptr_t cp_offset,
1495 BlockLabel* on_end_of_input,
1496 bool check_bounds,
1497 intptr_t characters) {
1498 TAG();
1499 ASSERT(cp_offset >= -1); // ^ and \b can look behind one character.
1500 ASSERT(cp_offset < (1<<30)); // Be sane! (And ensure negation works)
1501 if (check_bounds) {
1502 CheckPosition(cp_offset + characters - 1, on_end_of_input);
1503 }
1504 LoadCurrentCharacterUnchecked(cp_offset, characters);
1505 }
1506
1507
1508 void IRRegExpMacroAssembler::PopCurrentPosition() {
1509 TAG();
1510 StoreLocal(current_position_, PopStack());
1511 }
1512
1513
1514 void IRRegExpMacroAssembler::PopRegister(intptr_t register_index) {
1515 TAG();
1516 ASSERT(register_index < position_registers_.length());
1517 StoreLocal(position_register(register_index), PopStack());
1518 }
1519
1520
1521 void IRRegExpMacroAssembler::PushStack(Definition *definition) {
1522 PushArgumentInstr* stack_push = PushLocal(stack_);
1523 PushArgumentInstr* value_push = PushArgument(Bind(definition));
1524 Do(InstanceCall(InstanceCallDescriptor(Symbols::add()),
1525 stack_push,
1526 value_push));
1527 }
1528
1529
1530 Value* IRRegExpMacroAssembler::PopStack() {
1531 PushArgumentInstr* stack_push = PushLocal(stack_);
1532 return Bind(InstanceCall(InstanceCallDescriptor(Symbols::removeLast()),
1533 stack_push));
1534 }
1535
1536
1537 // Pushes the location corresponding to label to the backtracking stack.
1538 void IRRegExpMacroAssembler::PushBacktrack(BlockLabel* label) {
1539 TAG();
1540
1541 // Ensure that targets of indirect jumps are never accessed through a
1542 // normal control flow instructions by creating a new block for each backtrack
1543 // target.
1544 IndirectEntryInstr* indirect_target = IndirectWithJoinGoto(label->block());
1545
1546 // Add a fake edge from the graph entry for data flow analysis.
1547 entry_block_->AddIndirectEntry(indirect_target);
1548
1549 ConstantInstr* offset = Uint64Constant(indirect_target->indirect_id());
1550 PushStack(offset);
1551 }
1552
1553
1554 void IRRegExpMacroAssembler::PushCurrentPosition() {
1555 TAG();
1556 PushStack(LoadLocal(current_position_));
1557 }
1558
1559
1560 void IRRegExpMacroAssembler::PushRegister(intptr_t register_index,
1561 StackCheckFlag check_stack_limit) {
1562 TAG();
1563 PushStack(LoadLocal(position_register(register_index)));
1564 }
1565
1566
1567 void IRRegExpMacroAssembler::ReadCurrentPositionFromRegister(intptr_t reg) {
1568 TAG();
1569 StoreLocal(current_position_, Bind(LoadLocal(position_register(reg))));
1570 }
1571
1572 // Resets the size of the stack to the value stored in reg.
1573 void IRRegExpMacroAssembler::ReadStackPointerFromRegister(intptr_t reg) {
1574 TAG();
1575 ASSERT(reg < position_registers_.length());
1576
1577 PushArgumentInstr* stack_push = PushLocal(stack_);
1578 PushArgumentInstr* length_push = PushLocal(position_register(reg));
1579
1580 Do(InstanceCall(InstanceCallDescriptor(
1581 String::ZoneHandle(
1582 I, Field::SetterSymbol(Symbols::Length()))),
1583 stack_push,
1584 length_push));
1585 }
1586
1587 void IRRegExpMacroAssembler::SetCurrentPositionFromEnd(intptr_t by) {
1588 TAG();
1589
1590 BlockLabel after_position;
1591
1592 Definition* cur_pos_def = LoadLocal(current_position_);
1593 Definition* by_value_def = Int64Constant(-by);
1594
1595 BranchOrBacktrack(Comparison(kGTE, cur_pos_def, by_value_def),
1596 &after_position);
1597
1598 StoreLocal(current_position_, Bind(Int64Constant(-by)));
1599
1600 // On RegExp code entry (where this operation is used), the character before
1601 // the current position is expected to be already loaded.
1602 // We have advanced the position, so it's safe to read backwards.
1603 LoadCurrentCharacterUnchecked(-1, 1);
1604
1605 BindBlock(&after_position);
1606 }
1607
1608
1609 void IRRegExpMacroAssembler::SetRegister(intptr_t register_index, intptr_t to) {
1610 TAG();
1611 // Reserved for positions!
1612 ASSERT(register_index >= position_registers_count_);
1613 StoreLocal(position_register(register_index), Bind(Int64Constant(to)));
1614 }
1615
1616
1617 bool IRRegExpMacroAssembler::Succeed() {
1618 TAG();
1619 GoTo(success_block_);
1620 return global();
1621 }
1622
1623
1624 void IRRegExpMacroAssembler::WriteCurrentPositionToRegister(
1625 intptr_t reg, intptr_t cp_offset) {
1626 TAG();
1627
1628 PushArgumentInstr* pos_push = PushLocal(current_position_);
1629 PushArgumentInstr* off_push =
1630 PushArgument(Bind(Int64Constant(cp_offset)));
1631
1632 // Push the negative offset; these are converted to positive string positions
1633 // within the success block.
1634 StoreLocal(position_register(reg), Add(pos_push, off_push));
1635 }
1636
1637
1638 void IRRegExpMacroAssembler::ClearRegisters(
1639 intptr_t reg_from, intptr_t reg_to) {
1640 TAG();
1641
1642 ASSERT(reg_from <= reg_to);
1643 ASSERT(reg_to < position_registers_.length());
1644
1645 // In order to clear registers to a final result value of -1, set them to
1646 // (-1 - string length), the offset of -1 from the end of the string.
1647
1648 for (intptr_t reg = reg_from; reg <= reg_to; reg++) {
1649 PushArgumentInstr* minus_one_push =
1650 PushArgument(Bind(Int64Constant(-1)));
1651 PushArgumentInstr* length_push = PushLocal(string_param_length_);
1652
1653 StoreLocal(position_register(reg), Sub(minus_one_push, length_push));
1654 }
1655 }
1656
1657
1658 void IRRegExpMacroAssembler::WriteStackPointerToRegister(intptr_t reg) {
1659 TAG();
1660
1661 PushArgumentInstr* stack_push = PushLocal(stack_);
1662 Value* length_value =
1663 Bind(InstanceCall(InstanceCallDescriptor(
1664 String::ZoneHandle(
1665 I, Field::GetterSymbol(Symbols::Length()))),
1666 stack_push));
1667
1668 StoreLocal(position_register(reg), length_value);
1669 }
1670
1671
1672 // Private methods:
1673
1674
1675 void IRRegExpMacroAssembler::CheckPosition(intptr_t cp_offset,
1676 BlockLabel* on_outside_input) {
1677 TAG();
1678 Definition* curpos_def = LoadLocal(current_position_);
1679 Definition* cp_off_def = Int64Constant(-cp_offset);
1680
1681 // If (current_position_ < -cp_offset), we are in bounds.
1682 // Remember, current_position_ is a negative offset from the string end.
1683
1684 BranchOrBacktrack(Comparison(kGTE, curpos_def, cp_off_def),
1685 on_outside_input);
1686 }
1687
1688
1689 void IRRegExpMacroAssembler::BranchOrBacktrack(
1690 ComparisonInstr* comparison,
1691 BlockLabel* true_successor) {
1692 if (comparison == NULL) { // No condition
1693 if (true_successor == NULL) {
1694 Backtrack();
1695 return;
1696 }
1697 GoTo(true_successor);
1698 return;
1699 }
1700
1701 // If no successor block has been passed in, backtrack.
1702 JoinEntryInstr* true_successor_block = backtrack_block_;
1703 if (true_successor != NULL) {
1704 true_successor->SetLinked();
1705 true_successor_block = true_successor->block();
1706 }
1707 ASSERT(true_successor_block != NULL);
1708
1709 // If the condition is not true, fall through to a new block.
1710 BlockLabel fallthrough;
1711
1712 BranchInstr* branch = new(I) BranchInstr(comparison);
1713 *branch->true_successor_address() =
1714 TargetWithJoinGoto(true_successor_block);
1715 *branch->false_successor_address() =
1716 TargetWithJoinGoto(fallthrough.block());
1717
1718 CloseBlockWith(branch);
1719 BindBlock(&fallthrough);
1720 }
1721
1722
1723 TargetEntryInstr* IRRegExpMacroAssembler::TargetWithJoinGoto(
1724 JoinEntryInstr* dst) {
1725 TargetEntryInstr* target = new(I) TargetEntryInstr(
1726 block_id.Alloc(), kInvalidTryIndex);
1727 blocks_.Add(target);
1728
1729 target->AppendInstruction(new(I) GotoInstr(dst));
1730
1731 return target;
1732 }
1733
1734
1735 IndirectEntryInstr* IRRegExpMacroAssembler::IndirectWithJoinGoto(
1736 JoinEntryInstr* dst) {
1737 IndirectEntryInstr* target = new(I) IndirectEntryInstr(
1738 block_id.Alloc(), indirect_id.Alloc(), kInvalidTryIndex);
1739 blocks_.Add(target);
1740
1741 target->AppendInstruction(new(I) GotoInstr(dst));
1742
1743 return target;
1744 }
1745
1746
1747 void IRRegExpMacroAssembler::CheckPreemption() {
1748 TAG();
1749 AppendInstruction(new(I) CheckStackOverflowInstr(kNoTokenPos, 0));
1750 }
1751
1752
1753 Value* IRRegExpMacroAssembler::Add(
1754 PushArgumentInstr* lhs,
1755 PushArgumentInstr* rhs) {
1756 return Bind(InstanceCall(InstanceCallDescriptor(Token::kADD), lhs, rhs));
1757 }
1758
1759
1760 Value* IRRegExpMacroAssembler::Sub(
1761 PushArgumentInstr* lhs,
1762 PushArgumentInstr* rhs) {
1763 return Bind(InstanceCall(InstanceCallDescriptor(Token::kSUB), lhs, rhs));
1764 }
1765
1766
1767 static const String& codeUnitsAtName(intptr_t characters) {
1768 switch (characters) {
1769 case 1: return Library::PrivateCoreLibName(Symbols::_oneCodeUnitAt());
1770 case 2: return Library::PrivateCoreLibName(Symbols::_twoCodeUnitsAt());
1771 case 4: return Library::PrivateCoreLibName(Symbols::_fourCodeUnitsAt());
1772 }
1773 UNREACHABLE();
1774 return String::Handle();
1775 }
1776
1777
1778 void IRRegExpMacroAssembler::LoadCurrentCharacterUnchecked(
1779 intptr_t cp_offset, intptr_t characters) {
1780 TAG();
1781
1782 if (mode_ == ASCII) {
1783 ASSERT(characters == 1 || characters == 2 || characters == 4);
1784 } else {
1785 ASSERT(mode_ == UC16);
1786 ASSERT(characters == 1 || characters == 2);
1787 }
1788
1789 // LoadLocal pattern_param_
1790 // PushArgument()
1791 PushArgumentInstr* pattern_push = PushLocal(string_param_);
1792
1793 // Calculate the addressed string index as
1794 // cp_offset + current_position_ + string_param_length_
1795 PushArgumentInstr* cp_offset_push =
1796 PushArgument(Bind(Int64Constant(cp_offset)));
1797 PushArgumentInstr* cur_pos_push = PushLocal(current_position_);
1798
1799 PushArgumentInstr* partial_sum_push =
1800 PushArgument(Add(cp_offset_push, cur_pos_push));
1801 PushArgumentInstr* length_push = PushLocal(string_param_length_);
1802
1803 PushArgumentInstr* pos_push =
1804 PushArgument(Add(partial_sum_push, length_push));
1805
1806 // InstanceCall(codeUnitAt, t0, t0)
1807 const String& name = codeUnitsAtName(characters);
1808 Value* code_unit_value =
1809 Bind(InstanceCall(InstanceCallDescriptor(name),
1810 pattern_push,
1811 pos_push));
1812
1813 // StoreLocal(current_character_)
1814 StoreLocal(current_character_, code_unit_value);
1815
1816 PRINT(PushLocal(current_character_));
1817 }
1818
1819
1820 Value* IRRegExpMacroAssembler::CharacterAt(Definition* index) {
1821 PushArgumentInstr* pattern_push = PushLocal(string_param_);
1822 PushArgumentInstr* index_push = PushArgument(Bind(index));
1823
1824 return Bind(InstanceCall(InstanceCallDescriptor(codeUnitsAtName(1)),
1825 pattern_push,
1826 index_push));
1827 }
1828
1829
1830 #undef __
1831
1832 } // namespace dart
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698