Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(291)

Side by Side Diff: runtime/vm/regexp_assembler.cc

Issue 744853003: Integrate the Irregexp Regular Expression Engine. (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: fix clang and win build Created 6 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « runtime/vm/regexp_assembler.h ('k') | runtime/vm/regexp_ast.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file 1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a 2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file. 3 // BSD-style license that can be found in the LICENSE file.
4 4
5 #include "vm/regexp_assembler.h" 5 #include "vm/regexp_assembler.h"
6 6
7 // SNIP 7 #include "vm/bit_vector.h"
8 #include "vm/compiler.h"
9 #include "vm/dart_entry.h"
10 #include "vm/flow_graph_builder.h"
11 #include "vm/il_printer.h"
12 #include "vm/object_store.h"
13 #include "vm/regexp.h"
14 #include "vm/resolver.h"
15 #include "vm/stack_frame.h"
16 #include "vm/unibrow-inl.h"
17 #include "vm/unicode.h"
18
19 #define I isolate()
20
// Debugging output macros. TAG() is called at the head of each interesting
// function and prints its name during execution if irregexp tracing is enabled.
//
// TAG() and PRINT() are wrapped in do { ... } while (0) so that each expands
// to exactly one statement. With a bare `if`, a use such as
// `if (x) PRINT(y); else ...` would not compile (or would bind the `else` to
// the macro's hidden `if`).
#define TAG()                                                                  \
  do {                                                                         \
    if (FLAG_trace_irregexp) {                                                 \
      TAG_();                                                                  \
    }                                                                          \
  } while (0)

// Emits a Print() of the string "TAG: " concatenated with the name of the
// enclosing function. Not guarded by the tracing flag; use TAG() instead.
#define TAG_()                                                                 \
  Print(PushArgument(                                                          \
    Bind(new(I) ConstantInstr(String::ZoneHandle(I, String::Concat(            \
        String::Handle(String::New("TAG: ")),                                  \
        String::Handle(String::New(__FUNCTION__)), Heap::kOld))))));

// Forwards `arg` to Print() only when irregexp tracing is enabled. `arg` is
// not evaluated when tracing is off.
#define PRINT(arg)                                                             \
  do {                                                                         \
    if (FLAG_trace_irregexp) {                                                 \
      Print(arg);                                                              \
    }                                                                          \
  } while (0)
8 31
9 namespace dart { 32 namespace dart {
10 33
11 RegExpMacroAssembler::RegExpMacroAssembler(Zone* zone) 34 DEFINE_FLAG(bool, trace_irregexp, false, "Trace irregexps");
35
36
// File-local shorthands for sentinel values declared on other classes. They
// are used below when creating block entries (no try index) and
// instructions/local variables (no source position).
37 static const intptr_t kInvalidTryIndex = CatchClauseNode::kInvalidTryIndex;
38 static const intptr_t kNoSourcePos = Scanner::kNoSourcePos;
39
40
41 void PrintUtf16(uint16_t c) {
42 const char* format = (0x20 <= c && c <= 0x7F) ?
43 "%c" : (c <= 0xff) ? "\\x%02x" : "\\u%04x";
44 OS::Print(format, c);
45 }
46
47
48 /*
49 * This assembler uses the following main local variables:
50 * - stack_: A pointer to a growable list which we use as an all-purpose stack
51 * storing backtracking offsets, positions & stored register values.
52 * - current_character_: Stores the currently loaded characters (possibly more
53 * than one).
54 * - current_position_: The current position within the string, stored as a
55 * negative offset from the end of the string (i.e. the
56 * position corresponding to str[0] is -str.length).
57 * Note that current_position_ is *not* byte-based, unlike
58 * original V8 code.
59 *
60 * Results are returned through an array of capture indices, stored at
61 * matches_param_. A null array specifies a failure to match. The match indices
62 * [start_inclusive, end_exclusive] for capture group i are stored at positions
63 * matches_param_[i * 2] and matches_param_[i * 2 + 1], respectively. Match
64 * indices of -1 denote non-matched groups. Note that we store these indices
65 * as a negative offset from the end of the string in position_registers_
66 * during processing, and convert them to standard indexes when copying them
67 * to matches_param_ on successful match.
68 */
69
// Base-class constructor. Side-by-side diff view: on the lines below, the
// left column is the old initializer list (which stored a Zone) and the right
// column is the new one, which stores the Isolate. Both start with
// slow_safe_compiler_ set to false and global_mode_ set to NOT_GLOBAL.
70 RegExpMacroAssembler::RegExpMacroAssembler(Isolate* isolate)
12 : slow_safe_compiler_(false), 71 : slow_safe_compiler_(false),
13 global_mode_(NOT_GLOBAL), 72 global_mode_(NOT_GLOBAL),
14 zone_(zone) { 73 isolate_(isolate) {
15 } 74 }
16 75
17 76
// Destructor with an empty body (unchanged between the old and new columns).
18 RegExpMacroAssembler::~RegExpMacroAssembler() { 77 RegExpMacroAssembler::~RegExpMacroAssembler() {
19 } 78 }
20 79
21 // SNIP 80
81 IRRegExpMacroAssembler::IRRegExpMacroAssembler(
82 intptr_t specialization_cid,
83 intptr_t capture_count,
84 const ParsedFunction* parsed_function,
85 const ZoneGrowableArray<const ICData*>& ic_data_array,
86 Isolate* isolate)
87 : RegExpMacroAssembler(isolate),
88 specialization_cid_(specialization_cid),
89 parsed_function_(parsed_function),
90 ic_data_array_(ic_data_array),
91 current_instruction_(NULL),
92 stack_(NULL),
93 current_character_(NULL),
94 current_position_(NULL),
95 string_param_(NULL),
96 string_param_length_(NULL),
97 start_index_param_(NULL),
98 position_registers_count_((capture_count + 1) * 2),
99 stack_array_(GrowableObjectArray::ZoneHandle(
100 isolate, GrowableObjectArray::New(16, Heap::kOld))) {
101 switch (specialization_cid) {
102 case kOneByteStringCid:
103 case kExternalOneByteStringCid: mode_ = ASCII; break;
104 case kTwoByteStringCid:
105 case kExternalTwoByteStringCid: mode_ = UC16; break;
106 default: UNREACHABLE();
107 }
108
109 InitializeLocals();
110
111 // Create and generate all preset blocks.
112 entry_block_ =
113 new(isolate) GraphEntryInstr(
114 parsed_function_,
115 new(isolate) TargetEntryInstr(block_id_.Alloc(), kInvalidTryIndex),
116 Isolate::kNoDeoptId);
117 start_block_ =
118 new(isolate) JoinEntryInstr(block_id_.Alloc(), kInvalidTryIndex);
119 success_block_ =
120 new(isolate) JoinEntryInstr(block_id_.Alloc(), kInvalidTryIndex);
121 backtrack_block_ =
122 new(isolate) JoinEntryInstr(block_id_.Alloc(), kInvalidTryIndex);
123 exit_block_ =
124 new(isolate) JoinEntryInstr(block_id_.Alloc(), kInvalidTryIndex);
125
126 GenerateEntryBlock();
127 GenerateSuccessBlock();
128 GenerateExitBlock();
129
130 blocks_.Add(entry_block_);
131 blocks_.Add(entry_block_->normal_entry());
132 blocks_.Add(start_block_);
133 blocks_.Add(success_block_);
134 blocks_.Add(backtrack_block_);
135 blocks_.Add(exit_block_);
136
137 // Begin emission at the start_block_.
138 set_current_instruction(start_block_);
139 }
140
141
142 IRRegExpMacroAssembler::~IRRegExpMacroAssembler() { }
143
144
145 void IRRegExpMacroAssembler::InitializeLocals() {
146 // All generated functions are expected to have a current-context variable.
147 // This variable is unused in irregexp functions.
148 parsed_function_->current_context_var()->set_index(GetNextLocalIndex());
149
150 // Create local variables and parameters.
151 stack_ = Local(Symbols::stack());
152 current_character_ = Local(Symbols::current_character());
153 current_position_ = Local(Symbols::current_position());
154 string_param_length_ = Local(Symbols::string_param_length());
155 capture_length_ = Local(Symbols::capture_length());
156 match_start_index_ = Local(Symbols::match_start_index());
157 capture_start_index_ = Local(Symbols::capture_start_index());
158 match_end_index_ = Local(Symbols::match_end_index());
159 char_in_capture_ = Local(Symbols::char_in_capture());
160 char_in_match_ = Local(Symbols::char_in_match());
161 result_ = Local(Symbols::result());
162
163 string_param_ = Parameter(Symbols::string_param(), 0);
164 start_index_param_ = Parameter(Symbols::start_index_param(), 1);
165
166 // Reserve space for all captured group positions. Note that more might
167 // be created on the fly for internal use.
168 for (intptr_t i = 0; i < position_registers_count_; i++) {
169 position_register(i);
170 }
171 }
172
173
174 void IRRegExpMacroAssembler::GenerateEntryBlock() {
175 set_current_instruction(entry_block_->normal_entry());
176 TAG();
177
178 // Generate a local list variable which we will use as a backtracking stack.
179
180 StoreLocal(stack_, Bind(new(I) ConstantInstr(stack_array_)));
181 Do(InstanceCall(InstanceCallDescriptor(Symbols::clear()), PushLocal(stack_)));
182
183 // Store string.length.
184 PushArgumentInstr* string_push = PushLocal(string_param_);
185
186 StoreLocal(
187 string_param_length_,
188 Bind(InstanceCall(
189 InstanceCallDescriptor(
190 String::ZoneHandle(Field::GetterSymbol(Symbols::Length()))),
191 string_push)));
192
193 // Initialize all capture registers.
194 ClearRegisters(0, position_registers_count_ - 1);
195
196 // Store (start_index - string.length) as the current position (since it's a
197 // negative offset from the end of the string).
198 PushArgumentInstr* start_index_push = PushLocal(start_index_param_);
199 PushArgumentInstr* length_push = PushLocal(string_param_length_);
200
201 StoreLocal(current_position_, Bind(Sub(start_index_push, length_push)));
202
203 // Jump to the start block.
204 current_instruction_->Goto(start_block_);
205 }
206
207
208 void IRRegExpMacroAssembler::GenerateBacktrackBlock() {
209 set_current_instruction(backtrack_block_);
210 TAG();
211 CheckPreemption();
212
213 const intptr_t entries_count = entry_block_->indirect_entries().length();
214
215 GrowableObjectArray& offsets = GrowableObjectArray::ZoneHandle(
216 I, GrowableObjectArray::New(entries_count, Heap::kOld));
217
218 PushArgumentInstr* block_offsets_push =
219 PushArgument(Bind(new(I) ConstantInstr(offsets)));
220 PushArgumentInstr* block_id_push = PushArgument(PopStack());
221
222 Value* offset_value =
223 Bind(InstanceCall(InstanceCallDescriptor::FromToken(Token::kINDEX),
224 block_offsets_push,
225 block_id_push));
226
227 backtrack_goto_ = new(I) IndirectGotoInstr(&offsets, offset_value);
228 CloseBlockWith(backtrack_goto_);
229
230 // Add an edge from the "indirect" goto to each of the targets.
231 for (intptr_t j = 0; j < entries_count; j++) {
232 backtrack_goto_->AddSuccessor(
233 TargetWithJoinGoto(entry_block_->indirect_entries().At(j)));
234 }
235 }
236
237
238 void IRRegExpMacroAssembler::GenerateSuccessBlock() {
239 set_current_instruction(success_block_);
240 TAG();
241
242 Definition* type_args_null_def = new(I) ConstantInstr(
243 TypeArguments::ZoneHandle(I, TypeArguments::null()));
244 PushArgumentInstr* type_arg_push = PushArgument(Bind(type_args_null_def));
245 PushArgumentInstr* length_push =
246 PushArgument(Bind(Uint64Constant(position_registers_count_)));
247
248 const Library& lib = Library::Handle(Library::CoreLibrary());
249 const Class& list_class = Class::Handle(
250 lib.LookupCoreClass(Symbols::List()));
251 const Function& list_ctor =
252 Function::ZoneHandle(I, list_class.LookupFactory(Symbols::ListFactory()));
253
254 // TODO(zerny): Use CreateArrayInstr and StoreIndexed instead.
255 StoreLocal(result_, Bind(StaticCall(list_ctor, type_arg_push, length_push)));
256
257 // Store captured offsets in the `matches` parameter.
258 // TODO(zerny): Eliminate position_register locals and access `matches`
259 // directly.
260 for (intptr_t i = 0; i < position_registers_count_; i++) {
261 PushArgumentInstr* matches_push = PushLocal(result_);
262 PushArgumentInstr* index_push = PushArgument(Bind(Uint64Constant(i)));
263
264 // Convert negative offsets from the end of the string to string indices.
265 PushArgumentInstr* offset_push = PushLocal(position_register(i));
266 PushArgumentInstr* len_push = PushLocal(string_param_length_);
267 PushArgumentInstr* value_push =
268 PushArgument(Bind(Add(offset_push, len_push)));
269
270 Do(InstanceCall(InstanceCallDescriptor::FromToken(Token::kASSIGN_INDEX),
271 matches_push,
272 index_push,
273 value_push));
274 }
275
276 // Print the result if tracing.
277 PRINT(PushLocal(result_));
278
279 // Return true on success.
280 AppendInstruction(new(I) ReturnInstr(kNoSourcePos, Bind(LoadLocal(result_))));
281 }
282
283
284 void IRRegExpMacroAssembler::GenerateExitBlock() {
285 set_current_instruction(exit_block_);
286 TAG();
287
288 // Return false on failure.
289 AppendInstruction(new(I) ReturnInstr(kNoSourcePos, Bind(LoadLocal(result_))));
290 }
291
292
293 #if defined(TARGET_ARCH_ARM64) || \
294 defined(TARGET_ARCH_ARM) || \
295 defined(TARGET_ARCH_MIPS)
296 // Disabling unaligned accesses forces the regexp engine to load characters one
297 // by one instead of up to 4 at once, along with the associated performance hit.
298 // TODO(zerny): Be less conservative about disabling unaligned accesses.
299 // For instance, ARMv6 supports unaligned accesses. Once it is enabled here,
300 // update LoadCodeUnitsInstr methods for the appropriate architectures.
301 static const bool kEnableUnalignedAccesses = false;
302 #else
303 static const bool kEnableUnalignedAccesses = true;
304 #endif
305 bool IRRegExpMacroAssembler::CanReadUnaligned() {
306 return kEnableUnalignedAccesses && !slow_safe();
307 }
308
309
310 RawArray* IRRegExpMacroAssembler::Execute(
311 const Function& function,
312 const String& input,
313 const Smi& start_offset,
314 Isolate* isolate) {
315 // Create the argument list.
316 const Array& args = Array::Handle(Array::New(2));
317 args.SetAt(0, input);
318 args.SetAt(1, start_offset);
319
320 // And finally call the generated code.
321
322 const Object& retval =
323 Object::Handle(isolate, DartEntry::InvokeFunction(function, args));
324 if (retval.IsError()) {
325 const Error& error = Error::Cast(retval);
326 OS::Print("%s\n", error.ToErrorCString());
327 // Should never happen.
328 UNREACHABLE();
329 }
330
331 if (retval.IsNull()) {
332 return Array::null();
333 }
334
335 ASSERT(retval.IsArray());
336 return Array::Cast(retval).raw();
337 }
338
339
340 RawBool* IRRegExpMacroAssembler::CaseInsensitiveCompareUC16(
341 RawString* str_raw,
342 RawSmi* lhs_index_raw,
343 RawSmi* rhs_index_raw,
344 RawSmi* length_raw) {
345 const String& str = String::Handle(str_raw);
346 const Smi& lhs_index = Smi::Handle(lhs_index_raw);
347 const Smi& rhs_index = Smi::Handle(rhs_index_raw);
348 const Smi& length = Smi::Handle(length_raw);
349
350 // TODO(zerny): Optimize as single instance. V8 has this as an
351 // isolate member.
352 unibrow::Mapping<unibrow::Ecma262Canonicalize> canonicalize;
353
354 for (intptr_t i = 0; i < length.Value(); i++) {
355 int32_t c1 = str.CharAt(lhs_index.Value() + i);
356 int32_t c2 = str.CharAt(rhs_index.Value() + i);
357 if (c1 != c2) {
358 int32_t s1[1] = { c1 };
359 canonicalize.get(c1, '\0', s1);
360 if (s1[0] != c2) {
361 int32_t s2[1] = { c2 };
362 canonicalize.get(c2, '\0', s2);
363 if (s1[0] != s2[0]) {
364 return Bool::False().raw();
365 }
366 }
367 }
368 }
369 return Bool::True().raw();
370 }
371
372
373 LocalVariable* IRRegExpMacroAssembler::Parameter(const String& name,
374 intptr_t index) const {
375 const Type& local_type = Type::ZoneHandle(I, Type::DynamicType());
376 LocalVariable* local =
377 new(I) LocalVariable(kNoSourcePos, name, local_type);
378
379 intptr_t param_frame_index = kParamEndSlotFromFp + kParamCount - index;
380 local->set_index(param_frame_index);
381
382 return local;
383 }
384
385
386 LocalVariable* IRRegExpMacroAssembler::Local(const String& name) {
387 const Type& local_type = Type::ZoneHandle(I, Type::DynamicType());
388 LocalVariable* local =
389 new(I) LocalVariable(kNoSourcePos, name, local_type);
390 local->set_index(GetNextLocalIndex());
391
392 return local;
393 }
394
395
396 ConstantInstr* IRRegExpMacroAssembler::Int64Constant(int64_t value) const {
397 return new(I) ConstantInstr(
398 Integer::ZoneHandle(I, Integer::New(value, Heap::kOld)));
399 }
400
401
402 ConstantInstr* IRRegExpMacroAssembler::Uint64Constant(uint64_t value) const {
403 return new(I) ConstantInstr(
404 Integer::ZoneHandle(I, Integer::NewFromUint64(value, Heap::kOld)));
405 }
406
407
408 ConstantInstr* IRRegExpMacroAssembler::BoolConstant(bool value) const {
409 return new(I) ConstantInstr(value ? Bool::True() : Bool::False());
410 }
411
412
413 ConstantInstr* IRRegExpMacroAssembler::StringConstant(const char* value) const {
414 return new(I) ConstantInstr(
415 String::ZoneHandle(I, String::New(value, Heap::kOld)));
416 }
417
418
419 ConstantInstr* IRRegExpMacroAssembler::WordCharacterMapConstant() const {
420 const Library& lib = Library::Handle(I, Library::CoreLibrary());
421 const Class& regexp_class = Class::Handle(I,
422 lib.LookupClassAllowPrivate(Symbols::JSSyntaxRegExp()));
423 const Field& word_character_field = Field::ZoneHandle(I,
424 regexp_class.LookupStaticField(Symbols::_wordCharacterMap()));
425 ASSERT(!word_character_field.IsNull());
426
427 if (word_character_field.IsUninitialized()) {
428 word_character_field.EvaluateInitializer();
429 }
430 ASSERT(!word_character_field.IsUninitialized());
431
432 return new(I) ConstantInstr(
433 Instance::ZoneHandle(I, word_character_field.value()));
434 }
435
436
437 ComparisonInstr* IRRegExpMacroAssembler::Comparison(
438 ComparisonKind kind, Definition* lhs, Definition* rhs) {
439 Token::Kind strict_comparison = Token::kEQ_STRICT;
440 Token::Kind intermediate_operator = Token::kILLEGAL;
441 switch (kind) {
442 case kEQ:
443 intermediate_operator = Token::kEQ;
444 break;
445 case kNE:
446 intermediate_operator = Token::kEQ;
447 strict_comparison = Token::kNE_STRICT;
448 break;
449 case kLT:
450 intermediate_operator = Token::kLT;
451 break;
452 case kGT:
453 intermediate_operator = Token::kGT;
454 break;
455 case kLTE:
456 intermediate_operator = Token::kLTE;
457 break;
458 case kGTE:
459 intermediate_operator = Token::kGTE;
460 break;
461 default:
462 UNREACHABLE();
463 }
464
465 ASSERT(intermediate_operator != Token::kILLEGAL);
466
467 PushArgumentInstr* lhs_push = PushArgument(Bind(lhs));
468 PushArgumentInstr* rhs_push = PushArgument(Bind(rhs));
469
470 Value* lhs_value =
471 Bind(InstanceCall(
472 InstanceCallDescriptor::FromToken(intermediate_operator),
473 lhs_push,
474 rhs_push));
475 Value* rhs_value = Bind(BoolConstant(true));
476
477 return new(I) StrictCompareInstr(
478 kNoSourcePos, strict_comparison, lhs_value, rhs_value, true);
479 }
480
481
482 StaticCallInstr* IRRegExpMacroAssembler::StaticCall(
483 const Function& function) const {
484 ZoneGrowableArray<PushArgumentInstr*>* arguments =
485 new(I) ZoneGrowableArray<PushArgumentInstr*>(0);
486 return StaticCall(function, arguments);
487 }
488
489
490 StaticCallInstr* IRRegExpMacroAssembler::StaticCall(
491 const Function& function,
492 PushArgumentInstr* arg1) const {
493 ZoneGrowableArray<PushArgumentInstr*>* arguments =
494 new(I) ZoneGrowableArray<PushArgumentInstr*>(1);
495 arguments->Add(arg1);
496
497 return StaticCall(function, arguments);
498 }
499
500
501 StaticCallInstr* IRRegExpMacroAssembler::StaticCall(
502 const Function& function,
503 PushArgumentInstr* arg1,
504 PushArgumentInstr* arg2) const {
505 ZoneGrowableArray<PushArgumentInstr*>* arguments =
506 new(I) ZoneGrowableArray<PushArgumentInstr*>(2);
507 arguments->Add(arg1);
508 arguments->Add(arg2);
509
510 return StaticCall(function, arguments);
511 }
512
513
514 StaticCallInstr* IRRegExpMacroAssembler::StaticCall(
515 const Function& function,
516 ZoneGrowableArray<PushArgumentInstr*>* arguments) const {
517 return new(I) StaticCallInstr(kNoSourcePos,
518 function,
519 Object::null_array(),
520 arguments,
521 ic_data_array_);
522 }
523
524
525 InstanceCallInstr* IRRegExpMacroAssembler::InstanceCall(
526 const InstanceCallDescriptor& desc,
527 PushArgumentInstr* arg1) const {
528 ZoneGrowableArray<PushArgumentInstr*>* arguments =
529 new(I) ZoneGrowableArray<PushArgumentInstr*>(1);
530 arguments->Add(arg1);
531
532 return InstanceCall(desc, arguments);
533 }
534
535
536 InstanceCallInstr* IRRegExpMacroAssembler::InstanceCall(
537 const InstanceCallDescriptor& desc,
538 PushArgumentInstr* arg1,
539 PushArgumentInstr* arg2) const {
540 ZoneGrowableArray<PushArgumentInstr*>* arguments =
541 new(I) ZoneGrowableArray<PushArgumentInstr*>(2);
542 arguments->Add(arg1);
543 arguments->Add(arg2);
544
545 return InstanceCall(desc, arguments);
546 }
547
548
549 InstanceCallInstr* IRRegExpMacroAssembler::InstanceCall(
550 const InstanceCallDescriptor& desc,
551 PushArgumentInstr* arg1,
552 PushArgumentInstr* arg2,
553 PushArgumentInstr* arg3) const {
554 ZoneGrowableArray<PushArgumentInstr*>* arguments =
555 new(I) ZoneGrowableArray<PushArgumentInstr*>(3);
556 arguments->Add(arg1);
557 arguments->Add(arg2);
558 arguments->Add(arg3);
559
560 return InstanceCall(desc, arguments);
561 }
562
563
564 InstanceCallInstr* IRRegExpMacroAssembler::InstanceCall(
565 const InstanceCallDescriptor& desc,
566 ZoneGrowableArray<PushArgumentInstr*> *arguments) const {
567 return
568 new(I) InstanceCallInstr(kNoSourcePos,
569 desc.name,
570 desc.token_kind,
571 arguments,
572 Object::null_array(),
573 desc.checked_argument_count,
574 ic_data_array_);
575 }
576
577
578 LoadLocalInstr* IRRegExpMacroAssembler::LoadLocal(LocalVariable* local) const {
579 return new(I) LoadLocalInstr(*local);
580 }
581
582
583 void IRRegExpMacroAssembler::StoreLocal(LocalVariable* local,
584 Value* value) {
585 Do(new(I) StoreLocalInstr(*local, value));
586 }
587
588
589 void IRRegExpMacroAssembler::set_current_instruction(Instruction* instruction) {
590 current_instruction_ = instruction;
591 }
592
593
594 Value* IRRegExpMacroAssembler::Bind(Definition* definition) {
595 AppendInstruction(definition);
596 definition->set_temp_index(temp_id_.Alloc());
597
598 return new(I) Value(definition);
599 }
600
601
602 void IRRegExpMacroAssembler::Do(Definition* definition) {
603 AppendInstruction(definition);
604 }
605
606
607 Value* IRRegExpMacroAssembler::BindLoadLocal(const LocalVariable& local) {
608 if (local.IsConst()) {
609 return Bind(new(I) ConstantInstr(*local.ConstValue()));
610 }
611 ASSERT(!local.is_captured());
612 return Bind(new(I) LoadLocalInstr(local));
613 }
614
615
// In some cases, the V8 irregexp engine generates unreachable code by emitting
// a jmp not followed by a bind. We cannot do the same, since it is impossible
// to append to a block following a jmp. In such cases, assume that we are doing
// the correct thing, but output a warning when tracing.
//
// When no block is open (current_instruction_ is NULL), a throwaway block is
// bound so that emission can continue. The body is wrapped in
// do { ... } while (0) so the macro expands to a single statement and stays
// safe inside unbraced if/else.
#define HANDLE_DEAD_CODE_EMISSION()                                            \
  do {                                                                         \
    if (current_instruction_ == NULL) {                                        \
      if (FLAG_trace_irregexp) {                                               \
        OS::Print("WARNING: Attempting to append to a closed assembler. "      \
                  "This could be either a bug or generation of dead code "     \
                  "inherited from V8.\n");                                     \
      }                                                                        \
      BlockLabel dummy;                                                        \
      BindBlock(&dummy);                                                       \
    }                                                                          \
  } while (0)
630
631 void IRRegExpMacroAssembler::AppendInstruction(Instruction* instruction) {
632 HANDLE_DEAD_CODE_EMISSION();
633
634 ASSERT(current_instruction_ != NULL);
635 ASSERT(current_instruction_->next() == NULL);
636
637 temp_id_.Dealloc(instruction->InputCount());
638 arg_id_.Dealloc(instruction->ArgumentCount());
639
640 current_instruction_->LinkTo(instruction);
641 set_current_instruction(instruction);
642 }
643
644
645 void IRRegExpMacroAssembler::CloseBlockWith(Instruction* instruction) {
646 HANDLE_DEAD_CODE_EMISSION();
647
648 ASSERT(current_instruction_ != NULL);
649 ASSERT(current_instruction_->next() == NULL);
650
651 temp_id_.Dealloc(instruction->InputCount());
652 arg_id_.Dealloc(instruction->ArgumentCount());
653
654 current_instruction_->LinkTo(instruction);
655 set_current_instruction(NULL);
656 }
657
658
659 void IRRegExpMacroAssembler::GoTo(BlockLabel* to) {
660 if (to == NULL) {
661 Backtrack();
662 } else {
663 to->SetLinked();
664 GoTo(to->block());
665 }
666 }
667
668
669 // Closes the current block with a goto, and unsets current_instruction_.
670 // BindBlock() must be called before emission can continue.
671 void IRRegExpMacroAssembler::GoTo(JoinEntryInstr* to) {
672 HANDLE_DEAD_CODE_EMISSION();
673
674 ASSERT(current_instruction_ != NULL);
675 ASSERT(current_instruction_->next() == NULL);
676 current_instruction_->Goto(to);
677 set_current_instruction(NULL);
678 }
679
680
681 PushArgumentInstr* IRRegExpMacroAssembler::PushArgument(Value* value) {
682 arg_id_.Alloc();
683 PushArgumentInstr* push = new(I) PushArgumentInstr(value);
684 // Do *not* use Do() for push argument instructions.
685 AppendInstruction(push);
686 return push;
687 }
688
689
690 PushArgumentInstr* IRRegExpMacroAssembler::PushLocal(LocalVariable* local) {
691 return PushArgument(Bind(LoadLocal(local)));
692 }
693
694
695 void IRRegExpMacroAssembler::Print(const char* str) {
696 Print(PushArgument(
697 Bind(new(I) ConstantInstr(
698 String::ZoneHandle(I, String::New(str, Heap::kOld))))));
699 }
700
701
702 void IRRegExpMacroAssembler::Print(PushArgumentInstr* argument) {
703 const Library& lib = Library::Handle(Library::CoreLibrary());
704 const Function& print_fn = Function::ZoneHandle(
705 I, lib.LookupFunctionAllowPrivate(Symbols::print()));
706 Do(StaticCall(print_fn, argument));
707 }
708
709
710 void IRRegExpMacroAssembler::PrintBlocks() {
711 for (intptr_t i = 0; i < blocks_.length(); i++) {
712 FlowGraphPrinter::PrintBlock(blocks_[i], false);
713 }
714 }
715
716
717 intptr_t IRRegExpMacroAssembler::stack_limit_slack() {
718 return 32;
719 }
720
721
722 void IRRegExpMacroAssembler::AdvanceCurrentPosition(intptr_t by) {
723 TAG();
724 if (by != 0) {
725 PushArgumentInstr* cur_pos_push = PushLocal(current_position_);
726 PushArgumentInstr* by_push = PushArgument(Bind(Int64Constant(by)));
727
728 Value* new_pos_value = Bind(Add(cur_pos_push, by_push));
729 StoreLocal(current_position_, new_pos_value);
730 }
731 }
732
733
734 void IRRegExpMacroAssembler::AdvanceRegister(intptr_t reg, intptr_t by) {
735 TAG();
736 ASSERT(reg >= 0);
737 ASSERT(reg < position_registers_.length());
738
739 if (by != 0) {
740 PushArgumentInstr* reg_push = PushLocal(position_register(reg));
741 PushArgumentInstr* by_push = PushArgument(Bind(Int64Constant(by)));
742 StoreLocal(position_register(reg), Bind(Add(reg_push, by_push)));
743 }
744 }
745
746
747 void IRRegExpMacroAssembler::Backtrack() {
748 TAG();
749 GoTo(backtrack_block_);
750 }
751
752
753 // A BindBlock is analogous to assigning a label to a basic block.
754 // If the BlockLabel does not yet contain a block, it is created.
755 // If there is a current instruction, append a goto to the bound block.
756 void IRRegExpMacroAssembler::BindBlock(BlockLabel* label) {
757 ASSERT(!label->IsBound());
758 ASSERT(label->block()->next() == NULL);
759
760 label->SetBound(block_id_.Alloc());
761 blocks_.Add(label->block());
762
763 if (current_instruction_ != NULL) {
764 GoTo(label);
765 }
766 set_current_instruction(label->block());
767
768 // Print the id of the current block if tracing.
769 PRINT(PushArgument(Bind(Uint64Constant(label->block()->block_id()))));
770 }
771
772
773 intptr_t IRRegExpMacroAssembler::GetNextLocalIndex() {
774 intptr_t id = local_id_.Alloc();
775 return kFirstLocalSlotFromFp - id;
776 }
777
778
779 LocalVariable* IRRegExpMacroAssembler::position_register(intptr_t index) {
780 // Create position registers as needed.
781 for (intptr_t i = position_registers_.length(); i < index + 1; i++) {
782 position_registers_.Add(Local(Symbols::position_registers()));
783 }
784
785 return position_registers_[index];
786 }
787
788
789 void IRRegExpMacroAssembler::CheckCharacter(uint32_t c, BlockLabel* on_equal) {
790 TAG();
791 Definition* cur_char_def = LoadLocal(current_character_);
792 Definition* char_def = Uint64Constant(c);
793
794 BranchOrBacktrack(Comparison(kEQ, cur_char_def, char_def), on_equal);
795 }
796
797
798 void IRRegExpMacroAssembler::CheckCharacterGT(uint16_t limit,
799 BlockLabel* on_greater) {
800 TAG();
801 BranchOrBacktrack(Comparison(kGT,
802 LoadLocal(current_character_),
803 Uint64Constant(limit)),
804 on_greater);
805 }
806
807
808 void IRRegExpMacroAssembler::CheckAtStart(BlockLabel* on_at_start) {
809 TAG();
810
811 BlockLabel not_at_start;
812
813 // Did we start the match at the start of the string at all?
814 BranchOrBacktrack(Comparison(kNE,
815 LoadLocal(start_index_param_),
816 Uint64Constant(0)),
817 &not_at_start);
818
819 // If we did, are we still at the start of the input, i.e. is
820 // (offset == string_length * -1)?
821 Definition* neg_len_def =
822 InstanceCall(InstanceCallDescriptor::FromToken(Token::kNEGATE),
823 PushLocal(string_param_length_));
824 Definition* offset_def = LoadLocal(current_position_);
825 BranchOrBacktrack(Comparison(kEQ, neg_len_def, offset_def),
826 on_at_start);
827
828 BindBlock(&not_at_start);
829 }
830
831
832 void IRRegExpMacroAssembler::CheckNotAtStart(BlockLabel* on_not_at_start) {
833 TAG();
834
835 // Did we start the match at the start of the string at all?
836 BranchOrBacktrack(Comparison(kNE,
837 LoadLocal(start_index_param_),
838 Uint64Constant(0)),
839 on_not_at_start);
840
841 // If we did, are we still at the start of the input, i.e. is
842 // (offset == string_length * -1)?
843 Definition* neg_len_def =
844 InstanceCall(InstanceCallDescriptor::FromToken(Token::kNEGATE),
845 PushLocal(string_param_length_));
846 Definition* offset_def = LoadLocal(current_position_);
847 BranchOrBacktrack(Comparison(kNE, neg_len_def, offset_def),
848 on_not_at_start);
849 }
850
851
852 void IRRegExpMacroAssembler::CheckCharacterLT(uint16_t limit,
853 BlockLabel* on_less) {
854 TAG();
855 BranchOrBacktrack(Comparison(kLT,
856 LoadLocal(current_character_),
857 Uint64Constant(limit)),
858 on_less);
859 }
860
861
862 void IRRegExpMacroAssembler::CheckGreedyLoop(BlockLabel* on_equal) {
863 TAG();
864
865 BlockLabel fallthrough;
866
867 PushArgumentInstr* stack_push = PushLocal(stack_);
868 Definition* stack_tip_def = InstanceCall(
869 InstanceCallDescriptor(String::ZoneHandle(
870 I, Field::GetterSymbol(Symbols::last()))),
871 stack_push);
872 Definition* cur_pos_def = LoadLocal(current_position_);
873
874 BranchOrBacktrack(Comparison(kNE, stack_tip_def, cur_pos_def),
875 &fallthrough);
876
877 // Pop, throwing away the value.
878 stack_push = PushLocal(stack_);
879 Do(InstanceCall(InstanceCallDescriptor(Symbols::removeLast()),
880 stack_push));
881
882 BranchOrBacktrack(NULL, on_equal);
883
884 BindBlock(&fallthrough);
885 }
886
887
888 void IRRegExpMacroAssembler::CheckNotBackReferenceIgnoreCase(
889 intptr_t start_reg,
890 BlockLabel* on_no_match) {
891 TAG();
892 ASSERT(start_reg + 1 <= position_registers_.length());
893
894 BlockLabel fallthrough;
895
896 PushArgumentInstr* end_push = PushLocal(position_register(start_reg + 1));
897 PushArgumentInstr* start_push = PushLocal(position_register(start_reg));
898 StoreLocal(capture_length_, Bind(Sub(end_push, start_push)));
899
900 // The length of a capture should not be negative. This can only happen
901 // if the end of the capture is unrecorded, or at a point earlier than
902 // the start of the capture.
903 // BranchOrBacktrack(less, on_no_match);
904
905 BranchOrBacktrack(Comparison(kLT,
906 LoadLocal(capture_length_),
907 Uint64Constant(0)),
908 on_no_match);
909
910 // If length is zero, either the capture is empty or it is completely
911 // uncaptured. In either case succeed immediately.
912 BranchOrBacktrack(Comparison(kEQ,
913 LoadLocal(capture_length_),
914 Uint64Constant(0)),
915 &fallthrough);
916
917
918 // Check that there are sufficient characters left in the input.
919 PushArgumentInstr* pos_push = PushLocal(current_position_);
920 PushArgumentInstr* len_push = PushLocal(capture_length_);
921 BranchOrBacktrack(
922 Comparison(kGT,
923 InstanceCall(InstanceCallDescriptor::FromToken(Token::kADD),
924 pos_push,
925 len_push),
926 Uint64Constant(0)),
927 on_no_match);
928
929 pos_push = PushLocal(current_position_);
930 len_push = PushLocal(string_param_length_);
931 StoreLocal(match_start_index_, Bind(Add(pos_push, len_push)));
932
933 pos_push = PushLocal(position_register(start_reg));
934 len_push = PushLocal(string_param_length_);
935 StoreLocal(capture_start_index_, Bind(Add(pos_push, len_push)));
936
937 pos_push = PushLocal(match_start_index_);
938 len_push = PushLocal(capture_length_);
939 StoreLocal(match_end_index_, Bind(Add(pos_push, len_push)));
940
941 BlockLabel success;
942 if (mode_ == ASCII) {
943 BlockLabel loop_increment;
944 BlockLabel loop;
945 BindBlock(&loop);
946
947 StoreLocal(char_in_capture_, CharacterAt(LoadLocal(capture_start_index_)));
948 StoreLocal(char_in_match_, CharacterAt(LoadLocal(match_start_index_)));
949
950 BranchOrBacktrack(Comparison(kEQ,
951 LoadLocal(char_in_capture_),
952 LoadLocal(char_in_match_)),
953 &loop_increment);
954
955 // Mismatch, try case-insensitive match (converting letters to lower-case).
956 PushArgumentInstr* match_char_push = PushLocal(char_in_match_);
957 PushArgumentInstr* mask_push = PushArgument(Bind(Uint64Constant(0x20)));
958 StoreLocal(char_in_match_,
959 Bind(InstanceCall(
960 InstanceCallDescriptor::FromToken(Token::kBIT_OR),
961 match_char_push,
962 mask_push)));
963
964 BlockLabel convert_capture;
965 BlockLabel on_not_in_range;
966 BranchOrBacktrack(Comparison(kLT,
967 LoadLocal(char_in_match_),
968 Uint64Constant('a')),
969 &on_not_in_range);
970 BranchOrBacktrack(Comparison(kGT,
971 LoadLocal(char_in_match_),
972 Uint64Constant('z')),
973 &on_not_in_range);
974 GoTo(&convert_capture);
975 BindBlock(&on_not_in_range);
976
977 // Latin-1: Check for values in range [224,254] but not 247.
978 BranchOrBacktrack(Comparison(kLT,
979 LoadLocal(char_in_match_),
980 Uint64Constant(224)),
981 on_no_match);
982 BranchOrBacktrack(Comparison(kGT,
983 LoadLocal(char_in_match_),
984 Uint64Constant(254)),
985 on_no_match);
986
987 BranchOrBacktrack(Comparison(kEQ,
988 LoadLocal(char_in_match_),
989 Uint64Constant(247)),
990 on_no_match);
991
992 // Also convert capture character.
993 BindBlock(&convert_capture);
994
995 PushArgumentInstr* capture_char_push = PushLocal(char_in_capture_);
996 mask_push = PushArgument(Bind(Uint64Constant(0x20)));
997 StoreLocal(char_in_capture_,
998 Bind(InstanceCall(
999 InstanceCallDescriptor::FromToken(Token::kBIT_OR),
1000 capture_char_push,
1001 mask_push)));
1002
1003 BranchOrBacktrack(Comparison(kNE,
1004 LoadLocal(char_in_match_),
1005 LoadLocal(char_in_capture_)),
1006 on_no_match);
1007
1008 BindBlock(&loop_increment);
1009
1010 // Increment indexes into capture and match strings.
1011 PushArgumentInstr* index_push = PushLocal(capture_start_index_);
1012 PushArgumentInstr* inc_push = PushArgument(Bind(Uint64Constant(1)));
1013 StoreLocal(capture_start_index_, Bind(Add(index_push, inc_push)));
1014
1015 index_push = PushLocal(match_start_index_);
1016 inc_push = PushArgument(Bind(Uint64Constant(1)));
1017 StoreLocal(match_start_index_, Bind(Add(index_push, inc_push)));
1018
1019 // Compare to end of match, and loop if not done.
1020 BranchOrBacktrack(Comparison(kLT,
1021 LoadLocal(match_start_index_),
1022 LoadLocal(match_end_index_)),
1023 &loop);
1024 } else {
1025 ASSERT(mode_ == UC16);
1026
1027 Value* string_value = Bind(LoadLocal(string_param_));
1028 Value* lhs_index_value = Bind(LoadLocal(match_start_index_));
1029 Value* rhs_index_value = Bind(LoadLocal(capture_start_index_));
1030 Value* length_value = Bind(LoadLocal(capture_length_));
1031
1032 Definition* is_match_def =
1033 new(I) CaseInsensitiveCompareUC16Instr(
1034 string_value,
1035 lhs_index_value,
1036 rhs_index_value,
1037 length_value,
1038 specialization_cid_);
1039
1040 BranchOrBacktrack(Comparison(kNE, is_match_def, BoolConstant(true)),
1041 on_no_match);
1042 }
1043
1044 BindBlock(&success);
1045
1046 // Move current character position to position after match.
1047 PushArgumentInstr* match_end_push = PushLocal(match_end_index_);
1048 len_push = PushLocal(string_param_length_);
1049 StoreLocal(current_position_, Bind(Sub(match_end_push, len_push)));
1050
1051 BindBlock(&fallthrough);
1052 }
1053
1054
1055 void IRRegExpMacroAssembler::CheckNotBackReference(
1056 intptr_t start_reg,
1057 BlockLabel* on_no_match) {
1058 TAG();
1059 ASSERT(start_reg + 1 <= position_registers_.length());
1060
1061 BlockLabel fallthrough;
1062 BlockLabel success;
1063
1064 // Find length of back-referenced capture.
1065 PushArgumentInstr* end_push = PushLocal(position_register(start_reg + 1));
1066 PushArgumentInstr* start_push = PushLocal(position_register(start_reg));
1067 StoreLocal(capture_length_, Bind(Sub(end_push, start_push)));
1068
1069 // Fail on partial or illegal capture (start of capture after end of capture).
1070 BranchOrBacktrack(Comparison(kLT,
1071 LoadLocal(capture_length_),
1072 Uint64Constant(0)),
1073 on_no_match);
1074
1075 // Succeed on empty capture (including no capture)
1076 BranchOrBacktrack(Comparison(kEQ,
1077 LoadLocal(capture_length_),
1078 Uint64Constant(0)),
1079 &fallthrough);
1080
1081 // Check that there are sufficient characters left in the input.
1082 PushArgumentInstr* pos_push = PushLocal(current_position_);
1083 PushArgumentInstr* len_push = PushLocal(capture_length_);
1084 BranchOrBacktrack(
1085 Comparison(kGT,
1086 InstanceCall(InstanceCallDescriptor::FromToken(Token::kADD),
1087 pos_push,
1088 len_push),
1089 Uint64Constant(0)),
1090 on_no_match);
1091
1092 // Compute pointers to match string and capture string.
1093 pos_push = PushLocal(current_position_);
1094 len_push = PushLocal(string_param_length_);
1095 StoreLocal(match_start_index_, Bind(Add(pos_push, len_push)));
1096
1097 pos_push = PushLocal(position_register(start_reg));
1098 len_push = PushLocal(string_param_length_);
1099 StoreLocal(capture_start_index_, Bind(Add(pos_push, len_push)));
1100
1101 pos_push = PushLocal(match_start_index_);
1102 len_push = PushLocal(capture_length_);
1103 StoreLocal(match_end_index_, Bind(Add(pos_push, len_push)));
1104
1105 BlockLabel loop;
1106 BindBlock(&loop);
1107
1108 StoreLocal(char_in_capture_, CharacterAt(LoadLocal(capture_start_index_)));
1109 StoreLocal(char_in_match_, CharacterAt(LoadLocal(match_start_index_)));
1110
1111 BranchOrBacktrack(Comparison(kNE,
1112 LoadLocal(char_in_capture_),
1113 LoadLocal(char_in_match_)),
1114 on_no_match);
1115
1116 // Increment indexes into capture and match strings.
1117 PushArgumentInstr* index_push = PushLocal(capture_start_index_);
1118 PushArgumentInstr* inc_push = PushArgument(Bind(Uint64Constant(1)));
1119 StoreLocal(capture_start_index_, Bind(Add(index_push, inc_push)));
1120
1121 index_push = PushLocal(match_start_index_);
1122 inc_push = PushArgument(Bind(Uint64Constant(1)));
1123 StoreLocal(match_start_index_, Bind(Add(index_push, inc_push)));
1124
1125 // Check if we have reached end of match area.
1126 BranchOrBacktrack(Comparison(kLT,
1127 LoadLocal(match_start_index_),
1128 LoadLocal(match_end_index_)),
1129 &loop);
1130
1131 BindBlock(&success);
1132
1133 // Move current character position to position after match.
1134 PushArgumentInstr* match_end_push = PushLocal(match_end_index_);
1135 len_push = PushLocal(string_param_length_);
1136 StoreLocal(current_position_, Bind(Sub(match_end_push, len_push)));
1137
1138 BindBlock(&fallthrough);
1139 }
1140
1141
1142 void IRRegExpMacroAssembler::CheckNotCharacter(uint32_t c,
1143 BlockLabel* on_not_equal) {
1144 TAG();
1145 BranchOrBacktrack(Comparison(kNE,
1146 LoadLocal(current_character_),
1147 Uint64Constant(c)),
1148 on_not_equal);
1149 }
1150
1151
1152 void IRRegExpMacroAssembler::CheckCharacterAfterAnd(uint32_t c,
1153 uint32_t mask,
1154 BlockLabel* on_equal) {
1155 TAG();
1156
1157 Definition* actual_def = LoadLocal(current_character_);
1158 Definition* expected_def = Uint64Constant(c);
1159
1160 PushArgumentInstr* actual_push = PushArgument(Bind(actual_def));
1161 PushArgumentInstr* mask_push = PushArgument(Bind(Uint64Constant(mask)));
1162 actual_def = InstanceCall(InstanceCallDescriptor::FromToken(Token::kBIT_AND),
1163 actual_push,
1164 mask_push);
1165
1166 BranchOrBacktrack(Comparison(kEQ, actual_def, expected_def), on_equal);
1167 }
1168
1169
1170 void IRRegExpMacroAssembler::CheckNotCharacterAfterAnd(
1171 uint32_t c,
1172 uint32_t mask,
1173 BlockLabel* on_not_equal) {
1174 TAG();
1175
1176 Definition* actual_def = LoadLocal(current_character_);
1177 Definition* expected_def = Uint64Constant(c);
1178
1179 PushArgumentInstr* actual_push = PushArgument(Bind(actual_def));
1180 PushArgumentInstr* mask_push = PushArgument(Bind(Uint64Constant(mask)));
1181 actual_def = InstanceCall(InstanceCallDescriptor::FromToken(Token::kBIT_AND),
1182 actual_push,
1183 mask_push);
1184
1185 BranchOrBacktrack(Comparison(kNE, actual_def, expected_def), on_not_equal);
1186 }
1187
1188
1189 void IRRegExpMacroAssembler::CheckNotCharacterAfterMinusAnd(
1190 uint16_t c,
1191 uint16_t minus,
1192 uint16_t mask,
1193 BlockLabel* on_not_equal) {
1194 TAG();
1195 ASSERT(minus < Utf16::kMaxCodeUnit); // NOLINT
1196
1197 Definition* actual_def = LoadLocal(current_character_);
1198 Definition* expected_def = Uint64Constant(c);
1199
1200 PushArgumentInstr* actual_push = PushArgument(Bind(actual_def));
1201 PushArgumentInstr* minus_push = PushArgument(Bind(Uint64Constant(minus)));
1202
1203 actual_push = PushArgument(Bind(Sub(actual_push, minus_push)));
1204 PushArgumentInstr* mask_push = PushArgument(Bind(Uint64Constant(mask)));
1205 actual_def = InstanceCall(InstanceCallDescriptor::FromToken(Token::kBIT_AND),
1206 actual_push,
1207 mask_push);
1208
1209 BranchOrBacktrack(Comparison(kNE, actual_def, expected_def), on_not_equal);
1210 }
1211
1212
1213 void IRRegExpMacroAssembler::CheckCharacterInRange(
1214 uint16_t from,
1215 uint16_t to,
1216 BlockLabel* on_in_range) {
1217 TAG();
1218 ASSERT(from <= to);
1219
1220 // TODO(zerny): All range comparisons could be done cheaper with unsigned
1221 // compares. This pattern repeats in various places.
1222
1223 BlockLabel on_not_in_range;
1224 BranchOrBacktrack(Comparison(kLT,
1225 LoadLocal(current_character_),
1226 Uint64Constant(from)),
1227 &on_not_in_range);
1228 BranchOrBacktrack(Comparison(kGT,
1229 LoadLocal(current_character_),
1230 Uint64Constant(to)),
1231 &on_not_in_range);
1232 BranchOrBacktrack(NULL, on_in_range);
1233
1234 BindBlock(&on_not_in_range);
1235 }
1236
1237
1238 void IRRegExpMacroAssembler::CheckCharacterNotInRange(
1239 uint16_t from,
1240 uint16_t to,
1241 BlockLabel* on_not_in_range) {
1242 TAG();
1243 ASSERT(from <= to);
1244
1245 BranchOrBacktrack(Comparison(kLT,
1246 LoadLocal(current_character_),
1247 Uint64Constant(from)),
1248 on_not_in_range);
1249
1250 BranchOrBacktrack(Comparison(kGT,
1251 LoadLocal(current_character_),
1252 Uint64Constant(to)),
1253 on_not_in_range);
1254 }
1255
1256
1257 void IRRegExpMacroAssembler::CheckBitInTable(
1258 const TypedData& table,
1259 BlockLabel* on_bit_set) {
1260 TAG();
1261
1262 PushArgumentInstr* table_push =
1263 PushArgument(Bind(new(I) ConstantInstr(table)));
1264 PushArgumentInstr* index_push = PushLocal(current_character_);
1265
1266 if (mode_ != ASCII || kTableMask != Symbols::kMaxOneCharCodeSymbol) {
1267 PushArgumentInstr* mask_push =
1268 PushArgument(Bind(Uint64Constant(kTableSize - 1)));
1269 index_push = PushArgument(
1270 Bind(InstanceCall(InstanceCallDescriptor::FromToken(Token::kBIT_AND),
1271 index_push,
1272 mask_push)));
1273 }
1274
1275 Definition* byte_def =
1276 InstanceCall(InstanceCallDescriptor::FromToken(Token::kINDEX),
1277 table_push,
1278 index_push);
1279 Definition* zero_def = Int64Constant(0);
1280
1281 BranchOrBacktrack(Comparison(kNE, byte_def, zero_def), on_bit_set);
1282 }
1283
1284
1285 bool IRRegExpMacroAssembler::CheckSpecialCharacterClass(
1286 uint16_t type,
1287 BlockLabel* on_no_match) {
1288 TAG();
1289
1290 // Range checks (c in min..max) are generally implemented by an unsigned
1291 // (c - min) <= (max - min) check
1292 switch (type) {
1293 case 's':
1294 // Match space-characters
1295 if (mode_ == ASCII) {
1296 // One byte space characters are '\t'..'\r', ' ' and \u00a0.
1297 BlockLabel success;
1298 // Space (' ').
1299 BranchOrBacktrack(Comparison(kEQ,
1300 LoadLocal(current_character_),
1301 Uint64Constant(' ')),
1302 &success);
1303 // Check range 0x09..0x0d.
1304 CheckCharacterInRange('\t', '\r', &success);
1305 // \u00a0 (NBSP).
1306 BranchOrBacktrack(Comparison(kNE,
1307 LoadLocal(current_character_),
1308 Uint64Constant(0x00a0)),
1309 on_no_match);
1310 BindBlock(&success);
1311 return true;
1312 }
1313 return false;
1314 case 'S':
1315 // The emitted code for generic character classes is good enough.
1316 return false;
1317 case 'd':
1318 // Match ASCII digits ('0'..'9')
1319 CheckCharacterNotInRange('0', '9', on_no_match);
1320 return true;
1321 case 'D':
1322 // Match non ASCII-digits
1323 CheckCharacterInRange('0', '9', on_no_match);
1324 return true;
1325 case '.': {
1326 // Match non-newlines (not 0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029)
1327 BranchOrBacktrack(Comparison(kEQ,
1328 LoadLocal(current_character_),
1329 Uint64Constant('\n')),
1330 on_no_match);
1331 BranchOrBacktrack(Comparison(kEQ,
1332 LoadLocal(current_character_),
1333 Uint64Constant('\r')),
1334 on_no_match);
1335 if (mode_ == UC16) {
1336 BranchOrBacktrack(Comparison(kEQ,
1337 LoadLocal(current_character_),
1338 Uint64Constant(0x2028)),
1339 on_no_match);
1340 BranchOrBacktrack(Comparison(kEQ,
1341 LoadLocal(current_character_),
1342 Uint64Constant(0x2029)),
1343 on_no_match);
1344 }
1345 return true;
1346 }
1347 case 'w': {
1348 if (mode_ != ASCII) {
1349 // Table is 128 entries, so all ASCII characters can be tested.
1350 BranchOrBacktrack(Comparison(kGT,
1351 LoadLocal(current_character_),
1352 Uint64Constant('z')),
1353 on_no_match);
1354 }
1355
1356 PushArgumentInstr* table_push =
1357 PushArgument(Bind(WordCharacterMapConstant()));
1358 PushArgumentInstr* index_push = PushLocal(current_character_);
1359
1360 Definition* byte_def =
1361 InstanceCall(InstanceCallDescriptor::FromToken(Token::kINDEX),
1362 table_push,
1363 index_push);
1364 Definition* zero_def = Int64Constant(0);
1365
1366 BranchOrBacktrack(Comparison(kEQ, byte_def, zero_def), on_no_match);
1367
1368 return true;
1369 }
1370 case 'W': {
1371 BlockLabel done;
1372 if (mode_ != ASCII) {
1373 // Table is 128 entries, so all ASCII characters can be tested.
1374 BranchOrBacktrack(Comparison(kGT,
1375 LoadLocal(current_character_),
1376 Uint64Constant('z')),
1377 &done);
1378 }
1379
1380 // TODO(zerny): Refactor to use CheckBitInTable if possible.
1381
1382 PushArgumentInstr* table_push =
1383 PushArgument(Bind(WordCharacterMapConstant()));
1384 PushArgumentInstr* index_push = PushLocal(current_character_);
1385
1386 Definition* byte_def =
1387 InstanceCall(InstanceCallDescriptor::FromToken(Token::kINDEX),
1388 table_push,
1389 index_push);
1390 Definition* zero_def = Int64Constant(0);
1391
1392 BranchOrBacktrack(Comparison(kNE, byte_def, zero_def), on_no_match);
1393
1394 if (mode_ != ASCII) {
1395 BindBlock(&done);
1396 }
1397 return true;
1398 }
1399 // Non-standard classes (with no syntactic shorthand) used internally.
1400 case '*':
1401 // Match any character.
1402 return true;
1403 case 'n': {
1404 // Match newlines (0x0a('\n'), 0x0d('\r'), 0x2028 or 0x2029).
1405 // The opposite of '.'.
1406 BlockLabel success;
1407 BranchOrBacktrack(Comparison(kEQ,
1408 LoadLocal(current_character_),
1409 Uint64Constant('\n')),
1410 &success);
1411 BranchOrBacktrack(Comparison(kEQ,
1412 LoadLocal(current_character_),
1413 Uint64Constant('\r')),
1414 &success);
1415 if (mode_ == UC16) {
1416 BranchOrBacktrack(Comparison(kEQ,
1417 LoadLocal(current_character_),
1418 Uint64Constant(0x2028)),
1419 &success);
1420 BranchOrBacktrack(Comparison(kEQ,
1421 LoadLocal(current_character_),
1422 Uint64Constant(0x2029)),
1423 &success);
1424 }
1425 BranchOrBacktrack(NULL, on_no_match);
1426 BindBlock(&success);
1427 return true;
1428 }
1429 // No custom implementation (yet): s(uint16_t), S(uint16_t).
1430 default:
1431 return false;
1432 }
1433 }
1434
1435
1436 void IRRegExpMacroAssembler::Fail() {
1437 TAG();
1438 ASSERT(FAILURE == 0); // Return value for failure is zero.
1439 if (!global()) {
1440 UNREACHABLE(); // Dart regexps are always global.
1441 }
1442 GoTo(exit_block_);
1443 }
1444
1445
1446 void IRRegExpMacroAssembler::IfRegisterGE(intptr_t reg,
1447 intptr_t comparand,
1448 BlockLabel* if_ge) {
1449 TAG();
1450 BranchOrBacktrack(Comparison(kGTE,
1451 LoadLocal(position_register(reg)),
1452 Int64Constant(comparand)),
1453 if_ge);
1454 }
1455
1456
1457 void IRRegExpMacroAssembler::IfRegisterLT(intptr_t reg,
1458 intptr_t comparand,
1459 BlockLabel* if_lt) {
1460 TAG();
1461 BranchOrBacktrack(Comparison(kLT,
1462 LoadLocal(position_register(reg)),
1463 Int64Constant(comparand)),
1464 if_lt);
1465 }
1466
1467
1468 void IRRegExpMacroAssembler::IfRegisterEqPos(intptr_t reg,
1469 BlockLabel* if_eq) {
1470 TAG();
1471 BranchOrBacktrack(Comparison(kEQ,
1472 LoadLocal(position_register(reg)),
1473 LoadLocal(current_position_)),
1474 if_eq);
1475 }
1476
1477
1478 RegExpMacroAssembler::IrregexpImplementation
1479 IRRegExpMacroAssembler::Implementation() {
1480 return kIRImplementation;
1481 }
1482
1483
1484 void IRRegExpMacroAssembler::LoadCurrentCharacter(intptr_t cp_offset,
1485 BlockLabel* on_end_of_input,
1486 bool check_bounds,
1487 intptr_t characters) {
1488 TAG();
1489 ASSERT(cp_offset >= -1); // ^ and \b can look behind one character.
1490 ASSERT(cp_offset < (1<<30)); // Be sane! (And ensure negation works)
1491 if (check_bounds) {
1492 CheckPosition(cp_offset + characters - 1, on_end_of_input);
1493 }
1494 LoadCurrentCharacterUnchecked(cp_offset, characters);
1495 }
1496
1497
1498 void IRRegExpMacroAssembler::PopCurrentPosition() {
1499 TAG();
1500 StoreLocal(current_position_, PopStack());
1501 }
1502
1503
1504 void IRRegExpMacroAssembler::PopRegister(intptr_t register_index) {
1505 TAG();
1506 ASSERT(register_index < position_registers_.length());
1507 StoreLocal(position_register(register_index), PopStack());
1508 }
1509
1510
1511 void IRRegExpMacroAssembler::PushStack(Definition *definition) {
1512 PushArgumentInstr* stack_push = PushLocal(stack_);
1513 PushArgumentInstr* value_push = PushArgument(Bind(definition));
1514 Do(InstanceCall(InstanceCallDescriptor(Symbols::add()),
1515 stack_push,
1516 value_push));
1517 }
1518
1519
1520 Value* IRRegExpMacroAssembler::PopStack() {
1521 PushArgumentInstr* stack_push = PushLocal(stack_);
1522 return Bind(InstanceCall(InstanceCallDescriptor(Symbols::removeLast()),
1523 stack_push));
1524 }
1525
1526
1527 // Pushes the location corresponding to label to the backtracking stack.
1528 void IRRegExpMacroAssembler::PushBacktrack(BlockLabel* label) {
1529 TAG();
1530
1531 // Ensure that targets of indirect jumps are never accessed through a
1532 // normal control flow instructions by creating a new block for each backtrack
1533 // target.
1534 IndirectEntryInstr* indirect_target = IndirectWithJoinGoto(label->block());
1535
1536 // Add a fake edge from the graph entry for data flow analysis.
1537 entry_block_->AddIndirectEntry(indirect_target);
1538
1539 ConstantInstr* offset = Uint64Constant(indirect_target->indirect_id());
1540 PushStack(offset);
1541 }
1542
1543
1544 void IRRegExpMacroAssembler::PushCurrentPosition() {
1545 TAG();
1546 PushStack(LoadLocal(current_position_));
1547 }
1548
1549
1550 void IRRegExpMacroAssembler::PushRegister(intptr_t register_index) {
1551 TAG();
1552 PushStack(LoadLocal(position_register(register_index)));
1553 }
1554
1555
1556 void IRRegExpMacroAssembler::ReadCurrentPositionFromRegister(intptr_t reg) {
1557 TAG();
1558 StoreLocal(current_position_, Bind(LoadLocal(position_register(reg))));
1559 }
1560
1561 // Resets the size of the stack to the value stored in reg.
1562 void IRRegExpMacroAssembler::ReadStackPointerFromRegister(intptr_t reg) {
1563 TAG();
1564 ASSERT(reg < position_registers_.length());
1565
1566 PushArgumentInstr* stack_push = PushLocal(stack_);
1567 PushArgumentInstr* length_push = PushLocal(position_register(reg));
1568
1569 Do(InstanceCall(
1570 InstanceCallDescriptor(
1571 String::ZoneHandle(I, Field::SetterSymbol(Symbols::Length()))),
1572 stack_push,
1573 length_push));
1574 }
1575
1576 void IRRegExpMacroAssembler::SetCurrentPositionFromEnd(intptr_t by) {
1577 TAG();
1578
1579 BlockLabel after_position;
1580
1581 Definition* cur_pos_def = LoadLocal(current_position_);
1582 Definition* by_value_def = Int64Constant(-by);
1583
1584 BranchOrBacktrack(Comparison(kGTE, cur_pos_def, by_value_def),
1585 &after_position);
1586
1587 StoreLocal(current_position_, Bind(Int64Constant(-by)));
1588
1589 // On RegExp code entry (where this operation is used), the character before
1590 // the current position is expected to be already loaded.
1591 // We have advanced the position, so it's safe to read backwards.
1592 LoadCurrentCharacterUnchecked(-1, 1);
1593
1594 BindBlock(&after_position);
1595 }
1596
1597
1598 void IRRegExpMacroAssembler::SetRegister(intptr_t register_index, intptr_t to) {
1599 TAG();
1600 // Reserved for positions!
1601 ASSERT(register_index >= position_registers_count_);
1602 StoreLocal(position_register(register_index), Bind(Int64Constant(to)));
1603 }
1604
1605
1606 bool IRRegExpMacroAssembler::Succeed() {
1607 TAG();
1608 GoTo(success_block_);
1609 return global();
1610 }
1611
1612
1613 void IRRegExpMacroAssembler::WriteCurrentPositionToRegister(
1614 intptr_t reg, intptr_t cp_offset) {
1615 TAG();
1616
1617 PushArgumentInstr* pos_push = PushLocal(current_position_);
1618 PushArgumentInstr* off_push =
1619 PushArgument(Bind(Int64Constant(cp_offset)));
1620
1621 // Push the negative offset; these are converted to positive string positions
1622 // within the success block.
1623 StoreLocal(position_register(reg), Bind(Add(pos_push, off_push)));
1624 }
1625
1626
1627 void IRRegExpMacroAssembler::ClearRegisters(
1628 intptr_t reg_from, intptr_t reg_to) {
1629 TAG();
1630
1631 ASSERT(reg_from <= reg_to);
1632 ASSERT(reg_to < position_registers_.length());
1633
1634 // In order to clear registers to a final result value of -1, set them to
1635 // (-1 - string length), the offset of -1 from the end of the string.
1636
1637 for (intptr_t reg = reg_from; reg <= reg_to; reg++) {
1638 PushArgumentInstr* minus_one_push =
1639 PushArgument(Bind(Int64Constant(-1)));
1640 PushArgumentInstr* length_push = PushLocal(string_param_length_);
1641
1642 StoreLocal(position_register(reg), Bind(Sub(minus_one_push, length_push)));
1643 }
1644 }
1645
1646
1647 void IRRegExpMacroAssembler::WriteStackPointerToRegister(intptr_t reg) {
1648 TAG();
1649
1650 PushArgumentInstr* stack_push = PushLocal(stack_);
1651 Value* length_value =
1652 Bind(InstanceCall(InstanceCallDescriptor(
1653 String::ZoneHandle(
1654 I, Field::GetterSymbol(Symbols::Length()))),
1655 stack_push));
1656
1657 StoreLocal(position_register(reg), length_value);
1658 }
1659
1660
1661 // Private methods:
1662
1663
1664 void IRRegExpMacroAssembler::CheckPosition(intptr_t cp_offset,
1665 BlockLabel* on_outside_input) {
1666 TAG();
1667 Definition* curpos_def = LoadLocal(current_position_);
1668 Definition* cp_off_def = Int64Constant(-cp_offset);
1669
1670 // If (current_position_ < -cp_offset), we are in bounds.
1671 // Remember, current_position_ is a negative offset from the string end.
1672
1673 BranchOrBacktrack(Comparison(kGTE, curpos_def, cp_off_def),
1674 on_outside_input);
1675 }
1676
1677
1678 void IRRegExpMacroAssembler::BranchOrBacktrack(
1679 ComparisonInstr* comparison,
1680 BlockLabel* true_successor) {
1681 if (comparison == NULL) { // No condition
1682 if (true_successor == NULL) {
1683 Backtrack();
1684 return;
1685 }
1686 GoTo(true_successor);
1687 return;
1688 }
1689
1690 // If no successor block has been passed in, backtrack.
1691 JoinEntryInstr* true_successor_block = backtrack_block_;
1692 if (true_successor != NULL) {
1693 true_successor->SetLinked();
1694 true_successor_block = true_successor->block();
1695 }
1696 ASSERT(true_successor_block != NULL);
1697
1698 // If the condition is not true, fall through to a new block.
1699 BlockLabel fallthrough;
1700
1701 BranchInstr* branch = new(I) BranchInstr(comparison);
1702 *branch->true_successor_address() =
1703 TargetWithJoinGoto(true_successor_block);
1704 *branch->false_successor_address() =
1705 TargetWithJoinGoto(fallthrough.block());
1706
1707 CloseBlockWith(branch);
1708 BindBlock(&fallthrough);
1709 }
1710
1711
1712 TargetEntryInstr* IRRegExpMacroAssembler::TargetWithJoinGoto(
1713 JoinEntryInstr* dst) {
1714 TargetEntryInstr* target = new(I) TargetEntryInstr(
1715 block_id_.Alloc(), kInvalidTryIndex);
1716 blocks_.Add(target);
1717
1718 target->AppendInstruction(new(I) GotoInstr(dst));
1719
1720 return target;
1721 }
1722
1723
1724 IndirectEntryInstr* IRRegExpMacroAssembler::IndirectWithJoinGoto(
1725 JoinEntryInstr* dst) {
1726 IndirectEntryInstr* target = new(I) IndirectEntryInstr(
1727 block_id_.Alloc(), indirect_id_.Alloc(), kInvalidTryIndex);
1728 blocks_.Add(target);
1729
1730 target->AppendInstruction(new(I) GotoInstr(dst));
1731
1732 return target;
1733 }
1734
1735
1736 void IRRegExpMacroAssembler::CheckPreemption() {
1737 TAG();
1738 AppendInstruction(new(I) CheckStackOverflowInstr(kNoSourcePos, 0));
1739 }
1740
1741
1742 Definition* IRRegExpMacroAssembler::Add(
1743 PushArgumentInstr* lhs,
1744 PushArgumentInstr* rhs) {
1745 return InstanceCall(InstanceCallDescriptor::FromToken(Token::kADD), lhs, rhs);
1746 }
1747
1748
1749 Definition* IRRegExpMacroAssembler::Sub(
1750 PushArgumentInstr* lhs,
1751 PushArgumentInstr* rhs) {
1752 return InstanceCall(InstanceCallDescriptor::FromToken(Token::kSUB), lhs, rhs);
1753 }
1754
1755
1756 void IRRegExpMacroAssembler::LoadCurrentCharacterUnchecked(
1757 intptr_t cp_offset, intptr_t characters) {
1758 TAG();
1759
1760 ASSERT(characters == 1 || CanReadUnaligned());
1761 if (mode_ == ASCII) {
1762 ASSERT(characters == 1 || characters == 2 || characters == 4);
1763 } else {
1764 ASSERT(mode_ == UC16);
1765 ASSERT(characters == 1 || characters == 2);
1766 }
1767
1768 // Bind the pattern as the load receiver.
1769 Value* pattern = BindLoadLocal(*string_param_);
1770
1771 // Calculate the addressed string index as:
1772 // cp_offset + current_position_ + string_param_length_
1773 // TODO(zerny): Avoid generating 'add' instance-calls here.
1774 PushArgumentInstr* off_arg =
1775 PushArgument(Bind(Int64Constant(cp_offset)));
1776 PushArgumentInstr* pos_arg =
1777 PushArgument(BindLoadLocal(*current_position_));
1778 PushArgumentInstr* off_pos_arg =
1779 PushArgument(Bind(Add(off_arg, pos_arg)));
1780 PushArgumentInstr* len_arg =
1781 PushArgument(BindLoadLocal(*string_param_length_));
1782 Value* index = Bind(Add(off_pos_arg, len_arg));
1783
1784 // Load and store the code units.
1785 Value* code_unit_value = LoadCodeUnitsAt(pattern, index, characters);
1786 StoreLocal(current_character_, code_unit_value);
1787 PRINT(PushLocal(current_character_));
1788 }
1789
1790
1791 Value* IRRegExpMacroAssembler::CharacterAt(Definition* index) {
1792 Value* pattern_val = BindLoadLocal(*string_param_);
1793 Value* index_val = Bind(index);
1794 return LoadCodeUnitsAt(pattern_val, index_val, 1);
1795 }
1796
1797
1798 // Note: We can't replace pattern with a load-local of string_param_
1799 // because we need to maintain the stack discipline in unoptimized code.
1800 Value* IRRegExpMacroAssembler::LoadCodeUnitsAt(Value* pattern,
1801 Value* index,
1802 intptr_t characters) {
1803 return Bind(new(I) LoadCodeUnitsInstr(
1804 pattern,
1805 index,
1806 characters,
1807 specialization_cid_,
1808 Scanner::kNoSourcePos));
1809 }
1810
1811
1812 #undef __
22 1813
23 } // namespace dart 1814 } // namespace dart
OLDNEW
« no previous file with comments | « runtime/vm/regexp_assembler.h ('k') | runtime/vm/regexp_ast.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698