Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(441)

Side by Side Diff: runtime/vm/regexp_assembler.cc

Issue 539153002: Port and integrate the irregexp engine from V8 (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: Created 6 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file.
4
5 #include "vm/regexp_assembler.h"
6
7 #include "vm/bit_vector.h"
8 #include "vm/compiler.h"
9 #include "vm/dart_entry.h"
10 #include "vm/flow_graph_builder.h"
11 #include "vm/il_printer.h"
12 #include "vm/object_store.h"
13 #include "vm/resolver.h"
14 #include "vm/stack_frame.h"
15 #include "vm/unibrow-inl.h"
16
17 #define I isolate()
18
// Debugging output macros. TAG() is called at the head of each interesting
// function; when irregexp tracing is enabled it emits IR that prints
// "TAG: <name of the enclosing C++ function>" during execution of the
// generated code. PRINT(arg) emits a print call for an already pushed
// argument under the same flag.
//
// TAG() and PRINT() are wrapped in do { ... } while (0) so that each expands
// to exactly one statement. A bare `if (...) { ... }` expansion would capture
// a following `else` and cannot be used safely as the body of an unbraced
// if/else.
#define TAG()                                                                  \
  do {                                                                         \
    if (FLAG_trace_irregexp) {                                                 \
      TAG_();                                                                  \
    }                                                                          \
  } while (0)

// Implementation detail of TAG(): unconditionally emits the print call.
// The trailing semicolon is kept so existing `TAG_();` uses stay valid.
#define TAG_()                                                                 \
  Print(PushArgument(                                                          \
    Bind(new(I) ConstantInstr(String::ZoneHandle(I, String::Concat(            \
        String::Handle(String::New("TAG: ")),                                  \
        String::Handle(String::New(__FUNCTION__)), Heap::kOld))))));

#define PRINT(arg)                                                             \
  do {                                                                         \
    if (FLAG_trace_irregexp) {                                                 \
      Print(arg);                                                              \
    }                                                                          \
  } while (0)
29
30 namespace dart {
31
// Flag checked by the TAG() and PRINT() macros: when set, the assembler also
// emits print calls into the generated code.
32 DEFINE_FLAG(bool, trace_irregexp, false, "Trace irregexps");
33
// Try index handed to the block-entry constructors in this file.
34 static const intptr_t kInvalidTryIndex = -1;
// Token position handed to instructions and local variables created here.
35 static const intptr_t kNoTokenPos = -1;
// NOTE(review): not referenced in the visible part of the file; presumably a
// sentinel for an offset that has not been assigned yet - confirm.
36 static const intptr_t kOffsetNotYetSet = -1;
37
38 /*
39 * This assembler uses the following main local variables:
40 * - stack_: A pointer to a growable list which we use as an all-purpose stack
41 * storing backtracking offsets, positions & stored register values.
42 * - current_character_: Stores the currently loaded characters (possibly more
43 * than one).
44 * - current_position_: The current position within the string, stored as a
45 * negative offset from the end of the string (i.e. the
46 * position corresponding to str[0] is -str.length).
47 * Note that current_position_ is *not* byte-based, unlike
48 * original V8 code.
49 *
50 * Results are returned through an array of capture indices, stored at
51 * matches_param_. A null array specifies a failure to match. The match indices
52 * [start_inclusive, end_exclusive] for capture group i are stored at positions
53 * matches_param_[i * 2] and matches_param_[i * 2 + 1], respectively. Match
54 * indices of -1 denote non-matched groups. Note that we store these indices
55 * as a negative offset from the end of the string in position_registers_
56 * during processing, and convert them to standard indexes when copying them
57 * to matches_param_ on successful match.
58 */
59
60 // The number of parameters of the generated function.
// Execute() passes three arguments (input string, start offset, output
// array) and Parameter() uses this count when computing a parameter's frame
// slot.
61 static const intptr_t kNumParameters = 3;
62
63 RegExpMacroAssembler::RegExpMacroAssembler(Isolate* isolate)
64 : slow_safe_compiler_(false),
65 global_mode_(NOT_GLOBAL),
66 isolate_(isolate) {
67 }
68
69
70 RegExpMacroAssembler::~RegExpMacroAssembler() {
71 }
72
73
74 IRRegExpMacroAssembler::IRRegExpMacroAssembler(
75 Mode mode,
76 intptr_t capture_count,
77 Isolate* isolate)
78 : RegExpMacroAssembler(isolate),
79 mode_(mode),
80 next_block_id_(0),
81 temp_count_(0),
82 args_pushed_(0),
83 num_stack_locals_(0),
84 current_instruction_(NULL),
85 stack_(NULL),
86 current_character_(NULL),
87 current_position_(NULL),
88 string_param_(NULL),
89 string_param_length_(NULL),
90 start_index_param_(NULL),
91 matches_param_(NULL),
92 position_registers_count_((capture_count + 1) * 2),
93 block_offsets_(GrowableObjectArray::ZoneHandle(
94 isolate, GrowableObjectArray::New(Heap::kOld))) {
95 InitializeLocals();
96
97 // Create and generate all preset blocks.
98 entry_block_ =
99 new(isolate) GraphEntryInstr(
100 NULL,
101 new(isolate) TargetEntryInstr(AllocateBlockId(), kInvalidTryIndex),
102 Isolate::kNoDeoptId);
103 start_block_ =
104 new(isolate) JoinEntryInstr(AllocateBlockId(), kInvalidTryIndex);
105 success_block_ =
106 new(isolate) JoinEntryInstr(AllocateBlockId(), kInvalidTryIndex);
107 backtrack_block_ =
108 new(isolate) JoinEntryInstr(AllocateBlockId(), kInvalidTryIndex);
109 exit_block_ =
110 new(isolate) JoinEntryInstr(AllocateBlockId(), kInvalidTryIndex);
111
112 GenerateEntryBlock();
113 GenerateSuccessBlock();
114 GenerateBacktrackBlock();
115 GenerateExitBlock();
116
117 blocks_.Add(entry_block_);
118 blocks_.Add(entry_block_->normal_entry());
119 blocks_.Add(start_block_);
120 blocks_.Add(success_block_);
121 blocks_.Add(backtrack_block_);
122 blocks_.Add(exit_block_);
123
124 // Begin emission at the start_block_.
125 set_current_instruction(start_block_);
126 }
127
128
129 IRRegExpMacroAssembler::~IRRegExpMacroAssembler() { }
130
131
// Creates the LocalVariable objects used by the generated code: the working
// locals, the three incoming parameters, and one position register for each
// of the position_registers_count_ slots.
132 void IRRegExpMacroAssembler::InitializeLocals() {
133 // Create local variables and parameters.
134 stack_ = Local("stack_");
135 current_character_ = Local("current_character_");
Florian Schneider 2014/09/16 11:11:48 Make those strings VM symbols, so that they are sh
jgruber1 2014/09/22 18:58:05 Done. Added variable names, function names, etc. t
136 current_position_ = Local("current_position_");
137 string_param_length_ = Local("string_param_length_");
138 capture_length_ = Local("capture_length_");
139 word_character_map_ = Local("word_character_map_");
140 stack_ptr_ = Local("stack_ptr_");
141 match_start_index_ = Local("match_start_index_");
142 capture_start_index_ = Local("capture_start_index_");
143 match_end_index_ = Local("match_end_index_");
144 char_in_capture_ = Local("char_in_capture_");
145 char_in_match_ = Local("char_in_match_");
146
// Incoming arguments; the second argument to Parameter() is the argument
// index (0 = string, 1 = start index, 2 = matches array).
147 string_param_ = Parameter("string_param_", 0);
148 start_index_param_ = Parameter("start_index_param_", 1);
149 matches_param_ = Parameter("matches_param_", 2);
150
151 // Reserve space for all captured group positions. Note that more might
152 // be created on the fly for internal use.
153 for (intptr_t i = 0; i < position_registers_count_; i++) {
// position_register(i) creates the register as a side effect if it does not
// exist yet; the returned pointer is not needed here.
154 position_register(i);
155 }
156 }
157
158
// Emits the prologue into the graph's normal entry block:
//   1. creates the list stored in stack_ (the backtracking stack),
//   2. caches the input string's length in string_param_length_,
//   3. clears all capture registers,
//   4. sets current_position_ to (start_index - string.length),
//   5. loads the word character map from _JSSyntaxRegExp,
// and then jumps to start_block_.
159 void IRRegExpMacroAssembler::GenerateEntryBlock() {
160 set_current_instruction(entry_block_->normal_entry());
161 TAG();
162
163 // Generate a local list variable which we will use as a backtracking stack.
164
// The list factory is called with null type arguments as its only argument.
165 Definition* type_args_null_def = new(I) ConstantInstr(
166 TypeArguments::ZoneHandle(I, TypeArguments::null()));
167 PushArgumentInstr* type_arg_push = PushArgument(Bind(type_args_null_def));
168
169 const Library& lib = Library::Handle(Library::CoreLibrary());
170 const Class& list_class = Class::Handle(
171 lib.LookupCoreClass(Symbols::List()));
172 const Function& list_ctor = Function::ZoneHandle(I,
173 list_class.LookupFactory(Symbols::ListFactory()));
174
175 StoreLocal(stack_, Bind(StaticCall(list_ctor, type_arg_push)));
176
177 // Store string.length.
178 PushArgumentInstr* string_push = PushArgument(Bind(LoadLocal(string_param_)));
179 StoreLocal(string_param_length_,
180 Bind(InstanceCall("get:length", string_push)));
Florian Schneider 2014/09/16 11:11:48 Have you tried directly using LoadField / StoreFie
Florian Schneider 2014/09/16 11:11:48 String::ZoneHandle(Field::GetterName(Symbols::Leng
jgruber1 2014/09/22 18:58:04 No, since optimization has the same effect and the
jgruber1 2014/09/22 18:58:05 Done.
181
182 // Initialize all capture registers.
183 ClearRegisters(0, position_registers_count_ - 1);
184
185 // Store (start_index - string.length) as the current position (since it's a
186 // negative offset from the end of the string).
187 PushArgumentInstr* start_index_push =
188 PushArgument(Bind(LoadLocal(start_index_param_)));
189 PushArgumentInstr* length_push =
190 PushArgument(Bind(LoadLocal(string_param_length_)));
191
192 StoreLocal(current_position_, Sub(start_index_push, length_push));
193
194 // Look up and store the word character map static field of the RegExp class.
195 const Class& regexp_class = Class::Handle(
196 lib.LookupClassAllowPrivate(
197 String::Handle(Symbols::New("_JSSyntaxRegExp"))));
Florian Schneider 2014/09/16 11:11:48 Just Symbols::JSSyntaxRegExp()
jgruber1 2014/09/22 18:58:05 Done.
// The map is read through the static getter for _wordCharacterMap.
198 const Function& word_character_getter = Function::ZoneHandle(I,
199 regexp_class.LookupStaticFunctionAllowPrivate(
200 String::Handle(Field::GetterName(String::Handle(
201 Symbols::New("_wordCharacterMap"))))));
Florian Schneider 2014/09/16 11:11:48 Add this string to the list of VM symbols in symbo
jgruber1 2014/09/22 18:58:05 Done.
202
203 StoreLocal(word_character_map_, Bind(StaticCall(word_character_getter)));
Florian Schneider 2014/09/16 11:11:48 The wordCharacterMap should be embedded as a compi
jgruber1 2014/09/22 18:58:04 Done.
204
205 // Jump to the start block.
206 current_instruction_->Goto(start_block_);
207 }
208
209
// Fills backtrack_block_: pops a value from the stack, uses it as an index
// into the block_offsets_ table, and closes the block with an indirect goto
// to the value found there.
210 void IRRegExpMacroAssembler::GenerateBacktrackBlock() {
211 set_current_instruction(backtrack_block_);
212 TAG();
213
// Receiver: the block offset table, embedded as a constant.
214 PushArgumentInstr* block_offsets_push =
215 PushArgument(Bind(new(I) ConstantInstr(block_offsets_)));
// Index: the value popped from the backtracking stack.
216 PushArgumentInstr* block_id_push = PushArgument(PopStack());
217
218 Value* offset_value =
219 Bind(InstanceCall("[]", block_offsets_push, block_id_push));
Florian Schneider 2014/09/16 11:11:49 s/"[]"/Symbols::IndexToken()/
jgruber1 2014/09/22 18:58:04 Done.
220
221 CloseBlockWith(new(I) IndirectGotoInstr(offset_value));
222 }
223
224
// Fills success_block_: copies every position register into matches_param_
// (adding the string length to turn the stored negative offset into an
// index), optionally prints the result, and returns true.
225 void IRRegExpMacroAssembler::GenerateSuccessBlock() {
226 set_current_instruction(success_block_);
227 TAG();
228
229 // Store captured offsets in the `matches` parameter.
230 for (intptr_t i = 0; i < position_registers_count_; i++) {
// Emits matches_param_[i] = position_register(i) + string_param_length_.
231 PushArgumentInstr* matches_push =
232 PushArgument(Bind(LoadLocal(matches_param_)));
233 PushArgumentInstr* index_push = PushArgument(Bind(Uint64Constant(i)));
234
235 // Convert negative offsets from the end of the string to string indices.
236 PushArgumentInstr* offset_push =
237 PushArgument(Bind(LoadLocal(position_register(i))));
238 PushArgumentInstr* len_push =
239 PushArgument(Bind(LoadLocal(string_param_length_)));
240 PushArgumentInstr* value_push = PushArgument(Add(offset_push, len_push));
241
242 Do(InstanceCall("[]=", matches_push, index_push, value_push));
Florian Schneider 2014/09/16 11:11:49 s/[]=/Symbols::AssignIndexToken()/
jgruber1 2014/09/22 18:58:04 Done.
243 }
244
245 // Print the result if tracing.
246 PRINT(PushArgument(Bind(LoadLocal(matches_param_))));
247
248 // Return true on success.
249 AppendInstruction(new(I) ReturnInstr(kNoTokenPos, Bind(BoolConstant(true))));
250 }
251
252
253 void IRRegExpMacroAssembler::GenerateExitBlock() {
254 set_current_instruction(exit_block_);
255 TAG();
256
257 // Return false on failure.
258 AppendInstruction(new(I) ReturnInstr(kNoTokenPos, Bind(BoolConstant(false))));
259 }
260
261
262 static const bool kEnableUnalignedAccesses = true;
263 bool IRRegExpMacroAssembler::CanReadUnaligned() {
264 return kEnableUnalignedAccesses && !slow_safe();
265 }
266
267
// Runs a compiled matcher.
//
// Invokes `function` through DartEntry::InvokeFunction with the arguments
// (input, start_offset, *output) and maps its boolean result to SUCCESS or
// FAILURE. If the call yields an Error object, its message is printed and
// UNREACHABLE() is hit.
268 IRRegExpMacroAssembler::Result IRRegExpMacroAssembler::Execute(
269 const Function& function,
270 const String& input,
271 const Smi& start_offset,
272 Array* output,
273 Isolate* isolate) {
274 // Create the argument list.
275 const Array& args = Array::ZoneHandle(isolate, Array::New(3));
276 args.SetAt(0, input);
277 args.SetAt(1, start_offset);
278 args.SetAt(2, *output);
279
280 // And finally call the generated code.
281 const Object& retval =
282 Object::Handle(DartEntry::InvokeFunction(function, args));
283 if (retval.IsError()) {
284 const Error& error = Error::Cast(retval);
285 OS::Print("%s\n", error.ToErrorCString());
286 // Should never happen.
287 UNREACHABLE();
Florian Schneider 2014/09/16 11:11:48 Since exceptions can happen in generated code (e.g
jgruber1 2014/09/22 18:58:05 The entry into irregexp matcher functions is now a
288 }
289
// The generated function returns true on a match and false otherwise.
290 return (Bool::Cast(retval).value()) ? SUCCESS : FAILURE;
291 }
292
293
// Compares two buffers of 16-bit code units for equality, ignoring case as
// defined by unibrow::Ecma262Canonicalize.
//
// byte_offset1 / byte_offset2 point at the first byte of each buffer and
// byte_length is the length of each buffer in bytes (must be even).
// Returns 1 if every pair of code units is equal either directly or after
// canonicalization, and 0 at the first pair that is not.
294 intptr_t IRRegExpMacroAssembler::CaseInsensitiveCompareUC16(
Florian Schneider 2014/09/16 11:11:48 Where is this function used?
jgruber1 2014/09/22 18:58:05 This was one of the last unported bits. It is now
295 uint8_t* byte_offset1,
296 uint8_t* byte_offset2,
297 size_t byte_length) {
298 // TODO(jgruber): Optimize as single instance. V8 has this as an
299 // isolate member.
300 unibrow::Mapping<unibrow::Ecma262Canonicalize> canonicalize;
301 // This function is not allowed to cause a garbage collection.
302 // A GC might move the calling generated code and invalidate the
303 // return address on the stack.
304 ASSERT(byte_length % 2 == 0);
305 uint16_t* substring1 = reinterpret_cast<uint16_t*>(byte_offset1);
306 uint16_t* substring2 = reinterpret_cast<uint16_t*>(byte_offset2);
// Number of 16-bit code units to compare.
307 size_t length = byte_length >> 1;
308
309 for (size_t i = 0; i < length; i++) {
310 unibrow::uchar c1 = substring1[i];
311 unibrow::uchar c2 = substring2[i];
312 if (c1 != c2) {
// s1 starts out as c1 and is handed to canonicalize.get() as its output
// buffer.
313 unibrow::uchar s1[1] = { c1 };
314 canonicalize.get(c1, '\0', s1);
315 if (s1[0] != c2) {
// Same treatment for c2; both results must agree for a match.
316 unibrow::uchar s2[1] = { c2 };
317 canonicalize.get(c2, '\0', s2);
318 if (s1[0] != s2[0]) {
319 return 0;
320 }
321 }
322 }
323 }
324 return 1;
325 }
326
327
// Creates a dynamically typed LocalVariable describing the incoming
// parameter number `index` and named `name`. Its frame index is set to
// kParamEndSlotFromFp + kNumParameters - index.
328 LocalVariable* IRRegExpMacroAssembler::Parameter(const char* name,
Florian Schneider 2014/09/16 11:11:48 const String& name
jgruber1 2014/09/22 18:58:04 Done.
329 intptr_t index) const {
330 const Type& stack_type = Type::ZoneHandle(I, Type::DynamicType());
331 LocalVariable* local = new(I) LocalVariable(
332 kNoTokenPos,
333 String::Handle(Symbols::New(name)),
334 stack_type);
335
// Larger `index` values map to smaller frame indices.
336 intptr_t param_frame_index = kParamEndSlotFromFp + kNumParameters - index;
337 local->set_index(param_frame_index);
338
339 return local;
340 }
341
342
// Creates a dynamically typed LocalVariable named `name` and assigns it the
// index returned by GetNextLocalIndex().
343 LocalVariable* IRRegExpMacroAssembler::Local(const char* name) {
Florian Schneider 2014/09/16 11:11:48 const String& name
jgruber1 2014/09/22 18:58:05 Done.
344 const Type& local_type = Type::ZoneHandle(I, Type::DynamicType());
345 const String& local_name = String::Handle(Symbols::New(name));
346
347 LocalVariable* local =
348 new(I) LocalVariable(kNoTokenPos, local_name, local_type);
349 local->set_index(GetNextLocalIndex());
350
351 return local;
352 }
353
354
355 ConstantInstr* IRRegExpMacroAssembler::Int64Constant(int64_t value) const {
356 return new(I) ConstantInstr(
357 Integer::ZoneHandle(I, Integer::New(value, Heap::kOld)));
358 }
359
360
361 ConstantInstr* IRRegExpMacroAssembler::Uint64Constant(uint64_t value) const {
362 return new(I) ConstantInstr(
363 Integer::ZoneHandle(I, Integer::NewFromUint64(value, Heap::kOld)));
364 }
365
366
367 ConstantInstr* IRRegExpMacroAssembler::BoolConstant(bool value) const {
368 return new(I) ConstantInstr(value ? Bool::True() : Bool::False());
369 }
370
371
372 ConstantInstr* IRRegExpMacroAssembler::StringConstant(const char* value) const {
373 return new(I) ConstantInstr(
374 String::ZoneHandle(I, String::New(value, Heap::kOld)));
375 }
376
377
378 ComparisonInstr* IRRegExpMacroAssembler::Comparison(
379 ComparisonKind kind, Definition* lhs, Definition* rhs) {
380 bool is_strict_operator = false;
381 const char* intermediate_operator = NULL;
382
383 Token::Kind token_kind = Token::kEQ_STRICT;
384 Value* lhs_value = NULL;
385 Value* rhs_value = NULL;
386
387 switch (kind) {
388 case kEQ:
389 is_strict_operator = true;
390 break;
391 case kNE:
392 token_kind = Token::kNE_STRICT;
393 is_strict_operator = true;
394 break;
395 case kLT:
396 intermediate_operator = "<";
397 break;
398 case kGT:
399 intermediate_operator = ">";
400 break;
401 case kLTE:
402 intermediate_operator = "<=";
403 break;
404 case kGTE:
405 intermediate_operator = ">=";
406 break;
407 default:
408 UNREACHABLE();
409 }
410
411 if (!is_strict_operator) {
412 ASSERT(intermediate_operator != NULL);
413
414 PushArgumentInstr* lhs_push = PushArgument(Bind(lhs));
415 PushArgumentInstr* rhs_push = PushArgument(Bind(rhs));
416
417 lhs_value = Bind(InstanceCall(intermediate_operator, lhs_push, rhs_push));
418 rhs_value = Bind(BoolConstant(true));
419 } else {
420 lhs_value = Bind(lhs);
421 rhs_value = Bind(rhs);
422 }
423
424 return new(I) StrictCompareInstr(kNoTokenPos, token_kind,
425 lhs_value, rhs_value, true);
426 }
427
428
429 StaticCallInstr* IRRegExpMacroAssembler::StaticCall(
430 const Function& function) const {
431 ZoneGrowableArray<PushArgumentInstr*>* arguments =
432 new(I) ZoneGrowableArray<PushArgumentInstr*>(0);
433 return StaticCall(function, arguments);
434 }
435
436
437 StaticCallInstr* IRRegExpMacroAssembler::StaticCall(
438 const Function& function,
439 PushArgumentInstr* arg1) const {
440 ZoneGrowableArray<PushArgumentInstr*>* arguments =
441 new(I) ZoneGrowableArray<PushArgumentInstr*>(1);
442 arguments->Add(arg1);
443
444 return StaticCall(function, arguments);
445 }
446
447
448 StaticCallInstr* IRRegExpMacroAssembler::StaticCall(
449 const Function& function,
450 ZoneGrowableArray<PushArgumentInstr*>* arguments) const {
451 ZoneGrowableArray<const ICData*> ic_data_array;
452 return new(I) StaticCallInstr(kNoTokenPos,
453 function,
454 Object::null_array(),
455 arguments,
456 ic_data_array);
457 }
458
459
// Creates an instance call to the method `name` with one pushed argument
// (arg1); forwards to the argument-list overload.
460 InstanceCallInstr* IRRegExpMacroAssembler::InstanceCall(
461 const char *name,
Florian Schneider 2014/09/16 11:11:49 const String& name
jgruber1 2014/09/22 18:58:04 These have been refactored to handle # of checked
462 PushArgumentInstr* arg1) const {
463 ZoneGrowableArray<PushArgumentInstr*>* arguments =
464 new(I) ZoneGrowableArray<PushArgumentInstr*>(1);
465 arguments->Add(arg1);
466
467 return InstanceCall(name, arguments);
468 }
469
470
// Creates an instance call to the method `name` with two pushed arguments
// (arg1, arg2, in that order); forwards to the argument-list overload.
471 InstanceCallInstr* IRRegExpMacroAssembler::InstanceCall(
472 const char *name,
Florian Schneider 2014/09/16 11:11:48 const String& name
jgruber1 2014/09/22 18:58:05 Done.
473 PushArgumentInstr* arg1,
474 PushArgumentInstr* arg2) const {
475 ZoneGrowableArray<PushArgumentInstr*>* arguments =
476 new(I) ZoneGrowableArray<PushArgumentInstr*>(2);
477 arguments->Add(arg1);
478 arguments->Add(arg2);
479
480 return InstanceCall(name, arguments);
481 }
482
483
// Creates an instance call to the method `name` with three pushed arguments
// (arg1, arg2, arg3, in that order); forwards to the argument-list overload.
484 InstanceCallInstr* IRRegExpMacroAssembler::InstanceCall(
485 const char *name,
Florian Schneider 2014/09/16 11:11:48 const String& name
jgruber1 2014/09/22 18:58:04 Done.
486 PushArgumentInstr* arg1,
487 PushArgumentInstr* arg2,
488 PushArgumentInstr* arg3) const {
489 ZoneGrowableArray<PushArgumentInstr*>* arguments =
490 new(I) ZoneGrowableArray<PushArgumentInstr*>(3);
491 arguments->Add(arg1);
492 arguments->Add(arg2);
493 arguments->Add(arg3);
494
495 return InstanceCall(name, arguments);
496 }
497
498
// Creates (but does not append) an instance call to the method `name` with
// the given pushed arguments. `name` is turned into a symbol; the call uses
// Token::kILLEGAL as its token kind, no argument names
// (Object::null_array()) and an empty ICData array.
499 InstanceCallInstr* IRRegExpMacroAssembler::InstanceCall(
500 const char *name,
Florian Schneider 2014/09/16 11:11:49 const String& name
jgruber1 2014/09/22 18:58:05 Done.
501 ZoneGrowableArray<PushArgumentInstr*> *arguments) const {
502 ZoneGrowableArray<const ICData*> ic_data_array;
503
504
505 return
506 new(I) InstanceCallInstr(kNoTokenPos,
507 String::ZoneHandle(I, Symbols::New(name)),
Florian Schneider 2014/09/16 11:11:49 Just use name.
jgruber1 2014/09/22 18:58:05 Done.
508 Token::kILLEGAL,
509 arguments,
510 Object::null_array(),
// NOTE(review): presumably the number of checked arguments - confirm against
// the InstanceCallInstr constructor.
511 1,
512 ic_data_array);
513 }
514
515
516 LoadLocalInstr* IRRegExpMacroAssembler::LoadLocal(LocalVariable* local) const {
517 return new(I) LoadLocalInstr(*local);
518 }
519
520
521 void IRRegExpMacroAssembler::StoreLocal(LocalVariable* local,
522 Value* value) {
523 Do(new(I) StoreLocalInstr(*local, value));
524 }
525
526
527 void IRRegExpMacroAssembler::set_current_instruction(Instruction* instruction) {
528 current_instruction_ = instruction;
529 }
530
531
532 Value* IRRegExpMacroAssembler::Bind(Definition* definition) {
533 AppendInstruction(definition);
534 definition->set_temp_index(AllocateTemp());
535
536 return new(I) Value(definition);
537 }
538
539
540 void IRRegExpMacroAssembler::Do(Definition* definition) {
541 AppendInstruction(definition);
542 }
543
544
545 void IRRegExpMacroAssembler::AppendInstruction(Instruction* instruction) {
546 ASSERT(current_instruction_ != NULL);
547 ASSERT(current_instruction_->next() == NULL);
548
549 DeallocateTemps(instruction->InputCount());
550 add_args_pushed(-instruction->ArgumentCount());
551
552 current_instruction_->LinkTo(instruction);
553 set_current_instruction(instruction);
554 }
555
556
557 void IRRegExpMacroAssembler::CloseBlockWith(Instruction* instruction) {
558 ASSERT(current_instruction_ != NULL);
559 ASSERT(current_instruction_->next() == NULL);
560
561 DeallocateTemps(instruction->InputCount());
562 add_args_pushed(-instruction->ArgumentCount());
563
564 current_instruction_->LinkTo(instruction);
565 set_current_instruction(NULL);
566 }
567
568
569 // Jumps to the target block and sets it as the target for continued emission.
570 void IRRegExpMacroAssembler::GoTo(BlockLabel* to) {
571 ASSERT(current_instruction_ != NULL);
572 ASSERT(current_instruction_->next() == NULL);
573 ASSERT(to->block()->next() == NULL);
574 to->SetLinked();
575 current_instruction_->Goto(to->block());
576 set_current_instruction(to->block());
577 }
578
579
580 void IRRegExpMacroAssembler::Jump(BlockLabel* to) {
581 if (to == NULL) {
582 Backtrack();
583 } else {
584 to->SetLinked();
585 Jump(to->block());
586 }
587 }
588
589
590 // Closes the current block with a goto, and unsets current_instruction_.
591 // BindBlock() must be called before emission can continue.
592 void IRRegExpMacroAssembler::Jump(JoinEntryInstr* to) {
593 ASSERT(current_instruction_ != NULL);
594 ASSERT(current_instruction_->next() == NULL);
595 current_instruction_->Goto(to);
596 set_current_instruction(NULL);
597 }
598
599
600 PushArgumentInstr* IRRegExpMacroAssembler::PushArgument(Value* value) {
601 add_args_pushed(1);
602 PushArgumentInstr* push = new(I) PushArgumentInstr(value);
603 // Do *not* use Do() for push argument instructions.
604 AppendInstruction(push);
605 return push;
606 }
607
608
609 void IRRegExpMacroAssembler::Print(const char* str) {
610 Print(PushArgument(
611 Bind(new(I) ConstantInstr(
612 String::ZoneHandle(I, String::New(str, Heap::kOld))))));
613 }
614
615
616 void IRRegExpMacroAssembler::Print(PushArgumentInstr* argument) {
617 const Library& lib = Library::Handle(Library::CoreLibrary());
618 const Function& print_fn = Function::ZoneHandle(I,
619 lib.LookupFunctionAllowPrivate(String::Handle(Symbols::New("print"))));
620 Do(StaticCall(print_fn, argument));
621 }
622
623
624 void IRRegExpMacroAssembler::PrintBlocks() {
625 for (intptr_t i = 0; i < blocks_.length(); i++) {
626 FlowGraphPrinter::PrintBlock(blocks_[i], false);
627 }
628 }
629
630
631 intptr_t IRRegExpMacroAssembler::stack_limit_slack() {
632 return 32;
633 }
634
635
636 void IRRegExpMacroAssembler::AdvanceCurrentPosition(intptr_t by) {
637 TAG();
638 if (by != 0) {
639 PushArgumentInstr* cur_pos_push =
640 PushArgument(Bind(LoadLocal(current_position_)));
641
642 PushArgumentInstr* by_push =
643 PushArgument(Bind(Int64Constant(by)));
644
645 Value* new_pos_value = Add(cur_pos_push, by_push);
646 StoreLocal(current_position_, new_pos_value);
647 }
648 }
649
650
651 void IRRegExpMacroAssembler::AdvanceRegister(intptr_t reg, intptr_t by) {
652 TAG();
653 ASSERT(reg >= 0);
654 ASSERT(reg < position_registers_.length());
655
656 if (by != 0) {
657 PushArgumentInstr* reg_push =
658 PushArgument(Bind(LoadLocal(position_register(reg))));
659 PushArgumentInstr* by_push = PushArgument(Bind(Int64Constant(by)));
660 StoreLocal(position_register(reg), Add(reg_push, by_push));
661 }
662 }
663
664
665 void IRRegExpMacroAssembler::Backtrack() {
666 TAG();
667 CheckPreemption();
668
669 // TODO(jgruber): Duplicate code in GenerateBacktrackBlock, refactor.
670 PushArgumentInstr* block_offsets_push =
671 PushArgument(Bind(new(I) ConstantInstr(block_offsets_)));
672 PushArgumentInstr* block_id_push = PushArgument(PopStack());
673
674 Value* offset_value =
675 Bind(InstanceCall("[]", block_offsets_push, block_id_push));
676
677 CloseBlockWith(new(I) IndirectGotoInstr(offset_value));
678 }
679
680
681 // A BindBlock is analogous to assigning a label to a basic block.
682 // If the BlockLabel does not yet contain a block, it is created.
683 // If there is a current instruction, append a goto to the bound block.
684 void IRRegExpMacroAssembler::BindBlock(BlockLabel* label) {
685 ASSERT(!label->IsBound());
686 ASSERT(label->block()->next() == NULL);
687
688 label->SetBound(AllocateBlockId());
689 blocks_.Add(label->block());
690
691 if (current_instruction_ == NULL) {
692 set_current_instruction(label->block());
693 } else {
694 GoTo(label);
695 }
696
697 // Print the id of the current block if tracing.
698 PRINT(PushArgument(Bind(Uint64Constant(label->block()->block_id()))));
699 }
700
701
702 intptr_t IRRegExpMacroAssembler::GetNextLocalIndex() {
703 intptr_t local_id = AllocateStackLocal();
704 return kFirstLocalSlotFromFp - local_id;
705 }
706
707
708 LocalVariable* IRRegExpMacroAssembler::position_register(intptr_t index) {
709 const char name_prefix[] = "pos";
710 char name[sizeof(name_prefix) + 5];
711
712 // Create position registers as needed.
713 for (intptr_t i = position_registers_.length(); i < index + 1; i++) {
714 OS::SNPrint(name, sizeof(name), "%s%05d", name_prefix, i);
715 position_registers_.Add(Local(name));
716 }
717
718 return position_registers_[index];
719 }
720
721
722 void IRRegExpMacroAssembler::AttachIndirectTargets() {
723 BitVector* attached_blocks = new(I) BitVector(next_block_id_);
724 for (intptr_t i = 0; i < backtrack_references_.length(); i++) {
725 JoinEntryInstr* block = backtrack_references_[i].block;
726 if (attached_blocks->Contains(block->block_id())) {
727 continue;
728 }
729 entry_block_->AddIndirectTargetEntry(block);
730 attached_blocks->Add(block->block_id());
731 }
732 }
733
734
735 void IRRegExpMacroAssembler::RewriteBacktrackPushes() {
736 for (intptr_t i = 0; i < backtrack_references_.length(); i++) {
737 const BacktrackReference& bref = backtrack_references_[i];
738
739 // Replace the fake pushed value now that we definitely have a block id.
740 uint64_t block_id = bref.block->block_id();
741 ConstantInstr* id_constant = Uint64Constant(block_id);
742 id_constant->set_temp_index(bref.reference->temp_index());
743
744 bref.reference->previous()->LinkTo(id_constant);
745 id_constant->LinkTo(bref.reference->next());
746 }
747 }
748
749
750 void IRRegExpMacroAssembler::FinalizeBlockOffsetTable(
751 const GrowableArray<BlockEntryInstr*>& blocks) {
752 block_offsets_.Grow(next_block_id_, Heap::kOld);
753 block_offsets_.SetLength(next_block_id_);
754 for (intptr_t i = 0; i < blocks.length(); i++) {
755 BlockEntryInstr* block = blocks[i];
756 block_offsets_.SetAt(block->block_id(),
757 Smi::ZoneHandle(I, Smi::New(block->offset())));
758 }
759 }
760
761
// Emits a strict equality comparison between current_character_ and the
// constant `c` and hands it to BranchOrBacktrack() together with `on_equal`.
// NOTE(review): BranchOrBacktrack() is not visible here; presumably it
// branches to the label when the comparison holds - confirm.
762 void IRRegExpMacroAssembler::CheckCharacter(uint32_t c, BlockLabel* on_equal) {
Florian Schneider 2014/09/16 11:11:49 Isn't characters uint16_t as most?
jgruber1 2014/09/22 18:58:05 No, at present up to 4 ASCII or 2 UTF16 code units
763 TAG();
764 Definition* cur_char_def = LoadLocal(current_character_);
765 Definition* char_def = Uint64Constant(c);
766
767 BranchOrBacktrack(Comparison(kEQ, cur_char_def, char_def),
768 on_equal);
769 }
770
771
772 void IRRegExpMacroAssembler::CheckCharacterGT(uint16_t limit,
773 BlockLabel* on_greater) {
774 TAG();
775 BranchOrBacktrack(Comparison(kGT,
776 LoadLocal(current_character_),
777 Uint64Constant(limit)),
778 on_greater);
779 }
780
781
// Emits a check for "the current position is the start of the input" and
// passes `on_at_start` to BranchOrBacktrack() for that case.
//
// Two conditions are tested in sequence: start_index_param_ must be 0, and
// current_position_ must equal -string_param_length_. If the first test
// fails, control goes to the local label not_at_start, which is bound at the
// end of this function.
782 void IRRegExpMacroAssembler::CheckAtStart(BlockLabel* on_at_start) {
783 TAG();
784
785 BlockLabel not_at_start;
786
787 // Did we start the match at the start of the string at all?
788 BranchOrBacktrack(Comparison(kNE,
789 LoadLocal(start_index_param_),
790 Uint64Constant(0)),
791 &not_at_start);
792
793 // If we did, are we still at the start of the input, i.e. is
794 // (offset == string_length * -1)?
795 Definition* neg_len_def =
796 InstanceCall("unary-",
Florian Schneider 2014/09/16 11:11:48 If not present, add unary- to VM symbols in symbol
jgruber1 2014/09/22 18:58:04 Done.
797 PushArgument(Bind(LoadLocal(string_param_length_))));
798 Definition* offset_def = LoadLocal(current_position_);
799 BranchOrBacktrack(Comparison(kEQ, neg_len_def, offset_def),
800 on_at_start);
801
// Emission continues here when either test above did not branch away.
802 BindBlock(&not_at_start);
803 }
804
805
// Counterpart of CheckAtStart(): passes `on_not_at_start` to
// BranchOrBacktrack() both for the test start_index_param_ != 0 and for the
// test current_position_ != -string_param_length_.
806 void IRRegExpMacroAssembler::CheckNotAtStart(BlockLabel* on_not_at_start) {
807 TAG();
808
809 // Did we start the match at the start of the string at all?
810 BranchOrBacktrack(Comparison(kNE,
811 LoadLocal(start_index_param_),
812 Uint64Constant(0)),
813 on_not_at_start);
814
815 // If we did, are we still at the start of the input, i.e. is
816 // (offset == string_length * -1)?
817 Definition* neg_len_def =
818 InstanceCall("unary-",
Florian Schneider 2014/09/16 11:11:48 If not present, add unary- to VM symbols in symbol
jgruber1 2014/09/22 18:58:05 Done.
819 PushArgument(Bind(LoadLocal(string_param_length_))));
820 Definition* offset_def = LoadLocal(current_position_);
821 BranchOrBacktrack(Comparison(kNE, neg_len_def, offset_def),
822 on_not_at_start);
823 }
824
825
826 void IRRegExpMacroAssembler::CheckCharacterLT(uint16_t limit,
827 BlockLabel* on_less) {
828 TAG();
829 BranchOrBacktrack(Comparison(kLT,
830 LoadLocal(current_character_),
831 Uint64Constant(limit)),
832 on_less);
833 }
834
835
836 void IRRegExpMacroAssembler::CheckGreedyLoop(BlockLabel* on_equal) {
837 TAG();
838
839 BlockLabel fallthrough;
840
841 PushArgumentInstr* stack_push = PushArgument(Bind(LoadLocal(stack_)));
842 Definition* stack_tip_def = InstanceCall("get:last", stack_push);
843 Definition* cur_pos_def = LoadLocal(current_position_);
844
845 BranchOrBacktrack(Comparison(kNE, stack_tip_def, cur_pos_def),
846 &fallthrough);
847
848 // Pop, throwing away the value.
849 stack_push = PushArgument(Bind(LoadLocal(stack_)));
850 Do(InstanceCall("removeLast", stack_push));
851
852 BranchOrBacktrack(NULL, on_equal);
853
854 BindBlock(&fallthrough);
855 }
856
857
858 void IRRegExpMacroAssembler::CheckNotBackReferenceIgnoreCase(
859 intptr_t start_reg,
860 BlockLabel* on_no_match) {
861 TAG();
862 ASSERT(start_reg + 1 <= position_registers_.length());
863
864 BlockLabel fallthrough;
865
866 PushArgumentInstr* end_push =
867 PushArgument(Bind(LoadLocal(position_register(start_reg + 1))));
868 PushArgumentInstr* start_push =
869 PushArgument(Bind(LoadLocal(position_register(start_reg))));
870 StoreLocal(capture_length_, Sub(end_push, start_push));
871
872 // The length of a capture should not be negative. This can only happen
873 // if the end of the capture is unrecorded, or at a point earlier than
874 // the start of the capture.
875 // BranchOrBacktrack(less, on_no_match);
876
877 BranchOrBacktrack(Comparison(kLT,
878 LoadLocal(capture_length_),
879 Uint64Constant(0)),
880 on_no_match);
881
882 // If length is zero, either the capture is empty or it is completely
883 // uncaptured. In either case succeed immediately.
884 BranchOrBacktrack(Comparison(kEQ,
885 LoadLocal(capture_length_),
886 Uint64Constant(0)),
887 &fallthrough);
888
889
890 // Check that there are sufficient characters left in the input.
891 PushArgumentInstr* pos_push =
892 PushArgument(Bind(LoadLocal(current_position_)));
893 PushArgumentInstr* len_push = PushArgument(Bind(LoadLocal(capture_length_)));
894 BranchOrBacktrack(Comparison(kGT,
895 InstanceCall("+", pos_push, len_push),
896 Uint64Constant(0)),
897 on_no_match);
898
899
900 if (mode_ == ASCII) {
901 BlockLabel success;
902 BlockLabel fail;
903 BlockLabel loop_increment;
904
905 pos_push = PushArgument(Bind(LoadLocal(current_position_)));
906 len_push = PushArgument(Bind(LoadLocal(string_param_length_)));
907 StoreLocal(match_start_index_, Add(pos_push, len_push));
908
909 pos_push = PushArgument(Bind(LoadLocal(position_register(start_reg))));
910 len_push = PushArgument(Bind(LoadLocal(string_param_length_)));
911 StoreLocal(capture_start_index_, Add(pos_push, len_push));
912
913 pos_push = PushArgument(Bind(LoadLocal(match_start_index_)));
914 len_push = PushArgument(Bind(LoadLocal(capture_length_)));
915 StoreLocal(match_end_index_, Add(pos_push, len_push));
916
917 BlockLabel loop;
918 BindBlock(&loop);
919
920 StoreLocal(char_in_capture_, CharacterAt(LoadLocal(capture_start_index_)));
921 StoreLocal(char_in_match_, CharacterAt(LoadLocal(match_start_index_)));
922
923 BranchOrBacktrack(Comparison(kEQ,
924 LoadLocal(char_in_capture_),
925 LoadLocal(char_in_match_)),
926 &loop_increment);
927
928 // Mismatch, try case-insensitive match (converting letters to lower-case).
929 PushArgumentInstr* match_char_push =
930 PushArgument(Bind(LoadLocal(char_in_match_)));
931 PushArgumentInstr* mask_push = PushArgument(Bind(Uint64Constant(0x20)));
932 StoreLocal(char_in_match_,
933 Bind(InstanceCall("|", match_char_push, mask_push)));
934
935 BlockLabel convert_capture;
936 BlockLabel on_not_in_range;
937 BranchOrBacktrack(Comparison(kLT,
938 LoadLocal(char_in_match_),
939 Uint64Constant('a')),
940 &on_not_in_range);
941 BranchOrBacktrack(Comparison(kGT,
942 LoadLocal(char_in_match_),
943 Uint64Constant('z')),
944 &on_not_in_range);
945 Jump(&convert_capture);
946 BindBlock(&on_not_in_range);
947
948 // Latin-1: Check for values in range [224,254] but not 247.
949 BranchOrBacktrack(Comparison(kLT,
950 LoadLocal(char_in_match_),
951 Uint64Constant(224)),
952 &fail);
953 BranchOrBacktrack(Comparison(kGT,
954 LoadLocal(char_in_match_),
955 Uint64Constant(254)),
956 &fail);
957
958 BranchOrBacktrack(Comparison(kEQ,
959 LoadLocal(char_in_match_),
960 Uint64Constant(247)),
961 &fail);
962
963 // Also convert capture character.
964 BindBlock(&convert_capture);
965
966 PushArgumentInstr* capture_char_push =
967 PushArgument(Bind(LoadLocal(char_in_capture_)));
968 mask_push = PushArgument(Bind(Uint64Constant(0x20)));
969 StoreLocal(char_in_capture_,
970 Bind(InstanceCall("|", capture_char_push, mask_push)));
971
972 BranchOrBacktrack(Comparison(kNE,
973 LoadLocal(char_in_match_),
974 LoadLocal(char_in_capture_)),
975 &fail);
976
977 BindBlock(&loop_increment);
978
979 // Increment pointers into match and capture strings.
980 StoreLocal(capture_start_index_, Add(
981 PushArgument(Bind(LoadLocal(capture_start_index_))),
982 PushArgument(Bind(Uint64Constant(1)))));
983 StoreLocal(match_start_index_, Add(
984 PushArgument(Bind(LoadLocal(match_start_index_))),
985 PushArgument(Bind(Uint64Constant(1)))));
986
987 // Compare to end of match, and loop if not done.
988 BranchOrBacktrack(Comparison(kLT,
989 LoadLocal(match_start_index_),
990 LoadLocal(match_end_index_)),
991 &loop);
992 Jump(&success);
993
994 // TODO(jgruber): If we don't need to restore edi and the stack ptr,
995 // remove fail.
996 BindBlock(&fail);
997 BranchOrBacktrack(NULL, on_no_match);
998
999 BindBlock(&success);
1000
1001 // Move current character position to position after match.
1002 PushArgumentInstr* match_end_push =
1003 PushArgument(Bind(LoadLocal(match_end_index_)));
1004 len_push = PushArgument(Bind(LoadLocal(string_param_length_)));
1005 StoreLocal(current_position_, Sub(match_end_push, len_push));
1006 } else {
1007 ASSERT(mode_ == UC16);
1008 UNIMPLEMENTED();
1009 }
1010
1011 BindBlock(&fallthrough);
1012 }
1013
1014
1015 void IRRegExpMacroAssembler::CheckNotBackReference(
1016 intptr_t start_reg,
1017 BlockLabel* on_no_match) {
1018 TAG();
1019 ASSERT(start_reg + 1 <= position_registers_.length());
1020
1021 BlockLabel fallthrough;
1022 BlockLabel success;
1023 BlockLabel fail;
1024
1025 // Find length of back-referenced capture.
1026 PushArgumentInstr* end_push =
1027 PushArgument(Bind(LoadLocal(position_register(start_reg + 1))));
1028 PushArgumentInstr* start_push =
1029 PushArgument(Bind(LoadLocal(position_register(start_reg))));
1030 StoreLocal(capture_length_, Sub(end_push, start_push));
1031
1032 // Fail on partial or illegal capture (start of capture after end of capture).
1033 BranchOrBacktrack(Comparison(kLT,
1034 LoadLocal(capture_length_),
1035 Uint64Constant(0)),
1036 on_no_match);
1037
1038 // Succeed on empty capture (including no capture)
1039 BranchOrBacktrack(Comparison(kEQ,
1040 LoadLocal(capture_length_),
1041 Uint64Constant(0)),
1042 &fallthrough);
1043
1044 // Check that there are sufficient characters left in the input.
1045 PushArgumentInstr* pos_push =
1046 PushArgument(Bind(LoadLocal(current_position_)));
1047 PushArgumentInstr* len_push = PushArgument(Bind(LoadLocal(capture_length_)));
1048 BranchOrBacktrack(Comparison(kGT,
1049 InstanceCall("+", pos_push, len_push),
1050 Uint64Constant(0)),
1051 on_no_match);
1052
1053 // Save register to make it available below.
1054 // TODO(jgruber): The original purpose was probably to free up a register for
1055 // use, so we don't need to do this.
1056 PushArgumentInstr* stack_push = PushArgument(Bind(LoadLocal(stack_)));
1057 StoreLocal(stack_ptr_, Bind(InstanceCall("get:length", stack_push)));
1058
1059 // Compute pointers to match string and capture string.
1060 pos_push = PushArgument(Bind(LoadLocal(current_position_)));
1061 len_push = PushArgument(Bind(LoadLocal(string_param_length_)));
1062 StoreLocal(match_start_index_, Add(pos_push, len_push));
1063
1064 pos_push = PushArgument(Bind(LoadLocal(position_register(start_reg))));
1065 len_push = PushArgument(Bind(LoadLocal(string_param_length_)));
1066 StoreLocal(capture_start_index_, Add(pos_push, len_push));
1067
1068 pos_push = PushArgument(Bind(LoadLocal(match_start_index_)));
1069 len_push = PushArgument(Bind(LoadLocal(capture_length_)));
1070 StoreLocal(match_end_index_, Add(pos_push, len_push));
1071
1072 BlockLabel loop;
1073 BindBlock(&loop);
1074
1075 StoreLocal(char_in_capture_, CharacterAt(LoadLocal(capture_start_index_)));
1076 StoreLocal(char_in_match_, CharacterAt(LoadLocal(match_start_index_)));
1077
1078 BranchOrBacktrack(Comparison(kNE,
1079 LoadLocal(char_in_capture_),
1080 LoadLocal(char_in_match_)),
1081 &fail);
1082
1083 // Increment pointers into capture and match string.
1084 StoreLocal(capture_start_index_, Add(
1085 PushArgument(Bind(LoadLocal(capture_start_index_))),
1086 PushArgument(Bind(Uint64Constant(1)))));
1087 StoreLocal(match_start_index_, Add(
1088 PushArgument(Bind(LoadLocal(match_start_index_))),
1089 PushArgument(Bind(Uint64Constant(1)))));
1090
1091 // Check if we have reached end of match area.
1092 BranchOrBacktrack(Comparison(kLT,
1093 LoadLocal(match_start_index_),
1094 LoadLocal(match_end_index_)),
1095 &loop);
1096
1097 Jump(&success);
1098
1099 BindBlock(&fail);
1100
1101 // Restore backtrack stackpointer.
1102 stack_push = PushArgument(Bind(LoadLocal(stack_)));
1103 PushArgumentInstr* stack_ptr_push = PushArgument(Bind(LoadLocal(stack_ptr_)));
1104 Do(InstanceCall("set:length", stack_push, stack_ptr_push));
1105
1106 BranchOrBacktrack(NULL, on_no_match);
1107
1108 BindBlock(&success);
1109
1110 // Move current character position to position after match.
1111 PushArgumentInstr* match_end_push =
1112 PushArgument(Bind(LoadLocal(match_end_index_)));
1113 len_push = PushArgument(Bind(LoadLocal(string_param_length_)));
1114 StoreLocal(current_position_, Sub(match_end_push, len_push));
1115
1116 // Restore backtrack stackpointer.
1117 stack_push = PushArgument(Bind(LoadLocal(stack_)));
1118 stack_ptr_push = PushArgument(Bind(LoadLocal(stack_ptr_)));
1119 Do(InstanceCall("set:length", stack_push, stack_ptr_push));
1120
1121 BindBlock(&fallthrough);
1122 }
1123
1124
1125 void IRRegExpMacroAssembler::CheckNotCharacter(uint32_t c,
1126 BlockLabel* on_not_equal) {
1127 TAG();
1128 BranchOrBacktrack(Comparison(kNE,
1129 LoadLocal(current_character_),
1130 Uint64Constant(c)),
1131 on_not_equal);
1132 }
1133
1134
1135 void IRRegExpMacroAssembler::CheckCharacterAfterAnd(uint32_t c,
1136 uint32_t mask,
1137 BlockLabel* on_equal) {
1138 TAG();
1139
1140 Definition* actual_def = LoadLocal(current_character_);
1141 Definition* expected_def = Uint64Constant(c);
1142
1143 PushArgumentInstr* actual_push = PushArgument(Bind(actual_def));
1144 PushArgumentInstr* mask_push = PushArgument(Bind(Uint64Constant(mask)));
1145 actual_def = InstanceCall("&", actual_push, mask_push);
1146
1147 BranchOrBacktrack(Comparison(kEQ, actual_def, expected_def), on_equal);
1148 }
1149
1150
1151 void IRRegExpMacroAssembler::CheckNotCharacterAfterAnd(
1152 uint32_t c,
1153 uint32_t mask,
1154 BlockLabel* on_not_equal) {
1155 TAG();
1156
1157 Definition* actual_def = LoadLocal(current_character_);
1158 Definition* expected_def = Uint64Constant(c);
1159
1160 PushArgumentInstr* actual_push = PushArgument(Bind(actual_def));
1161 PushArgumentInstr* mask_push = PushArgument(Bind(Uint64Constant(mask)));
1162 actual_def = InstanceCall("&", actual_push, mask_push);
1163
1164 BranchOrBacktrack(Comparison(kNE, actual_def, expected_def), on_not_equal);
1165 }
1166
1167
1168 void IRRegExpMacroAssembler::CheckNotCharacterAfterMinusAnd(
1169 uint16_t c,
1170 uint16_t minus,
1171 uint16_t mask,
1172 BlockLabel* on_not_equal) {
1173 UNIMPLEMENTED();
1174 }
1175
1176
1177 void IRRegExpMacroAssembler::CheckCharacterInRange(
1178 uint16_t from,
1179 uint16_t to,
1180 BlockLabel* on_in_range) {
1181 TAG();
1182 ASSERT(from <= to);
1183
1184 // TODO(jgruber): All range comparisons could be done cheaper with unsigned
1185 // compares. This pattern repeats in various places.
1186
1187 BlockLabel on_not_in_range;
1188 BranchOrBacktrack(Comparison(kLT,
1189 LoadLocal(current_character_),
1190 Uint64Constant(from)),
1191 &on_not_in_range);
1192 BranchOrBacktrack(Comparison(kGT,
1193 LoadLocal(current_character_),
1194 Uint64Constant(to)),
1195 &on_not_in_range);
1196 BranchOrBacktrack(NULL, on_in_range);
1197
1198 BindBlock(&on_not_in_range);
1199 }
1200
1201
1202 void IRRegExpMacroAssembler::CheckCharacterNotInRange(
1203 uint16_t from,
1204 uint16_t to,
1205 BlockLabel* on_not_in_range) {
1206 TAG();
1207 ASSERT(from <= to);
1208
1209 BranchOrBacktrack(Comparison(kLT,
1210 LoadLocal(current_character_),
1211 Uint64Constant(from)),
1212 on_not_in_range);
1213
1214 BranchOrBacktrack(Comparison(kGT,
1215 LoadLocal(current_character_),
1216 Uint64Constant(to)),
1217 on_not_in_range);
1218 }
1219
1220
1221 void IRRegExpMacroAssembler::CheckBitInTable(
1222 const TypedData& table,
1223 BlockLabel* on_bit_set) {
1224 TAG();
1225
1226 PushArgumentInstr* table_push =
1227 PushArgument(Bind(new(I) ConstantInstr(table)));
1228 PushArgumentInstr* index_push =
1229 PushArgument(Bind(LoadLocal(current_character_)));
1230
1231 if (mode_ != ASCII || kTableMask != Symbols::kMaxOneCharCodeSymbol) {
1232 PushArgumentInstr* mask_push =
1233 PushArgument(Bind(Uint64Constant(kTableSize - 1)));
1234 index_push = PushArgument(Bind(InstanceCall("&", index_push, mask_push)));
1235 }
1236
1237 Definition* byte_def = InstanceCall("[]", table_push, index_push);
1238 Definition* zero_def = Int64Constant(0);
1239
1240 BranchOrBacktrack(Comparison(kNE, byte_def, zero_def), on_bit_set);
1241 }
1242
1243
1244 bool IRRegExpMacroAssembler::CheckSpecialCharacterClass(
1245 uint16_t type,
1246 BlockLabel* on_no_match) {
1247 TAG();
1248
1249 // Range checks (c in min..max) are generally implemented by an unsigned
1250 // (c - min) <= (max - min) check
1251 switch (type) {
1252 case 's':
1253 // Match space-characters
1254 if (mode_ == ASCII) {
1255 // One byte space characters are '\t'..'\r', ' ' and \u00a0.
1256 BlockLabel success;
1257 // Space (' ').
1258 BranchOrBacktrack(Comparison(kEQ,
1259 LoadLocal(current_character_),
1260 Uint64Constant(' ')),
1261 &success);
1262 // Check range 0x09..0x0d.
1263 CheckCharacterInRange('\t', '\r', &success);
1264 // \u00a0 (NBSP).
1265 BranchOrBacktrack(Comparison(kNE,
1266 LoadLocal(current_character_),
1267 Uint64Constant(0x00a0)),
1268 on_no_match);
1269 BindBlock(&success);
1270 return true;
1271 }
1272 return false;
1273 case 'S':
1274 // The emitted code for generic character classes is good enough.
1275 return false;
1276 case 'd':
1277 // Match ASCII digits ('0'..'9')
1278 CheckCharacterNotInRange('0', '9', on_no_match);
1279 return true;
1280 case 'D':
1281 // Match non ASCII-digits
1282 CheckCharacterInRange('0', '9', on_no_match);
1283 return true;
1284 case '.': {
1285 // Match non-newlines (not 0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029)
1286 BranchOrBacktrack(Comparison(kEQ,
1287 LoadLocal(current_character_),
1288 Uint64Constant('\n')),
1289 on_no_match);
1290 BranchOrBacktrack(Comparison(kEQ,
1291 LoadLocal(current_character_),
1292 Uint64Constant('\r')),
1293 on_no_match);
1294 if (mode_ == UC16) {
1295 BranchOrBacktrack(Comparison(kEQ,
1296 LoadLocal(current_character_),
1297 Uint64Constant(0x2028)),
1298 on_no_match);
1299 BranchOrBacktrack(Comparison(kEQ,
1300 LoadLocal(current_character_),
1301 Uint64Constant(0x2029)),
1302 on_no_match);
1303 }
1304 return true;
1305 }
1306 case 'w': {
1307 if (mode_ != ASCII) {
1308 // Table is 128 entries, so all ASCII characters can be tested.
1309 BranchOrBacktrack(Comparison(kGT,
1310 LoadLocal(current_character_),
1311 Uint64Constant('z')),
1312 on_no_match);
1313 }
1314
1315 PushArgumentInstr* table_push =
1316 PushArgument(Bind(LoadLocal(word_character_map_)));
1317 PushArgumentInstr* index_push =
1318 PushArgument(Bind(LoadLocal(current_character_)));
1319
1320 Definition* byte_def = InstanceCall("[]", table_push, index_push);
1321 Definition* zero_def = Int64Constant(0);
1322
1323 BranchOrBacktrack(Comparison(kEQ, byte_def, zero_def), on_no_match);
1324
1325 return true;
1326 }
1327 case 'W': {
1328 BlockLabel done;
1329 if (mode_ != ASCII) {
1330 // Table is 128 entries, so all ASCII characters can be tested.
1331 BranchOrBacktrack(Comparison(kGT,
1332 LoadLocal(current_character_),
1333 Uint64Constant('z')),
1334 &done);
1335 }
1336
1337 // TODO(jgruber): Refactor to use CheckBitInTable if possible.
1338
1339 PushArgumentInstr* table_push =
1340 PushArgument(Bind(LoadLocal(word_character_map_)));
1341 PushArgumentInstr* index_push =
1342 PushArgument(Bind(LoadLocal(current_character_)));
1343
1344 Definition* byte_def = InstanceCall("[]", table_push, index_push);
1345 Definition* zero_def = Int64Constant(0);
1346
1347 BranchOrBacktrack(Comparison(kNE, byte_def, zero_def), on_no_match);
1348
1349 if (mode_ != ASCII) {
1350 BindBlock(&done);
1351 }
1352 return true;
1353 }
1354 // Non-standard classes (with no syntactic shorthand) used internally.
1355 case '*':
1356 // Match any character.
1357 return true;
1358 case 'n': {
1359 // Match newlines (0x0a('\n'), 0x0d('\r'), 0x2028 or 0x2029).
1360 // The opposite of '.'.
1361 BlockLabel success;
1362 BranchOrBacktrack(Comparison(kEQ,
1363 LoadLocal(current_character_),
1364 Uint64Constant('\n')),
1365 &success);
1366 BranchOrBacktrack(Comparison(kEQ,
1367 LoadLocal(current_character_),
1368 Uint64Constant('\r')),
1369 &success);
1370 if (mode_ == UC16) {
1371 BranchOrBacktrack(Comparison(kEQ,
1372 LoadLocal(current_character_),
1373 Uint64Constant(0x2028)),
1374 &success);
1375 BranchOrBacktrack(Comparison(kEQ,
1376 LoadLocal(current_character_),
1377 Uint64Constant(0x2029)),
1378 &success);
1379 }
1380 BranchOrBacktrack(NULL, on_no_match);
1381 BindBlock(&success);
1382 return true;
1383 }
1384 // No custom implementation (yet): s(uint16_t), S(uint16_t).
1385 default:
1386 return false;
1387 }
1388 }
1389
1390
1391 void IRRegExpMacroAssembler::Fail() {
1392 TAG();
1393 ASSERT(FAILURE == 0); // Return value for failure is zero.
1394 if (!global()) {
1395 UNREACHABLE(); // Dart regexps are always global.
1396 }
1397 Jump(exit_block_);
1398 }
1399
1400
1401 void IRRegExpMacroAssembler::IfRegisterGE(intptr_t reg,
1402 intptr_t comparand,
1403 BlockLabel* if_ge) {
1404 TAG();
1405 BranchOrBacktrack(Comparison(kGTE,
1406 LoadLocal(position_register(reg)),
1407 Int64Constant(comparand)),
1408 if_ge);
1409 }
1410
1411
1412 void IRRegExpMacroAssembler::IfRegisterLT(intptr_t reg,
1413 intptr_t comparand,
1414 BlockLabel* if_lt) {
1415 TAG();
1416 BranchOrBacktrack(Comparison(kLT,
1417 LoadLocal(position_register(reg)),
1418 Int64Constant(comparand)),
1419 if_lt);
1420 }
1421
1422
1423 void IRRegExpMacroAssembler::IfRegisterEqPos(intptr_t reg,
1424 BlockLabel* if_eq) {
1425 TAG();
1426 BranchOrBacktrack(Comparison(kEQ,
1427 LoadLocal(position_register(reg)),
1428 LoadLocal(current_position_)),
1429 if_eq);
1430 }
1431
1432
1433 RegExpMacroAssembler::IrregexpImplementation
1434 IRRegExpMacroAssembler::Implementation() {
1435 return kIRImplementation;
1436 }
1437
1438
1439 void IRRegExpMacroAssembler::LoadCurrentCharacter(intptr_t cp_offset,
1440 BlockLabel* on_end_of_input,
1441 bool check_bounds,
1442 intptr_t characters) {
1443 TAG();
1444 ASSERT(cp_offset >= -1); // ^ and \b can look behind one character.
1445 ASSERT(cp_offset < (1<<30)); // Be sane! (And ensure negation works)
1446 if (check_bounds) {
1447 CheckPosition(cp_offset + characters - 1, on_end_of_input);
1448 }
1449 LoadCurrentCharacterUnchecked(cp_offset, characters);
1450 }
1451
1452
1453 void IRRegExpMacroAssembler::PopCurrentPosition() {
1454 TAG();
1455 StoreLocal(current_position_, PopStack());
1456 }
1457
1458
1459 void IRRegExpMacroAssembler::PopRegister(intptr_t register_index) {
1460 TAG();
1461 ASSERT(register_index < position_registers_.length());
1462 StoreLocal(position_register(register_index), PopStack());
1463 }
1464
1465
1466 void IRRegExpMacroAssembler::PushStack(Definition *definition) {
1467 PushArgumentInstr* stack_push = PushArgument(Bind(LoadLocal(stack_)));
1468 PushArgumentInstr* value_push = PushArgument(Bind(definition));
1469 Do(InstanceCall("add", stack_push, value_push));
1470 }
1471
1472
1473 Value* IRRegExpMacroAssembler::PopStack() {
1474 PushArgumentInstr* stack_push = PushArgument(Bind(LoadLocal(stack_)));
1475 return Bind(InstanceCall("removeLast", stack_push));
1476 }
1477
1478
1479 // Pushes the location corresponding to label to the backtracking stack.
1480 // Backtracking blocks do not necessarily have an ID at this point.
1481 // Push a dummy instead; these are later rewritten.
1482 void IRRegExpMacroAssembler::PushBacktrack(BlockLabel* label) {
1483 TAG();
1484 ConstantInstr* offset = Int64Constant(kOffsetNotYetSet);
1485 backtrack_references_.Add(BacktrackReference(label->block(), offset));
1486 PushStack(offset);
1487 }
1488
1489
1490 void IRRegExpMacroAssembler::PushCurrentPosition() {
1491 TAG();
1492 PushStack(LoadLocal(current_position_));
1493 }
1494
1495
1496 void IRRegExpMacroAssembler::PushRegister(intptr_t register_index,
1497 StackCheckFlag check_stack_limit) {
1498 TAG();
1499 PushStack(LoadLocal(position_register(register_index)));
1500 }
1501
1502
1503 void IRRegExpMacroAssembler::ReadCurrentPositionFromRegister(intptr_t reg) {
1504 TAG();
1505 StoreLocal(current_position_, Bind(LoadLocal(position_register(reg))));
1506 }
1507
1508 // Resets the size of the stack to the value stored in reg.
1509 void IRRegExpMacroAssembler::ReadStackPointerFromRegister(intptr_t reg) {
1510 TAG();
1511 ASSERT(reg < position_registers_.length());
1512
1513 PushArgumentInstr* stack_push = PushArgument(Bind(LoadLocal(stack_)));
1514 PushArgumentInstr* length_push =
1515 PushArgument(Bind(LoadLocal(position_register(reg))));
1516
1517 Do(InstanceCall("set:length", stack_push, length_push));
1518 }
1519
1520 void IRRegExpMacroAssembler::SetCurrentPositionFromEnd(intptr_t by) {
1521 TAG();
1522
1523 BlockLabel after_position;
1524
1525 Definition* cur_pos_def = LoadLocal(current_position_);
1526 Definition* by_value_def = Int64Constant(-by);
1527
1528 BranchOrBacktrack(Comparison(kGTE, cur_pos_def, by_value_def),
1529 &after_position);
1530
1531 StoreLocal(current_position_, Bind(Int64Constant(-by)));
1532
1533 // On RegExp code entry (where this operation is used), the character before
1534 // the current position is expected to be already loaded.
1535 // We have advanced the position, so it's safe to read backwards.
1536 LoadCurrentCharacterUnchecked(-1, 1);
1537
1538 BindBlock(&after_position);
1539 }
1540
1541
1542 void IRRegExpMacroAssembler::SetRegister(intptr_t register_index, intptr_t to) {
1543 TAG();
1544 // Reserved for positions!
1545 ASSERT(register_index >= position_registers_count_);
1546 StoreLocal(position_register(register_index), Bind(Int64Constant(to)));
1547 }
1548
1549
1550 bool IRRegExpMacroAssembler::Succeed() {
1551 TAG();
1552 Jump(success_block_);
1553 return global();
1554 }
1555
1556
1557 void IRRegExpMacroAssembler::WriteCurrentPositionToRegister(
1558 intptr_t reg, intptr_t cp_offset) {
1559 TAG();
1560
1561 PushArgumentInstr* pos_push =
1562 PushArgument(Bind(LoadLocal(current_position_)));
1563 PushArgumentInstr* off_push =
1564 PushArgument(Bind(Int64Constant(cp_offset)));
1565
1566 // Push the negative offset; these are converted to positive string positions
1567 // within the success block.
1568 StoreLocal(position_register(reg), Add(pos_push, off_push));
1569 }
1570
1571
1572 void IRRegExpMacroAssembler::ClearRegisters(
1573 intptr_t reg_from, intptr_t reg_to) {
1574 TAG();
1575
1576 ASSERT(reg_from <= reg_to);
1577 ASSERT(reg_to < position_registers_.length());
1578
1579 // In order to clear registers to a final result value of -1, set them to
1580 // (-1 - string length), the offset of -1 from the end of the string.
1581
1582 for (intptr_t reg = reg_from; reg <= reg_to; reg++) {
1583 PushArgumentInstr* minus_one_push =
1584 PushArgument(Bind(Int64Constant(-1)));
1585 PushArgumentInstr* length_push =
1586 PushArgument(Bind(LoadLocal(string_param_length_)));
1587
1588 StoreLocal(position_register(reg), Sub(minus_one_push, length_push));
1589 }
1590 }
1591
1592
1593 void IRRegExpMacroAssembler::WriteStackPointerToRegister(intptr_t reg) {
1594 TAG();
1595
1596 PushArgumentInstr* stack_push = PushArgument(Bind(LoadLocal(stack_)));
1597 Value* length_value = Bind(InstanceCall("get:length", stack_push));
1598
1599 StoreLocal(position_register(reg), length_value);
1600 }
1601
1602
1603 // Private methods:
1604
1605 void IRRegExpMacroAssembler::CallCheckStackGuardState(Register scratch) {
1606 UNIMPLEMENTED();
1607 }
1608
1609
1610 intptr_t IRRegExpMacroAssembler::CheckStackGuardState(Address* return_address,
1611 Code* re_code,
1612 Address re_frame) {
1613 UNIMPLEMENTED();
1614 return -1;
1615 }
1616
1617
1618 void IRRegExpMacroAssembler::CheckPosition(intptr_t cp_offset,
1619 BlockLabel* on_outside_input) {
1620 TAG();
1621 Definition* curpos_def = LoadLocal(current_position_);
1622 Definition* cp_off_def = Int64Constant(-cp_offset);
1623
1624 // If (current_position_ < -cp_offset), we are in bounds.
1625 // Remember, current_position_ is a negative offset from the string end.
1626
1627 BranchOrBacktrack(Comparison(kGTE, curpos_def, cp_off_def),
1628 on_outside_input);
1629 }
1630
1631
1632 void IRRegExpMacroAssembler::BranchOrBacktrack(
1633 ComparisonInstr* comparison,
1634 BlockLabel* true_successor) {
1635 if (comparison == NULL) { // No condition
1636 if (true_successor == NULL) {
1637 Backtrack();
1638 return;
1639 }
1640 Jump(true_successor);
1641 return;
1642 }
1643
1644 // If no successor block has been passed in, backtrack.
1645 JoinEntryInstr* true_successor_block = backtrack_block_;
1646 if (true_successor != NULL) {
1647 true_successor->SetLinked();
1648 true_successor_block = true_successor->block();
1649 }
1650 ASSERT(true_successor_block != NULL);
1651
1652 // If the condition is not true, fall through to a new block.
1653 BlockLabel fallthrough;
1654
1655 BranchInstr* branch = new(I) BranchInstr(comparison);
1656 *branch->true_successor_address() =
1657 TargetWithJoinGoto(true_successor_block);
1658 *branch->false_successor_address() =
1659 TargetWithJoinGoto(fallthrough.block());
1660
1661 CloseBlockWith(branch);
1662 BindBlock(&fallthrough);
1663 }
1664
1665 TargetEntryInstr* IRRegExpMacroAssembler::TargetWithJoinGoto(
1666 JoinEntryInstr* dst) {
1667 TargetEntryInstr* target = new(I) TargetEntryInstr(
1668 AllocateBlockId(), kInvalidTryIndex);
1669 blocks_.Add(target);
1670
1671 target->AppendInstruction(new(I) GotoInstr(dst));
1672
1673 return target;
1674 }
1675
1676
1677 void IRRegExpMacroAssembler::CheckPreemption() {
1678 TAG();
1679 // TODO(jgruber): Implement the preemption check.
1680 }
1681
1682
1683 Value* IRRegExpMacroAssembler::Add(
1684 PushArgumentInstr* lhs,
1685 PushArgumentInstr* rhs) {
1686 return Bind(InstanceCall("+", lhs, rhs));
1687 }
1688
1689
1690 Value* IRRegExpMacroAssembler::Sub(
1691 PushArgumentInstr* lhs,
1692 PushArgumentInstr* rhs) {
1693 return Bind(InstanceCall("-", lhs, rhs));
1694 }
1695
1696
1697 void IRRegExpMacroAssembler::LoadCurrentCharacterUnchecked(
1698 intptr_t cp_offset, intptr_t characters) {
1699 TAG();
1700
1701 if (mode_ == ASCII) {
1702 ASSERT(characters == 1 || characters == 2 || characters == 4);
1703 } else {
1704 ASSERT(mode_ == UC16);
1705 ASSERT(characters == 1 || characters == 2);
1706 }
1707
1708 // LoadLocal pattern_param_
1709 // PushArgument()
1710 PushArgumentInstr* pattern_push =
1711 PushArgument(Bind(LoadLocal(string_param_)));
1712
1713 // Calculate the addressed string index as
1714 // cp_offset + current_position_ + string_param_length_
1715 PushArgumentInstr* cp_offset_push =
1716 PushArgument(Bind(Int64Constant(cp_offset)));
1717 PushArgumentInstr* cur_pos_push =
1718 PushArgument(Bind(LoadLocal(current_position_)));
1719
1720 PushArgumentInstr* partial_sum_push =
1721 PushArgument(Add(cp_offset_push, cur_pos_push));
1722 PushArgumentInstr* length_push =
1723 PushArgument(Bind(LoadLocal(string_param_length_)));
1724
1725 PushArgumentInstr* pos_push =
1726 PushArgument(Add(partial_sum_push, length_push));
1727
1728 // Finally, push the character count.
1729 PushArgumentInstr* count_push =
1730 PushArgument(Bind(Uint64Constant(characters)));
1731
1732 // InstanceCall(codeUnitAt, t0, t0)
1733 const char* name = Library::PrivateCoreLibName(
1734 String::Handle(Symbols::New("_codeUnitsAt"))).ToCString();
1735 Value* code_unit_value =
1736 Bind(InstanceCall(name, pattern_push, pos_push, count_push));
1737
1738 // StoreLocal(current_character_)
1739 StoreLocal(current_character_, code_unit_value);
1740 }
1741
1742
1743 Value* IRRegExpMacroAssembler::CharacterAt(Definition* index) {
1744 PushArgumentInstr* pattern_push =
1745 PushArgument(Bind(LoadLocal(string_param_)));
1746 PushArgumentInstr* index_push = PushArgument(Bind(index));
1747 PushArgumentInstr* count_push = PushArgument(Bind(Uint64Constant(1)));
1748
1749 const char* name = Library::PrivateCoreLibName(
1750 String::Handle(Symbols::New("_codeUnitsAt"))).ToCString();
Florian Schneider 2014/09/16 11:11:49 Add _codeUnitsAt to VM symbols and use static acce
jgruber1 2014/09/22 18:58:04 Done.
1751 return Bind(InstanceCall(name, pattern_push, index_push, count_push));
1752 }
1753
1754
1755 Function& IRRegExpMacroAssembler::GetCode(const String& source) {
1756 // At this point, code generation is completed, sanity check our bookkeeping.
1757 ASSERT(args_pushed() == 0);
1758 ASSERT(temp_count() == 0);
1759
1760 // Look up the regexp class to be used as the function owner.
1761 const Library& core_lib = Library::Handle(Library::CoreLibrary());
1762 const Class& regexp_class = Class::Handle(
1763 core_lib.LookupClass(String::Handle(Symbols::New("RegExp"))));
1764
1765 // Set up our fake parsed function.
1766 Function& function = Function::ZoneHandle(
1767 I,
1768 Function::New(String::Handle(I, Symbols::New("IrregexpFn")),
1769 RawFunction::kIrregexpFunction,
1770 true, // Static.
1771 false, // Not const.
1772 false, // Not abstract.
1773 false, // Not external.
1774 false, // Not native.
1775 regexp_class,
1776 0)); // Requires a non-negative token position.
1777
1778 function.set_num_fixed_parameters(kNumParameters);
1779 function.set_parameter_types(Array::Handle(Array::New(kNumParameters,
1780 Heap::kOld)));
1781 function.set_parameter_names(Array::Handle(Array::New(kNumParameters,
1782 Heap::kOld)));
1783 function.SetParameterTypeAt(0, Type::Handle(Type::DynamicType()));
1784 function.SetParameterNameAt(0, String::ZoneHandle(I, Symbols::New("string")));
1785 function.SetParameterTypeAt(1, Type::Handle(Type::DynamicType()));
1786 function.SetParameterNameAt(
1787 1, String::ZoneHandle(I, Symbols::New("start_index")));
1788 function.set_result_type(Type::Handle(Type::DynamicType()));
1789 function.SetParameterTypeAt(2, Type::Handle(Type::DynamicType()));
1790 function.SetParameterNameAt(
1791 2, String::ZoneHandle(I, Symbols::New("matches")));
1792 function.set_result_type(Type::Handle(Type::DynamicType()));
1793
1794 ParsedFunction* parsed_func = new(I) ParsedFunction(I, function);
1795 parsed_func->AllocateIrregexpVariables(num_stack_locals());
1796
1797 static const bool kIsOptimized = false;
1798
1799 // Set up the flow graph.
1800 ZoneGrowableArray<const ICData*>* ic_data_array =
1801 new(I) ZoneGrowableArray<const ICData*>();
1802 FlowGraphBuilder builder(parsed_func,
1803 *ic_data_array,
1804 NULL, // NULL = not inlining.
1805 Isolate::kNoDeoptId,
1806 kIsOptimized);
1807
1808 // Indirect targets (i.e. all blocks reachable only through backtracking)
1809 // must be attached to the graph entry in order to be discovered by the
1810 // flow graph.
1811 AttachIndirectTargets();
1812
1813 // A dense block ordering is available, rewrite all pushes to the backtracking
1814 // stack to point to valid block IDs.
1815 RewriteBacktrackPushes();
1816
1817 FlowGraph* flow_graph = new(I) FlowGraph(builder,
1818 entry_block_,
1819 next_block_id_);
1820 const GrowableArray<BlockEntryInstr*>& blocks = flow_graph->preorder();
1821
1822 Error& error = Error::Handle(Compiler::CompileIrregexpFunction(
1823 parsed_func, flow_graph));
1824 if (!error.IsNull()) {
1825 // Should never happen.
1826 UNREACHABLE();
1827 }
1828
1829 // Now that block offsets are known, we can insert them into the table.
1830 FinalizeBlockOffsetTable(blocks);
1831
1832 return function;
1833 }
1834
1835
1836 #undef __
1837
1838 } // namespace dart
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698