Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1320)

Unified Diff: runtime/vm/regexp_assembler.cc

Issue 539153002: Port and integrate the irregexp engine from V8 (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: Created 6 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: runtime/vm/regexp_assembler.cc
diff --git a/runtime/vm/regexp_assembler.cc b/runtime/vm/regexp_assembler.cc
new file mode 100644
index 0000000000000000000000000000000000000000..299bf7a6a2a3dfc337f5aa1417edf5c59776d7de
--- /dev/null
+++ b/runtime/vm/regexp_assembler.cc
@@ -0,0 +1,1838 @@
+// Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+#include "vm/regexp_assembler.h"
+
+#include "vm/bit_vector.h"
+#include "vm/compiler.h"
+#include "vm/dart_entry.h"
+#include "vm/flow_graph_builder.h"
+#include "vm/il_printer.h"
+#include "vm/object_store.h"
+#include "vm/resolver.h"
+#include "vm/stack_frame.h"
+#include "vm/unibrow-inl.h"
+
+#define I isolate()
+
+// Debugging output macros. TAG() is called at the head of each interesting
+// function and prints its name during execution if irregexp tracing is enabled.
+#define TAG() if (FLAG_trace_irregexp) { TAG_(); }
+#define TAG_() \
+ Print(PushArgument( \
+ Bind(new(I) ConstantInstr(String::ZoneHandle(I, String::Concat( \
+ String::Handle(String::New("TAG: ")), \
+ String::Handle(String::New(__FUNCTION__)), Heap::kOld))))));
+
+#define PRINT(arg) if (FLAG_trace_irregexp) { Print(arg); }
+
+namespace dart {
+
+DEFINE_FLAG(bool, trace_irregexp, false, "Trace irregexps");  // Gates the TAG() and PRINT() tracing macros.
+ // Sentinel values; all of them are -1.
+static const intptr_t kInvalidTryIndex = -1;  // Try index passed to the block entries created in this file.
+static const intptr_t kNoTokenPos = -1;  // Token position passed to generated instructions and locals.
+static const intptr_t kOffsetNotYetSet = -1;  // NOTE(review): no use is visible in this part of the file.
+
+/*
+ * This assembler uses the following main local variables:
+ * - stack_: A pointer to a growable list which we use as an all-purpose stack
+ * storing backtracking offsets, positions & stored register values.
+ * - current_character_: Stores the currently loaded characters (possibly more
+ * than one).
+ * - current_position_: The current position within the string, stored as a
+ * negative offset from the end of the string (i.e. the
+ * position corresponding to str[0] is -str.length).
+ * Note that current_position_ is *not* byte-based, unlike
+ * original V8 code.
+ *
+ * Results are returned through an array of capture indices, stored at
+ * matches_param_. A null array specifies a failure to match. The match indices
+ * [start_inclusive, end_exclusive] for capture group i are stored at positions
+ * matches_param_[i * 2] and matches_param_[i * 2 + 1], respectively. Match
+ * indices of -1 denote non-matched groups. Note that we store these indices
+ * as a negative offset from the end of the string in position_registers_
+ * during processing, and convert them to standard indexes when copying them
+ * to matches_param_ on successful match.
+ */
+
+// The number of parameters of the generated function (string, start index, matches array; see InitializeLocals()).
+static const intptr_t kNumParameters = 3;
+
+// Base-class constructor: stores the isolate and starts with the slow-safe
+// compiler switched off and the global mode set to NOT_GLOBAL.
+RegExpMacroAssembler::RegExpMacroAssembler(Isolate* isolate)
+    : slow_safe_compiler_(false),
+      global_mode_(NOT_GLOBAL),
+      isolate_(isolate) { }
+
+
+// Destructor; the body is empty.
+RegExpMacroAssembler::~RegExpMacroAssembler() { }
+
+
+// Builds an IR-emitting regexp assembler. Creates the locals and parameters,
+// allocates the preset blocks (entry, start, success, backtrack, exit), emits
+// the fixed contents of the entry, success, backtrack and exit blocks,
+// records every preset block in blocks_, and leaves emission positioned at
+// the start block.
+IRRegExpMacroAssembler::IRRegExpMacroAssembler(
+    Mode mode,
+    intptr_t capture_count,
+    Isolate* isolate)
+    : RegExpMacroAssembler(isolate),
+      mode_(mode),
+      next_block_id_(0),
+      temp_count_(0),
+      args_pushed_(0),
+      num_stack_locals_(0),
+      current_instruction_(NULL),
+      stack_(NULL),
+      current_character_(NULL),
+      current_position_(NULL),
+      string_param_(NULL),
+      string_param_length_(NULL),
+      start_index_param_(NULL),
+      matches_param_(NULL),
+      // Two position registers for each of the capture_count + 1 captures.
+      position_registers_count_((capture_count + 1) * 2),
+      block_offsets_(GrowableObjectArray::ZoneHandle(
+          isolate, GrowableObjectArray::New(Heap::kOld))) {
+  InitializeLocals();
+
+  // Allocate the preset blocks; block ids are handed out in this order.
+  TargetEntryInstr* normal_entry =
+      new(isolate) TargetEntryInstr(AllocateBlockId(), kInvalidTryIndex);
+  entry_block_ =
+      new(isolate) GraphEntryInstr(NULL, normal_entry, Isolate::kNoDeoptId);
+  start_block_ =
+      new(isolate) JoinEntryInstr(AllocateBlockId(), kInvalidTryIndex);
+  success_block_ =
+      new(isolate) JoinEntryInstr(AllocateBlockId(), kInvalidTryIndex);
+  backtrack_block_ =
+      new(isolate) JoinEntryInstr(AllocateBlockId(), kInvalidTryIndex);
+  exit_block_ =
+      new(isolate) JoinEntryInstr(AllocateBlockId(), kInvalidTryIndex);
+
+  // Emit the fixed contents of the preset blocks.
+  GenerateEntryBlock();
+  GenerateSuccessBlock();
+  GenerateBacktrackBlock();
+  GenerateExitBlock();
+
+  // Record the preset blocks.
+  blocks_.Add(entry_block_);
+  blocks_.Add(entry_block_->normal_entry());
+  blocks_.Add(start_block_);
+  blocks_.Add(success_block_);
+  blocks_.Add(backtrack_block_);
+  blocks_.Add(exit_block_);
+
+  // Subsequent emission begins at the start block.
+  set_current_instruction(start_block_);
+}
+
+
+// Destructor; the body is empty.
+IRRegExpMacroAssembler::~IRRegExpMacroAssembler() {
+}
+
+
+// Creates the LocalVariables used by the generated code: the scratch locals,
+// the three function parameters, and the position registers for all captures.
+void IRRegExpMacroAssembler::InitializeLocals() {
+  // Scratch locals. Each Local() call takes the next local index, so the
+  // order of these calls fixes the index of every variable.
+  stack_ = Local("stack_");
+  current_character_ = Local("current_character_");
Florian Schneider 2014/09/16 11:11:48 Make those strings VM symbols, so that they are sh
jgruber1 2014/09/22 18:58:05 Done. Added variable names, function names, etc. t
+  current_position_ = Local("current_position_");
+  string_param_length_ = Local("string_param_length_");
+  capture_length_ = Local("capture_length_");
+  word_character_map_ = Local("word_character_map_");
+  stack_ptr_ = Local("stack_ptr_");
+  match_start_index_ = Local("match_start_index_");
+  capture_start_index_ = Local("capture_start_index_");
+  match_end_index_ = Local("match_end_index_");
+  char_in_capture_ = Local("char_in_capture_");
+  char_in_match_ = Local("char_in_match_");
+
+  // Function parameters, by position.
+  string_param_ = Parameter("string_param_", 0);
+  start_index_param_ = Parameter("start_index_param_", 1);
+  matches_param_ = Parameter("matches_param_", 2);
+
+  // Reserve space for all captured group positions. Note that more might
+  // be created on the fly for internal use.
+  intptr_t reg = 0;
+  while (reg < position_registers_count_) {
+    position_register(reg);
+    reg++;
+  }
+}
+
+
+// Emits the contents of the entry block: creates the backtracking stack list,
+// caches string.length, clears the capture registers, initializes the current
+// position, loads the word character map and jumps to the start block.
+void IRRegExpMacroAssembler::GenerateEntryBlock() {
+  set_current_instruction(entry_block_->normal_entry());
+  TAG();
+
+  // Generate a local list variable which we will use as a backtracking stack.
+  // The list factory is called with null type arguments.
+  Definition* null_type_args = new(I) ConstantInstr(
+      TypeArguments::ZoneHandle(I, TypeArguments::null()));
+  Value* null_type_args_value = Bind(null_type_args);
+  PushArgumentInstr* null_type_args_push = PushArgument(null_type_args_value);
+
+  const Library& core_lib = Library::Handle(Library::CoreLibrary());
+  const Class& list_cls = Class::Handle(
+      core_lib.LookupCoreClass(Symbols::List()));
+  const Function& list_factory = Function::ZoneHandle(I,
+      list_cls.LookupFactory(Symbols::ListFactory()));
+
+  Value* new_list = Bind(StaticCall(list_factory, null_type_args_push));
+  StoreLocal(stack_, new_list);
+
+  // Store string.length.
+  Value* subject = Bind(LoadLocal(string_param_));
+  PushArgumentInstr* subject_push = PushArgument(subject);
+  Value* subject_length = Bind(InstanceCall("get:length", subject_push));
+  StoreLocal(string_param_length_, subject_length);
Florian Schneider 2014/09/16 11:11:48 Have you tried directly using LoadField / StoreFie
Florian Schneider 2014/09/16 11:11:48 String::ZoneHandle(Field::GetterName(Symbols::Leng
jgruber1 2014/09/22 18:58:04 No, since optimization has the same effect and the
jgruber1 2014/09/22 18:58:05 Done.
+
+  // Initialize all capture registers.
+  ClearRegisters(0, position_registers_count_ - 1);
+
+  // Store (start_index - string.length) as the current position (since it's a
+  // negative offset from the end of the string).
+  Value* start_index = Bind(LoadLocal(start_index_param_));
+  PushArgumentInstr* start_index_push = PushArgument(start_index);
+  Value* length = Bind(LoadLocal(string_param_length_));
+  PushArgumentInstr* length_push = PushArgument(length);
+
+  StoreLocal(current_position_, Sub(start_index_push, length_push));
+
+  // Look up and store the word character map static field of the RegExp class.
+  const String& regexp_class_name =
+      String::Handle(Symbols::New("_JSSyntaxRegExp"));
+  const Class& regexp_cls = Class::Handle(
+      core_lib.LookupClassAllowPrivate(regexp_class_name));
Florian Schneider 2014/09/16 11:11:48 Just Symbols::JSSyntaxRegExp()
jgruber1 2014/09/22 18:58:05 Done.
+  const String& map_field_name =
+      String::Handle(Symbols::New("_wordCharacterMap"));
+  const String& map_getter_name =
+      String::Handle(Field::GetterName(map_field_name));
+  const Function& map_getter = Function::ZoneHandle(I,
+      regexp_cls.LookupStaticFunctionAllowPrivate(map_getter_name));
Florian Schneider 2014/09/16 11:11:48 Add this string to the list of VM symbols in symbo
jgruber1 2014/09/22 18:58:05 Done.
+
+  Value* word_map = Bind(StaticCall(map_getter));
+  StoreLocal(word_character_map_, word_map);
Florian Schneider 2014/09/16 11:11:48 The wordCharacterMap should be embedded as a compi
jgruber1 2014/09/22 18:58:04 Done.
+
+  // Jump to the start block.
+  current_instruction_->Goto(start_block_);
+}
+
+
+// Emits the contents of the backtrack block: pops a block id off the stack,
+// looks up its entry in block_offsets_ and closes the block with an indirect
+// goto to that value.
+void IRRegExpMacroAssembler::GenerateBacktrackBlock() {
+  set_current_instruction(backtrack_block_);
+  TAG();
+
+  // Receiver of the lookup: the block offset table.
+  Value* offsets_table = Bind(new(I) ConstantInstr(block_offsets_));
+  PushArgumentInstr* table_push = PushArgument(offsets_table);
+  // Index of the lookup: the block id popped from the backtracking stack.
+  PushArgumentInstr* id_push = PushArgument(PopStack());
+
+  Definition* lookup = InstanceCall("[]", table_push, id_push);
Florian Schneider 2014/09/16 11:11:49 s/"[]"/Symbols::IndexToken()/
jgruber1 2014/09/22 18:58:04 Done.
+  Value* target_offset = Bind(lookup);
+
+  CloseBlockWith(new(I) IndirectGotoInstr(target_offset));
+}
+
+
+// Emits the contents of the success block: copies every position register
+// into matches_param_, converted to a string index, and returns true.
+void IRRegExpMacroAssembler::GenerateSuccessBlock() {
+  set_current_instruction(success_block_);
+  TAG();
+
+  // Store captured offsets in the `matches` parameter.
+  for (intptr_t reg = 0; reg < position_registers_count_; reg++) {
+    Value* matches = Bind(LoadLocal(matches_param_));
+    PushArgumentInstr* matches_push = PushArgument(matches);
+    Value* slot = Bind(Uint64Constant(reg));
+    PushArgumentInstr* slot_push = PushArgument(slot);
+
+    // Convert negative offsets from the end of the string to string indices.
+    Value* offset = Bind(LoadLocal(position_register(reg)));
+    PushArgumentInstr* offset_push = PushArgument(offset);
+    Value* length = Bind(LoadLocal(string_param_length_));
+    PushArgumentInstr* length_push = PushArgument(length);
+    Value* string_index = Add(offset_push, length_push);
+    PushArgumentInstr* string_index_push = PushArgument(string_index);
+
+    Do(InstanceCall("[]=", matches_push, slot_push, string_index_push));
Florian Schneider 2014/09/16 11:11:49 s/[]=/Symbols::AssignIndexToken()/
jgruber1 2014/09/22 18:58:04 Done.
+  }
+
+  // Print the result if tracing.
+  PRINT(PushArgument(Bind(LoadLocal(matches_param_))));
+
+  // Return true on success.
+  Value* result = Bind(BoolConstant(true));
+  AppendInstruction(new(I) ReturnInstr(kNoTokenPos, result));
+}
+
+
+// Emits the contents of the exit block, which returns false.
+void IRRegExpMacroAssembler::GenerateExitBlock() {
+  set_current_instruction(exit_block_);
+  TAG();
+
+  // Return false on failure.
+  Value* result = Bind(BoolConstant(false));
+  AppendInstruction(new(I) ReturnInstr(kNoTokenPos, result));
+}
+
+
+// Compile-time switch for unaligned reads.
+static const bool kEnableUnalignedAccesses = true;
+// Unaligned reads are allowed when the switch above is on and the assembler
+// is not in slow-safe mode.
+bool IRRegExpMacroAssembler::CanReadUnaligned() {
+  if (!kEnableUnalignedAccesses) {
+    return false;
+  }
+  return !slow_safe();
+}
+
+
+// Invokes the generated matcher `function` with the arguments
+// (input, start_offset, *output) and maps its boolean result to SUCCESS or
+// FAILURE. An Error result is printed and then treated as unreachable.
+// `output` must not be NULL.
+IRRegExpMacroAssembler::Result IRRegExpMacroAssembler::Execute(
+    const Function& function,
+    const String& input,
+    const Smi& start_offset,
+    Array* output,
+    Isolate* isolate) {
+  // `output` is dereferenced below.
+  ASSERT(output != NULL);
+
+  // Create the argument list. Its size is taken from kNumParameters so that
+  // it cannot drift from the parameter layout used by Parameter().
+  const Array& args =
+      Array::ZoneHandle(isolate, Array::New(kNumParameters));
+  args.SetAt(0, input);
+  args.SetAt(1, start_offset);
+  args.SetAt(2, *output);
+
+  // And finally call the generated code.
+  const Object& retval =
+      Object::Handle(DartEntry::InvokeFunction(function, args));
+  if (retval.IsError()) {
+    const Error& error = Error::Cast(retval);
+    OS::Print("%s\n", error.ToErrorCString());
+    // Should never happen.
+    UNREACHABLE();
Florian Schneider 2014/09/16 11:11:48 Since exceptions can happen in generated code (e.g
jgruber1 2014/09/22 18:58:05 The entry into irregexp matcher functions is now a
+  }
+
+  return (Bool::Cast(retval).value()) ? SUCCESS : FAILURE;
+}
+
+
+// Compares two runs of 16-bit code units for equality after canonicalizing
+// each differing pair with unibrow::Ecma262Canonicalize. byte_length is given
+// in bytes and must be even. Returns 1 when the runs are equal under
+// canonicalization and 0 otherwise.
+intptr_t IRRegExpMacroAssembler::CaseInsensitiveCompareUC16(
Florian Schneider 2014/09/16 11:11:48 Where is this function used?
jgruber1 2014/09/22 18:58:05 This was one of the last unported bits. It is now
+    uint8_t* byte_offset1,
+    uint8_t* byte_offset2,
+    size_t byte_length) {
+  // TODO(jgruber): Optimize as single instance. V8 has this as an
+  // isolate member.
+  unibrow::Mapping<unibrow::Ecma262Canonicalize> canonicalize;
+  // This function is not allowed to cause a garbage collection.
+  // A GC might move the calling generated code and invalidate the
+  // return address on the stack.
+  ASSERT(byte_length % 2 == 0);
+  uint16_t* substring1 = reinterpret_cast<uint16_t*>(byte_offset1);
+  uint16_t* substring2 = reinterpret_cast<uint16_t*>(byte_offset2);
+  size_t length = byte_length >> 1;
+
+  for (size_t i = 0; i < length; i++) {
+    unibrow::uchar c1 = substring1[i];
+    unibrow::uchar c2 = substring2[i];
+    if (c1 == c2) {
+      continue;
+    }
+    // The units differ: canonicalize the first one and compare again.
+    unibrow::uchar s1[1] = { c1 };
+    canonicalize.get(c1, '\0', s1);
+    if (s1[0] == c2) {
+      continue;
+    }
+    // Still different: canonicalize the second one as well.
+    unibrow::uchar s2[1] = { c2 };
+    canonicalize.get(c2, '\0', s2);
+    if (s1[0] != s2[0]) {
+      return 0;
+    }
+  }
+  return 1;
+}
+
+
+// Creates a dynamically typed LocalVariable named `name` for the parameter at
+// position `index` and assigns it the matching parameter frame index.
+LocalVariable* IRRegExpMacroAssembler::Parameter(const char* name,
Florian Schneider 2014/09/16 11:11:48 const String& name
jgruber1 2014/09/22 18:58:04 Done.
+                                                 intptr_t index) const {
+  const Type& dynamic_type = Type::ZoneHandle(I, Type::DynamicType());
+  const String& param_name = String::Handle(Symbols::New(name));
+
+  LocalVariable* param =
+      new(I) LocalVariable(kNoTokenPos, param_name, dynamic_type);
+  param->set_index(kParamEndSlotFromFp + kNumParameters - index);
+
+  return param;
+}
+
+
+// Creates a dynamically typed LocalVariable named `name` and gives it the
+// next free local index.
+LocalVariable* IRRegExpMacroAssembler::Local(const char* name) {
Florian Schneider 2014/09/16 11:11:48 const String& name
jgruber1 2014/09/22 18:58:05 Done.
+  const Type& dynamic_type = Type::ZoneHandle(I, Type::DynamicType());
+  const String& symbol = String::Handle(Symbols::New(name));
+
+  LocalVariable* variable =
+      new(I) LocalVariable(kNoTokenPos, symbol, dynamic_type);
+  const intptr_t slot = GetNextLocalIndex();
+  variable->set_index(slot);
+
+  return variable;
+}
+
+
+// Creates a constant instruction holding `value` as an old-space Integer.
+ConstantInstr* IRRegExpMacroAssembler::Int64Constant(int64_t value) const {
+  const Integer& boxed =
+      Integer::ZoneHandle(I, Integer::New(value, Heap::kOld));
+  return new(I) ConstantInstr(boxed);
+}
+
+
+// Creates a constant instruction holding the unsigned `value` as an old-space
+// Integer.
+ConstantInstr* IRRegExpMacroAssembler::Uint64Constant(uint64_t value) const {
+  const Integer& boxed =
+      Integer::ZoneHandle(I, Integer::NewFromUint64(value, Heap::kOld));
+  return new(I) ConstantInstr(boxed);
+}
+
+
+// Creates a constant instruction holding Bool::True() or Bool::False().
+ConstantInstr* IRRegExpMacroAssembler::BoolConstant(bool value) const {
+  if (value) {
+    return new(I) ConstantInstr(Bool::True());
+  }
+  return new(I) ConstantInstr(Bool::False());
+}
+
+
+// Creates a constant instruction holding `value` as an old-space String.
+ConstantInstr* IRRegExpMacroAssembler::StringConstant(const char* value) const {
+  const String& boxed =
+      String::ZoneHandle(I, String::New(value, Heap::kOld));
+  return new(I) ConstantInstr(boxed);
+}
+
+
+// Builds a StrictCompareInstr for `lhs <kind> rhs`. kEQ and kNE compare the
+// two operands directly with the strict (in)equality token. The relational
+// kinds first emit an instance call of the operator on the operands and then
+// strictly compare the call result against the constant true.
+ComparisonInstr* IRRegExpMacroAssembler::Comparison(
+    ComparisonKind kind, Definition* lhs, Definition* rhs) {
+  const char* relational_operator = NULL;
+
+  switch (kind) {
+    case kEQ:
+    case kNE: {
+      // Strict operators: bind the operands and compare them as they are.
+      const Token::Kind strict_kind =
+          (kind == kEQ) ? Token::kEQ_STRICT : Token::kNE_STRICT;
+      Value* left = Bind(lhs);
+      Value* right = Bind(rhs);
+      return new(I) StrictCompareInstr(kNoTokenPos, strict_kind,
+                                       left, right, true);
+    }
+    case kLT:
+      relational_operator = "<";
+      break;
+    case kGT:
+      relational_operator = ">";
+      break;
+    case kLTE:
+      relational_operator = "<=";
+      break;
+    case kGTE:
+      relational_operator = ">=";
+      break;
+    default:
+      UNREACHABLE();
+  }
+
+  ASSERT(relational_operator != NULL);
+
+  // Relational operators: call the operator, then test the result == true.
+  PushArgumentInstr* left_push = PushArgument(Bind(lhs));
+  PushArgumentInstr* right_push = PushArgument(Bind(rhs));
+
+  Value* call_result =
+      Bind(InstanceCall(relational_operator, left_push, right_push));
+  Value* true_value = Bind(BoolConstant(true));
+
+  return new(I) StrictCompareInstr(kNoTokenPos, Token::kEQ_STRICT,
+                                   call_result, true_value, true);
+}
+
+
+// Creates a static call to `function` with no arguments.
+StaticCallInstr* IRRegExpMacroAssembler::StaticCall(
+    const Function& function) const {
+  ZoneGrowableArray<PushArgumentInstr*>* no_args =
+      new(I) ZoneGrowableArray<PushArgumentInstr*>(0);
+  StaticCallInstr* call = StaticCall(function, no_args);
+  return call;
+}
+
+
+// Creates a static call to `function` with the single pushed argument `arg1`.
+StaticCallInstr* IRRegExpMacroAssembler::StaticCall(
+    const Function& function,
+    PushArgumentInstr* arg1) const {
+  ZoneGrowableArray<PushArgumentInstr*>* single_arg =
+      new(I) ZoneGrowableArray<PushArgumentInstr*>(1);
+  single_arg->Add(arg1);
+
+  StaticCallInstr* call = StaticCall(function, single_arg);
+  return call;
+}
+
+
+// Creates a static call to `function` with the given pushed arguments, a
+// null argument-names array and an empty IC data array.
+StaticCallInstr* IRRegExpMacroAssembler::StaticCall(
+    const Function& function,
+    ZoneGrowableArray<PushArgumentInstr*>* arguments) const {
+  ZoneGrowableArray<const ICData*> empty_ic_data;
+  StaticCallInstr* call = new(I) StaticCallInstr(
+      kNoTokenPos, function, Object::null_array(), arguments, empty_ic_data);
+  return call;
+}
+
+
+// Creates an instance call of `name` with one pushed argument.
+InstanceCallInstr* IRRegExpMacroAssembler::InstanceCall(
+    const char *name,
Florian Schneider 2014/09/16 11:11:49 const String& name
jgruber1 2014/09/22 18:58:04 These have been refactored to handle # of checked
+    PushArgumentInstr* arg1) const {
+  ZoneGrowableArray<PushArgumentInstr*>* call_args =
+      new(I) ZoneGrowableArray<PushArgumentInstr*>(1);
+  call_args->Add(arg1);
+
+  InstanceCallInstr* call = InstanceCall(name, call_args);
+  return call;
+}
+
+
+// Creates an instance call of `name` with two pushed arguments.
+InstanceCallInstr* IRRegExpMacroAssembler::InstanceCall(
+    const char *name,
Florian Schneider 2014/09/16 11:11:48 const String& name
jgruber1 2014/09/22 18:58:05 Done.
+    PushArgumentInstr* arg1,
+    PushArgumentInstr* arg2) const {
+  ZoneGrowableArray<PushArgumentInstr*>* call_args =
+      new(I) ZoneGrowableArray<PushArgumentInstr*>(2);
+  call_args->Add(arg1);
+  call_args->Add(arg2);
+
+  InstanceCallInstr* call = InstanceCall(name, call_args);
+  return call;
+}
+
+
+// Creates an instance call of `name` with three pushed arguments.
+InstanceCallInstr* IRRegExpMacroAssembler::InstanceCall(
+    const char *name,
Florian Schneider 2014/09/16 11:11:48 const String& name
jgruber1 2014/09/22 18:58:04 Done.
+    PushArgumentInstr* arg1,
+    PushArgumentInstr* arg2,
+    PushArgumentInstr* arg3) const {
+  ZoneGrowableArray<PushArgumentInstr*>* call_args =
+      new(I) ZoneGrowableArray<PushArgumentInstr*>(3);
+  call_args->Add(arg1);
+  call_args->Add(arg2);
+  call_args->Add(arg3);
+
+  InstanceCallInstr* call = InstanceCall(name, call_args);
+  return call;
+}
+
+
+// Creates an instance call of the method `name` (turned into a symbol) with
+// the given pushed arguments, Token::kILLEGAL as the token kind, a null
+// argument-names array, a checked-argument count of 1 and an empty IC data
+// array.
+InstanceCallInstr* IRRegExpMacroAssembler::InstanceCall(
+    const char *name,
Florian Schneider 2014/09/16 11:11:49 const String& name
jgruber1 2014/09/22 18:58:05 Done.
+    ZoneGrowableArray<PushArgumentInstr*> *arguments) const {
+  ZoneGrowableArray<const ICData*> empty_ic_data;
+
+  const String& selector = String::ZoneHandle(I, Symbols::New(name));
Florian Schneider 2014/09/16 11:11:49 Just use name.
jgruber1 2014/09/22 18:58:05 Done.
+
+  InstanceCallInstr* call = new(I) InstanceCallInstr(kNoTokenPos,
+                                                     selector,
+                                                     Token::kILLEGAL,
+                                                     arguments,
+                                                     Object::null_array(),
+                                                     1,
+                                                     empty_ic_data);
+  return call;
+}
+
+
+// Creates (but does not append) an instruction that loads `local`.
+LoadLocalInstr* IRRegExpMacroAssembler::LoadLocal(LocalVariable* local) const {
+  LoadLocalInstr* load = new(I) LoadLocalInstr(*local);
+  return load;
+}
+
+
+// Appends an instruction that stores `value` into `local`.
+void IRRegExpMacroAssembler::StoreLocal(LocalVariable* local,
+                                        Value* value) {
+  StoreLocalInstr* store = new(I) StoreLocalInstr(*local, value);
+  Do(store);
+}
+
+
+// Sets the instruction after which the next instruction will be appended.
+void IRRegExpMacroAssembler::set_current_instruction(Instruction* instruction) {
+  this->current_instruction_ = instruction;
+}
+
+
+// Appends `definition`, assigns it a freshly allocated temp index and returns
+// a new Value that refers to it.
+Value* IRRegExpMacroAssembler::Bind(Definition* definition) {
+  AppendInstruction(definition);
+  definition->set_temp_index(AllocateTemp());
+
+  Value* use = new(I) Value(definition);
+  return use;
+}
+
+
+// Appends `definition` without assigning it a temp index; unlike Bind(), no
+// Value is produced for its result.
+void IRRegExpMacroAssembler::Do(Definition* definition) {
+  Instruction* appended = definition;
+  AppendInstruction(appended);
+}
+
+
+// Links `instruction` after the current instruction and makes it the new
+// current instruction. The temp and pushed-argument counters are reduced by
+// the instruction's input and argument counts first.
+void IRRegExpMacroAssembler::AppendInstruction(Instruction* instruction) {
+  // There must be an open emission point with nothing linked after it.
+  ASSERT(current_instruction_ != NULL);
+  ASSERT(current_instruction_->next() == NULL);
+
+  const intptr_t consumed_inputs = instruction->InputCount();
+  DeallocateTemps(consumed_inputs);
+  const intptr_t consumed_args = instruction->ArgumentCount();
+  add_args_pushed(-consumed_args);
+
+  current_instruction_->LinkTo(instruction);
+  set_current_instruction(instruction);
+}
+
+
+// Appends `instruction` as the last instruction of the current block and
+// clears the current instruction, so nothing more can be appended until a
+// new emission point is set.
+void IRRegExpMacroAssembler::CloseBlockWith(Instruction* instruction) {
+  // Same checks, counter updates and linking as for any appended instruction.
+  AppendInstruction(instruction);
+  // The block is closed.
+  set_current_instruction(NULL);
+}
+
+
+// Emits a goto from the current instruction to the block of `to`, marks the
+// label as linked, and continues emission inside that block.
+void IRRegExpMacroAssembler::GoTo(BlockLabel* to) {
+  ASSERT(current_instruction_ != NULL);
+  ASSERT(current_instruction_->next() == NULL);
+  ASSERT(to->block()->next() == NULL);
+
+  to->SetLinked();
+  JoinEntryInstr* target = to->block();
+  current_instruction_->Goto(target);
+  set_current_instruction(target);
+}
+
+
+// Jumps to the block of `to`, marking the label as linked. A NULL label means
+// backtrack.
+void IRRegExpMacroAssembler::Jump(BlockLabel* to) {
+  if (to == NULL) {
+    Backtrack();
+    return;
+  }
+
+  to->SetLinked();
+  Jump(to->block());
+}
+
+
+// Ends the current block with a goto to `to` and clears the current
+// instruction. BindBlock() must be called before emission can continue.
+void IRRegExpMacroAssembler::Jump(JoinEntryInstr* to) {
+  ASSERT(current_instruction_ != NULL);
+  ASSERT(current_instruction_->next() == NULL);
+
+  Instruction* block_tail = current_instruction_;
+  block_tail->Goto(to);
+  set_current_instruction(NULL);
+}
+
+
+// Appends a push-argument instruction for `value`, counts it in the
+// pushed-arguments counter and returns it.
+PushArgumentInstr* IRRegExpMacroAssembler::PushArgument(Value* value) {
+  add_args_pushed(1);
+
+  PushArgumentInstr* pushed = new(I) PushArgumentInstr(value);
+  // Do *not* use Do() for push argument instructions.
+  AppendInstruction(pushed);
+
+  return pushed;
+}
+
+
+// Emits a call that prints the string `str` from the generated code.
+void IRRegExpMacroAssembler::Print(const char* str) {
+  // StringConstant() builds the same old-space string constant inline code
+  // would.
+  Value* text = Bind(StringConstant(str));
+  Print(PushArgument(text));
+}
+
+
+// Emits a static call to the core library's print function with `argument`.
+void IRRegExpMacroAssembler::Print(PushArgumentInstr* argument) {
+  const Library& core_lib = Library::Handle(Library::CoreLibrary());
+  const String& print_name = String::Handle(Symbols::New("print"));
+  const Function& print_fn = Function::ZoneHandle(I,
+      core_lib.LookupFunctionAllowPrivate(print_name));
+
+  Do(StaticCall(print_fn, argument));
+}
+
+
+// Prints every block recorded in blocks_, in order, via FlowGraphPrinter.
+void IRRegExpMacroAssembler::PrintBlocks() {
+  intptr_t index = 0;
+  while (index < blocks_.length()) {
+    FlowGraphPrinter::PrintBlock(blocks_[index], false);
+    index++;
+  }
+}
+
+
+// Returns the stack limit slack, a fixed value of 32.
+intptr_t IRRegExpMacroAssembler::stack_limit_slack() {
+  const intptr_t slack = 32;
+  return slack;
+}
+
+
+// Emits code that adds `by` to current_position_. Nothing beyond the trace
+// tag is emitted when `by` is zero.
+void IRRegExpMacroAssembler::AdvanceCurrentPosition(intptr_t by) {
+  TAG();
+  if (by == 0) {
+    return;
+  }
+
+  Value* position = Bind(LoadLocal(current_position_));
+  PushArgumentInstr* position_push = PushArgument(position);
+
+  Value* delta = Bind(Int64Constant(by));
+  PushArgumentInstr* delta_push = PushArgument(delta);
+
+  StoreLocal(current_position_, Add(position_push, delta_push));
+}
+
+
+// Emits code that adds `by` to position register `reg`. Nothing beyond the
+// trace tag is emitted when `by` is zero.
+void IRRegExpMacroAssembler::AdvanceRegister(intptr_t reg, intptr_t by) {
+  TAG();
+  ASSERT(reg >= 0);
+  ASSERT(reg < position_registers_.length());
+
+  if (by == 0) {
+    return;
+  }
+
+  Value* old_value = Bind(LoadLocal(position_register(reg)));
+  PushArgumentInstr* old_value_push = PushArgument(old_value);
+  Value* delta = Bind(Int64Constant(by));
+  PushArgumentInstr* delta_push = PushArgument(delta);
+  StoreLocal(position_register(reg), Add(old_value_push, delta_push));
+}
+
+
+// Emits a backtrack at the current position: a preemption check, then a pop
+// of a block id from the stack, a lookup of its entry in block_offsets_ and
+// an indirect goto that closes the current block.
+void IRRegExpMacroAssembler::Backtrack() {
+  TAG();
+  CheckPreemption();
+
+  // TODO(jgruber): Duplicate code in GenerateBacktrackBlock, refactor.
+  Value* offsets_table = Bind(new(I) ConstantInstr(block_offsets_));
+  PushArgumentInstr* table_push = PushArgument(offsets_table);
+  PushArgumentInstr* id_push = PushArgument(PopStack());
+
+  Definition* lookup = InstanceCall("[]", table_push, id_push);
+  Value* target_offset = Bind(lookup);
+
+  CloseBlockWith(new(I) IndirectGotoInstr(target_offset));
+}
+
+
+// Binding a block is analogous to assigning a label to a basic block: the
+// label receives a fresh block id and its block is recorded in blocks_.
+// If the BlockLabel does not yet contain a block, it is created.
+// If emission is in progress, a goto to the bound block is appended first;
+// either way emission continues inside the bound block.
+void IRRegExpMacroAssembler::BindBlock(BlockLabel* label) {
+  ASSERT(!label->IsBound());
+  ASSERT(label->block()->next() == NULL);
+
+  label->SetBound(AllocateBlockId());
+  blocks_.Add(label->block());
+
+  if (current_instruction_ != NULL) {
+    // Fall through from the open block into the newly bound one.
+    GoTo(label);
+  } else {
+    set_current_instruction(label->block());
+  }
+
+  // Print the id of the current block if tracing.
+  PRINT(PushArgument(Bind(Uint64Constant(label->block()->block_id()))));
+}
+
+
+// Allocates the next stack local and returns its index, counted downwards
+// from kFirstLocalSlotFromFp.
+intptr_t IRRegExpMacroAssembler::GetNextLocalIndex() {
+  return kFirstLocalSlotFromFp - AllocateStackLocal();
+}
+
+
+// Returns the local variable backing position register `index`, first
+// creating it and any lower-numbered registers that do not exist yet.
+// Registers are named "pos" followed by the zero-padded register number.
+LocalVariable* IRRegExpMacroAssembler::position_register(intptr_t index) {
+  const char name_prefix[] = "pos";
+  // Room for the prefix (with its terminator) plus any int printed with
+  // %05d, which needs at most 11 characters including a sign.
+  char name[sizeof(name_prefix) + 11];
+
+  // Create position registers as needed.
+  for (intptr_t i = position_registers_.length(); i < index + 1; i++) {
+    // %d expects an int; passing an intptr_t directly is a format/argument
+    // mismatch on targets where intptr_t is wider than int.
+    OS::SNPrint(name, sizeof(name), "%s%05d", name_prefix,
+                static_cast<int>(i));
+    position_registers_.Add(Local(name));
+  }
+
+  return position_registers_[index];
+}
+
+
+// Registers every block referenced by a backtrack push as an indirect target
+// of the entry block. A bit vector keyed by block id ensures each block is
+// added only once.
+void IRRegExpMacroAssembler::AttachIndirectTargets() {
+  BitVector* seen_ids = new(I) BitVector(next_block_id_);
+  for (intptr_t i = 0; i < backtrack_references_.length(); i++) {
+    JoinEntryInstr* target = backtrack_references_[i].block;
+    if (!seen_ids->Contains(target->block_id())) {
+      entry_block_->AddIndirectTargetEntry(target);
+      seen_ids->Add(target->block_id());
+    }
+  }
+}
+
+
+// Replaces each placeholder recorded in backtrack_references_ with a constant
+// holding the id of the referenced block. The constant takes over the
+// placeholder's temp index and its position in the instruction chain.
+void IRRegExpMacroAssembler::RewriteBacktrackPushes() {
+  intptr_t i = 0;
+  while (i < backtrack_references_.length()) {
+    const BacktrackReference& entry = backtrack_references_[i];
+
+    // Replace the fake pushed value now that we definitely have a block id.
+    const uint64_t target_id = entry.block->block_id();
+    ConstantInstr* replacement = Uint64Constant(target_id);
+    replacement->set_temp_index(entry.reference->temp_index());
+
+    // Splice the constant in where the placeholder was.
+    entry.reference->previous()->LinkTo(replacement);
+    replacement->LinkTo(entry.reference->next());
+
+    i++;
+  }
+}
+
+
+// Sizes block_offsets_ to one slot per allocated block id and stores, for
+// each block in `blocks`, its offset at the index given by its block id.
+void IRRegExpMacroAssembler::FinalizeBlockOffsetTable(
+    const GrowableArray<BlockEntryInstr*>& blocks) {
+  block_offsets_.Grow(next_block_id_, Heap::kOld);
+  block_offsets_.SetLength(next_block_id_);
+
+  intptr_t i = 0;
+  while (i < blocks.length()) {
+    BlockEntryInstr* entry = blocks[i];
+    block_offsets_.SetAt(entry->block_id(),
+                         Smi::ZoneHandle(I, Smi::New(entry->offset())));
+    i++;
+  }
+}
+
+
+// Emits a branch to `on_equal` taken when the current character equals `c`.
+void IRRegExpMacroAssembler::CheckCharacter(uint32_t c, BlockLabel* on_equal) {
Florian Schneider 2014/09/16 11:11:49 Isn't characters uint16_t as most?
jgruber1 2014/09/22 18:58:05 No, at present up to 4 ASCII or 2 UTF16 code units
+  TAG();
+  Definition* current = LoadLocal(current_character_);
+  Definition* expected = Uint64Constant(c);
+
+  ComparisonInstr* is_equal = Comparison(kEQ, current, expected);
+  BranchOrBacktrack(is_equal, on_equal);
+}
+
+
+// Emits a branch to `on_greater` taken when the current character is greater
+// than `limit`.
+void IRRegExpMacroAssembler::CheckCharacterGT(uint16_t limit,
+                                              BlockLabel* on_greater) {
+  TAG();
+  ComparisonInstr* is_greater = Comparison(kGT,
+                                           LoadLocal(current_character_),
+                                           Uint64Constant(limit));
+  BranchOrBacktrack(is_greater, on_greater);
+}
+
+
+// Emits a branch to `on_at_start` taken when matching began at index 0 and
+// the current position is still at the start of the input. Otherwise
+// execution falls through.
+void IRRegExpMacroAssembler::CheckAtStart(BlockLabel* on_at_start) {
+  TAG();
+
+  BlockLabel not_at_start;
+
+  // Did we start the match at the start of the string at all?
+  ComparisonInstr* started_later = Comparison(kNE,
+                                              LoadLocal(start_index_param_),
+                                              Uint64Constant(0));
+  BranchOrBacktrack(started_later, &not_at_start);
+
+  // If we did, are we still at the start of the input, i.e. is
+  // (offset == string_length * -1)?
+  PushArgumentInstr* length_push =
+      PushArgument(Bind(LoadLocal(string_param_length_)));
+  Definition* negated_length = InstanceCall("unary-", length_push);
Florian Schneider 2014/09/16 11:11:48 If not present, add unary- to VM symbols in symbol
jgruber1 2014/09/22 18:58:04 Done.
+  Definition* position = LoadLocal(current_position_);
+  ComparisonInstr* at_start = Comparison(kEQ, negated_length, position);
+  BranchOrBacktrack(at_start, on_at_start);
+
+  BindBlock(&not_at_start);
+}
+
+
+// Emits branches to `on_not_at_start` taken when matching did not begin at
+// index 0, or when the current position is no longer at the start of the
+// input.
+void IRRegExpMacroAssembler::CheckNotAtStart(BlockLabel* on_not_at_start) {
+  TAG();
+
+  // Did we start the match at the start of the string at all?
+  ComparisonInstr* started_later = Comparison(kNE,
+                                              LoadLocal(start_index_param_),
+                                              Uint64Constant(0));
+  BranchOrBacktrack(started_later, on_not_at_start);
+
+  // If we did, are we still at the start of the input, i.e. is
+  // (offset == string_length * -1)?
+  PushArgumentInstr* length_push =
+      PushArgument(Bind(LoadLocal(string_param_length_)));
+  Definition* negated_length = InstanceCall("unary-", length_push);
Florian Schneider 2014/09/16 11:11:48 If not present, add unary- to VM symbols in symbol
jgruber1 2014/09/22 18:58:05 Done.
+  Definition* position = LoadLocal(current_position_);
+  ComparisonInstr* moved_on = Comparison(kNE, negated_length, position);
+  BranchOrBacktrack(moved_on, on_not_at_start);
+}
+
+
+// Emits a branch to `on_less` taken when the current character is less than
+// `limit`.
+void IRRegExpMacroAssembler::CheckCharacterLT(uint16_t limit,
+                                              BlockLabel* on_less) {
+  TAG();
+  ComparisonInstr* is_less = Comparison(kLT,
+                                        LoadLocal(current_character_),
+                                        Uint64Constant(limit));
+  BranchOrBacktrack(is_less, on_less);
+}
+
+
+// Emits code comparing the value on top of the backtracking stack with the
+// current position. When they are equal the top value is removed and control
+// goes to `on_equal`; otherwise execution falls through with the stack
+// unchanged.
+void IRRegExpMacroAssembler::CheckGreedyLoop(BlockLabel* on_equal) {
+  TAG();
+
+  BlockLabel fallthrough;
+
+  PushArgumentInstr* peek_receiver = PushArgument(Bind(LoadLocal(stack_)));
+  Definition* stack_top = InstanceCall("get:last", peek_receiver);
+  Definition* position = LoadLocal(current_position_);
+
+  ComparisonInstr* differs = Comparison(kNE, stack_top, position);
+  BranchOrBacktrack(differs, &fallthrough);
+
+  // Pop, throwing away the value.
+  PushArgumentInstr* pop_receiver = PushArgument(Bind(LoadLocal(stack_)));
+  Do(InstanceCall("removeLast", pop_receiver));
+
+  BranchOrBacktrack(NULL, on_equal);
+
+  BindBlock(&fallthrough);
+}
+
+
+// Emits code that checks, ignoring case, whether the input at the current
+// position repeats the text captured between position registers `start_reg`
+// and `start_reg + 1`. The generated code branches via `on_no_match` when the
+// capture length is negative, when too little input is left, or when a pair
+// of characters differs. On a match the current position is moved past the
+// matched text. A zero-length capture falls through without consuming input.
+// Only the ASCII mode is implemented; UC16 hits UNIMPLEMENTED().
+void IRRegExpMacroAssembler::CheckNotBackReferenceIgnoreCase(
+    intptr_t start_reg,
+    BlockLabel* on_no_match) {
+  TAG();
+  ASSERT(start_reg + 1 <= position_registers_.length());
+
+  BlockLabel fallthrough;
+
+  // capture_length_ = capture end - capture start.
+  PushArgumentInstr* end_push =
+      PushArgument(Bind(LoadLocal(position_register(start_reg + 1))));
+  PushArgumentInstr* start_push =
+      PushArgument(Bind(LoadLocal(position_register(start_reg))));
+  StoreLocal(capture_length_, Sub(end_push, start_push));
+
+  // The length of a capture should not be negative. This can only happen
+  // if the end of the capture is unrecorded, or at a point earlier than
+  // the start of the capture.
+  // BranchOrBacktrack(less, on_no_match);
+
+  BranchOrBacktrack(Comparison(kLT,
+                               LoadLocal(capture_length_),
+                               Uint64Constant(0)),
+                    on_no_match);
+
+  // If length is zero, either the capture is empty or it is completely
+  // uncaptured. In either case succeed immediately.
+  BranchOrBacktrack(Comparison(kEQ,
+                               LoadLocal(capture_length_),
+                               Uint64Constant(0)),
+                    &fallthrough);
+
+
+  // Check that there are sufficient characters left in the input.
+  PushArgumentInstr* pos_push =
+      PushArgument(Bind(LoadLocal(current_position_)));
+  PushArgumentInstr* len_push = PushArgument(Bind(LoadLocal(capture_length_)));
+  BranchOrBacktrack(Comparison(kGT,
+                               InstanceCall("+", pos_push, len_push),
+                               Uint64Constant(0)),
+                    on_no_match);
+
+
+  if (mode_ == ASCII) {
+    BlockLabel success;
+    BlockLabel fail;
+    BlockLabel loop_increment;
+
+    // Convert the negative offsets into string indices by adding the length.
+    pos_push = PushArgument(Bind(LoadLocal(current_position_)));
+    len_push = PushArgument(Bind(LoadLocal(string_param_length_)));
+    StoreLocal(match_start_index_, Add(pos_push, len_push));
+
+    pos_push = PushArgument(Bind(LoadLocal(position_register(start_reg))));
+    len_push = PushArgument(Bind(LoadLocal(string_param_length_)));
+    StoreLocal(capture_start_index_, Add(pos_push, len_push));
+
+    pos_push = PushArgument(Bind(LoadLocal(match_start_index_)));
+    len_push = PushArgument(Bind(LoadLocal(capture_length_)));
+    StoreLocal(match_end_index_, Add(pos_push, len_push));
+
+    BlockLabel loop;
+    BindBlock(&loop);
+
+    StoreLocal(char_in_capture_, CharacterAt(LoadLocal(capture_start_index_)));
+    StoreLocal(char_in_match_, CharacterAt(LoadLocal(match_start_index_)));
+
+    BranchOrBacktrack(Comparison(kEQ,
+                                 LoadLocal(char_in_capture_),
+                                 LoadLocal(char_in_match_)),
+                      &loop_increment);
+
+    // Mismatch, try case-insensitive match (converting letters to lower-case).
+    PushArgumentInstr* match_char_push =
+        PushArgument(Bind(LoadLocal(char_in_match_)));
+    PushArgumentInstr* mask_push = PushArgument(Bind(Uint64Constant(0x20)));
+    StoreLocal(char_in_match_,
+               Bind(InstanceCall("|", match_char_push, mask_push)));
+
+    BlockLabel convert_capture;
+    BlockLabel on_not_in_range;
+    BranchOrBacktrack(Comparison(kLT,
+                                 LoadLocal(char_in_match_),
+                                 Uint64Constant('a')),
+                      &on_not_in_range);
+    BranchOrBacktrack(Comparison(kGT,
+                                 LoadLocal(char_in_match_),
+                                 Uint64Constant('z')),
+                      &on_not_in_range);
+    Jump(&convert_capture);
+    BindBlock(&on_not_in_range);
+
+    // Latin-1: Check for values in range [224,254] but not 247.
+    BranchOrBacktrack(Comparison(kLT,
+                                 LoadLocal(char_in_match_),
+                                 Uint64Constant(224)),
+                      &fail);
+    BranchOrBacktrack(Comparison(kGT,
+                                 LoadLocal(char_in_match_),
+                                 Uint64Constant(254)),
+                      &fail);
+
+    BranchOrBacktrack(Comparison(kEQ,
+                                 LoadLocal(char_in_match_),
+                                 Uint64Constant(247)),
+                      &fail);
+
+    // Also convert capture character.
+    BindBlock(&convert_capture);
+
+    PushArgumentInstr* capture_char_push =
+        PushArgument(Bind(LoadLocal(char_in_capture_)));
+    mask_push = PushArgument(Bind(Uint64Constant(0x20)));
+    StoreLocal(char_in_capture_,
+               Bind(InstanceCall("|", capture_char_push, mask_push)));
+
+    BranchOrBacktrack(Comparison(kNE,
+                                 LoadLocal(char_in_match_),
+                                 LoadLocal(char_in_capture_)),
+                      &fail);
+
+    BindBlock(&loop_increment);
+
+    // Increment pointers into match and capture strings. Each push is emitted
+    // in its own statement: the evaluation order of function arguments is
+    // unspecified, so nesting both pushes inside the Add() call would let the
+    // compiler emit them in either order.
+    PushArgumentInstr* capture_index_push =
+        PushArgument(Bind(LoadLocal(capture_start_index_)));
+    PushArgumentInstr* capture_step_push =
+        PushArgument(Bind(Uint64Constant(1)));
+    StoreLocal(capture_start_index_,
+               Add(capture_index_push, capture_step_push));
+
+    PushArgumentInstr* match_index_push =
+        PushArgument(Bind(LoadLocal(match_start_index_)));
+    PushArgumentInstr* match_step_push =
+        PushArgument(Bind(Uint64Constant(1)));
+    StoreLocal(match_start_index_, Add(match_index_push, match_step_push));
+
+    // Compare to end of match, and loop if not done.
+    BranchOrBacktrack(Comparison(kLT,
+                                 LoadLocal(match_start_index_),
+                                 LoadLocal(match_end_index_)),
+                      &loop);
+    Jump(&success);
+
+    // TODO(jgruber): If we don't need to restore edi and the stack ptr,
+    // remove fail.
+    BindBlock(&fail);
+    BranchOrBacktrack(NULL, on_no_match);
+
+    BindBlock(&success);
+
+    // Move current character position to position after match.
+    PushArgumentInstr* match_end_push =
+        PushArgument(Bind(LoadLocal(match_end_index_)));
+    len_push = PushArgument(Bind(LoadLocal(string_param_length_)));
+    StoreLocal(current_position_, Sub(match_end_push, len_push));
+  } else {
+    ASSERT(mode_ == UC16);
+    UNIMPLEMENTED();
+  }
+
+  BindBlock(&fallthrough);
+}
+
+
+// Emits IR that checks whether the input at the current position repeats the
+// text captured by the register pair (start_reg, start_reg + 1), comparing
+// characters exactly.
+//
+// start_reg: index of the register holding the capture start; the capture end
+//     is read from register start_reg + 1.
+// on_no_match: taken when the capture length is negative, when the bounds
+//     check on the remaining input fails, or when a character differs.
+//     BranchOrBacktrack backtracks instead when this is NULL.
+//
+// An empty capture falls through without changing current_position_. After a
+// full match, current_position_ is moved to just past the matched text.
+void IRRegExpMacroAssembler::CheckNotBackReference(
+    intptr_t start_reg,
+    BlockLabel* on_no_match) {
+  TAG();
+  // NOTE(review): register start_reg + 1 is read below, so '<' looks like the
+  // intended bound here (compare PopRegister); confirm before tightening.
+  ASSERT(start_reg + 1 <= position_registers_.length());
+
+  BlockLabel fallthrough;
+  BlockLabel success;
+  BlockLabel fail;
+
+  // Find length of back-referenced capture.
+  PushArgumentInstr* end_push =
+      PushArgument(Bind(LoadLocal(position_register(start_reg + 1))));
+  PushArgumentInstr* start_push =
+      PushArgument(Bind(LoadLocal(position_register(start_reg))));
+  StoreLocal(capture_length_, Sub(end_push, start_push));
+
+  // Fail on partial or illegal capture (start of capture after end of capture).
+  BranchOrBacktrack(Comparison(kLT,
+                               LoadLocal(capture_length_),
+                               Uint64Constant(0)),
+                    on_no_match);
+
+  // Succeed on empty capture (including no capture)
+  BranchOrBacktrack(Comparison(kEQ,
+                               LoadLocal(capture_length_),
+                               Uint64Constant(0)),
+                    &fallthrough);
+
+  // Check that there are sufficient characters left in the input.
+  // current_position_ is a negative offset from the string end, so a positive
+  // sum means the capture would extend past the end.
+  PushArgumentInstr* pos_push =
+      PushArgument(Bind(LoadLocal(current_position_)));
+  PushArgumentInstr* len_push = PushArgument(Bind(LoadLocal(capture_length_)));
+  BranchOrBacktrack(Comparison(kGT,
+                               InstanceCall("+", pos_push, len_push),
+                               Uint64Constant(0)),
+                    on_no_match);
+
+  // Save register to make it available below.
+  // TODO(jgruber): The original purpose was probably to free up a register for
+  // use, so we don't need to do this.
+  PushArgumentInstr* stack_push = PushArgument(Bind(LoadLocal(stack_)));
+  StoreLocal(stack_ptr_, Bind(InstanceCall("get:length", stack_push)));
+
+  // Compute pointers to match string and capture string.
+  pos_push = PushArgument(Bind(LoadLocal(current_position_)));
+  len_push = PushArgument(Bind(LoadLocal(string_param_length_)));
+  StoreLocal(match_start_index_, Add(pos_push, len_push));
+
+  pos_push = PushArgument(Bind(LoadLocal(position_register(start_reg))));
+  len_push = PushArgument(Bind(LoadLocal(string_param_length_)));
+  StoreLocal(capture_start_index_, Add(pos_push, len_push));
+
+  pos_push = PushArgument(Bind(LoadLocal(match_start_index_)));
+  len_push = PushArgument(Bind(LoadLocal(capture_length_)));
+  StoreLocal(match_end_index_, Add(pos_push, len_push));
+
+  BlockLabel loop;
+  BindBlock(&loop);
+
+  StoreLocal(char_in_capture_, CharacterAt(LoadLocal(capture_start_index_)));
+  StoreLocal(char_in_match_, CharacterAt(LoadLocal(match_start_index_)));
+
+  BranchOrBacktrack(Comparison(kNE,
+                               LoadLocal(char_in_capture_),
+                               LoadLocal(char_in_match_)),
+                    &fail);
+
+  // Increment pointers into capture and match string.
+  // Each push is bound to a named local before the call to Add: C++ leaves the
+  // evaluation order of function arguments unspecified, and the pushes must be
+  // issued receiver first, as everywhere else in this file.
+  PushArgumentInstr* index_push =
+      PushArgument(Bind(LoadLocal(capture_start_index_)));
+  PushArgumentInstr* one_push = PushArgument(Bind(Uint64Constant(1)));
+  StoreLocal(capture_start_index_, Add(index_push, one_push));
+
+  index_push = PushArgument(Bind(LoadLocal(match_start_index_)));
+  one_push = PushArgument(Bind(Uint64Constant(1)));
+  StoreLocal(match_start_index_, Add(index_push, one_push));
+
+  // Check if we have reached end of match area.
+  BranchOrBacktrack(Comparison(kLT,
+                               LoadLocal(match_start_index_),
+                               LoadLocal(match_end_index_)),
+                    &loop);
+
+  Jump(&success);
+
+  BindBlock(&fail);
+
+  // Restore backtrack stackpointer.
+  stack_push = PushArgument(Bind(LoadLocal(stack_)));
+  PushArgumentInstr* stack_ptr_push =
+      PushArgument(Bind(LoadLocal(stack_ptr_)));
+  Do(InstanceCall("set:length", stack_push, stack_ptr_push));
+
+  BranchOrBacktrack(NULL, on_no_match);
+
+  BindBlock(&success);
+
+  // Move current character position to position after match.
+  PushArgumentInstr* match_end_push =
+      PushArgument(Bind(LoadLocal(match_end_index_)));
+  len_push = PushArgument(Bind(LoadLocal(string_param_length_)));
+  StoreLocal(current_position_, Sub(match_end_push, len_push));
+
+  // Restore backtrack stackpointer.
+  stack_push = PushArgument(Bind(LoadLocal(stack_)));
+  stack_ptr_push = PushArgument(Bind(LoadLocal(stack_ptr_)));
+  Do(InstanceCall("set:length", stack_push, stack_ptr_push));
+
+  BindBlock(&fallthrough);
+}
+
+
+// Branches to on_not_equal when the current character differs from c.
+// BranchOrBacktrack backtracks instead when on_not_equal is NULL.
+void IRRegExpMacroAssembler::CheckNotCharacter(uint32_t c,
+                                               BlockLabel* on_not_equal) {
+  TAG();
+  Definition* character_def = LoadLocal(current_character_);
+  Definition* wanted_def = Uint64Constant(c);
+  BranchOrBacktrack(Comparison(kNE, character_def, wanted_def), on_not_equal);
+}
+
+
+// Masks the current character with `mask` and branches to on_equal when the
+// masked value equals c. BranchOrBacktrack backtracks instead when on_equal
+// is NULL.
+void IRRegExpMacroAssembler::CheckCharacterAfterAnd(uint32_t c,
+                                                    uint32_t mask,
+                                                    BlockLabel* on_equal) {
+  TAG();
+
+  Definition* character_def = LoadLocal(current_character_);
+  Definition* wanted_def = Uint64Constant(c);
+
+  // Emit "current_character_ & mask" as an instance call on pushed operands.
+  PushArgumentInstr* character_push = PushArgument(Bind(character_def));
+  PushArgumentInstr* mask_arg = PushArgument(Bind(Uint64Constant(mask)));
+  Definition* masked_def = InstanceCall("&", character_push, mask_arg);
+
+  BranchOrBacktrack(Comparison(kEQ, masked_def, wanted_def), on_equal);
+}
+
+
+// Masks the current character with `mask` and branches to on_not_equal when
+// the masked value differs from c. BranchOrBacktrack backtracks instead when
+// on_not_equal is NULL.
+void IRRegExpMacroAssembler::CheckNotCharacterAfterAnd(
+    uint32_t c,
+    uint32_t mask,
+    BlockLabel* on_not_equal) {
+  TAG();
+
+  Definition* character_def = LoadLocal(current_character_);
+  Definition* wanted_def = Uint64Constant(c);
+
+  // Emit "current_character_ & mask" as an instance call on pushed operands.
+  PushArgumentInstr* character_push = PushArgument(Bind(character_def));
+  PushArgumentInstr* mask_arg = PushArgument(Bind(Uint64Constant(mask)));
+  Definition* masked_def = InstanceCall("&", character_push, mask_arg);
+
+  BranchOrBacktrack(Comparison(kNE, masked_def, wanted_def), on_not_equal);
+}
+
+
+// Not implemented in this assembler: the body only hits UNIMPLEMENTED() and
+// ignores all of its arguments.
+// NOTE(review): the name suggests a ((current - minus) & mask) != c check,
+// but nothing here establishes that; confirm against the interface it
+// overrides before implementing.
+void IRRegExpMacroAssembler::CheckNotCharacterAfterMinusAnd(
+ uint16_t c,
+ uint16_t minus,
+ uint16_t mask,
+ BlockLabel* on_not_equal) {
+ UNIMPLEMENTED();
+}
+
+
+// Jumps to on_in_range when from <= current character <= to; otherwise
+// execution continues after the emitted code. When on_in_range is NULL the
+// in-range case backtracks (see BranchOrBacktrack).
+void IRRegExpMacroAssembler::CheckCharacterInRange(
+    uint16_t from,
+    uint16_t to,
+    BlockLabel* on_in_range) {
+  TAG();
+  ASSERT(from <= to);
+
+  // TODO(jgruber): All range comparisons could be done cheaper with unsigned
+  // compares. This pattern repeats in various places.
+
+  BlockLabel outside;
+
+  // Below the lower bound: skip the jump.
+  Definition* low_lhs = LoadLocal(current_character_);
+  Definition* low_rhs = Uint64Constant(from);
+  BranchOrBacktrack(Comparison(kLT, low_lhs, low_rhs), &outside);
+
+  // Above the upper bound: skip the jump.
+  Definition* high_lhs = LoadLocal(current_character_);
+  Definition* high_rhs = Uint64Constant(to);
+  BranchOrBacktrack(Comparison(kGT, high_lhs, high_rhs), &outside);
+
+  // Both bounds hold: unconditional transfer to the caller's label.
+  BranchOrBacktrack(NULL, on_in_range);
+
+  BindBlock(&outside);
+}
+
+
+// Branches to on_not_in_range when the current character is below `from` or
+// above `to`; falls through when it lies inside [from, to]. BranchOrBacktrack
+// backtracks instead when on_not_in_range is NULL.
+void IRRegExpMacroAssembler::CheckCharacterNotInRange(
+    uint16_t from,
+    uint16_t to,
+    BlockLabel* on_not_in_range) {
+  TAG();
+  ASSERT(from <= to);
+
+  Definition* low_lhs = LoadLocal(current_character_);
+  Definition* low_rhs = Uint64Constant(from);
+  BranchOrBacktrack(Comparison(kLT, low_lhs, low_rhs), on_not_in_range);
+
+  Definition* high_lhs = LoadLocal(current_character_);
+  Definition* high_rhs = Uint64Constant(to);
+  BranchOrBacktrack(Comparison(kGT, high_lhs, high_rhs), on_not_in_range);
+}
+
+
+// Looks the current character up in `table` through a "[]" instance call and
+// branches to on_bit_set when the loaded entry is non-zero. BranchOrBacktrack
+// backtracks instead when on_bit_set is NULL.
+void IRRegExpMacroAssembler::CheckBitInTable(
+ const TypedData& table,
+ BlockLabel* on_bit_set) {
+ TAG();
+
+ // Receiver (the constant table) is pushed first, then the index.
+ PushArgumentInstr* table_push =
+ PushArgument(Bind(new(I) ConstantInstr(table)));
+ PushArgumentInstr* index_push =
+ PushArgument(Bind(LoadLocal(current_character_)));
+
+ // Reduce the index with (kTableSize - 1) unless we are in ASCII mode and
+ // kTableMask equals Symbols::kMaxOneCharCodeSymbol. The masked value
+ // replaces the index push.
+ if (mode_ != ASCII || kTableMask != Symbols::kMaxOneCharCodeSymbol) {
+ PushArgumentInstr* mask_push =
+ PushArgument(Bind(Uint64Constant(kTableSize - 1)));
+ index_push = PushArgument(Bind(InstanceCall("&", index_push, mask_push)));
+ }
+
+ Definition* byte_def = InstanceCall("[]", table_push, index_push);
+ Definition* zero_def = Int64Constant(0);
+
+ BranchOrBacktrack(Comparison(kNE, byte_def, zero_def), on_bit_set);
+}
+
+
+// Emits a specialized test of the current character against the character
+// class identified by `type`, branching to on_no_match when the character is
+// not in the class.
+//
+// Returns true when code for the class was emitted here (nothing is emitted
+// for '*'), and false when this function has no custom implementation for
+// the class in the current mode.
+// NOTE(review): presumably the caller falls back to generic character-class
+// code on false; confirm at the call site.
+bool IRRegExpMacroAssembler::CheckSpecialCharacterClass(
+ uint16_t type,
+ BlockLabel* on_no_match) {
+ TAG();
+
+ // Range checks (c in min..max) are emitted here as two explicit compares
+ // (see CheckCharacterInRange / CheckCharacterNotInRange), not as a single
+ // unsigned (c - min) <= (max - min) check.
+ switch (type) {
+ case 's':
+ // Match space-characters
+ if (mode_ == ASCII) {
+ // One byte space characters are '\t'..'\r', ' ' and \u00a0.
+ BlockLabel success;
+ // Space (' ').
+ BranchOrBacktrack(Comparison(kEQ,
+ LoadLocal(current_character_),
+ Uint64Constant(' ')),
+ &success);
+ // Check range 0x09..0x0d.
+ CheckCharacterInRange('\t', '\r', &success);
+ // \u00a0 (NBSP).
+ BranchOrBacktrack(Comparison(kNE,
+ LoadLocal(current_character_),
+ Uint64Constant(0x00a0)),
+ on_no_match);
+ BindBlock(&success);
+ return true;
+ }
+ // No specialized code outside ASCII mode.
+ return false;
+ case 'S':
+ // The emitted code for generic character classes is good enough.
+ return false;
+ case 'd':
+ // Match ASCII digits ('0'..'9')
+ CheckCharacterNotInRange('0', '9', on_no_match);
+ return true;
+ case 'D':
+ // Match non ASCII-digits
+ CheckCharacterInRange('0', '9', on_no_match);
+ return true;
+ case '.': {
+ // Match non-newlines (not 0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029)
+ BranchOrBacktrack(Comparison(kEQ,
+ LoadLocal(current_character_),
+ Uint64Constant('\n')),
+ on_no_match);
+ BranchOrBacktrack(Comparison(kEQ,
+ LoadLocal(current_character_),
+ Uint64Constant('\r')),
+ on_no_match);
+ // The two-byte line terminators are only tested in UC16 mode.
+ if (mode_ == UC16) {
+ BranchOrBacktrack(Comparison(kEQ,
+ LoadLocal(current_character_),
+ Uint64Constant(0x2028)),
+ on_no_match);
+ BranchOrBacktrack(Comparison(kEQ,
+ LoadLocal(current_character_),
+ Uint64Constant(0x2029)),
+ on_no_match);
+ }
+ return true;
+ }
+ case 'w': {
+ if (mode_ != ASCII) {
+ // Table is 128 entries, so all ASCII characters can be tested.
+ // Anything above 'z' is rejected before the table lookup.
+ BranchOrBacktrack(Comparison(kGT,
+ LoadLocal(current_character_),
+ Uint64Constant('z')),
+ on_no_match);
+ }
+
+ PushArgumentInstr* table_push =
+ PushArgument(Bind(LoadLocal(word_character_map_)));
+ PushArgumentInstr* index_push =
+ PushArgument(Bind(LoadLocal(current_character_)));
+
+ Definition* byte_def = InstanceCall("[]", table_push, index_push);
+ Definition* zero_def = Int64Constant(0);
+
+ // A zero table entry means "not a word character".
+ BranchOrBacktrack(Comparison(kEQ, byte_def, zero_def), on_no_match);
+
+ return true;
+ }
+ case 'W': {
+ BlockLabel done;
+ if (mode_ != ASCII) {
+ // Table is 128 entries, so all ASCII characters can be tested.
+ // Anything above 'z' matches \W without consulting the table.
+ BranchOrBacktrack(Comparison(kGT,
+ LoadLocal(current_character_),
+ Uint64Constant('z')),
+ &done);
+ }
+
+ // TODO(jgruber): Refactor to use CheckBitInTable if possible.
+
+ PushArgumentInstr* table_push =
+ PushArgument(Bind(LoadLocal(word_character_map_)));
+ PushArgumentInstr* index_push =
+ PushArgument(Bind(LoadLocal(current_character_)));
+
+ Definition* byte_def = InstanceCall("[]", table_push, index_push);
+ Definition* zero_def = Int64Constant(0);
+
+ // A non-zero table entry means a word character, i.e. no match for \W.
+ BranchOrBacktrack(Comparison(kNE, byte_def, zero_def), on_no_match);
+
+ // `done` is only branched to in non-ASCII mode, so it is only bound there.
+ if (mode_ != ASCII) {
+ BindBlock(&done);
+ }
+ return true;
+ }
+ // Non-standard classes (with no syntactic shorthand) used internally.
+ case '*':
+ // Match any character.
+ return true;
+ case 'n': {
+ // Match newlines (0x0a('\n'), 0x0d('\r'), 0x2028 or 0x2029).
+ // The opposite of '.'.
+ BlockLabel success;
+ BranchOrBacktrack(Comparison(kEQ,
+ LoadLocal(current_character_),
+ Uint64Constant('\n')),
+ &success);
+ BranchOrBacktrack(Comparison(kEQ,
+ LoadLocal(current_character_),
+ Uint64Constant('\r')),
+ &success);
+ if (mode_ == UC16) {
+ BranchOrBacktrack(Comparison(kEQ,
+ LoadLocal(current_character_),
+ Uint64Constant(0x2028)),
+ &success);
+ BranchOrBacktrack(Comparison(kEQ,
+ LoadLocal(current_character_),
+ Uint64Constant(0x2029)),
+ &success);
+ }
+ // None of the newline characters matched: unconditional no-match.
+ BranchOrBacktrack(NULL, on_no_match);
+ BindBlock(&success);
+ return true;
+ }
+ // No custom implementation (yet): s(uint16_t), S(uint16_t).
+ default:
+ return false;
+ }
+}
+
+
+// Emits a jump to exit_block_, ending the match attempt as a failure.
+// Hits UNREACHABLE() first if global() is false.
+void IRRegExpMacroAssembler::Fail() {
+ TAG();
+ ASSERT(FAILURE == 0); // Return value for failure is zero.
+ if (!global()) {
+ UNREACHABLE(); // Dart regexps are always global.
+ }
+ Jump(exit_block_);
+}
+
+
+// Branches to if_ge when register `reg` holds a value >= comparand.
+// BranchOrBacktrack backtracks instead when if_ge is NULL.
+void IRRegExpMacroAssembler::IfRegisterGE(intptr_t reg,
+                                          intptr_t comparand,
+                                          BlockLabel* if_ge) {
+  TAG();
+  Definition* register_def = LoadLocal(position_register(reg));
+  Definition* comparand_def = Int64Constant(comparand);
+  BranchOrBacktrack(Comparison(kGTE, register_def, comparand_def), if_ge);
+}
+
+
+// Branches to if_lt when register `reg` holds a value < comparand.
+// BranchOrBacktrack backtracks instead when if_lt is NULL.
+void IRRegExpMacroAssembler::IfRegisterLT(intptr_t reg,
+                                          intptr_t comparand,
+                                          BlockLabel* if_lt) {
+  TAG();
+  Definition* register_def = LoadLocal(position_register(reg));
+  Definition* comparand_def = Int64Constant(comparand);
+  BranchOrBacktrack(Comparison(kLT, register_def, comparand_def), if_lt);
+}
+
+
+// Branches to if_eq when register `reg` equals the current position.
+// BranchOrBacktrack backtracks instead when if_eq is NULL.
+void IRRegExpMacroAssembler::IfRegisterEqPos(intptr_t reg,
+                                             BlockLabel* if_eq) {
+  TAG();
+  Definition* register_def = LoadLocal(position_register(reg));
+  Definition* position_def = LoadLocal(current_position_);
+  BranchOrBacktrack(Comparison(kEQ, register_def, position_def), if_eq);
+}
+
+
+// Identifies this assembler: always returns kIRImplementation.
+RegExpMacroAssembler::IrregexpImplementation
+ IRRegExpMacroAssembler::Implementation() {
+ return kIRImplementation;
+}
+
+
+// Loads `characters` characters at offset cp_offset from the current position
+// into current_character_ (via LoadCurrentCharacterUnchecked).
+//
+// When check_bounds is set, CheckPosition is first called for the last
+// character to be read (cp_offset + characters - 1), with on_end_of_input as
+// its target.
+void IRRegExpMacroAssembler::LoadCurrentCharacter(intptr_t cp_offset,
+ BlockLabel* on_end_of_input,
+ bool check_bounds,
+ intptr_t characters) {
+ TAG();
+ ASSERT(cp_offset >= -1); // ^ and \b can look behind one character.
+ ASSERT(cp_offset < (1<<30)); // Be sane! (And ensure negation works)
+ if (check_bounds) {
+ CheckPosition(cp_offset + characters - 1, on_end_of_input);
+ }
+ LoadCurrentCharacterUnchecked(cp_offset, characters);
+}
+
+
+// Pops the top of the backtracking stack into current_position_.
+void IRRegExpMacroAssembler::PopCurrentPosition() {
+ TAG();
+ StoreLocal(current_position_, PopStack());
+}
+
+
+// Pops the top of the backtracking stack into register `register_index`.
+void IRRegExpMacroAssembler::PopRegister(intptr_t register_index) {
+ TAG();
+ ASSERT(register_index < position_registers_.length());
+ StoreLocal(position_register(register_index), PopStack());
+}
+
+
+// Appends `definition` to the backtracking stack by emitting an "add" instance
+// call on stack_; the call's result is discarded.
+void IRRegExpMacroAssembler::PushStack(Definition *definition) {
+ // Receiver (the stack) is pushed first, then the value to add.
+ PushArgumentInstr* stack_push = PushArgument(Bind(LoadLocal(stack_)));
+ PushArgumentInstr* value_push = PushArgument(Bind(definition));
+ Do(InstanceCall("add", stack_push, value_push));
+}
+
+
+// Emits a "removeLast" instance call on stack_ and returns its bound result.
+Value* IRRegExpMacroAssembler::PopStack() {
+ PushArgumentInstr* stack_push = PushArgument(Bind(LoadLocal(stack_)));
+ return Bind(InstanceCall("removeLast", stack_push));
+}
+
+
+// Pushes the location corresponding to label to the backtracking stack.
+// Backtracking blocks do not necessarily have an ID at this point.
+// Push a dummy instead; these are later rewritten.
+void IRRegExpMacroAssembler::PushBacktrack(BlockLabel* label) {
+ TAG();
+ // Placeholder constant. It is recorded together with the label's block in
+ // backtrack_references_ so it can be found again when it is rewritten.
+ ConstantInstr* offset = Int64Constant(kOffsetNotYetSet);
+ backtrack_references_.Add(BacktrackReference(label->block(), offset));
+ PushStack(offset);
+}
+
+
+// Pushes current_position_ onto the backtracking stack.
+void IRRegExpMacroAssembler::PushCurrentPosition() {
+ TAG();
+ PushStack(LoadLocal(current_position_));
+}
+
+
+// Pushes register `register_index` onto the backtracking stack.
+// check_stack_limit is not used by this implementation.
+void IRRegExpMacroAssembler::PushRegister(intptr_t register_index,
+ StackCheckFlag check_stack_limit) {
+ TAG();
+ PushStack(LoadLocal(position_register(register_index)));
+}
+
+
+// Copies register `reg` into current_position_.
+void IRRegExpMacroAssembler::ReadCurrentPositionFromRegister(intptr_t reg) {
+ TAG();
+ StoreLocal(current_position_, Bind(LoadLocal(position_register(reg))));
+}
+
+// Resets the size of the stack to the value stored in reg.
+// This is done by emitting a "set:length" instance call on stack_, with the
+// register contents as the new length.
+void IRRegExpMacroAssembler::ReadStackPointerFromRegister(intptr_t reg) {
+ TAG();
+ ASSERT(reg < position_registers_.length());
+
+ PushArgumentInstr* stack_push = PushArgument(Bind(LoadLocal(stack_)));
+ PushArgumentInstr* length_push =
+ PushArgument(Bind(LoadLocal(position_register(reg))));
+
+ Do(InstanceCall("set:length", stack_push, length_push));
+}
+
+// Moves current_position_ forward to -by when it is currently below -by, and
+// reloads the character before the new position. Does nothing when the
+// position is already >= -by.
+void IRRegExpMacroAssembler::SetCurrentPositionFromEnd(intptr_t by) {
+  TAG();
+
+  BlockLabel position_ok;
+
+  Definition* position_def = LoadLocal(current_position_);
+  Definition* limit_def = Int64Constant(-by);
+
+  // Already at or past the requested offset: skip the adjustment.
+  BranchOrBacktrack(Comparison(kGTE, position_def, limit_def), &position_ok);
+
+  StoreLocal(current_position_, Bind(Int64Constant(-by)));
+
+  // On RegExp code entry (where this operation is used), the character before
+  // the current position is expected to be already loaded.
+  // We have advanced the position, so it's safe to read backwards.
+  LoadCurrentCharacterUnchecked(-1, 1);
+
+  BindBlock(&position_ok);
+}
+
+
+// Stores the constant `to` into register `register_index`. Only registers at
+// index position_registers_count_ or above may be set this way.
+void IRRegExpMacroAssembler::SetRegister(intptr_t register_index, intptr_t to) {
+ TAG();
+ // Reserved for positions!
+ ASSERT(register_index >= position_registers_count_);
+ StoreLocal(position_register(register_index), Bind(Int64Constant(to)));
+}
+
+
+// Emits a jump to success_block_ and returns global().
+bool IRRegExpMacroAssembler::Succeed() {
+ TAG();
+ Jump(success_block_);
+ return global();
+}
+
+
+// Stores current_position_ + cp_offset into register `reg`.
+void IRRegExpMacroAssembler::WriteCurrentPositionToRegister(
+ intptr_t reg, intptr_t cp_offset) {
+ TAG();
+
+ PushArgumentInstr* pos_push =
+ PushArgument(Bind(LoadLocal(current_position_)));
+ PushArgumentInstr* off_push =
+ PushArgument(Bind(Int64Constant(cp_offset)));
+
+ // Push the negative offset; these are converted to positive string positions
+ // within the success block.
+ StoreLocal(position_register(reg), Add(pos_push, off_push));
+}
+
+
+// Resets every register in the inclusive range [reg_from, reg_to] to the
+// "cleared" value (-1 - string length).
+void IRRegExpMacroAssembler::ClearRegisters(
+    intptr_t reg_from, intptr_t reg_to) {
+  TAG();
+
+  ASSERT(reg_from <= reg_to);
+  ASSERT(reg_to < position_registers_.length());
+
+  // In order to clear registers to a final result value of -1, set them to
+  // (-1 - string length), the offset of -1 from the end of the string.
+
+  intptr_t index = reg_from;
+  while (index <= reg_to) {
+    PushArgumentInstr* lhs_push = PushArgument(Bind(Int64Constant(-1)));
+    PushArgumentInstr* rhs_push =
+        PushArgument(Bind(LoadLocal(string_param_length_)));
+
+    StoreLocal(position_register(index), Sub(lhs_push, rhs_push));
+    ++index;
+  }
+}
+
+
+// Stores the current length of the backtracking stack (a "get:length"
+// instance call on stack_) into register `reg`.
+void IRRegExpMacroAssembler::WriteStackPointerToRegister(intptr_t reg) {
+ TAG();
+
+ PushArgumentInstr* stack_push = PushArgument(Bind(LoadLocal(stack_)));
+ Value* length_value = Bind(InstanceCall("get:length", stack_push));
+
+ StoreLocal(position_register(reg), length_value);
+}
+
+
+// Private methods:
+
+// Not implemented in this assembler: the body only hits UNIMPLEMENTED() and
+// ignores `scratch`.
+void IRRegExpMacroAssembler::CallCheckStackGuardState(Register scratch) {
+ UNIMPLEMENTED();
+}
+
+
+// Not implemented in this assembler: hits UNIMPLEMENTED() and ignores all of
+// its arguments. The trailing `return -1` only gives the function a return
+// value after that call.
+intptr_t IRRegExpMacroAssembler::CheckStackGuardState(Address* return_address,
+ Code* re_code,
+ Address re_frame) {
+ UNIMPLEMENTED();
+ return -1;
+}
+
+
+// Bounds check for reading at offset cp_offset from the current position:
+// branches to on_outside_input when current_position_ >= -cp_offset.
+// BranchOrBacktrack backtracks instead when on_outside_input is NULL.
+void IRRegExpMacroAssembler::CheckPosition(intptr_t cp_offset,
+                                           BlockLabel* on_outside_input) {
+  TAG();
+
+  // Remember, current_position_ is a negative offset from the string end.
+  // If (current_position_ < -cp_offset), we are in bounds.
+  Definition* position_def = LoadLocal(current_position_);
+  Definition* limit_def = Int64Constant(-cp_offset);
+
+  ComparisonInstr* out_of_bounds = Comparison(kGTE, position_def, limit_def);
+  BranchOrBacktrack(out_of_bounds, on_outside_input);
+}
+
+
+// Closes the current block with a conditional or unconditional transfer.
+//
+// comparison == NULL: unconditional. Jumps to true_successor, or backtracks
+//     when true_successor is NULL as well.
+// comparison != NULL: emits a branch whose true edge leads to true_successor
+//     (or to backtrack_block_ when true_successor is NULL) and whose false
+//     edge leads to a fresh block, which is bound so that subsequent code is
+//     emitted there.
+void IRRegExpMacroAssembler::BranchOrBacktrack(
+    ComparisonInstr* comparison,
+    BlockLabel* true_successor) {
+  // No condition: plain jump or backtrack.
+  if (comparison == NULL) {
+    if (true_successor != NULL) {
+      Jump(true_successor);
+    } else {
+      Backtrack();
+    }
+    return;
+  }
+
+  // Pick the block taken when the condition holds. If no successor block has
+  // been passed in, backtrack.
+  JoinEntryInstr* taken_block = NULL;
+  if (true_successor == NULL) {
+    taken_block = backtrack_block_;
+  } else {
+    true_successor->SetLinked();
+    taken_block = true_successor->block();
+  }
+  ASSERT(taken_block != NULL);
+
+  // If the condition is not true, fall through to a new block.
+  BlockLabel not_taken;
+
+  BranchInstr* branch = new(I) BranchInstr(comparison);
+  TargetEntryInstr* taken_target = TargetWithJoinGoto(taken_block);
+  *branch->true_successor_address() = taken_target;
+  TargetEntryInstr* not_taken_target = TargetWithJoinGoto(not_taken.block());
+  *branch->false_successor_address() = not_taken_target;
+
+  CloseBlockWith(branch);
+  BindBlock(&not_taken);
+}
+
+// Creates a new target block, registered in blocks_, whose only instruction
+// is a goto to `dst`, and returns it.
+TargetEntryInstr* IRRegExpMacroAssembler::TargetWithJoinGoto(
+    JoinEntryInstr* dst) {
+  TargetEntryInstr* entry =
+      new(I) TargetEntryInstr(AllocateBlockId(), kInvalidTryIndex);
+  blocks_.Add(entry);
+
+  GotoInstr* jump_to_join = new(I) GotoInstr(dst);
+  entry->AppendInstruction(jump_to_join);
+
+  return entry;
+}
+
+
+// Currently emits no check at all; only the trace tag is produced when
+// tracing is enabled.
+void IRRegExpMacroAssembler::CheckPreemption() {
+ TAG();
+ // TODO(jgruber): Implement the preemption check.
+}
+
+
+// Emits a "+" instance call with `lhs` as receiver and `rhs` as argument, and
+// returns the bound result.
+Value* IRRegExpMacroAssembler::Add(
+ PushArgumentInstr* lhs,
+ PushArgumentInstr* rhs) {
+ return Bind(InstanceCall("+", lhs, rhs));
+}
+
+
+// Emits a "-" instance call with `lhs` as receiver and `rhs` as argument, and
+// returns the bound result.
+Value* IRRegExpMacroAssembler::Sub(
+ PushArgumentInstr* lhs,
+ PushArgumentInstr* rhs) {
+ return Bind(InstanceCall("-", lhs, rhs));
+}
+
+
+// Loads `characters` characters starting at offset cp_offset from the current
+// position into current_character_, without any bounds check. The load is
+// emitted as a call to the core library's private _codeUnitsAt on the subject
+// string.
+void IRRegExpMacroAssembler::LoadCurrentCharacterUnchecked(
+ intptr_t cp_offset, intptr_t characters) {
+ TAG();
+
+ // Supported character counts depend on the mode.
+ if (mode_ == ASCII) {
+ ASSERT(characters == 1 || characters == 2 || characters == 4);
+ } else {
+ ASSERT(mode_ == UC16);
+ ASSERT(characters == 1 || characters == 2);
+ }
+
+ // Push the subject string (string_param_) as the receiver of the call.
+ PushArgumentInstr* pattern_push =
+ PushArgument(Bind(LoadLocal(string_param_)));
+
+ // Calculate the addressed string index as
+ // cp_offset + current_position_ + string_param_length_
+ PushArgumentInstr* cp_offset_push =
+ PushArgument(Bind(Int64Constant(cp_offset)));
+ PushArgumentInstr* cur_pos_push =
+ PushArgument(Bind(LoadLocal(current_position_)));
+
+ PushArgumentInstr* partial_sum_push =
+ PushArgument(Add(cp_offset_push, cur_pos_push));
+ PushArgumentInstr* length_push =
+ PushArgument(Bind(LoadLocal(string_param_length_)));
+
+ PushArgumentInstr* pos_push =
+ PushArgument(Add(partial_sum_push, length_push));
+
+ // Finally, push the character count.
+ PushArgumentInstr* count_push =
+ PushArgument(Bind(Uint64Constant(characters)));
+
+ // Call _codeUnitsAt(index, count) on the subject string.
+ const char* name = Library::PrivateCoreLibName(
+ String::Handle(Symbols::New("_codeUnitsAt"))).ToCString();
+ Value* code_unit_value =
+ Bind(InstanceCall(name, pattern_push, pos_push, count_push));
+
+ // Store the result as the current character.
+ StoreLocal(current_character_, code_unit_value);
+}
+
+
+// Emits a call to the core library's private _codeUnitsAt on the subject
+// string (string_param_) with `index` and a count of 1, and returns the bound
+// result.
+Value* IRRegExpMacroAssembler::CharacterAt(Definition* index) {
+ // Receiver first, then index and count.
+ PushArgumentInstr* pattern_push =
+ PushArgument(Bind(LoadLocal(string_param_)));
+ PushArgumentInstr* index_push = PushArgument(Bind(index));
+ PushArgumentInstr* count_push = PushArgument(Bind(Uint64Constant(1)));
+
+ const char* name = Library::PrivateCoreLibName(
+ String::Handle(Symbols::New("_codeUnitsAt"))).ToCString();
Florian Schneider 2014/09/16 11:11:49 Add _codeUnitsAt to VM symbols and use static acce
jgruber1 2014/09/22 18:58:04 Done.
+ return Bind(InstanceCall(name, pattern_push, index_push, count_push));
+}
+
+
+// Finishes code generation: wraps the blocks built so far in a synthetic
+// static function owned by the core library's RegExp class, builds a flow
+// graph from entry_block_, compiles it, and returns the function.
+//
+// source: not used by this implementation.
+//
+// The function is given kNumParameters dynamically typed parameters; the
+// first three are named "string", "start_index" and "matches". Its result
+// type is dynamic.
+Function& IRRegExpMacroAssembler::GetCode(const String& source) {
+  // At this point, code generation is completed, sanity check our bookkeeping.
+  ASSERT(args_pushed() == 0);
+  ASSERT(temp_count() == 0);
+
+  // Look up the regexp class to be used as the function owner.
+  const Library& core_lib = Library::Handle(Library::CoreLibrary());
+  const Class& regexp_class = Class::Handle(
+      core_lib.LookupClass(String::Handle(Symbols::New("RegExp"))));
+
+  // Set up our fake parsed function.
+  Function& function = Function::ZoneHandle(
+      I,
+      Function::New(String::Handle(I, Symbols::New("IrregexpFn")),
+                    RawFunction::kIrregexpFunction,
+                    true,  // Static.
+                    false,  // Not const.
+                    false,  // Not abstract.
+                    false,  // Not external.
+                    false,  // Not native.
+                    regexp_class,
+                    0));  // Requires a non-negative token position.
+
+  function.set_num_fixed_parameters(kNumParameters);
+  function.set_parameter_types(Array::Handle(Array::New(kNumParameters,
+                                                        Heap::kOld)));
+  function.set_parameter_names(Array::Handle(Array::New(kNumParameters,
+                                                        Heap::kOld)));
+  function.SetParameterTypeAt(0, Type::Handle(Type::DynamicType()));
+  function.SetParameterNameAt(0, String::ZoneHandle(I, Symbols::New("string")));
+  function.SetParameterTypeAt(1, Type::Handle(Type::DynamicType()));
+  function.SetParameterNameAt(
+      1, String::ZoneHandle(I, Symbols::New("start_index")));
+  function.SetParameterTypeAt(2, Type::Handle(Type::DynamicType()));
+  function.SetParameterNameAt(
+      2, String::ZoneHandle(I, Symbols::New("matches")));
+  // The result type only needs to be set once.
+  function.set_result_type(Type::Handle(Type::DynamicType()));
+
+  ParsedFunction* parsed_func = new(I) ParsedFunction(I, function);
+  parsed_func->AllocateIrregexpVariables(num_stack_locals());
+
+  static const bool kIsOptimized = false;
+
+  // Set up the flow graph.
+  ZoneGrowableArray<const ICData*>* ic_data_array =
+      new(I) ZoneGrowableArray<const ICData*>();
+  FlowGraphBuilder builder(parsed_func,
+                           *ic_data_array,
+                           NULL,  // NULL = not inlining.
+                           Isolate::kNoDeoptId,
+                           kIsOptimized);
+
+  // Indirect targets (i.e. all blocks reachable only through backtracking)
+  // must be attached to the graph entry in order to be discovered by the
+  // flow graph.
+  AttachIndirectTargets();
+
+  // A dense block ordering is available, rewrite all pushes to the backtracking
+  // stack to point to valid block IDs.
+  RewriteBacktrackPushes();
+
+  FlowGraph* flow_graph = new(I) FlowGraph(builder,
+                                           entry_block_,
+                                           next_block_id_);
+  const GrowableArray<BlockEntryInstr*>& blocks = flow_graph->preorder();
+
+  Error& error = Error::Handle(Compiler::CompileIrregexpFunction(
+      parsed_func, flow_graph));
+  if (!error.IsNull()) {
+    // Should never happen.
+    UNREACHABLE();
+  }
+
+  // Now that block offsets are known, we can insert them into the table.
+  FinalizeBlockOffsetTable(blocks);
+
+  return function;
+}
+
+
+#undef __
+
+} // namespace dart

Powered by Google App Engine
This is Rietveld 408576698