Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(17)

Unified Diff: runtime/vm/regexp_assembler.h

Issue 1201383002: Port irregexp bytecode compiler and interpreter from V8 r24065. (Closed) Base URL: git@github.com:dart-lang/sdk.git@master
Patch Set: Created 5 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « runtime/vm/regexp.cc ('k') | runtime/vm/regexp_assembler.cc » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: runtime/vm/regexp_assembler.h
diff --git a/runtime/vm/regexp_assembler.h b/runtime/vm/regexp_assembler.h
index 88aa06b027d5bca3868acdbc98bc0bc13f1a1555..821f05469fb15a4ed03d916658816addd7cc1fff 100644
--- a/runtime/vm/regexp_assembler.h
+++ b/runtime/vm/regexp_assembler.h
@@ -17,24 +17,13 @@ void PrintUtf16(uint16_t c);
/// Convenience wrapper around a BlockEntryInstr pointer.
class BlockLabel : public ValueObject {
+ // Used by the IR assembler.
public:
BlockLabel()
: block_(new JoinEntryInstr(-1, -1)),
is_bound_(false),
- is_linked_(false) { }
-
- BlockLabel(const BlockLabel& that)
- : ValueObject(),
- block_(that.block_),
- is_bound_(that.is_bound_),
- is_linked_(that.is_linked_) { }
-
- BlockLabel& operator=(const BlockLabel& that) {
- block_ = that.block_;
- is_bound_ = that.is_bound_;
- is_linked_ = that.is_linked_;
- return *this;
- }
+ is_linked_(false),
+ pos_(-1) { }
JoinEntryInstr* block() const { return block_; }
@@ -60,6 +49,41 @@ class BlockLabel : public ValueObject {
bool is_bound_;
bool is_linked_;
+
+ // Used by the bytecode assembler.
+ public:
+ ~BlockLabel() {
+ ASSERT(!is_linked());
+ }
+
+ intptr_t pos() const {
+ return pos_;
+ }
+ bool is_bound() const { return IsBound(); }
+ bool is_linked() const { return IsLinked(); }
+
+ void Unuse() {
+ pos_ = 0;
+ is_bound_ = false;
+ is_linked_ = false;
+ }
+
+ void bind_to(intptr_t pos) {
+ pos_ = pos;
+ is_bound_ = true;
+ is_linked_ = false;
+ ASSERT(is_bound());
+ }
+
+ void link_to(intptr_t pos) {
+ pos_ = pos;
+ is_bound_ = false;
+ is_linked_ = true;
+ ASSERT(is_linked());
+ }
+
+ private:
+ intptr_t pos_;
};
@@ -81,6 +105,7 @@ class RegExpMacroAssembler : public ZoneAllocated {
};
enum IrregexpImplementation {
+ kBytecodeImplementation,
kIRImplementation
};
@@ -217,433 +242,6 @@ class RegExpMacroAssembler : public ZoneAllocated {
Zone* zone_;
};
-
-class IRRegExpMacroAssembler : public RegExpMacroAssembler {
- public:
- // Type of input string to generate code for.
- enum Mode { ASCII = 1, UC16 = 2 };
-
- // Result of calling generated native RegExp code.
- // RETRY: Something significant changed during execution, and the matching
- // should be retried from scratch.
- // EXCEPTION: Something failed during execution. If no exception has been
- // thrown, it's an internal out-of-memory, and the caller should
- // throw the exception.
- // FAILURE: Matching failed.
- // SUCCESS: Matching succeeded, and the output array has been filled with
- // capture positions.
- enum Result { RETRY = -2, EXCEPTION = -1, FAILURE = 0, SUCCESS = 1 };
-
- IRRegExpMacroAssembler(intptr_t specialization_cid,
- intptr_t capture_count,
- const ParsedFunction* parsed_function,
- const ZoneGrowableArray<const ICData*>& ic_data_array,
- Zone* zone);
- virtual ~IRRegExpMacroAssembler();
-
- virtual bool CanReadUnaligned();
-
- // Compares two-byte strings case insensitively.
- // Called from generated RegExp code.
- static RawBool* CaseInsensitiveCompareUC16(
- RawString* str_raw,
- RawSmi* lhs_index_raw,
- RawSmi* rhs_index_raw,
- RawSmi* length_raw);
-
- static RawArray* Execute(const Function& function,
- const String& input,
- const Smi& start_offset,
- Zone* zone);
-
- virtual bool IsClosed() const { return (current_instruction_ == NULL); }
-
- virtual intptr_t stack_limit_slack();
- virtual void AdvanceCurrentPosition(intptr_t by);
- virtual void AdvanceRegister(intptr_t reg, intptr_t by);
- virtual void Backtrack();
- virtual void BindBlock(BlockLabel* label);
- virtual void CheckAtStart(BlockLabel* on_at_start);
- virtual void CheckCharacter(uint32_t c, BlockLabel* on_equal);
- virtual void CheckCharacterAfterAnd(uint32_t c,
- uint32_t mask,
- BlockLabel* on_equal);
- virtual void CheckCharacterGT(uint16_t limit, BlockLabel* on_greater);
- virtual void CheckCharacterLT(uint16_t limit, BlockLabel* on_less);
- // A "greedy loop" is a loop that is both greedy and with a simple
- // body. It has a particularly simple implementation.
- virtual void CheckGreedyLoop(BlockLabel* on_tos_equals_current_position);
- virtual void CheckNotAtStart(BlockLabel* on_not_at_start);
- virtual void CheckNotBackReference(intptr_t start_reg,
- BlockLabel* on_no_match);
- virtual void CheckNotBackReferenceIgnoreCase(intptr_t start_reg,
- BlockLabel* on_no_match);
- virtual void CheckNotCharacter(uint32_t c, BlockLabel* on_not_equal);
- virtual void CheckNotCharacterAfterAnd(uint32_t c,
- uint32_t mask,
- BlockLabel* on_not_equal);
- virtual void CheckNotCharacterAfterMinusAnd(uint16_t c,
- uint16_t minus,
- uint16_t mask,
- BlockLabel* on_not_equal);
- virtual void CheckCharacterInRange(uint16_t from,
- uint16_t to,
- BlockLabel* on_in_range);
- virtual void CheckCharacterNotInRange(uint16_t from,
- uint16_t to,
- BlockLabel* on_not_in_range);
- virtual void CheckBitInTable(const TypedData& table, BlockLabel* on_bit_set);
-
- // Checks whether the given offset from the current position is before
- // the end of the string.
- virtual void CheckPosition(intptr_t cp_offset, BlockLabel* on_outside_input);
- virtual bool CheckSpecialCharacterClass(
- uint16_t type, BlockLabel* on_no_match);
- virtual void Fail();
- virtual void IfRegisterGE(intptr_t reg,
- intptr_t comparand, BlockLabel* if_ge);
- virtual void IfRegisterLT(intptr_t reg,
- intptr_t comparand, BlockLabel* if_lt);
- virtual void IfRegisterEqPos(intptr_t reg, BlockLabel* if_eq);
- virtual IrregexpImplementation Implementation();
- virtual void GoTo(BlockLabel* to);
- virtual void LoadCurrentCharacter(intptr_t cp_offset,
- BlockLabel* on_end_of_input,
- bool check_bounds = true,
- intptr_t characters = 1);
- virtual void PopCurrentPosition();
- virtual void PopRegister(intptr_t register_index);
- virtual void Print(const char* str);
- virtual void PushBacktrack(BlockLabel* label);
- virtual void PushCurrentPosition();
- virtual void PushRegister(intptr_t register_index);
- virtual void ReadCurrentPositionFromRegister(intptr_t reg);
- virtual void ReadStackPointerFromRegister(intptr_t reg);
- virtual void SetCurrentPositionFromEnd(intptr_t by);
- virtual void SetRegister(intptr_t register_index, intptr_t to);
- virtual bool Succeed();
- virtual void WriteCurrentPositionToRegister(intptr_t reg, intptr_t cp_offset);
- virtual void ClearRegisters(intptr_t reg_from, intptr_t reg_to);
- virtual void WriteStackPointerToRegister(intptr_t reg);
-
- virtual void PrintBlocks();
-
- IndirectGotoInstr* backtrack_goto() const { return backtrack_goto_; }
- GraphEntryInstr* graph_entry() const { return entry_block_; }
-
- intptr_t num_stack_locals() const { return local_id_.Count(); }
- intptr_t num_blocks() const { return block_id_.Count(); }
-
- // Generate a dispatch block implementing backtracking. Must be done after
- // graph construction.
- void GenerateBacktrackBlock();
-
- // Allocate the actual registers array once its size is known. Must be done
- // after graph construction.
- void FinalizeRegistersArray();
-
- private:
- // Generate the contents of preset blocks. The entry block is the entry point
- // of the generated code.
- void GenerateEntryBlock();
- // Copies capture indices into the result area and returns true.
- void GenerateSuccessBlock();
- // Returns false.
- void GenerateExitBlock();
-
- enum ComparisonKind {
- kEQ,
- kNE,
- kLT,
- kGT,
- kLTE,
- kGTE,
- };
-
- struct InstanceCallDescriptor {
- // Standard (i.e. most non-Smi) functions.
- explicit InstanceCallDescriptor(const String& name)
- : name(name),
- token_kind(Token::kILLEGAL),
- checked_argument_count(1) { }
-
- InstanceCallDescriptor(const String& name,
- Token::Kind token_kind,
- intptr_t checked_argument_count)
- : name(name),
- token_kind(token_kind),
- checked_argument_count(checked_argument_count) { }
-
- // Special cases for Smi and indexing functions.
- static InstanceCallDescriptor FromToken(Token::Kind token_kind) {
- switch (token_kind) {
- case Token::kEQ: return InstanceCallDescriptor(
- Symbols::EqualOperator(), token_kind, 2);
- case Token::kADD: return InstanceCallDescriptor(
- Symbols::Plus(), token_kind, 2);
- case Token::kSUB: return InstanceCallDescriptor(
- Symbols::Minus(), token_kind, 2);
- case Token::kBIT_OR: return InstanceCallDescriptor(
- Symbols::BitOr(), token_kind, 2);
- case Token::kBIT_AND: return InstanceCallDescriptor(
- Symbols::BitAnd(), token_kind, 2);
- case Token::kLT: return InstanceCallDescriptor(
- Symbols::LAngleBracket(), token_kind, 2);
- case Token::kLTE: return InstanceCallDescriptor(
- Symbols::LessEqualOperator(), token_kind, 2);
- case Token::kGT: return InstanceCallDescriptor(
- Symbols::RAngleBracket(), token_kind, 2);
- case Token::kGTE: return InstanceCallDescriptor(
- Symbols::GreaterEqualOperator(), token_kind, 2);
- case Token::kNEGATE: return InstanceCallDescriptor(
- Symbols::UnaryMinus(), token_kind, 1);
- case Token::kINDEX: return InstanceCallDescriptor(
- Symbols::IndexToken(), token_kind, 2);
- case Token::kASSIGN_INDEX: return InstanceCallDescriptor(
- Symbols::AssignIndexToken(), token_kind, 2);
- default:
- UNREACHABLE();
- }
- UNREACHABLE();
- return InstanceCallDescriptor(Symbols::Empty());
- }
-
- const String& name;
- Token::Kind token_kind;
- intptr_t checked_argument_count;
- };
-
- LocalVariable* Local(const String& name);
- LocalVariable* Parameter(const String& name, intptr_t index) const;
-
- ConstantInstr* Int64Constant(int64_t value) const;
- ConstantInstr* Uint64Constant(uint64_t value) const;
- ConstantInstr* BoolConstant(bool value) const;
- ConstantInstr* StringConstant(const char* value) const;
-
- // The word character map static member of the RegExp class.
- // Byte map of one byte characters with a 0xff if the character is a word
- // character (digit, letter or underscore) and 0x00 otherwise.
- // Used by generated RegExp code.
- ConstantInstr* WordCharacterMapConstant() const;
-
- ComparisonInstr* Comparison(ComparisonKind kind,
- PushArgumentInstr* lhs,
- PushArgumentInstr* rhs);
- ComparisonInstr* Comparison(ComparisonKind kind,
- Definition* lhs,
- Definition* rhs);
-
- InstanceCallInstr* InstanceCall(const InstanceCallDescriptor& desc,
- PushArgumentInstr* arg1) const;
- InstanceCallInstr* InstanceCall(const InstanceCallDescriptor& desc,
- PushArgumentInstr* arg1,
- PushArgumentInstr* arg2) const;
- InstanceCallInstr* InstanceCall(const InstanceCallDescriptor& desc,
- PushArgumentInstr* arg1,
- PushArgumentInstr* arg2,
- PushArgumentInstr* arg3) const;
- InstanceCallInstr* InstanceCall(
- const InstanceCallDescriptor& desc,
- ZoneGrowableArray<PushArgumentInstr*>* arguments) const;
-
- StaticCallInstr* StaticCall(const Function& function) const;
- StaticCallInstr* StaticCall(const Function& function,
- PushArgumentInstr* arg1) const;
- StaticCallInstr* StaticCall(const Function& function,
- PushArgumentInstr* arg1,
- PushArgumentInstr* arg2) const;
- StaticCallInstr* StaticCall(
- const Function& function,
- ZoneGrowableArray<PushArgumentInstr*>* arguments) const;
-
- // Creates a new block consisting simply of a goto to dst.
- TargetEntryInstr* TargetWithJoinGoto(JoinEntryInstr* dst);
- IndirectEntryInstr* IndirectWithJoinGoto(JoinEntryInstr* dst);
-
- // Adds, respectively subtracts lhs and rhs and returns the result.
- Definition* Add(PushArgumentInstr* lhs, PushArgumentInstr* rhs);
- Definition* Sub(PushArgumentInstr* lhs, PushArgumentInstr* rhs);
-
- LoadLocalInstr* LoadLocal(LocalVariable* local) const;
- void StoreLocal(LocalVariable* local, Value* value);
-
- PushArgumentInstr* PushArgument(Value* value);
- PushArgumentInstr* PushLocal(LocalVariable* local);
-
- PushArgumentInstr* PushRegisterIndex(intptr_t reg);
- Value* LoadRegister(intptr_t reg);
- void StoreRegister(intptr_t reg, intptr_t value);
- void StoreRegister(PushArgumentInstr* registers,
- PushArgumentInstr* index,
- PushArgumentInstr* value);
-
- // Load a number of characters at the given offset from the
- // current position, into the current-character register.
- void LoadCurrentCharacterUnchecked(intptr_t cp_offset,
- intptr_t character_count);
-
- // Returns the character within the passed string at the specified index.
- Value* CharacterAt(LocalVariable* index);
-
- // Load a number of characters starting from index in the pattern string.
- Value* LoadCodeUnitsAt(LocalVariable* index, intptr_t character_count);
-
- // Check whether preemption has been requested.
- void CheckPreemption();
-
- // Byte size of chars in the string to match (decided by the Mode argument)
- inline intptr_t char_size() { return static_cast<int>(mode_); }
-
- // Equivalent to a conditional branch to the label, unless the label
- // is NULL, in which case it is a conditional Backtrack.
- void BranchOrBacktrack(ComparisonInstr* comparison,
- BlockLabel* true_successor);
-
- // Set up all local variables and parameters.
- void InitializeLocals();
-
- // Allocates a new local, and returns the appropriate id for placing it
- // on the stack.
- intptr_t GetNextLocalIndex();
-
- // We never have any copied parameters.
- intptr_t num_copied_params() const {
- return 0;
- }
-
- // Return the position register at the specified index, creating it if
- // necessary. Note that the number of such registers can exceed the amount
- // required by the number of output captures.
- LocalVariable* position_register(intptr_t index);
-
- void set_current_instruction(Instruction* instruction);
-
- // The following functions are responsible for appending instructions
- // to the current instruction in various ways. The most simple one
- // is AppendInstruction, which simply appends an instruction and performs
- // bookkeeping.
- void AppendInstruction(Instruction* instruction);
- // Similar to AppendInstruction, but closes the current block by
- // setting current_instruction_ to NULL.
- void CloseBlockWith(Instruction* instruction);
- // Appends definition and allocates a temp index for the result.
- Value* Bind(Definition* definition);
- // Loads and binds a local variable.
- Value* BindLoadLocal(const LocalVariable& local);
-
- // Appends the definition.
- void Do(Definition* definition);
- // Closes the current block with a jump to the specified block.
- void GoTo(JoinEntryInstr* to);
-
- // Accessors for our local stack_.
- void PushStack(Definition* definition);
- Definition* PopStack();
- Definition* PeekStack();
- void CheckStackLimit();
- void GrowStack();
-
- // Prints the specified argument. Used for debugging.
- void Print(PushArgumentInstr* argument);
-
- // A utility class tracking ids of various objects such as blocks, temps, etc.
- class IdAllocator : public ValueObject {
- public:
- IdAllocator() : next_id(0) { }
-
- intptr_t Count() const { return next_id; }
- intptr_t Alloc(intptr_t count = 1) {
- ASSERT(count >= 0);
- intptr_t current_id = next_id;
- next_id += count;
- return current_id;
- }
- void Dealloc(intptr_t count = 1) {
- ASSERT(count <= next_id);
- next_id -= count;
- }
-
- private:
- intptr_t next_id;
- };
-
- // Which mode to generate code for (ASCII or UC16).
- Mode mode_;
-
- // Which specific string class to generate code for.
- intptr_t specialization_cid_;
-
- // Block entries used internally.
- GraphEntryInstr* entry_block_;
- JoinEntryInstr* start_block_;
- JoinEntryInstr* success_block_;
- JoinEntryInstr* exit_block_;
-
- // Shared backtracking block.
- JoinEntryInstr* backtrack_block_;
- // Single indirect goto instruction which performs all backtracking.
- IndirectGotoInstr* backtrack_goto_;
-
- const ParsedFunction* parsed_function_;
- const ZoneGrowableArray<const ICData*>& ic_data_array_;
-
- // All created blocks are contained within this set. Used for printing
- // the generated code.
- GrowableArray<BlockEntryInstr*> blocks_;
-
- // The current instruction to link to when new code is emitted.
- Instruction* current_instruction_;
-
- // A list, acting as the runtime stack for both backtrack locations and
- // stored positions within the string.
- LocalVariable* stack_;
- LocalVariable* stack_pointer_;
-
- // Stores the current character within the string.
- LocalVariable* current_character_;
-
- // Stores the current location within the string as a negative offset
- // from the end of the string.
- LocalVariable* current_position_;
-
- // The string being processed, passed as a function parameter.
- LocalVariable* string_param_;
-
- // Stores the length of string_param_.
- LocalVariable* string_param_length_;
-
- // The start index within the string, passed as a function parameter.
- LocalVariable* start_index_param_;
-
- // An assortment of utility variables.
- LocalVariable* capture_length_;
- LocalVariable* match_start_index_;
- LocalVariable* capture_start_index_;
- LocalVariable* match_end_index_;
- LocalVariable* char_in_capture_;
- LocalVariable* char_in_match_;
- LocalVariable* index_temp_;
-
- LocalVariable* result_;
-
- // Stored positions containing group bounds. Generated as needed.
- LocalVariable* registers_;
- intptr_t registers_count_;
- const intptr_t saved_registers_count_;
-
- // The actual array objects used for the stack and registers.
- Array& stack_array_cell_;
- TypedData& registers_array_;
-
- IdAllocator block_id_;
- IdAllocator temp_id_;
- IdAllocator arg_id_;
- IdAllocator local_id_;
- IdAllocator indirect_id_;
-};
-
-
} // namespace dart
#endif // VM_REGEXP_ASSEMBLER_H_
« no previous file with comments | « runtime/vm/regexp.cc ('k') | runtime/vm/regexp_assembler.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698