Index: runtime/vm/regexp.cc |
diff --git a/runtime/vm/regexp.cc b/runtime/vm/regexp.cc |
index fcfa02c1028d08a61da80a235a83ad9876f298c4..db115a5791e8989770b0b9fa24465eea0ebdebea 100644 |
--- a/runtime/vm/regexp.cc |
+++ b/runtime/vm/regexp.cc |
@@ -6,6 +6,8 @@ |
#include "vm/dart_entry.h" |
#include "vm/regexp_assembler.h" |
+#include "vm/regexp_assembler_bytecode.h" |
+#include "vm/regexp_assembler_ir.h" |
#include "vm/regexp_ast.h" |
#include "vm/unibrow-inl.h" |
#include "vm/unicode.h" |
@@ -17,6 +19,8 @@ |
namespace dart { |
DECLARE_FLAG(bool, trace_irregexp); |
+DEFINE_FLAG(bool, interpret_irregexp, false, |
+ "Use irregexp bytecode interpreter"); |
// Default to generating optimized regexp code. |
static const bool kRegexpOptimization = true; |
@@ -294,16 +298,23 @@ class RegExpCompiler : public ValueObject { |
public: |
RegExpCompiler(intptr_t capture_count, |
bool ignore_case, |
- intptr_t specialization_cid); |
+ bool is_one_byte); |
intptr_t AllocateRegister() { |
return next_register_++; |
} |
- RegExpEngine::CompilationResult Assemble(IRRegExpMacroAssembler* assembler, |
- RegExpNode* start, |
- intptr_t capture_count, |
- const String& pattern); |
+ RegExpEngine::CompilationResult Assemble( |
+ IRRegExpMacroAssembler* assembler, |
+ RegExpNode* start, |
+ intptr_t capture_count, |
+ const String& pattern); |
+ |
+ RegExpEngine::CompilationResult Assemble( |
+ BytecodeRegExpMacroAssembler* assembler, |
+ RegExpNode* start, |
+ intptr_t capture_count, |
+ const String& pattern); |
inline void AddWork(RegExpNode* node) { work_list_->Add(node); } |
@@ -311,7 +322,7 @@ class RegExpCompiler : public ValueObject { |
static const intptr_t kNumberOfRegistersOffset = 0; |
static const intptr_t kCodeOffset = 1; |
- IRRegExpMacroAssembler* macro_assembler() { return macro_assembler_; } |
+ RegExpMacroAssembler* macro_assembler() { return macro_assembler_; } |
EndNode* accept() { return accept_; } |
static const intptr_t kMaxRecursion = 100; |
@@ -322,11 +333,7 @@ class RegExpCompiler : public ValueObject { |
void SetRegExpTooBig() { reg_exp_too_big_ = true; } |
inline bool ignore_case() { return ignore_case_; } |
- inline bool one_byte() const { |
- return (specialization_cid_ == kOneByteStringCid || |
- specialization_cid_ == kExternalOneByteStringCid); |
- } |
- inline intptr_t specialization_cid() { return specialization_cid_; } |
+ inline bool one_byte() const { return is_one_byte_; } |
FrequencyCollator* frequency_collator() { return &frequency_collator_; } |
intptr_t current_expansion_factor() { return current_expansion_factor_; } |
@@ -343,9 +350,9 @@ class RegExpCompiler : public ValueObject { |
intptr_t next_register_; |
ZoneGrowableArray<RegExpNode*>* work_list_; |
intptr_t recursion_depth_; |
- IRRegExpMacroAssembler* macro_assembler_; |
+ RegExpMacroAssembler* macro_assembler_; |
bool ignore_case_; |
- intptr_t specialization_cid_; |
+ bool is_one_byte_; |
bool reg_exp_too_big_; |
intptr_t current_expansion_factor_; |
FrequencyCollator frequency_collator_; |
@@ -371,13 +378,14 @@ static RegExpEngine::CompilationResult IrregexpRegExpTooBig() { |
// Attempts to compile the regexp using an Irregexp code generator. Returns |
// a fixed array or a null handle depending on whether it succeeded. |
-RegExpCompiler::RegExpCompiler(intptr_t capture_count, bool ignore_case, |
- intptr_t specialization_cid) |
+RegExpCompiler::RegExpCompiler(intptr_t capture_count, |
+ bool ignore_case, |
+ bool is_one_byte) |
: next_register_(2 * (capture_count + 1)), |
work_list_(NULL), |
recursion_depth_(0), |
ignore_case_(ignore_case), |
- specialization_cid_(specialization_cid), |
+ is_one_byte_(is_one_byte), |
reg_exp_too_big_(false), |
current_expansion_factor_(1), |
zone_(Thread::Current()->zone()) { |
@@ -390,9 +398,7 @@ RegExpEngine::CompilationResult RegExpCompiler::Assemble( |
RegExpNode* start, |
intptr_t capture_count, |
const String& pattern) { |
- static const bool use_slow_safe_regexp_compiler = false; |
- |
- macro_assembler->set_slow_safe(use_slow_safe_regexp_compiler); |
+ macro_assembler->set_slow_safe(false /* use_slow_safe_regexp_compiler */); |
macro_assembler_ = macro_assembler; |
ZoneGrowableArray<RegExpNode*> work_list(0); |
@@ -414,7 +420,34 @@ RegExpEngine::CompilationResult RegExpCompiler::Assemble( |
return RegExpEngine::CompilationResult(macro_assembler->backtrack_goto(), |
macro_assembler->graph_entry(), |
macro_assembler->num_blocks(), |
- macro_assembler->num_stack_locals()); |
+ macro_assembler->num_stack_locals(), |
+ next_register_); |
+} |
+ |
+ |
+RegExpEngine::CompilationResult RegExpCompiler::Assemble( |
+    BytecodeRegExpMacroAssembler* macro_assembler, |
+    RegExpNode* start, |
+    intptr_t capture_count, |
+    const String& pattern) { |
+  // Bytecode overload; capture_count and pattern are not read in this body. |
+  macro_assembler->set_slow_safe(false /* use_slow_safe_regexp_compiler */); |
+  macro_assembler_ = macro_assembler; |
+  ZoneGrowableArray<RegExpNode*> pending_nodes(0); |
+  work_list_ = &pending_nodes; |
+  BlockLabel on_failure;  // Bound below to the block that calls Fail(). |
+  macro_assembler_->PushBacktrack(&on_failure); |
+  Trace root_trace; |
+  start->Emit(this, &root_trace); |
+  macro_assembler_->BindBlock(&on_failure); |
+  macro_assembler_->Fail(); |
+  // Emit whatever nodes remain on the work list (filled through AddWork()). |
+  while (!pending_nodes.is_empty()) { |
+    pending_nodes.RemoveLast()->Emit(this, &root_trace); |
+  } |
+  if (reg_exp_too_big_) { return IrregexpRegExpTooBig(); } |
+  TypedData& code = TypedData::ZoneHandle(macro_assembler->GetBytecode()); |
+  return RegExpEngine::CompilationResult(&code, next_register_); |
+} |
@@ -4976,10 +5009,11 @@ void TextNode::FillInBMInfo(intptr_t initial_offset, |
} |
-RegExpEngine::CompilationResult RegExpEngine::Compile( |
+RegExpEngine::CompilationResult RegExpEngine::CompileIR( |
RegExpCompileData* data, |
const ParsedFunction* parsed_function, |
const ZoneGrowableArray<const ICData*>& ic_data_array) { |
+ ASSERT(!FLAG_interpret_irregexp); |
Zone* zone = Thread::Current()->zone(); |
const Function& function = parsed_function->function(); |
@@ -4995,7 +5029,7 @@ RegExpEngine::CompilationResult RegExpEngine::Compile( |
const bool ignore_case = regexp.is_ignore_case(); |
const bool is_global = regexp.is_global(); |
- RegExpCompiler compiler(data->capture_count, ignore_case, specialization_cid); |
+ RegExpCompiler compiler(data->capture_count, ignore_case, is_one_byte); |
// TODO(zerny): Frequency sampling is currently disabled because of several |
// issues. We do not want to store subject strings in the regexp object since |
@@ -5098,6 +5132,120 @@ RegExpEngine::CompilationResult RegExpEngine::Compile( |
} |
+// Compiles the regexp tree held by data into bytecode for the irregexp |
+// interpreter. Must only be called when FLAG_interpret_irregexp is set. Returns |
+// the result of RegExpCompiler::Assemble(), or an error if analysis fails. |
+RegExpEngine::CompilationResult RegExpEngine::CompileBytecode( |
+    RegExpCompileData* data, |
+    const JSRegExp& regexp, |
+    bool is_one_byte, |
+    Zone* zone) { |
+  ASSERT(FLAG_interpret_irregexp); |
+  // Check the regexp handle before it is dereferenced for its pattern. |
+  ASSERT(!regexp.IsNull()); |
+  const String& pattern = String::Handle(zone, regexp.pattern()); |
+  ASSERT(!pattern.IsNull()); |
+ |
+  const bool ignore_case = regexp.is_ignore_case(); |
+  const bool is_global = regexp.is_global(); |
+ |
+  RegExpCompiler compiler(data->capture_count, ignore_case, is_one_byte); |
+ |
+  // TODO(zerny): Frequency sampling is currently disabled because of several |
+  // issues. We do not want to store subject strings in the regexp object since |
+  // they might be long and we should not prevent their garbage collection. |
+  // Passing them to this function explicitly does not help, since we must |
+  // generate exactly the same IR for both the unoptimizing and optimizing |
+  // pipelines (otherwise it gets confused when e.g. deopt ids differ). |
+  // An option would be to store sampling results in the regexp object, but |
+  // I'm not sure the performance gains are relevant enough. |
+ |
+  // Wrap the body of the regexp in capture #0. |
+  RegExpNode* captured_body = RegExpCapture::ToNode(data->tree, |
+                                                    0, |
+                                                    &compiler, |
+                                                    compiler.accept()); |
+ |
+  RegExpNode* node = captured_body; |
+  const bool is_end_anchored = data->tree->IsAnchoredAtEnd(); |
+  const bool is_start_anchored = data->tree->IsAnchoredAtStart(); |
+  const intptr_t max_length = data->tree->max_match(); |
+  if (!is_start_anchored) { |
+    // Add a .*? at the beginning, outside the body capture, unless |
+    // this expression is anchored at the beginning. |
+    RegExpNode* loop_node = |
+        RegExpQuantifier::ToNode(0, |
+                                 RegExpTree::kInfinity, |
+                                 false, |
+                                 new(zone) RegExpCharacterClass('*'), |
+                                 &compiler, |
+                                 captured_body, |
+                                 data->contains_anchor); |
+ |
+    if (data->contains_anchor) { |
+      // Unroll loop once, to take care of the case that might start |
+      // at the start of input. |
+      ChoiceNode* first_step_node = new(zone) ChoiceNode(2, zone); |
+      first_step_node->AddAlternative(GuardedAlternative(captured_body)); |
+      first_step_node->AddAlternative(GuardedAlternative( |
+          new(zone) TextNode( |
+              new(zone) RegExpCharacterClass('*'), loop_node))); |
+      node = first_step_node; |
+    } else { |
+      node = loop_node; |
+    } |
+  } |
+  if (is_one_byte) { |
+    node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, ignore_case); |
+    // Do it again to propagate the new nodes to places where they were not |
+    // put because they had not been calculated yet. |
+    if (node != NULL) { |
+      node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, ignore_case); |
+    } |
+  } |
+ |
+  if (node == NULL) node = new(zone) EndNode(EndNode::BACKTRACK, zone); |
+  data->node = node; |
+  Analysis analysis(ignore_case, is_one_byte); |
+  analysis.EnsureAnalyzed(node); |
+  if (analysis.has_failed()) { |
+    return CompilationResult(analysis.error_message()); |
+  } |
+ |
+  // Bytecode regexp implementation. |
+  ZoneGrowableArray<uint8_t> buffer(zone, 1024); |
+  BytecodeRegExpMacroAssembler* macro_assembler = |
+      new(zone) BytecodeRegExpMacroAssembler(&buffer, zone); |
+ |
+  // Inserted here, instead of in Assembler, because it depends on information |
+  // in the AST that isn't replicated in the Node structure. |
+  static const intptr_t kMaxBacksearchLimit = 1024; |
+  if (is_end_anchored && |
+      !is_start_anchored && |
+      max_length < kMaxBacksearchLimit) { |
+    macro_assembler->SetCurrentPositionFromEnd(max_length); |
+  } |
+ |
+  if (is_global) { |
+    macro_assembler->set_global_mode( |
+        (data->tree->min_match() > 0) |
+            ? RegExpMacroAssembler::GLOBAL_NO_ZERO_LENGTH_CHECK |
+            : RegExpMacroAssembler::GLOBAL); |
+  } |
+ |
+  RegExpEngine::CompilationResult result = |
+      compiler.Assemble(macro_assembler, |
+                        node, |
+                        data->capture_count, |
+                        pattern); |
+ |
+  if (FLAG_trace_irregexp) { |
+    macro_assembler->PrintBlocks(); |
+  } |
+  return result; |
+} |
+ |
+ |
static void CreateSpecializedFunction(Zone* zone, |
const JSRegExp& regexp, |
intptr_t specialization_cid, |