Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1953)

Unified Diff: runtime/vm/regexp.cc

Issue 1201383002: Port irregexp bytecode compiler and interpreter from V8 r24065. (Closed) Base URL: git@github.com:dart-lang/sdk.git@master
Patch Set: Created 5 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « runtime/vm/regexp.h ('k') | runtime/vm/regexp_assembler.h » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: runtime/vm/regexp.cc
diff --git a/runtime/vm/regexp.cc b/runtime/vm/regexp.cc
index fcfa02c1028d08a61da80a235a83ad9876f298c4..db115a5791e8989770b0b9fa24465eea0ebdebea 100644
--- a/runtime/vm/regexp.cc
+++ b/runtime/vm/regexp.cc
@@ -6,6 +6,8 @@
#include "vm/dart_entry.h"
#include "vm/regexp_assembler.h"
+#include "vm/regexp_assembler_bytecode.h"
+#include "vm/regexp_assembler_ir.h"
#include "vm/regexp_ast.h"
#include "vm/unibrow-inl.h"
#include "vm/unicode.h"
@@ -17,6 +19,8 @@
namespace dart {
DECLARE_FLAG(bool, trace_irregexp);
+DEFINE_FLAG(bool, interpret_irregexp, false,
+ "Use irregexp bytecode interpreter");
// Default to generating optimized regexp code.
static const bool kRegexpOptimization = true;
@@ -294,16 +298,23 @@ class RegExpCompiler : public ValueObject {
public:
RegExpCompiler(intptr_t capture_count,
bool ignore_case,
- intptr_t specialization_cid);
+ bool is_one_byte);
intptr_t AllocateRegister() {
return next_register_++;
}
- RegExpEngine::CompilationResult Assemble(IRRegExpMacroAssembler* assembler,
- RegExpNode* start,
- intptr_t capture_count,
- const String& pattern);
+ RegExpEngine::CompilationResult Assemble(
+ IRRegExpMacroAssembler* assembler,
+ RegExpNode* start,
+ intptr_t capture_count,
+ const String& pattern);
+
+ RegExpEngine::CompilationResult Assemble(
+ BytecodeRegExpMacroAssembler* assembler,
+ RegExpNode* start,
+ intptr_t capture_count,
+ const String& pattern);
inline void AddWork(RegExpNode* node) { work_list_->Add(node); }
@@ -311,7 +322,7 @@ class RegExpCompiler : public ValueObject {
static const intptr_t kNumberOfRegistersOffset = 0;
static const intptr_t kCodeOffset = 1;
- IRRegExpMacroAssembler* macro_assembler() { return macro_assembler_; }
+ RegExpMacroAssembler* macro_assembler() { return macro_assembler_; }
EndNode* accept() { return accept_; }
static const intptr_t kMaxRecursion = 100;
@@ -322,11 +333,7 @@ class RegExpCompiler : public ValueObject {
void SetRegExpTooBig() { reg_exp_too_big_ = true; }
inline bool ignore_case() { return ignore_case_; }
- inline bool one_byte() const {
- return (specialization_cid_ == kOneByteStringCid ||
- specialization_cid_ == kExternalOneByteStringCid);
- }
- inline intptr_t specialization_cid() { return specialization_cid_; }
+ inline bool one_byte() const { return is_one_byte_; }
FrequencyCollator* frequency_collator() { return &frequency_collator_; }
intptr_t current_expansion_factor() { return current_expansion_factor_; }
@@ -343,9 +350,9 @@ class RegExpCompiler : public ValueObject {
intptr_t next_register_;
ZoneGrowableArray<RegExpNode*>* work_list_;
intptr_t recursion_depth_;
- IRRegExpMacroAssembler* macro_assembler_;
+ RegExpMacroAssembler* macro_assembler_;
bool ignore_case_;
- intptr_t specialization_cid_;
+ bool is_one_byte_;
bool reg_exp_too_big_;
intptr_t current_expansion_factor_;
FrequencyCollator frequency_collator_;
@@ -371,13 +378,14 @@ static RegExpEngine::CompilationResult IrregexpRegExpTooBig() {
// Attempts to compile the regexp using an Irregexp code generator. Returns
// a fixed array or a null handle depending on whether it succeeded.
-RegExpCompiler::RegExpCompiler(intptr_t capture_count, bool ignore_case,
- intptr_t specialization_cid)
+RegExpCompiler::RegExpCompiler(intptr_t capture_count,
+ bool ignore_case,
+ bool is_one_byte)
: next_register_(2 * (capture_count + 1)),
work_list_(NULL),
recursion_depth_(0),
ignore_case_(ignore_case),
- specialization_cid_(specialization_cid),
+ is_one_byte_(is_one_byte),
reg_exp_too_big_(false),
current_expansion_factor_(1),
zone_(Thread::Current()->zone()) {
@@ -390,9 +398,7 @@ RegExpEngine::CompilationResult RegExpCompiler::Assemble(
RegExpNode* start,
intptr_t capture_count,
const String& pattern) {
- static const bool use_slow_safe_regexp_compiler = false;
-
- macro_assembler->set_slow_safe(use_slow_safe_regexp_compiler);
+ macro_assembler->set_slow_safe(false /* use_slow_safe_regexp_compiler */);
macro_assembler_ = macro_assembler;
ZoneGrowableArray<RegExpNode*> work_list(0);
@@ -414,7 +420,34 @@ RegExpEngine::CompilationResult RegExpCompiler::Assemble(
return RegExpEngine::CompilationResult(macro_assembler->backtrack_goto(),
macro_assembler->graph_entry(),
macro_assembler->num_blocks(),
- macro_assembler->num_stack_locals());
+ macro_assembler->num_stack_locals(),
+ next_register_);
+}
+
+
+RegExpEngine::CompilationResult RegExpCompiler::Assemble(
+ BytecodeRegExpMacroAssembler* macro_assembler,
+ RegExpNode* start,
+ intptr_t capture_count,
+ const String& pattern) {
+ macro_assembler->set_slow_safe(false /* use_slow_safe_regexp_compiler */);
+ macro_assembler_ = macro_assembler;
+
+ ZoneGrowableArray<RegExpNode*> work_list(0);
+ work_list_ = &work_list;
+ BlockLabel fail;
+ macro_assembler_->PushBacktrack(&fail);
+ Trace new_trace;
+ start->Emit(this, &new_trace);
+ macro_assembler_->BindBlock(&fail);
+ macro_assembler_->Fail();
+ while (!work_list.is_empty()) {
+ work_list.RemoveLast()->Emit(this, &new_trace);
+ }
+ if (reg_exp_too_big_) return IrregexpRegExpTooBig();
+
+ TypedData& bytecode = TypedData::ZoneHandle(macro_assembler->GetBytecode());
+ return RegExpEngine::CompilationResult(&bytecode, next_register_);
}
@@ -4976,10 +5009,11 @@ void TextNode::FillInBMInfo(intptr_t initial_offset,
}
-RegExpEngine::CompilationResult RegExpEngine::Compile(
+RegExpEngine::CompilationResult RegExpEngine::CompileIR(
RegExpCompileData* data,
const ParsedFunction* parsed_function,
const ZoneGrowableArray<const ICData*>& ic_data_array) {
+ ASSERT(!FLAG_interpret_irregexp);
Zone* zone = Thread::Current()->zone();
const Function& function = parsed_function->function();
@@ -4995,7 +5029,7 @@ RegExpEngine::CompilationResult RegExpEngine::Compile(
const bool ignore_case = regexp.is_ignore_case();
const bool is_global = regexp.is_global();
- RegExpCompiler compiler(data->capture_count, ignore_case, specialization_cid);
+ RegExpCompiler compiler(data->capture_count, ignore_case, is_one_byte);
// TODO(zerny): Frequency sampling is currently disabled because of several
// issues. We do not want to store subject strings in the regexp object since
@@ -5098,6 +5132,120 @@ RegExpEngine::CompilationResult RegExpEngine::Compile(
}
+RegExpEngine::CompilationResult RegExpEngine::CompileBytecode(
+ RegExpCompileData* data,
+ const JSRegExp& regexp,
+ bool is_one_byte,
+ Zone* zone) {
+ ASSERT(FLAG_interpret_irregexp);
+ const String& pattern = String::Handle(zone, regexp.pattern());
+
+ ASSERT(!regexp.IsNull());
+ ASSERT(!pattern.IsNull());
+
+ const bool ignore_case = regexp.is_ignore_case();
+ const bool is_global = regexp.is_global();
+
+ RegExpCompiler compiler(data->capture_count, ignore_case, is_one_byte);
+
+ // TODO(zerny): Frequency sampling is currently disabled because of several
+ // issues. We do not want to store subject strings in the regexp object since
+ // they might be long and we should not prevent their garbage collection.
+ // Passing them to this function explicitly does not help, since we must
+ // generate exactly the same IR for both the unoptimizing and optimizing
+ // pipelines (otherwise it gets confused when i.e. deopt id's differ).
+ // An option would be to store sampling results in the regexp object, but
+ // I'm not sure the performance gains are relevant enough.
+
+ // Wrap the body of the regexp in capture #0.
+ RegExpNode* captured_body = RegExpCapture::ToNode(data->tree,
+ 0,
+ &compiler,
+ compiler.accept());
+
+ RegExpNode* node = captured_body;
+ bool is_end_anchored = data->tree->IsAnchoredAtEnd();
+ bool is_start_anchored = data->tree->IsAnchoredAtStart();
+ intptr_t max_length = data->tree->max_match();
+ if (!is_start_anchored) {
+ // Add a .*? at the beginning, outside the body capture, unless
+ // this expression is anchored at the beginning.
+ RegExpNode* loop_node =
+ RegExpQuantifier::ToNode(0,
+ RegExpTree::kInfinity,
+ false,
+ new(zone) RegExpCharacterClass('*'),
+ &compiler,
+ captured_body,
+ data->contains_anchor);
+
+ if (data->contains_anchor) {
+ // Unroll loop once, to take care of the case that might start
+ // at the start of input.
+ ChoiceNode* first_step_node = new(zone) ChoiceNode(2, zone);
+ first_step_node->AddAlternative(GuardedAlternative(captured_body));
+ first_step_node->AddAlternative(GuardedAlternative(
+ new(zone) TextNode(
+ new(zone) RegExpCharacterClass('*'), loop_node)));
+ node = first_step_node;
+ } else {
+ node = loop_node;
+ }
+ }
+ if (is_one_byte) {
+ node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, ignore_case);
+ // Do it again to propagate the new nodes to places where they were not
+ // put because they had not been calculated yet.
+ if (node != NULL) {
+ node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, ignore_case);
+ }
+ }
+
+ if (node == NULL) node = new(zone) EndNode(EndNode::BACKTRACK, zone);
+ data->node = node;
+ Analysis analysis(ignore_case, is_one_byte);
+ analysis.EnsureAnalyzed(node);
+ if (analysis.has_failed()) {
+ const char* error_message = analysis.error_message();
+ return CompilationResult(error_message);
+ }
+
+ // Bytecode regexp implementation.
+
+ ZoneGrowableArray<uint8_t> buffer(zone, 1024);
+ BytecodeRegExpMacroAssembler* macro_assembler =
+ new(zone) BytecodeRegExpMacroAssembler(&buffer, zone);
+
+ // Inserted here, instead of in Assembler, because it depends on information
+ // in the AST that isn't replicated in the Node structure.
+ static const intptr_t kMaxBacksearchLimit = 1024;
+ if (is_end_anchored &&
+ !is_start_anchored &&
+ max_length < kMaxBacksearchLimit) {
+ macro_assembler->SetCurrentPositionFromEnd(max_length);
+ }
+
+ if (is_global) {
+ macro_assembler->set_global_mode(
+ (data->tree->min_match() > 0)
+ ? RegExpMacroAssembler::GLOBAL_NO_ZERO_LENGTH_CHECK
+ : RegExpMacroAssembler::GLOBAL);
+ }
+
+ RegExpEngine::CompilationResult result =
+ compiler.Assemble(macro_assembler,
+ node,
+ data->capture_count,
+ pattern);
+
+ if (FLAG_trace_irregexp) {
+ macro_assembler->PrintBlocks();
+ }
+
+ return result;
+}
+
+
static void CreateSpecializedFunction(Zone* zone,
const JSRegExp& regexp,
intptr_t specialization_cid,
« no previous file with comments | « runtime/vm/regexp.h ('k') | runtime/vm/regexp_assembler.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698