| Index: runtime/vm/regexp.cc | 
| diff --git a/runtime/vm/regexp.cc b/runtime/vm/regexp.cc | 
| index fcfa02c1028d08a61da80a235a83ad9876f298c4..db115a5791e8989770b0b9fa24465eea0ebdebea 100644 | 
| --- a/runtime/vm/regexp.cc | 
| +++ b/runtime/vm/regexp.cc | 
| @@ -6,6 +6,8 @@ | 
|  | 
| #include "vm/dart_entry.h" | 
| #include "vm/regexp_assembler.h" | 
| +#include "vm/regexp_assembler_bytecode.h" | 
| +#include "vm/regexp_assembler_ir.h" | 
| #include "vm/regexp_ast.h" | 
| #include "vm/unibrow-inl.h" | 
| #include "vm/unicode.h" | 
| @@ -17,6 +19,8 @@ | 
| namespace dart { | 
|  | 
| DECLARE_FLAG(bool, trace_irregexp); | 
| +DEFINE_FLAG(bool, interpret_irregexp, false, | 
| +            "Use irregexp bytecode interpreter"); | 
|  | 
| // Default to generating optimized regexp code. | 
| static const bool kRegexpOptimization = true; | 
| @@ -294,16 +298,23 @@ class RegExpCompiler : public ValueObject { | 
| public: | 
| RegExpCompiler(intptr_t capture_count, | 
| bool ignore_case, | 
| -                 intptr_t specialization_cid); | 
| +                 bool is_one_byte); | 
|  | 
| intptr_t AllocateRegister() { | 
| return next_register_++; | 
| } | 
|  | 
| -  RegExpEngine::CompilationResult Assemble(IRRegExpMacroAssembler* assembler, | 
| -                                           RegExpNode* start, | 
| -                                           intptr_t capture_count, | 
| -                                           const String& pattern); | 
| +  RegExpEngine::CompilationResult Assemble( | 
| +      IRRegExpMacroAssembler* assembler, | 
| +      RegExpNode* start, | 
| +      intptr_t capture_count, | 
| +      const String& pattern); | 
| + | 
| +  RegExpEngine::CompilationResult Assemble( | 
| +      BytecodeRegExpMacroAssembler* assembler, | 
| +      RegExpNode* start, | 
| +      intptr_t capture_count, | 
| +      const String& pattern); | 
|  | 
| inline void AddWork(RegExpNode* node) { work_list_->Add(node); } | 
|  | 
| @@ -311,7 +322,7 @@ class RegExpCompiler : public ValueObject { | 
| static const intptr_t kNumberOfRegistersOffset = 0; | 
| static const intptr_t kCodeOffset = 1; | 
|  | 
| -  IRRegExpMacroAssembler* macro_assembler() { return macro_assembler_; } | 
| +  RegExpMacroAssembler* macro_assembler() { return macro_assembler_; } | 
| EndNode* accept() { return accept_; } | 
|  | 
| static const intptr_t kMaxRecursion = 100; | 
| @@ -322,11 +333,7 @@ class RegExpCompiler : public ValueObject { | 
| void SetRegExpTooBig() { reg_exp_too_big_ = true; } | 
|  | 
| inline bool ignore_case() { return ignore_case_; } | 
| -  inline bool one_byte() const { | 
| -    return (specialization_cid_ == kOneByteStringCid || | 
| -            specialization_cid_ == kExternalOneByteStringCid); | 
| -  } | 
| -  inline intptr_t specialization_cid() { return specialization_cid_; } | 
| +  inline bool one_byte() const { return is_one_byte_; } | 
| FrequencyCollator* frequency_collator() { return &frequency_collator_; } | 
|  | 
| intptr_t current_expansion_factor() { return current_expansion_factor_; } | 
| @@ -343,9 +350,9 @@ class RegExpCompiler : public ValueObject { | 
| intptr_t next_register_; | 
| ZoneGrowableArray<RegExpNode*>* work_list_; | 
| intptr_t recursion_depth_; | 
| -  IRRegExpMacroAssembler* macro_assembler_; | 
| +  RegExpMacroAssembler* macro_assembler_; | 
| bool ignore_case_; | 
| -  intptr_t specialization_cid_; | 
| +  bool is_one_byte_; | 
| bool reg_exp_too_big_; | 
| intptr_t current_expansion_factor_; | 
| FrequencyCollator frequency_collator_; | 
| @@ -371,13 +378,14 @@ static RegExpEngine::CompilationResult IrregexpRegExpTooBig() { | 
|  | 
| // Attempts to compile the regexp using an Irregexp code generator.  Returns | 
| // a fixed array or a null handle depending on whether it succeeded. | 
| -RegExpCompiler::RegExpCompiler(intptr_t capture_count, bool ignore_case, | 
| -                               intptr_t specialization_cid) | 
| +RegExpCompiler::RegExpCompiler(intptr_t capture_count, | 
| +                               bool ignore_case, | 
| +                               bool is_one_byte) | 
| : next_register_(2 * (capture_count + 1)), | 
| work_list_(NULL), | 
| recursion_depth_(0), | 
| ignore_case_(ignore_case), | 
| -      specialization_cid_(specialization_cid), | 
| +      is_one_byte_(is_one_byte), | 
| reg_exp_too_big_(false), | 
| current_expansion_factor_(1), | 
| zone_(Thread::Current()->zone()) { | 
| @@ -390,9 +398,7 @@ RegExpEngine::CompilationResult RegExpCompiler::Assemble( | 
| RegExpNode* start, | 
| intptr_t capture_count, | 
| const String& pattern) { | 
| -  static const bool use_slow_safe_regexp_compiler = false; | 
| - | 
| -  macro_assembler->set_slow_safe(use_slow_safe_regexp_compiler); | 
| +  macro_assembler->set_slow_safe(false /* use_slow_safe_regexp_compiler */); | 
| macro_assembler_ = macro_assembler; | 
|  | 
| ZoneGrowableArray<RegExpNode*> work_list(0); | 
| @@ -414,7 +420,34 @@ RegExpEngine::CompilationResult RegExpCompiler::Assemble( | 
| return RegExpEngine::CompilationResult(macro_assembler->backtrack_goto(), | 
| macro_assembler->graph_entry(), | 
| macro_assembler->num_blocks(), | 
| -                                         macro_assembler->num_stack_locals()); | 
| +                                         macro_assembler->num_stack_locals(), | 
| +                                         next_register_); | 
| +} | 
| + | 
| + | 
| +RegExpEngine::CompilationResult RegExpCompiler::Assemble( | 
| +    BytecodeRegExpMacroAssembler* macro_assembler, | 
| +    RegExpNode* start, | 
| +    intptr_t capture_count, | 
| +    const String& pattern) { | 
| +  macro_assembler->set_slow_safe(false /* use_slow_safe_regexp_compiler */); | 
| +  macro_assembler_ = macro_assembler; | 
| + | 
| +  ZoneGrowableArray<RegExpNode*> work_list(0); | 
| +  work_list_ = &work_list; | 
| +  BlockLabel fail; | 
| +  macro_assembler_->PushBacktrack(&fail); | 
| +  Trace new_trace; | 
| +  start->Emit(this, &new_trace); | 
| +  macro_assembler_->BindBlock(&fail); | 
| +  macro_assembler_->Fail(); | 
| +  while (!work_list.is_empty()) { | 
| +    work_list.RemoveLast()->Emit(this, &new_trace); | 
| +  } | 
| +  if (reg_exp_too_big_) return IrregexpRegExpTooBig(); | 
| + | 
| +  TypedData& bytecode = TypedData::ZoneHandle(macro_assembler->GetBytecode()); | 
| +  return RegExpEngine::CompilationResult(&bytecode, next_register_); | 
| } | 
|  | 
|  | 
| @@ -4976,10 +5009,11 @@ void TextNode::FillInBMInfo(intptr_t initial_offset, | 
| } | 
|  | 
|  | 
| -RegExpEngine::CompilationResult RegExpEngine::Compile( | 
| +RegExpEngine::CompilationResult RegExpEngine::CompileIR( | 
| RegExpCompileData* data, | 
| const ParsedFunction* parsed_function, | 
| const ZoneGrowableArray<const ICData*>& ic_data_array) { | 
| +  ASSERT(!FLAG_interpret_irregexp); | 
| Zone* zone = Thread::Current()->zone(); | 
|  | 
| const Function& function = parsed_function->function(); | 
| @@ -4995,7 +5029,7 @@ RegExpEngine::CompilationResult RegExpEngine::Compile( | 
| const bool ignore_case = regexp.is_ignore_case(); | 
| const bool is_global = regexp.is_global(); | 
|  | 
| -  RegExpCompiler compiler(data->capture_count, ignore_case, specialization_cid); | 
| +  RegExpCompiler compiler(data->capture_count, ignore_case, is_one_byte); | 
|  | 
| // TODO(zerny): Frequency sampling is currently disabled because of several | 
| // issues. We do not want to store subject strings in the regexp object since | 
| @@ -5098,6 +5132,120 @@ RegExpEngine::CompilationResult RegExpEngine::Compile( | 
| } | 
|  | 
|  | 
| +RegExpEngine::CompilationResult RegExpEngine::CompileBytecode( | 
| +    RegExpCompileData* data, | 
| +    const JSRegExp& regexp, | 
| +    bool is_one_byte, | 
| +    Zone* zone) { | 
| +  ASSERT(FLAG_interpret_irregexp); | 
| +  const String& pattern = String::Handle(zone, regexp.pattern()); | 
| + | 
| +  ASSERT(!regexp.IsNull()); | 
| +  ASSERT(!pattern.IsNull()); | 
| + | 
| +  const bool ignore_case = regexp.is_ignore_case(); | 
| +  const bool is_global = regexp.is_global(); | 
| + | 
| +  RegExpCompiler compiler(data->capture_count, ignore_case, is_one_byte); | 
| + | 
| +  // TODO(zerny): Frequency sampling is currently disabled because of several | 
| +  // issues. We do not want to store subject strings in the regexp object since | 
| +  // they might be long and we should not prevent their garbage collection. | 
| +  // Passing them to this function explicitly does not help, since we must | 
| +  // generate exactly the same IR for both the unoptimizing and optimizing | 
| +  // pipelines (otherwise it gets confused when i.e. deopt id's differ). | 
| +  // An option would be to store sampling results in the regexp object, but | 
| +  // I'm not sure the performance gains are relevant enough. | 
| + | 
| +  // Wrap the body of the regexp in capture #0. | 
| +  RegExpNode* captured_body = RegExpCapture::ToNode(data->tree, | 
| +                                                    0, | 
| +                                                    &compiler, | 
| +                                                    compiler.accept()); | 
| + | 
| +  RegExpNode* node = captured_body; | 
| +  bool is_end_anchored = data->tree->IsAnchoredAtEnd(); | 
| +  bool is_start_anchored = data->tree->IsAnchoredAtStart(); | 
| +  intptr_t max_length = data->tree->max_match(); | 
| +  if (!is_start_anchored) { | 
| +    // Add a .*? at the beginning, outside the body capture, unless | 
| +    // this expression is anchored at the beginning. | 
| +    RegExpNode* loop_node = | 
| +        RegExpQuantifier::ToNode(0, | 
| +                                 RegExpTree::kInfinity, | 
| +                                 false, | 
| +                                 new(zone) RegExpCharacterClass('*'), | 
| +                                 &compiler, | 
| +                                 captured_body, | 
| +                                 data->contains_anchor); | 
| + | 
| +    if (data->contains_anchor) { | 
| +      // Unroll loop once, to take care of the case that might start | 
| +      // at the start of input. | 
| +      ChoiceNode* first_step_node = new(zone) ChoiceNode(2, zone); | 
| +      first_step_node->AddAlternative(GuardedAlternative(captured_body)); | 
| +      first_step_node->AddAlternative(GuardedAlternative( | 
| +          new(zone) TextNode( | 
| +              new(zone) RegExpCharacterClass('*'), loop_node))); | 
| +      node = first_step_node; | 
| +    } else { | 
| +      node = loop_node; | 
| +    } | 
| +  } | 
| +  if (is_one_byte) { | 
| +    node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, ignore_case); | 
| +    // Do it again to propagate the new nodes to places where they were not | 
| +    // put because they had not been calculated yet. | 
| +    if (node != NULL) { | 
| +      node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, ignore_case); | 
| +    } | 
| +  } | 
| + | 
| +  if (node == NULL) node = new(zone) EndNode(EndNode::BACKTRACK, zone); | 
| +  data->node = node; | 
| +  Analysis analysis(ignore_case, is_one_byte); | 
| +  analysis.EnsureAnalyzed(node); | 
| +  if (analysis.has_failed()) { | 
| +    const char* error_message = analysis.error_message(); | 
| +    return CompilationResult(error_message); | 
| +  } | 
| + | 
| +  // Bytecode regexp implementation. | 
| + | 
| +  ZoneGrowableArray<uint8_t> buffer(zone, 1024); | 
| +  BytecodeRegExpMacroAssembler* macro_assembler = | 
| +      new(zone) BytecodeRegExpMacroAssembler(&buffer, zone); | 
| + | 
| +  // Inserted here, instead of in Assembler, because it depends on information | 
| +  // in the AST that isn't replicated in the Node structure. | 
| +  static const intptr_t kMaxBacksearchLimit = 1024; | 
| +  if (is_end_anchored && | 
| +      !is_start_anchored && | 
| +      max_length < kMaxBacksearchLimit) { | 
| +    macro_assembler->SetCurrentPositionFromEnd(max_length); | 
| +  } | 
| + | 
| +  if (is_global) { | 
| +    macro_assembler->set_global_mode( | 
| +        (data->tree->min_match() > 0) | 
| +            ? RegExpMacroAssembler::GLOBAL_NO_ZERO_LENGTH_CHECK | 
| +            : RegExpMacroAssembler::GLOBAL); | 
| +  } | 
| + | 
| +  RegExpEngine::CompilationResult result = | 
| +      compiler.Assemble(macro_assembler, | 
| +                        node, | 
| +                        data->capture_count, | 
| +                        pattern); | 
| + | 
| +  if (FLAG_trace_irregexp) { | 
| +    macro_assembler->PrintBlocks(); | 
| +  } | 
| + | 
| +  return result; | 
| +} | 
| + | 
| + | 
| static void CreateSpecializedFunction(Zone* zone, | 
| const JSRegExp& regexp, | 
| intptr_t specialization_cid, | 
|  |