| Index: src/regexp/regexp-parser.cc
 | 
| diff --git a/src/regexp/regexp-parser.cc b/src/regexp/regexp-parser.cc
 | 
| index 07d5779675786b0dfbec11fb7a8cf8fa19f3aecb..fa8900342cfc4878411a1c06d753254024f138fe 100644
 | 
| --- a/src/regexp/regexp-parser.cc
 | 
| +++ b/src/regexp/regexp-parser.cc
 | 
| @@ -15,18 +15,20 @@
 | 
|  namespace internal {
 | 
|  
 | 
|  RegExpParser::RegExpParser(FlatStringReader* in, Handle<String>* error,
 | 
| -                           JSRegExp::Flags flags, Isolate* isolate, Zone* zone)
 | 
| +                           bool multiline, bool unicode, Isolate* isolate,
 | 
| +                           Zone* zone)
 | 
|      : isolate_(isolate),
 | 
|        zone_(zone),
 | 
|        error_(error),
 | 
|        captures_(NULL),
 | 
|        in_(in),
 | 
|        current_(kEndMarker),
 | 
| -      flags_(flags),
 | 
|        next_pos_(0),
 | 
|        captures_started_(0),
 | 
|        capture_count_(0),
 | 
|        has_more_(true),
 | 
| +      multiline_(multiline),
 | 
| +      unicode_(unicode),
 | 
|        simple_(false),
 | 
|        contains_anchor_(false),
 | 
|        is_scanned_for_captures_(false),
 | 
| @@ -35,28 +37,9 @@
 | 
|  }
 | 
|  
 | 
|  
 | 
| -template <bool update_position>
 | 
| -uc32 RegExpParser::ReadNext() {
 | 
| -  int position = next_pos_;
 | 
| -  uc32 c0 = in()->Get(position);
 | 
| -  position++;
 | 
| -  // Read the whole surrogate pair in case of unicode flag, if possible.
 | 
| -  if (unicode() && position < in()->length() &&
 | 
| -      unibrow::Utf16::IsLeadSurrogate(static_cast<uc16>(c0))) {
 | 
| -    uc16 c1 = in()->Get(position);
 | 
| -    if (unibrow::Utf16::IsTrailSurrogate(c1)) {
 | 
| -      c0 = unibrow::Utf16::CombineSurrogatePair(static_cast<uc16>(c0), c1);
 | 
| -      position++;
 | 
| -    }
 | 
| -  }
 | 
| -  if (update_position) next_pos_ = position;
 | 
| -  return c0;
 | 
| -}
 | 
| -
 | 
| -
 | 
|  uc32 RegExpParser::Next() {
 | 
|    if (has_next()) {
 | 
| -    return ReadNext<false>();
 | 
| +    return in()->Get(next_pos_);
 | 
|    } else {
 | 
|      return kEndMarker;
 | 
|    }
 | 
| @@ -64,14 +47,25 @@
 | 
|  
 | 
|  
 | 
|  void RegExpParser::Advance() {
 | 
| -  if (has_next()) {
 | 
| +  if (next_pos_ < in()->length()) {
 | 
|      StackLimitCheck check(isolate());
 | 
|      if (check.HasOverflowed()) {
 | 
|        ReportError(CStrVector(Isolate::kStackOverflowMessage));
 | 
|      } else if (zone()->excess_allocation()) {
 | 
|        ReportError(CStrVector("Regular expression too large"));
 | 
|      } else {
 | 
| -      current_ = ReadNext<true>();
 | 
| +      current_ = in()->Get(next_pos_);
 | 
| +      next_pos_++;
 | 
| +      // Read the whole surrogate pair in case of unicode flag, if possible.
 | 
| +      if (unicode_ && next_pos_ < in()->length() &&
 | 
| +          unibrow::Utf16::IsLeadSurrogate(static_cast<uc16>(current_))) {
 | 
| +        uc16 trail = in()->Get(next_pos_);
 | 
| +        if (unibrow::Utf16::IsTrailSurrogate(trail)) {
 | 
| +          current_ = unibrow::Utf16::CombineSurrogatePair(
 | 
| +              static_cast<uc16>(current_), trail);
 | 
| +          next_pos_++;
 | 
| +        }
 | 
| +      }
 | 
|      }
 | 
|    } else {
 | 
|      current_ = kEndMarker;
 | 
| @@ -148,7 +142,7 @@
 | 
|  RegExpTree* RegExpParser::ParseDisjunction() {
 | 
|    // Used to store current state while parsing subexpressions.
 | 
|    RegExpParserState initial_state(NULL, INITIAL, RegExpLookaround::LOOKAHEAD, 0,
 | 
| -                                  flags_, zone());
 | 
| +                                  zone());
 | 
|    RegExpParserState* state = &initial_state;
 | 
|    // Cache the builder in a local variable for quick access.
 | 
|    RegExpBuilder* builder = initial_state.builder();
 | 
| @@ -212,7 +206,7 @@
 | 
|          return ReportError(CStrVector("Nothing to repeat"));
 | 
|        case '^': {
 | 
|          Advance();
 | 
| -        if (multiline()) {
 | 
| +        if (multiline_) {
 | 
|            builder->AddAssertion(
 | 
|                new (zone()) RegExpAssertion(RegExpAssertion::START_OF_LINE));
 | 
|          } else {
 | 
| @@ -225,8 +219,8 @@
 | 
|        case '$': {
 | 
|          Advance();
 | 
|          RegExpAssertion::AssertionType assertion_type =
 | 
| -            multiline() ? RegExpAssertion::END_OF_LINE
 | 
| -                        : RegExpAssertion::END_OF_INPUT;
 | 
| +            multiline_ ? RegExpAssertion::END_OF_LINE
 | 
| +                       : RegExpAssertion::END_OF_INPUT;
 | 
|          builder->AddAssertion(new (zone()) RegExpAssertion(assertion_type));
 | 
|          continue;
 | 
|        }
 | 
| @@ -236,9 +230,8 @@
 | 
|          ZoneList<CharacterRange>* ranges =
 | 
|              new (zone()) ZoneList<CharacterRange>(2, zone());
 | 
|          CharacterRange::AddClassEscape('.', ranges, zone());
 | 
| -        RegExpCharacterClass* cc =
 | 
| -            new (zone()) RegExpCharacterClass(ranges, false);
 | 
| -        builder->AddCharacterClass(cc);
 | 
| +        RegExpTree* atom = new (zone()) RegExpCharacterClass(ranges, false);
 | 
| +        builder->AddAtom(atom);
 | 
|          break;
 | 
|        }
 | 
|        case '(': {
 | 
| @@ -283,15 +276,14 @@
 | 
|            captures_started_++;
 | 
|          }
 | 
|          // Store current state and begin new disjunction parsing.
 | 
| -        state =
 | 
| -            new (zone()) RegExpParserState(state, subexpr_type, lookaround_type,
 | 
| -                                           captures_started_, flags_, zone());
 | 
| +        state = new (zone()) RegExpParserState(
 | 
| +            state, subexpr_type, lookaround_type, captures_started_, zone());
 | 
|          builder = state->builder();
 | 
|          continue;
 | 
|        }
 | 
|        case '[': {
 | 
| -        RegExpTree* cc = ParseCharacterClass(CHECK_FAILED);
 | 
| -        builder->AddCharacterClass(cc->AsCharacterClass());
 | 
| +        RegExpTree* atom = ParseCharacterClass(CHECK_FAILED);
 | 
| +        builder->AddAtom(atom);
 | 
|          break;
 | 
|        }
 | 
|        // Atom ::
 | 
| @@ -326,9 +318,8 @@
 | 
|              ZoneList<CharacterRange>* ranges =
 | 
|                  new (zone()) ZoneList<CharacterRange>(2, zone());
 | 
|              CharacterRange::AddClassEscape(c, ranges, zone());
 | 
| -            RegExpCharacterClass* cc =
 | 
| -                new (zone()) RegExpCharacterClass(ranges, false);
 | 
| -            builder->AddCharacterClass(cc);
 | 
| +            RegExpTree* atom = new (zone()) RegExpCharacterClass(ranges, false);
 | 
| +            builder->AddAtom(atom);
 | 
|              break;
 | 
|            }
 | 
|            case '1':
 | 
| @@ -362,7 +353,7 @@
 | 
|                // escaped,
 | 
|                // no other identity escapes are allowed. If the 'u' flag is not
 | 
|                // present, all identity escapes are allowed.
 | 
| -              if (!unicode()) {
 | 
| +              if (!unicode_) {
 | 
|                  builder->AddCharacter(first_digit);
 | 
|                  Advance(2);
 | 
|                } else {
 | 
| @@ -423,7 +414,7 @@
 | 
|              uc32 value;
 | 
|              if (ParseHexEscape(2, &value)) {
 | 
|                builder->AddCharacter(value);
 | 
| -            } else if (!unicode()) {
 | 
| +            } else if (!unicode_) {
 | 
|                builder->AddCharacter('x');
 | 
|              } else {
 | 
|                // If the 'u' flag is present, invalid escapes are not treated as
 | 
| @@ -437,7 +428,7 @@
 | 
|              uc32 value;
 | 
|              if (ParseUnicodeEscape(&value)) {
 | 
|                builder->AddUnicodeCharacter(value);
 | 
| -            } else if (!unicode()) {
 | 
| +            } else if (!unicode_) {
 | 
|                builder->AddCharacter('u');
 | 
|              } else {
 | 
|                // If the 'u' flag is present, invalid escapes are not treated as
 | 
| @@ -453,7 +444,7 @@
 | 
|              // other identity escapes are allowed. If the 'u' flag is not
 | 
|              // present,
 | 
|              // all identity escapes are allowed.
 | 
| -            if (!unicode() || IsSyntaxCharacter(current())) {
 | 
| +            if (!unicode_ || IsSyntaxCharacter(current())) {
 | 
|                builder->AddCharacter(current());
 | 
|                Advance();
 | 
|              } else {
 | 
| @@ -754,7 +745,7 @@
 | 
|    // Accept both \uxxxx and \u{xxxxxx} (if harmony unicode escapes are
 | 
|    // allowed). In the latter case, the number of hex digits between { } is
 | 
|    // arbitrary. \ and u have already been read.
 | 
| -  if (current() == '{' && unicode()) {
 | 
| +  if (current() == '{' && unicode_) {
 | 
|      int start = position();
 | 
|      Advance();
 | 
|      if (ParseUnlimitedLengthHexNumber(0x10ffff, value)) {
 | 
| @@ -849,7 +840,7 @@
 | 
|        if (ParseHexEscape(2, &value)) {
 | 
|          return value;
 | 
|        }
 | 
| -      if (!unicode()) {
 | 
| +      if (!unicode_) {
 | 
|          // If \x is not followed by a two-digit hexadecimal, treat it
 | 
|          // as an identity escape.
 | 
|          return 'x';
 | 
| @@ -865,7 +856,7 @@
 | 
|        if (ParseUnicodeEscape(&value)) {
 | 
|          return value;
 | 
|        }
 | 
| -      if (!unicode()) {
 | 
| +      if (!unicode_) {
 | 
|          return 'u';
 | 
|        }
 | 
|        // If the 'u' flag is present, invalid escapes are not treated as
 | 
| @@ -878,7 +869,7 @@
 | 
|        // If the 'u' flag is present, only syntax characters can be escaped, no
 | 
|        // other identity escapes are allowed. If the 'u' flag is not present, all
 | 
|        // identity escapes are allowed.
 | 
| -      if (!unicode() || IsSyntaxCharacter(result)) {
 | 
| +      if (!unicode_ || IsSyntaxCharacter(result)) {
 | 
|          Advance();
 | 
|          return result;
 | 
|        }
 | 
| @@ -908,29 +899,13 @@
 | 
|        case kEndMarker:
 | 
|          return ReportError(CStrVector("\\ at end of pattern"));
 | 
|        default:
 | 
| -        first = ParseClassCharacterEscape(CHECK_FAILED);
 | 
| +        uc32 c = ParseClassCharacterEscape(CHECK_FAILED);
 | 
| +        return CharacterRange::Singleton(c);
 | 
|      }
 | 
|    } else {
 | 
|      Advance();
 | 
| -  }
 | 
| -
 | 
| -  if (unicode() && unibrow::Utf16::IsLeadSurrogate(first)) {
 | 
| -    // Combine with possibly following trail surrogate.
 | 
| -    int start = position();
 | 
| -    uc32 second = current();
 | 
| -    if (second == '\\') {
 | 
| -      second = ParseClassCharacterEscape(CHECK_FAILED);
 | 
| -    } else {
 | 
| -      Advance();
 | 
| -    }
 | 
| -    if (unibrow::Utf16::IsTrailSurrogate(second)) {
 | 
| -      first = unibrow::Utf16::CombineSurrogatePair(first, second);
 | 
| -    } else {
 | 
| -      Reset(start);
 | 
| -    }
 | 
| -  }
 | 
| -
 | 
| -  return CharacterRange::Singleton(first);
 | 
| +    return CharacterRange::Singleton(first);
 | 
| +  }
 | 
|  }
 | 
|  
 | 
|  
 | 
| @@ -1010,10 +985,10 @@
 | 
|  
 | 
|  
 | 
|  bool RegExpParser::ParseRegExp(Isolate* isolate, Zone* zone,
 | 
| -                               FlatStringReader* input, JSRegExp::Flags flags,
 | 
| -                               RegExpCompileData* result) {
 | 
| +                               FlatStringReader* input, bool multiline,
 | 
| +                               bool unicode, RegExpCompileData* result) {
 | 
|    DCHECK(result != NULL);
 | 
| -  RegExpParser parser(input, &result->error, flags, isolate, zone);
 | 
| +  RegExpParser parser(input, &result->error, multiline, unicode, isolate, zone);
 | 
|    RegExpTree* tree = parser.ParsePattern();
 | 
|    if (parser.failed()) {
 | 
|      DCHECK(tree == NULL);
 | 
| @@ -1036,12 +1011,10 @@
 | 
|  }
 | 
|  
 | 
|  
 | 
| -RegExpBuilder::RegExpBuilder(Zone* zone, JSRegExp::Flags flags)
 | 
| +RegExpBuilder::RegExpBuilder(Zone* zone)
 | 
|      : zone_(zone),
 | 
|        pending_empty_(false),
 | 
| -      flags_(flags),
 | 
|        characters_(NULL),
 | 
| -      pending_surrogate_(kNoPendingSurrogate),
 | 
|        terms_(),
 | 
|        alternatives_()
 | 
|  #ifdef DEBUG
 | 
| @@ -1052,48 +1025,7 @@
 | 
|  }
 | 
|  
 | 
|  
 | 
| -void RegExpBuilder::AddLeadSurrogate(uc16 lead_surrogate) {
 | 
| -  DCHECK(unibrow::Utf16::IsLeadSurrogate(lead_surrogate));
 | 
| -  FlushPendingSurrogate();
 | 
| -  // Hold onto the lead surrogate, waiting for a trail surrogate to follow.
 | 
| -  pending_surrogate_ = lead_surrogate;
 | 
| -}
 | 
| -
 | 
| -
 | 
| -void RegExpBuilder::AddTrailSurrogate(uc16 trail_surrogate) {
 | 
| -  DCHECK(unibrow::Utf16::IsTrailSurrogate(trail_surrogate));
 | 
| -  if (pending_surrogate_ != kNoPendingSurrogate) {
 | 
| -    uc16 lead_surrogate = pending_surrogate_;
 | 
| -    DCHECK(unibrow::Utf16::IsLeadSurrogate(lead_surrogate));
 | 
| -    ZoneList<uc16> surrogate_pair(2, zone());
 | 
| -    surrogate_pair.Add(lead_surrogate, zone());
 | 
| -    surrogate_pair.Add(trail_surrogate, zone());
 | 
| -    RegExpAtom* atom = new (zone()) RegExpAtom(surrogate_pair.ToConstVector());
 | 
| -    pending_surrogate_ = kNoPendingSurrogate;
 | 
| -    AddAtom(atom);
 | 
| -  } else {
 | 
| -    pending_surrogate_ = trail_surrogate;
 | 
| -    FlushPendingSurrogate();
 | 
| -  }
 | 
| -}
 | 
| -
 | 
| -
 | 
| -void RegExpBuilder::FlushPendingSurrogate() {
 | 
| -  if (pending_surrogate_ != kNoPendingSurrogate) {
 | 
| -    // Use character class to desugar lone surrogate matching.
 | 
| -    RegExpCharacterClass* cc = new (zone()) RegExpCharacterClass(
 | 
| -        CharacterRange::List(zone(),
 | 
| -                             CharacterRange::Singleton(pending_surrogate_)),
 | 
| -        false);
 | 
| -    pending_surrogate_ = kNoPendingSurrogate;
 | 
| -    DCHECK(unicode());
 | 
| -    AddCharacterClass(cc);
 | 
| -  }
 | 
| -}
 | 
| -
 | 
| -
 | 
|  void RegExpBuilder::FlushCharacters() {
 | 
| -  FlushPendingSurrogate();
 | 
|    pending_empty_ = false;
 | 
|    if (characters_ != NULL) {
 | 
|      RegExpTree* atom = new (zone()) RegExpAtom(characters_->ToConstVector());
 | 
| @@ -1121,7 +1053,6 @@
 | 
|  
 | 
|  
 | 
|  void RegExpBuilder::AddCharacter(uc16 c) {
 | 
| -  FlushPendingSurrogate();
 | 
|    pending_empty_ = false;
 | 
|    if (characters_ == NULL) {
 | 
|      characters_ = new (zone()) ZoneList<uc16>(4, zone());
 | 
| @@ -1133,13 +1064,11 @@
 | 
|  
 | 
|  void RegExpBuilder::AddUnicodeCharacter(uc32 c) {
 | 
|    if (c > unibrow::Utf16::kMaxNonSurrogateCharCode) {
 | 
| -    DCHECK(unicode());
 | 
| -    AddLeadSurrogate(unibrow::Utf16::LeadSurrogate(c));
 | 
| -    AddTrailSurrogate(unibrow::Utf16::TrailSurrogate(c));
 | 
| -  } else if (unicode() && unibrow::Utf16::IsLeadSurrogate(c)) {
 | 
| -    AddLeadSurrogate(c);
 | 
| -  } else if (unicode() && unibrow::Utf16::IsTrailSurrogate(c)) {
 | 
| -    AddTrailSurrogate(c);
 | 
| +    ZoneList<uc16> surrogate_pair(2, zone());
 | 
| +    surrogate_pair.Add(unibrow::Utf16::LeadSurrogate(c), zone());
 | 
| +    surrogate_pair.Add(unibrow::Utf16::TrailSurrogate(c), zone());
 | 
| +    RegExpAtom* atom = new (zone()) RegExpAtom(surrogate_pair.ToConstVector());
 | 
| +    AddAtom(atom);
 | 
|    } else {
 | 
|      AddCharacter(static_cast<uc16>(c));
 | 
|    }
 | 
| @@ -1147,17 +1076,6 @@
 | 
|  
 | 
|  
 | 
|  void RegExpBuilder::AddEmpty() { pending_empty_ = true; }
 | 
| -
 | 
| -
 | 
| -void RegExpBuilder::AddCharacterClass(RegExpCharacterClass* cc) {
 | 
| -  if (unicode() && cc->NeedsDesugaringForUnicode(zone())) {
 | 
| -    // In unicode mode, character class needs to be desugared, so it
 | 
| -    // must be a standalone term instead of being part of a RegExpText.
 | 
| -    AddTerm(cc);
 | 
| -  } else {
 | 
| -    AddAtom(cc);
 | 
| -  }
 | 
| -}
 | 
|  
 | 
|  
 | 
|  void RegExpBuilder::AddAtom(RegExpTree* term) {
 | 
| @@ -1172,13 +1090,6 @@
 | 
|      FlushText();
 | 
|      terms_.Add(term, zone());
 | 
|    }
 | 
| -  LAST(ADD_ATOM);
 | 
| -}
 | 
| -
 | 
| -
 | 
| -void RegExpBuilder::AddTerm(RegExpTree* term) {
 | 
| -  FlushText();
 | 
| -  terms_.Add(term, zone());
 | 
|    LAST(ADD_ATOM);
 | 
|  }
 | 
|  
 | 
| @@ -1221,7 +1132,6 @@
 | 
|  
 | 
|  void RegExpBuilder::AddQuantifierToAtom(
 | 
|      int min, int max, RegExpQuantifier::QuantifierType quantifier_type) {
 | 
| -  FlushPendingSurrogate();
 | 
|    if (pending_empty_) {
 | 
|      pending_empty_ = false;
 | 
|      return;
 | 
| 
 |