| Index: src/parser.cc
|
| diff --git a/src/parser.cc b/src/parser.cc
|
| index 9227272e09ca51ed62dce91704ad6e940b7723e5..89362d082b347e0341e5c5b5bb4cbafb82dd936f 100644
|
| --- a/src/parser.cc
|
| +++ b/src/parser.cc
|
| @@ -94,12 +94,14 @@ ParseInfo::ParseInfo(Zone* zone, Handle<Script> script) : ParseInfo(zone) {
|
| }
|
|
|
|
|
| -RegExpBuilder::RegExpBuilder(Zone* zone)
|
| +RegExpBuilder::RegExpBuilder(Zone* zone,
|
| + RegExpTree::ReadDirection read_direction)
|
| : zone_(zone),
|
| pending_empty_(false),
|
| characters_(NULL),
|
| terms_(),
|
| - alternatives_()
|
| + alternatives_(),
|
| + read_direction_(read_direction)
|
| #ifdef DEBUG
|
| , last_added_(ADD_NONE)
|
| #endif
|
| @@ -109,7 +111,8 @@ RegExpBuilder::RegExpBuilder(Zone* zone)
|
| void RegExpBuilder::FlushCharacters() {
|
| pending_empty_ = false;
|
| if (characters_ != NULL) {
|
| - RegExpTree* atom = new(zone()) RegExpAtom(characters_->ToConstVector());
|
| + RegExpTree* atom =
|
| + new (zone()) RegExpAtom(characters_->ToConstVector(), read_direction_);
|
| characters_ = NULL;
|
| text_.Add(atom, zone());
|
| LAST(ADD_ATOM);
|
| @@ -125,7 +128,7 @@ void RegExpBuilder::FlushText() {
|
| } else if (num_text == 1) {
|
| terms_.Add(text_.last(), zone());
|
| } else {
|
| - RegExpText* text = new(zone()) RegExpText(zone());
|
| + RegExpText* text = new (zone()) RegExpText(zone(), read_direction_);
|
| for (int i = 0; i < num_text; i++)
|
| text_.Get(i)->AppendToText(text, zone());
|
| terms_.Add(text, zone());
|
| @@ -186,7 +189,8 @@ void RegExpBuilder::FlushTerms() {
|
| } else if (num_terms == 1) {
|
| alternative = terms_.last();
|
| } else {
|
| - alternative = new(zone()) RegExpAlternative(terms_.GetList(zone()));
|
| + alternative =
|
| + new (zone()) RegExpAlternative(terms_.GetList(zone()), read_direction_);
|
| }
|
| alternatives_.Add(alternative, zone());
|
| terms_.Clear();
|
| @@ -199,7 +203,8 @@ RegExpTree* RegExpBuilder::ToRegExp() {
|
| int num_alternatives = alternatives_.length();
|
| if (num_alternatives == 0) return new (zone()) RegExpEmpty();
|
| if (num_alternatives == 1) return alternatives_.last();
|
| - return new(zone()) RegExpDisjunction(alternatives_.GetList(zone()));
|
| + return new (zone())
|
| + RegExpDisjunction(alternatives_.GetList(zone()), read_direction_);
|
| }
|
|
|
|
|
| @@ -217,11 +222,11 @@ void RegExpBuilder::AddQuantifierToAtom(
|
| int num_chars = char_vector.length();
|
| if (num_chars > 1) {
|
| Vector<const uc16> prefix = char_vector.SubVector(0, num_chars - 1);
|
| - text_.Add(new(zone()) RegExpAtom(prefix), zone());
|
| + text_.Add(new (zone()) RegExpAtom(prefix, read_direction_), zone());
|
| char_vector = char_vector.SubVector(num_chars - 1, num_chars);
|
| }
|
| characters_ = NULL;
|
| - atom = new(zone()) RegExpAtom(char_vector);
|
| + atom = new (zone()) RegExpAtom(char_vector, read_direction_);
|
| FlushText();
|
| } else if (text_.length() > 0) {
|
| DCHECK(last_added_ == ADD_ATOM);
|
| @@ -244,8 +249,9 @@ void RegExpBuilder::AddQuantifierToAtom(
|
| UNREACHABLE();
|
| return;
|
| }
|
| - terms_.Add(
|
| - new(zone()) RegExpQuantifier(min, max, quantifier_type, atom), zone());
|
| + terms_.Add(new (zone()) RegExpQuantifier(min, max, quantifier_type, atom,
|
| + read_direction_),
|
| + zone());
|
| LAST(ADD_TERM);
|
| }
|
|
|
| @@ -5221,6 +5227,7 @@ RegExpParser::RegExpParser(FlatStringReader* in, Handle<String>* error,
|
| in_(in),
|
| current_(kEndMarker),
|
| next_pos_(0),
|
| + captures_started_(0),
|
| capture_count_(0),
|
| has_more_(true),
|
| multiline_(multiline),
|
| @@ -5302,6 +5309,7 @@ RegExpTree* RegExpParser::ReportError(Vector<const char> message) {
|
| // Disjunction
|
| RegExpTree* RegExpParser::ParsePattern() {
|
| RegExpTree* result = ParseDisjunction(CHECK_FAILED);
|
| +
|
| DCHECK(!has_more());
|
| // If the result of parsing is a literal string atom, and it has the
|
| // same length as the input, then the atom is identical to the input.
|
| @@ -5324,54 +5332,56 @@ RegExpTree* RegExpParser::ParsePattern() {
|
| // Atom Quantifier
|
| RegExpTree* RegExpParser::ParseDisjunction() {
|
| // Used to store current state while parsing subexpressions.
|
| - RegExpParserState initial_state(NULL, INITIAL, 0, zone());
|
| - RegExpParserState* stored_state = &initial_state;
|
| + RegExpParserState initial_state(NULL, INITIAL, RegExpTree::READ_FORWARD, 0,
|
| + zone());
|
| + RegExpParserState* state = &initial_state;
|
| // Cache the builder in a local variable for quick access.
|
| RegExpBuilder* builder = initial_state.builder();
|
| while (true) {
|
| switch (current()) {
|
| case kEndMarker:
|
| - if (stored_state->IsSubexpression()) {
|
| + if (state->IsSubexpression()) {
|
| // Inside a parenthesized group when hitting end of input.
|
| ReportError(CStrVector("Unterminated group") CHECK_FAILED);
|
| }
|
| - DCHECK_EQ(INITIAL, stored_state->group_type());
|
| + DCHECK_EQ(INITIAL, state->group_type());
|
| // Parsing completed successfully.
|
| return builder->ToRegExp();
|
| case ')': {
|
| - if (!stored_state->IsSubexpression()) {
|
| + if (!state->IsSubexpression()) {
|
| ReportError(CStrVector("Unmatched ')'") CHECK_FAILED);
|
| }
|
| - DCHECK_NE(INITIAL, stored_state->group_type());
|
| + DCHECK_NE(INITIAL, state->group_type());
|
|
|
| Advance();
|
| // End disjunction parsing and convert builder content to new single
|
| // regexp atom.
|
| RegExpTree* body = builder->ToRegExp();
|
|
|
| - int end_capture_index = captures_started();
|
| -
|
| - int capture_index = stored_state->capture_index();
|
| - SubexpressionType group_type = stored_state->group_type();
|
| + int end_capture_index = captures_started_;
|
|
|
| - // Restore previous state.
|
| - stored_state = stored_state->previous_state();
|
| - builder = stored_state->builder();
|
| + int capture_index = state->capture_index();
|
| + SubexpressionType group_type = state->group_type();
|
|
|
| // Build result of subexpression.
|
| if (group_type == CAPTURE) {
|
| - RegExpCapture* capture = new(zone()) RegExpCapture(body, capture_index);
|
| - captures_->at(capture_index - 1) = capture;
|
| + RegExpCapture* capture = GetCapture(capture_index);
|
| + capture->set_body(body);
|
| + capture->set_read_direction(state->read_direction());
|
| body = capture;
|
| } else if (group_type != GROUPING) {
|
| DCHECK(group_type == POSITIVE_LOOKAHEAD ||
|
| group_type == NEGATIVE_LOOKAHEAD);
|
| bool is_positive = (group_type == POSITIVE_LOOKAHEAD);
|
| - body = new(zone()) RegExpLookahead(body,
|
| - is_positive,
|
| - end_capture_index - capture_index,
|
| - capture_index);
|
| + body = new (zone()) RegExpLookaround(
|
| + body, is_positive, end_capture_index - capture_index, capture_index,
|
| + state->read_direction());
|
| }
|
| +
|
| + // Restore previous state.
|
| + state = state->previous_state();
|
| + builder = state->builder();
|
| +
|
| builder->AddAtom(body);
|
| // For compatability with JSC and ES3, we allow quantifiers after
|
| // lookaheads, and break in all cases.
|
| @@ -5389,11 +5399,11 @@ RegExpTree* RegExpParser::ParseDisjunction() {
|
| case '^': {
|
| Advance();
|
| if (multiline_) {
|
| - builder->AddAssertion(
|
| - new(zone()) RegExpAssertion(RegExpAssertion::START_OF_LINE));
|
| + builder->AddAssertion(new (zone()) RegExpAssertion(
|
| + RegExpAssertion::START_OF_LINE, state->read_direction()));
|
| } else {
|
| - builder->AddAssertion(
|
| - new(zone()) RegExpAssertion(RegExpAssertion::START_OF_INPUT));
|
| + builder->AddAssertion(new (zone()) RegExpAssertion(
|
| + RegExpAssertion::START_OF_INPUT, state->read_direction()));
|
| set_contains_anchor();
|
| }
|
| continue;
|
| @@ -5403,7 +5413,8 @@ RegExpTree* RegExpParser::ParseDisjunction() {
|
| RegExpAssertion::AssertionType assertion_type =
|
| multiline_ ? RegExpAssertion::END_OF_LINE :
|
| RegExpAssertion::END_OF_INPUT;
|
| - builder->AddAssertion(new(zone()) RegExpAssertion(assertion_type));
|
| + builder->AddAssertion(new (zone()) RegExpAssertion(
|
| + assertion_type, state->read_direction()));
|
| continue;
|
| }
|
| case '.': {
|
| @@ -5412,12 +5423,14 @@ RegExpTree* RegExpParser::ParseDisjunction() {
|
| ZoneList<CharacterRange>* ranges =
|
| new(zone()) ZoneList<CharacterRange>(2, zone());
|
| CharacterRange::AddClassEscape('.', ranges, zone());
|
| - RegExpTree* atom = new(zone()) RegExpCharacterClass(ranges, false);
|
| + RegExpTree* atom = new (zone())
|
| + RegExpCharacterClass(ranges, false, state->read_direction());
|
| builder->AddAtom(atom);
|
| break;
|
| }
|
| case '(': {
|
| SubexpressionType subexpr_type = CAPTURE;
|
| + RegExpTree::ReadDirection read_direction = state->read_direction();
|
| Advance();
|
| if (current() == '?') {
|
| switch (Next()) {
|
| @@ -5425,33 +5438,46 @@ RegExpTree* RegExpParser::ParseDisjunction() {
|
| subexpr_type = GROUPING;
|
| break;
|
| case '=':
|
| + read_direction = RegExpTree::READ_FORWARD;
|
| subexpr_type = POSITIVE_LOOKAHEAD;
|
| break;
|
| case '!':
|
| + read_direction = RegExpTree::READ_FORWARD;
|
| subexpr_type = NEGATIVE_LOOKAHEAD;
|
| break;
|
| + case '<':
|
| + if (FLAG_harmony_regexp_lookbehind) {
|
| + Advance();
|
| + read_direction = RegExpTree::READ_BACKWARD;
|
| + if (Next() == '=') {
|
| + subexpr_type = POSITIVE_LOOKAHEAD;
|
| + break;
|
| + } else if (Next() == '!') {
|
| + subexpr_type = NEGATIVE_LOOKAHEAD;
|
| + break;
|
| + }
|
| + }
|
| + // Fall through.
|
| default:
|
| ReportError(CStrVector("Invalid group") CHECK_FAILED);
|
| break;
|
| }
|
| Advance(2);
|
| } else {
|
| - if (captures_ == NULL) {
|
| - captures_ = new(zone()) ZoneList<RegExpCapture*>(2, zone());
|
| - }
|
| - if (captures_started() >= kMaxCaptures) {
|
| + if (captures_started_ >= kMaxCaptures) {
|
| ReportError(CStrVector("Too many captures") CHECK_FAILED);
|
| }
|
| - captures_->Add(NULL, zone());
|
| + captures_started_++;
|
| }
|
| // Store current state and begin new disjunction parsing.
|
| - stored_state = new(zone()) RegExpParserState(stored_state, subexpr_type,
|
| - captures_started(), zone());
|
| - builder = stored_state->builder();
|
| + state = new (zone()) RegExpParserState(
|
| + state, subexpr_type, read_direction, captures_started_, zone());
|
| + builder = state->builder();
|
| continue;
|
| }
|
| case '[': {
|
| - RegExpTree* atom = ParseCharacterClass(CHECK_FAILED);
|
| + RegExpTree* atom =
|
| + ParseCharacterClass(state->read_direction() CHECK_FAILED);
|
| builder->AddAtom(atom);
|
| break;
|
| }
|
| @@ -5463,13 +5489,13 @@ RegExpTree* RegExpParser::ParseDisjunction() {
|
| return ReportError(CStrVector("\\ at end of pattern"));
|
| case 'b':
|
| Advance(2);
|
| - builder->AddAssertion(
|
| - new(zone()) RegExpAssertion(RegExpAssertion::BOUNDARY));
|
| + builder->AddAssertion(new (zone()) RegExpAssertion(
|
| + RegExpAssertion::BOUNDARY, state->read_direction()));
|
| continue;
|
| case 'B':
|
| Advance(2);
|
| - builder->AddAssertion(
|
| - new(zone()) RegExpAssertion(RegExpAssertion::NON_BOUNDARY));
|
| + builder->AddAssertion(new (zone()) RegExpAssertion(
|
| + RegExpAssertion::NON_BOUNDARY, state->read_direction()));
|
| continue;
|
| // AtomEscape ::
|
| // CharacterClassEscape
|
| @@ -5482,7 +5508,8 @@ RegExpTree* RegExpParser::ParseDisjunction() {
|
| ZoneList<CharacterRange>* ranges =
|
| new(zone()) ZoneList<CharacterRange>(2, zone());
|
| CharacterRange::AddClassEscape(c, ranges, zone());
|
| - RegExpTree* atom = new(zone()) RegExpCharacterClass(ranges, false);
|
| + RegExpTree* atom = new (zone())
|
| + RegExpCharacterClass(ranges, false, state->read_direction());
|
| builder->AddAtom(atom);
|
| break;
|
| }
|
| @@ -5490,15 +5517,9 @@ RegExpTree* RegExpParser::ParseDisjunction() {
|
| case '7': case '8': case '9': {
|
| int index = 0;
|
| if (ParseBackReferenceIndex(&index)) {
|
| - RegExpCapture* capture = NULL;
|
| - if (captures_ != NULL && index <= captures_->length()) {
|
| - capture = captures_->at(index - 1);
|
| - }
|
| - if (capture == NULL) {
|
| - builder->AddEmpty();
|
| - break;
|
| - }
|
| - RegExpTree* atom = new(zone()) RegExpBackReference(capture);
|
| + RegExpCapture* capture = GetCapture(index);
|
| + RegExpTree* atom = new (zone())
|
| + RegExpBackReference(capture, state->read_direction());
|
| builder->AddAtom(atom);
|
| break;
|
| }
|
| @@ -5692,7 +5713,7 @@ static bool IsSpecialClassEscape(uc32 c) {
|
| // characters.
|
| void RegExpParser::ScanForCaptures() {
|
| // Start with captures started previous to current position
|
| - int capture_count = captures_started();
|
| + int capture_count = captures_started_;
|
| // Add count of captures after this position.
|
| int n;
|
| while ((n = current()) != kEndMarker) {
|
| @@ -5744,7 +5765,7 @@ bool RegExpParser::ParseBackReferenceIndex(int* index_out) {
|
| break;
|
| }
|
| }
|
| - if (value > captures_started()) {
|
| + if (value > captures_started_) {
|
| if (!is_scanned_for_captures_) {
|
| int saved_position = position();
|
| ScanForCaptures();
|
| @@ -5760,6 +5781,22 @@ bool RegExpParser::ParseBackReferenceIndex(int* index_out) {
|
| }
|
|
|
|
|
| +RegExpCapture* RegExpParser::GetCapture(int index) {
|
| + // Returns the capture for the one-based |index| (stored at list slot
|
| + // index - 1), adding RegExpCapture entries up to the known capture count.
|
| + int know_captures =
|
| + is_scanned_for_captures_ ? capture_count_ : captures_started_;
|
| + DCHECK(index <= know_captures);
|
| + if (captures_ == NULL) {
|
| + captures_ = new (zone()) ZoneList<RegExpCapture*>(know_captures, zone());
|
| + }
|
| + while (captures_->length() < know_captures) {
|
| + captures_->Add(new (zone()) RegExpCapture(captures_->length() + 1), zone());
|
| + }
|
| + return captures_->at(index - 1);
|
| +}
|
| +
|
| +
|
| // QuantifierPrefix ::
|
| // { DecimalDigits }
|
| // { DecimalDigits , }
|
| @@ -6039,7 +6076,8 @@ static inline void AddRangeOrEscape(ZoneList<CharacterRange>* ranges,
|
| }
|
|
|
|
|
| -RegExpTree* RegExpParser::ParseCharacterClass() {
|
| +RegExpTree* RegExpParser::ParseCharacterClass(
|
| + RegExpTree::ReadDirection read_direction) {
|
| static const char* kUnterminated = "Unterminated character class";
|
| static const char* kRangeOutOfOrder = "Range out of order in character class";
|
|
|
| @@ -6091,7 +6129,7 @@ RegExpTree* RegExpParser::ParseCharacterClass() {
|
| ranges->Add(CharacterRange::Everything(), zone());
|
| is_negated = !is_negated;
|
| }
|
| - return new(zone()) RegExpCharacterClass(ranges, is_negated);
|
| + return new (zone()) RegExpCharacterClass(ranges, is_negated, read_direction);
|
| }
|
|
|
|
|
|
|