Index: src/parser.cc |
diff --git a/src/parser.cc b/src/parser.cc |
index 9227272e09ca51ed62dce91704ad6e940b7723e5..89362d082b347e0341e5c5b5bb4cbafb82dd936f 100644 |
--- a/src/parser.cc |
+++ b/src/parser.cc |
@@ -94,12 +94,14 @@ ParseInfo::ParseInfo(Zone* zone, Handle<Script> script) : ParseInfo(zone) { |
} |
-RegExpBuilder::RegExpBuilder(Zone* zone) |
+RegExpBuilder::RegExpBuilder(Zone* zone, |
+ RegExpTree::ReadDirection read_direction) |
: zone_(zone), |
pending_empty_(false), |
characters_(NULL), |
terms_(), |
- alternatives_() |
+ alternatives_(), |
+ read_direction_(read_direction) |
#ifdef DEBUG |
, last_added_(ADD_NONE) |
#endif |
@@ -109,7 +111,8 @@ RegExpBuilder::RegExpBuilder(Zone* zone) |
void RegExpBuilder::FlushCharacters() { |
pending_empty_ = false; |
if (characters_ != NULL) { |
- RegExpTree* atom = new(zone()) RegExpAtom(characters_->ToConstVector()); |
+ RegExpTree* atom = |
+ new (zone()) RegExpAtom(characters_->ToConstVector(), read_direction_); |
characters_ = NULL; |
text_.Add(atom, zone()); |
LAST(ADD_ATOM); |
@@ -125,7 +128,7 @@ void RegExpBuilder::FlushText() { |
} else if (num_text == 1) { |
terms_.Add(text_.last(), zone()); |
} else { |
- RegExpText* text = new(zone()) RegExpText(zone()); |
+ RegExpText* text = new (zone()) RegExpText(zone(), read_direction_); |
for (int i = 0; i < num_text; i++) |
text_.Get(i)->AppendToText(text, zone()); |
terms_.Add(text, zone()); |
@@ -186,7 +189,8 @@ void RegExpBuilder::FlushTerms() { |
} else if (num_terms == 1) { |
alternative = terms_.last(); |
} else { |
- alternative = new(zone()) RegExpAlternative(terms_.GetList(zone())); |
+ alternative = |
+ new (zone()) RegExpAlternative(terms_.GetList(zone()), read_direction_); |
} |
alternatives_.Add(alternative, zone()); |
terms_.Clear(); |
@@ -199,7 +203,8 @@ RegExpTree* RegExpBuilder::ToRegExp() { |
int num_alternatives = alternatives_.length(); |
if (num_alternatives == 0) return new (zone()) RegExpEmpty(); |
if (num_alternatives == 1) return alternatives_.last(); |
- return new(zone()) RegExpDisjunction(alternatives_.GetList(zone())); |
+ return new (zone()) |
+ RegExpDisjunction(alternatives_.GetList(zone()), read_direction_); |
} |
@@ -217,11 +222,11 @@ void RegExpBuilder::AddQuantifierToAtom( |
int num_chars = char_vector.length(); |
if (num_chars > 1) { |
Vector<const uc16> prefix = char_vector.SubVector(0, num_chars - 1); |
- text_.Add(new(zone()) RegExpAtom(prefix), zone()); |
+ text_.Add(new (zone()) RegExpAtom(prefix, read_direction_), zone()); |
char_vector = char_vector.SubVector(num_chars - 1, num_chars); |
} |
characters_ = NULL; |
- atom = new(zone()) RegExpAtom(char_vector); |
+ atom = new (zone()) RegExpAtom(char_vector, read_direction_); |
FlushText(); |
} else if (text_.length() > 0) { |
DCHECK(last_added_ == ADD_ATOM); |
@@ -244,8 +249,9 @@ void RegExpBuilder::AddQuantifierToAtom( |
UNREACHABLE(); |
return; |
} |
- terms_.Add( |
- new(zone()) RegExpQuantifier(min, max, quantifier_type, atom), zone()); |
+ terms_.Add(new (zone()) RegExpQuantifier(min, max, quantifier_type, atom, |
+ read_direction_), |
+ zone()); |
LAST(ADD_TERM); |
} |
@@ -5221,6 +5227,7 @@ RegExpParser::RegExpParser(FlatStringReader* in, Handle<String>* error, |
in_(in), |
current_(kEndMarker), |
next_pos_(0), |
+ captures_started_(0), |
capture_count_(0), |
has_more_(true), |
multiline_(multiline), |
@@ -5302,6 +5309,7 @@ RegExpTree* RegExpParser::ReportError(Vector<const char> message) { |
// Disjunction |
RegExpTree* RegExpParser::ParsePattern() { |
RegExpTree* result = ParseDisjunction(CHECK_FAILED); |
+ |
DCHECK(!has_more()); |
// If the result of parsing is a literal string atom, and it has the |
// same length as the input, then the atom is identical to the input. |
@@ -5324,54 +5332,56 @@ RegExpTree* RegExpParser::ParsePattern() { |
// Atom Quantifier |
RegExpTree* RegExpParser::ParseDisjunction() { |
// Used to store current state while parsing subexpressions. |
- RegExpParserState initial_state(NULL, INITIAL, 0, zone()); |
- RegExpParserState* stored_state = &initial_state; |
+ RegExpParserState initial_state(NULL, INITIAL, RegExpTree::READ_FORWARD, 0, |
+ zone()); |
+ RegExpParserState* state = &initial_state; |
// Cache the builder in a local variable for quick access. |
RegExpBuilder* builder = initial_state.builder(); |
while (true) { |
switch (current()) { |
case kEndMarker: |
- if (stored_state->IsSubexpression()) { |
+ if (state->IsSubexpression()) { |
// Inside a parenthesized group when hitting end of input. |
ReportError(CStrVector("Unterminated group") CHECK_FAILED); |
} |
- DCHECK_EQ(INITIAL, stored_state->group_type()); |
+ DCHECK_EQ(INITIAL, state->group_type()); |
// Parsing completed successfully. |
return builder->ToRegExp(); |
case ')': { |
- if (!stored_state->IsSubexpression()) { |
+ if (!state->IsSubexpression()) { |
ReportError(CStrVector("Unmatched ')'") CHECK_FAILED); |
} |
- DCHECK_NE(INITIAL, stored_state->group_type()); |
+ DCHECK_NE(INITIAL, state->group_type()); |
Advance(); |
// End disjunction parsing and convert builder content to new single |
// regexp atom. |
RegExpTree* body = builder->ToRegExp(); |
- int end_capture_index = captures_started(); |
- |
- int capture_index = stored_state->capture_index(); |
- SubexpressionType group_type = stored_state->group_type(); |
+ int end_capture_index = captures_started_; |
- // Restore previous state. |
- stored_state = stored_state->previous_state(); |
- builder = stored_state->builder(); |
+ int capture_index = state->capture_index(); |
+ SubexpressionType group_type = state->group_type(); |
// Build result of subexpression. |
if (group_type == CAPTURE) { |
- RegExpCapture* capture = new(zone()) RegExpCapture(body, capture_index); |
- captures_->at(capture_index - 1) = capture; |
+ RegExpCapture* capture = GetCapture(capture_index); |
+ capture->set_body(body); |
+ capture->set_read_direction(state->read_direction()); |
body = capture; |
} else if (group_type != GROUPING) { |
DCHECK(group_type == POSITIVE_LOOKAHEAD || |
group_type == NEGATIVE_LOOKAHEAD); |
bool is_positive = (group_type == POSITIVE_LOOKAHEAD); |
- body = new(zone()) RegExpLookahead(body, |
- is_positive, |
- end_capture_index - capture_index, |
- capture_index); |
+ body = new (zone()) RegExpLookaround( |
+ body, is_positive, end_capture_index - capture_index, capture_index, |
+ state->read_direction()); |
} |
+ |
+ // Restore previous state. |
+ state = state->previous_state(); |
+ builder = state->builder(); |
+ |
builder->AddAtom(body); |
// For compatability with JSC and ES3, we allow quantifiers after |
// lookaheads, and break in all cases. |
@@ -5389,11 +5399,11 @@ RegExpTree* RegExpParser::ParseDisjunction() { |
case '^': { |
Advance(); |
if (multiline_) { |
- builder->AddAssertion( |
- new(zone()) RegExpAssertion(RegExpAssertion::START_OF_LINE)); |
+ builder->AddAssertion(new (zone()) RegExpAssertion( |
+ RegExpAssertion::START_OF_LINE, state->read_direction())); |
} else { |
- builder->AddAssertion( |
- new(zone()) RegExpAssertion(RegExpAssertion::START_OF_INPUT)); |
+ builder->AddAssertion(new (zone()) RegExpAssertion( |
+ RegExpAssertion::START_OF_INPUT, state->read_direction())); |
set_contains_anchor(); |
} |
continue; |
@@ -5403,7 +5413,8 @@ RegExpTree* RegExpParser::ParseDisjunction() { |
RegExpAssertion::AssertionType assertion_type = |
multiline_ ? RegExpAssertion::END_OF_LINE : |
RegExpAssertion::END_OF_INPUT; |
- builder->AddAssertion(new(zone()) RegExpAssertion(assertion_type)); |
+ builder->AddAssertion(new (zone()) RegExpAssertion( |
+ assertion_type, state->read_direction())); |
continue; |
} |
case '.': { |
@@ -5412,12 +5423,14 @@ RegExpTree* RegExpParser::ParseDisjunction() { |
ZoneList<CharacterRange>* ranges = |
new(zone()) ZoneList<CharacterRange>(2, zone()); |
CharacterRange::AddClassEscape('.', ranges, zone()); |
- RegExpTree* atom = new(zone()) RegExpCharacterClass(ranges, false); |
+ RegExpTree* atom = new (zone()) |
+ RegExpCharacterClass(ranges, false, state->read_direction()); |
builder->AddAtom(atom); |
break; |
} |
case '(': { |
SubexpressionType subexpr_type = CAPTURE; |
+ RegExpTree::ReadDirection read_direction = state->read_direction(); |
Advance(); |
if (current() == '?') { |
switch (Next()) { |
@@ -5425,33 +5438,46 @@ RegExpTree* RegExpParser::ParseDisjunction() { |
subexpr_type = GROUPING; |
break; |
case '=': |
+ read_direction = RegExpTree::READ_FORWARD; |
subexpr_type = POSITIVE_LOOKAHEAD; |
break; |
case '!': |
+ read_direction = RegExpTree::READ_FORWARD; |
subexpr_type = NEGATIVE_LOOKAHEAD; |
break; |
+ case '<': |
+ if (FLAG_harmony_regexp_lookbehind) { |
+ Advance(); |
+ read_direction = RegExpTree::READ_BACKWARD; |
+ if (Next() == '=') { |
+ subexpr_type = POSITIVE_LOOKAHEAD; |
+ break; |
+ } else if (Next() == '!') { |
+ subexpr_type = NEGATIVE_LOOKAHEAD; |
+ break; |
+ } |
+ } |
+ // Fall through. |
default: |
ReportError(CStrVector("Invalid group") CHECK_FAILED); |
break; |
} |
Advance(2); |
} else { |
- if (captures_ == NULL) { |
- captures_ = new(zone()) ZoneList<RegExpCapture*>(2, zone()); |
- } |
- if (captures_started() >= kMaxCaptures) { |
+ if (captures_started_ >= kMaxCaptures) { |
ReportError(CStrVector("Too many captures") CHECK_FAILED); |
} |
- captures_->Add(NULL, zone()); |
+ captures_started_++; |
} |
// Store current state and begin new disjunction parsing. |
- stored_state = new(zone()) RegExpParserState(stored_state, subexpr_type, |
- captures_started(), zone()); |
- builder = stored_state->builder(); |
+ state = new (zone()) RegExpParserState( |
+ state, subexpr_type, read_direction, captures_started_, zone()); |
+ builder = state->builder(); |
continue; |
} |
case '[': { |
- RegExpTree* atom = ParseCharacterClass(CHECK_FAILED); |
+ RegExpTree* atom = |
+ ParseCharacterClass(state->read_direction() CHECK_FAILED); |
builder->AddAtom(atom); |
break; |
} |
@@ -5463,13 +5489,13 @@ RegExpTree* RegExpParser::ParseDisjunction() { |
return ReportError(CStrVector("\\ at end of pattern")); |
case 'b': |
Advance(2); |
- builder->AddAssertion( |
- new(zone()) RegExpAssertion(RegExpAssertion::BOUNDARY)); |
+ builder->AddAssertion(new (zone()) RegExpAssertion( |
+ RegExpAssertion::BOUNDARY, state->read_direction())); |
continue; |
case 'B': |
Advance(2); |
- builder->AddAssertion( |
- new(zone()) RegExpAssertion(RegExpAssertion::NON_BOUNDARY)); |
+ builder->AddAssertion(new (zone()) RegExpAssertion( |
+ RegExpAssertion::NON_BOUNDARY, state->read_direction())); |
continue; |
// AtomEscape :: |
// CharacterClassEscape |
@@ -5482,7 +5508,8 @@ RegExpTree* RegExpParser::ParseDisjunction() { |
ZoneList<CharacterRange>* ranges = |
new(zone()) ZoneList<CharacterRange>(2, zone()); |
CharacterRange::AddClassEscape(c, ranges, zone()); |
- RegExpTree* atom = new(zone()) RegExpCharacterClass(ranges, false); |
+ RegExpTree* atom = new (zone()) |
+ RegExpCharacterClass(ranges, false, state->read_direction()); |
builder->AddAtom(atom); |
break; |
} |
@@ -5490,15 +5517,9 @@ RegExpTree* RegExpParser::ParseDisjunction() { |
case '7': case '8': case '9': { |
int index = 0; |
if (ParseBackReferenceIndex(&index)) { |
- RegExpCapture* capture = NULL; |
- if (captures_ != NULL && index <= captures_->length()) { |
- capture = captures_->at(index - 1); |
- } |
- if (capture == NULL) { |
- builder->AddEmpty(); |
- break; |
- } |
- RegExpTree* atom = new(zone()) RegExpBackReference(capture); |
+ RegExpCapture* capture = GetCapture(index); |
+ RegExpTree* atom = new (zone()) |
+ RegExpBackReference(capture, state->read_direction()); |
builder->AddAtom(atom); |
break; |
} |
@@ -5692,7 +5713,7 @@ static bool IsSpecialClassEscape(uc32 c) { |
// characters. |
void RegExpParser::ScanForCaptures() { |
// Start with captures started previous to current position |
- int capture_count = captures_started(); |
+ int capture_count = captures_started_; |
// Add count of captures after this position. |
int n; |
while ((n = current()) != kEndMarker) { |
@@ -5744,7 +5765,7 @@ bool RegExpParser::ParseBackReferenceIndex(int* index_out) { |
break; |
} |
} |
- if (value > captures_started()) { |
+ if (value > captures_started_) { |
if (!is_scanned_for_captures_) { |
int saved_position = position(); |
ScanForCaptures(); |
@@ -5760,6 +5781,22 @@ bool RegExpParser::ParseBackReferenceIndex(int* index_out) { |
} |
+RegExpCapture* RegExpParser::GetCapture(int index) { |
+ // Returns the RegExpCapture for the one-based group number |index|, creating captures_ on first use and filling it up to the known capture count. |
+ // The known count is capture_count_ once is_scanned_for_captures_ is set, otherwise captures_started_; list slots are zero-based, hence the index - 1 lookup. |
+ int know_captures = |
+ is_scanned_for_captures_ ? capture_count_ : captures_started_; |
+ DCHECK(index <= know_captures); |
+ if (captures_ == NULL) { |
+ captures_ = new (zone()) ZoneList<RegExpCapture*>(know_captures, zone()); |
+ } |
+ while (captures_->length() < know_captures) { |
+ captures_->Add(new (zone()) RegExpCapture(captures_->length() + 1), zone()); |
+ } |
+ return captures_->at(index - 1); |
+} |
+ |
+ |
// QuantifierPrefix :: |
// { DecimalDigits } |
// { DecimalDigits , } |
@@ -6039,7 +6076,8 @@ static inline void AddRangeOrEscape(ZoneList<CharacterRange>* ranges, |
} |
-RegExpTree* RegExpParser::ParseCharacterClass() { |
+RegExpTree* RegExpParser::ParseCharacterClass( |
+ RegExpTree::ReadDirection read_direction) { |
static const char* kUnterminated = "Unterminated character class"; |
static const char* kRangeOutOfOrder = "Range out of order in character class"; |
@@ -6091,7 +6129,7 @@ RegExpTree* RegExpParser::ParseCharacterClass() { |
ranges->Add(CharacterRange::Everything(), zone()); |
is_negated = !is_negated; |
} |
- return new(zone()) RegExpCharacterClass(ranges, is_negated); |
+ return new (zone()) RegExpCharacterClass(ranges, is_negated, read_direction); |
} |