Chromium Code Reviews| Index: src/parser.cc |
| diff --git a/src/parser.cc b/src/parser.cc |
| index 82d0370a97b9c7e44d3d176b63f9b903db40abca..820b9ba3a2de9f7b99212f27f3c88212632e1248 100644 |
| --- a/src/parser.cc |
| +++ b/src/parser.cc |
| @@ -256,10 +256,28 @@ class BufferedZoneList { |
| T* RemoveLast() { |
| ASSERT(last_ != NULL); |
| T* result = last_; |
| - last_ = NULL; |
| + if (list_ != NULL && list_->length() > 0) |
| + last_ = list_->RemoveLast(); |
|
Lasse Reichstein
2008/11/15 23:58:06
This allows removing more than the last added elem
Christian Plesner Hansen
2008/11/16 09:05:06
Not as such, no, but it's not a good property that
Lasse Reichstein
2008/11/16 10:53:15
Ah, so there was a need :)
|
| + else |
| + last_ = NULL; |
| return result; |
| } |
| + T* Get(int i) { |
| + ASSERT(0 <= i && i < length()); |
| + if (list_ == NULL) { |
| + ASSERT_EQ(0, i); |
| + return last_; |
| + } else { |
| + if (i == list_->length()) { |
| + ASSERT(last_ != NULL); |
| + return last_; |
| + } else { |
| + return list_->at(i); |
| + } |
| + } |
| + } |
| + |
| void Clear() { |
| list_ = NULL; |
| last_ = NULL; |
| @@ -294,17 +312,19 @@ class RegExpBuilder { |
| // "Adds" an empty expression. Does nothing except consume a |
| // following quantifier |
| void AddEmpty(); |
| - void AddAtom(RegExpTree* tree); |
| + void AddTerm(RegExpTree* tree); |
|
Lasse Reichstein
2008/11/15 23:58:06
I would prefer if this is still called AddAtom (se
Christian Plesner Hansen
2008/11/16 09:05:06
You're right -- I've been using the term 'atom' in
|
| void AddAssertion(RegExpTree* tree); |
| void NewAlternative(); // '|' |
| void AddQuantifierToAtom(int min, int max, bool is_greedy); |
| RegExpTree* ToRegExp(); |
| private: |
| void FlushCharacters(); |
| + void FlushText(); |
| bool FlushTerms(); |
| bool pending_empty_; |
| ZoneList<uc16>* characters_; |
| BufferedZoneList<RegExpTree, 2> terms_; |
| + BufferedZoneList<RegExpTree, 2> text_; |
| BufferedZoneList<RegExpTree, 2> alternatives_; |
| #ifdef DEBUG |
| enum {ADD_NONE, ADD_CHAR, ADD_TERM, ADD_ASSERT, ADD_ATOM} last_added_; |
| @@ -328,12 +348,29 @@ void RegExpBuilder::FlushCharacters() { |
| if (characters_ != NULL) { |
| RegExpTree* atom = new RegExpAtom(characters_->ToConstVector()); |
| characters_ = NULL; |
| - terms_.Add(atom); |
| + text_.Add(atom); |
| LAST(ADD_ATOM); |
| } |
| } |
| +void RegExpBuilder::FlushText() { |
| + FlushCharacters(); |
| + int num_text = text_.length(); |
| + if (num_text == 0) { |
| + return; |
| + } else if (num_text == 1) { |
| + terms_.Add(text_.last()); |
| + } else { |
| + RegExpText* text = new RegExpText(); |
| + for (int i = 0; i < num_text; i++) |
| + text_.Get(i)->AppendToText(text); |
| + terms_.Add(text); |
|
Lasse Reichstein
2008/11/15 23:58:06
The other places where a buffer is used, the conte
Christian Plesner Hansen
2008/11/16 09:05:06
If we're always using GetList anyway then why even
Lasse Reichstein
2008/11/16 10:53:15
The current uses all have special cases for one el
|
| + } |
| + text_.Clear(); |
| +} |
| + |
| + |
| void RegExpBuilder::AddCharacter(uc16 c) { |
| pending_empty_ = false; |
| if (characters_ == NULL) { |
| @@ -349,15 +386,20 @@ void RegExpBuilder::AddEmpty() { |
| } |
| -void RegExpBuilder::AddAtom(RegExpTree* atom) { |
| - FlushCharacters(); |
| - terms_.Add(atom); |
| +void RegExpBuilder::AddTerm(RegExpTree* term) { |
|
Lasse Reichstein
2008/11/15 23:58:06
The original method was called AddAtom to match th
Christian Plesner Hansen
2008/11/16 09:05:06
You don't always know that you're adding a charact
Lasse Reichstein
2008/11/16 10:53:15
I can see the point. If the result of a sub-parse
|
| + if (term->IsTextElement()) { |
| + FlushCharacters(); |
| + text_.Add(term); |
| + } else { |
| + FlushText(); |
| + terms_.Add(term); |
| + } |
| LAST(ADD_ATOM); |
| } |
| void RegExpBuilder::AddAssertion(RegExpTree* assert) { |
| - FlushCharacters(); |
| + FlushText(); |
| terms_.Add(assert); |
| LAST(ADD_ASSERT); |
| } |
| @@ -371,7 +413,7 @@ void RegExpBuilder::NewAlternative() { |
| bool RegExpBuilder::FlushTerms() { |
| - FlushCharacters(); |
| + FlushText(); |
| int num_terms = terms_.length(); |
| if (num_terms == 0) { |
| return false; |
| @@ -415,11 +457,16 @@ void RegExpBuilder::AddQuantifierToAtom(int min, int max, bool is_greedy) { |
| int num_chars = char_vector.length(); |
| if (num_chars > 1) { |
| Vector<const uc16> prefix = char_vector.SubVector(0, num_chars - 1); |
| - terms_.Add(new RegExpAtom(prefix)); |
| + text_.Add(new RegExpAtom(prefix)); |
| char_vector = char_vector.SubVector(num_chars - 1, num_chars); |
| } |
| characters_ = NULL; |
| atom = new RegExpAtom(char_vector); |
| + FlushText(); |
| + } else if (text_.length() > 0) { |
| + ASSERT(last_added_ == ADD_ATOM); |
|
Lasse Reichstein
2008/11/15 23:58:06
The last thing added was a character class.
The l
Christian Plesner Hansen
2008/11/16 09:05:06
What about "foo (?:bar) baz"?
Lasse Reichstein
2008/11/16 10:53:15
Indeed. It would have to be ADD_TEXT to capture th
|
| + atom = text_.RemoveLast(); |
| + FlushText(); |
| } else if (terms_.length() > 0) { |
| ASSERT(last_added_ == ADD_ATOM); |
| atom = terms_.RemoveLast(); |
| @@ -3585,17 +3632,17 @@ RegExpTree* RegExpParser::ParseDisjunction(bool* ok) { |
| ZoneList<CharacterRange>* ranges = new ZoneList<CharacterRange>(2); |
| CharacterRange::AddClassEscape('.', ranges); |
| RegExpTree* atom = new RegExpCharacterClass(ranges, false); |
| - builder.AddAtom(atom); |
| + builder.AddTerm(atom); |
| break; |
| } |
| case '(': { |
| RegExpTree* atom = ParseGroup(CHECK_OK); |
| - builder.AddAtom(atom); |
| + builder.AddTerm(atom); |
|
Lasse Reichstein
2008/11/16 10:53:15
At this point we could check whether the atom is a
|
| break; |
| } |
| case '[': { |
| RegExpTree* atom = ParseCharacterClass(CHECK_OK); |
| - builder.AddAtom(atom); |
| + builder.AddTerm(atom); |
| break; |
| } |
| // Atom :: |
| @@ -3625,7 +3672,7 @@ RegExpTree* RegExpParser::ParseDisjunction(bool* ok) { |
| ZoneList<CharacterRange>* ranges = new ZoneList<CharacterRange>(2); |
| CharacterRange::AddClassEscape(c, ranges); |
| RegExpTree* atom = new RegExpCharacterClass(ranges, false); |
| - builder.AddAtom(atom); |
| + builder.AddTerm(atom); |
| goto has_read_atom; // Avoid setting has_character_escapes_. |
| } |
| case '1': case '2': case '3': case '4': case '5': case '6': |
| @@ -3639,7 +3686,7 @@ RegExpTree* RegExpParser::ParseDisjunction(bool* ok) { |
| goto has_read_atom; |
| } |
| RegExpTree* atom = new RegExpBackreference(capture); |
| - builder.AddAtom(atom); |
| + builder.AddTerm(atom); |
| goto has_read_atom; // Avoid setting has_character_escapes_. |
| } |
| uc32 first_digit = next(); |