Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(956)

Unified Diff: src/parser.cc

Issue 10759: Introduce text nodes (Closed)
Patch Set: Fixed repeated RemoveLast issue Created 12 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « src/jsregexp.cc ('k') | test/cctest/test-regexp.cc » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: src/parser.cc
diff --git a/src/parser.cc b/src/parser.cc
index 82d0370a97b9c7e44d3d176b63f9b903db40abca..820b9ba3a2de9f7b99212f27f3c88212632e1248 100644
--- a/src/parser.cc
+++ b/src/parser.cc
@@ -256,10 +256,28 @@ class BufferedZoneList {
T* RemoveLast() {
ASSERT(last_ != NULL);
T* result = last_;
- last_ = NULL;
+ if (list_ != NULL && list_->length() > 0)
+ last_ = list_->RemoveLast();
Lasse Reichstein 2008/11/15 23:58:06 This allows removing more than the last added elem
Christian Plesner Hansen 2008/11/16 09:05:06 Not as such, no, but it's not a good property that
Lasse Reichstein 2008/11/16 10:53:15 Ah, so there was a need :)
+ else
+ last_ = NULL;
return result;
}
+ T* Get(int i) {
+ ASSERT(0 <= i && i < length());
+ if (list_ == NULL) {
+ ASSERT_EQ(0, i);
+ return last_;
+ } else {
+ if (i == list_->length()) {
+ ASSERT(last_ != NULL);
+ return last_;
+ } else {
+ return list_->at(i);
+ }
+ }
+ }
+
void Clear() {
list_ = NULL;
last_ = NULL;
@@ -294,17 +312,19 @@ class RegExpBuilder {
// "Adds" an empty expression. Does nothing except consume a
// following quantifier
void AddEmpty();
- void AddAtom(RegExpTree* tree);
+ void AddTerm(RegExpTree* tree);
Lasse Reichstein 2008/11/15 23:58:06 I would prefer if this is still called AddAtom (se
Christian Plesner Hansen 2008/11/16 09:05:06 You're right -- I've been using the term 'atom' in
void AddAssertion(RegExpTree* tree);
void NewAlternative(); // '|'
void AddQuantifierToAtom(int min, int max, bool is_greedy);
RegExpTree* ToRegExp();
private:
void FlushCharacters();
+ void FlushText();
bool FlushTerms();
bool pending_empty_;
ZoneList<uc16>* characters_;
BufferedZoneList<RegExpTree, 2> terms_;
+ BufferedZoneList<RegExpTree, 2> text_;
BufferedZoneList<RegExpTree, 2> alternatives_;
#ifdef DEBUG
enum {ADD_NONE, ADD_CHAR, ADD_TERM, ADD_ASSERT, ADD_ATOM} last_added_;
@@ -328,12 +348,29 @@ void RegExpBuilder::FlushCharacters() {
if (characters_ != NULL) {
RegExpTree* atom = new RegExpAtom(characters_->ToConstVector());
characters_ = NULL;
- terms_.Add(atom);
+ text_.Add(atom);
LAST(ADD_ATOM);
}
}
+void RegExpBuilder::FlushText() {
+ FlushCharacters();
+ int num_text = text_.length();
+ if (num_text == 0) {
+ return;
+ } else if (num_text == 1) {
+ terms_.Add(text_.last());
+ } else {
+ RegExpText* text = new RegExpText();
+ for (int i = 0; i < num_text; i++)
+ text_.Get(i)->AppendToText(text);
+ terms_.Add(text);
Lasse Reichstein 2008/11/15 23:58:06 The other places where a buffer is used, the conte
Christian Plesner Hansen 2008/11/16 09:05:06 If we're always using GetList anyway then why even
Lasse Reichstein 2008/11/16 10:53:15 The current uses all have special cases for one el
+ }
+ text_.Clear();
+}
+
+
void RegExpBuilder::AddCharacter(uc16 c) {
pending_empty_ = false;
if (characters_ == NULL) {
@@ -349,15 +386,20 @@ void RegExpBuilder::AddEmpty() {
}
-void RegExpBuilder::AddAtom(RegExpTree* atom) {
- FlushCharacters();
- terms_.Add(atom);
+void RegExpBuilder::AddTerm(RegExpTree* term) {
Lasse Reichstein 2008/11/15 23:58:06 The original method was called AddAtom to match th
Christian Plesner Hansen 2008/11/16 09:05:06 You don't always know that you're adding a charact
Lasse Reichstein 2008/11/16 10:53:15 I can see the point. If the result of a sub-parse
+ if (term->IsTextElement()) {
+ FlushCharacters();
+ text_.Add(term);
+ } else {
+ FlushText();
+ terms_.Add(term);
+ }
LAST(ADD_ATOM);
}
void RegExpBuilder::AddAssertion(RegExpTree* assert) {
- FlushCharacters();
+ FlushText();
terms_.Add(assert);
LAST(ADD_ASSERT);
}
@@ -371,7 +413,7 @@ void RegExpBuilder::NewAlternative() {
bool RegExpBuilder::FlushTerms() {
- FlushCharacters();
+ FlushText();
int num_terms = terms_.length();
if (num_terms == 0) {
return false;
@@ -415,11 +457,16 @@ void RegExpBuilder::AddQuantifierToAtom(int min, int max, bool is_greedy) {
int num_chars = char_vector.length();
if (num_chars > 1) {
Vector<const uc16> prefix = char_vector.SubVector(0, num_chars - 1);
- terms_.Add(new RegExpAtom(prefix));
+ text_.Add(new RegExpAtom(prefix));
char_vector = char_vector.SubVector(num_chars - 1, num_chars);
}
characters_ = NULL;
atom = new RegExpAtom(char_vector);
+ FlushText();
+ } else if (text_.length() > 0) {
+ ASSERT(last_added_ == ADD_ATOM);
Lasse Reichstein 2008/11/15 23:58:06 The last thing added was a character class. The l
Christian Plesner Hansen 2008/11/16 09:05:06 What about "foo (?:bar) baz"?
Lasse Reichstein 2008/11/16 10:53:15 Indeed. It would have to be ADD_TEXT to capture th
+ atom = text_.RemoveLast();
+ FlushText();
} else if (terms_.length() > 0) {
ASSERT(last_added_ == ADD_ATOM);
atom = terms_.RemoveLast();
@@ -3585,17 +3632,17 @@ RegExpTree* RegExpParser::ParseDisjunction(bool* ok) {
ZoneList<CharacterRange>* ranges = new ZoneList<CharacterRange>(2);
CharacterRange::AddClassEscape('.', ranges);
RegExpTree* atom = new RegExpCharacterClass(ranges, false);
- builder.AddAtom(atom);
+ builder.AddTerm(atom);
break;
}
case '(': {
RegExpTree* atom = ParseGroup(CHECK_OK);
- builder.AddAtom(atom);
+ builder.AddTerm(atom);
Lasse Reichstein 2008/11/16 10:53:15 At this point we could check whether the atom is a
break;
}
case '[': {
RegExpTree* atom = ParseCharacterClass(CHECK_OK);
- builder.AddAtom(atom);
+ builder.AddTerm(atom);
break;
}
// Atom ::
@@ -3625,7 +3672,7 @@ RegExpTree* RegExpParser::ParseDisjunction(bool* ok) {
ZoneList<CharacterRange>* ranges = new ZoneList<CharacterRange>(2);
CharacterRange::AddClassEscape(c, ranges);
RegExpTree* atom = new RegExpCharacterClass(ranges, false);
- builder.AddAtom(atom);
+ builder.AddTerm(atom);
goto has_read_atom; // Avoid setting has_character_escapes_.
}
case '1': case '2': case '3': case '4': case '5': case '6':
@@ -3639,7 +3686,7 @@ RegExpTree* RegExpParser::ParseDisjunction(bool* ok) {
goto has_read_atom;
}
RegExpTree* atom = new RegExpBackreference(capture);
- builder.AddAtom(atom);
+ builder.AddTerm(atom);
goto has_read_atom; // Avoid setting has_character_escapes_.
}
uc32 first_digit = next();
« no previous file with comments | « src/jsregexp.cc ('k') | test/cctest/test-regexp.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698