Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(30)

Unified Diff: src/parser.cc

Issue 11231: Flat string reader (Closed)
Patch Set: Created 12 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: src/parser.cc
diff --git a/src/parser.cc b/src/parser.cc
index c3d21161158e2237d3336776033b40f7ea8a4c79..eb7943f7558549180c84ebd5a1d633dea5374a1e 100644
--- a/src/parser.cc
+++ b/src/parser.cc
@@ -496,7 +496,7 @@ void RegExpBuilder::AddQuantifierToAtom(int min, int max, bool is_greedy) {
class RegExpParser {
public:
- RegExpParser(unibrow::CharacterStream* in,
+ RegExpParser(FlatStringReader* in,
Handle<String>* error,
bool multiline_mode);
RegExpTree* ParsePattern(bool* ok);
@@ -531,36 +531,26 @@ class RegExpParser {
RegExpTree* ReportError(Vector<const char> message, bool* ok);
void Advance();
void Advance(int dist);
- // Pushes a read character (or potentially some other character) back
- // on the input stream. After pushing it back, it becomes the character
- // returned by current(). There is a limited amount of push-back buffer.
- // A function using PushBack should check that it doesn't push back more
- // than kMaxPushback characters, and it should not push back more characters
- // than it has read.
- void PushBack(uc32 character);
- bool CanPushBack();
+ void Reset(int pos);
bool HasCharacterEscapes();
int captures_started() { return captures_ == NULL ? 0 : captures_->length(); }
+ int position() { return next_pos_ - 1; }
static const uc32 kEndMarker = unibrow::Utf8::kBadChar;
private:
uc32 current() { return current_; }
- uc32 next() { return next_; }
bool has_more() { return has_more_; }
- bool has_next() { return has_next_; }
- unibrow::CharacterStream* in() { return in_; }
+ bool has_next() { return next_pos_ < in()->length(); }
+ uc32 Next();
+ FlatStringReader* in() { return in_; }
uc32 current_;
- uc32 next_;
bool has_more_;
- bool has_next_;
bool multiline_mode_;
- unibrow::CharacterStream* in_;
+ int next_pos_;
+ FlatStringReader* in_;
Handle<String>* error_;
- static const int kMaxPushback = 5;
- int pushback_count_;
- uc32 pushback_buffer_[kMaxPushback];
bool has_character_escapes_;
ZoneList<RegExpCapture*>* captures_;
};
@@ -3506,63 +3496,53 @@ Expression* Parser::NewThrowError(Handle<String> constructor,
// Regular expressions
-RegExpParser::RegExpParser(unibrow::CharacterStream* in,
+RegExpParser::RegExpParser(FlatStringReader* in,
Handle<String>* error,
bool multiline_mode)
: current_(kEndMarker),
- next_(kEndMarker),
has_more_(true),
- has_next_(true),
multiline_mode_(multiline_mode),
+ next_pos_(0),
in_(in),
error_(error),
- pushback_count_(0),
has_character_escapes_(false),
captures_(NULL) {
- Advance(2);
+ Advance(1);
}
-void RegExpParser::Advance() {
- current_ = next_;
- has_more_ = has_next_;
- if (pushback_count_ > 0) {
- pushback_count_--;
- next_ = pushback_buffer_[pushback_count_];
- } else if (in()->has_more()) {
- next_ = in()->GetNext();
+uc32 RegExpParser::Next() {
+ if (has_next()) {
+ return in()->Get(next_pos_);
} else {
- next_ = kEndMarker;
- has_next_ = false;
+ return kEndMarker;
}
}
-void RegExpParser::Advance(int dist) {
- for (int i = 0; i < dist; i++)
- Advance();
-}
-
-
-void RegExpParser::PushBack(uc32 character) {
- if (has_next_) {
- ASSERT(pushback_count_ < kMaxPushback);
- pushback_buffer_[pushback_count_] = next_;
- pushback_count_++;
+void RegExpParser::Advance() {
+ if (next_pos_ < in()->length()) {
+ current_ = in()->Get(next_pos_);
+ next_pos_++;
+ } else {
+ current_ = kEndMarker;
+ has_more_ = false;
}
+}
- next_ = current_;
- has_next_ = has_more_;
- current_ = character;
- has_more_ = true;
+void RegExpParser::Reset(int pos) {
+ next_pos_ = pos;
+ Advance();
}
-bool RegExpParser::CanPushBack() {
- return (pushback_count_ < kMaxPushback);
+void RegExpParser::Advance(int dist) {
+ for (int i = 0; i < dist; i++)
+ Advance();
}
+
// Reports whether the parsed string atoms contain any characters that were
// escaped in the original pattern. If not, all atoms are proper substrings
// of the original pattern.
@@ -3662,7 +3642,7 @@ RegExpTree* RegExpParser::ParseDisjunction(bool* ok) {
// Atom ::
// \ AtomEscape
case '\\':
- switch (next()) {
+ switch (Next()) {
case kEndMarker:
ReportError(CStrVector("\\ at end of pattern"), CHECK_OK);
case 'b':
@@ -3681,7 +3661,7 @@ RegExpTree* RegExpParser::ParseDisjunction(bool* ok) {
// CharacterClassEscape :: one of
// d D s S w W
case 'd': case 'D': case 's': case 'S': case 'w': case 'W': {
- uc32 c = next();
+ uc32 c = Next();
Advance(2);
ZoneList<CharacterRange>* ranges = new ZoneList<CharacterRange>(2);
CharacterRange::AddClassEscape(c, ranges);
@@ -3703,7 +3683,7 @@ RegExpTree* RegExpParser::ParseDisjunction(bool* ok) {
builder.AddAtom(atom);
goto has_read_atom; // Avoid setting has_character_escapes_.
}
- uc32 first_digit = next();
+ uc32 first_digit = Next();
if (first_digit == '8' || first_digit == '9') {
// Treat as identity escape
builder.AddCharacter(first_digit);
@@ -3768,7 +3748,7 @@ RegExpTree* RegExpParser::ParseDisjunction(bool* ok) {
}
default:
// Identity escape.
- builder.AddCharacter(next());
+ builder.AddCharacter(Next());
Advance(2);
break;
}
@@ -3861,8 +3841,7 @@ static bool IsSpecialClassEscape(uc32 c) {
bool RegExpParser::ParseBackreferenceIndex(int* index_out) {
ASSERT_EQ('\\', current());
- ASSERT('1' <= next() && next() <= '9');
- ASSERT_EQ(0, pushback_count_);
+ ASSERT('1' <= Next() && Next() <= '9');
// Try to parse a decimal literal that is no greater than the number
// of previously encountered left capturing parentheses.
// This is not according to the ECMAScript specification. According to
@@ -3870,30 +3849,19 @@ bool RegExpParser::ParseBackreferenceIndex(int* index_out) {
// parentheses in the entire input, even if they are meaningless.
if (captures_ == NULL)
return false;
- int value = next() - '0';
+ int start = position();
+ int value = Next() - '0';
if (value > captures_->length())
return false;
- static const int kMaxChars = kMaxPushback - 2;
- EmbeddedVector<uc32, kMaxChars> chars_seen;
- chars_seen[0] = next();
- int char_count = 1;
Advance(2);
while (true) {
uc32 c = current();
if (IsDecimalDigit(c)) {
value = 10 * value + (c - '0');
- // To avoid reading past the end of the stack-allocated pushback
- // buffers we only read kMaxChars before giving up.
- if (value > captures_->length() || char_count > kMaxChars) {
- // If we give up we have to push the characters we read back
- // onto the pushback buffer in the reverse order.
- for (int i = 0; i < char_count; i++) {
- PushBack(chars_seen[char_count - i - 1]);
- }
- PushBack('\\');
+ if (value > captures_->length()) {
+ Reset(start);
return false;
}
- chars_seen[char_count++] = current();
Advance();
} else {
break;
@@ -3992,26 +3960,19 @@ uc32 RegExpParser::ParseOctalLiteral() {
bool RegExpParser::ParseHexEscape(int length, uc32 *value) {
- static const int kMaxChars = kMaxPushback;
- EmbeddedVector<uc32, kMaxChars> chars_seen;
- ASSERT(length <= kMaxChars);
+ int start = position();
uc32 val = 0;
bool done = false;
for (int i = 0; !done; i++) {
uc32 c = current();
int d = HexValue(c);
if (d < 0) {
- while (i > 0) {
- i--;
- PushBack(chars_seen[i]);
- }
+ Reset(start);
return false;
}
val = val * 16 + d;
Advance();
- if (i < length - 1) {
- chars_seen[i] = c;
- } else {
+ if (i == length - 1) {
done = true;
}
}
@@ -4022,7 +3983,7 @@ bool RegExpParser::ParseHexEscape(int length, uc32 *value) {
uc32 RegExpParser::ParseClassCharacterEscape(bool* ok) {
ASSERT(current() == '\\');
- ASSERT(has_next() && !IsSpecialClassEscape(next()));
+ ASSERT(has_next() && !IsSpecialClassEscape(Next()));
Advance();
switch (current()) {
// ControlEscape :: one of
@@ -4088,9 +4049,9 @@ RegExpTree* RegExpParser::ParseGroup(bool* ok) {
char type = '(';
Advance();
if (current() == '?') {
- switch (next()) {
+ switch (Next()) {
case ':': case '=': case '!':
- type = next();
+ type = Next();
Advance(2);
break;
default:
@@ -4150,10 +4111,10 @@ CharacterRange RegExpParser::ParseClassAtom(bool* is_char_class,
ASSERT_EQ(false, *is_char_class);
uc32 first = current();
if (first == '\\') {
- switch (next()) {
+ switch (Next()) {
case 'w': case 'W': case 'd': case 'D': case 's': case 'S': {
*is_char_class = true;
- uc32 c = next();
+ uc32 c = Next();
CharacterRange::AddClassEscape(c, ranges);
Advance(2);
return NULL;
@@ -4267,10 +4228,10 @@ ScriptDataImpl* PreParse(unibrow::CharacterStream* stream,
}
-bool ParseRegExp(unibrow::CharacterStream* stream, RegExpParseResult* result) {
+bool ParseRegExp(FlatStringReader* input, RegExpParseResult* result) {
ASSERT(result != NULL);
// Get multiline flag somehow
- RegExpParser parser(stream, &result->error, false);
+ RegExpParser parser(input, &result->error, false);
bool ok = true;
result->tree = parser.ParsePattern(&ok);
if (!ok) {
« src/objects.cc ('K') | « src/parser.h ('k') | test/cctest/test-regexp.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698