Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(781)

Unified Diff: src/regexp/regexp-parser.h

Issue 1578253005: [regexp] implement character classes for unicode regexps. (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master
Patch Set: more tests Created 4 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « src/regexp/regexp-ast.cc ('k') | src/regexp/regexp-parser.cc » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: src/regexp/regexp-parser.h
diff --git a/src/regexp/regexp-parser.h b/src/regexp/regexp-parser.h
index af9b765fba16b0de0f00d814da565a8ebd993ec1..dc0d943f86eeb9860d4819c503b4cf3d3b1e2d6d 100644
--- a/src/regexp/regexp-parser.h
+++ b/src/regexp/regexp-parser.h
@@ -99,13 +99,15 @@ class BufferedZoneList {
// Accumulates RegExp atoms and assertions into lists of terms and alternatives.
class RegExpBuilder : public ZoneObject {
public:
- explicit RegExpBuilder(Zone* zone);
+ RegExpBuilder(Zone* zone, JSRegExp::Flags flags);
void AddCharacter(uc16 character);
void AddUnicodeCharacter(uc32 character);
// "Adds" an empty expression. Does nothing except consume a
// following quantifier
void AddEmpty();
+ void AddCharacterClass(RegExpCharacterClass* cc);
void AddAtom(RegExpTree* tree);
+ void AddTerm(RegExpTree* tree);
void AddAssertion(RegExpTree* tree);
void NewAlternative(); // '|'
void AddQuantifierToAtom(int min, int max,
@@ -113,14 +115,21 @@ class RegExpBuilder : public ZoneObject {
RegExpTree* ToRegExp();
private:
+ static const uc16 kNoPendingSurrogate = 0;
+ void AddLeadSurrogate(uc16 lead_surrogate);
+ void AddTrailSurrogate(uc16 trail_surrogate);
+ void FlushPendingSurrogate();
void FlushCharacters();
void FlushText();
void FlushTerms();
Zone* zone() const { return zone_; }
+ bool unicode() const { return (flags_ & JSRegExp::kUnicode) != 0; }
Zone* zone_;
bool pending_empty_;
+ JSRegExp::Flags flags_;
ZoneList<uc16>* characters_;
+ uc16 pending_surrogate_;
BufferedZoneList<RegExpTree, 2> terms_;
BufferedZoneList<RegExpTree, 2> text_;
BufferedZoneList<RegExpTree, 2> alternatives_;
@@ -135,12 +144,11 @@ class RegExpBuilder : public ZoneObject {
class RegExpParser BASE_EMBEDDED {
public:
- RegExpParser(FlatStringReader* in, Handle<String>* error, bool multiline_mode,
- bool unicode, Isolate* isolate, Zone* zone);
+ RegExpParser(FlatStringReader* in, Handle<String>* error,
+ JSRegExp::Flags flags, Isolate* isolate, Zone* zone);
static bool ParseRegExp(Isolate* isolate, Zone* zone, FlatStringReader* input,
- bool multiline, bool unicode,
- RegExpCompileData* result);
+ JSRegExp::Flags flags, RegExpCompileData* result);
RegExpTree* ParsePattern();
RegExpTree* ParseDisjunction();
@@ -183,6 +191,8 @@ class RegExpParser BASE_EMBEDDED {
int captures_started() { return captures_started_; }
int position() { return next_pos_ - 1; }
bool failed() { return failed_; }
+ bool unicode() const { return (flags_ & JSRegExp::kUnicode) != 0; }
+ bool multiline() const { return (flags_ & JSRegExp::kMultiline) != 0; }
static bool IsSyntaxCharacter(uc32 c);
@@ -203,9 +213,10 @@ class RegExpParser BASE_EMBEDDED {
RegExpParserState(RegExpParserState* previous_state,
SubexpressionType group_type,
RegExpLookaround::Type lookaround_type,
- int disjunction_capture_index, Zone* zone)
+ int disjunction_capture_index, JSRegExp::Flags flags,
+ Zone* zone)
: previous_state_(previous_state),
- builder_(new (zone) RegExpBuilder(zone)),
+ builder_(new (zone) RegExpBuilder(zone, flags)),
group_type_(group_type),
lookaround_type_(lookaround_type),
disjunction_capture_index_(disjunction_capture_index) {}
@@ -249,6 +260,8 @@ class RegExpParser BASE_EMBEDDED {
bool has_more() { return has_more_; }
bool has_next() { return next_pos_ < in()->length(); }
uc32 Next();
+ template <bool update_position>
+ uc32 ReadNext();
FlatStringReader* in() { return in_; }
void ScanForCaptures();
@@ -258,13 +271,12 @@ class RegExpParser BASE_EMBEDDED {
ZoneList<RegExpCapture*>* captures_;
FlatStringReader* in_;
uc32 current_;
+ JSRegExp::Flags flags_;
int next_pos_;
int captures_started_;
// The capture count is only valid after we have scanned for captures.
int capture_count_;
bool has_more_;
- bool multiline_;
- bool unicode_;
bool simple_;
bool contains_anchor_;
bool is_scanned_for_captures_;
« no previous file with comments | « src/regexp/regexp-ast.cc ('k') | src/regexp/regexp-parser.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698