Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1215)

Unified Diff: src/regexp/regexp-parser.cc

Issue 2791163003: [regexp] Support unicode capture names in non-unicode patterns (Closed)
Patch Set: Remove template parameter from ReadNext | Created 3 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « src/regexp/regexp-parser.h ('k') | test/mjsunit/harmony/regexp-named-captures.js » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments / Hide Comments ('s')
Index: src/regexp/regexp-parser.cc
diff --git a/src/regexp/regexp-parser.cc b/src/regexp/regexp-parser.cc
index 3e5998719436e1cfe743e694b2eaf526b46bc377..656e88cff89578f0ab488b0392956b681ace48a8 100644
--- a/src/regexp/regexp-parser.cc
+++ b/src/regexp/regexp-parser.cc
@@ -46,13 +46,13 @@ RegExpParser::RegExpParser(FlatStringReader* in, Handle<String>* error,
Advance();
}
-template <bool update_position>
-inline uc32 RegExpParser::ReadNext() {
+inline uc32 RegExpParser::ReadNext(bool update_position, ScanMode mode) {
int position = next_pos_;
uc32 c0 = in()->Get(position);
position++;
- // Read the whole surrogate pair in case of unicode flag, if possible.
- if (unicode() && position < in()->length() &&
+ const bool try_combine_surrogate_pairs =
+ (unicode() || mode == ScanMode::FORCE_COMBINE_SURROGATE_PAIRS);
+ if (try_combine_surrogate_pairs && position < in()->length() &&
unibrow::Utf16::IsLeadSurrogate(static_cast<uc16>(c0))) {
uc16 c1 = in()->Get(position);
if (unibrow::Utf16::IsTrailSurrogate(c1)) {
@@ -67,14 +67,13 @@ inline uc32 RegExpParser::ReadNext() {
uc32 RegExpParser::Next() {
if (has_next()) {
- return ReadNext<false>();
+ return ReadNext(false, ScanMode::DEFAULT);
} else {
return kEndMarker;
}
}
-
-void RegExpParser::Advance() {
+void RegExpParser::Advance(ScanMode mode) {
if (has_next()) {
StackLimitCheck check(isolate());
if (check.HasOverflowed()) {
@@ -84,7 +83,7 @@ void RegExpParser::Advance() {
} else if (zone()->excess_allocation()) {
ReportError(CStrVector("Regular expression too large"));
} else {
- current_ = ReadNext<true>();
+ current_ = ReadNext(true, mode);
}
} else {
current_ = kEndMarker;
@@ -102,10 +101,9 @@ void RegExpParser::Reset(int pos) {
Advance();
}
-
-void RegExpParser::Advance(int dist) {
+void RegExpParser::Advance(int dist, ScanMode mode) {
next_pos_ += dist - 1;
- Advance();
+ Advance(mode);
}
@@ -329,7 +327,6 @@ RegExpTree* RegExpParser::ParseDisjunction() {
if (FLAG_harmony_regexp_named_captures) {
has_named_captures_ = true;
is_named_capture = true;
- Advance();
break;
}
// Fall through.
@@ -769,20 +766,26 @@ static void push_code_unit(ZoneVector<uc16>* v, uint32_t code_unit) {
const ZoneVector<uc16>* RegExpParser::ParseCaptureGroupName() {
DCHECK(FLAG_harmony_regexp_named_captures);
+ DCHECK_EQ(current(), '<');
ZoneVector<uc16>* name =
new (zone()->New(sizeof(ZoneVector<uc16>))) ZoneVector<uc16>(zone());
+ // Capture names can always contain surrogate pairs, and we need to scan
+ // accordingly.
+ const ScanMode scan_mode = ScanMode::FORCE_COMBINE_SURROGATE_PAIRS;
+ Advance(scan_mode);
+
bool at_start = true;
while (true) {
uc32 c = current();
- Advance();
+ Advance(scan_mode);
// Convert unicode escapes.
if (c == '\\' && current() == 'u') {
// TODO(jgruber): Reconsider this once the spec has settled.
// https://github.com/tc39/proposal-regexp-named-groups/issues/23
- Advance();
+ Advance(scan_mode);
if (!ParseUnicodeEscape(&c)) {
ReportError(CStrVector("Invalid Unicode escape sequence"));
return nullptr;
@@ -853,7 +856,6 @@ bool RegExpParser::ParseNamedBackReference(RegExpBuilder* builder,
return false;
}
- Advance();
const ZoneVector<uc16>* name = ParseCaptureGroupName();
if (name == nullptr) {
return false;
« no previous file with comments | « src/regexp/regexp-parser.h ('k') | test/mjsunit/harmony/regexp-named-captures.js » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698