Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(966)

Unified Diff: src/regexp/regexp-parser.cc

Issue 1571563003: [regexp] quantifier refers to the surrogate pair in unicode regexp. (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@unicoderegexpatom
Patch Set: rebase Created 4 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « src/regexp/regexp-parser.h ('k') | test/cctest/test-regexp.cc » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: src/regexp/regexp-parser.cc
diff --git a/src/regexp/regexp-parser.cc b/src/regexp/regexp-parser.cc
index ad74b3d723e7b253af17094264df310663464a3c..fa8900342cfc4878411a1c06d753254024f138fe 100644
--- a/src/regexp/regexp-parser.cc
+++ b/src/regexp/regexp-parser.cc
@@ -56,6 +56,16 @@ void RegExpParser::Advance() {
} else {
current_ = in()->Get(next_pos_);
next_pos_++;
+ // Read the whole surrogate pair in case of unicode flag, if possible.
+ if (unicode_ && next_pos_ < in()->length() &&
+ unibrow::Utf16::IsLeadSurrogate(static_cast<uc16>(current_))) {
+ uc16 trail = in()->Get(next_pos_);
+ if (unibrow::Utf16::IsTrailSurrogate(trail)) {
+ current_ = unibrow::Utf16::CombineSurrogatePair(
+ static_cast<uc16>(current_), trail);
+ next_pos_++;
+ }
+ }
}
} else {
current_ = kEndMarker;
@@ -417,12 +427,7 @@ RegExpTree* RegExpParser::ParseDisjunction() {
Advance(2);
uc32 value;
if (ParseUnicodeEscape(&value)) {
- if (value > unibrow::Utf16::kMaxNonSurrogateCharCode) {
- builder->AddCharacter(unibrow::Utf16::LeadSurrogate(value));
- builder->AddCharacter(unibrow::Utf16::TrailSurrogate(value));
- } else {
- builder->AddCharacter(static_cast<uc16>(value));
- }
+ builder->AddUnicodeCharacter(value);
} else if (!unicode_) {
builder->AddCharacter('u');
} else {
@@ -456,7 +461,7 @@ RegExpTree* RegExpParser::ParseDisjunction() {
// fallthrough
}
default:
- builder->AddCharacter(current());
+ builder->AddUnicodeCharacter(current());
Advance();
break;
} // end switch(current())
@@ -1057,6 +1062,19 @@ void RegExpBuilder::AddCharacter(uc16 c) {
}
+void RegExpBuilder::AddUnicodeCharacter(uc32 c) {  // Adds code point |c|.
+ if (c > unibrow::Utf16::kMaxNonSurrogateCharCode) {  // Emitted as two uc16s.
+ ZoneList<uc16> surrogate_pair(2, zone());
+ surrogate_pair.Add(unibrow::Utf16::LeadSurrogate(c), zone());
+ surrogate_pair.Add(unibrow::Utf16::TrailSurrogate(c), zone());
+ RegExpAtom* atom = new (zone()) RegExpAtom(surrogate_pair.ToConstVector());
+ AddAtom(atom);  // Lead and trail units go in together as one atom.
+ } else {
+ AddCharacter(static_cast<uc16>(c));  // Otherwise passed on as one uc16.
+ }
+}
+
+
void RegExpBuilder::AddEmpty() { pending_empty_ = true; }
« no previous file with comments | « src/regexp/regexp-parser.h ('k') | test/cctest/test-regexp.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698