src/regexp/regexp-parser.cc - Issue 1571563003: [regexp] quantifier refers to the surrogate pair in unicode regexp.

Keyboard Shortcuts

	File
u :	up to issue
j / k :	jump to file after / before current file
J / K :	jump to next file with a comment after / before current file
	Side-by-side diff
i :	toggle intra-line diffs
e :	expand all comments
c :	collapse all comments
s :	toggle showing all comments
n / p :	next / previous diff chunk or comment
N / P :	next / previous comment
<Up> / <Down> :	next / previous line

	Issue
u :	up to list of issues
j / k :	jump to patch after / before current patch
o / <Enter> :	open current patch in side-by-side view
i :	open current patch in unified diff view

	Issue List
j / k :	jump to issue after / before current issue
o / <Enter> :	open current issue

Unified Diff: src/regexp/regexp-parser.cc

Issue 1571563003: [regexp] quantifier refers to the surrogate pair in unicode regexp. (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@unicoderegexpatom

Patch Set: add parse tests Created 4 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: src/regexp/regexp-parser.cc

diff --git a/src/regexp/regexp-parser.cc b/src/regexp/regexp-parser.cc

index 5cdad974b3886254a50073798a24df09754182b5..cb6073ee4a0bec92e908d98118599ee02a996d31 100644

--- a/src/regexp/regexp-parser.cc

+++ b/src/regexp/regexp-parser.cc

@@ -56,6 +56,16 @@ void RegExpParser::Advance() {

} else {

current_ = in()->Get(next_pos_);

next_pos_++;

+ // Read the whole surrogate pair in case of unicode flag, if possible.

+ if (unicode_ && next_pos_ < in()->length() &&

+ unibrow::Utf16::IsLeadSurrogate(static_cast<uc16>(current_))) {

+ uc16 trail = in()->Get(next_pos_);

+ if (unibrow::Utf16::IsTrailSurrogate(trail)) {

+ current_ = unibrow::Utf16::CombineSurrogatePair(

+ static_cast<uc16>(current_), trail);

+ next_pos_++;

+ }

+ }

}

} else {

current_ = kEndMarker;

@@ -417,12 +427,7 @@ RegExpTree* RegExpParser::ParseDisjunction() {

Advance(2);

uc32 value;

if (ParseUnicodeEscape(&value)) {

- if (value > unibrow::Utf16::kMaxNonSurrogateCharCode) {

- builder->AddCharacter(unibrow::Utf16::LeadSurrogate(value));

- builder->AddCharacter(unibrow::Utf16::TrailSurrogate(value));

- } else {

- builder->AddCharacter(static_cast<uc16>(value));

- }

+ builder->AddUnicodeCharacter(value);

} else if (!FLAG_harmony_unicode_regexps || !unicode_) {

builder->AddCharacter('u');

} else {

@@ -457,7 +462,11 @@ RegExpTree* RegExpParser::ParseDisjunction() {

// fallthrough

}

default:

- builder->AddCharacter(current());

+ if (unicode_) {

rossberg 2016/01/11 12:25:41 Nit: is this if necessary? Can't you always use AddUnicodeCharacter… [comment preview truncated]

Yang 2016/01/11 14:41:44 Good point. Done.

+ builder->AddUnicodeCharacter(current());

+ } else {

+ builder->AddCharacter(current());

+ }

Advance();

break;

} // end switch(current())

@@ -1059,6 +1068,19 @@ void RegExpBuilder::AddCharacter(uc16 c) {

}

+void RegExpBuilder::AddUnicodeCharacter(uc32 c) {

+ if (c > unibrow::Utf16::kMaxNonSurrogateCharCode) {

+ ZoneList<uc16> surrogate_pair(2, zone());

+ surrogate_pair.Add(unibrow::Utf16::LeadSurrogate(c), zone());

+ surrogate_pair.Add(unibrow::Utf16::TrailSurrogate(c), zone());

+ RegExpAtom* atom = new (zone()) RegExpAtom(surrogate_pair.ToConstVector());

+ AddAtom(atom);

+ } else {

+ AddCharacter(static_cast<uc16>(c));

+ }

+}

void RegExpBuilder::AddEmpty() { pending_empty_ = true; }

« no previous file with comments | « src/regexp/regexp-parser.h ('k') | test/cctest/test-regexp.cc » ('j') | no next file with comments »