src/regexp/regexp-parser.cc - Issue 1571563003: [regexp] quantifier refers to the surrogate pair in unicode regexp.

Side by Side Diff: src/regexp/regexp-parser.cc

Issue 1571563003: [regexp] quantifier refers to the surrogate pair in unicode regexp. (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@unicoderegexpatom

Patch Set: rebase Created 4 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 // Copyright 2016 the V8 project authors. All rights reserved.	1 // Copyright 2016 the V8 project authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "src/regexp/regexp-parser.h"	5 #include "src/regexp/regexp-parser.h"

6	6

7 #include "src/char-predicates-inl.h"	7 #include "src/char-predicates-inl.h"

8 #include "src/factory.h"	8 #include "src/factory.h"

9 #include "src/isolate.h"	9 #include "src/isolate.h"

10 #include "src/objects-inl.h"	10 #include "src/objects-inl.h"

(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
49 void RegExpParser::Advance() {	49 void RegExpParser::Advance() {

50 if (next_pos_ < in()->length()) {	50 if (next_pos_ < in()->length()) {

51 StackLimitCheck check(isolate());	51 StackLimitCheck check(isolate());

52 if (check.HasOverflowed()) {	52 if (check.HasOverflowed()) {

53 ReportError(CStrVector(Isolate::kStackOverflowMessage));	53 ReportError(CStrVector(Isolate::kStackOverflowMessage));

54 } else if (zone()->excess_allocation()) {	54 } else if (zone()->excess_allocation()) {

55 ReportError(CStrVector("Regular expression too large"));	55 ReportError(CStrVector("Regular expression too large"));

56 } else {	56 } else {

57 current_ = in()->Get(next_pos_);	57 current_ = in()->Get(next_pos_);

58 next_pos_++;	58 next_pos_++;

	59 // Read the whole surrogate pair in case of unicode flag, if possible.

	60 if (unicode_ && next_pos_ < in()->length() &&

	61 unibrow::Utf16::IsLeadSurrogate(static_cast<uc16>(current_))) {

	62 uc16 trail = in()->Get(next_pos_);

	63 if (unibrow::Utf16::IsTrailSurrogate(trail)) {

	64 current_ = unibrow::Utf16::CombineSurrogatePair(

	65 static_cast<uc16>(current_), trail);

	66 next_pos_++;

	67 }

	68 }

59 }	69 }

60 } else {	70 } else {

61 current_ = kEndMarker;	71 current_ = kEndMarker;

62 // Advance so that position() points to 1-after-the-last-character. This is	72 // Advance so that position() points to 1-after-the-last-character. This is

63 // important so that Reset() to this position works correctly.	73 // important so that Reset() to this position works correctly.

64 next_pos_ = in()->length() + 1;	74 next_pos_ = in()->length() + 1;

65 has_more_ = false;	75 has_more_ = false;

66 }	76 }

67 }	77 }

68	78

(...skipping 341 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
410 // If the 'u' flag is present, invalid escapes are not treated as	420 // If the 'u' flag is present, invalid escapes are not treated as

411 // identity escapes.	421 // identity escapes.

412 return ReportError(CStrVector("Invalid escape"));	422 return ReportError(CStrVector("Invalid escape"));

413 }	423 }

414 break;	424 break;

415 }	425 }

416 case 'u': {	426 case 'u': {

417 Advance(2);	427 Advance(2);

418 uc32 value;	428 uc32 value;

419 if (ParseUnicodeEscape(&value)) {	429 if (ParseUnicodeEscape(&value)) {

420 if (value > unibrow::Utf16::kMaxNonSurrogateCharCode) {	430 builder->AddUnicodeCharacter(value);

421 builder->AddCharacter(unibrow::Utf16::LeadSurrogate(value));

422 builder->AddCharacter(unibrow::Utf16::TrailSurrogate(value));

423 } else {

424 builder->AddCharacter(static_cast<uc16>(value));

425 }

426 } else if (!unicode_) {	431 } else if (!unicode_) {

427 builder->AddCharacter('u');	432 builder->AddCharacter('u');

428 } else {	433 } else {

429 // If the 'u' flag is present, invalid escapes are not treated as	434 // If the 'u' flag is present, invalid escapes are not treated as

430 // identity escapes.	435 // identity escapes.

431 return ReportError(CStrVector("Invalid unicode escape"));	436 return ReportError(CStrVector("Invalid unicode escape"));

432 }	437 }

433 break;	438 break;

434 }	439 }

435 default:	440 default:

(...skipping 13 matching lines...) Expand all Loading...
449 }	454 }

450 break;	455 break;

451 case '{': {	456 case '{': {

452 int dummy;	457 int dummy;

453 if (ParseIntervalQuantifier(&dummy, &dummy)) {	458 if (ParseIntervalQuantifier(&dummy, &dummy)) {

454 ReportError(CStrVector("Nothing to repeat") CHECK_FAILED);	459 ReportError(CStrVector("Nothing to repeat") CHECK_FAILED);

455 }	460 }

456 // fallthrough	461 // fallthrough

457 }	462 }

458 default:	463 default:

459 builder->AddCharacter(current());	464 builder->AddUnicodeCharacter(current());

460 Advance();	465 Advance();

461 break;	466 break;

462 } // end switch(current())	467 } // end switch(current())

463	468

464 int min;	469 int min;

465 int max;	470 int max;

466 switch (current()) {	471 switch (current()) {

467 // QuantifierPrefix ::	472 // QuantifierPrefix ::

468 // *	473 // *

469 // +	474 // +

(...skipping 580 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
1050 void RegExpBuilder::AddCharacter(uc16 c) {	1055 void RegExpBuilder::AddCharacter(uc16 c) {

1051 pending_empty_ = false;	1056 pending_empty_ = false;

1052 if (characters_ == NULL) {	1057 if (characters_ == NULL) {

1053 characters_ = new (zone()) ZoneList<uc16>(4, zone());	1058 characters_ = new (zone()) ZoneList<uc16>(4, zone());

1054 }	1059 }

1055 characters_->Add(c, zone());	1060 characters_->Add(c, zone());

1056 LAST(ADD_CHAR);	1061 LAST(ADD_CHAR);

1057 }	1062 }

1058	1063

1059	1064

	1065 void RegExpBuilder::AddUnicodeCharacter(uc32 c) {

	1066 if (c > unibrow::Utf16::kMaxNonSurrogateCharCode) {

	1067 ZoneList<uc16> surrogate_pair(2, zone());

	1068 surrogate_pair.Add(unibrow::Utf16::LeadSurrogate(c), zone());

	1069 surrogate_pair.Add(unibrow::Utf16::TrailSurrogate(c), zone());

	1070 RegExpAtom* atom = new (zone()) RegExpAtom(surrogate_pair.ToConstVector());

	1071 AddAtom(atom);

	1072 } else {

	1073 AddCharacter(static_cast<uc16>(c));

	1074 }

	1075 }

	1076

	1077

1060 void RegExpBuilder::AddEmpty() { pending_empty_ = true; }	1078 void RegExpBuilder::AddEmpty() { pending_empty_ = true; }

1061	1079

1062	1080

1063 void RegExpBuilder::AddAtom(RegExpTree* term) {	1081 void RegExpBuilder::AddAtom(RegExpTree* term) {

1064 if (term->IsEmpty()) {	1082 if (term->IsEmpty()) {

1065 AddEmpty();	1083 AddEmpty();

1066 return;	1084 return;

1067 }	1085 }

1068 if (term->IsTextElement()) {	1086 if (term->IsTextElement()) {

1069 FlushCharacters();	1087 FlushCharacters();

(...skipping 83 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
1153 UNREACHABLE();	1171 UNREACHABLE();

1154 return;	1172 return;

1155 }	1173 }

1156 terms_.Add(new (zone()) RegExpQuantifier(min, max, quantifier_type, atom),	1174 terms_.Add(new (zone()) RegExpQuantifier(min, max, quantifier_type, atom),

1157 zone());	1175 zone());

1158 LAST(ADD_TERM);	1176 LAST(ADD_TERM);

1159 }	1177 }

1160	1178

1161 } // namespace internal	1179 } // namespace internal

1162 } // namespace v8	1180 } // namespace v8

OLD	NEW

« no previous file with comments | « src/regexp/regexp-parser.h ('k') | test/cctest/test-regexp.cc » ('j') | no next file with comments »