Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1427)

Side by Side Diff: src/regexp/regexp-parser.cc

Issue 1571563003: [regexp] quantifier refers to the surrogate pair in unicode regexp. (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@unicoderegexpatom
Patch Set: rebase Created 4 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/regexp/regexp-parser.h ('k') | test/cctest/test-regexp.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments / Hide Comments ('s')
OLD | NEW
1 // Copyright 2016 the V8 project authors. All rights reserved. 1 // Copyright 2016 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "src/regexp/regexp-parser.h" 5 #include "src/regexp/regexp-parser.h"
6 6
7 #include "src/char-predicates-inl.h" 7 #include "src/char-predicates-inl.h"
8 #include "src/factory.h" 8 #include "src/factory.h"
9 #include "src/isolate.h" 9 #include "src/isolate.h"
10 #include "src/objects-inl.h" 10 #include "src/objects-inl.h"
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after
49 void RegExpParser::Advance() { 49 void RegExpParser::Advance() {
50 if (next_pos_ < in()->length()) { 50 if (next_pos_ < in()->length()) {
51 StackLimitCheck check(isolate()); 51 StackLimitCheck check(isolate());
52 if (check.HasOverflowed()) { 52 if (check.HasOverflowed()) {
53 ReportError(CStrVector(Isolate::kStackOverflowMessage)); 53 ReportError(CStrVector(Isolate::kStackOverflowMessage));
54 } else if (zone()->excess_allocation()) { 54 } else if (zone()->excess_allocation()) {
55 ReportError(CStrVector("Regular expression too large")); 55 ReportError(CStrVector("Regular expression too large"));
56 } else { 56 } else {
57 current_ = in()->Get(next_pos_); 57 current_ = in()->Get(next_pos_);
58 next_pos_++; 58 next_pos_++;
59 // Read the whole surrogate pair in case of unicode flag, if possible.
60 if (unicode_ && next_pos_ < in()->length() &&
61 unibrow::Utf16::IsLeadSurrogate(static_cast<uc16>(current_))) {
62 uc16 trail = in()->Get(next_pos_);
63 if (unibrow::Utf16::IsTrailSurrogate(trail)) {
64 current_ = unibrow::Utf16::CombineSurrogatePair(
65 static_cast<uc16>(current_), trail);
66 next_pos_++;
67 }
68 }
59 } 69 }
60 } else { 70 } else {
61 current_ = kEndMarker; 71 current_ = kEndMarker;
62 // Advance so that position() points to 1-after-the-last-character. This is 72 // Advance so that position() points to 1-after-the-last-character. This is
63 // important so that Reset() to this position works correctly. 73 // important so that Reset() to this position works correctly.
64 next_pos_ = in()->length() + 1; 74 next_pos_ = in()->length() + 1;
65 has_more_ = false; 75 has_more_ = false;
66 } 76 }
67 } 77 }
68 78
(...skipping 341 matching lines...) Expand 10 before | Expand all | Expand 10 after
410 // If the 'u' flag is present, invalid escapes are not treated as 420 // If the 'u' flag is present, invalid escapes are not treated as
411 // identity escapes. 421 // identity escapes.
412 return ReportError(CStrVector("Invalid escape")); 422 return ReportError(CStrVector("Invalid escape"));
413 } 423 }
414 break; 424 break;
415 } 425 }
416 case 'u': { 426 case 'u': {
417 Advance(2); 427 Advance(2);
418 uc32 value; 428 uc32 value;
419 if (ParseUnicodeEscape(&value)) { 429 if (ParseUnicodeEscape(&value)) {
420 if (value > unibrow::Utf16::kMaxNonSurrogateCharCode) { 430 builder->AddUnicodeCharacter(value);
421 builder->AddCharacter(unibrow::Utf16::LeadSurrogate(value));
422 builder->AddCharacter(unibrow::Utf16::TrailSurrogate(value));
423 } else {
424 builder->AddCharacter(static_cast<uc16>(value));
425 }
426 } else if (!unicode_) { 431 } else if (!unicode_) {
427 builder->AddCharacter('u'); 432 builder->AddCharacter('u');
428 } else { 433 } else {
429 // If the 'u' flag is present, invalid escapes are not treated as 434 // If the 'u' flag is present, invalid escapes are not treated as
430 // identity escapes. 435 // identity escapes.
431 return ReportError(CStrVector("Invalid unicode escape")); 436 return ReportError(CStrVector("Invalid unicode escape"));
432 } 437 }
433 break; 438 break;
434 } 439 }
435 default: 440 default:
(...skipping 13 matching lines...) Expand all
449 } 454 }
450 break; 455 break;
451 case '{': { 456 case '{': {
452 int dummy; 457 int dummy;
453 if (ParseIntervalQuantifier(&dummy, &dummy)) { 458 if (ParseIntervalQuantifier(&dummy, &dummy)) {
454 ReportError(CStrVector("Nothing to repeat") CHECK_FAILED); 459 ReportError(CStrVector("Nothing to repeat") CHECK_FAILED);
455 } 460 }
456 // fallthrough 461 // fallthrough
457 } 462 }
458 default: 463 default:
459 builder->AddCharacter(current()); 464 builder->AddUnicodeCharacter(current());
460 Advance(); 465 Advance();
461 break; 466 break;
462 } // end switch(current()) 467 } // end switch(current())
463 468
464 int min; 469 int min;
465 int max; 470 int max;
466 switch (current()) { 471 switch (current()) {
467 // QuantifierPrefix :: 472 // QuantifierPrefix ::
468 // * 473 // *
469 // + 474 // +
(...skipping 580 matching lines...) Expand 10 before | Expand all | Expand 10 after
1050 void RegExpBuilder::AddCharacter(uc16 c) { 1055 void RegExpBuilder::AddCharacter(uc16 c) {
1051 pending_empty_ = false; 1056 pending_empty_ = false;
1052 if (characters_ == NULL) { 1057 if (characters_ == NULL) {
1053 characters_ = new (zone()) ZoneList<uc16>(4, zone()); 1058 characters_ = new (zone()) ZoneList<uc16>(4, zone());
1054 } 1059 }
1055 characters_->Add(c, zone()); 1060 characters_->Add(c, zone());
1056 LAST(ADD_CHAR); 1061 LAST(ADD_CHAR);
1057 } 1062 }
1058 1063
1059 1064
1065 void RegExpBuilder::AddUnicodeCharacter(uc32 c) {
1066 if (c > unibrow::Utf16::kMaxNonSurrogateCharCode) {
1067 ZoneList<uc16> surrogate_pair(2, zone());
1068 surrogate_pair.Add(unibrow::Utf16::LeadSurrogate(c), zone());
1069 surrogate_pair.Add(unibrow::Utf16::TrailSurrogate(c), zone());
1070 RegExpAtom* atom = new (zone()) RegExpAtom(surrogate_pair.ToConstVector());
1071 AddAtom(atom);
1072 } else {
1073 AddCharacter(static_cast<uc16>(c));
1074 }
1075 }
1076
1077
1060 void RegExpBuilder::AddEmpty() { pending_empty_ = true; } 1078 void RegExpBuilder::AddEmpty() { pending_empty_ = true; }
1061 1079
1062 1080
1063 void RegExpBuilder::AddAtom(RegExpTree* term) { 1081 void RegExpBuilder::AddAtom(RegExpTree* term) {
1064 if (term->IsEmpty()) { 1082 if (term->IsEmpty()) {
1065 AddEmpty(); 1083 AddEmpty();
1066 return; 1084 return;
1067 } 1085 }
1068 if (term->IsTextElement()) { 1086 if (term->IsTextElement()) {
1069 FlushCharacters(); 1087 FlushCharacters();
(...skipping 83 matching lines...) Expand 10 before | Expand all | Expand 10 after
1153 UNREACHABLE(); 1171 UNREACHABLE();
1154 return; 1172 return;
1155 } 1173 }
1156 terms_.Add(new (zone()) RegExpQuantifier(min, max, quantifier_type, atom), 1174 terms_.Add(new (zone()) RegExpQuantifier(min, max, quantifier_type, atom),
1157 zone()); 1175 zone());
1158 LAST(ADD_TERM); 1176 LAST(ADD_TERM);
1159 } 1177 }
1160 1178
1161 } // namespace internal 1179 } // namespace internal
1162 } // namespace v8 1180 } // namespace v8
OLD | NEW
« no previous file with comments | « src/regexp/regexp-parser.h ('k') | test/cctest/test-regexp.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698