Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(5)

Side by Side Diff: src/regexp/regexp-parser.cc

Issue 1571563003: [regexp] quantifier refers to the surrogate pair in unicode regexp. (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@unicoderegexpatom
Patch Set: add parse tests Created 4 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/regexp/regexp-parser.h ('k') | test/cctest/test-regexp.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2016 the V8 project authors. All rights reserved. 1 // Copyright 2016 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "src/regexp/regexp-parser.h" 5 #include "src/regexp/regexp-parser.h"
6 6
7 #include "src/char-predicates-inl.h" 7 #include "src/char-predicates-inl.h"
8 #include "src/factory.h" 8 #include "src/factory.h"
9 #include "src/isolate.h" 9 #include "src/isolate.h"
10 #include "src/objects-inl.h" 10 #include "src/objects-inl.h"
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after
49 void RegExpParser::Advance() { 49 void RegExpParser::Advance() {
50 if (next_pos_ < in()->length()) { 50 if (next_pos_ < in()->length()) {
51 StackLimitCheck check(isolate()); 51 StackLimitCheck check(isolate());
52 if (check.HasOverflowed()) { 52 if (check.HasOverflowed()) {
53 ReportError(CStrVector(Isolate::kStackOverflowMessage)); 53 ReportError(CStrVector(Isolate::kStackOverflowMessage));
54 } else if (zone()->excess_allocation()) { 54 } else if (zone()->excess_allocation()) {
55 ReportError(CStrVector("Regular expression too large")); 55 ReportError(CStrVector("Regular expression too large"));
56 } else { 56 } else {
57 current_ = in()->Get(next_pos_); 57 current_ = in()->Get(next_pos_);
58 next_pos_++; 58 next_pos_++;
59 // Read the whole surrogate pair in case of unicode flag, if possible.
60 if (unicode_ && next_pos_ < in()->length() &&
61 unibrow::Utf16::IsLeadSurrogate(static_cast<uc16>(current_))) {
62 uc16 trail = in()->Get(next_pos_);
63 if (unibrow::Utf16::IsTrailSurrogate(trail)) {
64 current_ = unibrow::Utf16::CombineSurrogatePair(
65 static_cast<uc16>(current_), trail);
66 next_pos_++;
67 }
68 }
59 } 69 }
60 } else { 70 } else {
61 current_ = kEndMarker; 71 current_ = kEndMarker;
62 // Advance so that position() points to 1-after-the-last-character. This is 72 // Advance so that position() points to 1-after-the-last-character. This is
63 // important so that Reset() to this position works correctly. 73 // important so that Reset() to this position works correctly.
64 next_pos_ = in()->length() + 1; 74 next_pos_ = in()->length() + 1;
65 has_more_ = false; 75 has_more_ = false;
66 } 76 }
67 } 77 }
68 78
(...skipping 341 matching lines...) Expand 10 before | Expand all | Expand 10 after
410 // If the 'u' flag is present, invalid escapes are not treated as 420 // If the 'u' flag is present, invalid escapes are not treated as
411 // identity escapes. 421 // identity escapes.
412 return ReportError(CStrVector("Invalid escape")); 422 return ReportError(CStrVector("Invalid escape"));
413 } 423 }
414 break; 424 break;
415 } 425 }
416 case 'u': { 426 case 'u': {
417 Advance(2); 427 Advance(2);
418 uc32 value; 428 uc32 value;
419 if (ParseUnicodeEscape(&value)) { 429 if (ParseUnicodeEscape(&value)) {
420 if (value > unibrow::Utf16::kMaxNonSurrogateCharCode) { 430 builder->AddUnicodeCharacter(value);
421 builder->AddCharacter(unibrow::Utf16::LeadSurrogate(value));
422 builder->AddCharacter(unibrow::Utf16::TrailSurrogate(value));
423 } else {
424 builder->AddCharacter(static_cast<uc16>(value));
425 }
426 } else if (!FLAG_harmony_unicode_regexps || !unicode_) { 431 } else if (!FLAG_harmony_unicode_regexps || !unicode_) {
427 builder->AddCharacter('u'); 432 builder->AddCharacter('u');
428 } else { 433 } else {
429 // If the 'u' flag is present, invalid escapes are not treated as 434 // If the 'u' flag is present, invalid escapes are not treated as
430 // identity escapes. 435 // identity escapes.
431 return ReportError(CStrVector("Invalid unicode escape")); 436 return ReportError(CStrVector("Invalid unicode escape"));
432 } 437 }
433 break; 438 break;
434 } 439 }
435 default: 440 default:
(...skipping 14 matching lines...) Expand all
450 } 455 }
451 break; 456 break;
452 case '{': { 457 case '{': {
453 int dummy; 458 int dummy;
454 if (ParseIntervalQuantifier(&dummy, &dummy)) { 459 if (ParseIntervalQuantifier(&dummy, &dummy)) {
455 ReportError(CStrVector("Nothing to repeat") CHECK_FAILED); 460 ReportError(CStrVector("Nothing to repeat") CHECK_FAILED);
456 } 461 }
457 // fallthrough 462 // fallthrough
458 } 463 }
459 default: 464 default:
460 builder->AddCharacter(current()); 465 if (unicode_) {
rossberg 2016/01/11 12:25:41 Nit: is this `if` necessary? Can't you always use AddUnicodeCharacter?
Yang 2016/01/11 14:41:44 Good point. Done.
466 builder->AddUnicodeCharacter(current());
467 } else {
468 builder->AddCharacter(current());
469 }
461 Advance(); 470 Advance();
462 break; 471 break;
463 } // end switch(current()) 472 } // end switch(current())
464 473
465 int min; 474 int min;
466 int max; 475 int max;
467 switch (current()) { 476 switch (current()) {
468 // QuantifierPrefix :: 477 // QuantifierPrefix ::
469 // * 478 // *
470 // + 479 // +
(...skipping 581 matching lines...) Expand 10 before | Expand all | Expand 10 after
1052 void RegExpBuilder::AddCharacter(uc16 c) { 1061 void RegExpBuilder::AddCharacter(uc16 c) {
1053 pending_empty_ = false; 1062 pending_empty_ = false;
1054 if (characters_ == NULL) { 1063 if (characters_ == NULL) {
1055 characters_ = new (zone()) ZoneList<uc16>(4, zone()); 1064 characters_ = new (zone()) ZoneList<uc16>(4, zone());
1056 } 1065 }
1057 characters_->Add(c, zone()); 1066 characters_->Add(c, zone());
1058 LAST(ADD_CHAR); 1067 LAST(ADD_CHAR);
1059 } 1068 }
1060 1069
1061 1070
1071 void RegExpBuilder::AddUnicodeCharacter(uc32 c) {
1072 if (c > unibrow::Utf16::kMaxNonSurrogateCharCode) {
1073 ZoneList<uc16> surrogate_pair(2, zone());
1074 surrogate_pair.Add(unibrow::Utf16::LeadSurrogate(c), zone());
1075 surrogate_pair.Add(unibrow::Utf16::TrailSurrogate(c), zone());
1076 RegExpAtom* atom = new (zone()) RegExpAtom(surrogate_pair.ToConstVector());
1077 AddAtom(atom);
1078 } else {
1079 AddCharacter(static_cast<uc16>(c));
1080 }
1081 }
1082
1083
1062 void RegExpBuilder::AddEmpty() { pending_empty_ = true; } 1084 void RegExpBuilder::AddEmpty() { pending_empty_ = true; }
1063 1085
1064 1086
1065 void RegExpBuilder::AddAtom(RegExpTree* term) { 1087 void RegExpBuilder::AddAtom(RegExpTree* term) {
1066 if (term->IsEmpty()) { 1088 if (term->IsEmpty()) {
1067 AddEmpty(); 1089 AddEmpty();
1068 return; 1090 return;
1069 } 1091 }
1070 if (term->IsTextElement()) { 1092 if (term->IsTextElement()) {
1071 FlushCharacters(); 1093 FlushCharacters();
(...skipping 83 matching lines...) Expand 10 before | Expand all | Expand 10 after
1155 UNREACHABLE(); 1177 UNREACHABLE();
1156 return; 1178 return;
1157 } 1179 }
1158 terms_.Add(new (zone()) RegExpQuantifier(min, max, quantifier_type, atom), 1180 terms_.Add(new (zone()) RegExpQuantifier(min, max, quantifier_type, atom),
1159 zone()); 1181 zone());
1160 LAST(ADD_TERM); 1182 LAST(ADD_TERM);
1161 } 1183 }
1162 1184
1163 } // namespace internal 1185 } // namespace internal
1164 } // namespace v8 1186 } // namespace v8
OLDNEW
« no previous file with comments | « src/regexp/regexp-parser.h ('k') | test/cctest/test-regexp.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698