Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(660)

Side by Side Diff: src/regexp/regexp-parser.cc

Issue 1568623004: [regexp] correctly parse non-BMP unicode escapes in atoms. (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master
Patch Set: rebase correctly (created 4 years, 11 months ago)
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/parsing/scanner-character-streams.cc ('k') | test/cctest/test-regexp.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 // Copyright 2016 the V8 project authors. All rights reserved. 1 // Copyright 2016 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "src/regexp/regexp-parser.h" 5 #include "src/regexp/regexp-parser.h"
6 6
7 #include "src/char-predicates-inl.h" 7 #include "src/char-predicates-inl.h"
8 #include "src/factory.h" 8 #include "src/factory.h"
9 #include "src/isolate.h" 9 #include "src/isolate.h"
10 #include "src/objects-inl.h" 10 #include "src/objects-inl.h"
(...skipping 399 matching lines...) Expand 10 before | Expand all | Expand 10 after
410 // If the 'u' flag is present, invalid escapes are not treated as 410 // If the 'u' flag is present, invalid escapes are not treated as
411 // identity escapes. 411 // identity escapes.
412 return ReportError(CStrVector("Invalid escape")); 412 return ReportError(CStrVector("Invalid escape"));
413 } 413 }
414 break; 414 break;
415 } 415 }
416 case 'u': { 416 case 'u': {
417 Advance(2); 417 Advance(2);
418 uc32 value; 418 uc32 value;
419 if (ParseUnicodeEscape(&value)) { 419 if (ParseUnicodeEscape(&value)) {
420 builder->AddCharacter(value); 420 if (value > unibrow::Utf16::kMaxNonSurrogateCharCode) {
421 builder->AddCharacter(unibrow::Utf16::LeadSurrogate(value));
422 builder->AddCharacter(unibrow::Utf16::TrailSurrogate(value));
423 } else {
424 builder->AddCharacter(static_cast<uc16>(value));
425 }
421 } else if (!unicode_) { 426 } else if (!unicode_) {
422 builder->AddCharacter('u'); 427 builder->AddCharacter('u');
423 } else { 428 } else {
424 // If the 'u' flag is present, invalid escapes are not treated as 429 // If the 'u' flag is present, invalid escapes are not treated as
425 // identity escapes. 430 // identity escapes.
426 return ReportError(CStrVector("Invalid unicode escape")); 431 return ReportError(CStrVector("Invalid unicode escape"));
427 } 432 }
428 break; 433 break;
429 } 434 }
430 default: 435 default:
(...skipping 548 matching lines...) Expand 10 before | Expand all | Expand 10 after
979 bool unicode, RegExpCompileData* result) { 984 bool unicode, RegExpCompileData* result) {
980 DCHECK(result != NULL); 985 DCHECK(result != NULL);
981 RegExpParser parser(input, &result->error, multiline, unicode, isolate, zone); 986 RegExpParser parser(input, &result->error, multiline, unicode, isolate, zone);
982 RegExpTree* tree = parser.ParsePattern(); 987 RegExpTree* tree = parser.ParsePattern();
983 if (parser.failed()) { 988 if (parser.failed()) {
984 DCHECK(tree == NULL); 989 DCHECK(tree == NULL);
985 DCHECK(!result->error.is_null()); 990 DCHECK(!result->error.is_null());
986 } else { 991 } else {
987 DCHECK(tree != NULL); 992 DCHECK(tree != NULL);
988 DCHECK(result->error.is_null()); 993 DCHECK(result->error.is_null());
994 if (FLAG_trace_regexp_parser) {
995 OFStream os(stdout);
996 tree->Print(os, zone);
997 os << "\n";
998 }
989 result->tree = tree; 999 result->tree = tree;
990 int capture_count = parser.captures_started(); 1000 int capture_count = parser.captures_started();
991 result->simple = tree->IsAtom() && parser.simple() && capture_count == 0; 1001 result->simple = tree->IsAtom() && parser.simple() && capture_count == 0;
992 result->contains_anchor = parser.contains_anchor(); 1002 result->contains_anchor = parser.contains_anchor();
993 result->capture_count = capture_count; 1003 result->capture_count = capture_count;
994 } 1004 }
995 return !parser.failed(); 1005 return !parser.failed();
996 } 1006 }
997 1007
998 1008
(...skipping 144 matching lines...) Expand 10 before | Expand all | Expand 10 after
1143 UNREACHABLE(); 1153 UNREACHABLE();
1144 return; 1154 return;
1145 } 1155 }
1146 terms_.Add(new (zone()) RegExpQuantifier(min, max, quantifier_type, atom), 1156 terms_.Add(new (zone()) RegExpQuantifier(min, max, quantifier_type, atom),
1147 zone()); 1157 zone());
1148 LAST(ADD_TERM); 1158 LAST(ADD_TERM);
1149 } 1159 }
1150 1160
1151 } // namespace internal 1161 } // namespace internal
1152 } // namespace v8 1162 } // namespace v8
OLD | NEW
« no previous file with comments | « src/parsing/scanner-character-streams.cc ('k') | test/cctest/test-regexp.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698