Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(162)

Side by Side Diff: src/regexp/regexp-parser.cc

Issue 2793313002: [regexp] Add tests for recent changes in Annex B (Closed)
Patch Set: Rebase and address comments Created 3 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | test/mjsunit/regexp.js » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 // Copyright 2016 the V8 project authors. All rights reserved. 1 // Copyright 2016 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "src/regexp/regexp-parser.h" 5 #include "src/regexp/regexp-parser.h"
6 6
7 #include "src/char-predicates-inl.h" 7 #include "src/char-predicates-inl.h"
8 #include "src/factory.h" 8 #include "src/factory.h"
9 #include "src/isolate.h" 9 #include "src/isolate.h"
10 #include "src/objects-inl.h" 10 #include "src/objects-inl.h"
(...skipping 484 matching lines...) Expand 10 before | Expand all | Expand 10 after
495 builder->AddCharacter('\v'); 495 builder->AddCharacter('\v');
496 break; 496 break;
497 case 'c': { 497 case 'c': {
498 Advance(); 498 Advance();
499 uc32 controlLetter = Next(); 499 uc32 controlLetter = Next();
500 // Special case if it is an ASCII letter. 500 // Special case if it is an ASCII letter.
501 // Convert lower case letters to uppercase. 501 // Convert lower case letters to uppercase.
502 uc32 letter = controlLetter & ~('a' ^ 'A'); 502 uc32 letter = controlLetter & ~('a' ^ 'A');
503 if (letter < 'A' || 'Z' < letter) { 503 if (letter < 'A' || 'Z' < letter) {
504 // controlLetter is not in range 'A'-'Z' or 'a'-'z'. 504 // controlLetter is not in range 'A'-'Z' or 'a'-'z'.
505 // This is outside the specification. We match JSC in 505 // Read the backslash as a literal character instead of as
506 // reading the backslash as a literal character instead 506 // starting an escape.
507 // of as starting an escape. 507 // ES#prod-annexB-ExtendedPatternCharacter
508 if (unicode()) { 508 if (unicode()) {
509 // With /u, invalid escapes are not treated as identity escapes. 509 // With /u, invalid escapes are not treated as identity escapes.
510 return ReportError(CStrVector("Invalid unicode escape")); 510 return ReportError(CStrVector("Invalid unicode escape"));
511 } 511 }
512 builder->AddCharacter('\\'); 512 builder->AddCharacter('\\');
513 } else { 513 } else {
514 Advance(2); 514 Advance(2);
515 builder->AddCharacter(controlLetter & 0x1f); 515 builder->AddCharacter(controlLetter & 0x1f);
516 } 516 }
517 break; 517 break;
(...skipping 519 matching lines...) Expand 10 before | Expand all | Expand 10 after
1037 *min_out = min; 1037 *min_out = min;
1038 *max_out = max; 1038 *max_out = max;
1039 return true; 1039 return true;
1040 } 1040 }
1041 1041
1042 1042
1043 uc32 RegExpParser::ParseOctalLiteral() { 1043 uc32 RegExpParser::ParseOctalLiteral() {
1044 DCHECK(('0' <= current() && current() <= '7') || current() == kEndMarker); 1044 DCHECK(('0' <= current() && current() <= '7') || current() == kEndMarker);
1045 // For compatibility with some other browsers (not all), we parse 1045 // For compatibility with some other browsers (not all), we parse
1046 // up to three octal digits with a value below 256. 1046 // up to three octal digits with a value below 256.
1047 // ES#prod-annexB-LegacyOctalEscapeSequence
1047 uc32 value = current() - '0'; 1048 uc32 value = current() - '0';
1048 Advance(); 1049 Advance();
1049 if ('0' <= current() && current() <= '7') { 1050 if ('0' <= current() && current() <= '7') {
1050 value = value * 8 + current() - '0'; 1051 value = value * 8 + current() - '0';
1051 Advance(); 1052 Advance();
1052 if (value < 32 && '0' <= current() && current() <= '7') { 1053 if (value < 32 && '0' <= current() && current() <= '7') {
1053 value = value * 8 + current() - '0'; 1054 value = value * 8 + current() - '0';
1054 Advance(); 1055 Advance();
1055 } 1056 }
1056 } 1057 }
(...skipping 268 matching lines...) Expand 10 before | Expand all | Expand 10 after
1325 return '\r'; 1326 return '\r';
1326 case 't': 1327 case 't':
1327 Advance(); 1328 Advance();
1328 return '\t'; 1329 return '\t';
1329 case 'v': 1330 case 'v':
1330 Advance(); 1331 Advance();
1331 return '\v'; 1332 return '\v';
1332 case 'c': { 1333 case 'c': {
1333 uc32 controlLetter = Next(); 1334 uc32 controlLetter = Next();
1334 uc32 letter = controlLetter & ~('A' ^ 'a'); 1335 uc32 letter = controlLetter & ~('A' ^ 'a');
1335 // For compatibility with JSC, inside a character class. We also accept 1336 // Inside a character class, we also accept digits and underscore as
1336 // digits and underscore as control characters, unless with /u. 1337 // control characters, unless with /u. See Annex B:
1338 // ES#prod-annexB-ClassControlLetter
1337 if (letter >= 'A' && letter <= 'Z') { 1339 if (letter >= 'A' && letter <= 'Z') {
1338 Advance(2); 1340 Advance(2);
1339 // Control letters mapped to ASCII control characters in the range 1341 // Control letters mapped to ASCII control characters in the range
1340 // 0x00-0x1f. 1342 // 0x00-0x1f.
1341 return controlLetter & 0x1f; 1343 return controlLetter & 0x1f;
1342 } 1344 }
1343 if (unicode()) { 1345 if (unicode()) {
1344 // With /u, invalid escapes are not treated as identity escapes. 1346 // With /u, invalid escapes are not treated as identity escapes.
1345 ReportError(CStrVector("Invalid class escape")); 1347 ReportError(CStrVector("Invalid class escape"));
1346 return 0; 1348 return 0;
1347 } 1349 }
1348 if ((controlLetter >= '0' && controlLetter <= '9') || 1350 if ((controlLetter >= '0' && controlLetter <= '9') ||
1349 controlLetter == '_') { 1351 controlLetter == '_') {
1350 Advance(2); 1352 Advance(2);
1351 return controlLetter & 0x1f; 1353 return controlLetter & 0x1f;
1352 } 1354 }
1353 // We match JSC in reading the backslash as a literal 1355 // We match JSC in reading the backslash as a literal
1354 // character instead of as starting an escape. 1356 // character instead of as starting an escape.
1357 // TODO(v8:6201): Not yet covered by the spec.
1355 return '\\'; 1358 return '\\';
1356 } 1359 }
1357 case '0': 1360 case '0':
1358 // With /u, \0 is interpreted as NUL if not followed by another digit. 1361 // With /u, \0 is interpreted as NUL if not followed by another digit.
1359 if (unicode() && !(Next() >= '0' && Next() <= '9')) { 1362 if (unicode() && !(Next() >= '0' && Next() <= '9')) {
1360 Advance(); 1363 Advance();
1361 return 0; 1364 return 0;
1362 } 1365 }
1363 // Fall through. 1366 // Fall through.
1364 case '1': 1367 case '1':
1365 case '2': 1368 case '2':
1366 case '3': 1369 case '3':
1367 case '4': 1370 case '4':
1368 case '5': 1371 case '5':
1369 case '6': 1372 case '6':
1370 case '7': 1373 case '7':
1371 // For compatibility, we interpret a decimal escape that isn't 1374 // For compatibility, we interpret a decimal escape that isn't
1372 // a back reference (and therefore either \0 or not valid according 1375 // a back reference (and therefore either \0 or not valid according
1373 // to the specification) as a 1..3 digit octal character code. 1376 // to the specification) as a 1..3 digit octal character code.
1377 // ES#prod-annexB-LegacyOctalEscapeSequence
1374 if (unicode()) { 1378 if (unicode()) {
1375 // With /u, decimal escape is not interpreted as octal character code. 1379 // With /u, decimal escape is not interpreted as octal character code.
1376 ReportError(CStrVector("Invalid class escape")); 1380 ReportError(CStrVector("Invalid class escape"));
1377 return 0; 1381 return 0;
1378 } 1382 }
1379 return ParseOctalLiteral(); 1383 return ParseOctalLiteral();
1380 case 'x': { 1384 case 'x': {
1381 Advance(); 1385 Advance();
1382 uc32 value; 1386 uc32 value;
1383 if (ParseHexEscape(2, &value)) return value; 1387 if (ParseHexEscape(2, &value)) return value;
(...skipping 486 matching lines...) Expand 10 before | Expand all | Expand 10 after
1870 return false; 1874 return false;
1871 } 1875 }
1872 terms_.Add(new (zone()) RegExpQuantifier(min, max, quantifier_type, atom), 1876 terms_.Add(new (zone()) RegExpQuantifier(min, max, quantifier_type, atom),
1873 zone()); 1877 zone());
1874 LAST(ADD_TERM); 1878 LAST(ADD_TERM);
1875 return true; 1879 return true;
1876 } 1880 }
1877 1881
1878 } // namespace internal 1882 } // namespace internal
1879 } // namespace v8 1883 } // namespace v8
OLD | NEW
« no previous file with comments | « no previous file | test/mjsunit/regexp.js » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698