OLD | NEW |
1 // Copyright 2016 the V8 project authors. All rights reserved. | 1 // Copyright 2016 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "src/regexp/regexp-parser.h" | 5 #include "src/regexp/regexp-parser.h" |
6 | 6 |
7 #include "src/char-predicates-inl.h" | 7 #include "src/char-predicates-inl.h" |
8 #include "src/factory.h" | 8 #include "src/factory.h" |
9 #include "src/isolate.h" | 9 #include "src/isolate.h" |
10 #include "src/objects-inl.h" | 10 #include "src/objects-inl.h" |
(...skipping 484 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
495 builder->AddCharacter('\v'); | 495 builder->AddCharacter('\v'); |
496 break; | 496 break; |
497 case 'c': { | 497 case 'c': { |
498 Advance(); | 498 Advance(); |
499 uc32 controlLetter = Next(); | 499 uc32 controlLetter = Next(); |
500 // Special case if it is an ASCII letter. | 500 // Special case if it is an ASCII letter. |
501 // Convert lower case letters to uppercase. | 501 // Convert lower case letters to uppercase. |
502 uc32 letter = controlLetter & ~('a' ^ 'A'); | 502 uc32 letter = controlLetter & ~('a' ^ 'A'); |
503 if (letter < 'A' || 'Z' < letter) { | 503 if (letter < 'A' || 'Z' < letter) { |
504 // controlLetter is not in range 'A'-'Z' or 'a'-'z'. | 504 // controlLetter is not in range 'A'-'Z' or 'a'-'z'. |
505 // This is outside the specification. We match JSC in | 505 // Read the backslash as a literal character instead of as |
506 // reading the backslash as a literal character instead | 506 // starting an escape. |
507 // of as starting an escape. | 507 // ES#prod-annexB-ExtendedPatternCharacter |
508 if (unicode()) { | 508 if (unicode()) { |
509 // With /u, invalid escapes are not treated as identity escapes. | 509 // With /u, invalid escapes are not treated as identity escapes. |
510 return ReportError(CStrVector("Invalid unicode escape")); | 510 return ReportError(CStrVector("Invalid unicode escape")); |
511 } | 511 } |
512 builder->AddCharacter('\\'); | 512 builder->AddCharacter('\\'); |
513 } else { | 513 } else { |
514 Advance(2); | 514 Advance(2); |
515 builder->AddCharacter(controlLetter & 0x1f); | 515 builder->AddCharacter(controlLetter & 0x1f); |
516 } | 516 } |
517 break; | 517 break; |
(...skipping 519 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1037 *min_out = min; | 1037 *min_out = min; |
1038 *max_out = max; | 1038 *max_out = max; |
1039 return true; | 1039 return true; |
1040 } | 1040 } |
1041 | 1041 |
1042 | 1042 |
1043 uc32 RegExpParser::ParseOctalLiteral() { | 1043 uc32 RegExpParser::ParseOctalLiteral() { |
1044 DCHECK(('0' <= current() && current() <= '7') || current() == kEndMarker); | 1044 DCHECK(('0' <= current() && current() <= '7') || current() == kEndMarker); |
1045 // For compatibility with some other browsers (not all), we parse | 1045 // For compatibility with some other browsers (not all), we parse |
1046 // up to three octal digits with a value below 256. | 1046 // up to three octal digits with a value below 256. |
| 1047 // ES#prod-annexB-LegacyOctalEscapeSequence |
1047 uc32 value = current() - '0'; | 1048 uc32 value = current() - '0'; |
1048 Advance(); | 1049 Advance(); |
1049 if ('0' <= current() && current() <= '7') { | 1050 if ('0' <= current() && current() <= '7') { |
1050 value = value * 8 + current() - '0'; | 1051 value = value * 8 + current() - '0'; |
1051 Advance(); | 1052 Advance(); |
1052 if (value < 32 && '0' <= current() && current() <= '7') { | 1053 if (value < 32 && '0' <= current() && current() <= '7') { |
1053 value = value * 8 + current() - '0'; | 1054 value = value * 8 + current() - '0'; |
1054 Advance(); | 1055 Advance(); |
1055 } | 1056 } |
1056 } | 1057 } |
(...skipping 268 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1325 return '\r'; | 1326 return '\r'; |
1326 case 't': | 1327 case 't': |
1327 Advance(); | 1328 Advance(); |
1328 return '\t'; | 1329 return '\t'; |
1329 case 'v': | 1330 case 'v': |
1330 Advance(); | 1331 Advance(); |
1331 return '\v'; | 1332 return '\v'; |
1332 case 'c': { | 1333 case 'c': { |
1333 uc32 controlLetter = Next(); | 1334 uc32 controlLetter = Next(); |
1334 uc32 letter = controlLetter & ~('A' ^ 'a'); | 1335 uc32 letter = controlLetter & ~('A' ^ 'a'); |
1335 // For compatibility with JSC, inside a character class. We also accept | 1336 // Inside a character class, we also accept digits and underscore as |
1336 // digits and underscore as control characters, unless with /u. | 1337 // control characters, unless with /u. See Annex B: |
| 1338 // ES#prod-annexB-ClassControlLetter |
1337 if (letter >= 'A' && letter <= 'Z') { | 1339 if (letter >= 'A' && letter <= 'Z') { |
1338 Advance(2); | 1340 Advance(2); |
1339 // Control letters mapped to ASCII control characters in the range | 1341 // Control letters mapped to ASCII control characters in the range |
1340 // 0x00-0x1f. | 1342 // 0x00-0x1f. |
1341 return controlLetter & 0x1f; | 1343 return controlLetter & 0x1f; |
1342 } | 1344 } |
1343 if (unicode()) { | 1345 if (unicode()) { |
1344 // With /u, invalid escapes are not treated as identity escapes. | 1346 // With /u, invalid escapes are not treated as identity escapes. |
1345 ReportError(CStrVector("Invalid class escape")); | 1347 ReportError(CStrVector("Invalid class escape")); |
1346 return 0; | 1348 return 0; |
1347 } | 1349 } |
1348 if ((controlLetter >= '0' && controlLetter <= '9') || | 1350 if ((controlLetter >= '0' && controlLetter <= '9') || |
1349 controlLetter == '_') { | 1351 controlLetter == '_') { |
1350 Advance(2); | 1352 Advance(2); |
1351 return controlLetter & 0x1f; | 1353 return controlLetter & 0x1f; |
1352 } | 1354 } |
1353 // We match JSC in reading the backslash as a literal | 1355 // We match JSC in reading the backslash as a literal |
1354 // character instead of as starting an escape. | 1356 // character instead of as starting an escape. |
| 1357 // TODO(v8:6201): Not yet covered by the spec. |
1355 return '\\'; | 1358 return '\\'; |
1356 } | 1359 } |
1357 case '0': | 1360 case '0': |
1358 // With /u, \0 is interpreted as NUL if not followed by another digit. | 1361 // With /u, \0 is interpreted as NUL if not followed by another digit. |
1359 if (unicode() && !(Next() >= '0' && Next() <= '9')) { | 1362 if (unicode() && !(Next() >= '0' && Next() <= '9')) { |
1360 Advance(); | 1363 Advance(); |
1361 return 0; | 1364 return 0; |
1362 } | 1365 } |
1363 // Fall through. | 1366 // Fall through. |
1364 case '1': | 1367 case '1': |
1365 case '2': | 1368 case '2': |
1366 case '3': | 1369 case '3': |
1367 case '4': | 1370 case '4': |
1368 case '5': | 1371 case '5': |
1369 case '6': | 1372 case '6': |
1370 case '7': | 1373 case '7': |
1371 // For compatibility, we interpret a decimal escape that isn't | 1374 // For compatibility, we interpret a decimal escape that isn't |
1372 // a back reference (and therefore either \0 or not valid according | 1375 // a back reference (and therefore either \0 or not valid according |
1373 // to the specification) as a 1..3 digit octal character code. | 1376 // to the specification) as a 1..3 digit octal character code. |
| 1377 // ES#prod-annexB-LegacyOctalEscapeSequence |
1374 if (unicode()) { | 1378 if (unicode()) { |
1375 // With /u, decimal escape is not interpreted as octal character code. | 1379 // With /u, decimal escape is not interpreted as octal character code. |
1376 ReportError(CStrVector("Invalid class escape")); | 1380 ReportError(CStrVector("Invalid class escape")); |
1377 return 0; | 1381 return 0; |
1378 } | 1382 } |
1379 return ParseOctalLiteral(); | 1383 return ParseOctalLiteral(); |
1380 case 'x': { | 1384 case 'x': { |
1381 Advance(); | 1385 Advance(); |
1382 uc32 value; | 1386 uc32 value; |
1383 if (ParseHexEscape(2, &value)) return value; | 1387 if (ParseHexEscape(2, &value)) return value; |
(...skipping 486 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1870 return false; | 1874 return false; |
1871 } | 1875 } |
1872 terms_.Add(new (zone()) RegExpQuantifier(min, max, quantifier_type, atom), | 1876 terms_.Add(new (zone()) RegExpQuantifier(min, max, quantifier_type, atom), |
1873 zone()); | 1877 zone()); |
1874 LAST(ADD_TERM); | 1878 LAST(ADD_TERM); |
1875 return true; | 1879 return true; |
1876 } | 1880 } |
1877 | 1881 |
1878 } // namespace internal | 1882 } // namespace internal |
1879 } // namespace v8 | 1883 } // namespace v8 |
OLD | NEW |