| OLD | NEW |
| 1 // Copyright 2016 the V8 project authors. All rights reserved. | 1 // Copyright 2016 the V8 project authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "src/regexp/regexp-parser.h" | 5 #include "src/regexp/regexp-parser.h" |
| 6 | 6 |
| 7 #include "src/char-predicates-inl.h" | 7 #include "src/char-predicates-inl.h" |
| 8 #include "src/factory.h" | 8 #include "src/factory.h" |
| 9 #include "src/isolate.h" | 9 #include "src/isolate.h" |
| 10 #include "src/objects-inl.h" | 10 #include "src/objects-inl.h" |
| (...skipping 484 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 495 builder->AddCharacter('\v'); | 495 builder->AddCharacter('\v'); |
| 496 break; | 496 break; |
| 497 case 'c': { | 497 case 'c': { |
| 498 Advance(); | 498 Advance(); |
| 499 uc32 controlLetter = Next(); | 499 uc32 controlLetter = Next(); |
| 500 // Special case if it is an ASCII letter. | 500 // Special case if it is an ASCII letter. |
| 501 // Convert lower case letters to uppercase. | 501 // Convert lower case letters to uppercase. |
| 502 uc32 letter = controlLetter & ~('a' ^ 'A'); | 502 uc32 letter = controlLetter & ~('a' ^ 'A'); |
| 503 if (letter < 'A' || 'Z' < letter) { | 503 if (letter < 'A' || 'Z' < letter) { |
| 504 // controlLetter is not in range 'A'-'Z' or 'a'-'z'. | 504 // controlLetter is not in range 'A'-'Z' or 'a'-'z'. |
| 505 // This is outside the specification. We match JSC in | 505 // Read the backslash as a literal character instead of as |
| 506 // reading the backslash as a literal character instead | 506 // starting an escape. |
| 507 // of as starting an escape. | 507 // ES#prod-annexB-ExtendedPatternCharacter |
| 508 if (unicode()) { | 508 if (unicode()) { |
| 509 // With /u, invalid escapes are not treated as identity escapes. | 509 // With /u, invalid escapes are not treated as identity escapes. |
| 510 return ReportError(CStrVector("Invalid unicode escape")); | 510 return ReportError(CStrVector("Invalid unicode escape")); |
| 511 } | 511 } |
| 512 builder->AddCharacter('\\'); | 512 builder->AddCharacter('\\'); |
| 513 } else { | 513 } else { |
| 514 Advance(2); | 514 Advance(2); |
| 515 builder->AddCharacter(controlLetter & 0x1f); | 515 builder->AddCharacter(controlLetter & 0x1f); |
| 516 } | 516 } |
| 517 break; | 517 break; |
| (...skipping 519 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1037 *min_out = min; | 1037 *min_out = min; |
| 1038 *max_out = max; | 1038 *max_out = max; |
| 1039 return true; | 1039 return true; |
| 1040 } | 1040 } |
| 1041 | 1041 |
| 1042 | 1042 |
| 1043 uc32 RegExpParser::ParseOctalLiteral() { | 1043 uc32 RegExpParser::ParseOctalLiteral() { |
| 1044 DCHECK(('0' <= current() && current() <= '7') || current() == kEndMarker); | 1044 DCHECK(('0' <= current() && current() <= '7') || current() == kEndMarker); |
| 1045 // For compatibility with some other browsers (not all), we parse | 1045 // For compatibility with some other browsers (not all), we parse |
| 1046 // up to three octal digits with a value below 256. | 1046 // up to three octal digits with a value below 256. |
| 1047 // ES#prod-annexB-LegacyOctalEscapeSequence |
| 1047 uc32 value = current() - '0'; | 1048 uc32 value = current() - '0'; |
| 1048 Advance(); | 1049 Advance(); |
| 1049 if ('0' <= current() && current() <= '7') { | 1050 if ('0' <= current() && current() <= '7') { |
| 1050 value = value * 8 + current() - '0'; | 1051 value = value * 8 + current() - '0'; |
| 1051 Advance(); | 1052 Advance(); |
| 1052 if (value < 32 && '0' <= current() && current() <= '7') { | 1053 if (value < 32 && '0' <= current() && current() <= '7') { |
| 1053 value = value * 8 + current() - '0'; | 1054 value = value * 8 + current() - '0'; |
| 1054 Advance(); | 1055 Advance(); |
| 1055 } | 1056 } |
| 1056 } | 1057 } |
| (...skipping 268 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1325 return '\r'; | 1326 return '\r'; |
| 1326 case 't': | 1327 case 't': |
| 1327 Advance(); | 1328 Advance(); |
| 1328 return '\t'; | 1329 return '\t'; |
| 1329 case 'v': | 1330 case 'v': |
| 1330 Advance(); | 1331 Advance(); |
| 1331 return '\v'; | 1332 return '\v'; |
| 1332 case 'c': { | 1333 case 'c': { |
| 1333 uc32 controlLetter = Next(); | 1334 uc32 controlLetter = Next(); |
| 1334 uc32 letter = controlLetter & ~('A' ^ 'a'); | 1335 uc32 letter = controlLetter & ~('A' ^ 'a'); |
| 1335 // For compatibility with JSC, inside a character class. We also accept | 1336 // Inside a character class, we also accept digits and underscore as |
| 1336 // digits and underscore as control characters, unless with /u. | 1337 // control characters, unless with /u. See Annex B: |
| 1338 // ES#prod-annexB-ClassControlLetter |
| 1337 if (letter >= 'A' && letter <= 'Z') { | 1339 if (letter >= 'A' && letter <= 'Z') { |
| 1338 Advance(2); | 1340 Advance(2); |
| 1339 // Control letters mapped to ASCII control characters in the range | 1341 // Control letters mapped to ASCII control characters in the range |
| 1340 // 0x00-0x1f. | 1342 // 0x00-0x1f. |
| 1341 return controlLetter & 0x1f; | 1343 return controlLetter & 0x1f; |
| 1342 } | 1344 } |
| 1343 if (unicode()) { | 1345 if (unicode()) { |
| 1344 // With /u, invalid escapes are not treated as identity escapes. | 1346 // With /u, invalid escapes are not treated as identity escapes. |
| 1345 ReportError(CStrVector("Invalid class escape")); | 1347 ReportError(CStrVector("Invalid class escape")); |
| 1346 return 0; | 1348 return 0; |
| 1347 } | 1349 } |
| 1348 if ((controlLetter >= '0' && controlLetter <= '9') || | 1350 if ((controlLetter >= '0' && controlLetter <= '9') || |
| 1349 controlLetter == '_') { | 1351 controlLetter == '_') { |
| 1350 Advance(2); | 1352 Advance(2); |
| 1351 return controlLetter & 0x1f; | 1353 return controlLetter & 0x1f; |
| 1352 } | 1354 } |
| 1353 // We match JSC in reading the backslash as a literal | 1355 // We match JSC in reading the backslash as a literal |
| 1354 // character instead of as starting an escape. | 1356 // character instead of as starting an escape. |
| 1357 // TODO(v8:6201): Not yet covered by the spec. |
| 1355 return '\\'; | 1358 return '\\'; |
| 1356 } | 1359 } |
| 1357 case '0': | 1360 case '0': |
| 1358 // With /u, \0 is interpreted as NUL if not followed by another digit. | 1361 // With /u, \0 is interpreted as NUL if not followed by another digit. |
| 1359 if (unicode() && !(Next() >= '0' && Next() <= '9')) { | 1362 if (unicode() && !(Next() >= '0' && Next() <= '9')) { |
| 1360 Advance(); | 1363 Advance(); |
| 1361 return 0; | 1364 return 0; |
| 1362 } | 1365 } |
| 1363 // Fall through. | 1366 // Fall through. |
| 1364 case '1': | 1367 case '1': |
| 1365 case '2': | 1368 case '2': |
| 1366 case '3': | 1369 case '3': |
| 1367 case '4': | 1370 case '4': |
| 1368 case '5': | 1371 case '5': |
| 1369 case '6': | 1372 case '6': |
| 1370 case '7': | 1373 case '7': |
| 1371 // For compatibility, we interpret a decimal escape that isn't | 1374 // For compatibility, we interpret a decimal escape that isn't |
| 1372 // a back reference (and therefore either \0 or not valid according | 1375 // a back reference (and therefore either \0 or not valid according |
| 1373 // to the specification) as a 1..3 digit octal character code. | 1376 // to the specification) as a 1..3 digit octal character code. |
| 1377 // ES#prod-annexB-LegacyOctalEscapeSequence |
| 1374 if (unicode()) { | 1378 if (unicode()) { |
| 1375 // With /u, decimal escape is not interpreted as octal character code. | 1379 // With /u, decimal escape is not interpreted as octal character code. |
| 1376 ReportError(CStrVector("Invalid class escape")); | 1380 ReportError(CStrVector("Invalid class escape")); |
| 1377 return 0; | 1381 return 0; |
| 1378 } | 1382 } |
| 1379 return ParseOctalLiteral(); | 1383 return ParseOctalLiteral(); |
| 1380 case 'x': { | 1384 case 'x': { |
| 1381 Advance(); | 1385 Advance(); |
| 1382 uc32 value; | 1386 uc32 value; |
| 1383 if (ParseHexEscape(2, &value)) return value; | 1387 if (ParseHexEscape(2, &value)) return value; |
| (...skipping 486 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1870 return false; | 1874 return false; |
| 1871 } | 1875 } |
| 1872 terms_.Add(new (zone()) RegExpQuantifier(min, max, quantifier_type, atom), | 1876 terms_.Add(new (zone()) RegExpQuantifier(min, max, quantifier_type, atom), |
| 1873 zone()); | 1877 zone()); |
| 1874 LAST(ADD_TERM); | 1878 LAST(ADD_TERM); |
| 1875 return true; | 1879 return true; |
| 1876 } | 1880 } |
| 1877 | 1881 |
| 1878 } // namespace internal | 1882 } // namespace internal |
| 1879 } // namespace v8 | 1883 } // namespace v8 |
| OLD | NEW |