Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(489)

Side by Side Diff: src/builtins/builtins-regexp.cc

Issue 2401643002: [regexp] Port split (Closed)
Patch Set: Created 4 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/builtins/builtins.h ('k') | src/js/regexp.js » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2016 the V8 project authors. All rights reserved. 1 // Copyright 2016 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "src/builtins/builtins-utils.h" 5 #include "src/builtins/builtins-utils.h"
6 #include "src/builtins/builtins.h" 6 #include "src/builtins/builtins.h"
7 7
8 #include "src/code-factory.h" 8 #include "src/code-factory.h"
9 #include "src/regexp/jsregexp.h" 9 #include "src/regexp/jsregexp.h"
10 #include "src/string-builder.h" 10 #include "src/string-builder.h"
(...skipping 1217 matching lines...) Expand 10 before | Expand all | Expand 10 after
1228 previous_last_index_obj, STRICT)); 1228 previous_last_index_obj, STRICT));
1229 } 1229 }
1230 } 1230 }
1231 1231
1232 if (result->IsNull(isolate)) return Smi::FromInt(-1); 1232 if (result->IsNull(isolate)) return Smi::FromInt(-1);
1233 1233
1234 RETURN_RESULT_OR_FAILURE( 1234 RETURN_RESULT_OR_FAILURE(
1235 isolate, Object::GetProperty(result, isolate->factory()->index_string())); 1235 isolate, Object::GetProperty(result, isolate->factory()->index_string()));
1236 } 1236 }
1237 1237
1238 namespace {
1239
// Converts |object| to an unsigned 32-bit integer and writes it to |*out|.
//
// An undefined |object| yields kMaxUInt32. Any other value is first passed
// through Object::ToNumber and then narrowed with NumberToUint32.
//
// On success the ORIGINAL |object| handle is returned (not the converted
// number); callers in this file only use the return value to detect failure.
// If Object::ToNumber fails, ASSIGN_RETURN_ON_EXCEPTION returns early
// (presumably with an empty MaybeHandle and a pending exception -- the macro
// is defined outside this view) and |*out| is left untouched.
1240 MUST_USE_RESULT MaybeHandle<Object> ToUint32(Isolate* isolate,
1241 Handle<Object> object,
1242 uint32_t* out) {
1243 if (object->IsUndefined(isolate)) {
1244 *out = kMaxUInt32;
1245 return object;
1246 }
1247
1248 Handle<Object> number;
1249 ASSIGN_RETURN_ON_EXCEPTION(isolate, number, Object::ToNumber(object), Object);
1250 *out = NumberToUint32(*number);
1251 return object;
1252 }
1253
// Returns true iff the code units at |index| and |index + 1| of |string| are
// a lead surrogate (0xD800..0xDBFF) followed by a trail surrogate
// (0xDC00..0xDFFF). Returns false when |index + 1| is past the end of the
// string. |isolate| is not used by the body.
1254 bool AtSurrogatePair(Isolate* isolate, Handle<String> string, int index) {
// Need two code units starting at |index|.
1255 if (index + 1 >= string->length()) return false;
1256 const uint16_t first = string->Get(index);
1257 if (first < 0xD800 || first > 0xDBFF) return false;
1258 const uint16_t second = string->Get(index + 1);
1259 return (second >= 0xDC00 && second <= 0xDFFF);
1260 }
1261
// Splits |string| around matches of |regexp| by calling RegExpImpl::Exec
// directly and reading the match positions out of the returned match-info
// object (elements starting at RegExpImpl::kFirstCapture).
//
// |limit_obj| is converted with ToUint32 above (undefined => kMaxUInt32).
// A limit of 0 produces an empty array. For an empty |string| the result is
// an empty array if the regexp matches at index 0, and a one-element array
// containing |string| otherwise.
//
// In the general case the result array holds, in order, the substrings
// between matches and, after each match, one entry per capture group
// (undefined for groups whose recorded end is -1). Collection stops as soon
// as |num_elems| reaches |limit|.
//
// Returns early via the *_ON_EXCEPTION macros if limit conversion or
// RegExpImpl::Exec fails.
1262 MaybeHandle<JSArray> RegExpSplit(Isolate* isolate, Handle<JSRegExp> regexp,
1263 Handle<String> string,
1264 Handle<Object> limit_obj) {
1265 Factory* factory = isolate->factory();
1266
1267 uint32_t limit;
1268 RETURN_ON_EXCEPTION(isolate, ToUint32(isolate, limit_obj, &limit), JSArray);
1269
1270 const int length = string->length();
1271
1272 if (limit == 0) return factory->NewJSArray(0);
1273
1274 Handle<JSObject> last_match_info = isolate->regexp_last_match_info();
1275
// Empty subject: the only question is whether the regexp matches "".
1276 if (length == 0) {
1277 Handle<Object> match_indices;
1278 ASSIGN_RETURN_ON_EXCEPTION(
1279 isolate, match_indices,
1280 RegExpImpl::Exec(regexp, string, 0, last_match_info), JSArray);
1281
1282 if (!match_indices->IsNull(isolate)) return factory->NewJSArray(0);
1283
1284 Handle<FixedArray> elems = factory->NewUninitializedFixedArray(1);
1285 elems->set(0, *string);
1286 return factory->NewJSArrayWithElements(elems);
1287 }
1288
// current_index: start of the substring that has not been emitted yet.
// start_index:   position from which the next Exec call searches.
// start_match:   start position of the most recent match.
1289 int current_index = 0;
1290 int start_index = 0;
1291 int start_match = 0;
1292
// Backing store grows on demand via FixedArray::SetAndGrow and is trimmed to
// |num_elems| before being wrapped in a JSArray.
1293 static const int kInitialArraySize = 8;
1294 Handle<FixedArray> elems = factory->NewFixedArrayWithHoles(kInitialArraySize);
1295 int num_elems = 0;
1296
1297 while (true) {
// Search position reached the end: emit the remaining tail and stop.
1298 if (start_index == length) {
1299 Handle<String> substr =
1300 factory->NewSubString(string, current_index, length);
1301 elems = FixedArray::SetAndGrow(elems, num_elems++, substr);
1302 break;
1303 }
1304
1305 Handle<Object> match_indices_obj;
1306 ASSIGN_RETURN_ON_EXCEPTION(
1307 isolate, match_indices_obj,
1308 RegExpImpl::Exec(regexp, string, start_index, last_match_info),
1309 JSArray);
1310
// No further match: emit the remaining tail and stop.
1311 if (match_indices_obj->IsNull(isolate)) {
1312 Handle<String> substr =
1313 factory->NewSubString(string, current_index, length);
1314 elems = FixedArray::SetAndGrow(elems, num_elems++, substr);
1315 break;
1316 }
1317
1318 auto match_indices = Handle<JSReceiver>::cast(match_indices_obj);
1319
// The element reads below use ToHandleChecked() and Smi casts, i.e. they
// assume the match-info object always holds Smi entries at these indices.
1320 Handle<Object> start_match_obj =
1321 JSReceiver::GetElement(isolate, match_indices,
1322 RegExpImpl::kFirstCapture)
1323 .ToHandleChecked();
1324 start_match = Handle<Smi>::cast(start_match_obj)->value();
1325
// A match that begins at the very end of the string does not split
// anything: emit the remaining tail and stop.
1326 if (start_match == length) {
1327 Handle<String> substr =
1328 factory->NewSubString(string, current_index, length);
1329 elems = FixedArray::SetAndGrow(elems, num_elems++, substr);
1330 break;
1331 }
1332
1333 Handle<Object> end_index_obj =
1334 JSReceiver::GetElement(isolate, match_indices,
1335 RegExpImpl::kFirstCapture + 1)
1336 .ToHandleChecked();
1337 const int end_index = Handle<Smi>::cast(end_index_obj)->value();
1338
// The match ended exactly where the search started and where the pending
// substring begins. Skip one code unit (or a whole surrogate pair for
// unicode regexps) and retry, so the loop always makes progress.
1339 if (start_index == end_index && end_index == current_index) {
1340 const bool unicode = (regexp->GetFlags() & JSRegExp::kUnicode) != 0;
1341 if (unicode && AtSurrogatePair(isolate, string, start_index)) {
1342 start_index += 2;
1343 } else {
1344 start_index += 1;
1345 }
1346 continue;
1347 }
1348
// Emit the text between the previous split point and this match.
1349 {
1350 Handle<String> substr =
1351 factory->NewSubString(string, current_index, start_match);
1352 elems = FixedArray::SetAndGrow(elems, num_elems++, substr);
1353 }
1354
// NOTE(review): |num_elems| is int while |limit| is uint32_t, so this (and
// the check inside the capture loop below) is a signed/unsigned comparison.
1355 if (num_elems == limit) break;
1356
1357 // TODO(jgruber): Refactor GetLastMatchInfo methods to take an input
1358 // argument.
1359 Handle<Object> num_captures_obj =
1360 JSReceiver::GetElement(isolate, match_indices,
1361 RegExpImpl::kLastCaptureCount)
1362 .ToHandleChecked();
1363 const int match_indices_len = Handle<Smi>::cast(num_captures_obj)->value() +
1364 RegExpImpl::kFirstCapture;
1365
// Capture groups are stored as (start, end) pairs after the whole-match
// pair; |i| is advanced twice per iteration inside the loop body.
1366 for (int i = RegExpImpl::kFirstCapture + 2; i < match_indices_len;) {
1367 Handle<Object> start_obj =
1368 JSReceiver::GetElement(isolate, match_indices, i++).ToHandleChecked();
1369 const int start = Handle<Smi>::cast(start_obj)->value();
1370
1371 Handle<Object> end_obj =
1372 JSReceiver::GetElement(isolate, match_indices, i++).ToHandleChecked();
1373 const int end = Handle<Smi>::cast(end_obj)->value();
1374
// An end of -1 is treated as "group did not participate" => undefined.
1375 if (end != -1) {
1376 Handle<String> substr = factory->NewSubString(string, start, end);
1377 elems = FixedArray::SetAndGrow(elems, num_elems++, substr);
1378 } else {
1379 elems = FixedArray::SetAndGrow(elems, num_elems++,
1380 factory->undefined_value());
1381 }
1382
1383 if (num_elems == limit) goto out;
1384 }
1385
// Continue searching right after this match.
1386 start_index = current_index = end_index;
1387 }
1388
// Common exit: trim the backing store to the number of collected elements.
1389 out:
Yang 2016/10/07 13:37:23 Not a fan of goto... I suggest simply duplicating
jgruber 2016/10/10 13:29:12 Personally, I don't mind this goto pattern - I thi
1390 elems->Shrink(num_elems);
1391 return factory->NewJSArrayWithElements(elems);
1392 }
1393
1394 // ES#sec-speciesconstructor
1395 // SpeciesConstructor ( O, defaultConstructor )
//
// Looks up recv.constructor and then constructor[@@species] to decide which
// constructor should be used to create derived objects.
//
// Returns |default_ctor| when recv.constructor is undefined or when the
// species value is null or undefined; returns the species value when it is a
// constructor. Throws a TypeError (kConstructorNotReceiver) if
// recv.constructor is neither undefined nor a JSReceiver, and a TypeError
// (kSpeciesNotConstructor) if the species value is not a constructor. Either
// property lookup may also fail, in which case the function returns early via
// ASSIGN_RETURN_ON_EXCEPTION.
//
// Note that the returned object is only known to satisfy IsConstructor(); it
// is not necessarily a JSFunction.
1396 MaybeHandle<Object> SpeciesConstructor(Isolate* isolate,
1397 Handle<JSReceiver> recv,
1398 Handle<JSFunction> default_ctor) {
1399 Handle<Object> ctor_obj;
1400 ASSIGN_RETURN_ON_EXCEPTION(
1401 isolate, ctor_obj,
1402 JSObject::GetProperty(recv, isolate->factory()->constructor_string()),
1403 Object);
1404
1405 if (ctor_obj->IsUndefined(isolate)) return default_ctor;
1406
1407 if (!ctor_obj->IsJSReceiver()) {
1408 THROW_NEW_ERROR(isolate,
1409 NewTypeError(MessageTemplate::kConstructorNotReceiver),
1410 Object);
1411 }
1412
1413 Handle<JSReceiver> ctor = Handle<JSReceiver>::cast(ctor_obj);
1414
1415 Handle<Object> species;
1416 ASSIGN_RETURN_ON_EXCEPTION(
1417 isolate, species,
1418 JSObject::GetProperty(ctor, isolate->factory()->species_symbol()),
1419 Object);
1420
1421 if (species->IsNull(isolate) || species->IsUndefined(isolate)) {
1422 return default_ctor;
1423 }
1424
1425 if (species->IsConstructor()) return species;
1426
1427 THROW_NEW_ERROR(
1428 isolate, NewTypeError(MessageTemplate::kSpeciesNotConstructor), Object);
1429 }
1430
// Returns true iff |exec| is a JSFunction whose code object is the
// Builtins::kRegExpPrototypeExec builtin. Non-function values and functions
// with a null code pointer yield false.
1431 bool IsBuiltinExec(Handle<Object> exec) {
1432 if (!exec->IsJSFunction()) return false;
1433
1434 Code* code = Handle<JSFunction>::cast(exec)->code();
1435 if (code == nullptr) return false;
1436
1437 return (code->builtin_index() == Builtins::kRegExpPrototypeExec);
1438 }
1439
1440 } // namespace
1441
1442 // ES#sec-regexp.prototype-@@split
1443 // RegExp.prototype [ @@split ] ( string, limit )
//
// Argument 1 is the subject string (converted with Object::ToString) and
// argument 2 is the optional limit.
//
// Fast path: if the receiver is a JSRegExp, its species constructor is the
// default RegExp function, and its "exec" property is the builtin exec, the
// work is delegated to RegExpSplit() above.
//
// Slow path: a new "splitter" object is constructed from the species
// constructor with the receiver's flags plus 'y' (sticky), and the subject is
// scanned position by position using RegExpExec / SetLastIndex / GetLastIndex
// (helpers defined outside this view).
1444 BUILTIN(RegExpPrototypeSplit) {
1445 HandleScope scope(isolate);
1446 CHECK_RECEIVER(JSReceiver, recv, "RegExp.prototype.@@split");
1447
1448 Factory* factory = isolate->factory();
1449
1450 Handle<Object> string_obj = args.atOrUndefined(isolate, 1);
1451 Handle<Object> limit_obj = args.atOrUndefined(isolate, 2);
1452
1453 Handle<String> string;
1454 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, string,
1455 Object::ToString(isolate, string_obj));
1456
1457 Handle<JSFunction> regexp_fun = isolate->regexp_function();
1458 Handle<Object> ctor;
1459 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1460 isolate, ctor, SpeciesConstructor(isolate, recv, regexp_fun));
1461
// Fast path: only taken when nothing observable has been customised (plain
// JSRegExp receiver, default species constructor, builtin exec).
1462 if (recv->IsJSRegExp() && *ctor == *regexp_fun) {
1463 Handle<Object> exec;
1464 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1465 isolate, exec, JSObject::GetProperty(
1466 recv, factory->NewStringFromAsciiChecked("exec")));
1467 if (IsBuiltinExec(exec)) {
1468 RETURN_RESULT_OR_FAILURE(
1469 isolate, RegExpSplit(isolate, Handle<JSRegExp>::cast(recv), string,
1470 limit_obj));
1471 }
1472 }
1473
1474 Handle<Object> flags_obj;
1475 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1476 isolate, flags_obj, JSObject::GetProperty(recv, factory->flags_string()));
1477
1478 Handle<String> flags;
1479 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, flags,
1480 Object::ToString(isolate, flags_obj));
1481
// Flag detection is a plain substring search on the flags string.
1482 Handle<String> u_str = factory->LookupSingleCharacterStringFromCode('u');
1483 const bool unicode = (String::IndexOf(isolate, flags, u_str, 0) >= 0);
1484
1485 Handle<String> y_str = factory->LookupSingleCharacterStringFromCode('y');
1486 const bool sticky = (String::IndexOf(isolate, flags, y_str, 0) >= 0);
1487
// The splitter must be sticky; append 'y' unless it is already present.
1488 Handle<String> new_flags = flags;
1489 if (!sticky) {
1490 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, new_flags,
1491 factory->NewConsString(flags, y_str));
1492 }
1493
// Construct the splitter: new ctor(recv, new_flags).
1494 Handle<JSReceiver> splitter;
1495 {
1496 const int argc = 2;
1497
1498 ScopedVector<Handle<Object>> argv(argc);
1499 argv[0] = recv;
1500 argv[1] = new_flags;
1501
// NOTE(review): SpeciesConstructor() only guarantees that |ctor| satisfies
// IsConstructor(); this cast assumes it is a JSFunction. Confirm that other
// constructor kinds cannot reach this point.
1502 Handle<JSFunction> ctor_fun = Handle<JSFunction>::cast(ctor);
1503 Handle<Object> splitter_obj;
1504 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1505 isolate, splitter_obj, Execution::New(ctor_fun, argc, argv.start()));
1506
1507 splitter = Handle<JSReceiver>::cast(splitter_obj);
1508 }
1509
// The limit is converted only after the splitter has been constructed.
1510 uint32_t limit;
1511 RETURN_FAILURE_ON_EXCEPTION(isolate, ToUint32(isolate, limit_obj, &limit));
1512
1513 const int length = string->length();
1514
1515 if (limit == 0) return *factory->NewJSArray(0);
1516
// Empty subject: result is [] if the splitter matches, [string] otherwise.
1517 if (length == 0) {
1518 Handle<Object> result;
1519 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1520 isolate, result,
1521 RegExpExec(isolate, splitter, string, factory->undefined_value()));
1522
1523 if (!result->IsNull(isolate)) return *factory->NewJSArray(0);
1524
1525 Handle<FixedArray> elems = factory->NewUninitializedFixedArray(1);
1526 elems->set(0, *string);
1527 return *factory->NewJSArrayWithElements(elems);
1528 }
1529
1530 // TODO(jgruber): Wrap this in a helper class.
1531 static const int kInitialArraySize = 8;
1532 Handle<FixedArray> elems = factory->NewFixedArrayWithHoles(kInitialArraySize);
1533 int num_elems = 0;
1534
// string_index:      position at which the next match attempt is made.
// prev_string_index: start of the substring that has not been emitted yet.
1535 int string_index = 0;
1536 int prev_string_index = 0;
1537 while (string_index < length) {
1538 RETURN_FAILURE_ON_EXCEPTION(isolate,
1539 SetLastIndex(isolate, splitter, string_index));
1540
1541 Handle<Object> result;
1542 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1543 isolate, result,
1544 RegExpExec(isolate, splitter, string, factory->undefined_value()));
1545
// No match at this position: advance and try again.
1546 if (result->IsNull(isolate)) {
1547 string_index +=
1548 AdvanceStringIndex(isolate, string, string_index, unicode);
1549 continue;
1550 }
1551
1552 // TODO(jgruber): Extract toLength of some property into function.
1553 Handle<Object> last_index_obj;
1554 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, last_index_obj,
1555 GetLastIndex(isolate, splitter));
1556
1557 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1558 isolate, last_index_obj, Object::ToLength(isolate, last_index_obj));
// NOTE(review): assumes the ToLength result is a Smi. A user-defined
// splitter could presumably leave a lastIndex outside Smi range -- TODO
// confirm and handle heap numbers if so.
1559 const int last_index = Handle<Smi>::cast(last_index_obj)->value();
1560
// Clamp the match end to the string length. A match that ends at the
// previous split point is skipped by advancing the search position.
1561 const int end = std::min(last_index, length);
1562 if (end == prev_string_index) {
1563 string_index +=
1564 AdvanceStringIndex(isolate, string, string_index, unicode);
1565 continue;
1566 }
1567
// Emit the text between the previous split point and this match position.
// NOTE(review): |num_elems| is int and |limit| is uint32_t, so the limit
// checks in this loop are signed/unsigned comparisons.
1568 {
1569 Handle<String> substr =
1570 factory->NewSubString(string, prev_string_index, string_index);
1571 elems = FixedArray::SetAndGrow(elems, num_elems++, substr);
1572 if (num_elems == limit) goto out;
1573 }
1574
1575 prev_string_index = end;
1576
1577 Handle<Object> num_captures_obj;
1578 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1579 isolate, num_captures_obj,
1580 Object::GetProperty(result, isolate->factory()->length_string()));
1581
1582 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1583 isolate, num_captures_obj, Object::ToLength(isolate, num_captures_obj));
// NOTE(review): same Smi assumption as for lastIndex above; |result| comes
// from a possibly user-defined exec, so its length is not obviously bounded.
1584 const int num_captures =
1585 std::max(Handle<Smi>::cast(num_captures_obj)->value(), 0);
1586
// Element 0 of |result| is skipped; elements 1..length-1 are appended to the
// output as-is.
1587 for (int i = 1; i < num_captures; i++) {
1588 Handle<Object> capture;
1589 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1590 isolate, capture, Object::GetElement(isolate, result, i));
1591 elems = FixedArray::SetAndGrow(elems, num_elems++, capture);
1592 if (num_elems == limit) goto out;
1593 }
1594
// Resume matching right after the match that was just consumed.
1595 string_index = prev_string_index;
1596 }
1597
// Emit the trailing piece after the last split point (reached only when the
// limit was not hit inside the loop).
1598 {
1599 Handle<String> substr =
1600 factory->NewSubString(string, prev_string_index, length);
1601 elems = FixedArray::SetAndGrow(elems, num_elems++, substr);
1602 }
1603
// Common exit: trim the backing store to the number of collected elements.
1604 out:
1605 elems->Shrink(num_elems);
1606 return *factory->NewJSArrayWithElements(elems);
1607 }
1608
1238 } // namespace internal 1609 } // namespace internal
1239 } // namespace v8 1610 } // namespace v8
OLDNEW
« no previous file with comments | « src/builtins/builtins.h ('k') | src/js/regexp.js » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698