Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(621)

Side by Side Diff: src/builtins/builtins-regexp.cc

Issue 2401643002: [regexp] Port split (Closed)
Patch Set: Address comments Created 4 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/builtins/builtins.h ('k') | src/js/regexp.js » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2016 the V8 project authors. All rights reserved. 1 // Copyright 2016 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "src/builtins/builtins-utils.h" 5 #include "src/builtins/builtins-utils.h"
6 #include "src/builtins/builtins.h" 6 #include "src/builtins/builtins.h"
7 7
8 #include "src/code-factory.h" 8 #include "src/code-factory.h"
9 #include "src/regexp/jsregexp.h" 9 #include "src/regexp/jsregexp.h"
10 #include "src/string-builder.h" 10 #include "src/string-builder.h"
(...skipping 1217 matching lines...) Expand 10 before | Expand all | Expand 10 after
1228 previous_last_index_obj, STRICT)); 1228 previous_last_index_obj, STRICT));
1229 } 1229 }
1230 } 1230 }
1231 1231
1232 if (result->IsNull(isolate)) return Smi::FromInt(-1); 1232 if (result->IsNull(isolate)) return Smi::FromInt(-1);
1233 1233
1234 RETURN_RESULT_OR_FAILURE( 1234 RETURN_RESULT_OR_FAILURE(
1235 isolate, Object::GetProperty(result, isolate->factory()->index_string())); 1235 isolate, Object::GetProperty(result, isolate->factory()->index_string()));
1236 } 1236 }
1237 1237
1238 namespace {
1239
1240 MUST_USE_RESULT MaybeHandle<Object> ToUint32(Isolate* isolate,
1241 Handle<Object> object,
1242 uint32_t* out) {
1243 if (object->IsUndefined(isolate)) {
1244 *out = kMaxUInt32;
1245 return object;
1246 }
1247
1248 Handle<Object> number;
1249 ASSIGN_RETURN_ON_EXCEPTION(isolate, number, Object::ToNumber(object), Object);
1250 *out = NumberToUint32(*number);
1251 return object;
1252 }
1253
1254 bool AtSurrogatePair(Isolate* isolate, Handle<String> string, int index) {
1255 if (index + 1 >= string->length()) return false;
1256 const uint16_t first = string->Get(index);
1257 if (first < 0xD800 || first > 0xDBFF) return false;
1258 const uint16_t second = string->Get(index + 1);
1259 return (second >= 0xDC00 && second <= 0xDFFF);
1260 }
1261
1262 Handle<JSArray> NewJSArrayWithElements(Isolate* isolate,
1263 Handle<FixedArray> elems,
1264 int num_elems) {
1265 elems->Shrink(num_elems);
1266 return isolate->factory()->NewJSArrayWithElements(elems);
1267 }
1268
1269 MaybeHandle<JSArray> RegExpSplit(Isolate* isolate, Handle<JSRegExp> regexp,
1270 Handle<String> string,
1271 Handle<Object> limit_obj) {
1272 Factory* factory = isolate->factory();
1273
1274 uint32_t limit;
1275 RETURN_ON_EXCEPTION(isolate, ToUint32(isolate, limit_obj, &limit), JSArray);
1276
1277 const int length = string->length();
1278
1279 if (limit == 0) return factory->NewJSArray(0);
1280
1281 Handle<JSObject> last_match_info = isolate->regexp_last_match_info();
1282
1283 if (length == 0) {
1284 Handle<Object> match_indices;
1285 ASSIGN_RETURN_ON_EXCEPTION(
1286 isolate, match_indices,
1287 RegExpImpl::Exec(regexp, string, 0, last_match_info), JSArray);
1288
1289 if (!match_indices->IsNull(isolate)) return factory->NewJSArray(0);
1290
1291 Handle<FixedArray> elems = factory->NewUninitializedFixedArray(1);
1292 elems->set(0, *string);
1293 return factory->NewJSArrayWithElements(elems);
1294 }
1295
1296 int current_index = 0;
1297 int start_index = 0;
1298 int start_match = 0;
1299
1300 static const int kInitialArraySize = 8;
1301 Handle<FixedArray> elems = factory->NewFixedArrayWithHoles(kInitialArraySize);
1302 int num_elems = 0;
1303
1304 while (true) {
1305 if (start_index == length) {
1306 Handle<String> substr =
1307 factory->NewSubString(string, current_index, length);
1308 elems = FixedArray::SetAndGrow(elems, num_elems++, substr);
1309 break;
1310 }
1311
1312 Handle<Object> match_indices_obj;
1313 ASSIGN_RETURN_ON_EXCEPTION(
1314 isolate, match_indices_obj,
1315 RegExpImpl::Exec(regexp, string, start_index, last_match_info),
1316 JSArray);
1317
1318 if (match_indices_obj->IsNull(isolate)) {
1319 Handle<String> substr =
1320 factory->NewSubString(string, current_index, length);
1321 elems = FixedArray::SetAndGrow(elems, num_elems++, substr);
1322 break;
1323 }
1324
1325 auto match_indices = Handle<JSReceiver>::cast(match_indices_obj);
1326
1327 Handle<Object> start_match_obj =
1328 JSReceiver::GetElement(isolate, match_indices,
1329 RegExpImpl::kFirstCapture)
1330 .ToHandleChecked();
1331 start_match = Handle<Smi>::cast(start_match_obj)->value();
1332
1333 if (start_match == length) {
1334 Handle<String> substr =
1335 factory->NewSubString(string, current_index, length);
1336 elems = FixedArray::SetAndGrow(elems, num_elems++, substr);
1337 break;
1338 }
1339
1340 Handle<Object> end_index_obj =
1341 JSReceiver::GetElement(isolate, match_indices,
1342 RegExpImpl::kFirstCapture + 1)
1343 .ToHandleChecked();
1344 const int end_index = Handle<Smi>::cast(end_index_obj)->value();
1345
1346 if (start_index == end_index && end_index == current_index) {
1347 const bool unicode = (regexp->GetFlags() & JSRegExp::kUnicode) != 0;
1348 if (unicode && AtSurrogatePair(isolate, string, start_index)) {
1349 start_index += 2;
1350 } else {
1351 start_index += 1;
1352 }
1353 continue;
1354 }
1355
1356 {
1357 Handle<String> substr =
1358 factory->NewSubString(string, current_index, start_match);
1359 elems = FixedArray::SetAndGrow(elems, num_elems++, substr);
1360 }
1361
1362 if (num_elems == limit) break;
1363
1364 // TODO(jgruber): Refactor GetLastMatchInfo methods to take an input
1365 // argument.
1366 Handle<Object> num_captures_obj =
1367 JSReceiver::GetElement(isolate, match_indices,
1368 RegExpImpl::kLastCaptureCount)
1369 .ToHandleChecked();
1370 const int match_indices_len = Handle<Smi>::cast(num_captures_obj)->value() +
1371 RegExpImpl::kFirstCapture;
1372
1373 for (int i = RegExpImpl::kFirstCapture + 2; i < match_indices_len;) {
1374 Handle<Object> start_obj =
1375 JSReceiver::GetElement(isolate, match_indices, i++).ToHandleChecked();
1376 const int start = Handle<Smi>::cast(start_obj)->value();
1377
1378 Handle<Object> end_obj =
1379 JSReceiver::GetElement(isolate, match_indices, i++).ToHandleChecked();
1380 const int end = Handle<Smi>::cast(end_obj)->value();
1381
1382 if (end != -1) {
1383 Handle<String> substr = factory->NewSubString(string, start, end);
1384 elems = FixedArray::SetAndGrow(elems, num_elems++, substr);
1385 } else {
1386 elems = FixedArray::SetAndGrow(elems, num_elems++,
1387 factory->undefined_value());
1388 }
1389
1390 if (num_elems == limit) {
1391 return NewJSArrayWithElements(isolate, elems, num_elems);
1392 }
1393 }
1394
1395 start_index = current_index = end_index;
1396 }
1397
1398 return NewJSArrayWithElements(isolate, elems, num_elems);
1399 }
1400
1401 // ES##sec-speciesconstructor
1402 // SpeciesConstructor ( O, defaultConstructor )
1403 MaybeHandle<Object> SpeciesConstructor(Isolate* isolate,
1404 Handle<JSReceiver> recv,
1405 Handle<JSFunction> default_ctor) {
1406 Handle<Object> ctor_obj;
1407 ASSIGN_RETURN_ON_EXCEPTION(
1408 isolate, ctor_obj,
1409 JSObject::GetProperty(recv, isolate->factory()->constructor_string()),
1410 Object);
1411
1412 if (ctor_obj->IsUndefined(isolate)) return default_ctor;
1413
1414 if (!ctor_obj->IsJSReceiver()) {
1415 THROW_NEW_ERROR(isolate,
1416 NewTypeError(MessageTemplate::kConstructorNotReceiver),
1417 Object);
1418 }
1419
1420 Handle<JSReceiver> ctor = Handle<JSReceiver>::cast(ctor_obj);
1421
1422 Handle<Object> species;
1423 ASSIGN_RETURN_ON_EXCEPTION(
1424 isolate, species,
1425 JSObject::GetProperty(ctor, isolate->factory()->species_symbol()),
1426 Object);
1427
1428 if (species->IsNull(isolate) || species->IsUndefined(isolate)) {
1429 return default_ctor;
1430 }
1431
1432 if (species->IsConstructor()) return species;
1433
1434 THROW_NEW_ERROR(
1435 isolate, NewTypeError(MessageTemplate::kSpeciesNotConstructor), Object);
1436 }
1437
1438 bool IsBuiltinExec(Handle<Object> exec) {
1439 if (!exec->IsJSFunction()) return false;
1440
1441 Code* code = Handle<JSFunction>::cast(exec)->code();
1442 if (code == nullptr) return false;
1443
1444 return (code->builtin_index() == Builtins::kRegExpPrototypeExec);
1445 }
1446
1447 } // namespace
1448
1449 // ES#sec-regexp.prototype-@@split
1450 // RegExp.prototype [ @@split ] ( string, limit )
1451 BUILTIN(RegExpPrototypeSplit) {
1452 HandleScope scope(isolate);
1453 CHECK_RECEIVER(JSReceiver, recv, "RegExp.prototype.@@split");
1454
1455 Factory* factory = isolate->factory();
1456
1457 Handle<Object> string_obj = args.atOrUndefined(isolate, 1);
1458 Handle<Object> limit_obj = args.atOrUndefined(isolate, 2);
1459
1460 Handle<String> string;
1461 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, string,
1462 Object::ToString(isolate, string_obj));
1463
1464 Handle<JSFunction> regexp_fun = isolate->regexp_function();
1465 Handle<Object> ctor;
1466 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1467 isolate, ctor, SpeciesConstructor(isolate, recv, regexp_fun));
1468
1469 if (recv->IsJSRegExp() && *ctor == *regexp_fun) {
1470 Handle<Object> exec;
1471 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1472 isolate, exec, JSObject::GetProperty(
1473 recv, factory->NewStringFromAsciiChecked("exec")));
1474 if (IsBuiltinExec(exec)) {
1475 RETURN_RESULT_OR_FAILURE(
1476 isolate, RegExpSplit(isolate, Handle<JSRegExp>::cast(recv), string,
1477 limit_obj));
1478 }
1479 }
1480
1481 Handle<Object> flags_obj;
1482 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1483 isolate, flags_obj, JSObject::GetProperty(recv, factory->flags_string()));
1484
1485 Handle<String> flags;
1486 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, flags,
1487 Object::ToString(isolate, flags_obj));
1488
1489 Handle<String> u_str = factory->LookupSingleCharacterStringFromCode('u');
1490 const bool unicode = (String::IndexOf(isolate, flags, u_str, 0) >= 0);
1491
1492 Handle<String> y_str = factory->LookupSingleCharacterStringFromCode('y');
1493 const bool sticky = (String::IndexOf(isolate, flags, y_str, 0) >= 0);
1494
1495 Handle<String> new_flags = flags;
1496 if (!sticky) {
1497 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, new_flags,
1498 factory->NewConsString(flags, y_str));
1499 }
1500
1501 Handle<JSReceiver> splitter;
1502 {
1503 const int argc = 2;
1504
1505 ScopedVector<Handle<Object>> argv(argc);
1506 argv[0] = recv;
1507 argv[1] = new_flags;
1508
1509 Handle<JSFunction> ctor_fun = Handle<JSFunction>::cast(ctor);
1510 Handle<Object> splitter_obj;
1511 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1512 isolate, splitter_obj, Execution::New(ctor_fun, argc, argv.start()));
1513
1514 splitter = Handle<JSReceiver>::cast(splitter_obj);
1515 }
1516
1517 uint32_t limit;
1518 RETURN_FAILURE_ON_EXCEPTION(isolate, ToUint32(isolate, limit_obj, &limit));
1519
1520 const int length = string->length();
1521
1522 if (limit == 0) return *factory->NewJSArray(0);
1523
1524 if (length == 0) {
1525 Handle<Object> result;
1526 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1527 isolate, result,
1528 RegExpExec(isolate, splitter, string, factory->undefined_value()));
1529
1530 if (!result->IsNull(isolate)) return *factory->NewJSArray(0);
1531
1532 Handle<FixedArray> elems = factory->NewUninitializedFixedArray(1);
1533 elems->set(0, *string);
1534 return *factory->NewJSArrayWithElements(elems);
1535 }
1536
1537 // TODO(jgruber): Wrap this in a helper class.
1538 static const int kInitialArraySize = 8;
1539 Handle<FixedArray> elems = factory->NewFixedArrayWithHoles(kInitialArraySize);
1540 int num_elems = 0;
1541
1542 int string_index = 0;
1543 int prev_string_index = 0;
1544 while (string_index < length) {
1545 RETURN_FAILURE_ON_EXCEPTION(isolate,
1546 SetLastIndex(isolate, splitter, string_index));
1547
1548 Handle<Object> result;
1549 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1550 isolate, result,
1551 RegExpExec(isolate, splitter, string, factory->undefined_value()));
1552
1553 if (result->IsNull(isolate)) {
1554 string_index +=
1555 AdvanceStringIndex(isolate, string, string_index, unicode);
1556 continue;
1557 }
1558
1559 // TODO(jgruber): Extract toLength of some property into function.
1560 Handle<Object> last_index_obj;
1561 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, last_index_obj,
1562 GetLastIndex(isolate, splitter));
1563
1564 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1565 isolate, last_index_obj, Object::ToLength(isolate, last_index_obj));
1566 const int last_index = Handle<Smi>::cast(last_index_obj)->value();
1567
1568 const int end = std::min(last_index, length);
1569 if (end == prev_string_index) {
1570 string_index +=
1571 AdvanceStringIndex(isolate, string, string_index, unicode);
1572 continue;
1573 }
1574
1575 {
1576 Handle<String> substr =
1577 factory->NewSubString(string, prev_string_index, string_index);
1578 elems = FixedArray::SetAndGrow(elems, num_elems++, substr);
1579 if (num_elems == limit) {
1580 return *NewJSArrayWithElements(isolate, elems, num_elems);
1581 }
1582 }
1583
1584 prev_string_index = end;
1585
1586 Handle<Object> num_captures_obj;
1587 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1588 isolate, num_captures_obj,
1589 Object::GetProperty(result, isolate->factory()->length_string()));
1590
1591 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1592 isolate, num_captures_obj, Object::ToLength(isolate, num_captures_obj));
1593 const int num_captures =
1594 std::max(Handle<Smi>::cast(num_captures_obj)->value(), 0);
1595
1596 for (int i = 1; i < num_captures; i++) {
1597 Handle<Object> capture;
1598 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1599 isolate, capture, Object::GetElement(isolate, result, i));
1600 elems = FixedArray::SetAndGrow(elems, num_elems++, capture);
1601 if (num_elems == limit) {
1602 return *NewJSArrayWithElements(isolate, elems, num_elems);
1603 }
1604 }
1605
1606 string_index = prev_string_index;
1607 }
1608
1609 {
1610 Handle<String> substr =
1611 factory->NewSubString(string, prev_string_index, length);
1612 elems = FixedArray::SetAndGrow(elems, num_elems++, substr);
1613 }
1614
1615 return *NewJSArrayWithElements(isolate, elems, num_elems);
1616 }
1617
1238 } // namespace internal 1618 } // namespace internal
1239 } // namespace v8 1619 } // namespace v8
OLDNEW
« no previous file with comments | « src/builtins/builtins.h ('k') | src/js/regexp.js » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698