Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(277)

Side by Side Diff: src/runtime/runtime-regexp.cc

Issue 2764343004: [regexp] Named capture support for callable replacements (Closed)
Patch Set: Created 3 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2014 the V8 project authors. All rights reserved. 1 // Copyright 2014 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "src/runtime/runtime-utils.h" 5 #include "src/runtime/runtime-utils.h"
6 6
7 #include <functional>
8
7 #include "src/arguments.h" 9 #include "src/arguments.h"
8 #include "src/conversions-inl.h" 10 #include "src/conversions-inl.h"
9 #include "src/isolate-inl.h" 11 #include "src/isolate-inl.h"
10 #include "src/messages.h" 12 #include "src/messages.h"
11 #include "src/regexp/jsregexp-inl.h" 13 #include "src/regexp/jsregexp-inl.h"
12 #include "src/regexp/jsregexp.h" 14 #include "src/regexp/jsregexp.h"
13 #include "src/regexp/regexp-utils.h" 15 #include "src/regexp/regexp-utils.h"
14 #include "src/string-builder.h" 16 #include "src/string-builder.h"
15 #include "src/string-search.h" 17 #include "src/string-search.h"
16 18
(...skipping 861 matching lines...) Expand 10 before | Expand all | Expand 10 after
878 private: 880 private:
879 Isolate* isolate_; 881 Isolate* isolate_;
880 Handle<String> subject_; 882 Handle<String> subject_;
881 Handle<RegExpMatchInfo> match_info_; 883 Handle<RegExpMatchInfo> match_info_;
882 }; 884 };
883 885
884 class VectorBackedMatch : public String::Match { 886 class VectorBackedMatch : public String::Match {
885 public: 887 public:
886 VectorBackedMatch(Isolate* isolate, Handle<String> subject, 888 VectorBackedMatch(Isolate* isolate, Handle<String> subject,
887 Handle<String> match, int match_position, 889 Handle<String> match, int match_position,
888 ZoneVector<Handle<Object>>* captures) 890 std::vector<Handle<Object>>* captures)
889 : isolate_(isolate), 891 : isolate_(isolate),
890 match_(match), 892 match_(match),
891 match_position_(match_position), 893 match_position_(match_position),
892 captures_(captures) { 894 captures_(captures) {
893 subject_ = String::Flatten(subject); 895 subject_ = String::Flatten(subject);
894 } 896 }
895 897
896 Handle<String> GetMatch() override { return match_; } 898 Handle<String> GetMatch() override { return match_; }
897 899
898 MaybeHandle<String> GetCapture(int i, bool* capture_exists) override { 900 MaybeHandle<String> GetCapture(int i, bool* capture_exists) override {
(...skipping 18 matching lines...) Expand all
917 919
918 int CaptureCount() override { return static_cast<int>(captures_->size()); } 920 int CaptureCount() override { return static_cast<int>(captures_->size()); }
919 921
920 virtual ~VectorBackedMatch() {} 922 virtual ~VectorBackedMatch() {}
921 923
922 private: 924 private:
923 Isolate* isolate_; 925 Isolate* isolate_;
924 Handle<String> subject_; 926 Handle<String> subject_;
925 Handle<String> match_; 927 Handle<String> match_;
926 const int match_position_; 928 const int match_position_;
927 ZoneVector<Handle<Object>>* captures_; 929 std::vector<Handle<Object>>* captures_;
928 }; 930 };
929 931
932 // Create the groups object (see also the RegExp result creation in
933 // RegExpBuiltinsAssembler::ConstructNewResultFromMatchInfo).
934 Handle<JSObject> ConstructNamedCaptureGroupsObject(
935 Isolate* isolate, Handle<FixedArray> capture_map,
936 std::function<Object*(int)> f_get_capture) {
937 Handle<JSObject> groups = isolate->factory()->NewJSObjectWithNullProto();
938
939 const int capture_count = capture_map->length() >> 1;
940 for (int i = 0; i < capture_count; i++) {
941 const int name_ix = i * 2;
942 const int index_ix = i * 2 + 1;
943
944 Handle<String> capture_name(String::cast(capture_map->get(name_ix)));
945 const int capture_ix = Smi::cast(capture_map->get(index_ix))->value();
946 DCHECK(1 <= capture_ix && capture_ix <= capture_count);
947
948 Handle<Object> capture_value(f_get_capture(capture_ix), isolate);
949 DCHECK(capture_value->IsString());
950
951 Maybe<bool> success = JSReceiver::CreateDataProperty(
952 groups, capture_name, capture_value, Object::THROW_ON_ERROR);
953 CHECK(success.IsJust());
954 }
955
956 return groups;
957 }
958
930 // Only called from Runtime_RegExpExecMultiple so it doesn't need to maintain 959 // Only called from Runtime_RegExpExecMultiple so it doesn't need to maintain
931 // separate last match info. See comment on that function. 960 // separate last match info. See comment on that function.
932 template <bool has_capture> 961 template <bool has_capture>
933 static Object* SearchRegExpMultiple(Isolate* isolate, Handle<String> subject, 962 static Object* SearchRegExpMultiple(Isolate* isolate, Handle<String> subject,
934 Handle<JSRegExp> regexp, 963 Handle<JSRegExp> regexp,
935 Handle<RegExpMatchInfo> last_match_array, 964 Handle<RegExpMatchInfo> last_match_array,
936 Handle<JSArray> result_array) { 965 Handle<JSArray> result_array) {
966 DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, regexp));
967 DCHECK_NE(has_capture, regexp->CaptureCount() == 0);
937 DCHECK(subject->IsFlat()); 968 DCHECK(subject->IsFlat());
938 DCHECK_NE(has_capture, regexp->CaptureCount() == 0);
939 969
940 int capture_count = regexp->CaptureCount(); 970 int capture_count = regexp->CaptureCount();
941 int subject_length = subject->length(); 971 int subject_length = subject->length();
942 972
943 static const int kMinLengthToCache = 0x1000; 973 static const int kMinLengthToCache = 0x1000;
944 974
945 if (subject_length > kMinLengthToCache) { 975 if (subject_length > kMinLengthToCache) {
946 FixedArray* last_match_cache; 976 FixedArray* last_match_cache;
947 Object* cached_answer = RegExpResultsCache::Lookup( 977 Object* cached_answer = RegExpResultsCache::Lookup(
948 isolate->heap(), *subject, regexp->data(), &last_match_cache, 978 isolate->heap(), *subject, regexp->data(), &last_match_cache,
(...skipping 57 matching lines...) Expand 10 before | Expand all | Expand 10 after
1006 match = isolate->factory()->NewProperSubString(subject, match_start, 1036 match = isolate->factory()->NewProperSubString(subject, match_start,
1007 match_end); 1037 match_end);
1008 } else { 1038 } else {
1009 match = 1039 match =
1010 isolate->factory()->NewSubString(subject, match_start, match_end); 1040 isolate->factory()->NewSubString(subject, match_start, match_end);
1011 first = false; 1041 first = false;
1012 } 1042 }
1013 1043
1014 if (has_capture) { 1044 if (has_capture) {
1015 // Arguments array to replace function is match, captures, index and 1045 // Arguments array to replace function is match, captures, index and
1016 // subject, i.e., 3 + capture count in total. 1046 // subject, i.e., 3 + capture count in total. If the RegExp contains
1017 Handle<FixedArray> elements = 1047 // named captures, they are also passed just before the index.
jgruber 2017/03/24 14:06:46 The proposal just changed, groups are now passed a
1018 isolate->factory()->NewFixedArray(3 + capture_count);
1019 1048
1020 elements->set(0, *match); 1049 Handle<Object> maybe_capture_map(regexp->CaptureNameMap(), isolate);
1050 const bool has_named_captures = maybe_capture_map->IsFixedArray();
1051
1052 const int argc =
1053 has_named_captures ? 4 + capture_count : 3 + capture_count;
1054
1055 Handle<FixedArray> elements = isolate->factory()->NewFixedArray(argc);
1056 int cursor = 0;
1057
1058 elements->set(cursor++, *match);
1021 for (int i = 1; i <= capture_count; i++) { 1059 for (int i = 1; i <= capture_count; i++) {
1022 int start = current_match[i * 2]; 1060 int start = current_match[i * 2];
1023 if (start >= 0) { 1061 if (start >= 0) {
1024 int end = current_match[i * 2 + 1]; 1062 int end = current_match[i * 2 + 1];
1025 DCHECK(start <= end); 1063 DCHECK(start <= end);
1026 Handle<String> substring = 1064 Handle<String> substring =
1027 isolate->factory()->NewSubString(subject, start, end); 1065 isolate->factory()->NewSubString(subject, start, end);
1028 elements->set(i, *substring); 1066 elements->set(cursor++, *substring);
1029 } else { 1067 } else {
1030 DCHECK(current_match[i * 2 + 1] < 0); 1068 DCHECK(current_match[i * 2 + 1] < 0);
1031 elements->set(i, isolate->heap()->undefined_value()); 1069 elements->set(cursor++, isolate->heap()->undefined_value());
1032 } 1070 }
1033 } 1071 }
1034 elements->set(capture_count + 1, Smi::FromInt(match_start)); 1072
1035 elements->set(capture_count + 2, *subject); 1073 if (has_named_captures) {
1074 Handle<FixedArray> capture_map =
1075 Handle<FixedArray>::cast(maybe_capture_map);
1076 Handle<JSObject> groups = ConstructNamedCaptureGroupsObject(
1077 isolate, capture_map, [=](int ix) { return elements->get(ix); });
1078 elements->set(cursor++, *groups);
1079 }
1080
1081 elements->set(cursor++, Smi::FromInt(match_start));
1082 elements->set(cursor++, *subject);
1083 DCHECK_EQ(cursor, argc);
1036 builder.Add(*isolate->factory()->NewJSArrayWithElements(elements)); 1084 builder.Add(*isolate->factory()->NewJSArrayWithElements(elements));
1037 } else { 1085 } else {
1038 builder.Add(*match); 1086 builder.Add(*match);
1039 } 1087 }
1040 } 1088 }
1041 } 1089 }
1042 1090
1043 if (global_cache.HasException()) return isolate->heap()->exception(); 1091 if (global_cache.HasException()) return isolate->heap()->exception();
1044 1092
1045 if (match_start >= 0) { 1093 if (match_start >= 0) {
(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after
1077 return isolate->heap()->null_value(); // No matches at all. 1125 return isolate->heap()->null_value(); // No matches at all.
1078 } 1126 }
1079 } 1127 }
1080 1128
1081 // Legacy implementation of RegExp.prototype[Symbol.replace] which 1129 // Legacy implementation of RegExp.prototype[Symbol.replace] which
1082 // doesn't properly call the underlying exec method. 1130 // doesn't properly call the underlying exec method.
1083 MUST_USE_RESULT MaybeHandle<String> RegExpReplace(Isolate* isolate, 1131 MUST_USE_RESULT MaybeHandle<String> RegExpReplace(Isolate* isolate,
1084 Handle<JSRegExp> regexp, 1132 Handle<JSRegExp> regexp,
1085 Handle<String> string, 1133 Handle<String> string,
1086 Handle<Object> replace_obj) { 1134 Handle<Object> replace_obj) {
1135 // Functional fast-paths are dispatched directly by replace builtin.
1136 DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, regexp));
1137 DCHECK(!replace_obj->IsCallable());
1138
1087 Factory* factory = isolate->factory(); 1139 Factory* factory = isolate->factory();
1088 1140
1089 const int flags = regexp->GetFlags(); 1141 const int flags = regexp->GetFlags();
1090 const bool global = (flags & JSRegExp::kGlobal) != 0; 1142 const bool global = (flags & JSRegExp::kGlobal) != 0;
1091 const bool sticky = (flags & JSRegExp::kSticky) != 0; 1143 const bool sticky = (flags & JSRegExp::kSticky) != 0;
1092 1144
1093 // Functional fast-paths are dispatched directly by replace builtin.
1094 DCHECK(!replace_obj->IsCallable());
1095
1096 Handle<String> replace; 1145 Handle<String> replace;
1097 ASSIGN_RETURN_ON_EXCEPTION(isolate, replace, 1146 ASSIGN_RETURN_ON_EXCEPTION(isolate, replace,
1098 Object::ToString(isolate, replace_obj), String); 1147 Object::ToString(isolate, replace_obj), String);
1099 replace = String::Flatten(replace); 1148 replace = String::Flatten(replace);
1100 1149
1101 Handle<RegExpMatchInfo> last_match_info = isolate->regexp_last_match_info(); 1150 Handle<RegExpMatchInfo> last_match_info = isolate->regexp_last_match_info();
1102 1151
1103 if (!global) { 1152 if (!global) {
1104 // Non-global regexp search, string replace. 1153 // Non-global regexp search, string replace.
1105 1154
(...skipping 141 matching lines...) Expand 10 before | Expand all | Expand 10 after
1247 1296
1248 const int index = match_indices->Capture(0); 1297 const int index = match_indices->Capture(0);
1249 const int end_of_match = match_indices->Capture(1); 1298 const int end_of_match = match_indices->Capture(1);
1250 1299
1251 if (sticky) regexp->SetLastIndex(end_of_match); 1300 if (sticky) regexp->SetLastIndex(end_of_match);
1252 1301
1253 IncrementalStringBuilder builder(isolate); 1302 IncrementalStringBuilder builder(isolate);
1254 builder.AppendString(factory->NewSubString(subject, 0, index)); 1303 builder.AppendString(factory->NewSubString(subject, 0, index));
1255 1304
1256 // Compute the parameter list consisting of the match, captures, index, 1305 // Compute the parameter list consisting of the match, captures, index,
1257 // and subject for the replace function invocation. 1306 // and subject for the replace function invocation. If the RegExp contains
1307 // named captures, they are also passed just before the index.
1308
1258 // The number of captures plus one for the match. 1309 // The number of captures plus one for the match.
1259 const int m = match_indices->NumberOfCaptureRegisters() / 2; 1310 const int m = match_indices->NumberOfCaptureRegisters() / 2;
1260 1311
1261 const int argc = m + 2; 1312 bool has_named_captures = false;
1313 Handle<FixedArray> capture_map;
1314 if (m > 1) {
1315 // The existence of capture groups implies IRREGEXP kind.
1316 DCHECK_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP);
1317
1318 Object* maybe_capture_map = regexp->CaptureNameMap();
1319 if (maybe_capture_map->IsFixedArray()) {
1320 has_named_captures = true;
1321 capture_map = handle(FixedArray::cast(maybe_capture_map));
1322 }
1323 }
1324
1325 const int argc = has_named_captures ? m + 3 : m + 2;
1262 ScopedVector<Handle<Object>> argv(argc); 1326 ScopedVector<Handle<Object>> argv(argc);
1263 1327
1264 for (int j = 0; j < m; j++) { 1328 for (int j = 0; j < m; j++) {
1265 bool ok; 1329 bool ok;
1266 Handle<String> capture = 1330 Handle<String> capture =
1267 RegExpUtils::GenericCaptureGetter(isolate, match_indices, j, &ok); 1331 RegExpUtils::GenericCaptureGetter(isolate, match_indices, j, &ok);
1268 if (ok) { 1332 if (ok) {
1269 argv[j] = capture; 1333 argv[j] = capture;
1270 } else { 1334 } else {
1271 argv[j] = factory->undefined_value(); 1335 argv[j] = factory->undefined_value();
1272 } 1336 }
1273 } 1337 }
1274 1338
1339 if (has_named_captures) {
1340 argv[argc - 3] = ConstructNamedCaptureGroupsObject(
1341 isolate, capture_map, [&argv](int ix) { return *argv[ix]; });
1342 }
1343
1275 argv[argc - 2] = handle(Smi::FromInt(index), isolate); 1344 argv[argc - 2] = handle(Smi::FromInt(index), isolate);
1276 argv[argc - 1] = subject; 1345 argv[argc - 1] = subject;
1277 1346
1278 Handle<Object> replacement_obj; 1347 Handle<Object> replacement_obj;
1279 ASSIGN_RETURN_FAILURE_ON_EXCEPTION( 1348 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1280 isolate, replacement_obj, 1349 isolate, replacement_obj,
1281 Execution::Call(isolate, replace_obj, factory->undefined_value(), argc, 1350 Execution::Call(isolate, replace_obj, factory->undefined_value(), argc,
1282 argv.start())); 1351 argv.start()));
1283 1352
1284 Handle<String> replacement; 1353 Handle<String> replacement;
(...skipping 286 matching lines...) Expand 10 before | Expand all | Expand 10 after
1571 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, match, 1640 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, match,
1572 Object::ToString(isolate, match_obj)); 1641 Object::ToString(isolate, match_obj));
1573 1642
1574 const int match_length = match->length(); 1643 const int match_length = match->length();
1575 1644
1576 Handle<Object> position_obj; 1645 Handle<Object> position_obj;
1577 ASSIGN_RETURN_FAILURE_ON_EXCEPTION( 1646 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1578 isolate, position_obj, 1647 isolate, position_obj,
1579 Object::GetProperty(result, factory->index_string())); 1648 Object::GetProperty(result, factory->index_string()));
1580 1649
1581 // TODO(jgruber): Extract and correct error handling. Since we can go up to
1582 // 2^53 - 1 (at least for ToLength), we might actually need uint64_t here?
1583 ASSIGN_RETURN_FAILURE_ON_EXCEPTION( 1650 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1584 isolate, position_obj, Object::ToInteger(isolate, position_obj)); 1651 isolate, position_obj, Object::ToInteger(isolate, position_obj));
1585 const uint32_t position = 1652 const uint32_t position =
1586 std::min(PositiveNumberToUint32(*position_obj), length); 1653 std::min(PositiveNumberToUint32(*position_obj), length);
1587 1654
1588 ZoneVector<Handle<Object>> captures(&zone); 1655 std::vector<Handle<Object>> captures;
1656 captures.reserve(captures_length);
1657
1589 for (int n = 0; n < captures_length; n++) { 1658 for (int n = 0; n < captures_length; n++) {
1590 Handle<Object> capture; 1659 Handle<Object> capture;
1591 ASSIGN_RETURN_FAILURE_ON_EXCEPTION( 1660 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1592 isolate, capture, Object::GetElement(isolate, result, n)); 1661 isolate, capture, Object::GetElement(isolate, result, n));
1593 1662
1594 if (!capture->IsUndefined(isolate)) { 1663 if (!capture->IsUndefined(isolate)) {
1595 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, capture, 1664 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, capture,
1596 Object::ToString(isolate, capture)); 1665 Object::ToString(isolate, capture));
1597 } 1666 }
1598 captures.push_back(capture); 1667 captures.push_back(capture);
1599 } 1668 }
1600 1669
1670 Handle<Object> groups_obj;
1671 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1672 isolate, groups_obj,
1673 Object::GetProperty(result, factory->groups_string()));
1674
1675 const bool has_named_captures = !groups_obj->IsUndefined(isolate);
1676
1601 Handle<String> replacement; 1677 Handle<String> replacement;
1602 if (functional_replace) { 1678 if (functional_replace) {
1603 const int argc = captures_length + 2; 1679 const int argc =
1680 has_named_captures ? captures_length + 3 : captures_length + 2;
1604 ScopedVector<Handle<Object>> argv(argc); 1681 ScopedVector<Handle<Object>> argv(argc);
1605 1682
1606 for (int j = 0; j < captures_length; j++) { 1683 for (int j = 0; j < captures_length; j++) {
1607 argv[j] = captures[j]; 1684 argv[j] = captures[j];
1608 } 1685 }
1609 1686
1610 argv[captures_length] = handle(Smi::FromInt(position), isolate); 1687 if (has_named_captures) argv[argc - 3] = groups_obj;
1611 argv[captures_length + 1] = string; 1688 argv[argc - 2] = handle(Smi::FromInt(position), isolate);
1689 argv[argc - 1] = string;
1612 1690
1613 Handle<Object> replacement_obj; 1691 Handle<Object> replacement_obj;
1614 ASSIGN_RETURN_FAILURE_ON_EXCEPTION( 1692 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1615 isolate, replacement_obj, 1693 isolate, replacement_obj,
1616 Execution::Call(isolate, replace_obj, factory->undefined_value(), 1694 Execution::Call(isolate, replace_obj, factory->undefined_value(),
1617 argc, argv.start())); 1695 argc, argv.start()));
1618 1696
1619 ASSIGN_RETURN_FAILURE_ON_EXCEPTION( 1697 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1620 isolate, replacement, Object::ToString(isolate, replacement_obj)); 1698 isolate, replacement, Object::ToString(isolate, replacement_obj));
1621 } else { 1699 } else {
1700 DCHECK(!functional_replace);
1622 VectorBackedMatch m(isolate, string, match, position, &captures); 1701 VectorBackedMatch m(isolate, string, match, position, &captures);
1623 ASSIGN_RETURN_FAILURE_ON_EXCEPTION( 1702 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1624 isolate, replacement, String::GetSubstitution(isolate, &m, replace)); 1703 isolate, replacement, String::GetSubstitution(isolate, &m, replace));
1625 } 1704 }
1626 1705
1627 if (position >= next_source_position) { 1706 if (position >= next_source_position) {
1628 builder.AppendString( 1707 builder.AppendString(
1629 factory->NewSubString(string, next_source_position, position)); 1708 factory->NewSubString(string, next_source_position, position));
1630 builder.AppendString(replacement); 1709 builder.AppendString(replacement);
1631 1710
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after
1664 1743
1665 RUNTIME_FUNCTION(Runtime_IsRegExp) { 1744 RUNTIME_FUNCTION(Runtime_IsRegExp) {
1666 SealHandleScope shs(isolate); 1745 SealHandleScope shs(isolate);
1667 DCHECK_EQ(1, args.length()); 1746 DCHECK_EQ(1, args.length());
1668 CONVERT_ARG_CHECKED(Object, obj, 0); 1747 CONVERT_ARG_CHECKED(Object, obj, 0);
1669 return isolate->heap()->ToBoolean(obj->IsJSRegExp()); 1748 return isolate->heap()->ToBoolean(obj->IsJSRegExp());
1670 } 1749 }
1671 1750
1672 } // namespace internal 1751 } // namespace internal
1673 } // namespace v8 1752 } // namespace v8
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698