Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(186)

Side by Side Diff: src/runtime/runtime-regexp.cc

Issue 2764343004: [regexp] Named capture support for callable replacements (Closed)
Patch Set: Final tweaks Created 3 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/objects-inl.h ('k') | test/mjsunit/harmony/regexp-named-captures.js » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 // Copyright 2014 the V8 project authors. All rights reserved. 1 // Copyright 2014 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "src/runtime/runtime-utils.h" 5 #include "src/runtime/runtime-utils.h"
6 6
7 #include <functional>
8
7 #include "src/arguments.h" 9 #include "src/arguments.h"
8 #include "src/conversions-inl.h" 10 #include "src/conversions-inl.h"
9 #include "src/isolate-inl.h" 11 #include "src/isolate-inl.h"
10 #include "src/messages.h" 12 #include "src/messages.h"
11 #include "src/regexp/jsregexp-inl.h" 13 #include "src/regexp/jsregexp-inl.h"
12 #include "src/regexp/jsregexp.h" 14 #include "src/regexp/jsregexp.h"
13 #include "src/regexp/regexp-utils.h" 15 #include "src/regexp/regexp-utils.h"
14 #include "src/string-builder.h" 16 #include "src/string-builder.h"
15 #include "src/string-search.h" 17 #include "src/string-search.h"
16 18
(...skipping 861 matching lines...) Expand 10 before | Expand all | Expand 10 after
878 private: 880 private:
879 Isolate* isolate_; 881 Isolate* isolate_;
880 Handle<String> subject_; 882 Handle<String> subject_;
881 Handle<RegExpMatchInfo> match_info_; 883 Handle<RegExpMatchInfo> match_info_;
882 }; 884 };
883 885
884 class VectorBackedMatch : public String::Match { 886 class VectorBackedMatch : public String::Match {
885 public: 887 public:
886 VectorBackedMatch(Isolate* isolate, Handle<String> subject, 888 VectorBackedMatch(Isolate* isolate, Handle<String> subject,
887 Handle<String> match, int match_position, 889 Handle<String> match, int match_position,
888 ZoneVector<Handle<Object>>* captures) 890 std::vector<Handle<Object>>* captures)
889 : isolate_(isolate), 891 : isolate_(isolate),
890 match_(match), 892 match_(match),
891 match_position_(match_position), 893 match_position_(match_position),
892 captures_(captures) { 894 captures_(captures) {
893 subject_ = String::Flatten(subject); 895 subject_ = String::Flatten(subject);
894 } 896 }
895 897
896 Handle<String> GetMatch() override { return match_; } 898 Handle<String> GetMatch() override { return match_; }
897 899
898 MaybeHandle<String> GetCapture(int i, bool* capture_exists) override { 900 MaybeHandle<String> GetCapture(int i, bool* capture_exists) override {
(...skipping 18 matching lines...) Expand all
917 919
918 int CaptureCount() override { return static_cast<int>(captures_->size()); } 920 int CaptureCount() override { return static_cast<int>(captures_->size()); }
919 921
920 virtual ~VectorBackedMatch() {} 922 virtual ~VectorBackedMatch() {}
921 923
922 private: 924 private:
923 Isolate* isolate_; 925 Isolate* isolate_;
924 Handle<String> subject_; 926 Handle<String> subject_;
925 Handle<String> match_; 927 Handle<String> match_;
926 const int match_position_; 928 const int match_position_;
927 ZoneVector<Handle<Object>>* captures_; 929 std::vector<Handle<Object>>* captures_;
928 }; 930 };
929 931
932 // Create the groups object (see also the RegExp result creation in
933 // RegExpBuiltinsAssembler::ConstructNewResultFromMatchInfo).
934 Handle<JSObject> ConstructNamedCaptureGroupsObject(
935 Isolate* isolate, Handle<FixedArray> capture_map,
936 std::function<Object*(int)> f_get_capture) {
937 Handle<JSObject> groups = isolate->factory()->NewJSObjectWithNullProto();
938
939 const int capture_count = capture_map->length() >> 1;
940 for (int i = 0; i < capture_count; i++) {
941 const int name_ix = i * 2;
942 const int index_ix = i * 2 + 1;
943
944 Handle<String> capture_name(String::cast(capture_map->get(name_ix)));
945 const int capture_ix = Smi::cast(capture_map->get(index_ix))->value();
946 DCHECK(1 <= capture_ix && capture_ix <= capture_count);
947
948 Handle<Object> capture_value(f_get_capture(capture_ix), isolate);
949 DCHECK(capture_value->IsString());
950
951 JSObject::AddProperty(groups, capture_name, capture_value, NONE);
952 }
953
954 return groups;
955 }
956
930 // Only called from Runtime_RegExpExecMultiple so it doesn't need to maintain 957 // Only called from Runtime_RegExpExecMultiple so it doesn't need to maintain
931 // separate last match info. See comment on that function. 958 // separate last match info. See comment on that function.
932 template <bool has_capture> 959 template <bool has_capture>
933 static Object* SearchRegExpMultiple(Isolate* isolate, Handle<String> subject, 960 static Object* SearchRegExpMultiple(Isolate* isolate, Handle<String> subject,
934 Handle<JSRegExp> regexp, 961 Handle<JSRegExp> regexp,
935 Handle<RegExpMatchInfo> last_match_array, 962 Handle<RegExpMatchInfo> last_match_array,
936 Handle<JSArray> result_array) { 963 Handle<JSArray> result_array) {
964 DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, regexp));
965 DCHECK_NE(has_capture, regexp->CaptureCount() == 0);
937 DCHECK(subject->IsFlat()); 966 DCHECK(subject->IsFlat());
938 DCHECK_NE(has_capture, regexp->CaptureCount() == 0);
939 967
940 int capture_count = regexp->CaptureCount(); 968 int capture_count = regexp->CaptureCount();
941 int subject_length = subject->length(); 969 int subject_length = subject->length();
942 970
943 static const int kMinLengthToCache = 0x1000; 971 static const int kMinLengthToCache = 0x1000;
944 972
945 if (subject_length > kMinLengthToCache) { 973 if (subject_length > kMinLengthToCache) {
946 FixedArray* last_match_cache; 974 FixedArray* last_match_cache;
947 Object* cached_answer = RegExpResultsCache::Lookup( 975 Object* cached_answer = RegExpResultsCache::Lookup(
948 isolate->heap(), *subject, regexp->data(), &last_match_cache, 976 isolate->heap(), *subject, regexp->data(), &last_match_cache,
(...skipping 57 matching lines...) Expand 10 before | Expand all | Expand 10 after
1006 match = isolate->factory()->NewProperSubString(subject, match_start, 1034 match = isolate->factory()->NewProperSubString(subject, match_start,
1007 match_end); 1035 match_end);
1008 } else { 1036 } else {
1009 match = 1037 match =
1010 isolate->factory()->NewSubString(subject, match_start, match_end); 1038 isolate->factory()->NewSubString(subject, match_start, match_end);
1011 first = false; 1039 first = false;
1012 } 1040 }
1013 1041
1014 if (has_capture) { 1042 if (has_capture) {
1015 // Arguments array to replace function is match, captures, index and 1043 // Arguments array to replace function is match, captures, index and
1016 // subject, i.e., 3 + capture count in total. 1044 // subject, i.e., 3 + capture count in total. If the RegExp contains
1017 Handle<FixedArray> elements = 1045 // named captures, they are also passed as the last argument.
1018 isolate->factory()->NewFixedArray(3 + capture_count);
1019 1046
1020 elements->set(0, *match); 1047 Handle<Object> maybe_capture_map(regexp->CaptureNameMap(), isolate);
1048 const bool has_named_captures = maybe_capture_map->IsFixedArray();
1049
1050 const int argc =
1051 has_named_captures ? 4 + capture_count : 3 + capture_count;
1052
1053 Handle<FixedArray> elements = isolate->factory()->NewFixedArray(argc);
1054 int cursor = 0;
1055
1056 elements->set(cursor++, *match);
1021 for (int i = 1; i <= capture_count; i++) { 1057 for (int i = 1; i <= capture_count; i++) {
1022 int start = current_match[i * 2]; 1058 int start = current_match[i * 2];
1023 if (start >= 0) { 1059 if (start >= 0) {
1024 int end = current_match[i * 2 + 1]; 1060 int end = current_match[i * 2 + 1];
1025 DCHECK(start <= end); 1061 DCHECK(start <= end);
1026 Handle<String> substring = 1062 Handle<String> substring =
1027 isolate->factory()->NewSubString(subject, start, end); 1063 isolate->factory()->NewSubString(subject, start, end);
1028 elements->set(i, *substring); 1064 elements->set(cursor++, *substring);
1029 } else { 1065 } else {
1030 DCHECK(current_match[i * 2 + 1] < 0); 1066 DCHECK(current_match[i * 2 + 1] < 0);
1031 elements->set(i, isolate->heap()->undefined_value()); 1067 elements->set(cursor++, isolate->heap()->undefined_value());
1032 } 1068 }
1033 } 1069 }
1034 elements->set(capture_count + 1, Smi::FromInt(match_start)); 1070
1035 elements->set(capture_count + 2, *subject); 1071 elements->set(cursor++, Smi::FromInt(match_start));
1072 elements->set(cursor++, *subject);
1073
1074 if (has_named_captures) {
1075 Handle<FixedArray> capture_map =
1076 Handle<FixedArray>::cast(maybe_capture_map);
1077 Handle<JSObject> groups = ConstructNamedCaptureGroupsObject(
1078 isolate, capture_map, [=](int ix) { return elements->get(ix); });
1079 elements->set(cursor++, *groups);
1080 }
1081
1082 DCHECK_EQ(cursor, argc);
1036 builder.Add(*isolate->factory()->NewJSArrayWithElements(elements)); 1083 builder.Add(*isolate->factory()->NewJSArrayWithElements(elements));
1037 } else { 1084 } else {
1038 builder.Add(*match); 1085 builder.Add(*match);
1039 } 1086 }
1040 } 1087 }
1041 } 1088 }
1042 1089
1043 if (global_cache.HasException()) return isolate->heap()->exception(); 1090 if (global_cache.HasException()) return isolate->heap()->exception();
1044 1091
1045 if (match_start >= 0) { 1092 if (match_start >= 0) {
(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after
1077 return isolate->heap()->null_value(); // No matches at all. 1124 return isolate->heap()->null_value(); // No matches at all.
1078 } 1125 }
1079 } 1126 }
1080 1127
1081 // Legacy implementation of RegExp.prototype[Symbol.replace] which 1128 // Legacy implementation of RegExp.prototype[Symbol.replace] which
1082 // doesn't properly call the underlying exec method. 1129 // doesn't properly call the underlying exec method.
1083 MUST_USE_RESULT MaybeHandle<String> RegExpReplace(Isolate* isolate, 1130 MUST_USE_RESULT MaybeHandle<String> RegExpReplace(Isolate* isolate,
1084 Handle<JSRegExp> regexp, 1131 Handle<JSRegExp> regexp,
1085 Handle<String> string, 1132 Handle<String> string,
1086 Handle<Object> replace_obj) { 1133 Handle<Object> replace_obj) {
1134 // Functional fast-paths are dispatched directly by replace builtin.
1135 DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, regexp));
1136 DCHECK(!replace_obj->IsCallable());
1137
1087 Factory* factory = isolate->factory(); 1138 Factory* factory = isolate->factory();
1088 1139
1089 const int flags = regexp->GetFlags(); 1140 const int flags = regexp->GetFlags();
1090 const bool global = (flags & JSRegExp::kGlobal) != 0; 1141 const bool global = (flags & JSRegExp::kGlobal) != 0;
1091 const bool sticky = (flags & JSRegExp::kSticky) != 0; 1142 const bool sticky = (flags & JSRegExp::kSticky) != 0;
1092 1143
1093 // Functional fast-paths are dispatched directly by replace builtin.
1094 DCHECK(!replace_obj->IsCallable());
1095
1096 Handle<String> replace; 1144 Handle<String> replace;
1097 ASSIGN_RETURN_ON_EXCEPTION(isolate, replace, 1145 ASSIGN_RETURN_ON_EXCEPTION(isolate, replace,
1098 Object::ToString(isolate, replace_obj), String); 1146 Object::ToString(isolate, replace_obj), String);
1099 replace = String::Flatten(replace); 1147 replace = String::Flatten(replace);
1100 1148
1101 Handle<RegExpMatchInfo> last_match_info = isolate->regexp_last_match_info(); 1149 Handle<RegExpMatchInfo> last_match_info = isolate->regexp_last_match_info();
1102 1150
1103 if (!global) { 1151 if (!global) {
1104 // Non-global regexp search, string replace. 1152 // Non-global regexp search, string replace.
1105 1153
(...skipping 141 matching lines...) Expand 10 before | Expand all | Expand 10 after
1247 1295
1248 const int index = match_indices->Capture(0); 1296 const int index = match_indices->Capture(0);
1249 const int end_of_match = match_indices->Capture(1); 1297 const int end_of_match = match_indices->Capture(1);
1250 1298
1251 if (sticky) regexp->SetLastIndex(end_of_match); 1299 if (sticky) regexp->SetLastIndex(end_of_match);
1252 1300
1253 IncrementalStringBuilder builder(isolate); 1301 IncrementalStringBuilder builder(isolate);
1254 builder.AppendString(factory->NewSubString(subject, 0, index)); 1302 builder.AppendString(factory->NewSubString(subject, 0, index));
1255 1303
1256 // Compute the parameter list consisting of the match, captures, index, 1304 // Compute the parameter list consisting of the match, captures, index,
1257 // and subject for the replace function invocation. 1305 // and subject for the replace function invocation. If the RegExp contains
1306 // named captures, they are also passed as the last argument.
1307
1258 // The number of captures plus one for the match. 1308 // The number of captures plus one for the match.
1259 const int m = match_indices->NumberOfCaptureRegisters() / 2; 1309 const int m = match_indices->NumberOfCaptureRegisters() / 2;
1260 1310
1261 const int argc = m + 2; 1311 bool has_named_captures = false;
1312 Handle<FixedArray> capture_map;
1313 if (m > 1) {
1314 // The existence of capture groups implies IRREGEXP kind.
1315 DCHECK_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP);
1316
1317 Object* maybe_capture_map = regexp->CaptureNameMap();
1318 if (maybe_capture_map->IsFixedArray()) {
1319 has_named_captures = true;
1320 capture_map = handle(FixedArray::cast(maybe_capture_map));
1321 }
1322 }
1323
1324 const int argc = has_named_captures ? m + 3 : m + 2;
1262 ScopedVector<Handle<Object>> argv(argc); 1325 ScopedVector<Handle<Object>> argv(argc);
1263 1326
1327 int cursor = 0;
1264 for (int j = 0; j < m; j++) { 1328 for (int j = 0; j < m; j++) {
1265 bool ok; 1329 bool ok;
1266 Handle<String> capture = 1330 Handle<String> capture =
1267 RegExpUtils::GenericCaptureGetter(isolate, match_indices, j, &ok); 1331 RegExpUtils::GenericCaptureGetter(isolate, match_indices, j, &ok);
1268 if (ok) { 1332 if (ok) {
1269 argv[j] = capture; 1333 argv[cursor++] = capture;
1270 } else { 1334 } else {
1271 argv[j] = factory->undefined_value(); 1335 argv[cursor++] = factory->undefined_value();
1272 } 1336 }
1273 } 1337 }
1274 1338
1275 argv[argc - 2] = handle(Smi::FromInt(index), isolate); 1339 argv[cursor++] = handle(Smi::FromInt(index), isolate);
1276 argv[argc - 1] = subject; 1340 argv[cursor++] = subject;
1341
1342 if (has_named_captures) {
1343 argv[cursor++] = ConstructNamedCaptureGroupsObject(
1344 isolate, capture_map, [&argv](int ix) { return *argv[ix]; });
1345 }
1346
1347 DCHECK_EQ(cursor, argc);
1277 1348
1278 Handle<Object> replacement_obj; 1349 Handle<Object> replacement_obj;
1279 ASSIGN_RETURN_FAILURE_ON_EXCEPTION( 1350 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1280 isolate, replacement_obj, 1351 isolate, replacement_obj,
1281 Execution::Call(isolate, replace_obj, factory->undefined_value(), argc, 1352 Execution::Call(isolate, replace_obj, factory->undefined_value(), argc,
1282 argv.start())); 1353 argv.start()));
1283 1354
1284 Handle<String> replacement; 1355 Handle<String> replacement;
1285 ASSIGN_RETURN_FAILURE_ON_EXCEPTION( 1356 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1286 isolate, replacement, Object::ToString(isolate, replacement_obj)); 1357 isolate, replacement, Object::ToString(isolate, replacement_obj));
(...skipping 284 matching lines...) Expand 10 before | Expand all | Expand 10 after
1571 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, match, 1642 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, match,
1572 Object::ToString(isolate, match_obj)); 1643 Object::ToString(isolate, match_obj));
1573 1644
1574 const int match_length = match->length(); 1645 const int match_length = match->length();
1575 1646
1576 Handle<Object> position_obj; 1647 Handle<Object> position_obj;
1577 ASSIGN_RETURN_FAILURE_ON_EXCEPTION( 1648 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1578 isolate, position_obj, 1649 isolate, position_obj,
1579 Object::GetProperty(result, factory->index_string())); 1650 Object::GetProperty(result, factory->index_string()));
1580 1651
1581 // TODO(jgruber): Extract and correct error handling. Since we can go up to
1582 // 2^53 - 1 (at least for ToLength), we might actually need uint64_t here?
1583 ASSIGN_RETURN_FAILURE_ON_EXCEPTION( 1652 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1584 isolate, position_obj, Object::ToInteger(isolate, position_obj)); 1653 isolate, position_obj, Object::ToInteger(isolate, position_obj));
1585 const uint32_t position = 1654 const uint32_t position =
1586 std::min(PositiveNumberToUint32(*position_obj), length); 1655 std::min(PositiveNumberToUint32(*position_obj), length);
1587 1656
1588 ZoneVector<Handle<Object>> captures(&zone); 1657 std::vector<Handle<Object>> captures;
1658 captures.reserve(captures_length);
1659
1589 for (int n = 0; n < captures_length; n++) { 1660 for (int n = 0; n < captures_length; n++) {
1590 Handle<Object> capture; 1661 Handle<Object> capture;
1591 ASSIGN_RETURN_FAILURE_ON_EXCEPTION( 1662 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1592 isolate, capture, Object::GetElement(isolate, result, n)); 1663 isolate, capture, Object::GetElement(isolate, result, n));
1593 1664
1594 if (!capture->IsUndefined(isolate)) { 1665 if (!capture->IsUndefined(isolate)) {
1595 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, capture, 1666 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, capture,
1596 Object::ToString(isolate, capture)); 1667 Object::ToString(isolate, capture));
1597 } 1668 }
1598 captures.push_back(capture); 1669 captures.push_back(capture);
1599 } 1670 }
1600 1671
1672 Handle<Object> groups_obj;
1673 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1674 isolate, groups_obj,
1675 Object::GetProperty(result, factory->groups_string()));
1676
1677 const bool has_named_captures = !groups_obj->IsUndefined(isolate);
1678
1601 Handle<String> replacement; 1679 Handle<String> replacement;
1602 if (functional_replace) { 1680 if (functional_replace) {
1603 const int argc = captures_length + 2; 1681 const int argc =
1682 has_named_captures ? captures_length + 3 : captures_length + 2;
1604 ScopedVector<Handle<Object>> argv(argc); 1683 ScopedVector<Handle<Object>> argv(argc);
1605 1684
1685 int cursor = 0;
1606 for (int j = 0; j < captures_length; j++) { 1686 for (int j = 0; j < captures_length; j++) {
1607 argv[j] = captures[j]; 1687 argv[cursor++] = captures[j];
1608 } 1688 }
1609 1689
1610 argv[captures_length] = handle(Smi::FromInt(position), isolate); 1690 argv[cursor++] = handle(Smi::FromInt(position), isolate);
1611 argv[captures_length + 1] = string; 1691 argv[cursor++] = string;
1692 if (has_named_captures) argv[cursor++] = groups_obj;
1693
1694 DCHECK_EQ(cursor, argc);
1612 1695
1613 Handle<Object> replacement_obj; 1696 Handle<Object> replacement_obj;
1614 ASSIGN_RETURN_FAILURE_ON_EXCEPTION( 1697 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1615 isolate, replacement_obj, 1698 isolate, replacement_obj,
1616 Execution::Call(isolate, replace_obj, factory->undefined_value(), 1699 Execution::Call(isolate, replace_obj, factory->undefined_value(),
1617 argc, argv.start())); 1700 argc, argv.start()));
1618 1701
1619 ASSIGN_RETURN_FAILURE_ON_EXCEPTION( 1702 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1620 isolate, replacement, Object::ToString(isolate, replacement_obj)); 1703 isolate, replacement, Object::ToString(isolate, replacement_obj));
1621 } else { 1704 } else {
1705 DCHECK(!functional_replace);
1622 VectorBackedMatch m(isolate, string, match, position, &captures); 1706 VectorBackedMatch m(isolate, string, match, position, &captures);
1623 ASSIGN_RETURN_FAILURE_ON_EXCEPTION( 1707 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1624 isolate, replacement, String::GetSubstitution(isolate, &m, replace)); 1708 isolate, replacement, String::GetSubstitution(isolate, &m, replace));
1625 } 1709 }
1626 1710
1627 if (position >= next_source_position) { 1711 if (position >= next_source_position) {
1628 builder.AppendString( 1712 builder.AppendString(
1629 factory->NewSubString(string, next_source_position, position)); 1713 factory->NewSubString(string, next_source_position, position));
1630 builder.AppendString(replacement); 1714 builder.AppendString(replacement);
1631 1715
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after
1664 1748
1665 RUNTIME_FUNCTION(Runtime_IsRegExp) { 1749 RUNTIME_FUNCTION(Runtime_IsRegExp) {
1666 SealHandleScope shs(isolate); 1750 SealHandleScope shs(isolate);
1667 DCHECK_EQ(1, args.length()); 1751 DCHECK_EQ(1, args.length());
1668 CONVERT_ARG_CHECKED(Object, obj, 0); 1752 CONVERT_ARG_CHECKED(Object, obj, 0);
1669 return isolate->heap()->ToBoolean(obj->IsJSRegExp()); 1753 return isolate->heap()->ToBoolean(obj->IsJSRegExp());
1670 } 1754 }
1671 1755
1672 } // namespace internal 1756 } // namespace internal
1673 } // namespace v8 1757 } // namespace v8
OLD | NEW
« no previous file with comments | « src/objects-inl.h ('k') | test/mjsunit/harmony/regexp-named-captures.js » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698