Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(58)

Unified Diff: src/builtins/builtins-regexp.cc

Issue 2307863003: [regexp] Port RegExpMatch, RegExpSearch, and RegExpTest (Closed)
Patch Set: Rebase Created 4 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « src/builtins/builtins.h ('k') | src/heap-symbols.h » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: src/builtins/builtins-regexp.cc
diff --git a/src/builtins/builtins-regexp.cc b/src/builtins/builtins-regexp.cc
index cf95dd6fab138bab02a0ab781f86cc89d2d49015..88dd49c9fb3d420ee46c8263a1fea01c973e86d0 100644
--- a/src/builtins/builtins-regexp.cc
+++ b/src/builtins/builtins-regexp.cc
@@ -5,6 +5,7 @@
#include "src/builtins/builtins.h"
#include "src/builtins/builtins-utils.h"
+#include "src/regexp/jsregexp.h"
#include "src/string-builder.h"
namespace v8 {
@@ -321,13 +322,16 @@ const int kLastSubjectIndex = 1;
const int kLastInputIndex = 2;
const int kFirstCaptureIndex = 3;
-Handle<Object> GetLastMatchField(Isolate* isolate, int index) {
+Handle<JSObject> GetLastMatchInfo(Isolate* isolate) {
Handle<JSFunction> global_regexp = isolate->regexp_function();
Handle<Object> last_match_info_obj = JSReceiver::GetDataProperty(
global_regexp, isolate->factory()->regexp_last_match_info_symbol());
- Handle<JSReceiver> last_match_info =
- Handle<JSReceiver>::cast(last_match_info_obj);
+ return Handle<JSObject>::cast(last_match_info_obj);
+}
+
+Handle<Object> GetLastMatchField(Isolate* isolate, int index) {
+ Handle<JSObject> last_match_info = GetLastMatchInfo(isolate);
return JSReceiver::GetElement(isolate, last_match_info, index)
.ToHandleChecked();
}
@@ -459,5 +463,340 @@ BUILTIN(RegExpPrototypeRightContextGetter) {
return *isolate->factory()->NewSubString(last_subject, start_index, len);
}
+namespace {
+
+// Stores |value| (as a Smi) into the "lastIndex" property of |regexp|.
+//
+// The ES2015 algorithms that write lastIndex (RegExpBuiltinExec,
+// RegExp.prototype[@@match] and RegExp.prototype[@@search]) all do so via
+// Set(R, "lastIndex", value, true), i.e. a store that fails must throw a
+// TypeError. The store is therefore performed in STRICT mode; a SLOPPY store
+// would silently ignore a failed write (e.g. to a non-writable lastIndex).
+//
+// The returned MaybeHandle is whatever Object::SetProperty produces; callers
+// in this file check it with RETURN_ON_EXCEPTION /
+// RETURN_FAILURE_ON_EXCEPTION.
+MaybeHandle<Object> SetLastIndex(Isolate* isolate, Handle<JSReceiver> regexp,
+                                 int value) {
+  return Object::SetProperty(regexp, isolate->factory()->lastIndex_string(),
+                             handle(Smi::FromInt(value), isolate), STRICT);
+}
+
+// Allocates the array object returned for a successful match.
+//
+// The object is created from the native context's regexp_result_map and gets
+// a fresh backing store of |size| elements. Its length is set to |size|, and
+// the two in-object properties addressed by JSRegExpResult::kIndexIndex and
+// JSRegExpResult::kInputIndex are initialized from |index| and |input|.
+// Filling in the elements themselves is left to the caller.
+Handle<JSArray> ConstructResult(Isolate* isolate, int size, int index,
+                                Handle<String> input) {
+  Handle<FixedArray> backing_store = isolate->factory()->NewFixedArray(size);
+  Handle<Map> result_map(isolate->native_context()->regexp_result_map());
+  Handle<JSArray> result = Handle<JSArray>::cast(
+      isolate->factory()->NewJSObjectFromMap(result_map, NOT_TENURED));
+
+  result->set_elements(*backing_store);
+  result->set_length(Smi::FromInt(size));
+
+  // The in-object properties live after the array length.
+  result->InObjectPropertyAtPut(JSRegExpResult::kInputIndex, *input);
+  result->InObjectPropertyAtPut(JSRegExpResult::kIndexIndex,
+                                Smi::FromInt(index));
+
+  return result;
+}
+
+// Builds the result array for the most recent successful match on |string|.
+//
+// Element 0 is the substring covered by the whole match; element i (i >= 1)
+// is the substring of capture group i, or is left at its freshly-allocated
+// value when the group's start offset is -1 (group did not participate).
+//
+// NOTE: |match_info| is not read here. All offsets are obtained through
+// GetLastMatchNumberOfCaptures / GetLastMatchCapture, which are defined
+// elsewhere in this file (presumably backed by the last-match-info object;
+// confirm against their definitions).
+Handle<Object> ReturnNewResultFromMatchInfo(Isolate* isolate,
+                                            Handle<Object> match_info,
+                                            Handle<String> string) {
+  const int register_count = GetLastMatchNumberOfCaptures(isolate);
+  DCHECK_EQ(0, register_count % 2);
+
+  // Each result occupies a (start, end) pair of registers.
+  const int result_length = register_count / 2;
+
+  const int match_start = GetLastMatchCapture(isolate, 0);
+  const int match_end = GetLastMatchCapture(isolate, 1);
+
+  // The whole-match substring is created before the result array so that
+  // storing it does not need an extra write barrier.
+  Handle<String> whole_match =
+      isolate->factory()->NewSubString(string, match_start, match_end);
+  Handle<JSArray> result =
+      ConstructResult(isolate, result_length, match_start, string);
+
+  Handle<FixedArray> slots(FixedArray::cast(result->elements()), isolate);
+  slots->set(0, *whole_match);
+
+  for (int group = 1; group < result_length; group++) {
+    const int group_start = GetLastMatchCapture(isolate, 2 * group);
+    if (group_start == -1) continue;  // Group did not participate.
+
+    const int group_end = GetLastMatchCapture(isolate, 2 * group + 1);
+    Handle<String> group_text =
+        isolate->factory()->NewSubString(string, group_start, group_end);
+    slots->set(group, *group_text);
+  }
+
+  return result;
+}
+
+// Runs the built-in matcher of |regexp| against |string| (the
+// RegExpBuiltinExec algorithm of ES2015).
+//
+// Reads and converts regexp.lastIndex, runs RegExpImpl::Exec starting at that
+// index (or at 0 when the regexp is neither global nor sticky), updates
+// lastIndex, and returns either null (no match) or a freshly built result
+// array. Returns an empty MaybeHandle if any step threw.
+MaybeHandle<Object> RegExpExecJS(Isolate* isolate, Handle<JSRegExp> regexp,
+                                 Handle<String> string) {
+  Handle<Object> last_index_obj;
+  ASSIGN_RETURN_ON_EXCEPTION(
+      isolate, last_index_obj,
+      Object::GetProperty(regexp, isolate->factory()->lastIndex_string()),
+      Object);
+
+  // Conversion is required by the ES2015 specification (RegExpBuiltinExec
+  // algorithm, step 4) even if the value is discarded for non-global RegExps.
+  ASSIGN_RETURN_ON_EXCEPTION(isolate, last_index_obj,
+                             Object::ToLength(isolate, last_index_obj), Object);
+
+  // ToLength produces an integer in [0, 2^53 - 1], which is not necessarily
+  // representable as a Smi. Read it as a double and only narrow it to int
+  // once it is known to lie within the subject string.
+  const double last_index_double = last_index_obj->Number();
+
+  const int flags = regexp->GetFlags();
+  const bool global = (flags & JSRegExp::kGlobal) != 0;
+  const bool sticky = (flags & JSRegExp::kSticky) != 0;
+  const bool update_last_index = (global || sticky);
+
+  // Non-global, non-sticky RegExps always start matching at index 0.
+  int last_index = 0;
+  if (update_last_index) {
+    if (last_index_double > string->length()) {
+      RETURN_ON_EXCEPTION(isolate, SetLastIndex(isolate, regexp, 0), Object);
+      return isolate->factory()->null_value();
+    }
+    // Within [0, string->length()], so the narrowing conversion is lossless.
+    last_index = static_cast<int>(last_index_double);
+  }
+
+  Handle<JSObject> last_match_info = GetLastMatchInfo(isolate);
+
+  // matchIndices is either null or the RegExpLastMatchInfo array.
+  // TODO(littledan): Whether a RegExp is sticky is compiled into the RegExp
+  // itself, but ES2015 allows monkey-patching this property to differ from
+  // the internal flags. If it differs, recompile a different RegExp?
+  // TODO(jgruber): The result of Exec does not need to be a JSArray.
+  Handle<Object> match_indices;
+  ASSIGN_RETURN_ON_EXCEPTION(
+      isolate, match_indices,
+      RegExpImpl::Exec(regexp, string, last_index, last_match_info), Object);
+
+  // Match failure: reset lastIndex and report null.
+  if (match_indices->IsNull(isolate)) {
+    RETURN_ON_EXCEPTION(isolate, SetLastIndex(isolate, regexp, 0), Object);
+    return isolate->factory()->null_value();
+  }
+
+  // Successful match: for global/sticky RegExps, lastIndex becomes the end
+  // offset of the whole match.
+  if (update_last_index) {
+    last_index = GetLastMatchCapture(isolate, 1);
+    RETURN_ON_EXCEPTION(isolate, SetLastIndex(isolate, regexp, last_index),
+                        Object);
+  }
+
+  return ReturnNewResultFromMatchInfo(isolate, match_indices, string);
+}
+
+// ES#sec-regexpexec Runtime Semantics: RegExpExec ( R, S )
+//
+// |exec| may carry an already-fetched "exec" property of |regexp|; pass
+// undefined to have it looked up here. A callable exec is invoked with
+// |string| as its only argument and must produce an object or null,
+// otherwise a TypeError (kInvalidRegExpExecResult) is thrown. Without a
+// callable exec, |regexp| has to be a JSRegExp and the built-in matcher is
+// used; any other receiver results in a kIncompatibleMethodReceiver
+// TypeError.
+MaybeHandle<Object> RegExpExec(Isolate* isolate, Handle<JSReceiver> regexp,
+                               Handle<String> string, Handle<Object> exec) {
+  if (exec->IsUndefined(isolate)) {
+    ASSIGN_RETURN_ON_EXCEPTION(
+        isolate, exec,
+        Object::GetProperty(regexp, isolate->factory()->exec_string()), Object);
+  }
+
+  // No usable user-level exec: fall back to the built-in implementation.
+  if (!exec->IsCallable()) {
+    if (!regexp->IsJSRegExp()) {
+      THROW_NEW_ERROR(isolate,
+                      NewTypeError(MessageTemplate::kIncompatibleMethodReceiver,
+                                   isolate->factory()->NewStringFromAsciiChecked(
+                                       "RegExp.prototype.exec"),
+                                   regexp),
+                      Object);
+    }
+    return RegExpExecJS(isolate, Handle<JSRegExp>::cast(regexp), string);
+  }
+
+  // Call exec with |regexp| as the receiver and |string| as the argument.
+  const int kArgumentCount = 1;
+  ScopedVector<Handle<Object>> call_args(kArgumentCount);
+  call_args[0] = string;
+
+  Handle<Object> exec_result;
+  ASSIGN_RETURN_ON_EXCEPTION(
+      isolate, exec_result,
+      Execution::Call(isolate, exec, regexp, kArgumentCount, call_args.start()),
+      Object);
+
+  const bool is_valid_result =
+      exec_result->IsJSReceiver() || exec_result->IsNull(isolate);
+  if (!is_valid_result) {
+    THROW_NEW_ERROR(isolate,
+                    NewTypeError(MessageTemplate::kInvalidRegExpExecResult),
+                    Object);
+  }
+  return exec_result;
+}
+
+} // namespace
+
+// ES#sec-regexp.prototype.exec
+// RegExp.prototype.exec ( string )
+//
+// The receiver must be a JSRegExp (enforced by CHECK_RECEIVER). The first
+// argument, or undefined when absent, is converted to a string and handed to
+// the built-in matcher.
+BUILTIN(RegExpPrototypeExec) {
+  HandleScope scope(isolate);
+  CHECK_RECEIVER(JSRegExp, regexp, "RegExp.prototype.exec");
+
+  Handle<String> subject;
+  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
+      isolate, subject,
+      Object::ToString(isolate, args.atOrUndefined(isolate, 1)));
+
+  RETURN_RESULT_OR_FAILURE(isolate, RegExpExecJS(isolate, regexp, subject));
+}
+
+// ES#sec-regexp.prototype.test
+// RegExp.prototype.test ( S )
+//
+// Works on any JSReceiver. The argument is converted to a string, RegExpExec
+// is run on it (looking up "exec" on the receiver), and the boolean result
+// says whether RegExpExec returned something other than null.
+BUILTIN(RegExpPrototypeTest) {
+  HandleScope scope(isolate);
+  CHECK_RECEIVER(JSReceiver, recv, "RegExp.prototype.test");
+
+  Handle<String> subject;
+  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
+      isolate, subject,
+      Object::ToString(isolate, args.atOrUndefined(isolate, 1)));
+
+  Handle<Object> exec_result;
+  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
+      isolate, exec_result,
+      RegExpExec(isolate, recv, subject,
+                 isolate->factory()->undefined_value()));
+
+  const bool matched = !exec_result->IsNull(isolate);
+  return isolate->heap()->ToBoolean(matched);
+}
+
+namespace {
+
+// ES#sec-advancestringindex
+// AdvanceStringIndex ( S, index, unicode )
+//
+// Note: unlike the spec operation, this returns the *increment* to add to
+// |index| (1 or 2), not the advanced index itself.
+//
+// The increment is 2 only when |unicode| is set and the code units at
+// |index| and |index| + 1 form a lead/trail surrogate pair; in every other
+// case (including |index| at or past the end of |string|) it is 1.
+int AdvanceStringIndex(Isolate* isolate, Handle<String> string, int index,
+                       bool unicode) {
+  const int kOneCodeUnit = 1;
+  const int kSurrogatePair = 2;
+
+  if (!unicode) return kOneCodeUnit;
+  if (index >= string->length()) return kOneCodeUnit;
+
+  const uint16_t lead = string->Get(index);
+  const bool is_lead_surrogate = 0xD800 <= lead && lead <= 0xDBFF;
+  if (!is_lead_surrogate || index + 1 >= string->length()) {
+    return kOneCodeUnit;
+  }
+
+  const uint16_t trail = string->Get(index + 1);
+  const bool is_trail_surrogate = 0xDC00 <= trail && trail <= 0xDFFF;
+  return is_trail_surrogate ? kSurrogatePair : kOneCodeUnit;
+}
+
+// Advances regexp.lastIndex past the current position: reads lastIndex,
+// converts it with ToLength, adds the increment computed by
+// AdvanceStringIndex (1, or 2 for a surrogate pair when |unicode| is set) and
+// writes the sum back.
+//
+// |regexp| may be an arbitrary receiver with a user-defined exec, so
+// lastIndex can hold any integer in [0, 2^53 - 1] after ToLength. Such a
+// value is not necessarily a Smi and may not fit in an int, hence the index
+// is handled as a double and written back as a Number. The store is strict,
+// matching Set(rx, "lastIndex", nextIndex, true) in the specification.
+//
+// Returns an empty MaybeHandle if reading, converting or storing threw.
+MaybeHandle<Object> SetAdvancedStringIndex(Isolate* isolate,
+                                           Handle<JSReceiver> regexp,
+                                           Handle<String> string,
+                                           bool unicode) {
+  Handle<Object> last_index_obj;
+  ASSIGN_RETURN_ON_EXCEPTION(
+      isolate, last_index_obj,
+      Object::GetProperty(regexp, isolate->factory()->lastIndex_string()),
+      Object);
+
+  ASSIGN_RETURN_ON_EXCEPTION(isolate, last_index_obj,
+                             Object::ToLength(isolate, last_index_obj), Object);
+
+  const double last_index = last_index_obj->Number();
+
+  // At or beyond the end of the string the increment is always 1. Only
+  // consult the string contents when the index is known to be in range, which
+  // also guarantees that the narrowing conversion to int is lossless.
+  int increment = 1;
+  if (last_index < string->length()) {
+    increment = AdvanceStringIndex(isolate, string,
+                                   static_cast<int>(last_index), unicode);
+  }
+
+  Handle<Object> new_last_index =
+      isolate->factory()->NewNumber(last_index + increment);
+  return Object::SetProperty(regexp, isolate->factory()->lastIndex_string(),
+                             new_last_index, STRICT);
+}
+
+} // namespace
+
+// ES#sec-regexp.prototype-@@match
+// RegExp.prototype [ @@match ] ( string )
+//
+// For a receiver whose "global" property is falsy this is a single
+// RegExpExec call. Otherwise lastIndex is reset to 0 and RegExpExec is called
+// repeatedly, collecting the string value of element 0 of every result until
+// RegExpExec returns null. An empty match advances lastIndex explicitly so
+// that the loop makes progress. The result is null when nothing matched,
+// else a JSArray of all matched strings.
+BUILTIN(RegExpPrototypeMatch) {
+  HandleScope scope(isolate);
+  CHECK_RECEIVER(JSReceiver, recv, "RegExp.prototype.@@match");
+
+  Handle<String> subject;
+  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
+      isolate, subject,
+      Object::ToString(isolate, args.atOrUndefined(isolate, 1)));
+
+  Handle<Object> global_value;
+  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
+      isolate, global_value,
+      JSReceiver::GetProperty(recv, isolate->factory()->global_string()));
+
+  // Non-global: a single exec call yields the final result.
+  if (!global_value->BooleanValue()) {
+    RETURN_RESULT_OR_FAILURE(isolate,
+                             RegExpExec(isolate, recv, subject,
+                                        isolate->factory()->undefined_value()));
+  }
+
+  Handle<Object> unicode_value;
+  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
+      isolate, unicode_value,
+      JSReceiver::GetProperty(recv, isolate->factory()->unicode_string()));
+  const bool full_unicode = unicode_value->BooleanValue();
+
+  RETURN_FAILURE_ON_EXCEPTION(isolate, SetLastIndex(isolate, recv, 0));
+
+  static const int kInitialArraySize = 8;
+  Handle<FixedArray> matches =
+      isolate->factory()->NewFixedArrayWithHoles(kInitialArraySize);
+
+  int match_count = 0;
+  while (true) {
+    Handle<Object> exec_result;
+    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
+        isolate, exec_result,
+        RegExpExec(isolate, recv, subject,
+                   isolate->factory()->undefined_value()));
+
+    // Null signals that there are no further matches.
+    if (exec_result->IsNull(isolate)) break;
+
+    Handle<Object> first_element;
+    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
+        isolate, first_element, Object::GetElement(isolate, exec_result, 0));
+
+    Handle<String> matched_text;
+    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
+        isolate, matched_text, Object::ToString(isolate, first_element));
+
+    matches = FixedArray::SetAndGrow(matches, match_count, matched_text);
+
+    // An empty match would otherwise be found again at the same position.
+    if (matched_text->length() == 0) {
+      RETURN_FAILURE_ON_EXCEPTION(
+          isolate,
+          SetAdvancedStringIndex(isolate, recv, subject, full_unicode));
+    }
+
+    match_count++;
+  }
+
+  if (match_count == 0) return isolate->heap()->null_value();
+
+  // Drop the unused tail of the backing store before wrapping it.
+  matches->Shrink(match_count);
+  return *isolate->factory()->NewJSArrayWithElements(matches);
+}
+
+// ES#sec-regexp.prototype-@@search
+// RegExp.prototype [ @@search ] ( string )
+//
+// Saves the receiver's lastIndex, resets it to 0, runs RegExpExec once on the
+// stringified argument and then restores the saved lastIndex. Returns -1 when
+// RegExpExec produced null, otherwise the "index" property of its result.
+//
+// Restoring lastIndex corresponds to Set(rx, "lastIndex", previousLastIndex,
+// true) in the specification, so the store is strict: a failed write throws
+// instead of being silently ignored.
+BUILTIN(RegExpPrototypeSearch) {
+  HandleScope scope(isolate);
+  CHECK_RECEIVER(JSReceiver, recv, "RegExp.prototype.@@search");
+
+  Handle<Object> string_obj = args.atOrUndefined(isolate, 1);
+
+  Handle<String> string;
+  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, string,
+                                     Object::ToString(isolate, string_obj));
+
+  // Remember the caller-visible lastIndex; the search itself must not
+  // change it.
+  Handle<Object> previous_last_index_obj;
+  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
+      isolate, previous_last_index_obj,
+      Object::GetProperty(recv, isolate->factory()->lastIndex_string()));
+
+  // Always search from the start of the string.
+  RETURN_FAILURE_ON_EXCEPTION(isolate, SetLastIndex(isolate, recv, 0));
+
+  Handle<Object> result;
+  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
+      isolate, result,
+      RegExpExec(isolate, recv, string, isolate->factory()->undefined_value()));
+
+  RETURN_FAILURE_ON_EXCEPTION(
+      isolate, Object::SetProperty(recv, isolate->factory()->lastIndex_string(),
+                                   previous_last_index_obj, STRICT));
+
+  if (result->IsNull(isolate)) return Smi::FromInt(-1);
+
+  RETURN_RESULT_OR_FAILURE(
+      isolate, Object::GetProperty(result, isolate->factory()->index_string()));
+}
+
} // namespace internal
} // namespace v8
« no previous file with comments | « src/builtins/builtins.h ('k') | src/heap-symbols.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698