src/builtins/builtins-regexp.cc - Issue 2738413002: [regexp] Port RegExpExecStub to CSA (mostly)

Unified Diff: src/builtins/builtins-regexp.cc

Issue 2738413002: [regexp] Port RegExpExecStub to CSA (mostly) (Closed)

Patch Set: Fix arm64 clobbered code register Created 3 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Index: src/builtins/builtins-regexp.cc

diff --git a/src/builtins/builtins-regexp.cc b/src/builtins/builtins-regexp.cc

index 93cb55899b449f95cecd87cde850d823a1551bbc..75cd660c0819491f1f701da031205a2a6c733575 100644

--- a/src/builtins/builtins-regexp.cc

+++ b/src/builtins/builtins-regexp.cc

@@ -215,6 +215,285 @@ Node* RegExpBuiltinsAssembler::ConstructNewResultFromMatchInfo(

return result;

}

// Binds {var_string_start} and {var_string_end} to addresses computed from
// {string}: the start corresponds to element index {offset} + {last_index},
// the end to element index {offset} + {string_length}. {is_one_byte} selects
// UINT8_ELEMENTS vs. UINT16_ELEMENTS when scaling the index to a byte offset.
// Both output variables must have pointer representation (DCHECKed below).

+void RegExpBuiltinsAssembler::GetStringPointers(

+ Node* const string, Node* const offset, Node* const last_index,

+ Node* const string_length, bool is_one_byte, Variable* var_string_start,

+ Variable* var_string_end) {

+ DCHECK_EQ(var_string_start->rep(), MachineType::PointerRepresentation());

+ DCHECK_EQ(var_string_end->rep(), MachineType::PointerRepresentation());

// One- and two-byte sequential strings share the same header size, so a single
// header offset (adjusted by the heap object tag) serves both encodings.

+ STATIC_ASSERT(SeqOneByteString::kHeaderSize == SeqTwoByteString::kHeaderSize);

+ const int kHeaderSize = SeqOneByteString::kHeaderSize - kHeapObjectTag;

+ const ElementsKind kind = is_one_byte ? UINT8_ELEMENTS : UINT16_ELEMENTS;

// Start address: {string} plus the byte offset of index offset + last_index.

+ Node* const from_offset = ElementOffsetFromIndex(

+ IntPtrAdd(offset, last_index), kind, INTPTR_PARAMETERS, kHeaderSize);

+ var_string_start->Bind(IntPtrAdd(string, from_offset));

// End address: {string} plus the byte offset of index offset + string_length.

+ Node* const to_offset = ElementOffsetFromIndex(

+ IntPtrAdd(offset, string_length), kind, INTPTR_PARAMETERS, kHeaderSize);

+ var_string_end->Bind(IntPtrAdd(string, to_offset));

// Runs {regexp} on {string} starting at {last_index}. On a match, copies the
// capture offsets from the static offsets vector into {match_info} and returns
// {match_info}; on a failed match returns null. Every case the fast path does
// not handle is delegated to Runtime::kRegExpExec, whose result is returned.
// On an exception result, tail-calls a runtime function that throws.

+Node* RegExpBuiltinsAssembler::IrregexpExec(Node* const context,

+ Node* const regexp,

+ Node* const string,

+ Node* const last_index,

+ Node* const match_info) {

+// Call straight into the runtime when native RegExp code generation is not

+// selected at compile time (V8_INTERPRETED_REGEXP); otherwise use the fast

+// path below.

+#ifdef V8_INTERPRETED_REGEXP

+ return CallRuntime(Runtime::kRegExpExec, context, regexp, string, last_index,

+ match_info);

+#else // V8_INTERPRETED_REGEXP

+ CSA_ASSERT(this, TaggedIsNotSmi(regexp));

+ CSA_ASSERT(this, HasInstanceType(regexp, JS_REGEXP_TYPE));

+ CSA_ASSERT(this, TaggedIsNotSmi(string));

+ CSA_ASSERT(this, IsString(string));

+ CSA_ASSERT(this, IsHeapNumberMap(LoadReceiverMap(last_index)));

+ CSA_ASSERT(this, IsFixedArrayMap(LoadReceiverMap(match_info)));

+ Node* const int_zero = IntPtrConstant(0);

+ Variable var_result(this, MachineRepresentation::kTagged);

+ Variable var_string(this, MachineType::PointerRepresentation(), int_zero);

+ Variable var_string_offset(this, MachineType::PointerRepresentation(),

+ int_zero);

+ Variable var_string_instance_type(this, MachineRepresentation::kWord32,

+ Int32Constant(0));

// {out} is the common exit; {runtime} is the deferred slow path that calls
// Runtime::kRegExpExec.

+ Label out(this), runtime(this, Label::kDeferred);

+ // External constants.

+ Node* const regexp_stack_memory_size_address = ExternalConstant(

+ ExternalReference::address_of_regexp_stack_memory_size(isolate()));

+ Node* const static_offsets_vector_address = ExternalConstant(

+ ExternalReference::address_of_static_offsets_vector(isolate()));

+ Node* const pending_exception_address = ExternalConstant(

+ ExternalReference(Isolate::kPendingExceptionAddress, isolate()));

+ // Ensure that a RegExp stack is allocated (a size of zero goes to runtime).

+ {

+ Node* const stack_size =

+ Load(MachineType::IntPtr(), regexp_stack_memory_size_address);

+ GotoIf(IntPtrEqual(stack_size, int_zero), &runtime);

+ }

+ Node* const data = LoadObjectField(regexp, JSRegExp::kDataOffset);

+ {

+ // Check that the RegExp has been compiled (data contains a fixed array).

+ CSA_ASSERT(this, TaggedIsNotSmi(data));

+ CSA_ASSERT(this, HasInstanceType(data, FIXED_ARRAY_TYPE));

+ // Check the type of the RegExp. Only continue if type is

+ // JSRegExp::IRREGEXP.

+ Node* const tag = LoadFixedArrayElement(data, JSRegExp::kTagIndex);

+ GotoIfNot(SmiEqual(tag, SmiConstant(JSRegExp::IRREGEXP)), &runtime);

+ // Check (number_of_captures + 1) * 2 <= offsets vector size

+ // Or number_of_captures <= offsets vector size / 2 - 1

+ Node* const capture_count =

+ LoadFixedArrayElement(data, JSRegExp::kIrregexpCaptureCountIndex);

+ CSA_ASSERT(this, TaggedIsSmi(capture_count));

+ STATIC_ASSERT(Isolate::kJSRegexpStaticOffsetsVectorSize >= 2);

+ GotoIf(SmiAbove(

+ capture_count,

+ SmiConstant(Isolate::kJSRegexpStaticOffsetsVectorSize / 2 - 1)),

+ &runtime);

+ }

+ // Unpack the string if possible.

// NOTE(review): TryUnpackString receives {runtime} as its bailout label;
// presumably it jumps there when the string cannot be unpacked — confirm
// against its definition.

+ var_string.Bind(BitcastTaggedToWord(string));

+ var_string_offset.Bind(int_zero);

+ var_string_instance_type.Bind(LoadInstanceType(string));

+ {

+ TryUnpackString(&var_string, &var_string_offset, &var_string_instance_type,

+ &runtime);

+ // At this point, {var_string} may contain a faked sequential string (i.e.

+ // an external string with an adjusted offset) so we cannot assert

+ // IsString({var_string}). We also cannot allocate after this point since

+ // GC could move {var_string}'s underlying string.

+ }

// The length is read from the original {string}, not from {var_string}.

+ Node* const smi_string_length = LoadStringLength(string);

+ // Bail out to runtime for invalid {last_index} values. Note that a

// {last_index} equal to the string length also takes the runtime path, since
// the comparison below is SmiAboveOrEqual.

+ GotoIfNot(TaggedIsSmi(last_index), &runtime);

+ GotoIf(SmiAboveOrEqual(last_index, smi_string_length), &runtime);

+ // Load the irregexp code object and offsets into the subject string. Both

+ // depend on whether the string is one- or two-byte.

+ Node* const int_last_index = SmiUntag(last_index);

+ Variable var_string_start(this, MachineType::PointerRepresentation());

+ Variable var_string_end(this, MachineType::PointerRepresentation());

+ Variable var_code(this, MachineRepresentation::kTagged);

+ {

+ Node* const int_string_length = SmiUntag(smi_string_length);

+ Node* const string_instance_type = var_string_instance_type.value();

+ CSA_ASSERT(this, IsSequentialStringInstanceType(string_instance_type));

// The two branches differ only in the element width passed to
// GetStringPointers and in which code slot of {data} is loaded.

+ Label next(this), if_isonebyte(this), if_istwobyte(this, Label::kDeferred);

+ Branch(IsOneByteStringInstanceType(string_instance_type), &if_isonebyte,

+ &if_istwobyte);

+ Bind(&if_isonebyte);

+ {

+ const bool kIsOneByte = true;

+ GetStringPointers(var_string.value(), var_string_offset.value(),

+ int_last_index, int_string_length, kIsOneByte,

+ &var_string_start, &var_string_end);

+ var_code.Bind(

+ LoadFixedArrayElement(data, JSRegExp::kIrregexpLatin1CodeIndex));

+ Goto(&next);

+ }

+ Bind(&if_istwobyte);

+ {

+ const bool kIsOneByte = false;

+ GetStringPointers(var_string.value(), var_string_offset.value(),

+ int_last_index, int_string_length, kIsOneByte,

+ &var_string_start, &var_string_end);

+ var_code.Bind(

+ LoadFixedArrayElement(data, JSRegExp::kIrregexpUC16CodeIndex));

+ Goto(&next);

+ }

+ Bind(&next);

+ }

+ // Check that the irregexp code has been generated for the actual string

+ // encoding. If it has, the field contains a code object otherwise it contains

+ // smi (code flushing support).

+ Node* const code = var_code.value();

+ GotoIf(TaggedIsSmi(code), &runtime);

+ CSA_ASSERT(this, HasInstanceType(code, CODE_TYPE));

+ Label if_success(this), if_failure(this),

+ if_exception(this, Label::kDeferred);

+ {

+ IncrementCounter(isolate()->counters()->regexp_entry_native(), 1);

+ Callable exec_callable = CodeFactory::RegExpExec(isolate());

Igor Sheludko 2017/03/15 00:39:04 I'd rather name the stub and this function the oth

jgruber 2017/03/15 13:43:44 Renamed the function RegExpExecInternal. The stub

// The stub receives the original {string}, the untagged last index truncated
// to 32 bits, the raw start/end addresses computed above, and the code object.

+ Node* const result = CallStub(

+ exec_callable, context, string, TruncateWordToWord32(int_last_index),

+ var_string_start.value(), var_string_end.value(), code);

+ // Check the result.

+ // We expect exactly one result since we force the called regexp to behave

Igor Sheludko 2017/03/15 00:39:03 s/we force/the stub forces/ or maybe add a comment

jgruber 2017/03/15 13:43:44 Done.

+ // as non-global.

+ GotoIf(SmiEqual(result, SmiConstant(1)), &if_success);

+ GotoIf(SmiEqual(result, SmiConstant(NativeRegExpMacroAssembler::FAILURE)),

+ &if_failure);

+ GotoIf(SmiEqual(result, SmiConstant(NativeRegExpMacroAssembler::EXCEPTION)),

+ &if_exception);

// Any other result is asserted to be RETRY and is handled by the runtime.

+ CSA_ASSERT(

+ this, SmiEqual(result, SmiConstant(NativeRegExpMacroAssembler::RETRY)));

+ Goto(&runtime);

+ }

+ Bind(&if_success);

+ {

+ // Check that the last match info has space for the capture registers and

+ // the additional information. Ensure no overflow in add.

+ STATIC_ASSERT(FixedArray::kMaxLength < kMaxInt - FixedArray::kLengthOffset);

+ Node* const available_slots =

+ SmiSub(LoadFixedArrayBaseLength(match_info),

+ SmiConstant(RegExpMatchInfo::kLastMatchOverhead));

+ Node* const capture_count =

+ LoadFixedArrayElement(data, JSRegExp::kIrregexpCaptureCountIndex);

Igor Sheludko 2017/03/15 00:39:04 Maybe // Calculate number of capture registers (nu

jgruber 2017/03/15 13:43:44 Done.

// register_count = (capture_count + 1) * 2, computed on Smis. If {match_info}
// has fewer available slots than that, defer to the runtime.

+ Node* const register_count =

+ SmiShl(SmiAdd(capture_count, SmiConstant(1)), 1);

+ GotoIf(SmiGreaterThan(register_count, available_slots), &runtime);

+ // Fill match_info.

+ StoreFixedArrayElement(match_info, RegExpMatchInfo::kNumberOfCapturesIndex,

+ register_count, SKIP_WRITE_BARRIER);

+ StoreFixedArrayElement(match_info, RegExpMatchInfo::kLastSubjectIndex,

+ string);

+ StoreFixedArrayElement(match_info, RegExpMatchInfo::kLastInputIndex,

+ string);

+ // Fill match and capture offsets in match_info.

+ {

// The loop walks byte offsets [0, limit_offset) into the static offsets vector
// in steps of kInt32Size, while {var_to_offset} tracks the destination byte
// offset in {match_info} and advances by kPointerSize per element.

+ Node* const limit_offset = ElementOffsetFromIndex(

+ register_count, INT32_ELEMENTS, SMI_PARAMETERS, 0);

+ Node* const to_offset = ElementOffsetFromIndex(

+ IntPtrConstant(RegExpMatchInfo::kFirstCaptureIndex), FAST_ELEMENTS,

+ INTPTR_PARAMETERS, RegExpMatchInfo::kHeaderSize - kHeapObjectTag);

+ Variable var_to_offset(this, MachineType::PointerRepresentation(),

+ to_offset);

+ VariableList vars({&var_to_offset}, zone());

+ BuildFastLoop(

+ vars, int_zero, limit_offset,

+ [this, static_offsets_vector_address, match_info,

Igor Sheludko 2017/03/15 00:39:04 [=, &var_to_offset]

jgruber 2017/03/15 13:43:44 Done.

+ &var_to_offset](Node* offset) {

// Each 32-bit offset is converted to a Smi before it is stored.

+ Node* const value = Load(MachineType::Int32(),

+ static_offsets_vector_address, offset);

+ Node* const smi_value = SmiFromWord32(value);

+ StoreNoWriteBarrier(MachineRepresentation::kTagged, match_info,

+ var_to_offset.value(), smi_value);

+ Increment(var_to_offset, kPointerSize);

+ },

+ kInt32Size, INTPTR_PARAMETERS, IndexAdvanceMode::kPost);

+ }

+ var_result.Bind(match_info);

+ Goto(&out);

+ }

+ Bind(&if_failure);

+ {

// No match: the result is null.

+ var_result.Bind(NullConstant());

+ Goto(&out);

+ }

+ Bind(&if_exception);

+ {

+ Node* const pending_exception =

+ Load(MachineType::AnyTagged(), pending_exception_address);

+ // If there is no pending exception (the slot holds the hole), a stack

Igor Sheludko 2017/03/15 00:39:03 Please reformat the comment.

jgruber 2017/03/15 13:43:44 Done.

+ // overflow (on the backtrack stack) was detected in RegExp code.

+ Label stack_overflow(this), rethrow(this);

+ Branch(IsTheHole(pending_exception), &stack_overflow, &rethrow);

Igor Sheludko 2017/03/15 00:39:03 AFAICT the only non-gc-safe thing the Irregexp cod

jgruber 2017/03/15 13:43:44 Great find. The initial goal was to completely rem

+ Bind(&stack_overflow);

+ TailCallRuntime(Runtime::kThrowStackOverflow, context);

+ Bind(&rethrow);

+ TailCallRuntime(Runtime::kRegExpExecReThrow, context);

+ }

+ Bind(&runtime);

+ {

// Slow path: hand the original arguments to the runtime implementation.

+ Node* const result = CallRuntime(Runtime::kRegExpExec, context, regexp,

+ string, last_index, match_info);

+ var_result.Bind(result);

+ Goto(&out);

+ }

+ Bind(&out);

+ return var_result.value();

+#endif // V8_INTERPRETED_REGEXP

// ES#sec-regexp.prototype.exec

// RegExp.prototype.exec ( string )

// Implements the core of RegExp.prototype.exec but without actually

@@ -311,9 +590,8 @@ Node* RegExpBuiltinsAssembler::RegExpPrototypeExecBodyWithoutResult(

native_context, Context::REGEXP_LAST_MATCH_INFO_INDEX);

// Call the exec stub.

- Callable exec_callable = CodeFactory::RegExpExec(isolate);

- match_indices = CallStub(exec_callable, context, regexp, string,

- var_lastindex.value(), last_match_info);

+ match_indices = IrregexpExec(context, regexp, string, var_lastindex.value(),

jgruber 2017/03/13 12:27:37 This will add even more to the snapshot size. It's

Igor Sheludko 2017/03/15 00:39:03 Why not leave this code in the stub in this CL?

jgruber 2017/03/15 13:43:44 I have a follow-up CL that sets up a couple more s

+ last_match_info);

var_result.Bind(match_indices);

// {match_indices} is either null or the RegExpMatchInfo array.

@@ -1810,8 +2088,6 @@ void RegExpBuiltinsAssembler::RegExpPrototypeSplitBody(Node* const context,

Node* const regexp,

Node* const string,

Node* const limit) {

- Isolate* isolate = this->isolate();

Node* const null = NullConstant();

Node* const smi_zero = SmiConstant(0);

Node* const int_zero = IntPtrConstant(0);

@@ -1846,9 +2122,8 @@ void RegExpBuiltinsAssembler::RegExpPrototypeSplitBody(Node* const context,

Node* const last_match_info = LoadContextElement(

native_context, Context::REGEXP_LAST_MATCH_INFO_INDEX);

- Callable exec_callable = CodeFactory::RegExpExec(isolate);

- Node* const match_indices = CallStub(exec_callable, context, regexp,

- string, smi_zero, last_match_info);

+ Node* const match_indices =

+ IrregexpExec(context, regexp, string, smi_zero, last_match_info);

Label return_singleton_array(this);

Branch(WordEqual(match_indices, null), &return_singleton_array,

@@ -1906,9 +2181,8 @@ void RegExpBuiltinsAssembler::RegExpPrototypeSplitBody(Node* const context,

Node* const last_match_info = LoadContextElement(

native_context, Context::REGEXP_LAST_MATCH_INFO_INDEX);

- Callable exec_callable = CodeFactory::RegExpExec(isolate);

- Node* const match_indices = CallStub(exec_callable, context, regexp, string,

- next_search_from, last_match_info);

+ Node* const match_indices = IrregexpExec(context, regexp, string,

+ next_search_from, last_match_info);

// We're done if no match was found.

{

@@ -2555,9 +2829,8 @@ TF_BUILTIN(RegExpInternalMatch, RegExpBuiltinsAssembler) {

Node* const internal_match_info = LoadContextElement(

native_context, Context::REGEXP_INTERNAL_MATCH_INFO_INDEX);

- Callable exec_callable = CodeFactory::RegExpExec(isolate());

- Node* const match_indices = CallStub(exec_callable, context, regexp, string,

- smi_zero, internal_match_info);

+ Node* const match_indices =

+ IrregexpExec(context, regexp, string, smi_zero, internal_match_info);

Label if_matched(this), if_didnotmatch(this);

Branch(WordEqual(match_indices, null), &if_didnotmatch, &if_matched);

« no previous file with comments | « src/builtins/builtins-regexp.h ('k') | src/code-stub-assembler.h » ('j') | src/code-stub-assembler.cc » ('J')