src/runtime/runtime-regexp.cc - Issue 604703004: Move string-related runtime functions into separate files.

Unified Diff: src/runtime/runtime-regexp.cc

Issue 604703004: Move string-related runtime functions into separate files. (Closed) Base URL: https://v8.googlecode.com/svn/branches/bleeding_edge

Patch Set: Created 6 years, 3 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: src/runtime/runtime-regexp.cc

diff --git a/src/runtime/runtime-regexp.cc b/src/runtime/runtime-regexp.cc

new file mode 100644

index 0000000000000000000000000000000000000000..e96d50195a80aaa8233ed3da9b89282bfd0aa469

--- /dev/null

+++ b/src/runtime/runtime-regexp.cc

@@ -0,0 +1,1131 @@

+// Use of this source code is governed by a BSD-style license that can be

+// found in the LICENSE file.

+#include "src/v8.h"

+#include "src/arguments.h"

+#include "src/jsregexp-inl.h"

+#include "src/jsregexp.h"

+#include "src/runtime/runtime.h"

+#include "src/runtime/runtime-utils.h"

+#include "src/runtime/string-builder.h"

+#include "src/string-search.h"

+namespace v8 {

+namespace internal {

+class CompiledReplacement {

+ public:

+ explicit CompiledReplacement(Zone* zone)

+ : parts_(1, zone), replacement_substrings_(0, zone), zone_(zone) {}

+ // Return whether the replacement is simple.

+ bool Compile(Handle<String> replacement, int capture_count,

+ int subject_length);

+ // Use Apply only if Compile returned false.

+ void Apply(ReplacementStringBuilder* builder, int match_from, int match_to,

+ int32_t* match);

+ // Number of distinct parts of the replacement pattern.

+ int parts() { return parts_.length(); }

+ Zone* zone() const { return zone_; }

+ private:

+ enum PartType {

+ SUBJECT_PREFIX = 1,

+ SUBJECT_SUFFIX,

+ SUBJECT_CAPTURE,

+ REPLACEMENT_SUBSTRING,

+ REPLACEMENT_STRING,

+ NUMBER_OF_PART_TYPES

+ };

+ struct ReplacementPart {

+ static inline ReplacementPart SubjectMatch() {

+ return ReplacementPart(SUBJECT_CAPTURE, 0);

+ }

+ static inline ReplacementPart SubjectCapture(int capture_index) {

+ return ReplacementPart(SUBJECT_CAPTURE, capture_index);

+ }

+ static inline ReplacementPart SubjectPrefix() {

+ return ReplacementPart(SUBJECT_PREFIX, 0);

+ }

+ static inline ReplacementPart SubjectSuffix(int subject_length) {

+ return ReplacementPart(SUBJECT_SUFFIX, subject_length);

+ }

+ static inline ReplacementPart ReplacementString() {

+ return ReplacementPart(REPLACEMENT_STRING, 0);

+ }

+ static inline ReplacementPart ReplacementSubString(int from, int to) {

+ DCHECK(from >= 0);

+ DCHECK(to > from);

+ return ReplacementPart(-from, to);

+ }

+ // If tag <= 0 then it is the negation of a start index of a substring of

+ // the replacement pattern, otherwise it's a value from PartType.

+ ReplacementPart(int tag, int data) : tag(tag), data(data) {

+ // Must be non-positive or a PartType value.

+ DCHECK(tag < NUMBER_OF_PART_TYPES);

+ }

+ // Either a value of PartType or a non-positive number that is

+ // the negation of an index into the replacement string.

+ int tag;

+ // The data value's interpretation depends on the value of tag:

+ // tag == SUBJECT_PREFIX ||

+ // tag == SUBJECT_SUFFIX: data is unused.

+ // tag == SUBJECT_CAPTURE: data is the number of the capture.

+ // tag == REPLACEMENT_SUBSTRING ||

+ // tag == REPLACEMENT_STRING: data is index into array of substrings

+ // of the replacement string.

+ // tag <= 0: Temporary representation of the substring of the replacement

+ // string ranging over -tag .. data.

+ // Is replaced by REPLACEMENT_{SUB,}STRING when we create the

+ // substring objects.

+ int data;

+ };

+ template <typename Char>

+ bool ParseReplacementPattern(ZoneList<ReplacementPart>* parts,

+ Vector<Char> characters, int capture_count,

+ int subject_length, Zone* zone) {

+ int length = characters.length();

+ int last = 0;

+ for (int i = 0; i < length; i++) {

+ Char c = characters[i];

+ if (c == '$') {

+ int next_index = i + 1;

+ if (next_index == length) { // No next character!

+ break;

+ }

+ Char c2 = characters[next_index];

+ switch (c2) {

+ case '$':

+ if (i > last) {

+ // There is a substring before. Include the first "$".

+ parts->Add(

+ ReplacementPart::ReplacementSubString(last, next_index),

+ zone);

+ last = next_index + 1; // Continue after the second "$".

+ } else {

+ // Let the next substring start with the second "$".

+ last = next_index;

+ }

+ i = next_index;

+ break;

+ case '`':

+ if (i > last) {

+ parts->Add(ReplacementPart::ReplacementSubString(last, i), zone);

+ }

+ parts->Add(ReplacementPart::SubjectPrefix(), zone);

+ i = next_index;

+ last = i + 1;

+ break;

+ case '\'':

+ if (i > last) {

+ parts->Add(ReplacementPart::ReplacementSubString(last, i), zone);

+ }

+ parts->Add(ReplacementPart::SubjectSuffix(subject_length), zone);

+ i = next_index;

+ last = i + 1;

+ break;

+ case '&':

+ if (i > last) {

+ parts->Add(ReplacementPart::ReplacementSubString(last, i), zone);

+ }

+ parts->Add(ReplacementPart::SubjectMatch(), zone);

+ i = next_index;

+ last = i + 1;

+ break;

+ case '0':

+ case '1':

+ case '2':

+ case '3':

+ case '4':

+ case '5':

+ case '6':

+ case '7':

+ case '8':

+ case '9': {

+ int capture_ref = c2 - '0';

+ if (capture_ref > capture_count) {

+ i = next_index;

+ continue;

+ }

+ int second_digit_index = next_index + 1;

+ if (second_digit_index < length) {

+ // Peek ahead to see if we have two digits.

+ Char c3 = characters[second_digit_index];

+ if ('0' <= c3 && c3 <= '9') { // Double digits.

+ int double_digit_ref = capture_ref * 10 + c3 - '0';

+ if (double_digit_ref <= capture_count) {

+ next_index = second_digit_index;

+ capture_ref = double_digit_ref;

+ }

+ if (capture_ref > 0) {

+ if (i > last) {

+ parts->Add(ReplacementPart::ReplacementSubString(last, i),

+ zone);

+ }

+ DCHECK(capture_ref <= capture_count);

+ parts->Add(ReplacementPart::SubjectCapture(capture_ref), zone);

+ last = next_index + 1;

+ }

+ i = next_index;

+ break;

+ }

+ default:

+ i = next_index;

+ break;

+ }

+ if (length > last) {

+ if (last == 0) {

+ // Replacement is simple. Do not use Apply to do the replacement.

+ return true;

+ } else {

+ parts->Add(ReplacementPart::ReplacementSubString(last, length), zone);

+ }

+ return false;

+ }

+ ZoneList<ReplacementPart> parts_;

+ ZoneList<Handle<String> > replacement_substrings_;

+ Zone* zone_;

+};

+bool CompiledReplacement::Compile(Handle<String> replacement, int capture_count,

+ int subject_length) {

+ {

+ DisallowHeapAllocation no_gc;

+ String::FlatContent content = replacement->GetFlatContent();

+ DCHECK(content.IsFlat());

+ bool simple = false;

+ if (content.IsOneByte()) {

+ simple = ParseReplacementPattern(&parts_, content.ToOneByteVector(),

+ capture_count, subject_length, zone());

+ } else {

+ DCHECK(content.IsTwoByte());

+ simple = ParseReplacementPattern(&parts_, content.ToUC16Vector(),

+ capture_count, subject_length, zone());

+ }

+ if (simple) return true;

+ }

+ Isolate* isolate = replacement->GetIsolate();

+ // Find substrings of replacement string and create them as String objects.

+ int substring_index = 0;

+ for (int i = 0, n = parts_.length(); i < n; i++) {

+ int tag = parts_[i].tag;

+ if (tag <= 0) { // A replacement string slice.

+ int from = -tag;

+ int to = parts_[i].data;

+ replacement_substrings_.Add(

+ isolate->factory()->NewSubString(replacement, from, to), zone());

+ parts_[i].tag = REPLACEMENT_SUBSTRING;

+ parts_[i].data = substring_index;

+ substring_index++;

+ } else if (tag == REPLACEMENT_STRING) {

+ replacement_substrings_.Add(replacement, zone());

+ parts_[i].data = substring_index;

+ substring_index++;

+ }

+ return false;

+void CompiledReplacement::Apply(ReplacementStringBuilder* builder,

+ int match_from, int match_to, int32_t* match) {

+ DCHECK_LT(0, parts_.length());

+ for (int i = 0, n = parts_.length(); i < n; i++) {

+ ReplacementPart part = parts_[i];

+ switch (part.tag) {

+ case SUBJECT_PREFIX:

+ if (match_from > 0) builder->AddSubjectSlice(0, match_from);

+ break;

+ case SUBJECT_SUFFIX: {

+ int subject_length = part.data;

+ if (match_to < subject_length) {

+ builder->AddSubjectSlice(match_to, subject_length);

+ }

+ break;

+ }

+ case SUBJECT_CAPTURE: {

+ int capture = part.data;

+ int from = match[capture * 2];

+ int to = match[capture * 2 + 1];

+ if (from >= 0 && to > from) {

+ builder->AddSubjectSlice(from, to);

+ }

+ break;

+ }

+ case REPLACEMENT_SUBSTRING:

+ case REPLACEMENT_STRING:

+ builder->AddString(replacement_substrings_[part.data]);

+ break;

+ default:

+ UNREACHABLE();

+ }

+void FindOneByteStringIndices(Vector<const uint8_t> subject, char pattern,

+ ZoneList<int>* indices, unsigned int limit,

+ Zone* zone) {

+ DCHECK(limit > 0);

+ // Collect indices of pattern in subject using memchr.

+ // Stop after finding at most limit values.

+ const uint8_t* subject_start = subject.start();

+ const uint8_t* subject_end = subject_start + subject.length();

+ const uint8_t* pos = subject_start;

+ while (limit > 0) {

+ pos = reinterpret_cast<const uint8_t*>(

+ memchr(pos, pattern, subject_end - pos));

+ if (pos == NULL) return;

+ indices->Add(static_cast<int>(pos - subject_start), zone);

+ pos++;

+ limit--;

+ }

+void FindTwoByteStringIndices(const Vector<const uc16> subject, uc16 pattern,

+ ZoneList<int>* indices, unsigned int limit,

+ Zone* zone) {

+ DCHECK(limit > 0);

+ const uc16* subject_start = subject.start();

+ const uc16* subject_end = subject_start + subject.length();

+ for (const uc16* pos = subject_start; pos < subject_end && limit > 0; pos++) {

+ if (*pos == pattern) {

+ indices->Add(static_cast<int>(pos - subject_start), zone);

+ limit--;

+ }

+template <typename SubjectChar, typename PatternChar>

+void FindStringIndices(Isolate* isolate, Vector<const SubjectChar> subject,

+ Vector<const PatternChar> pattern,

+ ZoneList<int>* indices, unsigned int limit, Zone* zone) {

+ DCHECK(limit > 0);

+ // Collect indices of pattern in subject.

+ // Stop after finding at most limit values.

+ int pattern_length = pattern.length();

+ int index = 0;

+ StringSearch<PatternChar, SubjectChar> search(isolate, pattern);

+ while (limit > 0) {

+ index = search.Search(subject, index);

+ if (index < 0) return;

+ indices->Add(index, zone);

+ index += pattern_length;

+ limit--;

+ }

+void FindStringIndicesDispatch(Isolate* isolate, String* subject,

+ String* pattern, ZoneList<int>* indices,

+ unsigned int limit, Zone* zone) {

+ {

+ DisallowHeapAllocation no_gc;

+ String::FlatContent subject_content = subject->GetFlatContent();

+ String::FlatContent pattern_content = pattern->GetFlatContent();

+ DCHECK(subject_content.IsFlat());

+ DCHECK(pattern_content.IsFlat());

+ if (subject_content.IsOneByte()) {

+ Vector<const uint8_t> subject_vector = subject_content.ToOneByteVector();

+ if (pattern_content.IsOneByte()) {

+ Vector<const uint8_t> pattern_vector =

+ pattern_content.ToOneByteVector();

+ if (pattern_vector.length() == 1) {

+ FindOneByteStringIndices(subject_vector, pattern_vector[0], indices,

+ limit, zone);

+ } else {

+ FindStringIndices(isolate, subject_vector, pattern_vector, indices,

+ limit, zone);

+ }

+ } else {

+ FindStringIndices(isolate, subject_vector,

+ pattern_content.ToUC16Vector(), indices, limit, zone);

+ }

+ } else {

+ Vector<const uc16> subject_vector = subject_content.ToUC16Vector();

+ if (pattern_content.IsOneByte()) {

+ Vector<const uint8_t> pattern_vector =

+ pattern_content.ToOneByteVector();

+ if (pattern_vector.length() == 1) {

+ FindTwoByteStringIndices(subject_vector, pattern_vector[0], indices,

+ limit, zone);

+ } else {

+ FindStringIndices(isolate, subject_vector, pattern_vector, indices,

+ limit, zone);

+ }

+ } else {

+ Vector<const uc16> pattern_vector = pattern_content.ToUC16Vector();

+ if (pattern_vector.length() == 1) {

+ FindTwoByteStringIndices(subject_vector, pattern_vector[0], indices,

+ limit, zone);

+ } else {

+ FindStringIndices(isolate, subject_vector, pattern_vector, indices,

+ limit, zone);

+ }

+template <typename ResultSeqString>

+MUST_USE_RESULT static Object* StringReplaceGlobalAtomRegExpWithString(

+ Isolate* isolate, Handle<String> subject, Handle<JSRegExp> pattern_regexp,

+ Handle<String> replacement, Handle<JSArray> last_match_info) {

+ DCHECK(subject->IsFlat());

+ DCHECK(replacement->IsFlat());

+ ZoneScope zone_scope(isolate->runtime_zone());

+ ZoneList<int> indices(8, zone_scope.zone());

+ DCHECK_EQ(JSRegExp::ATOM, pattern_regexp->TypeTag());

+ String* pattern =

+ String::cast(pattern_regexp->DataAt(JSRegExp::kAtomPatternIndex));

+ int subject_len = subject->length();

+ int pattern_len = pattern->length();

+ int replacement_len = replacement->length();

+ FindStringIndicesDispatch(isolate, *subject, pattern, &indices, 0xffffffff,

+ zone_scope.zone());

+ int matches = indices.length();

+ if (matches == 0) return *subject;

+ // Detect integer overflow.

+ int64_t result_len_64 = (static_cast<int64_t>(replacement_len) -

+ static_cast<int64_t>(pattern_len)) *

+ static_cast<int64_t>(matches) +

+ static_cast<int64_t>(subject_len);

+ int result_len;

+ if (result_len_64 > static_cast<int64_t>(String::kMaxLength)) {

+ STATIC_ASSERT(String::kMaxLength < kMaxInt);

+ result_len = kMaxInt; // Provoke exception.

+ } else {

+ result_len = static_cast<int>(result_len_64);

+ }

+ int subject_pos = 0;

+ int result_pos = 0;

+ MaybeHandle<SeqString> maybe_res;

+ if (ResultSeqString::kHasOneByteEncoding) {

+ maybe_res = isolate->factory()->NewRawOneByteString(result_len);

+ } else {

+ maybe_res = isolate->factory()->NewRawTwoByteString(result_len);

+ }

+ Handle<SeqString> untyped_res;

+ ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, untyped_res, maybe_res);

+ Handle<ResultSeqString> result = Handle<ResultSeqString>::cast(untyped_res);

+ for (int i = 0; i < matches; i++) {

+ // Copy non-matched subject content.

+ if (subject_pos < indices.at(i)) {

+ String::WriteToFlat(*subject, result->GetChars() + result_pos,

+ subject_pos, indices.at(i));

+ result_pos += indices.at(i) - subject_pos;

+ }

+ // Replace match.

+ if (replacement_len > 0) {

+ String::WriteToFlat(*replacement, result->GetChars() + result_pos, 0,

+ replacement_len);

+ result_pos += replacement_len;

+ }

+ subject_pos = indices.at(i) + pattern_len;

+ }

+ // Add remaining subject content at the end.

+ if (subject_pos < subject_len) {

+ String::WriteToFlat(*subject, result->GetChars() + result_pos, subject_pos,

+ subject_len);

+ }

+ int32_t match_indices[] = {indices.at(matches - 1),

+ indices.at(matches - 1) + pattern_len};

+ RegExpImpl::SetLastMatchInfo(last_match_info, subject, 0, match_indices);

+ return *result;

+MUST_USE_RESULT static Object* StringReplaceGlobalRegExpWithString(

+ Isolate* isolate, Handle<String> subject, Handle<JSRegExp> regexp,

+ Handle<String> replacement, Handle<JSArray> last_match_info) {

+ DCHECK(subject->IsFlat());

+ DCHECK(replacement->IsFlat());

+ int capture_count = regexp->CaptureCount();

+ int subject_length = subject->length();

+ // CompiledReplacement uses zone allocation.

+ ZoneScope zone_scope(isolate->runtime_zone());

+ CompiledReplacement compiled_replacement(zone_scope.zone());

+ bool simple_replace =

+ compiled_replacement.Compile(replacement, capture_count, subject_length);

+ // Shortcut for simple non-regexp global replacements

+ if (regexp->TypeTag() == JSRegExp::ATOM && simple_replace) {

+ if (subject->HasOnlyOneByteChars() && replacement->HasOnlyOneByteChars()) {

+ return StringReplaceGlobalAtomRegExpWithString<SeqOneByteString>(

+ isolate, subject, regexp, replacement, last_match_info);

+ } else {

+ return StringReplaceGlobalAtomRegExpWithString<SeqTwoByteString>(

+ isolate, subject, regexp, replacement, last_match_info);

+ }

+ RegExpImpl::GlobalCache global_cache(regexp, subject, true, isolate);

+ if (global_cache.HasException()) return isolate->heap()->exception();

+ int32_t* current_match = global_cache.FetchNext();

+ if (current_match == NULL) {

+ if (global_cache.HasException()) return isolate->heap()->exception();

+ return *subject;

+ }

+ // Guessing the number of parts that the final result string is built

+ // from. Global regexps can match any number of times, so we guess

+ // conservatively.

+ int expected_parts = (compiled_replacement.parts() + 1) * 4 + 1;

+ ReplacementStringBuilder builder(isolate->heap(), subject, expected_parts);

+ // Number of parts added by compiled replacement plus preceeding

+ // string and possibly suffix after last match. It is possible for

+ // all components to use two elements when encoded as two smis.

+ const int parts_added_per_loop = 2 * (compiled_replacement.parts() + 2);

+ int prev = 0;

+ do {

+ builder.EnsureCapacity(parts_added_per_loop);

+ int start = current_match[0];

+ int end = current_match[1];

+ if (prev < start) {

+ builder.AddSubjectSlice(prev, start);

+ }

+ if (simple_replace) {

+ builder.AddString(replacement);

+ } else {

+ compiled_replacement.Apply(&builder, start, end, current_match);

+ }

+ prev = end;

+ current_match = global_cache.FetchNext();

+ } while (current_match != NULL);

+ if (global_cache.HasException()) return isolate->heap()->exception();

+ if (prev < subject_length) {

+ builder.EnsureCapacity(2);

+ builder.AddSubjectSlice(prev, subject_length);

+ }

+ RegExpImpl::SetLastMatchInfo(last_match_info, subject, capture_count,

+ global_cache.LastSuccessfulMatch());

+ Handle<String> result;

+ ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, result, builder.ToString());

+ return *result;

+template <typename ResultSeqString>

+MUST_USE_RESULT static Object* StringReplaceGlobalRegExpWithEmptyString(

+ Isolate* isolate, Handle<String> subject, Handle<JSRegExp> regexp,

+ Handle<JSArray> last_match_info) {

+ DCHECK(subject->IsFlat());

+ // Shortcut for simple non-regexp global replacements

+ if (regexp->TypeTag() == JSRegExp::ATOM) {

+ Handle<String> empty_string = isolate->factory()->empty_string();

+ if (subject->IsOneByteRepresentation()) {

+ return StringReplaceGlobalAtomRegExpWithString<SeqOneByteString>(

+ isolate, subject, regexp, empty_string, last_match_info);

+ } else {

+ return StringReplaceGlobalAtomRegExpWithString<SeqTwoByteString>(

+ isolate, subject, regexp, empty_string, last_match_info);

+ }

+ RegExpImpl::GlobalCache global_cache(regexp, subject, true, isolate);

+ if (global_cache.HasException()) return isolate->heap()->exception();

+ int32_t* current_match = global_cache.FetchNext();

+ if (current_match == NULL) {

+ if (global_cache.HasException()) return isolate->heap()->exception();

+ return *subject;

+ }

+ int start = current_match[0];

+ int end = current_match[1];

+ int capture_count = regexp->CaptureCount();

+ int subject_length = subject->length();

+ int new_length = subject_length - (end - start);

+ if (new_length == 0) return isolate->heap()->empty_string();

+ Handle<ResultSeqString> answer;

+ if (ResultSeqString::kHasOneByteEncoding) {

+ answer = Handle<ResultSeqString>::cast(

+ isolate->factory()->NewRawOneByteString(new_length).ToHandleChecked());

+ } else {

+ answer = Handle<ResultSeqString>::cast(

+ isolate->factory()->NewRawTwoByteString(new_length).ToHandleChecked());

+ }

+ int prev = 0;

+ int position = 0;

+ do {

+ start = current_match[0];

+ end = current_match[1];

+ if (prev < start) {

+ // Add substring subject[prev;start] to answer string.

+ String::WriteToFlat(*subject, answer->GetChars() + position, prev, start);

+ position += start - prev;

+ }

+ prev = end;

+ current_match = global_cache.FetchNext();

+ } while (current_match != NULL);

+ if (global_cache.HasException()) return isolate->heap()->exception();

+ RegExpImpl::SetLastMatchInfo(last_match_info, subject, capture_count,

+ global_cache.LastSuccessfulMatch());

+ if (prev < subject_length) {

+ // Add substring subject[prev;length] to answer string.

+ String::WriteToFlat(*subject, answer->GetChars() + position, prev,

+ subject_length);

+ position += subject_length - prev;

+ }

+ if (position == 0) return isolate->heap()->empty_string();

+ // Shorten string and fill

+ int string_size = ResultSeqString::SizeFor(position);

+ int allocated_string_size = ResultSeqString::SizeFor(new_length);

+ int delta = allocated_string_size - string_size;

+ answer->set_length(position);

+ if (delta == 0) return *answer;

+ Address end_of_string = answer->address() + string_size;

+ Heap* heap = isolate->heap();

+ // The trimming is performed on a newly allocated object, which is on a

+ // fresly allocated page or on an already swept page. Hence, the sweeper

+ // thread can not get confused with the filler creation. No synchronization

+ // needed.

+ heap->CreateFillerObjectAt(end_of_string, delta);

+ heap->AdjustLiveBytes(answer->address(), -delta, Heap::FROM_MUTATOR);

+ return *answer;

+RUNTIME_FUNCTION(Runtime_StringReplaceGlobalRegExpWithString) {

+ HandleScope scope(isolate);

+ DCHECK(args.length() == 4);

+ CONVERT_ARG_HANDLE_CHECKED(String, subject, 0);

+ CONVERT_ARG_HANDLE_CHECKED(String, replacement, 2);

+ CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 1);

+ CONVERT_ARG_HANDLE_CHECKED(JSArray, last_match_info, 3);

+ RUNTIME_ASSERT(regexp->GetFlags().is_global());

+ RUNTIME_ASSERT(last_match_info->HasFastObjectElements());

+ subject = String::Flatten(subject);

+ if (replacement->length() == 0) {

+ if (subject->HasOnlyOneByteChars()) {

+ return StringReplaceGlobalRegExpWithEmptyString<SeqOneByteString>(

+ isolate, subject, regexp, last_match_info);

+ } else {

+ return StringReplaceGlobalRegExpWithEmptyString<SeqTwoByteString>(

+ isolate, subject, regexp, last_match_info);

+ }

+ replacement = String::Flatten(replacement);

+ return StringReplaceGlobalRegExpWithString(isolate, subject, regexp,

+ replacement, last_match_info);

+RUNTIME_FUNCTION(Runtime_StringSplit) {

+ HandleScope handle_scope(isolate);

+ DCHECK(args.length() == 3);

+ CONVERT_ARG_HANDLE_CHECKED(String, subject, 0);

+ CONVERT_ARG_HANDLE_CHECKED(String, pattern, 1);

+ CONVERT_NUMBER_CHECKED(uint32_t, limit, Uint32, args[2]);

+ RUNTIME_ASSERT(limit > 0);

+ int subject_length = subject->length();

+ int pattern_length = pattern->length();

+ RUNTIME_ASSERT(pattern_length > 0);

+ if (limit == 0xffffffffu) {

+ Handle<Object> cached_answer(

+ RegExpResultsCache::Lookup(isolate->heap(), *subject, *pattern,

+ RegExpResultsCache::STRING_SPLIT_SUBSTRINGS),

+ isolate);

+ if (*cached_answer != Smi::FromInt(0)) {

+ // The cache FixedArray is a COW-array and can therefore be reused.

+ Handle<JSArray> result = isolate->factory()->NewJSArrayWithElements(

+ Handle<FixedArray>::cast(cached_answer));

+ return *result;

+ }

+ // The limit can be very large (0xffffffffu), but since the pattern

+ // isn't empty, we can never create more parts than ~half the length

+ // of the subject.

+ subject = String::Flatten(subject);

+ pattern = String::Flatten(pattern);

+ static const int kMaxInitialListCapacity = 16;

+ ZoneScope zone_scope(isolate->runtime_zone());

+ // Find (up to limit) indices of separator and end-of-string in subject

+ int initial_capacity = Min<uint32_t>(kMaxInitialListCapacity, limit);

+ ZoneList<int> indices(initial_capacity, zone_scope.zone());

+ FindStringIndicesDispatch(isolate, *subject, *pattern, &indices, limit,

+ zone_scope.zone());

+ if (static_cast<uint32_t>(indices.length()) < limit) {

+ indices.Add(subject_length, zone_scope.zone());

+ }

+ // The list indices now contains the end of each part to create.

+ // Create JSArray of substrings separated by separator.

+ int part_count = indices.length();

+ Handle<JSArray> result = isolate->factory()->NewJSArray(part_count);

+ JSObject::EnsureCanContainHeapObjectElements(result);

+ result->set_length(Smi::FromInt(part_count));

+ DCHECK(result->HasFastObjectElements());

+ if (part_count == 1 && indices.at(0) == subject_length) {

+ FixedArray::cast(result->elements())->set(0, *subject);

+ return *result;

+ }

+ Handle<FixedArray> elements(FixedArray::cast(result->elements()));

+ int part_start = 0;

+ for (int i = 0; i < part_count; i++) {

+ HandleScope local_loop_handle(isolate);

+ int part_end = indices.at(i);

+ Handle<String> substring =

+ isolate->factory()->NewProperSubString(subject, part_start, part_end);

+ elements->set(i, *substring);

+ part_start = part_end + pattern_length;

+ }

+ if (limit == 0xffffffffu) {

+ if (result->HasFastObjectElements()) {

+ RegExpResultsCache::Enter(isolate, subject, pattern, elements,

+ RegExpResultsCache::STRING_SPLIT_SUBSTRINGS);

+ }

+ return *result;

+RUNTIME_FUNCTION(Runtime_RegExpCompile) {

+ HandleScope scope(isolate);

+ DCHECK(args.length() == 3);

+ CONVERT_ARG_HANDLE_CHECKED(JSRegExp, re, 0);

+ CONVERT_ARG_HANDLE_CHECKED(String, pattern, 1);

+ CONVERT_ARG_HANDLE_CHECKED(String, flags, 2);

+ Handle<Object> result;

+ ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, result,

+ RegExpImpl::Compile(re, pattern, flags));

+ return *result;

+RUNTIME_FUNCTION(Runtime_RegExpExecRT) {

+ HandleScope scope(isolate);

+ DCHECK(args.length() == 4);

+ CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);

+ CONVERT_ARG_HANDLE_CHECKED(String, subject, 1);

+ CONVERT_INT32_ARG_CHECKED(index, 2);

+ CONVERT_ARG_HANDLE_CHECKED(JSArray, last_match_info, 3);

+ // Due to the way the JS calls are constructed this must be less than the

+ // length of a string, i.e. it is always a Smi. We check anyway for security.

+ RUNTIME_ASSERT(index >= 0);

+ RUNTIME_ASSERT(index <= subject->length());

+ isolate->counters()->regexp_entry_runtime()->Increment();

+ Handle<Object> result;

+ ASSIGN_RETURN_FAILURE_ON_EXCEPTION(

+ isolate, result,

+ RegExpImpl::Exec(regexp, subject, index, last_match_info));

+ return *result;

+RUNTIME_FUNCTION(Runtime_RegExpConstructResult) {

+ HandleScope handle_scope(isolate);

+ DCHECK(args.length() == 3);

+ CONVERT_SMI_ARG_CHECKED(size, 0);

+ RUNTIME_ASSERT(size >= 0 && size <= FixedArray::kMaxLength);

+ CONVERT_ARG_HANDLE_CHECKED(Object, index, 1);

+ CONVERT_ARG_HANDLE_CHECKED(Object, input, 2);

+ Handle<FixedArray> elements = isolate->factory()->NewFixedArray(size);

+ Handle<Map> regexp_map(isolate->native_context()->regexp_result_map());

+ Handle<JSObject> object =

+ isolate->factory()->NewJSObjectFromMap(regexp_map, NOT_TENURED, false);

+ Handle<JSArray> array = Handle<JSArray>::cast(object);

+ array->set_elements(*elements);

+ array->set_length(Smi::FromInt(size));

+ // Write in-object properties after the length of the array.

+ array->InObjectPropertyAtPut(JSRegExpResult::kIndexIndex, *index);

+ array->InObjectPropertyAtPut(JSRegExpResult::kInputIndex, *input);

+ return *array;

+RUNTIME_FUNCTION(Runtime_RegExpInitializeObject) {

+ HandleScope scope(isolate);

+ DCHECK(args.length() == 6);

+ CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);

+ CONVERT_ARG_HANDLE_CHECKED(String, source, 1);

+ // If source is the empty string we set it to "(?:)" instead as

+ // suggested by ECMA-262, 5th, section 15.10.4.1.

+ if (source->length() == 0) source = isolate->factory()->query_colon_string();

+ CONVERT_ARG_HANDLE_CHECKED(Object, global, 2);

+ if (!global->IsTrue()) global = isolate->factory()->false_value();

+ CONVERT_ARG_HANDLE_CHECKED(Object, ignoreCase, 3);

+ if (!ignoreCase->IsTrue()) ignoreCase = isolate->factory()->false_value();

+ CONVERT_ARG_HANDLE_CHECKED(Object, multiline, 4);

+ if (!multiline->IsTrue()) multiline = isolate->factory()->false_value();

+ CONVERT_ARG_HANDLE_CHECKED(Object, sticky, 5);

+ if (!sticky->IsTrue()) sticky = isolate->factory()->false_value();

+ Map* map = regexp->map();

+ Object* constructor = map->constructor();

+ if (!FLAG_harmony_regexps && constructor->IsJSFunction() &&

+ JSFunction::cast(constructor)->initial_map() == map) {

+ // If we still have the original map, set in-object properties directly.

+ regexp->InObjectPropertyAtPut(JSRegExp::kSourceFieldIndex, *source);

+ // Both true and false are immovable immortal objects so no need for write

+ // barrier.

+ regexp->InObjectPropertyAtPut(JSRegExp::kGlobalFieldIndex, *global,

+ SKIP_WRITE_BARRIER);

+ regexp->InObjectPropertyAtPut(JSRegExp::kIgnoreCaseFieldIndex, *ignoreCase,

+ SKIP_WRITE_BARRIER);

+ regexp->InObjectPropertyAtPut(JSRegExp::kMultilineFieldIndex, *multiline,

+ SKIP_WRITE_BARRIER);

+ regexp->InObjectPropertyAtPut(JSRegExp::kLastIndexFieldIndex,

+ Smi::FromInt(0), SKIP_WRITE_BARRIER);

+ return *regexp;

+ }

+ // Map has changed, so use generic, but slower, method. We also end here if

+ // the --harmony-regexp flag is set, because the initial map does not have

+ // space for the 'sticky' flag, since it is from the snapshot, but must work

+ // both with and without --harmony-regexp. When sticky comes out from under

+ // the flag, we will be able to use the fast initial map.

+ PropertyAttributes final =

+ static_cast<PropertyAttributes>(READ_ONLY | DONT_ENUM | DONT_DELETE);

+ PropertyAttributes writable =

+ static_cast<PropertyAttributes>(DONT_ENUM | DONT_DELETE);

+ Handle<Object> zero(Smi::FromInt(0), isolate);

+ Factory* factory = isolate->factory();

+ JSObject::SetOwnPropertyIgnoreAttributes(regexp, factory->source_string(),

+ source, final).Check();

+ JSObject::SetOwnPropertyIgnoreAttributes(regexp, factory->global_string(),

+ global, final).Check();

+ JSObject::SetOwnPropertyIgnoreAttributes(

+ regexp, factory->ignore_case_string(), ignoreCase, final).Check();

+ JSObject::SetOwnPropertyIgnoreAttributes(regexp, factory->multiline_string(),

+ multiline, final).Check();

+ if (FLAG_harmony_regexps) {

+ JSObject::SetOwnPropertyIgnoreAttributes(regexp, factory->sticky_string(),

+ sticky, final).Check();

+ }

+ JSObject::SetOwnPropertyIgnoreAttributes(regexp, factory->last_index_string(),

+ zero, writable).Check();

+ return *regexp;

+RUNTIME_FUNCTION(Runtime_MaterializeRegExpLiteral) {

+ HandleScope scope(isolate);

+ DCHECK(args.length() == 4);

+ CONVERT_ARG_HANDLE_CHECKED(FixedArray, literals, 0);

+ CONVERT_SMI_ARG_CHECKED(index, 1);

+ CONVERT_ARG_HANDLE_CHECKED(String, pattern, 2);

+ CONVERT_ARG_HANDLE_CHECKED(String, flags, 3);

+ // Get the RegExp function from the context in the literals array.

+ // This is the RegExp function from the context in which the

+ // function was created. We do not use the RegExp function from the

+ // current native context because this might be the RegExp function

+ // from another context which we should not have access to.

+ Handle<JSFunction> constructor = Handle<JSFunction>(

+ JSFunction::NativeContextFromLiterals(*literals)->regexp_function());

+ // Compute the regular expression literal.

+ Handle<Object> regexp;

+ ASSIGN_RETURN_FAILURE_ON_EXCEPTION(

+ isolate, regexp,

+ RegExpImpl::CreateRegExpLiteral(constructor, pattern, flags));

+ literals->set(index, *regexp);

+ return *regexp;

+// Only called from Runtime_RegExpExecMultiple so it doesn't need to maintain

+// separate last match info. See comment on that function.

+template <bool has_capture>

+static Object* SearchRegExpMultiple(Isolate* isolate, Handle<String> subject,

+ Handle<JSRegExp> regexp,

+ Handle<JSArray> last_match_array,

+ Handle<JSArray> result_array) {

+ DCHECK(subject->IsFlat());

+ DCHECK_NE(has_capture, regexp->CaptureCount() == 0);

+ int capture_count = regexp->CaptureCount();

+ int subject_length = subject->length();

+ static const int kMinLengthToCache = 0x1000;

+ if (subject_length > kMinLengthToCache) {

+ Handle<Object> cached_answer(

+ RegExpResultsCache::Lookup(isolate->heap(), *subject, regexp->data(),

+ RegExpResultsCache::REGEXP_MULTIPLE_INDICES),

+ isolate);

+ if (*cached_answer != Smi::FromInt(0)) {

+ Handle<FixedArray> cached_fixed_array =

+ Handle<FixedArray>(FixedArray::cast(*cached_answer));

+ // The cache FixedArray is a COW-array and can therefore be reused.

+ JSArray::SetContent(result_array, cached_fixed_array);

+ // The actual length of the result array is stored in the last element of

+ // the backing store (the backing FixedArray may have a larger capacity).

+ Object* cached_fixed_array_last_element =

+ cached_fixed_array->get(cached_fixed_array->length() - 1);

+ Smi* js_array_length = Smi::cast(cached_fixed_array_last_element);

+ result_array->set_length(js_array_length);

+ RegExpImpl::SetLastMatchInfo(last_match_array, subject, capture_count,

+ NULL);

+ return *result_array;

+ }

+ RegExpImpl::GlobalCache global_cache(regexp, subject, true, isolate);

+ if (global_cache.HasException()) return isolate->heap()->exception();

+ // Ensured in Runtime_RegExpExecMultiple.

+ DCHECK(result_array->HasFastObjectElements());

+ Handle<FixedArray> result_elements(

+ FixedArray::cast(result_array->elements()));

+ if (result_elements->length() < 16) {

+ result_elements = isolate->factory()->NewFixedArrayWithHoles(16);

+ }

+ FixedArrayBuilder builder(result_elements);

+ // Position to search from.

+ int match_start = -1;

+ int match_end = 0;

+ bool first = true;

+ // Two smis before and after the match, for very long strings.

+ static const int kMaxBuilderEntriesPerRegExpMatch = 5;

+ while (true) {

+ int32_t* current_match = global_cache.FetchNext();

+ if (current_match == NULL) break;

+ match_start = current_match[0];

+ builder.EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch);

+ if (match_end < match_start) {

+ ReplacementStringBuilder::AddSubjectSlice(&builder, match_end,

+ match_start);

+ }

+ match_end = current_match[1];

+ {

+ // Avoid accumulating new handles inside loop.

+ HandleScope temp_scope(isolate);

+ Handle<String> match;

+ if (!first) {

+ match = isolate->factory()->NewProperSubString(subject, match_start,

+ match_end);

+ } else {

+ match =

+ isolate->factory()->NewSubString(subject, match_start, match_end);

+ first = false;

+ }

+ if (has_capture) {

+ // Arguments array to replace function is match, captures, index and

+ // subject, i.e., 3 + capture count in total.

+ Handle<FixedArray> elements =

+ isolate->factory()->NewFixedArray(3 + capture_count);

+ elements->set(0, *match);

+ for (int i = 1; i <= capture_count; i++) {

+ int start = current_match[i * 2];

+ if (start >= 0) {

+ int end = current_match[i * 2 + 1];

+ DCHECK(start <= end);

+ Handle<String> substring =

+ isolate->factory()->NewSubString(subject, start, end);

+ elements->set(i, *substring);

+ } else {

+ DCHECK(current_match[i * 2 + 1] < 0);

+ elements->set(i, isolate->heap()->undefined_value());

+ }

+ elements->set(capture_count + 1, Smi::FromInt(match_start));

+ elements->set(capture_count + 2, *subject);

+ builder.Add(*isolate->factory()->NewJSArrayWithElements(elements));

+ } else {

+ builder.Add(*match);

+ }

+ if (global_cache.HasException()) return isolate->heap()->exception();

+ if (match_start >= 0) {

+ // Finished matching, with at least one match.

+ if (match_end < subject_length) {

+ ReplacementStringBuilder::AddSubjectSlice(&builder, match_end,

+ subject_length);

+ }

+ RegExpImpl::SetLastMatchInfo(last_match_array, subject, capture_count,

+ NULL);

+ if (subject_length > kMinLengthToCache) {

+ // Store the length of the result array into the last element of the

+ // backing FixedArray.

+ builder.EnsureCapacity(1);

+ Handle<FixedArray> fixed_array = builder.array();

+ fixed_array->set(fixed_array->length() - 1,

+ Smi::FromInt(builder.length()));

+ // Cache the result and turn the FixedArray into a COW array.

+ RegExpResultsCache::Enter(isolate, subject,

+ handle(regexp->data(), isolate), fixed_array,

+ RegExpResultsCache::REGEXP_MULTIPLE_INDICES);

+ }

+ return *builder.ToJSArray(result_array);

+ } else {

+ return isolate->heap()->null_value(); // No matches at all.

+ }

+// This is only called for StringReplaceGlobalRegExpWithFunction. This sets

+// lastMatchInfoOverride to maintain the last match info, so we don't need to

+// set any other last match array info.

+RUNTIME_FUNCTION(Runtime_RegExpExecMultiple) {

+ HandleScope handles(isolate);

+ DCHECK(args.length() == 4);

+ CONVERT_ARG_HANDLE_CHECKED(String, subject, 1);

+ CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);

+ CONVERT_ARG_HANDLE_CHECKED(JSArray, last_match_info, 2);

+ CONVERT_ARG_HANDLE_CHECKED(JSArray, result_array, 3);

+ RUNTIME_ASSERT(last_match_info->HasFastObjectElements());

+ RUNTIME_ASSERT(result_array->HasFastObjectElements());

+ subject = String::Flatten(subject);

+ RUNTIME_ASSERT(regexp->GetFlags().is_global());

+ if (regexp->CaptureCount() == 0) {

+ return SearchRegExpMultiple<false>(isolate, subject, regexp,

+ last_match_info, result_array);

+ } else {

+ return SearchRegExpMultiple<true>(isolate, subject, regexp, last_match_info,

+ result_array);

+ }

+RUNTIME_FUNCTION(RuntimeReference_RegExpConstructResult) {

+ SealHandleScope shs(isolate);

+ return __RT_impl_Runtime_RegExpConstructResult(args, isolate);

+RUNTIME_FUNCTION(RuntimeReference_RegExpExec) {

+ SealHandleScope shs(isolate);

+ return __RT_impl_Runtime_RegExpExecRT(args, isolate);

+// Perform string match of pattern on subject, starting at start index.

+// Caller must ensure that 0 <= start_index <= sub->length(),

+// and should check that pat->length() + start_index <= sub->length().

+int Runtime::StringMatch(Isolate* isolate, Handle<String> sub,

+ Handle<String> pat, int start_index) {

+ DCHECK(0 <= start_index);

+ DCHECK(start_index <= sub->length());

+ int pattern_length = pat->length();

+ if (pattern_length == 0) return start_index;

+ int subject_length = sub->length();

+ if (start_index + pattern_length > subject_length) return -1;

+ sub = String::Flatten(sub);

+ pat = String::Flatten(pat);

+ DisallowHeapAllocation no_gc; // ensure vectors stay valid

+ // Extract flattened substrings of cons strings before getting encoding.

+ String::FlatContent seq_sub = sub->GetFlatContent();

+ String::FlatContent seq_pat = pat->GetFlatContent();

+ // dispatch on type of strings

+ if (seq_pat.IsOneByte()) {

+ Vector<const uint8_t> pat_vector = seq_pat.ToOneByteVector();

+ if (seq_sub.IsOneByte()) {

+ return SearchString(isolate, seq_sub.ToOneByteVector(), pat_vector,

+ start_index);

+ }

+ return SearchString(isolate, seq_sub.ToUC16Vector(), pat_vector,

+ start_index);

+ }

+ Vector<const uc16> pat_vector = seq_pat.ToUC16Vector();

+ if (seq_sub.IsOneByte()) {

+ return SearchString(isolate, seq_sub.ToOneByteVector(), pat_vector,

+ start_index);

+ }

+ return SearchString(isolate, seq_sub.ToUC16Vector(), pat_vector, start_index);

+} // namespace v8::internal

« no previous file with comments | « src/runtime/runtime-json.cc ('k') | src/runtime/runtime-strings.cc » ('j') | no next file with comments »