Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(19)

Side by Side Diff: src/runtime.cc

Issue 7782028: Faster non-regexp global string.replace. (Closed) Base URL: https://v8.googlecode.com/svn/branches/bleeding_edge
Patch Set: Created 9 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2011 the V8 project authors. All rights reserved. 1 // Copyright 2011 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 2751 matching lines...) Expand 10 before | Expand all | Expand 10 after
2762 case REPLACEMENT_STRING: 2762 case REPLACEMENT_STRING:
2763 builder->AddString(replacement_substrings_[part.data]); 2763 builder->AddString(replacement_substrings_[part.data]);
2764 break; 2764 break;
2765 default: 2765 default:
2766 UNREACHABLE(); 2766 UNREACHABLE();
2767 } 2767 }
2768 } 2768 }
2769 } 2769 }
2770 2770
2771 2771
2772 void FindAsciiStringIndices(Vector<const char> subject,
2773 char pattern,
Yang 2011/09/07 13:33:40 Moved up from further below.
2774 ZoneList<int>* indices,
2775 unsigned int limit) {
2776 ASSERT(limit > 0);
2777 // Collect indices of pattern in subject using memchr.
2778 // Stop after finding at most limit values.
2779 const char* subject_start = reinterpret_cast<const char*>(subject.start());
2780 const char* subject_end = subject_start + subject.length();
2781 const char* pos = subject_start;
2782 while (limit > 0) {
2783 pos = reinterpret_cast<const char*>(
2784 memchr(pos, pattern, subject_end - pos));
2785 if (pos == NULL) return;
2786 indices->Add(static_cast<int>(pos - subject_start));
2787 pos++;
2788 limit--;
2789 }
2790 }
2791
2792
2793 template <typename SubjectChar, typename PatternChar>
2794 void FindStringIndices(Isolate* isolate,
2795 Vector<const SubjectChar> subject,
Yang 2011/09/07 13:33:40 Moved up from further below.
2796 Vector<const PatternChar> pattern,
2797 ZoneList<int>* indices,
2798 unsigned int limit) {
2799 ASSERT(limit > 0);
2800 // Collect indices of pattern in subject.
2801 // Stop after finding at most limit values.
2802 int pattern_length = pattern.length();
2803 int index = 0;
2804 StringSearch<PatternChar, SubjectChar> search(isolate, pattern);
2805 while (limit > 0) {
2806 index = search.Search(subject, index);
2807 if (index < 0) return;
2808 indices->Add(index);
2809 index += pattern_length;
2810 limit--;
2811 }
2812 }
2813
2814
2815 void FindStringIndicesDispatch(Isolate* isolate,
2816 String* subject,
2817 String* pattern,
2818 ZoneList<int>* indices,
2819 unsigned int limit) {
2820 {
Yang 2011/09/07 13:33:40 Refactored code from Runtime_StringSplit.
2821 AssertNoAllocation no_gc;
2822 String::FlatContent subject_content = subject->GetFlatContent();
2823 String::FlatContent pattern_content = pattern->GetFlatContent();
2824 ASSERT(subject_content.IsFlat());
2825 ASSERT(pattern_content.IsFlat());
2826 if (subject_content.IsAscii()) {
2827 Vector<const char> subject_vector = subject_content.ToAsciiVector();
2828 if (pattern_content.IsAscii()) {
2829 Vector<const char> pattern_vector = pattern_content.ToAsciiVector();
2830 if (pattern_vector.length() == 1) {
2831 FindAsciiStringIndices(subject_vector,
2832 pattern_vector[0],
2833 indices,
2834 limit);
2835 } else {
2836 FindStringIndices(isolate,
2837 subject_vector,
2838 pattern_vector,
2839 indices,
2840 limit);
2841 }
2842 } else {
2843 FindStringIndices(isolate,
2844 subject_vector,
2845 pattern_content.ToUC16Vector(),
2846 indices,
2847 limit);
2848 }
2849 } else {
2850 Vector<const uc16> subject_vector = subject_content.ToUC16Vector();
2851 if (pattern->IsAsciiRepresentation()) {
2852 FindStringIndices(isolate,
2853 subject_vector,
2854 pattern_content.ToAsciiVector(),
2855 indices,
2856 limit);
2857 } else {
2858 FindStringIndices(isolate,
2859 subject_vector,
2860 pattern_content.ToUC16Vector(),
2861 indices,
2862 limit);
2863 }
2864 }
2865 }
2866 }
2867
2868
2869 template<typename ResultSeqString>
2870 MUST_USE_RESULT static MaybeObject* StringReplaceStringWithString(
2871 Isolate* isolate,
2872 Handle<String> subject,
2873 Handle<JSRegExp> pattern_regexp,
2874 Handle<String> replacement = Handle<String>::null()) {
Lasse Reichstein 2011/09/07 13:50:53 Don't use an optional argument, just pass the null
2875 ASSERT(subject->IsFlat());
2876 ASSERT(replacement->IsFlat());
2877
2878 ZoneScope zone_space(isolate, DELETE_ON_EXIT);
2879 ZoneList<int> indices(8);
2880 String* pattern =
2881 String::cast(pattern_regexp->DataAt(JSRegExp::kAtomPatternIndex));
Lasse Reichstein 2011/09/07 13:50:53 Assert that the regexp is atomic.
2882 int subject_len = subject->length();
2883 int pattern_len = pattern->length();
2884 int replacement_len = (replacement.is_null()) ? 0 : replacement->length();
2885
2886 FindStringIndicesDispatch(isolate, *subject, pattern, &indices, 0xffffffff);
2887
2888 int matches = indices.length();
2889 if (matches == 0) return *subject;
2890
2891 int result_len = (replacement_len - pattern_len) * matches + subject_len;
2892 int subject_pos = 0;
2893 int result_pos = 0;
2894
2895 Handle<ResultSeqString> result;
2896 if (ResultSeqString::kHasAsciiEncoding) {
2897 result = Handle<ResultSeqString>::cast(
2898 isolate->factory()->NewRawAsciiString(result_len));
2899 } else {
2900 result = Handle<ResultSeqString>::cast(
2901 isolate->factory()->NewRawTwoByteString(result_len));
2902 }
2903
2904 for(int i = 0; i < matches; i++) {
2905 // Copy non-matched subject content.
2906 String::WriteToFlat(*subject,
Lasse Reichstein 2011/09/07 13:50:53 Would it be worth it to check that that subject_po
2907 result->GetChars() + result_pos,
2908 subject_pos,
2909 indices.at(i));
2910 result_pos += indices.at(i) - subject_pos;
2911 // Replace match.
Lasse Reichstein 2011/09/07 13:50:53 Move comment down one line.
2912
2913 if (replacement_len > 0) {
2914 String::WriteToFlat(*replacement,
2915 result->GetChars() + result_pos,
2916 0,
2917 replacement_len);
2918 result_pos += replacement_len;
2919 }
2920
2921 subject_pos = indices.at(i) + pattern_len;
2922 }
2923 String::WriteToFlat(*subject,
2924 result->GetChars() + result_pos,
2925 subject_pos,
2926 subject_len);
2927 return *result;
2928 }
2929
2772 2930
2773 MUST_USE_RESULT static MaybeObject* StringReplaceRegExpWithString( 2931 MUST_USE_RESULT static MaybeObject* StringReplaceRegExpWithString(
2774 Isolate* isolate, 2932 Isolate* isolate,
2775 String* subject, 2933 String* subject,
2776 JSRegExp* regexp, 2934 JSRegExp* regexp,
2777 String* replacement, 2935 String* replacement,
2778 JSArray* last_match_info) { 2936 JSArray* last_match_info) {
2779 ASSERT(subject->IsFlat()); 2937 ASSERT(subject->IsFlat());
2780 ASSERT(replacement->IsFlat()); 2938 ASSERT(replacement->IsFlat());
2781 2939
(...skipping 19 matching lines...) Expand all
2801 2959
2802 // CompiledReplacement uses zone allocation. 2960 // CompiledReplacement uses zone allocation.
2803 ZoneScope zone(isolate, DELETE_ON_EXIT); 2961 ZoneScope zone(isolate, DELETE_ON_EXIT);
2804 CompiledReplacement compiled_replacement; 2962 CompiledReplacement compiled_replacement;
2805 compiled_replacement.Compile(replacement_handle, 2963 compiled_replacement.Compile(replacement_handle,
2806 capture_count, 2964 capture_count,
2807 length); 2965 length);
2808 2966
2809 bool is_global = regexp_handle->GetFlags().is_global(); 2967 bool is_global = regexp_handle->GetFlags().is_global();
2810 2968
2969 // Shortcut for simple non-regexp global replacements
2970 if (is_global &&
2971 regexp->TypeTag() == JSRegExp::ATOM &&
2972 compiled_replacement.parts() == 1) {
2973 if (subject_handle->HasOnlyAsciiChars() &&
2974 replacement_handle->HasOnlyAsciiChars()) {
2975 return StringReplaceStringWithString<SeqAsciiString>(
2976 isolate, subject_handle, regexp_handle, replacement_handle);
2977 } else {
2978 return StringReplaceStringWithString<SeqTwoByteString>(
2979 isolate, subject_handle, regexp_handle, replacement_handle);
2980 }
2981 }
2982
2811 // Guessing the number of parts that the final result string is built 2983 // Guessing the number of parts that the final result string is built
2812 // from. Global regexps can match any number of times, so we guess 2984 // from. Global regexps can match any number of times, so we guess
2813 // conservatively. 2985 // conservatively.
2814 int expected_parts = 2986 int expected_parts =
2815 (compiled_replacement.parts() + 1) * (is_global ? 4 : 1) + 1; 2987 (compiled_replacement.parts() + 1) * (is_global ? 4 : 1) + 1;
2816 ReplacementStringBuilder builder(isolate->heap(), 2988 ReplacementStringBuilder builder(isolate->heap(),
2817 subject_handle, 2989 subject_handle,
2818 expected_parts); 2990 expected_parts);
2819 2991
2820 // Index of end of last match. 2992 // Index of end of last match.
(...skipping 65 matching lines...) Expand 10 before | Expand all | Expand 10 after
2886 Isolate* isolate, 3058 Isolate* isolate,
2887 String* subject, 3059 String* subject,
2888 JSRegExp* regexp, 3060 JSRegExp* regexp,
2889 JSArray* last_match_info) { 3061 JSArray* last_match_info) {
2890 ASSERT(subject->IsFlat()); 3062 ASSERT(subject->IsFlat());
2891 3063
2892 HandleScope handles(isolate); 3064 HandleScope handles(isolate);
2893 3065
2894 Handle<String> subject_handle(subject); 3066 Handle<String> subject_handle(subject);
2895 Handle<JSRegExp> regexp_handle(regexp); 3067 Handle<JSRegExp> regexp_handle(regexp);
3068
3069 // Shortcut for simple non-regexp global replacements
3070 if (regexp_handle->GetFlags().is_global() &&
3071 regexp_handle->TypeTag() == JSRegExp::ATOM) {
3072 if (subject_handle->HasOnlyAsciiChars()) {
3073 return StringReplaceStringWithString<SeqAsciiString>(
3074 isolate, subject_handle, regexp_handle);
3075 } else {
3076 return StringReplaceStringWithString<SeqTwoByteString>(
3077 isolate, subject_handle, regexp_handle);
3078 }
3079 }
3080
2896 Handle<JSArray> last_match_info_handle(last_match_info); 3081 Handle<JSArray> last_match_info_handle(last_match_info);
2897 Handle<Object> match = RegExpImpl::Exec(regexp_handle, 3082 Handle<Object> match = RegExpImpl::Exec(regexp_handle,
2898 subject_handle, 3083 subject_handle,
2899 0, 3084 0,
2900 last_match_info_handle); 3085 last_match_info_handle);
2901 if (match.is_null()) return Failure::Exception(); 3086 if (match.is_null()) return Failure::Exception();
2902 if (match->IsNull()) return *subject_handle; 3087 if (match->IsNull()) return *subject_handle;
2903 3088
2904 ASSERT(last_match_info_handle->HasFastElements()); 3089 ASSERT(last_match_info_handle->HasFastElements());
2905 3090
(...skipping 3017 matching lines...) Expand 10 before | Expand all | Expand 10 after
5923 int right = length; 6108 int right = length;
5924 if (trimRight) { 6109 if (trimRight) {
5925 while (right > left && IsTrimWhiteSpace(s->Get(right - 1))) { 6110 while (right > left && IsTrimWhiteSpace(s->Get(right - 1))) {
5926 right--; 6111 right--;
5927 } 6112 }
5928 } 6113 }
5929 return s->SubString(left, right); 6114 return s->SubString(left, right);
5930 } 6115 }
5931 6116
5932 6117
5933 void FindAsciiStringIndices(Vector<const char> subject,
5934 char pattern,
5935 ZoneList<int>* indices,
5936 unsigned int limit) {
5937 ASSERT(limit > 0);
5938 // Collect indices of pattern in subject using memchr.
5939 // Stop after finding at most limit values.
5940 const char* subject_start = reinterpret_cast<const char*>(subject.start());
5941 const char* subject_end = subject_start + subject.length();
5942 const char* pos = subject_start;
5943 while (limit > 0) {
5944 pos = reinterpret_cast<const char*>(
5945 memchr(pos, pattern, subject_end - pos));
5946 if (pos == NULL) return;
5947 indices->Add(static_cast<int>(pos - subject_start));
5948 pos++;
5949 limit--;
5950 }
5951 }
5952
5953
5954 template <typename SubjectChar, typename PatternChar>
5955 void FindStringIndices(Isolate* isolate,
5956 Vector<const SubjectChar> subject,
5957 Vector<const PatternChar> pattern,
5958 ZoneList<int>* indices,
5959 unsigned int limit) {
5960 ASSERT(limit > 0);
5961 // Collect indices of pattern in subject.
5962 // Stop after finding at most limit values.
5963 int pattern_length = pattern.length();
5964 int index = 0;
5965 StringSearch<PatternChar, SubjectChar> search(isolate, pattern);
5966 while (limit > 0) {
5967 index = search.Search(subject, index);
5968 if (index < 0) return;
5969 indices->Add(index);
5970 index += pattern_length;
5971 limit--;
5972 }
5973 }
5974
5975
5976 RUNTIME_FUNCTION(MaybeObject*, Runtime_StringSplit) { 6118 RUNTIME_FUNCTION(MaybeObject*, Runtime_StringSplit) {
5977 ASSERT(args.length() == 3); 6119 ASSERT(args.length() == 3);
5978 HandleScope handle_scope(isolate); 6120 HandleScope handle_scope(isolate);
5979 CONVERT_ARG_CHECKED(String, subject, 0); 6121 CONVERT_ARG_CHECKED(String, subject, 0);
5980 CONVERT_ARG_CHECKED(String, pattern, 1); 6122 CONVERT_ARG_CHECKED(String, pattern, 1);
5981 CONVERT_NUMBER_CHECKED(uint32_t, limit, Uint32, args[2]); 6123 CONVERT_NUMBER_CHECKED(uint32_t, limit, Uint32, args[2]);
5982 6124
5983 int subject_length = subject->length(); 6125 int subject_length = subject->length();
5984 int pattern_length = pattern->length(); 6126 int pattern_length = pattern->length();
5985 RUNTIME_ASSERT(pattern_length > 0); 6127 RUNTIME_ASSERT(pattern_length > 0);
(...skipping 19 matching lines...) Expand all
6005 6147
6006 static const int kMaxInitialListCapacity = 16; 6148 static const int kMaxInitialListCapacity = 16;
6007 6149
6008 ZoneScope scope(isolate, DELETE_ON_EXIT); 6150 ZoneScope scope(isolate, DELETE_ON_EXIT);
6009 6151
6010 // Find (up to limit) indices of separator and end-of-string in subject 6152 // Find (up to limit) indices of separator and end-of-string in subject
6011 int initial_capacity = Min<uint32_t>(kMaxInitialListCapacity, limit); 6153 int initial_capacity = Min<uint32_t>(kMaxInitialListCapacity, limit);
6012 ZoneList<int> indices(initial_capacity); 6154 ZoneList<int> indices(initial_capacity);
6013 if (!pattern->IsFlat()) FlattenString(pattern); 6155 if (!pattern->IsFlat()) FlattenString(pattern);
6014 6156
6015 // No allocation block. 6157 FindStringIndicesDispatch(isolate, *subject, *pattern, &indices, limit);
6016 {
6017 AssertNoAllocation no_gc;
6018 String::FlatContent subject_content = subject->GetFlatContent();
6019 String::FlatContent pattern_content = pattern->GetFlatContent();
6020 ASSERT(subject_content.IsFlat());
6021 ASSERT(pattern_content.IsFlat());
6022 if (subject_content.IsAscii()) {
6023 Vector<const char> subject_vector = subject_content.ToAsciiVector();
6024 if (pattern_content.IsAscii()) {
6025 Vector<const char> pattern_vector = pattern_content.ToAsciiVector();
6026 if (pattern_vector.length() == 1) {
6027 FindAsciiStringIndices(subject_vector,
6028 pattern_vector[0],
6029 &indices,
6030 limit);
6031 } else {
6032 FindStringIndices(isolate,
6033 subject_vector,
6034 pattern_vector,
6035 &indices,
6036 limit);
6037 }
6038 } else {
6039 FindStringIndices(isolate,
6040 subject_vector,
6041 pattern_content.ToUC16Vector(),
6042 &indices,
6043 limit);
6044 }
6045 } else {
6046 Vector<const uc16> subject_vector = subject_content.ToUC16Vector();
6047 if (pattern->IsAsciiRepresentation()) {
6048 FindStringIndices(isolate,
6049 subject_vector,
6050 pattern_content.ToAsciiVector(),
6051 &indices,
6052 limit);
6053 } else {
6054 FindStringIndices(isolate,
6055 subject_vector,
6056 pattern_content.ToUC16Vector(),
6057 &indices,
6058 limit);
6059 }
6060 }
6061 }
6062 6158
6063 if (static_cast<uint32_t>(indices.length()) < limit) { 6159 if (static_cast<uint32_t>(indices.length()) < limit) {
6064 indices.Add(subject_length); 6160 indices.Add(subject_length);
6065 } 6161 }
6066 6162
6067 // The list indices now contains the end of each part to create. 6163 // The list indices now contains the end of each part to create.
6068 6164
6069 // Create JSArray of substrings separated by separator. 6165 // Create JSArray of substrings separated by separator.
6070 int part_count = indices.length(); 6166 int part_count = indices.length();
6071 6167
(...skipping 6950 matching lines...) Expand 10 before | Expand all | Expand 10 after
13022 } else { 13118 } else {
13023 // Handle last resort GC and make sure to allow future allocations 13119 // Handle last resort GC and make sure to allow future allocations
13024 // to grow the heap without causing GCs (if possible). 13120 // to grow the heap without causing GCs (if possible).
13025 isolate->counters()->gc_last_resort_from_js()->Increment(); 13121 isolate->counters()->gc_last_resort_from_js()->Increment();
13026 isolate->heap()->CollectAllGarbage(false); 13122 isolate->heap()->CollectAllGarbage(false);
13027 } 13123 }
13028 } 13124 }
13029 13125
13030 13126
13031 } } // namespace v8::internal 13127 } } // namespace v8::internal
OLDNEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698