OLD | NEW |
---|---|
1 // Copyright 2011 the V8 project authors. All rights reserved. | 1 // Copyright 2011 the V8 project authors. All rights reserved. |
2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
4 // met: | 4 // met: |
5 // | 5 // |
6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
(...skipping 2751 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
2762 case REPLACEMENT_STRING: | 2762 case REPLACEMENT_STRING: |
2763 builder->AddString(replacement_substrings_[part.data]); | 2763 builder->AddString(replacement_substrings_[part.data]); |
2764 break; | 2764 break; |
2765 default: | 2765 default: |
2766 UNREACHABLE(); | 2766 UNREACHABLE(); |
2767 } | 2767 } |
2768 } | 2768 } |
2769 } | 2769 } |
2770 | 2770 |
2771 | 2771 |
2772 void FindAsciiStringIndices(Vector<const char> subject, | |
2773 char pattern, | |
Yang
2011/09/07 13:33:40
Moved up from further below.
| |
2774 ZoneList<int>* indices, | |
2775 unsigned int limit) { | |
2776 ASSERT(limit > 0); | |
2777 // Collect indices of pattern in subject using memchr. | |
2778 // Stop after finding at most limit values. | |
2779 const char* subject_start = reinterpret_cast<const char*>(subject.start()); | |
2780 const char* subject_end = subject_start + subject.length(); | |
2781 const char* pos = subject_start; | |
2782 while (limit > 0) { | |
2783 pos = reinterpret_cast<const char*>( | |
2784 memchr(pos, pattern, subject_end - pos)); | |
2785 if (pos == NULL) return; | |
2786 indices->Add(static_cast<int>(pos - subject_start)); | |
2787 pos++; | |
2788 limit--; | |
2789 } | |
2790 } | |
2791 | |
2792 | |
2793 template <typename SubjectChar, typename PatternChar> | |
2794 void FindStringIndices(Isolate* isolate, | |
2795 Vector<const SubjectChar> subject, | |
Yang
2011/09/07 13:33:40
Moved up from further below.
| |
2796 Vector<const PatternChar> pattern, | |
2797 ZoneList<int>* indices, | |
2798 unsigned int limit) { | |
2799 ASSERT(limit > 0); | |
2800 // Collect indices of pattern in subject. | |
2801 // Stop after finding at most limit values. | |
2802 int pattern_length = pattern.length(); | |
2803 int index = 0; | |
2804 StringSearch<PatternChar, SubjectChar> search(isolate, pattern); | |
2805 while (limit > 0) { | |
2806 index = search.Search(subject, index); | |
2807 if (index < 0) return; | |
2808 indices->Add(index); | |
2809 index += pattern_length; | |
2810 limit--; | |
2811 } | |
2812 } | |
2813 | |
2814 | |
2815 void FindStringIndicesDispatch(Isolate* isolate, | |
2816 String* subject, | |
2817 String* pattern, | |
2818 ZoneList<int>* indices, | |
2819 unsigned int limit) { | |
2820 { | |
Yang
2011/09/07 13:33:40
Refactored code from Runtime_StringSplit.
| |
2821 AssertNoAllocation no_gc; | |
2822 String::FlatContent subject_content = subject->GetFlatContent(); | |
2823 String::FlatContent pattern_content = pattern->GetFlatContent(); | |
2824 ASSERT(subject_content.IsFlat()); | |
2825 ASSERT(pattern_content.IsFlat()); | |
2826 if (subject_content.IsAscii()) { | |
2827 Vector<const char> subject_vector = subject_content.ToAsciiVector(); | |
2828 if (pattern_content.IsAscii()) { | |
2829 Vector<const char> pattern_vector = pattern_content.ToAsciiVector(); | |
2830 if (pattern_vector.length() == 1) { | |
2831 FindAsciiStringIndices(subject_vector, | |
2832 pattern_vector[0], | |
2833 indices, | |
2834 limit); | |
2835 } else { | |
2836 FindStringIndices(isolate, | |
2837 subject_vector, | |
2838 pattern_vector, | |
2839 indices, | |
2840 limit); | |
2841 } | |
2842 } else { | |
2843 FindStringIndices(isolate, | |
2844 subject_vector, | |
2845 pattern_content.ToUC16Vector(), | |
2846 indices, | |
2847 limit); | |
2848 } | |
2849 } else { | |
2850 Vector<const uc16> subject_vector = subject_content.ToUC16Vector(); | |
2851 if (pattern->IsAsciiRepresentation()) { | |
2852 FindStringIndices(isolate, | |
2853 subject_vector, | |
2854 pattern_content.ToAsciiVector(), | |
2855 indices, | |
2856 limit); | |
2857 } else { | |
2858 FindStringIndices(isolate, | |
2859 subject_vector, | |
2860 pattern_content.ToUC16Vector(), | |
2861 indices, | |
2862 limit); | |
2863 } | |
2864 } | |
2865 } | |
2866 } | |
2867 | |
2868 | |
2869 template<typename ResultSeqString> | |
2870 MUST_USE_RESULT static MaybeObject* StringReplaceStringWithString( | |
2871 Isolate* isolate, | |
2872 Handle<String> subject, | |
2873 Handle<JSRegExp> pattern_regexp, | |
2874 Handle<String> replacement = Handle<String>::null()) { | |
Lasse Reichstein
2011/09/07 13:50:53
Don't use an optional argument, just pass the null
| |
2875 ASSERT(subject->IsFlat()); | |
2876 ASSERT(replacement->IsFlat()); | |
2877 | |
2878 ZoneScope zone_space(isolate, DELETE_ON_EXIT); | |
2879 ZoneList<int> indices(8); | |
2880 String* pattern = | |
2881 String::cast(pattern_regexp->DataAt(JSRegExp::kAtomPatternIndex)); | |
Lasse Reichstein
2011/09/07 13:50:53
Assert that the regexp is atomic.
| |
2882 int subject_len = subject->length(); | |
2883 int pattern_len = pattern->length(); | |
2884 int replacement_len = (replacement.is_null()) ? 0 : replacement->length(); | |
2885 | |
2886 FindStringIndicesDispatch(isolate, *subject, pattern, &indices, 0xffffffff); | |
2887 | |
2888 int matches = indices.length(); | |
2889 if (matches == 0) return *subject; | |
2890 | |
2891 int result_len = (replacement_len - pattern_len) * matches + subject_len; | |
2892 int subject_pos = 0; | |
2893 int result_pos = 0; | |
2894 | |
2895 Handle<ResultSeqString> result; | |
2896 if (ResultSeqString::kHasAsciiEncoding) { | |
2897 result = Handle<ResultSeqString>::cast( | |
2898 isolate->factory()->NewRawAsciiString(result_len)); | |
2899 } else { | |
2900 result = Handle<ResultSeqString>::cast( | |
2901 isolate->factory()->NewRawTwoByteString(result_len)); | |
2902 } | |
2903 | |
2904 for(int i = 0; i < matches; i++) { | |
2905 // Copy non-matched subject content. | |
2906 String::WriteToFlat(*subject, | |
Lasse Reichstein
2011/09/07 13:50:53
Would it be worth it to check that subject_po…
| |
2907 result->GetChars() + result_pos, | |
2908 subject_pos, | |
2909 indices.at(i)); | |
2910 result_pos += indices.at(i) - subject_pos; | |
2911 // Replace match. | |
Lasse Reichstein
2011/09/07 13:50:53
Move comment down one line.
| |
2912 | |
2913 if (replacement_len > 0) { | |
2914 String::WriteToFlat(*replacement, | |
2915 result->GetChars() + result_pos, | |
2916 0, | |
2917 replacement_len); | |
2918 result_pos += replacement_len; | |
2919 } | |
2920 | |
2921 subject_pos = indices.at(i) + pattern_len; | |
2922 } | |
2923 String::WriteToFlat(*subject, | |
2924 result->GetChars() + result_pos, | |
2925 subject_pos, | |
2926 subject_len); | |
2927 return *result; | |
2928 } | |
2929 | |
2772 | 2930 |
2773 MUST_USE_RESULT static MaybeObject* StringReplaceRegExpWithString( | 2931 MUST_USE_RESULT static MaybeObject* StringReplaceRegExpWithString( |
2774 Isolate* isolate, | 2932 Isolate* isolate, |
2775 String* subject, | 2933 String* subject, |
2776 JSRegExp* regexp, | 2934 JSRegExp* regexp, |
2777 String* replacement, | 2935 String* replacement, |
2778 JSArray* last_match_info) { | 2936 JSArray* last_match_info) { |
2779 ASSERT(subject->IsFlat()); | 2937 ASSERT(subject->IsFlat()); |
2780 ASSERT(replacement->IsFlat()); | 2938 ASSERT(replacement->IsFlat()); |
2781 | 2939 |
(...skipping 19 matching lines...) Expand all Loading... | |
2801 | 2959 |
2802 // CompiledReplacement uses zone allocation. | 2960 // CompiledReplacement uses zone allocation. |
2803 ZoneScope zone(isolate, DELETE_ON_EXIT); | 2961 ZoneScope zone(isolate, DELETE_ON_EXIT); |
2804 CompiledReplacement compiled_replacement; | 2962 CompiledReplacement compiled_replacement; |
2805 compiled_replacement.Compile(replacement_handle, | 2963 compiled_replacement.Compile(replacement_handle, |
2806 capture_count, | 2964 capture_count, |
2807 length); | 2965 length); |
2808 | 2966 |
2809 bool is_global = regexp_handle->GetFlags().is_global(); | 2967 bool is_global = regexp_handle->GetFlags().is_global(); |
2810 | 2968 |
2969 // Shortcut for simple non-regexp global replacements | |
2970 if (is_global && | |
2971 regexp->TypeTag() == JSRegExp::ATOM && | |
2972 compiled_replacement.parts() == 1) { | |
2973 if (subject_handle->HasOnlyAsciiChars() && | |
2974 replacement_handle->HasOnlyAsciiChars()) { | |
2975 return StringReplaceStringWithString<SeqAsciiString>( | |
2976 isolate, subject_handle, regexp_handle, replacement_handle); | |
2977 } else { | |
2978 return StringReplaceStringWithString<SeqTwoByteString>( | |
2979 isolate, subject_handle, regexp_handle, replacement_handle); | |
2980 } | |
2981 } | |
2982 | |
2811 // Guessing the number of parts that the final result string is built | 2983 // Guessing the number of parts that the final result string is built |
2812 // from. Global regexps can match any number of times, so we guess | 2984 // from. Global regexps can match any number of times, so we guess |
2813 // conservatively. | 2985 // conservatively. |
2814 int expected_parts = | 2986 int expected_parts = |
2815 (compiled_replacement.parts() + 1) * (is_global ? 4 : 1) + 1; | 2987 (compiled_replacement.parts() + 1) * (is_global ? 4 : 1) + 1; |
2816 ReplacementStringBuilder builder(isolate->heap(), | 2988 ReplacementStringBuilder builder(isolate->heap(), |
2817 subject_handle, | 2989 subject_handle, |
2818 expected_parts); | 2990 expected_parts); |
2819 | 2991 |
2820 // Index of end of last match. | 2992 // Index of end of last match. |
(...skipping 65 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
2886 Isolate* isolate, | 3058 Isolate* isolate, |
2887 String* subject, | 3059 String* subject, |
2888 JSRegExp* regexp, | 3060 JSRegExp* regexp, |
2889 JSArray* last_match_info) { | 3061 JSArray* last_match_info) { |
2890 ASSERT(subject->IsFlat()); | 3062 ASSERT(subject->IsFlat()); |
2891 | 3063 |
2892 HandleScope handles(isolate); | 3064 HandleScope handles(isolate); |
2893 | 3065 |
2894 Handle<String> subject_handle(subject); | 3066 Handle<String> subject_handle(subject); |
2895 Handle<JSRegExp> regexp_handle(regexp); | 3067 Handle<JSRegExp> regexp_handle(regexp); |
3068 | |
3069 // Shortcut for simple non-regexp global replacements | |
3070 if (regexp_handle->GetFlags().is_global() && | |
3071 regexp_handle->TypeTag() == JSRegExp::ATOM) { | |
3072 if (subject_handle->HasOnlyAsciiChars()) { | |
3073 return StringReplaceStringWithString<SeqAsciiString>( | |
3074 isolate, subject_handle, regexp_handle); | |
3075 } else { | |
3076 return StringReplaceStringWithString<SeqTwoByteString>( | |
3077 isolate, subject_handle, regexp_handle); | |
3078 } | |
3079 } | |
3080 | |
2896 Handle<JSArray> last_match_info_handle(last_match_info); | 3081 Handle<JSArray> last_match_info_handle(last_match_info); |
2897 Handle<Object> match = RegExpImpl::Exec(regexp_handle, | 3082 Handle<Object> match = RegExpImpl::Exec(regexp_handle, |
2898 subject_handle, | 3083 subject_handle, |
2899 0, | 3084 0, |
2900 last_match_info_handle); | 3085 last_match_info_handle); |
2901 if (match.is_null()) return Failure::Exception(); | 3086 if (match.is_null()) return Failure::Exception(); |
2902 if (match->IsNull()) return *subject_handle; | 3087 if (match->IsNull()) return *subject_handle; |
2903 | 3088 |
2904 ASSERT(last_match_info_handle->HasFastElements()); | 3089 ASSERT(last_match_info_handle->HasFastElements()); |
2905 | 3090 |
(...skipping 3017 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
5923 int right = length; | 6108 int right = length; |
5924 if (trimRight) { | 6109 if (trimRight) { |
5925 while (right > left && IsTrimWhiteSpace(s->Get(right - 1))) { | 6110 while (right > left && IsTrimWhiteSpace(s->Get(right - 1))) { |
5926 right--; | 6111 right--; |
5927 } | 6112 } |
5928 } | 6113 } |
5929 return s->SubString(left, right); | 6114 return s->SubString(left, right); |
5930 } | 6115 } |
5931 | 6116 |
5932 | 6117 |
5933 void FindAsciiStringIndices(Vector<const char> subject, | |
5934 char pattern, | |
5935 ZoneList<int>* indices, | |
5936 unsigned int limit) { | |
5937 ASSERT(limit > 0); | |
5938 // Collect indices of pattern in subject using memchr. | |
5939 // Stop after finding at most limit values. | |
5940 const char* subject_start = reinterpret_cast<const char*>(subject.start()); | |
5941 const char* subject_end = subject_start + subject.length(); | |
5942 const char* pos = subject_start; | |
5943 while (limit > 0) { | |
5944 pos = reinterpret_cast<const char*>( | |
5945 memchr(pos, pattern, subject_end - pos)); | |
5946 if (pos == NULL) return; | |
5947 indices->Add(static_cast<int>(pos - subject_start)); | |
5948 pos++; | |
5949 limit--; | |
5950 } | |
5951 } | |
5952 | |
5953 | |
5954 template <typename SubjectChar, typename PatternChar> | |
5955 void FindStringIndices(Isolate* isolate, | |
5956 Vector<const SubjectChar> subject, | |
5957 Vector<const PatternChar> pattern, | |
5958 ZoneList<int>* indices, | |
5959 unsigned int limit) { | |
5960 ASSERT(limit > 0); | |
5961 // Collect indices of pattern in subject. | |
5962 // Stop after finding at most limit values. | |
5963 int pattern_length = pattern.length(); | |
5964 int index = 0; | |
5965 StringSearch<PatternChar, SubjectChar> search(isolate, pattern); | |
5966 while (limit > 0) { | |
5967 index = search.Search(subject, index); | |
5968 if (index < 0) return; | |
5969 indices->Add(index); | |
5970 index += pattern_length; | |
5971 limit--; | |
5972 } | |
5973 } | |
5974 | |
5975 | |
5976 RUNTIME_FUNCTION(MaybeObject*, Runtime_StringSplit) { | 6118 RUNTIME_FUNCTION(MaybeObject*, Runtime_StringSplit) { |
5977 ASSERT(args.length() == 3); | 6119 ASSERT(args.length() == 3); |
5978 HandleScope handle_scope(isolate); | 6120 HandleScope handle_scope(isolate); |
5979 CONVERT_ARG_CHECKED(String, subject, 0); | 6121 CONVERT_ARG_CHECKED(String, subject, 0); |
5980 CONVERT_ARG_CHECKED(String, pattern, 1); | 6122 CONVERT_ARG_CHECKED(String, pattern, 1); |
5981 CONVERT_NUMBER_CHECKED(uint32_t, limit, Uint32, args[2]); | 6123 CONVERT_NUMBER_CHECKED(uint32_t, limit, Uint32, args[2]); |
5982 | 6124 |
5983 int subject_length = subject->length(); | 6125 int subject_length = subject->length(); |
5984 int pattern_length = pattern->length(); | 6126 int pattern_length = pattern->length(); |
5985 RUNTIME_ASSERT(pattern_length > 0); | 6127 RUNTIME_ASSERT(pattern_length > 0); |
(...skipping 19 matching lines...) Expand all Loading... | |
6005 | 6147 |
6006 static const int kMaxInitialListCapacity = 16; | 6148 static const int kMaxInitialListCapacity = 16; |
6007 | 6149 |
6008 ZoneScope scope(isolate, DELETE_ON_EXIT); | 6150 ZoneScope scope(isolate, DELETE_ON_EXIT); |
6009 | 6151 |
6010 // Find (up to limit) indices of separator and end-of-string in subject | 6152 // Find (up to limit) indices of separator and end-of-string in subject |
6011 int initial_capacity = Min<uint32_t>(kMaxInitialListCapacity, limit); | 6153 int initial_capacity = Min<uint32_t>(kMaxInitialListCapacity, limit); |
6012 ZoneList<int> indices(initial_capacity); | 6154 ZoneList<int> indices(initial_capacity); |
6013 if (!pattern->IsFlat()) FlattenString(pattern); | 6155 if (!pattern->IsFlat()) FlattenString(pattern); |
6014 | 6156 |
6015 // No allocation block. | 6157 FindStringIndicesDispatch(isolate, *subject, *pattern, &indices, limit); |
6016 { | |
6017 AssertNoAllocation no_gc; | |
6018 String::FlatContent subject_content = subject->GetFlatContent(); | |
6019 String::FlatContent pattern_content = pattern->GetFlatContent(); | |
6020 ASSERT(subject_content.IsFlat()); | |
6021 ASSERT(pattern_content.IsFlat()); | |
6022 if (subject_content.IsAscii()) { | |
6023 Vector<const char> subject_vector = subject_content.ToAsciiVector(); | |
6024 if (pattern_content.IsAscii()) { | |
6025 Vector<const char> pattern_vector = pattern_content.ToAsciiVector(); | |
6026 if (pattern_vector.length() == 1) { | |
6027 FindAsciiStringIndices(subject_vector, | |
6028 pattern_vector[0], | |
6029 &indices, | |
6030 limit); | |
6031 } else { | |
6032 FindStringIndices(isolate, | |
6033 subject_vector, | |
6034 pattern_vector, | |
6035 &indices, | |
6036 limit); | |
6037 } | |
6038 } else { | |
6039 FindStringIndices(isolate, | |
6040 subject_vector, | |
6041 pattern_content.ToUC16Vector(), | |
6042 &indices, | |
6043 limit); | |
6044 } | |
6045 } else { | |
6046 Vector<const uc16> subject_vector = subject_content.ToUC16Vector(); | |
6047 if (pattern->IsAsciiRepresentation()) { | |
6048 FindStringIndices(isolate, | |
6049 subject_vector, | |
6050 pattern_content.ToAsciiVector(), | |
6051 &indices, | |
6052 limit); | |
6053 } else { | |
6054 FindStringIndices(isolate, | |
6055 subject_vector, | |
6056 pattern_content.ToUC16Vector(), | |
6057 &indices, | |
6058 limit); | |
6059 } | |
6060 } | |
6061 } | |
6062 | 6158 |
6063 if (static_cast<uint32_t>(indices.length()) < limit) { | 6159 if (static_cast<uint32_t>(indices.length()) < limit) { |
6064 indices.Add(subject_length); | 6160 indices.Add(subject_length); |
6065 } | 6161 } |
6066 | 6162 |
6067 // The list indices now contains the end of each part to create. | 6163 // The list indices now contains the end of each part to create. |
6068 | 6164 |
6069 // Create JSArray of substrings separated by separator. | 6165 // Create JSArray of substrings separated by separator. |
6070 int part_count = indices.length(); | 6166 int part_count = indices.length(); |
6071 | 6167 |
(...skipping 6950 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
13022 } else { | 13118 } else { |
13023 // Handle last resort GC and make sure to allow future allocations | 13119 // Handle last resort GC and make sure to allow future allocations |
13024 // to grow the heap without causing GCs (if possible). | 13120 // to grow the heap without causing GCs (if possible). |
13025 isolate->counters()->gc_last_resort_from_js()->Increment(); | 13121 isolate->counters()->gc_last_resort_from_js()->Increment(); |
13026 isolate->heap()->CollectAllGarbage(false); | 13122 isolate->heap()->CollectAllGarbage(false); |
13027 } | 13123 } |
13028 } | 13124 } |
13029 | 13125 |
13030 | 13126 |
13031 } } // namespace v8::internal | 13127 } } // namespace v8::internal |
OLD | NEW |