Chromium Code Reviews| OLD | NEW |
|---|---|
| 1 // Copyright 2011 the V8 project authors. All rights reserved. | 1 // Copyright 2011 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 2751 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 2762 case REPLACEMENT_STRING: | 2762 case REPLACEMENT_STRING: |
| 2763 builder->AddString(replacement_substrings_[part.data]); | 2763 builder->AddString(replacement_substrings_[part.data]); |
| 2764 break; | 2764 break; |
| 2765 default: | 2765 default: |
| 2766 UNREACHABLE(); | 2766 UNREACHABLE(); |
| 2767 } | 2767 } |
| 2768 } | 2768 } |
| 2769 } | 2769 } |
| 2770 | 2770 |
| 2771 | 2771 |
| 2772 void FindAsciiStringIndices(Vector<const char> subject, | |
| 2773 char pattern, | |
|
Yang
2011/09/07 13:33:40
Moved up from further below.
| |
| 2774 ZoneList<int>* indices, | |
| 2775 unsigned int limit) { | |
| 2776 ASSERT(limit > 0); | |
| 2777 // Collect indices of pattern in subject using memchr. | |
| 2778 // Stop after finding at most limit values. | |
| 2779 const char* subject_start = reinterpret_cast<const char*>(subject.start()); | |
| 2780 const char* subject_end = subject_start + subject.length(); | |
| 2781 const char* pos = subject_start; | |
| 2782 while (limit > 0) { | |
| 2783 pos = reinterpret_cast<const char*>( | |
| 2784 memchr(pos, pattern, subject_end - pos)); | |
| 2785 if (pos == NULL) return; | |
| 2786 indices->Add(static_cast<int>(pos - subject_start)); | |
| 2787 pos++; | |
| 2788 limit--; | |
| 2789 } | |
| 2790 } | |
| 2791 | |
| 2792 | |
| 2793 template <typename SubjectChar, typename PatternChar> | |
| 2794 void FindStringIndices(Isolate* isolate, | |
| 2795 Vector<const SubjectChar> subject, | |
|
Yang
2011/09/07 13:33:40
Moved up from further below.
| |
| 2796 Vector<const PatternChar> pattern, | |
| 2797 ZoneList<int>* indices, | |
| 2798 unsigned int limit) { | |
| 2799 ASSERT(limit > 0); | |
| 2800 // Collect indices of pattern in subject. | |
| 2801 // Stop after finding at most limit values. | |
| 2802 int pattern_length = pattern.length(); | |
| 2803 int index = 0; | |
| 2804 StringSearch<PatternChar, SubjectChar> search(isolate, pattern); | |
| 2805 while (limit > 0) { | |
| 2806 index = search.Search(subject, index); | |
| 2807 if (index < 0) return; | |
| 2808 indices->Add(index); | |
| 2809 index += pattern_length; | |
| 2810 limit--; | |
| 2811 } | |
| 2812 } | |
| 2813 | |
| 2814 | |
| 2815 void FindStringIndicesDispatch(Isolate* isolate, | |
| 2816 String* subject, | |
| 2817 String* pattern, | |
| 2818 ZoneList<int>* indices, | |
| 2819 unsigned int limit) { | |
| 2820 { | |
|
Yang
2011/09/07 13:33:40
Refactored code from Runtime_StringSplit.
| |
| 2821 AssertNoAllocation no_gc; | |
| 2822 String::FlatContent subject_content = subject->GetFlatContent(); | |
| 2823 String::FlatContent pattern_content = pattern->GetFlatContent(); | |
| 2824 ASSERT(subject_content.IsFlat()); | |
| 2825 ASSERT(pattern_content.IsFlat()); | |
| 2826 if (subject_content.IsAscii()) { | |
| 2827 Vector<const char> subject_vector = subject_content.ToAsciiVector(); | |
| 2828 if (pattern_content.IsAscii()) { | |
| 2829 Vector<const char> pattern_vector = pattern_content.ToAsciiVector(); | |
| 2830 if (pattern_vector.length() == 1) { | |
| 2831 FindAsciiStringIndices(subject_vector, | |
| 2832 pattern_vector[0], | |
| 2833 indices, | |
| 2834 limit); | |
| 2835 } else { | |
| 2836 FindStringIndices(isolate, | |
| 2837 subject_vector, | |
| 2838 pattern_vector, | |
| 2839 indices, | |
| 2840 limit); | |
| 2841 } | |
| 2842 } else { | |
| 2843 FindStringIndices(isolate, | |
| 2844 subject_vector, | |
| 2845 pattern_content.ToUC16Vector(), | |
| 2846 indices, | |
| 2847 limit); | |
| 2848 } | |
| 2849 } else { | |
| 2850 Vector<const uc16> subject_vector = subject_content.ToUC16Vector(); | |
| 2851 if (pattern->IsAsciiRepresentation()) { | |
| 2852 FindStringIndices(isolate, | |
| 2853 subject_vector, | |
| 2854 pattern_content.ToAsciiVector(), | |
| 2855 indices, | |
| 2856 limit); | |
| 2857 } else { | |
| 2858 FindStringIndices(isolate, | |
| 2859 subject_vector, | |
| 2860 pattern_content.ToUC16Vector(), | |
| 2861 indices, | |
| 2862 limit); | |
| 2863 } | |
| 2864 } | |
| 2865 } | |
| 2866 } | |
| 2867 | |
| 2868 | |
| 2869 template<typename ResultSeqString> | |
| 2870 MUST_USE_RESULT static MaybeObject* StringReplaceStringWithString( | |
| 2871 Isolate* isolate, | |
| 2872 Handle<String> subject, | |
| 2873 Handle<JSRegExp> pattern_regexp, | |
| 2874 Handle<String> replacement = Handle<String>::null()) { | |
|
Lasse Reichstein
2011/09/07 13:50:53
Don't use an optional argument, just pass the null
| |
| 2875 ASSERT(subject->IsFlat()); | |
| 2876 ASSERT(replacement->IsFlat()); | |
| 2877 | |
| 2878 ZoneScope zone_space(isolate, DELETE_ON_EXIT); | |
| 2879 ZoneList<int> indices(8); | |
| 2880 String* pattern = | |
| 2881 String::cast(pattern_regexp->DataAt(JSRegExp::kAtomPatternIndex)); | |
|
Lasse Reichstein
2011/09/07 13:50:53
Assert that the regexp is atomic.
| |
| 2882 int subject_len = subject->length(); | |
| 2883 int pattern_len = pattern->length(); | |
| 2884 int replacement_len = (replacement.is_null()) ? 0 : replacement->length(); | |
| 2885 | |
| 2886 FindStringIndicesDispatch(isolate, *subject, pattern, &indices, 0xffffffff); | |
| 2887 | |
| 2888 int matches = indices.length(); | |
| 2889 if (matches == 0) return *subject; | |
| 2890 | |
| 2891 int result_len = (replacement_len - pattern_len) * matches + subject_len; | |
| 2892 int subject_pos = 0; | |
| 2893 int result_pos = 0; | |
| 2894 | |
| 2895 Handle<ResultSeqString> result; | |
| 2896 if (ResultSeqString::kHasAsciiEncoding) { | |
| 2897 result = Handle<ResultSeqString>::cast( | |
| 2898 isolate->factory()->NewRawAsciiString(result_len)); | |
| 2899 } else { | |
| 2900 result = Handle<ResultSeqString>::cast( | |
| 2901 isolate->factory()->NewRawTwoByteString(result_len)); | |
| 2902 } | |
| 2903 | |
| 2904 for(int i = 0; i < matches; i++) { | |
| 2905 // Copy non-matched subject content. | |
| 2906 String::WriteToFlat(*subject, | |
|
Lasse Reichstein
2011/09/07 13:50:53
Would it be worth it to check that that subject_po
| |
| 2907 result->GetChars() + result_pos, | |
| 2908 subject_pos, | |
| 2909 indices.at(i)); | |
| 2910 result_pos += indices.at(i) - subject_pos; | |
| 2911 // Replace match. | |
|
Lasse Reichstein
2011/09/07 13:50:53
Move comment down one line.
| |
| 2912 | |
| 2913 if (replacement_len > 0) { | |
| 2914 String::WriteToFlat(*replacement, | |
| 2915 result->GetChars() + result_pos, | |
| 2916 0, | |
| 2917 replacement_len); | |
| 2918 result_pos += replacement_len; | |
| 2919 } | |
| 2920 | |
| 2921 subject_pos = indices.at(i) + pattern_len; | |
| 2922 } | |
| 2923 String::WriteToFlat(*subject, | |
| 2924 result->GetChars() + result_pos, | |
| 2925 subject_pos, | |
| 2926 subject_len); | |
| 2927 return *result; | |
| 2928 } | |
| 2929 | |
| 2772 | 2930 |
| 2773 MUST_USE_RESULT static MaybeObject* StringReplaceRegExpWithString( | 2931 MUST_USE_RESULT static MaybeObject* StringReplaceRegExpWithString( |
| 2774 Isolate* isolate, | 2932 Isolate* isolate, |
| 2775 String* subject, | 2933 String* subject, |
| 2776 JSRegExp* regexp, | 2934 JSRegExp* regexp, |
| 2777 String* replacement, | 2935 String* replacement, |
| 2778 JSArray* last_match_info) { | 2936 JSArray* last_match_info) { |
| 2779 ASSERT(subject->IsFlat()); | 2937 ASSERT(subject->IsFlat()); |
| 2780 ASSERT(replacement->IsFlat()); | 2938 ASSERT(replacement->IsFlat()); |
| 2781 | 2939 |
| (...skipping 19 matching lines...) Expand all Loading... | |
| 2801 | 2959 |
| 2802 // CompiledReplacement uses zone allocation. | 2960 // CompiledReplacement uses zone allocation. |
| 2803 ZoneScope zone(isolate, DELETE_ON_EXIT); | 2961 ZoneScope zone(isolate, DELETE_ON_EXIT); |
| 2804 CompiledReplacement compiled_replacement; | 2962 CompiledReplacement compiled_replacement; |
| 2805 compiled_replacement.Compile(replacement_handle, | 2963 compiled_replacement.Compile(replacement_handle, |
| 2806 capture_count, | 2964 capture_count, |
| 2807 length); | 2965 length); |
| 2808 | 2966 |
| 2809 bool is_global = regexp_handle->GetFlags().is_global(); | 2967 bool is_global = regexp_handle->GetFlags().is_global(); |
| 2810 | 2968 |
| 2969 // Shortcut for simple non-regexp global replacements | |
| 2970 if (is_global && | |
| 2971 regexp->TypeTag() == JSRegExp::ATOM && | |
| 2972 compiled_replacement.parts() == 1) { | |
| 2973 if (subject_handle->HasOnlyAsciiChars() && | |
| 2974 replacement_handle->HasOnlyAsciiChars()) { | |
| 2975 return StringReplaceStringWithString<SeqAsciiString>( | |
| 2976 isolate, subject_handle, regexp_handle, replacement_handle); | |
| 2977 } else { | |
| 2978 return StringReplaceStringWithString<SeqTwoByteString>( | |
| 2979 isolate, subject_handle, regexp_handle, replacement_handle); | |
| 2980 } | |
| 2981 } | |
| 2982 | |
| 2811 // Guessing the number of parts that the final result string is built | 2983 // Guessing the number of parts that the final result string is built |
| 2812 // from. Global regexps can match any number of times, so we guess | 2984 // from. Global regexps can match any number of times, so we guess |
| 2813 // conservatively. | 2985 // conservatively. |
| 2814 int expected_parts = | 2986 int expected_parts = |
| 2815 (compiled_replacement.parts() + 1) * (is_global ? 4 : 1) + 1; | 2987 (compiled_replacement.parts() + 1) * (is_global ? 4 : 1) + 1; |
| 2816 ReplacementStringBuilder builder(isolate->heap(), | 2988 ReplacementStringBuilder builder(isolate->heap(), |
| 2817 subject_handle, | 2989 subject_handle, |
| 2818 expected_parts); | 2990 expected_parts); |
| 2819 | 2991 |
| 2820 // Index of end of last match. | 2992 // Index of end of last match. |
| (...skipping 65 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 2886 Isolate* isolate, | 3058 Isolate* isolate, |
| 2887 String* subject, | 3059 String* subject, |
| 2888 JSRegExp* regexp, | 3060 JSRegExp* regexp, |
| 2889 JSArray* last_match_info) { | 3061 JSArray* last_match_info) { |
| 2890 ASSERT(subject->IsFlat()); | 3062 ASSERT(subject->IsFlat()); |
| 2891 | 3063 |
| 2892 HandleScope handles(isolate); | 3064 HandleScope handles(isolate); |
| 2893 | 3065 |
| 2894 Handle<String> subject_handle(subject); | 3066 Handle<String> subject_handle(subject); |
| 2895 Handle<JSRegExp> regexp_handle(regexp); | 3067 Handle<JSRegExp> regexp_handle(regexp); |
| 3068 | |
| 3069 // Shortcut for simple non-regexp global replacements | |
| 3070 if (regexp_handle->GetFlags().is_global() && | |
| 3071 regexp_handle->TypeTag() == JSRegExp::ATOM) { | |
| 3072 if (subject_handle->HasOnlyAsciiChars()) { | |
| 3073 return StringReplaceStringWithString<SeqAsciiString>( | |
| 3074 isolate, subject_handle, regexp_handle); | |
| 3075 } else { | |
| 3076 return StringReplaceStringWithString<SeqTwoByteString>( | |
| 3077 isolate, subject_handle, regexp_handle); | |
| 3078 } | |
| 3079 } | |
| 3080 | |
| 2896 Handle<JSArray> last_match_info_handle(last_match_info); | 3081 Handle<JSArray> last_match_info_handle(last_match_info); |
| 2897 Handle<Object> match = RegExpImpl::Exec(regexp_handle, | 3082 Handle<Object> match = RegExpImpl::Exec(regexp_handle, |
| 2898 subject_handle, | 3083 subject_handle, |
| 2899 0, | 3084 0, |
| 2900 last_match_info_handle); | 3085 last_match_info_handle); |
| 2901 if (match.is_null()) return Failure::Exception(); | 3086 if (match.is_null()) return Failure::Exception(); |
| 2902 if (match->IsNull()) return *subject_handle; | 3087 if (match->IsNull()) return *subject_handle; |
| 2903 | 3088 |
| 2904 ASSERT(last_match_info_handle->HasFastElements()); | 3089 ASSERT(last_match_info_handle->HasFastElements()); |
| 2905 | 3090 |
| (...skipping 3017 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 5923 int right = length; | 6108 int right = length; |
| 5924 if (trimRight) { | 6109 if (trimRight) { |
| 5925 while (right > left && IsTrimWhiteSpace(s->Get(right - 1))) { | 6110 while (right > left && IsTrimWhiteSpace(s->Get(right - 1))) { |
| 5926 right--; | 6111 right--; |
| 5927 } | 6112 } |
| 5928 } | 6113 } |
| 5929 return s->SubString(left, right); | 6114 return s->SubString(left, right); |
| 5930 } | 6115 } |
| 5931 | 6116 |
| 5932 | 6117 |
| 5933 void FindAsciiStringIndices(Vector<const char> subject, | |
| 5934 char pattern, | |
| 5935 ZoneList<int>* indices, | |
| 5936 unsigned int limit) { | |
| 5937 ASSERT(limit > 0); | |
| 5938 // Collect indices of pattern in subject using memchr. | |
| 5939 // Stop after finding at most limit values. | |
| 5940 const char* subject_start = reinterpret_cast<const char*>(subject.start()); | |
| 5941 const char* subject_end = subject_start + subject.length(); | |
| 5942 const char* pos = subject_start; | |
| 5943 while (limit > 0) { | |
| 5944 pos = reinterpret_cast<const char*>( | |
| 5945 memchr(pos, pattern, subject_end - pos)); | |
| 5946 if (pos == NULL) return; | |
| 5947 indices->Add(static_cast<int>(pos - subject_start)); | |
| 5948 pos++; | |
| 5949 limit--; | |
| 5950 } | |
| 5951 } | |
| 5952 | |
| 5953 | |
| 5954 template <typename SubjectChar, typename PatternChar> | |
| 5955 void FindStringIndices(Isolate* isolate, | |
| 5956 Vector<const SubjectChar> subject, | |
| 5957 Vector<const PatternChar> pattern, | |
| 5958 ZoneList<int>* indices, | |
| 5959 unsigned int limit) { | |
| 5960 ASSERT(limit > 0); | |
| 5961 // Collect indices of pattern in subject. | |
| 5962 // Stop after finding at most limit values. | |
| 5963 int pattern_length = pattern.length(); | |
| 5964 int index = 0; | |
| 5965 StringSearch<PatternChar, SubjectChar> search(isolate, pattern); | |
| 5966 while (limit > 0) { | |
| 5967 index = search.Search(subject, index); | |
| 5968 if (index < 0) return; | |
| 5969 indices->Add(index); | |
| 5970 index += pattern_length; | |
| 5971 limit--; | |
| 5972 } | |
| 5973 } | |
| 5974 | |
| 5975 | |
| 5976 RUNTIME_FUNCTION(MaybeObject*, Runtime_StringSplit) { | 6118 RUNTIME_FUNCTION(MaybeObject*, Runtime_StringSplit) { |
| 5977 ASSERT(args.length() == 3); | 6119 ASSERT(args.length() == 3); |
| 5978 HandleScope handle_scope(isolate); | 6120 HandleScope handle_scope(isolate); |
| 5979 CONVERT_ARG_CHECKED(String, subject, 0); | 6121 CONVERT_ARG_CHECKED(String, subject, 0); |
| 5980 CONVERT_ARG_CHECKED(String, pattern, 1); | 6122 CONVERT_ARG_CHECKED(String, pattern, 1); |
| 5981 CONVERT_NUMBER_CHECKED(uint32_t, limit, Uint32, args[2]); | 6123 CONVERT_NUMBER_CHECKED(uint32_t, limit, Uint32, args[2]); |
| 5982 | 6124 |
| 5983 int subject_length = subject->length(); | 6125 int subject_length = subject->length(); |
| 5984 int pattern_length = pattern->length(); | 6126 int pattern_length = pattern->length(); |
| 5985 RUNTIME_ASSERT(pattern_length > 0); | 6127 RUNTIME_ASSERT(pattern_length > 0); |
| (...skipping 19 matching lines...) Expand all Loading... | |
| 6005 | 6147 |
| 6006 static const int kMaxInitialListCapacity = 16; | 6148 static const int kMaxInitialListCapacity = 16; |
| 6007 | 6149 |
| 6008 ZoneScope scope(isolate, DELETE_ON_EXIT); | 6150 ZoneScope scope(isolate, DELETE_ON_EXIT); |
| 6009 | 6151 |
| 6010 // Find (up to limit) indices of separator and end-of-string in subject | 6152 // Find (up to limit) indices of separator and end-of-string in subject |
| 6011 int initial_capacity = Min<uint32_t>(kMaxInitialListCapacity, limit); | 6153 int initial_capacity = Min<uint32_t>(kMaxInitialListCapacity, limit); |
| 6012 ZoneList<int> indices(initial_capacity); | 6154 ZoneList<int> indices(initial_capacity); |
| 6013 if (!pattern->IsFlat()) FlattenString(pattern); | 6155 if (!pattern->IsFlat()) FlattenString(pattern); |
| 6014 | 6156 |
| 6015 // No allocation block. | 6157 FindStringIndicesDispatch(isolate, *subject, *pattern, &indices, limit); |
| 6016 { | |
| 6017 AssertNoAllocation no_gc; | |
| 6018 String::FlatContent subject_content = subject->GetFlatContent(); | |
| 6019 String::FlatContent pattern_content = pattern->GetFlatContent(); | |
| 6020 ASSERT(subject_content.IsFlat()); | |
| 6021 ASSERT(pattern_content.IsFlat()); | |
| 6022 if (subject_content.IsAscii()) { | |
| 6023 Vector<const char> subject_vector = subject_content.ToAsciiVector(); | |
| 6024 if (pattern_content.IsAscii()) { | |
| 6025 Vector<const char> pattern_vector = pattern_content.ToAsciiVector(); | |
| 6026 if (pattern_vector.length() == 1) { | |
| 6027 FindAsciiStringIndices(subject_vector, | |
| 6028 pattern_vector[0], | |
| 6029 &indices, | |
| 6030 limit); | |
| 6031 } else { | |
| 6032 FindStringIndices(isolate, | |
| 6033 subject_vector, | |
| 6034 pattern_vector, | |
| 6035 &indices, | |
| 6036 limit); | |
| 6037 } | |
| 6038 } else { | |
| 6039 FindStringIndices(isolate, | |
| 6040 subject_vector, | |
| 6041 pattern_content.ToUC16Vector(), | |
| 6042 &indices, | |
| 6043 limit); | |
| 6044 } | |
| 6045 } else { | |
| 6046 Vector<const uc16> subject_vector = subject_content.ToUC16Vector(); | |
| 6047 if (pattern->IsAsciiRepresentation()) { | |
| 6048 FindStringIndices(isolate, | |
| 6049 subject_vector, | |
| 6050 pattern_content.ToAsciiVector(), | |
| 6051 &indices, | |
| 6052 limit); | |
| 6053 } else { | |
| 6054 FindStringIndices(isolate, | |
| 6055 subject_vector, | |
| 6056 pattern_content.ToUC16Vector(), | |
| 6057 &indices, | |
| 6058 limit); | |
| 6059 } | |
| 6060 } | |
| 6061 } | |
| 6062 | 6158 |
| 6063 if (static_cast<uint32_t>(indices.length()) < limit) { | 6159 if (static_cast<uint32_t>(indices.length()) < limit) { |
| 6064 indices.Add(subject_length); | 6160 indices.Add(subject_length); |
| 6065 } | 6161 } |
| 6066 | 6162 |
| 6067 // The list indices now contains the end of each part to create. | 6163 // The list indices now contains the end of each part to create. |
| 6068 | 6164 |
| 6069 // Create JSArray of substrings separated by separator. | 6165 // Create JSArray of substrings separated by separator. |
| 6070 int part_count = indices.length(); | 6166 int part_count = indices.length(); |
| 6071 | 6167 |
| (...skipping 6950 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 13022 } else { | 13118 } else { |
| 13023 // Handle last resort GC and make sure to allow future allocations | 13119 // Handle last resort GC and make sure to allow future allocations |
| 13024 // to grow the heap without causing GCs (if possible). | 13120 // to grow the heap without causing GCs (if possible). |
| 13025 isolate->counters()->gc_last_resort_from_js()->Increment(); | 13121 isolate->counters()->gc_last_resort_from_js()->Increment(); |
| 13026 isolate->heap()->CollectAllGarbage(false); | 13122 isolate->heap()->CollectAllGarbage(false); |
| 13027 } | 13123 } |
| 13028 } | 13124 } |
| 13029 | 13125 |
| 13030 | 13126 |
| 13031 } } // namespace v8::internal | 13127 } } // namespace v8::internal |
| OLD | NEW |