| OLD | NEW |
| 1 // Copyright 2006-2009 the V8 project authors. All rights reserved. | 1 // Copyright 2006-2009 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 2797 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2808 shift = gs_shift; | 2808 shift = gs_shift; |
| 2809 } | 2809 } |
| 2810 idx += shift; | 2810 idx += shift; |
| 2811 } | 2811 } |
| 2812 } | 2812 } |
| 2813 | 2813 |
| 2814 return -1; | 2814 return -1; |
| 2815 } | 2815 } |
| 2816 | 2816 |
| 2817 | 2817 |
| 2818 template <typename schar> | |
| 2819 static inline int SingleCharIndexOf(Vector<const schar> string, | |
| 2820 schar pattern_char, | |
| 2821 int start_index) { | |
| 2822 if (sizeof(schar) == 1) { | |
| 2823 const schar* pos = reinterpret_cast<const schar*>( | |
| 2824 memchr(string.start() + start_index, | |
| 2825 pattern_char, | |
| 2826 string.length() - start_index)); | |
| 2827 if (pos == NULL) return -1; | |
| 2828 return static_cast<int>(pos - string.start()); | |
| 2829 } | |
| 2830 for (int i = start_index, n = string.length(); i < n; i++) { | |
| 2831 if (pattern_char == string[i]) { | |
| 2832 return i; | |
| 2833 } | |
| 2834 } | |
| 2835 return -1; | |
| 2836 } | |
| 2837 | |
| 2838 | |
| 2839 template <typename schar> | |
| 2840 static int SingleCharLastIndexOf(Vector<const schar> string, | |
| 2841 schar pattern_char, | |
| 2842 int start_index) { | |
| 2843 for (int i = start_index; i >= 0; i--) { | |
| 2844 if (pattern_char == string[i]) { | |
| 2845 return i; | |
| 2846 } | |
| 2847 } | |
| 2848 return -1; | |
| 2849 } | |
| 2850 | |
| 2851 | |
| 2852 // Trivial string search for shorter strings. | 2818 // Trivial string search for shorter strings. |
| 2853 // On return, if "complete" is set to true, the return value is the | 2819 // On return, if "complete" is set to true, the return value is the |
| 2854 // final result of searching for the patter in the subject. | 2820 // final result of searching for the patter in the subject. |
| 2855 // If "complete" is set to false, the return value is the index where | 2821 // If "complete" is set to false, the return value is the index where |
| 2856 // further checking should start, i.e., it's guaranteed that the pattern | 2822 // further checking should start, i.e., it's guaranteed that the pattern |
| 2857 // does not occur at a position prior to the returned index. | 2823 // does not occur at a position prior to the returned index. |
| 2858 template <typename pchar, typename schar> | 2824 template <typename pchar, typename schar> |
| 2859 static int SimpleIndexOf(Vector<const schar> subject, | 2825 static int SimpleIndexOf(Vector<const schar> subject, |
| 2860 Vector<const pchar> pattern, | 2826 Vector<const pchar> pattern, |
| 2861 int idx, | 2827 int idx, |
| 2862 bool* complete) { | 2828 bool* complete) { |
| 2829 ASSERT(pattern.length() > 1); |
| 2863 // Badness is a count of how much work we have done. When we have | 2830 // Badness is a count of how much work we have done. When we have |
| 2864 // done enough work we decide it's probably worth switching to a better | 2831 // done enough work we decide it's probably worth switching to a better |
| 2865 // algorithm. | 2832 // algorithm. |
| 2866 int badness = -10 - (pattern.length() << 2); | 2833 int badness = -10 - (pattern.length() << 2); |
| 2867 | 2834 |
| 2868 // We know our pattern is at least 2 characters, we cache the first so | 2835 // We know our pattern is at least 2 characters, we cache the first so |
| 2869 // the common case of the first character not matching is faster. | 2836 // the common case of the first character not matching is faster. |
| 2870 pchar pattern_first_char = pattern[0]; | 2837 pchar pattern_first_char = pattern[0]; |
| 2871 for (int i = idx, n = subject.length() - pattern.length(); i <= n; i++) { | 2838 for (int i = idx, n = subject.length() - pattern.length(); i <= n; i++) { |
| 2872 badness++; | 2839 badness++; |
| (...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2915 const schar* pos = reinterpret_cast<const schar*>( | 2882 const schar* pos = reinterpret_cast<const schar*>( |
| 2916 memchr(subject.start() + i, | 2883 memchr(subject.start() + i, |
| 2917 pattern_first_char, | 2884 pattern_first_char, |
| 2918 n - i + 1)); | 2885 n - i + 1)); |
| 2919 if (pos == NULL) return -1; | 2886 if (pos == NULL) return -1; |
| 2920 i = static_cast<int>(pos - subject.start()); | 2887 i = static_cast<int>(pos - subject.start()); |
| 2921 } else { | 2888 } else { |
| 2922 if (subject[i] != pattern_first_char) continue; | 2889 if (subject[i] != pattern_first_char) continue; |
| 2923 } | 2890 } |
| 2924 int j = 1; | 2891 int j = 1; |
| 2925 do { | 2892 while (j < pattern.length()) { |
| 2926 if (pattern[j] != subject[i+j]) { | 2893 if (pattern[j] != subject[i+j]) { |
| 2927 break; | 2894 break; |
| 2928 } | 2895 } |
| 2929 j++; | 2896 j++; |
| 2930 } while (j < pattern.length()); | 2897 } |
| 2931 if (j == pattern.length()) { | 2898 if (j == pattern.length()) { |
| 2932 return i; | 2899 return i; |
| 2933 } | 2900 } |
| 2934 } | 2901 } |
| 2935 return -1; | 2902 return -1; |
| 2936 } | 2903 } |
| 2937 | 2904 |
| 2938 | 2905 |
| 2939 // Strategy for searching for a string in another string. | 2906 // Strategy for searching for a string in another string. |
| 2940 enum StringSearchStrategy { SEARCH_FAIL, SEARCH_SHORT, SEARCH_LONG }; | 2907 enum StringSearchStrategy { SEARCH_FAIL, SEARCH_SHORT, SEARCH_LONG }; |
| (...skipping 81 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3022 int start_index) { | 2989 int start_index) { |
| 3023 ASSERT(0 <= start_index); | 2990 ASSERT(0 <= start_index); |
| 3024 ASSERT(start_index <= sub->length()); | 2991 ASSERT(start_index <= sub->length()); |
| 3025 | 2992 |
| 3026 int pattern_length = pat->length(); | 2993 int pattern_length = pat->length(); |
| 3027 if (pattern_length == 0) return start_index; | 2994 if (pattern_length == 0) return start_index; |
| 3028 | 2995 |
| 3029 int subject_length = sub->length(); | 2996 int subject_length = sub->length(); |
| 3030 if (start_index + pattern_length > subject_length) return -1; | 2997 if (start_index + pattern_length > subject_length) return -1; |
| 3031 | 2998 |
| 3032 if (!sub->IsFlat()) { | 2999 if (!sub->IsFlat()) FlattenString(sub); |
| 3033 FlattenString(sub); | 3000 if (!pat->IsFlat()) FlattenString(pat); |
| 3034 } | |
| 3035 | |
| 3036 // Searching for one specific character is common. For one | |
| 3037 // character patterns linear search is necessary, so any smart | |
| 3038 // algorithm is unnecessary overhead. | |
| 3039 if (pattern_length == 1) { | |
| 3040 AssertNoAllocation no_heap_allocation; // ensure vectors stay valid | |
| 3041 String* seq_sub = *sub; | |
| 3042 if (seq_sub->IsConsString()) { | |
| 3043 seq_sub = ConsString::cast(seq_sub)->first(); | |
| 3044 } | |
| 3045 if (seq_sub->IsAsciiRepresentation()) { | |
| 3046 uc16 pchar = pat->Get(0); | |
| 3047 if (pchar > String::kMaxAsciiCharCode) { | |
| 3048 return -1; | |
| 3049 } | |
| 3050 Vector<const char> ascii_vector = | |
| 3051 seq_sub->ToAsciiVector().SubVector(start_index, subject_length); | |
| 3052 const void* pos = memchr(ascii_vector.start(), | |
| 3053 static_cast<const char>(pchar), | |
| 3054 static_cast<size_t>(ascii_vector.length())); | |
| 3055 if (pos == NULL) { | |
| 3056 return -1; | |
| 3057 } | |
| 3058 return static_cast<int>(reinterpret_cast<const char*>(pos) | |
| 3059 - ascii_vector.start() + start_index); | |
| 3060 } | |
| 3061 return SingleCharIndexOf(seq_sub->ToUC16Vector(), | |
| 3062 pat->Get(0), | |
| 3063 start_index); | |
| 3064 } | |
| 3065 | |
| 3066 if (!pat->IsFlat()) { | |
| 3067 FlattenString(pat); | |
| 3068 } | |
| 3069 | 3001 |
| 3070 AssertNoAllocation no_heap_allocation; // ensure vectors stay valid | 3002 AssertNoAllocation no_heap_allocation; // ensure vectors stay valid |
| 3071 // Extract flattened substrings of cons strings before determining asciiness. | 3003 // Extract flattened substrings of cons strings before determining asciiness. |
| 3072 String* seq_sub = *sub; | 3004 String* seq_sub = *sub; |
| 3073 if (seq_sub->IsConsString()) { | 3005 if (seq_sub->IsConsString()) seq_sub = ConsString::cast(seq_sub)->first(); |
| 3074 seq_sub = ConsString::cast(seq_sub)->first(); | |
| 3075 } | |
| 3076 String* seq_pat = *pat; | 3006 String* seq_pat = *pat; |
| 3077 if (seq_pat->IsConsString()) { | 3007 if (seq_pat->IsConsString()) seq_pat = ConsString::cast(seq_pat)->first(); |
| 3078 seq_pat = ConsString::cast(seq_pat)->first(); | |
| 3079 } | |
| 3080 | 3008 |
| 3081 // dispatch on type of strings | 3009 // dispatch on type of strings |
| 3082 if (seq_pat->IsAsciiRepresentation()) { | 3010 if (seq_pat->IsAsciiRepresentation()) { |
| 3083 Vector<const char> pat_vector = seq_pat->ToAsciiVector(); | 3011 Vector<const char> pat_vector = seq_pat->ToAsciiVector(); |
| 3084 if (seq_sub->IsAsciiRepresentation()) { | 3012 if (seq_sub->IsAsciiRepresentation()) { |
| 3085 return StringSearch(seq_sub->ToAsciiVector(), pat_vector, start_index); | 3013 return StringSearch(seq_sub->ToAsciiVector(), pat_vector, start_index); |
| 3086 } | 3014 } |
| 3087 return StringSearch(seq_sub->ToUC16Vector(), pat_vector, start_index); | 3015 return StringSearch(seq_sub->ToUC16Vector(), pat_vector, start_index); |
| 3088 } | 3016 } |
| 3089 Vector<const uc16> pat_vector = seq_pat->ToUC16Vector(); | 3017 Vector<const uc16> pat_vector = seq_pat->ToUC16Vector(); |
| (...skipping 69 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3159 uint32_t sub_length = sub->length(); | 3087 uint32_t sub_length = sub->length(); |
| 3160 | 3088 |
| 3161 if (start_index + pat_length > sub_length) { | 3089 if (start_index + pat_length > sub_length) { |
| 3162 start_index = sub_length - pat_length; | 3090 start_index = sub_length - pat_length; |
| 3163 } | 3091 } |
| 3164 | 3092 |
| 3165 if (pat_length == 0) { | 3093 if (pat_length == 0) { |
| 3166 return Smi::FromInt(start_index); | 3094 return Smi::FromInt(start_index); |
| 3167 } | 3095 } |
| 3168 | 3096 |
| 3169 if (!sub->IsFlat()) { | 3097 if (!sub->IsFlat()) FlattenString(sub); |
| 3170 FlattenString(sub); | 3098 if (!pat->IsFlat()) FlattenString(pat); |
| 3171 } | |
| 3172 | |
| 3173 if (pat_length == 1) { | |
| 3174 AssertNoAllocation no_heap_allocation; // ensure vectors stay valid | |
| 3175 if (sub->IsAsciiRepresentation()) { | |
| 3176 uc16 pchar = pat->Get(0); | |
| 3177 if (pchar > String::kMaxAsciiCharCode) { | |
| 3178 return Smi::FromInt(-1); | |
| 3179 } | |
| 3180 return Smi::FromInt(SingleCharLastIndexOf(sub->ToAsciiVector(), | |
| 3181 static_cast<char>(pat->Get(0)), | |
| 3182 start_index)); | |
| 3183 } else { | |
| 3184 return Smi::FromInt(SingleCharLastIndexOf(sub->ToUC16Vector(), | |
| 3185 pat->Get(0), | |
| 3186 start_index)); | |
| 3187 } | |
| 3188 } | |
| 3189 | |
| 3190 if (!pat->IsFlat()) { | |
| 3191 FlattenString(pat); | |
| 3192 } | |
| 3193 | 3099 |
| 3194 AssertNoAllocation no_heap_allocation; // ensure vectors stay valid | 3100 AssertNoAllocation no_heap_allocation; // ensure vectors stay valid |
| 3195 | 3101 |
| 3196 int position = -1; | 3102 int position = -1; |
| 3197 | 3103 |
| 3198 if (pat->IsAsciiRepresentation()) { | 3104 if (pat->IsAsciiRepresentation()) { |
| 3199 Vector<const char> pat_vector = pat->ToAsciiVector(); | 3105 Vector<const char> pat_vector = pat->ToAsciiVector(); |
| 3200 if (sub->IsAsciiRepresentation()) { | 3106 if (sub->IsAsciiRepresentation()) { |
| 3201 position = StringMatchBackwards(sub->ToAsciiVector(), | 3107 position = StringMatchBackwards(sub->ToAsciiVector(), |
| 3202 pat_vector, | 3108 pat_vector, |
| (...skipping 157 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3360 AssertNoAllocation no_gc; | 3266 AssertNoAllocation no_gc; |
| 3361 FixedArray* elements = FixedArray::cast(last_match_info->elements()); | 3267 FixedArray* elements = FixedArray::cast(last_match_info->elements()); |
| 3362 RegExpImpl::SetLastCaptureCount(elements, 2); | 3268 RegExpImpl::SetLastCaptureCount(elements, 2); |
| 3363 RegExpImpl::SetLastInput(elements, *subject); | 3269 RegExpImpl::SetLastInput(elements, *subject); |
| 3364 RegExpImpl::SetLastSubject(elements, *subject); | 3270 RegExpImpl::SetLastSubject(elements, *subject); |
| 3365 RegExpImpl::SetCapture(elements, 0, match_start); | 3271 RegExpImpl::SetCapture(elements, 0, match_start); |
| 3366 RegExpImpl::SetCapture(elements, 1, match_end); | 3272 RegExpImpl::SetCapture(elements, 1, match_end); |
| 3367 } | 3273 } |
| 3368 | 3274 |
| 3369 | 3275 |
| 3370 template <typename schar> | |
| 3371 static bool SearchCharMultiple(Vector<schar> subject, | |
| 3372 String* pattern, | |
| 3373 schar pattern_char, | |
| 3374 FixedArrayBuilder* builder, | |
| 3375 int* match_pos) { | |
| 3376 // Position of last match. | |
| 3377 int pos = *match_pos; | |
| 3378 int subject_length = subject.length(); | |
| 3379 while (pos < subject_length) { | |
| 3380 int match_end = pos + 1; | |
| 3381 if (!builder->HasCapacity(kMaxBuilderEntriesPerRegExpMatch)) { | |
| 3382 *match_pos = pos; | |
| 3383 return false; | |
| 3384 } | |
| 3385 int new_pos = SingleCharIndexOf(subject, pattern_char, match_end); | |
| 3386 if (new_pos >= 0) { | |
| 3387 // Match has been found. | |
| 3388 if (new_pos > match_end) { | |
| 3389 ReplacementStringBuilder::AddSubjectSlice(builder, match_end, new_pos); | |
| 3390 } | |
| 3391 pos = new_pos; | |
| 3392 builder->Add(pattern); | |
| 3393 } else { | |
| 3394 break; | |
| 3395 } | |
| 3396 } | |
| 3397 if (pos + 1 < subject_length) { | |
| 3398 ReplacementStringBuilder::AddSubjectSlice(builder, pos + 1, subject_length); | |
| 3399 } | |
| 3400 *match_pos = pos; | |
| 3401 return true; | |
| 3402 } | |
| 3403 | |
| 3404 | |
| 3405 static bool SearchCharMultiple(Handle<String> subject, | |
| 3406 Handle<String> pattern, | |
| 3407 Handle<JSArray> last_match_info, | |
| 3408 FixedArrayBuilder* builder) { | |
| 3409 ASSERT(subject->IsFlat()); | |
| 3410 ASSERT_EQ(1, pattern->length()); | |
| 3411 uc16 pattern_char = pattern->Get(0); | |
| 3412 // Treating position before first as initial "previous match position". | |
| 3413 int match_pos = -1; | |
| 3414 | |
| 3415 for (;;) { // Break when search complete. | |
| 3416 builder->EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch); | |
| 3417 AssertNoAllocation no_gc; | |
| 3418 if (subject->IsAsciiRepresentation()) { | |
| 3419 if (pattern_char > String::kMaxAsciiCharCode) { | |
| 3420 break; | |
| 3421 } | |
| 3422 Vector<const char> subject_vector = subject->ToAsciiVector(); | |
| 3423 char pattern_ascii_char = static_cast<char>(pattern_char); | |
| 3424 bool complete = SearchCharMultiple<const char>(subject_vector, | |
| 3425 *pattern, | |
| 3426 pattern_ascii_char, | |
| 3427 builder, | |
| 3428 &match_pos); | |
| 3429 if (complete) break; | |
| 3430 } else { | |
| 3431 Vector<const uc16> subject_vector = subject->ToUC16Vector(); | |
| 3432 bool complete = SearchCharMultiple<const uc16>(subject_vector, | |
| 3433 *pattern, | |
| 3434 pattern_char, | |
| 3435 builder, | |
| 3436 &match_pos); | |
| 3437 if (complete) break; | |
| 3438 } | |
| 3439 } | |
| 3440 | |
| 3441 if (match_pos >= 0) { | |
| 3442 SetLastMatchInfoNoCaptures(subject, | |
| 3443 last_match_info, | |
| 3444 match_pos, | |
| 3445 match_pos + 1); | |
| 3446 return true; | |
| 3447 } | |
| 3448 return false; // No matches at all. | |
| 3449 } | |
| 3450 | |
| 3451 | |
| 3452 template <typename schar, typename pchar> | 3276 template <typename schar, typename pchar> |
| 3453 static bool SearchStringMultiple(Vector<schar> subject, | 3277 static bool SearchStringMultiple(Vector<schar> subject, |
| 3454 String* pattern, | 3278 String* pattern, |
| 3455 Vector<pchar> pattern_string, | 3279 Vector<pchar> pattern_string, |
| 3456 FixedArrayBuilder* builder, | 3280 FixedArrayBuilder* builder, |
| 3457 int* match_pos) { | 3281 int* match_pos) { |
| 3458 int pos = *match_pos; | 3282 int pos = *match_pos; |
| 3459 int subject_length = subject.length(); | 3283 int subject_length = subject.length(); |
| 3460 int pattern_length = pattern_string.length(); | 3284 int pattern_length = pattern_string.length(); |
| 3461 int max_search_start = subject_length - pattern_length; | 3285 int max_search_start = subject_length - pattern_length; |
| (...skipping 57 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3519 return true; | 3343 return true; |
| 3520 } | 3344 } |
| 3521 | 3345 |
| 3522 | 3346 |
| 3523 static bool SearchStringMultiple(Handle<String> subject, | 3347 static bool SearchStringMultiple(Handle<String> subject, |
| 3524 Handle<String> pattern, | 3348 Handle<String> pattern, |
| 3525 Handle<JSArray> last_match_info, | 3349 Handle<JSArray> last_match_info, |
| 3526 FixedArrayBuilder* builder) { | 3350 FixedArrayBuilder* builder) { |
| 3527 ASSERT(subject->IsFlat()); | 3351 ASSERT(subject->IsFlat()); |
| 3528 ASSERT(pattern->IsFlat()); | 3352 ASSERT(pattern->IsFlat()); |
| 3529 ASSERT(pattern->length() > 1); | |
| 3530 | 3353 |
| 3531 // Treating as if a previous match was before first character. | 3354 // Treating as if a previous match was before first character. |
| 3532 int match_pos = -pattern->length(); | 3355 int match_pos = -pattern->length(); |
| 3533 | 3356 |
| 3534 for (;;) { // Break when search complete. | 3357 for (;;) { // Break when search complete. |
| 3535 builder->EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch); | 3358 builder->EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch); |
| 3536 AssertNoAllocation no_gc; | 3359 AssertNoAllocation no_gc; |
| 3537 if (subject->IsAsciiRepresentation()) { | 3360 if (subject->IsAsciiRepresentation()) { |
| 3538 Vector<const char> subject_vector = subject->ToAsciiVector(); | 3361 Vector<const char> subject_vector = subject->ToAsciiVector(); |
| 3539 if (pattern->IsAsciiRepresentation()) { | 3362 if (pattern->IsAsciiRepresentation()) { |
| (...skipping 237 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3777 result_elements = | 3600 result_elements = |
| 3778 Handle<FixedArray>(FixedArray::cast(result_array->elements())); | 3601 Handle<FixedArray>(FixedArray::cast(result_array->elements())); |
| 3779 } else { | 3602 } else { |
| 3780 result_elements = Factory::NewFixedArrayWithHoles(16); | 3603 result_elements = Factory::NewFixedArrayWithHoles(16); |
| 3781 } | 3604 } |
| 3782 FixedArrayBuilder builder(result_elements); | 3605 FixedArrayBuilder builder(result_elements); |
| 3783 | 3606 |
| 3784 if (regexp->TypeTag() == JSRegExp::ATOM) { | 3607 if (regexp->TypeTag() == JSRegExp::ATOM) { |
| 3785 Handle<String> pattern( | 3608 Handle<String> pattern( |
| 3786 String::cast(regexp->DataAt(JSRegExp::kAtomPatternIndex))); | 3609 String::cast(regexp->DataAt(JSRegExp::kAtomPatternIndex))); |
| 3787 int pattern_length = pattern->length(); | |
| 3788 if (pattern_length == 1) { | |
| 3789 if (SearchCharMultiple(subject, pattern, last_match_info, &builder)) { | |
| 3790 return *builder.ToJSArray(result_array); | |
| 3791 } | |
| 3792 return Heap::null_value(); | |
| 3793 } | |
| 3794 | |
| 3795 if (!pattern->IsFlat()) FlattenString(pattern); | 3610 if (!pattern->IsFlat()) FlattenString(pattern); |
| 3796 if (SearchStringMultiple(subject, pattern, last_match_info, &builder)) { | 3611 if (SearchStringMultiple(subject, pattern, last_match_info, &builder)) { |
| 3797 return *builder.ToJSArray(result_array); | 3612 return *builder.ToJSArray(result_array); |
| 3798 } | 3613 } |
| 3799 return Heap::null_value(); | 3614 return Heap::null_value(); |
| 3800 } | 3615 } |
| 3801 | 3616 |
| 3802 ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP); | 3617 ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP); |
| 3803 | 3618 |
| 3804 RegExpImpl::IrregexpResult result; | 3619 RegExpImpl::IrregexpResult result; |
| (...skipping 1580 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 5385 limit--; | 5200 limit--; |
| 5386 } | 5201 } |
| 5387 return; | 5202 return; |
| 5388 } | 5203 } |
| 5389 default: | 5204 default: |
| 5390 UNREACHABLE(); | 5205 UNREACHABLE(); |
| 5391 return; | 5206 return; |
| 5392 } | 5207 } |
| 5393 } | 5208 } |
| 5394 | 5209 |
| 5395 template <typename schar> | |
| 5396 inline void FindCharIndices(Vector<const schar> subject, | |
| 5397 const schar pattern_char, | |
| 5398 ZoneList<int>* indices, | |
| 5399 unsigned int limit) { | |
| 5400 // Collect indices of pattern_char in subject, and the end-of-string index. | |
| 5401 // Stop after finding at most limit values. | |
| 5402 int index = 0; | |
| 5403 while (limit > 0) { | |
| 5404 index = SingleCharIndexOf(subject, pattern_char, index); | |
| 5405 if (index < 0) return; | |
| 5406 indices->Add(index); | |
| 5407 index++; | |
| 5408 limit--; | |
| 5409 } | |
| 5410 } | |
| 5411 | |
| 5412 | 5210 |
| 5413 static Object* Runtime_StringSplit(Arguments args) { | 5211 static Object* Runtime_StringSplit(Arguments args) { |
| 5414 ASSERT(args.length() == 3); | 5212 ASSERT(args.length() == 3); |
| 5415 HandleScope handle_scope; | 5213 HandleScope handle_scope; |
| 5416 CONVERT_ARG_CHECKED(String, subject, 0); | 5214 CONVERT_ARG_CHECKED(String, subject, 0); |
| 5417 CONVERT_ARG_CHECKED(String, pattern, 1); | 5215 CONVERT_ARG_CHECKED(String, pattern, 1); |
| 5418 CONVERT_NUMBER_CHECKED(uint32_t, limit, Uint32, args[2]); | 5216 CONVERT_NUMBER_CHECKED(uint32_t, limit, Uint32, args[2]); |
| 5419 | 5217 |
| 5420 int subject_length = subject->length(); | 5218 int subject_length = subject->length(); |
| 5421 int pattern_length = pattern->length(); | 5219 int pattern_length = pattern->length(); |
| 5422 RUNTIME_ASSERT(pattern_length > 0); | 5220 RUNTIME_ASSERT(pattern_length > 0); |
| 5423 | 5221 |
| 5424 // The limit can be very large (0xffffffffu), but since the pattern | 5222 // The limit can be very large (0xffffffffu), but since the pattern |
| 5425 // isn't empty, we can never create more parts than ~half the length | 5223 // isn't empty, we can never create more parts than ~half the length |
| 5426 // of the subject. | 5224 // of the subject. |
| 5427 | 5225 |
| 5428 if (!subject->IsFlat()) FlattenString(subject); | 5226 if (!subject->IsFlat()) FlattenString(subject); |
| 5429 | 5227 |
| 5430 static const int kMaxInitialListCapacity = 16; | 5228 static const int kMaxInitialListCapacity = 16; |
| 5431 | 5229 |
| 5432 ZoneScope scope(DELETE_ON_EXIT); | 5230 ZoneScope scope(DELETE_ON_EXIT); |
| 5433 | 5231 |
| 5434 // Find (up to limit) indices of separator and end-of-string in subject | 5232 // Find (up to limit) indices of separator and end-of-string in subject |
| 5435 int initial_capacity = Min<uint32_t>(kMaxInitialListCapacity, limit); | 5233 int initial_capacity = Min<uint32_t>(kMaxInitialListCapacity, limit); |
| 5436 ZoneList<int> indices(initial_capacity); | 5234 ZoneList<int> indices(initial_capacity); |
| 5437 if (pattern_length == 1) { | 5235 if (!pattern->IsFlat()) FlattenString(pattern); |
| 5438 // Special case, go directly to fast single-character split. | 5236 AssertNoAllocation nogc; |
| 5439 AssertNoAllocation nogc; | 5237 if (subject->IsAsciiRepresentation()) { |
| 5440 uc16 pattern_char = pattern->Get(0); | 5238 Vector<const char> subject_vector = subject->ToAsciiVector(); |
| 5441 if (subject->IsTwoByteRepresentation()) { | 5239 if (pattern->IsAsciiRepresentation()) { |
| 5442 FindCharIndices(subject->ToUC16Vector(), pattern_char, | 5240 FindStringIndices(subject_vector, |
| 5443 &indices, | 5241 pattern->ToAsciiVector(), |
| 5444 limit); | 5242 &indices, |
| 5445 } else if (pattern_char <= String::kMaxAsciiCharCode) { | 5243 limit); |
| 5446 FindCharIndices(subject->ToAsciiVector(), | 5244 } else { |
| 5447 static_cast<char>(pattern_char), | 5245 FindStringIndices(subject_vector, |
| 5448 &indices, | 5246 pattern->ToUC16Vector(), |
| 5449 limit); | 5247 &indices, |
| 5248 limit); |
| 5450 } | 5249 } |
| 5451 } else { | 5250 } else { |
| 5452 if (!pattern->IsFlat()) FlattenString(pattern); | 5251 Vector<const uc16> subject_vector = subject->ToUC16Vector(); |
| 5453 AssertNoAllocation nogc; | 5252 if (pattern->IsAsciiRepresentation()) { |
| 5454 if (subject->IsAsciiRepresentation()) { | 5253 FindStringIndices(subject_vector, |
| 5455 Vector<const char> subject_vector = subject->ToAsciiVector(); | 5254 pattern->ToAsciiVector(), |
| 5456 if (pattern->IsAsciiRepresentation()) { | 5255 &indices, |
| 5457 FindStringIndices(subject_vector, | 5256 limit); |
| 5458 pattern->ToAsciiVector(), | |
| 5459 &indices, | |
| 5460 limit); | |
| 5461 } else { | |
| 5462 FindStringIndices(subject_vector, | |
| 5463 pattern->ToUC16Vector(), | |
| 5464 &indices, | |
| 5465 limit); | |
| 5466 } | |
| 5467 } else { | 5257 } else { |
| 5468 Vector<const uc16> subject_vector = subject->ToUC16Vector(); | 5258 FindStringIndices(subject_vector, |
| 5469 if (pattern->IsAsciiRepresentation()) { | 5259 pattern->ToUC16Vector(), |
| 5470 FindStringIndices(subject_vector, | 5260 &indices, |
| 5471 pattern->ToAsciiVector(), | 5261 limit); |
| 5472 &indices, | |
| 5473 limit); | |
| 5474 } else { | |
| 5475 FindStringIndices(subject_vector, | |
| 5476 pattern->ToUC16Vector(), | |
| 5477 &indices, | |
| 5478 limit); | |
| 5479 } | |
| 5480 } | 5262 } |
| 5481 } | 5263 } |
| 5482 if (static_cast<uint32_t>(indices.length()) < limit) { | 5264 if (static_cast<uint32_t>(indices.length()) < limit) { |
| 5483 indices.Add(subject_length); | 5265 indices.Add(subject_length); |
| 5484 } | 5266 } |
| 5485 // The list indices now contains the end of each part to create. | 5267 // The list indices now contains the end of each part to create. |
| 5486 | 5268 |
| 5487 | 5269 |
| 5488 // Create JSArray of substrings separated by separator. | 5270 // Create JSArray of substrings separated by separator. |
| 5489 int part_count = indices.length(); | 5271 int part_count = indices.length(); |
| (...skipping 5249 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 10739 } else { | 10521 } else { |
| 10740 // Handle last resort GC and make sure to allow future allocations | 10522 // Handle last resort GC and make sure to allow future allocations |
| 10741 // to grow the heap without causing GCs (if possible). | 10523 // to grow the heap without causing GCs (if possible). |
| 10742 Counters::gc_last_resort_from_js.Increment(); | 10524 Counters::gc_last_resort_from_js.Increment(); |
| 10743 Heap::CollectAllGarbage(false); | 10525 Heap::CollectAllGarbage(false); |
| 10744 } | 10526 } |
| 10745 } | 10527 } |
| 10746 | 10528 |
| 10747 | 10529 |
| 10748 } } // namespace v8::internal | 10530 } } // namespace v8::internal |
| OLD | NEW |