Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(165)

Side by Side Diff: src/runtime.cc

Issue 3467010: Refactored string search code. (Closed)
Patch Set: Added to xcode project too. Created 10 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/SConscript ('k') | src/string-search.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2006-2009 the V8 project authors. All rights reserved. 1 // Copyright 2006-2009 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 2606 matching lines...) Expand 10 before | Expand all | Expand 10 after
2617 // Extract flattened substrings of cons strings before determining asciiness. 2617 // Extract flattened substrings of cons strings before determining asciiness.
2618 String* seq_sub = *sub; 2618 String* seq_sub = *sub;
2619 if (seq_sub->IsConsString()) seq_sub = ConsString::cast(seq_sub)->first(); 2619 if (seq_sub->IsConsString()) seq_sub = ConsString::cast(seq_sub)->first();
2620 String* seq_pat = *pat; 2620 String* seq_pat = *pat;
2621 if (seq_pat->IsConsString()) seq_pat = ConsString::cast(seq_pat)->first(); 2621 if (seq_pat->IsConsString()) seq_pat = ConsString::cast(seq_pat)->first();
2622 2622
2623 // dispatch on type of strings 2623 // dispatch on type of strings
2624 if (seq_pat->IsAsciiRepresentation()) { 2624 if (seq_pat->IsAsciiRepresentation()) {
2625 Vector<const char> pat_vector = seq_pat->ToAsciiVector(); 2625 Vector<const char> pat_vector = seq_pat->ToAsciiVector();
2626 if (seq_sub->IsAsciiRepresentation()) { 2626 if (seq_sub->IsAsciiRepresentation()) {
2627 return StringSearch(seq_sub->ToAsciiVector(), pat_vector, start_index); 2627 return SearchString(seq_sub->ToAsciiVector(), pat_vector, start_index);
2628 } 2628 }
2629 return StringSearch(seq_sub->ToUC16Vector(), pat_vector, start_index); 2629 return SearchString(seq_sub->ToUC16Vector(), pat_vector, start_index);
2630 } 2630 }
2631 Vector<const uc16> pat_vector = seq_pat->ToUC16Vector(); 2631 Vector<const uc16> pat_vector = seq_pat->ToUC16Vector();
2632 if (seq_sub->IsAsciiRepresentation()) { 2632 if (seq_sub->IsAsciiRepresentation()) {
2633 return StringSearch(seq_sub->ToAsciiVector(), pat_vector, start_index); 2633 return SearchString(seq_sub->ToAsciiVector(), pat_vector, start_index);
2634 } 2634 }
2635 return StringSearch(seq_sub->ToUC16Vector(), pat_vector, start_index); 2635 return SearchString(seq_sub->ToUC16Vector(), pat_vector, start_index);
2636 } 2636 }
2637 2637
2638 2638
2639 static Object* Runtime_StringIndexOf(Arguments args) { 2639 static Object* Runtime_StringIndexOf(Arguments args) {
2640 HandleScope scope; // create a new handle scope 2640 HandleScope scope; // create a new handle scope
2641 ASSERT(args.length() == 3); 2641 ASSERT(args.length() == 3);
2642 2642
2643 CONVERT_ARG_CHECKED(String, sub, 0); 2643 CONVERT_ARG_CHECKED(String, sub, 0);
2644 CONVERT_ARG_CHECKED(String, pat, 1); 2644 CONVERT_ARG_CHECKED(String, pat, 1);
2645 2645
(...skipping 236 matching lines...) Expand 10 before | Expand all | Expand 10 after
2882 AssertNoAllocation no_gc; 2882 AssertNoAllocation no_gc;
2883 FixedArray* elements = FixedArray::cast(last_match_info->elements()); 2883 FixedArray* elements = FixedArray::cast(last_match_info->elements());
2884 RegExpImpl::SetLastCaptureCount(elements, 2); 2884 RegExpImpl::SetLastCaptureCount(elements, 2);
2885 RegExpImpl::SetLastInput(elements, *subject); 2885 RegExpImpl::SetLastInput(elements, *subject);
2886 RegExpImpl::SetLastSubject(elements, *subject); 2886 RegExpImpl::SetLastSubject(elements, *subject);
2887 RegExpImpl::SetCapture(elements, 0, match_start); 2887 RegExpImpl::SetCapture(elements, 0, match_start);
2888 RegExpImpl::SetCapture(elements, 1, match_end); 2888 RegExpImpl::SetCapture(elements, 1, match_end);
2889 } 2889 }
2890 2890
2891 2891
2892 template <typename schar, typename pchar> 2892 template <typename SubjectChar, typename PatternChar>
2893 static bool SearchStringMultiple(Vector<schar> subject, 2893 static bool SearchStringMultiple(Vector<const SubjectChar> subject,
2894 String* pattern, 2894 Vector<const PatternChar> pattern,
2895 Vector<pchar> pattern_string, 2895 String* pattern_string,
2896 FixedArrayBuilder* builder, 2896 FixedArrayBuilder* builder,
2897 int* match_pos) { 2897 int* match_pos) {
2898 int pos = *match_pos; 2898 int pos = *match_pos;
2899 int subject_length = subject.length(); 2899 int subject_length = subject.length();
2900 int pattern_length = pattern_string.length(); 2900 int pattern_length = pattern.length();
2901 int max_search_start = subject_length - pattern_length; 2901 int max_search_start = subject_length - pattern_length;
2902 bool is_ascii = (sizeof(schar) == 1); 2902 StringSearch<PatternChar, SubjectChar> search(pattern);
2903 StringSearchStrategy strategy = 2903 while (pos <= max_search_start) {
2904 InitializeStringSearch(pattern_string, is_ascii); 2904 if (!builder->HasCapacity(kMaxBuilderEntriesPerRegExpMatch)) {
2905 switch (strategy) { 2905 *match_pos = pos;
2906 case SEARCH_FAIL: break; 2906 return false;
2907 case SEARCH_SHORT: 2907 }
2908 while (pos <= max_search_start) { 2908 // Position of end of previous match.
2909 if (!builder->HasCapacity(kMaxBuilderEntriesPerRegExpMatch)) { 2909 int match_end = pos + pattern_length;
2910 *match_pos = pos; 2910 int new_pos = search.Search(subject, match_end);
2911 return false; 2911 if (new_pos >= 0) {
2912 } 2912 // A match.
2913 // Position of end of previous match. 2913 if (new_pos > match_end) {
2914 int match_end = pos + pattern_length; 2914 ReplacementStringBuilder::AddSubjectSlice(builder,
2915 int new_pos = SimpleIndexOf(subject, pattern_string, match_end); 2915 match_end,
2916 if (new_pos >= 0) { 2916 new_pos);
2917 // A match.
2918 if (new_pos > match_end) {
2919 ReplacementStringBuilder::AddSubjectSlice(builder,
2920 match_end,
2921 new_pos);
2922 }
2923 pos = new_pos;
2924 builder->Add(pattern);
2925 } else {
2926 break;
2927 }
2928 } 2917 }
2918 pos = new_pos;
2919 builder->Add(pattern_string);
2920 } else {
2929 break; 2921 break;
2930 case SEARCH_LONG: 2922 }
2931 while (pos <= max_search_start) {
2932 if (!builder->HasCapacity(kMaxBuilderEntriesPerRegExpMatch)) {
2933 *match_pos = pos;
2934 return false;
2935 }
2936 int match_end = pos + pattern_length;
2937 int new_pos = ComplexIndexOf(subject, pattern_string, match_end);
2938 if (new_pos >= 0) {
2939 // A match has been found.
2940 if (new_pos > match_end) {
2941 ReplacementStringBuilder::AddSubjectSlice(builder,
2942 match_end,
2943 new_pos);
2944 }
2945 pos = new_pos;
2946 builder->Add(pattern);
2947 } else {
2948 break;
2949 }
2950 }
2951 break;
2952 } 2923 }
2924
2953 if (pos < max_search_start) { 2925 if (pos < max_search_start) {
2954 ReplacementStringBuilder::AddSubjectSlice(builder, 2926 ReplacementStringBuilder::AddSubjectSlice(builder,
2955 pos + pattern_length, 2927 pos + pattern_length,
2956 subject_length); 2928 subject_length);
2957 } 2929 }
2958 *match_pos = pos; 2930 *match_pos = pos;
2959 return true; 2931 return true;
2960 } 2932 }
2961 2933
2962 2934
2963 static bool SearchStringMultiple(Handle<String> subject, 2935 static bool SearchStringMultiple(Handle<String> subject,
2964 Handle<String> pattern, 2936 Handle<String> pattern,
2965 Handle<JSArray> last_match_info, 2937 Handle<JSArray> last_match_info,
2966 FixedArrayBuilder* builder) { 2938 FixedArrayBuilder* builder) {
2967 ASSERT(subject->IsFlat()); 2939 ASSERT(subject->IsFlat());
2968 ASSERT(pattern->IsFlat()); 2940 ASSERT(pattern->IsFlat());
2969 2941
2970 // Treating as if a previous match was before first character. 2942 // Treating as if a previous match was before first character.
2971 int match_pos = -pattern->length(); 2943 int match_pos = -pattern->length();
2972 2944
2973 for (;;) { // Break when search complete. 2945 for (;;) { // Break when search complete.
2974 builder->EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch); 2946 builder->EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch);
2975 AssertNoAllocation no_gc; 2947 AssertNoAllocation no_gc;
2976 if (subject->IsAsciiRepresentation()) { 2948 if (subject->IsAsciiRepresentation()) {
2977 Vector<const char> subject_vector = subject->ToAsciiVector(); 2949 Vector<const char> subject_vector = subject->ToAsciiVector();
2978 if (pattern->IsAsciiRepresentation()) { 2950 if (pattern->IsAsciiRepresentation()) {
2979 if (SearchStringMultiple(subject_vector, 2951 if (SearchStringMultiple(subject_vector,
2952 pattern->ToAsciiVector(),
2980 *pattern, 2953 *pattern,
2981 pattern->ToAsciiVector(),
2982 builder, 2954 builder,
2983 &match_pos)) break; 2955 &match_pos)) break;
2984 } else { 2956 } else {
2985 if (SearchStringMultiple(subject_vector, 2957 if (SearchStringMultiple(subject_vector,
2958 pattern->ToUC16Vector(),
2986 *pattern, 2959 *pattern,
2987 pattern->ToUC16Vector(),
2988 builder, 2960 builder,
2989 &match_pos)) break; 2961 &match_pos)) break;
2990 } 2962 }
2991 } else { 2963 } else {
2992 Vector<const uc16> subject_vector = subject->ToUC16Vector(); 2964 Vector<const uc16> subject_vector = subject->ToUC16Vector();
2993 if (pattern->IsAsciiRepresentation()) { 2965 if (pattern->IsAsciiRepresentation()) {
2994 if (SearchStringMultiple(subject_vector, 2966 if (SearchStringMultiple(subject_vector,
2967 pattern->ToAsciiVector(),
2995 *pattern, 2968 *pattern,
2996 pattern->ToAsciiVector(),
2997 builder, 2969 builder,
2998 &match_pos)) break; 2970 &match_pos)) break;
2999 } else { 2971 } else {
3000 if (SearchStringMultiple(subject_vector, 2972 if (SearchStringMultiple(subject_vector,
2973 pattern->ToUC16Vector(),
3001 *pattern, 2974 *pattern,
3002 pattern->ToUC16Vector(),
3003 builder, 2975 builder,
3004 &match_pos)) break; 2976 &match_pos)) break;
3005 } 2977 }
3006 } 2978 }
3007 } 2979 }
3008 2980
3009 if (match_pos >= 0) { 2981 if (match_pos >= 0) {
3010 SetLastMatchInfoNoCaptures(subject, 2982 SetLastMatchInfoNoCaptures(subject,
3011 last_match_info, 2983 last_match_info,
3012 match_pos, 2984 match_pos,
(...skipping 1761 matching lines...) Expand 10 before | Expand all | Expand 10 after
4774 int right = length; 4746 int right = length;
4775 if (trimRight) { 4747 if (trimRight) {
4776 while (right > left && IsTrimWhiteSpace(s->Get(right - 1))) { 4748 while (right > left && IsTrimWhiteSpace(s->Get(right - 1))) {
4777 right--; 4749 right--;
4778 } 4750 }
4779 } 4751 }
4780 return s->SubString(left, right); 4752 return s->SubString(left, right);
4781 } 4753 }
4782 4754
4783 4755
4784 // Define storage for buffers declared in header file. 4756 template <typename SubjectChar, typename PatternChar>
4785 // TODO(lrn): Remove these when rewriting search code. 4757 void FindStringIndices(Vector<const SubjectChar> subject,
4786 int BMBuffers::bad_char_occurrence[kBMAlphabetSize]; 4758 Vector<const PatternChar> pattern,
4787 BMGoodSuffixBuffers BMBuffers::bmgs_buffers;
4788
4789
4790 template <typename schar, typename pchar>
4791 void FindStringIndices(Vector<const schar> subject,
4792 Vector<const pchar> pattern,
4793 ZoneList<int>* indices, 4759 ZoneList<int>* indices,
4794 unsigned int limit) { 4760 unsigned int limit) {
4795 ASSERT(limit > 0); 4761 ASSERT(limit > 0);
4796 // Collect indices of pattern in subject, and the end-of-string index. 4762 // Collect indices of pattern in subject, and the end-of-string index.
4797 // Stop after finding at most limit values. 4763 // Stop after finding at most limit values.
4798 StringSearchStrategy strategy = 4764 StringSearch<PatternChar, SubjectChar> search(pattern);
4799 InitializeStringSearch(pattern, sizeof(schar) == 1); 4765 int pattern_length = pattern.length();
4800 switch (strategy) { 4766 int index = 0;
4801 case SEARCH_FAIL: return; 4767 while (limit > 0) {
4802 case SEARCH_SHORT: { 4768 index = search.Search(subject, index);
4803 int pattern_length = pattern.length(); 4769 if (index < 0) return;
4804 int index = 0; 4770 indices->Add(index);
4805 while (limit > 0) { 4771 index += pattern_length;
4806 index = SimpleIndexOf(subject, pattern, index); 4772 limit--;
4807 if (index < 0) return;
4808 indices->Add(index);
4809 index += pattern_length;
4810 limit--;
4811 }
4812 return;
4813 }
4814 case SEARCH_LONG: {
4815 int pattern_length = pattern.length();
4816 int index = 0;
4817 while (limit > 0) {
4818 index = ComplexIndexOf(subject, pattern, index);
4819 if (index < 0) return;
4820 indices->Add(index);
4821 index += pattern_length;
4822 limit--;
4823 }
4824 return;
4825 }
4826 default:
4827 UNREACHABLE();
4828 return;
4829 } 4773 }
4830 } 4774 }
4831 4775
4832 4776
4833 static Object* Runtime_StringSplit(Arguments args) { 4777 static Object* Runtime_StringSplit(Arguments args) {
4834 ASSERT(args.length() == 3); 4778 ASSERT(args.length() == 3);
4835 HandleScope handle_scope; 4779 HandleScope handle_scope;
4836 CONVERT_ARG_CHECKED(String, subject, 0); 4780 CONVERT_ARG_CHECKED(String, subject, 0);
4837 CONVERT_ARG_CHECKED(String, pattern, 1); 4781 CONVERT_ARG_CHECKED(String, pattern, 1);
4838 CONVERT_NUMBER_CHECKED(uint32_t, limit, Uint32, args[2]); 4782 CONVERT_NUMBER_CHECKED(uint32_t, limit, Uint32, args[2]);
(...skipping 5374 matching lines...) Expand 10 before | Expand all | Expand 10 after
10213 } else { 10157 } else {
10214 // Handle last resort GC and make sure to allow future allocations 10158 // Handle last resort GC and make sure to allow future allocations
10215 // to grow the heap without causing GCs (if possible). 10159 // to grow the heap without causing GCs (if possible).
10216 Counters::gc_last_resort_from_js.Increment(); 10160 Counters::gc_last_resort_from_js.Increment();
10217 Heap::CollectAllGarbage(false); 10161 Heap::CollectAllGarbage(false);
10218 } 10162 }
10219 } 10163 }
10220 10164
10221 10165
10222 } } // namespace v8::internal 10166 } } // namespace v8::internal
OLDNEW
« no previous file with comments | « src/SConscript ('k') | src/string-search.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698