Chromium Code Reviews| Index: src/runtime.cc |
| diff --git a/src/runtime.cc b/src/runtime.cc |
| index 25ac6f37cb0e8296ec08c7260d8343b941fcd762..1744bedf77bc15065dc7084e8a46a6b0a4c663ce 100644 |
| --- a/src/runtime.cc |
| +++ b/src/runtime.cc |
| @@ -42,6 +42,7 @@ |
| #include "runtime.h" |
| #include "scopeinfo.h" |
| #include "v8threads.h" |
| +#include "smart-pointer.h" |
| namespace v8 { namespace internal { |
| @@ -925,98 +926,217 @@ static Object* Runtime_CharFromCode(Arguments args) { |
| } |
| -static inline void ComputeKMPNextTable(String* pattern, int next_table[]) { |
| - int i = 0; |
| - int j = -1; |
| - next_table[0] = -1; |
| +static Vector<const char> ToAsciiVector(String *string) { |
| + ASSERT(string->IsAscii()); |
| + ASSERT(string->IsFlat()); |
| - Access<StringInputBuffer> buffer(&string_input_buffer); |
| - buffer->Reset(pattern); |
| - int length = pattern->length(); |
| - uint16_t p = buffer->GetNext(); |
| - while (i < length - 1) { |
| - while (j > -1 && p != pattern->Get(j)) { |
| - j = next_table[j]; |
| + int offset = 0; |
| + int length = string->length(); |
| + for(;;) { |
|
Christian Plesner Hansen
2008/10/06 12:47:19
Flat strings are relatively simple in structure so
Lasse Reichstein
2008/10/07 09:16:28
Done.
|
| + switch(string->representation_tag()) { |
| + case kSeqStringTag: { |
| + AsciiString* seq = AsciiString::cast(string); |
| + char* start = reinterpret_cast<char*>(seq->GetCharsAddress()); |
| + return Vector<const char>(start + offset, length); |
| } |
| - i++; |
| - j++; |
| - p = buffer->GetNext(); |
| - if (p == pattern->Get(j)) { |
| - next_table[i] = next_table[j]; |
| - } else { |
| - next_table[i] = j; |
| + case kExternalStringTag: { |
| + ExternalAsciiString* ext = ExternalAsciiString::cast(string); |
| + const char* start = ext->resource()->data(); |
| + return Vector<const char>(start + offset, length); |
| + } |
| + case kConsStringTag: { |
| + ConsString* cons = ConsString::cast(string); |
| + ASSERT(String::cast(cons->second())->length() == 0); |
| + string = String::cast(cons->first()); |
| + continue; |
| + } |
| + case kSlicedStringTag: { |
| + SlicedString* sliced = SlicedString::cast(string); |
| + offset += sliced->start(); |
| + string = String::cast(sliced->buffer()); |
| + continue; |
| + } |
| + default: |
| + UNREACHABLE(); |
| } |
| } |
| } |
| -int Runtime::StringMatchKmp(String* sub, String* pat, int start_index) { |
| - sub->TryFlatten(); |
| - pat->TryFlatten(); |
| +static Vector<const uc16> ToUC16Vector(String *string) { |
| + ASSERT(string->IsTwoByteString()); |
| + ASSERT(string->IsFlat()); |
| - int subject_length = sub->length(); |
| - int pattern_length = pat->length(); |
| + int offset = 0; |
| + int length = string->length(); |
| + for(;;) { |
| + switch(string->representation_tag()) { |
| + case kSeqStringTag: { |
| + TwoByteString* seq = TwoByteString::cast(string); |
| + uc16* start = reinterpret_cast<uc16*>(seq->GetCharsAddress()); |
| + return Vector<const uc16>(start + offset, length); |
| + } |
| + case kExternalStringTag: { |
| + ExternalTwoByteString* ext = ExternalTwoByteString::cast(string); |
| + const uc16* start = |
| + reinterpret_cast<const uc16*>(ext->resource()->data()); |
| + return Vector<const uc16>(start + offset, length); |
| + } |
| + case kConsStringTag: { |
| + ConsString* cons = ConsString::cast(string); |
| + ASSERT(String::cast(cons->second())->length() == 0); |
| + string = String::cast(cons->first()); |
| + continue; |
| + } |
| + case kSlicedStringTag: { |
| + SlicedString* sliced = SlicedString::cast(string); |
| + offset += sliced->start(); |
| + string = String::cast(sliced->buffer()); |
| + continue; |
| + } |
| + default: |
| + UNREACHABLE(); |
| + } |
| + } |
| +} |
| - if (start_index > subject_length) return -1; |
| - if (pattern_length == 0) return start_index; |
| - // Searching for one specific character is common. For one |
| - // character patterns the KMP algorithm is guaranteed to slow down |
| - // the search, so we just run through the subject string. |
| - if (pattern_length == 1) { |
| - uint16_t pattern_char = pat->Get(0); |
| - for (int i = start_index; i < subject_length; i++) { |
| - if (sub->Get(i) == pattern_char) { |
| - return i; |
| - } |
| +template <typename schar, typename pchar> |
| +static int SingleCharIndexOf( |
| + Vector<const schar> string, |
|
Christian Plesner Hansen
2008/10/06 12:47:19
This argument should be on the same line as the fu
Lasse Reichstein
2008/10/07 09:16:28
Done.
|
| + pchar pattern_char, |
| + int start_index) { |
| + for (int i = start_index, n = string.length(); i < n; i++) { |
|
Christian Plesner Hansen
2008/10/06 12:47:19
I would move the declaration of n out before the l
Lasse Reichstein
2008/10/07 09:16:28
I consider
for (int i = ..., n = ...; i<n; i++)
a
|
| + if (pattern_char == string[i]) { |
| + return i; |
| } |
| - return -1; |
| } |
| + return -1; |
| +} |
| - // For small searches, KMP is not worth the setup overhead. |
| - if (subject_length < 100) { |
| - // We know our pattern is at least 2 characters, we cache the first so |
| - // the common case of the first character not matching is faster. |
| - uint16_t pattern_first_char = pat->Get(0); |
| - for (int i = start_index; i + pattern_length <= subject_length; i++) { |
| - if (sub->Get(i) != pattern_first_char) continue; |
| - |
| - for (int j = 1; j < pattern_length; j++) { |
| - if (pat->Get(j) != sub->Get(j + i)) break; |
| - if (j == pattern_length - 1) return i; |
| +// Trivial string search for shorter strings. |
| +template <typename pchar, typename schar> |
| +static int SimpleIndexOf( |
| + Vector<const schar> subject, |
| + Vector<const pchar> pattern, |
| + int start_index) { |
| + int pattern_length = pattern.length(); |
| + int subject_length = subject.length(); |
| + // We know our pattern is at least 2 characters, we cache the first so |
| + // the common case of the first character not matching is faster. |
| + pchar pattern_first_char = pattern[0]; |
| + for (int i = start_index, n = subject_length - pattern_length; i <= n; i++) { |
|
Christian Plesner Hansen
2008/10/06 12:47:19
I would move the declaration of n out just before
|
| + if (subject[i] != pattern_first_char) continue; |
| + |
| + bool failure = false; |
| + for (int j = 1; j < pattern_length; j++) { |
| + if (pattern[j] != subject[j+i]) { |
| + failure = true; |
| + break; |
| } |
| } |
| - return -1; |
| + if (!failure) { |
| + return i; |
| + } |
| + } |
| + return -1; |
| +} |
| + |
| +// Full KMP pattern match. |
| +template <typename schar, typename pchar> // Pattern & subject char types |
| +static int KMPIndexOf( |
| + Vector<const schar> subject, |
| + Vector<const pchar> pattern, |
| + int start_index) { |
| + int subject_length = subject.length(); |
| + int pattern_length = pattern.length(); |
| + SmartPointer<int> next_table(NewArray<int>(pattern_length)); |
| + |
| + // Compute KMP "next" table |
| + int i = 0; |
| + int j = -1; |
| + next_table[0] = -1; |
| + |
| + int length = pattern.length(); |
|
Christian Plesner Hansen
2008/10/06 12:47:19
Isn't this identical to pattern_length?
Lasse Reichstein
2008/10/07 09:16:28
Indeed, it should be removed. Done.
|
| + pchar p = pattern[0]; |
| + while (i < length - 1) { |
| + while (j > -1 && p != pattern[j]) { |
| + j = next_table[j]; |
| + } |
| + i++; |
| + j++; |
| + p = pattern[i]; |
| + if (p == pattern[j]) { |
| + next_table[i] = next_table[j]; |
| + } else { |
| + next_table[i] = j; |
| + } |
| } |
| - // For patterns with a larger length we use the KMP algorithm. |
| - // |
| - // Compute the 'next' table. |
| - int* next_table = NewArray<int>(pattern_length); |
| - ComputeKMPNextTable(pat, next_table); |
| // Search using the 'next' table. |
| int pattern_index = 0; |
| - // We would like to use StringInputBuffer here, but it does not have |
| - // the ability to start anywhere but the first character of a |
| - // string. It would be nice to have efficient forward-seeking |
| - // support on StringInputBuffers. |
| int subject_index = start_index; |
| while (subject_index < subject_length) { |
| - uint16_t subject_char = sub->Get(subject_index); |
| - while (pattern_index > -1 && pat->Get(pattern_index) != subject_char) { |
| + schar subject_char = subject[subject_index]; |
| + while (pattern_index > -1 && pattern[pattern_index] != subject_char) { |
| pattern_index = next_table[pattern_index]; |
| } |
| pattern_index++; |
| subject_index++; |
| if (pattern_index >= pattern_length) { |
| - DeleteArray(next_table); |
| return subject_index - pattern_index; |
| } |
| } |
| - DeleteArray(next_table); |
| return -1; |
| } |
| +// Dispatch to different algorithms for different length of pattern/subject |
| +template <typename schar, typename pchar> |
| +static int StringMatchKMP(Vector<const schar> sub, Vector<const pchar> pat, int start_index) { |
| + |
| + // Searching for one specific character is common. For one |
| + // character patterns the KMP algorithm is guaranteed to slow down |
| + // the search, so we just run through the subject string. |
| + if (pat.length() == 1) { |
| + return SingleCharIndexOf(sub, pat[0], start_index); |
| + } |
| + |
| + // For small searches, KMP is not worth the setup overhead. |
| + if (sub.length() - start_index < 100) { |
| + return SimpleIndexOf(sub, pat, start_index); |
| + } |
| + |
| + // For patterns with a larger length we use the KMP algorithm. |
| + return KMPIndexOf(sub, pat, start_index); |
| +} |
| + |
| +// Perform string match of pattern on subject, starting at start index. |
| +// Caller must ensure that 0 <= start_index <= sub->length(), |
| +// and should check that pat->length() + start_index <= sub->length() |
| +int Runtime::StringMatchKmp(String* sub, String* pat, int start_index) { |
| + ASSERT(0 <= start_index); |
| + ASSERT(start_index <= sub->length()); |
| + |
| + if (pat->length() == 0) return start_index; |
| + |
| + sub->TryFlatten(); |
|
Lasse Reichstein
2008/10/07 09:16:28
Doesn't handle failure to flatten.
Changed to the
|
| + pat->TryFlatten(); |
| + AssertNoAllocation heap_allocation_block; // ensure vectors stay valid |
| + // dispatch on type of strings |
| + if (pat->is_ascii()) { |
| + Vector<const char> pat_vector = ToAsciiVector(pat); |
| + if (sub->is_ascii()) { |
| + return StringMatchKMP(ToAsciiVector(sub), pat_vector, start_index); |
| + } |
| + return StringMatchKMP(ToUC16Vector(sub), pat_vector, start_index); |
| + } |
| + Vector<const uc16> pat_vector = ToUC16Vector(pat); |
| + if (sub->is_ascii()) { |
| + return StringMatchKMP(ToAsciiVector(sub), pat_vector, start_index); |
| + } |
| + return StringMatchKMP(ToUC16Vector(sub), pat_vector, start_index); |
| +} |
| + |
| static Object* Runtime_StringIndexOf(Arguments args) { |
| NoHandleAllocation ha; |