Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(572)

Unified Diff: src/runtime.cc

Issue 6269: KMP string search using direct memory access and templates for type specialization. (Closed)
Patch Set: Created 12 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « src/objects-inl.h ('k') | no next file » | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: src/runtime.cc
diff --git a/src/runtime.cc b/src/runtime.cc
index 25ac6f37cb0e8296ec08c7260d8343b941fcd762..1744bedf77bc15065dc7084e8a46a6b0a4c663ce 100644
--- a/src/runtime.cc
+++ b/src/runtime.cc
@@ -42,6 +42,7 @@
#include "runtime.h"
#include "scopeinfo.h"
#include "v8threads.h"
+#include "smart-pointer.h"
namespace v8 { namespace internal {
@@ -925,98 +926,217 @@ static Object* Runtime_CharFromCode(Arguments args) {
}
-static inline void ComputeKMPNextTable(String* pattern, int next_table[]) {
- int i = 0;
- int j = -1;
- next_table[0] = -1;
+static Vector<const char> ToAsciiVector(String *string) {
+ ASSERT(string->IsAscii());
+ ASSERT(string->IsFlat());
- Access<StringInputBuffer> buffer(&string_input_buffer);
- buffer->Reset(pattern);
- int length = pattern->length();
- uint16_t p = buffer->GetNext();
- while (i < length - 1) {
- while (j > -1 && p != pattern->Get(j)) {
- j = next_table[j];
+ int offset = 0;
+ int length = string->length();
+ for(;;) {
Christian Plesner Hansen 2008/10/06 12:47:19 Flat strings are relatively simple in structure so
Lasse Reichstein 2008/10/07 09:16:28 Done.
+ switch(string->representation_tag()) {
+ case kSeqStringTag: {
+ AsciiString* seq = AsciiString::cast(string);
+ char* start = reinterpret_cast<char*>(seq->GetCharsAddress());
+ return Vector<const char>(start + offset, length);
}
- i++;
- j++;
- p = buffer->GetNext();
- if (p == pattern->Get(j)) {
- next_table[i] = next_table[j];
- } else {
- next_table[i] = j;
+ case kExternalStringTag: {
+ ExternalAsciiString* ext = ExternalAsciiString::cast(string);
+ const char* start = ext->resource()->data();
+ return Vector<const char>(start + offset, length);
+ }
+ case kConsStringTag: {
+ ConsString* cons = ConsString::cast(string);
+ ASSERT(String::cast(cons->second())->length() == 0);
+ string = String::cast(cons->first());
+ continue;
+ }
+ case kSlicedStringTag: {
+ SlicedString* sliced = SlicedString::cast(string);
+ offset += sliced->start();
+ string = String::cast(sliced->buffer());
+ continue;
+ }
+ default:
+ UNREACHABLE();
}
}
}
-int Runtime::StringMatchKmp(String* sub, String* pat, int start_index) {
- sub->TryFlatten();
- pat->TryFlatten();
+static Vector<const uc16> ToUC16Vector(String *string) {
+ ASSERT(string->IsTwoByteString());
+ ASSERT(string->IsFlat());
- int subject_length = sub->length();
- int pattern_length = pat->length();
+ int offset = 0;
+ int length = string->length();
+ for(;;) {
+ switch(string->representation_tag()) {
+ case kSeqStringTag: {
+ TwoByteString* seq = TwoByteString::cast(string);
+ uc16* start = reinterpret_cast<uc16*>(seq->GetCharsAddress());
+ return Vector<const uc16>(start + offset, length);
+ }
+ case kExternalStringTag: {
+ ExternalTwoByteString* ext = ExternalTwoByteString::cast(string);
+ const uc16* start =
+ reinterpret_cast<const uc16*>(ext->resource()->data());
+ return Vector<const uc16>(start + offset, length);
+ }
+ case kConsStringTag: {
+ ConsString* cons = ConsString::cast(string);
+ ASSERT(String::cast(cons->second())->length() == 0);
+ string = String::cast(cons->first());
+ continue;
+ }
+ case kSlicedStringTag: {
+ SlicedString* sliced = SlicedString::cast(string);
+ offset += sliced->start();
+ string = String::cast(sliced->buffer());
+ continue;
+ }
+ default:
+ UNREACHABLE();
+ }
+ }
+}
- if (start_index > subject_length) return -1;
- if (pattern_length == 0) return start_index;
- // Searching for one specific character is common. For one
- // character patterns the KMP algorithm is guaranteed to slow down
- // the search, so we just run through the subject string.
- if (pattern_length == 1) {
- uint16_t pattern_char = pat->Get(0);
- for (int i = start_index; i < subject_length; i++) {
- if (sub->Get(i) == pattern_char) {
- return i;
- }
+template <typename schar, typename pchar>
+static int SingleCharIndexOf(
+ Vector<const schar> string,
Christian Plesner Hansen 2008/10/06 12:47:19 This argument should be on the same line as the fu
Lasse Reichstein 2008/10/07 09:16:28 Done.
+ pchar pattern_char,
+ int start_index) {
+ for (int i = start_index, n = string.length(); i < n; i++) {
Christian Plesner Hansen 2008/10/06 12:47:19 I would move the declaration of n out before the l
Lasse Reichstein 2008/10/07 09:16:28 I consider for (int i = ..., n = ...; i<n; i++) a
+ if (pattern_char == string[i]) {
+ return i;
}
- return -1;
}
+ return -1;
+}
- // For small searches, KMP is not worth the setup overhead.
- if (subject_length < 100) {
- // We know our pattern is at least 2 characters, we cache the first so
- // the common case of the first character not matching is faster.
- uint16_t pattern_first_char = pat->Get(0);
- for (int i = start_index; i + pattern_length <= subject_length; i++) {
- if (sub->Get(i) != pattern_first_char) continue;
-
- for (int j = 1; j < pattern_length; j++) {
- if (pat->Get(j) != sub->Get(j + i)) break;
- if (j == pattern_length - 1) return i;
+// Trivial string search for shorter strings.
+template <typename pchar, typename schar>
+static int SimpleIndexOf(
+ Vector<const schar> subject,
+ Vector<const pchar> pattern,
+ int start_index) {
+ int pattern_length = pattern.length();
+ int subject_length = subject.length();
+ // We know our pattern is at least 2 characters, we cache the first so
+ // the common case of the first character not matching is faster.
+ pchar pattern_first_char = pattern[0];
+ for (int i = start_index, n = subject_length - pattern_length; i <= n; i++) {
Christian Plesner Hansen 2008/10/06 12:47:19 I would move the declaration of n out just before
+ if (subject[i] != pattern_first_char) continue;
+
+ bool failure = false;
+ for (int j = 1; j < pattern_length; j++) {
+ if (pattern[j] != subject[j+i]) {
+ failure = true;
+ break;
}
}
- return -1;
+ if (!failure) {
+ return i;
+ }
+ }
+ return -1;
+}
+
+// Full KMP pattern match.
+template <typename schar, typename pchar> // Pattern & subject char types
+static int KMPIndexOf(
+ Vector<const schar> subject,
+ Vector<const pchar> pattern,
+ int start_index) {
+ int subject_length = subject.length();
+ int pattern_length = pattern.length();
+ SmartPointer<int> next_table(NewArray<int>(pattern_length));
+
+ // Compute KMP "next" table
+ int i = 0;
+ int j = -1;
+ next_table[0] = -1;
+
+ int length = pattern.length();
Christian Plesner Hansen 2008/10/06 12:47:19 Isn't this identical to pattern_length?
Lasse Reichstein 2008/10/07 09:16:28 Indeed, it should be removed. Done.
+ pchar p = pattern[0];
+ while (i < length - 1) {
+ while (j > -1 && p != pattern[j]) {
+ j = next_table[j];
+ }
+ i++;
+ j++;
+ p = pattern[i];
+ if (p == pattern[j]) {
+ next_table[i] = next_table[j];
+ } else {
+ next_table[i] = j;
+ }
}
- // For patterns with a larger length we use the KMP algorithm.
- //
- // Compute the 'next' table.
- int* next_table = NewArray<int>(pattern_length);
- ComputeKMPNextTable(pat, next_table);
// Search using the 'next' table.
int pattern_index = 0;
- // We would like to use StringInputBuffer here, but it does not have
- // the ability to start anywhere but the first character of a
- // string. It would be nice to have efficient forward-seeking
- // support on StringInputBuffers.
int subject_index = start_index;
while (subject_index < subject_length) {
- uint16_t subject_char = sub->Get(subject_index);
- while (pattern_index > -1 && pat->Get(pattern_index) != subject_char) {
+ schar subject_char = subject[subject_index];
+ while (pattern_index > -1 && pattern[pattern_index] != subject_char) {
pattern_index = next_table[pattern_index];
}
pattern_index++;
subject_index++;
if (pattern_index >= pattern_length) {
- DeleteArray(next_table);
return subject_index - pattern_index;
}
}
- DeleteArray(next_table);
return -1;
}
+// Dispatch to different algorithms for different length of pattern/subject
+template <typename schar, typename pchar>
+static int StringMatchKMP(Vector<const schar> sub, Vector<const pchar> pat, int start_index) {
+
+ // Searching for one specific character is common. For one
+ // character patterns the KMP algorithm is guaranteed to slow down
+ // the search, so we just run through the subject string.
+ if (pat.length() == 1) {
+ return SingleCharIndexOf(sub, pat[0], start_index);
+ }
+
+ // For small searches, KMP is not worth the setup overhead.
+ if (sub.length() - start_index < 100) {
+ return SimpleIndexOf(sub, pat, start_index);
+ }
+
+ // For patterns with a larger length we use the KMP algorithm.
+ return KMPIndexOf(sub, pat, start_index);
+}
+
+// Perform string match of pattern on subject, starting at start index.
+// Caller must ensure that 0 <= start_index <= sub->length(),
+// and should check that pat->length() + start_index <= sub->length()
+int Runtime::StringMatchKmp(String* sub, String* pat, int start_index) {
+ ASSERT(0 <= start_index);
+ ASSERT(start_index <= sub->length());
+
+ if (pat->length() == 0) return start_index;
+
+ sub->TryFlatten();
Lasse Reichstein 2008/10/07 09:16:28 Doesn't handle failure to flatten. Changed to the
+ pat->TryFlatten();
+ AssertNoAllocation heap_allocation_block; // ensure vectors stay valid
+ // dispatch on type of strings
+ if (pat->is_ascii()) {
+ Vector<const char> pat_vector = ToAsciiVector(pat);
+ if (sub->is_ascii()) {
+ return StringMatchKMP(ToAsciiVector(sub), pat_vector, start_index);
+ }
+ return StringMatchKMP(ToUC16Vector(sub), pat_vector, start_index);
+ }
+ Vector<const uc16> pat_vector = ToUC16Vector(pat);
+ if (sub->is_ascii()) {
+ return StringMatchKMP(ToAsciiVector(sub), pat_vector, start_index);
+ }
+ return StringMatchKMP(ToUC16Vector(sub), pat_vector, start_index);
+}
+
static Object* Runtime_StringIndexOf(Arguments args) {
NoHandleAllocation ha;
« no previous file with comments | « src/objects-inl.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698