Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(178)

Side by Side Diff: src/runtime.cc

Issue 6269: KMP string search using direct memory access and templates for type specialization. (Closed)
Patch Set: Created 12 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/objects-inl.h ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2006-2008 the V8 project authors. All rights reserved. 1 // Copyright 2006-2008 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 24 matching lines...) Expand all
35 #include "compiler.h" 35 #include "compiler.h"
36 #include "cpu.h" 36 #include "cpu.h"
37 #include "dateparser.h" 37 #include "dateparser.h"
38 #include "debug.h" 38 #include "debug.h"
39 #include "execution.h" 39 #include "execution.h"
40 #include "jsregexp.h" 40 #include "jsregexp.h"
41 #include "platform.h" 41 #include "platform.h"
42 #include "runtime.h" 42 #include "runtime.h"
43 #include "scopeinfo.h" 43 #include "scopeinfo.h"
44 #include "v8threads.h" 44 #include "v8threads.h"
45 #include "smart-pointer.h"
45 46
46 namespace v8 { namespace internal { 47 namespace v8 { namespace internal {
47 48
48 49
49 #define RUNTIME_ASSERT(value) do { \ 50 #define RUNTIME_ASSERT(value) do { \
50 if (!(value)) return IllegalOperation(); \ 51 if (!(value)) return IllegalOperation(); \
51 } while (false) 52 } while (false)
52 53
53 // Cast the given object to a value of the specified type and store 54 // Cast the given object to a value of the specified type and store
54 // it in a variable with the given name. If the object is not of the 55 // it in a variable with the given name. If the object is not of the
(...skipping 863 matching lines...) Expand 10 before | Expand all | Expand 10 after
918 uint32_t code; 919 uint32_t code;
919 if (Array::IndexFromObject(args[0], &code)) { 920 if (Array::IndexFromObject(args[0], &code)) {
920 if (code <= 0xffff) { 921 if (code <= 0xffff) {
921 return Heap::LookupSingleCharacterStringFromCode(code); 922 return Heap::LookupSingleCharacterStringFromCode(code);
922 } 923 }
923 } 924 }
924 return Heap::empty_string(); 925 return Heap::empty_string();
925 } 926 }
926 927
927 928
928 static inline void ComputeKMPNextTable(String* pattern, int next_table[]) { 929 static Vector<const char> ToAsciiVector(String *string) {
930 ASSERT(string->IsAscii());
931 ASSERT(string->IsFlat());
932
933 int offset = 0;
934 int length = string->length();
935 for(;;) {
Christian Plesner Hansen 2008/10/06 12:47:19 Flat strings are relatively simple in structure so
Lasse Reichstein 2008/10/07 09:16:28 Done.
936 switch(string->representation_tag()) {
937 case kSeqStringTag: {
938 AsciiString* seq = AsciiString::cast(string);
939 char* start = reinterpret_cast<char*>(seq->GetCharsAddress());
940 return Vector<const char>(start + offset, length);
941 }
942 case kExternalStringTag: {
943 ExternalAsciiString* ext = ExternalAsciiString::cast(string);
944 const char* start = ext->resource()->data();
945 return Vector<const char>(start + offset, length);
946 }
947 case kConsStringTag: {
948 ConsString* cons = ConsString::cast(string);
949 ASSERT(String::cast(cons->second())->length() == 0);
950 string = String::cast(cons->first());
951 continue;
952 }
953 case kSlicedStringTag: {
954 SlicedString* sliced = SlicedString::cast(string);
955 offset += sliced->start();
956 string = String::cast(sliced->buffer());
957 continue;
958 }
959 default:
960 UNREACHABLE();
961 }
962 }
963 }
964
965
966 static Vector<const uc16> ToUC16Vector(String *string) {
967 ASSERT(string->IsTwoByteString());
968 ASSERT(string->IsFlat());
969
970 int offset = 0;
971 int length = string->length();
972 for(;;) {
973 switch(string->representation_tag()) {
974 case kSeqStringTag: {
975 TwoByteString* seq = TwoByteString::cast(string);
976 uc16* start = reinterpret_cast<uc16*>(seq->GetCharsAddress());
977 return Vector<const uc16>(start + offset, length);
978 }
979 case kExternalStringTag: {
980 ExternalTwoByteString* ext = ExternalTwoByteString::cast(string);
981 const uc16* start =
982 reinterpret_cast<const uc16*>(ext->resource()->data());
983 return Vector<const uc16>(start + offset, length);
984 }
985 case kConsStringTag: {
986 ConsString* cons = ConsString::cast(string);
987 ASSERT(String::cast(cons->second())->length() == 0);
988 string = String::cast(cons->first());
989 continue;
990 }
991 case kSlicedStringTag: {
992 SlicedString* sliced = SlicedString::cast(string);
993 offset += sliced->start();
994 string = String::cast(sliced->buffer());
995 continue;
996 }
997 default:
998 UNREACHABLE();
999 }
1000 }
1001 }
1002
1003
1004 template <typename schar, typename pchar>
1005 static int SingleCharIndexOf(
1006 Vector<const schar> string,
Christian Plesner Hansen 2008/10/06 12:47:19 This argument should be on the same line as the fu
Lasse Reichstein 2008/10/07 09:16:28 Done.
1007 pchar pattern_char,
1008 int start_index) {
1009 for (int i = start_index, n = string.length(); i < n; i++) {
Christian Plesner Hansen 2008/10/06 12:47:19 I would move the declaration of n out before the l
Lasse Reichstein 2008/10/07 09:16:28 I consider for (int i = ..., n = ...; i<n; i++) a
1010 if (pattern_char == string[i]) {
1011 return i;
1012 }
1013 }
1014 return -1;
1015 }
1016
1017 // Trivial string search for shorter strings.
1018 template <typename pchar, typename schar>
1019 static int SimpleIndexOf(
1020 Vector<const schar> subject,
1021 Vector<const pchar> pattern,
1022 int start_index) {
1023 int pattern_length = pattern.length();
1024 int subject_length = subject.length();
1025 // We know our pattern is at least 2 characters, we cache the first so
1026 // the common case of the first character not matching is faster.
1027 pchar pattern_first_char = pattern[0];
1028 for (int i = start_index, n = subject_length - pattern_length; i <= n; i++) {
Christian Plesner Hansen 2008/10/06 12:47:19 I would move the declaration of n out just before
1029 if (subject[i] != pattern_first_char) continue;
1030
1031 bool failure = false;
1032 for (int j = 1; j < pattern_length; j++) {
1033 if (pattern[j] != subject[j+i]) {
1034 failure = true;
1035 break;
1036 }
1037 }
1038 if (!failure) {
1039 return i;
1040 }
1041 }
1042 return -1;
1043 }
1044
1045 // Full KMP pattern match.
1046 template <typename schar, typename pchar> // Pattern & subject char types
1047 static int KMPIndexOf(
1048 Vector<const schar> subject,
1049 Vector<const pchar> pattern,
1050 int start_index) {
1051 int subject_length = subject.length();
1052 int pattern_length = pattern.length();
1053 SmartPointer<int> next_table(NewArray<int>(pattern_length));
1054
1055 // Compute KMP "next" table
929 int i = 0; 1056 int i = 0;
930 int j = -1; 1057 int j = -1;
931 next_table[0] = -1; 1058 next_table[0] = -1;
932 1059
933 Access<StringInputBuffer> buffer(&string_input_buffer); 1060 int length = pattern.length();
Christian Plesner Hansen 2008/10/06 12:47:19 Isn't this identical to pattern_length?
Lasse Reichstein 2008/10/07 09:16:28 Indeed, it should be removed. Done.
934 buffer->Reset(pattern); 1061 pchar p = pattern[0];
935 int length = pattern->length();
936 uint16_t p = buffer->GetNext();
937 while (i < length - 1) { 1062 while (i < length - 1) {
938 while (j > -1 && p != pattern->Get(j)) { 1063 while (j > -1 && p != pattern[j]) {
939 j = next_table[j]; 1064 j = next_table[j];
940 } 1065 }
941 i++; 1066 i++;
942 j++; 1067 j++;
943 p = buffer->GetNext(); 1068 p = pattern[i];
944 if (p == pattern->Get(j)) { 1069 if (p == pattern[j]) {
945 next_table[i] = next_table[j]; 1070 next_table[i] = next_table[j];
946 } else { 1071 } else {
947 next_table[i] = j; 1072 next_table[i] = j;
948 } 1073 }
949 } 1074 }
950 } 1075
951 1076 // Search using the 'next' table.
952 1077 int pattern_index = 0;
953 int Runtime::StringMatchKmp(String* sub, String* pat, int start_index) { 1078 int subject_index = start_index;
954 sub->TryFlatten(); 1079 while (subject_index < subject_length) {
955 pat->TryFlatten(); 1080 schar subject_char = subject[subject_index];
956 1081 while (pattern_index > -1 && pattern[pattern_index] != subject_char) {
957 int subject_length = sub->length(); 1082 pattern_index = next_table[pattern_index];
958 int pattern_length = pat->length(); 1083 }
959 1084 pattern_index++;
960 if (start_index > subject_length) return -1; 1085 subject_index++;
961 if (pattern_length == 0) return start_index; 1086 if (pattern_index >= pattern_length) {
1087 return subject_index - pattern_index;
1088 }
1089 }
1090 return -1;
1091 }
1092
1093 // Dispatch to different algorithms for different length of pattern/subject
1094 template <typename schar, typename pchar>
1095 static int StringMatchKMP(Vector<const schar> sub, Vector<const pchar> pat, int start_index) {
962 1096
963 // Searching for one specific character is common. For one 1097 // Searching for one specific character is common. For one
964 // character patterns the KMP algorithm is guaranteed to slow down 1098 // character patterns the KMP algorithm is guaranteed to slow down
965 // the search, so we just run through the subject string. 1099 // the search, so we just run through the subject string.
966 if (pattern_length == 1) { 1100 if (pat.length() == 1) {
967 uint16_t pattern_char = pat->Get(0); 1101 return SingleCharIndexOf(sub, pat[0], start_index);
968 for (int i = start_index; i < subject_length; i++) {
969 if (sub->Get(i) == pattern_char) {
970 return i;
971 }
972 }
973 return -1;
974 } 1102 }
975 1103
976 // For small searches, KMP is not worth the setup overhead. 1104 // For small searches, KMP is not worth the setup overhead.
977 if (subject_length < 100) { 1105 if (sub.length() - start_index < 100) {
978 // We know our pattern is at least 2 characters, we cache the first so 1106 return SimpleIndexOf(sub, pat, start_index);
979 // the common case of the first character not matching is faster.
980 uint16_t pattern_first_char = pat->Get(0);
981 for (int i = start_index; i + pattern_length <= subject_length; i++) {
982 if (sub->Get(i) != pattern_first_char) continue;
983
984 for (int j = 1; j < pattern_length; j++) {
985 if (pat->Get(j) != sub->Get(j + i)) break;
986 if (j == pattern_length - 1) return i;
987 }
988 }
989 return -1;
990 } 1107 }
991 1108
992 // For patterns with a larger length we use the KMP algorithm. 1109 // For patterns with a larger length we use the KMP algorithm.
993 // 1110 return KMPIndexOf(sub, pat, start_index);
994 // Compute the 'next' table. 1111 }
995 int* next_table = NewArray<int>(pattern_length); 1112
996 ComputeKMPNextTable(pat, next_table); 1113 // Perform string match of pattern on subject, starting at start index.
997 // Search using the 'next' table. 1114 // Caller must ensure that 0 <= start_index <= sub->length(),
998 int pattern_index = 0; 1115 // and should check that pat->length() + start_index <= sub->length()
999 // We would like to use StringInputBuffer here, but it does not have 1116 int Runtime::StringMatchKmp(String* sub, String* pat, int start_index) {
1000 // the ability to start anywhere but the first character of a 1117 ASSERT(0 <= start_index);
1001 // string. It would be nice to have efficient forward-seeking 1118 ASSERT(start_index <= sub->length());
1002 // support on StringInputBuffers. 1119
1003 int subject_index = start_index; 1120 if (pat->length() == 0) return start_index;
1004 while (subject_index < subject_length) { 1121
1005 uint16_t subject_char = sub->Get(subject_index); 1122 sub->TryFlatten();
Lasse Reichstein 2008/10/07 09:16:28 Doesn't handle failure to flatten. Changed to the
1006 while (pattern_index > -1 && pat->Get(pattern_index) != subject_char) { 1123 pat->TryFlatten();
1007 pattern_index = next_table[pattern_index]; 1124 AssertNoAllocation heap_allocation_block; // ensure vectors stay valid
1008 } 1125 // dispatch on type of strings
1009 pattern_index++; 1126 if (pat->is_ascii()) {
1010 subject_index++; 1127 Vector<const char> pat_vector = ToAsciiVector(pat);
1011 if (pattern_index >= pattern_length) { 1128 if (sub->is_ascii()) {
1012 DeleteArray(next_table); 1129 return StringMatchKMP(ToAsciiVector(sub), pat_vector, start_index);
1013 return subject_index - pattern_index; 1130 }
1014 } 1131 return StringMatchKMP(ToUC16Vector(sub), pat_vector, start_index);
1015 } 1132 }
1016 DeleteArray(next_table); 1133 Vector<const uc16> pat_vector = ToUC16Vector(pat);
1017 return -1; 1134 if (sub->is_ascii()) {
1135 return StringMatchKMP(ToAsciiVector(sub), pat_vector, start_index);
1136 }
1137 return StringMatchKMP(ToUC16Vector(sub), pat_vector, start_index);
1018 } 1138 }
1019 1139
1020 1140
1021 static Object* Runtime_StringIndexOf(Arguments args) { 1141 static Object* Runtime_StringIndexOf(Arguments args) {
1022 NoHandleAllocation ha; 1142 NoHandleAllocation ha;
1023 ASSERT(args.length() == 3); 1143 ASSERT(args.length() == 3);
1024 1144
1025 CONVERT_CHECKED(String, sub, args[0]); 1145 CONVERT_CHECKED(String, sub, args[0]);
1026 CONVERT_CHECKED(String, pat, args[1]); 1146 CONVERT_CHECKED(String, pat, args[1]);
1027 Object* index = args[2]; 1147 Object* index = args[2];
(...skipping 4022 matching lines...) Expand 10 before | Expand all | Expand 10 after
5050 5170
5051 void Runtime::PerformGC(Object* result) { 5171 void Runtime::PerformGC(Object* result) {
5052 Failure* failure = Failure::cast(result); 5172 Failure* failure = Failure::cast(result);
5053 // Try to do a garbage collection; ignore it if it fails. The C 5173 // Try to do a garbage collection; ignore it if it fails. The C
5054 // entry stub will throw an out-of-memory exception in that case. 5174 // entry stub will throw an out-of-memory exception in that case.
5055 Heap::CollectGarbage(failure->requested(), failure->allocation_space()); 5175 Heap::CollectGarbage(failure->requested(), failure->allocation_space());
5056 } 5176 }
5057 5177
5058 5178
5059 } } // namespace v8::internal 5179 } } // namespace v8::internal
OLDNEW
« no previous file with comments | « src/objects-inl.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698