Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(904)

Side by Side Diff: src/runtime.cc

Issue 7297: * Added badness-check to trigger computation of good-suffix table in Boyer-Moore. (Closed)
Patch Set: Created 12 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2006-2008 the V8 project authors. All rights reserved. 1 // Copyright 2006-2008 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 980 matching lines...) Expand 10 before | Expand all | Expand 10 after
991 991
992 // buffers reused by BoyerMoore 992 // buffers reused by BoyerMoore
993 static int bad_char_occurence[kBMAlphabetSize]; 993 static int bad_char_occurence[kBMAlphabetSize];
994 static BMGoodSuffixBuffers bmgs_buffers; 994 static BMGoodSuffixBuffers bmgs_buffers;
995 995
996 // Restricted Boyer-Moore string matching. Restricts tables to a 996 // Restricted Boyer-Moore string matching. Restricts tables to a
997 // suffix of long pattern strings and handles only equivalence classes 997 // suffix of long pattern strings and handles only equivalence classes
998 // of the full alphabet. This allows us to ensure that tables take only 998 // of the full alphabet. This allows us to ensure that tables take only
999 // a fixed amount of space. 999 // a fixed amount of space.
1000 template <typename schar, typename pchar> 1000 template <typename schar, typename pchar>
1001 static int BoyerMooreIndexOf(Vector<const schar> subject, 1001 static int BoyerMooreIndexOf(Vector<const schar> subject,
Erik Corry 2008/10/14 12:42:05 This function is too big now. It has to be split
1002 Vector<const pchar> pattern, 1002 Vector<const pchar> pattern,
1003 int start_index) { 1003 int start_index) {
1004 int m = pattern.length(); 1004 int m = pattern.length();
1005 int n = subject.length(); 1005 int n = subject.length();
1006 1006
1007 // Only preprocess at most kBMMaxShift last characters of pattern. 1007 // Only preprocess at most kBMMaxShift last characters of pattern.
1008 int start = m < kBMMaxShift ? 0 : m - kBMMaxShift; 1008 int start = m < kBMMaxShift ? 0 : m - kBMMaxShift;
1009 int len = m - start; 1009 int len = m - start;
1010 1010
1011 // Run forwards to populate bad_char_table, so that *last* instance 1011 // Run forwards to populate bad_char_table, so that *last* instance
1012 // of character equivalence class is the one registered. 1012 // of character equivalence class is the one registered.
1013 // Notice: Doesn't include last character. 1013 // Notice: Doesn't include last character.
1014 for (int i = 0; i < kBMAlphabetSize; i++) { 1014 for (int i = 0; i < kBMAlphabetSize; i++) {
1015 bad_char_occurence[i] = start - 1; 1015 bad_char_occurence[i] = start - 1;
1016 } 1016 }
1017 for (int i = start; i < m; i++) { 1017 for (int i = start; i < m; i++) {
1018 uc32 c = pattern[i]; 1018 uc32 c = pattern[i];
1019 bad_char_occurence[c % kBMAlphabetSize] = i; 1019 bad_char_occurence[c % kBMAlphabetSize] = i;
1020 if (sizeof(schar) == 1 && 1020 if (sizeof(schar) == 1 &&
1021 sizeof(pchar) > 1 && 1021 sizeof(pchar) > 1 &&
1022 c > String::kMaxAsciiCharCode) { 1022 c > String::kMaxAsciiCharCode) {
1023 return -1; 1023 return -1;
1024 } 1024 }
1025 } 1025 }
1026 // End of Bad Char computation. 1026 // End of Bad Char computation.
1027 1027
1028 // Compute Good Suffix shift table.
1029 bmgs_buffers.init(m);
1030 1028
1031 bmgs_buffers.shift(m-1) = 1; 1029
1032 bmgs_buffers.suffix(m) = m + 1; 1030 int badness = 0; // How bad we are doing without a good-suffix table.
1033 pchar last_char = pattern[m - 1]; 1031 int idx; // No matches found prior to this index.
1034 int suffix = m + 1; 1032 // Perform search
1035 for (int i = m; i > start;) { 1033 for (idx = start_index; idx <= n - m;) {
1036 for (pchar c = pattern[i - 1]; suffix <= m && c != pattern[suffix - 1];) { 1034 int j = m - 1;
1037 if (bmgs_buffers.shift(suffix) == len) { 1035 schar c;
1038 bmgs_buffers.shift(suffix) = suffix - i; 1036 while (j >= 0 && pattern[j] == (c = subject[idx + j])) j--;
1037 if (j < 0) {
1038 return idx;
1039 } else {
1040 int bc_occ = bad_char_occurence[c % kBMAlphabetSize];
1041 int shift = bc_occ < j ? j - bc_occ : 1;
1042 idx += shift;
1043 // Badness increases by the number of characters we have
1044 // checked, and decreases by the number of characters we
1045 // can skip by shifting. It's a measure of how we are doing
1046 // compared to reading each character exactly once.
1047 badness += (m - j) - shift;
1048 if (badness > m) break;
1049 }
1050 }
1051
1052 // If we are not done, we got here because we should build the Good Suffix
1053 // table and continue searching.
1054 if (idx <= n - m) {
1055 // Compute Good Suffix tables.
1056 bmgs_buffers.init(m);
1057
1058 bmgs_buffers.shift(m-1) = 1;
1059 bmgs_buffers.suffix(m) = m + 1;
1060 pchar last_char = pattern[m - 1];
1061 int suffix = m + 1;
1062 for (int i = m; i > start;) {
1063 for (pchar c = pattern[i - 1]; suffix <= m && c != pattern[suffix - 1];) {
1064 if (bmgs_buffers.shift(suffix) == len) {
1065 bmgs_buffers.shift(suffix) = suffix - i;
1066 }
1067 suffix = bmgs_buffers.suffix(suffix);
1039 } 1068 }
1040 suffix = bmgs_buffers.suffix(suffix); 1069 i--;
1041 } 1070 suffix--;
1042 i--; 1071 bmgs_buffers.suffix(i) = suffix;
1043 suffix--; 1072 if (suffix == m) {
1044 bmgs_buffers.suffix(i) = suffix; 1073 // no suffix to extend, so we check against last_char only.
1045 if (suffix == m) { 1074 while (i > start && pattern[i - 1] != last_char) {
1046 // no suffix to extend, so we check against last_char only. 1075 if (bmgs_buffers.shift(m) == len) {
1047 while (i > start && pattern[i - 1] != last_char) { 1076 bmgs_buffers.shift(m) = m - i;
1048 if (bmgs_buffers.shift(m) == len) { 1077 }
1049 bmgs_buffers.shift(m) = m - i; 1078 i--;
1079 bmgs_buffers.suffix(i) = m;
1050 } 1080 }
1051 i--; 1081 if (i > start) {
1052 bmgs_buffers.suffix(i) = m; 1082 i--;
1053 } 1083 suffix--;
1054 if (i > start) { 1084 bmgs_buffers.suffix(i) = suffix;
1055 i--; 1085 }
1056 suffix--;
1057 bmgs_buffers.suffix(i) = suffix;
1058 } 1086 }
1059 } 1087 }
1060 } 1088 if (suffix < m) {
1061 if (suffix < m) { 1089 for (int i = start; i <= m; i++) {
1062 for (int i = start; i <= m; i++) { 1090 if (bmgs_buffers.shift(i) == len) {
1063 if (bmgs_buffers.shift(i) == len) { 1091 bmgs_buffers.shift(i) = suffix - start;
1064 bmgs_buffers.shift(i) = suffix - start; 1092 }
1065 } 1093 if (i == suffix) {
1066 if (i == suffix) { 1094 suffix = bmgs_buffers.suffix(suffix);
1067 suffix = bmgs_buffers.suffix(suffix); 1095 }
1068 } 1096 }
1069 } 1097 }
1098 // End of Good Suffix computation.
1099
1100 // Continue search from i.
1101 do {
1102 int j = m - 1;
1103 schar c;
1104 while (j >= 0 && pattern[j] == (c = subject[idx + j])) j--;
1105 if (j < 0) {
1106 return idx;
1107 } else if (j < start) {
1108 // we have matched more than our tables allow us to be smart about.
1109 idx += 1;
1110 } else {
1111 int gs_shift = bmgs_buffers.shift(j + 1);
1112 int bc_occ = bad_char_occurence[c % kBMAlphabetSize];
1113 int bc_shift = j - bc_occ;
1114 idx += (gs_shift > bc_shift) ? gs_shift : bc_shift;
1115 }
1116 } while (idx <= n - m);
1070 } 1117 }
1071 // End of Good Suffix computation.
1072 1118
1073
1074 // Perform search
1075 for (int i = start_index; i <= n - m;) {
1076 int j = m - 1;
1077 schar c;
1078 while (j >= 0 && pattern[j] == (c = subject[i + j])) j--;
1079 if (j < 0) {
1080 return i;
1081 } else if (j < start) {
1082 // we have matched more than our tables allow us to be smart about.
1083 i += 1;
1084 } else {
1085 int gs_shift = bmgs_buffers.shift(j + 1);
1086 int bc_occ = bad_char_occurence[c % kBMAlphabetSize];
1087 int bc_shift = j - bc_occ;
1088 i += (gs_shift > bc_shift) ? gs_shift : bc_shift;
1089 }
1090 }
1091 return -1; 1119 return -1;
1092 } 1120 }
1093 1121
1094 template <typename schar, typename pchar> 1122 template <typename schar, typename pchar>
1095 static int SingleCharIndexOf(Vector<const schar> string, 1123 static int SingleCharIndexOf(Vector<const schar> string,
1096 pchar pattern_char, 1124 pchar pattern_char,
1097 int start_index) { 1125 int start_index) {
1098 for (int i = start_index, n = string.length(); i < n; i++) { 1126 for (int i = start_index, n = string.length(); i < n; i++) {
1099 if (pattern_char == string[i]) { 1127 if (pattern_char == string[i]) {
1100 return i; 1128 return i;
1101 } 1129 }
1102 } 1130 }
1103 return -1; 1131 return -1;
1104 } 1132 }
1105 1133
1106 // Trivial string search for shorter strings. 1134 // Trivial string search for shorter strings.
1107 template <typename pchar, typename schar> 1135 template <typename pchar, typename schar>
1108 static int SimpleIndexOf(Vector<const schar> subject, 1136 static int SimpleIndexOf(Vector<const schar> subject,
1109 Vector<const pchar> pattern, 1137 Vector<const pchar> pattern,
1110 int start_index) { 1138 int start_index) {
1111 int pattern_length = pattern.length(); 1139 int pattern_length = pattern.length();
1112 int subject_length = subject.length(); 1140 int subject_length = subject.length();
1113 // We know our pattern is at least 2 characters, we cache the first so 1141 // We know our pattern is at least 2 characters, we cache the first so
1114 // the common case of the first character not matching is faster. 1142 // the common case of the first character not matching is faster.
1115 pchar pattern_first_char = pattern[0]; 1143 pchar pattern_first_char = pattern[0];
1116 for (int i = start_index, n = subject_length - pattern_length; i <= n; i++) { 1144 for (int i = start_index, n = subject_length - pattern_length; i <= n; i++) {
1117 if (subject[i] != pattern_first_char) continue; 1145 if (subject[i] != pattern_first_char) continue;
1118 int j = 1; 1146 int j = 1;
1119 while (pattern[j] == subject[j+i]) { 1147 do {
1148 if (pattern[j] != subject[j+i]) {
1149 break;
1150 }
1120 j++; 1151 j++;
1121 if (j == pattern_length) { 1152 } while (j < pattern_length);
1122 return i; 1153 if (j == pattern_length) {
1123 } 1154 return i;
1124 } 1155 }
1125 } 1156 }
1126 return -1; 1157 return -1;
1127 } 1158 }
1128 1159
1129 // Dispatch to different algorithms for different length of pattern/subject 1160 // Dispatch to different algorithms for different length of pattern/subject
1130 template <typename schar, typename pchar> 1161 template <typename schar, typename pchar>
1131 static int StringMatchStrategy(Vector<const schar> sub, 1162 static int StringMatchStrategy(Vector<const schar> sub,
1132 Vector<const pchar> pat, 1163 Vector<const pchar> pat,
1133 int start_index) { 1164 int start_index) {
1134 int pattern_length = pat.length(); 1165 int pattern_length = pat.length();
1135 ASSERT(pattern_length > 1); 1166 ASSERT(pattern_length > 1);
1136 1167
1137 // For small searches, a complex sort is not worth the setup overhead. 1168 // For small searches, a complex sort is not worth the setup overhead.
1138 int subject_length = sub.length() - start_index; 1169 int subject_length = sub.length() - start_index;
1139 if (subject_length < 100 || pattern_length < 4) { 1170 if (subject_length < 100 || pattern_length < 8) {
1140 return SimpleIndexOf(sub, pat, start_index); 1171 return SimpleIndexOf(sub, pat, start_index);
1141 } 1172 }
1142 1173
1143 return BoyerMooreIndexOf(sub, pat, start_index); 1174 return BoyerMooreIndexOf(sub, pat, start_index);
1144 } 1175 }
1145 1176
1146 // Perform string match of pattern on subject, starting at start index. 1177 // Perform string match of pattern on subject, starting at start index.
1147 // Caller must ensure that 0 <= start_index <= sub->length(), 1178 // Caller must ensure that 0 <= start_index <= sub->length(),
1148 // and should check that pat->length() + start_index <= sub->length() 1179 // and should check that pat->length() + start_index <= sub->length()
1149 int Runtime::StringMatch(Handle<String> sub, 1180 int Runtime::StringMatch(Handle<String> sub,
(...skipping 4116 matching lines...) Expand 10 before | Expand all | Expand 10 after
5266 5297
5267 void Runtime::PerformGC(Object* result) { 5298 void Runtime::PerformGC(Object* result) {
5268 Failure* failure = Failure::cast(result); 5299 Failure* failure = Failure::cast(result);
5269 // Try to do a garbage collection; ignore it if it fails. The C 5300 // Try to do a garbage collection; ignore it if it fails. The C
5270 // entry stub will throw an out-of-memory exception in that case. 5301 // entry stub will throw an out-of-memory exception in that case.
5271 Heap::CollectGarbage(failure->requested(), failure->allocation_space()); 5302 Heap::CollectGarbage(failure->requested(), failure->allocation_space());
5272 } 5303 }
5273 5304
5274 5305
5275 } } // namespace v8::internal 5306 } } // namespace v8::internal
OLDNEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698