Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(8)

Side by Side Diff: src/runtime.cc

Issue 7621: * Optimized the bad-char table of Boyer-Moore (Closed)
Patch Set: Created 12 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2006-2008 the V8 project authors. All rights reserved. 1 // Copyright 2006-2008 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 947 matching lines...) Expand 10 before | Expand all | Expand 10 after
958 static const int kBMMaxShift = 0xff; 958 static const int kBMMaxShift = 0xff;
959 // Reduce alphabet to this size. 959 // Reduce alphabet to this size.
960 static const int kBMAlphabetSize = 0x100; 960 static const int kBMAlphabetSize = 0x100;
961 // For patterns below this length, the skip length of Boyer-Moore is too short 961 // For patterns below this length, the skip length of Boyer-Moore is too short
962 // to compensate for the algorithmic overhead compared to simple brute force. 962 // to compensate for the algorithmic overhead compared to simple brute force.
963 static const int kBMMinPatternLength = 5; 963 static const int kBMMinPatternLength = 5;
964 964
965 // Holds the two buffers used by Boyer-Moore string search's Good Suffix 965 // Holds the two buffers used by Boyer-Moore string search's Good Suffix
966 // shift. Only allows the last kBMMaxShift characters of the needle 966 // shift. Only allows the last kBMMaxShift characters of the needle
967 // to be indexed. 967 // to be indexed.
968 class BMGoodSuffixBuffers: public AllStatic { 968 class BMGoodSuffixBuffers {
969 public: 969 public:
970 BMGoodSuffixBuffers() {} 970 BMGoodSuffixBuffers() {}
971 inline void init(int needle_length) { 971 inline void init(int needle_length) {
972 ASSERT(needle_length > 1); 972 ASSERT(needle_length > 1);
973 int start = needle_length < kBMMaxShift ? 0 : needle_length - kBMMaxShift; 973 int start = needle_length < kBMMaxShift ? 0 : needle_length - kBMMaxShift;
974 int len = needle_length - start; 974 int len = needle_length - start;
975 biased_suffixes_ = suffixes_ - start; 975 biased_suffixes_ = suffixes_ - start;
976 biased_good_suffix_shift_ = good_suffix_shift_ - start; 976 biased_good_suffix_shift_ = good_suffix_shift_ - start;
977 for (int i = 0; i <= len; i++) { 977 for (int i = 0; i <= len; i++) {
978 good_suffix_shift_[i] = len; 978 good_suffix_shift_[i] = len;
(...skipping 19 matching lines...) Expand all
998 static int bad_char_occurence[kBMAlphabetSize]; 998 static int bad_char_occurence[kBMAlphabetSize];
999 static BMGoodSuffixBuffers bmgs_buffers; 999 static BMGoodSuffixBuffers bmgs_buffers;
1000 1000
1001 // Compute the bad-char table for Boyer-Moore in the static buffer. 1001 // Compute the bad-char table for Boyer-Moore in the static buffer.
1002 template <typename pchar> 1002 template <typename pchar>
1003 static void BoyerMoorePopulateBadCharTable(Vector<const pchar> pattern, 1003 static void BoyerMoorePopulateBadCharTable(Vector<const pchar> pattern,
1004 int start) { 1004 int start) {
1005 // Run forwards to populate bad_char_table, so that *last* instance 1005 // Run forwards to populate bad_char_table, so that *last* instance
1006 // of character equivalence class is the one registered. 1006 // of character equivalence class is the one registered.
1007 // Notice: Doesn't include the last character. 1007 // Notice: Doesn't include the last character.
1008 for (int i = 0; i < kBMAlphabetSize; i++) { 1008 int table_size = (sizeof(pchar) == 1) ? String::kMaxAsciiCharCode + 1
1009 bad_char_occurence[i] = start - 1; 1009 : kBMAlphabetSize;
1010 if (start == 0) { // All patterns less than kBMMaxShift in length.
1011 memset(bad_char_occurence, -1, table_size * sizeof(*bad_char_occurence));
1012 } else {
1013 for (int i = 0; i < table_size; i++) {
1014 bad_char_occurence[i] = start - 1;
1015 }
1010 } 1016 }
1011 for (int i = start; i < pattern.length() - 1; i++) { 1017 for (int i = start; i < pattern.length() - 1; i++) {
1012 pchar c = pattern[i]; 1018 pchar c = pattern[i];
1013 int bucket = c % kBMAlphabetSize; 1019 int bucket = (sizeof(pchar) ==1) ? c : c % kBMAlphabetSize;
1014 bad_char_occurence[bucket] = i; 1020 bad_char_occurence[bucket] = i;
1015 } 1021 }
1016 } 1022 }
1017 1023
1018 template <typename pchar> 1024 template <typename pchar>
1019 static void BoyerMoorePopulateGoodSuffixTable(Vector<const pchar> pattern, 1025 static void BoyerMoorePopulateGoodSuffixTable(Vector<const pchar> pattern,
1020 int start) { 1026 int start) {
1021 int m = pattern.length(); 1027 int m = pattern.length();
1022 int len = m - start; 1028 int len = m - start;
1023 // Compute Good Suffix tables. 1029 // Compute Good Suffix tables.
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after
1058 if (bmgs_buffers.shift(i) == len) { 1064 if (bmgs_buffers.shift(i) == len) {
1059 bmgs_buffers.shift(i) = suffix - start; 1065 bmgs_buffers.shift(i) = suffix - start;
1060 } 1066 }
1061 if (i == suffix) { 1067 if (i == suffix) {
1062 suffix = bmgs_buffers.suffix(suffix); 1068 suffix = bmgs_buffers.suffix(suffix);
1063 } 1069 }
1064 } 1070 }
1065 } 1071 }
1066 } 1072 }
1067 1073
1074 template <typename schar, typename pchar>
1075 static inline int CharOccurence(int char_code) {
1076 if (sizeof(schar) == 1) {
1077 return bad_char_occurence[char_code];
1078 }
1079 if (sizeof(pchar) == 1) {
1080 if (char_code > String::kMaxAsciiCharCode) {
1081 return -1;
1082 }
1083 return bad_char_occurence[char_code];
1084 }
1085 return bad_char_occurence[char_code % kBMAlphabetSize];
1086 }
1068 1087
1069 // Restricted simplified Boyer-Moore string matching. Restricts tables to a 1088 // Restricted simplified Boyer-Moore string matching. Restricts tables to a
1070 // suffix of long pattern strings and handles only equivalence classes 1089 // suffix of long pattern strings and handles only equivalence classes
1071 // of the full alphabet. This allows us to ensure that tables take only 1090 // of the full alphabet. This allows us to ensure that tables take only
1072 // a fixed amount of space. 1091 // a fixed amount of space.
1073 template <typename schar, typename pchar> 1092 template <typename schar, typename pchar>
1074 static int BoyerMooreSimplified(Vector<const schar> subject, 1093 static int BoyerMooreSimplified(Vector<const schar> subject,
1075 Vector<const pchar> pattern, 1094 Vector<const pchar> pattern,
1076 int start_index, 1095 int start_index,
1077 bool& complete) { 1096 bool& complete) {
1078 int n = subject.length(); 1097 int n = subject.length();
1079 int m = pattern.length(); 1098 int m = pattern.length();
1080 // Only preprocess at most kBMMaxShift last characters of pattern. 1099 // Only preprocess at most kBMMaxShift last characters of pattern.
1081 int start = m < kBMMaxShift ? 0 : m - kBMMaxShift; 1100 int start = m < kBMMaxShift ? 0 : m - kBMMaxShift;
1082 1101
1083 BoyerMoorePopulateBadCharTable(pattern, start); 1102 BoyerMoorePopulateBadCharTable(pattern, start);
1084 1103
1085 int badness = -m; // How bad we are doing without a good-suffix table. 1104 int badness = -m; // How bad we are doing without a good-suffix table.
1086 int idx; // No matches found prior to this index. 1105 int idx; // No matches found prior to this index.
1087 pchar last_char = pattern[m - 1]; 1106 pchar last_char = pattern[m - 1];
1088 // Perform search 1107 // Perform search
1089 for (idx = start_index; idx <= n - m;) { 1108 for (idx = start_index; idx <= n - m;) {
1090 int j = m - 1; 1109 int j = m - 1;
1091 int c; 1110 int c;
1092 while (last_char != (c = subject[idx + j])) { 1111 while (last_char != (c = subject[idx + j])) {
1093 int bc_occ = bad_char_occurence[c % kBMAlphabetSize]; 1112 int bc_occ = CharOccurence<schar, pchar>(c);
1094 int shift = j - bc_occ; 1113 int shift = j - bc_occ;
1095 idx += shift; 1114 idx += shift;
1096 badness += 1 - shift; // at most zero, so badness cannot increase. 1115 badness += 1 - shift; // at most zero, so badness cannot increase.
1097 if (idx > n - m) { 1116 if (idx > n - m) {
1098 complete = true; 1117 complete = true;
1099 return -1; 1118 return -1;
1100 } 1119 }
1101 } 1120 }
1102 j--; 1121 j--;
1103 while (j >= 0 && pattern[j] == (c = subject[idx + j])) j--; 1122 while (j >= 0 && pattern[j] == (c = subject[idx + j])) j--;
1104 if (j < 0) { 1123 if (j < 0) {
1105 complete = true; 1124 complete = true;
1106 return idx; 1125 return idx;
1107 } else { 1126 } else {
1108 int bc_occ = bad_char_occurence[c % kBMAlphabetSize]; 1127 int bc_occ = CharOccurence<schar, pchar>(c);
1109 int shift = bc_occ < j ? j - bc_occ : 1; 1128 int shift = bc_occ < j ? j - bc_occ : 1;
1110 idx += shift; 1129 idx += shift;
1111 // Badness increases by the number of characters we have 1130 // Badness increases by the number of characters we have
1112 // checked, and decreases by the number of characters we 1131 // checked, and decreases by the number of characters we
1113 // can skip by shifting. It's a measure of how we are doing 1132 // can skip by shifting. It's a measure of how we are doing
1114 // compared to reading each character exactly once. 1133 // compared to reading each character exactly once.
1115 badness += (m - j) - shift; 1134 badness += (m - j) - shift;
1116 if (badness > 0) { 1135 if (badness > 0) {
1117 complete = false; 1136 complete = false;
1118 return idx; 1137 return idx;
(...skipping 15 matching lines...) Expand all
1134 int start = m < kBMMaxShift ? 0 : m - kBMMaxShift; 1153 int start = m < kBMMaxShift ? 0 : m - kBMMaxShift;
1135 1154
1136 // Build the Good Suffix table and continue searching. 1155 // Build the Good Suffix table and continue searching.
1137 BoyerMoorePopulateGoodSuffixTable(pattern, start); 1156 BoyerMoorePopulateGoodSuffixTable(pattern, start);
1138 pchar last_char = pattern[m - 1]; 1157 pchar last_char = pattern[m - 1];
1139 // Continue search from i. 1158 // Continue search from i.
1140 do { 1159 do {
1141 int j = m - 1; 1160 int j = m - 1;
1142 schar c; 1161 schar c;
1143 while (last_char != (c = subject[idx + j])) { 1162 while (last_char != (c = subject[idx + j])) {
1144 int shift = j - bad_char_occurence[c % kBMAlphabetSize]; 1163 int shift = j - CharOccurence<schar, pchar>(c);
1145 idx += shift; 1164 idx += shift;
1146 if (idx > n - m) { 1165 if (idx > n - m) {
1147 return -1; 1166 return -1;
1148 } 1167 }
1149 } 1168 }
1150 while (j >= 0 && pattern[j] == (c = subject[idx + j])) j--; 1169 while (j >= 0 && pattern[j] == (c = subject[idx + j])) j--;
1151 if (j < 0) { 1170 if (j < 0) {
1152 return idx; 1171 return idx;
1153 } else if (j < start) { 1172 } else if (j < start) {
1154 // we have matched more than our tables allow us to be smart about. 1173 // we have matched more than our tables allow us to be smart about.
1155 idx += 1; 1174 idx += 1;
1156 } else { 1175 } else {
1157 int gs_shift = bmgs_buffers.shift(j + 1); // Good suffix shift. 1176 int gs_shift = bmgs_buffers.shift(j + 1); // Good suffix shift.
1158 int bc_occ = bad_char_occurence[c % kBMAlphabetSize]; 1177 int bc_occ = CharOccurence<schar, pchar>(c);
1159 int shift = j - bc_occ; // Bad-char shift. 1178 int shift = j - bc_occ; // Bad-char shift.
1160 shift = (gs_shift > shift) ? gs_shift : shift; 1179 shift = (gs_shift > shift) ? gs_shift : shift;
1161 idx += shift; 1180 idx += shift;
1162 } 1181 }
1163 } while (idx <= n - m); 1182 } while (idx <= n - m);
1164 1183
1165 return -1; 1184 return -1;
1166 } 1185 }
1167 1186
1168 1187
(...skipping 4234 matching lines...) Expand 10 before | Expand all | Expand 10 after
5403 5422
5404 void Runtime::PerformGC(Object* result) { 5423 void Runtime::PerformGC(Object* result) {
5405 Failure* failure = Failure::cast(result); 5424 Failure* failure = Failure::cast(result);
5406 // Try to do a garbage collection; ignore it if it fails. The C 5425 // Try to do a garbage collection; ignore it if it fails. The C
5407 // entry stub will throw an out-of-memory exception in that case. 5426 // entry stub will throw an out-of-memory exception in that case.
5408 Heap::CollectGarbage(failure->requested(), failure->allocation_space()); 5427 Heap::CollectGarbage(failure->requested(), failure->allocation_space());
5409 } 5428 }
5410 5429
5411 5430
5412 } } // namespace v8::internal 5431 } } // namespace v8::internal
OLDNEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698