Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(98)

Side by Side Diff: src/runtime.cc

Issue 7299: * Split the BoyerMooreStringSearch function into more separate functions. (Closed)
Patch Set: Separated BoyerMoore into more functions Created 12 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2006-2008 the V8 project authors. All rights reserved. 1 // Copyright 2006-2008 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 975 matching lines...) Expand 10 before | Expand all | Expand 10 after
986 int good_suffix_shift_[kBMMaxShift + 1]; 986 int good_suffix_shift_[kBMMaxShift + 1];
987 int *biased_suffixes_; 987 int *biased_suffixes_;
988 int *biased_good_suffix_shift_; 988 int *biased_good_suffix_shift_;
989 DISALLOW_COPY_AND_ASSIGN(BMGoodSuffixBuffers); 989 DISALLOW_COPY_AND_ASSIGN(BMGoodSuffixBuffers);
990 }; 990 };
991 991
992 // buffers reused by BoyerMoore 992 // buffers reused by BoyerMoore
993 static int bad_char_occurence[kBMAlphabetSize]; 993 static int bad_char_occurence[kBMAlphabetSize];
994 static BMGoodSuffixBuffers bmgs_buffers; 994 static BMGoodSuffixBuffers bmgs_buffers;
995 995
996 // Fill the static bad_char_occurence table used by the Boyer-Moore search.
997 // Returns false only when check_ascii is true and the pattern contains a
998 // character above String::kMaxAsciiCharCode; otherwise returns true.
//
// pattern:     the pattern whose characters are recorded.
// start:       first pattern index to record; every slot is preset to
//              start - 1, so that value marks a slot no character hit.
// check_ascii: compile-time switch for the ASCII test; when false the
//              function always returns true.
//
// On a false return the table is left partially filled (the loop exits early).
// NOTE(review): in the visible caller, BoyerMooreIndexOf, the result of the
// <pchar, true> instantiation is ignored while the <pchar, false> one -- which
// cannot fail -- is tested; confirm the template arguments are not swapped.
999 template <typename pchar, bool check_ascii>
1000 static bool BoyerMoorePopulateBadCharTable(Vector<const pchar> pattern,
1001 int start) {
1002 // Run forwards to populate bad_char_occurence, so that the *last* instance
1003 // of each character equivalence class (c % kBMAlphabetSize) is registered.
1004 // NOTE(review): this used to say the last character is excluded, but the
// loop bound below is pattern.length(), which includes it -- confirm intent.
1005 for (int i = 0; i < kBMAlphabetSize; i++) {
1006 bad_char_occurence[i] = start - 1;
1007 }
1008 for (int i = start; i < pattern.length(); i++) {
1009 uc32 c = pattern[i];
1010 bad_char_occurence[c % kBMAlphabetSize] = i;  // later i overwrites earlier
// The slot above is written before the ASCII test, so the offending
// character is still recorded when the function bails out.
1011 if (check_ascii &&
1012 c > String::kMaxAsciiCharCode) {
1013 return false;
1014 }
1015 }
1016 return true;
1017 }
1018
// Build the Boyer-Moore good-suffix tables in the static bmgs_buffers for the
// part of the pattern from index start up to its end.
//
// pattern: the pattern being searched for.
// start:   lowest pattern index covered by the tables.
// len:     the visible caller passes pattern.length() - start; below it is
//          compared against shift entries as the "not assigned yet" value.
//          NOTE(review): presumably bmgs_buffers.init() pre-fills every shift
//          entry with this value -- confirm against BMGoodSuffixBuffers::init.
//
// Writes bmgs_buffers.shift(...) and bmgs_buffers.suffix(...); returns nothing.
1019 template <typename pchar>
1020 static void BoyerMoorePopulateGoodSuffixTable(Vector<const pchar> pattern,
1021 int start,
1022 int len) {
1023 int m = pattern.length();
1024 // Compute Good Suffix tables.
1025 bmgs_buffers.init(m);
1026
1027 bmgs_buffers.shift(m-1) = 1;
1028 bmgs_buffers.suffix(m) = m + 1;
1029 pchar last_char = pattern[m - 1];
1030 int suffix = m + 1;
// Walk i from the end of the pattern down to start, recording suffix(i) for
// each position. i is decremented inside the body, not in the loop header.
1031 for (int i = m; i > start;) {
// c is read once: i does not change inside this inner loop. Follow the chain
// of suffix(...) entries until the character before `suffix` equals c or the
// chain runs past m, assigning any shift entry on the way that is still len.
1032 for (pchar c = pattern[i - 1]; suffix <= m && c != pattern[suffix - 1];) {
1033 if (bmgs_buffers.shift(suffix) == len) {
1034 bmgs_buffers.shift(suffix) = suffix - i;
1035 }
1036 suffix = bmgs_buffers.suffix(suffix);
1037 }
1038 i--;
1039 suffix--;
1040 bmgs_buffers.suffix(i) = suffix;
1041 if (suffix == m) {
1042 // No suffix to extend, so we check against last_char only.
// Skip every position whose preceding character differs from last_char,
// giving each of them suffix(i) = m.
1043 while (i > start && pattern[i - 1] != last_char) {
1044 if (bmgs_buffers.shift(m) == len) {
1045 bmgs_buffers.shift(m) = m - i;
1046 }
1047 i--;
1048 bmgs_buffers.suffix(i) = m;
1049 }
// If the scan stopped before start, pattern[i - 1] equals last_char: step
// both i and suffix down by one and record the new entry.
1050 if (i > start) {
1051 i--;
1052 suffix--;
1053 bmgs_buffers.suffix(i) = suffix;
1054 }
1055 }
1056 }
// Final pass: give every shift entry in [start, m] that is still len the value
// suffix - start, moving suffix along the suffix chain each time i reaches it.
1057 if (suffix < m) {
1058 for (int i = start; i <= m; i++) {
1059 if (bmgs_buffers.shift(i) == len) {
1060 bmgs_buffers.shift(i) = suffix - start;
1061 }
1062 if (i == suffix) {
1063 suffix = bmgs_buffers.suffix(suffix);
1064 }
1065 }
1066 }
1067 }
1068
996 // Restricted Boyer-Moore string matching. Restricts tables to a 1069 // Restricted Boyer-Moore string matching. Restricts tables to a
997 // suffix of long pattern strings and handles only equivalence classes 1070 // suffix of long pattern strings and handles only equivalence classes
998 // of the full alphabet. This allows us to ensure that tables take only 1071 // of the full alphabet. This allows us to ensure that tables take only
999 // a fixed amount of space. 1072 // a fixed amount of space.
1000 template <typename schar, typename pchar> 1073 template <typename schar, typename pchar>
1001 static int BoyerMooreIndexOf(Vector<const schar> subject, 1074 static int BoyerMooreIndexOf(Vector<const schar> subject,
1002 Vector<const pchar> pattern, 1075 Vector<const pchar> pattern,
1003 int start_index) { 1076 int start_index) {
1004 int m = pattern.length(); 1077 int m = pattern.length();
1005 int n = subject.length(); 1078 int n = subject.length();
1006 1079
1007 // Only preprocess at most kBMMaxShift last characters of pattern. 1080 // Only preprocess at most kBMMaxShift last characters of pattern.
1008 int start = m < kBMMaxShift ? 0 : m - kBMMaxShift; 1081 int start = m < kBMMaxShift ? 0 : m - kBMMaxShift;
1009 int len = m - start; 1082 int len = m - start;
1010 1083
1011 // Run forwards to populate bad_char_table, so that *last* instance 1084 if (sizeof(pchar) > 1 && sizeof(schar) == 1) {
1012 // of character equivalence class is the one registered. 1085 BoyerMoorePopulateBadCharTable<pchar, true>(pattern, start);
1013 // Notice: Doesn't include last character. 1086 } else {
1014 for (int i = 0; i < kBMAlphabetSize; i++) { 1087 if (!BoyerMoorePopulateBadCharTable<pchar, false>(pattern, start)) {
1015 bad_char_occurence[i] = start - 1;
1016 }
1017 for (int i = start; i < m; i++) {
1018 uc32 c = pattern[i];
1019 bad_char_occurence[c % kBMAlphabetSize] = i;
1020 if (sizeof(schar) == 1 &&
1021 sizeof(pchar) > 1 &&
1022 c > String::kMaxAsciiCharCode) {
1023 return -1; 1088 return -1;
1024 } 1089 }
1025 } 1090 }
1026 // End of Bad Char computation.
1027
1028
1029 1091
1030 int badness = 0; // How bad we are doing without a good-suffix table. 1092 int badness = 0; // How bad we are doing without a good-suffix table.
1031 int idx; // No matches found prior to this index. 1093 int idx; // No matches found prior to this index.
1032 // Perform search 1094 // Perform search
1033 for (idx = start_index; idx <= n - m;) { 1095 for (idx = start_index; idx <= n - m;) {
1034 int j = m - 1; 1096 int j = m - 1;
1035 schar c; 1097 schar c;
1036 while (j >= 0 && pattern[j] == (c = subject[idx + j])) j--; 1098 while (j >= 0 && pattern[j] == (c = subject[idx + j])) j--;
1037 if (j < 0) { 1099 if (j < 0) {
1038 return idx; 1100 return idx;
1039 } else { 1101 } else {
1040 int bc_occ = bad_char_occurence[c % kBMAlphabetSize]; 1102 int bc_occ = bad_char_occurence[c % kBMAlphabetSize];
1041 int shift = bc_occ < j ? j - bc_occ : 1; 1103 int shift = bc_occ < j ? j - bc_occ : 1;
1042 idx += shift; 1104 idx += shift;
1043 // Badness increases by the number of characters we have 1105 // Badness increases by the number of characters we have
1044 // checked, and decreases by the number of characters we 1106 // checked, and decreases by the number of characters we
1045 // can skip by shifting. It's a measure of how we are doing 1107 // can skip by shifting. It's a measure of how we are doing
1046 // compared to reading each character exactly once. 1108 // compared to reading each character exactly once.
1047 badness += (m - j) - shift; 1109 badness += (m - j) - shift;
1048 if (badness > m) break; 1110 if (badness > m) break;
1049 } 1111 }
1050 } 1112 }
1051 1113
1052 // If we are not done, we got here because we should build the Good Suffix 1114 // If we are not done, we got here because we should build the Good Suffix
1053 // table and continue searching. 1115 // table and continue searching.
1054 if (idx <= n - m) { 1116 if (idx <= n - m) {
1055 // Compute Good Suffix tables. 1117 BoyerMoorePopulateGoodSuffixTable(pattern, start, len);
1056 bmgs_buffers.init(m);
1057
1058 bmgs_buffers.shift(m-1) = 1;
1059 bmgs_buffers.suffix(m) = m + 1;
1060 pchar last_char = pattern[m - 1];
1061 int suffix = m + 1;
1062 for (int i = m; i > start;) {
1063 for (pchar c = pattern[i - 1]; suffix <= m && c != pattern[suffix - 1];) {
1064 if (bmgs_buffers.shift(suffix) == len) {
1065 bmgs_buffers.shift(suffix) = suffix - i;
1066 }
1067 suffix = bmgs_buffers.suffix(suffix);
1068 }
1069 i--;
1070 suffix--;
1071 bmgs_buffers.suffix(i) = suffix;
1072 if (suffix == m) {
1073 // no suffix to extend, so we check against last_char only.
1074 while (i > start && pattern[i - 1] != last_char) {
1075 if (bmgs_buffers.shift(m) == len) {
1076 bmgs_buffers.shift(m) = m - i;
1077 }
1078 i--;
1079 bmgs_buffers.suffix(i) = m;
1080 }
1081 if (i > start) {
1082 i--;
1083 suffix--;
1084 bmgs_buffers.suffix(i) = suffix;
1085 }
1086 }
1087 }
1088 if (suffix < m) {
1089 for (int i = start; i <= m; i++) {
1090 if (bmgs_buffers.shift(i) == len) {
1091 bmgs_buffers.shift(i) = suffix - start;
1092 }
1093 if (i == suffix) {
1094 suffix = bmgs_buffers.suffix(suffix);
1095 }
1096 }
1097 }
1098 // End of Good Suffix computation.
1099
1100 // Continue search from i. 1118 // Continue search from i.
1101 do { 1119 do {
1102 int j = m - 1; 1120 int j = m - 1;
1103 schar c; 1121 schar c;
1104 while (j >= 0 && pattern[j] == (c = subject[idx + j])) j--; 1122 while (j >= 0 && pattern[j] == (c = subject[idx + j])) j--;
1105 if (j < 0) { 1123 if (j < 0) {
1106 return idx; 1124 return idx;
1107 } else if (j < start) { 1125 } else if (j < start) {
1108 // we have matched more than our tables allow us to be smart about. 1126 // we have matched more than our tables allow us to be smart about.
1109 idx += 1; 1127 idx += 1;
(...skipping 4187 matching lines...) Expand 10 before | Expand all | Expand 10 after
5297 5315
5298 void Runtime::PerformGC(Object* result) { 5316 void Runtime::PerformGC(Object* result) {
5299 Failure* failure = Failure::cast(result); 5317 Failure* failure = Failure::cast(result);
5300 // Try to do a garbage collection; ignore it if it fails. The C 5318 // Try to do a garbage collection; ignore it if it fails. The C
5301 // entry stub will throw an out-of-memory exception in that case. 5319 // entry stub will throw an out-of-memory exception in that case.
5302 Heap::CollectGarbage(failure->requested(), failure->allocation_space()); 5320 Heap::CollectGarbage(failure->requested(), failure->allocation_space());
5303 } 5321 }
5304 5322
5305 5323
5306 } } // namespace v8::internal 5324 } } // namespace v8::internal
OLDNEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698