src/runtime.cc - Issue 7299: * Split the BoyerMooreStringSearch function into more separate functions.

Side by Side Diff: src/runtime.cc

Issue 7299: * Split the BoyerMooreStringSearch function into more separate functions. (Closed)

Patch Set: Separated BoyerMoore into more functions (created 12 years, 2 months ago)

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 // Copyright 2006-2008 the V8 project authors. All rights reserved.	1 // Copyright 2006-2008 the V8 project authors. All rights reserved.

2 // Redistribution and use in source and binary forms, with or without	2 // Redistribution and use in source and binary forms, with or without

3 // modification, are permitted provided that the following conditions are	3 // modification, are permitted provided that the following conditions are

4 // met:	4 // met:

5 //	5 //

6 // * Redistributions of source code must retain the above copyright	6 // * Redistributions of source code must retain the above copyright

7 // notice, this list of conditions and the following disclaimer.	7 // notice, this list of conditions and the following disclaimer.

8 // * Redistributions in binary form must reproduce the above	8 // * Redistributions in binary form must reproduce the above

9 // copyright notice, this list of conditions and the following	9 // copyright notice, this list of conditions and the following

10 // disclaimer in the documentation and/or other materials provided	10 // disclaimer in the documentation and/or other materials provided

(...skipping 975 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
986 int good_suffix_shift_[kBMMaxShift + 1];	986 int good_suffix_shift_[kBMMaxShift + 1];

987 int *biased_suffixes_;	987 int *biased_suffixes_;

988 int *biased_good_suffix_shift_;	988 int *biased_good_suffix_shift_;

989 DISALLOW_COPY_AND_ASSIGN(BMGoodSuffixBuffers);	989 DISALLOW_COPY_AND_ASSIGN(BMGoodSuffixBuffers);

990 };	990 };

991	991

992 // buffers reused by BoyerMoore	992 // buffers reused by BoyerMoore

993 static int bad_char_occurence[kBMAlphabetSize];	993 static int bad_char_occurence[kBMAlphabetSize];

994 static BMGoodSuffixBuffers bmgs_buffers;	994 static BMGoodSuffixBuffers bmgs_buffers;

995	995

	996 // Compute the bad-char table for Boyer-Moore in the static buffer.

	997 // Return false if the pattern contains non-ASCII characters that cannot be

	998 // in the searched string.

	999 template <typename pchar, bool check_ascii>

	1000 static bool BoyerMoorePopulateBadCharTable(Vector<const pchar> pattern,

	1001 int start) {

	1002 // Run forwards to populate bad_char_table, so that last instance

	1003 // of character equivalence class is the one registered.

	1004 // Notice: Doesn't include the last character.

	1005 for (int i = 0; i < kBMAlphabetSize; i++) {

	1006 bad_char_occurence[i] = start - 1;

	1007 }

	1008 for (int i = start; i < pattern.length(); i++) {

	1009 uc32 c = pattern[i];

	1010 bad_char_occurence[c % kBMAlphabetSize] = i;

	1011 if (check_ascii &&

	1012 c > String::kMaxAsciiCharCode) {

	1013 return false;

	1014 }

	1015 }

	1016 return true;

	1017 }

	1018

	1019 template <typename pchar>

	1020 static void BoyerMoorePopulateGoodSuffixTable(Vector<const pchar> pattern,

	1021 int start,

	1022 int len) {

	1023 int m = pattern.length();

	1024 // Compute Good Suffix tables.

	1025 bmgs_buffers.init(m);

	1026

	1027 bmgs_buffers.shift(m-1) = 1;

	1028 bmgs_buffers.suffix(m) = m + 1;

	1029 pchar last_char = pattern[m - 1];

	1030 int suffix = m + 1;

	1031 for (int i = m; i > start;) {

	1032 for (pchar c = pattern[i - 1]; suffix <= m && c != pattern[suffix - 1];) {

	1033 if (bmgs_buffers.shift(suffix) == len) {

	1034 bmgs_buffers.shift(suffix) = suffix - i;

	1035 }

	1036 suffix = bmgs_buffers.suffix(suffix);

	1037 }

	1038 i--;

	1039 suffix--;

	1040 bmgs_buffers.suffix(i) = suffix;

	1041 if (suffix == m) {

	1042 // No suffix to extend, so we check against last_char only.

	1043 while (i > start && pattern[i - 1] != last_char) {

	1044 if (bmgs_buffers.shift(m) == len) {

	1045 bmgs_buffers.shift(m) = m - i;

	1046 }

	1047 i--;

	1048 bmgs_buffers.suffix(i) = m;

	1049 }

	1050 if (i > start) {

	1051 i--;

	1052 suffix--;

	1053 bmgs_buffers.suffix(i) = suffix;

	1054 }

	1055 }

	1056 }

	1057 if (suffix < m) {

	1058 for (int i = start; i <= m; i++) {

	1059 if (bmgs_buffers.shift(i) == len) {

	1060 bmgs_buffers.shift(i) = suffix - start;

	1061 }

	1062 if (i == suffix) {

	1063 suffix = bmgs_buffers.suffix(suffix);

	1064 }

	1065 }

	1066 }

	1067 }

	1068

996 // Restricted Boyer-Moore string matching. Restricts tables to a	1069 // Restricted Boyer-Moore string matching. Restricts tables to a

997 // suffix of long pattern strings and handles only equivalence classes	1070 // suffix of long pattern strings and handles only equivalence classes

998 // of the full alphabet. This allows us to ensure that tables take only	1071 // of the full alphabet. This allows us to ensure that tables take only

999 // a fixed amount of space.	1072 // a fixed amount of space.

1000 template <typename schar, typename pchar>	1073 template <typename schar, typename pchar>

1001 static int BoyerMooreIndexOf(Vector<const schar> subject,	1074 static int BoyerMooreIndexOf(Vector<const schar> subject,

1002 Vector<const pchar> pattern,	1075 Vector<const pchar> pattern,

1003 int start_index) {	1076 int start_index) {

1004 int m = pattern.length();	1077 int m = pattern.length();

1005 int n = subject.length();	1078 int n = subject.length();

1006	1079

1007 // Only preprocess at most kBMMaxShift last characters of pattern.	1080 // Only preprocess at most kBMMaxShift last characters of pattern.

1008 int start = m < kBMMaxShift ? 0 : m - kBMMaxShift;	1081 int start = m < kBMMaxShift ? 0 : m - kBMMaxShift;

1009 int len = m - start;	1082 int len = m - start;

1010	1083

1011 // Run forwards to populate bad_char_table, so that last instance	1084 if (sizeof(pchar) > 1 && sizeof(schar) == 1) {

1012 // of character equivalence class is the one registered.	1085 BoyerMoorePopulateBadCharTable<pchar, true>(pattern, start);

1013 // Notice: Doesn't include last character.	1086 } else {

1014 for (int i = 0; i < kBMAlphabetSize; i++) {	1087 if (!BoyerMoorePopulateBadCharTable<pchar, false>(pattern, start)) {

1015 bad_char_occurence[i] = start - 1;

1016 }

1017 for (int i = start; i < m; i++) {

1018 uc32 c = pattern[i];

1019 bad_char_occurence[c % kBMAlphabetSize] = i;

1020 if (sizeof(schar) == 1 &&

1021 sizeof(pchar) > 1 &&

1022 c > String::kMaxAsciiCharCode) {

1023 return -1;	1088 return -1;

1024 }	1089 }

1025 }	1090 }

1026 // End of Bad Char computation.

1027

1028

1029	1091

1030 int badness = 0; // How bad we are doing without a good-suffix table.	1092 int badness = 0; // How bad we are doing without a good-suffix table.

1031 int idx; // No matches found prior to this index.	1093 int idx; // No matches found prior to this index.

1032 // Perform search	1094 // Perform search

1033 for (idx = start_index; idx <= n - m;) {	1095 for (idx = start_index; idx <= n - m;) {

1034 int j = m - 1;	1096 int j = m - 1;

1035 schar c;	1097 schar c;

1036 while (j >= 0 && pattern[j] == (c = subject[idx + j])) j--;	1098 while (j >= 0 && pattern[j] == (c = subject[idx + j])) j--;

1037 if (j < 0) {	1099 if (j < 0) {

1038 return idx;	1100 return idx;

1039 } else {	1101 } else {

1040 int bc_occ = bad_char_occurence[c % kBMAlphabetSize];	1102 int bc_occ = bad_char_occurence[c % kBMAlphabetSize];

1041 int shift = bc_occ < j ? j - bc_occ : 1;	1103 int shift = bc_occ < j ? j - bc_occ : 1;

1042 idx += shift;	1104 idx += shift;

1043 // Badness increases by the number of characters we have	1105 // Badness increases by the number of characters we have

1044 // checked, and decreases by the number of characters we	1106 // checked, and decreases by the number of characters we

1045 // can skip by shifting. It's a measure of how we are doing	1107 // can skip by shifting. It's a measure of how we are doing

1046 // compared to reading each character exactly once.	1108 // compared to reading each character exactly once.

1047 badness += (m - j) - shift;	1109 badness += (m - j) - shift;

1048 if (badness > m) break;	1110 if (badness > m) break;

1049 }	1111 }

1050 }	1112 }

1051	1113

1052 // If we are not done, we got here because we should build the Good Suffix	1114 // If we are not done, we got here because we should build the Good Suffix

1053 // table and continue searching.	1115 // table and continue searching.

1054 if (idx <= n - m) {	1116 if (idx <= n - m) {

1055 // Compute Good Suffix tables.	1117 BoyerMoorePopulateGoodSuffixTable(pattern, start, len);

1056 bmgs_buffers.init(m);

1057

1058 bmgs_buffers.shift(m-1) = 1;

1059 bmgs_buffers.suffix(m) = m + 1;

1060 pchar last_char = pattern[m - 1];

1061 int suffix = m + 1;

1062 for (int i = m; i > start;) {

1063 for (pchar c = pattern[i - 1]; suffix <= m && c != pattern[suffix - 1];) {

1064 if (bmgs_buffers.shift(suffix) == len) {

1065 bmgs_buffers.shift(suffix) = suffix - i;

1066 }

1067 suffix = bmgs_buffers.suffix(suffix);

1068 }

1069 i--;

1070 suffix--;

1071 bmgs_buffers.suffix(i) = suffix;

1072 if (suffix == m) {

1073 // no suffix to extend, so we check against last_char only.

1074 while (i > start && pattern[i - 1] != last_char) {

1075 if (bmgs_buffers.shift(m) == len) {

1076 bmgs_buffers.shift(m) = m - i;

1077 }

1078 i--;

1079 bmgs_buffers.suffix(i) = m;

1080 }

1081 if (i > start) {

1082 i--;

1083 suffix--;

1084 bmgs_buffers.suffix(i) = suffix;

1085 }

1086 }

1087 }

1088 if (suffix < m) {

1089 for (int i = start; i <= m; i++) {

1090 if (bmgs_buffers.shift(i) == len) {

1091 bmgs_buffers.shift(i) = suffix - start;

1092 }

1093 if (i == suffix) {

1094 suffix = bmgs_buffers.suffix(suffix);

1095 }

1096 }

1097 }

1098 // End of Good Suffix computation.

1099

1100 // Continue search from i.	1118 // Continue search from i.

1101 do {	1119 do {

1102 int j = m - 1;	1120 int j = m - 1;

1103 schar c;	1121 schar c;

1104 while (j >= 0 && pattern[j] == (c = subject[idx + j])) j--;	1122 while (j >= 0 && pattern[j] == (c = subject[idx + j])) j--;

1105 if (j < 0) {	1123 if (j < 0) {

1106 return idx;	1124 return idx;

1107 } else if (j < start) {	1125 } else if (j < start) {

1108 // we have matched more than our tables allow us to be smart about.	1126 // we have matched more than our tables allow us to be smart about.

1109 idx += 1;	1127 idx += 1;

(...skipping 4187 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
5297	5315

5298 void Runtime::PerformGC(Object* result) {	5316 void Runtime::PerformGC(Object* result) {

5299 Failure* failure = Failure::cast(result);	5317 Failure* failure = Failure::cast(result);

5300 // Try to do a garbage collection; ignore it if it fails. The C	5318 // Try to do a garbage collection; ignore it if it fails. The C

5301 // entry stub will throw an out-of-memory exception in that case.	5319 // entry stub will throw an out-of-memory exception in that case.

5302 Heap::CollectGarbage(failure->requested(), failure->allocation_space());	5320 Heap::CollectGarbage(failure->requested(), failure->allocation_space());

5303 }	5321 }

5304	5322

5305	5323

5306 } } // namespace v8::internal	5324 } } // namespace v8::internal

OLD	NEW

« no previous file with comments | « no previous file | no next file » | no next file with comments »