Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1211)

Unified Diff: src/string-search.h

Issue 1324453007: Reland: Speedup stringsearch for two byte strings (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master
Patch Set: Fix out-of-bounds access Created 5 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « AUTHORS ('k') | test/mjsunit/string-indexof-1.js » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: src/string-search.h
diff --git a/src/string-search.h b/src/string-search.h
index 349d4fd2db72918560fd0e784bf9ea4664c803c2..e2e540b6cf8324f573a451b5061ebfd92ede1857 100644
--- a/src/string-search.h
+++ b/src/string-search.h
@@ -190,6 +190,41 @@ class StringSearch : private StringSearchBase {
};
+template <typename PatternChar, typename SubjectChar>
+int FindFirstCharacter(Vector<const PatternChar> pattern,
+ Vector<const SubjectChar> subject, int index) {
+ PatternChar pattern_first_char = pattern[0];
+ const int max_n = (subject.length() - pattern.length() + 1);
+
+ if (sizeof(SubjectChar) == 1 && sizeof(PatternChar) == 1) {
+ DCHECK_GE(max_n - index, 0);
+ const SubjectChar* char_pos = reinterpret_cast<const SubjectChar*>(
+ memchr(subject.start() + index, pattern_first_char, max_n - index));
+ if (char_pos == NULL) return -1;
+ return static_cast<int>(char_pos - subject.start());
+ } else {
+ const uint8_t search_low_byte =
+ static_cast<uint8_t>(pattern_first_char & 0xFF);
+ const SubjectChar search_char =
+ static_cast<SubjectChar>(pattern_first_char);
+ int pos = index;
+ do {
+ DCHECK_GE(max_n - pos, 0);
+ const SubjectChar* char_pos = reinterpret_cast<const SubjectChar*>(
+ memchr(subject.start() + pos, search_low_byte,
+ (max_n - pos) * sizeof(SubjectChar)));
+ if (char_pos == NULL) return -1;
+ pos = static_cast<int>(char_pos - subject.start());
+ if (IsAligned(reinterpret_cast<uintptr_t>(char_pos),
+ sizeof(SubjectChar))) {
+ if (subject[pos] == search_char) return pos;
+ }
+ } while (++pos < max_n);
+ }
+ return -1;
+}
+
+
//---------------------------------------------------------------------
// Single Character Pattern Search Strategy
//---------------------------------------------------------------------
@@ -201,26 +236,15 @@ int StringSearch<PatternChar, SubjectChar>::SingleCharSearch(
int index) {
DCHECK_EQ(1, search->pattern_.length());
PatternChar pattern_first_char = search->pattern_[0];
- int i = index;
if (sizeof(SubjectChar) == 1 && sizeof(PatternChar) == 1) {
- const SubjectChar* pos = reinterpret_cast<const SubjectChar*>(
- memchr(subject.start() + i,
- pattern_first_char,
- subject.length() - i));
- if (pos == NULL) return -1;
- return static_cast<int>(pos - subject.start());
+ return FindFirstCharacter(search->pattern_, subject, index);
} else {
if (sizeof(PatternChar) > sizeof(SubjectChar)) {
if (exceedsOneByte(pattern_first_char)) {
return -1;
}
}
- SubjectChar search_char = static_cast<SubjectChar>(pattern_first_char);
- int n = subject.length();
- while (i < n) {
- if (subject[i++] == search_char) return i - 1;
- }
- return -1;
+ return FindFirstCharacter(search->pattern_, subject, index);
}
}
@@ -254,20 +278,13 @@ int StringSearch<PatternChar, SubjectChar>::LinearSearch(
Vector<const PatternChar> pattern = search->pattern_;
DCHECK(pattern.length() > 1);
int pattern_length = pattern.length();
- PatternChar pattern_first_char = pattern[0];
int i = index;
int n = subject.length() - pattern_length;
while (i <= n) {
- if (sizeof(SubjectChar) == 1 && sizeof(PatternChar) == 1) {
- const SubjectChar* pos = reinterpret_cast<const SubjectChar*>(
- memchr(subject.start() + i,
- pattern_first_char,
- n - i + 1));
- if (pos == NULL) return -1;
- i = static_cast<int>(pos - subject.start()) + 1;
- } else {
- if (subject[i++] != pattern_first_char) continue;
- }
+ i = FindFirstCharacter(pattern, subject, i);
+ if (i == -1) return -1;
+ DCHECK_LE(i, n);
+ i++;
// Loop extracted to separate function to allow using return to do
// a deeper break.
if (CharCompare(pattern.start() + 1,
@@ -505,22 +522,12 @@ int StringSearch<PatternChar, SubjectChar>::InitialSearch(
// We know our pattern is at least 2 characters, we cache the first so
// the common case of the first character not matching is faster.
- PatternChar pattern_first_char = pattern[0];
for (int i = index, n = subject.length() - pattern_length; i <= n; i++) {
badness++;
if (badness <= 0) {
- if (sizeof(SubjectChar) == 1 && sizeof(PatternChar) == 1) {
- const SubjectChar* pos = reinterpret_cast<const SubjectChar*>(
- memchr(subject.start() + i,
- pattern_first_char,
- n - i + 1));
- if (pos == NULL) {
- return -1;
- }
- i = static_cast<int>(pos - subject.start());
- } else {
- if (subject[i] != pattern_first_char) continue;
- }
+ i = FindFirstCharacter(pattern, subject, i);
+ if (i == -1) return -1;
+ DCHECK_LE(i, n);
int j = 1;
do {
if (pattern[j] != subject[i + j]) {
« no previous file with comments | « AUTHORS ('k') | test/mjsunit/string-indexof-1.js » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698