Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(152)

Unified Diff: src/runtime.cc

Issue 3276004: Simplify code by removing special-casing for single-character patterns (Closed) Base URL: http://v8.googlecode.com/svn/branches/bleeding_edge/
Patch Set: '' Created 10 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: src/runtime.cc
===================================================================
--- src/runtime.cc (revision 5377)
+++ src/runtime.cc (working copy)
@@ -2815,40 +2815,6 @@
}
-template <typename schar>
-static inline int SingleCharIndexOf(Vector<const schar> string,
- schar pattern_char,
- int start_index) {
- if (sizeof(schar) == 1) {
- const schar* pos = reinterpret_cast<const schar*>(
- memchr(string.start() + start_index,
- pattern_char,
- string.length() - start_index));
- if (pos == NULL) return -1;
- return static_cast<int>(pos - string.start());
- }
- for (int i = start_index, n = string.length(); i < n; i++) {
- if (pattern_char == string[i]) {
- return i;
- }
- }
- return -1;
-}
-
-
-template <typename schar>
-static int SingleCharLastIndexOf(Vector<const schar> string,
- schar pattern_char,
- int start_index) {
- for (int i = start_index; i >= 0; i--) {
- if (pattern_char == string[i]) {
- return i;
- }
- }
- return -1;
-}
-
-
// Trivial string search for shorter strings.
// On return, if "complete" is set to true, the return value is the
// final result of searching for the pattern in the subject.
@@ -2860,6 +2826,7 @@
Vector<const pchar> pattern,
int idx,
bool* complete) {
+ ASSERT(pattern.length() > 1);
// Badness is a count of how much work we have done. When we have
// done enough work we decide it's probably worth switching to a better
// algorithm.
@@ -2922,12 +2889,12 @@
if (subject[i] != pattern_first_char) continue;
}
int j = 1;
- do {
+ while (j < pattern.length()) {
if (pattern[j] != subject[i+j]) {
break;
}
j++;
- } while (j < pattern.length());
+ }
if (j == pattern.length()) {
return i;
}
@@ -3029,54 +2996,15 @@
int subject_length = sub->length();
if (start_index + pattern_length > subject_length) return -1;
- if (!sub->IsFlat()) {
- FlattenString(sub);
- }
+ if (!sub->IsFlat()) FlattenString(sub);
+ if (!pat->IsFlat()) FlattenString(pat);
- // Searching for one specific character is common. For one
- // character patterns linear search is necessary, so any smart
- // algorithm is unnecessary overhead.
- if (pattern_length == 1) {
- AssertNoAllocation no_heap_allocation; // ensure vectors stay valid
- String* seq_sub = *sub;
- if (seq_sub->IsConsString()) {
- seq_sub = ConsString::cast(seq_sub)->first();
- }
- if (seq_sub->IsAsciiRepresentation()) {
- uc16 pchar = pat->Get(0);
- if (pchar > String::kMaxAsciiCharCode) {
- return -1;
- }
- Vector<const char> ascii_vector =
- seq_sub->ToAsciiVector().SubVector(start_index, subject_length);
- const void* pos = memchr(ascii_vector.start(),
- static_cast<const char>(pchar),
- static_cast<size_t>(ascii_vector.length()));
- if (pos == NULL) {
- return -1;
- }
- return static_cast<int>(reinterpret_cast<const char*>(pos)
- - ascii_vector.start() + start_index);
- }
- return SingleCharIndexOf(seq_sub->ToUC16Vector(),
- pat->Get(0),
- start_index);
- }
-
- if (!pat->IsFlat()) {
- FlattenString(pat);
- }
-
AssertNoAllocation no_heap_allocation; // ensure vectors stay valid
// Extract flattened substrings of cons strings before determining asciiness.
String* seq_sub = *sub;
- if (seq_sub->IsConsString()) {
- seq_sub = ConsString::cast(seq_sub)->first();
- }
+ if (seq_sub->IsConsString()) seq_sub = ConsString::cast(seq_sub)->first();
String* seq_pat = *pat;
- if (seq_pat->IsConsString()) {
- seq_pat = ConsString::cast(seq_pat)->first();
- }
+ if (seq_pat->IsConsString()) seq_pat = ConsString::cast(seq_pat)->first();
// dispatch on type of strings
if (seq_pat->IsAsciiRepresentation()) {
@@ -3166,31 +3094,9 @@
return Smi::FromInt(start_index);
}
- if (!sub->IsFlat()) {
- FlattenString(sub);
- }
+ if (!sub->IsFlat()) FlattenString(sub);
+ if (!pat->IsFlat()) FlattenString(pat);
- if (pat_length == 1) {
- AssertNoAllocation no_heap_allocation; // ensure vectors stay valid
- if (sub->IsAsciiRepresentation()) {
- uc16 pchar = pat->Get(0);
- if (pchar > String::kMaxAsciiCharCode) {
- return Smi::FromInt(-1);
- }
- return Smi::FromInt(SingleCharLastIndexOf(sub->ToAsciiVector(),
- static_cast<char>(pat->Get(0)),
- start_index));
- } else {
- return Smi::FromInt(SingleCharLastIndexOf(sub->ToUC16Vector(),
- pat->Get(0),
- start_index));
- }
- }
-
- if (!pat->IsFlat()) {
- FlattenString(pat);
- }
-
AssertNoAllocation no_heap_allocation; // ensure vectors stay valid
int position = -1;
@@ -3367,88 +3273,6 @@
}
-template <typename schar>
-static bool SearchCharMultiple(Vector<schar> subject,
- String* pattern,
- schar pattern_char,
- FixedArrayBuilder* builder,
- int* match_pos) {
- // Position of last match.
- int pos = *match_pos;
- int subject_length = subject.length();
- while (pos < subject_length) {
- int match_end = pos + 1;
- if (!builder->HasCapacity(kMaxBuilderEntriesPerRegExpMatch)) {
- *match_pos = pos;
- return false;
- }
- int new_pos = SingleCharIndexOf(subject, pattern_char, match_end);
- if (new_pos >= 0) {
- // Match has been found.
- if (new_pos > match_end) {
- ReplacementStringBuilder::AddSubjectSlice(builder, match_end, new_pos);
- }
- pos = new_pos;
- builder->Add(pattern);
- } else {
- break;
- }
- }
- if (pos + 1 < subject_length) {
- ReplacementStringBuilder::AddSubjectSlice(builder, pos + 1, subject_length);
- }
- *match_pos = pos;
- return true;
-}
-
-
-static bool SearchCharMultiple(Handle<String> subject,
- Handle<String> pattern,
- Handle<JSArray> last_match_info,
- FixedArrayBuilder* builder) {
- ASSERT(subject->IsFlat());
- ASSERT_EQ(1, pattern->length());
- uc16 pattern_char = pattern->Get(0);
- // Treating position before first as initial "previous match position".
- int match_pos = -1;
-
- for (;;) { // Break when search complete.
- builder->EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch);
- AssertNoAllocation no_gc;
- if (subject->IsAsciiRepresentation()) {
- if (pattern_char > String::kMaxAsciiCharCode) {
- break;
- }
- Vector<const char> subject_vector = subject->ToAsciiVector();
- char pattern_ascii_char = static_cast<char>(pattern_char);
- bool complete = SearchCharMultiple<const char>(subject_vector,
- *pattern,
- pattern_ascii_char,
- builder,
- &match_pos);
- if (complete) break;
- } else {
- Vector<const uc16> subject_vector = subject->ToUC16Vector();
- bool complete = SearchCharMultiple<const uc16>(subject_vector,
- *pattern,
- pattern_char,
- builder,
- &match_pos);
- if (complete) break;
- }
- }
-
- if (match_pos >= 0) {
- SetLastMatchInfoNoCaptures(subject,
- last_match_info,
- match_pos,
- match_pos + 1);
- return true;
- }
- return false; // No matches at all.
-}
-
-
template <typename schar, typename pchar>
static bool SearchStringMultiple(Vector<schar> subject,
String* pattern,
@@ -3526,7 +3350,6 @@
FixedArrayBuilder* builder) {
ASSERT(subject->IsFlat());
ASSERT(pattern->IsFlat());
- ASSERT(pattern->length() > 1);
// Treating as if a previous match was before first character.
int match_pos = -pattern->length();
@@ -3784,14 +3607,6 @@
if (regexp->TypeTag() == JSRegExp::ATOM) {
Handle<String> pattern(
String::cast(regexp->DataAt(JSRegExp::kAtomPatternIndex)));
- int pattern_length = pattern->length();
- if (pattern_length == 1) {
- if (SearchCharMultiple(subject, pattern, last_match_info, &builder)) {
- return *builder.ToJSArray(result_array);
- }
- return Heap::null_value();
- }
-
if (!pattern->IsFlat()) FlattenString(pattern);
if (SearchStringMultiple(subject, pattern, last_match_info, &builder)) {
return *builder.ToJSArray(result_array);
@@ -5392,24 +5207,7 @@
}
}
-template <typename schar>
-inline void FindCharIndices(Vector<const schar> subject,
- const schar pattern_char,
- ZoneList<int>* indices,
- unsigned int limit) {
- // Collect indices of pattern_char in subject, and the end-of-string index.
- // Stop after finding at most limit values.
- int index = 0;
- while (limit > 0) {
- index = SingleCharIndexOf(subject, pattern_char, index);
- if (index < 0) return;
- indices->Add(index);
- index++;
- limit--;
- }
-}
-
static Object* Runtime_StringSplit(Arguments args) {
ASSERT(args.length() == 3);
HandleScope handle_scope;
@@ -5434,49 +5232,33 @@
// Find (up to limit) indices of separator and end-of-string in subject
int initial_capacity = Min<uint32_t>(kMaxInitialListCapacity, limit);
ZoneList<int> indices(initial_capacity);
- if (pattern_length == 1) {
- // Special case, go directly to fast single-character split.
- AssertNoAllocation nogc;
- uc16 pattern_char = pattern->Get(0);
- if (subject->IsTwoByteRepresentation()) {
- FindCharIndices(subject->ToUC16Vector(), pattern_char,
- &indices,
- limit);
- } else if (pattern_char <= String::kMaxAsciiCharCode) {
- FindCharIndices(subject->ToAsciiVector(),
- static_cast<char>(pattern_char),
- &indices,
- limit);
+ if (!pattern->IsFlat()) FlattenString(pattern);
+ AssertNoAllocation nogc;
+ if (subject->IsAsciiRepresentation()) {
+ Vector<const char> subject_vector = subject->ToAsciiVector();
+ if (pattern->IsAsciiRepresentation()) {
+ FindStringIndices(subject_vector,
+ pattern->ToAsciiVector(),
+ &indices,
+ limit);
+ } else {
+ FindStringIndices(subject_vector,
+ pattern->ToUC16Vector(),
+ &indices,
+ limit);
}
} else {
- if (!pattern->IsFlat()) FlattenString(pattern);
- AssertNoAllocation nogc;
- if (subject->IsAsciiRepresentation()) {
- Vector<const char> subject_vector = subject->ToAsciiVector();
- if (pattern->IsAsciiRepresentation()) {
- FindStringIndices(subject_vector,
- pattern->ToAsciiVector(),
- &indices,
- limit);
- } else {
- FindStringIndices(subject_vector,
- pattern->ToUC16Vector(),
- &indices,
- limit);
- }
+ Vector<const uc16> subject_vector = subject->ToUC16Vector();
+ if (pattern->IsAsciiRepresentation()) {
+ FindStringIndices(subject_vector,
+ pattern->ToAsciiVector(),
+ &indices,
+ limit);
} else {
- Vector<const uc16> subject_vector = subject->ToUC16Vector();
- if (pattern->IsAsciiRepresentation()) {
- FindStringIndices(subject_vector,
- pattern->ToAsciiVector(),
- &indices,
- limit);
- } else {
- FindStringIndices(subject_vector,
- pattern->ToUC16Vector(),
- &indices,
- limit);
- }
+ FindStringIndices(subject_vector,
+ pattern->ToUC16Vector(),
+ &indices,
+ limit);
}
}
if (static_cast<uint32_t>(indices.length()) < limit) {
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698