Index: src/runtime.cc |
=================================================================== |
--- src/runtime.cc (revision 5377) |
+++ src/runtime.cc (working copy) |
@@ -2815,40 +2815,6 @@ |
} |
-template <typename schar> |
-static inline int SingleCharIndexOf(Vector<const schar> string, |
- schar pattern_char, |
- int start_index) { |
- if (sizeof(schar) == 1) { |
- const schar* pos = reinterpret_cast<const schar*>( |
- memchr(string.start() + start_index, |
- pattern_char, |
- string.length() - start_index)); |
- if (pos == NULL) return -1; |
- return static_cast<int>(pos - string.start()); |
- } |
- for (int i = start_index, n = string.length(); i < n; i++) { |
- if (pattern_char == string[i]) { |
- return i; |
- } |
- } |
- return -1; |
-} |
- |
- |
-template <typename schar> |
-static int SingleCharLastIndexOf(Vector<const schar> string, |
- schar pattern_char, |
- int start_index) { |
- for (int i = start_index; i >= 0; i--) { |
- if (pattern_char == string[i]) { |
- return i; |
- } |
- } |
- return -1; |
-} |
- |
- |
// Trivial string search for shorter strings. |
// On return, if "complete" is set to true, the return value is the |
// final result of searching for the patter in the subject. |
@@ -2860,6 +2826,7 @@ |
Vector<const pchar> pattern, |
int idx, |
bool* complete) { |
+ ASSERT(pattern.length() > 1); |
// Badness is a count of how much work we have done. When we have |
// done enough work we decide it's probably worth switching to a better |
// algorithm. |
@@ -2922,12 +2889,12 @@ |
if (subject[i] != pattern_first_char) continue; |
} |
int j = 1; |
- do { |
+ while (j < pattern.length()) { |
if (pattern[j] != subject[i+j]) { |
break; |
} |
j++; |
- } while (j < pattern.length()); |
+ } |
if (j == pattern.length()) { |
return i; |
} |
@@ -3029,54 +2996,15 @@ |
int subject_length = sub->length(); |
if (start_index + pattern_length > subject_length) return -1; |
- if (!sub->IsFlat()) { |
- FlattenString(sub); |
- } |
+ if (!sub->IsFlat()) FlattenString(sub); |
+ if (!pat->IsFlat()) FlattenString(pat); |
- // Searching for one specific character is common. For one |
- // character patterns linear search is necessary, so any smart |
- // algorithm is unnecessary overhead. |
- if (pattern_length == 1) { |
- AssertNoAllocation no_heap_allocation; // ensure vectors stay valid |
- String* seq_sub = *sub; |
- if (seq_sub->IsConsString()) { |
- seq_sub = ConsString::cast(seq_sub)->first(); |
- } |
- if (seq_sub->IsAsciiRepresentation()) { |
- uc16 pchar = pat->Get(0); |
- if (pchar > String::kMaxAsciiCharCode) { |
- return -1; |
- } |
- Vector<const char> ascii_vector = |
- seq_sub->ToAsciiVector().SubVector(start_index, subject_length); |
- const void* pos = memchr(ascii_vector.start(), |
- static_cast<const char>(pchar), |
- static_cast<size_t>(ascii_vector.length())); |
- if (pos == NULL) { |
- return -1; |
- } |
- return static_cast<int>(reinterpret_cast<const char*>(pos) |
- - ascii_vector.start() + start_index); |
- } |
- return SingleCharIndexOf(seq_sub->ToUC16Vector(), |
- pat->Get(0), |
- start_index); |
- } |
- |
- if (!pat->IsFlat()) { |
- FlattenString(pat); |
- } |
- |
AssertNoAllocation no_heap_allocation; // ensure vectors stay valid |
// Extract flattened substrings of cons strings before determining asciiness. |
String* seq_sub = *sub; |
- if (seq_sub->IsConsString()) { |
- seq_sub = ConsString::cast(seq_sub)->first(); |
- } |
+ if (seq_sub->IsConsString()) seq_sub = ConsString::cast(seq_sub)->first(); |
String* seq_pat = *pat; |
- if (seq_pat->IsConsString()) { |
- seq_pat = ConsString::cast(seq_pat)->first(); |
- } |
+ if (seq_pat->IsConsString()) seq_pat = ConsString::cast(seq_pat)->first(); |
// dispatch on type of strings |
if (seq_pat->IsAsciiRepresentation()) { |
@@ -3166,31 +3094,9 @@ |
return Smi::FromInt(start_index); |
} |
- if (!sub->IsFlat()) { |
- FlattenString(sub); |
- } |
+ if (!sub->IsFlat()) FlattenString(sub); |
+ if (!pat->IsFlat()) FlattenString(pat); |
- if (pat_length == 1) { |
- AssertNoAllocation no_heap_allocation; // ensure vectors stay valid |
- if (sub->IsAsciiRepresentation()) { |
- uc16 pchar = pat->Get(0); |
- if (pchar > String::kMaxAsciiCharCode) { |
- return Smi::FromInt(-1); |
- } |
- return Smi::FromInt(SingleCharLastIndexOf(sub->ToAsciiVector(), |
- static_cast<char>(pat->Get(0)), |
- start_index)); |
- } else { |
- return Smi::FromInt(SingleCharLastIndexOf(sub->ToUC16Vector(), |
- pat->Get(0), |
- start_index)); |
- } |
- } |
- |
- if (!pat->IsFlat()) { |
- FlattenString(pat); |
- } |
- |
AssertNoAllocation no_heap_allocation; // ensure vectors stay valid |
int position = -1; |
@@ -3367,88 +3273,6 @@ |
} |
-template <typename schar> |
-static bool SearchCharMultiple(Vector<schar> subject, |
- String* pattern, |
- schar pattern_char, |
- FixedArrayBuilder* builder, |
- int* match_pos) { |
- // Position of last match. |
- int pos = *match_pos; |
- int subject_length = subject.length(); |
- while (pos < subject_length) { |
- int match_end = pos + 1; |
- if (!builder->HasCapacity(kMaxBuilderEntriesPerRegExpMatch)) { |
- *match_pos = pos; |
- return false; |
- } |
- int new_pos = SingleCharIndexOf(subject, pattern_char, match_end); |
- if (new_pos >= 0) { |
- // Match has been found. |
- if (new_pos > match_end) { |
- ReplacementStringBuilder::AddSubjectSlice(builder, match_end, new_pos); |
- } |
- pos = new_pos; |
- builder->Add(pattern); |
- } else { |
- break; |
- } |
- } |
- if (pos + 1 < subject_length) { |
- ReplacementStringBuilder::AddSubjectSlice(builder, pos + 1, subject_length); |
- } |
- *match_pos = pos; |
- return true; |
-} |
- |
- |
-static bool SearchCharMultiple(Handle<String> subject, |
- Handle<String> pattern, |
- Handle<JSArray> last_match_info, |
- FixedArrayBuilder* builder) { |
- ASSERT(subject->IsFlat()); |
- ASSERT_EQ(1, pattern->length()); |
- uc16 pattern_char = pattern->Get(0); |
- // Treating position before first as initial "previous match position". |
- int match_pos = -1; |
- |
- for (;;) { // Break when search complete. |
- builder->EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch); |
- AssertNoAllocation no_gc; |
- if (subject->IsAsciiRepresentation()) { |
- if (pattern_char > String::kMaxAsciiCharCode) { |
- break; |
- } |
- Vector<const char> subject_vector = subject->ToAsciiVector(); |
- char pattern_ascii_char = static_cast<char>(pattern_char); |
- bool complete = SearchCharMultiple<const char>(subject_vector, |
- *pattern, |
- pattern_ascii_char, |
- builder, |
- &match_pos); |
- if (complete) break; |
- } else { |
- Vector<const uc16> subject_vector = subject->ToUC16Vector(); |
- bool complete = SearchCharMultiple<const uc16>(subject_vector, |
- *pattern, |
- pattern_char, |
- builder, |
- &match_pos); |
- if (complete) break; |
- } |
- } |
- |
- if (match_pos >= 0) { |
- SetLastMatchInfoNoCaptures(subject, |
- last_match_info, |
- match_pos, |
- match_pos + 1); |
- return true; |
- } |
- return false; // No matches at all. |
-} |
- |
- |
template <typename schar, typename pchar> |
static bool SearchStringMultiple(Vector<schar> subject, |
String* pattern, |
@@ -3526,7 +3350,6 @@ |
FixedArrayBuilder* builder) { |
ASSERT(subject->IsFlat()); |
ASSERT(pattern->IsFlat()); |
- ASSERT(pattern->length() > 1); |
// Treating as if a previous match was before first character. |
int match_pos = -pattern->length(); |
@@ -3784,14 +3607,6 @@ |
if (regexp->TypeTag() == JSRegExp::ATOM) { |
Handle<String> pattern( |
String::cast(regexp->DataAt(JSRegExp::kAtomPatternIndex))); |
- int pattern_length = pattern->length(); |
- if (pattern_length == 1) { |
- if (SearchCharMultiple(subject, pattern, last_match_info, &builder)) { |
- return *builder.ToJSArray(result_array); |
- } |
- return Heap::null_value(); |
- } |
- |
if (!pattern->IsFlat()) FlattenString(pattern); |
if (SearchStringMultiple(subject, pattern, last_match_info, &builder)) { |
return *builder.ToJSArray(result_array); |
@@ -5392,24 +5207,7 @@ |
} |
} |
-template <typename schar> |
-inline void FindCharIndices(Vector<const schar> subject, |
- const schar pattern_char, |
- ZoneList<int>* indices, |
- unsigned int limit) { |
- // Collect indices of pattern_char in subject, and the end-of-string index. |
- // Stop after finding at most limit values. |
- int index = 0; |
- while (limit > 0) { |
- index = SingleCharIndexOf(subject, pattern_char, index); |
- if (index < 0) return; |
- indices->Add(index); |
- index++; |
- limit--; |
- } |
-} |
- |
static Object* Runtime_StringSplit(Arguments args) { |
ASSERT(args.length() == 3); |
HandleScope handle_scope; |
@@ -5434,49 +5232,33 @@ |
// Find (up to limit) indices of separator and end-of-string in subject |
int initial_capacity = Min<uint32_t>(kMaxInitialListCapacity, limit); |
ZoneList<int> indices(initial_capacity); |
- if (pattern_length == 1) { |
- // Special case, go directly to fast single-character split. |
- AssertNoAllocation nogc; |
- uc16 pattern_char = pattern->Get(0); |
- if (subject->IsTwoByteRepresentation()) { |
- FindCharIndices(subject->ToUC16Vector(), pattern_char, |
- &indices, |
- limit); |
- } else if (pattern_char <= String::kMaxAsciiCharCode) { |
- FindCharIndices(subject->ToAsciiVector(), |
- static_cast<char>(pattern_char), |
- &indices, |
- limit); |
+ if (!pattern->IsFlat()) FlattenString(pattern); |
+ AssertNoAllocation nogc; |
+ if (subject->IsAsciiRepresentation()) { |
+ Vector<const char> subject_vector = subject->ToAsciiVector(); |
+ if (pattern->IsAsciiRepresentation()) { |
+ FindStringIndices(subject_vector, |
+ pattern->ToAsciiVector(), |
+ &indices, |
+ limit); |
+ } else { |
+ FindStringIndices(subject_vector, |
+ pattern->ToUC16Vector(), |
+ &indices, |
+ limit); |
} |
} else { |
- if (!pattern->IsFlat()) FlattenString(pattern); |
- AssertNoAllocation nogc; |
- if (subject->IsAsciiRepresentation()) { |
- Vector<const char> subject_vector = subject->ToAsciiVector(); |
- if (pattern->IsAsciiRepresentation()) { |
- FindStringIndices(subject_vector, |
- pattern->ToAsciiVector(), |
- &indices, |
- limit); |
- } else { |
- FindStringIndices(subject_vector, |
- pattern->ToUC16Vector(), |
- &indices, |
- limit); |
- } |
+ Vector<const uc16> subject_vector = subject->ToUC16Vector(); |
+ if (pattern->IsAsciiRepresentation()) { |
+ FindStringIndices(subject_vector, |
+ pattern->ToAsciiVector(), |
+ &indices, |
+ limit); |
} else { |
- Vector<const uc16> subject_vector = subject->ToUC16Vector(); |
- if (pattern->IsAsciiRepresentation()) { |
- FindStringIndices(subject_vector, |
- pattern->ToAsciiVector(), |
- &indices, |
- limit); |
- } else { |
- FindStringIndices(subject_vector, |
- pattern->ToUC16Vector(), |
- &indices, |
- limit); |
- } |
+ FindStringIndices(subject_vector, |
+ pattern->ToUC16Vector(), |
+ &indices, |
+ limit); |
} |
} |
if (static_cast<uint32_t>(indices.length()) < limit) { |