OLD | NEW |
---|---|
1 // Copyright 2006-2009 the V8 project authors. All rights reserved. | 1 // Copyright 2006-2009 the V8 project authors. All rights reserved. |
2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
4 // met: | 4 // met: |
5 // | 5 // |
6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
(...skipping 2118 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
2129 int good_suffix_shift_[kBMMaxShift + 1]; | 2129 int good_suffix_shift_[kBMMaxShift + 1]; |
2130 int* biased_suffixes_; | 2130 int* biased_suffixes_; |
2131 int* biased_good_suffix_shift_; | 2131 int* biased_good_suffix_shift_; |
2132 DISALLOW_COPY_AND_ASSIGN(BMGoodSuffixBuffers); | 2132 DISALLOW_COPY_AND_ASSIGN(BMGoodSuffixBuffers); |
2133 }; | 2133 }; |
2134 | 2134 |
2135 // buffers reused by BoyerMoore | 2135 // buffers reused by BoyerMoore |
2136 static int bad_char_occurrence[kBMAlphabetSize]; | 2136 static int bad_char_occurrence[kBMAlphabetSize]; |
2137 static BMGoodSuffixBuffers bmgs_buffers; | 2137 static BMGoodSuffixBuffers bmgs_buffers; |
2138 | 2138 |
2139 // State of the string match tables. | |
2140 // SIMPLE: No usable content in the buffers. | |
2141 // BOYER_MOORE_HORSPOOL: The bad_char_occurences table has been populated. | |
2142 // BOYER_MOORE: The bmgs_buffers tables have also been populated. | |
2143 // Whenever starting with a new needle, one should call InitializeStringSearch | |
2144 // to determine which search strategy to use, and in the case of a long-needle | |
2145 // strategy, the call also initializes the algorithm to SIMPLE. | |
2146 enum StringSearchAlgorithm { SIMPLE_SEARCH, BOYER_MOORE_HORSPOOL, BOYER_MOORE }; | |
2147 static StringSearchAlgorithm algorithm; | |
2148 | |
2149 | |
2139 // Compute the bad-char table for Boyer-Moore in the static buffer. | 2150 // Compute the bad-char table for Boyer-Moore in the static buffer. |
2140 template <typename pchar> | 2151 template <typename pchar> |
2141 static void BoyerMoorePopulateBadCharTable(Vector<const pchar> pattern, | 2152 static void BoyerMoorePopulateBadCharTable(Vector<const pchar> pattern) { |
2142 int start) { | 2153 // Only preprocess at most kBMMaxShift last characters of pattern. |
2154 int start = pattern.length() < kBMMaxShift ? 0 | |
2155 : pattern.length() - kBMMaxShift; | |
2143 // Run forwards to populate bad_char_table, so that *last* instance | 2156 // Run forwards to populate bad_char_table, so that *last* instance |
2144 // of character equivalence class is the one registered. | 2157 // of character equivalence class is the one registered. |
2145 // Notice: Doesn't include the last character. | 2158 // Notice: Doesn't include the last character. |
2146 int table_size = (sizeof(pchar) == 1) ? String::kMaxAsciiCharCode + 1 | 2159 int table_size = (sizeof(pchar) == 1) ? String::kMaxAsciiCharCode + 1 |
2147 : kBMAlphabetSize; | 2160 : kBMAlphabetSize; |
2148 if (start == 0) { // All patterns less than kBMMaxShift in length. | 2161 if (start == 0) { // All patterns less than kBMMaxShift in length. |
2149 memset(bad_char_occurrence, -1, table_size * sizeof(*bad_char_occurrence)); | 2162 memset(bad_char_occurrence, -1, table_size * sizeof(*bad_char_occurrence)); |
2150 } else { | 2163 } else { |
2151 for (int i = 0; i < table_size; i++) { | 2164 for (int i = 0; i < table_size; i++) { |
2152 bad_char_occurrence[i] = start - 1; | 2165 bad_char_occurrence[i] = start - 1; |
2153 } | 2166 } |
2154 } | 2167 } |
2155 for (int i = start; i < pattern.length() - 1; i++) { | 2168 for (int i = start; i < pattern.length() - 1; i++) { |
2156 pchar c = pattern[i]; | 2169 pchar c = pattern[i]; |
2157 int bucket = (sizeof(pchar) ==1) ? c : c % kBMAlphabetSize; | 2170 int bucket = (sizeof(pchar) ==1) ? c : c % kBMAlphabetSize; |
2158 bad_char_occurrence[bucket] = i; | 2171 bad_char_occurrence[bucket] = i; |
2159 } | 2172 } |
2160 } | 2173 } |
2161 | 2174 |
2175 | |
2162 template <typename pchar> | 2176 template <typename pchar> |
2163 static void BoyerMoorePopulateGoodSuffixTable(Vector<const pchar> pattern, | 2177 static void BoyerMoorePopulateGoodSuffixTable(Vector<const pchar> pattern) { |
2164 int start) { | |
2165 int m = pattern.length(); | 2178 int m = pattern.length(); |
2179 int start = m < kBMMaxShift ? 0 : m - kBMMaxShift; | |
2166 int len = m - start; | 2180 int len = m - start; |
2167 // Compute Good Suffix tables. | 2181 // Compute Good Suffix tables. |
2168 bmgs_buffers.init(m); | 2182 bmgs_buffers.init(m); |
2169 | 2183 |
2170 bmgs_buffers.shift(m-1) = 1; | 2184 bmgs_buffers.shift(m-1) = 1; |
2171 bmgs_buffers.suffix(m) = m + 1; | 2185 bmgs_buffers.suffix(m) = m + 1; |
2172 pchar last_char = pattern[m - 1]; | 2186 pchar last_char = pattern[m - 1]; |
2173 int suffix = m + 1; | 2187 int suffix = m + 1; |
2174 for (int i = m; i > start;) { | 2188 for (int i = m; i > start;) { |
2175 for (pchar c = pattern[i - 1]; suffix <= m && c != pattern[suffix - 1];) { | 2189 for (pchar c = pattern[i - 1]; suffix <= m && c != pattern[suffix - 1];) { |
(...skipping 26 matching lines...) Expand all Loading... | |
2202 if (bmgs_buffers.shift(i) == len) { | 2216 if (bmgs_buffers.shift(i) == len) { |
2203 bmgs_buffers.shift(i) = suffix - start; | 2217 bmgs_buffers.shift(i) = suffix - start; |
2204 } | 2218 } |
2205 if (i == suffix) { | 2219 if (i == suffix) { |
2206 suffix = bmgs_buffers.suffix(suffix); | 2220 suffix = bmgs_buffers.suffix(suffix); |
2207 } | 2221 } |
2208 } | 2222 } |
2209 } | 2223 } |
2210 } | 2224 } |
2211 | 2225 |
2226 | |
2212 template <typename schar, typename pchar> | 2227 template <typename schar, typename pchar> |
2213 static inline int CharOccurrence(int char_code) { | 2228 static inline int CharOccurrence(int char_code) { |
2214 if (sizeof(schar) == 1) { | 2229 if (sizeof(schar) == 1) { |
2215 return bad_char_occurrence[char_code]; | 2230 return bad_char_occurrence[char_code]; |
2216 } | 2231 } |
2217 if (sizeof(pchar) == 1) { | 2232 if (sizeof(pchar) == 1) { |
2218 if (char_code > String::kMaxAsciiCharCode) { | 2233 if (char_code > String::kMaxAsciiCharCode) { |
2219 return -1; | 2234 return -1; |
2220 } | 2235 } |
2221 return bad_char_occurrence[char_code]; | 2236 return bad_char_occurrence[char_code]; |
2222 } | 2237 } |
2223 return bad_char_occurrence[char_code % kBMAlphabetSize]; | 2238 return bad_char_occurrence[char_code % kBMAlphabetSize]; |
2224 } | 2239 } |
2225 | 2240 |
2241 | |
2226 // Restricted simplified Boyer-Moore string matching. | 2242 // Restricted simplified Boyer-Moore string matching. |
2227 // Uses only the bad-shift table of Boyer-Moore and only uses it | 2243 // Uses only the bad-shift table of Boyer-Moore and only uses it |
2228 // for the character compared to the last character of the needle. | 2244 // for the character compared to the last character of the needle. |
2229 template <typename schar, typename pchar> | 2245 template <typename schar, typename pchar> |
2230 static int BoyerMooreHorspool(Vector<const schar> subject, | 2246 static int BoyerMooreHorspool(Vector<const schar> subject, |
2231 Vector<const pchar> pattern, | 2247 Vector<const pchar> pattern, |
2232 int start_index, | 2248 int start_index, |
2233 bool* complete) { | 2249 bool* complete) { |
2250 ASSERT(algorithm <= BOYER_MOORE_HORSPOOL); | |
2234 int n = subject.length(); | 2251 int n = subject.length(); |
2235 int m = pattern.length(); | 2252 int m = pattern.length(); |
2236 // Only preprocess at most kBMMaxShift last characters of pattern. | |
2237 int start = m < kBMMaxShift ? 0 : m - kBMMaxShift; | |
2238 | 2253 |
2239 BoyerMoorePopulateBadCharTable(pattern, start); | 2254 int badness = -m; |
2240 | 2255 |
2241 int badness = -m; // How bad we are doing without a good-suffix table. | 2256 // How bad we are doing without a good-suffix table. |
2242 int idx; // No matches found prior to this index. | 2257 int idx; // No matches found prior to this index. |
2243 pchar last_char = pattern[m - 1]; | 2258 pchar last_char = pattern[m - 1]; |
2244 int last_char_shift = m - 1 - CharOccurrence<schar, pchar>(last_char); | 2259 int last_char_shift = m - 1 - CharOccurrence<schar, pchar>(last_char); |
2245 // Perform search | 2260 // Perform search |
2246 for (idx = start_index; idx <= n - m;) { | 2261 for (idx = start_index; idx <= n - m;) { |
2247 int j = m - 1; | 2262 int j = m - 1; |
2248 int c; | 2263 int c; |
2249 while (last_char != (c = subject[idx + j])) { | 2264 while (last_char != (c = subject[idx + j])) { |
2250 int bc_occ = CharOccurrence<schar, pchar>(c); | 2265 int bc_occ = CharOccurrence<schar, pchar>(c); |
2251 int shift = j - bc_occ; | 2266 int shift = j - bc_occ; |
2252 idx += shift; | 2267 idx += shift; |
2253 badness += 1 - shift; // at most zero, so badness cannot increase. | 2268 badness += 1 - shift; // at most zero, so badness cannot increase. |
2254 if (idx > n - m) { | 2269 if (idx > n - m) { |
2270 // If we could just break out of the for loop here. | |
Erik Corry
2010/03/15 12:57:48
What would then happen?
| |
2255 *complete = true; | 2271 *complete = true; |
2256 return -1; | 2272 return -1; |
2257 } | 2273 } |
2258 } | 2274 } |
2259 j--; | 2275 j--; |
2260 while (j >= 0 && pattern[j] == (subject[idx + j])) j--; | 2276 while (j >= 0 && pattern[j] == (subject[idx + j])) j--; |
2261 if (j < 0) { | 2277 if (j < 0) { |
2262 *complete = true; | 2278 *complete = true; |
2263 return idx; | 2279 return idx; |
2264 } else { | 2280 } else { |
(...skipping 11 matching lines...) Expand all Loading... | |
2276 } | 2292 } |
2277 *complete = true; | 2293 *complete = true; |
2278 return -1; | 2294 return -1; |
2279 } | 2295 } |
2280 | 2296 |
2281 | 2297 |
2282 template <typename schar, typename pchar> | 2298 template <typename schar, typename pchar> |
2283 static int BoyerMooreIndexOf(Vector<const schar> subject, | 2299 static int BoyerMooreIndexOf(Vector<const schar> subject, |
2284 Vector<const pchar> pattern, | 2300 Vector<const pchar> pattern, |
2285 int idx) { | 2301 int idx) { |
2302 ASSERT(algorithm <= BOYER_MOORE); | |
2286 int n = subject.length(); | 2303 int n = subject.length(); |
2287 int m = pattern.length(); | 2304 int m = pattern.length(); |
2288 // Only preprocess at most kBMMaxShift last characters of pattern. | 2305 // Only preprocess at most kBMMaxShift last characters of pattern. |
2289 int start = m < kBMMaxShift ? 0 : m - kBMMaxShift; | 2306 int start = m < kBMMaxShift ? 0 : m - kBMMaxShift; |
2290 | 2307 |
2291 // Build the Good Suffix table and continue searching. | |
2292 BoyerMoorePopulateGoodSuffixTable(pattern, start); | |
2293 pchar last_char = pattern[m - 1]; | 2308 pchar last_char = pattern[m - 1]; |
2294 // Continue search from i. | 2309 // Continue search from i. |
2295 while (idx <= n - m) { | 2310 while (idx <= n - m) { |
2296 int j = m - 1; | 2311 int j = m - 1; |
2297 schar c; | 2312 schar c; |
2298 while (last_char != (c = subject[idx + j])) { | 2313 while (last_char != (c = subject[idx + j])) { |
2299 int shift = j - CharOccurrence<schar, pchar>(c); | 2314 int shift = j - CharOccurrence<schar, pchar>(c); |
2300 idx += shift; | 2315 idx += shift; |
2301 if (idx > n - m) { | 2316 if (idx > n - m) { |
2302 return -1; | 2317 return -1; |
(...skipping 15 matching lines...) Expand all Loading... | |
2318 } | 2333 } |
2319 idx += shift; | 2334 idx += shift; |
2320 } | 2335 } |
2321 } | 2336 } |
2322 | 2337 |
2323 return -1; | 2338 return -1; |
2324 } | 2339 } |
2325 | 2340 |
2326 | 2341 |
2327 template <typename schar> | 2342 template <typename schar> |
2328 static int SingleCharIndexOf(Vector<const schar> string, | 2343 static inline int SingleCharIndexOf(Vector<const schar> string, |
2329 schar pattern_char, | 2344 schar pattern_char, |
2330 int start_index) { | 2345 int start_index) { |
2331 for (int i = start_index, n = string.length(); i < n; i++) { | 2346 for (int i = start_index, n = string.length(); i < n; i++) { |
2332 if (pattern_char == string[i]) { | 2347 if (pattern_char == string[i]) { |
2333 return i; | 2348 return i; |
2334 } | 2349 } |
2335 } | 2350 } |
2336 return -1; | 2351 return -1; |
2337 } | 2352 } |
2338 | 2353 |
2339 | 2354 |
2340 template <typename schar> | 2355 template <typename schar> |
(...skipping 16 matching lines...) Expand all Loading... | |
2357 // further checking should start, i.e., it's guaranteed that the pattern | 2372 // further checking should start, i.e., it's guaranteed that the pattern |
2358 // does not occur at a position prior to the returned index. | 2373 // does not occur at a position prior to the returned index. |
2359 template <typename pchar, typename schar> | 2374 template <typename pchar, typename schar> |
2360 static int SimpleIndexOf(Vector<const schar> subject, | 2375 static int SimpleIndexOf(Vector<const schar> subject, |
2361 Vector<const pchar> pattern, | 2376 Vector<const pchar> pattern, |
2362 int idx, | 2377 int idx, |
2363 bool* complete) { | 2378 bool* complete) { |
2364 // Badness is a count of how much work we have done. When we have | 2379 // Badness is a count of how much work we have done. When we have |
2365 // done enough work we decide it's probably worth switching to a better | 2380 // done enough work we decide it's probably worth switching to a better |
2366 // algorithm. | 2381 // algorithm. |
2367 int badness = -10 - (pattern.length() << 2); | 2382 |
2368 // We know our pattern is at least 2 characters, we cache the first so | 2383 // We know our pattern is at least 2 characters, we cache the first so |
2369 // the common case of the first character not matching is faster. | 2384 // the common case of the first character not matching is faster. |
2370 pchar pattern_first_char = pattern[0]; | 2385 pchar pattern_first_char = pattern[0]; |
2371 | 2386 int badness = -10 - pattern.length() << 2; |
2372 for (int i = idx, n = subject.length() - pattern.length(); i <= n; i++) { | 2387 for (int i = idx, n = subject.length() - pattern.length(); i <= n; i++) { |
2373 badness++; | 2388 badness++; |
2374 if (badness > 0) { | 2389 if (badness > 0) { |
2375 *complete = false; | 2390 *complete = false; |
2376 return i; | 2391 return i; |
2377 } | 2392 } |
2378 if (subject[i] != pattern_first_char) continue; | 2393 if (subject[i] != pattern_first_char) continue; |
2379 int j = 1; | 2394 int j = 1; |
2380 do { | 2395 do { |
2381 if (pattern[j] != subject[i+j]) { | 2396 if (pattern[j] != subject[i+j]) { |
(...skipping 27 matching lines...) Expand all Loading... | |
2409 j++; | 2424 j++; |
2410 } while (j < pattern.length()); | 2425 } while (j < pattern.length()); |
2411 if (j == pattern.length()) { | 2426 if (j == pattern.length()) { |
2412 return i; | 2427 return i; |
2413 } | 2428 } |
2414 } | 2429 } |
2415 return -1; | 2430 return -1; |
2416 } | 2431 } |
2417 | 2432 |
2418 | 2433 |
2419 // Dispatch to different algorithms. | 2434 // Strategy for searching for a string in another string. |
2420 template <typename schar, typename pchar> | 2435 enum StringSearchStrategy { SEARCH_FAIL, SEARCH_SHORT, SEARCH_LONG }; |
2421 static int StringMatchStrategy(Vector<const schar> sub, | 2436 |
2422 Vector<const pchar> pat, | 2437 |
2423 int start_index) { | 2438 template <typename pchar> |
2439 static inline StringSearchStrategy InitializeStringSearch( | |
2440 Vector<const pchar> pat, bool ascii_subject) { | |
2424 ASSERT(pat.length() > 1); | 2441 ASSERT(pat.length() > 1); |
2425 | |
2426 // We have an ASCII haystack and a non-ASCII needle. Check if there | 2442 // We have an ASCII haystack and a non-ASCII needle. Check if there |
2427 // really is a non-ASCII character in the needle and bail out if there | 2443 // really is a non-ASCII character in the needle and bail out if there |
2428 // is. | 2444 // is. |
2429 if (sizeof(schar) == 1 && sizeof(pchar) > 1) { | 2445 if (ascii_subject && sizeof(pchar) > 1) { |
2430 for (int i = 0; i < pat.length(); i++) { | 2446 for (int i = 0; i < pat.length(); i++) { |
2431 uc16 c = pat[i]; | 2447 uc16 c = pat[i]; |
2432 if (c > String::kMaxAsciiCharCode) { | 2448 if (c > String::kMaxAsciiCharCode) { |
2433 return -1; | 2449 return SEARCH_FAIL; |
2434 } | 2450 } |
2435 } | 2451 } |
2436 } | 2452 } |
2437 if (pat.length() < kBMMinPatternLength) { | 2453 if (pat.length() < kBMMinPatternLength) { |
2438 // We don't believe fancy searching can ever be more efficient. | 2454 return SEARCH_SHORT; |
2439 // The max shift of Boyer-Moore on a pattern of this length does | |
2440 // not compensate for the overhead. | |
2441 return SimpleIndexOf(sub, pat, start_index); | |
2442 } | 2455 } |
2456 algorithm = SIMPLE_SEARCH; | |
2457 return SEARCH_LONG; | |
2458 } | |
2459 | |
2460 | |
2461 // Dispatch long needle searches to different algorithms. | |
2462 template <typename schar, typename pchar> | |
2463 static int ComplexIndexOf(Vector<const schar> sub, | |
2464 Vector<const pchar> pat, | |
2465 int start_index) { | |
2466 ASSERT(pat.length() >= kBMMinPatternLength); | |
2443 // Try algorithms in order of increasing setup cost and expected performance. | 2467 // Try algorithms in order of increasing setup cost and expected performance. |
2444 bool complete; | 2468 bool complete; |
2445 int idx = SimpleIndexOf(sub, pat, start_index, &complete); | 2469 int idx = start_index; |
2446 if (complete) return idx; | 2470 switch (algorithm) { |
2447 idx = BoyerMooreHorspool(sub, pat, idx, &complete); | 2471 case SIMPLE_SEARCH: |
2448 if (complete) return idx; | 2472 idx = SimpleIndexOf(sub, pat, idx, &complete); |
2449 return BoyerMooreIndexOf(sub, pat, idx); | 2473 if (complete) return idx; |
2474 BoyerMoorePopulateBadCharTable(pat); | |
2475 algorithm = BOYER_MOORE_HORSPOOL; | |
2476 // FALLTHROUGH. | |
Erik Corry
2010/03/15 12:57:48
Does the linter insist on this annoying capitaliza
| |
2477 case BOYER_MOORE_HORSPOOL: | |
2478 idx = BoyerMooreHorspool(sub, pat, idx, &complete); | |
2479 if (complete) return idx; | |
2480 // Build the Good Suffix table and continue searching. | |
2481 BoyerMoorePopulateGoodSuffixTable(pat); | |
2482 algorithm = BOYER_MOORE; | |
2483 // FALLTHROUGH. | |
2484 case BOYER_MOORE: | |
2485 return BoyerMooreIndexOf(sub, pat, idx); | |
2486 } | |
2487 UNREACHABLE(); | |
2488 return -1; | |
2450 } | 2489 } |
2451 | 2490 |
2491 | |
2492 // Dispatch to different search strategies for a single search. | |
2493 // If searching multiple times on the same needle, the search | |
2494 // strategy should only be computed once and then dispatch to different | |
2495 // loops. | |
2496 template <typename schar, typename pchar> | |
2497 static int StringSearch(Vector<const schar> sub, | |
2498 Vector<const pchar> pat, | |
2499 int start_index) { | |
2500 bool ascii_subject = (sizeof(schar) == 1); | |
2501 StringSearchStrategy strategy = InitializeStringSearch(pat, ascii_subject); | |
2502 switch (strategy) { | |
2503 case SEARCH_FAIL: return -1; | |
2504 case SEARCH_SHORT: return SimpleIndexOf(sub, pat, start_index); | |
2505 case SEARCH_LONG: return ComplexIndexOf(sub, pat, start_index); | |
2506 } | |
2507 UNREACHABLE(); | |
2508 return -1; | |
2509 } | |
2510 | |
2511 | |
2452 // Perform string match of pattern on subject, starting at start index. | 2512 // Perform string match of pattern on subject, starting at start index. |
2453 // Caller must ensure that 0 <= start_index <= sub->length(), | 2513 // Caller must ensure that 0 <= start_index <= sub->length(), |
2454 // and should check that pat->length() + start_index <= sub->length() | 2514 // and should check that pat->length() + start_index <= sub->length() |
2455 int Runtime::StringMatch(Handle<String> sub, | 2515 int Runtime::StringMatch(Handle<String> sub, |
2456 Handle<String> pat, | 2516 Handle<String> pat, |
2457 int start_index) { | 2517 int start_index) { |
2458 ASSERT(0 <= start_index); | 2518 ASSERT(0 <= start_index); |
2459 ASSERT(start_index <= sub->length()); | 2519 ASSERT(start_index <= sub->length()); |
2460 | 2520 |
2461 int pattern_length = pat->length(); | 2521 int pattern_length = pat->length(); |
2462 if (pattern_length == 0) return start_index; | 2522 if (pattern_length == 0) return start_index; |
2463 | 2523 |
2464 int subject_length = sub->length(); | 2524 int subject_length = sub->length(); |
2465 if (start_index + pattern_length > subject_length) return -1; | 2525 if (start_index + pattern_length > subject_length) return -1; |
2466 | 2526 |
2467 if (!sub->IsFlat()) { | 2527 if (!sub->IsFlat()) { |
2468 FlattenString(sub); | 2528 FlattenString(sub); |
2469 } | 2529 } |
2530 | |
2470 // Searching for one specific character is common. For one | 2531 // Searching for one specific character is common. For one |
2471 // character patterns linear search is necessary, so any smart | 2532 // character patterns linear search is necessary, so any smart |
2472 // algorithm is unnecessary overhead. | 2533 // algorithm is unnecessary overhead. |
2473 if (pattern_length == 1) { | 2534 if (pattern_length == 1) { |
2474 AssertNoAllocation no_heap_allocation; // ensure vectors stay valid | 2535 AssertNoAllocation no_heap_allocation; // ensure vectors stay valid |
2475 if (sub->IsAsciiRepresentation()) { | 2536 if (sub->IsAsciiRepresentation()) { |
2476 uc16 pchar = pat->Get(0); | 2537 uc16 pchar = pat->Get(0); |
2477 if (pchar > String::kMaxAsciiCharCode) { | 2538 if (pchar > String::kMaxAsciiCharCode) { |
2478 return -1; | 2539 return -1; |
2479 } | 2540 } |
(...skipping 13 matching lines...) Expand all Loading... | |
2493 | 2554 |
2494 if (!pat->IsFlat()) { | 2555 if (!pat->IsFlat()) { |
2495 FlattenString(pat); | 2556 FlattenString(pat); |
2496 } | 2557 } |
2497 | 2558 |
2498 AssertNoAllocation no_heap_allocation; // ensure vectors stay valid | 2559 AssertNoAllocation no_heap_allocation; // ensure vectors stay valid |
2499 // dispatch on type of strings | 2560 // dispatch on type of strings |
2500 if (pat->IsAsciiRepresentation()) { | 2561 if (pat->IsAsciiRepresentation()) { |
2501 Vector<const char> pat_vector = pat->ToAsciiVector(); | 2562 Vector<const char> pat_vector = pat->ToAsciiVector(); |
2502 if (sub->IsAsciiRepresentation()) { | 2563 if (sub->IsAsciiRepresentation()) { |
2503 return StringMatchStrategy(sub->ToAsciiVector(), pat_vector, start_index); | 2564 return StringSearch(sub->ToAsciiVector(), pat_vector, start_index); |
2504 } | 2565 } |
2505 return StringMatchStrategy(sub->ToUC16Vector(), pat_vector, start_index); | 2566 return StringSearch(sub->ToUC16Vector(), pat_vector, start_index); |
2506 } | 2567 } |
2507 Vector<const uc16> pat_vector = pat->ToUC16Vector(); | 2568 Vector<const uc16> pat_vector = pat->ToUC16Vector(); |
2508 if (sub->IsAsciiRepresentation()) { | 2569 if (sub->IsAsciiRepresentation()) { |
2509 return StringMatchStrategy(sub->ToAsciiVector(), pat_vector, start_index); | 2570 return StringSearch(sub->ToAsciiVector(), pat_vector, start_index); |
2510 } | 2571 } |
2511 return StringMatchStrategy(sub->ToUC16Vector(), pat_vector, start_index); | 2572 return StringSearch(sub->ToUC16Vector(), pat_vector, start_index); |
2512 } | 2573 } |
2513 | 2574 |
2514 | 2575 |
2515 static Object* Runtime_StringIndexOf(Arguments args) { | 2576 static Object* Runtime_StringIndexOf(Arguments args) { |
2516 HandleScope scope; // create a new handle scope | 2577 HandleScope scope; // create a new handle scope |
2517 ASSERT(args.length() == 3); | 2578 ASSERT(args.length() == 3); |
2518 | 2579 |
2519 CONVERT_ARG_CHECKED(String, sub, 0); | 2580 CONVERT_ARG_CHECKED(String, sub, 0); |
2520 CONVERT_ARG_CHECKED(String, pat, 1); | 2581 CONVERT_ARG_CHECKED(String, pat, 1); |
2521 | 2582 |
(...skipping 1734 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
4256 int right = length; | 4317 int right = length; |
4257 if (trimRight) { | 4318 if (trimRight) { |
4258 while (right > left && IsTrimWhiteSpace(s->Get(right - 1))) { | 4319 while (right > left && IsTrimWhiteSpace(s->Get(right - 1))) { |
4259 right--; | 4320 right--; |
4260 } | 4321 } |
4261 } | 4322 } |
4262 return s->SubString(left, right); | 4323 return s->SubString(left, right); |
4263 } | 4324 } |
4264 | 4325 |
4265 | 4326 |
4327 template <typename schar, typename pchar> | |
4328 void FindStringIndices(Vector<const schar> subject, | |
4329 Vector<const pchar> pattern, | |
4330 ZoneList<int>* indices, | |
4331 unsigned int limit) { | |
4332 ASSERT(limit > 0); | |
4333 // Collect indices of pattern in subject, and the end-of-string index. | |
4334 // Stop after finding at most limit values. | |
4335 StringSearchStrategy strategy = | |
4336 InitializeStringSearch(pattern, sizeof(schar) == 1); | |
4337 switch (strategy) { | |
4338 case SEARCH_FAIL: return; | |
4339 case SEARCH_SHORT: { | |
4340 int pattern_length = pattern.length(); | |
4341 int index = 0; | |
4342 while (limit > 0) { | |
4343 index = SimpleIndexOf(subject, pattern, index); | |
4344 if (index < 0) return; | |
4345 indices->Add(index); | |
4346 index += pattern_length; | |
4347 limit--; | |
4348 } | |
4349 return; | |
4350 } | |
4351 case SEARCH_LONG: { | |
4352 int pattern_length = pattern.length(); | |
4353 int index = 0; | |
4354 while (limit > 0) { | |
4355 index = ComplexIndexOf(subject, pattern, index); | |
4356 if (index < 0) return; | |
4357 indices->Add(index); | |
4358 index += pattern_length; | |
4359 limit--; | |
4360 } | |
4361 return; | |
4362 } | |
4363 default: | |
4364 UNREACHABLE(); | |
4365 return; | |
4366 } | |
4367 } | |
4368 | |
4369 template <typename schar> | |
4370 void inline FindCharIndices(Vector<const schar> subject, | |
4371 const schar pattern_char, | |
4372 ZoneList<int>* indices, | |
4373 unsigned int limit) { | |
4374 // Collect indices of pattern_char in subject, and the end-of-string index. | |
4375 // Stop after finding at most limit values. | |
4376 int index = 0; | |
4377 while (limit > 0) { | |
4378 index = SingleCharIndexOf(subject, pattern_char, index); | |
4379 if (index < 0) return; | |
4380 indices->Add(index); | |
4381 index++; | |
4382 limit--; | |
4383 } | |
4384 } | |
4385 | |
4386 | |
4387 static Object* Runtime_StringSplit(Arguments args) { | |
4388 ASSERT(args.length() == 3); | |
4389 HandleScope handle_scope; | |
4390 CONVERT_ARG_CHECKED(String, subject, 0); | |
4391 CONVERT_ARG_CHECKED(String, pattern, 1); | |
4392 CONVERT_NUMBER_CHECKED(uint32_t, limit, Uint32, args[2]); | |
4393 | |
4394 int subject_length = subject->length(); | |
4395 int pattern_length = pattern->length(); | |
4396 | |
4397 if (!subject->IsFlat()) FlattenString(subject); | |
4398 | |
4399 static const int kMaxInitialListCapacity = 16; | |
4400 | |
4401 ZoneScope scope(DELETE_ON_EXIT); | |
4402 | |
4403 // Find (up to limit) indices of separator and end-of-string in subject | |
4404 int initial_capacity = Min<uint32_t>(kMaxInitialListCapacity, limit); | |
4405 ZoneList<int> indices(initial_capacity); | |
4406 if (pattern_length == 1) { | |
4407 // Special case, go directly to fast single-character split. | |
4408 AssertNoAllocation nogc; | |
4409 uc16 pattern_char = pattern->Get(0); | |
4410 if (subject->IsTwoByteRepresentation()) { | |
4411 FindCharIndices(subject->ToUC16Vector(), pattern_char, | |
4412 &indices, | |
4413 limit); | |
4414 } else if (pattern_char <= String::kMaxAsciiCharCode) { | |
4415 FindCharIndices(subject->ToAsciiVector(), | |
4416 static_cast<char>(pattern_char), | |
4417 &indices, | |
4418 limit); | |
4419 } | |
4420 } else { | |
4421 if (!pattern->IsFlat()) FlattenString(pattern); | |
4422 AssertNoAllocation nogc; | |
4423 if (subject->IsAsciiRepresentation()) { | |
4424 Vector<const char> subject_vector = subject->ToAsciiVector(); | |
4425 if (pattern->IsAsciiRepresentation()) { | |
4426 FindStringIndices(subject_vector, | |
4427 pattern->ToAsciiVector(), | |
4428 &indices, | |
4429 limit); | |
4430 } else { | |
4431 FindStringIndices(subject_vector, | |
4432 pattern->ToUC16Vector(), | |
4433 &indices, | |
4434 limit); | |
4435 } | |
4436 } else { | |
4437 Vector<const uc16> subject_vector = subject->ToUC16Vector(); | |
4438 if (pattern->IsAsciiRepresentation()) { | |
4439 FindStringIndices(subject_vector, | |
4440 pattern->ToAsciiVector(), | |
4441 &indices, | |
4442 limit); | |
4443 } else { | |
4444 FindStringIndices(subject_vector, | |
4445 pattern->ToUC16Vector(), | |
4446 &indices, | |
4447 limit); | |
4448 } | |
4449 } | |
4450 } | |
4451 if (static_cast<uint32_t>(indices.length()) < limit) { | |
4452 indices.Add(subject_length); | |
4453 } | |
4454 // The list indices now contains the end of each part to create. | |
4455 | |
4456 | |
4457 // Create JSArray of substrings separated by separator. | |
4458 int part_count = indices.length(); | |
4459 | |
4460 Handle<JSArray> result = Factory::NewJSArray(part_count); | |
4461 result->set_length(Smi::FromInt(part_count)); | |
4462 | |
4463 ASSERT(result->HasFastElements()); | |
4464 | |
4465 if (part_count == 1 && indices.at(0) == subject_length) { | |
4466 FixedArray::cast(result->elements())->set(0, *subject); | |
4467 return *result; | |
4468 } | |
4469 | |
4470 Handle<FixedArray> elements(FixedArray::cast(result->elements())); | |
4471 int part_start = 0; | |
4472 for (int i = 0; i < part_count; i++) { | |
4473 HandleScope local_loop_handle; | |
4474 int part_end = indices.at(i); | |
4475 Handle<String> substring = | |
4476 Factory::NewSubString(subject, part_start, part_end); | |
4477 elements->set(i, *substring); | |
4478 part_start = part_end + pattern_length; | |
4479 } | |
4480 | |
4481 return *result; | |
4482 } | |
4483 | |
4484 | |
4266 // Copies ascii characters to the given fixed array looking up | 4485 // Copies ascii characters to the given fixed array looking up |
4267 // one-char strings in the cache. Gives up on the first char that is | 4486 // one-char strings in the cache. Gives up on the first char that is |
4268 // not in the cache and fills the remainder with smi zeros. Returns | 4487 // not in the cache and fills the remainder with smi zeros. Returns |
4269 // the length of the successfully copied prefix. | 4488 // the length of the successfully copied prefix. |
4270 static int CopyCachedAsciiCharsToArray(const char* chars, | 4489 static int CopyCachedAsciiCharsToArray(const char* chars, |
4271 FixedArray* elements, | 4490 FixedArray* elements, |
4272 int length) { | 4491 int length) { |
4273 AssertNoAllocation nogc; | 4492 AssertNoAllocation nogc; |
4274 FixedArray* ascii_cache = Heap::single_character_string_cache(); | 4493 FixedArray* ascii_cache = Heap::single_character_string_cache(); |
4275 Object* undefined = Heap::undefined_value(); | 4494 Object* undefined = Heap::undefined_value(); |
(...skipping 4521 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
8797 } else { | 9016 } else { |
8798 // Handle last resort GC and make sure to allow future allocations | 9017 // Handle last resort GC and make sure to allow future allocations |
8799 // to grow the heap without causing GCs (if possible). | 9018 // to grow the heap without causing GCs (if possible). |
8800 Counters::gc_last_resort_from_js.Increment(); | 9019 Counters::gc_last_resort_from_js.Increment(); |
8801 Heap::CollectAllGarbage(false); | 9020 Heap::CollectAllGarbage(false); |
8802 } | 9021 } |
8803 } | 9022 } |
8804 | 9023 |
8805 | 9024 |
8806 } } // namespace v8::internal | 9025 } } // namespace v8::internal |
OLD | NEW |