Chromium Code Reviews| OLD | NEW |
|---|---|
| 1 // Copyright 2006-2009 the V8 project authors. All rights reserved. | 1 // Copyright 2006-2009 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 2118 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 2129 int good_suffix_shift_[kBMMaxShift + 1]; | 2129 int good_suffix_shift_[kBMMaxShift + 1]; |
| 2130 int* biased_suffixes_; | 2130 int* biased_suffixes_; |
| 2131 int* biased_good_suffix_shift_; | 2131 int* biased_good_suffix_shift_; |
| 2132 DISALLOW_COPY_AND_ASSIGN(BMGoodSuffixBuffers); | 2132 DISALLOW_COPY_AND_ASSIGN(BMGoodSuffixBuffers); |
| 2133 }; | 2133 }; |
| 2134 | 2134 |
| 2135 // buffers reused by BoyerMoore | 2135 // buffers reused by BoyerMoore |
| 2136 static int bad_char_occurrence[kBMAlphabetSize]; | 2136 static int bad_char_occurrence[kBMAlphabetSize]; |
| 2137 static BMGoodSuffixBuffers bmgs_buffers; | 2137 static BMGoodSuffixBuffers bmgs_buffers; |
| 2138 | 2138 |
| 2139 // State of the string match tables. | |
| 2140 // SIMPLE: No usable content in the buffers. | |
| 2141 // BOYER_MOORE_HORSPOOL: The bad_char_occurences table has been populated. | |
| 2142 // BOYER_MOORE: The bmgs_buffers tables have also been populated. | |
| 2143 // Whenever starting with a new needle, one should call InitializeStringSearch | |
| 2144 // to determine which search strategy to use, and in the case of a long-needle | |
| 2145 // strategy, the call also initializes the algorithm to SIMPLE. | |
| 2146 enum StringSearchAlgorithm { SIMPLE_SEARCH, BOYER_MOORE_HORSPOOL, BOYER_MOORE }; | |
| 2147 static StringSearchAlgorithm algorithm; | |
| 2148 | |
| 2149 | |
| 2139 // Compute the bad-char table for Boyer-Moore in the static buffer. | 2150 // Compute the bad-char table for Boyer-Moore in the static buffer. |
| 2140 template <typename pchar> | 2151 template <typename pchar> |
| 2141 static void BoyerMoorePopulateBadCharTable(Vector<const pchar> pattern, | 2152 static void BoyerMoorePopulateBadCharTable(Vector<const pchar> pattern) { |
| 2142 int start) { | 2153 // Only preprocess at most kBMMaxShift last characters of pattern. |
| 2154 int start = pattern.length() < kBMMaxShift ? 0 | |
| 2155 : pattern.length() - kBMMaxShift; | |
| 2143 // Run forwards to populate bad_char_table, so that *last* instance | 2156 // Run forwards to populate bad_char_table, so that *last* instance |
| 2144 // of character equivalence class is the one registered. | 2157 // of character equivalence class is the one registered. |
| 2145 // Notice: Doesn't include the last character. | 2158 // Notice: Doesn't include the last character. |
| 2146 int table_size = (sizeof(pchar) == 1) ? String::kMaxAsciiCharCode + 1 | 2159 int table_size = (sizeof(pchar) == 1) ? String::kMaxAsciiCharCode + 1 |
| 2147 : kBMAlphabetSize; | 2160 : kBMAlphabetSize; |
| 2148 if (start == 0) { // All patterns less than kBMMaxShift in length. | 2161 if (start == 0) { // All patterns less than kBMMaxShift in length. |
| 2149 memset(bad_char_occurrence, -1, table_size * sizeof(*bad_char_occurrence)); | 2162 memset(bad_char_occurrence, -1, table_size * sizeof(*bad_char_occurrence)); |
| 2150 } else { | 2163 } else { |
| 2151 for (int i = 0; i < table_size; i++) { | 2164 for (int i = 0; i < table_size; i++) { |
| 2152 bad_char_occurrence[i] = start - 1; | 2165 bad_char_occurrence[i] = start - 1; |
| 2153 } | 2166 } |
| 2154 } | 2167 } |
| 2155 for (int i = start; i < pattern.length() - 1; i++) { | 2168 for (int i = start; i < pattern.length() - 1; i++) { |
| 2156 pchar c = pattern[i]; | 2169 pchar c = pattern[i]; |
| 2157 int bucket = (sizeof(pchar) ==1) ? c : c % kBMAlphabetSize; | 2170 int bucket = (sizeof(pchar) ==1) ? c : c % kBMAlphabetSize; |
| 2158 bad_char_occurrence[bucket] = i; | 2171 bad_char_occurrence[bucket] = i; |
| 2159 } | 2172 } |
| 2160 } | 2173 } |
| 2161 | 2174 |
| 2175 | |
| 2162 template <typename pchar> | 2176 template <typename pchar> |
| 2163 static void BoyerMoorePopulateGoodSuffixTable(Vector<const pchar> pattern, | 2177 static void BoyerMoorePopulateGoodSuffixTable(Vector<const pchar> pattern) { |
| 2164 int start) { | |
| 2165 int m = pattern.length(); | 2178 int m = pattern.length(); |
| 2179 int start = m < kBMMaxShift ? 0 : m - kBMMaxShift; | |
| 2166 int len = m - start; | 2180 int len = m - start; |
| 2167 // Compute Good Suffix tables. | 2181 // Compute Good Suffix tables. |
| 2168 bmgs_buffers.init(m); | 2182 bmgs_buffers.init(m); |
| 2169 | 2183 |
| 2170 bmgs_buffers.shift(m-1) = 1; | 2184 bmgs_buffers.shift(m-1) = 1; |
| 2171 bmgs_buffers.suffix(m) = m + 1; | 2185 bmgs_buffers.suffix(m) = m + 1; |
| 2172 pchar last_char = pattern[m - 1]; | 2186 pchar last_char = pattern[m - 1]; |
| 2173 int suffix = m + 1; | 2187 int suffix = m + 1; |
| 2174 for (int i = m; i > start;) { | 2188 for (int i = m; i > start;) { |
| 2175 for (pchar c = pattern[i - 1]; suffix <= m && c != pattern[suffix - 1];) { | 2189 for (pchar c = pattern[i - 1]; suffix <= m && c != pattern[suffix - 1];) { |
| (...skipping 26 matching lines...) Expand all Loading... | |
| 2202 if (bmgs_buffers.shift(i) == len) { | 2216 if (bmgs_buffers.shift(i) == len) { |
| 2203 bmgs_buffers.shift(i) = suffix - start; | 2217 bmgs_buffers.shift(i) = suffix - start; |
| 2204 } | 2218 } |
| 2205 if (i == suffix) { | 2219 if (i == suffix) { |
| 2206 suffix = bmgs_buffers.suffix(suffix); | 2220 suffix = bmgs_buffers.suffix(suffix); |
| 2207 } | 2221 } |
| 2208 } | 2222 } |
| 2209 } | 2223 } |
| 2210 } | 2224 } |
| 2211 | 2225 |
| 2226 | |
| 2212 template <typename schar, typename pchar> | 2227 template <typename schar, typename pchar> |
| 2213 static inline int CharOccurrence(int char_code) { | 2228 static inline int CharOccurrence(int char_code) { |
| 2214 if (sizeof(schar) == 1) { | 2229 if (sizeof(schar) == 1) { |
| 2215 return bad_char_occurrence[char_code]; | 2230 return bad_char_occurrence[char_code]; |
| 2216 } | 2231 } |
| 2217 if (sizeof(pchar) == 1) { | 2232 if (sizeof(pchar) == 1) { |
| 2218 if (char_code > String::kMaxAsciiCharCode) { | 2233 if (char_code > String::kMaxAsciiCharCode) { |
| 2219 return -1; | 2234 return -1; |
| 2220 } | 2235 } |
| 2221 return bad_char_occurrence[char_code]; | 2236 return bad_char_occurrence[char_code]; |
| 2222 } | 2237 } |
| 2223 return bad_char_occurrence[char_code % kBMAlphabetSize]; | 2238 return bad_char_occurrence[char_code % kBMAlphabetSize]; |
| 2224 } | 2239 } |
| 2225 | 2240 |
| 2241 | |
| 2226 // Restricted simplified Boyer-Moore string matching. | 2242 // Restricted simplified Boyer-Moore string matching. |
| 2227 // Uses only the bad-shift table of Boyer-Moore and only uses it | 2243 // Uses only the bad-shift table of Boyer-Moore and only uses it |
| 2228 // for the character compared to the last character of the needle. | 2244 // for the character compared to the last character of the needle. |
| 2229 template <typename schar, typename pchar> | 2245 template <typename schar, typename pchar> |
| 2230 static int BoyerMooreHorspool(Vector<const schar> subject, | 2246 static int BoyerMooreHorspool(Vector<const schar> subject, |
| 2231 Vector<const pchar> pattern, | 2247 Vector<const pchar> pattern, |
| 2232 int start_index, | 2248 int start_index, |
| 2233 bool* complete) { | 2249 bool* complete) { |
| 2250 ASSERT(algorithm <= BOYER_MOORE_HORSPOOL); | |
| 2234 int n = subject.length(); | 2251 int n = subject.length(); |
| 2235 int m = pattern.length(); | 2252 int m = pattern.length(); |
| 2236 // Only preprocess at most kBMMaxShift last characters of pattern. | |
| 2237 int start = m < kBMMaxShift ? 0 : m - kBMMaxShift; | |
| 2238 | 2253 |
| 2239 BoyerMoorePopulateBadCharTable(pattern, start); | 2254 int badness = -m; |
| 2240 | 2255 |
| 2241 int badness = -m; // How bad we are doing without a good-suffix table. | 2256 // How bad we are doing without a good-suffix table. |
| 2242 int idx; // No matches found prior to this index. | 2257 int idx; // No matches found prior to this index. |
| 2243 pchar last_char = pattern[m - 1]; | 2258 pchar last_char = pattern[m - 1]; |
| 2244 int last_char_shift = m - 1 - CharOccurrence<schar, pchar>(last_char); | 2259 int last_char_shift = m - 1 - CharOccurrence<schar, pchar>(last_char); |
| 2245 // Perform search | 2260 // Perform search |
| 2246 for (idx = start_index; idx <= n - m;) { | 2261 for (idx = start_index; idx <= n - m;) { |
| 2247 int j = m - 1; | 2262 int j = m - 1; |
| 2248 int c; | 2263 int c; |
| 2249 while (last_char != (c = subject[idx + j])) { | 2264 while (last_char != (c = subject[idx + j])) { |
| 2250 int bc_occ = CharOccurrence<schar, pchar>(c); | 2265 int bc_occ = CharOccurrence<schar, pchar>(c); |
| 2251 int shift = j - bc_occ; | 2266 int shift = j - bc_occ; |
| 2252 idx += shift; | 2267 idx += shift; |
| 2253 badness += 1 - shift; // at most zero, so badness cannot increase. | 2268 badness += 1 - shift; // at most zero, so badness cannot increase. |
| 2254 if (idx > n - m) { | 2269 if (idx > n - m) { |
| 2270 // If we could just break out of the for loop here. | |
|
Erik Corry
2010/03/15 12:57:48
What would then happen?
| |
| 2255 *complete = true; | 2271 *complete = true; |
| 2256 return -1; | 2272 return -1; |
| 2257 } | 2273 } |
| 2258 } | 2274 } |
| 2259 j--; | 2275 j--; |
| 2260 while (j >= 0 && pattern[j] == (subject[idx + j])) j--; | 2276 while (j >= 0 && pattern[j] == (subject[idx + j])) j--; |
| 2261 if (j < 0) { | 2277 if (j < 0) { |
| 2262 *complete = true; | 2278 *complete = true; |
| 2263 return idx; | 2279 return idx; |
| 2264 } else { | 2280 } else { |
| (...skipping 11 matching lines...) Expand all Loading... | |
| 2276 } | 2292 } |
| 2277 *complete = true; | 2293 *complete = true; |
| 2278 return -1; | 2294 return -1; |
| 2279 } | 2295 } |
| 2280 | 2296 |
| 2281 | 2297 |
| 2282 template <typename schar, typename pchar> | 2298 template <typename schar, typename pchar> |
| 2283 static int BoyerMooreIndexOf(Vector<const schar> subject, | 2299 static int BoyerMooreIndexOf(Vector<const schar> subject, |
| 2284 Vector<const pchar> pattern, | 2300 Vector<const pchar> pattern, |
| 2285 int idx) { | 2301 int idx) { |
| 2302 ASSERT(algorithm <= BOYER_MOORE); | |
| 2286 int n = subject.length(); | 2303 int n = subject.length(); |
| 2287 int m = pattern.length(); | 2304 int m = pattern.length(); |
| 2288 // Only preprocess at most kBMMaxShift last characters of pattern. | 2305 // Only preprocess at most kBMMaxShift last characters of pattern. |
| 2289 int start = m < kBMMaxShift ? 0 : m - kBMMaxShift; | 2306 int start = m < kBMMaxShift ? 0 : m - kBMMaxShift; |
| 2290 | 2307 |
| 2291 // Build the Good Suffix table and continue searching. | |
| 2292 BoyerMoorePopulateGoodSuffixTable(pattern, start); | |
| 2293 pchar last_char = pattern[m - 1]; | 2308 pchar last_char = pattern[m - 1]; |
| 2294 // Continue search from i. | 2309 // Continue search from i. |
| 2295 while (idx <= n - m) { | 2310 while (idx <= n - m) { |
| 2296 int j = m - 1; | 2311 int j = m - 1; |
| 2297 schar c; | 2312 schar c; |
| 2298 while (last_char != (c = subject[idx + j])) { | 2313 while (last_char != (c = subject[idx + j])) { |
| 2299 int shift = j - CharOccurrence<schar, pchar>(c); | 2314 int shift = j - CharOccurrence<schar, pchar>(c); |
| 2300 idx += shift; | 2315 idx += shift; |
| 2301 if (idx > n - m) { | 2316 if (idx > n - m) { |
| 2302 return -1; | 2317 return -1; |
| (...skipping 15 matching lines...) Expand all Loading... | |
| 2318 } | 2333 } |
| 2319 idx += shift; | 2334 idx += shift; |
| 2320 } | 2335 } |
| 2321 } | 2336 } |
| 2322 | 2337 |
| 2323 return -1; | 2338 return -1; |
| 2324 } | 2339 } |
| 2325 | 2340 |
| 2326 | 2341 |
| 2327 template <typename schar> | 2342 template <typename schar> |
| 2328 static int SingleCharIndexOf(Vector<const schar> string, | 2343 static inline int SingleCharIndexOf(Vector<const schar> string, |
| 2329 schar pattern_char, | 2344 schar pattern_char, |
| 2330 int start_index) { | 2345 int start_index) { |
| 2331 for (int i = start_index, n = string.length(); i < n; i++) { | 2346 for (int i = start_index, n = string.length(); i < n; i++) { |
| 2332 if (pattern_char == string[i]) { | 2347 if (pattern_char == string[i]) { |
| 2333 return i; | 2348 return i; |
| 2334 } | 2349 } |
| 2335 } | 2350 } |
| 2336 return -1; | 2351 return -1; |
| 2337 } | 2352 } |
| 2338 | 2353 |
| 2339 | 2354 |
| 2340 template <typename schar> | 2355 template <typename schar> |
| (...skipping 16 matching lines...) Expand all Loading... | |
| 2357 // further checking should start, i.e., it's guaranteed that the pattern | 2372 // further checking should start, i.e., it's guaranteed that the pattern |
| 2358 // does not occur at a position prior to the returned index. | 2373 // does not occur at a position prior to the returned index. |
| 2359 template <typename pchar, typename schar> | 2374 template <typename pchar, typename schar> |
| 2360 static int SimpleIndexOf(Vector<const schar> subject, | 2375 static int SimpleIndexOf(Vector<const schar> subject, |
| 2361 Vector<const pchar> pattern, | 2376 Vector<const pchar> pattern, |
| 2362 int idx, | 2377 int idx, |
| 2363 bool* complete) { | 2378 bool* complete) { |
| 2364 // Badness is a count of how much work we have done. When we have | 2379 // Badness is a count of how much work we have done. When we have |
| 2365 // done enough work we decide it's probably worth switching to a better | 2380 // done enough work we decide it's probably worth switching to a better |
| 2366 // algorithm. | 2381 // algorithm. |
| 2367 int badness = -10 - (pattern.length() << 2); | 2382 |
| 2368 // We know our pattern is at least 2 characters, we cache the first so | 2383 // We know our pattern is at least 2 characters, we cache the first so |
| 2369 // the common case of the first character not matching is faster. | 2384 // the common case of the first character not matching is faster. |
| 2370 pchar pattern_first_char = pattern[0]; | 2385 pchar pattern_first_char = pattern[0]; |
| 2371 | 2386 int badness = -10 - pattern.length() << 2; |
| 2372 for (int i = idx, n = subject.length() - pattern.length(); i <= n; i++) { | 2387 for (int i = idx, n = subject.length() - pattern.length(); i <= n; i++) { |
| 2373 badness++; | 2388 badness++; |
| 2374 if (badness > 0) { | 2389 if (badness > 0) { |
| 2375 *complete = false; | 2390 *complete = false; |
| 2376 return i; | 2391 return i; |
| 2377 } | 2392 } |
| 2378 if (subject[i] != pattern_first_char) continue; | 2393 if (subject[i] != pattern_first_char) continue; |
| 2379 int j = 1; | 2394 int j = 1; |
| 2380 do { | 2395 do { |
| 2381 if (pattern[j] != subject[i+j]) { | 2396 if (pattern[j] != subject[i+j]) { |
| (...skipping 27 matching lines...) Expand all Loading... | |
| 2409 j++; | 2424 j++; |
| 2410 } while (j < pattern.length()); | 2425 } while (j < pattern.length()); |
| 2411 if (j == pattern.length()) { | 2426 if (j == pattern.length()) { |
| 2412 return i; | 2427 return i; |
| 2413 } | 2428 } |
| 2414 } | 2429 } |
| 2415 return -1; | 2430 return -1; |
| 2416 } | 2431 } |
| 2417 | 2432 |
| 2418 | 2433 |
| 2419 // Dispatch to different algorithms. | 2434 // Strategy for searching for a string in another string. |
| 2420 template <typename schar, typename pchar> | 2435 enum StringSearchStrategy { SEARCH_FAIL, SEARCH_SHORT, SEARCH_LONG }; |
| 2421 static int StringMatchStrategy(Vector<const schar> sub, | 2436 |
| 2422 Vector<const pchar> pat, | 2437 |
| 2423 int start_index) { | 2438 template <typename pchar> |
| 2439 static inline StringSearchStrategy InitializeStringSearch( | |
| 2440 Vector<const pchar> pat, bool ascii_subject) { | |
| 2424 ASSERT(pat.length() > 1); | 2441 ASSERT(pat.length() > 1); |
| 2425 | |
| 2426 // We have an ASCII haystack and a non-ASCII needle. Check if there | 2442 // We have an ASCII haystack and a non-ASCII needle. Check if there |
| 2427 // really is a non-ASCII character in the needle and bail out if there | 2443 // really is a non-ASCII character in the needle and bail out if there |
| 2428 // is. | 2444 // is. |
| 2429 if (sizeof(schar) == 1 && sizeof(pchar) > 1) { | 2445 if (ascii_subject && sizeof(pchar) > 1) { |
| 2430 for (int i = 0; i < pat.length(); i++) { | 2446 for (int i = 0; i < pat.length(); i++) { |
| 2431 uc16 c = pat[i]; | 2447 uc16 c = pat[i]; |
| 2432 if (c > String::kMaxAsciiCharCode) { | 2448 if (c > String::kMaxAsciiCharCode) { |
| 2433 return -1; | 2449 return SEARCH_FAIL; |
| 2434 } | 2450 } |
| 2435 } | 2451 } |
| 2436 } | 2452 } |
| 2437 if (pat.length() < kBMMinPatternLength) { | 2453 if (pat.length() < kBMMinPatternLength) { |
| 2438 // We don't believe fancy searching can ever be more efficient. | 2454 return SEARCH_SHORT; |
| 2439 // The max shift of Boyer-Moore on a pattern of this length does | |
| 2440 // not compensate for the overhead. | |
| 2441 return SimpleIndexOf(sub, pat, start_index); | |
| 2442 } | 2455 } |
| 2456 algorithm = SIMPLE_SEARCH; | |
| 2457 return SEARCH_LONG; | |
| 2458 } | |
| 2459 | |
| 2460 | |
| 2461 // Dispatch long needle searches to different algorithms. | |
| 2462 template <typename schar, typename pchar> | |
| 2463 static int ComplexIndexOf(Vector<const schar> sub, | |
| 2464 Vector<const pchar> pat, | |
| 2465 int start_index) { | |
| 2466 ASSERT(pat.length() >= kBMMinPatternLength); | |
| 2443 // Try algorithms in order of increasing setup cost and expected performance. | 2467 // Try algorithms in order of increasing setup cost and expected performance. |
| 2444 bool complete; | 2468 bool complete; |
| 2445 int idx = SimpleIndexOf(sub, pat, start_index, &complete); | 2469 int idx = start_index; |
| 2446 if (complete) return idx; | 2470 switch (algorithm) { |
| 2447 idx = BoyerMooreHorspool(sub, pat, idx, &complete); | 2471 case SIMPLE_SEARCH: |
| 2448 if (complete) return idx; | 2472 idx = SimpleIndexOf(sub, pat, idx, &complete); |
| 2449 return BoyerMooreIndexOf(sub, pat, idx); | 2473 if (complete) return idx; |
| 2474 BoyerMoorePopulateBadCharTable(pat); | |
| 2475 algorithm = BOYER_MOORE_HORSPOOL; | |
| 2476 // FALLTHROUGH. | |
|
Erik Corry
2010/03/15 12:57:48
Does the linter insist on this annoying capitaliza
| |
| 2477 case BOYER_MOORE_HORSPOOL: | |
| 2478 idx = BoyerMooreHorspool(sub, pat, idx, &complete); | |
| 2479 if (complete) return idx; | |
| 2480 // Build the Good Suffix table and continue searching. | |
| 2481 BoyerMoorePopulateGoodSuffixTable(pat); | |
| 2482 algorithm = BOYER_MOORE; | |
| 2483 // FALLTHROUGH. | |
| 2484 case BOYER_MOORE: | |
| 2485 return BoyerMooreIndexOf(sub, pat, idx); | |
| 2486 } | |
| 2487 UNREACHABLE(); | |
| 2488 return -1; | |
| 2450 } | 2489 } |
| 2451 | 2490 |
| 2491 | |
| 2492 // Dispatch to different search strategies for a single search. | |
| 2493 // If searching multiple times on the same needle, the search | |
| 2494 // strategy should only be computed once and then dispatch to different | |
| 2495 // loops. | |
| 2496 template <typename schar, typename pchar> | |
| 2497 static int StringSearch(Vector<const schar> sub, | |
| 2498 Vector<const pchar> pat, | |
| 2499 int start_index) { | |
| 2500 bool ascii_subject = (sizeof(schar) == 1); | |
| 2501 StringSearchStrategy strategy = InitializeStringSearch(pat, ascii_subject); | |
| 2502 switch (strategy) { | |
| 2503 case SEARCH_FAIL: return -1; | |
| 2504 case SEARCH_SHORT: return SimpleIndexOf(sub, pat, start_index); | |
| 2505 case SEARCH_LONG: return ComplexIndexOf(sub, pat, start_index); | |
| 2506 } | |
| 2507 UNREACHABLE(); | |
| 2508 return -1; | |
| 2509 } | |
| 2510 | |
| 2511 | |
| 2452 // Perform string match of pattern on subject, starting at start index. | 2512 // Perform string match of pattern on subject, starting at start index. |
| 2453 // Caller must ensure that 0 <= start_index <= sub->length(), | 2513 // Caller must ensure that 0 <= start_index <= sub->length(), |
| 2454 // and should check that pat->length() + start_index <= sub->length() | 2514 // and should check that pat->length() + start_index <= sub->length() |
| 2455 int Runtime::StringMatch(Handle<String> sub, | 2515 int Runtime::StringMatch(Handle<String> sub, |
| 2456 Handle<String> pat, | 2516 Handle<String> pat, |
| 2457 int start_index) { | 2517 int start_index) { |
| 2458 ASSERT(0 <= start_index); | 2518 ASSERT(0 <= start_index); |
| 2459 ASSERT(start_index <= sub->length()); | 2519 ASSERT(start_index <= sub->length()); |
| 2460 | 2520 |
| 2461 int pattern_length = pat->length(); | 2521 int pattern_length = pat->length(); |
| 2462 if (pattern_length == 0) return start_index; | 2522 if (pattern_length == 0) return start_index; |
| 2463 | 2523 |
| 2464 int subject_length = sub->length(); | 2524 int subject_length = sub->length(); |
| 2465 if (start_index + pattern_length > subject_length) return -1; | 2525 if (start_index + pattern_length > subject_length) return -1; |
| 2466 | 2526 |
| 2467 if (!sub->IsFlat()) { | 2527 if (!sub->IsFlat()) { |
| 2468 FlattenString(sub); | 2528 FlattenString(sub); |
| 2469 } | 2529 } |
| 2530 | |
| 2470 // Searching for one specific character is common. For one | 2531 // Searching for one specific character is common. For one |
| 2471 // character patterns linear search is necessary, so any smart | 2532 // character patterns linear search is necessary, so any smart |
| 2472 // algorithm is unnecessary overhead. | 2533 // algorithm is unnecessary overhead. |
| 2473 if (pattern_length == 1) { | 2534 if (pattern_length == 1) { |
| 2474 AssertNoAllocation no_heap_allocation; // ensure vectors stay valid | 2535 AssertNoAllocation no_heap_allocation; // ensure vectors stay valid |
| 2475 if (sub->IsAsciiRepresentation()) { | 2536 if (sub->IsAsciiRepresentation()) { |
| 2476 uc16 pchar = pat->Get(0); | 2537 uc16 pchar = pat->Get(0); |
| 2477 if (pchar > String::kMaxAsciiCharCode) { | 2538 if (pchar > String::kMaxAsciiCharCode) { |
| 2478 return -1; | 2539 return -1; |
| 2479 } | 2540 } |
| (...skipping 13 matching lines...) Expand all Loading... | |
| 2493 | 2554 |
| 2494 if (!pat->IsFlat()) { | 2555 if (!pat->IsFlat()) { |
| 2495 FlattenString(pat); | 2556 FlattenString(pat); |
| 2496 } | 2557 } |
| 2497 | 2558 |
| 2498 AssertNoAllocation no_heap_allocation; // ensure vectors stay valid | 2559 AssertNoAllocation no_heap_allocation; // ensure vectors stay valid |
| 2499 // dispatch on type of strings | 2560 // dispatch on type of strings |
| 2500 if (pat->IsAsciiRepresentation()) { | 2561 if (pat->IsAsciiRepresentation()) { |
| 2501 Vector<const char> pat_vector = pat->ToAsciiVector(); | 2562 Vector<const char> pat_vector = pat->ToAsciiVector(); |
| 2502 if (sub->IsAsciiRepresentation()) { | 2563 if (sub->IsAsciiRepresentation()) { |
| 2503 return StringMatchStrategy(sub->ToAsciiVector(), pat_vector, start_index); | 2564 return StringSearch(sub->ToAsciiVector(), pat_vector, start_index); |
| 2504 } | 2565 } |
| 2505 return StringMatchStrategy(sub->ToUC16Vector(), pat_vector, start_index); | 2566 return StringSearch(sub->ToUC16Vector(), pat_vector, start_index); |
| 2506 } | 2567 } |
| 2507 Vector<const uc16> pat_vector = pat->ToUC16Vector(); | 2568 Vector<const uc16> pat_vector = pat->ToUC16Vector(); |
| 2508 if (sub->IsAsciiRepresentation()) { | 2569 if (sub->IsAsciiRepresentation()) { |
| 2509 return StringMatchStrategy(sub->ToAsciiVector(), pat_vector, start_index); | 2570 return StringSearch(sub->ToAsciiVector(), pat_vector, start_index); |
| 2510 } | 2571 } |
| 2511 return StringMatchStrategy(sub->ToUC16Vector(), pat_vector, start_index); | 2572 return StringSearch(sub->ToUC16Vector(), pat_vector, start_index); |
| 2512 } | 2573 } |
| 2513 | 2574 |
| 2514 | 2575 |
| 2515 static Object* Runtime_StringIndexOf(Arguments args) { | 2576 static Object* Runtime_StringIndexOf(Arguments args) { |
| 2516 HandleScope scope; // create a new handle scope | 2577 HandleScope scope; // create a new handle scope |
| 2517 ASSERT(args.length() == 3); | 2578 ASSERT(args.length() == 3); |
| 2518 | 2579 |
| 2519 CONVERT_ARG_CHECKED(String, sub, 0); | 2580 CONVERT_ARG_CHECKED(String, sub, 0); |
| 2520 CONVERT_ARG_CHECKED(String, pat, 1); | 2581 CONVERT_ARG_CHECKED(String, pat, 1); |
| 2521 | 2582 |
| (...skipping 1734 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 4256 int right = length; | 4317 int right = length; |
| 4257 if (trimRight) { | 4318 if (trimRight) { |
| 4258 while (right > left && IsTrimWhiteSpace(s->Get(right - 1))) { | 4319 while (right > left && IsTrimWhiteSpace(s->Get(right - 1))) { |
| 4259 right--; | 4320 right--; |
| 4260 } | 4321 } |
| 4261 } | 4322 } |
| 4262 return s->SubString(left, right); | 4323 return s->SubString(left, right); |
| 4263 } | 4324 } |
| 4264 | 4325 |
| 4265 | 4326 |
| 4327 template <typename schar, typename pchar> | |
| 4328 void FindStringIndices(Vector<const schar> subject, | |
| 4329 Vector<const pchar> pattern, | |
| 4330 ZoneList<int>* indices, | |
| 4331 unsigned int limit) { | |
| 4332 ASSERT(limit > 0); | |
| 4333 // Collect indices of pattern in subject, and the end-of-string index. | |
| 4334 // Stop after finding at most limit values. | |
| 4335 StringSearchStrategy strategy = | |
| 4336 InitializeStringSearch(pattern, sizeof(schar) == 1); | |
| 4337 switch (strategy) { | |
| 4338 case SEARCH_FAIL: return; | |
| 4339 case SEARCH_SHORT: { | |
| 4340 int pattern_length = pattern.length(); | |
| 4341 int index = 0; | |
| 4342 while (limit > 0) { | |
| 4343 index = SimpleIndexOf(subject, pattern, index); | |
| 4344 if (index < 0) return; | |
| 4345 indices->Add(index); | |
| 4346 index += pattern_length; | |
| 4347 limit--; | |
| 4348 } | |
| 4349 return; | |
| 4350 } | |
| 4351 case SEARCH_LONG: { | |
| 4352 int pattern_length = pattern.length(); | |
| 4353 int index = 0; | |
| 4354 while (limit > 0) { | |
| 4355 index = ComplexIndexOf(subject, pattern, index); | |
| 4356 if (index < 0) return; | |
| 4357 indices->Add(index); | |
| 4358 index += pattern_length; | |
| 4359 limit--; | |
| 4360 } | |
| 4361 return; | |
| 4362 } | |
| 4363 default: | |
| 4364 UNREACHABLE(); | |
| 4365 return; | |
| 4366 } | |
| 4367 } | |
| 4368 | |
| 4369 template <typename schar> | |
| 4370 void inline FindCharIndices(Vector<const schar> subject, | |
| 4371 const schar pattern_char, | |
| 4372 ZoneList<int>* indices, | |
| 4373 unsigned int limit) { | |
| 4374 // Collect indices of pattern_char in subject, and the end-of-string index. | |
| 4375 // Stop after finding at most limit values. | |
| 4376 int index = 0; | |
| 4377 while (limit > 0) { | |
| 4378 index = SingleCharIndexOf(subject, pattern_char, index); | |
| 4379 if (index < 0) return; | |
| 4380 indices->Add(index); | |
| 4381 index++; | |
| 4382 limit--; | |
| 4383 } | |
| 4384 } | |
| 4385 | |
| 4386 | |
| 4387 static Object* Runtime_StringSplit(Arguments args) { | |
| 4388 ASSERT(args.length() == 3); | |
| 4389 HandleScope handle_scope; | |
| 4390 CONVERT_ARG_CHECKED(String, subject, 0); | |
| 4391 CONVERT_ARG_CHECKED(String, pattern, 1); | |
| 4392 CONVERT_NUMBER_CHECKED(uint32_t, limit, Uint32, args[2]); | |
| 4393 | |
| 4394 int subject_length = subject->length(); | |
| 4395 int pattern_length = pattern->length(); | |
| 4396 | |
| 4397 if (!subject->IsFlat()) FlattenString(subject); | |
| 4398 | |
| 4399 static const int kMaxInitialListCapacity = 16; | |
| 4400 | |
| 4401 ZoneScope scope(DELETE_ON_EXIT); | |
| 4402 | |
| 4403 // Find (up to limit) indices of separator and end-of-string in subject | |
| 4404 int initial_capacity = Min<uint32_t>(kMaxInitialListCapacity, limit); | |
| 4405 ZoneList<int> indices(initial_capacity); | |
| 4406 if (pattern_length == 1) { | |
| 4407 // Special case, go directly to fast single-character split. | |
| 4408 AssertNoAllocation nogc; | |
| 4409 uc16 pattern_char = pattern->Get(0); | |
| 4410 if (subject->IsTwoByteRepresentation()) { | |
| 4411 FindCharIndices(subject->ToUC16Vector(), pattern_char, | |
| 4412 &indices, | |
| 4413 limit); | |
| 4414 } else if (pattern_char <= String::kMaxAsciiCharCode) { | |
| 4415 FindCharIndices(subject->ToAsciiVector(), | |
| 4416 static_cast<char>(pattern_char), | |
| 4417 &indices, | |
| 4418 limit); | |
| 4419 } | |
| 4420 } else { | |
| 4421 if (!pattern->IsFlat()) FlattenString(pattern); | |
| 4422 AssertNoAllocation nogc; | |
| 4423 if (subject->IsAsciiRepresentation()) { | |
| 4424 Vector<const char> subject_vector = subject->ToAsciiVector(); | |
| 4425 if (pattern->IsAsciiRepresentation()) { | |
| 4426 FindStringIndices(subject_vector, | |
| 4427 pattern->ToAsciiVector(), | |
| 4428 &indices, | |
| 4429 limit); | |
| 4430 } else { | |
| 4431 FindStringIndices(subject_vector, | |
| 4432 pattern->ToUC16Vector(), | |
| 4433 &indices, | |
| 4434 limit); | |
| 4435 } | |
| 4436 } else { | |
| 4437 Vector<const uc16> subject_vector = subject->ToUC16Vector(); | |
| 4438 if (pattern->IsAsciiRepresentation()) { | |
| 4439 FindStringIndices(subject_vector, | |
| 4440 pattern->ToAsciiVector(), | |
| 4441 &indices, | |
| 4442 limit); | |
| 4443 } else { | |
| 4444 FindStringIndices(subject_vector, | |
| 4445 pattern->ToUC16Vector(), | |
| 4446 &indices, | |
| 4447 limit); | |
| 4448 } | |
| 4449 } | |
| 4450 } | |
| 4451 if (static_cast<uint32_t>(indices.length()) < limit) { | |
| 4452 indices.Add(subject_length); | |
| 4453 } | |
| 4454 // The list indices now contains the end of each part to create. | |
| 4455 | |
| 4456 | |
| 4457 // Create JSArray of substrings separated by separator. | |
| 4458 int part_count = indices.length(); | |
| 4459 | |
| 4460 Handle<JSArray> result = Factory::NewJSArray(part_count); | |
| 4461 result->set_length(Smi::FromInt(part_count)); | |
| 4462 | |
| 4463 ASSERT(result->HasFastElements()); | |
| 4464 | |
| 4465 if (part_count == 1 && indices.at(0) == subject_length) { | |
| 4466 FixedArray::cast(result->elements())->set(0, *subject); | |
| 4467 return *result; | |
| 4468 } | |
| 4469 | |
| 4470 Handle<FixedArray> elements(FixedArray::cast(result->elements())); | |
| 4471 int part_start = 0; | |
| 4472 for (int i = 0; i < part_count; i++) { | |
| 4473 HandleScope local_loop_handle; | |
| 4474 int part_end = indices.at(i); | |
| 4475 Handle<String> substring = | |
| 4476 Factory::NewSubString(subject, part_start, part_end); | |
| 4477 elements->set(i, *substring); | |
| 4478 part_start = part_end + pattern_length; | |
| 4479 } | |
| 4480 | |
| 4481 return *result; | |
| 4482 } | |
| 4483 | |
| 4484 | |
| 4266 // Copies ascii characters to the given fixed array looking up | 4485 // Copies ascii characters to the given fixed array looking up |
| 4267 // one-char strings in the cache. Gives up on the first char that is | 4486 // one-char strings in the cache. Gives up on the first char that is |
| 4268 // not in the cache and fills the remainder with smi zeros. Returns | 4487 // not in the cache and fills the remainder with smi zeros. Returns |
| 4269 // the length of the successfully copied prefix. | 4488 // the length of the successfully copied prefix. |
| 4270 static int CopyCachedAsciiCharsToArray(const char* chars, | 4489 static int CopyCachedAsciiCharsToArray(const char* chars, |
| 4271 FixedArray* elements, | 4490 FixedArray* elements, |
| 4272 int length) { | 4491 int length) { |
| 4273 AssertNoAllocation nogc; | 4492 AssertNoAllocation nogc; |
| 4274 FixedArray* ascii_cache = Heap::single_character_string_cache(); | 4493 FixedArray* ascii_cache = Heap::single_character_string_cache(); |
| 4275 Object* undefined = Heap::undefined_value(); | 4494 Object* undefined = Heap::undefined_value(); |
| (...skipping 4521 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 8797 } else { | 9016 } else { |
| 8798 // Handle last resort GC and make sure to allow future allocations | 9017 // Handle last resort GC and make sure to allow future allocations |
| 8799 // to grow the heap without causing GCs (if possible). | 9018 // to grow the heap without causing GCs (if possible). |
| 8800 Counters::gc_last_resort_from_js.Increment(); | 9019 Counters::gc_last_resort_from_js.Increment(); |
| 8801 Heap::CollectAllGarbage(false); | 9020 Heap::CollectAllGarbage(false); |
| 8802 } | 9021 } |
| 8803 } | 9022 } |
| 8804 | 9023 |
| 8805 | 9024 |
| 8806 } } // namespace v8::internal | 9025 } } // namespace v8::internal |
| OLD | NEW |