Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(25)

Side by Side Diff: src/runtime.cc

Issue 875001: Converted String.prototype.split with string to C++. (Closed)
Patch Set: Rewrote conditions to switch with fallthrough. Created 10 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/runtime.h ('k') | src/string.js » ('j') | src/string.js » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2006-2009 the V8 project authors. All rights reserved. 1 // Copyright 2006-2009 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 2118 matching lines...) Expand 10 before | Expand all | Expand 10 after
2129 int good_suffix_shift_[kBMMaxShift + 1]; 2129 int good_suffix_shift_[kBMMaxShift + 1];
2130 int* biased_suffixes_; 2130 int* biased_suffixes_;
2131 int* biased_good_suffix_shift_; 2131 int* biased_good_suffix_shift_;
2132 DISALLOW_COPY_AND_ASSIGN(BMGoodSuffixBuffers); 2132 DISALLOW_COPY_AND_ASSIGN(BMGoodSuffixBuffers);
2133 }; 2133 };
2134 2134
2135 // buffers reused by BoyerMoore 2135 // buffers reused by BoyerMoore
2136 static int bad_char_occurrence[kBMAlphabetSize]; 2136 static int bad_char_occurrence[kBMAlphabetSize];
2137 static BMGoodSuffixBuffers bmgs_buffers; 2137 static BMGoodSuffixBuffers bmgs_buffers;
2138 2138
2139 // State of the string match tables.
2140 // SIMPLE: No usable content in the buffers.
2141 // BOYER_MOORE_HORSPOOL: The bad_char_occurences table has been populated.
2142 // BOYER_MOORE: The bmgs_buffers tables have also been populated.
2143 // Whenever starting with a new needle, one should call InitializeStringSearch
2144 // to determine which search strategy to use, and in the case of a long-needle
2145 // strategy, the call also initializes the algorithm to SIMPLE.
2146 enum StringSearchAlgorithm { SIMPLE_SEARCH, BOYER_MOORE_HORSPOOL, BOYER_MOORE };
2147 static StringSearchAlgorithm algorithm;
2148
2149
2139 // Compute the bad-char table for Boyer-Moore in the static buffer. 2150 // Compute the bad-char table for Boyer-Moore in the static buffer.
2140 template <typename pchar> 2151 template <typename pchar>
2141 static void BoyerMoorePopulateBadCharTable(Vector<const pchar> pattern, 2152 static void BoyerMoorePopulateBadCharTable(Vector<const pchar> pattern) {
2142 int start) { 2153 // Only preprocess at most kBMMaxShift last characters of pattern.
2154 int start = pattern.length() < kBMMaxShift ? 0
2155 : pattern.length() - kBMMaxShift;
2143 // Run forwards to populate bad_char_table, so that *last* instance 2156 // Run forwards to populate bad_char_table, so that *last* instance
2144 // of character equivalence class is the one registered. 2157 // of character equivalence class is the one registered.
2145 // Notice: Doesn't include the last character. 2158 // Notice: Doesn't include the last character.
2146 int table_size = (sizeof(pchar) == 1) ? String::kMaxAsciiCharCode + 1 2159 int table_size = (sizeof(pchar) == 1) ? String::kMaxAsciiCharCode + 1
2147 : kBMAlphabetSize; 2160 : kBMAlphabetSize;
2148 if (start == 0) { // All patterns less than kBMMaxShift in length. 2161 if (start == 0) { // All patterns less than kBMMaxShift in length.
2149 memset(bad_char_occurrence, -1, table_size * sizeof(*bad_char_occurrence)); 2162 memset(bad_char_occurrence, -1, table_size * sizeof(*bad_char_occurrence));
2150 } else { 2163 } else {
2151 for (int i = 0; i < table_size; i++) { 2164 for (int i = 0; i < table_size; i++) {
2152 bad_char_occurrence[i] = start - 1; 2165 bad_char_occurrence[i] = start - 1;
2153 } 2166 }
2154 } 2167 }
2155 for (int i = start; i < pattern.length() - 1; i++) { 2168 for (int i = start; i < pattern.length() - 1; i++) {
2156 pchar c = pattern[i]; 2169 pchar c = pattern[i];
2157 int bucket = (sizeof(pchar) ==1) ? c : c % kBMAlphabetSize; 2170 int bucket = (sizeof(pchar) ==1) ? c : c % kBMAlphabetSize;
2158 bad_char_occurrence[bucket] = i; 2171 bad_char_occurrence[bucket] = i;
2159 } 2172 }
2160 } 2173 }
2161 2174
2175
2162 template <typename pchar> 2176 template <typename pchar>
2163 static void BoyerMoorePopulateGoodSuffixTable(Vector<const pchar> pattern, 2177 static void BoyerMoorePopulateGoodSuffixTable(Vector<const pchar> pattern) {
2164 int start) {
2165 int m = pattern.length(); 2178 int m = pattern.length();
2179 int start = m < kBMMaxShift ? 0 : m - kBMMaxShift;
2166 int len = m - start; 2180 int len = m - start;
2167 // Compute Good Suffix tables. 2181 // Compute Good Suffix tables.
2168 bmgs_buffers.init(m); 2182 bmgs_buffers.init(m);
2169 2183
2170 bmgs_buffers.shift(m-1) = 1; 2184 bmgs_buffers.shift(m-1) = 1;
2171 bmgs_buffers.suffix(m) = m + 1; 2185 bmgs_buffers.suffix(m) = m + 1;
2172 pchar last_char = pattern[m - 1]; 2186 pchar last_char = pattern[m - 1];
2173 int suffix = m + 1; 2187 int suffix = m + 1;
2174 for (int i = m; i > start;) { 2188 for (int i = m; i > start;) {
2175 for (pchar c = pattern[i - 1]; suffix <= m && c != pattern[suffix - 1];) { 2189 for (pchar c = pattern[i - 1]; suffix <= m && c != pattern[suffix - 1];) {
(...skipping 26 matching lines...) Expand all
2202 if (bmgs_buffers.shift(i) == len) { 2216 if (bmgs_buffers.shift(i) == len) {
2203 bmgs_buffers.shift(i) = suffix - start; 2217 bmgs_buffers.shift(i) = suffix - start;
2204 } 2218 }
2205 if (i == suffix) { 2219 if (i == suffix) {
2206 suffix = bmgs_buffers.suffix(suffix); 2220 suffix = bmgs_buffers.suffix(suffix);
2207 } 2221 }
2208 } 2222 }
2209 } 2223 }
2210 } 2224 }
2211 2225
2226
2212 template <typename schar, typename pchar> 2227 template <typename schar, typename pchar>
2213 static inline int CharOccurrence(int char_code) { 2228 static inline int CharOccurrence(int char_code) {
2214 if (sizeof(schar) == 1) { 2229 if (sizeof(schar) == 1) {
2215 return bad_char_occurrence[char_code]; 2230 return bad_char_occurrence[char_code];
2216 } 2231 }
2217 if (sizeof(pchar) == 1) { 2232 if (sizeof(pchar) == 1) {
2218 if (char_code > String::kMaxAsciiCharCode) { 2233 if (char_code > String::kMaxAsciiCharCode) {
2219 return -1; 2234 return -1;
2220 } 2235 }
2221 return bad_char_occurrence[char_code]; 2236 return bad_char_occurrence[char_code];
2222 } 2237 }
2223 return bad_char_occurrence[char_code % kBMAlphabetSize]; 2238 return bad_char_occurrence[char_code % kBMAlphabetSize];
2224 } 2239 }
2225 2240
2241
2226 // Restricted simplified Boyer-Moore string matching. 2242 // Restricted simplified Boyer-Moore string matching.
2227 // Uses only the bad-shift table of Boyer-Moore and only uses it 2243 // Uses only the bad-shift table of Boyer-Moore and only uses it
2228 // for the character compared to the last character of the needle. 2244 // for the character compared to the last character of the needle.
2229 template <typename schar, typename pchar> 2245 template <typename schar, typename pchar>
2230 static int BoyerMooreHorspool(Vector<const schar> subject, 2246 static int BoyerMooreHorspool(Vector<const schar> subject,
2231 Vector<const pchar> pattern, 2247 Vector<const pchar> pattern,
2232 int start_index, 2248 int start_index,
2233 bool* complete) { 2249 bool* complete) {
2250 ASSERT(algorithm <= BOYER_MOORE_HORSPOOL);
2234 int n = subject.length(); 2251 int n = subject.length();
2235 int m = pattern.length(); 2252 int m = pattern.length();
2236 // Only preprocess at most kBMMaxShift last characters of pattern.
2237 int start = m < kBMMaxShift ? 0 : m - kBMMaxShift;
2238 2253
2239 BoyerMoorePopulateBadCharTable(pattern, start); 2254 int badness = -m;
2240 2255
2241 int badness = -m; // How bad we are doing without a good-suffix table. 2256 // How bad we are doing without a good-suffix table.
2242 int idx; // No matches found prior to this index. 2257 int idx; // No matches found prior to this index.
2243 pchar last_char = pattern[m - 1]; 2258 pchar last_char = pattern[m - 1];
2244 int last_char_shift = m - 1 - CharOccurrence<schar, pchar>(last_char); 2259 int last_char_shift = m - 1 - CharOccurrence<schar, pchar>(last_char);
2245 // Perform search 2260 // Perform search
2246 for (idx = start_index; idx <= n - m;) { 2261 for (idx = start_index; idx <= n - m;) {
2247 int j = m - 1; 2262 int j = m - 1;
2248 int c; 2263 int c;
2249 while (last_char != (c = subject[idx + j])) { 2264 while (last_char != (c = subject[idx + j])) {
2250 int bc_occ = CharOccurrence<schar, pchar>(c); 2265 int bc_occ = CharOccurrence<schar, pchar>(c);
2251 int shift = j - bc_occ; 2266 int shift = j - bc_occ;
2252 idx += shift; 2267 idx += shift;
2253 badness += 1 - shift; // at most zero, so badness cannot increase. 2268 badness += 1 - shift; // at most zero, so badness cannot increase.
2254 if (idx > n - m) { 2269 if (idx > n - m) {
2270 // If we could just break out of the for loop here.
Erik Corry 2010/03/15 12:57:48 What would then happen?
2255 *complete = true; 2271 *complete = true;
2256 return -1; 2272 return -1;
2257 } 2273 }
2258 } 2274 }
2259 j--; 2275 j--;
2260 while (j >= 0 && pattern[j] == (subject[idx + j])) j--; 2276 while (j >= 0 && pattern[j] == (subject[idx + j])) j--;
2261 if (j < 0) { 2277 if (j < 0) {
2262 *complete = true; 2278 *complete = true;
2263 return idx; 2279 return idx;
2264 } else { 2280 } else {
(...skipping 11 matching lines...) Expand all
2276 } 2292 }
2277 *complete = true; 2293 *complete = true;
2278 return -1; 2294 return -1;
2279 } 2295 }
2280 2296
2281 2297
2282 template <typename schar, typename pchar> 2298 template <typename schar, typename pchar>
2283 static int BoyerMooreIndexOf(Vector<const schar> subject, 2299 static int BoyerMooreIndexOf(Vector<const schar> subject,
2284 Vector<const pchar> pattern, 2300 Vector<const pchar> pattern,
2285 int idx) { 2301 int idx) {
2302 ASSERT(algorithm <= BOYER_MOORE);
2286 int n = subject.length(); 2303 int n = subject.length();
2287 int m = pattern.length(); 2304 int m = pattern.length();
2288 // Only preprocess at most kBMMaxShift last characters of pattern. 2305 // Only preprocess at most kBMMaxShift last characters of pattern.
2289 int start = m < kBMMaxShift ? 0 : m - kBMMaxShift; 2306 int start = m < kBMMaxShift ? 0 : m - kBMMaxShift;
2290 2307
2291 // Build the Good Suffix table and continue searching.
2292 BoyerMoorePopulateGoodSuffixTable(pattern, start);
2293 pchar last_char = pattern[m - 1]; 2308 pchar last_char = pattern[m - 1];
2294 // Continue search from i. 2309 // Continue search from i.
2295 while (idx <= n - m) { 2310 while (idx <= n - m) {
2296 int j = m - 1; 2311 int j = m - 1;
2297 schar c; 2312 schar c;
2298 while (last_char != (c = subject[idx + j])) { 2313 while (last_char != (c = subject[idx + j])) {
2299 int shift = j - CharOccurrence<schar, pchar>(c); 2314 int shift = j - CharOccurrence<schar, pchar>(c);
2300 idx += shift; 2315 idx += shift;
2301 if (idx > n - m) { 2316 if (idx > n - m) {
2302 return -1; 2317 return -1;
(...skipping 15 matching lines...) Expand all
2318 } 2333 }
2319 idx += shift; 2334 idx += shift;
2320 } 2335 }
2321 } 2336 }
2322 2337
2323 return -1; 2338 return -1;
2324 } 2339 }
2325 2340
2326 2341
2327 template <typename schar> 2342 template <typename schar>
2328 static int SingleCharIndexOf(Vector<const schar> string, 2343 static inline int SingleCharIndexOf(Vector<const schar> string,
2329 schar pattern_char, 2344 schar pattern_char,
2330 int start_index) { 2345 int start_index) {
2331 for (int i = start_index, n = string.length(); i < n; i++) { 2346 for (int i = start_index, n = string.length(); i < n; i++) {
2332 if (pattern_char == string[i]) { 2347 if (pattern_char == string[i]) {
2333 return i; 2348 return i;
2334 } 2349 }
2335 } 2350 }
2336 return -1; 2351 return -1;
2337 } 2352 }
2338 2353
2339 2354
2340 template <typename schar> 2355 template <typename schar>
(...skipping 16 matching lines...) Expand all
2357 // further checking should start, i.e., it's guaranteed that the pattern 2372 // further checking should start, i.e., it's guaranteed that the pattern
2358 // does not occur at a position prior to the returned index. 2373 // does not occur at a position prior to the returned index.
2359 template <typename pchar, typename schar> 2374 template <typename pchar, typename schar>
2360 static int SimpleIndexOf(Vector<const schar> subject, 2375 static int SimpleIndexOf(Vector<const schar> subject,
2361 Vector<const pchar> pattern, 2376 Vector<const pchar> pattern,
2362 int idx, 2377 int idx,
2363 bool* complete) { 2378 bool* complete) {
2364 // Badness is a count of how much work we have done. When we have 2379 // Badness is a count of how much work we have done. When we have
2365 // done enough work we decide it's probably worth switching to a better 2380 // done enough work we decide it's probably worth switching to a better
2366 // algorithm. 2381 // algorithm.
2367 int badness = -10 - (pattern.length() << 2); 2382
2368 // We know our pattern is at least 2 characters, we cache the first so 2383 // We know our pattern is at least 2 characters, we cache the first so
2369 // the common case of the first character not matching is faster. 2384 // the common case of the first character not matching is faster.
2370 pchar pattern_first_char = pattern[0]; 2385 pchar pattern_first_char = pattern[0];
2371 2386 int badness = -10 - pattern.length() << 2;
2372 for (int i = idx, n = subject.length() - pattern.length(); i <= n; i++) { 2387 for (int i = idx, n = subject.length() - pattern.length(); i <= n; i++) {
2373 badness++; 2388 badness++;
2374 if (badness > 0) { 2389 if (badness > 0) {
2375 *complete = false; 2390 *complete = false;
2376 return i; 2391 return i;
2377 } 2392 }
2378 if (subject[i] != pattern_first_char) continue; 2393 if (subject[i] != pattern_first_char) continue;
2379 int j = 1; 2394 int j = 1;
2380 do { 2395 do {
2381 if (pattern[j] != subject[i+j]) { 2396 if (pattern[j] != subject[i+j]) {
(...skipping 27 matching lines...) Expand all
2409 j++; 2424 j++;
2410 } while (j < pattern.length()); 2425 } while (j < pattern.length());
2411 if (j == pattern.length()) { 2426 if (j == pattern.length()) {
2412 return i; 2427 return i;
2413 } 2428 }
2414 } 2429 }
2415 return -1; 2430 return -1;
2416 } 2431 }
2417 2432
2418 2433
2419 // Dispatch to different algorithms. 2434 // Strategy for searching for a string in another string.
2420 template <typename schar, typename pchar> 2435 enum StringSearchStrategy { SEARCH_FAIL, SEARCH_SHORT, SEARCH_LONG };
2421 static int StringMatchStrategy(Vector<const schar> sub, 2436
2422 Vector<const pchar> pat, 2437
2423 int start_index) { 2438 template <typename pchar>
2439 static inline StringSearchStrategy InitializeStringSearch(
2440 Vector<const pchar> pat, bool ascii_subject) {
2424 ASSERT(pat.length() > 1); 2441 ASSERT(pat.length() > 1);
2425
2426 // We have an ASCII haystack and a non-ASCII needle. Check if there 2442 // We have an ASCII haystack and a non-ASCII needle. Check if there
2427 // really is a non-ASCII character in the needle and bail out if there 2443 // really is a non-ASCII character in the needle and bail out if there
2428 // is. 2444 // is.
2429 if (sizeof(schar) == 1 && sizeof(pchar) > 1) { 2445 if (ascii_subject && sizeof(pchar) > 1) {
2430 for (int i = 0; i < pat.length(); i++) { 2446 for (int i = 0; i < pat.length(); i++) {
2431 uc16 c = pat[i]; 2447 uc16 c = pat[i];
2432 if (c > String::kMaxAsciiCharCode) { 2448 if (c > String::kMaxAsciiCharCode) {
2433 return -1; 2449 return SEARCH_FAIL;
2434 } 2450 }
2435 } 2451 }
2436 } 2452 }
2437 if (pat.length() < kBMMinPatternLength) { 2453 if (pat.length() < kBMMinPatternLength) {
2438 // We don't believe fancy searching can ever be more efficient. 2454 return SEARCH_SHORT;
2439 // The max shift of Boyer-Moore on a pattern of this length does
2440 // not compensate for the overhead.
2441 return SimpleIndexOf(sub, pat, start_index);
2442 } 2455 }
2456 algorithm = SIMPLE_SEARCH;
2457 return SEARCH_LONG;
2458 }
2459
2460
2461 // Dispatch long needle searches to different algorithms.
2462 template <typename schar, typename pchar>
2463 static int ComplexIndexOf(Vector<const schar> sub,
2464 Vector<const pchar> pat,
2465 int start_index) {
2466 ASSERT(pat.length() >= kBMMinPatternLength);
2443 // Try algorithms in order of increasing setup cost and expected performance. 2467 // Try algorithms in order of increasing setup cost and expected performance.
2444 bool complete; 2468 bool complete;
2445 int idx = SimpleIndexOf(sub, pat, start_index, &complete); 2469 int idx = start_index;
2446 if (complete) return idx; 2470 switch (algorithm) {
2447 idx = BoyerMooreHorspool(sub, pat, idx, &complete); 2471 case SIMPLE_SEARCH:
2448 if (complete) return idx; 2472 idx = SimpleIndexOf(sub, pat, idx, &complete);
2449 return BoyerMooreIndexOf(sub, pat, idx); 2473 if (complete) return idx;
2474 BoyerMoorePopulateBadCharTable(pat);
2475 algorithm = BOYER_MOORE_HORSPOOL;
2476 // FALLTHROUGH.
Erik Corry 2010/03/15 12:57:48 Does the linter insist on this annoying capitaliza
2477 case BOYER_MOORE_HORSPOOL:
2478 idx = BoyerMooreHorspool(sub, pat, idx, &complete);
2479 if (complete) return idx;
2480 // Build the Good Suffix table and continue searching.
2481 BoyerMoorePopulateGoodSuffixTable(pat);
2482 algorithm = BOYER_MOORE;
2483 // FALLTHROUGH.
2484 case BOYER_MOORE:
2485 return BoyerMooreIndexOf(sub, pat, idx);
2486 }
2487 UNREACHABLE();
2488 return -1;
2450 } 2489 }
2451 2490
2491
2492 // Dispatch to different search strategies for a single search.
2493 // If searching multiple times on the same needle, the search
2494 // strategy should only be computed once and then dispatch to different
2495 // loops.
2496 template <typename schar, typename pchar>
2497 static int StringSearch(Vector<const schar> sub,
2498 Vector<const pchar> pat,
2499 int start_index) {
2500 bool ascii_subject = (sizeof(schar) == 1);
2501 StringSearchStrategy strategy = InitializeStringSearch(pat, ascii_subject);
2502 switch (strategy) {
2503 case SEARCH_FAIL: return -1;
2504 case SEARCH_SHORT: return SimpleIndexOf(sub, pat, start_index);
2505 case SEARCH_LONG: return ComplexIndexOf(sub, pat, start_index);
2506 }
2507 UNREACHABLE();
2508 return -1;
2509 }
2510
2511
2452 // Perform string match of pattern on subject, starting at start index. 2512 // Perform string match of pattern on subject, starting at start index.
2453 // Caller must ensure that 0 <= start_index <= sub->length(), 2513 // Caller must ensure that 0 <= start_index <= sub->length(),
2454 // and should check that pat->length() + start_index <= sub->length() 2514 // and should check that pat->length() + start_index <= sub->length()
2455 int Runtime::StringMatch(Handle<String> sub, 2515 int Runtime::StringMatch(Handle<String> sub,
2456 Handle<String> pat, 2516 Handle<String> pat,
2457 int start_index) { 2517 int start_index) {
2458 ASSERT(0 <= start_index); 2518 ASSERT(0 <= start_index);
2459 ASSERT(start_index <= sub->length()); 2519 ASSERT(start_index <= sub->length());
2460 2520
2461 int pattern_length = pat->length(); 2521 int pattern_length = pat->length();
2462 if (pattern_length == 0) return start_index; 2522 if (pattern_length == 0) return start_index;
2463 2523
2464 int subject_length = sub->length(); 2524 int subject_length = sub->length();
2465 if (start_index + pattern_length > subject_length) return -1; 2525 if (start_index + pattern_length > subject_length) return -1;
2466 2526
2467 if (!sub->IsFlat()) { 2527 if (!sub->IsFlat()) {
2468 FlattenString(sub); 2528 FlattenString(sub);
2469 } 2529 }
2530
2470 // Searching for one specific character is common. For one 2531 // Searching for one specific character is common. For one
2471 // character patterns linear search is necessary, so any smart 2532 // character patterns linear search is necessary, so any smart
2472 // algorithm is unnecessary overhead. 2533 // algorithm is unnecessary overhead.
2473 if (pattern_length == 1) { 2534 if (pattern_length == 1) {
2474 AssertNoAllocation no_heap_allocation; // ensure vectors stay valid 2535 AssertNoAllocation no_heap_allocation; // ensure vectors stay valid
2475 if (sub->IsAsciiRepresentation()) { 2536 if (sub->IsAsciiRepresentation()) {
2476 uc16 pchar = pat->Get(0); 2537 uc16 pchar = pat->Get(0);
2477 if (pchar > String::kMaxAsciiCharCode) { 2538 if (pchar > String::kMaxAsciiCharCode) {
2478 return -1; 2539 return -1;
2479 } 2540 }
(...skipping 13 matching lines...) Expand all
2493 2554
2494 if (!pat->IsFlat()) { 2555 if (!pat->IsFlat()) {
2495 FlattenString(pat); 2556 FlattenString(pat);
2496 } 2557 }
2497 2558
2498 AssertNoAllocation no_heap_allocation; // ensure vectors stay valid 2559 AssertNoAllocation no_heap_allocation; // ensure vectors stay valid
2499 // dispatch on type of strings 2560 // dispatch on type of strings
2500 if (pat->IsAsciiRepresentation()) { 2561 if (pat->IsAsciiRepresentation()) {
2501 Vector<const char> pat_vector = pat->ToAsciiVector(); 2562 Vector<const char> pat_vector = pat->ToAsciiVector();
2502 if (sub->IsAsciiRepresentation()) { 2563 if (sub->IsAsciiRepresentation()) {
2503 return StringMatchStrategy(sub->ToAsciiVector(), pat_vector, start_index); 2564 return StringSearch(sub->ToAsciiVector(), pat_vector, start_index);
2504 } 2565 }
2505 return StringMatchStrategy(sub->ToUC16Vector(), pat_vector, start_index); 2566 return StringSearch(sub->ToUC16Vector(), pat_vector, start_index);
2506 } 2567 }
2507 Vector<const uc16> pat_vector = pat->ToUC16Vector(); 2568 Vector<const uc16> pat_vector = pat->ToUC16Vector();
2508 if (sub->IsAsciiRepresentation()) { 2569 if (sub->IsAsciiRepresentation()) {
2509 return StringMatchStrategy(sub->ToAsciiVector(), pat_vector, start_index); 2570 return StringSearch(sub->ToAsciiVector(), pat_vector, start_index);
2510 } 2571 }
2511 return StringMatchStrategy(sub->ToUC16Vector(), pat_vector, start_index); 2572 return StringSearch(sub->ToUC16Vector(), pat_vector, start_index);
2512 } 2573 }
2513 2574
2514 2575
2515 static Object* Runtime_StringIndexOf(Arguments args) { 2576 static Object* Runtime_StringIndexOf(Arguments args) {
2516 HandleScope scope; // create a new handle scope 2577 HandleScope scope; // create a new handle scope
2517 ASSERT(args.length() == 3); 2578 ASSERT(args.length() == 3);
2518 2579
2519 CONVERT_ARG_CHECKED(String, sub, 0); 2580 CONVERT_ARG_CHECKED(String, sub, 0);
2520 CONVERT_ARG_CHECKED(String, pat, 1); 2581 CONVERT_ARG_CHECKED(String, pat, 1);
2521 2582
(...skipping 1734 matching lines...) Expand 10 before | Expand all | Expand 10 after
4256 int right = length; 4317 int right = length;
4257 if (trimRight) { 4318 if (trimRight) {
4258 while (right > left && IsTrimWhiteSpace(s->Get(right - 1))) { 4319 while (right > left && IsTrimWhiteSpace(s->Get(right - 1))) {
4259 right--; 4320 right--;
4260 } 4321 }
4261 } 4322 }
4262 return s->SubString(left, right); 4323 return s->SubString(left, right);
4263 } 4324 }
4264 4325
4265 4326
4327 template <typename schar, typename pchar>
4328 void FindStringIndices(Vector<const schar> subject,
4329 Vector<const pchar> pattern,
4330 ZoneList<int>* indices,
4331 unsigned int limit) {
4332 ASSERT(limit > 0);
4333 // Collect indices of pattern in subject, and the end-of-string index.
4334 // Stop after finding at most limit values.
4335 StringSearchStrategy strategy =
4336 InitializeStringSearch(pattern, sizeof(schar) == 1);
4337 switch (strategy) {
4338 case SEARCH_FAIL: return;
4339 case SEARCH_SHORT: {
4340 int pattern_length = pattern.length();
4341 int index = 0;
4342 while (limit > 0) {
4343 index = SimpleIndexOf(subject, pattern, index);
4344 if (index < 0) return;
4345 indices->Add(index);
4346 index += pattern_length;
4347 limit--;
4348 }
4349 return;
4350 }
4351 case SEARCH_LONG: {
4352 int pattern_length = pattern.length();
4353 int index = 0;
4354 while (limit > 0) {
4355 index = ComplexIndexOf(subject, pattern, index);
4356 if (index < 0) return;
4357 indices->Add(index);
4358 index += pattern_length;
4359 limit--;
4360 }
4361 return;
4362 }
4363 default:
4364 UNREACHABLE();
4365 return;
4366 }
4367 }
4368
4369 template <typename schar>
4370 void inline FindCharIndices(Vector<const schar> subject,
4371 const schar pattern_char,
4372 ZoneList<int>* indices,
4373 unsigned int limit) {
4374 // Collect indices of pattern_char in subject, and the end-of-string index.
4375 // Stop after finding at most limit values.
4376 int index = 0;
4377 while (limit > 0) {
4378 index = SingleCharIndexOf(subject, pattern_char, index);
4379 if (index < 0) return;
4380 indices->Add(index);
4381 index++;
4382 limit--;
4383 }
4384 }
4385
4386
4387 static Object* Runtime_StringSplit(Arguments args) {
4388 ASSERT(args.length() == 3);
4389 HandleScope handle_scope;
4390 CONVERT_ARG_CHECKED(String, subject, 0);
4391 CONVERT_ARG_CHECKED(String, pattern, 1);
4392 CONVERT_NUMBER_CHECKED(uint32_t, limit, Uint32, args[2]);
4393
4394 int subject_length = subject->length();
4395 int pattern_length = pattern->length();
4396
4397 if (!subject->IsFlat()) FlattenString(subject);
4398
4399 static const int kMaxInitialListCapacity = 16;
4400
4401 ZoneScope scope(DELETE_ON_EXIT);
4402
4403 // Find (up to limit) indices of separator and end-of-string in subject
4404 int initial_capacity = Min<uint32_t>(kMaxInitialListCapacity, limit);
4405 ZoneList<int> indices(initial_capacity);
4406 if (pattern_length == 1) {
4407 // Special case, go directly to fast single-character split.
4408 AssertNoAllocation nogc;
4409 uc16 pattern_char = pattern->Get(0);
4410 if (subject->IsTwoByteRepresentation()) {
4411 FindCharIndices(subject->ToUC16Vector(), pattern_char,
4412 &indices,
4413 limit);
4414 } else if (pattern_char <= String::kMaxAsciiCharCode) {
4415 FindCharIndices(subject->ToAsciiVector(),
4416 static_cast<char>(pattern_char),
4417 &indices,
4418 limit);
4419 }
4420 } else {
4421 if (!pattern->IsFlat()) FlattenString(pattern);
4422 AssertNoAllocation nogc;
4423 if (subject->IsAsciiRepresentation()) {
4424 Vector<const char> subject_vector = subject->ToAsciiVector();
4425 if (pattern->IsAsciiRepresentation()) {
4426 FindStringIndices(subject_vector,
4427 pattern->ToAsciiVector(),
4428 &indices,
4429 limit);
4430 } else {
4431 FindStringIndices(subject_vector,
4432 pattern->ToUC16Vector(),
4433 &indices,
4434 limit);
4435 }
4436 } else {
4437 Vector<const uc16> subject_vector = subject->ToUC16Vector();
4438 if (pattern->IsAsciiRepresentation()) {
4439 FindStringIndices(subject_vector,
4440 pattern->ToAsciiVector(),
4441 &indices,
4442 limit);
4443 } else {
4444 FindStringIndices(subject_vector,
4445 pattern->ToUC16Vector(),
4446 &indices,
4447 limit);
4448 }
4449 }
4450 }
4451 if (static_cast<uint32_t>(indices.length()) < limit) {
4452 indices.Add(subject_length);
4453 }
4454 // The list indices now contains the end of each part to create.
4455
4456
4457 // Create JSArray of substrings separated by separator.
4458 int part_count = indices.length();
4459
4460 Handle<JSArray> result = Factory::NewJSArray(part_count);
4461 result->set_length(Smi::FromInt(part_count));
4462
4463 ASSERT(result->HasFastElements());
4464
4465 if (part_count == 1 && indices.at(0) == subject_length) {
4466 FixedArray::cast(result->elements())->set(0, *subject);
4467 return *result;
4468 }
4469
4470 Handle<FixedArray> elements(FixedArray::cast(result->elements()));
4471 int part_start = 0;
4472 for (int i = 0; i < part_count; i++) {
4473 HandleScope local_loop_handle;
4474 int part_end = indices.at(i);
4475 Handle<String> substring =
4476 Factory::NewSubString(subject, part_start, part_end);
4477 elements->set(i, *substring);
4478 part_start = part_end + pattern_length;
4479 }
4480
4481 return *result;
4482 }
4483
4484
4266 // Copies ascii characters to the given fixed array looking up 4485 // Copies ascii characters to the given fixed array looking up
4267 // one-char strings in the cache. Gives up on the first char that is 4486 // one-char strings in the cache. Gives up on the first char that is
4268 // not in the cache and fills the remainder with smi zeros. Returns 4487 // not in the cache and fills the remainder with smi zeros. Returns
4269 // the length of the successfully copied prefix. 4488 // the length of the successfully copied prefix.
4270 static int CopyCachedAsciiCharsToArray(const char* chars, 4489 static int CopyCachedAsciiCharsToArray(const char* chars,
4271 FixedArray* elements, 4490 FixedArray* elements,
4272 int length) { 4491 int length) {
4273 AssertNoAllocation nogc; 4492 AssertNoAllocation nogc;
4274 FixedArray* ascii_cache = Heap::single_character_string_cache(); 4493 FixedArray* ascii_cache = Heap::single_character_string_cache();
4275 Object* undefined = Heap::undefined_value(); 4494 Object* undefined = Heap::undefined_value();
(...skipping 4521 matching lines...) Expand 10 before | Expand all | Expand 10 after
8797 } else { 9016 } else {
8798 // Handle last resort GC and make sure to allow future allocations 9017 // Handle last resort GC and make sure to allow future allocations
8799 // to grow the heap without causing GCs (if possible). 9018 // to grow the heap without causing GCs (if possible).
8800 Counters::gc_last_resort_from_js.Increment(); 9019 Counters::gc_last_resort_from_js.Increment();
8801 Heap::CollectAllGarbage(false); 9020 Heap::CollectAllGarbage(false);
8802 } 9021 }
8803 } 9022 }
8804 9023
8805 9024
8806 } } // namespace v8::internal 9025 } } // namespace v8::internal
OLDNEW
« no previous file with comments | « src/runtime.h ('k') | src/string.js » ('j') | src/string.js » ('J')

Powered by Google App Engine
This is Rietveld 408576698