src/runtime.cc - Issue 875001: Converted String.prototype.split with string to C++.

Side by Side Diff: src/runtime.cc

Issue 875001: Converted String.prototype.split with string to C++. (Closed)

Patch Set: Rewrote conditions to switch with fallthrough. Created 10 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 // Copyright 2006-2009 the V8 project authors. All rights reserved.	1 // Copyright 2006-2009 the V8 project authors. All rights reserved.

2 // Redistribution and use in source and binary forms, with or without	2 // Redistribution and use in source and binary forms, with or without

3 // modification, are permitted provided that the following conditions are	3 // modification, are permitted provided that the following conditions are

4 // met:	4 // met:

5 //	5 //

6 // * Redistributions of source code must retain the above copyright	6 // * Redistributions of source code must retain the above copyright

7 // notice, this list of conditions and the following disclaimer.	7 // notice, this list of conditions and the following disclaimer.

8 // * Redistributions in binary form must reproduce the above	8 // * Redistributions in binary form must reproduce the above

9 // copyright notice, this list of conditions and the following	9 // copyright notice, this list of conditions and the following

10 // disclaimer in the documentation and/or other materials provided	10 // disclaimer in the documentation and/or other materials provided

(...skipping 2118 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
2129 int good_suffix_shift_[kBMMaxShift + 1];	2129 int good_suffix_shift_[kBMMaxShift + 1];

2130 int* biased_suffixes_;	2130 int* biased_suffixes_;

2131 int* biased_good_suffix_shift_;	2131 int* biased_good_suffix_shift_;

2132 DISALLOW_COPY_AND_ASSIGN(BMGoodSuffixBuffers);	2132 DISALLOW_COPY_AND_ASSIGN(BMGoodSuffixBuffers);

2133 };	2133 };

2134	2134

2135 // buffers reused by BoyerMoore	2135 // buffers reused by BoyerMoore

2136 static int bad_char_occurrence[kBMAlphabetSize];	2136 static int bad_char_occurrence[kBMAlphabetSize];

2137 static BMGoodSuffixBuffers bmgs_buffers;	2137 static BMGoodSuffixBuffers bmgs_buffers;

2138	2138

	2139 // State of the string match tables.

	2140 // SIMPLE: No usable content in the buffers.

	2141 // BOYER_MOORE_HORSPOOL: The bad_char_occurences table has been populated.

	2142 // BOYER_MOORE: The bmgs_buffers tables have also been populated.

	2143 // Whenever starting with a new needle, one should call InitializeStringSearch

	2144 // to determine which search strategy to use, and in the case of a long-needle

	2145 // strategy, the call also initializes the algorithm to SIMPLE.

	2146 enum StringSearchAlgorithm { SIMPLE_SEARCH, BOYER_MOORE_HORSPOOL, BOYER_MOORE };

	2147 static StringSearchAlgorithm algorithm;

	2148

	2149

2139 // Compute the bad-char table for Boyer-Moore in the static buffer.	2150 // Compute the bad-char table for Boyer-Moore in the static buffer.

2140 template <typename pchar>	2151 template <typename pchar>

2141 static void BoyerMoorePopulateBadCharTable(Vector<const pchar> pattern,	2152 static void BoyerMoorePopulateBadCharTable(Vector<const pchar> pattern) {

2142 int start) {	2153 // Only preprocess at most kBMMaxShift last characters of pattern.

	2154 int start = pattern.length() < kBMMaxShift ? 0

	2155 : pattern.length() - kBMMaxShift;

2143 // Run forwards to populate bad_char_table, so that last instance	2156 // Run forwards to populate bad_char_table, so that last instance

2144 // of character equivalence class is the one registered.	2157 // of character equivalence class is the one registered.

2145 // Notice: Doesn't include the last character.	2158 // Notice: Doesn't include the last character.

2146 int table_size = (sizeof(pchar) == 1) ? String::kMaxAsciiCharCode + 1	2159 int table_size = (sizeof(pchar) == 1) ? String::kMaxAsciiCharCode + 1

2147 : kBMAlphabetSize;	2160 : kBMAlphabetSize;

2148 if (start == 0) { // All patterns less than kBMMaxShift in length.	2161 if (start == 0) { // All patterns less than kBMMaxShift in length.

2149 memset(bad_char_occurrence, -1, table_size * sizeof(*bad_char_occurrence));	2162 memset(bad_char_occurrence, -1, table_size * sizeof(*bad_char_occurrence));

2150 } else {	2163 } else {

2151 for (int i = 0; i < table_size; i++) {	2164 for (int i = 0; i < table_size; i++) {

2152 bad_char_occurrence[i] = start - 1;	2165 bad_char_occurrence[i] = start - 1;

2153 }	2166 }

2154 }	2167 }

2155 for (int i = start; i < pattern.length() - 1; i++) {	2168 for (int i = start; i < pattern.length() - 1; i++) {

2156 pchar c = pattern[i];	2169 pchar c = pattern[i];

2157 int bucket = (sizeof(pchar) ==1) ? c : c % kBMAlphabetSize;	2170 int bucket = (sizeof(pchar) ==1) ? c : c % kBMAlphabetSize;

2158 bad_char_occurrence[bucket] = i;	2171 bad_char_occurrence[bucket] = i;

2159 }	2172 }

2160 }	2173 }

2161	2174

	2175

2162 template <typename pchar>	2176 template <typename pchar>

2163 static void BoyerMoorePopulateGoodSuffixTable(Vector<const pchar> pattern,	2177 static void BoyerMoorePopulateGoodSuffixTable(Vector<const pchar> pattern) {

2164 int start) {

2165 int m = pattern.length();	2178 int m = pattern.length();

	2179 int start = m < kBMMaxShift ? 0 : m - kBMMaxShift;

2166 int len = m - start;	2180 int len = m - start;

2167 // Compute Good Suffix tables.	2181 // Compute Good Suffix tables.

2168 bmgs_buffers.init(m);	2182 bmgs_buffers.init(m);

2169	2183

2170 bmgs_buffers.shift(m-1) = 1;	2184 bmgs_buffers.shift(m-1) = 1;

2171 bmgs_buffers.suffix(m) = m + 1;	2185 bmgs_buffers.suffix(m) = m + 1;

2172 pchar last_char = pattern[m - 1];	2186 pchar last_char = pattern[m - 1];

2173 int suffix = m + 1;	2187 int suffix = m + 1;

2174 for (int i = m; i > start;) {	2188 for (int i = m; i > start;) {

2175 for (pchar c = pattern[i - 1]; suffix <= m && c != pattern[suffix - 1];) {	2189 for (pchar c = pattern[i - 1]; suffix <= m && c != pattern[suffix - 1];) {

(...skipping 26 matching lines...) Expand all Loading...
2202 if (bmgs_buffers.shift(i) == len) {	2216 if (bmgs_buffers.shift(i) == len) {

2203 bmgs_buffers.shift(i) = suffix - start;	2217 bmgs_buffers.shift(i) = suffix - start;

2204 }	2218 }

2205 if (i == suffix) {	2219 if (i == suffix) {

2206 suffix = bmgs_buffers.suffix(suffix);	2220 suffix = bmgs_buffers.suffix(suffix);

2207 }	2221 }

2208 }	2222 }

2209 }	2223 }

2210 }	2224 }

2211	2225

	2226

2212 template <typename schar, typename pchar>	2227 template <typename schar, typename pchar>

2213 static inline int CharOccurrence(int char_code) {	2228 static inline int CharOccurrence(int char_code) {

2214 if (sizeof(schar) == 1) {	2229 if (sizeof(schar) == 1) {

2215 return bad_char_occurrence[char_code];	2230 return bad_char_occurrence[char_code];

2216 }	2231 }

2217 if (sizeof(pchar) == 1) {	2232 if (sizeof(pchar) == 1) {

2218 if (char_code > String::kMaxAsciiCharCode) {	2233 if (char_code > String::kMaxAsciiCharCode) {

2219 return -1;	2234 return -1;

2220 }	2235 }

2221 return bad_char_occurrence[char_code];	2236 return bad_char_occurrence[char_code];

2222 }	2237 }

2223 return bad_char_occurrence[char_code % kBMAlphabetSize];	2238 return bad_char_occurrence[char_code % kBMAlphabetSize];

2224 }	2239 }

2225	2240

	2241

2226 // Restricted simplified Boyer-Moore string matching.	2242 // Restricted simplified Boyer-Moore string matching.

2227 // Uses only the bad-shift table of Boyer-Moore and only uses it	2243 // Uses only the bad-shift table of Boyer-Moore and only uses it

2228 // for the character compared to the last character of the needle.	2244 // for the character compared to the last character of the needle.

2229 template <typename schar, typename pchar>	2245 template <typename schar, typename pchar>

2230 static int BoyerMooreHorspool(Vector<const schar> subject,	2246 static int BoyerMooreHorspool(Vector<const schar> subject,

2231 Vector<const pchar> pattern,	2247 Vector<const pchar> pattern,

2232 int start_index,	2248 int start_index,

2233 bool* complete) {	2249 bool* complete) {

	2250 ASSERT(algorithm <= BOYER_MOORE_HORSPOOL);

2234 int n = subject.length();	2251 int n = subject.length();

2235 int m = pattern.length();	2252 int m = pattern.length();

2236 // Only preprocess at most kBMMaxShift last characters of pattern.

2237 int start = m < kBMMaxShift ? 0 : m - kBMMaxShift;

2238	2253

2239 BoyerMoorePopulateBadCharTable(pattern, start);	2254 int badness = -m;

2240	2255

2241 int badness = -m; // How bad we are doing without a good-suffix table.	2256 // How bad we are doing without a good-suffix table.

2242 int idx; // No matches found prior to this index.	2257 int idx; // No matches found prior to this index.

2243 pchar last_char = pattern[m - 1];	2258 pchar last_char = pattern[m - 1];

2244 int last_char_shift = m - 1 - CharOccurrence<schar, pchar>(last_char);	2259 int last_char_shift = m - 1 - CharOccurrence<schar, pchar>(last_char);

2245 // Perform search	2260 // Perform search

2246 for (idx = start_index; idx <= n - m;) {	2261 for (idx = start_index; idx <= n - m;) {

2247 int j = m - 1;	2262 int j = m - 1;

2248 int c;	2263 int c;

2249 while (last_char != (c = subject[idx + j])) {	2264 while (last_char != (c = subject[idx + j])) {

2250 int bc_occ = CharOccurrence<schar, pchar>(c);	2265 int bc_occ = CharOccurrence<schar, pchar>(c);

2251 int shift = j - bc_occ;	2266 int shift = j - bc_occ;

2252 idx += shift;	2267 idx += shift;

2253 badness += 1 - shift; // at most zero, so badness cannot increase.	2268 badness += 1 - shift; // at most zero, so badness cannot increase.

2254 if (idx > n - m) {	2269 if (idx > n - m) {

	2270 // If we could just break out of the for loop here.
	Erik Corry 2010/03/15 12:57:48 What would then happen? What would then happen?
2255 *complete = true;	2271 *complete = true;

2256 return -1;	2272 return -1;

2257 }	2273 }

2258 }	2274 }

2259 j--;	2275 j--;

2260 while (j >= 0 && pattern[j] == (subject[idx + j])) j--;	2276 while (j >= 0 && pattern[j] == (subject[idx + j])) j--;

2261 if (j < 0) {	2277 if (j < 0) {

2262 *complete = true;	2278 *complete = true;

2263 return idx;	2279 return idx;

2264 } else {	2280 } else {

(...skipping 11 matching lines...) Expand all Loading...
2276 }	2292 }

2277 *complete = true;	2293 *complete = true;

2278 return -1;	2294 return -1;

2279 }	2295 }

2280	2296

2281	2297

2282 template <typename schar, typename pchar>	2298 template <typename schar, typename pchar>

2283 static int BoyerMooreIndexOf(Vector<const schar> subject,	2299 static int BoyerMooreIndexOf(Vector<const schar> subject,

2284 Vector<const pchar> pattern,	2300 Vector<const pchar> pattern,

2285 int idx) {	2301 int idx) {

	2302 ASSERT(algorithm <= BOYER_MOORE);

2286 int n = subject.length();	2303 int n = subject.length();

2287 int m = pattern.length();	2304 int m = pattern.length();

2288 // Only preprocess at most kBMMaxShift last characters of pattern.	2305 // Only preprocess at most kBMMaxShift last characters of pattern.

2289 int start = m < kBMMaxShift ? 0 : m - kBMMaxShift;	2306 int start = m < kBMMaxShift ? 0 : m - kBMMaxShift;

2290	2307

2291 // Build the Good Suffix table and continue searching.

2292 BoyerMoorePopulateGoodSuffixTable(pattern, start);

2293 pchar last_char = pattern[m - 1];	2308 pchar last_char = pattern[m - 1];

2294 // Continue search from i.	2309 // Continue search from i.

2295 while (idx <= n - m) {	2310 while (idx <= n - m) {

2296 int j = m - 1;	2311 int j = m - 1;

2297 schar c;	2312 schar c;

2298 while (last_char != (c = subject[idx + j])) {	2313 while (last_char != (c = subject[idx + j])) {

2299 int shift = j - CharOccurrence<schar, pchar>(c);	2314 int shift = j - CharOccurrence<schar, pchar>(c);

2300 idx += shift;	2315 idx += shift;

2301 if (idx > n - m) {	2316 if (idx > n - m) {

2302 return -1;	2317 return -1;

(...skipping 15 matching lines...) Expand all Loading...
2318 }	2333 }

2319 idx += shift;	2334 idx += shift;

2320 }	2335 }

2321 }	2336 }

2322	2337

2323 return -1;	2338 return -1;

2324 }	2339 }

2325	2340

2326	2341

2327 template <typename schar>	2342 template <typename schar>

2328 static int SingleCharIndexOf(Vector<const schar> string,	2343 static inline int SingleCharIndexOf(Vector<const schar> string,

2329 schar pattern_char,	2344 schar pattern_char,

2330 int start_index) {	2345 int start_index) {

2331 for (int i = start_index, n = string.length(); i < n; i++) {	2346 for (int i = start_index, n = string.length(); i < n; i++) {

2332 if (pattern_char == string[i]) {	2347 if (pattern_char == string[i]) {

2333 return i;	2348 return i;

2334 }	2349 }

2335 }	2350 }

2336 return -1;	2351 return -1;

2337 }	2352 }

2338	2353

2339	2354

2340 template <typename schar>	2355 template <typename schar>

(...skipping 16 matching lines...) Expand all Loading...
2357 // further checking should start, i.e., it's guaranteed that the pattern	2372 // further checking should start, i.e., it's guaranteed that the pattern

2358 // does not occur at a position prior to the returned index.	2373 // does not occur at a position prior to the returned index.

2359 template <typename pchar, typename schar>	2374 template <typename pchar, typename schar>

2360 static int SimpleIndexOf(Vector<const schar> subject,	2375 static int SimpleIndexOf(Vector<const schar> subject,

2361 Vector<const pchar> pattern,	2376 Vector<const pchar> pattern,

2362 int idx,	2377 int idx,

2363 bool* complete) {	2378 bool* complete) {

2364 // Badness is a count of how much work we have done. When we have	2379 // Badness is a count of how much work we have done. When we have

2365 // done enough work we decide it's probably worth switching to a better	2380 // done enough work we decide it's probably worth switching to a better

2366 // algorithm.	2381 // algorithm.

2367 int badness = -10 - (pattern.length() << 2);	2382

2368 // We know our pattern is at least 2 characters, we cache the first so	2383 // We know our pattern is at least 2 characters, we cache the first so

2369 // the common case of the first character not matching is faster.	2384 // the common case of the first character not matching is faster.

2370 pchar pattern_first_char = pattern[0];	2385 pchar pattern_first_char = pattern[0];

2371	2386 int badness = -10 - pattern.length() << 2;

2372 for (int i = idx, n = subject.length() - pattern.length(); i <= n; i++) {	2387 for (int i = idx, n = subject.length() - pattern.length(); i <= n; i++) {

2373 badness++;	2388 badness++;

2374 if (badness > 0) {	2389 if (badness > 0) {

2375 *complete = false;	2390 *complete = false;

2376 return i;	2391 return i;

2377 }	2392 }

2378 if (subject[i] != pattern_first_char) continue;	2393 if (subject[i] != pattern_first_char) continue;

2379 int j = 1;	2394 int j = 1;

2380 do {	2395 do {

2381 if (pattern[j] != subject[i+j]) {	2396 if (pattern[j] != subject[i+j]) {

(...skipping 27 matching lines...) Expand all Loading...
2409 j++;	2424 j++;

2410 } while (j < pattern.length());	2425 } while (j < pattern.length());

2411 if (j == pattern.length()) {	2426 if (j == pattern.length()) {

2412 return i;	2427 return i;

2413 }	2428 }

2414 }	2429 }

2415 return -1;	2430 return -1;

2416 }	2431 }

2417	2432

2418	2433

2419 // Dispatch to different algorithms.	2434 // Strategy for searching for a string in another string.

2420 template <typename schar, typename pchar>	2435 enum StringSearchStrategy { SEARCH_FAIL, SEARCH_SHORT, SEARCH_LONG };

2421 static int StringMatchStrategy(Vector<const schar> sub,	2436

2422 Vector<const pchar> pat,	2437

2423 int start_index) {	2438 template <typename pchar>

	2439 static inline StringSearchStrategy InitializeStringSearch(

	2440 Vector<const pchar> pat, bool ascii_subject) {

2424 ASSERT(pat.length() > 1);	2441 ASSERT(pat.length() > 1);

2425

2426 // We have an ASCII haystack and a non-ASCII needle. Check if there	2442 // We have an ASCII haystack and a non-ASCII needle. Check if there

2427 // really is a non-ASCII character in the needle and bail out if there	2443 // really is a non-ASCII character in the needle and bail out if there

2428 // is.	2444 // is.

2429 if (sizeof(schar) == 1 && sizeof(pchar) > 1) {	2445 if (ascii_subject && sizeof(pchar) > 1) {

2430 for (int i = 0; i < pat.length(); i++) {	2446 for (int i = 0; i < pat.length(); i++) {

2431 uc16 c = pat[i];	2447 uc16 c = pat[i];

2432 if (c > String::kMaxAsciiCharCode) {	2448 if (c > String::kMaxAsciiCharCode) {

2433 return -1;	2449 return SEARCH_FAIL;

2434 }	2450 }

2435 }	2451 }

2436 }	2452 }

2437 if (pat.length() < kBMMinPatternLength) {	2453 if (pat.length() < kBMMinPatternLength) {

2438 // We don't believe fancy searching can ever be more efficient.	2454 return SEARCH_SHORT;

2439 // The max shift of Boyer-Moore on a pattern of this length does

2440 // not compensate for the overhead.

2441 return SimpleIndexOf(sub, pat, start_index);

2442 }	2455 }

	2456 algorithm = SIMPLE_SEARCH;

	2457 return SEARCH_LONG;

	2458 }

	2459

	2460

	2461 // Dispatch long needle searches to different algorithms.

	2462 template <typename schar, typename pchar>

	2463 static int ComplexIndexOf(Vector<const schar> sub,

	2464 Vector<const pchar> pat,

	2465 int start_index) {

	2466 ASSERT(pat.length() >= kBMMinPatternLength);

2443 // Try algorithms in order of increasing setup cost and expected performance.	2467 // Try algorithms in order of increasing setup cost and expected performance.

2444 bool complete;	2468 bool complete;

2445 int idx = SimpleIndexOf(sub, pat, start_index, &complete);	2469 int idx = start_index;

2446 if (complete) return idx;	2470 switch (algorithm) {

2447 idx = BoyerMooreHorspool(sub, pat, idx, &complete);	2471 case SIMPLE_SEARCH:

2448 if (complete) return idx;	2472 idx = SimpleIndexOf(sub, pat, idx, &complete);

2449 return BoyerMooreIndexOf(sub, pat, idx);	2473 if (complete) return idx;

	2474 BoyerMoorePopulateBadCharTable(pat);

	2475 algorithm = BOYER_MOORE_HORSPOOL;

	2476 // FALLTHROUGH.
	Erik Corry 2010/03/15 12:57:48 Does the linter insist on this annoying capitaliza Does the linter insist on this annoying capitalization?
	2477 case BOYER_MOORE_HORSPOOL:

	2478 idx = BoyerMooreHorspool(sub, pat, idx, &complete);

	2479 if (complete) return idx;

	2480 // Build the Good Suffix table and continue searching.

	2481 BoyerMoorePopulateGoodSuffixTable(pat);

	2482 algorithm = BOYER_MOORE;

	2483 // FALLTHROUGH.

	2484 case BOYER_MOORE:

	2485 return BoyerMooreIndexOf(sub, pat, idx);

	2486 }

	2487 UNREACHABLE();

	2488 return -1;

2450 }	2489 }

2451	2490

	2491

	2492 // Dispatch to different search strategies for a single search.

	2493 // If searching multiple times on the same needle, the search

	2494 // strategy should only be computed once and then dispatch to different

	2495 // loops.

	2496 template <typename schar, typename pchar>

	2497 static int StringSearch(Vector<const schar> sub,

	2498 Vector<const pchar> pat,

	2499 int start_index) {

	2500 bool ascii_subject = (sizeof(schar) == 1);

	2501 StringSearchStrategy strategy = InitializeStringSearch(pat, ascii_subject);

	2502 switch (strategy) {

	2503 case SEARCH_FAIL: return -1;

	2504 case SEARCH_SHORT: return SimpleIndexOf(sub, pat, start_index);

	2505 case SEARCH_LONG: return ComplexIndexOf(sub, pat, start_index);

	2506 }

	2507 UNREACHABLE();

	2508 return -1;

	2509 }

	2510

	2511

2452 // Perform string match of pattern on subject, starting at start index.	2512 // Perform string match of pattern on subject, starting at start index.

2453 // Caller must ensure that 0 <= start_index <= sub->length(),	2513 // Caller must ensure that 0 <= start_index <= sub->length(),

2454 // and should check that pat->length() + start_index <= sub->length()	2514 // and should check that pat->length() + start_index <= sub->length()

2455 int Runtime::StringMatch(Handle<String> sub,	2515 int Runtime::StringMatch(Handle<String> sub,

2456 Handle<String> pat,	2516 Handle<String> pat,

2457 int start_index) {	2517 int start_index) {

2458 ASSERT(0 <= start_index);	2518 ASSERT(0 <= start_index);

2459 ASSERT(start_index <= sub->length());	2519 ASSERT(start_index <= sub->length());

2460	2520

2461 int pattern_length = pat->length();	2521 int pattern_length = pat->length();

2462 if (pattern_length == 0) return start_index;	2522 if (pattern_length == 0) return start_index;

2463	2523

2464 int subject_length = sub->length();	2524 int subject_length = sub->length();

2465 if (start_index + pattern_length > subject_length) return -1;	2525 if (start_index + pattern_length > subject_length) return -1;

2466	2526

2467 if (!sub->IsFlat()) {	2527 if (!sub->IsFlat()) {

2468 FlattenString(sub);	2528 FlattenString(sub);

2469 }	2529 }

	2530

2470 // Searching for one specific character is common. For one	2531 // Searching for one specific character is common. For one

2471 // character patterns linear search is necessary, so any smart	2532 // character patterns linear search is necessary, so any smart

2472 // algorithm is unnecessary overhead.	2533 // algorithm is unnecessary overhead.

2473 if (pattern_length == 1) {	2534 if (pattern_length == 1) {

2474 AssertNoAllocation no_heap_allocation; // ensure vectors stay valid	2535 AssertNoAllocation no_heap_allocation; // ensure vectors stay valid

2475 if (sub->IsAsciiRepresentation()) {	2536 if (sub->IsAsciiRepresentation()) {

2476 uc16 pchar = pat->Get(0);	2537 uc16 pchar = pat->Get(0);

2477 if (pchar > String::kMaxAsciiCharCode) {	2538 if (pchar > String::kMaxAsciiCharCode) {

2478 return -1;	2539 return -1;

2479 }	2540 }

(...skipping 13 matching lines...) Expand all Loading...
2493	2554

2494 if (!pat->IsFlat()) {	2555 if (!pat->IsFlat()) {

2495 FlattenString(pat);	2556 FlattenString(pat);

2496 }	2557 }

2497	2558

2498 AssertNoAllocation no_heap_allocation; // ensure vectors stay valid	2559 AssertNoAllocation no_heap_allocation; // ensure vectors stay valid

2499 // dispatch on type of strings	2560 // dispatch on type of strings

2500 if (pat->IsAsciiRepresentation()) {	2561 if (pat->IsAsciiRepresentation()) {

2501 Vector<const char> pat_vector = pat->ToAsciiVector();	2562 Vector<const char> pat_vector = pat->ToAsciiVector();

2502 if (sub->IsAsciiRepresentation()) {	2563 if (sub->IsAsciiRepresentation()) {

2503 return StringMatchStrategy(sub->ToAsciiVector(), pat_vector, start_index);	2564 return StringSearch(sub->ToAsciiVector(), pat_vector, start_index);

2504 }	2565 }

2505 return StringMatchStrategy(sub->ToUC16Vector(), pat_vector, start_index);	2566 return StringSearch(sub->ToUC16Vector(), pat_vector, start_index);

2506 }	2567 }

2507 Vector<const uc16> pat_vector = pat->ToUC16Vector();	2568 Vector<const uc16> pat_vector = pat->ToUC16Vector();

2508 if (sub->IsAsciiRepresentation()) {	2569 if (sub->IsAsciiRepresentation()) {

2509 return StringMatchStrategy(sub->ToAsciiVector(), pat_vector, start_index);	2570 return StringSearch(sub->ToAsciiVector(), pat_vector, start_index);

2510 }	2571 }

2511 return StringMatchStrategy(sub->ToUC16Vector(), pat_vector, start_index);	2572 return StringSearch(sub->ToUC16Vector(), pat_vector, start_index);

2512 }	2573 }

2513	2574

2514	2575

2515 static Object* Runtime_StringIndexOf(Arguments args) {	2576 static Object* Runtime_StringIndexOf(Arguments args) {

2516 HandleScope scope; // create a new handle scope	2577 HandleScope scope; // create a new handle scope

2517 ASSERT(args.length() == 3);	2578 ASSERT(args.length() == 3);

2518	2579

2519 CONVERT_ARG_CHECKED(String, sub, 0);	2580 CONVERT_ARG_CHECKED(String, sub, 0);

2520 CONVERT_ARG_CHECKED(String, pat, 1);	2581 CONVERT_ARG_CHECKED(String, pat, 1);

2521	2582

(...skipping 1734 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
4256 int right = length;	4317 int right = length;

4257 if (trimRight) {	4318 if (trimRight) {

4258 while (right > left && IsTrimWhiteSpace(s->Get(right - 1))) {	4319 while (right > left && IsTrimWhiteSpace(s->Get(right - 1))) {

4259 right--;	4320 right--;

4260 }	4321 }

4261 }	4322 }

4262 return s->SubString(left, right);	4323 return s->SubString(left, right);

4263 }	4324 }

4264	4325

4265	4326

	4327 template <typename schar, typename pchar>

	4328 void FindStringIndices(Vector<const schar> subject,

	4329 Vector<const pchar> pattern,

	4330 ZoneList<int>* indices,

	4331 unsigned int limit) {

	4332 ASSERT(limit > 0);

	4333 // Collect indices of pattern in subject, and the end-of-string index.

	4334 // Stop after finding at most limit values.

	4335 StringSearchStrategy strategy =

	4336 InitializeStringSearch(pattern, sizeof(schar) == 1);

	4337 switch (strategy) {

	4338 case SEARCH_FAIL: return;

	4339 case SEARCH_SHORT: {

	4340 int pattern_length = pattern.length();

	4341 int index = 0;

	4342 while (limit > 0) {

	4343 index = SimpleIndexOf(subject, pattern, index);

	4344 if (index < 0) return;

	4345 indices->Add(index);

	4346 index += pattern_length;

	4347 limit--;

	4348 }

	4349 return;

	4350 }

	4351 case SEARCH_LONG: {

	4352 int pattern_length = pattern.length();

	4353 int index = 0;

	4354 while (limit > 0) {

	4355 index = ComplexIndexOf(subject, pattern, index);

	4356 if (index < 0) return;

	4357 indices->Add(index);

	4358 index += pattern_length;

	4359 limit--;

	4360 }

	4361 return;

	4362 }

	4363 default:

	4364 UNREACHABLE();

	4365 return;

	4366 }

	4367 }

	4368

	4369 template <typename schar>

	4370 void inline FindCharIndices(Vector<const schar> subject,

	4371 const schar pattern_char,

	4372 ZoneList<int>* indices,

	4373 unsigned int limit) {

	4374 // Collect indices of pattern_char in subject, and the end-of-string index.

	4375 // Stop after finding at most limit values.

	4376 int index = 0;

	4377 while (limit > 0) {

	4378 index = SingleCharIndexOf(subject, pattern_char, index);

	4379 if (index < 0) return;

	4380 indices->Add(index);

	4381 index++;

	4382 limit--;

	4383 }

	4384 }

	4385

	4386

	4387 static Object* Runtime_StringSplit(Arguments args) {

	4388 ASSERT(args.length() == 3);

	4389 HandleScope handle_scope;

	4390 CONVERT_ARG_CHECKED(String, subject, 0);

	4391 CONVERT_ARG_CHECKED(String, pattern, 1);

	4392 CONVERT_NUMBER_CHECKED(uint32_t, limit, Uint32, args[2]);

	4393

	4394 int subject_length = subject->length();

	4395 int pattern_length = pattern->length();

	4396

	4397 if (!subject->IsFlat()) FlattenString(subject);

	4398

	4399 static const int kMaxInitialListCapacity = 16;

	4400

	4401 ZoneScope scope(DELETE_ON_EXIT);

	4402

	4403 // Find (up to limit) indices of separator and end-of-string in subject

	4404 int initial_capacity = Min<uint32_t>(kMaxInitialListCapacity, limit);

	4405 ZoneList<int> indices(initial_capacity);

	4406 if (pattern_length == 1) {

	4407 // Special case, go directly to fast single-character split.

	4408 AssertNoAllocation nogc;

	4409 uc16 pattern_char = pattern->Get(0);

	4410 if (subject->IsTwoByteRepresentation()) {

	4411 FindCharIndices(subject->ToUC16Vector(), pattern_char,

	4412 &indices,

	4413 limit);

	4414 } else if (pattern_char <= String::kMaxAsciiCharCode) {

	4415 FindCharIndices(subject->ToAsciiVector(),

	4416 static_cast<char>(pattern_char),

	4417 &indices,

	4418 limit);

	4419 }

	4420 } else {

	4421 if (!pattern->IsFlat()) FlattenString(pattern);

	4422 AssertNoAllocation nogc;

	4423 if (subject->IsAsciiRepresentation()) {

	4424 Vector<const char> subject_vector = subject->ToAsciiVector();

	4425 if (pattern->IsAsciiRepresentation()) {

	4426 FindStringIndices(subject_vector,

	4427 pattern->ToAsciiVector(),

	4428 &indices,

	4429 limit);

	4430 } else {

	4431 FindStringIndices(subject_vector,

	4432 pattern->ToUC16Vector(),

	4433 &indices,

	4434 limit);

	4435 }

	4436 } else {

	4437 Vector<const uc16> subject_vector = subject->ToUC16Vector();

	4438 if (pattern->IsAsciiRepresentation()) {

	4439 FindStringIndices(subject_vector,

	4440 pattern->ToAsciiVector(),

	4441 &indices,

	4442 limit);

	4443 } else {

	4444 FindStringIndices(subject_vector,

	4445 pattern->ToUC16Vector(),

	4446 &indices,

	4447 limit);

	4448 }

	4449 }

	4450 }

	4451 if (static_cast<uint32_t>(indices.length()) < limit) {

	4452 indices.Add(subject_length);

	4453 }

	4454 // The list indices now contains the end of each part to create.

	4455

	4456

	4457 // Create JSArray of substrings separated by separator.

	4458 int part_count = indices.length();

	4459

	4460 Handle<JSArray> result = Factory::NewJSArray(part_count);

	4461 result->set_length(Smi::FromInt(part_count));

	4462

	4463 ASSERT(result->HasFastElements());

	4464

	4465 if (part_count == 1 && indices.at(0) == subject_length) {

	4466 FixedArray::cast(result->elements())->set(0, *subject);

	4467 return *result;

	4468 }

	4469

	4470 Handle<FixedArray> elements(FixedArray::cast(result->elements()));

	4471 int part_start = 0;

	4472 for (int i = 0; i < part_count; i++) {

	4473 HandleScope local_loop_handle;

	4474 int part_end = indices.at(i);

	4475 Handle<String> substring =

	4476 Factory::NewSubString(subject, part_start, part_end);

	4477 elements->set(i, *substring);

	4478 part_start = part_end + pattern_length;

	4479 }

	4480

	4481 return *result;

	4482 }

	4483

	4484

4266 // Copies ascii characters to the given fixed array looking up	4485 // Copies ascii characters to the given fixed array looking up

4267 // one-char strings in the cache. Gives up on the first char that is	4486 // one-char strings in the cache. Gives up on the first char that is

4268 // not in the cache and fills the remainder with smi zeros. Returns	4487 // not in the cache and fills the remainder with smi zeros. Returns

4269 // the length of the successfully copied prefix.	4488 // the length of the successfully copied prefix.

4270 static int CopyCachedAsciiCharsToArray(const char* chars,	4489 static int CopyCachedAsciiCharsToArray(const char* chars,

4271 FixedArray* elements,	4490 FixedArray* elements,

4272 int length) {	4491 int length) {

4273 AssertNoAllocation nogc;	4492 AssertNoAllocation nogc;

4274 FixedArray* ascii_cache = Heap::single_character_string_cache();	4493 FixedArray* ascii_cache = Heap::single_character_string_cache();

4275 Object* undefined = Heap::undefined_value();	4494 Object* undefined = Heap::undefined_value();

(...skipping 4521 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
8797 } else {	9016 } else {

8798 // Handle last resort GC and make sure to allow future allocations	9017 // Handle last resort GC and make sure to allow future allocations

8799 // to grow the heap without causing GCs (if possible).	9018 // to grow the heap without causing GCs (if possible).

8800 Counters::gc_last_resort_from_js.Increment();	9019 Counters::gc_last_resort_from_js.Increment();

8801 Heap::CollectAllGarbage(false);	9020 Heap::CollectAllGarbage(false);

8802 }	9021 }

8803 }	9022 }

8804	9023

8805	9024

8806 } } // namespace v8::internal	9025 } } // namespace v8::internal

OLD	NEW

« no previous file with comments | « src/runtime.h ('k') | src/string.js » ('j') | src/string.js » ('J')