OLD | NEW |
---|---|
1 // Copyright 2014 the V8 project authors. All rights reserved. | 1 // Copyright 2014 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "src/runtime/runtime-utils.h" | 5 #include "src/runtime/runtime-utils.h" |
6 | 6 |
7 #include "src/arguments.h" | 7 #include "src/arguments.h" |
8 #include "src/conversions-inl.h" | 8 #include "src/conversions-inl.h" |
9 #include "src/isolate-inl.h" | 9 #include "src/isolate-inl.h" |
10 #include "src/messages.h" | 10 #include "src/messages.h" |
(...skipping 261 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
272 case REPLACEMENT_SUBSTRING: | 272 case REPLACEMENT_SUBSTRING: |
273 case REPLACEMENT_STRING: | 273 case REPLACEMENT_STRING: |
274 builder->AddString(replacement_substrings_[part.data]); | 274 builder->AddString(replacement_substrings_[part.data]); |
275 break; | 275 break; |
276 default: | 276 default: |
277 UNREACHABLE(); | 277 UNREACHABLE(); |
278 } | 278 } |
279 } | 279 } |
280 } | 280 } |
281 | 281 |
282 | |
283 void FindOneByteStringIndices(Vector<const uint8_t> subject, uint8_t pattern, | 282 void FindOneByteStringIndices(Vector<const uint8_t> subject, uint8_t pattern, |
284 ZoneList<int>* indices, unsigned int limit, | 283 List<int>* indices, unsigned int limit) { |
285 Zone* zone) { | |
286 DCHECK(limit > 0); | 284 DCHECK(limit > 0); |
287 // Collect indices of pattern in subject using memchr. | 285 // Collect indices of pattern in subject using memchr. |
288 // Stop after finding at most limit values. | 286 // Stop after finding at most limit values. |
289 const uint8_t* subject_start = subject.start(); | 287 const uint8_t* subject_start = subject.start(); |
290 const uint8_t* subject_end = subject_start + subject.length(); | 288 const uint8_t* subject_end = subject_start + subject.length(); |
291 const uint8_t* pos = subject_start; | 289 const uint8_t* pos = subject_start; |
292 while (limit > 0) { | 290 while (limit > 0) { |
293 pos = reinterpret_cast<const uint8_t*>( | 291 pos = reinterpret_cast<const uint8_t*>( |
294 memchr(pos, pattern, subject_end - pos)); | 292 memchr(pos, pattern, subject_end - pos)); |
295 if (pos == NULL) return; | 293 if (pos == NULL) return; |
296 indices->Add(static_cast<int>(pos - subject_start), zone); | 294 indices->Add(static_cast<int>(pos - subject_start)); |
297 pos++; | 295 pos++; |
298 limit--; | 296 limit--; |
299 } | 297 } |
300 } | 298 } |
301 | 299 |
302 | |
303 void FindTwoByteStringIndices(const Vector<const uc16> subject, uc16 pattern, | 300 void FindTwoByteStringIndices(const Vector<const uc16> subject, uc16 pattern, |
304 ZoneList<int>* indices, unsigned int limit, | 301 List<int>* indices, unsigned int limit) { |
305 Zone* zone) { | |
306 DCHECK(limit > 0); | 302 DCHECK(limit > 0); |
307 const uc16* subject_start = subject.start(); | 303 const uc16* subject_start = subject.start(); |
308 const uc16* subject_end = subject_start + subject.length(); | 304 const uc16* subject_end = subject_start + subject.length(); |
309 for (const uc16* pos = subject_start; pos < subject_end && limit > 0; pos++) { | 305 for (const uc16* pos = subject_start; pos < subject_end && limit > 0; pos++) { |
310 if (*pos == pattern) { | 306 if (*pos == pattern) { |
311 indices->Add(static_cast<int>(pos - subject_start), zone); | 307 indices->Add(static_cast<int>(pos - subject_start)); |
312 limit--; | 308 limit--; |
313 } | 309 } |
314 } | 310 } |
315 } | 311 } |
316 | 312 |
317 | |
318 template <typename SubjectChar, typename PatternChar> | 313 template <typename SubjectChar, typename PatternChar> |
319 void FindStringIndices(Isolate* isolate, Vector<const SubjectChar> subject, | 314 void FindStringIndices(Isolate* isolate, Vector<const SubjectChar> subject, |
320 Vector<const PatternChar> pattern, | 315 Vector<const PatternChar> pattern, List<int>* indices, |
321 ZoneList<int>* indices, unsigned int limit, Zone* zone) { | 316 unsigned int limit) { |
322 DCHECK(limit > 0); | 317 DCHECK(limit > 0); |
323 // Collect indices of pattern in subject. | 318 // Collect indices of pattern in subject. |
324 // Stop after finding at most limit values. | 319 // Stop after finding at most limit values. |
325 int pattern_length = pattern.length(); | 320 int pattern_length = pattern.length(); |
326 int index = 0; | 321 int index = 0; |
327 StringSearch<PatternChar, SubjectChar> search(isolate, pattern); | 322 StringSearch<PatternChar, SubjectChar> search(isolate, pattern); |
328 while (limit > 0) { | 323 while (limit > 0) { |
329 index = search.Search(subject, index); | 324 index = search.Search(subject, index); |
330 if (index < 0) return; | 325 if (index < 0) return; |
331 indices->Add(index, zone); | 326 indices->Add(index); |
332 index += pattern_length; | 327 index += pattern_length; |
333 limit--; | 328 limit--; |
334 } | 329 } |
335 } | 330 } |
336 | 331 |
337 | |
338 void FindStringIndicesDispatch(Isolate* isolate, String* subject, | 332 void FindStringIndicesDispatch(Isolate* isolate, String* subject, |
339 String* pattern, ZoneList<int>* indices, | 333 String* pattern, List<int>* indices, |
340 unsigned int limit, Zone* zone) { | 334 unsigned int limit) { |
341 { | 335 { |
342 DisallowHeapAllocation no_gc; | 336 DisallowHeapAllocation no_gc; |
343 String::FlatContent subject_content = subject->GetFlatContent(); | 337 String::FlatContent subject_content = subject->GetFlatContent(); |
344 String::FlatContent pattern_content = pattern->GetFlatContent(); | 338 String::FlatContent pattern_content = pattern->GetFlatContent(); |
345 DCHECK(subject_content.IsFlat()); | 339 DCHECK(subject_content.IsFlat()); |
346 DCHECK(pattern_content.IsFlat()); | 340 DCHECK(pattern_content.IsFlat()); |
347 if (subject_content.IsOneByte()) { | 341 if (subject_content.IsOneByte()) { |
348 Vector<const uint8_t> subject_vector = subject_content.ToOneByteVector(); | 342 Vector<const uint8_t> subject_vector = subject_content.ToOneByteVector(); |
349 if (pattern_content.IsOneByte()) { | 343 if (pattern_content.IsOneByte()) { |
350 Vector<const uint8_t> pattern_vector = | 344 Vector<const uint8_t> pattern_vector = |
351 pattern_content.ToOneByteVector(); | 345 pattern_content.ToOneByteVector(); |
352 if (pattern_vector.length() == 1) { | 346 if (pattern_vector.length() == 1) { |
353 FindOneByteStringIndices(subject_vector, pattern_vector[0], indices, | 347 FindOneByteStringIndices(subject_vector, pattern_vector[0], indices, |
354 limit, zone); | 348 limit); |
355 } else { | 349 } else { |
356 FindStringIndices(isolate, subject_vector, pattern_vector, indices, | 350 FindStringIndices(isolate, subject_vector, pattern_vector, indices, |
357 limit, zone); | 351 limit); |
358 } | 352 } |
359 } else { | 353 } else { |
360 FindStringIndices(isolate, subject_vector, | 354 FindStringIndices(isolate, subject_vector, |
361 pattern_content.ToUC16Vector(), indices, limit, zone); | 355 pattern_content.ToUC16Vector(), indices, limit); |
362 } | 356 } |
363 } else { | 357 } else { |
364 Vector<const uc16> subject_vector = subject_content.ToUC16Vector(); | 358 Vector<const uc16> subject_vector = subject_content.ToUC16Vector(); |
365 if (pattern_content.IsOneByte()) { | 359 if (pattern_content.IsOneByte()) { |
366 Vector<const uint8_t> pattern_vector = | 360 Vector<const uint8_t> pattern_vector = |
367 pattern_content.ToOneByteVector(); | 361 pattern_content.ToOneByteVector(); |
368 if (pattern_vector.length() == 1) { | 362 if (pattern_vector.length() == 1) { |
369 FindTwoByteStringIndices(subject_vector, pattern_vector[0], indices, | 363 FindTwoByteStringIndices(subject_vector, pattern_vector[0], indices, |
370 limit, zone); | 364 limit); |
371 } else { | 365 } else { |
372 FindStringIndices(isolate, subject_vector, pattern_vector, indices, | 366 FindStringIndices(isolate, subject_vector, pattern_vector, indices, |
373 limit, zone); | 367 limit); |
374 } | 368 } |
375 } else { | 369 } else { |
376 Vector<const uc16> pattern_vector = pattern_content.ToUC16Vector(); | 370 Vector<const uc16> pattern_vector = pattern_content.ToUC16Vector(); |
377 if (pattern_vector.length() == 1) { | 371 if (pattern_vector.length() == 1) { |
378 FindTwoByteStringIndices(subject_vector, pattern_vector[0], indices, | 372 FindTwoByteStringIndices(subject_vector, pattern_vector[0], indices, |
379 limit, zone); | 373 limit); |
380 } else { | 374 } else { |
381 FindStringIndices(isolate, subject_vector, pattern_vector, indices, | 375 FindStringIndices(isolate, subject_vector, pattern_vector, indices, |
382 limit, zone); | 376 limit); |
383 } | 377 } |
384 } | 378 } |
385 } | 379 } |
386 } | 380 } |
387 } | 381 } |
388 | 382 |
389 template <typename ResultSeqString> | 383 template <typename ResultSeqString> |
390 MUST_USE_RESULT static Object* StringReplaceGlobalAtomRegExpWithString( | 384 MUST_USE_RESULT static Object* StringReplaceGlobalAtomRegExpWithString( |
391 Isolate* isolate, Handle<String> subject, Handle<JSRegExp> pattern_regexp, | 385 Isolate* isolate, Handle<String> subject, Handle<JSRegExp> pattern_regexp, |
392 Handle<String> replacement, Handle<JSObject> last_match_info) { | 386 Handle<String> replacement, Handle<JSObject> last_match_info) { |
393 DCHECK(subject->IsFlat()); | 387 DCHECK(subject->IsFlat()); |
394 DCHECK(replacement->IsFlat()); | 388 DCHECK(replacement->IsFlat()); |
395 | 389 |
396 ZoneScope zone_scope(isolate->runtime_zone()); | 390 List<int>* indices = isolate->regex_list(); |
397 ZoneList<int> indices(8, zone_scope.zone()); | 391 indices->Clear(); |
jgruber
2016/09/30 09:46:24
As discussed offline, this deletes indices.data_ j
| |
392 | |
398 DCHECK_EQ(JSRegExp::ATOM, pattern_regexp->TypeTag()); | 393 DCHECK_EQ(JSRegExp::ATOM, pattern_regexp->TypeTag()); |
399 String* pattern = | 394 String* pattern = |
400 String::cast(pattern_regexp->DataAt(JSRegExp::kAtomPatternIndex)); | 395 String::cast(pattern_regexp->DataAt(JSRegExp::kAtomPatternIndex)); |
401 int subject_len = subject->length(); | 396 int subject_len = subject->length(); |
402 int pattern_len = pattern->length(); | 397 int pattern_len = pattern->length(); |
403 int replacement_len = replacement->length(); | 398 int replacement_len = replacement->length(); |
404 | 399 |
405 FindStringIndicesDispatch(isolate, *subject, pattern, &indices, 0xffffffff, | 400 FindStringIndicesDispatch(isolate, *subject, pattern, indices, 0xffffffff); |
406 zone_scope.zone()); | |
407 | 401 |
408 int matches = indices.length(); | 402 int matches = indices->length(); |
409 if (matches == 0) return *subject; | 403 if (matches == 0) return *subject; |
410 | 404 |
411 // Detect integer overflow. | 405 // Detect integer overflow. |
412 int64_t result_len_64 = (static_cast<int64_t>(replacement_len) - | 406 int64_t result_len_64 = (static_cast<int64_t>(replacement_len) - |
413 static_cast<int64_t>(pattern_len)) * | 407 static_cast<int64_t>(pattern_len)) * |
414 static_cast<int64_t>(matches) + | 408 static_cast<int64_t>(matches) + |
415 static_cast<int64_t>(subject_len); | 409 static_cast<int64_t>(subject_len); |
416 int result_len; | 410 int result_len; |
417 if (result_len_64 > static_cast<int64_t>(String::kMaxLength)) { | 411 if (result_len_64 > static_cast<int64_t>(String::kMaxLength)) { |
418 STATIC_ASSERT(String::kMaxLength < kMaxInt); | 412 STATIC_ASSERT(String::kMaxLength < kMaxInt); |
(...skipping 10 matching lines...) Expand all Loading... | |
429 maybe_res = isolate->factory()->NewRawOneByteString(result_len); | 423 maybe_res = isolate->factory()->NewRawOneByteString(result_len); |
430 } else { | 424 } else { |
431 maybe_res = isolate->factory()->NewRawTwoByteString(result_len); | 425 maybe_res = isolate->factory()->NewRawTwoByteString(result_len); |
432 } | 426 } |
433 Handle<SeqString> untyped_res; | 427 Handle<SeqString> untyped_res; |
434 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, untyped_res, maybe_res); | 428 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, untyped_res, maybe_res); |
435 Handle<ResultSeqString> result = Handle<ResultSeqString>::cast(untyped_res); | 429 Handle<ResultSeqString> result = Handle<ResultSeqString>::cast(untyped_res); |
436 | 430 |
437 for (int i = 0; i < matches; i++) { | 431 for (int i = 0; i < matches; i++) { |
438 // Copy non-matched subject content. | 432 // Copy non-matched subject content. |
439 if (subject_pos < indices.at(i)) { | 433 if (subject_pos < indices->at(i)) { |
440 String::WriteToFlat(*subject, result->GetChars() + result_pos, | 434 String::WriteToFlat(*subject, result->GetChars() + result_pos, |
441 subject_pos, indices.at(i)); | 435 subject_pos, indices->at(i)); |
442 result_pos += indices.at(i) - subject_pos; | 436 result_pos += indices->at(i) - subject_pos; |
443 } | 437 } |
444 | 438 |
445 // Replace match. | 439 // Replace match. |
446 if (replacement_len > 0) { | 440 if (replacement_len > 0) { |
447 String::WriteToFlat(*replacement, result->GetChars() + result_pos, 0, | 441 String::WriteToFlat(*replacement, result->GetChars() + result_pos, 0, |
448 replacement_len); | 442 replacement_len); |
449 result_pos += replacement_len; | 443 result_pos += replacement_len; |
450 } | 444 } |
451 | 445 |
452 subject_pos = indices.at(i) + pattern_len; | 446 subject_pos = indices->at(i) + pattern_len; |
453 } | 447 } |
454 // Add remaining subject content at the end. | 448 // Add remaining subject content at the end. |
455 if (subject_pos < subject_len) { | 449 if (subject_pos < subject_len) { |
456 String::WriteToFlat(*subject, result->GetChars() + result_pos, subject_pos, | 450 String::WriteToFlat(*subject, result->GetChars() + result_pos, subject_pos, |
457 subject_len); | 451 subject_len); |
458 } | 452 } |
459 | 453 |
460 int32_t match_indices[] = {indices.at(matches - 1), | 454 int32_t match_indices[] = {indices->at(matches - 1), |
461 indices.at(matches - 1) + pattern_len}; | 455 indices->at(matches - 1) + pattern_len}; |
462 RegExpImpl::SetLastMatchInfo(last_match_info, subject, 0, match_indices); | 456 RegExpImpl::SetLastMatchInfo(last_match_info, subject, 0, match_indices); |
463 | 457 |
464 return *result; | 458 return *result; |
465 } | 459 } |
466 | 460 |
467 MUST_USE_RESULT static Object* StringReplaceGlobalRegExpWithString( | 461 MUST_USE_RESULT static Object* StringReplaceGlobalRegExpWithString( |
468 Isolate* isolate, Handle<String> subject, Handle<JSRegExp> regexp, | 462 Isolate* isolate, Handle<String> subject, Handle<JSRegExp> regexp, |
469 Handle<String> replacement, Handle<JSObject> last_match_info) { | 463 Handle<String> replacement, Handle<JSObject> last_match_info) { |
470 DCHECK(subject->IsFlat()); | 464 DCHECK(subject->IsFlat()); |
471 DCHECK(replacement->IsFlat()); | 465 DCHECK(replacement->IsFlat()); |
472 | 466 |
473 int capture_count = regexp->CaptureCount(); | 467 int capture_count = regexp->CaptureCount(); |
474 int subject_length = subject->length(); | 468 int subject_length = subject->length(); |
475 | 469 |
476 // CompiledReplacement uses zone allocation. | 470 // CompiledReplacement uses zone allocation. |
477 ZoneScope zone_scope(isolate->runtime_zone()); | 471 Zone zone(isolate->allocator()); |
478 CompiledReplacement compiled_replacement(zone_scope.zone()); | 472 CompiledReplacement compiled_replacement(&zone); |
479 bool simple_replace = | 473 bool simple_replace = |
480 compiled_replacement.Compile(replacement, capture_count, subject_length); | 474 compiled_replacement.Compile(replacement, capture_count, subject_length); |
481 | 475 |
482 // Shortcut for simple non-regexp global replacements | 476 // Shortcut for simple non-regexp global replacements |
483 if (regexp->TypeTag() == JSRegExp::ATOM && simple_replace) { | 477 if (regexp->TypeTag() == JSRegExp::ATOM && simple_replace) { |
484 if (subject->HasOnlyOneByteChars() && replacement->HasOnlyOneByteChars()) { | 478 if (subject->HasOnlyOneByteChars() && replacement->HasOnlyOneByteChars()) { |
485 return StringReplaceGlobalAtomRegExpWithString<SeqOneByteString>( | 479 return StringReplaceGlobalAtomRegExpWithString<SeqOneByteString>( |
486 isolate, subject, regexp, replacement, last_match_info); | 480 isolate, subject, regexp, replacement, last_match_info); |
487 } else { | 481 } else { |
488 return StringReplaceGlobalAtomRegExpWithString<SeqTwoByteString>( | 482 return StringReplaceGlobalAtomRegExpWithString<SeqTwoByteString>( |
(...skipping 215 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
704 | 698 |
705 // The limit can be very large (0xffffffffu), but since the pattern | 699 // The limit can be very large (0xffffffffu), but since the pattern |
706 // isn't empty, we can never create more parts than ~half the length | 700 // isn't empty, we can never create more parts than ~half the length |
707 // of the subject. | 701 // of the subject. |
708 | 702 |
709 subject = String::Flatten(subject); | 703 subject = String::Flatten(subject); |
710 pattern = String::Flatten(pattern); | 704 pattern = String::Flatten(pattern); |
711 | 705 |
712 static const int kMaxInitialListCapacity = 16; | 706 static const int kMaxInitialListCapacity = 16; |
713 | 707 |
714 ZoneScope zone_scope(isolate->runtime_zone()); | |
715 | |
716 // Find (up to limit) indices of separator and end-of-string in subject | 708 // Find (up to limit) indices of separator and end-of-string in subject |
717 int initial_capacity = Min<uint32_t>(kMaxInitialListCapacity, limit); | 709 int initial_capacity = Min<uint32_t>(kMaxInitialListCapacity, limit); |
718 ZoneList<int> indices(initial_capacity, zone_scope.zone()); | 710 List<int> indices(initial_capacity); |
jgruber
2016/09/30 09:46:24
Couldn't you reuse regexp_list here as well?
| |
719 | 711 |
720 FindStringIndicesDispatch(isolate, *subject, *pattern, &indices, limit, | 712 FindStringIndicesDispatch(isolate, *subject, *pattern, &indices, limit); |
721 zone_scope.zone()); | |
722 | 713 |
723 if (static_cast<uint32_t>(indices.length()) < limit) { | 714 if (static_cast<uint32_t>(indices.length()) < limit) { |
724 indices.Add(subject_length, zone_scope.zone()); | 715 indices.Add(subject_length); |
725 } | 716 } |
726 | 717 |
727 // The list indices now contains the end of each part to create. | 718 // The list indices now contains the end of each part to create. |
728 | 719 |
729 // Create JSArray of substrings separated by separator. | 720 // Create JSArray of substrings separated by separator. |
730 int part_count = indices.length(); | 721 int part_count = indices.length(); |
731 | 722 |
732 Handle<JSArray> result = | 723 Handle<JSArray> result = |
733 isolate->factory()->NewJSArray(FAST_ELEMENTS, part_count, part_count, | 724 isolate->factory()->NewJSArray(FAST_ELEMENTS, part_count, part_count, |
734 INITIALIZE_ARRAY_ELEMENTS_WITH_HOLE); | 725 INITIALIZE_ARRAY_ELEMENTS_WITH_HOLE); |
(...skipping 278 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1013 | 1004 |
1014 | 1005 |
1015 RUNTIME_FUNCTION(Runtime_IsRegExp) { | 1006 RUNTIME_FUNCTION(Runtime_IsRegExp) { |
1016 SealHandleScope shs(isolate); | 1007 SealHandleScope shs(isolate); |
1017 DCHECK(args.length() == 1); | 1008 DCHECK(args.length() == 1); |
1018 CONVERT_ARG_CHECKED(Object, obj, 0); | 1009 CONVERT_ARG_CHECKED(Object, obj, 0); |
1019 return isolate->heap()->ToBoolean(obj->IsJSRegExp()); | 1010 return isolate->heap()->ToBoolean(obj->IsJSRegExp()); |
1020 } | 1011 } |
1021 } // namespace internal | 1012 } // namespace internal |
1022 } // namespace v8 | 1013 } // namespace v8 |
OLD | NEW |