Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(253)

Side by Side Diff: src/builtins/builtins-regexp.cc

Issue 2307863003: [regexp] Port RegExpMatch, RegExpSearch, and RegExpTest (Closed)
Patch Set: Rebase Created 4 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/builtins/builtins.h ('k') | src/heap-symbols.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2016 the V8 project authors. All rights reserved. 1 // Copyright 2016 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "src/builtins/builtins.h" 5 #include "src/builtins/builtins.h"
6 #include "src/builtins/builtins-utils.h" 6 #include "src/builtins/builtins-utils.h"
7 7
8 #include "src/regexp/jsregexp.h"
8 #include "src/string-builder.h" 9 #include "src/string-builder.h"
9 10
10 namespace v8 { 11 namespace v8 {
11 namespace internal { 12 namespace internal {
12 13
13 // ----------------------------------------------------------------------------- 14 // -----------------------------------------------------------------------------
14 // ES6 section 21.2 RegExp Objects 15 // ES6 section 21.2 RegExp Objects
15 16
16 namespace { 17 namespace {
17 18
(...skipping 296 matching lines...) Expand 10 before | Expand all | Expand 10 after
314 315
315 // Constants for accessing RegExpLastMatchInfo. 316 // Constants for accessing RegExpLastMatchInfo.
316 // TODO(jgruber): Currently, RegExpLastMatchInfo is still a JSObject maintained 317 // TODO(jgruber): Currently, RegExpLastMatchInfo is still a JSObject maintained
317 // and accessed from JS. This is a crutch until all RegExp logic is ported, then 318 // and accessed from JS. This is a crutch until all RegExp logic is ported, then
318 // we can take care of RegExpLastMatchInfo. 319 // we can take care of RegExpLastMatchInfo.
319 const int kNumberOfCapturesIndex = 0; 320 const int kNumberOfCapturesIndex = 0;
320 const int kLastSubjectIndex = 1; 321 const int kLastSubjectIndex = 1;
321 const int kLastInputIndex = 2; 322 const int kLastInputIndex = 2;
322 const int kFirstCaptureIndex = 3; 323 const int kFirstCaptureIndex = 3;
323 324
324 Handle<Object> GetLastMatchField(Isolate* isolate, int index) { 325 Handle<JSObject> GetLastMatchInfo(Isolate* isolate) {
325 Handle<JSFunction> global_regexp = isolate->regexp_function(); 326 Handle<JSFunction> global_regexp = isolate->regexp_function();
326 Handle<Object> last_match_info_obj = JSReceiver::GetDataProperty( 327 Handle<Object> last_match_info_obj = JSReceiver::GetDataProperty(
327 global_regexp, isolate->factory()->regexp_last_match_info_symbol()); 328 global_regexp, isolate->factory()->regexp_last_match_info_symbol());
328 329
329 Handle<JSReceiver> last_match_info = 330 return Handle<JSObject>::cast(last_match_info_obj);
330 Handle<JSReceiver>::cast(last_match_info_obj); 331 }
332
333 Handle<Object> GetLastMatchField(Isolate* isolate, int index) {
334 Handle<JSObject> last_match_info = GetLastMatchInfo(isolate);
331 return JSReceiver::GetElement(isolate, last_match_info, index) 335 return JSReceiver::GetElement(isolate, last_match_info, index)
332 .ToHandleChecked(); 336 .ToHandleChecked();
333 } 337 }
334 338
335 void SetLastMatchField(Isolate* isolate, int index, Handle<Object> value) { 339 void SetLastMatchField(Isolate* isolate, int index, Handle<Object> value) {
336 Handle<JSFunction> global_regexp = isolate->regexp_function(); 340 Handle<JSFunction> global_regexp = isolate->regexp_function();
337 Handle<Object> last_match_info_obj = JSReceiver::GetDataProperty( 341 Handle<Object> last_match_info_obj = JSReceiver::GetDataProperty(
338 global_regexp, isolate->factory()->regexp_last_match_info_symbol()); 342 global_regexp, isolate->factory()->regexp_last_match_info_symbol());
339 343
340 Handle<JSReceiver> last_match_info = 344 Handle<JSReceiver> last_match_info =
(...skipping 111 matching lines...) Expand 10 before | Expand all | Expand 10 after
452 } 456 }
453 457
454 BUILTIN(RegExpPrototypeRightContextGetter) { 458 BUILTIN(RegExpPrototypeRightContextGetter) {
455 HandleScope scope(isolate); 459 HandleScope scope(isolate);
456 const int start_index = GetLastMatchCapture(isolate, 1); 460 const int start_index = GetLastMatchCapture(isolate, 1);
457 Handle<String> last_subject = GetLastMatchSubject(isolate); 461 Handle<String> last_subject = GetLastMatchSubject(isolate);
458 const int len = last_subject->length(); 462 const int len = last_subject->length();
459 return *isolate->factory()->NewSubString(last_subject, start_index, len); 463 return *isolate->factory()->NewSubString(last_subject, start_index, len);
460 } 464 }
461 465
466 namespace {
467
468 MaybeHandle<Object> SetLastIndex(Isolate* isolate, Handle<JSReceiver> regexp,
469 int value) {
470 return Object::SetProperty(regexp, isolate->factory()->lastIndex_string(),
471 handle(Smi::FromInt(value), isolate), SLOPPY);
472 }
473
474 Handle<JSArray> ConstructResult(Isolate* isolate, int size, int index,
475 Handle<String> input) {
476 Handle<FixedArray> elements = isolate->factory()->NewFixedArray(size);
477 Handle<Map> regexp_map(isolate->native_context()->regexp_result_map());
478 Handle<JSObject> object =
479 isolate->factory()->NewJSObjectFromMap(regexp_map, NOT_TENURED);
480 Handle<JSArray> array = Handle<JSArray>::cast(object);
481 array->set_elements(*elements);
482 array->set_length(Smi::FromInt(size));
483 // Write in-object properties after the length of the array.
484 array->InObjectPropertyAtPut(JSRegExpResult::kIndexIndex,
485 Smi::FromInt(index));
486 array->InObjectPropertyAtPut(JSRegExpResult::kInputIndex, *input);
487 return array;
488 }
489
490 Handle<Object> ReturnNewResultFromMatchInfo(Isolate* isolate,
491 Handle<Object> match_info,
492 Handle<String> string) {
493 const int num_captures = GetLastMatchNumberOfCaptures(isolate);
494 DCHECK_EQ(0, num_captures % 2);
495
496 const int num_results = num_captures / 2;
497 int start = GetLastMatchCapture(isolate, 0);
498 int end = GetLastMatchCapture(isolate, 1);
499
500 // Calculate the substring of the first match before creating the result array
501 // to avoid an unnecessary write barrier storing the first result.
502 Handle<String> first = isolate->factory()->NewSubString(string, start, end);
503 Handle<JSArray> result = ConstructResult(isolate, num_results, start, string);
504
505 Handle<FixedArray> elems =
506 handle(FixedArray::cast(result->elements()), isolate);
507 elems->set(0, *first);
508
509 for (int i = 1; i < num_results; i++) {
510 start = GetLastMatchCapture(isolate, i * 2);
511 if (start != -1) {
512 end = GetLastMatchCapture(isolate, i * 2 + 1);
513 Handle<String> capture =
514 isolate->factory()->NewSubString(string, start, end);
515 elems->set(i, *capture);
516 }
517 }
518
519 return result;
520 }
521
522 MaybeHandle<Object> RegExpExecJS(Isolate* isolate, Handle<JSRegExp> regexp,
523 Handle<String> string) {
524 Handle<Object> last_index_obj;
525 ASSIGN_RETURN_ON_EXCEPTION(
526 isolate, last_index_obj,
527 Object::GetProperty(regexp, isolate->factory()->lastIndex_string()),
528 Object);
529
530 // Conversion is required by the ES2015 specification (RegExpBuiltinExec
531 // algorithm, step 4) even if the value is discarded for non-global RegExps.
532 ASSIGN_RETURN_ON_EXCEPTION(isolate, last_index_obj,
533 Object::ToLength(isolate, last_index_obj), Object);
534
535 int last_index = Handle<Smi>::cast(last_index_obj)->value();
536
537 const int flags = regexp->GetFlags();
538 const bool global = (flags & JSRegExp::kGlobal) != 0;
539 const bool sticky = (flags & JSRegExp::kSticky) != 0;
540 const bool update_last_index = (global || sticky);
541
542 if (update_last_index) {
543 if (last_index > string->length()) {
544 RETURN_ON_EXCEPTION(isolate, SetLastIndex(isolate, regexp, 0), Object);
545 return isolate->factory()->null_value();
546 }
547 } else {
548 last_index = 0;
549 }
550
551 Handle<JSObject> last_match_info = GetLastMatchInfo(isolate);
552
553 // matchIndices is either null or the RegExpLastMatchInfo array.
554 // TODO(littledan): Whether a RegExp is sticky is compiled into the RegExp
555 // itself, but ES2015 allows monkey-patching this property to differ from
556 // the internal flags. If it differs, recompile a different RegExp?
557 // TODO(jgruber): The result of Exec does not need to be a JSArray.
558 Handle<Object> match_indices;
559 ASSIGN_RETURN_ON_EXCEPTION(
560 isolate, match_indices,
561 RegExpImpl::Exec(regexp, string, last_index, last_match_info), Object);
562
563 if (match_indices->IsNull(isolate)) {
564 RETURN_ON_EXCEPTION(isolate, SetLastIndex(isolate, regexp, 0), Object);
565 return isolate->factory()->null_value();
566 }
567
568 // Successful match.
569 if (update_last_index) {
570 last_index = GetLastMatchCapture(isolate, 1);
571 RETURN_ON_EXCEPTION(isolate, SetLastIndex(isolate, regexp, last_index),
572 Object);
573 }
574
575 return ReturnNewResultFromMatchInfo(isolate, match_indices, string);
576 }
577
578 // ES#sec-regexpexec Runtime Semantics: RegExpExec ( R, S )
579 // Also takes an optional exec method in case our caller
580 // has already fetched exec.
581 MaybeHandle<Object> RegExpExec(Isolate* isolate, Handle<JSReceiver> regexp,
582 Handle<String> string, Handle<Object> exec) {
583 if (exec->IsUndefined(isolate)) {
584 ASSIGN_RETURN_ON_EXCEPTION(
585 isolate, exec,
586 Object::GetProperty(regexp, isolate->factory()->exec_string()), Object);
587 }
588
589 if (exec->IsCallable()) {
590 const int argc = 1;
591 ScopedVector<Handle<Object>> argv(argc);
592 argv[0] = string;
593
594 Handle<Object> result;
595 ASSIGN_RETURN_ON_EXCEPTION(
596 isolate, result,
597 Execution::Call(isolate, exec, regexp, argc, argv.start()), Object);
598
599 if (!result->IsJSReceiver() && !result->IsNull(isolate)) {
600 THROW_NEW_ERROR(isolate,
601 NewTypeError(MessageTemplate::kInvalidRegExpExecResult),
602 Object);
603 }
604 return result;
605 }
606
607 if (!regexp->IsJSRegExp()) {
608 THROW_NEW_ERROR(isolate,
609 NewTypeError(MessageTemplate::kIncompatibleMethodReceiver,
610 isolate->factory()->NewStringFromAsciiChecked(
611 "RegExp.prototype.exec"),
612 regexp),
613 Object);
614 }
615
616 return RegExpExecJS(isolate, Handle<JSRegExp>::cast(regexp), string);
617 }
618
619 } // namespace
620
621 // ES#sec-regexp.prototype.exec
622 // RegExp.prototype.exec ( string )
623 BUILTIN(RegExpPrototypeExec) {
624 HandleScope scope(isolate);
625 CHECK_RECEIVER(JSRegExp, regexp, "RegExp.prototype.exec");
626
627 Handle<Object> string_obj = args.atOrUndefined(isolate, 1);
628
629 Handle<String> string;
630 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, string,
631 Object::ToString(isolate, string_obj));
632
633 RETURN_RESULT_OR_FAILURE(isolate, RegExpExecJS(isolate, regexp, string));
634 }
635
636 // ES#sec-regexp.prototype.test
637 // RegExp.prototype.test ( S )
638 BUILTIN(RegExpPrototypeTest) {
639 HandleScope scope(isolate);
640 CHECK_RECEIVER(JSReceiver, recv, "RegExp.prototype.test");
641
642 Handle<Object> string_obj = args.atOrUndefined(isolate, 1);
643
644 Handle<String> string;
645 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, string,
646 Object::ToString(isolate, string_obj));
647
648 Handle<Object> result;
649 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
650 isolate, result,
651 RegExpExec(isolate, recv, string, isolate->factory()->undefined_value()));
652
653 return isolate->heap()->ToBoolean(!result->IsNull(isolate));
654 }
655
656 namespace {
657
658 // ES#sec-advancestringindex
659 // AdvanceStringIndex ( S, index, unicode )
660 int AdvanceStringIndex(Isolate* isolate, Handle<String> string, int index,
661 bool unicode) {
662 int increment = 1;
663
664 if (unicode && index < string->length()) {
665 const uint16_t first = string->Get(index);
666 if (first >= 0xD800 && first <= 0xDBFF && string->length() > index + 1) {
667 const uint16_t second = string->Get(index + 1);
668 if (second >= 0xDC00 && second <= 0xDFFF) {
669 increment = 2;
670 }
671 }
672 }
673
674 return increment;
675 }
676
677 MaybeHandle<Object> SetAdvancedStringIndex(Isolate* isolate,
678 Handle<JSReceiver> regexp,
679 Handle<String> string,
680 bool unicode) {
681 Handle<Object> last_index_obj;
682 ASSIGN_RETURN_ON_EXCEPTION(
683 isolate, last_index_obj,
684 Object::GetProperty(regexp, isolate->factory()->lastIndex_string()),
685 Object);
686
687 ASSIGN_RETURN_ON_EXCEPTION(isolate, last_index_obj,
688 Object::ToLength(isolate, last_index_obj), Object);
689
690 const int last_index = Handle<Smi>::cast(last_index_obj)->value();
691 const int new_last_index =
692 last_index + AdvanceStringIndex(isolate, string, last_index, unicode);
693
694 return SetLastIndex(isolate, regexp, new_last_index);
695 }
696
697 } // namespace
698
699 // ES#sec-regexp.prototype-@@match
700 // RegExp.prototype [ @@match ] ( string )
701 BUILTIN(RegExpPrototypeMatch) {
702 HandleScope scope(isolate);
703 CHECK_RECEIVER(JSReceiver, recv, "RegExp.prototype.@@match");
704
705 Handle<Object> string_obj = args.atOrUndefined(isolate, 1);
706
707 Handle<String> string;
708 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, string,
709 Object::ToString(isolate, string_obj));
710
711 Handle<Object> global_obj;
712 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
713 isolate, global_obj,
714 JSReceiver::GetProperty(recv, isolate->factory()->global_string()));
715 const bool global = global_obj->BooleanValue();
716
717 if (!global) {
718 RETURN_RESULT_OR_FAILURE(isolate,
719 RegExpExec(isolate, recv, string,
720 isolate->factory()->undefined_value()));
721 }
722
723 Handle<Object> unicode_obj;
724 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
725 isolate, unicode_obj,
726 JSReceiver::GetProperty(recv, isolate->factory()->unicode_string()));
727 const bool unicode = unicode_obj->BooleanValue();
728
729 RETURN_FAILURE_ON_EXCEPTION(isolate, SetLastIndex(isolate, recv, 0));
730
731 static const int kInitialArraySize = 8;
732 Handle<FixedArray> elems =
733 isolate->factory()->NewFixedArrayWithHoles(kInitialArraySize);
734
735 int n = 0;
736 for (;; n++) {
737 Handle<Object> result;
738 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
739 isolate, result, RegExpExec(isolate, recv, string,
740 isolate->factory()->undefined_value()));
741
742 if (result->IsNull(isolate)) {
743 if (n == 0) return isolate->heap()->null_value();
744 break;
745 }
746
747 Handle<Object> match_obj;
748 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, match_obj,
749 Object::GetElement(isolate, result, 0));
750
751 Handle<String> match;
752 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, match,
753 Object::ToString(isolate, match_obj));
754
755 elems = FixedArray::SetAndGrow(elems, n, match);
756
757 if (match->length() == 0) {
758 RETURN_FAILURE_ON_EXCEPTION(
759 isolate, SetAdvancedStringIndex(isolate, recv, string, unicode));
760 }
761 }
762
763 elems->Shrink(n);
764 return *isolate->factory()->NewJSArrayWithElements(elems);
765 }
766
767 // ES#sec-regexp.prototype-@@search
768 // RegExp.prototype [ @@search ] ( string )
769 BUILTIN(RegExpPrototypeSearch) {
770 HandleScope scope(isolate);
771 CHECK_RECEIVER(JSReceiver, recv, "RegExp.prototype.@@search");
772
773 Handle<Object> string_obj = args.atOrUndefined(isolate, 1);
774
775 Handle<String> string;
776 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, string,
777 Object::ToString(isolate, string_obj));
778
779 Handle<Object> last_index_obj;
780 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
781 isolate, last_index_obj,
782 Object::GetProperty(recv, isolate->factory()->lastIndex_string()));
783
784 RETURN_FAILURE_ON_EXCEPTION(isolate, SetLastIndex(isolate, recv, 0));
785
786 Handle<Object> result;
787 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
788 isolate, result,
789 RegExpExec(isolate, recv, string, isolate->factory()->undefined_value()));
790
791 RETURN_FAILURE_ON_EXCEPTION(
792 isolate, Object::SetProperty(recv, isolate->factory()->lastIndex_string(),
793 last_index_obj, SLOPPY));
794
795 if (result->IsNull(isolate)) return Smi::FromInt(-1);
796
797 RETURN_RESULT_OR_FAILURE(
798 isolate, Object::GetProperty(result, isolate->factory()->index_string()));
799 }
800
462 } // namespace internal 801 } // namespace internal
463 } // namespace v8 802 } // namespace v8
OLDNEW
« no previous file with comments | « src/builtins/builtins.h ('k') | src/heap-symbols.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698