Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(649)

Side by Side Diff: src/jsregexp.cc

Issue 42115: Faster string.replace with regexp pattern. (Closed)
Patch Set: Addressed review comments Created 11 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/jsregexp.h ('k') | src/objects.h » ('j') | src/runtime.cc » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 // Copyright 2006-2009 the V8 project authors. All rights reserved. 1 // Copyright 2006-2009 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 582 matching lines...) Expand 10 before | Expand all | Expand 10 after
593 if (!subject->IsFlat(StringShape(*subject))) { 593 if (!subject->IsFlat(StringShape(*subject))) {
594 FlattenString(subject); 594 FlattenString(subject);
595 } 595 }
596 596
597 last_match_info->EnsureSize(number_of_capture_registers + kLastMatchOverhead); 597 last_match_info->EnsureSize(number_of_capture_registers + kLastMatchOverhead);
598 598
599 while (true) { 599 while (true) {
600 if (previous_index > subject->length() || previous_index < 0) { 600 if (previous_index > subject->length() || previous_index < 0) {
601 // Per ECMA-262 15.10.6.2, if the previous index is greater than the 601 // Per ECMA-262 15.10.6.2, if the previous index is greater than the
602 // string length, there is no match. 602 // string length, there is no match.
603 matches = Factory::null_value();
604 return result; 603 return result;
605 } else { 604 } else {
606 #ifdef DEBUG 605 #ifdef DEBUG
607 if (FLAG_trace_regexp_bytecodes) { 606 if (FLAG_trace_regexp_bytecodes) {
608 String* pattern = regexp->Pattern(); 607 String* pattern = regexp->Pattern();
609 PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString())); 608 PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString()));
610 PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString())); 609 PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString()));
611 } 610 }
612 #endif 611 #endif
613 HandleScope scope; 612 HandleScope scope;
(...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after
659 Handle<String> subject, 658 Handle<String> subject,
660 int previous_index, 659 int previous_index,
661 int* offsets_vector, 660 int* offsets_vector,
662 int offsets_vector_length) { 661 int offsets_vector_length) {
663 StringShape shape(*subject); 662 StringShape shape(*subject);
664 ASSERT(subject->IsFlat(shape)); 663 ASSERT(subject->IsFlat(shape));
665 bool is_ascii = shape.IsAsciiRepresentation(); 664 bool is_ascii = shape.IsAsciiRepresentation();
666 bool rc; 665 bool rc;
667 666
668 Handle<String> original_subject = subject; 667 Handle<String> original_subject = subject;
669 if (FLAG_regexp_native) { 668 if (UseNativeRegexp()) {
670 #ifndef ARM 669 #ifdef ARM
671 Handle<Code> code(IrregexpNativeCode(*regexp, is_ascii)); 670 UNREACHABLE();
672 671 #else
673 // Character offsets into string. 672 Handle<Code> code(RegExpImpl::IrregexpNativeCode(*regexp, is_ascii));
674 int start_offset = previous_index; 673 RegExpMacroAssemblerIA32::Result res =
675 int end_offset = subject->length(shape); 674 RegExpMacroAssemblerIA32::Match(code,
676 675 subject,
677 if (shape.IsCons()) { 676 offsets_vector,
678 subject = Handle<String>(ConsString::cast(*subject)->first()); 677 offsets_vector_length,
679 } else if (shape.IsSliced()) { 678 previous_index);
680 SlicedString* slice = SlicedString::cast(*subject);
681 start_offset += slice->start();
682 end_offset += slice->start();
683 subject = Handle<String>(slice->buffer());
684 }
685
686 // String is now either Sequential or External
687 StringShape flatshape(*subject);
688 bool is_ascii = flatshape.IsAsciiRepresentation();
689 int char_size_shift = is_ascii ? 0 : 1;
690
691 RegExpMacroAssemblerIA32::Result res;
692
693 if (flatshape.IsExternal()) {
694 const byte* address;
695 if (is_ascii) {
696 ExternalAsciiString* ext = ExternalAsciiString::cast(*subject);
697 address = reinterpret_cast<const byte*>(ext->resource()->data());
698 } else {
699 ExternalTwoByteString* ext = ExternalTwoByteString::cast(*subject);
700 address = reinterpret_cast<const byte*>(ext->resource()->data());
701 }
702 res = RegExpMacroAssemblerIA32::Execute(
703 *code,
704 const_cast<Address*>(&address),
705 start_offset << char_size_shift,
706 end_offset << char_size_shift,
707 offsets_vector,
708 previous_index == 0);
709 } else { // Sequential string
710 ASSERT(StringShape(*subject).IsSequential());
711 Address char_address =
712 is_ascii ? SeqAsciiString::cast(*subject)->GetCharsAddress()
713 : SeqTwoByteString::cast(*subject)->GetCharsAddress();
714 int byte_offset = char_address - reinterpret_cast<Address>(*subject);
715 res = RegExpMacroAssemblerIA32::Execute(
716 *code,
717 reinterpret_cast<Address*>(subject.location()),
718 byte_offset + (start_offset << char_size_shift),
719 byte_offset + (end_offset << char_size_shift),
720 offsets_vector,
721 previous_index == 0);
722 }
723 679
724 if (res == RegExpMacroAssemblerIA32::EXCEPTION) { 680 if (res == RegExpMacroAssemblerIA32::EXCEPTION) {
725 ASSERT(Top::has_pending_exception()); 681 ASSERT(Top::has_pending_exception());
726 return Handle<Object>::null(); 682 return Handle<Object>::null();
727 } 683 }
684 ASSERT(res == RegExpMacroAssemblerIA32::SUCCESS
685 || res == RegExpMacroAssemblerIA32::FAILURE);
686
728 rc = (res == RegExpMacroAssemblerIA32::SUCCESS); 687 rc = (res == RegExpMacroAssemblerIA32::SUCCESS);
729 688 #endif
730 if (rc) {
731 // Capture values are relative to start_offset only.
732 for (int i = 0; i < offsets_vector_length; i++) {
733 if (offsets_vector[i] >= 0) {
734 offsets_vector[i] += previous_index;
735 }
736 }
737 }
738 } else { 689 } else {
739 #else
740 // Unimplemented on ARM, fall through to bytecode.
741 }
742 {
743 #endif
744 for (int i = number_of_capture_registers - 1; i >= 0; i--) { 690 for (int i = number_of_capture_registers - 1; i >= 0; i--) {
745 offsets_vector[i] = -1; 691 offsets_vector[i] = -1;
746 } 692 }
747 Handle<ByteArray> byte_codes(IrregexpByteCode(*regexp, is_ascii)); 693 Handle<ByteArray> byte_codes(IrregexpByteCode(*regexp, is_ascii));
748 694
749 rc = IrregexpInterpreter::Match(byte_codes, 695 rc = IrregexpInterpreter::Match(byte_codes,
750 subject, 696 subject,
751 offsets_vector, 697 offsets_vector,
752 previous_index); 698 previous_index);
753 } 699 }
(...skipping 3955 matching lines...) Expand 10 before | Expand all | Expand 10 after
4709 } else { 4655 } else {
4710 node = loop_node; 4656 node = loop_node;
4711 } 4657 }
4712 } 4658 }
4713 data->node = node; 4659 data->node = node;
4714 Analysis analysis(ignore_case); 4660 Analysis analysis(ignore_case);
4715 analysis.EnsureAnalyzed(node); 4661 analysis.EnsureAnalyzed(node);
4716 4662
4717 NodeInfo info = *node->info(); 4663 NodeInfo info = *node->info();
4718 4664
4719 if (FLAG_regexp_native) { 4665 if (RegExpImpl::UseNativeRegexp()) {
4720 #ifdef ARM 4666 #ifdef ARM
4721 // Unimplemented, fall-through to bytecode implementation. 4667 UNREACHABLE();
4722 #else // IA32 4668 #else // IA32
4723 RegExpMacroAssemblerIA32::Mode mode; 4669 RegExpMacroAssemblerIA32::Mode mode;
4724 if (is_ascii) { 4670 if (is_ascii) {
4725 mode = RegExpMacroAssemblerIA32::ASCII; 4671 mode = RegExpMacroAssemblerIA32::ASCII;
4726 } else { 4672 } else {
4727 mode = RegExpMacroAssemblerIA32::UC16; 4673 mode = RegExpMacroAssemblerIA32::UC16;
4728 } 4674 }
4729 RegExpMacroAssemblerIA32 macro_assembler(mode, 4675 RegExpMacroAssemblerIA32 macro_assembler(mode,
4730 (data->capture_count + 1) * 2); 4676 (data->capture_count + 1) * 2);
4731 return compiler.Assemble(&macro_assembler, 4677 return compiler.Assemble(&macro_assembler,
4732 node, 4678 node,
4733 data->capture_count, 4679 data->capture_count,
4734 pattern); 4680 pattern);
4735 #endif 4681 #endif
4736 } 4682 }
4737 EmbeddedVector<byte, 1024> codes; 4683 EmbeddedVector<byte, 1024> codes;
4738 RegExpMacroAssemblerIrregexp macro_assembler(codes); 4684 RegExpMacroAssemblerIrregexp macro_assembler(codes);
4739 return compiler.Assemble(&macro_assembler, 4685 return compiler.Assemble(&macro_assembler,
4740 node, 4686 node,
4741 data->capture_count, 4687 data->capture_count,
4742 pattern); 4688 pattern);
4743 } 4689 }
4744 4690
4745 4691
4746 }} // namespace v8::internal 4692 }} // namespace v8::internal
OLD | NEW
« no previous file with comments | « src/jsregexp.h ('k') | src/objects.h » ('j') | src/runtime.cc » ('J')

Powered by Google App Engine
This is Rietveld 408576698