Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(260)

Side by Side Diff: src/runtime.cc

Issue 1109010: Run string replace regexp with function in C++ code loop. (Closed)
Patch Set: Fix to also work in debug mode. Created 10 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2006-2009 the V8 project authors. All rights reserved. 1 // Copyright 2006-2009 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 1546 matching lines...) Expand 10 before | Expand all | Expand 10 after
1557 return CharFromCode(code); 1557 return CharFromCode(code);
1558 } 1558 }
1559 1559
1560 1560
1561 static Object* Runtime_CharFromCode(Arguments args) { 1561 static Object* Runtime_CharFromCode(Arguments args) {
1562 NoHandleAllocation ha; 1562 NoHandleAllocation ha;
1563 ASSERT(args.length() == 1); 1563 ASSERT(args.length() == 1);
1564 return CharFromCode(args[0]); 1564 return CharFromCode(args[0]);
1565 } 1565 }
1566 1566
1567
1568 class FixedArrayBuilder {
1569 public:
1570 explicit FixedArrayBuilder(int initial_capacity)
1571 : array_(Factory::NewFixedArrayWithHoles(initial_capacity)),
1572 length_(0) {
1573 // Require a non-zero initial size. Ensures that doubling the size to
1574 // extend the array will work.
1575 ASSERT(initial_capacity > 0);
1576 }
1577
1578 explicit FixedArrayBuilder(Handle<FixedArray> backing_store)
1579 : array_(backing_store),
1580 length_(0) {
1581 // Require a non-zero initial size. Ensures that doubling the size to
1582 // extend the array will work.
1583 ASSERT(backing_store->length() > 0);
1584 }
1585
1586 bool HasCapacity(int elements) {
1587 int length = array_->length();
1588 int required_length = length_ + elements;
1589 return (length >= required_length);
1590 }
1591
1592 void EnsureCapacity(int elements) {
1593 int length = array_->length();
1594 int required_length = length_ + elements;
1595 if (length < required_length) {
1596 int new_length = length;
1597 do {
1598 new_length *= 2;
1599 } while (new_length < required_length);
1600 Handle<FixedArray> extended_array =
1601 Factory::NewFixedArrayWithHoles(new_length);
1602 array_->CopyTo(0, *extended_array, 0, length_);
1603 array_ = extended_array;
1604 }
1605 }
1606
1607 void Add(Object* value) {
1608 ASSERT(length_ < capacity());
1609 array_->set(length_, value);
1610 length_++;
1611 }
1612
1613 void Add(Smi* value) {
1614 ASSERT(length_ < capacity());
1615 array_->set(length_, value);
1616 length_++;
1617 }
1618
1619 inline Handle<FixedArray> array() {
fschneider 2010/03/25 10:26:06 I don't know enough how C++ compilers treat the in
Lasse Reichstein 2010/03/26 11:18:26 I think you are right. Inlining is default in this
1620 return array_;
1621 }
1622
1623 inline int length() {
1624 return length_;
1625 }
1626
1627 inline int capacity() {
1628 return array_->length();
1629 }
1630
1631 inline Handle<JSArray> ToJSArray() {
1632 Handle<JSArray> result_array = Factory::NewJSArrayWithElements(array_);
1633 result_array->set_length(Smi::FromInt(length_));
1634 return result_array;
1635 }
1636
1637 inline Handle<JSArray> ToJSArray(Handle<JSArray> target_array) {
1638 target_array->set_elements(*array_);
1639 target_array->set_length(Smi::FromInt(length_));
1640 return target_array;
1641 }
1642
1643 private:
1644 Handle<FixedArray> array_;
1645 int length_;
1646 };
1647
1648
1567 // Forward declarations. 1649 // Forward declarations.
1568 static const int kStringBuilderConcatHelperLengthBits = 11; 1650 static const int kStringBuilderConcatHelperLengthBits = 11;
fschneider 2010/03/25 10:26:06 No need for static here. It is the default for C++
Lasse Reichstein 2010/03/26 11:18:26 Fixed.
1569 static const int kStringBuilderConcatHelperPositionBits = 19; 1651 static const int kStringBuilderConcatHelperPositionBits = 19;
1570 1652
1571 template <typename schar> 1653 template <typename schar>
1572 static inline void StringBuilderConcatHelper(String*, 1654 static inline void StringBuilderConcatHelper(String*,
1573 schar*, 1655 schar*,
1574 FixedArray*, 1656 FixedArray*,
1575 int); 1657 int);
1576 1658
1577 typedef BitField<int, 0, 11> StringBuilderSubstringLength; 1659 typedef BitField<int, 0, 11> StringBuilderSubstringLength;
fschneider 2010/03/25 10:26:06 Would be better to use the defined constants here.
Lasse Reichstein 2010/03/26 11:18:26 Very long, but changed anyway.
1578 typedef BitField<int, 11, 19> StringBuilderSubstringPosition; 1660 typedef BitField<int, 11, 19> StringBuilderSubstringPosition;
1579 1661
1662
1580 class ReplacementStringBuilder { 1663 class ReplacementStringBuilder {
1581 public: 1664 public:
1582 ReplacementStringBuilder(Handle<String> subject, int estimated_part_count) 1665 ReplacementStringBuilder(Handle<String> subject, int estimated_part_count)
1583 : subject_(subject), 1666 : array_builder_(estimated_part_count),
1584 parts_(Factory::NewFixedArray(estimated_part_count)), 1667 subject_(subject),
1585 part_count_(0),
1586 character_count_(0), 1668 character_count_(0),
1587 is_ascii_(subject->IsAsciiRepresentation()) { 1669 is_ascii_(subject->IsAsciiRepresentation()) {
1588 // Require a non-zero initial size. Ensures that doubling the size to 1670 // Require a non-zero initial size. Ensures that doubling the size to
1589 // extend the array will work. 1671 // extend the array will work.
1590 ASSERT(estimated_part_count > 0); 1672 ASSERT(estimated_part_count > 0);
1591 } 1673 }
1592 1674
1593 void EnsureCapacity(int elements) { 1675 static inline void AddSubjectSlice(FixedArrayBuilder* builder,
1594 int length = parts_->length(); 1676 int from,
1595 int required_length = part_count_ + elements; 1677 int to) {
1596 if (length < required_length) {
1597 int new_length = length;
1598 do {
1599 new_length *= 2;
1600 } while (new_length < required_length);
1601 Handle<FixedArray> extended_array =
1602 Factory::NewFixedArray(new_length);
1603 parts_->CopyTo(0, *extended_array, 0, part_count_);
1604 parts_ = extended_array;
1605 }
1606 }
1607
1608 void AddSubjectSlice(int from, int to) {
1609 ASSERT(from >= 0); 1678 ASSERT(from >= 0);
1610 int length = to - from; 1679 int length = to - from;
1611 ASSERT(length > 0); 1680 ASSERT(length > 0);
1612 // Can we encode the slice in 11 bits for length and 19 bits for
1613 // start position - as used by StringBuilderConcatHelper?
1614 if (StringBuilderSubstringLength::is_valid(length) && 1681 if (StringBuilderSubstringLength::is_valid(length) &&
1615 StringBuilderSubstringPosition::is_valid(from)) { 1682 StringBuilderSubstringPosition::is_valid(from)) {
1616 int encoded_slice = StringBuilderSubstringLength::encode(length) | 1683 int encoded_slice = StringBuilderSubstringLength::encode(length) |
1617 StringBuilderSubstringPosition::encode(from); 1684 StringBuilderSubstringPosition::encode(from);
1618 AddElement(Smi::FromInt(encoded_slice)); 1685 builder->Add(Smi::FromInt(encoded_slice));
1619 } else { 1686 } else {
1620 // Otherwise encode as two smis. 1687 // Otherwise encode as two smis.
1621 AddElement(Smi::FromInt(-length)); 1688 builder->Add(Smi::FromInt(-length));
1622 AddElement(Smi::FromInt(from)); 1689 builder->Add(Smi::FromInt(from));
1623 } 1690 }
1624 IncrementCharacterCount(length);
1625 } 1691 }
1626 1692
1627 1693
1694 void EnsureCapacity(int elements) {
1695 array_builder_.EnsureCapacity(elements);
1696 }
1697
1698
1699 void AddSubjectSlice(int from, int to) {
1700 AddSubjectSlice(&array_builder_, from, to);
1701 // Can we encode the slice in 11 bits for length and 19 bits for
1702 // start position - as used by StringBuilderConcatHelper?
1703 IncrementCharacterCount(to - from);
1704 }
1705
1706
1628 void AddString(Handle<String> string) { 1707 void AddString(Handle<String> string) {
1629 int length = string->length(); 1708 int length = string->length();
1630 ASSERT(length > 0); 1709 ASSERT(length > 0);
1631 AddElement(*string); 1710 AddElement(*string);
1632 if (!string->IsAsciiRepresentation()) { 1711 if (!string->IsAsciiRepresentation()) {
1633 is_ascii_ = false; 1712 is_ascii_ = false;
1634 } 1713 }
1635 IncrementCharacterCount(length); 1714 IncrementCharacterCount(length);
1636 } 1715 }
1637 1716
1638 1717
1639 Handle<String> ToString() { 1718 Handle<String> ToString() {
1640 if (part_count_ == 0) { 1719 if (array_builder_.length() == 0) {
1641 return Factory::empty_string(); 1720 return Factory::empty_string();
1642 } 1721 }
1643 1722
1644 Handle<String> joined_string; 1723 Handle<String> joined_string;
1645 if (is_ascii_) { 1724 if (is_ascii_) {
1646 joined_string = NewRawAsciiString(character_count_); 1725 joined_string = NewRawAsciiString(character_count_);
1647 AssertNoAllocation no_alloc; 1726 AssertNoAllocation no_alloc;
1648 SeqAsciiString* seq = SeqAsciiString::cast(*joined_string); 1727 SeqAsciiString* seq = SeqAsciiString::cast(*joined_string);
1649 char* char_buffer = seq->GetChars(); 1728 char* char_buffer = seq->GetChars();
1650 StringBuilderConcatHelper(*subject_, 1729 StringBuilderConcatHelper(*subject_,
1651 char_buffer, 1730 char_buffer,
1652 *parts_, 1731 *array_builder_.array(),
1653 part_count_); 1732 array_builder_.length());
1654 } else { 1733 } else {
1655 // Non-ASCII. 1734 // Non-ASCII.
1656 joined_string = NewRawTwoByteString(character_count_); 1735 joined_string = NewRawTwoByteString(character_count_);
1657 AssertNoAllocation no_alloc; 1736 AssertNoAllocation no_alloc;
1658 SeqTwoByteString* seq = SeqTwoByteString::cast(*joined_string); 1737 SeqTwoByteString* seq = SeqTwoByteString::cast(*joined_string);
1659 uc16* char_buffer = seq->GetChars(); 1738 uc16* char_buffer = seq->GetChars();
1660 StringBuilderConcatHelper(*subject_, 1739 StringBuilderConcatHelper(*subject_,
1661 char_buffer, 1740 char_buffer,
1662 *parts_, 1741 *array_builder_.array(),
1663 part_count_); 1742 array_builder_.length());
1664 } 1743 }
1665 return joined_string; 1744 return joined_string;
1666 } 1745 }
1667 1746
1668 1747
1669 void IncrementCharacterCount(int by) { 1748 void IncrementCharacterCount(int by) {
1670 if (character_count_ > String::kMaxLength - by) { 1749 if (character_count_ > String::kMaxLength - by) {
1671 V8::FatalProcessOutOfMemory("String.replace result too large."); 1750 V8::FatalProcessOutOfMemory("String.replace result too large.");
1672 } 1751 }
1673 character_count_ += by; 1752 character_count_ += by;
1674 } 1753 }
1675 1754
1755 Handle<JSArray> GetParts() {
1756 Handle<JSArray> result =
1757 Factory::NewJSArrayWithElements(array_builder_.array());
1758 result->set_length(Smi::FromInt(array_builder_.length()));
1759 return result;
1760 }
1761
1676 private: 1762 private:
1677 1763
fschneider 2010/03/25 10:26:06 Extra empty line.
1678 Handle<String> NewRawAsciiString(int size) { 1764 Handle<String> NewRawAsciiString(int size) {
1679 CALL_HEAP_FUNCTION(Heap::AllocateRawAsciiString(size), String); 1765 CALL_HEAP_FUNCTION(Heap::AllocateRawAsciiString(size), String);
1680 } 1766 }
1681 1767
1682 1768
1683 Handle<String> NewRawTwoByteString(int size) { 1769 Handle<String> NewRawTwoByteString(int size) {
1684 CALL_HEAP_FUNCTION(Heap::AllocateRawTwoByteString(size), String); 1770 CALL_HEAP_FUNCTION(Heap::AllocateRawTwoByteString(size), String);
1685 } 1771 }
1686 1772
1687 1773
1688 void AddElement(Object* element) { 1774 void AddElement(Object* element) {
1689 ASSERT(element->IsSmi() || element->IsString()); 1775 ASSERT(element->IsSmi() || element->IsString());
1690 ASSERT(parts_->length() > part_count_); 1776 ASSERT(array_builder_.capacity() > array_builder_.length());
1691 parts_->set(part_count_, element); 1777 array_builder_.Add(element);
1692 part_count_++;
1693 } 1778 }
1694 1779
1780 FixedArrayBuilder array_builder_;
1695 Handle<String> subject_; 1781 Handle<String> subject_;
1696 Handle<FixedArray> parts_;
1697 int part_count_;
1698 int character_count_; 1782 int character_count_;
1699 bool is_ascii_; 1783 bool is_ascii_;
1700 }; 1784 };
1701 1785
1702 1786
1703 class CompiledReplacement { 1787 class CompiledReplacement {
1704 public: 1788 public:
1705 CompiledReplacement() 1789 CompiledReplacement()
1706 : parts_(1), replacement_substrings_(0) {} 1790 : parts_(1), replacement_substrings_(0) {}
1707 1791
(...skipping 387 matching lines...) Expand 10 before | Expand all | Expand 10 after
2095 2179
2096 ASSERT(last_match_info->HasFastElements()); 2180 ASSERT(last_match_info->HasFastElements());
2097 2181
2098 return StringReplaceRegExpWithString(subject, 2182 return StringReplaceRegExpWithString(subject,
2099 regexp, 2183 regexp,
2100 replacement, 2184 replacement,
2101 last_match_info); 2185 last_match_info);
2102 } 2186 }
2103 2187
2104 2188
2105
2106 // Cap on the maximal shift in the Boyer-Moore implementation. By setting a 2189 // Cap on the maximal shift in the Boyer-Moore implementation. By setting a
2107 // limit, we can fix the size of tables. 2190 // limit, we can fix the size of tables.
2108 static const int kBMMaxShift = 0xff; 2191 static const int kBMMaxShift = 0xff;
2109 // Reduce alphabet to this size. 2192 // Reduce alphabet to this size.
2110 static const int kBMAlphabetSize = 0x100; 2193 static const int kBMAlphabetSize = 0x100;
2111 // For patterns below this length, the skip length of Boyer-Moore is too short 2194 // For patterns below this length, the skip length of Boyer-Moore is too short
2112 // to compensate for the algorithmic overhead compared to simple brute force. 2195 // to compensate for the algorithmic overhead compared to simple brute force.
2113 static const int kBMMinPatternLength = 5; 2196 static const int kBMMinPatternLength = 5;
2114 2197
2115 // Holds the two buffers used by Boyer-Moore string search's Good Suffix 2198 // Holds the two buffers used by Boyer-Moore string search's Good Suffix
(...skipping 743 matching lines...) Expand 10 before | Expand all | Expand 10 after
2859 int from = offsets.at(i * 2); 2942 int from = offsets.at(i * 2);
2860 int to = offsets.at(i * 2 + 1); 2943 int to = offsets.at(i * 2 + 1);
2861 elements->set(i, *Factory::NewSubString(subject, from, to)); 2944 elements->set(i, *Factory::NewSubString(subject, from, to));
2862 } 2945 }
2863 Handle<JSArray> result = Factory::NewJSArrayWithElements(elements); 2946 Handle<JSArray> result = Factory::NewJSArrayWithElements(elements);
2864 result->set_length(Smi::FromInt(matches)); 2947 result->set_length(Smi::FromInt(matches));
2865 return *result; 2948 return *result;
2866 } 2949 }
2867 2950
2868 2951
2952 // Two smis before and after the match, for very long strings.
2953 static const int kMaxBuilderEntriesPerRegExpMatch = 5;
fschneider 2010/03/25 10:26:06 No need for static here either.
Lasse Reichstein 2010/03/26 11:18:26 Removed.
2954
2955
2956 static void SetLastMatchInfoNoCaptures(Handle<String> subject,
2957 Handle<JSArray> last_match_info,
2958 int match_start,
2959 int match_end) {
2960 // Fill last_match_info with a single capture.
2961 last_match_info->EnsureSize(2 + RegExpImpl::kLastMatchOverhead);
2962 AssertNoAllocation no_gc;
2963 FixedArray* elements = FixedArray::cast(last_match_info->elements());
2964 RegExpImpl::SetLastCaptureCount(elements, 2);
2965 RegExpImpl::SetLastInput(elements, *subject);
2966 RegExpImpl::SetLastSubject(elements, *subject);
2967 RegExpImpl::SetCapture(elements, 0, match_start);
2968 RegExpImpl::SetCapture(elements, 1, match_end);
2969 }
2970
2971
2972 template <typename schar>
2973 static bool SearchCharMultiple(Vector<schar> subject,
2974 String* pattern,
2975 schar pattern_char,
2976 FixedArrayBuilder* builder,
2977 int* match_pos) {
2978 // Position of last match.
2979 int pos = *match_pos;
2980 int subject_length = subject.length();
2981 while (pos < subject_length) {
2982 int match_end = pos + 1;
2983 if (!builder->HasCapacity(kMaxBuilderEntriesPerRegExpMatch)) {
2984 *match_pos = pos;
2985 return false;
2986 }
2987 int new_pos = SingleCharIndexOf(subject, pattern_char, match_end);
2988 if (new_pos >= 0) {
fschneider 2010/03/25 10:26:06 More compact if you reverse the condition: if (ne
Lasse Reichstein 2010/03/26 11:18:26 I want to have the "fast case" as the first branch
2989 // A match.
fschneider 2010/03/25 10:26:06 Complete sentence for whole-line comments.
2990 if (new_pos > match_end) {
2991 ReplacementStringBuilder::AddSubjectSlice(builder, match_end, new_pos);
2992 }
2993 pos = new_pos;
2994 builder->Add(pattern);
2995 } else {
2996 break;
2997 }
2998 }
2999 if (pos + 1 < subject_length) {
3000 ReplacementStringBuilder::AddSubjectSlice(builder, pos + 1, subject_length);
3001 }
3002 *match_pos = pos;
3003 return true;
3004 }
3005
3006
3007 static bool SearchCharMultiple(Handle<String> subject,
3008 Handle<String> pattern,
3009 Handle<JSArray> last_match_info,
3010 FixedArrayBuilder* builder) {
3011 ASSERT(subject->IsFlat());
3012 ASSERT_EQ(1, pattern->length());
3013 uc16 pattern_char = pattern->Get(0);
fschneider 2010/03/25 10:26:06 ASSERT that pattern is a uc16-string?
Lasse Reichstein 2010/03/26 11:18:26 Not necessary. Get is generic and works on both ki
3014 // Treating position before first as initial "previous match position".
3015 int match_pos = -1;
3016
3017 for (;;) { // Break when search complete.
3018 builder->EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch);
3019 AssertNoAllocation no_gc;
3020 if (subject->IsAsciiRepresentation()) {
3021 if (pattern_char > String::kMaxAsciiCharCode) {
fschneider 2010/03/25 10:26:06 No need for multi-line if.
Lasse Reichstein 2010/03/26 11:18:26 How not?
3022 break;
3023 }
3024 Vector<const char> subject_vector = subject->ToAsciiVector();
3025 char pattern_ascii_char = static_cast<char>(pattern_char);
3026 bool complete = SearchCharMultiple<const char>(subject_vector,
3027 *pattern,
3028 pattern_ascii_char,
3029 builder,
3030 &match_pos);
3031 if (complete) break;
3032 } else {
3033 Vector<const uc16> subject_vector = subject->ToUC16Vector();
3034 bool complete = SearchCharMultiple<const uc16>(subject_vector,
3035 *pattern,
3036 pattern_char,
3037 builder,
3038 &match_pos);
3039 if (complete) break;
3040 }
3041 }
3042
3043 if (match_pos >= 0) {
3044 SetLastMatchInfoNoCaptures(subject,
3045 last_match_info,
3046 match_pos,
3047 match_pos + 1);
3048 return true;
3049 }
3050 return false; // No matches at all.
3051 }
3052
3053
3054 template <typename schar, typename pchar>
3055 static bool SearchStringMultiple(Vector<schar> subject,
3056 String* pattern,
3057 Vector<pchar> pattern_string,
3058 FixedArrayBuilder* builder,
3059 int* match_pos) {
3060 int pos = *match_pos;
3061 int subject_length = subject.length();
3062 int pattern_length = pattern_string.length();
3063 int max_search_start = subject_length - pattern_length;
3064 bool is_ascii = (sizeof(schar) == 1);
3065 StringSearchStrategy strategy =
3066 InitializeStringSearch(pattern_string, is_ascii);
3067 switch (strategy) {
3068 case SEARCH_FAIL: return false;
3069 case SEARCH_SHORT: {
fschneider 2010/03/25 10:26:06 No need for { } block?
Lasse Reichstein 2010/03/26 11:18:26 Done.
3070 while (pos <= max_search_start) {
3071 if (!builder->HasCapacity(kMaxBuilderEntriesPerRegExpMatch)) {
3072 *match_pos = pos;
3073 return false;
3074 }
3075 // Position of end of previous match.
3076 int match_end = pos + pattern_length;
3077 int new_pos = SimpleIndexOf(subject, pattern_string, match_end);
3078 if (new_pos >= 0) {
fschneider 2010/03/25 10:26:06 Reverse condition here too? if (new_pos < 0) brea
Lasse Reichstein 2010/03/26 11:18:26 Same argument as last time, I want the "then" bran
3079 // A match.
3080 if (new_pos > match_end) {
3081 ReplacementStringBuilder::AddSubjectSlice(builder,
3082 match_end,
3083 new_pos);
3084 }
3085 pos = new_pos;
3086 builder->Add(pattern);
3087 } else {
3088 break;
3089 }
3090 }
3091 break;
3092 }
3093 case SEARCH_LONG: {
fschneider 2010/03/25 10:26:06 No need for { } block?
Lasse Reichstein 2010/03/26 11:18:26 Done.
3094 while (pos <= max_search_start) {
3095 if (!builder->HasCapacity(kMaxBuilderEntriesPerRegExpMatch)) {
3096 *match_pos = pos;
3097 return false;
3098 }
3099 int new_pos = ComplexIndexOf(subject,
3100 pattern_string,
3101 pos + pattern_length);
3102 if (new_pos >= 0) {
fschneider 2010/03/25 10:26:06 And here? if (new_pos < 0) break; ...
Lasse Reichstein 2010/03/26 11:18:26 Also retained.
3103 // A match.
3104 if (new_pos > pos) {
3105 ReplacementStringBuilder::AddSubjectSlice(builder, pos, new_pos);
3106 }
3107 pos = new_pos;
3108 builder->Add(pattern);
3109 } else {
3110 break;
3111 }
3112 }
3113 break;
3114 }
3115 }
3116 if (pos < max_search_start) {
3117 ReplacementStringBuilder::AddSubjectSlice(builder,
3118 pos + pattern_length,
3119 subject_length);
3120 }
3121 *match_pos = pos;
3122 return true;
3123 }
3124
3125
3126 static bool SearchStringMultiple(Handle<String> subject,
3127 Handle<String> pattern,
3128 Handle<JSArray> last_match_info,
3129 FixedArrayBuilder* builder) {
3130 ASSERT(subject->IsFlat());
3131 ASSERT(pattern->IsFlat());
3132 ASSERT(pattern->length() > 1);
3133
3134 // Treating as if a previous match was before first character.
3135 int match_pos = -pattern->length();
3136
3137 for (;;) { // Break when search complete.
3138 builder->EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch);
3139 AssertNoAllocation no_gc;
3140 if (subject->IsAsciiRepresentation()) {
3141 Vector<const char> subject_vector = subject->ToAsciiVector();
3142 if (pattern->IsAsciiRepresentation()) {
3143 if (SearchStringMultiple(subject_vector,
3144 *pattern,
3145 pattern->ToAsciiVector(),
3146 builder,
3147 &match_pos)) break;
3148 } else {
3149 if (SearchStringMultiple(subject_vector,
3150 *pattern,
3151 pattern->ToUC16Vector(),
3152 builder,
3153 &match_pos)) break;
3154 }
3155 } else {
3156 Vector<const uc16> subject_vector = subject->ToUC16Vector();
3157 if (pattern->IsAsciiRepresentation()) {
3158 if (SearchStringMultiple(subject_vector,
3159 *pattern,
3160 pattern->ToAsciiVector(),
3161 builder,
3162 &match_pos)) break;
3163 } else {
3164 if (SearchStringMultiple(subject_vector,
3165 *pattern,
3166 pattern->ToUC16Vector(),
3167 builder,
3168 &match_pos)) break;
3169 }
3170 }
3171 }
3172
3173 if (match_pos >= 0) {
3174 SetLastMatchInfoNoCaptures(subject,
3175 last_match_info,
3176 match_pos,
3177 match_pos + pattern->length());
3178 return true;
3179 }
3180 return false; // No matches at all.
3181 }
3182
3183
3184 static RegExpImpl::IrregexpResult SearchRegExpNoCaptureMultiple(
3185 Handle<String> subject,
3186 Handle<JSRegExp> regexp,
3187 Handle<JSArray> last_match_array,
3188 FixedArrayBuilder* builder) {
3189 ASSERT(subject->IsFlat());
3190 int match_start = -1;
3191 int match_end = 0;
3192 int pos = 0;
3193 int required_registers = RegExpImpl::IrregexpPrepare(regexp, subject);
3194 if (required_registers < 0) return RegExpImpl::RE_EXCEPTION;
3195
3196 OffsetsVector registers(required_registers);
3197 Vector<int> register_vector(registers.vector(), registers.length());
3198 int subject_length = subject->length();
3199
3200 for (;;) { // Break on failure, return on exception.
3201 RegExpImpl::IrregexpResult result =
3202 RegExpImpl::IrregexpExecOnce(regexp,
3203 subject,
3204 pos,
3205 register_vector);
3206 if (result == RegExpImpl::RE_SUCCESS) {
3207 match_start = register_vector[0];
3208 builder->EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch);
3209 if (match_end < match_start) {
3210 ReplacementStringBuilder::AddSubjectSlice(builder,
3211 match_end,
3212 match_start);
3213 }
3214 match_end = register_vector[1];
3215 HandleScope loop_scope;
3216 builder->Add(*Factory::NewSubString(subject, match_start, match_end));
3217 if (match_start != match_end) {
3218 pos = match_end;
3219 } else {
3220 pos = match_end + 1;
3221 if (pos > subject_length) break;
3222 }
3223 } else if (result == RegExpImpl::RE_FAILURE) {
3224 break;
3225 } else {
3226 ASSERT_EQ(result, RegExpImpl::RE_EXCEPTION);
3227 return result;
3228 }
3229 }
3230
3231 if (match_start >= 0) {
3232 if (match_end < subject_length) {
3233 ReplacementStringBuilder::AddSubjectSlice(builder,
3234 match_end,
3235 subject_length);
3236 }
3237 SetLastMatchInfoNoCaptures(subject,
3238 last_match_array,
3239 match_start,
3240 match_end);
3241 return RegExpImpl::RE_SUCCESS;
3242 } else {
3243 return RegExpImpl::RE_FAILURE; // No matches at all.
3244 }
3245 }
3246
3247
3248 static RegExpImpl::IrregexpResult SearchRegExpMultiple(
3249 Handle<String> subject,
3250 Handle<JSRegExp> regexp,
3251 Handle<JSArray> last_match_array,
3252 FixedArrayBuilder* builder) {
3253
3254 ASSERT(subject->IsFlat());
3255 int required_registers = RegExpImpl::IrregexpPrepare(regexp, subject);
3256 if (required_registers < 0) return RegExpImpl::RE_EXCEPTION;
3257
3258 OffsetsVector registers(required_registers);
3259 Vector<int> register_vector(registers.vector(), registers.length());
3260
3261 RegExpImpl::IrregexpResult result =
3262 RegExpImpl::IrregexpExecOnce(regexp,
3263 subject,
3264 0,
3265 register_vector);
3266
3267 int capture_count = regexp->CaptureCount();
3268 int subject_length = subject->length();
3269
3270 // Position to search from.
3271 int pos = 0;
3272 // End of previous match. Differs from pos if match was empty.
3273 int match_end = 0;
3274 if (result == RegExpImpl::RE_SUCCESS) {
3275 // Need to keep a copy of the previous match for creating last_match_info
3276 // at the end, so we have two vectors that we swap between.
3277 OffsetsVector registers2(required_registers);
3278 Vector<int> prev_register_vector(registers2.vector(), registers2.length());
3279
3280 do {
3281 int match_start = register_vector[0];
3282 builder->EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch);
3283 if (match_end < match_start) {
3284 ReplacementStringBuilder::AddSubjectSlice(builder,
3285 match_end,
3286 match_start);
3287 }
3288 match_end = register_vector[1];
3289
3290 {
3291 // Avoid accumulating new handles inside loop.
3292 HandleScope temp_scope;
3293 // Arguments array to replace function is match, captures, index and
3294 // subject, i.e., 3 + capture count in total.
3295 Handle<FixedArray> elements = Factory::NewFixedArray(3 + capture_count);
3296 elements->set(0, *Factory::NewSubString(subject,
3297 match_start,
3298 match_end));
3299 for (int i = 1; i <= capture_count; i++) {
3300 Handle<String> substring =
3301 Factory::NewSubString(subject,
3302 register_vector[i * 2],
3303 register_vector[i * 2 + 1]);
3304 elements->set(i, *substring);
3305 }
3306 elements->set(capture_count + 1, Smi::FromInt(match_start));
3307 elements->set(capture_count + 2, *subject);
3308 builder->Add(*Factory::NewJSArrayWithElements(elements));
3309 }
3310 // Swap register vectors, so the last successful match is in
3311 // prev_register_vector.
3312 Vector<int> tmp = prev_register_vector;
fschneider 2010/03/25 10:26:06 Could you have one stack-allocated instance of the
Lasse Reichstein 2010/03/26 11:18:26 Vector is a two-value struct. Allocating a single
3313 prev_register_vector = register_vector;
3314 register_vector = tmp;
3315
3316 if (match_end > match_start) {
3317 pos = match_end;
3318 } else {
3319 pos = match_end + 1;
3320 if (pos > subject_length) {
3321 break;
3322 }
3323 }
3324
3325 result = RegExpImpl::IrregexpExecOnce(regexp,
3326 subject,
3327 pos,
3328 register_vector);
3329 } while (result == RegExpImpl::RE_SUCCESS);
3330
3331 if (result != RegExpImpl::RE_EXCEPTION) {
3332 // Finished matching, with at least one match.
3333 if (match_end < subject_length) {
3334 ReplacementStringBuilder::AddSubjectSlice(builder,
3335 match_end,
3336 subject_length);
3337 }
3338
3339 int last_match_capture_count = (capture_count + 1) * 2;
3340 int last_match_array_size =
3341 last_match_capture_count + RegExpImpl::kLastMatchOverhead;
3342 last_match_array->EnsureSize(last_match_array_size);
3343 AssertNoAllocation no_gc;
3344 FixedArray* elements = FixedArray::cast(last_match_array->elements());
3345 RegExpImpl::SetLastCaptureCount(elements, last_match_capture_count);
3346 RegExpImpl::SetLastSubject(elements, *subject);
3347 RegExpImpl::SetLastInput(elements, *subject);
3348 for (int i = 0; i < last_match_capture_count; i++) {
3349 RegExpImpl::SetCapture(elements, i, prev_register_vector[i]);
3350 }
3351 return RegExpImpl::RE_SUCCESS;
3352 }
3353 }
3354 // No matches at all, return failure or exception result directly.
3355 return result;
3356 }
3357
3358
3359 static Object* Runtime_RegExpExecMultiple(Arguments args) {
3360 ASSERT(args.length() == 4);
3361 HandleScope handles;
3362
3363 CONVERT_ARG_CHECKED(String, subject, 1);
3364 if (!subject->IsFlat()) { FlattenString(subject); }
3365 CONVERT_ARG_CHECKED(JSRegExp, regexp, 0);
3366 CONVERT_ARG_CHECKED(JSArray, last_match_info, 2);
3367 CONVERT_ARG_CHECKED(JSArray, result_array, 3);
3368
3369 ASSERT(last_match_info->HasFastElements());
3370 ASSERT(regexp->GetFlags().is_global());
3371 Handle<FixedArray> result_elements;
3372 if (result_array->HasFastElements()) {
3373 result_elements =
3374 Handle<FixedArray>(FixedArray::cast(result_array->elements()));
3375 } else {
3376 result_elements = Factory::NewFixedArrayWithHoles(16);
3377 }
3378 FixedArrayBuilder builder(result_elements);
3379
3380 if (regexp->TypeTag() == JSRegExp::ATOM) {
3381 Handle<String> pattern(
3382 String::cast(regexp->DataAt(JSRegExp::kAtomPatternIndex)));
3383 int pattern_length = pattern->length();
3384 if (pattern_length == 1) {
fschneider 2010/03/25 10:26:06 Or just: if (pattern->length() == 1 && Search
Lasse Reichstein 2010/03/26 11:18:26 That wouldn't return null if SearchCharMultiple re
3385 if (SearchCharMultiple(subject, pattern, last_match_info, &builder)) {
3386 return *builder.ToJSArray(result_array);
3387 }
3388 return Heap::null_value();
3389 }
3390
3391 if (!pattern->IsFlat()) FlattenString(pattern);
3392 if (SearchStringMultiple(subject, pattern, last_match_info, &builder)) {
3393 return *builder.ToJSArray(result_array);
3394 }
3395 return Heap::null_value();
3396 }
3397
3398 ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP);
3399
3400 int capture_count = regexp->CaptureCount();
3401 if (capture_count == 0) {
3402 RegExpImpl::IrregexpResult result =
3403 SearchRegExpNoCaptureMultiple(subject,
3404 regexp,
3405 last_match_info,
3406 &builder);
3407 if (result == RegExpImpl::RE_SUCCESS) {
3408 return *builder.ToJSArray(result_array);
3409 }
3410 if (result == RegExpImpl::RE_FAILURE) {
3411 return Heap::null_value();
3412 }
3413 ASSERT_EQ(result, RegExpImpl::RE_EXCEPTION);
3414 return Failure::Exception();
3415 }
3416
3417 RegExpImpl::IrregexpResult result =
3418 SearchRegExpMultiple(subject, regexp, last_match_info, &builder);
3419 if (result == RegExpImpl::RE_SUCCESS) return *builder.ToJSArray(result_array);
fschneider 2010/03/25 10:26:06 The test for the result seems duplicated. You coul
Lasse Reichstein 2010/03/26 11:18:26 Done.
3420 if (result == RegExpImpl::RE_FAILURE) return Heap::null_value();
3421 ASSERT_EQ(result, RegExpImpl::RE_EXCEPTION);
3422 return Failure::Exception();
3423 }
3424
3425
2869 static Object* Runtime_NumberToRadixString(Arguments args) { 3426 static Object* Runtime_NumberToRadixString(Arguments args) {
2870 NoHandleAllocation ha; 3427 NoHandleAllocation ha;
2871 ASSERT(args.length() == 2); 3428 ASSERT(args.length() == 2);
2872 3429
2873 // Fast case where the result is a one character string. 3430 // Fast case where the result is a one character string.
2874 if (args[0]->IsSmi() && args[1]->IsSmi()) { 3431 if (args[0]->IsSmi() && args[1]->IsSmi()) {
2875 int value = Smi::cast(args[0])->value(); 3432 int value = Smi::cast(args[0])->value();
2876 int radix = Smi::cast(args[1])->value(); 3433 int radix = Smi::cast(args[1])->value();
2877 if (value >= 0 && value < radix) { 3434 if (value >= 0 && value < radix) {
2878 RUNTIME_ASSERT(radix <= 36); 3435 RUNTIME_ASSERT(radix <= 36);
(...skipping 6546 matching lines...) Expand 10 before | Expand all | Expand 10 after
9425 } else { 9982 } else {
9426 // Handle last resort GC and make sure to allow future allocations 9983 // Handle last resort GC and make sure to allow future allocations
9427 // to grow the heap without causing GCs (if possible). 9984 // to grow the heap without causing GCs (if possible).
9428 Counters::gc_last_resort_from_js.Increment(); 9985 Counters::gc_last_resort_from_js.Increment();
9429 Heap::CollectAllGarbage(false); 9986 Heap::CollectAllGarbage(false);
9430 } 9987 }
9431 } 9988 }
9432 9989
9433 9990
9434 } } // namespace v8::internal 9991 } } // namespace v8::internal
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698