Chromium Code Reviews| OLD | NEW |
|---|---|
| 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. | 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 14 matching lines...) Expand all Loading... | |
| 25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | 25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 27 | 27 |
| 28 #include <set> | 28 #include <set> |
| 29 | 29 |
| 30 #include "v8.h" | 30 #include "v8.h" |
| 31 | 31 |
| 32 #include "ast.h" | 32 #include "ast.h" |
| 33 #include "execution.h" | 33 #include "execution.h" |
| 34 #include "factory.h" | 34 #include "factory.h" |
| 35 #include "jsregexp.h" | 35 #include "jsregexp-inl.h" |
| 36 #include "third_party/jscre/pcre.h" | 36 #include "third_party/jscre/pcre.h" |
| 37 #include "platform.h" | 37 #include "platform.h" |
| 38 #include "runtime.h" | 38 #include "runtime.h" |
| 39 #include "top.h" | 39 #include "top.h" |
| 40 #include "compilation-cache.h" | 40 #include "compilation-cache.h" |
| 41 #include "string-stream.h" | 41 #include "string-stream.h" |
| 42 | 42 |
| 43 | 43 |
| 44 namespace v8 { namespace internal { | 44 namespace v8 { namespace internal { |
| 45 | 45 |
| (...skipping 767 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 813 void* rest) { | 813 void* rest) { |
| 814 ZoneList<RegExpTree*>* children = that->nodes(); | 814 ZoneList<RegExpTree*>* children = that->nodes(); |
| 815 RegExpNode<Char>* current = static_cast<RegExpNode<Char>*>(rest); | 815 RegExpNode<Char>* current = static_cast<RegExpNode<Char>*>(rest); |
| 816 for (int i = children->length() - 1; i >= 0; i--) { | 816 for (int i = children->length() - 1; i >= 0; i--) { |
| 817 current = Compile(children->at(i), current); | 817 current = Compile(children->at(i), current); |
| 818 } | 818 } |
| 819 return current; | 819 return current; |
| 820 } | 820 } |
| 821 | 821 |
| 822 | 822 |
| 823 bool CharacterClass::Contains(uc16 c) { | |
| 824 switch (type()) { | |
| 825 case FIELD: { | |
| 826 int offset = segment_start(segment_); | |
| 827 if (c < offset) return false; | |
| 828 int index = c - offset; | |
| 829 if (index >= CharacterClass::kFieldMax) return false; | |
| 830 return (data_.u_field & long_bit(index)) != 0; | |
| 831 } | |
| 832 case RANGES: { | |
| 833 int offset = segment_start(segment_); | |
| 834 if (c < offset) return false; | |
| 835 bool is_on = false; | |
| 836 unsigned i = 0; | |
| 837 while (i < count_) { | |
| 838 int delta = 0; | |
| 839 int nibble = read_nibble(i); | |
| 840 int bit_offset = 0; | |
| 841 while ((nibble & 0x8) == 0) { | |
| 842 delta |= nibble << bit_offset; | |
| 843 bit_offset += 3; | |
| 844 nibble = read_nibble(++i); | |
| 845 } | |
| 846 delta |= (nibble & 0x7) << bit_offset; | |
| 847 i++; | |
| 848 offset += delta; | |
| 849 if (c < offset) | |
| 850 return is_on; | |
| 851 is_on = !is_on; | |
| 852 } | |
| 853 return false; | |
| 854 } | |
| 855 case UNION: { | |
| 856 return data_.u_union.left->Contains(c) | |
| 857 || data_.u_union.right->Contains(c); | |
| 858 } | |
| 859 default: | |
| 860 UNREACHABLE(); | |
| 861 return false; | |
| 862 } | |
| 863 } | |
| 864 | |
| 865 | |
| 866 template <int kCount> | |
| 867 CharacterClass* StaticCharacterClassAllocator<kCount>::Allocate() { | |
| 868 ASSERT(used_ < kCount); | |
| 869 CharacterClass* result = &preallocated_[used_]; | |
| 870 used_++; | |
| 871 return result; | |
| 872 } | |
| 873 | |
| 874 | |
| 875 class StaticCharacterClasses { | |
| 876 public: | |
| 877 StaticCharacterClasses(); | |
| 878 CharacterClass* GetClass(uc16 tag); | |
| 879 static StaticCharacterClasses* instance_; | |
| 880 private: | |
| 881 StaticCharacterClassAllocator<5> static_allocator_; | |
| 882 CharacterClass digit_; | |
| 883 CharacterClass whitespace_; | |
| 884 CharacterClass word_; | |
| 885 }; | |
| 886 | |
| 887 | |
| 888 StaticCharacterClasses* StaticCharacterClasses::instance_ = NULL; | |
| 889 | |
| 890 | |
| 891 StaticCharacterClasses::StaticCharacterClasses() { | |
| 892 #define MAKE_CLASS(Name)\ | |
| 893 CharacterClass::Ranges(Vector<CharacterClass::Range>(k##Name##Ranges,\ | |
| 894 k##Name##RangeCount), \ | |
| 895 &static_allocator_) | |
| 896 | |
| 897 const int kDigitRangeCount = 1; | |
| 898 CharacterClass::Range kDigitRanges[kDigitRangeCount] = { | |
| 899 CharacterClass::Range('0', '9') | |
| 900 }; | |
| 901 digit_ = MAKE_CLASS(Digit); | |
| 902 | |
| 903 const int kWhiteSpaceRangeCount = 10; | |
| 904 CharacterClass::Range kWhiteSpaceRanges[kWhiteSpaceRangeCount] = { | |
| 905 CharacterClass::Range(0x0009, 0x0009), CharacterClass::Range(0x000B, 0x000C), | |
| 906 CharacterClass::Range(0x0020, 0x0020), CharacterClass::Range(0x00A0, 0x00A0), | |
| 907 CharacterClass::Range(0x1680, 0x1680), CharacterClass::Range(0x180E, 0x180E), | |
| 908 CharacterClass::Range(0x2000, 0x200A), CharacterClass::Range(0x202F, 0x202F), | |
| 909 CharacterClass::Range(0x205F, 0x205F), CharacterClass::Range(0x3000, 0x3000) | |
| 910 }; | |
| 911 whitespace_ = MAKE_CLASS(WhiteSpace); | |
| 912 | |
| 913 const int kWordRangeCount = 4; | |
| 914 static CharacterClass::Range kWordRanges[kWordRangeCount] = { | |
| 915 CharacterClass::Range('0', '9'), CharacterClass::Range('A', 'Z'), | |
| 916 CharacterClass::Range('_', '_'), CharacterClass::Range('a', 'z') | |
| 917 }; | |
| 918 word_ = MAKE_CLASS(Word); | |
| 919 | |
| 920 #undef MAKE_CLASS | |
| 921 } | |
| 922 | |
| 923 | |
| 924 CharacterClass* StaticCharacterClasses::GetClass(uc16 tag) { | |
| 925 switch (tag) { | |
| 926 case 'd': | |
| 927 return &digit_; | |
| 928 case 's': | |
| 929 return &whitespace_; | |
| 930 case 'w': | |
| 931 return &word_; | |
| 932 default: | |
| 933 UNREACHABLE(); | |
| 934 return NULL; | |
| 935 } | |
| 936 } | |
| 937 | |
| 938 | |
| 939 CharacterClass* CharacterClass::GetCharacterClass(uc16 tag) { | |
| 940 if (StaticCharacterClasses::instance_ == NULL) | |
| 941 StaticCharacterClasses::instance_ = new StaticCharacterClasses(); | |
| 942 return StaticCharacterClasses::instance_->GetClass(tag); | |
| 943 } | |
| 944 | |
| 945 | |
| 946 CharacterClass CharacterClass::Ranges(Vector<Range> boundaries, | |
| 947 CharacterClassAllocator* alloc) { | |
| 948 CharacterClass result; | |
| 949 result.InitializeRangesFrom(boundaries, alloc); | |
| 950 return result; | |
| 951 } | |
| 952 | |
| 953 | |
| 954 void CharacterClass::InitializeFieldFrom(Vector<Range> boundaries) { | |
| 955 ASSERT(boundaries.length() > 0); | |
| 956 ASSERT_EQ(EMPTY, type_); | |
| 957 type_ = FIELD; | |
| 958 segment_ = segment_of(boundaries.first().from()); | |
| 959 ASSERT_EQ(static_cast<int>(segment_), | |
| 960 static_cast<int>(segment_of(boundaries.last().to()))); | |
| 961 data_.u_field = 0; | |
| 962 for (int i = 0; i < boundaries.length(); i++) { | |
| 963 int start = boundaries[i].from(); | |
| 964 int end = boundaries[i].to(); | |
| 965 // Mask in the range | |
| 966 uint64_t t0 = ~static_cast<uint64_t>(0) >> (63 - (end & kSegmentMask)); | |
| 967 uint64_t t1 = ~static_cast<uint64_t>(0) << (start & kSegmentMask); | |
| 968 data_.u_field |= (t0 & t1); | |
| 969 } | |
| 970 } | |
| 971 | |
| 972 | |
| 973 void CharacterClass::InitializeRangesFrom(Vector<Range> boundaries, | |
| 974 CharacterClassAllocator* alloc) { | |
| 975 ASSERT(boundaries.length() > 0); | |
| 976 ASSERT_EQ(EMPTY, type_); | |
| 977 int start_segment = segment_of(boundaries.first().from()); | |
| 978 int end_segment = segment_of(boundaries.last().to()); | |
| 979 if (start_segment == end_segment) { | |
| 980 InitializeFieldFrom(boundaries); | |
| 981 return; | |
| 982 } | |
| 983 type_ = RANGES; | |
| 984 segment_ = start_segment; | |
| 985 data_.u_ranges = 0; | |
| 986 int offset = 0; | |
| 987 uc16 last_boundary = segment_start(segment_); | |
| 988 int i; | |
| 989 for (i = 0; i < boundaries.length(); i++) { | |
| 990 Range current = boundaries[i]; | |
| 991 ASSERT(last_boundary <= current.from()); | |
| 992 int word = current.from() - last_boundary; | |
| 993 // We have to repeat the write operation to write both the delta | |
| 994 // from the last range to the start of this one, and the delta | |
| 995 // from the start to the end of this range. | |
| 996 for (int j = 0; j < 2; j++) { | |
| 997 do { | |
| 998 if (offset >= 16) | |
| 999 goto overflow; | |
| 1000 byte nibble = (word & 0x7); | |
| 1001 int next_word = word >> 3; | |
| 1002 if (next_word == 0) | |
| 1003 nibble |= 0x8; | |
| 1004 write_nibble(offset, nibble); | |
| 1005 word = next_word; | |
| 1006 offset++; | |
| 1007 } while (word > 0); | |
| 1008 word = current.to() - current.from() + 1; | |
| 1009 } | |
| 1010 count_ = offset; | |
| 1011 last_boundary = current.to() + 1; | |
| 1012 continue; | |
| 1013 overflow: | |
| 1014 CharacterClass* left = alloc->Allocate(); | |
| 1015 ASSERT(i > 0); | |
| 1016 if (segment_of(boundaries[i-1].to()) == segment_) { | |
| 1017 // If what we've written so far fits within a single segment we | |
| 1018 // can fit it into a bitfield. | |
| 1019 left->InitializeFieldFrom(boundaries.SubVector(0, i)); | |
|
Lasse Reichstein
2008/10/30 10:28:42
Try to read some more first, and see if more range
| |
| 1020 } else { | |
| 1021 // Otherwise we copy the work we've done so far into the left | |
| 1022 // heap-allocated charclass. | |
| 1023 *left = *this; | |
|
Lasse Reichstein
2008/10/30 10:28:42
As discussed, the segments won't necessarily be di
| |
| 1024 } | |
| 1025 CharacterClass* right = alloc->Allocate(); | |
| 1026 right->InitializeRangesFrom(boundaries.SubVector(i, boundaries.length()), | |
| 1027 alloc); | |
| 1028 type_ = UNION; | |
| 1029 data_.u_union.left = left; | |
| 1030 data_.u_union.right = right; | |
| 1031 return; | |
| 1032 } | |
| 1033 } | |
| 1034 | |
| 1035 | |
| 823 // ------------------------------------------------------------------- | 1036 // ------------------------------------------------------------------- |
| 824 // Execution | 1037 // Execution |
| 825 | 1038 |
| 826 | 1039 |
| 827 template <typename Char> | 1040 template <typename Char> |
| 828 class ExecutionState { | 1041 class ExecutionState { |
| 829 public: | 1042 public: |
| 830 ExecutionState(RegExpNode<Char>* start, Vector<Char> input) | 1043 ExecutionState(RegExpNode<Char>* start, Vector<Char> input) |
| 831 : current_(start), | 1044 : current_(start), |
| 832 input_(input), | 1045 input_(input), |
| (...skipping 188 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1021 template | 1234 template |
| 1022 bool RegExpEngine::Execute<const char>(RegExpNode<const char>* start, | 1235 bool RegExpEngine::Execute<const char>(RegExpNode<const char>* start, |
| 1023 Vector<const char> input); | 1236 Vector<const char> input); |
| 1024 | 1237 |
| 1025 template | 1238 template |
| 1026 bool RegExpEngine::Execute<const uc16>(RegExpNode<const uc16>* start, | 1239 bool RegExpEngine::Execute<const uc16>(RegExpNode<const uc16>* start, |
| 1027 Vector<const uc16> input); | 1240 Vector<const uc16> input); |
| 1028 | 1241 |
| 1029 | 1242 |
| 1030 }} // namespace v8::internal | 1243 }} // namespace v8::internal |
| OLD | NEW |