Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(34)

Side by Side Diff: src/jsregexp.cc

Issue 9104: Dispatch tables (Closed)
Patch Set: Created 12 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/jsregexp.h ('k') | src/jsregexp-inl.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2006-2008 the V8 project authors. All rights reserved. 1 // Copyright 2006-2008 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 674 matching lines...) Expand 10 before | Expand all | Expand 10 after
685 685
686 686
687 template <typename Char> 687 template <typename Char>
688 class ChoiceNode: public RegExpNode<Char> { 688 class ChoiceNode: public RegExpNode<Char> {
689 public: 689 public:
690 explicit ChoiceNode(ZoneList<RegExpNode<Char>*>* choices) 690 explicit ChoiceNode(ZoneList<RegExpNode<Char>*>* choices)
691 : choices_(choices) { } 691 : choices_(choices) { }
692 virtual void EmitDot(DotPrinter<Char>* out); 692 virtual void EmitDot(DotPrinter<Char>* out);
693 virtual bool Step(ExecutionState<Char>* state); 693 virtual bool Step(ExecutionState<Char>* state);
694 ZoneList<RegExpNode<Char>*>* choices() { return choices_; } 694 ZoneList<RegExpNode<Char>*>* choices() { return choices_; }
695 DispatchTable* table() { return table_; }
695 private: 696 private:
696 ZoneList<RegExpNode<Char>*>* choices_; 697 ZoneList<RegExpNode<Char>*>* choices_;
698 DispatchTable table_;
697 }; 699 };
698 700
699 701
700 // ------------------------------------------------------------------- 702 // -------------------------------------------------------------------
701 // Dot/dotty output 703 // Dot/dotty output
702 704
703 705
704 template <typename Char> 706 template <typename Char>
705 void DotPrinter<Char>::PrintNode(RegExpNode<Char>* node) { 707 void DotPrinter<Char>::PrintNode(RegExpNode<Char>* node) {
706 stream()->Add("digraph G {\n"); 708 stream()->Add("digraph G {\n");
(...skipping 157 matching lines...) Expand 10 before | Expand all | Expand 10 after
864 void* rest) { 866 void* rest) {
865 ZoneList<RegExpTree*>* children = that->nodes(); 867 ZoneList<RegExpTree*>* children = that->nodes();
866 RegExpNode<Char>* current = static_cast<RegExpNode<Char>*>(rest); 868 RegExpNode<Char>* current = static_cast<RegExpNode<Char>*>(rest);
867 for (int i = children->length() - 1; i >= 0; i--) { 869 for (int i = children->length() - 1; i >= 0; i--) {
868 current = Compile(children->at(i), current); 870 current = Compile(children->at(i), current);
869 } 871 }
870 return current; 872 return current;
871 } 873 }
872 874
873 875
874 bool CharacterClass::Contains(uc16 c) { 876 static const int kSpaceRangeCount = 20;
875 switch (type()) { 877 static const uc16 kSpaceRanges[kSpaceRangeCount] = {
876 case FIELD: { 878 0x0009, 0x0009, 0x000B, 0x000C, 0x0020, 0x0020, 0x00A0, 0x00A0,
877 int offset = segment_start(segment_); 879 0x1680, 0x1680, 0x180E, 0x180E, 0x2000, 0x200A, 0x202F, 0x202F,
878 if (c < offset) return false; 880 0x205F, 0x205F, 0x3000, 0x3000
879 int index = c - offset; 881 };
880 if (index >= CharacterClass::kFieldMax) return false; 882
881 return (data_.u_field & long_bit(index)) != 0; 883
882 } 884 static const int kWordRangeCount = 8;
883 case RANGES: { 885 static const uc16 kWordRanges[kWordRangeCount] = {
884 int offset = segment_start(segment_); 886 '0', '9', 'A', 'Z', '_', '_', 'a', 'z'
885 if (c < offset) return false; 887 };
886 bool is_on = false; 888
887 unsigned i = 0; 889
888 while (i < count_) { 890 static const int kDigitRangeCount = 2;
889 int delta = 0; 891 static const uc16 kDigitRanges[kDigitRangeCount] = {
890 int nibble = read_nibble(i); 892 '0', '9'
891 int bit_offset = 0; 893 };
892 while ((nibble & 0x8) == 0) { 894
893 delta |= nibble << bit_offset; 895
894 bit_offset += 3; 896 static void AddClass(const uc16* elmv,
895 nibble = read_nibble(++i); 897 int elmc,
896 } 898 ZoneList<CharacterRange>* ranges) {
897 delta |= (nibble & 0x7) << bit_offset; 899 for (int i = 0; i < elmc; i += 2) {
898 i++; 900 ASSERT(elmv[i] <= elmv[i + 1]);
899 offset += delta; 901 ranges->Add(CharacterRange(elmv[i], elmv[i + 1]));
900 if (c < offset) 902 }
901 return is_on; 903 }
902 is_on = !is_on; 904
903 } 905
904 return false; 906 static void AddClassNegated(const uc16 *elmv,
905 } 907 int elmc,
906 case UNION: { 908 ZoneList<CharacterRange>* ranges) {
Erik Corry 2008/11/04 09:48:57 I guess it's implicit here that the class does not
907 return data_.u_union.left->Contains(c) 909 uc16 last = 0x0000;
908 || data_.u_union.right->Contains(c); 910 for (int i = 0; i < elmc; i += 2) {
909 } 911 ASSERT(last <= elmv[i] - 1);
912 ASSERT(elmv[i] <= elmv[i + 1]);
913 ranges->Add(CharacterRange(last, elmv[i] - 1));
914 last = elmv[i + 1] + 1;
915 }
916 ranges->Add(CharacterRange(last, 0xFFFF));
917 }
918
919
920 void CharacterRange::AddClassEscape(uc16 type, ZoneList<CharacterRange>* ranges) {
921 switch (type) {
922 case 's':
923 AddClass(kSpaceRanges, kSpaceRangeCount, ranges);
924 break;
925 case 'S':
926 AddClassNegated(kSpaceRanges, kSpaceRangeCount, ranges);
927 break;
928 case 'w':
929 AddClass(kWordRanges, kWordRangeCount, ranges);
930 break;
931 case 'W':
932 AddClassNegated(kWordRanges, kWordRangeCount, ranges);
933 break;
934 case 'd':
935 AddClass(kDigitRanges, kDigitRangeCount, ranges);
936 break;
937 case 'D':
938 AddClassNegated(kDigitRanges, kDigitRangeCount, ranges);
939 break;
940 case '.':
941 ranges->Add(CharacterRange(0x0000, 0xFFFF));
942 break;
910 default: 943 default:
911 UNREACHABLE(); 944 UNREACHABLE();
912 return false; 945 }
913 } 946 }
914 } 947
915 948
916 949 // -------------------------------------------------------------------
917 template <int kCount> 950 // Splay tree
918 CharacterClass* StaticCharacterClassAllocator<kCount>::Allocate() { 951
919 ASSERT(used_ < kCount); 952
920 CharacterClass* result = &preallocated_[used_]; 953 void OutSet::Set(unsigned value) {
921 used_++; 954 if (value < kFirstLimit) {
922 return result; 955 first_ |= (1 << value);
923 } 956 } else {
924 957 if (remaining_ == NULL)
925 958 remaining_ = new ZoneList<unsigned>(1);
926 class StaticCharacterClasses { 959 if (remaining_->is_empty() || !remaining_->Contains(value))
927 public: 960 remaining_->Add(value);
928 StaticCharacterClasses(); 961 }
929 CharacterClass* GetClass(uc16 tag); 962 }
930 static StaticCharacterClasses* instance_; 963
931 private: 964
932 StaticCharacterClassAllocator<5> static_allocator_; 965 bool OutSet::Get(unsigned value) {
933 CharacterClass digit_; 966 if (value < kFirstLimit) {
934 CharacterClass whitespace_; 967 return (first_ & (1 << value)) != 0;
935 CharacterClass word_; 968 } else if (remaining_ == NULL) {
936 }; 969 return false;
937 970 } else {
938 971 return remaining_->Contains(value);
939 StaticCharacterClasses* StaticCharacterClasses::instance_ = NULL; 972 }
940 973 }
941 974
942 StaticCharacterClasses::StaticCharacterClasses() { 975
943 #define MAKE_CLASS(Name)\ 976 OutSet OutSet::Clone() {
944 CharacterClass::Ranges(Vector<CharacterClass::Range>(k##Name##Ranges, \ 977 if (remaining_ == NULL) {
945 k##Name##RangeCount), \ 978 return OutSet(first_, NULL);
946 &static_allocator_) 979 } else {
947 980 int length = remaining_->length();
948 const int kDigitRangeCount = 1; 981 ZoneList<unsigned>* clone = new ZoneList<unsigned>(length);
949 CharacterClass::Range kDigitRanges[kDigitRangeCount] = { 982 for (int i = 0; i < length; i++)
950 CharacterClass::Range('0', '9') 983 clone->Add(remaining_->at(i));
951 }; 984 return OutSet(first_, clone);
952 digit_ = MAKE_CLASS(Digit); 985 }
953 986 }
954 const int kWhiteSpaceRangeCount = 10; 987
955 CharacterClass::Range kWhiteSpaceRanges[kWhiteSpaceRangeCount] = { 988
956 CharacterClass::Range(0x0009, 0x0009), CharacterClass::Range(0x000B, 0x000C), 989 const uc16 DispatchTable::Config::kNoKey = unibrow::Utf8::kBadChar;
957 CharacterClass::Range(0x0020, 0x0020), CharacterClass::Range(0x00A0, 0x00A0), 990 const DispatchTable::Entry DispatchTable::Config::kNoValue;
958 CharacterClass::Range(0x1680, 0x1680), CharacterClass::Range(0x180E, 0x180E), 991
959 CharacterClass::Range(0x2000, 0x200A), CharacterClass::Range(0x202F, 0x202F), 992
960 CharacterClass::Range(0x205F, 0x205F), CharacterClass::Range(0x3000, 0x3000) 993 void DispatchTable::AddRange(CharacterRange full_range, int value) {
961 }; 994 CharacterRange current = full_range;
962 whitespace_ = MAKE_CLASS(WhiteSpace); 995 if (tree()->is_empty()) {
963 996 // If this is the first range we just insert into the table.
964 const int kWordRangeCount = 4; 997 ZoneSplayTree<Config>::Locator loc;
965 static CharacterClass::Range kWordRanges[kWordRangeCount] = { 998 ASSERT_RESULT(tree()->Insert(current.from(), &loc));
966 CharacterClass::Range('0', '9'), CharacterClass::Range('A', 'Z'), 999 loc.set_value(Entry(current.from(), current.to(), value));
967 CharacterClass::Range('_', '_'), CharacterClass::Range('a', 'z')
968 };
969 word_ = MAKE_CLASS(Word);
970
971 #undef MAKE_CLASS
972 }
973
974
975 CharacterClass* StaticCharacterClasses::GetClass(uc16 tag) {
976 switch (tag) {
977 case 'd':
978 return &digit_;
979 case 's':
980 return &whitespace_;
981 case 'w':
982 return &word_;
983 default:
984 UNREACHABLE();
985 return NULL;
986 }
987 }
988
989
990 CharacterClass* CharacterClass::GetCharacterClass(uc16 tag) {
991 if (StaticCharacterClasses::instance_ == NULL)
992 StaticCharacterClasses::instance_ = new StaticCharacterClasses();
993 return StaticCharacterClasses::instance_->GetClass(tag);
994 }
995
996
997 CharacterClass CharacterClass::Ranges(Vector<Range> boundaries,
998 CharacterClassAllocator* alloc) {
999 CharacterClass result;
1000 result.InitializeRangesFrom(boundaries, alloc);
1001 return result;
1002 }
1003
1004
1005 void CharacterClass::InitializeFieldFrom(Vector<Range> boundaries) {
1006 ASSERT(boundaries.length() > 0);
1007 ASSERT_EQ(EMPTY, type_);
1008 type_ = FIELD;
1009 segment_ = segment_of(boundaries.first().from());
1010 ASSERT_EQ(static_cast<int>(segment_),
1011 static_cast<int>(segment_of(boundaries.last().to())));
1012 data_.u_field = 0;
1013 for (int i = 0; i < boundaries.length(); i++) {
1014 int start = boundaries[i].from();
1015 int end = boundaries[i].to();
1016 // Mask in the range
1017 uint64_t t0 = ~static_cast<uint64_t>(0) >> (63 - (end & kSegmentMask));
1018 uint64_t t1 = ~static_cast<uint64_t>(0) << (start & kSegmentMask);
1019 data_.u_field |= (t0 & t1);
1020 }
1021 }
1022
1023
1024 void CharacterClass::InitializeRangesFrom(Vector<Range> boundaries,
1025 CharacterClassAllocator* alloc) {
1026 ASSERT(boundaries.length() > 0);
1027 ASSERT_EQ(EMPTY, type_);
1028 int start_segment = segment_of(boundaries.first().from());
1029 int end_segment = segment_of(boundaries.last().to());
1030 if (start_segment == end_segment) {
1031 InitializeFieldFrom(boundaries);
1032 return; 1000 return;
1033 } 1001 }
1034 type_ = RANGES; 1002 // First see if there is a range to the left of this one that
1035 segment_ = start_segment; 1003 // overlaps.
1036 data_.u_ranges = 0; 1004 ZoneSplayTree<Config>::Locator loc;
1037 int offset = 0; 1005 if (tree()->FindGreatestLessThan(current.from(), &loc)) {
1038 uc16 last_boundary = segment_start(segment_); 1006 Entry* entry = &loc.value();
1039 int i; 1007 // If we've found a range that overlaps with this one, and it
1040 for (i = 0; i < boundaries.length(); i++) { 1008 // starts strictly to the left of this one, we have to fix it
1041 Range current = boundaries[i]; 1009 // because the following code only handles ranges that start on
1042 ASSERT(last_boundary <= current.from()); 1010 // or after the start point of the range we're adding.
1043 int word = current.from() - last_boundary; 1011 if (entry->from() < current.from() && entry->to() >= current.from()) {
1044 // We have to repeat the write operation to write both the delta 1012 // Snap the overlapping range in half around the start point of
1045 // from the last range to the start of this one, and the delta 1013 // the range we're adding.
1046 // from the start to the end of this range. 1014 CharacterRange left(entry->from(), current.from() - 1);
1047 for (int j = 0; j < 2; j++) { 1015 CharacterRange right(current.from(), entry->to());
1048 do { 1016 // The left part of the overlapping range doesn't overlap.
1049 if (offset >= 16) 1017 // Truncate the whole entry to be just the left part.
1050 goto overflow; 1018 entry->set_to(left.to());
1051 byte nibble = (word & 0x7); 1019 // The right part is the one that overlaps. We add this part
1052 int next_word = word >> 3; 1020 // to the map and let the next step deal with merging it with
1053 if (next_word == 0) 1021 // the range we're adding.
1054 nibble |= 0x8; 1022 ZoneSplayTree<Config>::Locator loc;
1055 write_nibble(offset, nibble); 1023 ASSERT_RESULT(tree()->Insert(right.from(), &loc));
1056 word = next_word; 1024 loc.set_value(Entry(right.from(),
1057 offset++; 1025 right.to(),
1058 } while (word > 0); 1026 entry->out_set().Clone()));
1059 word = current.to() - current.from() + 1;
1060 } 1027 }
1061 count_ = offset; 1028 }
1062 last_boundary = current.to() + 1; 1029 while (current.is_valid()) {
1063 continue; 1030 if (tree()->FindLeastGreaterThan(current.from(), &loc) &&
1064 overflow: 1031 (loc.value().from() <= current.to())) {
1065 CharacterClass* left = alloc->Allocate(); 1032 Entry* entry = &loc.value();
1066 ASSERT(i > 0); 1033 // We have overlap. If there is space between the start point of
1067 if (segment_of(boundaries[i-1].to()) == segment_) { 1034 // the range we're adding and where the overlapping range starts
1068 // If what we've written so far fits within a single segment we 1035 // then we have to add a range covering just that space.
1069 // can fit it into a bitfield. Scan forward to see if any more 1036 if (current.from() < entry->from()) {
1070 // ranges can be included. 1037 ZoneSplayTree<Config>::Locator ins;
1071 while (i < boundaries.length() 1038 ASSERT_RESULT(tree()->Insert(current.from(), &ins));
1072 && segment_of(boundaries[i].to()) == segment_) 1039 ins.set_value(Entry(current.from(), entry->from() - 1, value));
1073 i++; 1040 current.set_from(entry->from());
1074 ASSERT(i < boundaries.length()); 1041 }
1075 left->InitializeFieldFrom(boundaries.SubVector(0, i)); 1042 ASSERT_EQ(current.from(), entry->from());
1043 // If the overlapping range extends beyond the one we want to add
1044 // we have to snap the right part off and add it separately.
1045 if (entry->to() > current.to()) {
1046 ZoneSplayTree<Config>::Locator ins;
1047 ASSERT_RESULT(tree()->Insert(current.to() + 1, &ins));
1048 ins.set_value(Entry(current.to() + 1,
1049 entry->to(),
1050 entry->out_set().Clone()));
1051 entry->set_to(current.to());
1052 }
1053 ASSERT(entry->to() <= current.to());
1054 // The overlapping range is now completely contained by the range
1055 // we're adding so we can just update it and move the start point
1056 // of the range we're adding just past it.
1057 entry->AddValue(value);
1058 current.set_from(entry->to() + 1);
1076 } else { 1059 } else {
1077 // Otherwise we copy the work we've done so far into the left 1060 // There is no overlap so we can just add the range
1078 // heap-allocated charclass. 1061 ZoneSplayTree<Config>::Locator ins;
1079 *left = *this; 1062 ASSERT_RESULT(tree()->Insert(current.from(), &ins));
1063 ins.set_value(Entry(current.from(), current.to(), value));
1064 break;
1080 } 1065 }
1081 CharacterClass* right = alloc->Allocate(); 1066 }
1082 right->InitializeRangesFrom(boundaries.SubVector(i, boundaries.length()), 1067 }
1083 alloc); 1068
1084 type_ = UNION; 1069
1085 data_.u_union.left = left; 1070 OutSet DispatchTable::Get(uc16 value) {
1086 data_.u_union.right = right; 1071 ZoneSplayTree<Config>::Locator loc;
1087 return; 1072 if (!tree()->FindGreatestLessThan(value, &loc))
1088 } 1073 return OutSet::empty();
1074 Entry* entry = &loc.value();
1075 if (value <= entry->to())
1076 return entry->out_set();
1077 else
1078 return OutSet::empty();
1089 } 1079 }
1090 1080
1091 1081
1092 // ------------------------------------------------------------------- 1082 // -------------------------------------------------------------------
1093 // Execution 1083 // Execution
1094 1084
1095 1085
1096 template <typename Char> 1086 template <typename Char>
1097 class ExecutionState { 1087 class ExecutionState {
1098 public: 1088 public:
(...skipping 126 matching lines...) Expand 10 before | Expand all | Expand 10 after
1225 } 1215 }
1226 1216
1227 1217
1228 template <typename Char> 1218 template <typename Char>
1229 bool CharacterClassNode<Char>::Step(ExecutionState<Char>* state) { 1219 bool CharacterClassNode<Char>::Step(ExecutionState<Char>* state) {
1230 if (state->AtEnd()) return false; 1220 if (state->AtEnd()) return false;
1231 ZoneList<CharacterRange>* ranges = this->ranges(); 1221 ZoneList<CharacterRange>* ranges = this->ranges();
1232 unsigned current = state->current_char(); 1222 unsigned current = state->current_char();
1233 for (int i = 0; i < ranges->length(); i++) { 1223 for (int i = 0; i < ranges->length(); i++) {
1234 CharacterRange& range = ranges->at(i); 1224 CharacterRange& range = ranges->at(i);
1235 if (range.is_character_class()) { 1225 if (range.from() <= current && current <= range.to()) {
1236 switch (range.from()) { 1226 state->Advance(1, this->next());
1237 case '.': 1227 return true;
1238 state->Advance(1, this->next());
1239 return true;
1240 default:
1241 UNIMPLEMENTED();
1242 }
1243 } else {
1244 if (range.from() <= current && current <= range.to()) {
1245 state->Advance(1, this->next());
1246 return true;
1247 }
1248 } 1228 }
1249 } 1229 }
1250 return false; 1230 return false;
1251 } 1231 }
1252 1232
1253 1233
1254 template <typename Char> 1234 template <typename Char>
1255 bool EndNode<Char>::Step(ExecutionState<Char>* state) { 1235 bool EndNode<Char>::Step(ExecutionState<Char>* state) {
1256 state->set_done(); 1236 state->set_done();
1257 return false; 1237 return false;
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after
1290 template 1270 template
1291 bool RegExpEngine::Execute<const char>(RegExpNode<const char>* start, 1271 bool RegExpEngine::Execute<const char>(RegExpNode<const char>* start,
1292 Vector<const char> input); 1272 Vector<const char> input);
1293 1273
1294 template 1274 template
1295 bool RegExpEngine::Execute<const uc16>(RegExpNode<const uc16>* start, 1275 bool RegExpEngine::Execute<const uc16>(RegExpNode<const uc16>* start,
1296 Vector<const uc16> input); 1276 Vector<const uc16> input);
1297 1277
1298 1278
1299 }} // namespace v8::internal 1279 }} // namespace v8::internal
OLDNEW
« no previous file with comments | « src/jsregexp.h ('k') | src/jsregexp-inl.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698