Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. | 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 108 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading... | |
| 119 // subject JS String object is cached in the heap. We also cache a | 119 // subject JS String object is cached in the heap. We also cache a |
| 120 // translation between position and utf8 position. | 120 // translation between position and utf8 position. |
| 121 static char* utf8_subject_cache_; | 121 static char* utf8_subject_cache_; |
| 122 static int utf8_length_cache_; | 122 static int utf8_length_cache_; |
| 123 static int utf8_position_; | 123 static int utf8_position_; |
| 124 static int character_position_; | 124 static int character_position_; |
| 125 }; | 125 }; |
| 126 | 126 |
| 127 | 127 |
| 128 template <typename Char> class RegExpNode; | 128 template <typename Char> class RegExpNode; |
| 129 class CharacterClassAllocator; | |
| 130 | |
| 131 | |
| 132 class CharacterClass { | |
| 133 public: | |
| 134 | |
| 135 enum Type { EMPTY = 0, FIELD = 1, RANGES = 2, UNION = 3 }; | |
| 136 | |
| 137 // A closed range from and including 'from', to and including 'to'. | |
| 138 class Range { | |
| 139 public: | |
| 140 Range() : from_(0), to_(0) { } | |
| 141 Range(uc16 from, uc16 to) : from_(from), to_(to) { ASSERT(from <= to); } | |
| 142 uc16 from() { return from_; } | |
| 143 uc16 to() { return to_; } | |
| 144 private: | |
| 145 uc16 from_; | |
| 146 uc16 to_; | |
| 147 }; | |
| 148 | |
| 149 CharacterClass() : type_(EMPTY) { } | |
| 150 | |
| 151 explicit CharacterClass(Type type) : type_(type) { } | |
| 152 | |
| 153 bool Contains(uc16 c); | |
| 154 | |
| 155 // Returns a character class with a single bit set | |
| 156 static inline CharacterClass SingletonField(uc16 chr); | |
| 157 | |
| 158 // Returns a bitfield character class with a closed range set. The | |
| 159 // range must fit within one field, that is, fit between two adjacent | |
| 160 // kFieldMax-aligned boundaries. | |
| 161 static inline CharacterClass RangeField(Range range); | |
| 162 | |
| 163 static inline CharacterClass Union(CharacterClass* left, | |
| 164 CharacterClass* right); | |
| 165 | |
| 166 // Initializes an empty charclass as a bitfield containing the | |
| 167 // specified ranges. | |
| 168 void InitializeFieldFrom(Vector<Range> ranges); | |
| 169 | |
| 170 // Initializes this character class to be the specified ranges. | |
| 171 // This class must be empty. | |
| 172 void InitializeRangesFrom(Vector<Range> ranges, | |
| 173 CharacterClassAllocator* alloc); | |
| 174 | |
| 175 // Creates a new character class containing the specified ranges | |
| 176 // and allocating any sub-classes using the specified allocator. | |
| 177 static CharacterClass Ranges(Vector<Range> boundaries, | |
| 178 CharacterClassAllocator* alloc); | |
| 179 | |
| 180 // Returns one of the built-in character classes such as '\w' or | |
| 181 // '\S'. | |
| 182 static CharacterClass* GetCharacterClass(uc16 tag); | |
| 183 | |
| 184 inline void write_nibble(int index, byte value); | |
| 185 inline byte read_nibble(int index); | |
| 186 | |
| 187 static inline unsigned segment_of(uc16 value); | |
| 188 static inline uc16 segment_start(unsigned segment); | |
| 189 | |
| 190 private: | |
| 191 static const int kCharSize = 16; | |
| 192 static const int kFieldSegmentIndexWidth = 10; | |
| 193 static const int kFieldSegmentWidth = kCharSize - kFieldSegmentIndexWidth; | |
|
Lasse Reichstein
2008/10/30 10:28:42
Isn't this the field width, not the segment width?
| |
| 194 static const int kFieldMax = (1 << kFieldSegmentWidth); | |
| 195 static const int kSegmentMask = (1 << kFieldSegmentWidth) - 1; | |
| 196 static const int kNibbleCount = kFieldMax / 4; | |
| 197 STATIC_ASSERT(kFieldMax == 8 * sizeof(uint64_t)); | |
| 198 | |
| 199 Type type() { return type_; } | |
| 200 | |
| 201 static inline uint64_t long_bit(int index) { | |
| 202 return static_cast<uint64_t>(1) << index; | |
| 203 } | |
| 204 | |
| 205 Type type_: 2; | |
| 206 unsigned segment_ : 10; | |
| 207 unsigned count_ : 4; | |
| 208 union { | |
| 209 // These have the same type to make it easier to change one without | |
| 210 // touching the other. | |
| 211 uint64_t u_field; | |
| 212 uint64_t u_ranges; | |
| 213 struct { | |
| 214 CharacterClass* left; | |
| 215 CharacterClass* right; | |
| 216 } u_union; | |
|
Lasse Reichstein
2008/10/30 10:28:42
Slightly confusing name to have for a struct (espe…
| |
| 217 } data_; | |
| 218 }; | |
| 219 | |
| 220 | |
| 221 STATIC_ASSERT(sizeof(CharacterClass) == 3 * kIntSize); | |
| 222 | |
| 223 | |
| 224 class CharacterClassAllocator { | |
| 225 public: | |
| 226 virtual CharacterClass* Allocate() = 0; | |
| 227 virtual ~CharacterClassAllocator() { } | |
| 228 }; | |
| 229 | |
| 230 | |
| 231 template <int kCount> | |
| 232 class StaticCharacterClassAllocator: public CharacterClassAllocator { | |
| 233 public: | |
| 234 StaticCharacterClassAllocator() : used_(0) { } | |
| 235 virtual CharacterClass* Allocate(); | |
| 236 private: | |
| 237 int used_; | |
| 238 CharacterClass preallocated_[kCount]; | |
| 239 }; | |
| 129 | 240 |
| 130 | 241 |
| 131 class RegExpEngine: public AllStatic { | 242 class RegExpEngine: public AllStatic { |
| 132 public: | 243 public: |
| 133 template <typename Char> | 244 template <typename Char> |
| 134 static RegExpNode<Char>* Compile(RegExpTree* regexp); | 245 static RegExpNode<Char>* Compile(RegExpTree* regexp); |
| 135 | 246 |
| 136 template <typename Char> | 247 template <typename Char> |
| 137 static bool Execute(RegExpNode<Char>* start, Vector<Char> input); | 248 static bool Execute(RegExpNode<Char>* start, Vector<Char> input); |
| 138 }; | 249 }; |
| 139 | 250 |
| 140 | 251 |
| 141 } } // namespace v8::internal | 252 } } // namespace v8::internal |
| 142 | 253 |
| 143 #endif // V8_JSREGEXP_H_ | 254 #endif // V8_JSREGEXP_H_ |
| OLD | NEW |