| OLD | NEW |
| 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. | 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 118 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 129 // subject JS String object is cached in the heap. We also cache a | 129 // subject JS String object is cached in the heap. We also cache a |
| 130 // translation between position and utf8 position. | 130 // translation between position and utf8 position. |
| 131 static char* utf8_subject_cache_; | 131 static char* utf8_subject_cache_; |
| 132 static int utf8_length_cache_; | 132 static int utf8_length_cache_; |
| 133 static int utf8_position_; | 133 static int utf8_position_; |
| 134 static int character_position_; | 134 static int character_position_; |
| 135 }; | 135 }; |
| 136 | 136 |
| 137 | 137 |
| 138 template <typename Char> class RegExpNode; | 138 template <typename Char> class RegExpNode; |
| 139 class CharacterClassAllocator; | |
| 140 | 139 |
| 141 | 140 |
| 142 class CharacterClass { | 141 class CharacterRange { |
| 143 public: | 142 public: |
| 143 // For compatibility with the CHECK_OK macro |
| 144 CharacterRange(void* null) { ASSERT_EQ(NULL, null); } //NOLINT |
| 145 CharacterRange(uc16 from, uc16 to) |
| 146 : from_(from), |
| 147 to_(to) { |
| 148 } |
| 149 static void AddClassEscape(uc16 type, ZoneList<CharacterRange>* ranges); |
| 150 static inline CharacterRange Singleton(uc16 value) { |
| 151 return CharacterRange(value, value); |
| 152 } |
| 153 static inline CharacterRange Range(uc16 from, uc16 to) { |
| 154 ASSERT(from <= to); |
| 155 return CharacterRange(from, to); |
| 156 } |
| 157 uc16 from() { return from_; } |
| 158 void set_from(uc16 value) { from_ = value; } |
| 159 uc16 to() { return to_; } |
| 160 void set_to(uc16 value) { to_ = value; } |
| 161 bool is_valid() { return from_ <= to_; } |
| 162 bool IsSingleton() { return (from_ == to_); } |
| 163 private: |
| 164 uc16 from_; |
| 165 uc16 to_; |
| 166 }; |
| 144 | 167 |
| 145 enum Type { EMPTY = 0, FIELD = 1, RANGES = 2, UNION = 3 }; | |
| 146 | 168 |
| 147 // A closed range from and including 'from', to and including 'to'. | 169 // A zone splay tree. The config type parameter encapsulates the |
| 148 class Range { | 170 // different configurations of a concrete splay tree: |
| 171 // |
| 172 // typedef Key: the key type |
| 173 // typedef Value: the value type |
| 174 // static const kNoKey: the dummy key used when no key is set |
| 175 // static const kNoValue: the dummy value used to initialize nodes |
| 176 // int (Compare)(Key& a, Key& b) -> {-1, 0, 1}: comparison function |
| 177 // |
| 178 template <typename Config> |
| 179 class ZoneSplayTree : public ZoneObject { |
| 180 public: |
| 181 typedef typename Config::Key Key; |
| 182 typedef typename Config::Value Value; |
| 183 |
| 184 class Locator; |
| 185 |
| 186 ZoneSplayTree() : root_(NULL) { } |
| 187 |
| 188 // Inserts the given key in this tree with the given value. Returns |
| 189 // true if a node was inserted, otherwise false. If found the locator |
| 190 // is enabled and provides access to the mapping for the key. |
| 191 bool Insert(const Key& key, Locator* locator); |
| 192 |
| 193 // Looks up the key in this tree and returns true if it was found, |
| 194 // otherwise false. If the node is found the locator is enabled and |
| 195 // provides access to the mapping for the key. |
| 196 bool Find(const Key& key, Locator* locator); |
| 197 |
| 198 // Finds the mapping with the greatest key less than or equal to the |
| 199 // given key. |
| 200 bool FindGreatestLessThan(const Key& key, Locator* locator); |
| 201 |
| 202 // Find the mapping with the greatest key in this tree. |
| 203 bool FindGreatest(Locator* locator); |
| 204 |
| 205 // Finds the mapping with the least key greater than or equal to the |
| 206 // given key. |
| 207 bool FindLeastGreaterThan(const Key& key, Locator* locator); |
| 208 |
| 209 // Find the mapping with the least key in this tree. |
| 210 bool FindLeast(Locator* locator); |
| 211 |
| 212 // Remove the node with the given key from the tree. |
| 213 bool Remove(const Key& key); |
| 214 |
| 215 bool is_empty() { return root_ == NULL; } |
| 216 |
| 217 // Perform the splay operation for the given key. Moves the node with |
| 218 // the given key to the top of the tree. If no node has the given |
| 219 // key, the last node on the search path is moved to the top of the |
| 220 // tree. |
| 221 void Splay(const Key& key); |
| 222 |
| 223 class Node : public ZoneObject { |
| 149 public: | 224 public: |
| 150 Range() : from_(0), to_(0) { } | 225 Node(const Key& key, const Value& value) |
| 151 Range(uc16 from, uc16 to) : from_(from), to_(to) { ASSERT(from <= to); } | 226 : key_(key), |
| 227 value_(value), |
| 228 left_(NULL), |
| 229 right_(NULL) { } |
| 230 private: |
| 231 friend class ZoneSplayTree; |
| 232 friend class Locator; |
| 233 Key key_; |
| 234 Value value_; |
| 235 Node* left_; |
| 236 Node* right_; |
| 237 }; |
| 238 |
| 239 // A locator provides access to a node in the tree without actually |
| 240 // exposing the node. |
| 241 class Locator { |
| 242 public: |
| 243 Locator() : node_(NULL) { } |
| 244 const Key& key() { return node_->key_; } |
| 245 Value& value() { return node_->value_; } |
| 246 void set_value(const Value& value) { node_->value_ = value; } |
| 247 inline void bind(Node* node) { node_ = node; } |
| 248 private: |
| 249 Node* node_; |
| 250 }; |
| 251 |
| 252 private: |
| 253 Node* root_; |
| 254 }; |
| 255 |
| 256 |
| 257 // A set of unsigned integers that behaves especially well on small |
| 258 // integers (< 32). May do zone-allocation. |
| 259 class OutSet { |
| 260 public: |
| 261 OutSet() : first_(0), remaining_(NULL) { } |
| 262 explicit inline OutSet(unsigned value); |
| 263 static inline OutSet empty() { return OutSet(); } |
| 264 void Set(unsigned value); |
| 265 bool Get(unsigned value); |
| 266 OutSet Clone(); |
| 267 private: |
| 268 OutSet(uint32_t first, ZoneList<unsigned>* remaining) |
| 269 : first_(first), remaining_(remaining) { } |
| 270 static const unsigned kFirstLimit = 32; |
| 271 uint32_t first_; |
| 272 ZoneList<unsigned>* remaining_; |
| 273 }; |
| 274 |
| 275 |
| 276 // A mapping from integers, specified as ranges, to a set of integers. |
| 277 // Used for mapping character ranges to choices. |
| 278 class DispatchTable { |
| 279 public: |
| 280 class Entry { |
| 281 public: |
| 282 Entry() |
| 283 : from_(0), to_(0) { } |
| 284 Entry(uc16 from, uc16 to, OutSet out_set) |
| 285 : from_(from), to_(to), out_set_(out_set) { } |
| 286 inline Entry(uc16 from, uc16 to, unsigned value); |
| 152 uc16 from() { return from_; } | 287 uc16 from() { return from_; } |
| 153 uc16 to() { return to_; } | 288 uc16 to() { return to_; } |
| 289 void set_to(uc16 value) { to_ = value; } |
| 290 void AddValue(int value) { out_set_.Set(value); } |
| 291 OutSet out_set() { return out_set_; } |
| 154 private: | 292 private: |
| 155 uc16 from_; | 293 uc16 from_; |
| 156 uc16 to_; | 294 uc16 to_; |
| 295 OutSet out_set_; |
| 157 }; | 296 }; |
| 158 | 297 class Config { |
| 159 CharacterClass() : type_(EMPTY) { } | 298 public: |
| 160 | 299 typedef uc16 Key; |
| 161 explicit CharacterClass(Type type) : type_(type) { } | 300 typedef Entry Value; |
| 162 | 301 static const uc16 kNoKey; |
| 163 bool Contains(uc16 c); | 302 static const Entry kNoValue; |
| 164 | 303 static inline int Compare(uc16 a, uc16 b) { |
| 165 // Returns a character class with a single bit set | 304 if (a == b) return 0; |
| 166 static inline CharacterClass SingletonField(uc16 chr); | 305 else if (a < b) return -1; |
| 167 | 306 else return 1; |
| 168 // Returns a bitfield character class with a closed range set. The | 307 } |
| 169 // range must fit within one field, that is, fit between two adjacent | 308 }; |
| 170 // kFieldMax-aligned boundaries. | 309 void AddRange(CharacterRange range, int value); |
| 171 static inline CharacterClass RangeField(Range range); | 310 OutSet Get(uc16 value); |
| 172 | |
| 173 static inline CharacterClass Union(CharacterClass* left, | |
| 174 CharacterClass* right); | |
| 175 | |
| 176 // Initializes an empty charclass as a bitfield containing the | |
| 177 // specified ranges. | |
| 178 void InitializeFieldFrom(Vector<Range> ranges); | |
| 179 | |
| 180 // Initializes this character class to be the specified ranges. | |
| 181 // This class must be empty. | |
| 182 void InitializeRangesFrom(Vector<Range> ranges, | |
| 183 CharacterClassAllocator* alloc); | |
| 184 | |
| 185 // Creates a new character class containing the specified ranges | |
| 186 // and allocating any sub-classes using the specified allocator. | |
| 187 static CharacterClass Ranges(Vector<Range> boundaries, | |
| 188 CharacterClassAllocator* alloc); | |
| 189 | |
| 190 // Returns one of the built-in character classes such as '\w' or | |
| 191 // '\S'. | |
| 192 static CharacterClass* GetCharacterClass(uc16 tag); | |
| 193 | |
| 194 inline void write_nibble(int index, byte value); | |
| 195 inline byte read_nibble(int index); | |
| 196 | |
| 197 static inline unsigned segment_of(uc16 value); | |
| 198 static inline uc16 segment_start(unsigned segment); | |
| 199 | |
| 200 private: | 311 private: |
| 201 static const int kCharSize = 16; | 312 ZoneSplayTree<Config>* tree() { return &tree_; } |
| 202 static const int kFieldSegmentIndexWidth = 10; | 313 ZoneSplayTree<Config> tree_; |
| 203 static const int kFieldWidth = kCharSize - kFieldSegmentIndexWidth; | |
| 204 static const int kFieldMax = (1 << kFieldWidth); | |
| 205 static const int kSegmentMask = (1 << kFieldWidth) - 1; | |
| 206 static const int kNibbleCount = kFieldMax / 4; | |
| 207 STATIC_CHECK(kFieldMax == 8 * sizeof(uint64_t)); | |
| 208 | |
| 209 Type type() { return type_; } | |
| 210 | |
| 211 static inline uint64_t long_bit(int index) { | |
| 212 return static_cast<uint64_t>(1) << index; | |
| 213 } | |
| 214 | |
| 215 Type type_: 2; | |
| 216 unsigned segment_ : 10; | |
| 217 unsigned count_ : 4; | |
| 218 union { | |
| 219 // These have the same type to make it easier to change one without | |
| 220 // touching the other. | |
| 221 uint64_t u_field; | |
| 222 uint64_t u_ranges; | |
| 223 struct { | |
| 224 CharacterClass* left; | |
| 225 CharacterClass* right; | |
| 226 } u_union; | |
| 227 } data_; | |
| 228 }; | 314 }; |
| 229 | 315 |
| 230 | 316 |
| 231 STATIC_CHECK(sizeof(CharacterClass) == 3 * kIntSize); | |
| 232 | |
| 233 | |
| 234 class CharacterClassAllocator { | |
| 235 public: | |
| 236 virtual CharacterClass* Allocate() = 0; | |
| 237 virtual ~CharacterClassAllocator() { } | |
| 238 }; | |
| 239 | |
| 240 | |
| 241 template <int kCount> | |
| 242 class StaticCharacterClassAllocator: public CharacterClassAllocator { | |
| 243 public: | |
| 244 StaticCharacterClassAllocator() : used_(0) { } | |
| 245 virtual CharacterClass* Allocate(); | |
| 246 private: | |
| 247 int used_; | |
| 248 CharacterClass preallocated_[kCount]; | |
| 249 }; | |
| 250 | |
| 251 | |
| 252 class RegExpEngine: public AllStatic { | 317 class RegExpEngine: public AllStatic { |
| 253 public: | 318 public: |
| 254 template <typename Char> | 319 template <typename Char> |
| 255 static RegExpNode<Char>* Compile(RegExpTree* regexp); | 320 static RegExpNode<Char>* Compile(RegExpTree* regexp); |
| 256 | 321 |
| 257 template <typename Char> | 322 template <typename Char> |
| 258 static bool Execute(RegExpNode<Char>* start, Vector<Char> input); | 323 static bool Execute(RegExpNode<Char>* start, Vector<Char> input); |
| 259 }; | 324 }; |
| 260 | 325 |
| 261 | 326 |
| 262 } } // namespace v8::internal | 327 } } // namespace v8::internal |
| 263 | 328 |
| 264 #endif // V8_JSREGEXP_H_ | 329 #endif // V8_JSREGEXP_H_ |
| OLD | NEW |