Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(865)

Side by Side Diff: src/jsregexp.h

Issue 8732: Character classes (Closed)
Patch Set: Created 12 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | src/jsregexp.cc » ('j') | src/jsregexp.cc » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2006-2008 the V8 project authors. All rights reserved. 1 // Copyright 2006-2008 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 108 matching lines...) Expand 10 before | Expand all | Expand 10 after
119 // subject JS String object is cached in the heap. We also cache a 119 // subject JS String object is cached in the heap. We also cache a
120 // translation between position and utf8 position. 120 // translation between position and utf8 position.
121 static char* utf8_subject_cache_; 121 static char* utf8_subject_cache_;
122 static int utf8_length_cache_; 122 static int utf8_length_cache_;
123 static int utf8_position_; 123 static int utf8_position_;
124 static int character_position_; 124 static int character_position_;
125 }; 125 };
126 126
127 127
128 template <typename Char> class RegExpNode; 128 template <typename Char> class RegExpNode;
129 class CharacterClassAllocator;
130
131
132 class CharacterClass {
133 public:
134
135 enum Type { EMPTY = 0, FIELD = 1, RANGES = 2, UNION = 3 };
136
137 // A closed range from and including 'from', to and including 'to'.
138 class Range {
139 public:
140 Range() : from_(0), to_(0) { }
141 Range(uc16 from, uc16 to) : from_(from), to_(to) { ASSERT(from <= to); }
142 uc16 from() { return from_; }
143 uc16 to() { return to_; }
144 private:
145 uc16 from_;
146 uc16 to_;
147 };
148
149 CharacterClass() : type_(EMPTY) { }
150
151 explicit CharacterClass(Type type) : type_(type) { }
152
153 bool Contains(uc16 c);
154
155 // Returns a character class with a single bit set.
156 static inline CharacterClass SingletonField(uc16 chr);
157
158 // Returns a bitfield character class with a closed range set. The
159 // range must fit within one field, that is, fit between two adjacent
160 // kFieldMax-aligned boundaries.
161 static inline CharacterClass RangeField(Range range);
162
163 static inline CharacterClass Union(CharacterClass* left,
164 CharacterClass* right);
165
166 // Initializes an empty character class as a bitfield containing the
167 // specified ranges.
168 void InitializeFieldFrom(Vector<Range> ranges);
169
170 // Initializes this character class to be the specified ranges.
171 // This class must be empty.
172 void InitializeRangesFrom(Vector<Range> ranges,
173 CharacterClassAllocator* alloc);
174
175 // Creates a new character class containing the specified ranges,
176 // allocating any sub-classes using the specified allocator.
177 static CharacterClass Ranges(Vector<Range> boundaries,
178 CharacterClassAllocator* alloc);
179
180 // Returns one of the built-in character classes such as '\w' or
181 // '\S'.
182 static CharacterClass* GetCharacterClass(uc16 tag);
183
184 inline void write_nibble(int index, byte value);
185 inline byte read_nibble(int index);
186
187 static inline unsigned segment_of(uc16 value);
188 static inline uc16 segment_start(unsigned segment);
189
190 private:
191 static const int kCharSize = 16;
192 static const int kFieldSegmentIndexWidth = 10;
193 static const int kFieldSegmentWidth = kCharSize - kFieldSegmentIndexWidth;
Lasse Reichstein 2008/10/30 10:28:42 Isn't this the field width, not the segment width?
194 static const int kFieldMax = (1 << kFieldSegmentWidth);
195 static const int kSegmentMask = (1 << kFieldSegmentWidth) - 1;
196 static const int kNibbleCount = kFieldMax / 4;
197 STATIC_ASSERT(kFieldMax == 8 * sizeof(uint64_t));
198
199 Type type() { return type_; }
200
201 static inline uint64_t long_bit(int index) {
202 return static_cast<uint64_t>(1) << index;
203 }
204
205 Type type_: 2;
206 unsigned segment_ : 10;
207 unsigned count_ : 4;
208 union {
209 // These have the same type to make it easier to change one without
210 // touching the other.
211 uint64_t u_field;
212 uint64_t u_ranges;
213 struct {
214 CharacterClass* left;
215 CharacterClass* right;
216 } u_union;
Lasse Reichstein 2008/10/30 10:28:42 Slightly confusing name to have for a struct (espe
217 } data_;
218 };
219
220
221 STATIC_ASSERT(sizeof(CharacterClass) == 3 * kIntSize);
222
223
224 class CharacterClassAllocator {
225 public:
226 virtual CharacterClass* Allocate() = 0;
227 virtual ~CharacterClassAllocator() { }
228 };
229
230
231 template <int kCount>
232 class StaticCharacterClassAllocator: public CharacterClassAllocator {
233 public:
234 StaticCharacterClassAllocator() : used_(0) { }
235 virtual CharacterClass* Allocate();
236 private:
237 int used_;
238 CharacterClass preallocated_[kCount];
239 };
129 240
130 241
131 class RegExpEngine: public AllStatic { 242 class RegExpEngine: public AllStatic {
132 public: 243 public:
133 template <typename Char> 244 template <typename Char>
134 static RegExpNode<Char>* Compile(RegExpTree* regexp); 245 static RegExpNode<Char>* Compile(RegExpTree* regexp);
135 246
136 template <typename Char> 247 template <typename Char>
137 static bool Execute(RegExpNode<Char>* start, Vector<Char> input); 248 static bool Execute(RegExpNode<Char>* start, Vector<Char> input);
138 }; 249 };
139 250
140 251
141 } } // namespace v8::internal 252 } } // namespace v8::internal
142 253
143 #endif // V8_JSREGEXP_H_ 254 #endif // V8_JSREGEXP_H_
OLDNEW
« no previous file with comments | « no previous file | src/jsregexp.cc » ('j') | src/jsregexp.cc » ('J')

Powered by Google App Engine
This is Rietveld 408576698