OLD | NEW |
1 // Copyright 2012 the V8 project authors. All rights reserved. | 1 // Copyright 2012 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #ifndef V8_REGEXP_JSREGEXP_H_ | 5 #ifndef V8_REGEXP_JSREGEXP_H_ |
6 #define V8_REGEXP_JSREGEXP_H_ | 6 #define V8_REGEXP_JSREGEXP_H_ |
7 | 7 |
8 #include "src/allocation.h" | 8 #include "src/allocation.h" |
9 #include "src/assembler.h" | 9 #include "src/assembler.h" |
| 10 #include "src/regexp/regexp-ast.h" |
10 | 11 |
11 namespace v8 { | 12 namespace v8 { |
12 namespace internal { | 13 namespace internal { |
13 | 14 |
14 class NodeVisitor; | 15 class NodeVisitor; |
15 class RegExpCompiler; | 16 class RegExpCompiler; |
16 class RegExpMacroAssembler; | 17 class RegExpMacroAssembler; |
17 class RegExpNode; | 18 class RegExpNode; |
18 class RegExpTree; | 19 class RegExpTree; |
19 class BoyerMooreLookahead; | 20 class BoyerMooreLookahead; |
(...skipping 199 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
219 // Represents the location of one element relative to the intersection of | 220 // Represents the location of one element relative to the intersection of |
220 // two sets. Corresponds to the four areas of a Venn diagram. | 221 // two sets. Corresponds to the four areas of a Venn diagram. |
221 enum ElementInSetsRelation { | 222 enum ElementInSetsRelation { |
222 kInsideNone = 0, | 223 kInsideNone = 0, |
223 kInsideFirst = 1, | 224 kInsideFirst = 1, |
224 kInsideSecond = 2, | 225 kInsideSecond = 2, |
225 kInsideBoth = 3 | 226 kInsideBoth = 3 |
226 }; | 227 }; |
227 | 228 |
228 | 229 |
229 // Represents code units in the range from from_ to to_, both ends are | |
230 // inclusive. | |
231 class CharacterRange { | |
232 public: | |
233 CharacterRange() : from_(0), to_(0) { } | |
234 // For compatibility with the CHECK_OK macro | |
235 CharacterRange(void* null) { DCHECK_NULL(null); } // NOLINT | |
236 CharacterRange(uc16 from, uc16 to) : from_(from), to_(to) { } | |
237 static void AddClassEscape(uc16 type, ZoneList<CharacterRange>* ranges, | |
238 Zone* zone); | |
239 static Vector<const int> GetWordBounds(); | |
240 static inline CharacterRange Singleton(uc16 value) { | |
241 return CharacterRange(value, value); | |
242 } | |
243 static inline CharacterRange Range(uc16 from, uc16 to) { | |
244 DCHECK(from <= to); | |
245 return CharacterRange(from, to); | |
246 } | |
247 static inline CharacterRange Everything() { | |
248 return CharacterRange(0, 0xFFFF); | |
249 } | |
250 bool Contains(uc16 i) { return from_ <= i && i <= to_; } | |
251 uc16 from() const { return from_; } | |
252 void set_from(uc16 value) { from_ = value; } | |
253 uc16 to() const { return to_; } | |
254 void set_to(uc16 value) { to_ = value; } | |
255 bool is_valid() { return from_ <= to_; } | |
256 bool IsEverything(uc16 max) { return from_ == 0 && to_ >= max; } | |
257 bool IsSingleton() { return (from_ == to_); } | |
258 void AddCaseEquivalents(Isolate* isolate, Zone* zone, | |
259 ZoneList<CharacterRange>* ranges, bool is_one_byte); | |
260 static void Split(ZoneList<CharacterRange>* base, | |
261 Vector<const int> overlay, | |
262 ZoneList<CharacterRange>** included, | |
263 ZoneList<CharacterRange>** excluded, | |
264 Zone* zone); | |
265 // Whether a range list is in canonical form: Ranges ordered by from value, | |
266 // and ranges non-overlapping and non-adjacent. | |
267 static bool IsCanonical(ZoneList<CharacterRange>* ranges); | |
268 // Convert range list to canonical form. The characters covered by the ranges | |
269 // will still be the same, but no character is in more than one range, and | |
270 // adjacent ranges are merged. The resulting list may be shorter than the | |
271 // original, but cannot be longer. | |
272 static void Canonicalize(ZoneList<CharacterRange>* ranges); | |
273 // Negate the contents of a character range in canonical form. | |
274 static void Negate(ZoneList<CharacterRange>* src, | |
275 ZoneList<CharacterRange>* dst, | |
276 Zone* zone); | |
277 static const int kStartMarker = (1 << 24); | |
278 static const int kPayloadMask = (1 << 24) - 1; | |
279 | |
280 private: | |
281 uc16 from_; | |
282 uc16 to_; | |
283 }; | |
284 | |
285 | |
286 // A set of unsigned integers that behaves especially well on small | 230 // A set of unsigned integers that behaves especially well on small |
287 // integers (< 32). May do zone-allocation. | 231 // integers (< 32). May do zone-allocation. |
288 class OutSet: public ZoneObject { | 232 class OutSet: public ZoneObject { |
289 public: | 233 public: |
290 OutSet() : first_(0), remaining_(NULL), successors_(NULL) { } | 234 OutSet() : first_(0), remaining_(NULL), successors_(NULL) { } |
291 OutSet* Extend(unsigned value, Zone* zone); | 235 OutSet* Extend(unsigned value, Zone* zone); |
292 bool Get(unsigned value) const; | 236 bool Get(unsigned value) const; |
293 static const unsigned kFirstLimit = 32; | 237 static const unsigned kFirstLimit = 32; |
294 | 238 |
295 private: | 239 private: |
(...skipping 77 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
373 | 317 |
374 #define FOR_EACH_NODE_TYPE(VISIT) \ | 318 #define FOR_EACH_NODE_TYPE(VISIT) \ |
375 VISIT(End) \ | 319 VISIT(End) \ |
376 VISIT(Action) \ | 320 VISIT(Action) \ |
377 VISIT(Choice) \ | 321 VISIT(Choice) \ |
378 VISIT(BackReference) \ | 322 VISIT(BackReference) \ |
379 VISIT(Assertion) \ | 323 VISIT(Assertion) \ |
380 VISIT(Text) | 324 VISIT(Text) |
381 | 325 |
382 | 326 |
383 #define FOR_EACH_REG_EXP_TREE_TYPE(VISIT) \ | |
384 VISIT(Disjunction) \ | |
385 VISIT(Alternative) \ | |
386 VISIT(Assertion) \ | |
387 VISIT(CharacterClass) \ | |
388 VISIT(Atom) \ | |
389 VISIT(Quantifier) \ | |
390 VISIT(Capture) \ | |
391 VISIT(Lookaround) \ | |
392 VISIT(BackReference) \ | |
393 VISIT(Empty) \ | |
394 VISIT(Text) | |
395 | |
396 | |
397 #define FORWARD_DECLARE(Name) class RegExp##Name; | |
398 FOR_EACH_REG_EXP_TREE_TYPE(FORWARD_DECLARE) | |
399 #undef FORWARD_DECLARE | |
400 | |
401 | |
402 class TextElement final BASE_EMBEDDED { | |
403 public: | |
404 enum TextType { | |
405 ATOM, | |
406 CHAR_CLASS | |
407 }; | |
408 | |
409 static TextElement Atom(RegExpAtom* atom); | |
410 static TextElement CharClass(RegExpCharacterClass* char_class); | |
411 | |
412 int cp_offset() const { return cp_offset_; } | |
413 void set_cp_offset(int cp_offset) { cp_offset_ = cp_offset; } | |
414 int length() const; | |
415 | |
416 TextType text_type() const { return text_type_; } | |
417 | |
418 RegExpTree* tree() const { return tree_; } | |
419 | |
420 RegExpAtom* atom() const { | |
421 DCHECK(text_type() == ATOM); | |
422 return reinterpret_cast<RegExpAtom*>(tree()); | |
423 } | |
424 | |
425 RegExpCharacterClass* char_class() const { | |
426 DCHECK(text_type() == CHAR_CLASS); | |
427 return reinterpret_cast<RegExpCharacterClass*>(tree()); | |
428 } | |
429 | |
430 private: | |
431 TextElement(TextType text_type, RegExpTree* tree) | |
432 : cp_offset_(-1), text_type_(text_type), tree_(tree) {} | |
433 | |
434 int cp_offset_; | |
435 TextType text_type_; | |
436 RegExpTree* tree_; | |
437 }; | |
438 | |
439 | |
440 class Trace; | 327 class Trace; |
441 struct PreloadState; | 328 struct PreloadState; |
442 class GreedyLoopState; | 329 class GreedyLoopState; |
443 class AlternativeGenerationList; | 330 class AlternativeGenerationList; |
444 | 331 |
445 struct NodeInfo { | 332 struct NodeInfo { |
446 NodeInfo() | 333 NodeInfo() |
447 : being_analyzed(false), | 334 : being_analyzed(false), |
448 been_analyzed(false), | 335 been_analyzed(false), |
449 follows_word_interest(false), | 336 follows_word_interest(false), |
(...skipping 231 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
681 // generated code is located unless the code is generated at the start of | 568 // generated code is located unless the code is generated at the start of |
682 // a trace, in which case it is generic and can be reused by flushing the | 569 // a trace, in which case it is generic and can be reused by flushing the |
683 // deferred operations in the current trace and generating a goto. | 570 // deferred operations in the current trace and generating a goto. |
684 int trace_count_; | 571 int trace_count_; |
685 BoyerMooreLookahead* bm_info_[2]; | 572 BoyerMooreLookahead* bm_info_[2]; |
686 | 573 |
687 Zone* zone_; | 574 Zone* zone_; |
688 }; | 575 }; |
689 | 576 |
690 | 577 |
691 // A simple closed interval. | |
692 class Interval { | |
693 public: | |
694 Interval() : from_(kNone), to_(kNone) { } | |
695 Interval(int from, int to) : from_(from), to_(to) { } | |
696 Interval Union(Interval that) { | |
697 if (that.from_ == kNone) | |
698 return *this; | |
699 else if (from_ == kNone) | |
700 return that; | |
701 else | |
702 return Interval(Min(from_, that.from_), Max(to_, that.to_)); | |
703 } | |
704 bool Contains(int value) { | |
705 return (from_ <= value) && (value <= to_); | |
706 } | |
707 bool is_empty() { return from_ == kNone; } | |
708 int from() const { return from_; } | |
709 int to() const { return to_; } | |
710 static Interval Empty() { return Interval(); } | |
711 static const int kNone = -1; | |
712 private: | |
713 int from_; | |
714 int to_; | |
715 }; | |
716 | |
717 | |
718 class SeqRegExpNode: public RegExpNode { | 578 class SeqRegExpNode: public RegExpNode { |
719 public: | 579 public: |
720 explicit SeqRegExpNode(RegExpNode* on_success) | 580 explicit SeqRegExpNode(RegExpNode* on_success) |
721 : RegExpNode(on_success->zone()), on_success_(on_success) { } | 581 : RegExpNode(on_success->zone()), on_success_(on_success) { } |
722 RegExpNode* on_success() { return on_success_; } | 582 RegExpNode* on_success() { return on_success_; } |
723 void set_on_success(RegExpNode* node) { on_success_ = node; } | 583 void set_on_success(RegExpNode* node) { on_success_ = node; } |
724 virtual RegExpNode* FilterOneByte(int depth, bool ignore_case); | 584 virtual RegExpNode* FilterOneByte(int depth, bool ignore_case); |
725 virtual void FillInBMInfo(Isolate* isolate, int offset, int budget, | 585 virtual void FillInBMInfo(Isolate* isolate, int offset, int budget, |
726 BoyerMooreLookahead* bm, bool not_at_start) { | 586 BoyerMooreLookahead* bm, bool not_at_start) { |
727 on_success_->FillInBMInfo(isolate, offset, budget - 1, bm, not_at_start); | 587 on_success_->FillInBMInfo(isolate, offset, budget - 1, bm, not_at_start); |
(...skipping 950 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1678 static const int kStringOffset = 0; | 1538 static const int kStringOffset = 0; |
1679 static const int kPatternOffset = 1; | 1539 static const int kPatternOffset = 1; |
1680 static const int kArrayOffset = 2; | 1540 static const int kArrayOffset = 2; |
1681 static const int kLastMatchOffset = 3; | 1541 static const int kLastMatchOffset = 3; |
1682 }; | 1542 }; |
1683 | 1543 |
1684 } // namespace internal | 1544 } // namespace internal |
1685 } // namespace v8 | 1545 } // namespace v8 |
1686 | 1546 |
1687 #endif // V8_REGEXP_JSREGEXP_H_ | 1547 #endif // V8_REGEXP_JSREGEXP_H_ |
OLD | NEW |