OLD | NEW |
1 // Copyright 2016 the V8 project authors. All rights reserved. | 1 // Copyright 2016 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #ifndef V8_REGEXP_REGEXP_AST_H_ | 5 #ifndef V8_REGEXP_REGEXP_AST_H_ |
6 #define V8_REGEXP_REGEXP_AST_H_ | 6 #define V8_REGEXP_REGEXP_AST_H_ |
7 | 7 |
8 #include "src/objects.h" | |
9 #include "src/utils.h" | 8 #include "src/utils.h" |
10 #include "src/zone.h" | 9 #include "src/zone.h" |
11 | 10 |
12 namespace v8 { | 11 namespace v8 { |
13 namespace internal { | 12 namespace internal { |
14 | 13 |
15 #define FOR_EACH_REG_EXP_TREE_TYPE(VISIT) \ | 14 #define FOR_EACH_REG_EXP_TREE_TYPE(VISIT) \ |
16 VISIT(Disjunction) \ | 15 VISIT(Disjunction) \ |
17 VISIT(Alternative) \ | 16 VISIT(Alternative) \ |
18 VISIT(Assertion) \ | 17 VISIT(Assertion) \ |
(...skipping 52 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
71 }; | 70 }; |
72 | 71 |
73 | 72 |
74 // Represents code units in the range from from_ to to_, both ends are | 73 // Represents code units in the range from from_ to to_, both ends are |
75 // inclusive. | 74 // inclusive. |
76 class CharacterRange { | 75 class CharacterRange { |
77 public: | 76 public: |
78 CharacterRange() : from_(0), to_(0) {} | 77 CharacterRange() : from_(0), to_(0) {} |
79 // For compatibility with the CHECK_OK macro | 78 // For compatibility with the CHECK_OK macro |
80 CharacterRange(void* null) { DCHECK_NULL(null); } // NOLINT | 79 CharacterRange(void* null) { DCHECK_NULL(null); } // NOLINT |
81 CharacterRange(uc32 from, uc32 to) : from_(from), to_(to) {} | 80 CharacterRange(uc16 from, uc16 to) : from_(from), to_(to) {} |
82 static void AddClassEscape(uc16 type, ZoneList<CharacterRange>* ranges, | 81 static void AddClassEscape(uc16 type, ZoneList<CharacterRange>* ranges, |
83 Zone* zone); | 82 Zone* zone); |
84 static Vector<const int> GetWordBounds(); | 83 static Vector<const int> GetWordBounds(); |
85 static inline CharacterRange Singleton(uc32 value) { | 84 static inline CharacterRange Singleton(uc16 value) { |
86 return CharacterRange(value, value); | 85 return CharacterRange(value, value); |
87 } | 86 } |
88 static inline CharacterRange Range(uc32 from, uc32 to) { | 87 static inline CharacterRange Range(uc16 from, uc16 to) { |
89 DCHECK(from <= to); | 88 DCHECK(from <= to); |
90 return CharacterRange(from, to); | 89 return CharacterRange(from, to); |
91 } | 90 } |
92 static inline CharacterRange Everything() { | 91 static inline CharacterRange Everything() { |
93 return CharacterRange(0, String::kMaxCodePoint); | 92 return CharacterRange(0, 0xFFFF); |
94 } | 93 } |
95 static inline ZoneList<CharacterRange>* List(Zone* zone, | 94 bool Contains(uc16 i) { return from_ <= i && i <= to_; } |
96 CharacterRange range) { | 95 uc16 from() const { return from_; } |
97 ZoneList<CharacterRange>* list = | 96 void set_from(uc16 value) { from_ = value; } |
98 new (zone) ZoneList<CharacterRange>(1, zone); | 97 uc16 to() const { return to_; } |
99 list->Add(range, zone); | 98 void set_to(uc16 value) { to_ = value; } |
100 return list; | |
101 } | |
102 bool Contains(uc32 i) { return from_ <= i && i <= to_; } | |
103 uc32 from() const { return from_; } | |
104 void set_from(uc32 value) { from_ = value; } | |
105 uc32 to() const { return to_; } | |
106 void set_to(uc32 value) { to_ = value; } | |
107 bool is_valid() { return from_ <= to_; } | 99 bool is_valid() { return from_ <= to_; } |
108 bool IsEverything(uc16 max) { return from_ == 0 && to_ >= max; } | 100 bool IsEverything(uc16 max) { return from_ == 0 && to_ >= max; } |
109 bool IsSingleton() { return (from_ == to_); } | 101 bool IsSingleton() { return (from_ == to_); } |
110 void AddCaseEquivalents(Isolate* isolate, Zone* zone, | 102 void AddCaseEquivalents(Isolate* isolate, Zone* zone, |
111 ZoneList<CharacterRange>* ranges, bool is_one_byte); | 103 ZoneList<CharacterRange>* ranges, bool is_one_byte); |
| 104 static void Split(ZoneList<CharacterRange>* base, Vector<const int> overlay, |
| 105 ZoneList<CharacterRange>** included, |
| 106 ZoneList<CharacterRange>** excluded, Zone* zone); |
112 // Whether a range list is in canonical form: Ranges ordered by from value, | 107 // Whether a range list is in canonical form: Ranges ordered by from value, |
113 // and ranges non-overlapping and non-adjacent. | 108 // and ranges non-overlapping and non-adjacent. |
114 static bool IsCanonical(ZoneList<CharacterRange>* ranges); | 109 static bool IsCanonical(ZoneList<CharacterRange>* ranges); |
115 // Convert range list to canonical form. The characters covered by the ranges | 110 // Convert range list to canonical form. The characters covered by the ranges |
116 // will still be the same, but no character is in more than one range, and | 111 // will still be the same, but no character is in more than one range, and |
117 // adjacent ranges are merged. The resulting list may be shorter than the | 112 // adjacent ranges are merged. The resulting list may be shorter than the |
118 // original, but cannot be longer. | 113 // original, but cannot be longer. |
119 static void Canonicalize(ZoneList<CharacterRange>* ranges); | 114 static void Canonicalize(ZoneList<CharacterRange>* ranges); |
120 // Negate the contents of a character range in canonical form. | 115 // Negate the contents of a character range in canonical form. |
121 static void Negate(ZoneList<CharacterRange>* src, | 116 static void Negate(ZoneList<CharacterRange>* src, |
122 ZoneList<CharacterRange>* dst, Zone* zone); | 117 ZoneList<CharacterRange>* dst, Zone* zone); |
123 static const int kStartMarker = (1 << 24); | 118 static const int kStartMarker = (1 << 24); |
124 static const int kPayloadMask = (1 << 24) - 1; | 119 static const int kPayloadMask = (1 << 24) - 1; |
125 | 120 |
126 private: | 121 private: |
127 uc32 from_; | 122 uc16 from_; |
128 uc32 to_; | 123 uc16 to_; |
129 }; | 124 }; |
130 | 125 |
131 | 126 |
132 class CharacterSet final BASE_EMBEDDED { | 127 class CharacterSet final BASE_EMBEDDED { |
133 public: | 128 public: |
134 explicit CharacterSet(uc16 standard_set_type) | 129 explicit CharacterSet(uc16 standard_set_type) |
135 : ranges_(NULL), standard_set_type_(standard_set_type) {} | 130 : ranges_(NULL), standard_set_type_(standard_set_type) {} |
136 explicit CharacterSet(ZoneList<CharacterRange>* ranges) | 131 explicit CharacterSet(ZoneList<CharacterRange>* ranges) |
137 : ranges_(ranges), standard_set_type_(0) {} | 132 : ranges_(ranges), standard_set_type_(0) {} |
138 ZoneList<CharacterRange>* ranges(Zone* zone); | 133 ZoneList<CharacterRange>* ranges(Zone* zone); |
(...skipping 146 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
285 class RegExpCharacterClass final : public RegExpTree { | 280 class RegExpCharacterClass final : public RegExpTree { |
286 public: | 281 public: |
287 RegExpCharacterClass(ZoneList<CharacterRange>* ranges, bool is_negated) | 282 RegExpCharacterClass(ZoneList<CharacterRange>* ranges, bool is_negated) |
288 : set_(ranges), is_negated_(is_negated) {} | 283 : set_(ranges), is_negated_(is_negated) {} |
289 explicit RegExpCharacterClass(uc16 type) : set_(type), is_negated_(false) {} | 284 explicit RegExpCharacterClass(uc16 type) : set_(type), is_negated_(false) {} |
290 void* Accept(RegExpVisitor* visitor, void* data) override; | 285 void* Accept(RegExpVisitor* visitor, void* data) override; |
291 RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override; | 286 RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override; |
292 RegExpCharacterClass* AsCharacterClass() override; | 287 RegExpCharacterClass* AsCharacterClass() override; |
293 bool IsCharacterClass() override; | 288 bool IsCharacterClass() override; |
294 bool IsTextElement() override { return true; } | 289 bool IsTextElement() override { return true; } |
295 bool NeedsDesugaringForUnicode(Zone* zone); | |
296 int min_match() override { return 1; } | 290 int min_match() override { return 1; } |
297 int max_match() override { return 1; } | 291 int max_match() override { return 1; } |
298 void AppendToText(RegExpText* text, Zone* zone) override; | 292 void AppendToText(RegExpText* text, Zone* zone) override; |
299 CharacterSet character_set() { return set_; } | 293 CharacterSet character_set() { return set_; } |
300 // TODO(lrn): Remove need for complex version if is_standard that | 294 // TODO(lrn): Remove need for complex version if is_standard that |
301 // recognizes a mangled standard set and just do { return set_.is_special(); } | 295 // recognizes a mangled standard set and just do { return set_.is_special(); } |
302 bool is_standard(Zone* zone); | 296 bool is_standard(Zone* zone); |
303 // Returns a value representing the standard character set if is_standard() | 297 // Returns a value representing the standard character set if is_standard() |
304 // returns true. | 298 // returns true. |
305 // Currently used values are: | 299 // Currently used values are: |
(...skipping 144 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
450 bool IsLookaround() override; | 444 bool IsLookaround() override; |
451 bool IsAnchoredAtStart() override; | 445 bool IsAnchoredAtStart() override; |
452 int min_match() override { return 0; } | 446 int min_match() override { return 0; } |
453 int max_match() override { return 0; } | 447 int max_match() override { return 0; } |
454 RegExpTree* body() { return body_; } | 448 RegExpTree* body() { return body_; } |
455 bool is_positive() { return is_positive_; } | 449 bool is_positive() { return is_positive_; } |
456 int capture_count() { return capture_count_; } | 450 int capture_count() { return capture_count_; } |
457 int capture_from() { return capture_from_; } | 451 int capture_from() { return capture_from_; } |
458 Type type() { return type_; } | 452 Type type() { return type_; } |
459 | 453 |
460 class Builder { | |
461 public: | |
462 Builder(bool is_positive, RegExpNode* on_success, | |
463 int stack_pointer_register, int position_register, | |
464 int capture_register_count = 0, int capture_register_start = 0); | |
465 RegExpNode* on_match_success() { return on_match_success_; } | |
466 RegExpNode* ForMatch(RegExpNode* match); | |
467 | |
468 private: | |
469 bool is_positive_; | |
470 RegExpNode* on_match_success_; | |
471 RegExpNode* on_success_; | |
472 int stack_pointer_register_; | |
473 int position_register_; | |
474 }; | |
475 | |
476 private: | 454 private: |
477 RegExpTree* body_; | 455 RegExpTree* body_; |
478 bool is_positive_; | 456 bool is_positive_; |
479 int capture_count_; | 457 int capture_count_; |
480 int capture_from_; | 458 int capture_from_; |
481 Type type_; | 459 Type type_; |
482 }; | 460 }; |
483 | 461 |
484 | 462 |
485 class RegExpBackReference final : public RegExpTree { | 463 class RegExpBackReference final : public RegExpTree { |
(...skipping 23 matching lines...) Expand all Loading... |
509 RegExpEmpty* AsEmpty() override; | 487 RegExpEmpty* AsEmpty() override; |
510 bool IsEmpty() override; | 488 bool IsEmpty() override; |
511 int min_match() override { return 0; } | 489 int min_match() override { return 0; } |
512 int max_match() override { return 0; } | 490 int max_match() override { return 0; } |
513 }; | 491 }; |
514 | 492 |
515 } // namespace internal | 493 } // namespace internal |
516 } // namespace v8 | 494 } // namespace v8 |
517 | 495 |
518 #endif // V8_REGEXP_REGEXP_AST_H_ | 496 #endif // V8_REGEXP_REGEXP_AST_H_ |
OLD | NEW |