| OLD | NEW |
| 1 // Copyright 2016 the V8 project authors. All rights reserved. | 1 // Copyright 2016 the V8 project authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #ifndef V8_REGEXP_REGEXP_AST_H_ | 5 #ifndef V8_REGEXP_REGEXP_AST_H_ |
| 6 #define V8_REGEXP_REGEXP_AST_H_ | 6 #define V8_REGEXP_REGEXP_AST_H_ |
| 7 | 7 |
| 8 #include "src/objects.h" |
| 8 #include "src/utils.h" | 9 #include "src/utils.h" |
| 9 #include "src/zone.h" | 10 #include "src/zone.h" |
| 10 | 11 |
| 11 namespace v8 { | 12 namespace v8 { |
| 12 namespace internal { | 13 namespace internal { |
| 13 | 14 |
| 14 #define FOR_EACH_REG_EXP_TREE_TYPE(VISIT) \ | 15 #define FOR_EACH_REG_EXP_TREE_TYPE(VISIT) \ |
| 15 VISIT(Disjunction) \ | 16 VISIT(Disjunction) \ |
| 16 VISIT(Alternative) \ | 17 VISIT(Alternative) \ |
| 17 VISIT(Assertion) \ | 18 VISIT(Assertion) \ |
| (...skipping 52 matching lines...) | (...skipping 52 matching lines...) |
| 70 }; | 71 }; |
| 71 | 72 |
| 72 | 73 |
| 73 // Represents code units in the range from from_ to to_, both ends are | 74 // Represents code points in the range from from_ to to_, both ends are |
| 74 // inclusive. | 75 // inclusive. |
| 75 class CharacterRange { | 76 class CharacterRange { |
| 76 public: | 77 public: |
| 77 CharacterRange() : from_(0), to_(0) {} | 78 CharacterRange() : from_(0), to_(0) {} |
| 78 // For compatibility with the CHECK_OK macro | 79 // For compatibility with the CHECK_OK macro |
| 79 CharacterRange(void* null) { DCHECK_NULL(null); } // NOLINT | 80 CharacterRange(void* null) { DCHECK_NULL(null); } // NOLINT |
| 80 CharacterRange(uc16 from, uc16 to) : from_(from), to_(to) {} | 81 CharacterRange(uc32 from, uc32 to) : from_(from), to_(to) {} |
| 81 static void AddClassEscape(uc16 type, ZoneList<CharacterRange>* ranges, | 82 static void AddClassEscape(uc16 type, ZoneList<CharacterRange>* ranges, |
| 82 Zone* zone); | 83 Zone* zone); |
| 83 static Vector<const int> GetWordBounds(); | 84 static Vector<const int> GetWordBounds(); |
| 84 static inline CharacterRange Singleton(uc16 value) { | 85 static inline CharacterRange Singleton(uc32 value) { |
| 85 return CharacterRange(value, value); | 86 return CharacterRange(value, value); |
| 86 } | 87 } |
| 87 static inline CharacterRange Range(uc16 from, uc16 to) { | 88 static inline CharacterRange Range(uc32 from, uc32 to) { |
| 88 DCHECK(from <= to); | 89 DCHECK(0 <= from && to <= String::kMaxCodePoint); |
| 90 DCHECK(static_cast<uint32_t>(from) <= static_cast<uint32_t>(to)); |
| 89 return CharacterRange(from, to); | 91 return CharacterRange(from, to); |
| 90 } | 92 } |
| 91 static inline CharacterRange Everything() { | 93 static inline CharacterRange Everything() { |
| 92 return CharacterRange(0, 0xFFFF); | 94 return CharacterRange(0, String::kMaxCodePoint); |
| 93 } | 95 } |
| 94 bool Contains(uc16 i) { return from_ <= i && i <= to_; } | 96 static inline ZoneList<CharacterRange>* List(Zone* zone, |
| 95 uc16 from() const { return from_; } | 97 CharacterRange range) { |
| 96 void set_from(uc16 value) { from_ = value; } | 98 ZoneList<CharacterRange>* list = |
| 97 uc16 to() const { return to_; } | 99 new (zone) ZoneList<CharacterRange>(1, zone); |
| 98 void set_to(uc16 value) { to_ = value; } | 100 list->Add(range, zone); |
| 101 return list; |
| 102 } |
| 103 bool Contains(uc32 i) { return from_ <= i && i <= to_; } |
| 104 uc32 from() const { return from_; } |
| 105 void set_from(uc32 value) { from_ = value; } |
| 106 uc32 to() const { return to_; } |
| 107 void set_to(uc32 value) { to_ = value; } |
| 99 bool is_valid() { return from_ <= to_; } | 108 bool is_valid() { return from_ <= to_; } |
| 100 bool IsEverything(uc16 max) { return from_ == 0 && to_ >= max; } | 109 bool IsEverything(uc16 max) { return from_ == 0 && to_ >= max; } |
| 101 bool IsSingleton() { return (from_ == to_); } | 110 bool IsSingleton() { return (from_ == to_); } |
| 102 void AddCaseEquivalents(Isolate* isolate, Zone* zone, | 111 void AddCaseEquivalents(Isolate* isolate, Zone* zone, |
| 103 ZoneList<CharacterRange>* ranges, bool is_one_byte); | 112 ZoneList<CharacterRange>* ranges, bool is_one_byte); |
| 104 static void Split(ZoneList<CharacterRange>* base, Vector<const int> overlay, | |
| 105 ZoneList<CharacterRange>** included, | |
| 106 ZoneList<CharacterRange>** excluded, Zone* zone); | |
| 107 // Whether a range list is in canonical form: Ranges ordered by from value, | 113 // Whether a range list is in canonical form: Ranges ordered by from value, |
| 108 // and ranges non-overlapping and non-adjacent. | 114 // and ranges non-overlapping and non-adjacent. |
| 109 static bool IsCanonical(ZoneList<CharacterRange>* ranges); | 115 static bool IsCanonical(ZoneList<CharacterRange>* ranges); |
| 110 // Convert range list to canonical form. The characters covered by the ranges | 116 // Convert range list to canonical form. The characters covered by the ranges |
| 111 // will still be the same, but no character is in more than one range, and | 117 // will still be the same, but no character is in more than one range, and |
| 112 // adjacent ranges are merged. The resulting list may be shorter than the | 118 // adjacent ranges are merged. The resulting list may be shorter than the |
| 113 // original, but cannot be longer. | 119 // original, but cannot be longer. |
| 114 static void Canonicalize(ZoneList<CharacterRange>* ranges); | 120 static void Canonicalize(ZoneList<CharacterRange>* ranges); |
| 115 // Negate the contents of a character range in canonical form. | 121 // Negate the contents of a character range in canonical form. |
| 116 static void Negate(ZoneList<CharacterRange>* src, | 122 static void Negate(ZoneList<CharacterRange>* src, |
| 117 ZoneList<CharacterRange>* dst, Zone* zone); | 123 ZoneList<CharacterRange>* dst, Zone* zone); |
| 118 static const int kStartMarker = (1 << 24); | 124 static const int kStartMarker = (1 << 24); |
| 119 static const int kPayloadMask = (1 << 24) - 1; | 125 static const int kPayloadMask = (1 << 24) - 1; |
| 120 | 126 |
| 121 private: | 127 private: |
| 122 uc16 from_; | 128 uc32 from_; |
| 123 uc16 to_; | 129 uc32 to_; |
| 124 }; | 130 }; |
| 125 | 131 |
| 126 | 132 |
| 127 class CharacterSet final BASE_EMBEDDED { | 133 class CharacterSet final BASE_EMBEDDED { |
| 128 public: | 134 public: |
| 129 explicit CharacterSet(uc16 standard_set_type) | 135 explicit CharacterSet(uc16 standard_set_type) |
| 130 : ranges_(NULL), standard_set_type_(standard_set_type) {} | 136 : ranges_(NULL), standard_set_type_(standard_set_type) {} |
| 131 explicit CharacterSet(ZoneList<CharacterRange>* ranges) | 137 explicit CharacterSet(ZoneList<CharacterRange>* ranges) |
| 132 : ranges_(ranges), standard_set_type_(0) {} | 138 : ranges_(ranges), standard_set_type_(0) {} |
| 133 ZoneList<CharacterRange>* ranges(Zone* zone); | 139 ZoneList<CharacterRange>* ranges(Zone* zone); |
| (...skipping 146 matching lines...) | (...skipping 146 matching lines...) |
| 280 class RegExpCharacterClass final : public RegExpTree { | 286 class RegExpCharacterClass final : public RegExpTree { |
| 281 public: | 287 public: |
| 282 RegExpCharacterClass(ZoneList<CharacterRange>* ranges, bool is_negated) | 288 RegExpCharacterClass(ZoneList<CharacterRange>* ranges, bool is_negated) |
| 283 : set_(ranges), is_negated_(is_negated) {} | 289 : set_(ranges), is_negated_(is_negated) {} |
| 284 explicit RegExpCharacterClass(uc16 type) : set_(type), is_negated_(false) {} | 290 explicit RegExpCharacterClass(uc16 type) : set_(type), is_negated_(false) {} |
| 285 void* Accept(RegExpVisitor* visitor, void* data) override; | 291 void* Accept(RegExpVisitor* visitor, void* data) override; |
| 286 RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override; | 292 RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override; |
| 287 RegExpCharacterClass* AsCharacterClass() override; | 293 RegExpCharacterClass* AsCharacterClass() override; |
| 288 bool IsCharacterClass() override; | 294 bool IsCharacterClass() override; |
| 289 bool IsTextElement() override { return true; } | 295 bool IsTextElement() override { return true; } |
| 296 bool NeedsDesugaringForUnicode(Zone* zone); |
| 290 int min_match() override { return 1; } | 297 int min_match() override { return 1; } |
| 291 int max_match() override { return 1; } | 298 int max_match() override { return 1; } |
| 292 void AppendToText(RegExpText* text, Zone* zone) override; | 299 void AppendToText(RegExpText* text, Zone* zone) override; |
| 293 CharacterSet character_set() { return set_; } | 300 CharacterSet character_set() { return set_; } |
| 294 // TODO(lrn): Remove need for complex version of is_standard that | 301 // TODO(lrn): Remove need for complex version of is_standard that |
| 295 // recognizes a mangled standard set and just do { return set_.is_special(); } | 302 // recognizes a mangled standard set and just do { return set_.is_special(); } |
| 296 bool is_standard(Zone* zone); | 303 bool is_standard(Zone* zone); |
| 297 // Returns a value representing the standard character set if is_standard() | 304 // Returns a value representing the standard character set if is_standard() |
| 298 // returns true. | 305 // returns true. |
| 299 // Currently used values are: | 306 // Currently used values are: |
| (...skipping 144 matching lines...) | (...skipping 144 matching lines...) |
| 444 bool IsLookaround() override; | 451 bool IsLookaround() override; |
| 445 bool IsAnchoredAtStart() override; | 452 bool IsAnchoredAtStart() override; |
| 446 int min_match() override { return 0; } | 453 int min_match() override { return 0; } |
| 447 int max_match() override { return 0; } | 454 int max_match() override { return 0; } |
| 448 RegExpTree* body() { return body_; } | 455 RegExpTree* body() { return body_; } |
| 449 bool is_positive() { return is_positive_; } | 456 bool is_positive() { return is_positive_; } |
| 450 int capture_count() { return capture_count_; } | 457 int capture_count() { return capture_count_; } |
| 451 int capture_from() { return capture_from_; } | 458 int capture_from() { return capture_from_; } |
| 452 Type type() { return type_; } | 459 Type type() { return type_; } |
| 453 | 460 |
| 461 class Builder { |
| 462 public: |
| 463 Builder(bool is_positive, RegExpNode* on_success, |
| 464 int stack_pointer_register, int position_register, |
| 465 int capture_register_count = 0, int capture_register_start = 0); |
| 466 RegExpNode* on_match_success() { return on_match_success_; } |
| 467 RegExpNode* ForMatch(RegExpNode* match); |
| 468 |
| 469 private: |
| 470 bool is_positive_; |
| 471 RegExpNode* on_match_success_; |
| 472 RegExpNode* on_success_; |
| 473 int stack_pointer_register_; |
| 474 int position_register_; |
| 475 }; |
| 476 |
| 454 private: | 477 private: |
| 455 RegExpTree* body_; | 478 RegExpTree* body_; |
| 456 bool is_positive_; | 479 bool is_positive_; |
| 457 int capture_count_; | 480 int capture_count_; |
| 458 int capture_from_; | 481 int capture_from_; |
| 459 Type type_; | 482 Type type_; |
| 460 }; | 483 }; |
| 461 | 484 |
| 462 | 485 |
| 463 class RegExpBackReference final : public RegExpTree { | 486 class RegExpBackReference final : public RegExpTree { |
| (...skipping 23 matching lines...) | (...skipping 23 matching lines...) |
| 487 RegExpEmpty* AsEmpty() override; | 510 RegExpEmpty* AsEmpty() override; |
| 488 bool IsEmpty() override; | 511 bool IsEmpty() override; |
| 489 int min_match() override { return 0; } | 512 int min_match() override { return 0; } |
| 490 int max_match() override { return 0; } | 513 int max_match() override { return 0; } |
| 491 }; | 514 }; |
| 492 | 515 |
| 493 } // namespace internal | 516 } // namespace internal |
| 494 } // namespace v8 | 517 } // namespace v8 |
| 495 | 518 |
| 496 #endif // V8_REGEXP_REGEXP_AST_H_ | 519 #endif // V8_REGEXP_REGEXP_AST_H_ |
| OLD | NEW |