Index: runtime/vm/regexp_ast.h |
diff --git a/runtime/vm/regexp_ast.h b/runtime/vm/regexp_ast.h |
new file mode 100644 |
index 0000000000000000000000000000000000000000..d026bf01de6e2e30c14ac78cae90df4b5b15caad |
--- /dev/null |
+++ b/runtime/vm/regexp_ast.h |
@@ -0,0 +1,393 @@ |
+// Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file |
+// for details. All rights reserved. Use of this source code is governed by a |
+// BSD-style license that can be found in the LICENSE file. |
+ |
+#ifndef VM_REGEXP_AST_H_ |
+#define VM_REGEXP_AST_H_ |
+ |
+#include "platform/globals.h" |
+#include "platform/utils.h" |
+#include "vm/allocation.h" |
+#include "vm/regexp.h" |
+ |
+namespace dart { |
+ |
+// Forward declarations. |
+class RegExpAlternative; |
+class RegExpAssertion; |
+class RegExpAtom; |
+class RegExpBackReference; |
+class RegExpCapture; |
+class RegExpCharacterClass; |
+class RegExpCompiler; |
+class RegExpDisjunction; |
+class RegExpEmpty; |
+class RegExpLookahead; |
+class RegExpQuantifier; |
+class RegExpText; |
+ |
+ |
+class RegExpVisitor : public ValueObject { |
+ public: |
+ virtual ~RegExpVisitor() { } |
+#define MAKE_CASE(Name) \ |
+ virtual void* Visit##Name(RegExp##Name*, void* data) = 0; |
+ FOR_EACH_REG_EXP_TREE_TYPE(MAKE_CASE) |
+#undef MAKE_CASE |
+}; |
+ |
+ |
+class RegExpTree : public ZoneAllocated { |
+ public: |
+ static const intptr_t kInfinity = kMaxInt32; |
+ virtual ~RegExpTree() {} |
+ virtual void* Accept(RegExpVisitor* visitor, void* data) = 0; |
+ virtual RegExpNode* ToNode(RegExpCompiler* compiler, |
+ RegExpNode* on_success) = 0; |
+ virtual bool IsTextElement() const { return false; } |
+ virtual bool IsAnchoredAtStart() const { return false; } |
+ virtual bool IsAnchoredAtEnd() const { return false; } |
+ virtual intptr_t min_match() const = 0; |
+ virtual intptr_t max_match() const = 0; |
+ // Returns the interval of registers used for captures within this |
+ // expression. |
+ virtual Interval CaptureRegisters() const { return Interval::Empty(); } |
+ virtual void AppendToText(RegExpText* text); |
+ void Print(); |
+#define MAKE_ASTYPE(Name) \ |
+ virtual RegExp##Name* As##Name(); \ |
+ virtual bool Is##Name() const; |
+ FOR_EACH_REG_EXP_TREE_TYPE(MAKE_ASTYPE) |
+#undef MAKE_ASTYPE |
+}; |
+ |
+ |
+class RegExpText : public RegExpTree { |
+ public: |
+ RegExpText() : elements_(2), length_(0) {} |
+ virtual void* Accept(RegExpVisitor* visitor, void* data); |
+ virtual RegExpNode* ToNode(RegExpCompiler* compiler, |
+ RegExpNode* on_success); |
+ virtual RegExpText* AsText(); |
+ virtual bool IsText() const; |
+ virtual bool IsTextElement() const { return true; } |
+ virtual intptr_t min_match() const { return length_; } |
+ virtual intptr_t max_match() const { return length_; } |
+ virtual void AppendToText(RegExpText* text); |
+ void AddElement(TextElement elm) { |
+ elements_.Add(elm); |
+ length_ += elm.length(); |
+ } |
+ GrowableArray<TextElement>* elements() { return &elements_; } |
+ private: |
+ GrowableArray<TextElement> elements_; |
+ intptr_t length_; |
+}; |
+ |
+ |
+class RegExpQuantifier : public RegExpTree { |
+ public: |
+ enum QuantifierType { GREEDY, NON_GREEDY, POSSESSIVE }; |
+ RegExpQuantifier(intptr_t min, intptr_t max, |
+ QuantifierType type, RegExpTree* body) |
+ : body_(body), |
+ min_(min), |
+ max_(max), |
+ min_match_(min * body->min_match()), |
+ quantifier_type_(type) { |
+ if (max > 0 && body->max_match() > kInfinity / max) { |
+ max_match_ = kInfinity; |
+ } else { |
+ max_match_ = max * body->max_match(); |
+ } |
+ } |
+ virtual void* Accept(RegExpVisitor* visitor, void* data); |
+ virtual RegExpNode* ToNode(RegExpCompiler* compiler, |
+ RegExpNode* on_success); |
+ static RegExpNode* ToNode(intptr_t min, |
+ intptr_t max, |
+ bool is_greedy, |
+ RegExpTree* body, |
+ RegExpCompiler* compiler, |
+ RegExpNode* on_success, |
+ bool not_at_start = false); |
+ virtual RegExpQuantifier* AsQuantifier(); |
+ virtual Interval CaptureRegisters() const; |
+ virtual bool IsQuantifier() const; |
+ virtual intptr_t min_match() const { return min_match_; } |
+ virtual intptr_t max_match() const { return max_match_; } |
+ intptr_t min() const { return min_; } |
+ intptr_t max() const { return max_; } |
+ bool is_possessive() const { return quantifier_type_ == POSSESSIVE; } |
+ bool is_non_greedy() const { return quantifier_type_ == NON_GREEDY; } |
+ bool is_greedy() const { return quantifier_type_ == GREEDY; } |
+ RegExpTree* body() const { return body_; } |
+ |
+ private: |
+ RegExpTree* body_; |
+ intptr_t min_; |
+ intptr_t max_; |
+ intptr_t min_match_; |
+ intptr_t max_match_; |
+ QuantifierType quantifier_type_; |
+}; |
+ |
+ |
+class RegExpCapture : public RegExpTree { |
+ public: |
+ explicit RegExpCapture(RegExpTree* body, intptr_t index) |
+ : body_(body), index_(index) { } |
+ virtual void* Accept(RegExpVisitor* visitor, void* data); |
+ virtual RegExpNode* ToNode(RegExpCompiler* compiler, |
+ RegExpNode* on_success); |
+ static RegExpNode* ToNode(RegExpTree* body, |
+ intptr_t index, |
+ RegExpCompiler* compiler, |
+ RegExpNode* on_success); |
+ virtual RegExpCapture* AsCapture(); |
+ virtual bool IsAnchoredAtStart() const; |
+ virtual bool IsAnchoredAtEnd() const; |
+ virtual Interval CaptureRegisters() const; |
+ virtual bool IsCapture() const; |
+ virtual intptr_t min_match() const { return body_->min_match(); } |
+ virtual intptr_t max_match() const { return body_->max_match(); } |
+ RegExpTree* body() const { return body_; } |
+ intptr_t index() const { return index_; } |
+ static intptr_t StartRegister(intptr_t index) { return index * 2; } |
+ static intptr_t EndRegister(intptr_t index) { return index * 2 + 1; } |
+ |
+ private: |
+ RegExpTree* body_; |
+ intptr_t index_; |
+}; |
+ |
+ |
+class RegExpDisjunction : public RegExpTree { |
+ public: |
+ explicit RegExpDisjunction(ZoneGrowableArray<RegExpTree*>* alternatives); |
+ virtual void* Accept(RegExpVisitor* visitor, void* data); |
+ virtual RegExpNode* ToNode(RegExpCompiler* compiler, |
+ RegExpNode* on_success); |
+ virtual RegExpDisjunction* AsDisjunction(); |
+ virtual Interval CaptureRegisters() const; |
+ virtual bool IsDisjunction() const; |
+ virtual bool IsAnchoredAtStart() const; |
+ virtual bool IsAnchoredAtEnd() const; |
+ virtual intptr_t min_match() const { return min_match_; } |
+ virtual intptr_t max_match() const { return max_match_; } |
+ ZoneGrowableArray<RegExpTree*>* alternatives() const { return alternatives_; } |
+ private: |
+ ZoneGrowableArray<RegExpTree*>* alternatives_; |
+ intptr_t min_match_; |
+ intptr_t max_match_; |
+}; |
+ |
+ |
+class RegExpAlternative : public RegExpTree { |
+ public: |
+ explicit RegExpAlternative(ZoneGrowableArray<RegExpTree*>* nodes); |
+ virtual void* Accept(RegExpVisitor* visitor, void* data); |
+ virtual RegExpNode* ToNode(RegExpCompiler* compiler, |
+ RegExpNode* on_success); |
+ virtual RegExpAlternative* AsAlternative(); |
+ virtual Interval CaptureRegisters() const; |
+ virtual bool IsAlternative() const; |
+ virtual bool IsAnchoredAtStart() const; |
+ virtual bool IsAnchoredAtEnd() const; |
+ virtual intptr_t min_match() const { return min_match_; } |
+ virtual intptr_t max_match() const { return max_match_; } |
+ ZoneGrowableArray<RegExpTree*>* nodes() const { return nodes_; } |
+ private: |
+ ZoneGrowableArray<RegExpTree*>* nodes_; |
+ intptr_t min_match_; |
+ intptr_t max_match_; |
+}; |
+ |
+ |
+class RegExpAssertion : public RegExpTree { |
+ public: |
+ enum AssertionType { |
+ START_OF_LINE, |
+ START_OF_INPUT, |
+ END_OF_LINE, |
+ END_OF_INPUT, |
+ BOUNDARY, |
+ NON_BOUNDARY |
+ }; |
+ explicit RegExpAssertion(AssertionType type) : assertion_type_(type) { } |
+ virtual void* Accept(RegExpVisitor* visitor, void* data); |
+ virtual RegExpNode* ToNode(RegExpCompiler* compiler, |
+ RegExpNode* on_success); |
+ virtual RegExpAssertion* AsAssertion(); |
+ virtual bool IsAssertion() const; |
+ virtual bool IsAnchoredAtStart() const; |
+ virtual bool IsAnchoredAtEnd() const; |
+ virtual intptr_t min_match() const { return 0; } |
+ virtual intptr_t max_match() const { return 0; } |
+ AssertionType assertion_type() const { return assertion_type_; } |
+ private: |
+ AssertionType assertion_type_; |
+}; |
+ |
+ |
+class CharacterSet : public ValueObject { |
+ public: |
+ explicit CharacterSet(uint16_t standard_set_type) |
+ : ranges_(NULL), |
+ standard_set_type_(standard_set_type) {} |
+ explicit CharacterSet(ZoneGrowableArray<CharacterRange>* ranges) |
+ : ranges_(ranges), |
+ standard_set_type_(0) {} |
+ CharacterSet(const CharacterSet& that) |
+ : ValueObject(), |
+ ranges_(that.ranges_), |
+ standard_set_type_(that.standard_set_type_) {} |
+ ZoneGrowableArray<CharacterRange>* ranges(); |
+ uint16_t standard_set_type() const { return standard_set_type_; } |
+ void set_standard_set_type(uint16_t special_set_type) { |
+ standard_set_type_ = special_set_type; |
+ } |
+ bool is_standard() { return standard_set_type_ != 0; } |
+ void Canonicalize(); |
+ private: |
+ ZoneGrowableArray<CharacterRange>* ranges_; |
+ // If non-zero, the value represents a standard set (e.g., all whitespace |
+ // characters) without having to expand the ranges. |
+ uint16_t standard_set_type_; |
+}; |
+ |
+ |
+class RegExpCharacterClass : public RegExpTree { |
+ public: |
+ RegExpCharacterClass(ZoneGrowableArray<CharacterRange>* ranges, |
+ bool is_negated) |
+ : set_(ranges), |
+ is_negated_(is_negated) { } |
+ explicit RegExpCharacterClass(uint16_t type) |
+ : set_(type), |
+ is_negated_(false) { } |
+ virtual void* Accept(RegExpVisitor* visitor, void* data); |
+ virtual RegExpNode* ToNode(RegExpCompiler* compiler, |
+ RegExpNode* on_success); |
+ virtual RegExpCharacterClass* AsCharacterClass(); |
+ virtual bool IsCharacterClass() const; |
+ virtual bool IsTextElement() const { return true; } |
+ virtual intptr_t min_match() const { return 1; } |
+ virtual intptr_t max_match() const { return 1; } |
+ virtual void AppendToText(RegExpText* text); |
+ CharacterSet character_set() const { return set_; } |
+ // TODO(lrn): Remove need for complex version if is_standard that |
+ // recognizes a mangled standard set and just do { return set_.is_special(); } |
+ bool is_standard(); |
+ // Returns a value representing the standard character set if is_standard() |
+ // returns true. |
+ // Currently used values are: |
+ // s : unicode whitespace |
+ // S : unicode non-whitespace |
+ // w : ASCII word character (digit, letter, underscore) |
+ // W : non-ASCII word character |
+ // d : ASCII digit |
+ // D : non-ASCII digit |
+ // . : non-unicode non-newline |
+ // * : All characters |
+ uint16_t standard_type() const { return set_.standard_set_type(); } |
+ ZoneGrowableArray<CharacterRange>* ranges() { |
+ return set_.ranges(); |
+ } |
+ bool is_negated() const { return is_negated_; } |
+ |
+ private: |
+ CharacterSet set_; |
+ bool is_negated_; |
+}; |
+ |
+ |
+class RegExpAtom : public RegExpTree { |
+ public: |
+ explicit RegExpAtom(ZoneGrowableArray<uint16_t>* data) : data_(data) { } |
+ virtual void* Accept(RegExpVisitor* visitor, void* data); |
+ virtual RegExpNode* ToNode(RegExpCompiler* compiler, |
+ RegExpNode* on_success); |
+ virtual RegExpAtom* AsAtom(); |
+ virtual bool IsAtom() const; |
+ virtual bool IsTextElement() const { return true; } |
+ virtual intptr_t min_match() const { return data_->length(); } |
+ virtual intptr_t max_match() const { return data_->length(); } |
+ virtual void AppendToText(RegExpText* text); |
+ ZoneGrowableArray<uint16_t>* data() const { return data_; } |
+ intptr_t length() const { return data_->length(); } |
+ private: |
+ ZoneGrowableArray<uint16_t>* data_; |
+}; |
+ |
+ |
+class RegExpLookahead : public RegExpTree { |
+ public: |
+ RegExpLookahead(RegExpTree* body, |
+ bool is_positive, |
+ intptr_t capture_count, |
+ intptr_t capture_from) |
+ : body_(body), |
+ is_positive_(is_positive), |
+ capture_count_(capture_count), |
+ capture_from_(capture_from) { } |
+ |
+ virtual void* Accept(RegExpVisitor* visitor, void* data); |
+ virtual RegExpNode* ToNode(RegExpCompiler* compiler, |
+ RegExpNode* on_success); |
+ virtual RegExpLookahead* AsLookahead(); |
+ virtual Interval CaptureRegisters() const; |
+ virtual bool IsLookahead() const; |
+ virtual bool IsAnchoredAtStart() const; |
+ virtual intptr_t min_match() const { return 0; } |
+ virtual intptr_t max_match() const { return 0; } |
+ RegExpTree* body() const { return body_; } |
+ bool is_positive() const { return is_positive_; } |
+ intptr_t capture_count() const { return capture_count_; } |
+ intptr_t capture_from() const { return capture_from_; } |
+ |
+ private: |
+ RegExpTree* body_; |
+ bool is_positive_; |
+ intptr_t capture_count_; |
+ intptr_t capture_from_; |
+}; |
+ |
+ |
+class RegExpBackReference : public RegExpTree { |
+ public: |
+ explicit RegExpBackReference(RegExpCapture* capture) |
+ : capture_(capture) { } |
+ virtual void* Accept(RegExpVisitor* visitor, void* data); |
+ virtual RegExpNode* ToNode(RegExpCompiler* compiler, |
+ RegExpNode* on_success); |
+ virtual RegExpBackReference* AsBackReference(); |
+ virtual bool IsBackReference() const; |
+ virtual intptr_t min_match() const { return 0; } |
+ virtual intptr_t max_match() const { return capture_->max_match(); } |
+ intptr_t index() const { return capture_->index(); } |
+ RegExpCapture* capture() const { return capture_; } |
+ private: |
+ RegExpCapture* capture_; |
+}; |
+ |
+ |
+class RegExpEmpty : public RegExpTree { |
+ public: |
+ RegExpEmpty() { } |
+ virtual void* Accept(RegExpVisitor* visitor, void* data); |
+ virtual RegExpNode* ToNode(RegExpCompiler* compiler, |
+ RegExpNode* on_success); |
+ virtual RegExpEmpty* AsEmpty(); |
+ virtual bool IsEmpty() const; |
+ virtual intptr_t min_match() const { return 0; } |
+ virtual intptr_t max_match() const { return 0; } |
+ static RegExpEmpty* GetInstance() { |
+ static RegExpEmpty* instance = ::new RegExpEmpty(); |
+ return instance; |
+ } |
+}; |
+ |
+} // namespace dart |
+ |
+#endif // VM_REGEXP_AST_H_ |