| Index: runtime/vm/regexp_ast.h
|
| diff --git a/runtime/vm/regexp_ast.h b/runtime/vm/regexp_ast.h
|
| new file mode 100644
|
| index 0000000000000000000000000000000000000000..d026bf01de6e2e30c14ac78cae90df4b5b15caad
|
| --- /dev/null
|
| +++ b/runtime/vm/regexp_ast.h
|
| @@ -0,0 +1,393 @@
|
| +// Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file
|
| +// for details. All rights reserved. Use of this source code is governed by a
|
| +// BSD-style license that can be found in the LICENSE file.
|
| +
|
| +#ifndef VM_REGEXP_AST_H_
|
| +#define VM_REGEXP_AST_H_
|
| +
|
| +#include "platform/globals.h"
|
| +#include "platform/utils.h"
|
| +#include "vm/allocation.h"
|
| +#include "vm/regexp.h"
|
| +
|
| +namespace dart {
|
| +
|
| +// Forward declarations.
|
| +class RegExpAlternative;
|
| +class RegExpAssertion;
|
| +class RegExpAtom;
|
| +class RegExpBackReference;
|
| +class RegExpCapture;
|
| +class RegExpCharacterClass;
|
| +class RegExpCompiler;
|
| +class RegExpDisjunction;
|
| +class RegExpEmpty;
|
| +class RegExpLookahead;
|
| +class RegExpQuantifier;
|
| +class RegExpText;
|
| +
|
| +
|
| +class RegExpVisitor : public ValueObject {
|
| + public:
|
| + virtual ~RegExpVisitor() { }
|
| +#define MAKE_CASE(Name) \
|
| + virtual void* Visit##Name(RegExp##Name*, void* data) = 0;
|
| + FOR_EACH_REG_EXP_TREE_TYPE(MAKE_CASE)
|
| +#undef MAKE_CASE
|
| +};
|
| +
|
| +
|
| +class RegExpTree : public ZoneAllocated {
|
| + public:
|
| + static const intptr_t kInfinity = kMaxInt32;
|
| + virtual ~RegExpTree() {}
|
| + virtual void* Accept(RegExpVisitor* visitor, void* data) = 0;
|
| + virtual RegExpNode* ToNode(RegExpCompiler* compiler,
|
| + RegExpNode* on_success) = 0;
|
| + virtual bool IsTextElement() const { return false; }
|
| + virtual bool IsAnchoredAtStart() const { return false; }
|
| + virtual bool IsAnchoredAtEnd() const { return false; }
|
| + virtual intptr_t min_match() const = 0;
|
| + virtual intptr_t max_match() const = 0;
|
| + // Returns the interval of registers used for captures within this
|
| + // expression.
|
| + virtual Interval CaptureRegisters() const { return Interval::Empty(); }
|
| + virtual void AppendToText(RegExpText* text);
|
| + void Print();
|
| +#define MAKE_ASTYPE(Name) \
|
| + virtual RegExp##Name* As##Name(); \
|
| + virtual bool Is##Name() const;
|
| + FOR_EACH_REG_EXP_TREE_TYPE(MAKE_ASTYPE)
|
| +#undef MAKE_ASTYPE
|
| +};
|
| +
|
| +
|
| +class RegExpText : public RegExpTree {
|
| + public:
|
| + RegExpText() : elements_(2), length_(0) {}
|
| + virtual void* Accept(RegExpVisitor* visitor, void* data);
|
| + virtual RegExpNode* ToNode(RegExpCompiler* compiler,
|
| + RegExpNode* on_success);
|
| + virtual RegExpText* AsText();
|
| + virtual bool IsText() const;
|
| + virtual bool IsTextElement() const { return true; }
|
| + virtual intptr_t min_match() const { return length_; }
|
| + virtual intptr_t max_match() const { return length_; }
|
| + virtual void AppendToText(RegExpText* text);
|
| + void AddElement(TextElement elm) {
|
| + elements_.Add(elm);
|
| + length_ += elm.length();
|
| + }
|
| + GrowableArray<TextElement>* elements() { return &elements_; }
|
| + private:
|
| + GrowableArray<TextElement> elements_;
|
| + intptr_t length_;
|
| +};
|
| +
|
| +
|
| +class RegExpQuantifier : public RegExpTree {
|
| + public:
|
| + enum QuantifierType { GREEDY, NON_GREEDY, POSSESSIVE };
|
| + RegExpQuantifier(intptr_t min, intptr_t max,
|
| + QuantifierType type, RegExpTree* body)
|
| + : body_(body),
|
| + min_(min),
|
| + max_(max),
|
| + min_match_(min * body->min_match()),
|
| + quantifier_type_(type) {
|
| + if (max > 0 && body->max_match() > kInfinity / max) {
|
| + max_match_ = kInfinity;
|
| + } else {
|
| + max_match_ = max * body->max_match();
|
| + }
|
| + }
|
| + virtual void* Accept(RegExpVisitor* visitor, void* data);
|
| + virtual RegExpNode* ToNode(RegExpCompiler* compiler,
|
| + RegExpNode* on_success);
|
| + static RegExpNode* ToNode(intptr_t min,
|
| + intptr_t max,
|
| + bool is_greedy,
|
| + RegExpTree* body,
|
| + RegExpCompiler* compiler,
|
| + RegExpNode* on_success,
|
| + bool not_at_start = false);
|
| + virtual RegExpQuantifier* AsQuantifier();
|
| + virtual Interval CaptureRegisters() const;
|
| + virtual bool IsQuantifier() const;
|
| + virtual intptr_t min_match() const { return min_match_; }
|
| + virtual intptr_t max_match() const { return max_match_; }
|
| + intptr_t min() const { return min_; }
|
| + intptr_t max() const { return max_; }
|
| + bool is_possessive() const { return quantifier_type_ == POSSESSIVE; }
|
| + bool is_non_greedy() const { return quantifier_type_ == NON_GREEDY; }
|
| + bool is_greedy() const { return quantifier_type_ == GREEDY; }
|
| + RegExpTree* body() const { return body_; }
|
| +
|
| + private:
|
| + RegExpTree* body_;
|
| + intptr_t min_;
|
| + intptr_t max_;
|
| + intptr_t min_match_;
|
| + intptr_t max_match_;
|
| + QuantifierType quantifier_type_;
|
| +};
|
| +
|
| +
|
| +class RegExpCapture : public RegExpTree {
|
| + public:
|
| + explicit RegExpCapture(RegExpTree* body, intptr_t index)
|
| + : body_(body), index_(index) { }
|
| + virtual void* Accept(RegExpVisitor* visitor, void* data);
|
| + virtual RegExpNode* ToNode(RegExpCompiler* compiler,
|
| + RegExpNode* on_success);
|
| + static RegExpNode* ToNode(RegExpTree* body,
|
| + intptr_t index,
|
| + RegExpCompiler* compiler,
|
| + RegExpNode* on_success);
|
| + virtual RegExpCapture* AsCapture();
|
| + virtual bool IsAnchoredAtStart() const;
|
| + virtual bool IsAnchoredAtEnd() const;
|
| + virtual Interval CaptureRegisters() const;
|
| + virtual bool IsCapture() const;
|
| + virtual intptr_t min_match() const { return body_->min_match(); }
|
| + virtual intptr_t max_match() const { return body_->max_match(); }
|
| + RegExpTree* body() const { return body_; }
|
| + intptr_t index() const { return index_; }
|
| + static intptr_t StartRegister(intptr_t index) { return index * 2; }
|
| + static intptr_t EndRegister(intptr_t index) { return index * 2 + 1; }
|
| +
|
| + private:
|
| + RegExpTree* body_;
|
| + intptr_t index_;
|
| +};
|
| +
|
| +
|
| +class RegExpDisjunction : public RegExpTree {
|
| + public:
|
| + explicit RegExpDisjunction(ZoneGrowableArray<RegExpTree*>* alternatives);
|
| + virtual void* Accept(RegExpVisitor* visitor, void* data);
|
| + virtual RegExpNode* ToNode(RegExpCompiler* compiler,
|
| + RegExpNode* on_success);
|
| + virtual RegExpDisjunction* AsDisjunction();
|
| + virtual Interval CaptureRegisters() const;
|
| + virtual bool IsDisjunction() const;
|
| + virtual bool IsAnchoredAtStart() const;
|
| + virtual bool IsAnchoredAtEnd() const;
|
| + virtual intptr_t min_match() const { return min_match_; }
|
| + virtual intptr_t max_match() const { return max_match_; }
|
| + ZoneGrowableArray<RegExpTree*>* alternatives() const { return alternatives_; }
|
| + private:
|
| + ZoneGrowableArray<RegExpTree*>* alternatives_;
|
| + intptr_t min_match_;
|
| + intptr_t max_match_;
|
| +};
|
| +
|
| +
|
| +class RegExpAlternative : public RegExpTree {
|
| + public:
|
| + explicit RegExpAlternative(ZoneGrowableArray<RegExpTree*>* nodes);
|
| + virtual void* Accept(RegExpVisitor* visitor, void* data);
|
| + virtual RegExpNode* ToNode(RegExpCompiler* compiler,
|
| + RegExpNode* on_success);
|
| + virtual RegExpAlternative* AsAlternative();
|
| + virtual Interval CaptureRegisters() const;
|
| + virtual bool IsAlternative() const;
|
| + virtual bool IsAnchoredAtStart() const;
|
| + virtual bool IsAnchoredAtEnd() const;
|
| + virtual intptr_t min_match() const { return min_match_; }
|
| + virtual intptr_t max_match() const { return max_match_; }
|
| + ZoneGrowableArray<RegExpTree*>* nodes() const { return nodes_; }
|
| + private:
|
| + ZoneGrowableArray<RegExpTree*>* nodes_;
|
| + intptr_t min_match_;
|
| + intptr_t max_match_;
|
| +};
|
| +
|
| +
|
| +class RegExpAssertion : public RegExpTree {
|
| + public:
|
| + enum AssertionType {
|
| + START_OF_LINE,
|
| + START_OF_INPUT,
|
| + END_OF_LINE,
|
| + END_OF_INPUT,
|
| + BOUNDARY,
|
| + NON_BOUNDARY
|
| + };
|
| + explicit RegExpAssertion(AssertionType type) : assertion_type_(type) { }
|
| + virtual void* Accept(RegExpVisitor* visitor, void* data);
|
| + virtual RegExpNode* ToNode(RegExpCompiler* compiler,
|
| + RegExpNode* on_success);
|
| + virtual RegExpAssertion* AsAssertion();
|
| + virtual bool IsAssertion() const;
|
| + virtual bool IsAnchoredAtStart() const;
|
| + virtual bool IsAnchoredAtEnd() const;
|
| + virtual intptr_t min_match() const { return 0; }
|
| + virtual intptr_t max_match() const { return 0; }
|
| + AssertionType assertion_type() const { return assertion_type_; }
|
| + private:
|
| + AssertionType assertion_type_;
|
| +};
|
| +
|
| +
|
| +class CharacterSet : public ValueObject {
|
| + public:
|
| + explicit CharacterSet(uint16_t standard_set_type)
|
| + : ranges_(NULL),
|
| + standard_set_type_(standard_set_type) {}
|
| + explicit CharacterSet(ZoneGrowableArray<CharacterRange>* ranges)
|
| + : ranges_(ranges),
|
| + standard_set_type_(0) {}
|
| + CharacterSet(const CharacterSet& that)
|
| + : ValueObject(),
|
| + ranges_(that.ranges_),
|
| + standard_set_type_(that.standard_set_type_) {}
|
| + ZoneGrowableArray<CharacterRange>* ranges();
|
| + uint16_t standard_set_type() const { return standard_set_type_; }
|
| + void set_standard_set_type(uint16_t special_set_type) {
|
| + standard_set_type_ = special_set_type;
|
| + }
|
| + bool is_standard() { return standard_set_type_ != 0; }
|
| + void Canonicalize();
|
| + private:
|
| + ZoneGrowableArray<CharacterRange>* ranges_;
|
| + // If non-zero, the value represents a standard set (e.g., all whitespace
|
| + // characters) without having to expand the ranges.
|
| + uint16_t standard_set_type_;
|
| +};
|
| +
|
| +
|
| +class RegExpCharacterClass : public RegExpTree {
|
| + public:
|
| + RegExpCharacterClass(ZoneGrowableArray<CharacterRange>* ranges,
|
| + bool is_negated)
|
| + : set_(ranges),
|
| + is_negated_(is_negated) { }
|
| + explicit RegExpCharacterClass(uint16_t type)
|
| + : set_(type),
|
| + is_negated_(false) { }
|
| + virtual void* Accept(RegExpVisitor* visitor, void* data);
|
| + virtual RegExpNode* ToNode(RegExpCompiler* compiler,
|
| + RegExpNode* on_success);
|
| + virtual RegExpCharacterClass* AsCharacterClass();
|
| + virtual bool IsCharacterClass() const;
|
| + virtual bool IsTextElement() const { return true; }
|
| + virtual intptr_t min_match() const { return 1; }
|
| + virtual intptr_t max_match() const { return 1; }
|
| + virtual void AppendToText(RegExpText* text);
|
| + CharacterSet character_set() const { return set_; }
|
| + // TODO(lrn): Remove need for complex version if is_standard that
|
| + // recognizes a mangled standard set and just do { return set_.is_special(); }
|
| + bool is_standard();
|
| + // Returns a value representing the standard character set if is_standard()
|
| + // returns true.
|
| + // Currently used values are:
|
| + // s : unicode whitespace
|
| + // S : unicode non-whitespace
|
| + // w : ASCII word character (digit, letter, underscore)
|
| + // W : non-ASCII word character
|
| + // d : ASCII digit
|
| + // D : non-ASCII digit
|
| + // . : non-unicode non-newline
|
| + // * : All characters
|
| + uint16_t standard_type() const { return set_.standard_set_type(); }
|
| + ZoneGrowableArray<CharacterRange>* ranges() {
|
| + return set_.ranges();
|
| + }
|
| + bool is_negated() const { return is_negated_; }
|
| +
|
| + private:
|
| + CharacterSet set_;
|
| + bool is_negated_;
|
| +};
|
| +
|
| +
|
| +class RegExpAtom : public RegExpTree {
|
| + public:
|
| + explicit RegExpAtom(ZoneGrowableArray<uint16_t>* data) : data_(data) { }
|
| + virtual void* Accept(RegExpVisitor* visitor, void* data);
|
| + virtual RegExpNode* ToNode(RegExpCompiler* compiler,
|
| + RegExpNode* on_success);
|
| + virtual RegExpAtom* AsAtom();
|
| + virtual bool IsAtom() const;
|
| + virtual bool IsTextElement() const { return true; }
|
| + virtual intptr_t min_match() const { return data_->length(); }
|
| + virtual intptr_t max_match() const { return data_->length(); }
|
| + virtual void AppendToText(RegExpText* text);
|
| + ZoneGrowableArray<uint16_t>* data() const { return data_; }
|
| + intptr_t length() const { return data_->length(); }
|
| + private:
|
| + ZoneGrowableArray<uint16_t>* data_;
|
| +};
|
| +
|
| +
|
| +class RegExpLookahead : public RegExpTree {
|
| + public:
|
| + RegExpLookahead(RegExpTree* body,
|
| + bool is_positive,
|
| + intptr_t capture_count,
|
| + intptr_t capture_from)
|
| + : body_(body),
|
| + is_positive_(is_positive),
|
| + capture_count_(capture_count),
|
| + capture_from_(capture_from) { }
|
| +
|
| + virtual void* Accept(RegExpVisitor* visitor, void* data);
|
| + virtual RegExpNode* ToNode(RegExpCompiler* compiler,
|
| + RegExpNode* on_success);
|
| + virtual RegExpLookahead* AsLookahead();
|
| + virtual Interval CaptureRegisters() const;
|
| + virtual bool IsLookahead() const;
|
| + virtual bool IsAnchoredAtStart() const;
|
| + virtual intptr_t min_match() const { return 0; }
|
| + virtual intptr_t max_match() const { return 0; }
|
| + RegExpTree* body() const { return body_; }
|
| + bool is_positive() const { return is_positive_; }
|
| + intptr_t capture_count() const { return capture_count_; }
|
| + intptr_t capture_from() const { return capture_from_; }
|
| +
|
| + private:
|
| + RegExpTree* body_;
|
| + bool is_positive_;
|
| + intptr_t capture_count_;
|
| + intptr_t capture_from_;
|
| +};
|
| +
|
| +
|
| +class RegExpBackReference : public RegExpTree {
|
| + public:
|
| + explicit RegExpBackReference(RegExpCapture* capture)
|
| + : capture_(capture) { }
|
| + virtual void* Accept(RegExpVisitor* visitor, void* data);
|
| + virtual RegExpNode* ToNode(RegExpCompiler* compiler,
|
| + RegExpNode* on_success);
|
| + virtual RegExpBackReference* AsBackReference();
|
| + virtual bool IsBackReference() const;
|
| + virtual intptr_t min_match() const { return 0; }
|
| + virtual intptr_t max_match() const { return capture_->max_match(); }
|
| + intptr_t index() const { return capture_->index(); }
|
| + RegExpCapture* capture() const { return capture_; }
|
| + private:
|
| + RegExpCapture* capture_;
|
| +};
|
| +
|
| +
|
| +class RegExpEmpty : public RegExpTree {
|
| + public:
|
| + RegExpEmpty() { }
|
| + virtual void* Accept(RegExpVisitor* visitor, void* data);
|
| + virtual RegExpNode* ToNode(RegExpCompiler* compiler,
|
| + RegExpNode* on_success);
|
| + virtual RegExpEmpty* AsEmpty();
|
| + virtual bool IsEmpty() const;
|
| + virtual intptr_t min_match() const { return 0; }
|
| + virtual intptr_t max_match() const { return 0; }
|
| + static RegExpEmpty* GetInstance() {
|
| + static RegExpEmpty* instance = ::new RegExpEmpty();
|
| + return instance;
|
| + }
|
| +};
|
| +
|
| +} // namespace dart
|
| +
|
| +#endif // VM_REGEXP_AST_H_
|
|
|