Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(319)

Unified Diff: runtime/vm/regexp_ast.h

Issue 539153002: Port and integrate the irregexp engine from V8 (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: Addressed Ivan's comments. Created 6 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « runtime/vm/regexp_assembler.cc ('k') | runtime/vm/regexp_ast.cc » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: runtime/vm/regexp_ast.h
diff --git a/runtime/vm/regexp_ast.h b/runtime/vm/regexp_ast.h
new file mode 100644
index 0000000000000000000000000000000000000000..d026bf01de6e2e30c14ac78cae90df4b5b15caad
--- /dev/null
+++ b/runtime/vm/regexp_ast.h
@@ -0,0 +1,393 @@
+// Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+#ifndef VM_REGEXP_AST_H_
+#define VM_REGEXP_AST_H_
+
+#include "platform/globals.h"
+#include "platform/utils.h"
+#include "vm/allocation.h"
+#include "vm/regexp.h"
+
+namespace dart {
+
+// Forward declarations.
+class RegExpAlternative;
+class RegExpAssertion;
+class RegExpAtom;
+class RegExpBackReference;
+class RegExpCapture;
+class RegExpCharacterClass;
+class RegExpCompiler;
+class RegExpDisjunction;
+class RegExpEmpty;
+class RegExpLookahead;
+class RegExpQuantifier;
+class RegExpText;
+
+
+class RegExpVisitor : public ValueObject {
+ public:
+ virtual ~RegExpVisitor() { }
+#define MAKE_CASE(Name) \
+ virtual void* Visit##Name(RegExp##Name*, void* data) = 0;
+ FOR_EACH_REG_EXP_TREE_TYPE(MAKE_CASE)
+#undef MAKE_CASE
+};
+
+
+class RegExpTree : public ZoneAllocated {
+ public:
+ static const intptr_t kInfinity = kMaxInt32;
+ virtual ~RegExpTree() {}
+ virtual void* Accept(RegExpVisitor* visitor, void* data) = 0;
+ virtual RegExpNode* ToNode(RegExpCompiler* compiler,
+ RegExpNode* on_success) = 0;
+ virtual bool IsTextElement() const { return false; }
+ virtual bool IsAnchoredAtStart() const { return false; }
+ virtual bool IsAnchoredAtEnd() const { return false; }
+ virtual intptr_t min_match() const = 0;
+ virtual intptr_t max_match() const = 0;
+ // Returns the interval of registers used for captures within this
+ // expression.
+ virtual Interval CaptureRegisters() const { return Interval::Empty(); }
+ virtual void AppendToText(RegExpText* text);
+ void Print();
+#define MAKE_ASTYPE(Name) \
+ virtual RegExp##Name* As##Name(); \
+ virtual bool Is##Name() const;
+ FOR_EACH_REG_EXP_TREE_TYPE(MAKE_ASTYPE)
+#undef MAKE_ASTYPE
+};
+
+
+class RegExpText : public RegExpTree {
+ public:
+ RegExpText() : elements_(2), length_(0) {}
+ virtual void* Accept(RegExpVisitor* visitor, void* data);
+ virtual RegExpNode* ToNode(RegExpCompiler* compiler,
+ RegExpNode* on_success);
+ virtual RegExpText* AsText();
+ virtual bool IsText() const;
+ virtual bool IsTextElement() const { return true; }
+ virtual intptr_t min_match() const { return length_; }
+ virtual intptr_t max_match() const { return length_; }
+ virtual void AppendToText(RegExpText* text);
+ void AddElement(TextElement elm) {
+ elements_.Add(elm);
+ length_ += elm.length();
+ }
+ GrowableArray<TextElement>* elements() { return &elements_; }
+ private:
+ GrowableArray<TextElement> elements_;
+ intptr_t length_;
+};
+
+
+class RegExpQuantifier : public RegExpTree {
+ public:
+ enum QuantifierType { GREEDY, NON_GREEDY, POSSESSIVE };
+ RegExpQuantifier(intptr_t min, intptr_t max,
+ QuantifierType type, RegExpTree* body)
+ : body_(body),
+ min_(min),
+ max_(max),
+ min_match_(min * body->min_match()),
+ quantifier_type_(type) {
+ if (max > 0 && body->max_match() > kInfinity / max) {
+ max_match_ = kInfinity;
+ } else {
+ max_match_ = max * body->max_match();
+ }
+ }
+ virtual void* Accept(RegExpVisitor* visitor, void* data);
+ virtual RegExpNode* ToNode(RegExpCompiler* compiler,
+ RegExpNode* on_success);
+ static RegExpNode* ToNode(intptr_t min,
+ intptr_t max,
+ bool is_greedy,
+ RegExpTree* body,
+ RegExpCompiler* compiler,
+ RegExpNode* on_success,
+ bool not_at_start = false);
+ virtual RegExpQuantifier* AsQuantifier();
+ virtual Interval CaptureRegisters() const;
+ virtual bool IsQuantifier() const;
+ virtual intptr_t min_match() const { return min_match_; }
+ virtual intptr_t max_match() const { return max_match_; }
+ intptr_t min() const { return min_; }
+ intptr_t max() const { return max_; }
+ bool is_possessive() const { return quantifier_type_ == POSSESSIVE; }
+ bool is_non_greedy() const { return quantifier_type_ == NON_GREEDY; }
+ bool is_greedy() const { return quantifier_type_ == GREEDY; }
+ RegExpTree* body() const { return body_; }
+
+ private:
+ RegExpTree* body_;
+ intptr_t min_;
+ intptr_t max_;
+ intptr_t min_match_;
+ intptr_t max_match_;
+ QuantifierType quantifier_type_;
+};
+
+
+class RegExpCapture : public RegExpTree {
+ public:
+ explicit RegExpCapture(RegExpTree* body, intptr_t index)
+ : body_(body), index_(index) { }
+ virtual void* Accept(RegExpVisitor* visitor, void* data);
+ virtual RegExpNode* ToNode(RegExpCompiler* compiler,
+ RegExpNode* on_success);
+ static RegExpNode* ToNode(RegExpTree* body,
+ intptr_t index,
+ RegExpCompiler* compiler,
+ RegExpNode* on_success);
+ virtual RegExpCapture* AsCapture();
+ virtual bool IsAnchoredAtStart() const;
+ virtual bool IsAnchoredAtEnd() const;
+ virtual Interval CaptureRegisters() const;
+ virtual bool IsCapture() const;
+ virtual intptr_t min_match() const { return body_->min_match(); }
+ virtual intptr_t max_match() const { return body_->max_match(); }
+ RegExpTree* body() const { return body_; }
+ intptr_t index() const { return index_; }
+ static intptr_t StartRegister(intptr_t index) { return index * 2; }
+ static intptr_t EndRegister(intptr_t index) { return index * 2 + 1; }
+
+ private:
+ RegExpTree* body_;
+ intptr_t index_;
+};
+
+
+class RegExpDisjunction : public RegExpTree {
+ public:
+ explicit RegExpDisjunction(ZoneGrowableArray<RegExpTree*>* alternatives);
+ virtual void* Accept(RegExpVisitor* visitor, void* data);
+ virtual RegExpNode* ToNode(RegExpCompiler* compiler,
+ RegExpNode* on_success);
+ virtual RegExpDisjunction* AsDisjunction();
+ virtual Interval CaptureRegisters() const;
+ virtual bool IsDisjunction() const;
+ virtual bool IsAnchoredAtStart() const;
+ virtual bool IsAnchoredAtEnd() const;
+ virtual intptr_t min_match() const { return min_match_; }
+ virtual intptr_t max_match() const { return max_match_; }
+ ZoneGrowableArray<RegExpTree*>* alternatives() const { return alternatives_; }
+ private:
+ ZoneGrowableArray<RegExpTree*>* alternatives_;
+ intptr_t min_match_;
+ intptr_t max_match_;
+};
+
+
+class RegExpAlternative : public RegExpTree {
+ public:
+ explicit RegExpAlternative(ZoneGrowableArray<RegExpTree*>* nodes);
+ virtual void* Accept(RegExpVisitor* visitor, void* data);
+ virtual RegExpNode* ToNode(RegExpCompiler* compiler,
+ RegExpNode* on_success);
+ virtual RegExpAlternative* AsAlternative();
+ virtual Interval CaptureRegisters() const;
+ virtual bool IsAlternative() const;
+ virtual bool IsAnchoredAtStart() const;
+ virtual bool IsAnchoredAtEnd() const;
+ virtual intptr_t min_match() const { return min_match_; }
+ virtual intptr_t max_match() const { return max_match_; }
+ ZoneGrowableArray<RegExpTree*>* nodes() const { return nodes_; }
+ private:
+ ZoneGrowableArray<RegExpTree*>* nodes_;
+ intptr_t min_match_;
+ intptr_t max_match_;
+};
+
+
+class RegExpAssertion : public RegExpTree {
+ public:
+ enum AssertionType {
+ START_OF_LINE,
+ START_OF_INPUT,
+ END_OF_LINE,
+ END_OF_INPUT,
+ BOUNDARY,
+ NON_BOUNDARY
+ };
+ explicit RegExpAssertion(AssertionType type) : assertion_type_(type) { }
+ virtual void* Accept(RegExpVisitor* visitor, void* data);
+ virtual RegExpNode* ToNode(RegExpCompiler* compiler,
+ RegExpNode* on_success);
+ virtual RegExpAssertion* AsAssertion();
+ virtual bool IsAssertion() const;
+ virtual bool IsAnchoredAtStart() const;
+ virtual bool IsAnchoredAtEnd() const;
+ virtual intptr_t min_match() const { return 0; }
+ virtual intptr_t max_match() const { return 0; }
+ AssertionType assertion_type() const { return assertion_type_; }
+ private:
+ AssertionType assertion_type_;
+};
+
+
+class CharacterSet : public ValueObject {
+ public:
+ explicit CharacterSet(uint16_t standard_set_type)
+ : ranges_(NULL),
+ standard_set_type_(standard_set_type) {}
+ explicit CharacterSet(ZoneGrowableArray<CharacterRange>* ranges)
+ : ranges_(ranges),
+ standard_set_type_(0) {}
+ CharacterSet(const CharacterSet& that)
+ : ValueObject(),
+ ranges_(that.ranges_),
+ standard_set_type_(that.standard_set_type_) {}
+ ZoneGrowableArray<CharacterRange>* ranges();
+ uint16_t standard_set_type() const { return standard_set_type_; }
+ void set_standard_set_type(uint16_t special_set_type) {
+ standard_set_type_ = special_set_type;
+ }
+ bool is_standard() { return standard_set_type_ != 0; }
+ void Canonicalize();
+ private:
+ ZoneGrowableArray<CharacterRange>* ranges_;
+ // If non-zero, the value represents a standard set (e.g., all whitespace
+ // characters) without having to expand the ranges.
+ uint16_t standard_set_type_;
+};
+
+
+class RegExpCharacterClass : public RegExpTree {
+ public:
+ RegExpCharacterClass(ZoneGrowableArray<CharacterRange>* ranges,
+ bool is_negated)
+ : set_(ranges),
+ is_negated_(is_negated) { }
+ explicit RegExpCharacterClass(uint16_t type)
+ : set_(type),
+ is_negated_(false) { }
+ virtual void* Accept(RegExpVisitor* visitor, void* data);
+ virtual RegExpNode* ToNode(RegExpCompiler* compiler,
+ RegExpNode* on_success);
+ virtual RegExpCharacterClass* AsCharacterClass();
+ virtual bool IsCharacterClass() const;
+ virtual bool IsTextElement() const { return true; }
+ virtual intptr_t min_match() const { return 1; }
+ virtual intptr_t max_match() const { return 1; }
+ virtual void AppendToText(RegExpText* text);
+ CharacterSet character_set() const { return set_; }
+ // TODO(lrn): Remove need for complex version if is_standard that
+ // recognizes a mangled standard set and just do { return set_.is_special(); }
+ bool is_standard();
+ // Returns a value representing the standard character set if is_standard()
+ // returns true.
+ // Currently used values are:
+ // s : unicode whitespace
+ // S : unicode non-whitespace
+ // w : ASCII word character (digit, letter, underscore)
+ // W : non-ASCII word character
+ // d : ASCII digit
+ // D : non-ASCII digit
+ // . : non-unicode non-newline
+ // * : All characters
+ uint16_t standard_type() const { return set_.standard_set_type(); }
+ ZoneGrowableArray<CharacterRange>* ranges() {
+ return set_.ranges();
+ }
+ bool is_negated() const { return is_negated_; }
+
+ private:
+ CharacterSet set_;
+ bool is_negated_;
+};
+
+
+class RegExpAtom : public RegExpTree {
+ public:
+ explicit RegExpAtom(ZoneGrowableArray<uint16_t>* data) : data_(data) { }
+ virtual void* Accept(RegExpVisitor* visitor, void* data);
+ virtual RegExpNode* ToNode(RegExpCompiler* compiler,
+ RegExpNode* on_success);
+ virtual RegExpAtom* AsAtom();
+ virtual bool IsAtom() const;
+ virtual bool IsTextElement() const { return true; }
+ virtual intptr_t min_match() const { return data_->length(); }
+ virtual intptr_t max_match() const { return data_->length(); }
+ virtual void AppendToText(RegExpText* text);
+ ZoneGrowableArray<uint16_t>* data() const { return data_; }
+ intptr_t length() const { return data_->length(); }
+ private:
+ ZoneGrowableArray<uint16_t>* data_;
+};
+
+
+class RegExpLookahead : public RegExpTree {
+ public:
+ RegExpLookahead(RegExpTree* body,
+ bool is_positive,
+ intptr_t capture_count,
+ intptr_t capture_from)
+ : body_(body),
+ is_positive_(is_positive),
+ capture_count_(capture_count),
+ capture_from_(capture_from) { }
+
+ virtual void* Accept(RegExpVisitor* visitor, void* data);
+ virtual RegExpNode* ToNode(RegExpCompiler* compiler,
+ RegExpNode* on_success);
+ virtual RegExpLookahead* AsLookahead();
+ virtual Interval CaptureRegisters() const;
+ virtual bool IsLookahead() const;
+ virtual bool IsAnchoredAtStart() const;
+ virtual intptr_t min_match() const { return 0; }
+ virtual intptr_t max_match() const { return 0; }
+ RegExpTree* body() const { return body_; }
+ bool is_positive() const { return is_positive_; }
+ intptr_t capture_count() const { return capture_count_; }
+ intptr_t capture_from() const { return capture_from_; }
+
+ private:
+ RegExpTree* body_;
+ bool is_positive_;
+ intptr_t capture_count_;
+ intptr_t capture_from_;
+};
+
+
+class RegExpBackReference : public RegExpTree {
+ public:
+ explicit RegExpBackReference(RegExpCapture* capture)
+ : capture_(capture) { }
+ virtual void* Accept(RegExpVisitor* visitor, void* data);
+ virtual RegExpNode* ToNode(RegExpCompiler* compiler,
+ RegExpNode* on_success);
+ virtual RegExpBackReference* AsBackReference();
+ virtual bool IsBackReference() const;
+ virtual intptr_t min_match() const { return 0; }
+ virtual intptr_t max_match() const { return capture_->max_match(); }
+ intptr_t index() const { return capture_->index(); }
+ RegExpCapture* capture() const { return capture_; }
+ private:
+ RegExpCapture* capture_;
+};
+
+
+class RegExpEmpty : public RegExpTree {
+ public:
+ RegExpEmpty() { }
+ virtual void* Accept(RegExpVisitor* visitor, void* data);
+ virtual RegExpNode* ToNode(RegExpCompiler* compiler,
+ RegExpNode* on_success);
+ virtual RegExpEmpty* AsEmpty();
+ virtual bool IsEmpty() const;
+ virtual intptr_t min_match() const { return 0; }
+ virtual intptr_t max_match() const { return 0; }
+ static RegExpEmpty* GetInstance() {
+ static RegExpEmpty* instance = ::new RegExpEmpty();
+ return instance;
+ }
+};
+
+} // namespace dart
+
+#endif // VM_REGEXP_AST_H_
« no previous file with comments | « runtime/vm/regexp_assembler.cc ('k') | runtime/vm/regexp_ast.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698