Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(113)

Unified Diff: src/ast.h

Issue 8188: Some new regexp infrastructure. (Closed)
Patch Set: Created 12 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « no previous file | src/ast.cc » ('j') | src/ast.cc » ('J')
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: src/ast.h
diff --git a/src/ast.h b/src/ast.h
index fe946d98f2a4ed5d1aedc956e2984c87b3edb5d1..cbb0a0b43c8433ee576327e322983823589bb9b5 100644
--- a/src/ast.h
+++ b/src/ast.h
@@ -1181,6 +1181,200 @@ class ThisFunction: public Expression {
// ----------------------------------------------------------------------------
+// Regular expressions
Erik Corry 2008/10/27 14:58:44 full stop
Christian Plesner Hansen 2008/10/27 18:57:02 I copied the banner style from parser.cc which doe
+
+
+// Lists the name of every concrete regular expression node type. VISIT is
+// applied once per name; users of the list paste the name onto a prefix
+// (RegExp##Name for the class, Visit##Name for the visitor method).
+#define FOR_EACH_REG_EXP_NODE_TYPE(VISIT) \
+ VISIT(Disjunction) \
+ VISIT(Alternative) \
+ VISIT(Assertion) \
+ VISIT(CharacterClass) \
+ VISIT(Atom) \
+ VISIT(Quantifier) \
+ VISIT(Capture) \
+ VISIT(Lookahead) \
+ VISIT(Empty)
+
+
+// Forward declarations: the visitor interface, the RegExpNode template, and
+// one RegExp<Name> class for each entry of FOR_EACH_REG_EXP_NODE_TYPE.
+class RegExpVisitor;
+template <typename Char> class RegExpNode;
+#define FORWARD_DECLARE(Name) class RegExp##Name;
+FOR_EACH_REG_EXP_NODE_TYPE(FORWARD_DECLARE)
+#undef FORWARD_DECLARE
+
+
+// Abstract base class of the regular expression syntax tree nodes declared
+// in this header. Derives from ZoneObject.
+class RegExpTree: public ZoneObject {
+ public:
+ virtual ~RegExpTree() { }
+ // Visitor entry point that every concrete node type overrides. |data| is an
+ // opaque value supplied by the caller and the result is an opaque pointer.
+ // NOTE(review): presumably each override forwards to the matching
+ // RegExpVisitor::Visit<Name> method -- the definitions are not in this file.
+ virtual void* Accept(RegExpVisitor* visitor, void* data) = 0;
+ // NOTE(review): presumably produces a textual rendering of this tree; the
+ // definition is not in this header.
+ SmartPointer<char> ToString();
+};
+
+
+// Disjunction node of the regular expression syntax tree. Stores the pointer
+// to the list of child nodes it was constructed with.
+// NOTE(review): presumably the children are the '|'-separated alternatives --
+// confirm against the parser.
+class RegExpDisjunction: public RegExpTree {
+ public:
+ // explicit: a bare list pointer must not convert silently into a node.
+ explicit RegExpDisjunction(ZoneList<RegExpTree*>* nodes) : nodes_(nodes) { }
+ virtual void* Accept(RegExpVisitor* visitor, void* data);
+ // Returns the list pointer that was passed to the constructor.
+ ZoneList<RegExpTree*>* nodes() const { return nodes_; }
+ private:
+ ZoneList<RegExpTree*>* nodes_;
+};
+
+
+// Alternative node of the regular expression syntax tree. Stores the pointer
+// to the list of child nodes it was constructed with.
+// NOTE(review): presumably the children are the terms matched in sequence --
+// confirm against the parser.
+class RegExpAlternative: public RegExpTree {
+ public:
+ // explicit: a bare list pointer must not convert silently into a node.
+ explicit RegExpAlternative(ZoneList<RegExpTree*>* nodes) : nodes_(nodes) { }
+ virtual void* Accept(RegExpVisitor* visitor, void* data);
+ // Returns the list pointer that was passed to the constructor.
+ ZoneList<RegExpTree*>* nodes() const { return nodes_; }
+ private:
+ ZoneList<RegExpTree*>* nodes_;
+};
+
+
+// Assertion node of the regular expression syntax tree. Carries only the
+// kind of assertion, one of the Type enumerators below.
+class RegExpAssertion: public RegExpTree {
+ public:
+ enum Type { START, END, BOUNDARY, NON_BOUNDARY };
Lasse Reichstein 2008/10/27 13:12:58 I suggest having both START_OF_INPUT and START_OF_
Christian Plesner Hansen 2008/10/27 18:57:02 Good point.
+ // explicit: a bare Type value must not convert silently into a node.
+ explicit RegExpAssertion(Type type) : type_(type) { }
+ virtual void* Accept(RegExpVisitor* visitor, void* data);
+ // Returns the assertion kind given at construction.
+ Type type() const { return type_; }
+ private:
+ Type type_;
+};
+
+
+// A range of character codes from from() to to() inclusive (a singleton has
+// from == to), or -- when is_special() is set -- a tag value that Special()
+// stores in both from and to.
+class CharacterRange {
+ public:
+ // For compatibility with the CHECK_OK macro. Left implicit, presumably so
+ // the macro can turn a null pointer into a CharacterRange -- confirm
+ // against CHECK_OK. The fields are zeroed so that the resulting object
+ // never carries indeterminate values when it is copied or inspected.
+ CharacterRange(void* null)
+ : from_(0),
+ to_(0),
+ is_special_(false) {
+ ASSERT_EQ(NULL, null);
+ }
+ CharacterRange(uc32 from, uc32 to, bool is_special)
Lasse Reichstein 2008/10/27 13:12:58 Could you document what "is_special" signifies? (o
Christian Plesner Hansen 2008/10/27 18:57:02 I've renamed it to is_character_class_.
+ : from_(from),
+ to_(to),
+ is_special_(is_special) {
+ // Assert that truncating to the 21-bit fields doesn't throw away
+ // information.
+ ASSERT_EQ(from, from_);
+ ASSERT_EQ(to_, to);
+ }
+ // Builds a special range whose from and to both hold |tag|.
+ static inline CharacterRange Special(uc32 tag) {
+ return CharacterRange(tag, tag, true);
+ }
+ // Builds a non-special range covering exactly |value|.
+ static inline CharacterRange Singleton(uc32 value) {
+ return CharacterRange(value, value, false);
+ }
+ // Builds a non-special range from |from| to |to|.
+ static inline CharacterRange Range(uc32 from, uc32 to) {
+ return CharacterRange(from, to, false);
+ }
+ unsigned from() const { return from_; }
Erik Corry 2008/10/27 14:58:44 ASSERT !special?
+ unsigned to() const { return to_; }
+ bool is_special() const { return is_special_; }
+ // True for a non-special range whose two endpoints coincide.
+ bool IsSingleton() const { return (from_ == to_) && !is_special_; }
+ private:
+ unsigned from_ : 21;
Erik Corry 2008/10/27 14:58:44 JS Regexps don't handle anything outside the basic
Christian Plesner Hansen 2008/10/27 18:57:02 Yes, and then when we unfold the character classes
+ unsigned to_ : 21;
+ bool is_special_ : 1;
+};
+
+
+// Checks, via the STATIC_CHECK macro, that CharacterRange occupies exactly
+// two ints; its 21 + 21 + 1 bits of bitfields are expected to fit in that.
+STATIC_CHECK(sizeof(CharacterRange) == 2 * sizeof(int));
+
+
+// Character class node of the regular expression syntax tree: a list of
+// CharacterRange values plus a flag recording whether the class is negated.
+class RegExpCharacterClass: public RegExpTree {
+ public:
+ // Builds a non-negated class holding just |range|, in a freshly allocated
+ // list. explicit: a bare CharacterRange must not convert silently into a
+ // node.
+ explicit RegExpCharacterClass(CharacterRange range)
+ : ranges_(new ZoneList<CharacterRange>(1)),
+ is_negated_(false) {
+ ranges_->Add(range);
+ }
+ // Builds a class around the caller-supplied list; the pointer is stored,
+ // the list is not copied.
+ RegExpCharacterClass(ZoneList<CharacterRange>* ranges, bool is_negated)
+ : ranges_(ranges),
+ is_negated_(is_negated) { }
+ virtual void* Accept(RegExpVisitor* visitor, void* data);
+ ZoneList<CharacterRange>* ranges() const { return ranges_; }
+ bool is_negated() const { return is_negated_; }
+ private:
+ ZoneList<CharacterRange>* ranges_;
+ bool is_negated_;
+};
+
+
+// Atom node of the regular expression syntax tree. Holds a vector of uc16
+// code units.
+// NOTE(review): presumably the literal text to match -- confirm against the
+// parser.
+class RegExpAtom: public RegExpTree {
+ public:
+ // explicit: a bare vector must not convert silently into a node.
+ explicit RegExpAtom(Vector<const uc16> data) : data_(data) { }
+ virtual void* Accept(RegExpVisitor* visitor, void* data);
+ // Returns the vector given at construction (by value).
+ Vector<const uc16> data() const { return data_; }
+ private:
+ Vector<const uc16> data_;
+};
+
+
+// Quantifier node of the regular expression syntax tree: a body subtree
+// together with a minimum and maximum repetition count and a greediness
+// flag. kInfinity is provided as the "no upper bound" value for |max|.
+class RegExpQuantifier: public RegExpTree {
+ public:
+ RegExpQuantifier(int min, int max, bool is_greedy, RegExpTree* body)
+ : min_(min),
+ max_(max),
+ is_greedy_(is_greedy),
+ body_(body) { }
+ virtual void* Accept(RegExpVisitor* visitor, void* data);
+ int min() const { return min_; }
+ int max() const { return max_; }
+ bool is_greedy() const { return is_greedy_; }
+ RegExpTree* body() const { return body_; }
+ // We just use the largest positive int as infinity because 2^31 - 1
+ // is infinite in practice. (1 << 31) must not be used here: it shifts
+ // into the sign bit of a 32-bit int and yields a negative value, which
+ // would make "infinity" compare smaller than every real count.
+ static const int kInfinity = 0x7FFFFFFF;
+ private:
+ int min_;
+ int max_;
+ bool is_greedy_;
+ RegExpTree* body_;
+};
+
+
+// Capture node of the regular expression syntax tree. Wraps a single body
+// subtree; no capture index is stored in the node.
+class RegExpCapture: public RegExpTree {
Lasse Reichstein 2008/10/27 13:12:58 I think keeping the index of the capture in the no
Christian Plesner Hansen 2008/10/27 18:57:02 The ast nodes that I don't yet convert into nodes
+ public:
+ // explicit: a bare subtree pointer must not convert silently into a
+ // capture node.
+ explicit RegExpCapture(RegExpTree* body)
+ : body_(body) { }
+ virtual void* Accept(RegExpVisitor* visitor, void* data);
+ // Returns the body pointer given at construction.
+ RegExpTree* body() const { return body_; }
+ private:
+ RegExpTree* body_;
+};
+
+
+// Lookahead node of the regular expression syntax tree: a body subtree plus
+// a flag saying whether the lookahead is positive.
+class RegExpLookahead: public RegExpTree {
+ public:
+ RegExpLookahead(RegExpTree* body, bool is_positive)
+ : body_(body),
+ is_positive_(is_positive) { }
+ virtual void* Accept(RegExpVisitor* visitor, void* data);
+ // Accessors are const so they can be called through a const pointer.
+ RegExpTree* body() const { return body_; }
+ bool is_positive() const { return is_positive_; }
+ private:
+ RegExpTree* body_;
+ bool is_positive_;
+};
+
+
+// Node type with no fields of its own. GetInstance() hands out the address
+// of one statically allocated instance, kInstance, which is only declared
+// here; its definition is not in this header.
+class RegExpEmpty: public RegExpTree {
+ public:
+ RegExpEmpty() { }
+ virtual void* Accept(RegExpVisitor* visitor, void* data);
+ // Returns a pointer to the static kInstance object.
+ static RegExpEmpty* GetInstance() { return &kInstance; }
+ private:
+ static RegExpEmpty kInstance;
+};
+
+
+// Visitor interface over the regular expression syntax tree. MAKE_CASE
+// declares one pure virtual Visit<Name>(RegExp<Name>*, void* data) method
+// for every node type listed in FOR_EACH_REG_EXP_NODE_TYPE, so a concrete
+// visitor has to implement all of them.
+class RegExpVisitor BASE_EMBEDDED {
+ public:
+ virtual ~RegExpVisitor() { }
+#define MAKE_CASE(Name) \
+ virtual void* Visit##Name(RegExp##Name*, void* data) = 0;
+ FOR_EACH_REG_EXP_NODE_TYPE(MAKE_CASE)
+#undef MAKE_CASE
+};
+
+
+// ----------------------------------------------------------------------------
// Basic visitor
// - leaf node visitors are abstract.
« no previous file with comments | « no previous file | src/ast.cc » ('j') | src/ast.cc » ('J')

Powered by Google App Engine
This is Rietveld 408576698