Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(215)

Side by Side Diff: src/ast/ast.h

Issue 1565183002: [regexp] move regexp parser into own files. (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master
Patch Set: fix test compile Created 4 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « BUILD.gn ('k') | src/ast/ast.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2012 the V8 project authors. All rights reserved. 1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #ifndef V8_AST_AST_H_ 5 #ifndef V8_AST_AST_H_
6 #define V8_AST_AST_H_ 6 #define V8_AST_AST_H_
7 7
8 #include "src/assembler.h" 8 #include "src/assembler.h"
9 #include "src/ast/ast-value-factory.h" 9 #include "src/ast/ast-value-factory.h"
10 #include "src/ast/modules.h" 10 #include "src/ast/modules.h"
11 #include "src/ast/variables.h" 11 #include "src/ast/variables.h"
12 #include "src/bailout-reason.h" 12 #include "src/bailout-reason.h"
13 #include "src/base/flags.h" 13 #include "src/base/flags.h"
14 #include "src/base/smart-pointers.h" 14 #include "src/base/smart-pointers.h"
15 #include "src/factory.h" 15 #include "src/factory.h"
16 #include "src/isolate.h" 16 #include "src/isolate.h"
17 #include "src/list.h" 17 #include "src/list.h"
18 #include "src/parsing/token.h" 18 #include "src/parsing/token.h"
19 #include "src/regexp/jsregexp.h"
20 #include "src/runtime/runtime.h" 19 #include "src/runtime/runtime.h"
21 #include "src/small-pointer-list.h" 20 #include "src/small-pointer-list.h"
22 #include "src/types.h" 21 #include "src/types.h"
23 #include "src/utils.h" 22 #include "src/utils.h"
24 23
25 namespace v8 { 24 namespace v8 {
26 namespace internal { 25 namespace internal {
27 26
28 // The abstract syntax tree is an intermediate, light-weight 27 // The abstract syntax tree is an intermediate, light-weight
29 // representation of the parsed JavaScript code suitable for 28 // representation of the parsed JavaScript code suitable for
(...skipping 74 matching lines...) Expand 10 before | Expand all | Expand 10 after
104 class AstVisitor; 103 class AstVisitor;
105 class Declaration; 104 class Declaration;
106 class Module; 105 class Module;
107 class BreakableStatement; 106 class BreakableStatement;
108 class Expression; 107 class Expression;
109 class IterationStatement; 108 class IterationStatement;
110 class MaterializedLiteral; 109 class MaterializedLiteral;
111 class Statement; 110 class Statement;
112 class TypeFeedbackOracle; 111 class TypeFeedbackOracle;
113 112
114 class RegExpAlternative;
115 class RegExpAssertion;
116 class RegExpAtom;
117 class RegExpBackReference;
118 class RegExpCapture;
119 class RegExpCharacterClass;
120 class RegExpCompiler;
121 class RegExpDisjunction;
122 class RegExpEmpty;
123 class RegExpLookaround;
124 class RegExpQuantifier;
125 class RegExpText;
126
127 #define DEF_FORWARD_DECLARATION(type) class type; 113 #define DEF_FORWARD_DECLARATION(type) class type;
128 AST_NODE_LIST(DEF_FORWARD_DECLARATION) 114 AST_NODE_LIST(DEF_FORWARD_DECLARATION)
129 #undef DEF_FORWARD_DECLARATION 115 #undef DEF_FORWARD_DECLARATION
130 116
131 117
132 // Typedef only introduced to avoid unreadable code. 118 // Typedef only introduced to avoid unreadable code.
133 typedef ZoneList<Handle<String>> ZoneStringList; 119 typedef ZoneList<Handle<String>> ZoneStringList;
134 typedef ZoneList<Handle<Object>> ZoneObjectList; 120 typedef ZoneList<Handle<Object>> ZoneObjectList;
135 121
136 122
(...skipping 2773 matching lines...) Expand 10 before | Expand all | Expand 10 after
2910 2896
2911 private: 2897 private:
2912 EmptyParentheses(Zone* zone, int pos) : Expression(zone, pos) {} 2898 EmptyParentheses(Zone* zone, int pos) : Expression(zone, pos) {}
2913 }; 2899 };
2914 2900
2915 2901
2916 #undef DECLARE_NODE_TYPE 2902 #undef DECLARE_NODE_TYPE
2917 2903
2918 2904
2919 // ---------------------------------------------------------------------------- 2905 // ----------------------------------------------------------------------------
2920 // Regular expressions
2921
2922
2923 class RegExpVisitor BASE_EMBEDDED {
2924 public:
2925 virtual ~RegExpVisitor() { }
2926 #define MAKE_CASE(Name) \
2927 virtual void* Visit##Name(RegExp##Name*, void* data) = 0;
2928 FOR_EACH_REG_EXP_TREE_TYPE(MAKE_CASE)
2929 #undef MAKE_CASE
2930 };
2931
2932
2933 class RegExpTree : public ZoneObject {
2934 public:
2935 static const int kInfinity = kMaxInt;
2936 virtual ~RegExpTree() {}
2937 virtual void* Accept(RegExpVisitor* visitor, void* data) = 0;
2938 virtual RegExpNode* ToNode(RegExpCompiler* compiler,
2939 RegExpNode* on_success) = 0;
2940 virtual bool IsTextElement() { return false; }
2941 virtual bool IsAnchoredAtStart() { return false; }
2942 virtual bool IsAnchoredAtEnd() { return false; }
2943 virtual int min_match() = 0;
2944 virtual int max_match() = 0;
2945 // Returns the interval of registers used for captures within this
2946 // expression.
2947 virtual Interval CaptureRegisters() { return Interval::Empty(); }
2948 virtual void AppendToText(RegExpText* text, Zone* zone);
2949 std::ostream& Print(std::ostream& os, Zone* zone); // NOLINT
2950 #define MAKE_ASTYPE(Name) \
2951 virtual RegExp##Name* As##Name(); \
2952 virtual bool Is##Name();
2953 FOR_EACH_REG_EXP_TREE_TYPE(MAKE_ASTYPE)
2954 #undef MAKE_ASTYPE
2955 };
2956
2957
2958 class RegExpDisjunction final : public RegExpTree {
2959 public:
2960 explicit RegExpDisjunction(ZoneList<RegExpTree*>* alternatives);
2961 void* Accept(RegExpVisitor* visitor, void* data) override;
2962 RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override;
2963 RegExpDisjunction* AsDisjunction() override;
2964 Interval CaptureRegisters() override;
2965 bool IsDisjunction() override;
2966 bool IsAnchoredAtStart() override;
2967 bool IsAnchoredAtEnd() override;
2968 int min_match() override { return min_match_; }
2969 int max_match() override { return max_match_; }
2970 ZoneList<RegExpTree*>* alternatives() { return alternatives_; }
2971 private:
2972 bool SortConsecutiveAtoms(RegExpCompiler* compiler);
2973 void RationalizeConsecutiveAtoms(RegExpCompiler* compiler);
2974 void FixSingleCharacterDisjunctions(RegExpCompiler* compiler);
2975 ZoneList<RegExpTree*>* alternatives_;
2976 int min_match_;
2977 int max_match_;
2978 };
2979
2980
2981 class RegExpAlternative final : public RegExpTree {
2982 public:
2983 explicit RegExpAlternative(ZoneList<RegExpTree*>* nodes);
2984 void* Accept(RegExpVisitor* visitor, void* data) override;
2985 RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override;
2986 RegExpAlternative* AsAlternative() override;
2987 Interval CaptureRegisters() override;
2988 bool IsAlternative() override;
2989 bool IsAnchoredAtStart() override;
2990 bool IsAnchoredAtEnd() override;
2991 int min_match() override { return min_match_; }
2992 int max_match() override { return max_match_; }
2993 ZoneList<RegExpTree*>* nodes() { return nodes_; }
2994 private:
2995 ZoneList<RegExpTree*>* nodes_;
2996 int min_match_;
2997 int max_match_;
2998 };
2999
3000
3001 class RegExpAssertion final : public RegExpTree {
3002 public:
3003 enum AssertionType {
3004 START_OF_LINE,
3005 START_OF_INPUT,
3006 END_OF_LINE,
3007 END_OF_INPUT,
3008 BOUNDARY,
3009 NON_BOUNDARY
3010 };
3011 explicit RegExpAssertion(AssertionType type) : assertion_type_(type) { }
3012 void* Accept(RegExpVisitor* visitor, void* data) override;
3013 RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override;
3014 RegExpAssertion* AsAssertion() override;
3015 bool IsAssertion() override;
3016 bool IsAnchoredAtStart() override;
3017 bool IsAnchoredAtEnd() override;
3018 int min_match() override { return 0; }
3019 int max_match() override { return 0; }
3020 AssertionType assertion_type() { return assertion_type_; }
3021 private:
3022 AssertionType assertion_type_;
3023 };
3024
3025
3026 class CharacterSet final BASE_EMBEDDED {
3027 public:
3028 explicit CharacterSet(uc16 standard_set_type)
3029 : ranges_(NULL),
3030 standard_set_type_(standard_set_type) {}
3031 explicit CharacterSet(ZoneList<CharacterRange>* ranges)
3032 : ranges_(ranges),
3033 standard_set_type_(0) {}
3034 ZoneList<CharacterRange>* ranges(Zone* zone);
3035 uc16 standard_set_type() { return standard_set_type_; }
3036 void set_standard_set_type(uc16 special_set_type) {
3037 standard_set_type_ = special_set_type;
3038 }
3039 bool is_standard() { return standard_set_type_ != 0; }
3040 void Canonicalize();
3041 private:
3042 ZoneList<CharacterRange>* ranges_;
3043 // If non-zero, the value represents a standard set (e.g., all whitespace
3044 // characters) without having to expand the ranges.
3045 uc16 standard_set_type_;
3046 };
3047
3048
3049 class RegExpCharacterClass final : public RegExpTree {
3050 public:
3051 RegExpCharacterClass(ZoneList<CharacterRange>* ranges, bool is_negated)
3052 : set_(ranges),
3053 is_negated_(is_negated) { }
3054 explicit RegExpCharacterClass(uc16 type)
3055 : set_(type),
3056 is_negated_(false) { }
3057 void* Accept(RegExpVisitor* visitor, void* data) override;
3058 RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override;
3059 RegExpCharacterClass* AsCharacterClass() override;
3060 bool IsCharacterClass() override;
3061 bool IsTextElement() override { return true; }
3062 int min_match() override { return 1; }
3063 int max_match() override { return 1; }
3064 void AppendToText(RegExpText* text, Zone* zone) override;
3065 CharacterSet character_set() { return set_; }
3066 // TODO(lrn): Remove need for complex version if is_standard that
3067 // recognizes a mangled standard set and just do { return set_.is_special(); }
3068 bool is_standard(Zone* zone);
3069 // Returns a value representing the standard character set if is_standard()
3070 // returns true.
3071 // Currently used values are:
3072 // s : unicode whitespace
3073 // S : unicode non-whitespace
3074 // w : ASCII word character (digit, letter, underscore)
3075 // W : non-ASCII word character
3076 // d : ASCII digit
3077 // D : non-ASCII digit
3078 // . : non-unicode non-newline
3079 // * : All characters
3080 uc16 standard_type() { return set_.standard_set_type(); }
3081 ZoneList<CharacterRange>* ranges(Zone* zone) { return set_.ranges(zone); }
3082 bool is_negated() { return is_negated_; }
3083
3084 private:
3085 CharacterSet set_;
3086 bool is_negated_;
3087 };
3088
3089
3090 class RegExpAtom final : public RegExpTree {
3091 public:
3092 explicit RegExpAtom(Vector<const uc16> data) : data_(data) { }
3093 void* Accept(RegExpVisitor* visitor, void* data) override;
3094 RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override;
3095 RegExpAtom* AsAtom() override;
3096 bool IsAtom() override;
3097 bool IsTextElement() override { return true; }
3098 int min_match() override { return data_.length(); }
3099 int max_match() override { return data_.length(); }
3100 void AppendToText(RegExpText* text, Zone* zone) override;
3101 Vector<const uc16> data() { return data_; }
3102 int length() { return data_.length(); }
3103 private:
3104 Vector<const uc16> data_;
3105 };
3106
3107
3108 class RegExpText final : public RegExpTree {
3109 public:
3110 explicit RegExpText(Zone* zone) : elements_(2, zone), length_(0) {}
3111 void* Accept(RegExpVisitor* visitor, void* data) override;
3112 RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override;
3113 RegExpText* AsText() override;
3114 bool IsText() override;
3115 bool IsTextElement() override { return true; }
3116 int min_match() override { return length_; }
3117 int max_match() override { return length_; }
3118 void AppendToText(RegExpText* text, Zone* zone) override;
3119 void AddElement(TextElement elm, Zone* zone) {
3120 elements_.Add(elm, zone);
3121 length_ += elm.length();
3122 }
3123 ZoneList<TextElement>* elements() { return &elements_; }
3124 private:
3125 ZoneList<TextElement> elements_;
3126 int length_;
3127 };
3128
3129
3130 class RegExpQuantifier final : public RegExpTree {
3131 public:
3132 enum QuantifierType { GREEDY, NON_GREEDY, POSSESSIVE };
3133 RegExpQuantifier(int min, int max, QuantifierType type, RegExpTree* body)
3134 : body_(body),
3135 min_(min),
3136 max_(max),
3137 min_match_(min * body->min_match()),
3138 quantifier_type_(type) {
3139 if (max > 0 && body->max_match() > kInfinity / max) {
3140 max_match_ = kInfinity;
3141 } else {
3142 max_match_ = max * body->max_match();
3143 }
3144 }
3145 void* Accept(RegExpVisitor* visitor, void* data) override;
3146 RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override;
3147 static RegExpNode* ToNode(int min,
3148 int max,
3149 bool is_greedy,
3150 RegExpTree* body,
3151 RegExpCompiler* compiler,
3152 RegExpNode* on_success,
3153 bool not_at_start = false);
3154 RegExpQuantifier* AsQuantifier() override;
3155 Interval CaptureRegisters() override;
3156 bool IsQuantifier() override;
3157 int min_match() override { return min_match_; }
3158 int max_match() override { return max_match_; }
3159 int min() { return min_; }
3160 int max() { return max_; }
3161 bool is_possessive() { return quantifier_type_ == POSSESSIVE; }
3162 bool is_non_greedy() { return quantifier_type_ == NON_GREEDY; }
3163 bool is_greedy() { return quantifier_type_ == GREEDY; }
3164 RegExpTree* body() { return body_; }
3165
3166 private:
3167 RegExpTree* body_;
3168 int min_;
3169 int max_;
3170 int min_match_;
3171 int max_match_;
3172 QuantifierType quantifier_type_;
3173 };
3174
3175
3176 class RegExpCapture final : public RegExpTree {
3177 public:
3178 explicit RegExpCapture(int index) : body_(NULL), index_(index) {}
3179 void* Accept(RegExpVisitor* visitor, void* data) override;
3180 RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override;
3181 static RegExpNode* ToNode(RegExpTree* body,
3182 int index,
3183 RegExpCompiler* compiler,
3184 RegExpNode* on_success);
3185 RegExpCapture* AsCapture() override;
3186 bool IsAnchoredAtStart() override;
3187 bool IsAnchoredAtEnd() override;
3188 Interval CaptureRegisters() override;
3189 bool IsCapture() override;
3190 int min_match() override { return body_->min_match(); }
3191 int max_match() override { return body_->max_match(); }
3192 RegExpTree* body() { return body_; }
3193 void set_body(RegExpTree* body) { body_ = body; }
3194 int index() { return index_; }
3195 static int StartRegister(int index) { return index * 2; }
3196 static int EndRegister(int index) { return index * 2 + 1; }
3197
3198 private:
3199 RegExpTree* body_;
3200 int index_;
3201 };
3202
3203
3204 class RegExpLookaround final : public RegExpTree {
3205 public:
3206 enum Type { LOOKAHEAD, LOOKBEHIND };
3207
3208 RegExpLookaround(RegExpTree* body, bool is_positive, int capture_count,
3209 int capture_from, Type type)
3210 : body_(body),
3211 is_positive_(is_positive),
3212 capture_count_(capture_count),
3213 capture_from_(capture_from),
3214 type_(type) {}
3215
3216 void* Accept(RegExpVisitor* visitor, void* data) override;
3217 RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override;
3218 RegExpLookaround* AsLookaround() override;
3219 Interval CaptureRegisters() override;
3220 bool IsLookaround() override;
3221 bool IsAnchoredAtStart() override;
3222 int min_match() override { return 0; }
3223 int max_match() override { return 0; }
3224 RegExpTree* body() { return body_; }
3225 bool is_positive() { return is_positive_; }
3226 int capture_count() { return capture_count_; }
3227 int capture_from() { return capture_from_; }
3228 Type type() { return type_; }
3229
3230 private:
3231 RegExpTree* body_;
3232 bool is_positive_;
3233 int capture_count_;
3234 int capture_from_;
3235 Type type_;
3236 };
3237
3238
3239 class RegExpBackReference final : public RegExpTree {
3240 public:
3241 explicit RegExpBackReference(RegExpCapture* capture)
3242 : capture_(capture) { }
3243 void* Accept(RegExpVisitor* visitor, void* data) override;
3244 RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override;
3245 RegExpBackReference* AsBackReference() override;
3246 bool IsBackReference() override;
3247 int min_match() override { return 0; }
3248 // The back reference may be recursive, e.g. /(\2)(\1)/. To avoid infinite
3249 // recursion, we give up. Ignorance is bliss.
3250 int max_match() override { return kInfinity; }
3251 int index() { return capture_->index(); }
3252 RegExpCapture* capture() { return capture_; }
3253 private:
3254 RegExpCapture* capture_;
3255 };
3256
3257
3258 class RegExpEmpty final : public RegExpTree {
3259 public:
3260 RegExpEmpty() { }
3261 void* Accept(RegExpVisitor* visitor, void* data) override;
3262 RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override;
3263 RegExpEmpty* AsEmpty() override;
3264 bool IsEmpty() override;
3265 int min_match() override { return 0; }
3266 int max_match() override { return 0; }
3267 };
3268
3269
3270 // ----------------------------------------------------------------------------
3271 // Basic visitor 2906 // Basic visitor
3272 // - leaf node visitors are abstract. 2907 // - leaf node visitors are abstract.
3273 2908
3274 class AstVisitor BASE_EMBEDDED { 2909 class AstVisitor BASE_EMBEDDED {
3275 public: 2910 public:
3276 AstVisitor() {} 2911 AstVisitor() {}
3277 virtual ~AstVisitor() {} 2912 virtual ~AstVisitor() {}
3278 2913
3279 // Stack overflow check and dynamic dispatch. 2914 // Stack overflow check and dynamic dispatch.
3280 virtual void Visit(AstNode* node) = 0; 2915 virtual void Visit(AstNode* node) = 0;
(...skipping 488 matching lines...) Expand 10 before | Expand all | Expand 10 after
3769 // the parser-level zone. 3404 // the parser-level zone.
3770 Zone* parser_zone_; 3405 Zone* parser_zone_;
3771 AstValueFactory* ast_value_factory_; 3406 AstValueFactory* ast_value_factory_;
3772 }; 3407 };
3773 3408
3774 3409
3775 } // namespace internal 3410 } // namespace internal
3776 } // namespace v8 3411 } // namespace v8
3777 3412
3778 #endif // V8_AST_AST_H_ 3413 #endif // V8_AST_AST_H_
OLDNEW
« no previous file with comments | « BUILD.gn ('k') | src/ast/ast.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698