Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(320)

Side by Side Diff: src/ast/ast.h

Issue 1522353002: [regexp] break recursion in mutually recursive capture/back references. (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master
Patch Set: lazify min/max match computation Created 5 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | src/ast/ast.cc » ('j') | src/ast/ast.cc » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2012 the V8 project authors. All rights reserved. 1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #ifndef V8_AST_AST_H_ 5 #ifndef V8_AST_AST_H_
6 #define V8_AST_AST_H_ 6 #define V8_AST_AST_H_
7 7
8 #include "src/assembler.h" 8 #include "src/assembler.h"
9 #include "src/ast/ast-value-factory.h" 9 #include "src/ast/ast-value-factory.h"
10 #include "src/ast/modules.h" 10 #include "src/ast/modules.h"
(...skipping 2856 matching lines...) Expand 10 before | Expand all | Expand 10 after
2867 #define MAKE_CASE(Name) \ 2867 #define MAKE_CASE(Name) \
2868 virtual void* Visit##Name(RegExp##Name*, void* data) = 0; 2868 virtual void* Visit##Name(RegExp##Name*, void* data) = 0;
2869 FOR_EACH_REG_EXP_TREE_TYPE(MAKE_CASE) 2869 FOR_EACH_REG_EXP_TREE_TYPE(MAKE_CASE)
2870 #undef MAKE_CASE 2870 #undef MAKE_CASE
2871 }; 2871 };
2872 2872
2873 2873
2874 class RegExpTree : public ZoneObject { 2874 class RegExpTree : public ZoneObject {
2875 public: 2875 public:
2876 static const int kInfinity = kMaxInt; 2876 static const int kInfinity = kMaxInt;
2877 static const int kUninitialized = -1;
2877 virtual ~RegExpTree() {} 2878 virtual ~RegExpTree() {}
2878 virtual void* Accept(RegExpVisitor* visitor, void* data) = 0; 2879 virtual void* Accept(RegExpVisitor* visitor, void* data) = 0;
2879 virtual RegExpNode* ToNode(RegExpCompiler* compiler, 2880 virtual RegExpNode* ToNode(RegExpCompiler* compiler,
2880 RegExpNode* on_success) = 0; 2881 RegExpNode* on_success) = 0;
2881 virtual bool IsTextElement() { return false; } 2882 virtual bool IsTextElement() { return false; }
2882 virtual bool IsAnchoredAtStart() { return false; } 2883 virtual bool IsAnchoredAtStart() { return false; }
2883 virtual bool IsAnchoredAtEnd() { return false; } 2884 virtual bool IsAnchoredAtEnd() { return false; }
2884 virtual int min_match() = 0; 2885 virtual int min_match() = 0;
2885 virtual int max_match() = 0; 2886 virtual int max_match() = 0;
2886 // Returns the interval of registers used for captures within this 2887 // Returns the interval of registers used for captures within this
2887 // expression. 2888 // expression.
2888 virtual Interval CaptureRegisters() { return Interval::Empty(); } 2889 virtual Interval CaptureRegisters() { return Interval::Empty(); }
2889 virtual void AppendToText(RegExpText* text, Zone* zone); 2890 virtual void AppendToText(RegExpText* text, Zone* zone);
2890 std::ostream& Print(std::ostream& os, Zone* zone); // NOLINT 2891 std::ostream& Print(std::ostream& os, Zone* zone); // NOLINT
2891 #define MAKE_ASTYPE(Name) \ 2892 #define MAKE_ASTYPE(Name) \
2892 virtual RegExp##Name* As##Name(); \ 2893 virtual RegExp##Name* As##Name(); \
2893 virtual bool Is##Name(); 2894 virtual bool Is##Name();
2894 FOR_EACH_REG_EXP_TREE_TYPE(MAKE_ASTYPE) 2895 FOR_EACH_REG_EXP_TREE_TYPE(MAKE_ASTYPE)
2895 #undef MAKE_ASTYPE 2896 #undef MAKE_ASTYPE
2896 }; 2897 };
2897 2898
2898 2899
2899 class RegExpDisjunction final : public RegExpTree { 2900 class RegExpDisjunction final : public RegExpTree {
2900 public: 2901 public:
2901 explicit RegExpDisjunction(ZoneList<RegExpTree*>* alternatives); 2902 explicit RegExpDisjunction(ZoneList<RegExpTree*>* alternatives)
2903 : alternatives_(alternatives),
2904 min_match_(kUninitialized),
2905 max_match_(kUninitialized) {
2906 DCHECK(alternatives->length() > 1);
2907 }
2902 void* Accept(RegExpVisitor* visitor, void* data) override; 2908 void* Accept(RegExpVisitor* visitor, void* data) override;
2903 RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override; 2909 RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override;
2904 RegExpDisjunction* AsDisjunction() override; 2910 RegExpDisjunction* AsDisjunction() override;
2905 Interval CaptureRegisters() override; 2911 Interval CaptureRegisters() override;
2906 bool IsDisjunction() override; 2912 bool IsDisjunction() override;
2907 bool IsAnchoredAtStart() override; 2913 bool IsAnchoredAtStart() override;
2908 bool IsAnchoredAtEnd() override; 2914 bool IsAnchoredAtEnd() override;
2909 int min_match() override { return min_match_; } 2915 int min_match() override {
2910 int max_match() override { return max_match_; } 2916 if (min_match_ == kUninitialized) LazilyComputeMatchLengths();
2917 return min_match_;
2918 }
2919 int max_match() override {
2920 if (max_match_ == kUninitialized) LazilyComputeMatchLengths();
2921 return max_match_;
2922 }
2911 ZoneList<RegExpTree*>* alternatives() { return alternatives_; } 2923 ZoneList<RegExpTree*>* alternatives() { return alternatives_; }
2924
2912 private: 2925 private:
2926 void LazilyComputeMatchLengths();
2913 bool SortConsecutiveAtoms(RegExpCompiler* compiler); 2927 bool SortConsecutiveAtoms(RegExpCompiler* compiler);
2914 void RationalizeConsecutiveAtoms(RegExpCompiler* compiler); 2928 void RationalizeConsecutiveAtoms(RegExpCompiler* compiler);
2915 void FixSingleCharacterDisjunctions(RegExpCompiler* compiler); 2929 void FixSingleCharacterDisjunctions(RegExpCompiler* compiler);
2916 ZoneList<RegExpTree*>* alternatives_; 2930 ZoneList<RegExpTree*>* alternatives_;
2917 int min_match_; 2931 int min_match_;
2918 int max_match_; 2932 int max_match_;
2919 }; 2933 };
2920 2934
2921 2935
2922 class RegExpAlternative final : public RegExpTree { 2936 class RegExpAlternative final : public RegExpTree {
2923 public: 2937 public:
2924 explicit RegExpAlternative(ZoneList<RegExpTree*>* nodes); 2938 explicit RegExpAlternative(ZoneList<RegExpTree*>* nodes)
2939 : nodes_(nodes), min_match_(kUninitialized), max_match_(kUninitialized) {
2940 DCHECK(nodes->length() > 1);
2941 }
2925 void* Accept(RegExpVisitor* visitor, void* data) override; 2942 void* Accept(RegExpVisitor* visitor, void* data) override;
2926 RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override; 2943 RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override;
2927 RegExpAlternative* AsAlternative() override; 2944 RegExpAlternative* AsAlternative() override;
2928 Interval CaptureRegisters() override; 2945 Interval CaptureRegisters() override;
2929 bool IsAlternative() override; 2946 bool IsAlternative() override;
2930 bool IsAnchoredAtStart() override; 2947 bool IsAnchoredAtStart() override;
2931 bool IsAnchoredAtEnd() override; 2948 bool IsAnchoredAtEnd() override;
2932 int min_match() override { return min_match_; } 2949 int min_match() override {
2933 int max_match() override { return max_match_; } 2950 if (min_match_ == kUninitialized) LazilyComputeMatchLengths();
2951 return min_match_;
2952 }
2953 int max_match() override {
2954 if (max_match_ == kUninitialized) LazilyComputeMatchLengths();
2955 return max_match_;
2956 }
2934 ZoneList<RegExpTree*>* nodes() { return nodes_; } 2957 ZoneList<RegExpTree*>* nodes() { return nodes_; }
2958
2935 private: 2959 private:
2960 void LazilyComputeMatchLengths();
2936 ZoneList<RegExpTree*>* nodes_; 2961 ZoneList<RegExpTree*>* nodes_;
2937 int min_match_; 2962 int min_match_;
2938 int max_match_; 2963 int max_match_;
2939 }; 2964 };
2940 2965
2941 2966
2942 class RegExpAssertion final : public RegExpTree { 2967 class RegExpAssertion final : public RegExpTree {
2943 public: 2968 public:
2944 enum AssertionType { 2969 enum AssertionType {
2945 START_OF_LINE, 2970 START_OF_LINE,
(...skipping 122 matching lines...) Expand 10 before | Expand all | Expand 10 after
3068 }; 3093 };
3069 3094
3070 3095
3071 class RegExpQuantifier final : public RegExpTree { 3096 class RegExpQuantifier final : public RegExpTree {
3072 public: 3097 public:
3073 enum QuantifierType { GREEDY, NON_GREEDY, POSSESSIVE }; 3098 enum QuantifierType { GREEDY, NON_GREEDY, POSSESSIVE };
3074 RegExpQuantifier(int min, int max, QuantifierType type, RegExpTree* body) 3099 RegExpQuantifier(int min, int max, QuantifierType type, RegExpTree* body)
3075 : body_(body), 3100 : body_(body),
3076 min_(min), 3101 min_(min),
3077 max_(max), 3102 max_(max),
3078 min_match_(min * body->min_match()), 3103 min_match_(kUninitialized),
3079 quantifier_type_(type) { 3104 max_match_(kUninitialized),
3080 if (max > 0 && body->max_match() > kInfinity / max) { 3105 quantifier_type_(type) {}
3081 max_match_ = kInfinity;
3082 } else {
3083 max_match_ = max * body->max_match();
3084 }
3085 }
3086 void* Accept(RegExpVisitor* visitor, void* data) override; 3106 void* Accept(RegExpVisitor* visitor, void* data) override;
3087 RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override; 3107 RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override;
3088 static RegExpNode* ToNode(int min, 3108 static RegExpNode* ToNode(int min,
3089 int max, 3109 int max,
3090 bool is_greedy, 3110 bool is_greedy,
3091 RegExpTree* body, 3111 RegExpTree* body,
3092 RegExpCompiler* compiler, 3112 RegExpCompiler* compiler,
3093 RegExpNode* on_success, 3113 RegExpNode* on_success,
3094 bool not_at_start = false); 3114 bool not_at_start = false);
3095 RegExpQuantifier* AsQuantifier() override; 3115 RegExpQuantifier* AsQuantifier() override;
3096 Interval CaptureRegisters() override; 3116 Interval CaptureRegisters() override;
3097 bool IsQuantifier() override; 3117 bool IsQuantifier() override;
3098 int min_match() override { return min_match_; } 3118 int min_match() override {
3099 int max_match() override { return max_match_; } 3119 if (min_match_ == kUninitialized) LazilyComputeMatchLengths();
3120 return min_match_;
3121 }
3122 int max_match() override {
3123 if (max_match_ == kUninitialized) LazilyComputeMatchLengths();
3124 return max_match_;
3125 }
3100 int min() { return min_; } 3126 int min() { return min_; }
3101 int max() { return max_; } 3127 int max() { return max_; }
3102 bool is_possessive() { return quantifier_type_ == POSSESSIVE; } 3128 bool is_possessive() { return quantifier_type_ == POSSESSIVE; }
3103 bool is_non_greedy() { return quantifier_type_ == NON_GREEDY; } 3129 bool is_non_greedy() { return quantifier_type_ == NON_GREEDY; }
3104 bool is_greedy() { return quantifier_type_ == GREEDY; } 3130 bool is_greedy() { return quantifier_type_ == GREEDY; }
3105 RegExpTree* body() { return body_; } 3131 RegExpTree* body() { return body_; }
3106 3132
3107 private: 3133 private:
3134 void LazilyComputeMatchLengths();
3108 RegExpTree* body_; 3135 RegExpTree* body_;
3109 int min_; 3136 int min_;
3110 int max_; 3137 int max_;
3111 int min_match_; 3138 int min_match_;
3112 int max_match_; 3139 int max_match_;
3113 QuantifierType quantifier_type_; 3140 QuantifierType quantifier_type_;
3114 }; 3141 };
3115 3142
3116 3143
3117 class RegExpCapture final : public RegExpTree { 3144 class RegExpCapture final : public RegExpTree {
(...skipping 55 matching lines...) Expand 10 before | Expand all | Expand 10 after
3173 bool is_positive_; 3200 bool is_positive_;
3174 int capture_count_; 3201 int capture_count_;
3175 int capture_from_; 3202 int capture_from_;
3176 Type type_; 3203 Type type_;
3177 }; 3204 };
3178 3205
3179 3206
3180 class RegExpBackReference final : public RegExpTree { 3207 class RegExpBackReference final : public RegExpTree {
3181 public: 3208 public:
3182 explicit RegExpBackReference(RegExpCapture* capture) 3209 explicit RegExpBackReference(RegExpCapture* capture)
3183 : capture_(capture) { } 3210 : capture_(capture), max_match_(kUninitialized) {}
3184 void* Accept(RegExpVisitor* visitor, void* data) override; 3211 void* Accept(RegExpVisitor* visitor, void* data) override;
3185 RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override; 3212 RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override;
3186 RegExpBackReference* AsBackReference() override; 3213 RegExpBackReference* AsBackReference() override;
3187 bool IsBackReference() override; 3214 bool IsBackReference() override;
3188 int min_match() override { return 0; } 3215 int min_match() override { return 0; }
3189 // The capture may not be completely parsed yet, if the reference occurs 3216 int max_match() override;
3190 // before the capture. In the ordinary case, nothing has been captured yet,
3191 // so the back reference must have the length 0. If the back reference is
3192 // inside a lookbehind, effectively making it a forward reference, we return
3193 // 0 since lookbehinds have a length of 0.
3194 int max_match() override {
3195 return capture_->body() ? capture_->max_match() : 0;
3196 }
3197 int index() { return capture_->index(); } 3217 int index() { return capture_->index(); }
3198 RegExpCapture* capture() { return capture_; } 3218 RegExpCapture* capture() { return capture_; }
3199 private: 3219 private:
3220 static const int kRecursionMarker = -2;
3221 STATIC_ASSERT(kRecursionMarker != kUninitialized);
3222
3200 RegExpCapture* capture_; 3223 RegExpCapture* capture_;
3224 int max_match_;
3201 }; 3225 };
3202 3226
3203 3227
3204 class RegExpEmpty final : public RegExpTree { 3228 class RegExpEmpty final : public RegExpTree {
3205 public: 3229 public:
3206 RegExpEmpty() { } 3230 RegExpEmpty() { }
3207 void* Accept(RegExpVisitor* visitor, void* data) override; 3231 void* Accept(RegExpVisitor* visitor, void* data) override;
3208 RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override; 3232 RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override;
3209 RegExpEmpty* AsEmpty() override; 3233 RegExpEmpty* AsEmpty() override;
3210 bool IsEmpty() override; 3234 bool IsEmpty() override;
(...skipping 504 matching lines...) Expand 10 before | Expand all | Expand 10 after
3715 // the parser-level zone. 3739 // the parser-level zone.
3716 Zone* parser_zone_; 3740 Zone* parser_zone_;
3717 AstValueFactory* ast_value_factory_; 3741 AstValueFactory* ast_value_factory_;
3718 }; 3742 };
3719 3743
3720 3744
3721 } // namespace internal 3745 } // namespace internal
3722 } // namespace v8 3746 } // namespace v8
3723 3747
3724 #endif // V8_AST_AST_H_ 3748 #endif // V8_AST_AST_H_
OLDNEW
« no previous file with comments | « no previous file | src/ast/ast.cc » ('j') | src/ast/ast.cc » ('J')

Powered by Google App Engine
This is Rietveld 408576698