Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(289)

Side by Side Diff: src/ast/ast.cc

Issue 1522353002: [regexp] break recursion in mutually recursive capture/back references. (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master
Patch Set: lazify min/max match computation Created 5 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/ast/ast.h ('k') | src/parsing/parser.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2012 the V8 project authors. All rights reserved. 1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "src/ast/ast.h" 5 #include "src/ast/ast.h"
6 6
7 #include <cmath> // For isfinite. 7 #include <cmath> // For isfinite.
8 #include "src/ast/scopes.h" 8 #include "src/ast/scopes.h"
9 #include "src/builtins.h" 9 #include "src/builtins.h"
10 #include "src/code-stubs.h" 10 #include "src/code-stubs.h"
(...skipping 908 matching lines...) Expand 10 before | Expand all | Expand 10 after
919 bool RegExpCapture::IsAnchoredAtStart() { 919 bool RegExpCapture::IsAnchoredAtStart() {
920 return body()->IsAnchoredAtStart(); 920 return body()->IsAnchoredAtStart();
921 } 921 }
922 922
923 923
924 bool RegExpCapture::IsAnchoredAtEnd() { 924 bool RegExpCapture::IsAnchoredAtEnd() {
925 return body()->IsAnchoredAtEnd(); 925 return body()->IsAnchoredAtEnd();
926 } 926 }
927 927
928 928
929 int RegExpBackReference::max_match() {
930 switch (max_match_) {
931 case kRecursionMarker:
932 // The back reference references a capture that references this back
933 // reference, leading to a recursion, e.g. /(\2)(\1)/.
934 // Such captures cannot be satisfied and have the length 0.
erikcorry 2015/12/15 09:45:43 This may well just be a bug in a comment, but here
Yang 2015/12/15 09:52:18 You are right. It should be "Such back references
erikcorry 2015/12/15 10:18:18 My example regexp seems to contradict the revised
935 max_match_ = 0;
936 break;
937 case kUninitialized:
938 // The max match length of a back reference is the length of the capture.
939 // Mark this back reference to break possible recursion before
940 // descending into the capture.
941 max_match_ = kRecursionMarker;
942 // The capture has been parsed.
943 DCHECK_NOT_NULL(capture_->body());
944 max_match_ = capture_->max_match();
945 break;
946 default:
947 // max_match_ has already been found.
948 break;
949 }
950 return max_match_;
951 }
952
953
929 // Convert regular expression trees to a simple sexp representation. 954 // Convert regular expression trees to a simple sexp representation.
930 // This representation should be different from the input grammar 955 // This representation should be different from the input grammar
931 // in as many cases as possible, to make it more difficult for incorrect 956 // in as many cases as possible, to make it more difficult for incorrect
932 // parses to look as correct ones which is likely if the input and 957 // parses to look as correct ones which is likely if the input and
933 // output formats are alike. 958 // output formats are alike.
934 class RegExpUnparser final : public RegExpVisitor { 959 class RegExpUnparser final : public RegExpVisitor {
935 public: 960 public:
936 RegExpUnparser(std::ostream& os, Zone* zone) : os_(os), zone_(zone) {} 961 RegExpUnparser(std::ostream& os, Zone* zone) : os_(os), zone_(zone) {}
937 void VisitCharacterRange(CharacterRange that); 962 void VisitCharacterRange(CharacterRange that);
938 #define MAKE_CASE(Name) void* Visit##Name(RegExp##Name*, void* data) override; 963 #define MAKE_CASE(Name) void* Visit##Name(RegExp##Name*, void* data) override;
(...skipping 145 matching lines...) Expand 10 before | Expand all | Expand 10 after
1084 } 1109 }
1085 1110
1086 1111
1087 std::ostream& RegExpTree::Print(std::ostream& os, Zone* zone) { // NOLINT 1112 std::ostream& RegExpTree::Print(std::ostream& os, Zone* zone) { // NOLINT
1088 RegExpUnparser unparser(os, zone); 1113 RegExpUnparser unparser(os, zone);
1089 Accept(&unparser, NULL); 1114 Accept(&unparser, NULL);
1090 return os; 1115 return os;
1091 } 1116 }
1092 1117
1093 1118
1094 RegExpDisjunction::RegExpDisjunction(ZoneList<RegExpTree*>* alternatives) 1119 void RegExpDisjunction::LazilyComputeMatchLengths() {
1095 : alternatives_(alternatives) { 1120 // We cannot compute this in the constructor because at the time of parsing
1096 DCHECK(alternatives->length() > 1); 1121 // back referenced captures have not yet been parsed yet.
1097 RegExpTree* first_alternative = alternatives->at(0); 1122 RegExpTree* first_alternative = alternatives_->at(0);
1098 min_match_ = first_alternative->min_match(); 1123 min_match_ = first_alternative->min_match();
erikcorry 2015/12/15 09:45:43 This looks a bit dodgy. We are not done calculati
Yang 2015/12/15 09:52:18 I think this is fine. We can only have a recursion
1099 max_match_ = first_alternative->max_match(); 1124 max_match_ = first_alternative->max_match();
1100 for (int i = 1; i < alternatives->length(); i++) { 1125 for (int i = 1; i < alternatives_->length(); i++) {
1101 RegExpTree* alternative = alternatives->at(i); 1126 RegExpTree* alternative = alternatives_->at(i);
1102 min_match_ = Min(min_match_, alternative->min_match()); 1127 min_match_ = Min(min_match_, alternative->min_match());
1103 max_match_ = Max(max_match_, alternative->max_match()); 1128 max_match_ = Max(max_match_, alternative->max_match());
1104 } 1129 }
1105 } 1130 }
1106 1131
1107 1132
1108 static int IncreaseBy(int previous, int increase) { 1133 static int IncreaseBy(int previous, int increase) {
1109 if (RegExpTree::kInfinity - previous < increase) { 1134 if (RegExpTree::kInfinity - previous < increase) {
1110 return RegExpTree::kInfinity; 1135 return RegExpTree::kInfinity;
1111 } else { 1136 } else {
1112 return previous + increase; 1137 return previous + increase;
1113 } 1138 }
1114 } 1139 }
1115 1140
1116 RegExpAlternative::RegExpAlternative(ZoneList<RegExpTree*>* nodes) 1141
1117 : nodes_(nodes) { 1142 void RegExpAlternative::LazilyComputeMatchLengths() {
1118 DCHECK(nodes->length() > 1); 1143 // We cannot compute this in the constructor because at the time of parsing
1144 // back referenced captures have not yet been parsed yet.
1119 min_match_ = 0; 1145 min_match_ = 0;
1120 max_match_ = 0; 1146 max_match_ = 0;
1121 for (int i = 0; i < nodes->length(); i++) { 1147 for (int i = 0; i < nodes_->length(); i++) {
1122 RegExpTree* node = nodes->at(i); 1148 RegExpTree* node = nodes_->at(i);
1123 int node_min_match = node->min_match(); 1149 int node_min_match = node->min_match();
1124 min_match_ = IncreaseBy(min_match_, node_min_match); 1150 min_match_ = IncreaseBy(min_match_, node_min_match);
1125 int node_max_match = node->max_match(); 1151 int node_max_match = node->max_match();
1126 max_match_ = IncreaseBy(max_match_, node_max_match); 1152 max_match_ = IncreaseBy(max_match_, node_max_match);
1127 } 1153 }
1128 } 1154 }
1129 1155
1130 1156
1157 void RegExpQuantifier::LazilyComputeMatchLengths() {
1158 // We cannot compute this in the constructor because at the time of parsing
1159 // back referenced captures have not yet been parsed yet.
1160 min_match_ = min_ * body_->min_match();
1161 if (max_ > 0 && body_->max_match() > kInfinity / max_) {
1162 max_match_ = kInfinity;
1163 } else {
1164 max_match_ = max_ * body_->max_match();
1165 }
1166 }
1167
1168
1131 CaseClause::CaseClause(Zone* zone, Expression* label, 1169 CaseClause::CaseClause(Zone* zone, Expression* label,
1132 ZoneList<Statement*>* statements, int pos) 1170 ZoneList<Statement*>* statements, int pos)
1133 : Expression(zone, pos), 1171 : Expression(zone, pos),
1134 label_(label), 1172 label_(label),
1135 statements_(statements), 1173 statements_(statements),
1136 compare_type_(Type::None(zone)) {} 1174 compare_type_(Type::None(zone)) {}
1137 1175
1138 1176
1139 uint32_t Literal::Hash() { 1177 uint32_t Literal::Hash() {
1140 return raw_value()->IsString() 1178 return raw_value()->IsString()
1141 ? raw_value()->AsString()->hash() 1179 ? raw_value()->AsString()->hash()
1142 : ComputeLongHash(double_to_uint64(raw_value()->AsNumber())); 1180 : ComputeLongHash(double_to_uint64(raw_value()->AsNumber()));
1143 } 1181 }
1144 1182
1145 1183
1146 // static 1184 // static
1147 bool Literal::Match(void* literal1, void* literal2) { 1185 bool Literal::Match(void* literal1, void* literal2) {
1148 const AstValue* x = static_cast<Literal*>(literal1)->raw_value(); 1186 const AstValue* x = static_cast<Literal*>(literal1)->raw_value();
1149 const AstValue* y = static_cast<Literal*>(literal2)->raw_value(); 1187 const AstValue* y = static_cast<Literal*>(literal2)->raw_value();
1150 return (x->IsString() && y->IsString() && x->AsString() == y->AsString()) || 1188 return (x->IsString() && y->IsString() && x->AsString() == y->AsString()) ||
1151 (x->IsNumber() && y->IsNumber() && x->AsNumber() == y->AsNumber()); 1189 (x->IsNumber() && y->IsNumber() && x->AsNumber() == y->AsNumber());
1152 } 1190 }
1153 1191
1154 1192
1155 } // namespace internal 1193 } // namespace internal
1156 } // namespace v8 1194 } // namespace v8
OLDNEW
« no previous file with comments | « src/ast/ast.h ('k') | src/parsing/parser.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698