OLD | NEW |
(Empty) | |
| 1 // Copyright 2016 the V8 project authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. |
| 4 |
| 5 #include "src/ostreams.h" |
| 6 #include "src/regexp/regexp-ast.h" |
| 7 |
| 8 namespace v8 { |
| 9 namespace internal { |
| 10 |
| 11 #define MAKE_ACCEPT(Name) \ |
| 12 void* RegExp##Name::Accept(RegExpVisitor* visitor, void* data) { \ |
| 13 return visitor->Visit##Name(this, data); \ |
| 14 } |
| 15 FOR_EACH_REG_EXP_TREE_TYPE(MAKE_ACCEPT) |
| 16 #undef MAKE_ACCEPT |
| 17 |
| 18 #define MAKE_TYPE_CASE(Name) \ |
| 19 RegExp##Name* RegExpTree::As##Name() { return NULL; } \ |
| 20 bool RegExpTree::Is##Name() { return false; } |
| 21 FOR_EACH_REG_EXP_TREE_TYPE(MAKE_TYPE_CASE) |
| 22 #undef MAKE_TYPE_CASE |
| 23 |
| 24 #define MAKE_TYPE_CASE(Name) \ |
| 25 RegExp##Name* RegExp##Name::As##Name() { return this; } \ |
| 26 bool RegExp##Name::Is##Name() { return true; } |
| 27 FOR_EACH_REG_EXP_TREE_TYPE(MAKE_TYPE_CASE) |
| 28 #undef MAKE_TYPE_CASE |
| 29 |
| 30 |
| 31 static Interval ListCaptureRegisters(ZoneList<RegExpTree*>* children) { |
| 32 Interval result = Interval::Empty(); |
| 33 for (int i = 0; i < children->length(); i++) |
| 34 result = result.Union(children->at(i)->CaptureRegisters()); |
| 35 return result; |
| 36 } |
| 37 |
| 38 |
| 39 Interval RegExpAlternative::CaptureRegisters() { |
| 40 return ListCaptureRegisters(nodes()); |
| 41 } |
| 42 |
| 43 |
| 44 Interval RegExpDisjunction::CaptureRegisters() { |
| 45 return ListCaptureRegisters(alternatives()); |
| 46 } |
| 47 |
| 48 |
| 49 Interval RegExpLookaround::CaptureRegisters() { |
| 50 return body()->CaptureRegisters(); |
| 51 } |
| 52 |
| 53 |
| 54 Interval RegExpCapture::CaptureRegisters() { |
| 55 Interval self(StartRegister(index()), EndRegister(index())); |
| 56 return self.Union(body()->CaptureRegisters()); |
| 57 } |
| 58 |
| 59 |
| 60 Interval RegExpQuantifier::CaptureRegisters() { |
| 61 return body()->CaptureRegisters(); |
| 62 } |
| 63 |
| 64 |
| 65 bool RegExpAssertion::IsAnchoredAtStart() { |
| 66 return assertion_type() == RegExpAssertion::START_OF_INPUT; |
| 67 } |
| 68 |
| 69 |
| 70 bool RegExpAssertion::IsAnchoredAtEnd() { |
| 71 return assertion_type() == RegExpAssertion::END_OF_INPUT; |
| 72 } |
| 73 |
| 74 |
| 75 bool RegExpAlternative::IsAnchoredAtStart() { |
| 76 ZoneList<RegExpTree*>* nodes = this->nodes(); |
| 77 for (int i = 0; i < nodes->length(); i++) { |
| 78 RegExpTree* node = nodes->at(i); |
| 79 if (node->IsAnchoredAtStart()) { |
| 80 return true; |
| 81 } |
| 82 if (node->max_match() > 0) { |
| 83 return false; |
| 84 } |
| 85 } |
| 86 return false; |
| 87 } |
| 88 |
| 89 |
| 90 bool RegExpAlternative::IsAnchoredAtEnd() { |
| 91 ZoneList<RegExpTree*>* nodes = this->nodes(); |
| 92 for (int i = nodes->length() - 1; i >= 0; i--) { |
| 93 RegExpTree* node = nodes->at(i); |
| 94 if (node->IsAnchoredAtEnd()) { |
| 95 return true; |
| 96 } |
| 97 if (node->max_match() > 0) { |
| 98 return false; |
| 99 } |
| 100 } |
| 101 return false; |
| 102 } |
| 103 |
| 104 |
| 105 bool RegExpDisjunction::IsAnchoredAtStart() { |
| 106 ZoneList<RegExpTree*>* alternatives = this->alternatives(); |
| 107 for (int i = 0; i < alternatives->length(); i++) { |
| 108 if (!alternatives->at(i)->IsAnchoredAtStart()) return false; |
| 109 } |
| 110 return true; |
| 111 } |
| 112 |
| 113 |
| 114 bool RegExpDisjunction::IsAnchoredAtEnd() { |
| 115 ZoneList<RegExpTree*>* alternatives = this->alternatives(); |
| 116 for (int i = 0; i < alternatives->length(); i++) { |
| 117 if (!alternatives->at(i)->IsAnchoredAtEnd()) return false; |
| 118 } |
| 119 return true; |
| 120 } |
| 121 |
| 122 |
| 123 bool RegExpLookaround::IsAnchoredAtStart() { |
| 124 return is_positive() && type() == LOOKAHEAD && body()->IsAnchoredAtStart(); |
| 125 } |
| 126 |
| 127 |
| 128 bool RegExpCapture::IsAnchoredAtStart() { return body()->IsAnchoredAtStart(); } |
| 129 |
| 130 |
| 131 bool RegExpCapture::IsAnchoredAtEnd() { return body()->IsAnchoredAtEnd(); } |
| 132 |
| 133 |
| 134 // Convert regular expression trees to a simple sexp representation. |
| 135 // This representation should be different from the input grammar |
| 136 // in as many cases as possible, to make it more difficult for incorrect |
| 137 // parses to look as correct ones which is likely if the input and |
| 138 // output formats are alike. |
| 139 class RegExpUnparser final : public RegExpVisitor { |
| 140 public: |
| 141 RegExpUnparser(std::ostream& os, Zone* zone) : os_(os), zone_(zone) {} |
| 142 void VisitCharacterRange(CharacterRange that); |
| 143 #define MAKE_CASE(Name) void* Visit##Name(RegExp##Name*, void* data) override; |
| 144 FOR_EACH_REG_EXP_TREE_TYPE(MAKE_CASE) |
| 145 #undef MAKE_CASE |
| 146 private: |
| 147 std::ostream& os_; |
| 148 Zone* zone_; |
| 149 }; |
| 150 |
| 151 |
| 152 void* RegExpUnparser::VisitDisjunction(RegExpDisjunction* that, void* data) { |
| 153 os_ << "(|"; |
| 154 for (int i = 0; i < that->alternatives()->length(); i++) { |
| 155 os_ << " "; |
| 156 that->alternatives()->at(i)->Accept(this, data); |
| 157 } |
| 158 os_ << ")"; |
| 159 return NULL; |
| 160 } |
| 161 |
| 162 |
| 163 void* RegExpUnparser::VisitAlternative(RegExpAlternative* that, void* data) { |
| 164 os_ << "(:"; |
| 165 for (int i = 0; i < that->nodes()->length(); i++) { |
| 166 os_ << " "; |
| 167 that->nodes()->at(i)->Accept(this, data); |
| 168 } |
| 169 os_ << ")"; |
| 170 return NULL; |
| 171 } |
| 172 |
| 173 |
| 174 void RegExpUnparser::VisitCharacterRange(CharacterRange that) { |
| 175 os_ << AsUC16(that.from()); |
| 176 if (!that.IsSingleton()) { |
| 177 os_ << "-" << AsUC16(that.to()); |
| 178 } |
| 179 } |
| 180 |
| 181 |
| 182 void* RegExpUnparser::VisitCharacterClass(RegExpCharacterClass* that, |
| 183 void* data) { |
| 184 if (that->is_negated()) os_ << "^"; |
| 185 os_ << "["; |
| 186 for (int i = 0; i < that->ranges(zone_)->length(); i++) { |
| 187 if (i > 0) os_ << " "; |
| 188 VisitCharacterRange(that->ranges(zone_)->at(i)); |
| 189 } |
| 190 os_ << "]"; |
| 191 return NULL; |
| 192 } |
| 193 |
| 194 |
| 195 void* RegExpUnparser::VisitAssertion(RegExpAssertion* that, void* data) { |
| 196 switch (that->assertion_type()) { |
| 197 case RegExpAssertion::START_OF_INPUT: |
| 198 os_ << "@^i"; |
| 199 break; |
| 200 case RegExpAssertion::END_OF_INPUT: |
| 201 os_ << "@$i"; |
| 202 break; |
| 203 case RegExpAssertion::START_OF_LINE: |
| 204 os_ << "@^l"; |
| 205 break; |
| 206 case RegExpAssertion::END_OF_LINE: |
| 207 os_ << "@$l"; |
| 208 break; |
| 209 case RegExpAssertion::BOUNDARY: |
| 210 os_ << "@b"; |
| 211 break; |
| 212 case RegExpAssertion::NON_BOUNDARY: |
| 213 os_ << "@B"; |
| 214 break; |
| 215 } |
| 216 return NULL; |
| 217 } |
| 218 |
| 219 |
| 220 void* RegExpUnparser::VisitAtom(RegExpAtom* that, void* data) { |
| 221 os_ << "'"; |
| 222 Vector<const uc16> chardata = that->data(); |
| 223 for (int i = 0; i < chardata.length(); i++) { |
| 224 os_ << AsUC16(chardata[i]); |
| 225 } |
| 226 os_ << "'"; |
| 227 return NULL; |
| 228 } |
| 229 |
| 230 |
| 231 void* RegExpUnparser::VisitText(RegExpText* that, void* data) { |
| 232 if (that->elements()->length() == 1) { |
| 233 that->elements()->at(0).tree()->Accept(this, data); |
| 234 } else { |
| 235 os_ << "(!"; |
| 236 for (int i = 0; i < that->elements()->length(); i++) { |
| 237 os_ << " "; |
| 238 that->elements()->at(i).tree()->Accept(this, data); |
| 239 } |
| 240 os_ << ")"; |
| 241 } |
| 242 return NULL; |
| 243 } |
| 244 |
| 245 |
| 246 void* RegExpUnparser::VisitQuantifier(RegExpQuantifier* that, void* data) { |
| 247 os_ << "(# " << that->min() << " "; |
| 248 if (that->max() == RegExpTree::kInfinity) { |
| 249 os_ << "- "; |
| 250 } else { |
| 251 os_ << that->max() << " "; |
| 252 } |
| 253 os_ << (that->is_greedy() ? "g " : that->is_possessive() ? "p " : "n "); |
| 254 that->body()->Accept(this, data); |
| 255 os_ << ")"; |
| 256 return NULL; |
| 257 } |
| 258 |
| 259 |
| 260 void* RegExpUnparser::VisitCapture(RegExpCapture* that, void* data) { |
| 261 os_ << "(^ "; |
| 262 that->body()->Accept(this, data); |
| 263 os_ << ")"; |
| 264 return NULL; |
| 265 } |
| 266 |
| 267 |
| 268 void* RegExpUnparser::VisitLookaround(RegExpLookaround* that, void* data) { |
| 269 os_ << "("; |
| 270 os_ << (that->type() == RegExpLookaround::LOOKAHEAD ? "->" : "<-"); |
| 271 os_ << (that->is_positive() ? " + " : " - "); |
| 272 that->body()->Accept(this, data); |
| 273 os_ << ")"; |
| 274 return NULL; |
| 275 } |
| 276 |
| 277 |
| 278 void* RegExpUnparser::VisitBackReference(RegExpBackReference* that, |
| 279 void* data) { |
| 280 os_ << "(<- " << that->index() << ")"; |
| 281 return NULL; |
| 282 } |
| 283 |
| 284 |
| 285 void* RegExpUnparser::VisitEmpty(RegExpEmpty* that, void* data) { |
| 286 os_ << '%'; |
| 287 return NULL; |
| 288 } |
| 289 |
| 290 |
| 291 std::ostream& RegExpTree::Print(std::ostream& os, Zone* zone) { // NOLINT |
| 292 RegExpUnparser unparser(os, zone); |
| 293 Accept(&unparser, NULL); |
| 294 return os; |
| 295 } |
| 296 |
| 297 |
| 298 RegExpDisjunction::RegExpDisjunction(ZoneList<RegExpTree*>* alternatives) |
| 299 : alternatives_(alternatives) { |
| 300 DCHECK(alternatives->length() > 1); |
| 301 RegExpTree* first_alternative = alternatives->at(0); |
| 302 min_match_ = first_alternative->min_match(); |
| 303 max_match_ = first_alternative->max_match(); |
| 304 for (int i = 1; i < alternatives->length(); i++) { |
| 305 RegExpTree* alternative = alternatives->at(i); |
| 306 min_match_ = Min(min_match_, alternative->min_match()); |
| 307 max_match_ = Max(max_match_, alternative->max_match()); |
| 308 } |
| 309 } |
| 310 |
| 311 |
| 312 static int IncreaseBy(int previous, int increase) { |
| 313 if (RegExpTree::kInfinity - previous < increase) { |
| 314 return RegExpTree::kInfinity; |
| 315 } else { |
| 316 return previous + increase; |
| 317 } |
| 318 } |
| 319 |
| 320 |
| 321 RegExpAlternative::RegExpAlternative(ZoneList<RegExpTree*>* nodes) |
| 322 : nodes_(nodes) { |
| 323 DCHECK(nodes->length() > 1); |
| 324 min_match_ = 0; |
| 325 max_match_ = 0; |
| 326 for (int i = 0; i < nodes->length(); i++) { |
| 327 RegExpTree* node = nodes->at(i); |
| 328 int node_min_match = node->min_match(); |
| 329 min_match_ = IncreaseBy(min_match_, node_min_match); |
| 330 int node_max_match = node->max_match(); |
| 331 max_match_ = IncreaseBy(max_match_, node_max_match); |
| 332 } |
| 333 } |
| 334 |
| 335 |
| 336 } // namespace internal |
| 337 } // namespace v8 |
OLD | NEW |