OLD | NEW |
(Empty) | |
| 1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file |
| 2 // for details. All rights reserved. Use of this source code is governed by a |
| 3 // BSD-style license that can be found in the LICENSE file. |
| 4 |
| 5 #include "vm/regexp_ast.h" |
| 6 |
| 7 // SNIP |
| 8 |
| 9 namespace dart { |
| 10 |
| 11 #define MAKE_ACCEPT(Name) \ |
| 12 void* RegExp##Name::Accept(RegExpVisitor* visitor, void* data) { \ |
| 13 return visitor->Visit##Name(this, data); \ |
| 14 } |
| 15 FOR_EACH_REG_EXP_TREE_TYPE(MAKE_ACCEPT) |
| 16 #undef MAKE_ACCEPT |
| 17 |
| 18 #define MAKE_TYPE_CASE(Name) \ |
| 19 RegExp##Name* RegExpTree::As##Name() { \ |
| 20 return NULL; \ |
| 21 } \ |
| 22 bool RegExpTree::Is##Name() { return false; } |
| 23 FOR_EACH_REG_EXP_TREE_TYPE(MAKE_TYPE_CASE) |
| 24 #undef MAKE_TYPE_CASE |
| 25 |
| 26 #define MAKE_TYPE_CASE(Name) \ |
| 27 RegExp##Name* RegExp##Name::As##Name() { \ |
| 28 return this; \ |
| 29 } \ |
| 30 bool RegExp##Name::Is##Name() { return true; } |
| 31 FOR_EACH_REG_EXP_TREE_TYPE(MAKE_TYPE_CASE) |
| 32 #undef MAKE_TYPE_CASE |
| 33 |
| 34 |
| 35 static Interval ListCaptureRegisters(ZoneList<RegExpTree*>* children) { |
| 36 Interval result = Interval::Empty(); |
| 37 for (int i = 0; i < children->length(); i++) |
| 38 result = result.Union(children->at(i)->CaptureRegisters()); |
| 39 return result; |
| 40 } |
| 41 |
| 42 |
| 43 Interval RegExpAlternative::CaptureRegisters() { |
| 44 return ListCaptureRegisters(nodes()); |
| 45 } |
| 46 |
| 47 |
| 48 Interval RegExpDisjunction::CaptureRegisters() { |
| 49 return ListCaptureRegisters(alternatives()); |
| 50 } |
| 51 |
| 52 |
| 53 Interval RegExpLookahead::CaptureRegisters() { |
| 54 return body()->CaptureRegisters(); |
| 55 } |
| 56 |
| 57 |
| 58 Interval RegExpCapture::CaptureRegisters() { |
| 59 Interval self(StartRegister(index()), EndRegister(index())); |
| 60 return self.Union(body()->CaptureRegisters()); |
| 61 } |
| 62 |
| 63 |
| 64 Interval RegExpQuantifier::CaptureRegisters() { |
| 65 return body()->CaptureRegisters(); |
| 66 } |
| 67 |
| 68 |
| 69 bool RegExpAssertion::IsAnchoredAtStart() { |
| 70 return assertion_type() == RegExpAssertion::START_OF_INPUT; |
| 71 } |
| 72 |
| 73 |
| 74 bool RegExpAssertion::IsAnchoredAtEnd() { |
| 75 return assertion_type() == RegExpAssertion::END_OF_INPUT; |
| 76 } |
| 77 |
| 78 |
| 79 bool RegExpAlternative::IsAnchoredAtStart() { |
| 80 ZoneList<RegExpTree*>* nodes = this->nodes(); |
| 81 for (int i = 0; i < nodes->length(); i++) { |
| 82 RegExpTree* node = nodes->at(i); |
| 83 if (node->IsAnchoredAtStart()) { return true; } |
| 84 if (node->max_match() > 0) { return false; } |
| 85 } |
| 86 return false; |
| 87 } |
| 88 |
| 89 |
| 90 bool RegExpAlternative::IsAnchoredAtEnd() { |
| 91 ZoneList<RegExpTree*>* nodes = this->nodes(); |
| 92 for (int i = nodes->length() - 1; i >= 0; i--) { |
| 93 RegExpTree* node = nodes->at(i); |
| 94 if (node->IsAnchoredAtEnd()) { return true; } |
| 95 if (node->max_match() > 0) { return false; } |
| 96 } |
| 97 return false; |
| 98 } |
| 99 |
| 100 |
| 101 bool RegExpDisjunction::IsAnchoredAtStart() { |
| 102 ZoneList<RegExpTree*>* alternatives = this->alternatives(); |
| 103 for (int i = 0; i < alternatives->length(); i++) { |
| 104 if (!alternatives->at(i)->IsAnchoredAtStart()) |
| 105 return false; |
| 106 } |
| 107 return true; |
| 108 } |
| 109 |
| 110 |
| 111 bool RegExpDisjunction::IsAnchoredAtEnd() { |
| 112 ZoneList<RegExpTree*>* alternatives = this->alternatives(); |
| 113 for (int i = 0; i < alternatives->length(); i++) { |
| 114 if (!alternatives->at(i)->IsAnchoredAtEnd()) |
| 115 return false; |
| 116 } |
| 117 return true; |
| 118 } |
| 119 |
| 120 |
| 121 bool RegExpLookahead::IsAnchoredAtStart() { |
| 122 return is_positive() && body()->IsAnchoredAtStart(); |
| 123 } |
| 124 |
| 125 |
| 126 bool RegExpCapture::IsAnchoredAtStart() { |
| 127 return body()->IsAnchoredAtStart(); |
| 128 } |
| 129 |
| 130 |
| 131 bool RegExpCapture::IsAnchoredAtEnd() { |
| 132 return body()->IsAnchoredAtEnd(); |
| 133 } |
| 134 |
| 135 |
| 136 // Convert regular expression trees to a simple sexp representation. |
| 137 // This representation should be different from the input grammar |
| 138 // in as many cases as possible, to make it more difficult for incorrect |
| 139 // parses to look as correct ones which is likely if the input and |
| 140 // output formats are alike. |
| 141 class RegExpUnparser FINAL : public RegExpVisitor { |
| 142 public: |
| 143 RegExpUnparser(OStream& os, Zone* zone) : os_(os), zone_(zone) {} |
| 144 void VisitCharacterRange(CharacterRange that); |
| 145 #define MAKE_CASE(Name) virtual void* Visit##Name(RegExp##Name*, \ |
| 146 void* data) OVERRIDE; |
| 147 FOR_EACH_REG_EXP_TREE_TYPE(MAKE_CASE) |
| 148 #undef MAKE_CASE |
| 149 private: |
| 150 OStream& os_; |
| 151 Zone* zone_; |
| 152 }; |
| 153 |
| 154 |
| 155 void* RegExpUnparser::VisitDisjunction(RegExpDisjunction* that, void* data) { |
| 156 os_ << "(|"; |
| 157 for (int i = 0; i < that->alternatives()->length(); i++) { |
| 158 os_ << " "; |
| 159 that->alternatives()->at(i)->Accept(this, data); |
| 160 } |
| 161 os_ << ")"; |
| 162 return NULL; |
| 163 } |
| 164 |
| 165 |
| 166 void* RegExpUnparser::VisitAlternative(RegExpAlternative* that, void* data) { |
| 167 os_ << "(:"; |
| 168 for (int i = 0; i < that->nodes()->length(); i++) { |
| 169 os_ << " "; |
| 170 that->nodes()->at(i)->Accept(this, data); |
| 171 } |
| 172 os_ << ")"; |
| 173 return NULL; |
| 174 } |
| 175 |
| 176 |
| 177 void RegExpUnparser::VisitCharacterRange(CharacterRange that) { |
| 178 os_ << AsUC16(that.from()); |
| 179 if (!that.IsSingleton()) { |
| 180 os_ << "-" << AsUC16(that.to()); |
| 181 } |
| 182 } |
| 183 |
| 184 |
| 185 |
| 186 void* RegExpUnparser::VisitCharacterClass(RegExpCharacterClass* that, |
| 187 void* data) { |
| 188 if (that->is_negated()) os_ << "^"; |
| 189 os_ << "["; |
| 190 for (int i = 0; i < that->ranges(zone_)->length(); i++) { |
| 191 if (i > 0) os_ << " "; |
| 192 VisitCharacterRange(that->ranges(zone_)->at(i)); |
| 193 } |
| 194 os_ << "]"; |
| 195 return NULL; |
| 196 } |
| 197 |
| 198 |
| 199 void* RegExpUnparser::VisitAssertion(RegExpAssertion* that, void* data) { |
| 200 switch (that->assertion_type()) { |
| 201 case RegExpAssertion::START_OF_INPUT: |
| 202 os_ << "@^i"; |
| 203 break; |
| 204 case RegExpAssertion::END_OF_INPUT: |
| 205 os_ << "@$i"; |
| 206 break; |
| 207 case RegExpAssertion::START_OF_LINE: |
| 208 os_ << "@^l"; |
| 209 break; |
| 210 case RegExpAssertion::END_OF_LINE: |
| 211 os_ << "@$l"; |
| 212 break; |
| 213 case RegExpAssertion::BOUNDARY: |
| 214 os_ << "@b"; |
| 215 break; |
| 216 case RegExpAssertion::NON_BOUNDARY: |
| 217 os_ << "@B"; |
| 218 break; |
| 219 } |
| 220 return NULL; |
| 221 } |
| 222 |
| 223 |
| 224 void* RegExpUnparser::VisitAtom(RegExpAtom* that, void* data) { |
| 225 os_ << "'"; |
| 226 Vector<const uc16> chardata = that->data(); |
| 227 for (int i = 0; i < chardata.length(); i++) { |
| 228 os_ << AsUC16(chardata[i]); |
| 229 } |
| 230 os_ << "'"; |
| 231 return NULL; |
| 232 } |
| 233 |
| 234 |
| 235 void* RegExpUnparser::VisitText(RegExpText* that, void* data) { |
| 236 if (that->elements()->length() == 1) { |
| 237 that->elements()->at(0).tree()->Accept(this, data); |
| 238 } else { |
| 239 os_ << "(!"; |
| 240 for (int i = 0; i < that->elements()->length(); i++) { |
| 241 os_ << " "; |
| 242 that->elements()->at(i).tree()->Accept(this, data); |
| 243 } |
| 244 os_ << ")"; |
| 245 } |
| 246 return NULL; |
| 247 } |
| 248 |
| 249 |
| 250 void* RegExpUnparser::VisitQuantifier(RegExpQuantifier* that, void* data) { |
| 251 os_ << "(# " << that->min() << " "; |
| 252 if (that->max() == RegExpTree::kInfinity) { |
| 253 os_ << "- "; |
| 254 } else { |
| 255 os_ << that->max() << " "; |
| 256 } |
| 257 os_ << (that->is_greedy() ? "g " : that->is_possessive() ? "p " : "n "); |
| 258 that->body()->Accept(this, data); |
| 259 os_ << ")"; |
| 260 return NULL; |
| 261 } |
| 262 |
| 263 |
| 264 void* RegExpUnparser::VisitCapture(RegExpCapture* that, void* data) { |
| 265 os_ << "(^ "; |
| 266 that->body()->Accept(this, data); |
| 267 os_ << ")"; |
| 268 return NULL; |
| 269 } |
| 270 |
| 271 |
| 272 void* RegExpUnparser::VisitLookahead(RegExpLookahead* that, void* data) { |
| 273 os_ << "(-> " << (that->is_positive() ? "+ " : "- "); |
| 274 that->body()->Accept(this, data); |
| 275 os_ << ")"; |
| 276 return NULL; |
| 277 } |
| 278 |
| 279 |
| 280 void* RegExpUnparser::VisitBackReference(RegExpBackReference* that, |
| 281 void* data) { |
| 282 os_ << "(<- " << that->index() << ")"; |
| 283 return NULL; |
| 284 } |
| 285 |
| 286 |
| 287 void* RegExpUnparser::VisitEmpty(RegExpEmpty* that, void* data) { |
| 288 os_ << '%'; |
| 289 return NULL; |
| 290 } |
| 291 |
| 292 |
| 293 OStream& RegExpTree::Print(OStream& os, Zone* zone) { // NOLINT |
| 294 RegExpUnparser unparser(os, zone); |
| 295 Accept(&unparser, NULL); |
| 296 return os; |
| 297 } |
| 298 |
| 299 |
| 300 RegExpDisjunction::RegExpDisjunction(ZoneList<RegExpTree*>* alternatives) |
| 301 : alternatives_(alternatives) { |
| 302 DCHECK(alternatives->length() > 1); |
| 303 RegExpTree* first_alternative = alternatives->at(0); |
| 304 min_match_ = first_alternative->min_match(); |
| 305 max_match_ = first_alternative->max_match(); |
| 306 for (int i = 1; i < alternatives->length(); i++) { |
| 307 RegExpTree* alternative = alternatives->at(i); |
| 308 min_match_ = Min(min_match_, alternative->min_match()); |
| 309 max_match_ = Max(max_match_, alternative->max_match()); |
| 310 } |
| 311 } |
| 312 |
| 313 |
| 314 static int IncreaseBy(int previous, int increase) { |
| 315 if (RegExpTree::kInfinity - previous < increase) { |
| 316 return RegExpTree::kInfinity; |
| 317 } else { |
| 318 return previous + increase; |
| 319 } |
| 320 } |
| 321 |
| 322 RegExpAlternative::RegExpAlternative(ZoneList<RegExpTree*>* nodes) |
| 323 : nodes_(nodes) { |
| 324 DCHECK(nodes->length() > 1); |
| 325 min_match_ = 0; |
| 326 max_match_ = 0; |
| 327 for (int i = 0; i < nodes->length(); i++) { |
| 328 RegExpTree* node = nodes->at(i); |
| 329 int node_min_match = node->min_match(); |
| 330 min_match_ = IncreaseBy(min_match_, node_min_match); |
| 331 int node_max_match = node->max_match(); |
| 332 max_match_ = IncreaseBy(max_match_, node_max_match); |
| 333 } |
| 334 } |
| 335 |
| 336 } // namespace dart |
OLD | NEW |