OLD | NEW |
(Empty) | |
| 1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file |
| 2 // for details. All rights reserved. Use of this source code is governed by a |
| 3 // BSD-style license that can be found in the LICENSE file. |
| 4 |
| 5 #include "vm/regexp_ast.h" |
| 6 |
| 7 #include "platform/utils.h" |
| 8 #include "vm/os.h" |
| 9 |
| 10 namespace dart { |
| 11 |
| 12 #define MAKE_ACCEPT(Name) \ |
| 13 void* RegExp##Name::Accept(RegExpVisitor* visitor, void* data) { \ |
| 14 return visitor->Visit##Name(this, data); \ |
| 15 } |
| 16 FOR_EACH_REG_EXP_TREE_TYPE(MAKE_ACCEPT) |
| 17 #undef MAKE_ACCEPT |
| 18 |
| 19 #define MAKE_TYPE_CASE(Name) \ |
| 20 RegExp##Name* RegExpTree::As##Name() { \ |
| 21 return NULL; \ |
| 22 } \ |
| 23 bool RegExpTree::Is##Name() const { return false; } |
| 24 FOR_EACH_REG_EXP_TREE_TYPE(MAKE_TYPE_CASE) |
| 25 #undef MAKE_TYPE_CASE |
| 26 |
| 27 #define MAKE_TYPE_CASE(Name) \ |
| 28 RegExp##Name* RegExp##Name::As##Name() { \ |
| 29 return this; \ |
| 30 } \ |
| 31 bool RegExp##Name::Is##Name() const { return true; } |
| 32 FOR_EACH_REG_EXP_TREE_TYPE(MAKE_TYPE_CASE) |
| 33 #undef MAKE_TYPE_CASE |
| 34 |
| 35 |
| 36 static Interval ListCaptureRegisters(ZoneGrowableArray<RegExpTree*>* children) { |
| 37 Interval result = Interval::Empty(); |
| 38 for (intptr_t i = 0; i < children->length(); i++) |
| 39 result = result.Union(children->At(i)->CaptureRegisters()); |
| 40 return result; |
| 41 } |
| 42 |
| 43 |
| 44 Interval RegExpAlternative::CaptureRegisters() const { |
| 45 return ListCaptureRegisters(nodes()); |
| 46 } |
| 47 |
| 48 |
| 49 Interval RegExpDisjunction::CaptureRegisters() const { |
| 50 return ListCaptureRegisters(alternatives()); |
| 51 } |
| 52 |
| 53 |
| 54 Interval RegExpLookahead::CaptureRegisters() const { |
| 55 return body()->CaptureRegisters(); |
| 56 } |
| 57 |
| 58 |
| 59 Interval RegExpCapture::CaptureRegisters() const { |
| 60 Interval self(StartRegister(index()), EndRegister(index())); |
| 61 return self.Union(body()->CaptureRegisters()); |
| 62 } |
| 63 |
| 64 |
| 65 Interval RegExpQuantifier::CaptureRegisters() const { |
| 66 return body()->CaptureRegisters(); |
| 67 } |
| 68 |
| 69 |
| 70 bool RegExpAssertion::IsAnchoredAtStart() const { |
| 71 return assertion_type() == RegExpAssertion::START_OF_INPUT; |
| 72 } |
| 73 |
| 74 |
| 75 bool RegExpAssertion::IsAnchoredAtEnd() const { |
| 76 return assertion_type() == RegExpAssertion::END_OF_INPUT; |
| 77 } |
| 78 |
| 79 |
| 80 bool RegExpAlternative::IsAnchoredAtStart() const { |
| 81 ZoneGrowableArray<RegExpTree*>* nodes = this->nodes(); |
| 82 for (intptr_t i = 0; i < nodes->length(); i++) { |
| 83 RegExpTree* node = nodes->At(i); |
| 84 if (node->IsAnchoredAtStart()) { return true; } |
| 85 if (node->max_match() > 0) { return false; } |
| 86 } |
| 87 return false; |
| 88 } |
| 89 |
| 90 |
| 91 bool RegExpAlternative::IsAnchoredAtEnd() const { |
| 92 ZoneGrowableArray<RegExpTree*>* nodes = this->nodes(); |
| 93 for (intptr_t i = nodes->length() - 1; i >= 0; i--) { |
| 94 RegExpTree* node = nodes->At(i); |
| 95 if (node->IsAnchoredAtEnd()) { return true; } |
| 96 if (node->max_match() > 0) { return false; } |
| 97 } |
| 98 return false; |
| 99 } |
| 100 |
| 101 |
| 102 bool RegExpDisjunction::IsAnchoredAtStart() const { |
| 103 ZoneGrowableArray<RegExpTree*>* alternatives = this->alternatives(); |
| 104 for (intptr_t i = 0; i < alternatives->length(); i++) { |
| 105 if (!alternatives->At(i)->IsAnchoredAtStart()) |
| 106 return false; |
| 107 } |
| 108 return true; |
| 109 } |
| 110 |
| 111 |
| 112 bool RegExpDisjunction::IsAnchoredAtEnd() const { |
| 113 ZoneGrowableArray<RegExpTree*>* alternatives = this->alternatives(); |
| 114 for (intptr_t i = 0; i < alternatives->length(); i++) { |
| 115 if (!alternatives->At(i)->IsAnchoredAtEnd()) |
| 116 return false; |
| 117 } |
| 118 return true; |
| 119 } |
| 120 |
| 121 |
| 122 bool RegExpLookahead::IsAnchoredAtStart() const { |
| 123 return is_positive() && body()->IsAnchoredAtStart(); |
| 124 } |
| 125 |
| 126 |
| 127 bool RegExpCapture::IsAnchoredAtStart() const { |
| 128 return body()->IsAnchoredAtStart(); |
| 129 } |
| 130 |
| 131 |
| 132 bool RegExpCapture::IsAnchoredAtEnd() const { |
| 133 return body()->IsAnchoredAtEnd(); |
| 134 } |
| 135 |
| 136 |
| 137 // Convert regular expression trees to a simple sexp representation. |
| 138 // This representation should be different from the input grammar |
| 139 // in as many cases as possible, to make it more difficult for incorrect |
| 140 // parses to look as correct ones which is likely if the input and |
| 141 // output formats are alike. |
| 142 class RegExpUnparser : public RegExpVisitor { |
| 143 public: |
| 144 void VisitCharacterRange(CharacterRange that); |
| 145 #define MAKE_CASE(Name) virtual void* Visit##Name(RegExp##Name*, \ |
| 146 void* data); |
| 147 FOR_EACH_REG_EXP_TREE_TYPE(MAKE_CASE) |
| 148 #undef MAKE_CASE |
| 149 }; |
| 150 |
| 151 |
| 152 void* RegExpUnparser::VisitDisjunction(RegExpDisjunction* that, void* data) { |
| 153 OS::Print("(|"); |
| 154 for (intptr_t i = 0; i < that->alternatives()->length(); i++) { |
| 155 OS::Print(" "); |
| 156 (*that->alternatives())[i]->Accept(this, data); |
| 157 } |
| 158 OS::Print(")"); |
| 159 return NULL; |
| 160 } |
| 161 |
| 162 |
| 163 void* RegExpUnparser::VisitAlternative(RegExpAlternative* that, void* data) { |
| 164 OS::Print("(:"); |
| 165 for (intptr_t i = 0; i < that->nodes()->length(); i++) { |
| 166 OS::Print(" "); |
| 167 (*that->nodes())[i]->Accept(this, data); |
| 168 } |
| 169 OS::Print(")"); |
| 170 return NULL; |
| 171 } |
| 172 |
| 173 |
| 174 void RegExpUnparser::VisitCharacterRange(CharacterRange that) { |
| 175 PrintUtf16(that.from()); |
| 176 if (!that.IsSingleton()) { |
| 177 OS::Print("-"); |
| 178 PrintUtf16(that.to()); |
| 179 } |
| 180 } |
| 181 |
| 182 |
| 183 |
| 184 void* RegExpUnparser::VisitCharacterClass(RegExpCharacterClass* that, |
| 185 void* data) { |
| 186 if (that->is_negated()) OS::Print("^"); |
| 187 OS::Print("["); |
| 188 for (intptr_t i = 0; i < that->ranges()->length(); i++) { |
| 189 if (i > 0) OS::Print(" "); |
| 190 VisitCharacterRange((*that->ranges())[i]); |
| 191 } |
| 192 OS::Print("]"); |
| 193 return NULL; |
| 194 } |
| 195 |
| 196 |
| 197 void* RegExpUnparser::VisitAssertion(RegExpAssertion* that, void* data) { |
| 198 switch (that->assertion_type()) { |
| 199 case RegExpAssertion::START_OF_INPUT: |
| 200 OS::Print("@^i"); |
| 201 break; |
| 202 case RegExpAssertion::END_OF_INPUT: |
| 203 OS::Print("@$i"); |
| 204 break; |
| 205 case RegExpAssertion::START_OF_LINE: |
| 206 OS::Print("@^l"); |
| 207 break; |
| 208 case RegExpAssertion::END_OF_LINE: |
| 209 OS::Print("@$l"); |
| 210 break; |
| 211 case RegExpAssertion::BOUNDARY: |
| 212 OS::Print("@b"); |
| 213 break; |
| 214 case RegExpAssertion::NON_BOUNDARY: |
| 215 OS::Print("@B"); |
| 216 break; |
| 217 } |
| 218 return NULL; |
| 219 } |
| 220 |
| 221 |
| 222 void* RegExpUnparser::VisitAtom(RegExpAtom* that, void* data) { |
| 223 OS::Print("'"); |
| 224 ZoneGrowableArray<uint16_t>* chardata = that->data(); |
| 225 for (intptr_t i = 0; i < chardata->length(); i++) { |
| 226 PrintUtf16(chardata->At(i)); |
| 227 } |
| 228 OS::Print("'"); |
| 229 return NULL; |
| 230 } |
| 231 |
| 232 |
| 233 void* RegExpUnparser::VisitText(RegExpText* that, void* data) { |
| 234 if (that->elements()->length() == 1) { |
| 235 (*that->elements())[0].tree()->Accept(this, data); |
| 236 } else { |
| 237 OS::Print("(!"); |
| 238 for (intptr_t i = 0; i < that->elements()->length(); i++) { |
| 239 OS::Print(" "); |
| 240 (*that->elements())[i].tree()->Accept(this, data); |
| 241 } |
| 242 OS::Print(")"); |
| 243 } |
| 244 return NULL; |
| 245 } |
| 246 |
| 247 |
| 248 void* RegExpUnparser::VisitQuantifier(RegExpQuantifier* that, void* data) { |
| 249 OS::Print("(# %" Pd " ", that->min()); |
| 250 if (that->max() == RegExpTree::kInfinity) { |
| 251 OS::Print("- "); |
| 252 } else { |
| 253 OS::Print("%" Pd " ", that->max()); |
| 254 } |
| 255 OS::Print(that->is_greedy() ? "g " : that->is_possessive() ? "p " : "n "); |
| 256 that->body()->Accept(this, data); |
| 257 OS::Print(")"); |
| 258 return NULL; |
| 259 } |
| 260 |
| 261 |
| 262 void* RegExpUnparser::VisitCapture(RegExpCapture* that, void* data) { |
| 263 OS::Print("(^ "); |
| 264 that->body()->Accept(this, data); |
| 265 OS::Print(")"); |
| 266 return NULL; |
| 267 } |
| 268 |
| 269 |
| 270 void* RegExpUnparser::VisitLookahead(RegExpLookahead* that, void* data) { |
| 271 OS::Print("(-> %s", (that->is_positive() ? "+ " : "- ")); |
| 272 that->body()->Accept(this, data); |
| 273 OS::Print(")"); |
| 274 return NULL; |
| 275 } |
| 276 |
| 277 |
| 278 void* RegExpUnparser::VisitBackReference(RegExpBackReference* that, |
| 279 void*) { |
| 280 OS::Print("(<- %" Pd ")", that->index()); |
| 281 return NULL; |
| 282 } |
| 283 |
| 284 |
| 285 void* RegExpUnparser::VisitEmpty(RegExpEmpty*, void*) { |
| 286 OS::Print("%%"); |
| 287 return NULL; |
| 288 } |
| 289 |
| 290 |
| 291 void RegExpTree::Print() { |
| 292 RegExpUnparser unparser; |
| 293 Accept(&unparser, NULL); |
| 294 } |
| 295 |
| 296 |
| 297 RegExpDisjunction::RegExpDisjunction( |
| 298 ZoneGrowableArray<RegExpTree*>* alternatives) |
| 299 : alternatives_(alternatives) { |
| 300 ASSERT(alternatives->length() > 1); |
| 301 RegExpTree* first_alternative = alternatives->At(0); |
| 302 min_match_ = first_alternative->min_match(); |
| 303 max_match_ = first_alternative->max_match(); |
| 304 for (intptr_t i = 1; i < alternatives->length(); i++) { |
| 305 RegExpTree* alternative = alternatives->At(i); |
| 306 min_match_ = Utils::Minimum(min_match_, alternative->min_match()); |
| 307 max_match_ = Utils::Maximum(max_match_, alternative->max_match()); |
| 308 } |
| 309 } |
| 310 |
| 311 |
| 312 static intptr_t IncreaseBy(intptr_t previous, intptr_t increase) { |
| 313 if (RegExpTree::kInfinity - previous < increase) { |
| 314 return RegExpTree::kInfinity; |
| 315 } else { |
| 316 return previous + increase; |
| 317 } |
| 318 } |
| 319 |
| 320 RegExpAlternative::RegExpAlternative(ZoneGrowableArray<RegExpTree*>* nodes) |
| 321 : nodes_(nodes) { |
| 322 ASSERT(nodes->length() > 1); |
| 323 min_match_ = 0; |
| 324 max_match_ = 0; |
| 325 for (intptr_t i = 0; i < nodes->length(); i++) { |
| 326 RegExpTree* node = nodes->At(i); |
| 327 intptr_t node_min_match = node->min_match(); |
| 328 min_match_ = IncreaseBy(min_match_, node_min_match); |
| 329 intptr_t node_max_match = node->max_match(); |
| 330 max_match_ = IncreaseBy(max_match_, node_max_match); |
| 331 } |
| 332 } |
| 333 |
| 334 } // namespace dart |
OLD | NEW |