OLD | NEW |
(Empty) | |
| 1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file |
| 2 // for details. All rights reserved. Use of this source code is governed by a |
| 3 // BSD-style license that can be found in the LICENSE file. |
| 4 |
| 5 #include "vm/regexp_ast.h" |
| 6 |
| 7 #include "platform/utils.h" |
| 8 #include "vm/os.h" |
| 9 |
| 10 namespace dart { |
| 11 |
| 12 |
| 13 // Convert regular expression trees to a simple sexp representation. |
| 14 // This representation should be different from the input grammar |
| 15 // in as many cases as possible, to make it more difficult for incorrect |
| 16 // parses to look as correct ones which is likely if the input and |
| 17 // output formats are alike. |
| 18 class RegExpUnparser : public RegExpVisitor { |
| 19 public: |
| 20 void VisitCharacterRange(CharacterRange that); |
| 21 #define MAKE_CASE(Name) virtual void* Visit##Name(RegExp##Name*, \ |
| 22 void* data); |
| 23 FOR_EACH_REG_EXP_TREE_TYPE(MAKE_CASE) |
| 24 #undef MAKE_CASE |
| 25 }; |
| 26 |
| 27 |
| 28 static inline void PrintUtf16(uint16_t c) { |
| 29 const char* format = (0x20 <= c && c <= 0x7F) |
| 30 ? "%c" |
| 31 : (c <= 0xff) ? "\\x%02x" : "\\u%04x"; |
| 32 OS::Print(format, c); |
| 33 } |
| 34 |
| 35 |
| 36 void* RegExpUnparser::VisitDisjunction(RegExpDisjunction* that, void* data) { |
| 37 OS::Print("(|"); |
| 38 for (intptr_t i = 0; i < that->alternatives()->length(); i++) { |
| 39 OS::Print(" "); |
| 40 (*that->alternatives())[i]->Accept(this, data); |
| 41 } |
| 42 OS::Print(")"); |
| 43 return NULL; |
| 44 } |
| 45 |
| 46 |
| 47 void* RegExpUnparser::VisitAlternative(RegExpAlternative* that, void* data) { |
| 48 OS::Print("(:"); |
| 49 for (intptr_t i = 0; i < that->nodes()->length(); i++) { |
| 50 OS::Print(" "); |
| 51 (*that->nodes())[i]->Accept(this, data); |
| 52 } |
| 53 OS::Print(")"); |
| 54 return NULL; |
| 55 } |
| 56 |
| 57 |
| 58 void RegExpUnparser::VisitCharacterRange(CharacterRange that) { |
| 59 PrintUtf16(that.from()); |
| 60 if (!that.IsSingleton()) { |
| 61 OS::Print("-"); |
| 62 PrintUtf16(that.to()); |
| 63 } |
| 64 } |
| 65 |
| 66 |
| 67 |
| 68 void* RegExpUnparser::VisitCharacterClass(RegExpCharacterClass* that, |
| 69 void* data) { |
| 70 if (that->is_negated()) OS::Print("^"); |
| 71 OS::Print("["); |
| 72 for (intptr_t i = 0; i < that->ranges()->length(); i++) { |
| 73 if (i > 0) OS::Print(" "); |
| 74 VisitCharacterRange((*that->ranges())[i]); |
| 75 } |
| 76 OS::Print("]"); |
| 77 return NULL; |
| 78 } |
| 79 |
| 80 |
| 81 void* RegExpUnparser::VisitAssertion(RegExpAssertion* that, void* data) { |
| 82 switch (that->assertion_type()) { |
| 83 case RegExpAssertion::START_OF_INPUT: |
| 84 OS::Print("@^i"); |
| 85 break; |
| 86 case RegExpAssertion::END_OF_INPUT: |
| 87 OS::Print("@$i"); |
| 88 break; |
| 89 case RegExpAssertion::START_OF_LINE: |
| 90 OS::Print("@^l"); |
| 91 break; |
| 92 case RegExpAssertion::END_OF_LINE: |
| 93 OS::Print("@$l"); |
| 94 break; |
| 95 case RegExpAssertion::BOUNDARY: |
| 96 OS::Print("@b"); |
| 97 break; |
| 98 case RegExpAssertion::NON_BOUNDARY: |
| 99 OS::Print("@B"); |
| 100 break; |
| 101 } |
| 102 return NULL; |
| 103 } |
| 104 |
| 105 |
| 106 void* RegExpUnparser::VisitAtom(RegExpAtom* that, void* data) { |
| 107 OS::Print("'"); |
| 108 ZoneGrowableArray<uint16_t>* chardata = that->data(); |
| 109 for (intptr_t i = 0; i < chardata->length(); i++) { |
| 110 PrintUtf16(chardata->At(i)); |
| 111 } |
| 112 OS::Print("'"); |
| 113 return NULL; |
| 114 } |
| 115 |
| 116 |
| 117 void* RegExpUnparser::VisitText(RegExpText* that, void* data) { |
| 118 if (that->elements()->length() == 1) { |
| 119 (*that->elements())[0].tree()->Accept(this, data); |
| 120 } else { |
| 121 OS::Print("(!"); |
| 122 for (intptr_t i = 0; i < that->elements()->length(); i++) { |
| 123 OS::Print(" "); |
| 124 (*that->elements())[i].tree()->Accept(this, data); |
| 125 } |
| 126 OS::Print(")"); |
| 127 } |
| 128 return NULL; |
| 129 } |
| 130 |
| 131 |
| 132 void* RegExpUnparser::VisitQuantifier(RegExpQuantifier* that, void* data) { |
| 133 OS::Print("(# %" Pd " ", that->min()); |
| 134 if (that->max() == RegExpTree::kInfinity) { |
| 135 OS::Print("- "); |
| 136 } else { |
| 137 OS::Print("%" Pd " ", that->max()); |
| 138 } |
| 139 OS::Print(that->is_greedy() ? "g " : that->is_possessive() ? "p " : "n "); |
| 140 that->body()->Accept(this, data); |
| 141 OS::Print(")"); |
| 142 return NULL; |
| 143 } |
| 144 |
| 145 |
| 146 void* RegExpUnparser::VisitCapture(RegExpCapture* that, void* data) { |
| 147 OS::Print("(^ "); |
| 148 that->body()->Accept(this, data); |
| 149 OS::Print(")"); |
| 150 return NULL; |
| 151 } |
| 152 |
| 153 |
| 154 void* RegExpUnparser::VisitLookahead(RegExpLookahead* that, void* data) { |
| 155 OS::Print("(-> %s", (that->is_positive() ? "+ " : "- ")); |
| 156 that->body()->Accept(this, data); |
| 157 OS::Print(")"); |
| 158 return NULL; |
| 159 } |
| 160 |
| 161 |
| 162 void* RegExpUnparser::VisitBackReference(RegExpBackReference* that, |
| 163 void*) { |
| 164 OS::Print("(<- %" Pd ")", that->index()); |
| 165 return NULL; |
| 166 } |
| 167 |
| 168 |
| 169 void* RegExpUnparser::VisitEmpty(RegExpEmpty*, void*) { |
| 170 OS::Print("%%"); |
| 171 return NULL; |
| 172 } |
| 173 |
| 174 |
| 175 #define MAKE_ACCEPT(Name) \ |
| 176 void* RegExp##Name::Accept(RegExpVisitor* visitor, void* data) { \ |
| 177 return visitor->Visit##Name(this, data); \ |
| 178 } |
| 179 FOR_EACH_REG_EXP_TREE_TYPE(MAKE_ACCEPT) |
| 180 #undef MAKE_ACCEPT |
| 181 |
| 182 #define MAKE_TYPE_CASE(Name) \ |
| 183 RegExp##Name* RegExpTree::As##Name() { \ |
| 184 return NULL; \ |
| 185 } \ |
| 186 bool RegExpTree::Is##Name() const { return false; } |
| 187 FOR_EACH_REG_EXP_TREE_TYPE(MAKE_TYPE_CASE) |
| 188 #undef MAKE_TYPE_CASE |
| 189 |
| 190 #define MAKE_TYPE_CASE(Name) \ |
| 191 RegExp##Name* RegExp##Name::As##Name() { \ |
| 192 return this; \ |
| 193 } \ |
| 194 bool RegExp##Name::Is##Name() const { return true; } |
| 195 FOR_EACH_REG_EXP_TREE_TYPE(MAKE_TYPE_CASE) |
| 196 #undef MAKE_TYPE_CASE |
| 197 |
| 198 void RegExpTree::Print() { |
| 199 RegExpUnparser unparser; |
| 200 Accept(&unparser, NULL); |
| 201 } |
| 202 |
| 203 |
| 204 static Interval ListCaptureRegisters(ZoneGrowableArray<RegExpTree*>* children) { |
| 205 Interval result = Interval::Empty(); |
| 206 for (intptr_t i = 0; i < children->length(); i++) |
| 207 result = result.Union(children->At(i)->CaptureRegisters()); |
| 208 return result; |
| 209 } |
| 210 |
| 211 |
| 212 Interval RegExpQuantifier::CaptureRegisters() const { |
| 213 return body()->CaptureRegisters(); |
| 214 } |
| 215 |
| 216 |
| 217 bool RegExpCapture::IsAnchoredAtStart() const { |
| 218 return body()->IsAnchoredAtStart(); |
| 219 } |
| 220 |
| 221 |
| 222 bool RegExpCapture::IsAnchoredAtEnd() const { |
| 223 return body()->IsAnchoredAtEnd(); |
| 224 } |
| 225 |
| 226 |
| 227 Interval RegExpCapture::CaptureRegisters() const { |
| 228 Interval self(StartRegister(index()), EndRegister(index())); |
| 229 return self.Union(body()->CaptureRegisters()); |
| 230 } |
| 231 |
| 232 |
| 233 RegExpDisjunction::RegExpDisjunction( |
| 234 ZoneGrowableArray<RegExpTree*>* alternatives) |
| 235 : alternatives_(alternatives) { |
| 236 ASSERT(alternatives->length() > 1); |
| 237 RegExpTree* first_alternative = alternatives->At(0); |
| 238 min_match_ = first_alternative->min_match(); |
| 239 max_match_ = first_alternative->max_match(); |
| 240 for (intptr_t i = 1; i < alternatives->length(); i++) { |
| 241 RegExpTree* alternative = alternatives->At(i); |
| 242 min_match_ = Utils::Minimum(min_match_, alternative->min_match()); |
| 243 max_match_ = Utils::Maximum(max_match_, alternative->max_match()); |
| 244 } |
| 245 } |
| 246 |
| 247 |
| 248 Interval RegExpDisjunction::CaptureRegisters() const { |
| 249 return ListCaptureRegisters(alternatives()); |
| 250 } |
| 251 |
| 252 |
| 253 bool RegExpDisjunction::IsAnchoredAtStart() const { |
| 254 ZoneGrowableArray<RegExpTree*>* alternatives = this->alternatives(); |
| 255 for (intptr_t i = 0; i < alternatives->length(); i++) { |
| 256 if (!alternatives->At(i)->IsAnchoredAtStart()) |
| 257 return false; |
| 258 } |
| 259 return true; |
| 260 } |
| 261 |
| 262 |
| 263 bool RegExpDisjunction::IsAnchoredAtEnd() const { |
| 264 ZoneGrowableArray<RegExpTree*>* alternatives = this->alternatives(); |
| 265 for (intptr_t i = 0; i < alternatives->length(); i++) { |
| 266 if (!alternatives->At(i)->IsAnchoredAtEnd()) |
| 267 return false; |
| 268 } |
| 269 return true; |
| 270 } |
| 271 |
| 272 |
| 273 static intptr_t IncreaseBy(intptr_t previous, intptr_t increase) { |
| 274 if (RegExpTree::kInfinity - previous < increase) { |
| 275 return RegExpTree::kInfinity; |
| 276 } else { |
| 277 return previous + increase; |
| 278 } |
| 279 } |
| 280 |
| 281 RegExpAlternative::RegExpAlternative(ZoneGrowableArray<RegExpTree*>* nodes) |
| 282 : nodes_(nodes) { |
| 283 ASSERT(nodes->length() > 1); |
| 284 min_match_ = 0; |
| 285 max_match_ = 0; |
| 286 for (intptr_t i = 0; i < nodes->length(); i++) { |
| 287 RegExpTree* node = nodes->At(i); |
| 288 intptr_t node_min_match = node->min_match(); |
| 289 min_match_ = IncreaseBy(min_match_, node_min_match); |
| 290 intptr_t node_max_match = node->max_match(); |
| 291 max_match_ = IncreaseBy(max_match_, node_max_match); |
| 292 } |
| 293 } |
| 294 |
| 295 |
| 296 Interval RegExpAlternative::CaptureRegisters() const { |
| 297 return ListCaptureRegisters(nodes()); |
| 298 } |
| 299 |
| 300 |
| 301 bool RegExpAlternative::IsAnchoredAtStart() const { |
| 302 ZoneGrowableArray<RegExpTree*>* nodes = this->nodes(); |
| 303 for (intptr_t i = 0; i < nodes->length(); i++) { |
| 304 RegExpTree* node = nodes->At(i); |
| 305 if (node->IsAnchoredAtStart()) { return true; } |
| 306 if (node->max_match() > 0) { return false; } |
| 307 } |
| 308 return false; |
| 309 } |
| 310 |
| 311 |
| 312 bool RegExpAlternative::IsAnchoredAtEnd() const { |
| 313 ZoneGrowableArray<RegExpTree*>* nodes = this->nodes(); |
| 314 for (intptr_t i = nodes->length() - 1; i >= 0; i--) { |
| 315 RegExpTree* node = nodes->At(i); |
| 316 if (node->IsAnchoredAtEnd()) { return true; } |
| 317 if (node->max_match() > 0) { return false; } |
| 318 } |
| 319 return false; |
| 320 } |
| 321 |
| 322 |
| 323 bool RegExpAssertion::IsAnchoredAtStart() const { |
| 324 return assertion_type() == RegExpAssertion::START_OF_INPUT; |
| 325 } |
| 326 |
| 327 |
| 328 bool RegExpAssertion::IsAnchoredAtEnd() const { |
| 329 return assertion_type() == RegExpAssertion::END_OF_INPUT; |
| 330 } |
| 331 |
| 332 |
| 333 void RegExpTree::AppendToText(RegExpText* text) { |
| 334 UNREACHABLE(); |
| 335 } |
| 336 |
| 337 |
| 338 void RegExpAtom::AppendToText(RegExpText* text) { |
| 339 text->AddElement(TextElement::Atom(this)); |
| 340 } |
| 341 |
| 342 |
| 343 void RegExpCharacterClass::AppendToText(RegExpText* text) { |
| 344 text->AddElement(TextElement::CharClass(this)); |
| 345 } |
| 346 |
| 347 |
| 348 void RegExpText::AppendToText(RegExpText* text) { |
| 349 for (intptr_t i = 0; i < elements()->length(); i++) |
| 350 text->AddElement((*elements())[i]); |
| 351 } |
| 352 |
| 353 |
| 354 Interval RegExpLookahead::CaptureRegisters() const { |
| 355 return body()->CaptureRegisters(); |
| 356 } |
| 357 |
| 358 |
| 359 bool RegExpLookahead::IsAnchoredAtStart() const { |
| 360 return is_positive() && body()->IsAnchoredAtStart(); |
| 361 } |
| 362 |
| 363 } // namespace dart |
OLD | NEW |