OLD | NEW |
(Empty) | |
| 1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file |
| 2 // for details. All rights reserved. Use of this source code is governed by a |
| 3 // BSD-style license that can be found in the LICENSE file. |
| 4 |
| 5 #ifndef VM_REGEXP_AST_H_ |
| 6 #define VM_REGEXP_AST_H_ |
| 7 |
| 8 #include "platform/globals.h" |
| 9 #include "platform/utils.h" |
| 10 #include "vm/allocation.h" |
| 11 #include "vm/regexp.h" |
| 12 |
| 13 namespace dart { |
| 14 |
| 15 // Forward declarations. |
| 16 class RegExpAlternative; |
| 17 class RegExpAssertion; |
| 18 class RegExpAtom; |
| 19 class RegExpBackReference; |
| 20 class RegExpCapture; |
| 21 class RegExpCharacterClass; |
| 22 class RegExpCompiler; |
| 23 class RegExpDisjunction; |
| 24 class RegExpEmpty; |
| 25 class RegExpLookahead; |
| 26 class RegExpQuantifier; |
| 27 class RegExpText; |
| 28 |
| 29 |
| 30 class RegExpVisitor : public ValueObject { |
| 31 public: |
| 32 virtual ~RegExpVisitor() { } |
| 33 #define MAKE_CASE(Name) \ |
| 34 virtual void* Visit##Name(RegExp##Name*, void* data) = 0; |
| 35 FOR_EACH_REG_EXP_TREE_TYPE(MAKE_CASE) |
| 36 #undef MAKE_CASE |
| 37 }; |
| 38 |
| 39 |
| 40 class RegExpTree : public ZoneAllocated { |
| 41 public: |
| 42 static const intptr_t kInfinity = kMaxInt32; |
| 43 virtual ~RegExpTree() {} |
| 44 virtual void* Accept(RegExpVisitor* visitor, void* data) = 0; |
| 45 virtual RegExpNode* ToNode(RegExpCompiler* compiler, |
| 46 RegExpNode* on_success) = 0; |
| 47 virtual bool IsTextElement() const { return false; } |
| 48 virtual bool IsAnchoredAtStart() const { return false; } |
| 49 virtual bool IsAnchoredAtEnd() const { return false; } |
| 50 virtual intptr_t min_match() const = 0; |
| 51 virtual intptr_t max_match() const = 0; |
| 52 // Returns the interval of registers used for captures within this |
| 53 // expression. |
| 54 virtual Interval CaptureRegisters() const { return Interval::Empty(); } |
| 55 virtual void AppendToText(RegExpText* text); |
| 56 void Print(); |
| 57 #define MAKE_ASTYPE(Name) \ |
| 58 virtual RegExp##Name* As##Name(); \ |
| 59 virtual bool Is##Name() const; |
| 60 FOR_EACH_REG_EXP_TREE_TYPE(MAKE_ASTYPE) |
| 61 #undef MAKE_ASTYPE |
| 62 }; |
| 63 |
| 64 |
| 65 class RegExpText : public RegExpTree { |
| 66 public: |
| 67 RegExpText() : elements_(2), length_(0) {} |
| 68 virtual void* Accept(RegExpVisitor* visitor, void* data); |
| 69 virtual RegExpNode* ToNode(RegExpCompiler* compiler, |
| 70 RegExpNode* on_success); |
| 71 virtual RegExpText* AsText(); |
| 72 virtual bool IsText() const; |
| 73 virtual bool IsTextElement() const { return true; } |
| 74 virtual intptr_t min_match() const { return length_; } |
| 75 virtual intptr_t max_match() const { return length_; } |
| 76 virtual void AppendToText(RegExpText* text); |
| 77 void AddElement(TextElement elm) { |
| 78 elements_.Add(elm); |
| 79 length_ += elm.length(); |
| 80 } |
| 81 GrowableArray<TextElement>* elements() { return &elements_; } |
| 82 private: |
| 83 GrowableArray<TextElement> elements_; |
| 84 intptr_t length_; |
| 85 }; |
| 86 |
| 87 |
| 88 class RegExpQuantifier : public RegExpTree { |
| 89 public: |
| 90 enum QuantifierType { GREEDY, NON_GREEDY, POSSESSIVE }; |
| 91 RegExpQuantifier(intptr_t min, intptr_t max, |
| 92 QuantifierType type, RegExpTree* body) |
| 93 : body_(body), |
| 94 min_(min), |
| 95 max_(max), |
| 96 min_match_(min * body->min_match()), |
| 97 quantifier_type_(type) { |
| 98 if (max > 0 && body->max_match() > kInfinity / max) { |
| 99 max_match_ = kInfinity; |
| 100 } else { |
| 101 max_match_ = max * body->max_match(); |
| 102 } |
| 103 } |
| 104 virtual void* Accept(RegExpVisitor* visitor, void* data); |
| 105 virtual RegExpNode* ToNode(RegExpCompiler* compiler, |
| 106 RegExpNode* on_success); |
| 107 static RegExpNode* ToNode(intptr_t min, |
| 108 intptr_t max, |
| 109 bool is_greedy, |
| 110 RegExpTree* body, |
| 111 RegExpCompiler* compiler, |
| 112 RegExpNode* on_success, |
| 113 bool not_at_start = false); |
| 114 virtual RegExpQuantifier* AsQuantifier(); |
| 115 virtual Interval CaptureRegisters() const; |
| 116 virtual bool IsQuantifier() const; |
| 117 virtual intptr_t min_match() const { return min_match_; } |
| 118 virtual intptr_t max_match() const { return max_match_; } |
| 119 intptr_t min() const { return min_; } |
| 120 intptr_t max() const { return max_; } |
| 121 bool is_possessive() const { return quantifier_type_ == POSSESSIVE; } |
| 122 bool is_non_greedy() const { return quantifier_type_ == NON_GREEDY; } |
| 123 bool is_greedy() const { return quantifier_type_ == GREEDY; } |
| 124 RegExpTree* body() const { return body_; } |
| 125 |
| 126 private: |
| 127 RegExpTree* body_; |
| 128 intptr_t min_; |
| 129 intptr_t max_; |
| 130 intptr_t min_match_; |
| 131 intptr_t max_match_; |
| 132 QuantifierType quantifier_type_; |
| 133 }; |
| 134 |
| 135 |
| 136 class RegExpCapture : public RegExpTree { |
| 137 public: |
| 138 explicit RegExpCapture(RegExpTree* body, intptr_t index) |
| 139 : body_(body), index_(index) { } |
| 140 virtual void* Accept(RegExpVisitor* visitor, void* data); |
| 141 virtual RegExpNode* ToNode(RegExpCompiler* compiler, |
| 142 RegExpNode* on_success); |
| 143 static RegExpNode* ToNode(RegExpTree* body, |
| 144 intptr_t index, |
| 145 RegExpCompiler* compiler, |
| 146 RegExpNode* on_success); |
| 147 virtual RegExpCapture* AsCapture(); |
| 148 virtual bool IsAnchoredAtStart() const; |
| 149 virtual bool IsAnchoredAtEnd() const; |
| 150 virtual Interval CaptureRegisters() const; |
| 151 virtual bool IsCapture() const; |
| 152 virtual intptr_t min_match() const { return body_->min_match(); } |
| 153 virtual intptr_t max_match() const { return body_->max_match(); } |
| 154 RegExpTree* body() const { return body_; } |
| 155 intptr_t index() const { return index_; } |
| 156 static intptr_t StartRegister(intptr_t index) { return index * 2; } |
| 157 static intptr_t EndRegister(intptr_t index) { return index * 2 + 1; } |
| 158 |
| 159 private: |
| 160 RegExpTree* body_; |
| 161 intptr_t index_; |
| 162 }; |
| 163 |
| 164 |
| 165 class RegExpDisjunction : public RegExpTree { |
| 166 public: |
| 167 explicit RegExpDisjunction(ZoneGrowableArray<RegExpTree*>* alternatives); |
| 168 virtual void* Accept(RegExpVisitor* visitor, void* data); |
| 169 virtual RegExpNode* ToNode(RegExpCompiler* compiler, |
| 170 RegExpNode* on_success); |
| 171 virtual RegExpDisjunction* AsDisjunction(); |
| 172 virtual Interval CaptureRegisters() const; |
| 173 virtual bool IsDisjunction() const; |
| 174 virtual bool IsAnchoredAtStart() const; |
| 175 virtual bool IsAnchoredAtEnd() const; |
| 176 virtual intptr_t min_match() const { return min_match_; } |
| 177 virtual intptr_t max_match() const { return max_match_; } |
| 178 ZoneGrowableArray<RegExpTree*>* alternatives() const { return alternatives_; } |
| 179 private: |
| 180 ZoneGrowableArray<RegExpTree*>* alternatives_; |
| 181 intptr_t min_match_; |
| 182 intptr_t max_match_; |
| 183 }; |
| 184 |
| 185 |
| 186 class RegExpAlternative : public RegExpTree { |
| 187 public: |
| 188 explicit RegExpAlternative(ZoneGrowableArray<RegExpTree*>* nodes); |
| 189 virtual void* Accept(RegExpVisitor* visitor, void* data); |
| 190 virtual RegExpNode* ToNode(RegExpCompiler* compiler, |
| 191 RegExpNode* on_success); |
| 192 virtual RegExpAlternative* AsAlternative(); |
| 193 virtual Interval CaptureRegisters() const; |
| 194 virtual bool IsAlternative() const; |
| 195 virtual bool IsAnchoredAtStart() const; |
| 196 virtual bool IsAnchoredAtEnd() const; |
| 197 virtual intptr_t min_match() const { return min_match_; } |
| 198 virtual intptr_t max_match() const { return max_match_; } |
| 199 ZoneGrowableArray<RegExpTree*>* nodes() const { return nodes_; } |
| 200 private: |
| 201 ZoneGrowableArray<RegExpTree*>* nodes_; |
| 202 intptr_t min_match_; |
| 203 intptr_t max_match_; |
| 204 }; |
| 205 |
| 206 |
| 207 class RegExpAssertion : public RegExpTree { |
| 208 public: |
| 209 enum AssertionType { |
| 210 START_OF_LINE, |
| 211 START_OF_INPUT, |
| 212 END_OF_LINE, |
| 213 END_OF_INPUT, |
| 214 BOUNDARY, |
| 215 NON_BOUNDARY |
| 216 }; |
| 217 explicit RegExpAssertion(AssertionType type) : assertion_type_(type) { } |
| 218 virtual void* Accept(RegExpVisitor* visitor, void* data); |
| 219 virtual RegExpNode* ToNode(RegExpCompiler* compiler, |
| 220 RegExpNode* on_success); |
| 221 virtual RegExpAssertion* AsAssertion(); |
| 222 virtual bool IsAssertion() const; |
| 223 virtual bool IsAnchoredAtStart() const; |
| 224 virtual bool IsAnchoredAtEnd() const; |
| 225 virtual intptr_t min_match() const { return 0; } |
| 226 virtual intptr_t max_match() const { return 0; } |
| 227 AssertionType assertion_type() const { return assertion_type_; } |
| 228 private: |
| 229 AssertionType assertion_type_; |
| 230 }; |
| 231 |
| 232 |
| 233 class CharacterSet : public ValueObject { |
| 234 public: |
| 235 explicit CharacterSet(uint16_t standard_set_type) |
| 236 : ranges_(NULL), |
| 237 standard_set_type_(standard_set_type) {} |
| 238 explicit CharacterSet(ZoneGrowableArray<CharacterRange>* ranges) |
| 239 : ranges_(ranges), |
| 240 standard_set_type_(0) {} |
| 241 CharacterSet(const CharacterSet& that) |
| 242 : ValueObject(), |
| 243 ranges_(that.ranges_), |
| 244 standard_set_type_(that.standard_set_type_) {} |
| 245 ZoneGrowableArray<CharacterRange>* ranges(); |
| 246 uint16_t standard_set_type() const { return standard_set_type_; } |
| 247 void set_standard_set_type(uint16_t special_set_type) { |
| 248 standard_set_type_ = special_set_type; |
| 249 } |
| 250 bool is_standard() { return standard_set_type_ != 0; } |
| 251 void Canonicalize(); |
| 252 private: |
| 253 ZoneGrowableArray<CharacterRange>* ranges_; |
| 254 // If non-zero, the value represents a standard set (e.g., all whitespace |
| 255 // characters) without having to expand the ranges. |
| 256 uint16_t standard_set_type_; |
| 257 }; |
| 258 |
| 259 |
| 260 class RegExpCharacterClass : public RegExpTree { |
| 261 public: |
| 262 RegExpCharacterClass(ZoneGrowableArray<CharacterRange>* ranges, |
| 263 bool is_negated) |
| 264 : set_(ranges), |
| 265 is_negated_(is_negated) { } |
| 266 explicit RegExpCharacterClass(uint16_t type) |
| 267 : set_(type), |
| 268 is_negated_(false) { } |
| 269 virtual void* Accept(RegExpVisitor* visitor, void* data); |
| 270 virtual RegExpNode* ToNode(RegExpCompiler* compiler, |
| 271 RegExpNode* on_success); |
| 272 virtual RegExpCharacterClass* AsCharacterClass(); |
| 273 virtual bool IsCharacterClass() const; |
| 274 virtual bool IsTextElement() const { return true; } |
| 275 virtual intptr_t min_match() const { return 1; } |
| 276 virtual intptr_t max_match() const { return 1; } |
| 277 virtual void AppendToText(RegExpText* text); |
| 278 CharacterSet character_set() const { return set_; } |
| 279 // TODO(lrn): Remove need for complex version if is_standard that |
| 280 // recognizes a mangled standard set and just do { return set_.is_special(); } |
| 281 bool is_standard(); |
| 282 // Returns a value representing the standard character set if is_standard() |
| 283 // returns true. |
| 284 // Currently used values are: |
| 285 // s : unicode whitespace |
| 286 // S : unicode non-whitespace |
| 287 // w : ASCII word character (digit, letter, underscore) |
| 288 // W : non-ASCII word character |
| 289 // d : ASCII digit |
| 290 // D : non-ASCII digit |
| 291 // . : non-unicode non-newline |
| 292 // * : All characters |
| 293 uint16_t standard_type() const { return set_.standard_set_type(); } |
| 294 ZoneGrowableArray<CharacterRange>* ranges() { |
| 295 return set_.ranges(); |
| 296 } |
| 297 bool is_negated() const { return is_negated_; } |
| 298 |
| 299 private: |
| 300 CharacterSet set_; |
| 301 bool is_negated_; |
| 302 }; |
| 303 |
| 304 |
| 305 class RegExpAtom : public RegExpTree { |
| 306 public: |
| 307 explicit RegExpAtom(ZoneGrowableArray<uint16_t>* data) : data_(data) { } |
| 308 virtual void* Accept(RegExpVisitor* visitor, void* data); |
| 309 virtual RegExpNode* ToNode(RegExpCompiler* compiler, |
| 310 RegExpNode* on_success); |
| 311 virtual RegExpAtom* AsAtom(); |
| 312 virtual bool IsAtom() const; |
| 313 virtual bool IsTextElement() const { return true; } |
| 314 virtual intptr_t min_match() const { return data_->length(); } |
| 315 virtual intptr_t max_match() const { return data_->length(); } |
| 316 virtual void AppendToText(RegExpText* text); |
| 317 ZoneGrowableArray<uint16_t>* data() const { return data_; } |
| 318 intptr_t length() const { return data_->length(); } |
| 319 private: |
| 320 ZoneGrowableArray<uint16_t>* data_; |
| 321 }; |
| 322 |
| 323 |
| 324 class RegExpLookahead : public RegExpTree { |
| 325 public: |
| 326 RegExpLookahead(RegExpTree* body, |
| 327 bool is_positive, |
| 328 intptr_t capture_count, |
| 329 intptr_t capture_from) |
| 330 : body_(body), |
| 331 is_positive_(is_positive), |
| 332 capture_count_(capture_count), |
| 333 capture_from_(capture_from) { } |
| 334 |
| 335 virtual void* Accept(RegExpVisitor* visitor, void* data); |
| 336 virtual RegExpNode* ToNode(RegExpCompiler* compiler, |
| 337 RegExpNode* on_success); |
| 338 virtual RegExpLookahead* AsLookahead(); |
| 339 virtual Interval CaptureRegisters() const; |
| 340 virtual bool IsLookahead() const; |
| 341 virtual bool IsAnchoredAtStart() const; |
| 342 virtual intptr_t min_match() const { return 0; } |
| 343 virtual intptr_t max_match() const { return 0; } |
| 344 RegExpTree* body() const { return body_; } |
| 345 bool is_positive() const { return is_positive_; } |
| 346 intptr_t capture_count() const { return capture_count_; } |
| 347 intptr_t capture_from() const { return capture_from_; } |
| 348 |
| 349 private: |
| 350 RegExpTree* body_; |
| 351 bool is_positive_; |
| 352 intptr_t capture_count_; |
| 353 intptr_t capture_from_; |
| 354 }; |
| 355 |
| 356 |
| 357 class RegExpBackReference : public RegExpTree { |
| 358 public: |
| 359 explicit RegExpBackReference(RegExpCapture* capture) |
| 360 : capture_(capture) { } |
| 361 virtual void* Accept(RegExpVisitor* visitor, void* data); |
| 362 virtual RegExpNode* ToNode(RegExpCompiler* compiler, |
| 363 RegExpNode* on_success); |
| 364 virtual RegExpBackReference* AsBackReference(); |
| 365 virtual bool IsBackReference() const; |
| 366 virtual intptr_t min_match() const { return 0; } |
| 367 virtual intptr_t max_match() const { return capture_->max_match(); } |
| 368 intptr_t index() const { return capture_->index(); } |
| 369 RegExpCapture* capture() const { return capture_; } |
| 370 private: |
| 371 RegExpCapture* capture_; |
| 372 }; |
| 373 |
| 374 |
| 375 class RegExpEmpty : public RegExpTree { |
| 376 public: |
| 377 RegExpEmpty() { } |
| 378 virtual void* Accept(RegExpVisitor* visitor, void* data); |
| 379 virtual RegExpNode* ToNode(RegExpCompiler* compiler, |
| 380 RegExpNode* on_success); |
| 381 virtual RegExpEmpty* AsEmpty(); |
| 382 virtual bool IsEmpty() const; |
| 383 virtual intptr_t min_match() const { return 0; } |
| 384 virtual intptr_t max_match() const { return 0; } |
| 385 static RegExpEmpty* GetInstance() { |
| 386 static RegExpEmpty* instance = ::new RegExpEmpty(); |
| 387 return instance; |
| 388 } |
| 389 }; |
| 390 |
| 391 } // namespace dart |
| 392 |
| 393 #endif // VM_REGEXP_AST_H_ |
OLD | NEW |