| OLD | NEW |
| 1 // Copyright 2012 the V8 project authors. All rights reserved. | 1 // Copyright 2012 the V8 project authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #ifndef V8_REGEXP_JSREGEXP_H_ | 5 #ifndef V8_REGEXP_JSREGEXP_H_ |
| 6 #define V8_REGEXP_JSREGEXP_H_ | 6 #define V8_REGEXP_JSREGEXP_H_ |
| 7 | 7 |
| 8 #include "src/allocation.h" | 8 #include "src/allocation.h" |
| 9 #include "src/assembler.h" | 9 #include "src/assembler.h" |
| 10 #include "src/regexp/regexp-ast.h" |
| 10 | 11 |
| 11 namespace v8 { | 12 namespace v8 { |
| 12 namespace internal { | 13 namespace internal { |
| 13 | 14 |
| 14 class NodeVisitor; | 15 class NodeVisitor; |
| 15 class RegExpCompiler; | 16 class RegExpCompiler; |
| 16 class RegExpMacroAssembler; | 17 class RegExpMacroAssembler; |
| 17 class RegExpNode; | 18 class RegExpNode; |
| 18 class RegExpTree; | 19 class RegExpTree; |
| 19 class BoyerMooreLookahead; | 20 class BoyerMooreLookahead; |
| (...skipping 199 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 219 // Represents the location of one element relative to the intersection of | 220 // Represents the location of one element relative to the intersection of |
| 220 // two sets. Corresponds to the four areas of a Venn diagram. | 221 // two sets. Corresponds to the four areas of a Venn diagram. |
| 221 enum ElementInSetsRelation { | 222 enum ElementInSetsRelation { |
| 222 kInsideNone = 0, | 223 kInsideNone = 0, |
| 223 kInsideFirst = 1, | 224 kInsideFirst = 1, |
| 224 kInsideSecond = 2, | 225 kInsideSecond = 2, |
| 225 kInsideBoth = 3 | 226 kInsideBoth = 3 |
| 226 }; | 227 }; |
| 227 | 228 |
| 228 | 229 |
| 229 // Represents code units in the range from from_ to to_, both ends are | |
| 230 // inclusive. | |
| 231 class CharacterRange { | |
| 232 public: | |
| 233 CharacterRange() : from_(0), to_(0) { } | |
| 234 // For compatibility with the CHECK_OK macro | |
| 235 CharacterRange(void* null) { DCHECK_NULL(null); } // NOLINT | |
| 236 CharacterRange(uc16 from, uc16 to) : from_(from), to_(to) { } | |
| 237 static void AddClassEscape(uc16 type, ZoneList<CharacterRange>* ranges, | |
| 238 Zone* zone); | |
| 239 static Vector<const int> GetWordBounds(); | |
| 240 static inline CharacterRange Singleton(uc16 value) { | |
| 241 return CharacterRange(value, value); | |
| 242 } | |
| 243 static inline CharacterRange Range(uc16 from, uc16 to) { | |
| 244 DCHECK(from <= to); | |
| 245 return CharacterRange(from, to); | |
| 246 } | |
| 247 static inline CharacterRange Everything() { | |
| 248 return CharacterRange(0, 0xFFFF); | |
| 249 } | |
| 250 bool Contains(uc16 i) { return from_ <= i && i <= to_; } | |
| 251 uc16 from() const { return from_; } | |
| 252 void set_from(uc16 value) { from_ = value; } | |
| 253 uc16 to() const { return to_; } | |
| 254 void set_to(uc16 value) { to_ = value; } | |
| 255 bool is_valid() { return from_ <= to_; } | |
| 256 bool IsEverything(uc16 max) { return from_ == 0 && to_ >= max; } | |
| 257 bool IsSingleton() { return (from_ == to_); } | |
| 258 void AddCaseEquivalents(Isolate* isolate, Zone* zone, | |
| 259 ZoneList<CharacterRange>* ranges, bool is_one_byte); | |
| 260 static void Split(ZoneList<CharacterRange>* base, | |
| 261 Vector<const int> overlay, | |
| 262 ZoneList<CharacterRange>** included, | |
| 263 ZoneList<CharacterRange>** excluded, | |
| 264 Zone* zone); | |
| 265 // Whether a range list is in canonical form: Ranges ordered by from value, | |
| 266 // and ranges non-overlapping and non-adjacent. | |
| 267 static bool IsCanonical(ZoneList<CharacterRange>* ranges); | |
| 268 // Convert range list to canonical form. The characters covered by the ranges | |
| 269 // will still be the same, but no character is in more than one range, and | |
| 270 // adjacent ranges are merged. The resulting list may be shorter than the | |
| 271 // original, but cannot be longer. | |
| 272 static void Canonicalize(ZoneList<CharacterRange>* ranges); | |
| 273 // Negate the contents of a character range in canonical form. | |
| 274 static void Negate(ZoneList<CharacterRange>* src, | |
| 275 ZoneList<CharacterRange>* dst, | |
| 276 Zone* zone); | |
| 277 static const int kStartMarker = (1 << 24); | |
| 278 static const int kPayloadMask = (1 << 24) - 1; | |
| 279 | |
| 280 private: | |
| 281 uc16 from_; | |
| 282 uc16 to_; | |
| 283 }; | |
| 284 | |
| 285 | |
| 286 // A set of unsigned integers that behaves especially well on small | 230 // A set of unsigned integers that behaves especially well on small |
| 287 // integers (< 32). May do zone-allocation. | 231 // integers (< 32). May do zone-allocation. |
| 288 class OutSet: public ZoneObject { | 232 class OutSet: public ZoneObject { |
| 289 public: | 233 public: |
| 290 OutSet() : first_(0), remaining_(NULL), successors_(NULL) { } | 234 OutSet() : first_(0), remaining_(NULL), successors_(NULL) { } |
| 291 OutSet* Extend(unsigned value, Zone* zone); | 235 OutSet* Extend(unsigned value, Zone* zone); |
| 292 bool Get(unsigned value) const; | 236 bool Get(unsigned value) const; |
| 293 static const unsigned kFirstLimit = 32; | 237 static const unsigned kFirstLimit = 32; |
| 294 | 238 |
| 295 private: | 239 private: |
| (...skipping 77 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 373 | 317 |
| 374 #define FOR_EACH_NODE_TYPE(VISIT) \ | 318 #define FOR_EACH_NODE_TYPE(VISIT) \ |
| 375 VISIT(End) \ | 319 VISIT(End) \ |
| 376 VISIT(Action) \ | 320 VISIT(Action) \ |
| 377 VISIT(Choice) \ | 321 VISIT(Choice) \ |
| 378 VISIT(BackReference) \ | 322 VISIT(BackReference) \ |
| 379 VISIT(Assertion) \ | 323 VISIT(Assertion) \ |
| 380 VISIT(Text) | 324 VISIT(Text) |
| 381 | 325 |
| 382 | 326 |
| 383 #define FOR_EACH_REG_EXP_TREE_TYPE(VISIT) \ | |
| 384 VISIT(Disjunction) \ | |
| 385 VISIT(Alternative) \ | |
| 386 VISIT(Assertion) \ | |
| 387 VISIT(CharacterClass) \ | |
| 388 VISIT(Atom) \ | |
| 389 VISIT(Quantifier) \ | |
| 390 VISIT(Capture) \ | |
| 391 VISIT(Lookaround) \ | |
| 392 VISIT(BackReference) \ | |
| 393 VISIT(Empty) \ | |
| 394 VISIT(Text) | |
| 395 | |
| 396 | |
| 397 #define FORWARD_DECLARE(Name) class RegExp##Name; | |
| 398 FOR_EACH_REG_EXP_TREE_TYPE(FORWARD_DECLARE) | |
| 399 #undef FORWARD_DECLARE | |
| 400 | |
| 401 | |
| 402 class TextElement final BASE_EMBEDDED { | |
| 403 public: | |
| 404 enum TextType { | |
| 405 ATOM, | |
| 406 CHAR_CLASS | |
| 407 }; | |
| 408 | |
| 409 static TextElement Atom(RegExpAtom* atom); | |
| 410 static TextElement CharClass(RegExpCharacterClass* char_class); | |
| 411 | |
| 412 int cp_offset() const { return cp_offset_; } | |
| 413 void set_cp_offset(int cp_offset) { cp_offset_ = cp_offset; } | |
| 414 int length() const; | |
| 415 | |
| 416 TextType text_type() const { return text_type_; } | |
| 417 | |
| 418 RegExpTree* tree() const { return tree_; } | |
| 419 | |
| 420 RegExpAtom* atom() const { | |
| 421 DCHECK(text_type() == ATOM); | |
| 422 return reinterpret_cast<RegExpAtom*>(tree()); | |
| 423 } | |
| 424 | |
| 425 RegExpCharacterClass* char_class() const { | |
| 426 DCHECK(text_type() == CHAR_CLASS); | |
| 427 return reinterpret_cast<RegExpCharacterClass*>(tree()); | |
| 428 } | |
| 429 | |
| 430 private: | |
| 431 TextElement(TextType text_type, RegExpTree* tree) | |
| 432 : cp_offset_(-1), text_type_(text_type), tree_(tree) {} | |
| 433 | |
| 434 int cp_offset_; | |
| 435 TextType text_type_; | |
| 436 RegExpTree* tree_; | |
| 437 }; | |
| 438 | |
| 439 | |
| 440 class Trace; | 327 class Trace; |
| 441 struct PreloadState; | 328 struct PreloadState; |
| 442 class GreedyLoopState; | 329 class GreedyLoopState; |
| 443 class AlternativeGenerationList; | 330 class AlternativeGenerationList; |
| 444 | 331 |
| 445 struct NodeInfo { | 332 struct NodeInfo { |
| 446 NodeInfo() | 333 NodeInfo() |
| 447 : being_analyzed(false), | 334 : being_analyzed(false), |
| 448 been_analyzed(false), | 335 been_analyzed(false), |
| 449 follows_word_interest(false), | 336 follows_word_interest(false), |
| (...skipping 231 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 681 // generated code is located unless the code is generated at the start of | 568 // generated code is located unless the code is generated at the start of |
| 682 // a trace, in which case it is generic and can be reused by flushing the | 569 // a trace, in which case it is generic and can be reused by flushing the |
| 683 // deferred operations in the current trace and generating a goto. | 570 // deferred operations in the current trace and generating a goto. |
| 684 int trace_count_; | 571 int trace_count_; |
| 685 BoyerMooreLookahead* bm_info_[2]; | 572 BoyerMooreLookahead* bm_info_[2]; |
| 686 | 573 |
| 687 Zone* zone_; | 574 Zone* zone_; |
| 688 }; | 575 }; |
| 689 | 576 |
| 690 | 577 |
| 691 // A simple closed interval. | |
| 692 class Interval { | |
| 693 public: | |
| 694 Interval() : from_(kNone), to_(kNone) { } | |
| 695 Interval(int from, int to) : from_(from), to_(to) { } | |
| 696 Interval Union(Interval that) { | |
| 697 if (that.from_ == kNone) | |
| 698 return *this; | |
| 699 else if (from_ == kNone) | |
| 700 return that; | |
| 701 else | |
| 702 return Interval(Min(from_, that.from_), Max(to_, that.to_)); | |
| 703 } | |
| 704 bool Contains(int value) { | |
| 705 return (from_ <= value) && (value <= to_); | |
| 706 } | |
| 707 bool is_empty() { return from_ == kNone; } | |
| 708 int from() const { return from_; } | |
| 709 int to() const { return to_; } | |
| 710 static Interval Empty() { return Interval(); } | |
| 711 static const int kNone = -1; | |
| 712 private: | |
| 713 int from_; | |
| 714 int to_; | |
| 715 }; | |
| 716 | |
| 717 | |
| 718 class SeqRegExpNode: public RegExpNode { | 578 class SeqRegExpNode: public RegExpNode { |
| 719 public: | 579 public: |
| 720 explicit SeqRegExpNode(RegExpNode* on_success) | 580 explicit SeqRegExpNode(RegExpNode* on_success) |
| 721 : RegExpNode(on_success->zone()), on_success_(on_success) { } | 581 : RegExpNode(on_success->zone()), on_success_(on_success) { } |
| 722 RegExpNode* on_success() { return on_success_; } | 582 RegExpNode* on_success() { return on_success_; } |
| 723 void set_on_success(RegExpNode* node) { on_success_ = node; } | 583 void set_on_success(RegExpNode* node) { on_success_ = node; } |
| 724 virtual RegExpNode* FilterOneByte(int depth, bool ignore_case); | 584 virtual RegExpNode* FilterOneByte(int depth, bool ignore_case); |
| 725 virtual void FillInBMInfo(Isolate* isolate, int offset, int budget, | 585 virtual void FillInBMInfo(Isolate* isolate, int offset, int budget, |
| 726 BoyerMooreLookahead* bm, bool not_at_start) { | 586 BoyerMooreLookahead* bm, bool not_at_start) { |
| 727 on_success_->FillInBMInfo(isolate, offset, budget - 1, bm, not_at_start); | 587 on_success_->FillInBMInfo(isolate, offset, budget - 1, bm, not_at_start); |
| (...skipping 950 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1678 static const int kStringOffset = 0; | 1538 static const int kStringOffset = 0; |
| 1679 static const int kPatternOffset = 1; | 1539 static const int kPatternOffset = 1; |
| 1680 static const int kArrayOffset = 2; | 1540 static const int kArrayOffset = 2; |
| 1681 static const int kLastMatchOffset = 3; | 1541 static const int kLastMatchOffset = 3; |
| 1682 }; | 1542 }; |
| 1683 | 1543 |
| 1684 } // namespace internal | 1544 } // namespace internal |
| 1685 } // namespace v8 | 1545 } // namespace v8 |
| 1686 | 1546 |
| 1687 #endif // V8_REGEXP_JSREGEXP_H_ | 1547 #endif // V8_REGEXP_JSREGEXP_H_ |
| OLD | NEW |