| OLD | NEW |
| 1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file |
| 2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
| 3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
| 4 | 4 |
| 5 #include "vm/regexp_parser.h" |
| 5 #include "vm/longjump.h" | 6 #include "vm/longjump.h" |
| 6 #include "vm/object_store.h" | 7 #include "vm/object_store.h" |
| 7 #include "vm/regexp_parser.h" | |
| 8 | 8 |
| 9 namespace dart { | 9 namespace dart { |
| 10 | 10 |
| 11 #define Z zone() | 11 #define Z zone() |
| 12 | 12 |
| 13 // Enables possessive quantifier syntax for testing. | 13 // Enables possessive quantifier syntax for testing. |
| 14 static const bool FLAG_regexp_possessive_quantifier = false; | 14 static const bool FLAG_regexp_possessive_quantifier = false; |
| 15 | 15 |
| 16 RegExpBuilder::RegExpBuilder() | 16 RegExpBuilder::RegExpBuilder() |
| 17 : zone_(Thread::Current()->zone()), | 17 : zone_(Thread::Current()->zone()), |
| 18 pending_empty_(false), | 18 pending_empty_(false), |
| 19 characters_(NULL), | 19 characters_(NULL), |
| 20 terms_(), | 20 terms_(), |
| 21 text_(), | 21 text_(), |
| 22 alternatives_() | 22 alternatives_() |
| 23 #ifdef DEBUG | 23 #ifdef DEBUG |
| 24 , | 24 , |
| 25 last_added_(ADD_NONE) | 25 last_added_(ADD_NONE) |
| 26 #endif | 26 #endif |
| 27 { | 27 { |
| 28 } | 28 } |
| 29 | 29 |
| 30 | |
| 31 void RegExpBuilder::FlushCharacters() { | 30 void RegExpBuilder::FlushCharacters() { |
| 32 pending_empty_ = false; | 31 pending_empty_ = false; |
| 33 if (characters_ != NULL) { | 32 if (characters_ != NULL) { |
| 34 RegExpTree* atom = new (Z) RegExpAtom(characters_); | 33 RegExpTree* atom = new (Z) RegExpAtom(characters_); |
| 35 characters_ = NULL; | 34 characters_ = NULL; |
| 36 text_.Add(atom); | 35 text_.Add(atom); |
| 37 LAST(ADD_ATOM); | 36 LAST(ADD_ATOM); |
| 38 } | 37 } |
| 39 } | 38 } |
| 40 | 39 |
| 41 | |
| 42 void RegExpBuilder::FlushText() { | 40 void RegExpBuilder::FlushText() { |
| 43 FlushCharacters(); | 41 FlushCharacters(); |
| 44 intptr_t num_text = text_.length(); | 42 intptr_t num_text = text_.length(); |
| 45 if (num_text == 0) { | 43 if (num_text == 0) { |
| 46 return; | 44 return; |
| 47 } else if (num_text == 1) { | 45 } else if (num_text == 1) { |
| 48 terms_.Add(text_.Last()); | 46 terms_.Add(text_.Last()); |
| 49 } else { | 47 } else { |
| 50 RegExpText* text = new (Z) RegExpText(); | 48 RegExpText* text = new (Z) RegExpText(); |
| 51 for (intptr_t i = 0; i < num_text; i++) | 49 for (intptr_t i = 0; i < num_text; i++) |
| 52 text_[i]->AppendToText(text); | 50 text_[i]->AppendToText(text); |
| 53 terms_.Add(text); | 51 terms_.Add(text); |
| 54 } | 52 } |
| 55 text_.Clear(); | 53 text_.Clear(); |
| 56 } | 54 } |
| 57 | 55 |
| 58 | |
| 59 void RegExpBuilder::AddCharacter(uint16_t c) { | 56 void RegExpBuilder::AddCharacter(uint16_t c) { |
| 60 pending_empty_ = false; | 57 pending_empty_ = false; |
| 61 if (characters_ == NULL) { | 58 if (characters_ == NULL) { |
| 62 characters_ = new (Z) ZoneGrowableArray<uint16_t>(4); | 59 characters_ = new (Z) ZoneGrowableArray<uint16_t>(4); |
| 63 } | 60 } |
| 64 characters_->Add(c); | 61 characters_->Add(c); |
| 65 LAST(ADD_CHAR); | 62 LAST(ADD_CHAR); |
| 66 } | 63 } |
| 67 | 64 |
| 68 | |
| 69 void RegExpBuilder::AddEmpty() { | 65 void RegExpBuilder::AddEmpty() { |
| 70 pending_empty_ = true; | 66 pending_empty_ = true; |
| 71 } | 67 } |
| 72 | 68 |
| 73 | |
| 74 void RegExpBuilder::AddAtom(RegExpTree* term) { | 69 void RegExpBuilder::AddAtom(RegExpTree* term) { |
| 75 if (term->IsEmpty()) { | 70 if (term->IsEmpty()) { |
| 76 AddEmpty(); | 71 AddEmpty(); |
| 77 return; | 72 return; |
| 78 } | 73 } |
| 79 if (term->IsTextElement()) { | 74 if (term->IsTextElement()) { |
| 80 FlushCharacters(); | 75 FlushCharacters(); |
| 81 text_.Add(term); | 76 text_.Add(term); |
| 82 } else { | 77 } else { |
| 83 FlushText(); | 78 FlushText(); |
| 84 terms_.Add(term); | 79 terms_.Add(term); |
| 85 } | 80 } |
| 86 LAST(ADD_ATOM); | 81 LAST(ADD_ATOM); |
| 87 } | 82 } |
| 88 | 83 |
| 89 | |
| 90 void RegExpBuilder::AddAssertion(RegExpTree* assert) { | 84 void RegExpBuilder::AddAssertion(RegExpTree* assert) { |
| 91 FlushText(); | 85 FlushText(); |
| 92 terms_.Add(assert); | 86 terms_.Add(assert); |
| 93 LAST(ADD_ASSERT); | 87 LAST(ADD_ASSERT); |
| 94 } | 88 } |
| 95 | 89 |
| 96 | |
| 97 void RegExpBuilder::NewAlternative() { | 90 void RegExpBuilder::NewAlternative() { |
| 98 FlushTerms(); | 91 FlushTerms(); |
| 99 } | 92 } |
| 100 | 93 |
| 101 | |
| 102 void RegExpBuilder::FlushTerms() { | 94 void RegExpBuilder::FlushTerms() { |
| 103 FlushText(); | 95 FlushText(); |
| 104 intptr_t num_terms = terms_.length(); | 96 intptr_t num_terms = terms_.length(); |
| 105 RegExpTree* alternative; | 97 RegExpTree* alternative; |
| 106 if (num_terms == 0) { | 98 if (num_terms == 0) { |
| 107 alternative = RegExpEmpty::GetInstance(); | 99 alternative = RegExpEmpty::GetInstance(); |
| 108 } else if (num_terms == 1) { | 100 } else if (num_terms == 1) { |
| 109 alternative = terms_.Last(); | 101 alternative = terms_.Last(); |
| 110 } else { | 102 } else { |
| 111 ZoneGrowableArray<RegExpTree*>* terms = | 103 ZoneGrowableArray<RegExpTree*>* terms = |
| 112 new (Z) ZoneGrowableArray<RegExpTree*>(); | 104 new (Z) ZoneGrowableArray<RegExpTree*>(); |
| 113 for (intptr_t i = 0; i < terms_.length(); i++) { | 105 for (intptr_t i = 0; i < terms_.length(); i++) { |
| 114 terms->Add(terms_[i]); | 106 terms->Add(terms_[i]); |
| 115 } | 107 } |
| 116 alternative = new (Z) RegExpAlternative(terms); | 108 alternative = new (Z) RegExpAlternative(terms); |
| 117 } | 109 } |
| 118 alternatives_.Add(alternative); | 110 alternatives_.Add(alternative); |
| 119 terms_.Clear(); | 111 terms_.Clear(); |
| 120 LAST(ADD_NONE); | 112 LAST(ADD_NONE); |
| 121 } | 113 } |
| 122 | 114 |
| 123 | |
| 124 RegExpTree* RegExpBuilder::ToRegExp() { | 115 RegExpTree* RegExpBuilder::ToRegExp() { |
| 125 FlushTerms(); | 116 FlushTerms(); |
| 126 intptr_t num_alternatives = alternatives_.length(); | 117 intptr_t num_alternatives = alternatives_.length(); |
| 127 if (num_alternatives == 0) { | 118 if (num_alternatives == 0) { |
| 128 return RegExpEmpty::GetInstance(); | 119 return RegExpEmpty::GetInstance(); |
| 129 } | 120 } |
| 130 if (num_alternatives == 1) { | 121 if (num_alternatives == 1) { |
| 131 return alternatives_.Last(); | 122 return alternatives_.Last(); |
| 132 } | 123 } |
| 133 ZoneGrowableArray<RegExpTree*>* alternatives = | 124 ZoneGrowableArray<RegExpTree*>* alternatives = |
| 134 new (Z) ZoneGrowableArray<RegExpTree*>(); | 125 new (Z) ZoneGrowableArray<RegExpTree*>(); |
| 135 for (intptr_t i = 0; i < alternatives_.length(); i++) { | 126 for (intptr_t i = 0; i < alternatives_.length(); i++) { |
| 136 alternatives->Add(alternatives_[i]); | 127 alternatives->Add(alternatives_[i]); |
| 137 } | 128 } |
| 138 return new (Z) RegExpDisjunction(alternatives); | 129 return new (Z) RegExpDisjunction(alternatives); |
| 139 } | 130 } |
| 140 | 131 |
| 141 | |
| 142 void RegExpBuilder::AddQuantifierToAtom( | 132 void RegExpBuilder::AddQuantifierToAtom( |
| 143 intptr_t min, | 133 intptr_t min, |
| 144 intptr_t max, | 134 intptr_t max, |
| 145 RegExpQuantifier::QuantifierType quantifier_type) { | 135 RegExpQuantifier::QuantifierType quantifier_type) { |
| 146 if (pending_empty_) { | 136 if (pending_empty_) { |
| 147 pending_empty_ = false; | 137 pending_empty_ = false; |
| 148 return; | 138 return; |
| 149 } | 139 } |
| 150 RegExpTree* atom; | 140 RegExpTree* atom; |
| 151 if (characters_ != NULL) { | 141 if (characters_ != NULL) { |
| (...skipping 56 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 208 capture_count_(0), | 198 capture_count_(0), |
| 209 has_more_(true), | 199 has_more_(true), |
| 210 multiline_(multiline), | 200 multiline_(multiline), |
| 211 simple_(false), | 201 simple_(false), |
| 212 contains_anchor_(false), | 202 contains_anchor_(false), |
| 213 is_scanned_for_captures_(false), | 203 is_scanned_for_captures_(false), |
| 214 failed_(false) { | 204 failed_(false) { |
| 215 Advance(); | 205 Advance(); |
| 216 } | 206 } |
| 217 | 207 |
| 218 | |
| 219 uint32_t RegExpParser::Next() { | 208 uint32_t RegExpParser::Next() { |
| 220 if (has_next()) { | 209 if (has_next()) { |
| 221 return in().CharAt(next_pos_); | 210 return in().CharAt(next_pos_); |
| 222 } else { | 211 } else { |
| 223 return kEndMarker; | 212 return kEndMarker; |
| 224 } | 213 } |
| 225 } | 214 } |
| 226 | 215 |
| 227 | |
| 228 void RegExpParser::Advance() { | 216 void RegExpParser::Advance() { |
| 229 if (next_pos_ < in().Length()) { | 217 if (next_pos_ < in().Length()) { |
| 230 current_ = in().CharAt(next_pos_); | 218 current_ = in().CharAt(next_pos_); |
| 231 next_pos_++; | 219 next_pos_++; |
| 232 } else { | 220 } else { |
| 233 current_ = kEndMarker; | 221 current_ = kEndMarker; |
| 234 has_more_ = false; | 222 has_more_ = false; |
| 235 } | 223 } |
| 236 } | 224 } |
| 237 | 225 |
| 238 | |
| 239 void RegExpParser::Reset(intptr_t pos) { | 226 void RegExpParser::Reset(intptr_t pos) { |
| 240 next_pos_ = pos; | 227 next_pos_ = pos; |
| 241 has_more_ = (pos < in().Length()); | 228 has_more_ = (pos < in().Length()); |
| 242 Advance(); | 229 Advance(); |
| 243 } | 230 } |
| 244 | 231 |
| 245 | |
| 246 void RegExpParser::Advance(intptr_t dist) { | 232 void RegExpParser::Advance(intptr_t dist) { |
| 247 next_pos_ += dist - 1; | 233 next_pos_ += dist - 1; |
| 248 Advance(); | 234 Advance(); |
| 249 } | 235 } |
| 250 | 236 |
| 251 | |
| 252 bool RegExpParser::simple() { | 237 bool RegExpParser::simple() { |
| 253 return simple_; | 238 return simple_; |
| 254 } | 239 } |
| 255 | 240 |
| 256 | |
| 257 void RegExpParser::ReportError(const char* message) { | 241 void RegExpParser::ReportError(const char* message) { |
| 258 failed_ = true; | 242 failed_ = true; |
| 259 *error_ = String::New(message); | 243 *error_ = String::New(message); |
| 260 // Zip to the end to make sure that no more input is read. | 244 // Zip to the end to make sure that no more input is read. |
| 261 current_ = kEndMarker; | 245 current_ = kEndMarker; |
| 262 next_pos_ = in().Length(); | 246 next_pos_ = in().Length(); |
| 263 | 247 |
| 264 const Error& error = Error::Handle(LanguageError::New(*error_)); | 248 const Error& error = Error::Handle(LanguageError::New(*error_)); |
| 265 Report::LongJump(error); | 249 Report::LongJump(error); |
| 266 UNREACHABLE(); | 250 UNREACHABLE(); |
| 267 } | 251 } |
| 268 | 252 |
| 269 | |
| 270 // Pattern :: | 253 // Pattern :: |
| 271 // Disjunction | 254 // Disjunction |
| 272 RegExpTree* RegExpParser::ParsePattern() { | 255 RegExpTree* RegExpParser::ParsePattern() { |
| 273 RegExpTree* result = ParseDisjunction(); | 256 RegExpTree* result = ParseDisjunction(); |
| 274 ASSERT(!has_more()); | 257 ASSERT(!has_more()); |
| 275 // If the result of parsing is a literal string atom, and it has the | 258 // If the result of parsing is a literal string atom, and it has the |
| 276 // same length as the input, then the atom is identical to the input. | 259 // same length as the input, then the atom is identical to the input. |
| 277 if (result->IsAtom() && result->AsAtom()->length() == in().Length()) { | 260 if (result->IsAtom() && result->AsAtom()->length() == in().Length()) { |
| 278 simple_ = true; | 261 simple_ = true; |
| 279 } | 262 } |
| 280 return result; | 263 return result; |
| 281 } | 264 } |
| 282 | 265 |
| 283 | |
| 284 // Disjunction :: | 266 // Disjunction :: |
| 285 // Alternative | 267 // Alternative |
| 286 // Alternative | Disjunction | 268 // Alternative | Disjunction |
| 287 // Alternative :: | 269 // Alternative :: |
| 288 // [empty] | 270 // [empty] |
| 289 // Term Alternative | 271 // Term Alternative |
| 290 // Term :: | 272 // Term :: |
| 291 // Assertion | 273 // Assertion |
| 292 // Atom | 274 // Atom |
| 293 // Atom Quantifier | 275 // Atom Quantifier |
| (...skipping 331 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 625 Advance(); | 607 Advance(); |
| 626 } else if (FLAG_regexp_possessive_quantifier && current() == '+') { | 608 } else if (FLAG_regexp_possessive_quantifier && current() == '+') { |
| 627 // FLAG_regexp_possessive_quantifier is a debug-only flag. | 609 // FLAG_regexp_possessive_quantifier is a debug-only flag. |
| 628 quantifier_type = RegExpQuantifier::POSSESSIVE; | 610 quantifier_type = RegExpQuantifier::POSSESSIVE; |
| 629 Advance(); | 611 Advance(); |
| 630 } | 612 } |
| 631 builder->AddQuantifierToAtom(min, max, quantifier_type); | 613 builder->AddQuantifierToAtom(min, max, quantifier_type); |
| 632 } | 614 } |
| 633 } | 615 } |
| 634 | 616 |
| 635 | |
| 636 #ifdef DEBUG | 617 #ifdef DEBUG |
| 637 // Currently only used in an ASSERT. | 618 // Currently only used in an ASSERT. |
| 638 static bool IsSpecialClassEscape(uint32_t c) { | 619 static bool IsSpecialClassEscape(uint32_t c) { |
| 639 switch (c) { | 620 switch (c) { |
| 640 case 'd': | 621 case 'd': |
| 641 case 'D': | 622 case 'D': |
| 642 case 's': | 623 case 's': |
| 643 case 'S': | 624 case 'S': |
| 644 case 'w': | 625 case 'w': |
| 645 case 'W': | 626 case 'W': |
| 646 return true; | 627 return true; |
| 647 default: | 628 default: |
| 648 return false; | 629 return false; |
| 649 } | 630 } |
| 650 } | 631 } |
| 651 #endif | 632 #endif |
| 652 | 633 |
| 653 | |
| 654 // In order to know whether an escape is a backreference or not we have to scan | 634 // In order to know whether an escape is a backreference or not we have to scan |
| 655 // the entire regexp and find the number of capturing parentheses. However we | 635 // the entire regexp and find the number of capturing parentheses. However we |
| 656 // don't want to scan the regexp twice unless it is necessary. This mini-parser | 636 // don't want to scan the regexp twice unless it is necessary. This mini-parser |
| 657 // is called when needed. It can see the difference between capturing and | 637 // is called when needed. It can see the difference between capturing and |
| 658 // noncapturing parentheses and can skip character classes and backslash-escaped | 638 // noncapturing parentheses and can skip character classes and backslash-escaped |
| 659 // characters. | 639 // characters. |
| 660 void RegExpParser::ScanForCaptures() { | 640 void RegExpParser::ScanForCaptures() { |
| 661 // Start with the captures started before the current position. | 641 // Start with the captures started before the current position. |
| 662 intptr_t capture_count = captures_started(); | 642 intptr_t capture_count = captures_started(); |
| 663 // Add count of captures after this position. | 643 // Add count of captures after this position. |
| (...skipping 18 matching lines...) Expand all Loading... |
| 682 } | 662 } |
| 683 case '(': | 663 case '(': |
| 684 if (current() != '?') capture_count++; | 664 if (current() != '?') capture_count++; |
| 685 break; | 665 break; |
| 686 } | 666 } |
| 687 } | 667 } |
| 688 capture_count_ = capture_count; | 668 capture_count_ = capture_count; |
| 689 is_scanned_for_captures_ = true; | 669 is_scanned_for_captures_ = true; |
| 690 } | 670 } |
| 691 | 671 |
| 692 | |
| 693 static inline bool IsDecimalDigit(int32_t c) { | 672 static inline bool IsDecimalDigit(int32_t c) { |
| 694 return '0' <= c && c <= '9'; | 673 return '0' <= c && c <= '9'; |
| 695 } | 674 } |
| 696 | 675 |
| 697 | |
| 698 bool RegExpParser::ParseBackReferenceIndex(intptr_t* index_out) { | 676 bool RegExpParser::ParseBackReferenceIndex(intptr_t* index_out) { |
| 699 ASSERT('\\' == current()); | 677 ASSERT('\\' == current()); |
| 700 ASSERT('1' <= Next() && Next() <= '9'); | 678 ASSERT('1' <= Next() && Next() <= '9'); |
| 701 // Try to parse a decimal literal that is no greater than the total number | 679 // Try to parse a decimal literal that is no greater than the total number |
| 702 // of left capturing parentheses in the input. | 680 // of left capturing parentheses in the input. |
| 703 intptr_t start = position(); | 681 intptr_t start = position(); |
| 704 intptr_t value = Next() - '0'; | 682 intptr_t value = Next() - '0'; |
| 705 Advance(2); | 683 Advance(2); |
| 706 while (true) { | 684 while (true) { |
| 707 uint32_t c = current(); | 685 uint32_t c = current(); |
| (...skipping 16 matching lines...) Expand all Loading... |
| 724 } | 702 } |
| 725 if (value > capture_count_) { | 703 if (value > capture_count_) { |
| 726 Reset(start); | 704 Reset(start); |
| 727 return false; | 705 return false; |
| 728 } | 706 } |
| 729 } | 707 } |
| 730 *index_out = value; | 708 *index_out = value; |
| 731 return true; | 709 return true; |
| 732 } | 710 } |
| 733 | 711 |
| 734 | |
| 735 // QuantifierPrefix :: | 712 // QuantifierPrefix :: |
| 736 // { DecimalDigits } | 713 // { DecimalDigits } |
| 737 // { DecimalDigits , } | 714 // { DecimalDigits , } |
| 738 // { DecimalDigits , DecimalDigits } | 715 // { DecimalDigits , DecimalDigits } |
| 739 // | 716 // |
| 740 // Returns true if parsing succeeds, and sets the min_out and max_out | 717 // Returns true if parsing succeeds, and sets the min_out and max_out |
| 741 // values. Values are truncated to RegExpTree::kInfinity if they overflow. | 718 // values. Values are truncated to RegExpTree::kInfinity if they overflow. |
| 742 bool RegExpParser::ParseIntervalQuantifier(intptr_t* min_out, | 719 bool RegExpParser::ParseIntervalQuantifier(intptr_t* min_out, |
| 743 intptr_t* max_out) { | 720 intptr_t* max_out) { |
| 744 ASSERT(current() == '{'); | 721 ASSERT(current() == '{'); |
| (...skipping 47 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 792 } | 769 } |
| 793 } else { | 770 } else { |
| 794 Reset(start); | 771 Reset(start); |
| 795 return false; | 772 return false; |
| 796 } | 773 } |
| 797 *min_out = min; | 774 *min_out = min; |
| 798 *max_out = max; | 775 *max_out = max; |
| 799 return true; | 776 return true; |
| 800 } | 777 } |
| 801 | 778 |
| 802 | |
| 803 uint32_t RegExpParser::ParseOctalLiteral() { | 779 uint32_t RegExpParser::ParseOctalLiteral() { |
| 804 ASSERT(('0' <= current() && current() <= '7') || current() == kEndMarker); | 780 ASSERT(('0' <= current() && current() <= '7') || current() == kEndMarker); |
| 805 // For compatibility with some other browsers (not all), we parse | 781 // For compatibility with some other browsers (not all), we parse |
| 806 // up to three octal digits with a value below 256. | 782 // up to three octal digits with a value below 256. |
| 807 uint32_t value = current() - '0'; | 783 uint32_t value = current() - '0'; |
| 808 Advance(); | 784 Advance(); |
| 809 if ('0' <= current() && current() <= '7') { | 785 if ('0' <= current() && current() <= '7') { |
| 810 value = value * 8 + current() - '0'; | 786 value = value * 8 + current() - '0'; |
| 811 Advance(); | 787 Advance(); |
| 812 if (value < 32 && '0' <= current() && current() <= '7') { | 788 if (value < 32 && '0' <= current() && current() <= '7') { |
| 813 value = value * 8 + current() - '0'; | 789 value = value * 8 + current() - '0'; |
| 814 Advance(); | 790 Advance(); |
| 815 } | 791 } |
| 816 } | 792 } |
| 817 return value; | 793 return value; |
| 818 } | 794 } |
| 819 | 795 |
| 820 | |
| 821 // Returns the value (0 .. 15) of a hexadecimal character c. | 796 // Returns the value (0 .. 15) of a hexadecimal character c. |
| 822 // If c is not a legal hexadecimal character, returns a value < 0. | 797 // If c is not a legal hexadecimal character, returns a value < 0. |
| 823 static inline intptr_t HexValue(uint32_t c) { | 798 static inline intptr_t HexValue(uint32_t c) { |
| 824 c -= '0'; | 799 c -= '0'; |
| 825 if (static_cast<unsigned>(c) <= 9) return c; | 800 if (static_cast<unsigned>(c) <= 9) return c; |
| 826 c = (c | 0x20) - ('a' - '0'); // detect 0x11..0x16 and 0x31..0x36. | 801 c = (c | 0x20) - ('a' - '0'); // detect 0x11..0x16 and 0x31..0x36. |
| 827 if (static_cast<unsigned>(c) <= 5) return c + 10; | 802 if (static_cast<unsigned>(c) <= 5) return c + 10; |
| 828 return -1; | 803 return -1; |
| 829 } | 804 } |
| 830 | 805 |
| 831 | |
| 832 bool RegExpParser::ParseHexEscape(intptr_t length, uint32_t* value) { | 806 bool RegExpParser::ParseHexEscape(intptr_t length, uint32_t* value) { |
| 833 intptr_t start = position(); | 807 intptr_t start = position(); |
| 834 uint32_t val = 0; | 808 uint32_t val = 0; |
| 835 bool done = false; | 809 bool done = false; |
| 836 for (intptr_t i = 0; !done; i++) { | 810 for (intptr_t i = 0; !done; i++) { |
| 837 uint32_t c = current(); | 811 uint32_t c = current(); |
| 838 intptr_t d = HexValue(c); | 812 intptr_t d = HexValue(c); |
| 839 if (d < 0) { | 813 if (d < 0) { |
| 840 Reset(start); | 814 Reset(start); |
| 841 return false; | 815 return false; |
| 842 } | 816 } |
| 843 val = val * 16 + d; | 817 val = val * 16 + d; |
| 844 Advance(); | 818 Advance(); |
| 845 if (i == length - 1) { | 819 if (i == length - 1) { |
| 846 done = true; | 820 done = true; |
| 847 } | 821 } |
| 848 } | 822 } |
| 849 *value = val; | 823 *value = val; |
| 850 return true; | 824 return true; |
| 851 } | 825 } |
| 852 | 826 |
| 853 | |
| 854 uint32_t RegExpParser::ParseClassCharacterEscape() { | 827 uint32_t RegExpParser::ParseClassCharacterEscape() { |
| 855 ASSERT(current() == '\\'); | 828 ASSERT(current() == '\\'); |
| 856 DEBUG_ASSERT(has_next() && !IsSpecialClassEscape(Next())); | 829 DEBUG_ASSERT(has_next() && !IsSpecialClassEscape(Next())); |
| 857 Advance(); | 830 Advance(); |
| 858 switch (current()) { | 831 switch (current()) { |
| 859 case 'b': | 832 case 'b': |
| 860 Advance(); | 833 Advance(); |
| 861 return '\b'; | 834 return '\b'; |
| 862 // ControlEscape :: one of | 835 // ControlEscape :: one of |
| 863 // f n r t v | 836 // f n r t v |
| (...skipping 65 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 929 // been matched by a more specific case, not just the subset required | 902 // been matched by a more specific case, not just the subset required |
| 930 // by the ECMAScript specification. | 903 // by the ECMAScript specification. |
| 931 uint32_t result = current(); | 904 uint32_t result = current(); |
| 932 Advance(); | 905 Advance(); |
| 933 return result; | 906 return result; |
| 934 } | 907 } |
| 935 } | 908 } |
| 936 return 0; | 909 return 0; |
| 937 } | 910 } |
| 938 | 911 |
| 939 | |
| 940 CharacterRange RegExpParser::ParseClassAtom(uint16_t* char_class) { | 912 CharacterRange RegExpParser::ParseClassAtom(uint16_t* char_class) { |
| 941 ASSERT(0 == *char_class); | 913 ASSERT(0 == *char_class); |
| 942 uint32_t first = current(); | 914 uint32_t first = current(); |
| 943 if (first == '\\') { | 915 if (first == '\\') { |
| 944 switch (Next()) { | 916 switch (Next()) { |
| 945 case 'w': | 917 case 'w': |
| 946 case 'W': | 918 case 'W': |
| 947 case 'd': | 919 case 'd': |
| 948 case 'D': | 920 case 'D': |
| 949 case 's': | 921 case 's': |
| 950 case 'S': { | 922 case 'S': { |
| 951 *char_class = Next(); | 923 *char_class = Next(); |
| 952 Advance(2); | 924 Advance(2); |
| 953 return CharacterRange::Singleton(0); // Return dummy value. | 925 return CharacterRange::Singleton(0); // Return dummy value. |
| 954 } | 926 } |
| 955 case kEndMarker: | 927 case kEndMarker: |
| 956 ReportError("\\ at end of pattern"); | 928 ReportError("\\ at end of pattern"); |
| 957 UNREACHABLE(); | 929 UNREACHABLE(); |
| 958 default: | 930 default: |
| 959 uint32_t c = ParseClassCharacterEscape(); | 931 uint32_t c = ParseClassCharacterEscape(); |
| 960 return CharacterRange::Singleton(c); | 932 return CharacterRange::Singleton(c); |
| 961 } | 933 } |
| 962 } else { | 934 } else { |
| 963 Advance(); | 935 Advance(); |
| 964 return CharacterRange::Singleton(first); | 936 return CharacterRange::Singleton(first); |
| 965 } | 937 } |
| 966 } | 938 } |
| 967 | 939 |
| 968 | |
| 969 static const uint16_t kNoCharClass = 0; | 940 static const uint16_t kNoCharClass = 0; |
| 970 | 941 |
| 971 // Adds range or pre-defined character class to character ranges. | 942 // Adds range or pre-defined character class to character ranges. |
| 972 // If char_class is not kNoCharClass, it's interpreted as a class | 943 // If char_class is not kNoCharClass, it's interpreted as a class |
| 973 // escape (i.e., 's' means whitespace, from '\s'). | 944 // escape (i.e., 's' means whitespace, from '\s'). |
| 974 static inline void AddRangeOrEscape(ZoneGrowableArray<CharacterRange>* ranges, | 945 static inline void AddRangeOrEscape(ZoneGrowableArray<CharacterRange>* ranges, |
| 975 uint16_t char_class, | 946 uint16_t char_class, |
| 976 CharacterRange range) { | 947 CharacterRange range) { |
| 977 if (char_class != kNoCharClass) { | 948 if (char_class != kNoCharClass) { |
| 978 CharacterRange::AddClassEscape(char_class, ranges); | 949 CharacterRange::AddClassEscape(char_class, ranges); |
| 979 } else { | 950 } else { |
| 980 ranges->Add(range); | 951 ranges->Add(range); |
| 981 } | 952 } |
| 982 } | 953 } |
| 983 | 954 |
| 984 | |
| 985 RegExpTree* RegExpParser::ParseCharacterClass() { | 955 RegExpTree* RegExpParser::ParseCharacterClass() { |
| 986 static const char* kUnterminated = "Unterminated character class"; | 956 static const char* kUnterminated = "Unterminated character class"; |
| 987 static const char* kRangeOutOfOrder = "Range out of order in character class"; | 957 static const char* kRangeOutOfOrder = "Range out of order in character class"; |
| 988 | 958 |
| 989 ASSERT(current() == '['); | 959 ASSERT(current() == '['); |
| 990 Advance(); | 960 Advance(); |
| 991 bool is_negated = false; | 961 bool is_negated = false; |
| 992 if (current() == '^') { | 962 if (current() == '^') { |
| 993 is_negated = true; | 963 is_negated = true; |
| 994 Advance(); | 964 Advance(); |
| (...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1032 UNREACHABLE(); | 1002 UNREACHABLE(); |
| 1033 } | 1003 } |
| 1034 Advance(); | 1004 Advance(); |
| 1035 if (ranges->length() == 0) { | 1005 if (ranges->length() == 0) { |
| 1036 ranges->Add(CharacterRange::Everything()); | 1006 ranges->Add(CharacterRange::Everything()); |
| 1037 is_negated = !is_negated; | 1007 is_negated = !is_negated; |
| 1038 } | 1008 } |
| 1039 return new (Z) RegExpCharacterClass(ranges, is_negated); | 1009 return new (Z) RegExpCharacterClass(ranges, is_negated); |
| 1040 } | 1010 } |
| 1041 | 1011 |
| 1042 | |
| 1043 // ---------------------------------------------------------------------------- | 1012 // ---------------------------------------------------------------------------- |
| 1044 // The Parser interface. | 1013 // The Parser interface. |
| 1045 | 1014 |
| 1046 bool RegExpParser::ParseRegExp(const String& input, | 1015 bool RegExpParser::ParseRegExp(const String& input, |
| 1047 bool multiline, | 1016 bool multiline, |
| 1048 RegExpCompileData* result) { | 1017 RegExpCompileData* result) { |
| 1049 ASSERT(result != NULL); | 1018 ASSERT(result != NULL); |
| 1050 LongJumpScope jump; | 1019 LongJumpScope jump; |
| 1051 RegExpParser parser(input, &result->error, multiline); | 1020 RegExpParser parser(input, &result->error, multiline); |
| 1052 if (setjmp(*jump.Set()) == 0) { | 1021 if (setjmp(*jump.Set()) == 0) { |
| (...skipping 14 matching lines...) Expand all Loading... |
| 1067 String::Handle(String::Concat(result->error, input)); | 1036 String::Handle(String::Concat(result->error, input)); |
| 1068 const Array& args = Array::Handle(Array::New(1)); | 1037 const Array& args = Array::Handle(Array::New(1)); |
| 1069 args.SetAt(0, message); | 1038 args.SetAt(0, message); |
| 1070 | 1039 |
| 1071 Exceptions::ThrowByType(Exceptions::kFormat, args); | 1040 Exceptions::ThrowByType(Exceptions::kFormat, args); |
| 1072 } | 1041 } |
| 1073 return !parser.failed(); | 1042 return !parser.failed(); |
| 1074 } | 1043 } |
| 1075 | 1044 |
| 1076 } // namespace dart | 1045 } // namespace dart |
| OLD | NEW |