OLD | NEW |
1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file |
2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
4 | 4 |
| 5 #include "vm/regexp_parser.h" |
5 #include "vm/longjump.h" | 6 #include "vm/longjump.h" |
6 #include "vm/object_store.h" | 7 #include "vm/object_store.h" |
7 #include "vm/regexp_parser.h" | |
8 | 8 |
9 namespace dart { | 9 namespace dart { |
10 | 10 |
11 #define Z zone() | 11 #define Z zone() |
12 | 12 |
13 // Enables possessive quantifier syntax for testing. | 13 // Enables possessive quantifier syntax for testing. |
14 static const bool FLAG_regexp_possessive_quantifier = false; | 14 static const bool FLAG_regexp_possessive_quantifier = false; |
15 | 15 |
16 RegExpBuilder::RegExpBuilder() | 16 RegExpBuilder::RegExpBuilder() |
17 : zone_(Thread::Current()->zone()), | 17 : zone_(Thread::Current()->zone()), |
18 pending_empty_(false), | 18 pending_empty_(false), |
19 characters_(NULL), | 19 characters_(NULL), |
20 terms_(), | 20 terms_(), |
21 text_(), | 21 text_(), |
22 alternatives_() | 22 alternatives_() |
23 #ifdef DEBUG | 23 #ifdef DEBUG |
24 , | 24 , |
25 last_added_(ADD_NONE) | 25 last_added_(ADD_NONE) |
26 #endif | 26 #endif |
27 { | 27 { |
28 } | 28 } |
29 | 29 |
30 | |
31 void RegExpBuilder::FlushCharacters() { | 30 void RegExpBuilder::FlushCharacters() { |
32 pending_empty_ = false; | 31 pending_empty_ = false; |
33 if (characters_ != NULL) { | 32 if (characters_ != NULL) { |
34 RegExpTree* atom = new (Z) RegExpAtom(characters_); | 33 RegExpTree* atom = new (Z) RegExpAtom(characters_); |
35 characters_ = NULL; | 34 characters_ = NULL; |
36 text_.Add(atom); | 35 text_.Add(atom); |
37 LAST(ADD_ATOM); | 36 LAST(ADD_ATOM); |
38 } | 37 } |
39 } | 38 } |
40 | 39 |
41 | |
42 void RegExpBuilder::FlushText() { | 40 void RegExpBuilder::FlushText() { |
43 FlushCharacters(); | 41 FlushCharacters(); |
44 intptr_t num_text = text_.length(); | 42 intptr_t num_text = text_.length(); |
45 if (num_text == 0) { | 43 if (num_text == 0) { |
46 return; | 44 return; |
47 } else if (num_text == 1) { | 45 } else if (num_text == 1) { |
48 terms_.Add(text_.Last()); | 46 terms_.Add(text_.Last()); |
49 } else { | 47 } else { |
50 RegExpText* text = new (Z) RegExpText(); | 48 RegExpText* text = new (Z) RegExpText(); |
51 for (intptr_t i = 0; i < num_text; i++) | 49 for (intptr_t i = 0; i < num_text; i++) |
52 text_[i]->AppendToText(text); | 50 text_[i]->AppendToText(text); |
53 terms_.Add(text); | 51 terms_.Add(text); |
54 } | 52 } |
55 text_.Clear(); | 53 text_.Clear(); |
56 } | 54 } |
57 | 55 |
58 | |
59 void RegExpBuilder::AddCharacter(uint16_t c) { | 56 void RegExpBuilder::AddCharacter(uint16_t c) { |
60 pending_empty_ = false; | 57 pending_empty_ = false; |
61 if (characters_ == NULL) { | 58 if (characters_ == NULL) { |
62 characters_ = new (Z) ZoneGrowableArray<uint16_t>(4); | 59 characters_ = new (Z) ZoneGrowableArray<uint16_t>(4); |
63 } | 60 } |
64 characters_->Add(c); | 61 characters_->Add(c); |
65 LAST(ADD_CHAR); | 62 LAST(ADD_CHAR); |
66 } | 63 } |
67 | 64 |
68 | |
69 void RegExpBuilder::AddEmpty() { | 65 void RegExpBuilder::AddEmpty() { |
70 pending_empty_ = true; | 66 pending_empty_ = true; |
71 } | 67 } |
72 | 68 |
73 | |
74 void RegExpBuilder::AddAtom(RegExpTree* term) { | 69 void RegExpBuilder::AddAtom(RegExpTree* term) { |
75 if (term->IsEmpty()) { | 70 if (term->IsEmpty()) { |
76 AddEmpty(); | 71 AddEmpty(); |
77 return; | 72 return; |
78 } | 73 } |
79 if (term->IsTextElement()) { | 74 if (term->IsTextElement()) { |
80 FlushCharacters(); | 75 FlushCharacters(); |
81 text_.Add(term); | 76 text_.Add(term); |
82 } else { | 77 } else { |
83 FlushText(); | 78 FlushText(); |
84 terms_.Add(term); | 79 terms_.Add(term); |
85 } | 80 } |
86 LAST(ADD_ATOM); | 81 LAST(ADD_ATOM); |
87 } | 82 } |
88 | 83 |
89 | |
90 void RegExpBuilder::AddAssertion(RegExpTree* assert) { | 84 void RegExpBuilder::AddAssertion(RegExpTree* assert) { |
91 FlushText(); | 85 FlushText(); |
92 terms_.Add(assert); | 86 terms_.Add(assert); |
93 LAST(ADD_ASSERT); | 87 LAST(ADD_ASSERT); |
94 } | 88 } |
95 | 89 |
96 | |
97 void RegExpBuilder::NewAlternative() { | 90 void RegExpBuilder::NewAlternative() { |
98 FlushTerms(); | 91 FlushTerms(); |
99 } | 92 } |
100 | 93 |
101 | |
102 void RegExpBuilder::FlushTerms() { | 94 void RegExpBuilder::FlushTerms() { |
103 FlushText(); | 95 FlushText(); |
104 intptr_t num_terms = terms_.length(); | 96 intptr_t num_terms = terms_.length(); |
105 RegExpTree* alternative; | 97 RegExpTree* alternative; |
106 if (num_terms == 0) { | 98 if (num_terms == 0) { |
107 alternative = RegExpEmpty::GetInstance(); | 99 alternative = RegExpEmpty::GetInstance(); |
108 } else if (num_terms == 1) { | 100 } else if (num_terms == 1) { |
109 alternative = terms_.Last(); | 101 alternative = terms_.Last(); |
110 } else { | 102 } else { |
111 ZoneGrowableArray<RegExpTree*>* terms = | 103 ZoneGrowableArray<RegExpTree*>* terms = |
112 new (Z) ZoneGrowableArray<RegExpTree*>(); | 104 new (Z) ZoneGrowableArray<RegExpTree*>(); |
113 for (intptr_t i = 0; i < terms_.length(); i++) { | 105 for (intptr_t i = 0; i < terms_.length(); i++) { |
114 terms->Add(terms_[i]); | 106 terms->Add(terms_[i]); |
115 } | 107 } |
116 alternative = new (Z) RegExpAlternative(terms); | 108 alternative = new (Z) RegExpAlternative(terms); |
117 } | 109 } |
118 alternatives_.Add(alternative); | 110 alternatives_.Add(alternative); |
119 terms_.Clear(); | 111 terms_.Clear(); |
120 LAST(ADD_NONE); | 112 LAST(ADD_NONE); |
121 } | 113 } |
122 | 114 |
123 | |
124 RegExpTree* RegExpBuilder::ToRegExp() { | 115 RegExpTree* RegExpBuilder::ToRegExp() { |
125 FlushTerms(); | 116 FlushTerms(); |
126 intptr_t num_alternatives = alternatives_.length(); | 117 intptr_t num_alternatives = alternatives_.length(); |
127 if (num_alternatives == 0) { | 118 if (num_alternatives == 0) { |
128 return RegExpEmpty::GetInstance(); | 119 return RegExpEmpty::GetInstance(); |
129 } | 120 } |
130 if (num_alternatives == 1) { | 121 if (num_alternatives == 1) { |
131 return alternatives_.Last(); | 122 return alternatives_.Last(); |
132 } | 123 } |
133 ZoneGrowableArray<RegExpTree*>* alternatives = | 124 ZoneGrowableArray<RegExpTree*>* alternatives = |
134 new (Z) ZoneGrowableArray<RegExpTree*>(); | 125 new (Z) ZoneGrowableArray<RegExpTree*>(); |
135 for (intptr_t i = 0; i < alternatives_.length(); i++) { | 126 for (intptr_t i = 0; i < alternatives_.length(); i++) { |
136 alternatives->Add(alternatives_[i]); | 127 alternatives->Add(alternatives_[i]); |
137 } | 128 } |
138 return new (Z) RegExpDisjunction(alternatives); | 129 return new (Z) RegExpDisjunction(alternatives); |
139 } | 130 } |
140 | 131 |
141 | |
142 void RegExpBuilder::AddQuantifierToAtom( | 132 void RegExpBuilder::AddQuantifierToAtom( |
143 intptr_t min, | 133 intptr_t min, |
144 intptr_t max, | 134 intptr_t max, |
145 RegExpQuantifier::QuantifierType quantifier_type) { | 135 RegExpQuantifier::QuantifierType quantifier_type) { |
146 if (pending_empty_) { | 136 if (pending_empty_) { |
147 pending_empty_ = false; | 137 pending_empty_ = false; |
148 return; | 138 return; |
149 } | 139 } |
150 RegExpTree* atom; | 140 RegExpTree* atom; |
151 if (characters_ != NULL) { | 141 if (characters_ != NULL) { |
(...skipping 56 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
208 capture_count_(0), | 198 capture_count_(0), |
209 has_more_(true), | 199 has_more_(true), |
210 multiline_(multiline), | 200 multiline_(multiline), |
211 simple_(false), | 201 simple_(false), |
212 contains_anchor_(false), | 202 contains_anchor_(false), |
213 is_scanned_for_captures_(false), | 203 is_scanned_for_captures_(false), |
214 failed_(false) { | 204 failed_(false) { |
215 Advance(); | 205 Advance(); |
216 } | 206 } |
217 | 207 |
218 | |
219 uint32_t RegExpParser::Next() { | 208 uint32_t RegExpParser::Next() { |
220 if (has_next()) { | 209 if (has_next()) { |
221 return in().CharAt(next_pos_); | 210 return in().CharAt(next_pos_); |
222 } else { | 211 } else { |
223 return kEndMarker; | 212 return kEndMarker; |
224 } | 213 } |
225 } | 214 } |
226 | 215 |
227 | |
228 void RegExpParser::Advance() { | 216 void RegExpParser::Advance() { |
229 if (next_pos_ < in().Length()) { | 217 if (next_pos_ < in().Length()) { |
230 current_ = in().CharAt(next_pos_); | 218 current_ = in().CharAt(next_pos_); |
231 next_pos_++; | 219 next_pos_++; |
232 } else { | 220 } else { |
233 current_ = kEndMarker; | 221 current_ = kEndMarker; |
234 has_more_ = false; | 222 has_more_ = false; |
235 } | 223 } |
236 } | 224 } |
237 | 225 |
238 | |
239 void RegExpParser::Reset(intptr_t pos) { | 226 void RegExpParser::Reset(intptr_t pos) { |
240 next_pos_ = pos; | 227 next_pos_ = pos; |
241 has_more_ = (pos < in().Length()); | 228 has_more_ = (pos < in().Length()); |
242 Advance(); | 229 Advance(); |
243 } | 230 } |
244 | 231 |
245 | |
246 void RegExpParser::Advance(intptr_t dist) { | 232 void RegExpParser::Advance(intptr_t dist) { |
247 next_pos_ += dist - 1; | 233 next_pos_ += dist - 1; |
248 Advance(); | 234 Advance(); |
249 } | 235 } |
250 | 236 |
251 | |
252 bool RegExpParser::simple() { | 237 bool RegExpParser::simple() { |
253 return simple_; | 238 return simple_; |
254 } | 239 } |
255 | 240 |
256 | |
257 void RegExpParser::ReportError(const char* message) { | 241 void RegExpParser::ReportError(const char* message) { |
258 failed_ = true; | 242 failed_ = true; |
259 *error_ = String::New(message); | 243 *error_ = String::New(message); |
260 // Zip to the end to make sure the no more input is read. | 244 // Zip to the end to make sure the no more input is read. |
261 current_ = kEndMarker; | 245 current_ = kEndMarker; |
262 next_pos_ = in().Length(); | 246 next_pos_ = in().Length(); |
263 | 247 |
264 const Error& error = Error::Handle(LanguageError::New(*error_)); | 248 const Error& error = Error::Handle(LanguageError::New(*error_)); |
265 Report::LongJump(error); | 249 Report::LongJump(error); |
266 UNREACHABLE(); | 250 UNREACHABLE(); |
267 } | 251 } |
268 | 252 |
269 | |
270 // Pattern :: | 253 // Pattern :: |
271 // Disjunction | 254 // Disjunction |
272 RegExpTree* RegExpParser::ParsePattern() { | 255 RegExpTree* RegExpParser::ParsePattern() { |
273 RegExpTree* result = ParseDisjunction(); | 256 RegExpTree* result = ParseDisjunction(); |
274 ASSERT(!has_more()); | 257 ASSERT(!has_more()); |
275 // If the result of parsing is a literal string atom, and it has the | 258 // If the result of parsing is a literal string atom, and it has the |
276 // same length as the input, then the atom is identical to the input. | 259 // same length as the input, then the atom is identical to the input. |
277 if (result->IsAtom() && result->AsAtom()->length() == in().Length()) { | 260 if (result->IsAtom() && result->AsAtom()->length() == in().Length()) { |
278 simple_ = true; | 261 simple_ = true; |
279 } | 262 } |
280 return result; | 263 return result; |
281 } | 264 } |
282 | 265 |
283 | |
284 // Disjunction :: | 266 // Disjunction :: |
285 // Alternative | 267 // Alternative |
286 // Alternative | Disjunction | 268 // Alternative | Disjunction |
287 // Alternative :: | 269 // Alternative :: |
288 // [empty] | 270 // [empty] |
289 // Term Alternative | 271 // Term Alternative |
290 // Term :: | 272 // Term :: |
291 // Assertion | 273 // Assertion |
292 // Atom | 274 // Atom |
293 // Atom Quantifier | 275 // Atom Quantifier |
(...skipping 331 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
625 Advance(); | 607 Advance(); |
626 } else if (FLAG_regexp_possessive_quantifier && current() == '+') { | 608 } else if (FLAG_regexp_possessive_quantifier && current() == '+') { |
627 // FLAG_regexp_possessive_quantifier is a debug-only flag. | 609 // FLAG_regexp_possessive_quantifier is a debug-only flag. |
628 quantifier_type = RegExpQuantifier::POSSESSIVE; | 610 quantifier_type = RegExpQuantifier::POSSESSIVE; |
629 Advance(); | 611 Advance(); |
630 } | 612 } |
631 builder->AddQuantifierToAtom(min, max, quantifier_type); | 613 builder->AddQuantifierToAtom(min, max, quantifier_type); |
632 } | 614 } |
633 } | 615 } |
634 | 616 |
635 | |
636 #ifdef DEBUG | 617 #ifdef DEBUG |
637 // Currently only used in an ASSERT. | 618 // Currently only used in an ASSERT. |
// Returns true when |c| is the letter of one of the class escapes
// \d \D \s \S \w \W.
static bool IsSpecialClassEscape(uint32_t c) {
  return c == 'd' || c == 'D' || c == 's' || c == 'S' || c == 'w' || c == 'W';
}
651 #endif | 632 #endif |
652 | 633 |
653 | |
654 // In order to know whether an escape is a backreference or not we have to scan | 634 // In order to know whether an escape is a backreference or not we have to scan |
655 // the entire regexp and find the number of capturing parentheses. However we | 635 // the entire regexp and find the number of capturing parentheses. However we |
656 // don't want to scan the regexp twice unless it is necessary. This mini-parser | 636 // don't want to scan the regexp twice unless it is necessary. This mini-parser |
657 // is called when needed. It can see the difference between capturing and | 637 // is called when needed. It can see the difference between capturing and |
658 // noncapturing parentheses and can skip character classes and backslash-escaped | 638 // noncapturing parentheses and can skip character classes and backslash-escaped |
659 // characters. | 639 // characters. |
660 void RegExpParser::ScanForCaptures() { | 640 void RegExpParser::ScanForCaptures() { |
661 // Start with captures started previous to current position | 641 // Start with captures started previous to current position |
662 intptr_t capture_count = captures_started(); | 642 intptr_t capture_count = captures_started(); |
663 // Add count of captures after this position. | 643 // Add count of captures after this position. |
(...skipping 18 matching lines...) Expand all Loading... |
682 } | 662 } |
683 case '(': | 663 case '(': |
684 if (current() != '?') capture_count++; | 664 if (current() != '?') capture_count++; |
685 break; | 665 break; |
686 } | 666 } |
687 } | 667 } |
688 capture_count_ = capture_count; | 668 capture_count_ = capture_count; |
689 is_scanned_for_captures_ = true; | 669 is_scanned_for_captures_ = true; |
690 } | 670 } |
691 | 671 |
692 | |
// Returns true when |c| is one of the ASCII digits '0' through '9'.
static inline bool IsDecimalDigit(int32_t c) {
  if (c < '0') {
    return false;
  }
  return c <= '9';
}
696 | 675 |
697 | |
698 bool RegExpParser::ParseBackReferenceIndex(intptr_t* index_out) { | 676 bool RegExpParser::ParseBackReferenceIndex(intptr_t* index_out) { |
699 ASSERT('\\' == current()); | 677 ASSERT('\\' == current()); |
700 ASSERT('1' <= Next() && Next() <= '9'); | 678 ASSERT('1' <= Next() && Next() <= '9'); |
701 // Try to parse a decimal literal that is no greater than the total number | 679 // Try to parse a decimal literal that is no greater than the total number |
702 // of left capturing parentheses in the input. | 680 // of left capturing parentheses in the input. |
703 intptr_t start = position(); | 681 intptr_t start = position(); |
704 intptr_t value = Next() - '0'; | 682 intptr_t value = Next() - '0'; |
705 Advance(2); | 683 Advance(2); |
706 while (true) { | 684 while (true) { |
707 uint32_t c = current(); | 685 uint32_t c = current(); |
(...skipping 16 matching lines...) Expand all Loading... |
724 } | 702 } |
725 if (value > capture_count_) { | 703 if (value > capture_count_) { |
726 Reset(start); | 704 Reset(start); |
727 return false; | 705 return false; |
728 } | 706 } |
729 } | 707 } |
730 *index_out = value; | 708 *index_out = value; |
731 return true; | 709 return true; |
732 } | 710 } |
733 | 711 |
734 | |
735 // QuantifierPrefix :: | 712 // QuantifierPrefix :: |
736 // { DecimalDigits } | 713 // { DecimalDigits } |
737 // { DecimalDigits , } | 714 // { DecimalDigits , } |
738 // { DecimalDigits , DecimalDigits } | 715 // { DecimalDigits , DecimalDigits } |
739 // | 716 // |
740 // Returns true if parsing succeeds, and set the min_out and max_out | 717 // Returns true if parsing succeeds, and set the min_out and max_out |
741 // values. Values are truncated to RegExpTree::kInfinity if they overflow. | 718 // values. Values are truncated to RegExpTree::kInfinity if they overflow. |
742 bool RegExpParser::ParseIntervalQuantifier(intptr_t* min_out, | 719 bool RegExpParser::ParseIntervalQuantifier(intptr_t* min_out, |
743 intptr_t* max_out) { | 720 intptr_t* max_out) { |
744 ASSERT(current() == '{'); | 721 ASSERT(current() == '{'); |
(...skipping 47 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
792 } | 769 } |
793 } else { | 770 } else { |
794 Reset(start); | 771 Reset(start); |
795 return false; | 772 return false; |
796 } | 773 } |
797 *min_out = min; | 774 *min_out = min; |
798 *max_out = max; | 775 *max_out = max; |
799 return true; | 776 return true; |
800 } | 777 } |
801 | 778 |
802 | |
803 uint32_t RegExpParser::ParseOctalLiteral() { | 779 uint32_t RegExpParser::ParseOctalLiteral() { |
804 ASSERT(('0' <= current() && current() <= '7') || current() == kEndMarker); | 780 ASSERT(('0' <= current() && current() <= '7') || current() == kEndMarker); |
805 // For compatibility with some other browsers (not all), we parse | 781 // For compatibility with some other browsers (not all), we parse |
806 // up to three octal digits with a value below 256. | 782 // up to three octal digits with a value below 256. |
807 uint32_t value = current() - '0'; | 783 uint32_t value = current() - '0'; |
808 Advance(); | 784 Advance(); |
809 if ('0' <= current() && current() <= '7') { | 785 if ('0' <= current() && current() <= '7') { |
810 value = value * 8 + current() - '0'; | 786 value = value * 8 + current() - '0'; |
811 Advance(); | 787 Advance(); |
812 if (value < 32 && '0' <= current() && current() <= '7') { | 788 if (value < 32 && '0' <= current() && current() <= '7') { |
813 value = value * 8 + current() - '0'; | 789 value = value * 8 + current() - '0'; |
814 Advance(); | 790 Advance(); |
815 } | 791 } |
816 } | 792 } |
817 return value; | 793 return value; |
818 } | 794 } |
819 | 795 |
820 | |
// Maps a hexadecimal character |c| to its numeric value (0 .. 15).
// Any character that is not a hexadecimal digit yields a negative value.
static inline intptr_t HexValue(uint32_t c) {
  // Unsigned wrap-around makes everything below '0' a huge value, so a single
  // comparison covers both ends of each range.
  const uint32_t offset_from_zero = c - '0';
  if (offset_from_zero <= 9) {
    return offset_from_zero;
  }
  // Setting bit 0x20 folds 'A'..'F' (offsets 0x11..0x16) onto 'a'..'f'
  // (offsets 0x31..0x36); rebasing on 'a' then leaves 0..5 for valid letters.
  const uint32_t offset_from_a = (offset_from_zero | 0x20) - ('a' - '0');
  if (offset_from_a <= 5) {
    return offset_from_a + 10;
  }
  return -1;
}
830 | 805 |
831 | |
832 bool RegExpParser::ParseHexEscape(intptr_t length, uint32_t* value) { | 806 bool RegExpParser::ParseHexEscape(intptr_t length, uint32_t* value) { |
833 intptr_t start = position(); | 807 intptr_t start = position(); |
834 uint32_t val = 0; | 808 uint32_t val = 0; |
835 bool done = false; | 809 bool done = false; |
836 for (intptr_t i = 0; !done; i++) { | 810 for (intptr_t i = 0; !done; i++) { |
837 uint32_t c = current(); | 811 uint32_t c = current(); |
838 intptr_t d = HexValue(c); | 812 intptr_t d = HexValue(c); |
839 if (d < 0) { | 813 if (d < 0) { |
840 Reset(start); | 814 Reset(start); |
841 return false; | 815 return false; |
842 } | 816 } |
843 val = val * 16 + d; | 817 val = val * 16 + d; |
844 Advance(); | 818 Advance(); |
845 if (i == length - 1) { | 819 if (i == length - 1) { |
846 done = true; | 820 done = true; |
847 } | 821 } |
848 } | 822 } |
849 *value = val; | 823 *value = val; |
850 return true; | 824 return true; |
851 } | 825 } |
852 | 826 |
853 | |
854 uint32_t RegExpParser::ParseClassCharacterEscape() { | 827 uint32_t RegExpParser::ParseClassCharacterEscape() { |
855 ASSERT(current() == '\\'); | 828 ASSERT(current() == '\\'); |
856 DEBUG_ASSERT(has_next() && !IsSpecialClassEscape(Next())); | 829 DEBUG_ASSERT(has_next() && !IsSpecialClassEscape(Next())); |
857 Advance(); | 830 Advance(); |
858 switch (current()) { | 831 switch (current()) { |
859 case 'b': | 832 case 'b': |
860 Advance(); | 833 Advance(); |
861 return '\b'; | 834 return '\b'; |
862 // ControlEscape :: one of | 835 // ControlEscape :: one of |
863 // f n r t v | 836 // f n r t v |
(...skipping 65 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
929 // been matched by a more specific case, not just the subset required | 902 // been matched by a more specific case, not just the subset required |
930 // by the ECMAScript specification. | 903 // by the ECMAScript specification. |
931 uint32_t result = current(); | 904 uint32_t result = current(); |
932 Advance(); | 905 Advance(); |
933 return result; | 906 return result; |
934 } | 907 } |
935 } | 908 } |
936 return 0; | 909 return 0; |
937 } | 910 } |
938 | 911 |
939 | |
940 CharacterRange RegExpParser::ParseClassAtom(uint16_t* char_class) { | 912 CharacterRange RegExpParser::ParseClassAtom(uint16_t* char_class) { |
941 ASSERT(0 == *char_class); | 913 ASSERT(0 == *char_class); |
942 uint32_t first = current(); | 914 uint32_t first = current(); |
943 if (first == '\\') { | 915 if (first == '\\') { |
944 switch (Next()) { | 916 switch (Next()) { |
945 case 'w': | 917 case 'w': |
946 case 'W': | 918 case 'W': |
947 case 'd': | 919 case 'd': |
948 case 'D': | 920 case 'D': |
949 case 's': | 921 case 's': |
950 case 'S': { | 922 case 'S': { |
951 *char_class = Next(); | 923 *char_class = Next(); |
952 Advance(2); | 924 Advance(2); |
953 return CharacterRange::Singleton(0); // Return dummy value. | 925 return CharacterRange::Singleton(0); // Return dummy value. |
954 } | 926 } |
955 case kEndMarker: | 927 case kEndMarker: |
956 ReportError("\\ at end of pattern"); | 928 ReportError("\\ at end of pattern"); |
957 UNREACHABLE(); | 929 UNREACHABLE(); |
958 default: | 930 default: |
959 uint32_t c = ParseClassCharacterEscape(); | 931 uint32_t c = ParseClassCharacterEscape(); |
960 return CharacterRange::Singleton(c); | 932 return CharacterRange::Singleton(c); |
961 } | 933 } |
962 } else { | 934 } else { |
963 Advance(); | 935 Advance(); |
964 return CharacterRange::Singleton(first); | 936 return CharacterRange::Singleton(first); |
965 } | 937 } |
966 } | 938 } |
967 | 939 |
968 | |
969 static const uint16_t kNoCharClass = 0; | 940 static const uint16_t kNoCharClass = 0; |
970 | 941 |
971 // Adds range or pre-defined character class to character ranges. | 942 // Adds range or pre-defined character class to character ranges. |
972 // If char_class is not kInvalidClass, it's interpreted as a class | 943 // If char_class is not kInvalidClass, it's interpreted as a class |
973 // escape (i.e., 's' means whitespace, from '\s'). | 944 // escape (i.e., 's' means whitespace, from '\s'). |
974 static inline void AddRangeOrEscape(ZoneGrowableArray<CharacterRange>* ranges, | 945 static inline void AddRangeOrEscape(ZoneGrowableArray<CharacterRange>* ranges, |
975 uint16_t char_class, | 946 uint16_t char_class, |
976 CharacterRange range) { | 947 CharacterRange range) { |
977 if (char_class != kNoCharClass) { | 948 if (char_class != kNoCharClass) { |
978 CharacterRange::AddClassEscape(char_class, ranges); | 949 CharacterRange::AddClassEscape(char_class, ranges); |
979 } else { | 950 } else { |
980 ranges->Add(range); | 951 ranges->Add(range); |
981 } | 952 } |
982 } | 953 } |
983 | 954 |
984 | |
985 RegExpTree* RegExpParser::ParseCharacterClass() { | 955 RegExpTree* RegExpParser::ParseCharacterClass() { |
986 static const char* kUnterminated = "Unterminated character class"; | 956 static const char* kUnterminated = "Unterminated character class"; |
987 static const char* kRangeOutOfOrder = "Range out of order in character class"; | 957 static const char* kRangeOutOfOrder = "Range out of order in character class"; |
988 | 958 |
989 ASSERT(current() == '['); | 959 ASSERT(current() == '['); |
990 Advance(); | 960 Advance(); |
991 bool is_negated = false; | 961 bool is_negated = false; |
992 if (current() == '^') { | 962 if (current() == '^') { |
993 is_negated = true; | 963 is_negated = true; |
994 Advance(); | 964 Advance(); |
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1032 UNREACHABLE(); | 1002 UNREACHABLE(); |
1033 } | 1003 } |
1034 Advance(); | 1004 Advance(); |
1035 if (ranges->length() == 0) { | 1005 if (ranges->length() == 0) { |
1036 ranges->Add(CharacterRange::Everything()); | 1006 ranges->Add(CharacterRange::Everything()); |
1037 is_negated = !is_negated; | 1007 is_negated = !is_negated; |
1038 } | 1008 } |
1039 return new (Z) RegExpCharacterClass(ranges, is_negated); | 1009 return new (Z) RegExpCharacterClass(ranges, is_negated); |
1040 } | 1010 } |
1041 | 1011 |
1042 | |
1043 // ---------------------------------------------------------------------------- | 1012 // ---------------------------------------------------------------------------- |
1044 // The Parser interface. | 1013 // The Parser interface. |
1045 | 1014 |
1046 bool RegExpParser::ParseRegExp(const String& input, | 1015 bool RegExpParser::ParseRegExp(const String& input, |
1047 bool multiline, | 1016 bool multiline, |
1048 RegExpCompileData* result) { | 1017 RegExpCompileData* result) { |
1049 ASSERT(result != NULL); | 1018 ASSERT(result != NULL); |
1050 LongJumpScope jump; | 1019 LongJumpScope jump; |
1051 RegExpParser parser(input, &result->error, multiline); | 1020 RegExpParser parser(input, &result->error, multiline); |
1052 if (setjmp(*jump.Set()) == 0) { | 1021 if (setjmp(*jump.Set()) == 0) { |
(...skipping 14 matching lines...) Expand all Loading... |
1067 String::Handle(String::Concat(result->error, input)); | 1036 String::Handle(String::Concat(result->error, input)); |
1068 const Array& args = Array::Handle(Array::New(1)); | 1037 const Array& args = Array::Handle(Array::New(1)); |
1069 args.SetAt(0, message); | 1038 args.SetAt(0, message); |
1070 | 1039 |
1071 Exceptions::ThrowByType(Exceptions::kFormat, args); | 1040 Exceptions::ThrowByType(Exceptions::kFormat, args); |
1072 } | 1041 } |
1073 return !parser.failed(); | 1042 return !parser.failed(); |
1074 } | 1043 } |
1075 | 1044 |
1076 } // namespace dart | 1045 } // namespace dart |
OLD | NEW |