OLD | NEW |
1 // Copyright 2012 the V8 project authors. All rights reserved. | 1 // Copyright 2012 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "src/parser.h" | 5 #include "src/parser.h" |
6 | 6 |
7 #include "src/api.h" | 7 #include "src/api.h" |
8 #include "src/ast.h" | 8 #include "src/ast.h" |
9 #include "src/ast-literal-reindexer.h" | 9 #include "src/ast-literal-reindexer.h" |
10 #include "src/bailout-reason.h" | 10 #include "src/bailout-reason.h" |
(...skipping 76 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
87 set_stack_limit(isolate_->stack_guard()->real_climit()); | 87 set_stack_limit(isolate_->stack_guard()->real_climit()); |
88 set_unicode_cache(isolate_->unicode_cache()); | 88 set_unicode_cache(isolate_->unicode_cache()); |
89 set_script(script); | 89 set_script(script); |
90 | 90 |
91 if (script->type() == Script::TYPE_NATIVE) { | 91 if (script->type() == Script::TYPE_NATIVE) { |
92 set_native(); | 92 set_native(); |
93 } | 93 } |
94 } | 94 } |
95 | 95 |
96 | 96 |
97 RegExpBuilder::RegExpBuilder(Zone* zone) | 97 RegExpBuilder::RegExpBuilder(Zone* zone, |
| 98 RegExpTree::ReadDirection read_direction) |
98 : zone_(zone), | 99 : zone_(zone), |
99 pending_empty_(false), | 100 pending_empty_(false), |
100 characters_(NULL), | 101 characters_(NULL), |
101 terms_(), | 102 terms_(), |
102 alternatives_() | 103 alternatives_(), |
| 104 read_direction_(read_direction) |
103 #ifdef DEBUG | 105 #ifdef DEBUG |
104 , last_added_(ADD_NONE) | 106 , last_added_(ADD_NONE) |
105 #endif | 107 #endif |
106 {} | 108 {} |
107 | 109 |
108 | 110 |
109 void RegExpBuilder::FlushCharacters() { | 111 void RegExpBuilder::FlushCharacters() { |
110 pending_empty_ = false; | 112 pending_empty_ = false; |
111 if (characters_ != NULL) { | 113 if (characters_ != NULL) { |
112 RegExpTree* atom = new(zone()) RegExpAtom(characters_->ToConstVector()); | 114 RegExpTree* atom = |
| 115 new (zone()) RegExpAtom(characters_->ToConstVector(), read_direction_); |
113 characters_ = NULL; | 116 characters_ = NULL; |
114 text_.Add(atom, zone()); | 117 text_.Add(atom, zone()); |
115 LAST(ADD_ATOM); | 118 LAST(ADD_ATOM); |
116 } | 119 } |
117 } | 120 } |
118 | 121 |
119 | 122 |
120 void RegExpBuilder::FlushText() { | 123 void RegExpBuilder::FlushText() { |
121 FlushCharacters(); | 124 FlushCharacters(); |
122 int num_text = text_.length(); | 125 int num_text = text_.length(); |
123 if (num_text == 0) { | 126 if (num_text == 0) { |
124 return; | 127 return; |
125 } else if (num_text == 1) { | 128 } else if (num_text == 1) { |
126 terms_.Add(text_.last(), zone()); | 129 terms_.Add(text_.last(), zone()); |
127 } else { | 130 } else { |
128 RegExpText* text = new(zone()) RegExpText(zone()); | 131 RegExpText* text = new (zone()) RegExpText(zone(), read_direction_); |
129 for (int i = 0; i < num_text; i++) | 132 for (int i = 0; i < num_text; i++) |
130 text_.Get(i)->AppendToText(text, zone()); | 133 text_.Get(i)->AppendToText(text, zone()); |
131 terms_.Add(text, zone()); | 134 terms_.Add(text, zone()); |
132 } | 135 } |
133 text_.Clear(); | 136 text_.Clear(); |
134 } | 137 } |
135 | 138 |
136 | 139 |
137 void RegExpBuilder::AddCharacter(uc16 c) { | 140 void RegExpBuilder::AddCharacter(uc16 c) { |
138 pending_empty_ = false; | 141 pending_empty_ = false; |
(...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
179 | 182 |
180 void RegExpBuilder::FlushTerms() { | 183 void RegExpBuilder::FlushTerms() { |
181 FlushText(); | 184 FlushText(); |
182 int num_terms = terms_.length(); | 185 int num_terms = terms_.length(); |
183 RegExpTree* alternative; | 186 RegExpTree* alternative; |
184 if (num_terms == 0) { | 187 if (num_terms == 0) { |
185 alternative = new (zone()) RegExpEmpty(); | 188 alternative = new (zone()) RegExpEmpty(); |
186 } else if (num_terms == 1) { | 189 } else if (num_terms == 1) { |
187 alternative = terms_.last(); | 190 alternative = terms_.last(); |
188 } else { | 191 } else { |
189 alternative = new(zone()) RegExpAlternative(terms_.GetList(zone())); | 192 alternative = |
| 193 new (zone()) RegExpAlternative(terms_.GetList(zone()), read_direction_); |
190 } | 194 } |
191 alternatives_.Add(alternative, zone()); | 195 alternatives_.Add(alternative, zone()); |
192 terms_.Clear(); | 196 terms_.Clear(); |
193 LAST(ADD_NONE); | 197 LAST(ADD_NONE); |
194 } | 198 } |
195 | 199 |
196 | 200 |
197 RegExpTree* RegExpBuilder::ToRegExp() { | 201 RegExpTree* RegExpBuilder::ToRegExp() { |
198 FlushTerms(); | 202 FlushTerms(); |
199 int num_alternatives = alternatives_.length(); | 203 int num_alternatives = alternatives_.length(); |
200 if (num_alternatives == 0) return new (zone()) RegExpEmpty(); | 204 if (num_alternatives == 0) return new (zone()) RegExpEmpty(); |
201 if (num_alternatives == 1) return alternatives_.last(); | 205 if (num_alternatives == 1) return alternatives_.last(); |
202 return new(zone()) RegExpDisjunction(alternatives_.GetList(zone())); | 206 return new (zone()) |
| 207 RegExpDisjunction(alternatives_.GetList(zone()), read_direction_); |
203 } | 208 } |
204 | 209 |
205 | 210 |
206 void RegExpBuilder::AddQuantifierToAtom( | 211 void RegExpBuilder::AddQuantifierToAtom( |
207 int min, int max, RegExpQuantifier::QuantifierType quantifier_type) { | 212 int min, int max, RegExpQuantifier::QuantifierType quantifier_type) { |
208 if (pending_empty_) { | 213 if (pending_empty_) { |
209 pending_empty_ = false; | 214 pending_empty_ = false; |
210 return; | 215 return; |
211 } | 216 } |
212 RegExpTree* atom; | 217 RegExpTree* atom; |
213 if (characters_ != NULL) { | 218 if (characters_ != NULL) { |
214 DCHECK(last_added_ == ADD_CHAR); | 219 DCHECK(last_added_ == ADD_CHAR); |
215 // Last atom was character. | 220 // Last atom was character. |
216 Vector<const uc16> char_vector = characters_->ToConstVector(); | 221 Vector<const uc16> char_vector = characters_->ToConstVector(); |
217 int num_chars = char_vector.length(); | 222 int num_chars = char_vector.length(); |
218 if (num_chars > 1) { | 223 if (num_chars > 1) { |
219 Vector<const uc16> prefix = char_vector.SubVector(0, num_chars - 1); | 224 Vector<const uc16> prefix = char_vector.SubVector(0, num_chars - 1); |
220 text_.Add(new(zone()) RegExpAtom(prefix), zone()); | 225 text_.Add(new (zone()) RegExpAtom(prefix, read_direction_), zone()); |
221 char_vector = char_vector.SubVector(num_chars - 1, num_chars); | 226 char_vector = char_vector.SubVector(num_chars - 1, num_chars); |
222 } | 227 } |
223 characters_ = NULL; | 228 characters_ = NULL; |
224 atom = new(zone()) RegExpAtom(char_vector); | 229 atom = new (zone()) RegExpAtom(char_vector, read_direction_); |
225 FlushText(); | 230 FlushText(); |
226 } else if (text_.length() > 0) { | 231 } else if (text_.length() > 0) { |
227 DCHECK(last_added_ == ADD_ATOM); | 232 DCHECK(last_added_ == ADD_ATOM); |
228 atom = text_.RemoveLast(); | 233 atom = text_.RemoveLast(); |
229 FlushText(); | 234 FlushText(); |
230 } else if (terms_.length() > 0) { | 235 } else if (terms_.length() > 0) { |
231 DCHECK(last_added_ == ADD_ATOM); | 236 DCHECK(last_added_ == ADD_ATOM); |
232 atom = terms_.RemoveLast(); | 237 atom = terms_.RemoveLast(); |
233 if (atom->max_match() == 0) { | 238 if (atom->max_match() == 0) { |
234 // Guaranteed to only match an empty string. | 239 // Guaranteed to only match an empty string. |
235 LAST(ADD_TERM); | 240 LAST(ADD_TERM); |
236 if (min == 0) { | 241 if (min == 0) { |
237 return; | 242 return; |
238 } | 243 } |
239 terms_.Add(atom, zone()); | 244 terms_.Add(atom, zone()); |
240 return; | 245 return; |
241 } | 246 } |
242 } else { | 247 } else { |
243 // Only call immediately after adding an atom or character! | 248 // Only call immediately after adding an atom or character! |
244 UNREACHABLE(); | 249 UNREACHABLE(); |
245 return; | 250 return; |
246 } | 251 } |
247 terms_.Add( | 252 terms_.Add(new (zone()) RegExpQuantifier(min, max, quantifier_type, atom, |
248 new(zone()) RegExpQuantifier(min, max, quantifier_type, atom), zone()); | 253 read_direction_), |
| 254 zone()); |
249 LAST(ADD_TERM); | 255 LAST(ADD_TERM); |
250 } | 256 } |
251 | 257 |
252 | 258 |
253 FunctionEntry ParseData::GetFunctionEntry(int start) { | 259 FunctionEntry ParseData::GetFunctionEntry(int start) { |
254 // The current pre-data entry must be a FunctionEntry with the given | 260 // The current pre-data entry must be a FunctionEntry with the given |
255 // start position. | 261 // start position. |
256 if ((function_index_ + FunctionEntry::kSize <= Length()) && | 262 if ((function_index_ + FunctionEntry::kSize <= Length()) && |
257 (static_cast<int>(Data()[function_index_]) == start)) { | 263 (static_cast<int>(Data()[function_index_]) == start)) { |
258 int index = function_index_; | 264 int index = function_index_; |
(...skipping 4955 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
5214 RegExpParser::RegExpParser(FlatStringReader* in, Handle<String>* error, | 5220 RegExpParser::RegExpParser(FlatStringReader* in, Handle<String>* error, |
5215 bool multiline, bool unicode, Isolate* isolate, | 5221 bool multiline, bool unicode, Isolate* isolate, |
5216 Zone* zone) | 5222 Zone* zone) |
5217 : isolate_(isolate), | 5223 : isolate_(isolate), |
5218 zone_(zone), | 5224 zone_(zone), |
5219 error_(error), | 5225 error_(error), |
5220 captures_(NULL), | 5226 captures_(NULL), |
5221 in_(in), | 5227 in_(in), |
5222 current_(kEndMarker), | 5228 current_(kEndMarker), |
5223 next_pos_(0), | 5229 next_pos_(0), |
| 5230 captures_started_(0), |
5224 capture_count_(0), | 5231 capture_count_(0), |
5225 has_more_(true), | 5232 has_more_(true), |
5226 multiline_(multiline), | 5233 multiline_(multiline), |
5227 unicode_(unicode), | 5234 unicode_(unicode), |
5228 simple_(false), | 5235 simple_(false), |
5229 contains_anchor_(false), | 5236 contains_anchor_(false), |
5230 is_scanned_for_captures_(false), | 5237 is_scanned_for_captures_(false), |
5231 failed_(false) { | 5238 failed_(false) { |
5232 Advance(); | 5239 Advance(); |
5233 } | 5240 } |
(...skipping 61 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
5295 current_ = kEndMarker; | 5302 current_ = kEndMarker; |
5296 next_pos_ = in()->length(); | 5303 next_pos_ = in()->length(); |
5297 return NULL; | 5304 return NULL; |
5298 } | 5305 } |
5299 | 5306 |
5300 | 5307 |
5301 // Pattern :: | 5308 // Pattern :: |
5302 // Disjunction | 5309 // Disjunction |
5303 RegExpTree* RegExpParser::ParsePattern() { | 5310 RegExpTree* RegExpParser::ParsePattern() { |
5304 RegExpTree* result = ParseDisjunction(CHECK_FAILED); | 5311 RegExpTree* result = ParseDisjunction(CHECK_FAILED); |
| 5312 |
5305 DCHECK(!has_more()); | 5313 DCHECK(!has_more()); |
5306 // If the result of parsing is a literal string atom, and it has the | 5314 // If the result of parsing is a literal string atom, and it has the |
5307 // same length as the input, then the atom is identical to the input. | 5315 // same length as the input, then the atom is identical to the input. |
5308 if (result->IsAtom() && result->AsAtom()->length() == in()->length()) { | 5316 if (result->IsAtom() && result->AsAtom()->length() == in()->length()) { |
5309 simple_ = true; | 5317 simple_ = true; |
5310 } | 5318 } |
5311 return result; | 5319 return result; |
5312 } | 5320 } |
5313 | 5321 |
5314 | 5322 |
5315 // Disjunction :: | 5323 // Disjunction :: |
5316 // Alternative | 5324 // Alternative |
5317 // Alternative | Disjunction | 5325 // Alternative | Disjunction |
5318 // Alternative :: | 5326 // Alternative :: |
5319 // [empty] | 5327 // [empty] |
5320 // Term Alternative | 5328 // Term Alternative |
5321 // Term :: | 5329 // Term :: |
5322 // Assertion | 5330 // Assertion |
5323 // Atom | 5331 // Atom |
5324 // Atom Quantifier | 5332 // Atom Quantifier |
5325 RegExpTree* RegExpParser::ParseDisjunction() { | 5333 RegExpTree* RegExpParser::ParseDisjunction() { |
5326 // Used to store current state while parsing subexpressions. | 5334 // Used to store current state while parsing subexpressions. |
5327 RegExpParserState initial_state(NULL, INITIAL, 0, zone()); | 5335 RegExpParserState initial_state(NULL, INITIAL, RegExpTree::READ_FORWARD, 0, |
5328 RegExpParserState* stored_state = &initial_state; | 5336 zone()); |
| 5337 RegExpParserState* state = &initial_state; |
5329 // Cache the builder in a local variable for quick access. | 5338 // Cache the builder in a local variable for quick access. |
5330 RegExpBuilder* builder = initial_state.builder(); | 5339 RegExpBuilder* builder = initial_state.builder(); |
5331 while (true) { | 5340 while (true) { |
5332 switch (current()) { | 5341 switch (current()) { |
5333 case kEndMarker: | 5342 case kEndMarker: |
5334 if (stored_state->IsSubexpression()) { | 5343 if (state->IsSubexpression()) { |
5335 // Inside a parenthesized group when hitting end of input. | 5344 // Inside a parenthesized group when hitting end of input. |
5336 ReportError(CStrVector("Unterminated group") CHECK_FAILED); | 5345 ReportError(CStrVector("Unterminated group") CHECK_FAILED); |
5337 } | 5346 } |
5338 DCHECK_EQ(INITIAL, stored_state->group_type()); | 5347 DCHECK_EQ(INITIAL, state->group_type()); |
5339 // Parsing completed successfully. | 5348 // Parsing completed successfully. |
5340 return builder->ToRegExp(); | 5349 return builder->ToRegExp(); |
5341 case ')': { | 5350 case ')': { |
5342 if (!stored_state->IsSubexpression()) { | 5351 if (!state->IsSubexpression()) { |
5343 ReportError(CStrVector("Unmatched ')'") CHECK_FAILED); | 5352 ReportError(CStrVector("Unmatched ')'") CHECK_FAILED); |
5344 } | 5353 } |
5345 DCHECK_NE(INITIAL, stored_state->group_type()); | 5354 DCHECK_NE(INITIAL, state->group_type()); |
5346 | 5355 |
5347 Advance(); | 5356 Advance(); |
5348 // End disjunction parsing and convert builder content to new single | 5357 // End disjunction parsing and convert builder content to new single |
5349 // regexp atom. | 5358 // regexp atom. |
5350 RegExpTree* body = builder->ToRegExp(); | 5359 RegExpTree* body = builder->ToRegExp(); |
5351 | 5360 |
5352 int end_capture_index = captures_started(); | 5361 int end_capture_index = captures_started_; |
5353 | 5362 |
5354 int capture_index = stored_state->capture_index(); | 5363 int capture_index = state->capture_index(); |
5355 SubexpressionType group_type = stored_state->group_type(); | 5364 SubexpressionType group_type = state->group_type(); |
5356 | |
5357 // Restore previous state. | |
5358 stored_state = stored_state->previous_state(); | |
5359 builder = stored_state->builder(); | |
5360 | 5365 |
5361 // Build result of subexpression. | 5366 // Build result of subexpression. |
5362 if (group_type == CAPTURE) { | 5367 if (group_type == CAPTURE) { |
5363 RegExpCapture* capture = new(zone()) RegExpCapture(body, capture_index); | 5368 RegExpCapture* capture = GetCapture(capture_index); |
5364 captures_->at(capture_index - 1) = capture; | 5369 capture->set_body(body); |
| 5370 capture->set_read_direction(state->read_direction()); |
5365 body = capture; | 5371 body = capture; |
5366 } else if (group_type != GROUPING) { | 5372 } else if (group_type != GROUPING) { |
5367 DCHECK(group_type == POSITIVE_LOOKAHEAD || | 5373 DCHECK(group_type == POSITIVE_LOOKAHEAD || |
5368 group_type == NEGATIVE_LOOKAHEAD); | 5374 group_type == NEGATIVE_LOOKAHEAD); |
5369 bool is_positive = (group_type == POSITIVE_LOOKAHEAD); | 5375 bool is_positive = (group_type == POSITIVE_LOOKAHEAD); |
5370 body = new(zone()) RegExpLookahead(body, | 5376 body = new (zone()) RegExpLookaround( |
5371 is_positive, | 5377 body, is_positive, end_capture_index - capture_index, capture_index, |
5372 end_capture_index - capture_index, | 5378 state->read_direction()); |
5373 capture_index); | |
5374 } | 5379 } |
| 5380 |
| 5381 // Restore previous state. |
| 5382 state = state->previous_state(); |
| 5383 builder = state->builder(); |
| 5384 |
5375 builder->AddAtom(body); | 5385 builder->AddAtom(body); |
5376 // For compatability with JSC and ES3, we allow quantifiers after | 5386 // For compatability with JSC and ES3, we allow quantifiers after |
5377 // lookaheads, and break in all cases. | 5387 // lookaheads, and break in all cases. |
5378 break; | 5388 break; |
5379 } | 5389 } |
5380 case '|': { | 5390 case '|': { |
5381 Advance(); | 5391 Advance(); |
5382 builder->NewAlternative(); | 5392 builder->NewAlternative(); |
5383 continue; | 5393 continue; |
5384 } | 5394 } |
5385 case '*': | 5395 case '*': |
5386 case '+': | 5396 case '+': |
5387 case '?': | 5397 case '?': |
5388 return ReportError(CStrVector("Nothing to repeat")); | 5398 return ReportError(CStrVector("Nothing to repeat")); |
5389 case '^': { | 5399 case '^': { |
5390 Advance(); | 5400 Advance(); |
5391 if (multiline_) { | 5401 if (multiline_) { |
5392 builder->AddAssertion( | 5402 builder->AddAssertion(new (zone()) RegExpAssertion( |
5393 new(zone()) RegExpAssertion(RegExpAssertion::START_OF_LINE)); | 5403 RegExpAssertion::START_OF_LINE, state->read_direction())); |
5394 } else { | 5404 } else { |
5395 builder->AddAssertion( | 5405 builder->AddAssertion(new (zone()) RegExpAssertion( |
5396 new(zone()) RegExpAssertion(RegExpAssertion::START_OF_INPUT)); | 5406 RegExpAssertion::START_OF_INPUT, state->read_direction())); |
5397 set_contains_anchor(); | 5407 set_contains_anchor(); |
5398 } | 5408 } |
5399 continue; | 5409 continue; |
5400 } | 5410 } |
5401 case '$': { | 5411 case '$': { |
5402 Advance(); | 5412 Advance(); |
5403 RegExpAssertion::AssertionType assertion_type = | 5413 RegExpAssertion::AssertionType assertion_type = |
5404 multiline_ ? RegExpAssertion::END_OF_LINE : | 5414 multiline_ ? RegExpAssertion::END_OF_LINE : |
5405 RegExpAssertion::END_OF_INPUT; | 5415 RegExpAssertion::END_OF_INPUT; |
5406 builder->AddAssertion(new(zone()) RegExpAssertion(assertion_type)); | 5416 builder->AddAssertion(new (zone()) RegExpAssertion( |
| 5417 assertion_type, state->read_direction())); |
5407 continue; | 5418 continue; |
5408 } | 5419 } |
5409 case '.': { | 5420 case '.': { |
5410 Advance(); | 5421 Advance(); |
5411 // everything except \x0a, \x0d, \u2028 and \u2029 | 5422 // everything except \x0a, \x0d, \u2028 and \u2029 |
5412 ZoneList<CharacterRange>* ranges = | 5423 ZoneList<CharacterRange>* ranges = |
5413 new(zone()) ZoneList<CharacterRange>(2, zone()); | 5424 new(zone()) ZoneList<CharacterRange>(2, zone()); |
5414 CharacterRange::AddClassEscape('.', ranges, zone()); | 5425 CharacterRange::AddClassEscape('.', ranges, zone()); |
5415 RegExpTree* atom = new(zone()) RegExpCharacterClass(ranges, false); | 5426 RegExpTree* atom = new (zone()) |
| 5427 RegExpCharacterClass(ranges, false, state->read_direction()); |
5416 builder->AddAtom(atom); | 5428 builder->AddAtom(atom); |
5417 break; | 5429 break; |
5418 } | 5430 } |
5419 case '(': { | 5431 case '(': { |
5420 SubexpressionType subexpr_type = CAPTURE; | 5432 SubexpressionType subexpr_type = CAPTURE; |
| 5433 RegExpTree::ReadDirection read_direction = state->read_direction(); |
5421 Advance(); | 5434 Advance(); |
5422 if (current() == '?') { | 5435 if (current() == '?') { |
5423 switch (Next()) { | 5436 switch (Next()) { |
5424 case ':': | 5437 case ':': |
5425 subexpr_type = GROUPING; | 5438 subexpr_type = GROUPING; |
5426 break; | 5439 break; |
5427 case '=': | 5440 case '=': |
| 5441 read_direction = RegExpTree::READ_FORWARD; |
5428 subexpr_type = POSITIVE_LOOKAHEAD; | 5442 subexpr_type = POSITIVE_LOOKAHEAD; |
5429 break; | 5443 break; |
5430 case '!': | 5444 case '!': |
| 5445 read_direction = RegExpTree::READ_FORWARD; |
5431 subexpr_type = NEGATIVE_LOOKAHEAD; | 5446 subexpr_type = NEGATIVE_LOOKAHEAD; |
5432 break; | 5447 break; |
| 5448 case '<': |
| 5449 if (FLAG_harmony_regexp_lookbehind) { |
| 5450 Advance(); |
| 5451 read_direction = RegExpTree::READ_BACKWARD; |
| 5452 if (Next() == '=') { |
| 5453 subexpr_type = POSITIVE_LOOKAHEAD; |
| 5454 break; |
| 5455 } else if (Next() == '!') { |
| 5456 subexpr_type = NEGATIVE_LOOKAHEAD; |
| 5457 break; |
| 5458 } |
| 5459 } |
| 5460 // Fall through. |
5433 default: | 5461 default: |
5434 ReportError(CStrVector("Invalid group") CHECK_FAILED); | 5462 ReportError(CStrVector("Invalid group") CHECK_FAILED); |
5435 break; | 5463 break; |
5436 } | 5464 } |
5437 Advance(2); | 5465 Advance(2); |
5438 } else { | 5466 } else { |
5439 if (captures_ == NULL) { | 5467 if (captures_started_ >= kMaxCaptures) { |
5440 captures_ = new(zone()) ZoneList<RegExpCapture*>(2, zone()); | |
5441 } | |
5442 if (captures_started() >= kMaxCaptures) { | |
5443 ReportError(CStrVector("Too many captures") CHECK_FAILED); | 5468 ReportError(CStrVector("Too many captures") CHECK_FAILED); |
5444 } | 5469 } |
5445 captures_->Add(NULL, zone()); | 5470 captures_started_++; |
5446 } | 5471 } |
5447 // Store current state and begin new disjunction parsing. | 5472 // Store current state and begin new disjunction parsing. |
5448 stored_state = new(zone()) RegExpParserState(stored_state, subexpr_type, | 5473 state = new (zone()) RegExpParserState( |
5449 captures_started(), zone()); | 5474 state, subexpr_type, read_direction, captures_started_, zone()); |
5450 builder = stored_state->builder(); | 5475 builder = state->builder(); |
5451 continue; | 5476 continue; |
5452 } | 5477 } |
5453 case '[': { | 5478 case '[': { |
5454 RegExpTree* atom = ParseCharacterClass(CHECK_FAILED); | 5479 RegExpTree* atom = |
| 5480 ParseCharacterClass(state->read_direction() CHECK_FAILED); |
5455 builder->AddAtom(atom); | 5481 builder->AddAtom(atom); |
5456 break; | 5482 break; |
5457 } | 5483 } |
5458 // Atom :: | 5484 // Atom :: |
5459 // \ AtomEscape | 5485 // \ AtomEscape |
5460 case '\\': | 5486 case '\\': |
5461 switch (Next()) { | 5487 switch (Next()) { |
5462 case kEndMarker: | 5488 case kEndMarker: |
5463 return ReportError(CStrVector("\\ at end of pattern")); | 5489 return ReportError(CStrVector("\\ at end of pattern")); |
5464 case 'b': | 5490 case 'b': |
5465 Advance(2); | 5491 Advance(2); |
5466 builder->AddAssertion( | 5492 builder->AddAssertion(new (zone()) RegExpAssertion( |
5467 new(zone()) RegExpAssertion(RegExpAssertion::BOUNDARY)); | 5493 RegExpAssertion::BOUNDARY, state->read_direction())); |
5468 continue; | 5494 continue; |
5469 case 'B': | 5495 case 'B': |
5470 Advance(2); | 5496 Advance(2); |
5471 builder->AddAssertion( | 5497 builder->AddAssertion(new (zone()) RegExpAssertion( |
5472 new(zone()) RegExpAssertion(RegExpAssertion::NON_BOUNDARY)); | 5498 RegExpAssertion::NON_BOUNDARY, state->read_direction())); |
5473 continue; | 5499 continue; |
5474 // AtomEscape :: | 5500 // AtomEscape :: |
5475 // CharacterClassEscape | 5501 // CharacterClassEscape |
5476 // | 5502 // |
5477 // CharacterClassEscape :: one of | 5503 // CharacterClassEscape :: one of |
5478 // d D s S w W | 5504 // d D s S w W |
5479 case 'd': case 'D': case 's': case 'S': case 'w': case 'W': { | 5505 case 'd': case 'D': case 's': case 'S': case 'w': case 'W': { |
5480 uc32 c = Next(); | 5506 uc32 c = Next(); |
5481 Advance(2); | 5507 Advance(2); |
5482 ZoneList<CharacterRange>* ranges = | 5508 ZoneList<CharacterRange>* ranges = |
5483 new(zone()) ZoneList<CharacterRange>(2, zone()); | 5509 new(zone()) ZoneList<CharacterRange>(2, zone()); |
5484 CharacterRange::AddClassEscape(c, ranges, zone()); | 5510 CharacterRange::AddClassEscape(c, ranges, zone()); |
5485 RegExpTree* atom = new(zone()) RegExpCharacterClass(ranges, false); | 5511 RegExpTree* atom = new (zone()) |
| 5512 RegExpCharacterClass(ranges, false, state->read_direction()); |
5486 builder->AddAtom(atom); | 5513 builder->AddAtom(atom); |
5487 break; | 5514 break; |
5488 } | 5515 } |
5489 case '1': case '2': case '3': case '4': case '5': case '6': | 5516 case '1': case '2': case '3': case '4': case '5': case '6': |
5490 case '7': case '8': case '9': { | 5517 case '7': case '8': case '9': { |
5491 int index = 0; | 5518 int index = 0; |
5492 if (ParseBackReferenceIndex(&index)) { | 5519 if (ParseBackReferenceIndex(&index)) { |
5493 RegExpCapture* capture = NULL; | 5520 RegExpCapture* capture = GetCapture(index); |
5494 if (captures_ != NULL && index <= captures_->length()) { | 5521 RegExpTree* atom = new (zone()) |
5495 capture = captures_->at(index - 1); | 5522 RegExpBackReference(capture, state->read_direction()); |
5496 } | |
5497 if (capture == NULL) { | |
5498 builder->AddEmpty(); | |
5499 break; | |
5500 } | |
5501 RegExpTree* atom = new(zone()) RegExpBackReference(capture); | |
5502 builder->AddAtom(atom); | 5523 builder->AddAtom(atom); |
5503 break; | 5524 break; |
5504 } | 5525 } |
5505 uc32 first_digit = Next(); | 5526 uc32 first_digit = Next(); |
5506 if (first_digit == '8' || first_digit == '9') { | 5527 if (first_digit == '8' || first_digit == '9') { |
5507 // If the 'u' flag is present, only syntax characters can be escaped, | 5528 // If the 'u' flag is present, only syntax characters can be escaped, |
5508 // no other identity escapes are allowed. If the 'u' flag is not | 5529 // no other identity escapes are allowed. If the 'u' flag is not |
5509 // present, all identity escapes are allowed. | 5530 // present, all identity escapes are allowed. |
5510 if (!FLAG_harmony_unicode_regexps || !unicode_) { | 5531 if (!FLAG_harmony_unicode_regexps || !unicode_) { |
5511 builder->AddCharacter(first_digit); | 5532 builder->AddCharacter(first_digit); |
(...skipping 173 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
5685 | 5706 |
5686 | 5707 |
5687 // In order to know whether an escape is a backreference or not we have to scan | 5708 // In order to know whether an escape is a backreference or not we have to scan |
5688 // the entire regexp and find the number of capturing parentheses. However we | 5709 // the entire regexp and find the number of capturing parentheses. However we |
5689 // don't want to scan the regexp twice unless it is necessary. This mini-parser | 5710 // don't want to scan the regexp twice unless it is necessary. This mini-parser |
5690 // is called when needed. It can see the difference between capturing and | 5711 // is called when needed. It can see the difference between capturing and |
5691 // noncapturing parentheses and can skip character classes and backslash-escaped | 5712 // noncapturing parentheses and can skip character classes and backslash-escaped |
5692 // characters. | 5713 // characters. |
5693 void RegExpParser::ScanForCaptures() { | 5714 void RegExpParser::ScanForCaptures() { |
5694 // Start with captures started previous to current position | 5715 // Start with captures started previous to current position |
5695 int capture_count = captures_started(); | 5716 int capture_count = captures_started_; |
5696 // Add count of captures after this position. | 5717 // Add count of captures after this position. |
5697 int n; | 5718 int n; |
5698 while ((n = current()) != kEndMarker) { | 5719 while ((n = current()) != kEndMarker) { |
5699 Advance(); | 5720 Advance(); |
5700 switch (n) { | 5721 switch (n) { |
5701 case '\\': | 5722 case '\\': |
5702 Advance(); | 5723 Advance(); |
5703 break; | 5724 break; |
5704 case '[': { | 5725 case '[': { |
5705 int c; | 5726 int c; |
(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
5737 value = 10 * value + (c - '0'); | 5758 value = 10 * value + (c - '0'); |
5738 if (value > kMaxCaptures) { | 5759 if (value > kMaxCaptures) { |
5739 Reset(start); | 5760 Reset(start); |
5740 return false; | 5761 return false; |
5741 } | 5762 } |
5742 Advance(); | 5763 Advance(); |
5743 } else { | 5764 } else { |
5744 break; | 5765 break; |
5745 } | 5766 } |
5746 } | 5767 } |
5747 if (value > captures_started()) { | 5768 if (value > captures_started_) { |
5748 if (!is_scanned_for_captures_) { | 5769 if (!is_scanned_for_captures_) { |
5749 int saved_position = position(); | 5770 int saved_position = position(); |
5750 ScanForCaptures(); | 5771 ScanForCaptures(); |
5751 Reset(saved_position); | 5772 Reset(saved_position); |
5752 } | 5773 } |
5753 if (value > capture_count_) { | 5774 if (value > capture_count_) { |
5754 Reset(start); | 5775 Reset(start); |
5755 return false; | 5776 return false; |
5756 } | 5777 } |
5757 } | 5778 } |
5758 *index_out = value; | 5779 *index_out = value; |
5759 return true; | 5780 return true; |
5760 } | 5781 } |
5761 | 5782 |
5762 | 5783 |
| 5784 RegExpCapture* RegExpParser::GetCapture(int index) { |
| 5785 // The index for the capture groups are one-based. Its index in the list is |
| 5786 // zero-based. |
| 5787 int know_captures = |
| 5788 is_scanned_for_captures_ ? capture_count_ : captures_started_; |
| 5789 DCHECK(index <= know_captures); |
| 5790 if (captures_ == NULL) { |
| 5791 captures_ = new (zone()) ZoneList<RegExpCapture*>(know_captures, zone()); |
| 5792 } |
| 5793 while (captures_->length() < know_captures) { |
| 5794 captures_->Add(new (zone()) RegExpCapture(captures_->length() + 1), zone()); |
| 5795 } |
| 5796 return captures_->at(index - 1); |
| 5797 } |
| 5798 |
| 5799 |
5763 // QuantifierPrefix :: | 5800 // QuantifierPrefix :: |
5764 // { DecimalDigits } | 5801 // { DecimalDigits } |
5765 // { DecimalDigits , } | 5802 // { DecimalDigits , } |
5766 // { DecimalDigits , DecimalDigits } | 5803 // { DecimalDigits , DecimalDigits } |
5767 // | 5804 // |
5768 // Returns true if parsing succeeds, and set the min_out and max_out | 5805 // Returns true if parsing succeeds, and set the min_out and max_out |
5769 // values. Values are truncated to RegExpTree::kInfinity if they overflow. | 5806 // values. Values are truncated to RegExpTree::kInfinity if they overflow. |
5770 bool RegExpParser::ParseIntervalQuantifier(int* min_out, int* max_out) { | 5807 bool RegExpParser::ParseIntervalQuantifier(int* min_out, int* max_out) { |
5771 DCHECK_EQ(current(), '{'); | 5808 DCHECK_EQ(current(), '{'); |
5772 int start = position(); | 5809 int start = position(); |
(...skipping 259 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
6032 CharacterRange range, | 6069 CharacterRange range, |
6033 Zone* zone) { | 6070 Zone* zone) { |
6034 if (char_class != kNoCharClass) { | 6071 if (char_class != kNoCharClass) { |
6035 CharacterRange::AddClassEscape(char_class, ranges, zone); | 6072 CharacterRange::AddClassEscape(char_class, ranges, zone); |
6036 } else { | 6073 } else { |
6037 ranges->Add(range, zone); | 6074 ranges->Add(range, zone); |
6038 } | 6075 } |
6039 } | 6076 } |
6040 | 6077 |
6041 | 6078 |
6042 RegExpTree* RegExpParser::ParseCharacterClass() { | 6079 RegExpTree* RegExpParser::ParseCharacterClass( |
| 6080 RegExpTree::ReadDirection read_direction) { |
6043 static const char* kUnterminated = "Unterminated character class"; | 6081 static const char* kUnterminated = "Unterminated character class"; |
6044 static const char* kRangeOutOfOrder = "Range out of order in character class"; | 6082 static const char* kRangeOutOfOrder = "Range out of order in character class"; |
6045 | 6083 |
6046 DCHECK_EQ(current(), '['); | 6084 DCHECK_EQ(current(), '['); |
6047 Advance(); | 6085 Advance(); |
6048 bool is_negated = false; | 6086 bool is_negated = false; |
6049 if (current() == '^') { | 6087 if (current() == '^') { |
6050 is_negated = true; | 6088 is_negated = true; |
6051 Advance(); | 6089 Advance(); |
6052 } | 6090 } |
(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
6084 } | 6122 } |
6085 } | 6123 } |
6086 if (!has_more()) { | 6124 if (!has_more()) { |
6087 return ReportError(CStrVector(kUnterminated) CHECK_FAILED); | 6125 return ReportError(CStrVector(kUnterminated) CHECK_FAILED); |
6088 } | 6126 } |
6089 Advance(); | 6127 Advance(); |
6090 if (ranges->length() == 0) { | 6128 if (ranges->length() == 0) { |
6091 ranges->Add(CharacterRange::Everything(), zone()); | 6129 ranges->Add(CharacterRange::Everything(), zone()); |
6092 is_negated = !is_negated; | 6130 is_negated = !is_negated; |
6093 } | 6131 } |
6094 return new(zone()) RegExpCharacterClass(ranges, is_negated); | 6132 return new (zone()) RegExpCharacterClass(ranges, is_negated, read_direction); |
6095 } | 6133 } |
6096 | 6134 |
6097 | 6135 |
6098 // ---------------------------------------------------------------------------- | 6136 // ---------------------------------------------------------------------------- |
6099 // The Parser interface. | 6137 // The Parser interface. |
6100 | 6138 |
6101 bool RegExpParser::ParseRegExp(Isolate* isolate, Zone* zone, | 6139 bool RegExpParser::ParseRegExp(Isolate* isolate, Zone* zone, |
6102 FlatStringReader* input, bool multiline, | 6140 FlatStringReader* input, bool multiline, |
6103 bool unicode, RegExpCompileData* result) { | 6141 bool unicode, RegExpCompileData* result) { |
6104 DCHECK(result != NULL); | 6142 DCHECK(result != NULL); |
(...skipping 348 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
6453 } | 6491 } |
6454 | 6492 |
6455 | 6493 |
6456 void Parser::RaiseLanguageMode(LanguageMode mode) { | 6494 void Parser::RaiseLanguageMode(LanguageMode mode) { |
6457 SetLanguageMode(scope_, | 6495 SetLanguageMode(scope_, |
6458 static_cast<LanguageMode>(scope_->language_mode() | mode)); | 6496 static_cast<LanguageMode>(scope_->language_mode() | mode)); |
6459 } | 6497 } |
6460 | 6498 |
6461 } // namespace internal | 6499 } // namespace internal |
6462 } // namespace v8 | 6500 } // namespace v8 |
OLD | NEW |