| OLD | NEW |
| 1 // Copyright 2012 the V8 project authors. All rights reserved. | 1 // Copyright 2012 the V8 project authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "src/parser.h" | 5 #include "src/parser.h" |
| 6 | 6 |
| 7 #include "src/api.h" | 7 #include "src/api.h" |
| 8 #include "src/ast.h" | 8 #include "src/ast.h" |
| 9 #include "src/ast-literal-reindexer.h" | 9 #include "src/ast-literal-reindexer.h" |
| 10 #include "src/bailout-reason.h" | 10 #include "src/bailout-reason.h" |
| (...skipping 76 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 87 set_stack_limit(isolate_->stack_guard()->real_climit()); | 87 set_stack_limit(isolate_->stack_guard()->real_climit()); |
| 88 set_unicode_cache(isolate_->unicode_cache()); | 88 set_unicode_cache(isolate_->unicode_cache()); |
| 89 set_script(script); | 89 set_script(script); |
| 90 | 90 |
| 91 if (script->type() == Script::TYPE_NATIVE) { | 91 if (script->type() == Script::TYPE_NATIVE) { |
| 92 set_native(); | 92 set_native(); |
| 93 } | 93 } |
| 94 } | 94 } |
| 95 | 95 |
| 96 | 96 |
| 97 RegExpBuilder::RegExpBuilder(Zone* zone) | 97 RegExpBuilder::RegExpBuilder(Zone* zone, |
| 98 RegExpTree::ReadDirection read_direction) |
| 98 : zone_(zone), | 99 : zone_(zone), |
| 99 pending_empty_(false), | 100 pending_empty_(false), |
| 100 characters_(NULL), | 101 characters_(NULL), |
| 101 terms_(), | 102 terms_(), |
| 102 alternatives_() | 103 alternatives_(), |
| 104 read_direction_(read_direction) |
| 103 #ifdef DEBUG | 105 #ifdef DEBUG |
| 104 , last_added_(ADD_NONE) | 106 , last_added_(ADD_NONE) |
| 105 #endif | 107 #endif |
| 106 {} | 108 {} |
| 107 | 109 |
| 108 | 110 |
| 109 void RegExpBuilder::FlushCharacters() { | 111 void RegExpBuilder::FlushCharacters() { |
| 110 pending_empty_ = false; | 112 pending_empty_ = false; |
| 111 if (characters_ != NULL) { | 113 if (characters_ != NULL) { |
| 112 RegExpTree* atom = new(zone()) RegExpAtom(characters_->ToConstVector()); | 114 RegExpTree* atom = |
| 115 new (zone()) RegExpAtom(characters_->ToConstVector(), read_direction_); |
| 113 characters_ = NULL; | 116 characters_ = NULL; |
| 114 text_.Add(atom, zone()); | 117 text_.Add(atom, zone()); |
| 115 LAST(ADD_ATOM); | 118 LAST(ADD_ATOM); |
| 116 } | 119 } |
| 117 } | 120 } |
| 118 | 121 |
| 119 | 122 |
| 120 void RegExpBuilder::FlushText() { | 123 void RegExpBuilder::FlushText() { |
| 121 FlushCharacters(); | 124 FlushCharacters(); |
| 122 int num_text = text_.length(); | 125 int num_text = text_.length(); |
| 123 if (num_text == 0) { | 126 if (num_text == 0) { |
| 124 return; | 127 return; |
| 125 } else if (num_text == 1) { | 128 } else if (num_text == 1) { |
| 126 terms_.Add(text_.last(), zone()); | 129 terms_.Add(text_.last(), zone()); |
| 127 } else { | 130 } else { |
| 128 RegExpText* text = new(zone()) RegExpText(zone()); | 131 RegExpText* text = new (zone()) RegExpText(zone(), read_direction_); |
| 129 for (int i = 0; i < num_text; i++) | 132 for (int i = 0; i < num_text; i++) |
| 130 text_.Get(i)->AppendToText(text, zone()); | 133 text_.Get(i)->AppendToText(text, zone()); |
| 131 terms_.Add(text, zone()); | 134 terms_.Add(text, zone()); |
| 132 } | 135 } |
| 133 text_.Clear(); | 136 text_.Clear(); |
| 134 } | 137 } |
| 135 | 138 |
| 136 | 139 |
| 137 void RegExpBuilder::AddCharacter(uc16 c) { | 140 void RegExpBuilder::AddCharacter(uc16 c) { |
| 138 pending_empty_ = false; | 141 pending_empty_ = false; |
| (...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 179 | 182 |
| 180 void RegExpBuilder::FlushTerms() { | 183 void RegExpBuilder::FlushTerms() { |
| 181 FlushText(); | 184 FlushText(); |
| 182 int num_terms = terms_.length(); | 185 int num_terms = terms_.length(); |
| 183 RegExpTree* alternative; | 186 RegExpTree* alternative; |
| 184 if (num_terms == 0) { | 187 if (num_terms == 0) { |
| 185 alternative = new (zone()) RegExpEmpty(); | 188 alternative = new (zone()) RegExpEmpty(); |
| 186 } else if (num_terms == 1) { | 189 } else if (num_terms == 1) { |
| 187 alternative = terms_.last(); | 190 alternative = terms_.last(); |
| 188 } else { | 191 } else { |
| 189 alternative = new(zone()) RegExpAlternative(terms_.GetList(zone())); | 192 alternative = |
| 193 new (zone()) RegExpAlternative(terms_.GetList(zone()), read_direction_); |
| 190 } | 194 } |
| 191 alternatives_.Add(alternative, zone()); | 195 alternatives_.Add(alternative, zone()); |
| 192 terms_.Clear(); | 196 terms_.Clear(); |
| 193 LAST(ADD_NONE); | 197 LAST(ADD_NONE); |
| 194 } | 198 } |
| 195 | 199 |
| 196 | 200 |
| 197 RegExpTree* RegExpBuilder::ToRegExp() { | 201 RegExpTree* RegExpBuilder::ToRegExp() { |
| 198 FlushTerms(); | 202 FlushTerms(); |
| 199 int num_alternatives = alternatives_.length(); | 203 int num_alternatives = alternatives_.length(); |
| 200 if (num_alternatives == 0) return new (zone()) RegExpEmpty(); | 204 if (num_alternatives == 0) return new (zone()) RegExpEmpty(); |
| 201 if (num_alternatives == 1) return alternatives_.last(); | 205 if (num_alternatives == 1) return alternatives_.last(); |
| 202 return new(zone()) RegExpDisjunction(alternatives_.GetList(zone())); | 206 return new (zone()) |
| 207 RegExpDisjunction(alternatives_.GetList(zone()), read_direction_); |
| 203 } | 208 } |
| 204 | 209 |
| 205 | 210 |
| 206 void RegExpBuilder::AddQuantifierToAtom( | 211 void RegExpBuilder::AddQuantifierToAtom( |
| 207 int min, int max, RegExpQuantifier::QuantifierType quantifier_type) { | 212 int min, int max, RegExpQuantifier::QuantifierType quantifier_type) { |
| 208 if (pending_empty_) { | 213 if (pending_empty_) { |
| 209 pending_empty_ = false; | 214 pending_empty_ = false; |
| 210 return; | 215 return; |
| 211 } | 216 } |
| 212 RegExpTree* atom; | 217 RegExpTree* atom; |
| 213 if (characters_ != NULL) { | 218 if (characters_ != NULL) { |
| 214 DCHECK(last_added_ == ADD_CHAR); | 219 DCHECK(last_added_ == ADD_CHAR); |
| 215 // Last atom was character. | 220 // Last atom was character. |
| 216 Vector<const uc16> char_vector = characters_->ToConstVector(); | 221 Vector<const uc16> char_vector = characters_->ToConstVector(); |
| 217 int num_chars = char_vector.length(); | 222 int num_chars = char_vector.length(); |
| 218 if (num_chars > 1) { | 223 if (num_chars > 1) { |
| 219 Vector<const uc16> prefix = char_vector.SubVector(0, num_chars - 1); | 224 Vector<const uc16> prefix = char_vector.SubVector(0, num_chars - 1); |
| 220 text_.Add(new(zone()) RegExpAtom(prefix), zone()); | 225 text_.Add(new (zone()) RegExpAtom(prefix, read_direction_), zone()); |
| 221 char_vector = char_vector.SubVector(num_chars - 1, num_chars); | 226 char_vector = char_vector.SubVector(num_chars - 1, num_chars); |
| 222 } | 227 } |
| 223 characters_ = NULL; | 228 characters_ = NULL; |
| 224 atom = new(zone()) RegExpAtom(char_vector); | 229 atom = new (zone()) RegExpAtom(char_vector, read_direction_); |
| 225 FlushText(); | 230 FlushText(); |
| 226 } else if (text_.length() > 0) { | 231 } else if (text_.length() > 0) { |
| 227 DCHECK(last_added_ == ADD_ATOM); | 232 DCHECK(last_added_ == ADD_ATOM); |
| 228 atom = text_.RemoveLast(); | 233 atom = text_.RemoveLast(); |
| 229 FlushText(); | 234 FlushText(); |
| 230 } else if (terms_.length() > 0) { | 235 } else if (terms_.length() > 0) { |
| 231 DCHECK(last_added_ == ADD_ATOM); | 236 DCHECK(last_added_ == ADD_ATOM); |
| 232 atom = terms_.RemoveLast(); | 237 atom = terms_.RemoveLast(); |
| 233 if (atom->max_match() == 0) { | 238 if (atom->max_match() == 0) { |
| 234 // Guaranteed to only match an empty string. | 239 // Guaranteed to only match an empty string. |
| 235 LAST(ADD_TERM); | 240 LAST(ADD_TERM); |
| 236 if (min == 0) { | 241 if (min == 0) { |
| 237 return; | 242 return; |
| 238 } | 243 } |
| 239 terms_.Add(atom, zone()); | 244 terms_.Add(atom, zone()); |
| 240 return; | 245 return; |
| 241 } | 246 } |
| 242 } else { | 247 } else { |
| 243 // Only call immediately after adding an atom or character! | 248 // Only call immediately after adding an atom or character! |
| 244 UNREACHABLE(); | 249 UNREACHABLE(); |
| 245 return; | 250 return; |
| 246 } | 251 } |
| 247 terms_.Add( | 252 terms_.Add(new (zone()) RegExpQuantifier(min, max, quantifier_type, atom, |
| 248 new(zone()) RegExpQuantifier(min, max, quantifier_type, atom), zone()); | 253 read_direction_), |
| 254 zone()); |
| 249 LAST(ADD_TERM); | 255 LAST(ADD_TERM); |
| 250 } | 256 } |
| 251 | 257 |
| 252 | 258 |
| 253 FunctionEntry ParseData::GetFunctionEntry(int start) { | 259 FunctionEntry ParseData::GetFunctionEntry(int start) { |
| 254 // The current pre-data entry must be a FunctionEntry with the given | 260 // The current pre-data entry must be a FunctionEntry with the given |
| 255 // start position. | 261 // start position. |
| 256 if ((function_index_ + FunctionEntry::kSize <= Length()) && | 262 if ((function_index_ + FunctionEntry::kSize <= Length()) && |
| 257 (static_cast<int>(Data()[function_index_]) == start)) { | 263 (static_cast<int>(Data()[function_index_]) == start)) { |
| 258 int index = function_index_; | 264 int index = function_index_; |
| (...skipping 4955 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 5214 RegExpParser::RegExpParser(FlatStringReader* in, Handle<String>* error, | 5220 RegExpParser::RegExpParser(FlatStringReader* in, Handle<String>* error, |
| 5215 bool multiline, bool unicode, Isolate* isolate, | 5221 bool multiline, bool unicode, Isolate* isolate, |
| 5216 Zone* zone) | 5222 Zone* zone) |
| 5217 : isolate_(isolate), | 5223 : isolate_(isolate), |
| 5218 zone_(zone), | 5224 zone_(zone), |
| 5219 error_(error), | 5225 error_(error), |
| 5220 captures_(NULL), | 5226 captures_(NULL), |
| 5221 in_(in), | 5227 in_(in), |
| 5222 current_(kEndMarker), | 5228 current_(kEndMarker), |
| 5223 next_pos_(0), | 5229 next_pos_(0), |
| 5230 captures_started_(0), |
| 5224 capture_count_(0), | 5231 capture_count_(0), |
| 5225 has_more_(true), | 5232 has_more_(true), |
| 5226 multiline_(multiline), | 5233 multiline_(multiline), |
| 5227 unicode_(unicode), | 5234 unicode_(unicode), |
| 5228 simple_(false), | 5235 simple_(false), |
| 5229 contains_anchor_(false), | 5236 contains_anchor_(false), |
| 5230 is_scanned_for_captures_(false), | 5237 is_scanned_for_captures_(false), |
| 5231 failed_(false) { | 5238 failed_(false) { |
| 5232 Advance(); | 5239 Advance(); |
| 5233 } | 5240 } |
| (...skipping 61 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 5295 current_ = kEndMarker; | 5302 current_ = kEndMarker; |
| 5296 next_pos_ = in()->length(); | 5303 next_pos_ = in()->length(); |
| 5297 return NULL; | 5304 return NULL; |
| 5298 } | 5305 } |
| 5299 | 5306 |
| 5300 | 5307 |
| 5301 // Pattern :: | 5308 // Pattern :: |
| 5302 // Disjunction | 5309 // Disjunction |
| 5303 RegExpTree* RegExpParser::ParsePattern() { | 5310 RegExpTree* RegExpParser::ParsePattern() { |
| 5304 RegExpTree* result = ParseDisjunction(CHECK_FAILED); | 5311 RegExpTree* result = ParseDisjunction(CHECK_FAILED); |
| 5312 |
| 5305 DCHECK(!has_more()); | 5313 DCHECK(!has_more()); |
| 5306 // If the result of parsing is a literal string atom, and it has the | 5314 // If the result of parsing is a literal string atom, and it has the |
| 5307 // same length as the input, then the atom is identical to the input. | 5315 // same length as the input, then the atom is identical to the input. |
| 5308 if (result->IsAtom() && result->AsAtom()->length() == in()->length()) { | 5316 if (result->IsAtom() && result->AsAtom()->length() == in()->length()) { |
| 5309 simple_ = true; | 5317 simple_ = true; |
| 5310 } | 5318 } |
| 5311 return result; | 5319 return result; |
| 5312 } | 5320 } |
| 5313 | 5321 |
| 5314 | 5322 |
| 5315 // Disjunction :: | 5323 // Disjunction :: |
| 5316 // Alternative | 5324 // Alternative |
| 5317 // Alternative | Disjunction | 5325 // Alternative | Disjunction |
| 5318 // Alternative :: | 5326 // Alternative :: |
| 5319 // [empty] | 5327 // [empty] |
| 5320 // Term Alternative | 5328 // Term Alternative |
| 5321 // Term :: | 5329 // Term :: |
| 5322 // Assertion | 5330 // Assertion |
| 5323 // Atom | 5331 // Atom |
| 5324 // Atom Quantifier | 5332 // Atom Quantifier |
| 5325 RegExpTree* RegExpParser::ParseDisjunction() { | 5333 RegExpTree* RegExpParser::ParseDisjunction() { |
| 5326 // Used to store current state while parsing subexpressions. | 5334 // Used to store current state while parsing subexpressions. |
| 5327 RegExpParserState initial_state(NULL, INITIAL, 0, zone()); | 5335 RegExpParserState initial_state(NULL, INITIAL, RegExpTree::READ_FORWARD, 0, |
| 5328 RegExpParserState* stored_state = &initial_state; | 5336 zone()); |
| 5337 RegExpParserState* state = &initial_state; |
| 5329 // Cache the builder in a local variable for quick access. | 5338 // Cache the builder in a local variable for quick access. |
| 5330 RegExpBuilder* builder = initial_state.builder(); | 5339 RegExpBuilder* builder = initial_state.builder(); |
| 5331 while (true) { | 5340 while (true) { |
| 5332 switch (current()) { | 5341 switch (current()) { |
| 5333 case kEndMarker: | 5342 case kEndMarker: |
| 5334 if (stored_state->IsSubexpression()) { | 5343 if (state->IsSubexpression()) { |
| 5335 // Inside a parenthesized group when hitting end of input. | 5344 // Inside a parenthesized group when hitting end of input. |
| 5336 ReportError(CStrVector("Unterminated group") CHECK_FAILED); | 5345 ReportError(CStrVector("Unterminated group") CHECK_FAILED); |
| 5337 } | 5346 } |
| 5338 DCHECK_EQ(INITIAL, stored_state->group_type()); | 5347 DCHECK_EQ(INITIAL, state->group_type()); |
| 5339 // Parsing completed successfully. | 5348 // Parsing completed successfully. |
| 5340 return builder->ToRegExp(); | 5349 return builder->ToRegExp(); |
| 5341 case ')': { | 5350 case ')': { |
| 5342 if (!stored_state->IsSubexpression()) { | 5351 if (!state->IsSubexpression()) { |
| 5343 ReportError(CStrVector("Unmatched ')'") CHECK_FAILED); | 5352 ReportError(CStrVector("Unmatched ')'") CHECK_FAILED); |
| 5344 } | 5353 } |
| 5345 DCHECK_NE(INITIAL, stored_state->group_type()); | 5354 DCHECK_NE(INITIAL, state->group_type()); |
| 5346 | 5355 |
| 5347 Advance(); | 5356 Advance(); |
| 5348 // End disjunction parsing and convert builder content to new single | 5357 // End disjunction parsing and convert builder content to new single |
| 5349 // regexp atom. | 5358 // regexp atom. |
| 5350 RegExpTree* body = builder->ToRegExp(); | 5359 RegExpTree* body = builder->ToRegExp(); |
| 5351 | 5360 |
| 5352 int end_capture_index = captures_started(); | 5361 int end_capture_index = captures_started_; |
| 5353 | 5362 |
| 5354 int capture_index = stored_state->capture_index(); | 5363 int capture_index = state->capture_index(); |
| 5355 SubexpressionType group_type = stored_state->group_type(); | 5364 SubexpressionType group_type = state->group_type(); |
| 5356 | |
| 5357 // Restore previous state. | |
| 5358 stored_state = stored_state->previous_state(); | |
| 5359 builder = stored_state->builder(); | |
| 5360 | 5365 |
| 5361 // Build result of subexpression. | 5366 // Build result of subexpression. |
| 5362 if (group_type == CAPTURE) { | 5367 if (group_type == CAPTURE) { |
| 5363 RegExpCapture* capture = new(zone()) RegExpCapture(body, capture_index); | 5368 RegExpCapture* capture = GetCapture(capture_index); |
| 5364 captures_->at(capture_index - 1) = capture; | 5369 capture->set_body(body); |
| 5370 capture->set_read_direction(state->read_direction()); |
| 5365 body = capture; | 5371 body = capture; |
| 5366 } else if (group_type != GROUPING) { | 5372 } else if (group_type != GROUPING) { |
| 5367 DCHECK(group_type == POSITIVE_LOOKAHEAD || | 5373 DCHECK(group_type == POSITIVE_LOOKAHEAD || |
| 5368 group_type == NEGATIVE_LOOKAHEAD); | 5374 group_type == NEGATIVE_LOOKAHEAD); |
| 5369 bool is_positive = (group_type == POSITIVE_LOOKAHEAD); | 5375 bool is_positive = (group_type == POSITIVE_LOOKAHEAD); |
| 5370 body = new(zone()) RegExpLookahead(body, | 5376 body = new (zone()) RegExpLookaround( |
| 5371 is_positive, | 5377 body, is_positive, end_capture_index - capture_index, capture_index, |
| 5372 end_capture_index - capture_index, | 5378 state->read_direction()); |
| 5373 capture_index); | |
| 5374 } | 5379 } |
| 5380 |
| 5381 // Restore previous state. |
| 5382 state = state->previous_state(); |
| 5383 builder = state->builder(); |
| 5384 |
| 5375 builder->AddAtom(body); | 5385 builder->AddAtom(body); |
| 5376 // For compatability with JSC and ES3, we allow quantifiers after | 5386 // For compatability with JSC and ES3, we allow quantifiers after |
| 5377 // lookaheads, and break in all cases. | 5387 // lookaheads, and break in all cases. |
| 5378 break; | 5388 break; |
| 5379 } | 5389 } |
| 5380 case '|': { | 5390 case '|': { |
| 5381 Advance(); | 5391 Advance(); |
| 5382 builder->NewAlternative(); | 5392 builder->NewAlternative(); |
| 5383 continue; | 5393 continue; |
| 5384 } | 5394 } |
| 5385 case '*': | 5395 case '*': |
| 5386 case '+': | 5396 case '+': |
| 5387 case '?': | 5397 case '?': |
| 5388 return ReportError(CStrVector("Nothing to repeat")); | 5398 return ReportError(CStrVector("Nothing to repeat")); |
| 5389 case '^': { | 5399 case '^': { |
| 5390 Advance(); | 5400 Advance(); |
| 5391 if (multiline_) { | 5401 if (multiline_) { |
| 5392 builder->AddAssertion( | 5402 builder->AddAssertion(new (zone()) RegExpAssertion( |
| 5393 new(zone()) RegExpAssertion(RegExpAssertion::START_OF_LINE)); | 5403 RegExpAssertion::START_OF_LINE, state->read_direction())); |
| 5394 } else { | 5404 } else { |
| 5395 builder->AddAssertion( | 5405 builder->AddAssertion(new (zone()) RegExpAssertion( |
| 5396 new(zone()) RegExpAssertion(RegExpAssertion::START_OF_INPUT)); | 5406 RegExpAssertion::START_OF_INPUT, state->read_direction())); |
| 5397 set_contains_anchor(); | 5407 set_contains_anchor(); |
| 5398 } | 5408 } |
| 5399 continue; | 5409 continue; |
| 5400 } | 5410 } |
| 5401 case '$': { | 5411 case '$': { |
| 5402 Advance(); | 5412 Advance(); |
| 5403 RegExpAssertion::AssertionType assertion_type = | 5413 RegExpAssertion::AssertionType assertion_type = |
| 5404 multiline_ ? RegExpAssertion::END_OF_LINE : | 5414 multiline_ ? RegExpAssertion::END_OF_LINE : |
| 5405 RegExpAssertion::END_OF_INPUT; | 5415 RegExpAssertion::END_OF_INPUT; |
| 5406 builder->AddAssertion(new(zone()) RegExpAssertion(assertion_type)); | 5416 builder->AddAssertion(new (zone()) RegExpAssertion( |
| 5417 assertion_type, state->read_direction())); |
| 5407 continue; | 5418 continue; |
| 5408 } | 5419 } |
| 5409 case '.': { | 5420 case '.': { |
| 5410 Advance(); | 5421 Advance(); |
| 5411 // everything except \x0a, \x0d, \u2028 and \u2029 | 5422 // everything except \x0a, \x0d, \u2028 and \u2029 |
| 5412 ZoneList<CharacterRange>* ranges = | 5423 ZoneList<CharacterRange>* ranges = |
| 5413 new(zone()) ZoneList<CharacterRange>(2, zone()); | 5424 new(zone()) ZoneList<CharacterRange>(2, zone()); |
| 5414 CharacterRange::AddClassEscape('.', ranges, zone()); | 5425 CharacterRange::AddClassEscape('.', ranges, zone()); |
| 5415 RegExpTree* atom = new(zone()) RegExpCharacterClass(ranges, false); | 5426 RegExpTree* atom = new (zone()) |
| 5427 RegExpCharacterClass(ranges, false, state->read_direction()); |
| 5416 builder->AddAtom(atom); | 5428 builder->AddAtom(atom); |
| 5417 break; | 5429 break; |
| 5418 } | 5430 } |
| 5419 case '(': { | 5431 case '(': { |
| 5420 SubexpressionType subexpr_type = CAPTURE; | 5432 SubexpressionType subexpr_type = CAPTURE; |
| 5433 RegExpTree::ReadDirection read_direction = state->read_direction(); |
| 5421 Advance(); | 5434 Advance(); |
| 5422 if (current() == '?') { | 5435 if (current() == '?') { |
| 5423 switch (Next()) { | 5436 switch (Next()) { |
| 5424 case ':': | 5437 case ':': |
| 5425 subexpr_type = GROUPING; | 5438 subexpr_type = GROUPING; |
| 5426 break; | 5439 break; |
| 5427 case '=': | 5440 case '=': |
| 5441 read_direction = RegExpTree::READ_FORWARD; |
| 5428 subexpr_type = POSITIVE_LOOKAHEAD; | 5442 subexpr_type = POSITIVE_LOOKAHEAD; |
| 5429 break; | 5443 break; |
| 5430 case '!': | 5444 case '!': |
| 5445 read_direction = RegExpTree::READ_FORWARD; |
| 5431 subexpr_type = NEGATIVE_LOOKAHEAD; | 5446 subexpr_type = NEGATIVE_LOOKAHEAD; |
| 5432 break; | 5447 break; |
| 5448 case '<': |
| 5449 if (FLAG_harmony_regexp_lookbehind) { |
| 5450 Advance(); |
| 5451 read_direction = RegExpTree::READ_BACKWARD; |
| 5452 if (Next() == '=') { |
| 5453 subexpr_type = POSITIVE_LOOKAHEAD; |
| 5454 break; |
| 5455 } else if (Next() == '!') { |
| 5456 subexpr_type = NEGATIVE_LOOKAHEAD; |
| 5457 break; |
| 5458 } |
| 5459 } |
| 5460 // Fall through. |
| 5433 default: | 5461 default: |
| 5434 ReportError(CStrVector("Invalid group") CHECK_FAILED); | 5462 ReportError(CStrVector("Invalid group") CHECK_FAILED); |
| 5435 break; | 5463 break; |
| 5436 } | 5464 } |
| 5437 Advance(2); | 5465 Advance(2); |
| 5438 } else { | 5466 } else { |
| 5439 if (captures_ == NULL) { | 5467 if (captures_started_ >= kMaxCaptures) { |
| 5440 captures_ = new(zone()) ZoneList<RegExpCapture*>(2, zone()); | |
| 5441 } | |
| 5442 if (captures_started() >= kMaxCaptures) { | |
| 5443 ReportError(CStrVector("Too many captures") CHECK_FAILED); | 5468 ReportError(CStrVector("Too many captures") CHECK_FAILED); |
| 5444 } | 5469 } |
| 5445 captures_->Add(NULL, zone()); | 5470 captures_started_++; |
| 5446 } | 5471 } |
| 5447 // Store current state and begin new disjunction parsing. | 5472 // Store current state and begin new disjunction parsing. |
| 5448 stored_state = new(zone()) RegExpParserState(stored_state, subexpr_type, | 5473 state = new (zone()) RegExpParserState( |
| 5449 captures_started(), zone()); | 5474 state, subexpr_type, read_direction, captures_started_, zone()); |
| 5450 builder = stored_state->builder(); | 5475 builder = state->builder(); |
| 5451 continue; | 5476 continue; |
| 5452 } | 5477 } |
| 5453 case '[': { | 5478 case '[': { |
| 5454 RegExpTree* atom = ParseCharacterClass(CHECK_FAILED); | 5479 RegExpTree* atom = |
| 5480 ParseCharacterClass(state->read_direction() CHECK_FAILED); |
| 5455 builder->AddAtom(atom); | 5481 builder->AddAtom(atom); |
| 5456 break; | 5482 break; |
| 5457 } | 5483 } |
| 5458 // Atom :: | 5484 // Atom :: |
| 5459 // \ AtomEscape | 5485 // \ AtomEscape |
| 5460 case '\\': | 5486 case '\\': |
| 5461 switch (Next()) { | 5487 switch (Next()) { |
| 5462 case kEndMarker: | 5488 case kEndMarker: |
| 5463 return ReportError(CStrVector("\\ at end of pattern")); | 5489 return ReportError(CStrVector("\\ at end of pattern")); |
| 5464 case 'b': | 5490 case 'b': |
| 5465 Advance(2); | 5491 Advance(2); |
| 5466 builder->AddAssertion( | 5492 builder->AddAssertion(new (zone()) RegExpAssertion( |
| 5467 new(zone()) RegExpAssertion(RegExpAssertion::BOUNDARY)); | 5493 RegExpAssertion::BOUNDARY, state->read_direction())); |
| 5468 continue; | 5494 continue; |
| 5469 case 'B': | 5495 case 'B': |
| 5470 Advance(2); | 5496 Advance(2); |
| 5471 builder->AddAssertion( | 5497 builder->AddAssertion(new (zone()) RegExpAssertion( |
| 5472 new(zone()) RegExpAssertion(RegExpAssertion::NON_BOUNDARY)); | 5498 RegExpAssertion::NON_BOUNDARY, state->read_direction())); |
| 5473 continue; | 5499 continue; |
| 5474 // AtomEscape :: | 5500 // AtomEscape :: |
| 5475 // CharacterClassEscape | 5501 // CharacterClassEscape |
| 5476 // | 5502 // |
| 5477 // CharacterClassEscape :: one of | 5503 // CharacterClassEscape :: one of |
| 5478 // d D s S w W | 5504 // d D s S w W |
| 5479 case 'd': case 'D': case 's': case 'S': case 'w': case 'W': { | 5505 case 'd': case 'D': case 's': case 'S': case 'w': case 'W': { |
| 5480 uc32 c = Next(); | 5506 uc32 c = Next(); |
| 5481 Advance(2); | 5507 Advance(2); |
| 5482 ZoneList<CharacterRange>* ranges = | 5508 ZoneList<CharacterRange>* ranges = |
| 5483 new(zone()) ZoneList<CharacterRange>(2, zone()); | 5509 new(zone()) ZoneList<CharacterRange>(2, zone()); |
| 5484 CharacterRange::AddClassEscape(c, ranges, zone()); | 5510 CharacterRange::AddClassEscape(c, ranges, zone()); |
| 5485 RegExpTree* atom = new(zone()) RegExpCharacterClass(ranges, false); | 5511 RegExpTree* atom = new (zone()) |
| 5512 RegExpCharacterClass(ranges, false, state->read_direction()); |
| 5486 builder->AddAtom(atom); | 5513 builder->AddAtom(atom); |
| 5487 break; | 5514 break; |
| 5488 } | 5515 } |
| 5489 case '1': case '2': case '3': case '4': case '5': case '6': | 5516 case '1': case '2': case '3': case '4': case '5': case '6': |
| 5490 case '7': case '8': case '9': { | 5517 case '7': case '8': case '9': { |
| 5491 int index = 0; | 5518 int index = 0; |
| 5492 if (ParseBackReferenceIndex(&index)) { | 5519 if (ParseBackReferenceIndex(&index)) { |
| 5493 RegExpCapture* capture = NULL; | 5520 RegExpCapture* capture = GetCapture(index); |
| 5494 if (captures_ != NULL && index <= captures_->length()) { | 5521 RegExpTree* atom = new (zone()) |
| 5495 capture = captures_->at(index - 1); | 5522 RegExpBackReference(capture, state->read_direction()); |
| 5496 } | |
| 5497 if (capture == NULL) { | |
| 5498 builder->AddEmpty(); | |
| 5499 break; | |
| 5500 } | |
| 5501 RegExpTree* atom = new(zone()) RegExpBackReference(capture); | |
| 5502 builder->AddAtom(atom); | 5523 builder->AddAtom(atom); |
| 5503 break; | 5524 break; |
| 5504 } | 5525 } |
| 5505 uc32 first_digit = Next(); | 5526 uc32 first_digit = Next(); |
| 5506 if (first_digit == '8' || first_digit == '9') { | 5527 if (first_digit == '8' || first_digit == '9') { |
| 5507 // If the 'u' flag is present, only syntax characters can be escaped, | 5528 // If the 'u' flag is present, only syntax characters can be escaped, |
| 5508 // no other identity escapes are allowed. If the 'u' flag is not | 5529 // no other identity escapes are allowed. If the 'u' flag is not |
| 5509 // present, all identity escapes are allowed. | 5530 // present, all identity escapes are allowed. |
| 5510 if (!FLAG_harmony_unicode_regexps || !unicode_) { | 5531 if (!FLAG_harmony_unicode_regexps || !unicode_) { |
| 5511 builder->AddCharacter(first_digit); | 5532 builder->AddCharacter(first_digit); |
| (...skipping 173 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 5685 | 5706 |
| 5686 | 5707 |
| 5687 // In order to know whether an escape is a backreference or not we have to scan | 5708 // In order to know whether an escape is a backreference or not we have to scan |
| 5688 // the entire regexp and find the number of capturing parentheses. However we | 5709 // the entire regexp and find the number of capturing parentheses. However we |
| 5689 // don't want to scan the regexp twice unless it is necessary. This mini-parser | 5710 // don't want to scan the regexp twice unless it is necessary. This mini-parser |
| 5690 // is called when needed. It can see the difference between capturing and | 5711 // is called when needed. It can see the difference between capturing and |
| 5691 // noncapturing parentheses and can skip character classes and backslash-escaped | 5712 // noncapturing parentheses and can skip character classes and backslash-escaped |
| 5692 // characters. | 5713 // characters. |
| 5693 void RegExpParser::ScanForCaptures() { | 5714 void RegExpParser::ScanForCaptures() { |
| 5694 // Start with captures started previous to current position | 5715 // Start with captures started previous to current position |
| 5695 int capture_count = captures_started(); | 5716 int capture_count = captures_started_; |
| 5696 // Add count of captures after this position. | 5717 // Add count of captures after this position. |
| 5697 int n; | 5718 int n; |
| 5698 while ((n = current()) != kEndMarker) { | 5719 while ((n = current()) != kEndMarker) { |
| 5699 Advance(); | 5720 Advance(); |
| 5700 switch (n) { | 5721 switch (n) { |
| 5701 case '\\': | 5722 case '\\': |
| 5702 Advance(); | 5723 Advance(); |
| 5703 break; | 5724 break; |
| 5704 case '[': { | 5725 case '[': { |
| 5705 int c; | 5726 int c; |
| (...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 5737 value = 10 * value + (c - '0'); | 5758 value = 10 * value + (c - '0'); |
| 5738 if (value > kMaxCaptures) { | 5759 if (value > kMaxCaptures) { |
| 5739 Reset(start); | 5760 Reset(start); |
| 5740 return false; | 5761 return false; |
| 5741 } | 5762 } |
| 5742 Advance(); | 5763 Advance(); |
| 5743 } else { | 5764 } else { |
| 5744 break; | 5765 break; |
| 5745 } | 5766 } |
| 5746 } | 5767 } |
| 5747 if (value > captures_started()) { | 5768 if (value > captures_started_) { |
| 5748 if (!is_scanned_for_captures_) { | 5769 if (!is_scanned_for_captures_) { |
| 5749 int saved_position = position(); | 5770 int saved_position = position(); |
| 5750 ScanForCaptures(); | 5771 ScanForCaptures(); |
| 5751 Reset(saved_position); | 5772 Reset(saved_position); |
| 5752 } | 5773 } |
| 5753 if (value > capture_count_) { | 5774 if (value > capture_count_) { |
| 5754 Reset(start); | 5775 Reset(start); |
| 5755 return false; | 5776 return false; |
| 5756 } | 5777 } |
| 5757 } | 5778 } |
| 5758 *index_out = value; | 5779 *index_out = value; |
| 5759 return true; | 5780 return true; |
| 5760 } | 5781 } |
| 5761 | 5782 |
| 5762 | 5783 |
| 5784 RegExpCapture* RegExpParser::GetCapture(int index) { |
| 5785 // The index for the capture groups are one-based. Its index in the list is |
| 5786 // zero-based. |
| 5787 int know_captures = |
| 5788 is_scanned_for_captures_ ? capture_count_ : captures_started_; |
| 5789 DCHECK(index <= know_captures); |
| 5790 if (captures_ == NULL) { |
| 5791 captures_ = new (zone()) ZoneList<RegExpCapture*>(know_captures, zone()); |
| 5792 } |
| 5793 while (captures_->length() < know_captures) { |
| 5794 captures_->Add(new (zone()) RegExpCapture(captures_->length() + 1), zone()); |
| 5795 } |
| 5796 return captures_->at(index - 1); |
| 5797 } |
| 5798 |
| 5799 |
| 5763 // QuantifierPrefix :: | 5800 // QuantifierPrefix :: |
| 5764 // { DecimalDigits } | 5801 // { DecimalDigits } |
| 5765 // { DecimalDigits , } | 5802 // { DecimalDigits , } |
| 5766 // { DecimalDigits , DecimalDigits } | 5803 // { DecimalDigits , DecimalDigits } |
| 5767 // | 5804 // |
| 5768 // Returns true if parsing succeeds, and set the min_out and max_out | 5805 // Returns true if parsing succeeds, and set the min_out and max_out |
| 5769 // values. Values are truncated to RegExpTree::kInfinity if they overflow. | 5806 // values. Values are truncated to RegExpTree::kInfinity if they overflow. |
| 5770 bool RegExpParser::ParseIntervalQuantifier(int* min_out, int* max_out) { | 5807 bool RegExpParser::ParseIntervalQuantifier(int* min_out, int* max_out) { |
| 5771 DCHECK_EQ(current(), '{'); | 5808 DCHECK_EQ(current(), '{'); |
| 5772 int start = position(); | 5809 int start = position(); |
| (...skipping 259 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 6032 CharacterRange range, | 6069 CharacterRange range, |
| 6033 Zone* zone) { | 6070 Zone* zone) { |
| 6034 if (char_class != kNoCharClass) { | 6071 if (char_class != kNoCharClass) { |
| 6035 CharacterRange::AddClassEscape(char_class, ranges, zone); | 6072 CharacterRange::AddClassEscape(char_class, ranges, zone); |
| 6036 } else { | 6073 } else { |
| 6037 ranges->Add(range, zone); | 6074 ranges->Add(range, zone); |
| 6038 } | 6075 } |
| 6039 } | 6076 } |
| 6040 | 6077 |
| 6041 | 6078 |
| 6042 RegExpTree* RegExpParser::ParseCharacterClass() { | 6079 RegExpTree* RegExpParser::ParseCharacterClass( |
| 6080 RegExpTree::ReadDirection read_direction) { |
| 6043 static const char* kUnterminated = "Unterminated character class"; | 6081 static const char* kUnterminated = "Unterminated character class"; |
| 6044 static const char* kRangeOutOfOrder = "Range out of order in character class"; | 6082 static const char* kRangeOutOfOrder = "Range out of order in character class"; |
| 6045 | 6083 |
| 6046 DCHECK_EQ(current(), '['); | 6084 DCHECK_EQ(current(), '['); |
| 6047 Advance(); | 6085 Advance(); |
| 6048 bool is_negated = false; | 6086 bool is_negated = false; |
| 6049 if (current() == '^') { | 6087 if (current() == '^') { |
| 6050 is_negated = true; | 6088 is_negated = true; |
| 6051 Advance(); | 6089 Advance(); |
| 6052 } | 6090 } |
| (...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 6084 } | 6122 } |
| 6085 } | 6123 } |
| 6086 if (!has_more()) { | 6124 if (!has_more()) { |
| 6087 return ReportError(CStrVector(kUnterminated) CHECK_FAILED); | 6125 return ReportError(CStrVector(kUnterminated) CHECK_FAILED); |
| 6088 } | 6126 } |
| 6089 Advance(); | 6127 Advance(); |
| 6090 if (ranges->length() == 0) { | 6128 if (ranges->length() == 0) { |
| 6091 ranges->Add(CharacterRange::Everything(), zone()); | 6129 ranges->Add(CharacterRange::Everything(), zone()); |
| 6092 is_negated = !is_negated; | 6130 is_negated = !is_negated; |
| 6093 } | 6131 } |
| 6094 return new(zone()) RegExpCharacterClass(ranges, is_negated); | 6132 return new (zone()) RegExpCharacterClass(ranges, is_negated, read_direction); |
| 6095 } | 6133 } |
| 6096 | 6134 |
| 6097 | 6135 |
| 6098 // ---------------------------------------------------------------------------- | 6136 // ---------------------------------------------------------------------------- |
| 6099 // The Parser interface. | 6137 // The Parser interface. |
| 6100 | 6138 |
| 6101 bool RegExpParser::ParseRegExp(Isolate* isolate, Zone* zone, | 6139 bool RegExpParser::ParseRegExp(Isolate* isolate, Zone* zone, |
| 6102 FlatStringReader* input, bool multiline, | 6140 FlatStringReader* input, bool multiline, |
| 6103 bool unicode, RegExpCompileData* result) { | 6141 bool unicode, RegExpCompileData* result) { |
| 6104 DCHECK(result != NULL); | 6142 DCHECK(result != NULL); |
| (...skipping 348 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 6453 } | 6491 } |
| 6454 | 6492 |
| 6455 | 6493 |
| 6456 void Parser::RaiseLanguageMode(LanguageMode mode) { | 6494 void Parser::RaiseLanguageMode(LanguageMode mode) { |
| 6457 SetLanguageMode(scope_, | 6495 SetLanguageMode(scope_, |
| 6458 static_cast<LanguageMode>(scope_->language_mode() | mode)); | 6496 static_cast<LanguageMode>(scope_->language_mode() | mode)); |
| 6459 } | 6497 } |
| 6460 | 6498 |
| 6461 } // namespace internal | 6499 } // namespace internal |
| 6462 } // namespace v8 | 6500 } // namespace v8 |
| OLD | NEW |