| OLD | NEW |
| 1 // Copyright 2012 the V8 project authors. All rights reserved. | 1 // Copyright 2012 the V8 project authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "src/parsing/parser.h" | 5 #include "src/parsing/parser.h" |
| 6 | 6 |
| 7 #include "src/api.h" | 7 #include "src/api.h" |
| 8 #include "src/ast/ast.h" | 8 #include "src/ast/ast.h" |
| 9 #include "src/ast/ast-expression-visitor.h" | 9 #include "src/ast/ast-expression-visitor.h" |
| 10 #include "src/ast/ast-literal-reindexer.h" | 10 #include "src/ast/ast-literal-reindexer.h" |
| (...skipping 77 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 88 set_stack_limit(isolate_->stack_guard()->real_climit()); | 88 set_stack_limit(isolate_->stack_guard()->real_climit()); |
| 89 set_unicode_cache(isolate_->unicode_cache()); | 89 set_unicode_cache(isolate_->unicode_cache()); |
| 90 set_script(script); | 90 set_script(script); |
| 91 | 91 |
| 92 if (script->type() == Script::TYPE_NATIVE) { | 92 if (script->type() == Script::TYPE_NATIVE) { |
| 93 set_native(); | 93 set_native(); |
| 94 } | 94 } |
| 95 } | 95 } |
| 96 | 96 |
| 97 | 97 |
| 98 RegExpBuilder::RegExpBuilder(Zone* zone) | |
| 99 : zone_(zone), | |
| 100 pending_empty_(false), | |
| 101 characters_(NULL), | |
| 102 terms_(), | |
| 103 alternatives_() | |
| 104 #ifdef DEBUG | |
| 105 , last_added_(ADD_NONE) | |
| 106 #endif | |
| 107 {} | |
| 108 | |
| 109 | |
| 110 void RegExpBuilder::FlushCharacters() { | |
| 111 pending_empty_ = false; | |
| 112 if (characters_ != NULL) { | |
| 113 RegExpTree* atom = new(zone()) RegExpAtom(characters_->ToConstVector()); | |
| 114 characters_ = NULL; | |
| 115 text_.Add(atom, zone()); | |
| 116 LAST(ADD_ATOM); | |
| 117 } | |
| 118 } | |
| 119 | |
| 120 | |
| 121 void RegExpBuilder::FlushText() { | |
| 122 FlushCharacters(); | |
| 123 int num_text = text_.length(); | |
| 124 if (num_text == 0) { | |
| 125 return; | |
| 126 } else if (num_text == 1) { | |
| 127 terms_.Add(text_.last(), zone()); | |
| 128 } else { | |
| 129 RegExpText* text = new(zone()) RegExpText(zone()); | |
| 130 for (int i = 0; i < num_text; i++) | |
| 131 text_.Get(i)->AppendToText(text, zone()); | |
| 132 terms_.Add(text, zone()); | |
| 133 } | |
| 134 text_.Clear(); | |
| 135 } | |
| 136 | |
| 137 | |
| 138 void RegExpBuilder::AddCharacter(uc16 c) { | |
| 139 pending_empty_ = false; | |
| 140 if (characters_ == NULL) { | |
| 141 characters_ = new(zone()) ZoneList<uc16>(4, zone()); | |
| 142 } | |
| 143 characters_->Add(c, zone()); | |
| 144 LAST(ADD_CHAR); | |
| 145 } | |
| 146 | |
| 147 | |
| 148 void RegExpBuilder::AddEmpty() { | |
| 149 pending_empty_ = true; | |
| 150 } | |
| 151 | |
| 152 | |
| 153 void RegExpBuilder::AddAtom(RegExpTree* term) { | |
| 154 if (term->IsEmpty()) { | |
| 155 AddEmpty(); | |
| 156 return; | |
| 157 } | |
| 158 if (term->IsTextElement()) { | |
| 159 FlushCharacters(); | |
| 160 text_.Add(term, zone()); | |
| 161 } else { | |
| 162 FlushText(); | |
| 163 terms_.Add(term, zone()); | |
| 164 } | |
| 165 LAST(ADD_ATOM); | |
| 166 } | |
| 167 | |
| 168 | |
| 169 void RegExpBuilder::AddAssertion(RegExpTree* assert) { | |
| 170 FlushText(); | |
| 171 terms_.Add(assert, zone()); | |
| 172 LAST(ADD_ASSERT); | |
| 173 } | |
| 174 | |
| 175 | |
| 176 void RegExpBuilder::NewAlternative() { | |
| 177 FlushTerms(); | |
| 178 } | |
| 179 | |
| 180 | |
| 181 void RegExpBuilder::FlushTerms() { | |
| 182 FlushText(); | |
| 183 int num_terms = terms_.length(); | |
| 184 RegExpTree* alternative; | |
| 185 if (num_terms == 0) { | |
| 186 alternative = new (zone()) RegExpEmpty(); | |
| 187 } else if (num_terms == 1) { | |
| 188 alternative = terms_.last(); | |
| 189 } else { | |
| 190 alternative = new(zone()) RegExpAlternative(terms_.GetList(zone())); | |
| 191 } | |
| 192 alternatives_.Add(alternative, zone()); | |
| 193 terms_.Clear(); | |
| 194 LAST(ADD_NONE); | |
| 195 } | |
| 196 | |
| 197 | |
| 198 RegExpTree* RegExpBuilder::ToRegExp() { | |
| 199 FlushTerms(); | |
| 200 int num_alternatives = alternatives_.length(); | |
| 201 if (num_alternatives == 0) return new (zone()) RegExpEmpty(); | |
| 202 if (num_alternatives == 1) return alternatives_.last(); | |
| 203 return new(zone()) RegExpDisjunction(alternatives_.GetList(zone())); | |
| 204 } | |
| 205 | |
| 206 | |
| 207 void RegExpBuilder::AddQuantifierToAtom( | |
| 208 int min, int max, RegExpQuantifier::QuantifierType quantifier_type) { | |
| 209 if (pending_empty_) { | |
| 210 pending_empty_ = false; | |
| 211 return; | |
| 212 } | |
| 213 RegExpTree* atom; | |
| 214 if (characters_ != NULL) { | |
| 215 DCHECK(last_added_ == ADD_CHAR); | |
| 216 // Last atom was character. | |
| 217 Vector<const uc16> char_vector = characters_->ToConstVector(); | |
| 218 int num_chars = char_vector.length(); | |
| 219 if (num_chars > 1) { | |
| 220 Vector<const uc16> prefix = char_vector.SubVector(0, num_chars - 1); | |
| 221 text_.Add(new(zone()) RegExpAtom(prefix), zone()); | |
| 222 char_vector = char_vector.SubVector(num_chars - 1, num_chars); | |
| 223 } | |
| 224 characters_ = NULL; | |
| 225 atom = new(zone()) RegExpAtom(char_vector); | |
| 226 FlushText(); | |
| 227 } else if (text_.length() > 0) { | |
| 228 DCHECK(last_added_ == ADD_ATOM); | |
| 229 atom = text_.RemoveLast(); | |
| 230 FlushText(); | |
| 231 } else if (terms_.length() > 0) { | |
| 232 DCHECK(last_added_ == ADD_ATOM); | |
| 233 atom = terms_.RemoveLast(); | |
| 234 if (atom->max_match() == 0) { | |
| 235 // Guaranteed to only match an empty string. | |
| 236 LAST(ADD_TERM); | |
| 237 if (min == 0) { | |
| 238 return; | |
| 239 } | |
| 240 terms_.Add(atom, zone()); | |
| 241 return; | |
| 242 } | |
| 243 } else { | |
| 244 // Only call immediately after adding an atom or character! | |
| 245 UNREACHABLE(); | |
| 246 return; | |
| 247 } | |
| 248 terms_.Add( | |
| 249 new(zone()) RegExpQuantifier(min, max, quantifier_type, atom), zone()); | |
| 250 LAST(ADD_TERM); | |
| 251 } | |
| 252 | |
| 253 | |
| 254 FunctionEntry ParseData::GetFunctionEntry(int start) { | 98 FunctionEntry ParseData::GetFunctionEntry(int start) { |
| 255 // The current pre-data entry must be a FunctionEntry with the given | 99 // The current pre-data entry must be a FunctionEntry with the given |
| 256 // start position. | 100 // start position. |
| 257 if ((function_index_ + FunctionEntry::kSize <= Length()) && | 101 if ((function_index_ + FunctionEntry::kSize <= Length()) && |
| 258 (static_cast<int>(Data()[function_index_]) == start)) { | 102 (static_cast<int>(Data()[function_index_]) == start)) { |
| 259 int index = function_index_; | 103 int index = function_index_; |
| 260 function_index_ += FunctionEntry::kSize; | 104 function_index_ += FunctionEntry::kSize; |
| 261 Vector<unsigned> subvector(&(Data()[index]), FunctionEntry::kSize); | 105 Vector<unsigned> subvector(&(Data()[index]), FunctionEntry::kSize); |
| 262 return FunctionEntry(subvector); | 106 return FunctionEntry(subvector); |
| 263 } | 107 } |
| (...skipping 4927 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 5191 for (int i = 0; i < use_counts_[feature]; ++i) { | 5035 for (int i = 0; i < use_counts_[feature]; ++i) { |
| 5192 isolate->CountUsage(v8::Isolate::UseCounterFeature(feature)); | 5036 isolate->CountUsage(v8::Isolate::UseCounterFeature(feature)); |
| 5193 } | 5037 } |
| 5194 } | 5038 } |
| 5195 isolate->counters()->total_preparse_skipped()->Increment( | 5039 isolate->counters()->total_preparse_skipped()->Increment( |
| 5196 total_preparse_skipped_); | 5040 total_preparse_skipped_); |
| 5197 } | 5041 } |
| 5198 | 5042 |
| 5199 | 5043 |
| 5200 // ---------------------------------------------------------------------------- | 5044 // ---------------------------------------------------------------------------- |
| 5201 // Regular expressions | |
| 5202 | |
| 5203 | |
| 5204 RegExpParser::RegExpParser(FlatStringReader* in, Handle<String>* error, | |
| 5205 bool multiline, bool unicode, Isolate* isolate, | |
| 5206 Zone* zone) | |
| 5207 : isolate_(isolate), | |
| 5208 zone_(zone), | |
| 5209 error_(error), | |
| 5210 captures_(NULL), | |
| 5211 in_(in), | |
| 5212 current_(kEndMarker), | |
| 5213 next_pos_(0), | |
| 5214 captures_started_(0), | |
| 5215 capture_count_(0), | |
| 5216 has_more_(true), | |
| 5217 multiline_(multiline), | |
| 5218 unicode_(unicode), | |
| 5219 simple_(false), | |
| 5220 contains_anchor_(false), | |
| 5221 is_scanned_for_captures_(false), | |
| 5222 failed_(false) { | |
| 5223 Advance(); | |
| 5224 } | |
| 5225 | |
| 5226 | |
| 5227 uc32 RegExpParser::Next() { | |
| 5228 if (has_next()) { | |
| 5229 return in()->Get(next_pos_); | |
| 5230 } else { | |
| 5231 return kEndMarker; | |
| 5232 } | |
| 5233 } | |
| 5234 | |
| 5235 | |
| 5236 void RegExpParser::Advance() { | |
| 5237 if (next_pos_ < in()->length()) { | |
| 5238 StackLimitCheck check(isolate()); | |
| 5239 if (check.HasOverflowed()) { | |
| 5240 ReportError(CStrVector(Isolate::kStackOverflowMessage)); | |
| 5241 } else if (zone()->excess_allocation()) { | |
| 5242 ReportError(CStrVector("Regular expression too large")); | |
| 5243 } else { | |
| 5244 current_ = in()->Get(next_pos_); | |
| 5245 next_pos_++; | |
| 5246 } | |
| 5247 } else { | |
| 5248 current_ = kEndMarker; | |
| 5249 // Advance so that position() points to 1-after-the-last-character. This is | |
| 5250 // important so that Reset() to this position works correctly. | |
| 5251 next_pos_ = in()->length() + 1; | |
| 5252 has_more_ = false; | |
| 5253 } | |
| 5254 } | |
| 5255 | |
| 5256 | |
| 5257 void RegExpParser::Reset(int pos) { | |
| 5258 next_pos_ = pos; | |
| 5259 has_more_ = (pos < in()->length()); | |
| 5260 Advance(); | |
| 5261 } | |
| 5262 | |
| 5263 | |
| 5264 void RegExpParser::Advance(int dist) { | |
| 5265 next_pos_ += dist - 1; | |
| 5266 Advance(); | |
| 5267 } | |
| 5268 | |
| 5269 | |
| 5270 bool RegExpParser::simple() { | |
| 5271 return simple_; | |
| 5272 } | |
| 5273 | |
| 5274 | |
| 5275 bool RegExpParser::IsSyntaxCharacter(uc32 c) { | |
| 5276 return c == '^' || c == '$' || c == '\\' || c == '.' || c == '*' || | |
| 5277 c == '+' || c == '?' || c == '(' || c == ')' || c == '[' || c == ']' || | |
| 5278 c == '{' || c == '}' || c == '|'; | |
| 5279 } | |
| 5280 | |
| 5281 | |
| 5282 RegExpTree* RegExpParser::ReportError(Vector<const char> message) { | |
| 5283 failed_ = true; | |
| 5284 *error_ = isolate()->factory()->NewStringFromAscii(message).ToHandleChecked(); | |
| 5285 // Zip to the end to make sure the no more input is read. | |
| 5286 current_ = kEndMarker; | |
| 5287 next_pos_ = in()->length(); | |
| 5288 return NULL; | |
| 5289 } | |
| 5290 | |
| 5291 | |
| 5292 // Pattern :: | |
| 5293 // Disjunction | |
| 5294 RegExpTree* RegExpParser::ParsePattern() { | |
| 5295 RegExpTree* result = ParseDisjunction(CHECK_FAILED); | |
| 5296 DCHECK(!has_more()); | |
| 5297 // If the result of parsing is a literal string atom, and it has the | |
| 5298 // same length as the input, then the atom is identical to the input. | |
| 5299 if (result->IsAtom() && result->AsAtom()->length() == in()->length()) { | |
| 5300 simple_ = true; | |
| 5301 } | |
| 5302 return result; | |
| 5303 } | |
| 5304 | |
| 5305 | |
| 5306 // Disjunction :: | |
| 5307 // Alternative | |
| 5308 // Alternative | Disjunction | |
| 5309 // Alternative :: | |
| 5310 // [empty] | |
| 5311 // Term Alternative | |
| 5312 // Term :: | |
| 5313 // Assertion | |
| 5314 // Atom | |
| 5315 // Atom Quantifier | |
| 5316 RegExpTree* RegExpParser::ParseDisjunction() { | |
| 5317 // Used to store current state while parsing subexpressions. | |
| 5318 RegExpParserState initial_state(NULL, INITIAL, RegExpLookaround::LOOKAHEAD, 0, | |
| 5319 zone()); | |
| 5320 RegExpParserState* state = &initial_state; | |
| 5321 // Cache the builder in a local variable for quick access. | |
| 5322 RegExpBuilder* builder = initial_state.builder(); | |
| 5323 while (true) { | |
| 5324 switch (current()) { | |
| 5325 case kEndMarker: | |
| 5326 if (state->IsSubexpression()) { | |
| 5327 // Inside a parenthesized group when hitting end of input. | |
| 5328 ReportError(CStrVector("Unterminated group") CHECK_FAILED); | |
| 5329 } | |
| 5330 DCHECK_EQ(INITIAL, state->group_type()); | |
| 5331 // Parsing completed successfully. | |
| 5332 return builder->ToRegExp(); | |
| 5333 case ')': { | |
| 5334 if (!state->IsSubexpression()) { | |
| 5335 ReportError(CStrVector("Unmatched ')'") CHECK_FAILED); | |
| 5336 } | |
| 5337 DCHECK_NE(INITIAL, state->group_type()); | |
| 5338 | |
| 5339 Advance(); | |
| 5340 // End disjunction parsing and convert builder content to new single | |
| 5341 // regexp atom. | |
| 5342 RegExpTree* body = builder->ToRegExp(); | |
| 5343 | |
| 5344 int end_capture_index = captures_started(); | |
| 5345 | |
| 5346 int capture_index = state->capture_index(); | |
| 5347 SubexpressionType group_type = state->group_type(); | |
| 5348 | |
| 5349 // Build result of subexpression. | |
| 5350 if (group_type == CAPTURE) { | |
| 5351 RegExpCapture* capture = GetCapture(capture_index); | |
| 5352 capture->set_body(body); | |
| 5353 body = capture; | |
| 5354 } else if (group_type != GROUPING) { | |
| 5355 DCHECK(group_type == POSITIVE_LOOKAROUND || | |
| 5356 group_type == NEGATIVE_LOOKAROUND); | |
| 5357 bool is_positive = (group_type == POSITIVE_LOOKAROUND); | |
| 5358 body = new (zone()) RegExpLookaround( | |
| 5359 body, is_positive, end_capture_index - capture_index, capture_index, | |
| 5360 state->lookaround_type()); | |
| 5361 } | |
| 5362 | |
| 5363 // Restore previous state. | |
| 5364 state = state->previous_state(); | |
| 5365 builder = state->builder(); | |
| 5366 | |
| 5367 builder->AddAtom(body); | |
| 5368 // For compatability with JSC and ES3, we allow quantifiers after | |
| 5369 // lookaheads, and break in all cases. | |
| 5370 break; | |
| 5371 } | |
| 5372 case '|': { | |
| 5373 Advance(); | |
| 5374 builder->NewAlternative(); | |
| 5375 continue; | |
| 5376 } | |
| 5377 case '*': | |
| 5378 case '+': | |
| 5379 case '?': | |
| 5380 return ReportError(CStrVector("Nothing to repeat")); | |
| 5381 case '^': { | |
| 5382 Advance(); | |
| 5383 if (multiline_) { | |
| 5384 builder->AddAssertion( | |
| 5385 new(zone()) RegExpAssertion(RegExpAssertion::START_OF_LINE)); | |
| 5386 } else { | |
| 5387 builder->AddAssertion( | |
| 5388 new(zone()) RegExpAssertion(RegExpAssertion::START_OF_INPUT)); | |
| 5389 set_contains_anchor(); | |
| 5390 } | |
| 5391 continue; | |
| 5392 } | |
| 5393 case '$': { | |
| 5394 Advance(); | |
| 5395 RegExpAssertion::AssertionType assertion_type = | |
| 5396 multiline_ ? RegExpAssertion::END_OF_LINE : | |
| 5397 RegExpAssertion::END_OF_INPUT; | |
| 5398 builder->AddAssertion(new(zone()) RegExpAssertion(assertion_type)); | |
| 5399 continue; | |
| 5400 } | |
| 5401 case '.': { | |
| 5402 Advance(); | |
| 5403 // everything except \x0a, \x0d, \u2028 and \u2029 | |
| 5404 ZoneList<CharacterRange>* ranges = | |
| 5405 new(zone()) ZoneList<CharacterRange>(2, zone()); | |
| 5406 CharacterRange::AddClassEscape('.', ranges, zone()); | |
| 5407 RegExpTree* atom = new(zone()) RegExpCharacterClass(ranges, false); | |
| 5408 builder->AddAtom(atom); | |
| 5409 break; | |
| 5410 } | |
| 5411 case '(': { | |
| 5412 SubexpressionType subexpr_type = CAPTURE; | |
| 5413 RegExpLookaround::Type lookaround_type = state->lookaround_type(); | |
| 5414 Advance(); | |
| 5415 if (current() == '?') { | |
| 5416 switch (Next()) { | |
| 5417 case ':': | |
| 5418 subexpr_type = GROUPING; | |
| 5419 break; | |
| 5420 case '=': | |
| 5421 lookaround_type = RegExpLookaround::LOOKAHEAD; | |
| 5422 subexpr_type = POSITIVE_LOOKAROUND; | |
| 5423 break; | |
| 5424 case '!': | |
| 5425 lookaround_type = RegExpLookaround::LOOKAHEAD; | |
| 5426 subexpr_type = NEGATIVE_LOOKAROUND; | |
| 5427 break; | |
| 5428 case '<': | |
| 5429 if (FLAG_harmony_regexp_lookbehind) { | |
| 5430 Advance(); | |
| 5431 lookaround_type = RegExpLookaround::LOOKBEHIND; | |
| 5432 if (Next() == '=') { | |
| 5433 subexpr_type = POSITIVE_LOOKAROUND; | |
| 5434 break; | |
| 5435 } else if (Next() == '!') { | |
| 5436 subexpr_type = NEGATIVE_LOOKAROUND; | |
| 5437 break; | |
| 5438 } | |
| 5439 } | |
| 5440 // Fall through. | |
| 5441 default: | |
| 5442 ReportError(CStrVector("Invalid group") CHECK_FAILED); | |
| 5443 break; | |
| 5444 } | |
| 5445 Advance(2); | |
| 5446 } else { | |
| 5447 if (captures_started_ >= kMaxCaptures) { | |
| 5448 ReportError(CStrVector("Too many captures") CHECK_FAILED); | |
| 5449 } | |
| 5450 captures_started_++; | |
| 5451 } | |
| 5452 // Store current state and begin new disjunction parsing. | |
| 5453 state = new (zone()) RegExpParserState( | |
| 5454 state, subexpr_type, lookaround_type, captures_started_, zone()); | |
| 5455 builder = state->builder(); | |
| 5456 continue; | |
| 5457 } | |
| 5458 case '[': { | |
| 5459 RegExpTree* atom = ParseCharacterClass(CHECK_FAILED); | |
| 5460 builder->AddAtom(atom); | |
| 5461 break; | |
| 5462 } | |
| 5463 // Atom :: | |
| 5464 // \ AtomEscape | |
| 5465 case '\\': | |
| 5466 switch (Next()) { | |
| 5467 case kEndMarker: | |
| 5468 return ReportError(CStrVector("\\ at end of pattern")); | |
| 5469 case 'b': | |
| 5470 Advance(2); | |
| 5471 builder->AddAssertion( | |
| 5472 new(zone()) RegExpAssertion(RegExpAssertion::BOUNDARY)); | |
| 5473 continue; | |
| 5474 case 'B': | |
| 5475 Advance(2); | |
| 5476 builder->AddAssertion( | |
| 5477 new(zone()) RegExpAssertion(RegExpAssertion::NON_BOUNDARY)); | |
| 5478 continue; | |
| 5479 // AtomEscape :: | |
| 5480 // CharacterClassEscape | |
| 5481 // | |
| 5482 // CharacterClassEscape :: one of | |
| 5483 // d D s S w W | |
| 5484 case 'd': case 'D': case 's': case 'S': case 'w': case 'W': { | |
| 5485 uc32 c = Next(); | |
| 5486 Advance(2); | |
| 5487 ZoneList<CharacterRange>* ranges = | |
| 5488 new(zone()) ZoneList<CharacterRange>(2, zone()); | |
| 5489 CharacterRange::AddClassEscape(c, ranges, zone()); | |
| 5490 RegExpTree* atom = new(zone()) RegExpCharacterClass(ranges, false); | |
| 5491 builder->AddAtom(atom); | |
| 5492 break; | |
| 5493 } | |
| 5494 case '1': case '2': case '3': case '4': case '5': case '6': | |
| 5495 case '7': case '8': case '9': { | |
| 5496 int index = 0; | |
| 5497 if (ParseBackReferenceIndex(&index)) { | |
| 5498 if (state->IsInsideCaptureGroup(index)) { | |
| 5499 // The back reference is inside the capture group it refers to. | |
| 5500 // Nothing can possibly have been captured yet, so we use empty | |
| 5501 // instead. This ensures that, when checking a back reference, | |
| 5502 // the capture registers of the referenced capture are either | |
| 5503 // both set or both cleared. | |
| 5504 builder->AddEmpty(); | |
| 5505 } else { | |
| 5506 RegExpCapture* capture = GetCapture(index); | |
| 5507 RegExpTree* atom = new (zone()) RegExpBackReference(capture); | |
| 5508 builder->AddAtom(atom); | |
| 5509 } | |
| 5510 break; | |
| 5511 } | |
| 5512 uc32 first_digit = Next(); | |
| 5513 if (first_digit == '8' || first_digit == '9') { | |
| 5514 // If the 'u' flag is present, only syntax characters can be escaped, | |
| 5515 // no other identity escapes are allowed. If the 'u' flag is not | |
| 5516 // present, all identity escapes are allowed. | |
| 5517 if (!FLAG_harmony_unicode_regexps || !unicode_) { | |
| 5518 builder->AddCharacter(first_digit); | |
| 5519 Advance(2); | |
| 5520 } else { | |
| 5521 return ReportError(CStrVector("Invalid escape")); | |
| 5522 } | |
| 5523 break; | |
| 5524 } | |
| 5525 } | |
| 5526 // FALLTHROUGH | |
| 5527 case '0': { | |
| 5528 Advance(); | |
| 5529 uc32 octal = ParseOctalLiteral(); | |
| 5530 builder->AddCharacter(octal); | |
| 5531 break; | |
| 5532 } | |
| 5533 // ControlEscape :: one of | |
| 5534 // f n r t v | |
| 5535 case 'f': | |
| 5536 Advance(2); | |
| 5537 builder->AddCharacter('\f'); | |
| 5538 break; | |
| 5539 case 'n': | |
| 5540 Advance(2); | |
| 5541 builder->AddCharacter('\n'); | |
| 5542 break; | |
| 5543 case 'r': | |
| 5544 Advance(2); | |
| 5545 builder->AddCharacter('\r'); | |
| 5546 break; | |
| 5547 case 't': | |
| 5548 Advance(2); | |
| 5549 builder->AddCharacter('\t'); | |
| 5550 break; | |
| 5551 case 'v': | |
| 5552 Advance(2); | |
| 5553 builder->AddCharacter('\v'); | |
| 5554 break; | |
| 5555 case 'c': { | |
| 5556 Advance(); | |
| 5557 uc32 controlLetter = Next(); | |
| 5558 // Special case if it is an ASCII letter. | |
| 5559 // Convert lower case letters to uppercase. | |
| 5560 uc32 letter = controlLetter & ~('a' ^ 'A'); | |
| 5561 if (letter < 'A' || 'Z' < letter) { | |
| 5562 // controlLetter is not in range 'A'-'Z' or 'a'-'z'. | |
| 5563 // This is outside the specification. We match JSC in | |
| 5564 // reading the backslash as a literal character instead | |
| 5565 // of as starting an escape. | |
| 5566 builder->AddCharacter('\\'); | |
| 5567 } else { | |
| 5568 Advance(2); | |
| 5569 builder->AddCharacter(controlLetter & 0x1f); | |
| 5570 } | |
| 5571 break; | |
| 5572 } | |
| 5573 case 'x': { | |
| 5574 Advance(2); | |
| 5575 uc32 value; | |
| 5576 if (ParseHexEscape(2, &value)) { | |
| 5577 builder->AddCharacter(value); | |
| 5578 } else if (!FLAG_harmony_unicode_regexps || !unicode_) { | |
| 5579 builder->AddCharacter('x'); | |
| 5580 } else { | |
| 5581 // If the 'u' flag is present, invalid escapes are not treated as | |
| 5582 // identity escapes. | |
| 5583 return ReportError(CStrVector("Invalid escape")); | |
| 5584 } | |
| 5585 break; | |
| 5586 } | |
| 5587 case 'u': { | |
| 5588 Advance(2); | |
| 5589 uc32 value; | |
| 5590 if (ParseUnicodeEscape(&value)) { | |
| 5591 builder->AddCharacter(value); | |
| 5592 } else if (!FLAG_harmony_unicode_regexps || !unicode_) { | |
| 5593 builder->AddCharacter('u'); | |
| 5594 } else { | |
| 5595 // If the 'u' flag is present, invalid escapes are not treated as | |
| 5596 // identity escapes. | |
| 5597 return ReportError(CStrVector("Invalid unicode escape")); | |
| 5598 } | |
| 5599 break; | |
| 5600 } | |
| 5601 default: | |
| 5602 Advance(); | |
| 5603 // If the 'u' flag is present, only syntax characters can be escaped, no | |
| 5604 // other identity escapes are allowed. If the 'u' flag is not present, | |
| 5605 // all identity escapes are allowed. | |
| 5606 if (!FLAG_harmony_unicode_regexps || !unicode_ || | |
| 5607 IsSyntaxCharacter(current())) { | |
| 5608 builder->AddCharacter(current()); | |
| 5609 Advance(); | |
| 5610 } else { | |
| 5611 return ReportError(CStrVector("Invalid escape")); | |
| 5612 } | |
| 5613 break; | |
| 5614 } | |
| 5615 break; | |
| 5616 case '{': { | |
| 5617 int dummy; | |
| 5618 if (ParseIntervalQuantifier(&dummy, &dummy)) { | |
| 5619 ReportError(CStrVector("Nothing to repeat") CHECK_FAILED); | |
| 5620 } | |
| 5621 // fallthrough | |
| 5622 } | |
| 5623 default: | |
| 5624 builder->AddCharacter(current()); | |
| 5625 Advance(); | |
| 5626 break; | |
| 5627 } // end switch(current()) | |
| 5628 | |
| 5629 int min; | |
| 5630 int max; | |
| 5631 switch (current()) { | |
| 5632 // QuantifierPrefix :: | |
| 5633 // * | |
| 5634 // + | |
| 5635 // ? | |
| 5636 // { | |
| 5637 case '*': | |
| 5638 min = 0; | |
| 5639 max = RegExpTree::kInfinity; | |
| 5640 Advance(); | |
| 5641 break; | |
| 5642 case '+': | |
| 5643 min = 1; | |
| 5644 max = RegExpTree::kInfinity; | |
| 5645 Advance(); | |
| 5646 break; | |
| 5647 case '?': | |
| 5648 min = 0; | |
| 5649 max = 1; | |
| 5650 Advance(); | |
| 5651 break; | |
| 5652 case '{': | |
| 5653 if (ParseIntervalQuantifier(&min, &max)) { | |
| 5654 if (max < min) { | |
| 5655 ReportError(CStrVector("numbers out of order in {} quantifier.") | |
| 5656 CHECK_FAILED); | |
| 5657 } | |
| 5658 break; | |
| 5659 } else { | |
| 5660 continue; | |
| 5661 } | |
| 5662 default: | |
| 5663 continue; | |
| 5664 } | |
| 5665 RegExpQuantifier::QuantifierType quantifier_type = RegExpQuantifier::GREEDY; | |
| 5666 if (current() == '?') { | |
| 5667 quantifier_type = RegExpQuantifier::NON_GREEDY; | |
| 5668 Advance(); | |
| 5669 } else if (FLAG_regexp_possessive_quantifier && current() == '+') { | |
| 5670 // FLAG_regexp_possessive_quantifier is a debug-only flag. | |
| 5671 quantifier_type = RegExpQuantifier::POSSESSIVE; | |
| 5672 Advance(); | |
| 5673 } | |
| 5674 builder->AddQuantifierToAtom(min, max, quantifier_type); | |
| 5675 } | |
| 5676 } | |
| 5677 | |
| 5678 | |
| 5679 #ifdef DEBUG | |
| 5680 // Currently only used in an DCHECK. | |
| 5681 static bool IsSpecialClassEscape(uc32 c) { | |
| 5682 switch (c) { | |
| 5683 case 'd': case 'D': | |
| 5684 case 's': case 'S': | |
| 5685 case 'w': case 'W': | |
| 5686 return true; | |
| 5687 default: | |
| 5688 return false; | |
| 5689 } | |
| 5690 } | |
| 5691 #endif | |
| 5692 | |
| 5693 | |
| 5694 // In order to know whether an escape is a backreference or not we have to scan | |
| 5695 // the entire regexp and find the number of capturing parentheses. However we | |
| 5696 // don't want to scan the regexp twice unless it is necessary. This mini-parser | |
| 5697 // is called when needed. It can see the difference between capturing and | |
| 5698 // noncapturing parentheses and can skip character classes and backslash-escaped | |
| 5699 // characters. | |
| 5700 void RegExpParser::ScanForCaptures() { | |
| 5701 // Start with captures started previous to current position | |
| 5702 int capture_count = captures_started(); | |
| 5703 // Add count of captures after this position. | |
| 5704 int n; | |
| 5705 while ((n = current()) != kEndMarker) { | |
| 5706 Advance(); | |
| 5707 switch (n) { | |
| 5708 case '\\': | |
| 5709 Advance(); | |
| 5710 break; | |
| 5711 case '[': { | |
| 5712 int c; | |
| 5713 while ((c = current()) != kEndMarker) { | |
| 5714 Advance(); | |
| 5715 if (c == '\\') { | |
| 5716 Advance(); | |
| 5717 } else { | |
| 5718 if (c == ']') break; | |
| 5719 } | |
| 5720 } | |
| 5721 break; | |
| 5722 } | |
| 5723 case '(': | |
| 5724 if (current() != '?') capture_count++; | |
| 5725 break; | |
| 5726 } | |
| 5727 } | |
| 5728 capture_count_ = capture_count; | |
| 5729 is_scanned_for_captures_ = true; | |
| 5730 } | |
| 5731 | |
| 5732 | |
| 5733 bool RegExpParser::ParseBackReferenceIndex(int* index_out) { | |
| 5734 DCHECK_EQ('\\', current()); | |
| 5735 DCHECK('1' <= Next() && Next() <= '9'); | |
| 5736 // Try to parse a decimal literal that is no greater than the total number | |
| 5737 // of left capturing parentheses in the input. | |
| 5738 int start = position(); | |
| 5739 int value = Next() - '0'; | |
| 5740 Advance(2); | |
| 5741 while (true) { | |
| 5742 uc32 c = current(); | |
| 5743 if (IsDecimalDigit(c)) { | |
| 5744 value = 10 * value + (c - '0'); | |
| 5745 if (value > kMaxCaptures) { | |
| 5746 Reset(start); | |
| 5747 return false; | |
| 5748 } | |
| 5749 Advance(); | |
| 5750 } else { | |
| 5751 break; | |
| 5752 } | |
| 5753 } | |
| 5754 if (value > captures_started()) { | |
| 5755 if (!is_scanned_for_captures_) { | |
| 5756 int saved_position = position(); | |
| 5757 ScanForCaptures(); | |
| 5758 Reset(saved_position); | |
| 5759 } | |
| 5760 if (value > capture_count_) { | |
| 5761 Reset(start); | |
| 5762 return false; | |
| 5763 } | |
| 5764 } | |
| 5765 *index_out = value; | |
| 5766 return true; | |
| 5767 } | |
| 5768 | |
| 5769 | |
| 5770 RegExpCapture* RegExpParser::GetCapture(int index) { | |
| 5771 // The index for the capture groups are one-based. Its index in the list is | |
| 5772 // zero-based. | |
| 5773 int know_captures = | |
| 5774 is_scanned_for_captures_ ? capture_count_ : captures_started_; | |
| 5775 DCHECK(index <= know_captures); | |
| 5776 if (captures_ == NULL) { | |
| 5777 captures_ = new (zone()) ZoneList<RegExpCapture*>(know_captures, zone()); | |
| 5778 } | |
| 5779 while (captures_->length() < know_captures) { | |
| 5780 captures_->Add(new (zone()) RegExpCapture(captures_->length() + 1), zone()); | |
| 5781 } | |
| 5782 return captures_->at(index - 1); | |
| 5783 } | |
| 5784 | |
| 5785 | |
| 5786 bool RegExpParser::RegExpParserState::IsInsideCaptureGroup(int index) { | |
| 5787 for (RegExpParserState* s = this; s != NULL; s = s->previous_state()) { | |
| 5788 if (s->group_type() != CAPTURE) continue; | |
| 5789 // Return true if we found the matching capture index. | |
| 5790 if (index == s->capture_index()) return true; | |
| 5791 // Abort if index is larger than what has been parsed up till this state. | |
| 5792 if (index > s->capture_index()) return false; | |
| 5793 } | |
| 5794 return false; | |
| 5795 } | |
| 5796 | |
| 5797 | |
| 5798 // QuantifierPrefix :: | |
| 5799 // { DecimalDigits } | |
| 5800 // { DecimalDigits , } | |
| 5801 // { DecimalDigits , DecimalDigits } | |
| 5802 // | |
| 5803 // Returns true if parsing succeeds, and set the min_out and max_out | |
| 5804 // values. Values are truncated to RegExpTree::kInfinity if they overflow. | |
| 5805 bool RegExpParser::ParseIntervalQuantifier(int* min_out, int* max_out) { | |
| 5806 DCHECK_EQ(current(), '{'); | |
| 5807 int start = position(); | |
| 5808 Advance(); | |
| 5809 int min = 0; | |
| 5810 if (!IsDecimalDigit(current())) { | |
| 5811 Reset(start); | |
| 5812 return false; | |
| 5813 } | |
| 5814 while (IsDecimalDigit(current())) { | |
| 5815 int next = current() - '0'; | |
| 5816 if (min > (RegExpTree::kInfinity - next) / 10) { | |
| 5817 // Overflow. Skip past remaining decimal digits and return -1. | |
| 5818 do { | |
| 5819 Advance(); | |
| 5820 } while (IsDecimalDigit(current())); | |
| 5821 min = RegExpTree::kInfinity; | |
| 5822 break; | |
| 5823 } | |
| 5824 min = 10 * min + next; | |
| 5825 Advance(); | |
| 5826 } | |
| 5827 int max = 0; | |
| 5828 if (current() == '}') { | |
| 5829 max = min; | |
| 5830 Advance(); | |
| 5831 } else if (current() == ',') { | |
| 5832 Advance(); | |
| 5833 if (current() == '}') { | |
| 5834 max = RegExpTree::kInfinity; | |
| 5835 Advance(); | |
| 5836 } else { | |
| 5837 while (IsDecimalDigit(current())) { | |
| 5838 int next = current() - '0'; | |
| 5839 if (max > (RegExpTree::kInfinity - next) / 10) { | |
| 5840 do { | |
| 5841 Advance(); | |
| 5842 } while (IsDecimalDigit(current())); | |
| 5843 max = RegExpTree::kInfinity; | |
| 5844 break; | |
| 5845 } | |
| 5846 max = 10 * max + next; | |
| 5847 Advance(); | |
| 5848 } | |
| 5849 if (current() != '}') { | |
| 5850 Reset(start); | |
| 5851 return false; | |
| 5852 } | |
| 5853 Advance(); | |
| 5854 } | |
| 5855 } else { | |
| 5856 Reset(start); | |
| 5857 return false; | |
| 5858 } | |
| 5859 *min_out = min; | |
| 5860 *max_out = max; | |
| 5861 return true; | |
| 5862 } | |
| 5863 | |
| 5864 | |
| 5865 uc32 RegExpParser::ParseOctalLiteral() { | |
| 5866 DCHECK(('0' <= current() && current() <= '7') || current() == kEndMarker); | |
| 5867 // For compatibility with some other browsers (not all), we parse | |
| 5868 // up to three octal digits with a value below 256. | |
| 5869 uc32 value = current() - '0'; | |
| 5870 Advance(); | |
| 5871 if ('0' <= current() && current() <= '7') { | |
| 5872 value = value * 8 + current() - '0'; | |
| 5873 Advance(); | |
| 5874 if (value < 32 && '0' <= current() && current() <= '7') { | |
| 5875 value = value * 8 + current() - '0'; | |
| 5876 Advance(); | |
| 5877 } | |
| 5878 } | |
| 5879 return value; | |
| 5880 } | |
| 5881 | |
| 5882 | |
| 5883 bool RegExpParser::ParseHexEscape(int length, uc32* value) { | |
| 5884 int start = position(); | |
| 5885 uc32 val = 0; | |
| 5886 for (int i = 0; i < length; ++i) { | |
| 5887 uc32 c = current(); | |
| 5888 int d = HexValue(c); | |
| 5889 if (d < 0) { | |
| 5890 Reset(start); | |
| 5891 return false; | |
| 5892 } | |
| 5893 val = val * 16 + d; | |
| 5894 Advance(); | |
| 5895 } | |
| 5896 *value = val; | |
| 5897 return true; | |
| 5898 } | |
| 5899 | |
| 5900 | |
| 5901 bool RegExpParser::ParseUnicodeEscape(uc32* value) { | |
| 5902 // Accept both \uxxxx and \u{xxxxxx} (if harmony unicode escapes are | |
| 5903 // allowed). In the latter case, the number of hex digits between { } is | |
| 5904 // arbitrary. \ and u have already been read. | |
| 5905 if (current() == '{' && FLAG_harmony_unicode_regexps && unicode_) { | |
| 5906 int start = position(); | |
| 5907 Advance(); | |
| 5908 if (ParseUnlimitedLengthHexNumber(0x10ffff, value)) { | |
| 5909 if (current() == '}') { | |
| 5910 Advance(); | |
| 5911 return true; | |
| 5912 } | |
| 5913 } | |
| 5914 Reset(start); | |
| 5915 return false; | |
| 5916 } | |
| 5917 // \u but no {, or \u{...} escapes not allowed. | |
| 5918 return ParseHexEscape(4, value); | |
| 5919 } | |
| 5920 | |
| 5921 | |
| 5922 bool RegExpParser::ParseUnlimitedLengthHexNumber(int max_value, uc32* value) { | |
| 5923 uc32 x = 0; | |
| 5924 int d = HexValue(current()); | |
| 5925 if (d < 0) { | |
| 5926 return false; | |
| 5927 } | |
| 5928 while (d >= 0) { | |
| 5929 x = x * 16 + d; | |
| 5930 if (x > max_value) { | |
| 5931 return false; | |
| 5932 } | |
| 5933 Advance(); | |
| 5934 d = HexValue(current()); | |
| 5935 } | |
| 5936 *value = x; | |
| 5937 return true; | |
| 5938 } | |
| 5939 | |
| 5940 | |
| 5941 uc32 RegExpParser::ParseClassCharacterEscape() { | |
| 5942 DCHECK(current() == '\\'); | |
| 5943 DCHECK(has_next() && !IsSpecialClassEscape(Next())); | |
| 5944 Advance(); | |
| 5945 switch (current()) { | |
| 5946 case 'b': | |
| 5947 Advance(); | |
| 5948 return '\b'; | |
| 5949 // ControlEscape :: one of | |
| 5950 // f n r t v | |
| 5951 case 'f': | |
| 5952 Advance(); | |
| 5953 return '\f'; | |
| 5954 case 'n': | |
| 5955 Advance(); | |
| 5956 return '\n'; | |
| 5957 case 'r': | |
| 5958 Advance(); | |
| 5959 return '\r'; | |
| 5960 case 't': | |
| 5961 Advance(); | |
| 5962 return '\t'; | |
| 5963 case 'v': | |
| 5964 Advance(); | |
| 5965 return '\v'; | |
| 5966 case 'c': { | |
| 5967 uc32 controlLetter = Next(); | |
| 5968 uc32 letter = controlLetter & ~('A' ^ 'a'); | |
| 5969 // For compatibility with JSC, inside a character class | |
| 5970 // we also accept digits and underscore as control characters. | |
| 5971 if ((controlLetter >= '0' && controlLetter <= '9') || | |
| 5972 controlLetter == '_' || | |
| 5973 (letter >= 'A' && letter <= 'Z')) { | |
| 5974 Advance(2); | |
| 5975 // Control letters mapped to ASCII control characters in the range | |
| 5976 // 0x00-0x1f. | |
| 5977 return controlLetter & 0x1f; | |
| 5978 } | |
| 5979 // We match JSC in reading the backslash as a literal | |
| 5980 // character instead of as starting an escape. | |
| 5981 return '\\'; | |
| 5982 } | |
| 5983 case '0': case '1': case '2': case '3': case '4': case '5': | |
| 5984 case '6': case '7': | |
| 5985 // For compatibility, we interpret a decimal escape that isn't | |
| 5986 // a back reference (and therefore either \0 or not valid according | |
| 5987 // to the specification) as a 1..3 digit octal character code. | |
| 5988 return ParseOctalLiteral(); | |
| 5989 case 'x': { | |
| 5990 Advance(); | |
| 5991 uc32 value; | |
| 5992 if (ParseHexEscape(2, &value)) { | |
| 5993 return value; | |
| 5994 } | |
| 5995 if (!FLAG_harmony_unicode_regexps || !unicode_) { | |
| 5996 // If \x is not followed by a two-digit hexadecimal, treat it | |
| 5997 // as an identity escape. | |
| 5998 return 'x'; | |
| 5999 } | |
| 6000 // If the 'u' flag is present, invalid escapes are not treated as | |
| 6001 // identity escapes. | |
| 6002 ReportError(CStrVector("Invalid escape")); | |
| 6003 return 0; | |
| 6004 } | |
| 6005 case 'u': { | |
| 6006 Advance(); | |
| 6007 uc32 value; | |
| 6008 if (ParseUnicodeEscape(&value)) { | |
| 6009 return value; | |
| 6010 } | |
| 6011 if (!FLAG_harmony_unicode_regexps || !unicode_) { | |
| 6012 return 'u'; | |
| 6013 } | |
| 6014 // If the 'u' flag is present, invalid escapes are not treated as | |
| 6015 // identity escapes. | |
| 6016 ReportError(CStrVector("Invalid unicode escape")); | |
| 6017 return 0; | |
| 6018 } | |
| 6019 default: { | |
| 6020 uc32 result = current(); | |
| 6021 // If the 'u' flag is present, only syntax characters can be escaped, no | |
| 6022 // other identity escapes are allowed. If the 'u' flag is not present, all | |
| 6023 // identity escapes are allowed. | |
| 6024 if (!FLAG_harmony_unicode_regexps || !unicode_ || | |
| 6025 IsSyntaxCharacter(result)) { | |
| 6026 Advance(); | |
| 6027 return result; | |
| 6028 } | |
| 6029 ReportError(CStrVector("Invalid escape")); | |
| 6030 return 0; | |
| 6031 } | |
| 6032 } | |
| 6033 return 0; | |
| 6034 } | |
| 6035 | |
| 6036 | |
| 6037 CharacterRange RegExpParser::ParseClassAtom(uc16* char_class) { | |
| 6038 DCHECK_EQ(0, *char_class); | |
| 6039 uc32 first = current(); | |
| 6040 if (first == '\\') { | |
| 6041 switch (Next()) { | |
| 6042 case 'w': case 'W': case 'd': case 'D': case 's': case 'S': { | |
| 6043 *char_class = Next(); | |
| 6044 Advance(2); | |
| 6045 return CharacterRange::Singleton(0); // Return dummy value. | |
| 6046 } | |
| 6047 case kEndMarker: | |
| 6048 return ReportError(CStrVector("\\ at end of pattern")); | |
| 6049 default: | |
| 6050 uc32 c = ParseClassCharacterEscape(CHECK_FAILED); | |
| 6051 return CharacterRange::Singleton(c); | |
| 6052 } | |
| 6053 } else { | |
| 6054 Advance(); | |
| 6055 return CharacterRange::Singleton(first); | |
| 6056 } | |
| 6057 } | |
| 6058 | |
| 6059 | |
| 6060 static const uc16 kNoCharClass = 0; | |
| 6061 | |
| 6062 // Adds range or pre-defined character class to character ranges. | |
| 6063 // If char_class is not kInvalidClass, it's interpreted as a class | |
| 6064 // escape (i.e., 's' means whitespace, from '\s'). | |
| 6065 static inline void AddRangeOrEscape(ZoneList<CharacterRange>* ranges, | |
| 6066 uc16 char_class, | |
| 6067 CharacterRange range, | |
| 6068 Zone* zone) { | |
| 6069 if (char_class != kNoCharClass) { | |
| 6070 CharacterRange::AddClassEscape(char_class, ranges, zone); | |
| 6071 } else { | |
| 6072 ranges->Add(range, zone); | |
| 6073 } | |
| 6074 } | |
| 6075 | |
| 6076 | |
| 6077 RegExpTree* RegExpParser::ParseCharacterClass() { | |
| 6078 static const char* kUnterminated = "Unterminated character class"; | |
| 6079 static const char* kRangeOutOfOrder = "Range out of order in character class"; | |
| 6080 | |
| 6081 DCHECK_EQ(current(), '['); | |
| 6082 Advance(); | |
| 6083 bool is_negated = false; | |
| 6084 if (current() == '^') { | |
| 6085 is_negated = true; | |
| 6086 Advance(); | |
| 6087 } | |
| 6088 ZoneList<CharacterRange>* ranges = | |
| 6089 new(zone()) ZoneList<CharacterRange>(2, zone()); | |
| 6090 while (has_more() && current() != ']') { | |
| 6091 uc16 char_class = kNoCharClass; | |
| 6092 CharacterRange first = ParseClassAtom(&char_class CHECK_FAILED); | |
| 6093 if (current() == '-') { | |
| 6094 Advance(); | |
| 6095 if (current() == kEndMarker) { | |
| 6096 // If we reach the end we break out of the loop and let the | |
| 6097 // following code report an error. | |
| 6098 break; | |
| 6099 } else if (current() == ']') { | |
| 6100 AddRangeOrEscape(ranges, char_class, first, zone()); | |
| 6101 ranges->Add(CharacterRange::Singleton('-'), zone()); | |
| 6102 break; | |
| 6103 } | |
| 6104 uc16 char_class_2 = kNoCharClass; | |
| 6105 CharacterRange next = ParseClassAtom(&char_class_2 CHECK_FAILED); | |
| 6106 if (char_class != kNoCharClass || char_class_2 != kNoCharClass) { | |
| 6107 // Either end is an escaped character class. Treat the '-' verbatim. | |
| 6108 AddRangeOrEscape(ranges, char_class, first, zone()); | |
| 6109 ranges->Add(CharacterRange::Singleton('-'), zone()); | |
| 6110 AddRangeOrEscape(ranges, char_class_2, next, zone()); | |
| 6111 continue; | |
| 6112 } | |
| 6113 if (first.from() > next.to()) { | |
| 6114 return ReportError(CStrVector(kRangeOutOfOrder) CHECK_FAILED); | |
| 6115 } | |
| 6116 ranges->Add(CharacterRange::Range(first.from(), next.to()), zone()); | |
| 6117 } else { | |
| 6118 AddRangeOrEscape(ranges, char_class, first, zone()); | |
| 6119 } | |
| 6120 } | |
| 6121 if (!has_more()) { | |
| 6122 return ReportError(CStrVector(kUnterminated) CHECK_FAILED); | |
| 6123 } | |
| 6124 Advance(); | |
| 6125 if (ranges->length() == 0) { | |
| 6126 ranges->Add(CharacterRange::Everything(), zone()); | |
| 6127 is_negated = !is_negated; | |
| 6128 } | |
| 6129 return new (zone()) RegExpCharacterClass(ranges, is_negated); | |
| 6130 } | |
| 6131 | |
| 6132 | |
| 6133 // ---------------------------------------------------------------------------- | |
| 6134 // The Parser interface. | 5045 // The Parser interface. |
| 6135 | 5046 |
| 6136 bool RegExpParser::ParseRegExp(Isolate* isolate, Zone* zone, | |
| 6137 FlatStringReader* input, bool multiline, | |
| 6138 bool unicode, RegExpCompileData* result) { | |
| 6139 DCHECK(result != NULL); | |
| 6140 RegExpParser parser(input, &result->error, multiline, unicode, isolate, zone); | |
| 6141 RegExpTree* tree = parser.ParsePattern(); | |
| 6142 if (parser.failed()) { | |
| 6143 DCHECK(tree == NULL); | |
| 6144 DCHECK(!result->error.is_null()); | |
| 6145 } else { | |
| 6146 DCHECK(tree != NULL); | |
| 6147 DCHECK(result->error.is_null()); | |
| 6148 result->tree = tree; | |
| 6149 int capture_count = parser.captures_started(); | |
| 6150 result->simple = tree->IsAtom() && parser.simple() && capture_count == 0; | |
| 6151 result->contains_anchor = parser.contains_anchor(); | |
| 6152 result->capture_count = capture_count; | |
| 6153 } | |
| 6154 return !parser.failed(); | |
| 6155 } | |
| 6156 | |
| 6157 | 5047 |
| 6158 bool Parser::ParseStatic(ParseInfo* info) { | 5048 bool Parser::ParseStatic(ParseInfo* info) { |
| 6159 Parser parser(info); | 5049 Parser parser(info); |
| 6160 if (parser.Parse(info)) { | 5050 if (parser.Parse(info)) { |
| 6161 info->set_language_mode(info->literal()->language_mode()); | 5051 info->set_language_mode(info->literal()->language_mode()); |
| 6162 return true; | 5052 return true; |
| 6163 } | 5053 } |
| 6164 return false; | 5054 return false; |
| 6165 } | 5055 } |
| 6166 | 5056 |
| (...skipping 393 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 6560 auto class_literal = value->AsClassLiteral(); | 5450 auto class_literal = value->AsClassLiteral(); |
| 6561 if (class_literal->raw_name() == nullptr) { | 5451 if (class_literal->raw_name() == nullptr) { |
| 6562 class_literal->set_raw_name(name); | 5452 class_literal->set_raw_name(name); |
| 6563 } | 5453 } |
| 6564 } | 5454 } |
| 6565 } | 5455 } |
| 6566 | 5456 |
| 6567 | 5457 |
| 6568 } // namespace internal | 5458 } // namespace internal |
| 6569 } // namespace v8 | 5459 } // namespace v8 |
| OLD | NEW |