Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright 2016 the V8 project authors. All rights reserved. | 1 // Copyright 2016 the V8 project authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "src/regexp/regexp-parser.h" | 5 #include "src/regexp/regexp-parser.h" |
| 6 | 6 |
| 7 #include "src/char-predicates-inl.h" | 7 #include "src/char-predicates-inl.h" |
| 8 #include "src/factory.h" | 8 #include "src/factory.h" |
| 9 #include "src/isolate.h" | 9 #include "src/isolate.h" |
| 10 #include "src/objects-inl.h" | 10 #include "src/objects-inl.h" |
| (...skipping 38 matching lines...) | |
| 49 void RegExpParser::Advance() { | 49 void RegExpParser::Advance() { |
| 50 if (next_pos_ < in()->length()) { | 50 if (next_pos_ < in()->length()) { |
| 51 StackLimitCheck check(isolate()); | 51 StackLimitCheck check(isolate()); |
| 52 if (check.HasOverflowed()) { | 52 if (check.HasOverflowed()) { |
| 53 ReportError(CStrVector(Isolate::kStackOverflowMessage)); | 53 ReportError(CStrVector(Isolate::kStackOverflowMessage)); |
| 54 } else if (zone()->excess_allocation()) { | 54 } else if (zone()->excess_allocation()) { |
| 55 ReportError(CStrVector("Regular expression too large")); | 55 ReportError(CStrVector("Regular expression too large")); |
| 56 } else { | 56 } else { |
| 57 current_ = in()->Get(next_pos_); | 57 current_ = in()->Get(next_pos_); |
| 58 next_pos_++; | 58 next_pos_++; |
| 59 // Read the whole surrogate pair in case of unicode flag, if possible. | |
| 60 if (unicode_ && next_pos_ < in()->length() && | |
| 61 unibrow::Utf16::IsLeadSurrogate(static_cast<uc16>(current_))) { | |
| 62 uc16 trail = in()->Get(next_pos_); | |
| 63 if (unibrow::Utf16::IsTrailSurrogate(trail)) { | |
| 64 current_ = unibrow::Utf16::CombineSurrogatePair( | |
| 65 static_cast<uc16>(current_), trail); | |
| 66 next_pos_++; | |
| 67 } | |
| 68 } | |
| 59 } | 69 } |
| 60 } else { | 70 } else { |
| 61 current_ = kEndMarker; | 71 current_ = kEndMarker; |
| 62 // Advance so that position() points to 1-after-the-last-character. This is | 72 // Advance so that position() points to 1-after-the-last-character. This is |
| 63 // important so that Reset() to this position works correctly. | 73 // important so that Reset() to this position works correctly. |
| 64 next_pos_ = in()->length() + 1; | 74 next_pos_ = in()->length() + 1; |
| 65 has_more_ = false; | 75 has_more_ = false; |
| 66 } | 76 } |
| 67 } | 77 } |
| 68 | 78 |
| (...skipping 341 matching lines...) | |
| 410 // If the 'u' flag is present, invalid escapes are not treated as | 420 // If the 'u' flag is present, invalid escapes are not treated as |
| 411 // identity escapes. | 421 // identity escapes. |
| 412 return ReportError(CStrVector("Invalid escape")); | 422 return ReportError(CStrVector("Invalid escape")); |
| 413 } | 423 } |
| 414 break; | 424 break; |
| 415 } | 425 } |
| 416 case 'u': { | 426 case 'u': { |
| 417 Advance(2); | 427 Advance(2); |
| 418 uc32 value; | 428 uc32 value; |
| 419 if (ParseUnicodeEscape(&value)) { | 429 if (ParseUnicodeEscape(&value)) { |
| 420 if (value > unibrow::Utf16::kMaxNonSurrogateCharCode) { | 430 builder->AddUnicodeCharacter(value); |
| 421 builder->AddCharacter(unibrow::Utf16::LeadSurrogate(value)); | |
| 422 builder->AddCharacter(unibrow::Utf16::TrailSurrogate(value)); | |
| 423 } else { | |
| 424 builder->AddCharacter(static_cast<uc16>(value)); | |
| 425 } | |
| 426 } else if (!FLAG_harmony_unicode_regexps || !unicode_) { | 431 } else if (!FLAG_harmony_unicode_regexps || !unicode_) { |
| 427 builder->AddCharacter('u'); | 432 builder->AddCharacter('u'); |
| 428 } else { | 433 } else { |
| 429 // If the 'u' flag is present, invalid escapes are not treated as | 434 // If the 'u' flag is present, invalid escapes are not treated as |
| 430 // identity escapes. | 435 // identity escapes. |
| 431 return ReportError(CStrVector("Invalid unicode escape")); | 436 return ReportError(CStrVector("Invalid unicode escape")); |
| 432 } | 437 } |
| 433 break; | 438 break; |
| 434 } | 439 } |
| 435 default: | 440 default: |
| (...skipping 14 matching lines...) | |
| 450 } | 455 } |
| 451 break; | 456 break; |
| 452 case '{': { | 457 case '{': { |
| 453 int dummy; | 458 int dummy; |
| 454 if (ParseIntervalQuantifier(&dummy, &dummy)) { | 459 if (ParseIntervalQuantifier(&dummy, &dummy)) { |
| 455 ReportError(CStrVector("Nothing to repeat") CHECK_FAILED); | 460 ReportError(CStrVector("Nothing to repeat") CHECK_FAILED); |
| 456 } | 461 } |
| 457 // fallthrough | 462 // fallthrough |
| 458 } | 463 } |
| 459 default: | 464 default: |
| 460 builder->AddCharacter(current()); | 465 if (unicode_) { |
|
rossberg
2016/01/11 12:25:41
Nit: is this `if` necessary? Can't you always use Ad… [comment truncated in the page capture; presumably "AddUnicodeCharacter"]
Yang
2016/01/11 14:41:44
Good point. Done.
| |
| 466 builder->AddUnicodeCharacter(current()); | |
| 467 } else { | |
| 468 builder->AddCharacter(current()); | |
| 469 } | |
| 461 Advance(); | 470 Advance(); |
| 462 break; | 471 break; |
| 463 } // end switch(current()) | 472 } // end switch(current()) |
| 464 | 473 |
| 465 int min; | 474 int min; |
| 466 int max; | 475 int max; |
| 467 switch (current()) { | 476 switch (current()) { |
| 468 // QuantifierPrefix :: | 477 // QuantifierPrefix :: |
| 469 // * | 478 // * |
| 470 // + | 479 // + |
| (...skipping 581 matching lines...) | |
| 1052 void RegExpBuilder::AddCharacter(uc16 c) { | 1061 void RegExpBuilder::AddCharacter(uc16 c) { |
| 1053 pending_empty_ = false; | 1062 pending_empty_ = false; |
| 1054 if (characters_ == NULL) { | 1063 if (characters_ == NULL) { |
| 1055 characters_ = new (zone()) ZoneList<uc16>(4, zone()); | 1064 characters_ = new (zone()) ZoneList<uc16>(4, zone()); |
| 1056 } | 1065 } |
| 1057 characters_->Add(c, zone()); | 1066 characters_->Add(c, zone()); |
| 1058 LAST(ADD_CHAR); | 1067 LAST(ADD_CHAR); |
| 1059 } | 1068 } |
| 1060 | 1069 |
| 1061 | 1070 |
| 1071 void RegExpBuilder::AddUnicodeCharacter(uc32 c) { | |
| 1072 if (c > unibrow::Utf16::kMaxNonSurrogateCharCode) { | |
| 1073 ZoneList<uc16> surrogate_pair(2, zone()); | |
| 1074 surrogate_pair.Add(unibrow::Utf16::LeadSurrogate(c), zone()); | |
| 1075 surrogate_pair.Add(unibrow::Utf16::TrailSurrogate(c), zone()); | |
| 1076 RegExpAtom* atom = new (zone()) RegExpAtom(surrogate_pair.ToConstVector()); | |
| 1077 AddAtom(atom); | |
| 1078 } else { | |
| 1079 AddCharacter(static_cast<uc16>(c)); | |
| 1080 } | |
| 1081 } | |
| 1082 | |
| 1083 | |
| 1062 void RegExpBuilder::AddEmpty() { pending_empty_ = true; } | 1084 void RegExpBuilder::AddEmpty() { pending_empty_ = true; } |
| 1063 | 1085 |
| 1064 | 1086 |
| 1065 void RegExpBuilder::AddAtom(RegExpTree* term) { | 1087 void RegExpBuilder::AddAtom(RegExpTree* term) { |
| 1066 if (term->IsEmpty()) { | 1088 if (term->IsEmpty()) { |
| 1067 AddEmpty(); | 1089 AddEmpty(); |
| 1068 return; | 1090 return; |
| 1069 } | 1091 } |
| 1070 if (term->IsTextElement()) { | 1092 if (term->IsTextElement()) { |
| 1071 FlushCharacters(); | 1093 FlushCharacters(); |
| (...skipping 83 matching lines...) | |
| 1155 UNREACHABLE(); | 1177 UNREACHABLE(); |
| 1156 return; | 1178 return; |
| 1157 } | 1179 } |
| 1158 terms_.Add(new (zone()) RegExpQuantifier(min, max, quantifier_type, atom), | 1180 terms_.Add(new (zone()) RegExpQuantifier(min, max, quantifier_type, atom), |
| 1159 zone()); | 1181 zone()); |
| 1160 LAST(ADD_TERM); | 1182 LAST(ADD_TERM); |
| 1161 } | 1183 } |
| 1162 | 1184 |
| 1163 } // namespace internal | 1185 } // namespace internal |
| 1164 } // namespace v8 | 1186 } // namespace v8 |
| OLD | NEW |