OLD | NEW |
1 // Copyright 2016 the V8 project authors. All rights reserved. | 1 // Copyright 2016 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "src/regexp/regexp-parser.h" | 5 #include "src/regexp/regexp-parser.h" |
6 | 6 |
7 #include "src/char-predicates-inl.h" | 7 #include "src/char-predicates-inl.h" |
8 #include "src/factory.h" | 8 #include "src/factory.h" |
9 #include "src/isolate.h" | 9 #include "src/isolate.h" |
10 #include "src/objects-inl.h" | 10 #include "src/objects-inl.h" |
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
49 void RegExpParser::Advance() { | 49 void RegExpParser::Advance() { |
50 if (next_pos_ < in()->length()) { | 50 if (next_pos_ < in()->length()) { |
51 StackLimitCheck check(isolate()); | 51 StackLimitCheck check(isolate()); |
52 if (check.HasOverflowed()) { | 52 if (check.HasOverflowed()) { |
53 ReportError(CStrVector(Isolate::kStackOverflowMessage)); | 53 ReportError(CStrVector(Isolate::kStackOverflowMessage)); |
54 } else if (zone()->excess_allocation()) { | 54 } else if (zone()->excess_allocation()) { |
55 ReportError(CStrVector("Regular expression too large")); | 55 ReportError(CStrVector("Regular expression too large")); |
56 } else { | 56 } else { |
57 current_ = in()->Get(next_pos_); | 57 current_ = in()->Get(next_pos_); |
58 next_pos_++; | 58 next_pos_++; |
| 59 // Read the whole surrogate pair in case of unicode flag, if possible. |
| 60 if (unicode_ && next_pos_ < in()->length() && |
| 61 unibrow::Utf16::IsLeadSurrogate(static_cast<uc16>(current_))) { |
| 62 uc16 trail = in()->Get(next_pos_); |
| 63 if (unibrow::Utf16::IsTrailSurrogate(trail)) { |
| 64 current_ = unibrow::Utf16::CombineSurrogatePair( |
| 65 static_cast<uc16>(current_), trail); |
| 66 next_pos_++; |
| 67 } |
| 68 } |
59 } | 69 } |
60 } else { | 70 } else { |
61 current_ = kEndMarker; | 71 current_ = kEndMarker; |
62 // Advance so that position() points to 1-after-the-last-character. This is | 72 // Advance so that position() points to 1-after-the-last-character. This is |
63 // important so that Reset() to this position works correctly. | 73 // important so that Reset() to this position works correctly. |
64 next_pos_ = in()->length() + 1; | 74 next_pos_ = in()->length() + 1; |
65 has_more_ = false; | 75 has_more_ = false; |
66 } | 76 } |
67 } | 77 } |
68 | 78 |
(...skipping 341 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
410 // If the 'u' flag is present, invalid escapes are not treated as | 420 // If the 'u' flag is present, invalid escapes are not treated as |
411 // identity escapes. | 421 // identity escapes. |
412 return ReportError(CStrVector("Invalid escape")); | 422 return ReportError(CStrVector("Invalid escape")); |
413 } | 423 } |
414 break; | 424 break; |
415 } | 425 } |
416 case 'u': { | 426 case 'u': { |
417 Advance(2); | 427 Advance(2); |
418 uc32 value; | 428 uc32 value; |
419 if (ParseUnicodeEscape(&value)) { | 429 if (ParseUnicodeEscape(&value)) { |
420 if (value > unibrow::Utf16::kMaxNonSurrogateCharCode) { | 430 builder->AddUnicodeCharacter(value); |
421 builder->AddCharacter(unibrow::Utf16::LeadSurrogate(value)); | |
422 builder->AddCharacter(unibrow::Utf16::TrailSurrogate(value)); | |
423 } else { | |
424 builder->AddCharacter(static_cast<uc16>(value)); | |
425 } | |
426 } else if (!unicode_) { | 431 } else if (!unicode_) { |
427 builder->AddCharacter('u'); | 432 builder->AddCharacter('u'); |
428 } else { | 433 } else { |
429 // If the 'u' flag is present, invalid escapes are not treated as | 434 // If the 'u' flag is present, invalid escapes are not treated as |
430 // identity escapes. | 435 // identity escapes. |
431 return ReportError(CStrVector("Invalid unicode escape")); | 436 return ReportError(CStrVector("Invalid unicode escape")); |
432 } | 437 } |
433 break; | 438 break; |
434 } | 439 } |
435 default: | 440 default: |
(...skipping 13 matching lines...) Expand all Loading... |
449 } | 454 } |
450 break; | 455 break; |
451 case '{': { | 456 case '{': { |
452 int dummy; | 457 int dummy; |
453 if (ParseIntervalQuantifier(&dummy, &dummy)) { | 458 if (ParseIntervalQuantifier(&dummy, &dummy)) { |
454 ReportError(CStrVector("Nothing to repeat") CHECK_FAILED); | 459 ReportError(CStrVector("Nothing to repeat") CHECK_FAILED); |
455 } | 460 } |
456 // fallthrough | 461 // fallthrough |
457 } | 462 } |
458 default: | 463 default: |
459 builder->AddCharacter(current()); | 464 builder->AddUnicodeCharacter(current()); |
460 Advance(); | 465 Advance(); |
461 break; | 466 break; |
462 } // end switch(current()) | 467 } // end switch(current()) |
463 | 468 |
464 int min; | 469 int min; |
465 int max; | 470 int max; |
466 switch (current()) { | 471 switch (current()) { |
467 // QuantifierPrefix :: | 472 // QuantifierPrefix :: |
468 // * | 473 // * |
469 // + | 474 // + |
(...skipping 580 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1050 void RegExpBuilder::AddCharacter(uc16 c) { | 1055 void RegExpBuilder::AddCharacter(uc16 c) { |
1051 pending_empty_ = false; | 1056 pending_empty_ = false; |
1052 if (characters_ == NULL) { | 1057 if (characters_ == NULL) { |
1053 characters_ = new (zone()) ZoneList<uc16>(4, zone()); | 1058 characters_ = new (zone()) ZoneList<uc16>(4, zone()); |
1054 } | 1059 } |
1055 characters_->Add(c, zone()); | 1060 characters_->Add(c, zone()); |
1056 LAST(ADD_CHAR); | 1061 LAST(ADD_CHAR); |
1057 } | 1062 } |
1058 | 1063 |
1059 | 1064 |
| 1065 void RegExpBuilder::AddUnicodeCharacter(uc32 c) { |
| 1066 if (c > unibrow::Utf16::kMaxNonSurrogateCharCode) { |
| 1067 ZoneList<uc16> surrogate_pair(2, zone()); |
| 1068 surrogate_pair.Add(unibrow::Utf16::LeadSurrogate(c), zone()); |
| 1069 surrogate_pair.Add(unibrow::Utf16::TrailSurrogate(c), zone()); |
| 1070 RegExpAtom* atom = new (zone()) RegExpAtom(surrogate_pair.ToConstVector()); |
| 1071 AddAtom(atom); |
| 1072 } else { |
| 1073 AddCharacter(static_cast<uc16>(c)); |
| 1074 } |
| 1075 } |
| 1076 |
| 1077 |
1060 void RegExpBuilder::AddEmpty() { pending_empty_ = true; } | 1078 void RegExpBuilder::AddEmpty() { pending_empty_ = true; } |
1061 | 1079 |
1062 | 1080 |
1063 void RegExpBuilder::AddAtom(RegExpTree* term) { | 1081 void RegExpBuilder::AddAtom(RegExpTree* term) { |
1064 if (term->IsEmpty()) { | 1082 if (term->IsEmpty()) { |
1065 AddEmpty(); | 1083 AddEmpty(); |
1066 return; | 1084 return; |
1067 } | 1085 } |
1068 if (term->IsTextElement()) { | 1086 if (term->IsTextElement()) { |
1069 FlushCharacters(); | 1087 FlushCharacters(); |
(...skipping 83 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1153 UNREACHABLE(); | 1171 UNREACHABLE(); |
1154 return; | 1172 return; |
1155 } | 1173 } |
1156 terms_.Add(new (zone()) RegExpQuantifier(min, max, quantifier_type, atom), | 1174 terms_.Add(new (zone()) RegExpQuantifier(min, max, quantifier_type, atom), |
1157 zone()); | 1175 zone()); |
1158 LAST(ADD_TERM); | 1176 LAST(ADD_TERM); |
1159 } | 1177 } |
1160 | 1178 |
1161 } // namespace internal | 1179 } // namespace internal |
1162 } // namespace v8 | 1180 } // namespace v8 |
OLD | NEW |