OLD | NEW |
---|---|
1 // Copyright 2016 the V8 project authors. All rights reserved. | 1 // Copyright 2016 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "src/regexp/regexp-parser.h" | 5 #include "src/regexp/regexp-parser.h" |
6 | 6 |
7 #include "src/char-predicates-inl.h" | 7 #include "src/char-predicates-inl.h" |
8 #include "src/factory.h" | 8 #include "src/factory.h" |
9 #include "src/isolate.h" | 9 #include "src/isolate.h" |
10 #include "src/objects-inl.h" | 10 #include "src/objects-inl.h" |
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
49 void RegExpParser::Advance() { | 49 void RegExpParser::Advance() { |
50 if (next_pos_ < in()->length()) { | 50 if (next_pos_ < in()->length()) { |
51 StackLimitCheck check(isolate()); | 51 StackLimitCheck check(isolate()); |
52 if (check.HasOverflowed()) { | 52 if (check.HasOverflowed()) { |
53 ReportError(CStrVector(Isolate::kStackOverflowMessage)); | 53 ReportError(CStrVector(Isolate::kStackOverflowMessage)); |
54 } else if (zone()->excess_allocation()) { | 54 } else if (zone()->excess_allocation()) { |
55 ReportError(CStrVector("Regular expression too large")); | 55 ReportError(CStrVector("Regular expression too large")); |
56 } else { | 56 } else { |
57 current_ = in()->Get(next_pos_); | 57 current_ = in()->Get(next_pos_); |
58 next_pos_++; | 58 next_pos_++; |
59 // Read the whole surrogate pair in case of unicode flag, if possible. | |
60 if (unicode_ && next_pos_ < in()->length() && | |
61 unibrow::Utf16::IsLeadSurrogate(static_cast<uc16>(current_))) { | |
62 uc16 trail = in()->Get(next_pos_); | |
63 if (unibrow::Utf16::IsTrailSurrogate(trail)) { | |
64 current_ = unibrow::Utf16::CombineSurrogatePair( | |
65 static_cast<uc16>(current_), trail); | |
66 next_pos_++; | |
67 } | |
68 } | |
59 } | 69 } |
60 } else { | 70 } else { |
61 current_ = kEndMarker; | 71 current_ = kEndMarker; |
62 // Advance so that position() points to 1-after-the-last-character. This is | 72 // Advance so that position() points to 1-after-the-last-character. This is |
63 // important so that Reset() to this position works correctly. | 73 // important so that Reset() to this position works correctly. |
64 next_pos_ = in()->length() + 1; | 74 next_pos_ = in()->length() + 1; |
65 has_more_ = false; | 75 has_more_ = false; |
66 } | 76 } |
67 } | 77 } |
68 | 78 |
(...skipping 341 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
410 // If the 'u' flag is present, invalid escapes are not treated as | 420 // If the 'u' flag is present, invalid escapes are not treated as |
411 // identity escapes. | 421 // identity escapes. |
412 return ReportError(CStrVector("Invalid escape")); | 422 return ReportError(CStrVector("Invalid escape")); |
413 } | 423 } |
414 break; | 424 break; |
415 } | 425 } |
416 case 'u': { | 426 case 'u': { |
417 Advance(2); | 427 Advance(2); |
418 uc32 value; | 428 uc32 value; |
419 if (ParseUnicodeEscape(&value)) { | 429 if (ParseUnicodeEscape(&value)) { |
420 if (value > unibrow::Utf16::kMaxNonSurrogateCharCode) { | 430 builder->AddUnicodeCharacter(value); |
421 builder->AddCharacter(unibrow::Utf16::LeadSurrogate(value)); | |
422 builder->AddCharacter(unibrow::Utf16::TrailSurrogate(value)); | |
423 } else { | |
424 builder->AddCharacter(static_cast<uc16>(value)); | |
425 } | |
426 } else if (!FLAG_harmony_unicode_regexps || !unicode_) { | 431 } else if (!FLAG_harmony_unicode_regexps || !unicode_) { |
427 builder->AddCharacter('u'); | 432 builder->AddCharacter('u'); |
428 } else { | 433 } else { |
429 // If the 'u' flag is present, invalid escapes are not treated as | 434 // If the 'u' flag is present, invalid escapes are not treated as |
430 // identity escapes. | 435 // identity escapes. |
431 return ReportError(CStrVector("Invalid unicode escape")); | 436 return ReportError(CStrVector("Invalid unicode escape")); |
432 } | 437 } |
433 break; | 438 break; |
434 } | 439 } |
435 default: | 440 default: |
(...skipping 14 matching lines...) Expand all Loading... | |
450 } | 455 } |
451 break; | 456 break; |
452 case '{': { | 457 case '{': { |
453 int dummy; | 458 int dummy; |
454 if (ParseIntervalQuantifier(&dummy, &dummy)) { | 459 if (ParseIntervalQuantifier(&dummy, &dummy)) { |
455 ReportError(CStrVector("Nothing to repeat") CHECK_FAILED); | 460 ReportError(CStrVector("Nothing to repeat") CHECK_FAILED); |
456 } | 461 } |
457 // fallthrough | 462 // fallthrough |
458 } | 463 } |
459 default: | 464 default: |
460 builder->AddCharacter(current()); | 465 if (unicode_) { |
rossberg
2016/01/11 12:25:41
Nit: is this if necessary? Can't you always use AddUnicodeCharacter?
Yang
2016/01/11 14:41:44
Good point. Done.
| |
466 builder->AddUnicodeCharacter(current()); | |
467 } else { | |
468 builder->AddCharacter(current()); | |
469 } | |
461 Advance(); | 470 Advance(); |
462 break; | 471 break; |
463 } // end switch(current()) | 472 } // end switch(current()) |
464 | 473 |
465 int min; | 474 int min; |
466 int max; | 475 int max; |
467 switch (current()) { | 476 switch (current()) { |
468 // QuantifierPrefix :: | 477 // QuantifierPrefix :: |
469 // * | 478 // * |
470 // + | 479 // + |
(...skipping 581 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1052 void RegExpBuilder::AddCharacter(uc16 c) { | 1061 void RegExpBuilder::AddCharacter(uc16 c) { |
1053 pending_empty_ = false; | 1062 pending_empty_ = false; |
1054 if (characters_ == NULL) { | 1063 if (characters_ == NULL) { |
1055 characters_ = new (zone()) ZoneList<uc16>(4, zone()); | 1064 characters_ = new (zone()) ZoneList<uc16>(4, zone()); |
1056 } | 1065 } |
1057 characters_->Add(c, zone()); | 1066 characters_->Add(c, zone()); |
1058 LAST(ADD_CHAR); | 1067 LAST(ADD_CHAR); |
1059 } | 1068 } |
1060 | 1069 |
1061 | 1070 |
1071 void RegExpBuilder::AddUnicodeCharacter(uc32 c) { | |
1072 if (c > unibrow::Utf16::kMaxNonSurrogateCharCode) { | |
1073 ZoneList<uc16> surrogate_pair(2, zone()); | |
1074 surrogate_pair.Add(unibrow::Utf16::LeadSurrogate(c), zone()); | |
1075 surrogate_pair.Add(unibrow::Utf16::TrailSurrogate(c), zone()); | |
1076 RegExpAtom* atom = new (zone()) RegExpAtom(surrogate_pair.ToConstVector()); | |
1077 AddAtom(atom); | |
1078 } else { | |
1079 AddCharacter(static_cast<uc16>(c)); | |
1080 } | |
1081 } | |
1082 | |
1083 | |
1062 void RegExpBuilder::AddEmpty() { pending_empty_ = true; } | 1084 void RegExpBuilder::AddEmpty() { pending_empty_ = true; } |
1063 | 1085 |
1064 | 1086 |
1065 void RegExpBuilder::AddAtom(RegExpTree* term) { | 1087 void RegExpBuilder::AddAtom(RegExpTree* term) { |
1066 if (term->IsEmpty()) { | 1088 if (term->IsEmpty()) { |
1067 AddEmpty(); | 1089 AddEmpty(); |
1068 return; | 1090 return; |
1069 } | 1091 } |
1070 if (term->IsTextElement()) { | 1092 if (term->IsTextElement()) { |
1071 FlushCharacters(); | 1093 FlushCharacters(); |
(...skipping 83 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1155 UNREACHABLE(); | 1177 UNREACHABLE(); |
1156 return; | 1178 return; |
1157 } | 1179 } |
1158 terms_.Add(new (zone()) RegExpQuantifier(min, max, quantifier_type, atom), | 1180 terms_.Add(new (zone()) RegExpQuantifier(min, max, quantifier_type, atom), |
1159 zone()); | 1181 zone()); |
1160 LAST(ADD_TERM); | 1182 LAST(ADD_TERM); |
1161 } | 1183 } |
1162 | 1184 |
1163 } // namespace internal | 1185 } // namespace internal |
1164 } // namespace v8 | 1186 } // namespace v8 |
OLD | NEW |