OLD | NEW |
---|---|
1 // Copyright 2012 the V8 project authors. All rights reserved. | 1 // Copyright 2012 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "src/v8.h" | 5 #include "src/v8.h" |
6 | 6 |
7 #include "src/api.h" | 7 #include "src/api.h" |
8 #include "src/ast.h" | 8 #include "src/ast.h" |
9 #include "src/bailout-reason.h" | 9 #include "src/bailout-reason.h" |
10 #include "src/base/platform/platform.h" | 10 #include "src/base/platform/platform.h" |
(...skipping 4260 matching lines...) | |
4271 } | 4271 } |
4272 isolate()->counters()->total_preparse_skipped()->Increment( | 4272 isolate()->counters()->total_preparse_skipped()->Increment( |
4273 total_preparse_skipped_); | 4273 total_preparse_skipped_); |
4274 } | 4274 } |
4275 | 4275 |
4276 | 4276 |
4277 // ---------------------------------------------------------------------------- | 4277 // ---------------------------------------------------------------------------- |
4278 // Regular expressions | 4278 // Regular expressions |
4279 | 4279 |
4280 | 4280 |
4281 RegExpParser::RegExpParser(FlatStringReader* in, | 4281 RegExpParser::RegExpParser(FlatStringReader* in, Handle<String>* error, |
4282 Handle<String>* error, | 4282 bool multiline, bool unicode, Zone* zone) |
4283 bool multiline, | |
4284 Zone* zone) | |
4285 : isolate_(zone->isolate()), | 4283 : isolate_(zone->isolate()), |
4286 zone_(zone), | 4284 zone_(zone), |
4287 error_(error), | 4285 error_(error), |
4288 captures_(NULL), | 4286 captures_(NULL), |
4289 in_(in), | 4287 in_(in), |
4290 current_(kEndMarker), | 4288 current_(kEndMarker), |
4291 next_pos_(0), | 4289 next_pos_(0), |
4292 capture_count_(0), | 4290 capture_count_(0), |
4293 has_more_(true), | 4291 has_more_(true), |
4294 multiline_(multiline), | 4292 multiline_(multiline), |
4293 unicode_(unicode), | |
4295 simple_(false), | 4294 simple_(false), |
4296 contains_anchor_(false), | 4295 contains_anchor_(false), |
4297 is_scanned_for_captures_(false), | 4296 is_scanned_for_captures_(false), |
4298 failed_(false) { | 4297 failed_(false) { |
4299 Advance(); | 4298 Advance(); |
4300 } | 4299 } |
4301 | 4300 |
4302 | 4301 |
4303 uc32 RegExpParser::Next() { | 4302 uc32 RegExpParser::Next() { |
4304 if (has_next()) { | 4303 if (has_next()) { |
(...skipping 36 matching lines...) | |
4341 next_pos_ += dist - 1; | 4340 next_pos_ += dist - 1; |
4342 Advance(); | 4341 Advance(); |
4343 } | 4342 } |
4344 | 4343 |
4345 | 4344 |
4346 bool RegExpParser::simple() { | 4345 bool RegExpParser::simple() { |
4347 return simple_; | 4346 return simple_; |
4348 } | 4347 } |
4349 | 4348 |
4350 | 4349 |
4350 bool RegExpParser::IsSyntaxCharacter(uc32 c) { | |
4351 return c == '^' || c == '$' || c == '\\' || c == '.' || c == '*' || | |
4352 c == '+' || c == '?' || c == '(' || c == ')' || c == '[' || c == ']' || | |
4353 c == '{' || c == '}' || c == '|'; | |
4354 } | |
mathias (2015/01/08 12:29:07): Should `-` be a “syntax character” as well because… [comment truncated]
marja (2015/01/08 13:42:18): The spec (draft rev 30) says: SyntaxCharacter :: … [comment truncated]
rossberg (2015/01/08 14:11:45): It's not a SyntaxCharacter and that's likely inten… [comment truncated]
mathias (2015/01/21 07:16:54): /[\-]/u is now allowed: https://bugs.ecmascript.or… [comment truncated]
| |
4355 | |
4356 | |
4351 RegExpTree* RegExpParser::ReportError(Vector<const char> message) { | 4357 RegExpTree* RegExpParser::ReportError(Vector<const char> message) { |
4352 failed_ = true; | 4358 failed_ = true; |
4353 *error_ = isolate()->factory()->NewStringFromAscii(message).ToHandleChecked(); | 4359 *error_ = isolate()->factory()->NewStringFromAscii(message).ToHandleChecked(); |
4354 // Zip to the end to make sure that no more input is read. | 4360 // Zip to the end to make sure that no more input is read. |
4355 current_ = kEndMarker; | 4361 current_ = kEndMarker; |
4356 next_pos_ = in()->length(); | 4362 next_pos_ = in()->length(); |
4357 return NULL; | 4363 return NULL; |
4358 } | 4364 } |
4359 | 4365 |
4360 | 4366 |
(...skipping 196 matching lines...) | |
4557 if (capture == NULL) { | 4563 if (capture == NULL) { |
4558 builder->AddEmpty(); | 4564 builder->AddEmpty(); |
4559 break; | 4565 break; |
4560 } | 4566 } |
4561 RegExpTree* atom = new(zone()) RegExpBackReference(capture); | 4567 RegExpTree* atom = new(zone()) RegExpBackReference(capture); |
4562 builder->AddAtom(atom); | 4568 builder->AddAtom(atom); |
4563 break; | 4569 break; |
4564 } | 4570 } |
4565 uc32 first_digit = Next(); | 4571 uc32 first_digit = Next(); |
4566 if (first_digit == '8' || first_digit == '9') { | 4572 if (first_digit == '8' || first_digit == '9') { |
4567 // Treat as identity escape | 4573 // If the 'u' flag is present, only syntax characters can be escaped, |
4568 builder->AddCharacter(first_digit); | 4574 // no other identity escapes are allowed. If the 'u' flag is not |
4569 Advance(2); | 4575 // present, all identity escapes are allowed. |
4576 if (!FLAG_harmony_unicode || !unicode_) { | |
4577 builder->AddCharacter(first_digit); | |
4578 Advance(2); | |
4579 } else { | |
4580 return ReportError(CStrVector("Invalid escape")); | |
4581 } | |
4570 break; | 4582 break; |
4571 } | 4583 } |
4572 } | 4584 } |
4573 // FALLTHROUGH | 4585 // FALLTHROUGH |
4574 case '0': { | 4586 case '0': { |
4575 Advance(); | 4587 Advance(); |
4576 uc32 octal = ParseOctalLiteral(); | 4588 uc32 octal = ParseOctalLiteral(); |
4577 builder->AddCharacter(octal); | 4589 builder->AddCharacter(octal); |
4578 break; | 4590 break; |
4579 } | 4591 } |
(...skipping 35 matching lines...) | |
4615 Advance(2); | 4627 Advance(2); |
4616 builder->AddCharacter(controlLetter & 0x1f); | 4628 builder->AddCharacter(controlLetter & 0x1f); |
4617 } | 4629 } |
4618 break; | 4630 break; |
4619 } | 4631 } |
4620 case 'x': { | 4632 case 'x': { |
4621 Advance(2); | 4633 Advance(2); |
4622 uc32 value; | 4634 uc32 value; |
4623 if (ParseHexEscape(2, &value)) { | 4635 if (ParseHexEscape(2, &value)) { |
4624 builder->AddCharacter(value); | 4636 builder->AddCharacter(value); |
4637 } else if (!FLAG_harmony_unicode || !unicode_) { | |
4638 builder->AddCharacter('x'); | |
4625 } else { | 4639 } else { |
4626 builder->AddCharacter('x'); | 4640 // If the 'u' flag is present, invalid escapes are not treated as |
4641 // identity escapes. | |
4642 return ReportError(CStrVector("Invalid escape")); | |
4627 } | 4643 } |
4628 break; | 4644 break; |
4629 } | 4645 } |
4630 case 'u': { | 4646 case 'u': { |
4631 Advance(2); | 4647 Advance(2); |
4632 uc32 value; | 4648 uc32 value; |
4633 if (ParseHexEscape(4, &value)) { | 4649 if (ParseUnicodeEscape(&value)) { |
4634 builder->AddCharacter(value); | 4650 builder->AddCharacter(value); |
4651 } else if (!FLAG_harmony_unicode || !unicode_) { | |
4652 builder->AddCharacter('u'); | |
4635 } else { | 4653 } else { |
4636 builder->AddCharacter('u'); | 4654 // If the 'u' flag is present, invalid escapes are not treated as |
4655 // identity escapes. | |
4656 return ReportError(CStrVector("Invalid unicode escape")); | |
4637 } | 4657 } |
4638 break; | 4658 break; |
4639 } | 4659 } |
4640 default: | 4660 default: |
4641 // Identity escape. | 4661 Advance(); |
4642 builder->AddCharacter(Next()); | 4662 // If the 'u' flag is present, only syntax characters can be escaped, no |
4643 Advance(2); | 4663 // other identity escapes are allowed. If the 'u' flag is not present, |
4664 // all identity escapes are allowed. | |
4665 if (!FLAG_harmony_unicode || !unicode_ || | |
4666 IsSyntaxCharacter(current())) { | |
4667 builder->AddCharacter(current()); | |
4668 Advance(); | |
4669 } else { | |
4670 return ReportError(CStrVector("Invalid escape")); | |
4671 } | |
4644 break; | 4672 break; |
4645 } | 4673 } |
4646 break; | 4674 break; |
4647 case '{': { | 4675 case '{': { |
4648 int dummy; | 4676 int dummy; |
4649 if (ParseIntervalQuantifier(&dummy, &dummy)) { | 4677 if (ParseIntervalQuantifier(&dummy, &dummy)) { |
4650 ReportError(CStrVector("Nothing to repeat") CHECK_FAILED); | 4678 ReportError(CStrVector("Nothing to repeat") CHECK_FAILED); |
4651 } | 4679 } |
4652 // fallthrough | 4680 // fallthrough |
4653 } | 4681 } |
(...skipping 222 matching lines...) | |
4876 Advance(); | 4904 Advance(); |
4877 if (value < 32 && '0' <= current() && current() <= '7') { | 4905 if (value < 32 && '0' <= current() && current() <= '7') { |
4878 value = value * 8 + current() - '0'; | 4906 value = value * 8 + current() - '0'; |
4879 Advance(); | 4907 Advance(); |
4880 } | 4908 } |
4881 } | 4909 } |
4882 return value; | 4910 return value; |
4883 } | 4911 } |
4884 | 4912 |
4885 | 4913 |
4886 bool RegExpParser::ParseHexEscape(int length, uc32 *value) { | 4914 bool RegExpParser::ParseHexEscape(int length, uc32* value) { |
4887 int start = position(); | 4915 int start = position(); |
4888 uc32 val = 0; | 4916 uc32 val = 0; |
4889 bool done = false; | 4917 for (int i = 0; i < length; ++i) { |
4890 for (int i = 0; !done; i++) { | |
4891 uc32 c = current(); | 4918 uc32 c = current(); |
4892 int d = HexValue(c); | 4919 int d = HexValue(c); |
4893 if (d < 0) { | 4920 if (d < 0) { |
4894 Reset(start); | 4921 Reset(start); |
4895 return false; | 4922 return false; |
4896 } | 4923 } |
4897 val = val * 16 + d; | 4924 val = val * 16 + d; |
4898 Advance(); | 4925 Advance(); |
4899 if (i == length - 1) { | |
4900 done = true; | |
4901 } | |
4902 } | 4926 } |
4903 *value = val; | 4927 *value = val; |
4904 return true; | 4928 return true; |
4905 } | 4929 } |
4906 | 4930 |
4907 | 4931 |
4932 bool RegExpParser::ParseUnicodeEscape(uc32* value) { | |
4933 // Accept both \uxxxx and \u{xxxxxx} (if harmony unicode escapes are | |
4934 // allowed). In the latter case, the number of hex digits between { } is | |
4935 // arbitrary. \ and u have already been read. | |
4936 if (current() == '{' && FLAG_harmony_unicode && unicode_) { | |
4937 int start = position(); | |
4938 Advance(); | |
4939 if (ParseUnlimitedLengthHexNumber(0x10ffff, value)) { | |
4940 if (current() == '}') { | |
4941 Advance(); | |
4942 return true; | |
4943 } | |
4944 } | |
4945 Reset(start); | |
4946 return false; | |
4947 } | |
4948 // \u but no {, or \u{...} escapes not allowed. | |
4949 return ParseHexEscape(4, value); | |
4950 } | |
4951 | |
4952 | |
4953 bool RegExpParser::ParseUnlimitedLengthHexNumber(int max_value, uc32* value) { | |
4954 uc32 x = 0; | |
4955 int d = HexValue(current()); | |
4956 if (d < 0) { | |
4957 return false; | |
4958 } | |
4959 while (d >= 0) { | |
4960 x = x * 16 + d; | |
4961 if (x > max_value) { | |
4962 return false; | |
4963 } | |
4964 Advance(); | |
4965 d = HexValue(current()); | |
4966 } | |
4967 *value = x; | |
4968 return true; | |
4969 } | |
4970 | |
4971 | |
4908 uc32 RegExpParser::ParseClassCharacterEscape() { | 4972 uc32 RegExpParser::ParseClassCharacterEscape() { |
4909 DCHECK(current() == '\\'); | 4973 DCHECK(current() == '\\'); |
4910 DCHECK(has_next() && !IsSpecialClassEscape(Next())); | 4974 DCHECK(has_next() && !IsSpecialClassEscape(Next())); |
4911 Advance(); | 4975 Advance(); |
4912 switch (current()) { | 4976 switch (current()) { |
4913 case 'b': | 4977 case 'b': |
4914 Advance(); | 4978 Advance(); |
4915 return '\b'; | 4979 return '\b'; |
4916 // ControlEscape :: one of | 4980 // ControlEscape :: one of |
4917 // f n r t v | 4981 // f n r t v |
(...skipping 34 matching lines...) | |
4952 // For compatibility, we interpret a decimal escape that isn't | 5016 // For compatibility, we interpret a decimal escape that isn't |
4953 // a back reference (and therefore either \0 or not valid according | 5017 // a back reference (and therefore either \0 or not valid according |
4954 // to the specification) as a 1..3 digit octal character code. | 5018 // to the specification) as a 1..3 digit octal character code. |
4955 return ParseOctalLiteral(); | 5019 return ParseOctalLiteral(); |
4956 case 'x': { | 5020 case 'x': { |
4957 Advance(); | 5021 Advance(); |
4958 uc32 value; | 5022 uc32 value; |
4959 if (ParseHexEscape(2, &value)) { | 5023 if (ParseHexEscape(2, &value)) { |
4960 return value; | 5024 return value; |
4961 } | 5025 } |
4962 // If \x is not followed by a two-digit hexadecimal, treat it | 5026 if (!FLAG_harmony_unicode || !unicode_) { |
4963 // as an identity escape. | 5027 // If \x is not followed by a two-digit hexadecimal, treat it |
4964 return 'x'; | 5028 // as an identity escape. |
5029 return 'x'; | |
5030 } | |
5031 // If the 'u' flag is present, invalid escapes are not treated as | |
5032 // identity escapes. | |
5033 ReportError(CStrVector("Invalid escape")); | |
5034 return 0; | |
4965 } | 5035 } |
4966 case 'u': { | 5036 case 'u': { |
4967 Advance(); | 5037 Advance(); |
4968 uc32 value; | 5038 uc32 value; |
4969 if (ParseHexEscape(4, &value)) { | 5039 if (ParseUnicodeEscape(&value)) { |
4970 return value; | 5040 return value; |
4971 } | 5041 } |
4972 // If \u is not followed by a four-digit hexadecimal, treat it | 5042 if (!FLAG_harmony_unicode || !unicode_) { |
4973 // as an identity escape. | 5043 return 'u'; |
4974 return 'u'; | 5044 } |
5045 // If the 'u' flag is present, invalid escapes are not treated as | |
5046 // identity escapes. | |
5047 ReportError(CStrVector("Invalid unicode escape")); | |
5048 return 0; | |
4975 } | 5049 } |
4976 default: { | 5050 default: { |
4977 // Extended identity escape. We accept any character that hasn't | |
4978 // been matched by a more specific case, not just the subset required | |
4979 // by the ECMAScript specification. | |
4980 uc32 result = current(); | 5051 uc32 result = current(); |
4981 Advance(); | 5052 // If the 'u' flag is present, only syntax characters can be escaped, no |
4982 return result; | 5053 // other identity escapes are allowed. If the 'u' flag is not present, all |
5054 // identity escapes are allowed. | |
5055 if (!FLAG_harmony_unicode || !unicode_ || IsSyntaxCharacter(result)) { | |
5056 Advance(); | |
5057 return result; | |
5058 } | |
5059 ReportError(CStrVector("Invalid escape")); | |
5060 return 0; | |
4983 } | 5061 } |
4984 } | 5062 } |
4985 return 0; | 5063 return 0; |
4986 } | 5064 } |
4987 | 5065 |
4988 | 5066 |
4989 CharacterRange RegExpParser::ParseClassAtom(uc16* char_class) { | 5067 CharacterRange RegExpParser::ParseClassAtom(uc16* char_class) { |
4990 DCHECK_EQ(0, *char_class); | 5068 DCHECK_EQ(0, *char_class); |
4991 uc32 first = current(); | 5069 uc32 first = current(); |
4992 if (first == '\\') { | 5070 if (first == '\\') { |
(...skipping 85 matching lines...) | |
5078 ranges->Add(CharacterRange::Everything(), zone()); | 5156 ranges->Add(CharacterRange::Everything(), zone()); |
5079 is_negated = !is_negated; | 5157 is_negated = !is_negated; |
5080 } | 5158 } |
5081 return new(zone()) RegExpCharacterClass(ranges, is_negated); | 5159 return new(zone()) RegExpCharacterClass(ranges, is_negated); |
5082 } | 5160 } |
5083 | 5161 |
5084 | 5162 |
5085 // ---------------------------------------------------------------------------- | 5163 // ---------------------------------------------------------------------------- |
5086 // The Parser interface. | 5164 // The Parser interface. |
5087 | 5165 |
5088 bool RegExpParser::ParseRegExp(FlatStringReader* input, | 5166 bool RegExpParser::ParseRegExp(FlatStringReader* input, bool multiline, |
5089 bool multiline, | 5167 bool unicode, RegExpCompileData* result, |
5090 RegExpCompileData* result, | |
5091 Zone* zone) { | 5168 Zone* zone) { |
5092 DCHECK(result != NULL); | 5169 DCHECK(result != NULL); |
5093 RegExpParser parser(input, &result->error, multiline, zone); | 5170 RegExpParser parser(input, &result->error, multiline, unicode, zone); |
5094 RegExpTree* tree = parser.ParsePattern(); | 5171 RegExpTree* tree = parser.ParsePattern(); |
5095 if (parser.failed()) { | 5172 if (parser.failed()) { |
5096 DCHECK(tree == NULL); | 5173 DCHECK(tree == NULL); |
5097 DCHECK(!result->error.is_null()); | 5174 DCHECK(!result->error.is_null()); |
5098 } else { | 5175 } else { |
5099 DCHECK(tree != NULL); | 5176 DCHECK(tree != NULL); |
5100 DCHECK(result->error.is_null()); | 5177 DCHECK(result->error.is_null()); |
5101 result->tree = tree; | 5178 result->tree = tree; |
5102 int capture_count = parser.captures_started(); | 5179 int capture_count = parser.captures_started(); |
5103 result->simple = tree->IsAtom() && parser.simple() && capture_count == 0; | 5180 result->simple = tree->IsAtom() && parser.simple() && capture_count == 0; |
(...skipping 189 matching lines...) | |
5293 } else { | 5370 } else { |
5294 const uc16* data = reinterpret_cast<const uc16*>(raw_string->raw_data()); | 5371 const uc16* data = reinterpret_cast<const uc16*>(raw_string->raw_data()); |
5295 running_hash = StringHasher::ComputeRunningHash(running_hash, data, | 5372 running_hash = StringHasher::ComputeRunningHash(running_hash, data, |
5296 raw_string->length()); | 5373 raw_string->length()); |
5297 } | 5374 } |
5298 } | 5375 } |
5299 | 5376 |
5300 return running_hash; | 5377 return running_hash; |
5301 } | 5378 } |
5302 } } // namespace v8::internal | 5379 } } // namespace v8::internal |
OLD | NEW |