Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(245)

Side by Side Diff: src/parser.cc

Issue 6171001: Change interpretation of malformed \c? escapes in RegExp to match JSC. (Closed)
Patch Set: Addressed review comments. Created 9 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/parser.h ('k') | src/scanner-base.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show/Hide Comments ('s')
OLD | NEW
1 // Copyright 2010 the V8 project authors. All rights reserved. 1 // Copyright 2010 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 4004 matching lines...) Expand 10 before | Expand all | Expand 10 after
4015 break; 4015 break;
4016 case 't': 4016 case 't':
4017 Advance(2); 4017 Advance(2);
4018 builder->AddCharacter('\t'); 4018 builder->AddCharacter('\t');
4019 break; 4019 break;
4020 case 'v': 4020 case 'v':
4021 Advance(2); 4021 Advance(2);
4022 builder->AddCharacter('\v'); 4022 builder->AddCharacter('\v');
4023 break; 4023 break;
4024 case 'c': { 4024 case 'c': {
4025 Advance(2); 4025 Advance();
4026 uc32 control = ParseControlLetterEscape(); 4026 uc32 controlLetter = Next();
4027 builder->AddCharacter(control); 4027 // Special case if it is an ASCII letter.
4028 // Convert lower case letters to uppercase.
4029 uc32 letter = controlLetter & ~('a' ^ 'A');
4030 if (letter < 'A' || 'Z' < letter) {
4031 // controlLetter is not in range 'A'-'Z' or 'a'-'z'.
4032 // This is outside the specification. We match JSC in
4033 // reading the backslash as a literal character instead
4034 // of as starting an escape.
4035 builder->AddCharacter('\\');
4036 } else {
4037 Advance(2);
4038 builder->AddCharacter(controlLetter & 0x1f);
4039 }
4028 break; 4040 break;
4029 } 4041 }
4030 case 'x': { 4042 case 'x': {
4031 Advance(2); 4043 Advance(2);
4032 uc32 value; 4044 uc32 value;
4033 if (ParseHexEscape(2, &value)) { 4045 if (ParseHexEscape(2, &value)) {
4034 builder->AddCharacter(value); 4046 builder->AddCharacter(value);
4035 } else { 4047 } else {
4036 builder->AddCharacter('x'); 4048 builder->AddCharacter('x');
4037 } 4049 }
(...skipping 254 matching lines...) Expand 10 before | Expand all | Expand 10 after
4292 } else { 4304 } else {
4293 Reset(start); 4305 Reset(start);
4294 return false; 4306 return false;
4295 } 4307 }
4296 *min_out = min; 4308 *min_out = min;
4297 *max_out = max; 4309 *max_out = max;
4298 return true; 4310 return true;
4299 } 4311 }
4300 4312
4301 4313
4302 // Upper and lower case letters differ by one bit.
4303 STATIC_CHECK(('a' ^ 'A') == 0x20);
4304
4305 uc32 RegExpParser::ParseControlLetterEscape() {
4306 if (!has_more())
4307 return 'c';
4308 uc32 letter = current() & ~(0x20); // Collapse upper and lower case letters.
4309 if (letter < 'A' || 'Z' < letter) {
4310 // Non-spec error-correction: "\c" followed by non-control letter is
4311 // interpreted as an IdentityEscape of 'c'.
4312 return 'c';
4313 }
4314 Advance();
4315 return letter & 0x1f; // Remainder modulo 32, per specification.
4316 }
4317
4318
4319 uc32 RegExpParser::ParseOctalLiteral() { 4314 uc32 RegExpParser::ParseOctalLiteral() {
4320 ASSERT('0' <= current() && current() <= '7'); 4315 ASSERT('0' <= current() && current() <= '7');
4321 // For compatibility with some other browsers (not all), we parse 4316 // For compatibility with some other browsers (not all), we parse
4322 // up to three octal digits with a value below 256. 4317 // up to three octal digits with a value below 256.
4323 uc32 value = current() - '0'; 4318 uc32 value = current() - '0';
4324 Advance(); 4319 Advance();
4325 if ('0' <= current() && current() <= '7') { 4320 if ('0' <= current() && current() <= '7') {
4326 value = value * 8 + current() - '0'; 4321 value = value * 8 + current() - '0';
4327 Advance(); 4322 Advance();
4328 if (value < 32 && '0' <= current() && current() <= '7') { 4323 if (value < 32 && '0' <= current() && current() <= '7') {
(...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after
4374 return '\n'; 4369 return '\n';
4375 case 'r': 4370 case 'r':
4376 Advance(); 4371 Advance();
4377 return '\r'; 4372 return '\r';
4378 case 't': 4373 case 't':
4379 Advance(); 4374 Advance();
4380 return '\t'; 4375 return '\t';
4381 case 'v': 4376 case 'v':
4382 Advance(); 4377 Advance();
4383 return '\v'; 4378 return '\v';
4384 case 'c': 4379 case 'c': {
4385 Advance(); 4380 uc32 controlLetter = Next();
4386 return ParseControlLetterEscape(); 4381 uc32 letter = controlLetter & ~('A' ^ 'a');
4382 // For compatibility with JSC, inside a character class
4383 // we also accept digits and underscore as control characters.
4384 if ((controlLetter >= '0' && controlLetter <= '9') ||
4385 controlLetter == '_' ||
4386 (letter >= 'A' && letter <= 'Z')) {
4387 Advance(2);
4388 // Control letters mapped to ASCII control characters in the range
4389 // 0x00-0x1f.
4390 return controlLetter & 0x1f;
4391 }
4392 // We match JSC in reading the backslash as a literal
4393 // character instead of as starting an escape.
4394 return '\\';
4395 }
4387 case '0': case '1': case '2': case '3': case '4': case '5': 4396 case '0': case '1': case '2': case '3': case '4': case '5':
4388 case '6': case '7': 4397 case '6': case '7':
4389 // For compatibility, we interpret a decimal escape that isn't 4398 // For compatibility, we interpret a decimal escape that isn't
4390 // a back reference (and therefore either \0 or not valid according 4399 // a back reference (and therefore either \0 or not valid according
4391 // to the specification) as a 1..3 digit octal character code. 4400 // to the specification) as a 1..3 digit octal character code.
4392 return ParseOctalLiteral(); 4401 return ParseOctalLiteral();
4393 case 'x': { 4402 case 'x': {
4394 Advance(); 4403 Advance();
4395 uc32 value; 4404 uc32 value;
4396 if (ParseHexEscape(2, &value)) { 4405 if (ParseHexEscape(2, &value)) {
(...skipping 289 matching lines...) Expand 10 before | Expand all | Expand 10 after
4686 Handle<String> source = Handle<String>(String::cast(script->source())); 4695 Handle<String> source = Handle<String>(String::cast(script->source()));
4687 result = parser.ParseProgram(source, info->is_global()); 4696 result = parser.ParseProgram(source, info->is_global());
4688 } 4697 }
4689 } 4698 }
4690 4699
4691 info->SetFunction(result); 4700 info->SetFunction(result);
4692 return (result != NULL); 4701 return (result != NULL);
4693 } 4702 }
4694 4703
4695 } } // namespace v8::internal 4704 } } // namespace v8::internal
OLD | NEW
« no previous file with comments | « src/parser.h ('k') | src/scanner-base.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698