src/scanner.cc - Issue 716423002: ES6 unicode extensions, part 1.

Side by Side Diff: src/scanner.cc

Issue 716423002: ES6 unicode extensions, part 1. (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master

Patch Set: . Created 6 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 // Copyright 2011 the V8 project authors. All rights reserved.	1 // Copyright 2011 the V8 project authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 // Features shared by parsing and pre-parsing scanners.	5 // Features shared by parsing and pre-parsing scanners.

6	6

7 #include <stdint.h>	7 #include <stdint.h>

8	8

9 #include <cmath>	9 #include <cmath>

10	10

(...skipping 20 matching lines...) Expand all Loading...
31	31

32 // ----------------------------------------------------------------------------	32 // ----------------------------------------------------------------------------

33 // Scanner	33 // Scanner

34	34

35 Scanner::Scanner(UnicodeCache* unicode_cache)	35 Scanner::Scanner(UnicodeCache* unicode_cache)

36 : unicode_cache_(unicode_cache),	36 : unicode_cache_(unicode_cache),

37 octal_pos_(Location::invalid()),	37 octal_pos_(Location::invalid()),

38 harmony_scoping_(false),	38 harmony_scoping_(false),

39 harmony_modules_(false),	39 harmony_modules_(false),

40 harmony_numeric_literals_(false),	40 harmony_numeric_literals_(false),

41 harmony_classes_(false) { }	41 harmony_classes_(false),

	42 harmony_unicode_(false) {}

42	43

43	44

44 void Scanner::Initialize(Utf16CharacterStream* source) {	45 void Scanner::Initialize(Utf16CharacterStream* source) {

45 source_ = source;	46 source_ = source;

46 // Need to capture identifiers in order to recognize "get" and "set"	47 // Need to capture identifiers in order to recognize "get" and "set"

47 // in object literals.	48 // in object literals.

48 Init();	49 Init();

49 // Skip initial whitespace allowing HTML comment ends just like	50 // Skip initial whitespace allowing HTML comment ends just like

50 // after a newline and scan first token.	51 // after a newline and scan first token.

51 has_line_terminator_before_next_ = true;	52 has_line_terminator_before_next_ = true;

(...skipping 12 matching lines...) Expand all Loading...
64 return -1;	65 return -1;

65 }	66 }

66 x = x * 16 + d;	67 x = x * 16 + d;

67 Advance();	68 Advance();

68 }	69 }

69	70

70 return x;	71 return x;

71 }	72 }

72	73

73	74

	75 uc32 Scanner::ScanUnlimitedLengthHexNumber(int max_value) {

	76 uc32 x = 0;

	77 int d = HexValue(c0_);

	78 if (d < 0) {

	79 return -1;

	80 }

	81 while (d >= 0) {

	82 x = x * 16 + d;

	83 if (x > max_value) return -1;
	caitp (gmail) 2014/11/13 15:09:18: I don't think this should block landing this, but I think it would be an improvement if this error condition were handled in the parser, so that an error could be reported (like "SyntaxError: Unicode escape sequences cannot have a value exceeding U+10FFFF" or something). So you'd want to make sure the value doesn't overflow, but max_value checking could be verified in the parser. Again though, not a blocker, I just think that would be easier on people. SpiderMonkey outputs some pretty nice static errors for a lot of these, but V8 is just saying "Unexpected token ____", which is not that great. Not a blocker, but something to think about. | rossberg 2014/11/14 09:59:08 (replying to caitp's comment of 2014/11/13 15:09:18, quoted in full above): Alternatively, we should enable the scanner to give proper error messages.
	84 Advance();

	85 d = HexValue(c0_);

	86 }

	87 return x;

	88 }

	89

	90

74 // Ensure that tokens can be stored in a byte.	91 // Ensure that tokens can be stored in a byte.

75 STATIC_ASSERT(Token::NUM_TOKENS <= 0x100);	92 STATIC_ASSERT(Token::NUM_TOKENS <= 0x100);

76	93

77 // Table of one-character tokens, by character (0x00..0x7f only).	94 // Table of one-character tokens, by character (0x00..0x7f only).

78 static const byte one_char_tokens[] = {	95 static const byte one_char_tokens[] = {

79 Token::ILLEGAL,	96 Token::ILLEGAL,

80 Token::ILLEGAL,	97 Token::ILLEGAL,

81 Token::ILLEGAL,	98 Token::ILLEGAL,

82 Token::ILLEGAL,	99 Token::ILLEGAL,

83 Token::ILLEGAL,	100 Token::ILLEGAL,

(...skipping 603 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
687 switch (c) {	704 switch (c) {

688 case '\'': // fall through	705 case '\'': // fall through

689 case '"' : // fall through	706 case '"' : // fall through

690 case '\\': break;	707 case '\\': break;

691 case 'b' : c = '\b'; break;	708 case 'b' : c = '\b'; break;

692 case 'f' : c = '\f'; break;	709 case 'f' : c = '\f'; break;

693 case 'n' : c = '\n'; break;	710 case 'n' : c = '\n'; break;

694 case 'r' : c = '\r'; break;	711 case 'r' : c = '\r'; break;

695 case 't' : c = '\t'; break;	712 case 't' : c = '\t'; break;

696 case 'u' : {	713 case 'u' : {

697 c = ScanHexNumber(4);	714 c = ScanUnicodeEscape();

698 if (c < 0) return false;	715 if (c < 0) return false;

699 break;	716 break;

700 }	717 }

701 case 'v' : c = '\v'; break;	718 case 'v' : c = '\v'; break;

702 case 'x' : {	719 case 'x' : {

703 c = ScanHexNumber(2);	720 c = ScanHexNumber(2);

704 if (c < 0) return false;	721 if (c < 0) return false;

705 break;	722 break;

706 }	723 }

707 case '0' : // fall through	724 case '0' : // fall through

(...skipping 172 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
880 literal.Complete();	897 literal.Complete();

881	898

882 return Token::NUMBER;	899 return Token::NUMBER;

883 }	900 }

884	901

885	902

886 uc32 Scanner::ScanIdentifierUnicodeEscape() {	903 uc32 Scanner::ScanIdentifierUnicodeEscape() {

887 Advance();	904 Advance();

888 if (c0_ != 'u') return -1;	905 if (c0_ != 'u') return -1;

889 Advance();	906 Advance();

	907 return ScanUnicodeEscape();

	908 }

	909

	910

	911 uc32 Scanner::ScanUnicodeEscape() {

	912 // Accept both \uxxxx and \u{xxxxxx} (if harmony unicode escapes are

	913 // allowed). In the latter case, the number of hex digits between { } is

	914 // arbitrary. \ and u have already been read.

	915 if (c0_ == '{' && HarmonyUnicode()) {

	916 Advance();

	917 uc32 cp = ScanUnlimitedLengthHexNumber(0x10ffff);

	918 if (cp < 0) {

	919 return -1;

	920 }

	921 if (c0_ != '}') {

	922 return -1;

	923 }

	924 Advance();

	925 return cp;

	926 }

890 return ScanHexNumber(4);	927 return ScanHexNumber(4);

891 }	928 }

892	929

893	930

894 // ----------------------------------------------------------------------------	931 // ----------------------------------------------------------------------------

895 // Keyword Matcher	932 // Keyword Matcher

896	933

897 #define KEYWORDS(KEYWORD_GROUP, KEYWORD) \	934 #define KEYWORDS(KEYWORD_GROUP, KEYWORD) \

898 KEYWORD_GROUP('b') \	935 KEYWORD_GROUP('b') \

899 KEYWORD("break", Token::BREAK) \	936 KEYWORD("break", Token::BREAK) \

(...skipping 426 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1326 }	1363 }

1327 backing_store_.Add(static_cast<uint8_t>((one_byte_length >> 7) \| 0x80u));	1364 backing_store_.Add(static_cast<uint8_t>((one_byte_length >> 7) \| 0x80u));

1328 }	1365 }

1329 backing_store_.Add(static_cast<uint8_t>(one_byte_length & 0x7f));	1366 backing_store_.Add(static_cast<uint8_t>(one_byte_length & 0x7f));

1330	1367

1331 backing_store_.AddBlock(bytes);	1368 backing_store_.AddBlock(bytes);

1332 return backing_store_.EndSequence().start();	1369 return backing_store_.EndSequence().start();

1333 }	1370 }

1334	1371

1335 } } // namespace v8::internal	1372 } } // namespace v8::internal

OLD	NEW

« no previous file with comments | « src/scanner.h ('k') | test/cctest/test-parsing.cc » ('j') | test/cctest/test-parsing.cc » ('J')