src/scanner-base.cc - Issue 7218009: Make multi-line comments not count when checking whether --> is first on a line.

Side by Side Diff: src/scanner-base.cc

Issue 7218009: Make multi-line comments not count when checking whether --> is first on a line. (Closed) Base URL: https://v8.googlecode.com/svn/branches/bleeding_edge

Patch Set: Created 9 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright 2011 the V8 project authors. All rights reserved.	1 // Copyright 2011 the V8 project authors. All rights reserved.

2 // Redistribution and use in source and binary forms, with or without	2 // Redistribution and use in source and binary forms, with or without

3 // modification, are permitted provided that the following conditions are	3 // modification, are permitted provided that the following conditions are

4 // met:	4 // met:

5 //	5 //

6 // * Redistributions of source code must retain the above copyright	6 // * Redistributions of source code must retain the above copyright

7 // notice, this list of conditions and the following disclaimer.	7 // notice, this list of conditions and the following disclaimer.

8 // * Redistributions in binary form must reproduce the above	8 // * Redistributions in binary form must reproduce the above

9 // copyright notice, this list of conditions and the following	9 // copyright notice, this list of conditions and the following

10 // disclaimer in the documentation and/or other materials provided	10 // disclaimer in the documentation and/or other materials provided

(...skipping 62 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
73 // ----------------------------------------------------------------------------	73 // ----------------------------------------------------------------------------

74 // JavaScriptScanner	74 // JavaScriptScanner

75	75

76 JavaScriptScanner::JavaScriptScanner(UnicodeCache* scanner_contants)	76 JavaScriptScanner::JavaScriptScanner(UnicodeCache* scanner_contants)

77 : Scanner(scanner_contants), octal_pos_(Location::invalid()) { }	77 : Scanner(scanner_contants), octal_pos_(Location::invalid()) { }

78	78

79	79

80 Token::Value JavaScriptScanner::Next() {	80 Token::Value JavaScriptScanner::Next() {

81 current_ = next_;	81 current_ = next_;

82 has_line_terminator_before_next_ = false;	82 has_line_terminator_before_next_ = false;

	83 next_is_first_on_line_ = false;
	William Hesse 2011/06/21 11:37:41 Could we call these things something more parallel, like: has_line_terminator_before_next and has_line_terminator_outside_comment_before_next, or saw_line_terminator_outside_comment and saw_line_terminator_inside_comment? The meanings and algorithm would be clearer. Or better yet: newline_before_current_token and multiline_comment_before_current_token. The condition for handling --> would be newline_before..., and the condition for automatic semicolon insertion would be newline_before... || multiline_comment_before...
83 Scan();	84 Scan();

84 return current_.token;	85 return current_.token;

85 }	86 }

86	87

87	88

88 static inline bool IsByteOrderMark(uc32 c) {	89 static inline bool IsByteOrderMark(uc32 c) {

89 // The Unicode value U+FFFE is guaranteed never to be assigned as a	90 // The Unicode value U+FFFE is guaranteed never to be assigned as a

90 // Unicode character; this implies that in a Unicode context the	91 // Unicode character; this implies that in a Unicode context the

91 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF	92 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF

92 // character expressed in little-endian byte order (since it could	93 // character expressed in little-endian byte order (since it could

93 // not be a U+FFFE character expressed in big-endian byte	94 // not be a U+FFFE character expressed in big-endian byte

94 // order). Nevertheless, we check for it to be compatible with	95 // order). Nevertheless, we check for it to be compatible with

95 // Spidermonkey.	96 // Spidermonkey.

96 return c == 0xFEFF \|\| c == 0xFFFE;	97 return c == 0xFEFF \|\| c == 0xFFFE;

97 }	98 }

98	99

99	100

100 bool JavaScriptScanner::SkipWhiteSpace() {	101 bool JavaScriptScanner::SkipWhiteSpace() {

101 int start_position = source_pos();	102 int start_position = source_pos();

102	103

103 while (true) {	104 while (true) {

104 // We treat byte-order marks (BOMs) as whitespace for better	105 // We treat byte-order marks (BOMs) as whitespace for better

105 // compatibility with Spidermonkey and other JavaScript engines.	106 // compatibility with Spidermonkey and other JavaScript engines.

106 while (unicode_cache_->IsWhiteSpace(c0_) \|\| IsByteOrderMark(c0_)) {	107 while (unicode_cache_->IsWhiteSpace(c0_) \|\| IsByteOrderMark(c0_)) {

107 // IsWhiteSpace() includes line terminators!	108 // IsWhiteSpace() includes line terminators!

108 if (unicode_cache_->IsLineTerminator(c0_)) {	109 if (unicode_cache_->IsLineTerminator(c0_)) {

109 // Ignore line terminators, but remember them. This is necessary	110 // Ignore line terminators, but remember them. This is necessary

110 // for automatic semicolon insertion.	111 // for automatic semicolon insertion.

111 has_line_terminator_before_next_ = true;	112 has_line_terminator_before_next_ = true;

	113 next_is_first_on_line_ = true;

112 }	114 }

113 Advance();	115 Advance();

114 }	116 }

115	117

116 // If there is an HTML comment end '-->' at the beginning of a	118 // If there is an HTML comment end '-->' at the beginning of a

117 // line (with only whitespace in front of it), we treat the rest	119 // line (with only whitespace in front of it), we treat the rest

118 // of the line as a comment. This is in line with the way	120 // of the line as a comment. This is in line with the way

119 // SpiderMonkey handles it.	121 // SpiderMonkey handles it.

120 if (c0_ == '-' && has_line_terminator_before_next_) {	122 if (c0_ == '-' && next_is_first_on_line_) {

121 Advance();	123 Advance();

122 if (c0_ == '-') {	124 if (c0_ == '-') {

123 Advance();	125 Advance();

124 if (c0_ == '>') {	126 if (c0_ == '>') {

125 // Treat the rest of the line as a comment.	127 // Treat the rest of the line as a comment.

126 SkipSingleLineComment();	128 SkipSingleLineComment();

127 // Continue skipping white space after the comment.	129 // Continue skipping white space after the comment.

128 continue;	130 continue;

129 }	131 }

130 PushBack('-'); // undo Advance()	132 PushBack('-'); // undo Advance()

(...skipping 29 matching lines...) Expand all Loading...
160 while (c0_ >= 0) {	162 while (c0_ >= 0) {

161 char ch = c0_;	163 char ch = c0_;

162 Advance();	164 Advance();

163 if (unicode_cache_->IsLineTerminator(ch)) {	165 if (unicode_cache_->IsLineTerminator(ch)) {

164 // Following ECMA-262, section 7.4, a comment containing	166 // Following ECMA-262, section 7.4, a comment containing

165 // a newline will make the comment count as a line-terminator.	167 // a newline will make the comment count as a line-terminator.

166 has_line_terminator_before_next_ = true;	168 has_line_terminator_before_next_ = true;

167 }	169 }

168 // If we have reached the end of the multi-line comment, we	170 // If we have reached the end of the multi-line comment, we

169 // consume the '/' and insert a whitespace. This way all	171 // consume the '/' and insert a whitespace. This way all

170 // multi-line comments are treated as whitespace.	172 // multi-line comments are treated as whitespace (except

	173 // when checking whether there is non-whitespace before a

	174 // --> comment).

171 if (ch == '*' && c0_ == '/') {	175 if (ch == '*' && c0_ == '/') {

172 c0_ = ' ';	176 c0_ = ' ';

173 return Token::WHITESPACE;	177 return Token::WHITESPACE;

174 }	178 }

175 }	179 }

176	180

177 // Unterminated multi-line comment.	181 // Unterminated multi-line comment.

178 return Token::ILLEGAL;	182 return Token::ILLEGAL;

179 }	183 }

180	184

(...skipping 23 matching lines...) Expand all Loading...
204 switch (c0_) {	208 switch (c0_) {

205 case ' ':	209 case ' ':

206 case '\t':	210 case '\t':

207 Advance();	211 Advance();

208 token = Token::WHITESPACE;	212 token = Token::WHITESPACE;

209 break;	213 break;

210	214

211 case '\n':	215 case '\n':

212 Advance();	216 Advance();

213 has_line_terminator_before_next_ = true;	217 has_line_terminator_before_next_ = true;

	218 next_is_first_on_line_ = true;

214 token = Token::WHITESPACE;	219 token = Token::WHITESPACE;

215 break;	220 break;

216	221

217 case '"': case '\'':	222 case '"': case '\'':

218 token = ScanString();	223 token = ScanString();

219 break;	224 break;

220	225

221 case '<':	226 case '<':

222 // < <= << <<= <!--	227 // < <= << <<= <!--

223 Advance();	228 Advance();

(...skipping 58 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
282 } else {	287 } else {

283 token = Token::ADD;	288 token = Token::ADD;

284 }	289 }

285 break;	290 break;

286	291

287 case '-':	292 case '-':

288 // - -- --> -=	293 // - -- --> -=

289 Advance();	294 Advance();

290 if (c0_ == '-') {	295 if (c0_ == '-') {

291 Advance();	296 Advance();

292 if (c0_ == '>' && has_line_terminator_before_next_) {	297 if (c0_ == '>' && next_is_first_on_line_) {

293 // For compatibility with SpiderMonkey, we skip lines that	298 // For compatibility with SpiderMonkey, we skip lines that

294 // start with an HTML comment end '-->'.	299 // start with an HTML comment end '-->'.

295 token = SkipSingleLineComment();	300 token = SkipSingleLineComment();

296 } else {	301 } else {

297 token = Token::DEC;	302 token = Token::DEC;

298 }	303 }

299 } else if (c0_ == '=') {	304 } else if (c0_ == '=') {

300 token = Select(Token::ASSIGN_SUB);	305 token = Select(Token::ASSIGN_SUB);

301 } else {	306 } else {

302 token = Token::SUB;	307 token = Token::SUB;

(...skipping 640 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
943 if (MatchKeywordStart(input, "with", 1, Token::WITH)) return;	948 if (MatchKeywordStart(input, "with", 1, Token::WITH)) return;

944 break;	949 break;

945 case UNMATCHABLE:	950 case UNMATCHABLE:

946 break;	951 break;

947 }	952 }

948 // On fallthrough, it's a failure.	953 // On fallthrough, it's a failure.

949 state_ = UNMATCHABLE;	954 state_ = UNMATCHABLE;

950 }	955 }

951	956

952 } } // namespace v8::internal	957 } } // namespace v8::internal

OLD	NEW

« no previous file with comments | « src/scanner-base.h ('k') | test/cctest/test-parsing.cc » ('j') | no next file with comments »