tools/gn/tokenizer.cc - Issue 588893006: gn: attach comments to parse tree

Side by Side Diff: tools/gn/tokenizer.cc

Issue 588893006: gn: attach comments to parse tree (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: x64 Created 6 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "tools/gn/tokenizer.h"	5 #include "tools/gn/tokenizer.h"

6	6

7 #include "base/logging.h"	7 #include "base/logging.h"

	8 #include "base/strings/string_util.h"

8 #include "tools/gn/input_file.h"	9 #include "tools/gn/input_file.h"

9	10

10 namespace {	11 namespace {

11	12

12 bool CouldBeTwoCharOperatorBegin(char c) {	13 bool CouldBeTwoCharOperatorBegin(char c) {

13 return c == '<' || c == '>' || c == '!' || c == '=' || c == '-' ||	14 return c == '<' || c == '>' || c == '!' || c == '=' || c == '-' ||

14 c == '+' || c == '|' || c == '&';	15 c == '+' || c == '|' || c == '&';

15 }	16 }

16	17

17 bool CouldBeTwoCharOperatorEnd(char c) {	18 bool CouldBeTwoCharOperatorEnd(char c) {

(...skipping 82 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
100 }	101 }

101 size_t token_begin = cur_;	102 size_t token_begin = cur_;

102 AdvanceToEndOfToken(location, type);	103 AdvanceToEndOfToken(location, type);

103 if (has_error())	104 if (has_error())

104 break;	105 break;

105 size_t token_end = cur_;	106 size_t token_end = cur_;

106	107

107 base::StringPiece token_value(&input_.data()[token_begin],	108 base::StringPiece token_value(&input_.data()[token_begin],

108 token_end - token_begin);	109 token_end - token_begin);

109	110

110 if (type == Token::UNCLASSIFIED_OPERATOR)	111 if (type == Token::UNCLASSIFIED_OPERATOR) {

111 type = GetSpecificOperatorType(token_value);	112 type = GetSpecificOperatorType(token_value);

112 if (type == Token::IDENTIFIER) {	113 } else if (type == Token::IDENTIFIER) {

113 if (token_value == "if")	114 if (token_value == "if")

114 type = Token::IF;	115 type = Token::IF;

115 else if (token_value == "else")	116 else if (token_value == "else")

116 type = Token::ELSE;	117 type = Token::ELSE;

117 else if (token_value == "true")	118 else if (token_value == "true")

118 type = Token::TRUE_TOKEN;	119 type = Token::TRUE_TOKEN;

119 else if (token_value == "false")	120 else if (token_value == "false")

120 type = Token::FALSE_TOKEN;	121 type = Token::FALSE_TOKEN;

	122 } else if (type == Token::UNCLASSIFIED_COMMENT) {

	123 if (AtStartOfLine(token_begin))

	124 type = Token::LINE_COMMENT;

	125 else

	126 type = Token::SUFFIX_COMMENT;

121 }	127 }

122	128

123 // TODO(brettw) This just strips comments from the token stream. This	129 tokens_.push_back(Token(location, type, token_value));

124 // is probably wrong, they should be removed at a later stage so we can

125 // do things like rewrite the file. But this makes the parser simpler and

126 // is OK for now.

127 if (type != Token::COMMENT)

128 tokens_.push_back(Token(location, type, token_value));

129 }	130 }

130 if (err_->has_error())	131 if (err_->has_error())

131 tokens_.clear();	132 tokens_.clear();

132 return tokens_;	133 return tokens_;

133 }	134 }

134	135

135 // static	136 // static

136 size_t Tokenizer::ByteOffsetOfNthLine(const base::StringPiece& buf, int n) {	137 size_t Tokenizer::ByteOffsetOfNthLine(const base::StringPiece& buf, int n) {

137 DCHECK_GT(n, 0);	138 DCHECK_GT(n, 0);

138	139

(...skipping 53 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
192 return Token::LEFT_BRACE;	193 return Token::LEFT_BRACE;

193 if (next_char == '}')	194 if (next_char == '}')

194 return Token::RIGHT_BRACE;	195 return Token::RIGHT_BRACE;

195	196

196 if (next_char == '.')	197 if (next_char == '.')

197 return Token::DOT;	198 return Token::DOT;

198 if (next_char == ',')	199 if (next_char == ',')

199 return Token::COMMA;	200 return Token::COMMA;

200	201

201 if (next_char == '#')	202 if (next_char == '#')

202 return Token::COMMENT;	203 return Token::UNCLASSIFIED_COMMENT;

203	204

204 // For the case of '-' differentiate between a negative number and anything	205 // For the case of '-' differentiate between a negative number and anything

205 // else.	206 // else.

206 if (next_char == '-') {	207 if (next_char == '-') {

207 if (!CanIncrement())	208 if (!CanIncrement())

208 return Token::UNCLASSIFIED_OPERATOR; // Just the minus before end of	209 return Token::UNCLASSIFIED_OPERATOR; // Just the minus before end of

209 // file.	210 // file.

210 char following_char = input_[cur_ + 1];	211 char following_char = input_[cur_ + 1];

211 if (IsAsciiDigit(following_char))	212 if (IsAsciiDigit(following_char))

212 return Token::INTEGER;	213 return Token::INTEGER;

(...skipping 65 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
278 case Token::RIGHT_BRACKET:	279 case Token::RIGHT_BRACKET:

279 case Token::LEFT_BRACE:	280 case Token::LEFT_BRACE:

280 case Token::RIGHT_BRACE:	281 case Token::RIGHT_BRACE:

281 case Token::LEFT_PAREN:	282 case Token::LEFT_PAREN:

282 case Token::RIGHT_PAREN:	283 case Token::RIGHT_PAREN:

283 case Token::DOT:	284 case Token::DOT:

284 case Token::COMMA:	285 case Token::COMMA:

285 Advance(); // All are one char.	286 Advance(); // All are one char.

286 break;	287 break;

287	288

288 case Token::COMMENT:	289 case Token::UNCLASSIFIED_COMMENT:

289 // Eat to EOL.	290 // Eat to EOL.

290 while (!at_end() && !IsCurrentNewline())	291 while (!at_end() && !IsCurrentNewline())

291 Advance();	292 Advance();

292 break;	293 break;

293	294

294 case Token::INVALID:	295 case Token::INVALID:

295 default:	296 default:

296 *err_ = Err(location, "Everything is all messed up",	297 *err_ = Err(location, "Everything is all messed up",

297 "Please insert system disk in drive A: and press any key.");	298 "Please insert system disk in drive A: and press any key.");

298 NOTREACHED();	299 NOTREACHED();

299 return;	300 return;

300 }	301 }

301 }	302 }

302	303

	304 bool Tokenizer::AtStartOfLine(size_t location) const {

	305 while (location > 0) {

	306 --location;

	307 char c = input_[location];

	308 if (c == '\n')

	309 return true;

	310 if (c != ' ')

	311 return false;

	312 }

	313 return true;

	314 }

	315

303 bool Tokenizer::IsCurrentWhitespace() const {	316 bool Tokenizer::IsCurrentWhitespace() const {

304 DCHECK(!at_end());	317 DCHECK(!at_end());

305 char c = input_[cur_];	318 char c = input_[cur_];

306 // Note that tab (0x09) is illegal.	319 // Note that tab (0x09), vertical tab (0x0B), and formfeed (0x0C) are illegal.

307 return c == 0x0A || c == 0x0B || c == 0x0C || c == 0x0D || c == 0x20;	320 return c == 0x0A || c == 0x0D || c == 0x20;

308 }	321 }

309	322

310 bool Tokenizer::IsCurrentStringTerminator(char quote_char) const {	323 bool Tokenizer::IsCurrentStringTerminator(char quote_char) const {

311 DCHECK(!at_end());	324 DCHECK(!at_end());

312 if (cur_char() != quote_char)	325 if (cur_char() != quote_char)

313 return false;	326 return false;

314	327

315 // Check for escaping. \" is not a string terminator, but \\" is. Count	328 // Check for escaping. \" is not a string terminator, but \\" is. Count

316 // the number of preceding backslashes.	329 // the number of preceding backslashes.

317 int num_backslashes = 0;	330 int num_backslashes = 0;

(...skipping 14 matching lines...) Expand all Loading...
332 if (IsCurrentNewline()) {	345 if (IsCurrentNewline()) {

333 line_number_++;	346 line_number_++;

334 char_in_line_ = 1;	347 char_in_line_ = 1;

335 } else {	348 } else {

336 char_in_line_++;	349 char_in_line_++;

337 }	350 }

338 cur_++;	351 cur_++;

339 }	352 }

340	353

341 Location Tokenizer::GetCurrentLocation() const {	354 Location Tokenizer::GetCurrentLocation() const {

342 return Location(input_file_, line_number_, char_in_line_);	355 return Location(

	356 input_file_, line_number_, char_in_line_, static_cast<int>(cur_));

343 }	357 }

344	358

345 Err Tokenizer::GetErrorForInvalidToken(const Location& location) const {	359 Err Tokenizer::GetErrorForInvalidToken(const Location& location) const {

346 std::string help;	360 std::string help;

347 if (cur_char() == ';') {	361 if (cur_char() == ';') {

348 // Semicolon.	362 // Semicolon.

349 help = "Semicolons are not needed, delete this one.";	363 help = "Semicolons are not needed, delete this one.";

350 } else if (cur_char() == '\t') {	364 } else if (cur_char() == '\t') {

351 // Tab.	365 // Tab.

352 help = "You got a tab character in here. Tabs are evil. "	366 help = "You got a tab character in here. Tabs are evil. "

353 "Convert to spaces.";	367 "Convert to spaces.";

354 } else if (cur_char() == '/' && cur_ + 1 < input_.size() &&	368 } else if (cur_char() == '/' && cur_ + 1 < input_.size() &&

355 (input_[cur_ + 1] == '/' || input_[cur_ + 1] == '*')) {	369 (input_[cur_ + 1] == '/' || input_[cur_ + 1] == '*')) {

356 // Different types of comments.	370 // Different types of comments.

357 help = "Comments should start with # instead";	371 help = "Comments should start with # instead";

358 } else {	372 } else {

359 help = "I have no idea what this is.";	373 help = "I have no idea what this is.";

360 }	374 }

361	375

362 return Err(location, "Invalid token.", help);	376 return Err(location, "Invalid token.", help);

363 }	377 }

OLD	NEW

« no previous file with comments | « tools/gn/tokenizer.h ('k') | tools/gn/tokenizer_unittest.cc » ('j') | no next file with comments »