Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(89)

Side by Side Diff: tools/gn/tokenizer.cc

Issue 588893006: gn: attach comments to parse tree (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: x64 Created 6 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « tools/gn/tokenizer.h ('k') | tools/gn/tokenizer_unittest.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright (c) 2013 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "tools/gn/tokenizer.h" 5 #include "tools/gn/tokenizer.h"
6 6
7 #include "base/logging.h" 7 #include "base/logging.h"
8 #include "base/strings/string_util.h"
8 #include "tools/gn/input_file.h" 9 #include "tools/gn/input_file.h"
9 10
10 namespace { 11 namespace {
11 12
// Returns true if |c| can appear as the first character of a two-character
// operator such as "==", "<=", "+=", "||" or "&&".
bool CouldBeTwoCharOperatorBegin(char c) {
  switch (c) {
    case '<':
    case '>':
    case '!':
    case '=':
    case '-':
    case '+':
    case '|':
    case '&':
      return true;
    default:
      return false;
  }
}
16 17
17 bool CouldBeTwoCharOperatorEnd(char c) { 18 bool CouldBeTwoCharOperatorEnd(char c) {
(...skipping 82 matching lines...) Expand 10 before | Expand all | Expand 10 after
100 } 101 }
101 size_t token_begin = cur_; 102 size_t token_begin = cur_;
102 AdvanceToEndOfToken(location, type); 103 AdvanceToEndOfToken(location, type);
103 if (has_error()) 104 if (has_error())
104 break; 105 break;
105 size_t token_end = cur_; 106 size_t token_end = cur_;
106 107
107 base::StringPiece token_value(&input_.data()[token_begin], 108 base::StringPiece token_value(&input_.data()[token_begin],
108 token_end - token_begin); 109 token_end - token_begin);
109 110
110 if (type == Token::UNCLASSIFIED_OPERATOR) 111 if (type == Token::UNCLASSIFIED_OPERATOR) {
111 type = GetSpecificOperatorType(token_value); 112 type = GetSpecificOperatorType(token_value);
112 if (type == Token::IDENTIFIER) { 113 } else if (type == Token::IDENTIFIER) {
113 if (token_value == "if") 114 if (token_value == "if")
114 type = Token::IF; 115 type = Token::IF;
115 else if (token_value == "else") 116 else if (token_value == "else")
116 type = Token::ELSE; 117 type = Token::ELSE;
117 else if (token_value == "true") 118 else if (token_value == "true")
118 type = Token::TRUE_TOKEN; 119 type = Token::TRUE_TOKEN;
119 else if (token_value == "false") 120 else if (token_value == "false")
120 type = Token::FALSE_TOKEN; 121 type = Token::FALSE_TOKEN;
122 } else if (type == Token::UNCLASSIFIED_COMMENT) {
123 if (AtStartOfLine(token_begin))
124 type = Token::LINE_COMMENT;
125 else
126 type = Token::SUFFIX_COMMENT;
121 } 127 }
122 128
123 // TODO(brettw) This just strips comments from the token stream. This 129 tokens_.push_back(Token(location, type, token_value));
124 // is probably wrong, they should be removed at a later stage so we can
125 // do things like rewrite the file. But this makes the parser simpler and
126 // is OK for now.
127 if (type != Token::COMMENT)
128 tokens_.push_back(Token(location, type, token_value));
129 } 130 }
130 if (err_->has_error()) 131 if (err_->has_error())
131 tokens_.clear(); 132 tokens_.clear();
132 return tokens_; 133 return tokens_;
133 } 134 }
134 135
135 // static 136 // static
136 size_t Tokenizer::ByteOffsetOfNthLine(const base::StringPiece& buf, int n) { 137 size_t Tokenizer::ByteOffsetOfNthLine(const base::StringPiece& buf, int n) {
137 DCHECK_GT(n, 0); 138 DCHECK_GT(n, 0);
138 139
(...skipping 53 matching lines...) Expand 10 before | Expand all | Expand 10 after
192 return Token::LEFT_BRACE; 193 return Token::LEFT_BRACE;
193 if (next_char == '}') 194 if (next_char == '}')
194 return Token::RIGHT_BRACE; 195 return Token::RIGHT_BRACE;
195 196
196 if (next_char == '.') 197 if (next_char == '.')
197 return Token::DOT; 198 return Token::DOT;
198 if (next_char == ',') 199 if (next_char == ',')
199 return Token::COMMA; 200 return Token::COMMA;
200 201
201 if (next_char == '#') 202 if (next_char == '#')
202 return Token::COMMENT; 203 return Token::UNCLASSIFIED_COMMENT;
203 204
204 // For the case of '-' differentiate between a negative number and anything 205 // For the case of '-' differentiate between a negative number and anything
205 // else. 206 // else.
206 if (next_char == '-') { 207 if (next_char == '-') {
207 if (!CanIncrement()) 208 if (!CanIncrement())
208 return Token::UNCLASSIFIED_OPERATOR; // Just the minus before end of 209 return Token::UNCLASSIFIED_OPERATOR; // Just the minus before end of
209 // file. 210 // file.
210 char following_char = input_[cur_ + 1]; 211 char following_char = input_[cur_ + 1];
211 if (IsAsciiDigit(following_char)) 212 if (IsAsciiDigit(following_char))
212 return Token::INTEGER; 213 return Token::INTEGER;
(...skipping 65 matching lines...) Expand 10 before | Expand all | Expand 10 after
278 case Token::RIGHT_BRACKET: 279 case Token::RIGHT_BRACKET:
279 case Token::LEFT_BRACE: 280 case Token::LEFT_BRACE:
280 case Token::RIGHT_BRACE: 281 case Token::RIGHT_BRACE:
281 case Token::LEFT_PAREN: 282 case Token::LEFT_PAREN:
282 case Token::RIGHT_PAREN: 283 case Token::RIGHT_PAREN:
283 case Token::DOT: 284 case Token::DOT:
284 case Token::COMMA: 285 case Token::COMMA:
285 Advance(); // All are one char. 286 Advance(); // All are one char.
286 break; 287 break;
287 288
288 case Token::COMMENT: 289 case Token::UNCLASSIFIED_COMMENT:
289 // Eat to EOL. 290 // Eat to EOL.
290 while (!at_end() && !IsCurrentNewline()) 291 while (!at_end() && !IsCurrentNewline())
291 Advance(); 292 Advance();
292 break; 293 break;
293 294
294 case Token::INVALID: 295 case Token::INVALID:
295 default: 296 default:
296 *err_ = Err(location, "Everything is all messed up", 297 *err_ = Err(location, "Everything is all messed up",
297 "Please insert system disk in drive A: and press any key."); 298 "Please insert system disk in drive A: and press any key.");
298 NOTREACHED(); 299 NOTREACHED();
299 return; 300 return;
300 } 301 }
301 } 302 }
302 303
304 bool Tokenizer::AtStartOfLine(size_t location) const {
305 while (location > 0) {
306 --location;
307 char c = input_[location];
308 if (c == '\n')
309 return true;
310 if (c != ' ')
311 return false;
312 }
313 return true;
314 }
315
303 bool Tokenizer::IsCurrentWhitespace() const { 316 bool Tokenizer::IsCurrentWhitespace() const {
304 DCHECK(!at_end()); 317 DCHECK(!at_end());
305 char c = input_[cur_]; 318 char c = input_[cur_];
306 // Note that tab (0x09) is illegal. 319 // Note that tab (0x09), vertical tab (0x0B), and formfeed (0x0C) are illegal.
307 return c == 0x0A || c == 0x0B || c == 0x0C || c == 0x0D || c == 0x20; 320 return c == 0x0A || c == 0x0D || c == 0x20;
308 } 321 }
309 322
310 bool Tokenizer::IsCurrentStringTerminator(char quote_char) const { 323 bool Tokenizer::IsCurrentStringTerminator(char quote_char) const {
311 DCHECK(!at_end()); 324 DCHECK(!at_end());
312 if (cur_char() != quote_char) 325 if (cur_char() != quote_char)
313 return false; 326 return false;
314 327
315 // Check for escaping. \" is not a string terminator, but \\" is. Count 328 // Check for escaping. \" is not a string terminator, but \\" is. Count
316 // the number of preceding backslashes. 329 // the number of preceding backslashes.
317 int num_backslashes = 0; 330 int num_backslashes = 0;
(...skipping 14 matching lines...) Expand all
332 if (IsCurrentNewline()) { 345 if (IsCurrentNewline()) {
333 line_number_++; 346 line_number_++;
334 char_in_line_ = 1; 347 char_in_line_ = 1;
335 } else { 348 } else {
336 char_in_line_++; 349 char_in_line_++;
337 } 350 }
338 cur_++; 351 cur_++;
339 } 352 }
340 353
341 Location Tokenizer::GetCurrentLocation() const { 354 Location Tokenizer::GetCurrentLocation() const {
342 return Location(input_file_, line_number_, char_in_line_); 355 return Location(
356 input_file_, line_number_, char_in_line_, static_cast<int>(cur_));
343 } 357 }
344 358
345 Err Tokenizer::GetErrorForInvalidToken(const Location& location) const { 359 Err Tokenizer::GetErrorForInvalidToken(const Location& location) const {
346 std::string help; 360 std::string help;
347 if (cur_char() == ';') { 361 if (cur_char() == ';') {
348 // Semicolon. 362 // Semicolon.
349 help = "Semicolons are not needed, delete this one."; 363 help = "Semicolons are not needed, delete this one.";
350 } else if (cur_char() == '\t') { 364 } else if (cur_char() == '\t') {
351 // Tab. 365 // Tab.
352 help = "You got a tab character in here. Tabs are evil. " 366 help = "You got a tab character in here. Tabs are evil. "
353 "Convert to spaces."; 367 "Convert to spaces.";
354 } else if (cur_char() == '/' && cur_ + 1 < input_.size() && 368 } else if (cur_char() == '/' && cur_ + 1 < input_.size() &&
355 (input_[cur_ + 1] == '/' || input_[cur_ + 1] == '*')) { 369 (input_[cur_ + 1] == '/' || input_[cur_ + 1] == '*')) {
356 // Different types of comments. 370 // Different types of comments.
357 help = "Comments should start with # instead"; 371 help = "Comments should start with # instead";
358 } else { 372 } else {
359 help = "I have no idea what this is."; 373 help = "I have no idea what this is.";
360 } 374 }
361 375
362 return Err(location, "Invalid token.", help); 376 return Err(location, "Invalid token.", help);
363 } 377 }
OLDNEW
« no previous file with comments | « tools/gn/tokenizer.h ('k') | tools/gn/tokenizer_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698