Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(243)

Side by Side Diff: tools/gn/tokenizer.cc

Issue 588893006: gn: attach comments to parse tree (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: suffix comments too Created 6 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLD | NEW
1 // Copyright (c) 2013 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "tools/gn/tokenizer.h" 5 #include "tools/gn/tokenizer.h"
6 6
7 #include "base/logging.h" 7 #include "base/logging.h"
8 #include "base/strings/string_util.h"
8 #include "tools/gn/input_file.h" 9 #include "tools/gn/input_file.h"
9 10
10 namespace { 11 namespace {
11 12
12 bool CouldBeTwoCharOperatorBegin(char c) { 13 bool CouldBeTwoCharOperatorBegin(char c) {
13 return c == '<' || c == '>' || c == '!' || c == '=' || c == '-' || 14 return c == '<' || c == '>' || c == '!' || c == '=' || c == '-' ||
14 c == '+' || c == '|' || c == '&'; 15 c == '+' || c == '|' || c == '&';
15 } 16 }
16 17
17 bool CouldBeTwoCharOperatorEnd(char c) { 18 bool CouldBeTwoCharOperatorEnd(char c) {
(...skipping 82 matching lines...) Expand 10 before | Expand all | Expand 10 after
100 } 101 }
101 size_t token_begin = cur_; 102 size_t token_begin = cur_;
102 AdvanceToEndOfToken(location, type); 103 AdvanceToEndOfToken(location, type);
103 if (has_error()) 104 if (has_error())
104 break; 105 break;
105 size_t token_end = cur_; 106 size_t token_end = cur_;
106 107
107 base::StringPiece token_value(&input_.data()[token_begin], 108 base::StringPiece token_value(&input_.data()[token_begin],
108 token_end - token_begin); 109 token_end - token_begin);
109 110
110 if (type == Token::UNCLASSIFIED_OPERATOR) 111 if (type == Token::UNCLASSIFIED_OPERATOR) {
111 type = GetSpecificOperatorType(token_value); 112 type = GetSpecificOperatorType(token_value);
112 if (type == Token::IDENTIFIER) { 113 } else if (type == Token::IDENTIFIER) {
113 if (token_value == "if") 114 if (token_value == "if")
114 type = Token::IF; 115 type = Token::IF;
115 else if (token_value == "else") 116 else if (token_value == "else")
116 type = Token::ELSE; 117 type = Token::ELSE;
117 else if (token_value == "true") 118 else if (token_value == "true")
118 type = Token::TRUE_TOKEN; 119 type = Token::TRUE_TOKEN;
119 else if (token_value == "false") 120 else if (token_value == "false")
120 type = Token::FALSE_TOKEN; 121 type = Token::FALSE_TOKEN;
122 } else if (type == Token::UNCLASSIFIED_COMMENT) {
123 // Find back to the previous \n, and trim. If it's only whitespace, then
124 // this is on a line alone, otherwise it's a suffix comment.
125 size_t newline_location = input_.find_last_of('\n', token_begin);
126 base::StringPiece to_newline = input_.substr(
127 newline_location + 1, token_begin - (newline_location + 1));
128 std::string trimmed;
129 // TODO(scottmg): Should write TrimWhitespace for StringPiece.
130 base::TrimWhitespace(to_newline.as_string(), base::TRIM_ALL, &trimmed);
brettw 2014/09/23 21:33:15 I'd like to resolve this if possible, the tokenize [...]
scottmg 2014/09/23 22:15:37 Done. (as special function, that makes more sense, [...]
131 if (trimmed.empty())
132 type = Token::LINE_COMMENT;
133 else
134 type = Token::SUFFIX_COMMENT;
121 } 135 }
122 136
123 // TODO(brettw) This just strips comments from the token stream. This 137 tokens_.push_back(Token(location, type, token_value));
124 // is probably wrong, they should be removed at a later stage so we can
125 // do things like rewrite the file. But this makes the parser simpler and
126 // is OK for now.
127 if (type != Token::COMMENT)
128 tokens_.push_back(Token(location, type, token_value));
129 } 138 }
130 if (err_->has_error()) 139 if (err_->has_error())
131 tokens_.clear(); 140 tokens_.clear();
132 return tokens_; 141 return tokens_;
133 } 142 }
134 143
135 // static 144 // static
136 size_t Tokenizer::ByteOffsetOfNthLine(const base::StringPiece& buf, int n) { 145 size_t Tokenizer::ByteOffsetOfNthLine(const base::StringPiece& buf, int n) {
137 DCHECK_GT(n, 0); 146 DCHECK_GT(n, 0);
138 147
(...skipping 53 matching lines...) Expand 10 before | Expand all | Expand 10 after
192 return Token::LEFT_BRACE; 201 return Token::LEFT_BRACE;
193 if (next_char == '}') 202 if (next_char == '}')
194 return Token::RIGHT_BRACE; 203 return Token::RIGHT_BRACE;
195 204
196 if (next_char == '.') 205 if (next_char == '.')
197 return Token::DOT; 206 return Token::DOT;
198 if (next_char == ',') 207 if (next_char == ',')
199 return Token::COMMA; 208 return Token::COMMA;
200 209
201 if (next_char == '#') 210 if (next_char == '#')
202 return Token::COMMENT; 211 return Token::UNCLASSIFIED_COMMENT;
203 212
204 // For the case of '-' differentiate between a negative number and anything 213 // For the case of '-' differentiate between a negative number and anything
205 // else. 214 // else.
206 if (next_char == '-') { 215 if (next_char == '-') {
207 if (!CanIncrement()) 216 if (!CanIncrement())
208 return Token::UNCLASSIFIED_OPERATOR; // Just the minus before end of 217 return Token::UNCLASSIFIED_OPERATOR; // Just the minus before end of
209 // file. 218 // file.
210 char following_char = input_[cur_ + 1]; 219 char following_char = input_[cur_ + 1];
211 if (IsAsciiDigit(following_char)) 220 if (IsAsciiDigit(following_char))
212 return Token::INTEGER; 221 return Token::INTEGER;
(...skipping 65 matching lines...) Expand 10 before | Expand all | Expand 10 after
278 case Token::RIGHT_BRACKET: 287 case Token::RIGHT_BRACKET:
279 case Token::LEFT_BRACE: 288 case Token::LEFT_BRACE:
280 case Token::RIGHT_BRACE: 289 case Token::RIGHT_BRACE:
281 case Token::LEFT_PAREN: 290 case Token::LEFT_PAREN:
282 case Token::RIGHT_PAREN: 291 case Token::RIGHT_PAREN:
283 case Token::DOT: 292 case Token::DOT:
284 case Token::COMMA: 293 case Token::COMMA:
285 Advance(); // All are one char. 294 Advance(); // All are one char.
286 break; 295 break;
287 296
288 case Token::COMMENT: 297 case Token::UNCLASSIFIED_COMMENT:
289 // Eat to EOL. 298 // Eat to EOL.
290 while (!at_end() && !IsCurrentNewline()) 299 while (!at_end() && !IsCurrentNewline())
291 Advance(); 300 Advance();
292 break; 301 break;
293 302
294 case Token::INVALID: 303 case Token::INVALID:
295 default: 304 default:
296 *err_ = Err(location, "Everything is all messed up", 305 *err_ = Err(location, "Everything is all messed up",
297 "Please insert system disk in drive A: and press any key."); 306 "Please insert system disk in drive A: and press any key.");
298 NOTREACHED(); 307 NOTREACHED();
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after
332 if (IsCurrentNewline()) { 341 if (IsCurrentNewline()) {
333 line_number_++; 342 line_number_++;
334 char_in_line_ = 1; 343 char_in_line_ = 1;
335 } else { 344 } else {
336 char_in_line_++; 345 char_in_line_++;
337 } 346 }
338 cur_++; 347 cur_++;
339 } 348 }
340 349
341 Location Tokenizer::GetCurrentLocation() const { 350 Location Tokenizer::GetCurrentLocation() const {
342 return Location(input_file_, line_number_, char_in_line_); 351 return Location(input_file_, line_number_, char_in_line_, cur_);
343 } 352 }
344 353
345 Err Tokenizer::GetErrorForInvalidToken(const Location& location) const { 354 Err Tokenizer::GetErrorForInvalidToken(const Location& location) const {
346 std::string help; 355 std::string help;
347 if (cur_char() == ';') { 356 if (cur_char() == ';') {
348 // Semicolon. 357 // Semicolon.
349 help = "Semicolons are not needed, delete this one."; 358 help = "Semicolons are not needed, delete this one.";
350 } else if (cur_char() == '\t') { 359 } else if (cur_char() == '\t') {
351 // Tab. 360 // Tab.
352 help = "You got a tab character in here. Tabs are evil. " 361 help = "You got a tab character in here. Tabs are evil. "
353 "Convert to spaces."; 362 "Convert to spaces.";
354 } else if (cur_char() == '/' && cur_ + 1 < input_.size() && 363 } else if (cur_char() == '/' && cur_ + 1 < input_.size() &&
355 (input_[cur_ + 1] == '/' || input_[cur_ + 1] == '*')) { 364 (input_[cur_ + 1] == '/' || input_[cur_ + 1] == '*')) {
356 // Different types of comments. 365 // Different types of comments.
357 help = "Comments should start with # instead"; 366 help = "Comments should start with # instead";
358 } else { 367 } else {
359 help = "I have no idea what this is."; 368 help = "I have no idea what this is.";
360 } 369 }
361 370
362 return Err(location, "Invalid token.", help); 371 return Err(location, "Invalid token.", help);
363 } 372 }
OLD | NEW

Powered by Google App Engine
This is Rietveld 408576698