OLD | NEW |
---|---|
1 // Copyright (c) 2013 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2013 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "tools/gn/tokenizer.h" | 5 #include "tools/gn/tokenizer.h" |
6 | 6 |
7 #include "base/logging.h" | 7 #include "base/logging.h" |
8 #include "base/strings/string_util.h" | |
8 #include "tools/gn/input_file.h" | 9 #include "tools/gn/input_file.h" |
9 | 10 |
10 namespace { | 11 namespace { |
11 | 12 |
12 bool CouldBeTwoCharOperatorBegin(char c) { | 13 bool CouldBeTwoCharOperatorBegin(char c) { |
13 return c == '<' || c == '>' || c == '!' || c == '=' || c == '-' || | 14 return c == '<' || c == '>' || c == '!' || c == '=' || c == '-' || |
14 c == '+' || c == '|' || c == '&'; | 15 c == '+' || c == '|' || c == '&'; |
15 } | 16 } |
16 | 17 |
17 bool CouldBeTwoCharOperatorEnd(char c) { | 18 bool CouldBeTwoCharOperatorEnd(char c) { |
(...skipping 82 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
100 } | 101 } |
101 size_t token_begin = cur_; | 102 size_t token_begin = cur_; |
102 AdvanceToEndOfToken(location, type); | 103 AdvanceToEndOfToken(location, type); |
103 if (has_error()) | 104 if (has_error()) |
104 break; | 105 break; |
105 size_t token_end = cur_; | 106 size_t token_end = cur_; |
106 | 107 |
107 base::StringPiece token_value(&input_.data()[token_begin], | 108 base::StringPiece token_value(&input_.data()[token_begin], |
108 token_end - token_begin); | 109 token_end - token_begin); |
109 | 110 |
110 if (type == Token::UNCLASSIFIED_OPERATOR) | 111 if (type == Token::UNCLASSIFIED_OPERATOR) { |
111 type = GetSpecificOperatorType(token_value); | 112 type = GetSpecificOperatorType(token_value); |
112 if (type == Token::IDENTIFIER) { | 113 } else if (type == Token::IDENTIFIER) { |
113 if (token_value == "if") | 114 if (token_value == "if") |
114 type = Token::IF; | 115 type = Token::IF; |
115 else if (token_value == "else") | 116 else if (token_value == "else") |
116 type = Token::ELSE; | 117 type = Token::ELSE; |
117 else if (token_value == "true") | 118 else if (token_value == "true") |
118 type = Token::TRUE_TOKEN; | 119 type = Token::TRUE_TOKEN; |
119 else if (token_value == "false") | 120 else if (token_value == "false") |
120 type = Token::FALSE_TOKEN; | 121 type = Token::FALSE_TOKEN; |
122 } else if (type == Token::UNCLASSIFIED_COMMENT) { | |
123 // Find back to the previous \n, and trim. If it's only whitespace, then | |
124 // this is on a line alone, otherwise it's a suffix comment. | |
125 size_t newline_location = input_.find_last_of('\n', token_begin); | |
126 base::StringPiece to_newline = input_.substr( | |
127 newline_location + 1, token_begin - (newline_location + 1)); | |
128 std::string trimmed; | |
129 // TODO(scottmg): Should write TrimWhitespace for StringPiece. | |
130 base::TrimWhitespace(to_newline.as_string(), base::TRIM_ALL, &trimmed); | |
brettw
2014/09/23 21:33:15
I'd like to resolve this if possible, the tokenizer […truncated in review export]
scottmg
2014/09/23 22:15:37
Done. (as special function, that makes more sense) […truncated in review export]
| |
131 if (trimmed.empty()) | |
132 type = Token::LINE_COMMENT; | |
133 else | |
134 type = Token::SUFFIX_COMMENT; | |
121 } | 135 } |
122 | 136 |
123 // TODO(brettw) This just strips comments from the token stream. This | 137 tokens_.push_back(Token(location, type, token_value)); |
124 // is probably wrong, they should be removed at a later stage so we can | |
125 // do things like rewrite the file. But this makes the parser simpler and | |
126 // is OK for now. | |
127 if (type != Token::COMMENT) | |
128 tokens_.push_back(Token(location, type, token_value)); | |
129 } | 138 } |
130 if (err_->has_error()) | 139 if (err_->has_error()) |
131 tokens_.clear(); | 140 tokens_.clear(); |
132 return tokens_; | 141 return tokens_; |
133 } | 142 } |
134 | 143 |
135 // static | 144 // static |
136 size_t Tokenizer::ByteOffsetOfNthLine(const base::StringPiece& buf, int n) { | 145 size_t Tokenizer::ByteOffsetOfNthLine(const base::StringPiece& buf, int n) { |
137 DCHECK_GT(n, 0); | 146 DCHECK_GT(n, 0); |
138 | 147 |
(...skipping 53 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
192 return Token::LEFT_BRACE; | 201 return Token::LEFT_BRACE; |
193 if (next_char == '}') | 202 if (next_char == '}') |
194 return Token::RIGHT_BRACE; | 203 return Token::RIGHT_BRACE; |
195 | 204 |
196 if (next_char == '.') | 205 if (next_char == '.') |
197 return Token::DOT; | 206 return Token::DOT; |
198 if (next_char == ',') | 207 if (next_char == ',') |
199 return Token::COMMA; | 208 return Token::COMMA; |
200 | 209 |
201 if (next_char == '#') | 210 if (next_char == '#') |
202 return Token::COMMENT; | 211 return Token::UNCLASSIFIED_COMMENT; |
203 | 212 |
204 // For the case of '-' differentiate between a negative number and anything | 213 // For the case of '-' differentiate between a negative number and anything |
205 // else. | 214 // else. |
206 if (next_char == '-') { | 215 if (next_char == '-') { |
207 if (!CanIncrement()) | 216 if (!CanIncrement()) |
208 return Token::UNCLASSIFIED_OPERATOR; // Just the minus before end of | 217 return Token::UNCLASSIFIED_OPERATOR; // Just the minus before end of |
209 // file. | 218 // file. |
210 char following_char = input_[cur_ + 1]; | 219 char following_char = input_[cur_ + 1]; |
211 if (IsAsciiDigit(following_char)) | 220 if (IsAsciiDigit(following_char)) |
212 return Token::INTEGER; | 221 return Token::INTEGER; |
(...skipping 65 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
278 case Token::RIGHT_BRACKET: | 287 case Token::RIGHT_BRACKET: |
279 case Token::LEFT_BRACE: | 288 case Token::LEFT_BRACE: |
280 case Token::RIGHT_BRACE: | 289 case Token::RIGHT_BRACE: |
281 case Token::LEFT_PAREN: | 290 case Token::LEFT_PAREN: |
282 case Token::RIGHT_PAREN: | 291 case Token::RIGHT_PAREN: |
283 case Token::DOT: | 292 case Token::DOT: |
284 case Token::COMMA: | 293 case Token::COMMA: |
285 Advance(); // All are one char. | 294 Advance(); // All are one char. |
286 break; | 295 break; |
287 | 296 |
288 case Token::COMMENT: | 297 case Token::UNCLASSIFIED_COMMENT: |
289 // Eat to EOL. | 298 // Eat to EOL. |
290 while (!at_end() && !IsCurrentNewline()) | 299 while (!at_end() && !IsCurrentNewline()) |
291 Advance(); | 300 Advance(); |
292 break; | 301 break; |
293 | 302 |
294 case Token::INVALID: | 303 case Token::INVALID: |
295 default: | 304 default: |
296 *err_ = Err(location, "Everything is all messed up", | 305 *err_ = Err(location, "Everything is all messed up", |
297 "Please insert system disk in drive A: and press any key."); | 306 "Please insert system disk in drive A: and press any key."); |
298 NOTREACHED(); | 307 NOTREACHED(); |
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
332 if (IsCurrentNewline()) { | 341 if (IsCurrentNewline()) { |
333 line_number_++; | 342 line_number_++; |
334 char_in_line_ = 1; | 343 char_in_line_ = 1; |
335 } else { | 344 } else { |
336 char_in_line_++; | 345 char_in_line_++; |
337 } | 346 } |
338 cur_++; | 347 cur_++; |
339 } | 348 } |
340 | 349 |
341 Location Tokenizer::GetCurrentLocation() const { | 350 Location Tokenizer::GetCurrentLocation() const { |
342 return Location(input_file_, line_number_, char_in_line_); | 351 return Location(input_file_, line_number_, char_in_line_, cur_); |
343 } | 352 } |
344 | 353 |
345 Err Tokenizer::GetErrorForInvalidToken(const Location& location) const { | 354 Err Tokenizer::GetErrorForInvalidToken(const Location& location) const { |
346 std::string help; | 355 std::string help; |
347 if (cur_char() == ';') { | 356 if (cur_char() == ';') { |
348 // Semicolon. | 357 // Semicolon. |
349 help = "Semicolons are not needed, delete this one."; | 358 help = "Semicolons are not needed, delete this one."; |
350 } else if (cur_char() == '\t') { | 359 } else if (cur_char() == '\t') { |
351 // Tab. | 360 // Tab. |
352 help = "You got a tab character in here. Tabs are evil. " | 361 help = "You got a tab character in here. Tabs are evil. " |
353 "Convert to spaces."; | 362 "Convert to spaces."; |
354 } else if (cur_char() == '/' && cur_ + 1 < input_.size() && | 363 } else if (cur_char() == '/' && cur_ + 1 < input_.size() && |
355 (input_[cur_ + 1] == '/' || input_[cur_ + 1] == '*')) { | 364 (input_[cur_ + 1] == '/' || input_[cur_ + 1] == '*')) { |
356 // Different types of comments. | 365 // Different types of comments. |
357 help = "Comments should start with # instead"; | 366 help = "Comments should start with # instead"; |
358 } else { | 367 } else { |
359 help = "I have no idea what this is."; | 368 help = "I have no idea what this is."; |
360 } | 369 } |
361 | 370 |
362 return Err(location, "Invalid token.", help); | 371 return Err(location, "Invalid token.", help); |
363 } | 372 } |
OLD | NEW |