Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(10)

Side by Side Diff: tools/gn/tokenizer.cc

Issue 21114002: Add initial prototype for the GN meta-buildsystem. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: add owners and readme Created 7 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « tools/gn/tokenizer.h ('k') | tools/gn/tokenizer_unittest.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "tools/gn/tokenizer.h"
6
7 #include "base/logging.h"
8 #include "tools/gn/input_file.h"
9
namespace {

// Returns true if |c| can continue an integer literal that has already been
// started, i.e. if it is an ASCII decimal digit.
//
// A minus sign is deliberately NOT accepted. The integer scanning loop always
// consumes the first character of the literal (which may be a leading '-')
// before consulting this predicate, so accepting '-' here would make input
// such as "1-2" or "5-" collapse into one bogus integer token.
bool IsNumberChar(char c) {
  return c >= '0' && c <= '9';
}

// Returns true if |c| may be the first character of a two-character operator.
bool CouldBeTwoCharOperatorBegin(char c) {
  switch (c) {
    case '<':
    case '>':
    case '!':
    case '=':
    case '-':
    case '+':
    case '|':
    case '&':
      return true;
    default:
      return false;
  }
}

// Returns true if |c| may be the second character of a two-character
// operator.
bool CouldBeTwoCharOperatorEnd(char c) {
  switch (c) {
    case '=':
    case '|':
    case '&':
      return true;
    default:
      return false;
  }
}

// Returns true if |c| is treated as a possible operator character on its own.
bool CouldBeOneCharOperator(char c) {
  switch (c) {
    case '=':
    case '<':
    case '>':
    case '+':
    case '!':
    case ':':
    case '|':
    case '&':
    case '-':
      return true;
    default:
      return false;
  }
}

// Returns true if |c| could begin any operator, one or two characters long.
bool CouldBeOperator(char c) {
  return CouldBeOneCharOperator(c) || CouldBeTwoCharOperatorBegin(c);
}

// Returns true if |c| is the list separator (a comma).
bool IsSeparatorChar(char c) {
  return c == ',';
}

// Returns true if |c| is a parenthesis, square bracket, or curly brace,
// either opening or closing.
bool IsScoperChar(char c) {
  switch (c) {
    case '(':
    case ')':
    case '[':
    case ']':
    case '{':
    case '}':
      return true;
    default:
      return false;
  }
}

}  // namespace
43
44 Tokenizer::Tokenizer(const InputFile* input_file, Err* err)
45 : input_file_(input_file),
46 input_(input_file->contents()),
47 err_(err),
48 cur_(0),
49 line_number_(1),
50 char_in_line_(1) {
51 }
52
53 Tokenizer::~Tokenizer() {
54 }
55
56 // static
57 std::vector<Token> Tokenizer::Tokenize(const InputFile* input_file, Err* err) {
58 Tokenizer t(input_file, err);
59 return t.Run();
60 }
61
62 std::vector<Token> Tokenizer::Run() {
63 std::vector<Token> tokens;
64 while (!done()) {
65 AdvanceToNextToken();
66 if (done())
67 break;
68 Location location = GetCurrentLocation();
69
70 Token::Type type = ClassifyCurrent();
71 if (type == Token::INVALID) {
72 *err_ = GetErrorForInvalidToken(location);
73 break;
74 }
75 size_t token_begin = cur_;
76 AdvanceToEndOfToken(location, type);
77 if (has_error())
78 break;
79 size_t token_end = cur_;
80
81 // TODO(brettw) This just strips comments from the token stream. This
82 // is probably wrong, they should be removed at a later stage so we can
83 // do things like rewrite the file. But this makes the parser simpler and
84 // is OK for now.
85 if (type != Token::COMMENT) {
86 tokens.push_back(Token(
87 location,
88 type,
89 base::StringPiece(&input_.data()[token_begin],
90 token_end - token_begin)));
91 }
92 }
93 if (err_->has_error())
94 tokens.clear();
95 return tokens;
96 }
97
98 // static
99 size_t Tokenizer::ByteOffsetOfNthLine(const base::StringPiece& buf, int n) {
100 int cur_line = 1;
101 size_t cur_byte = 0;
102
103 DCHECK(n > 0);
104
105 if (n == 1)
106 return 0;
107
108 while (cur_byte < buf.size()) {
109 if (IsNewline(buf, cur_byte)) {
110 cur_line++;
111 if (cur_line == n)
112 return cur_byte + 1;
113 }
114 cur_byte++;
115 }
116 return -1;
117 }
118
119 // static
120 bool Tokenizer::IsNewline(const base::StringPiece& buffer, size_t offset) {
121 DCHECK(offset < buffer.size());
122 // We may need more logic here to handle different line ending styles.
123 return buffer[offset] == '\n';
124 }
125
126
127 void Tokenizer::AdvanceToNextToken() {
128 while (!at_end() && IsCurrentWhitespace())
129 Advance();
130 }
131
132 Token::Type Tokenizer::ClassifyCurrent() const {
133 DCHECK(!at_end());
134 char next_char = cur_char();
135 if (next_char >= '0' && next_char <= '9')
136 return Token::INTEGER;
137 if (next_char == '"')
138 return Token::STRING;
139
140 // Note: '-' handled specially below.
141 if (next_char != '-' && CouldBeOperator(next_char))
142 return Token::OPERATOR;
143
144 if (IsIdentifierFirstChar(next_char))
145 return Token::IDENTIFIER;
146
147 if (IsScoperChar(next_char))
148 return Token::SCOPER;
149
150 if (IsSeparatorChar(next_char))
151 return Token::SEPARATOR;
152
153 if (next_char == '#')
154 return Token::COMMENT;
155
156 // For the case of '-' differentiate between a negative number and anything
157 // else.
158 if (next_char == '-') {
159 if (!CanIncrement())
160 return Token::OPERATOR; // Just the minus before end of file.
161 char following_char = input_[cur_ + 1];
162 if (following_char >= '0' && following_char <= '9')
163 return Token::INTEGER;
164 return Token::OPERATOR;
165 }
166
167 return Token::INVALID;
168 }
169
170 void Tokenizer::AdvanceToEndOfToken(const Location& location,
171 Token::Type type) {
172 switch (type) {
173 case Token::INTEGER:
174 do {
175 Advance();
176 } while (!at_end() && IsNumberChar(cur_char()));
177 if (!at_end()) {
178 // Require the char after a number to be some kind of space, scope,
179 // or operator.
180 char c = cur_char();
181 if (!IsCurrentWhitespace() && !CouldBeOperator(c) &&
182 !IsScoperChar(c) && !IsSeparatorChar(c)) {
183 *err_ = Err(GetCurrentLocation(),
184 "This is not a valid number.",
185 "Learn to count.");
186 // Highlight the number.
187 err_->AppendRange(LocationRange(location, GetCurrentLocation()));
188 }
189 }
190 break;
191
192 case Token::STRING: {
193 char initial = cur_char();
194 Advance(); // Advance past initial "
195 for (;;) {
196 if (at_end()) {
197 *err_ = Err(LocationRange(location,
198 Location(input_file_, line_number_, char_in_line_)),
199 "Unterminated string literal.",
200 "Don't leave me hanging like this!");
201 break;
202 }
203 if (IsCurrentStringTerminator(initial)) {
204 Advance(); // Skip past last "
205 break;
206 } else if (cur_char() == '\n') {
207 *err_ = Err(LocationRange(location,
208 GetCurrentLocation()),
209 "Newline in string constant.");
210 }
211 Advance();
212 }
213 break;
214 }
215
216 case Token::OPERATOR:
217 // Some operators are two characters, some are one.
218 if (CouldBeTwoCharOperatorBegin(cur_char())) {
219 if (CanIncrement() && CouldBeTwoCharOperatorEnd(input_[cur_ + 1]))
220 Advance();
221 }
222 Advance();
223 break;
224
225 case Token::IDENTIFIER:
226 while (!at_end() && IsIdentifierContinuingChar(cur_char()))
227 Advance();
228 break;
229
230 case Token::SCOPER:
231 case Token::SEPARATOR:
232 Advance(); // All are one char.
233 break;
234
235 case Token::COMMENT:
236 // Eat to EOL.
237 while (!at_end() && !IsCurrentNewline())
238 Advance();
239 break;
240
241 case Token::INVALID:
242 *err_ = Err(location, "Everything is all messed up",
243 "Please insert system disk in drive A: and press any key.");
244 NOTREACHED();
245 return;
246 }
247 }
248
249 bool Tokenizer::IsCurrentWhitespace() const {
250 DCHECK(!at_end());
251 char c = input_[cur_];
252 // Note that tab (0x09) is illegal.
253 return c == 0x0A || c == 0x0B || c == 0x0C || c == 0x0D || c == 0x20;
254 }
255
256 bool Tokenizer::IsCurrentStringTerminator(char quote_char) const {
257 DCHECK(!at_end());
258 if (cur_char() != quote_char)
259 return false;
260
261 // Check for escaping. \" is not a string terminator, but \\" is. Count
262 // the number of preceeding backslashes.
263 int num_backslashes = 0;
264 for (int i = static_cast<int>(cur_) - 1; i >= 0 && input_[i] == '\\'; i--)
265 num_backslashes++;
266
267 // Even backslashes mean that they were escaping each other and don't count
268 // as escaping this quote.
269 return (num_backslashes % 2) == 0;
270 }
271
272 bool Tokenizer::IsCurrentNewline() const {
273 return IsNewline(input_, cur_);
274 }
275
276 void Tokenizer::Advance() {
277 DCHECK(cur_ < input_.size());
278 if (IsCurrentNewline()) {
279 line_number_++;
280 char_in_line_ = 1;
281 } else {
282 char_in_line_++;
283 }
284 cur_++;
285 }
286
287 Location Tokenizer::GetCurrentLocation() const {
288 return Location(input_file_, line_number_, char_in_line_);
289 }
290
291 Err Tokenizer::GetErrorForInvalidToken(const Location& location) const {
292 std::string help;
293 if (cur_char() == ';') {
294 // Semicolon.
295 help = "Semicolons are not needed, delete this one.";
296 } else if (cur_char() == '\t') {
297 // Tab.
298 help = "You got a tab character in here. Tabs are evil. "
299 "Convert to spaces.";
300 } else if (cur_char() == '/' && cur_ + 1 < input_.size() &&
301 (input_[cur_ + 1] == '/' || input_[cur_ + 1] == '*')) {
302 // Different types of comments.
303 help = "Comments should start with # instead";
304 } else {
305 help = "I have no idea what this is.";
306 }
307
308 return Err(location, "Invalid token.", help);
309 }
OLDNEW
« no previous file with comments | « tools/gn/tokenizer.h ('k') | tools/gn/tokenizer_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698