// Copyright (c) 2013 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "tools/gn/tokenizer.h"

#include "base/logging.h"
#include "tools/gn/input_file.h"

namespace {

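// Returns true if |c| can appear in an integer literal: a leading '-' or a
// decimal digit.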
bool IsNumberChar(char c) {
  return c == '-' || (c >= '0' && c <= '9');
}

bool CouldBeTwoCharOperatorBegin(char c) {
  return c == '<' || c == '>' || c == '!' || c == '=' || c == '-' ||
         c == '+' || c == '|' || c == '&';
}

bool CouldBeTwoCharOperatorEnd(char c) {
  return c == '=' || c == '|' || c == '&';
}

bool CouldBeOneCharOperator(char c) {
  return c == '=' || c == '<' || c == '>' || c == '+' || c == '!' ||
         c == ':' || c == '|' || c == '&' || c == '-';
}

bool CouldBeOperator(char c) {
  return CouldBeOneCharOperator(c) || CouldBeTwoCharOperatorBegin(c);
}

bool IsSeparatorChar(char c) {
  return c == ',';
}

bool IsScoperChar(char c) {
  return c == '(' || c == ')' || c == '[' || c == ']' || c == '{' || c == '}';
}

}  // namespace

Tokenizer::Tokenizer(const InputFile* input_file, Err* err)
    : input_file_(input_file),
      input_(input_file->contents()),
      err_(err),
      cur_(0),
      line_number_(1),
      char_in_line_(1) {
}

Tokenizer::~Tokenizer() {
}

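// Convenience entry point: tokenizes all of |input_file| in one call,
// reporting any problem through |err|.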
// static
std::vector<Token> Tokenizer::Tokenize(const InputFile* input_file, Err* err) {
  Tokenizer t(input_file, err);
  return t.Run();
}

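// Produces the token stream for the input. Repeatedly skips whitespace,
// classifies the token starting at the current character, advances past it,
// and appends it to the result. Comment tokens are dropped, and the result
// is cleared if an error was recorded.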
std::vector<Token> Tokenizer::Run() {
  std::vector<Token> tokens;
  while (!done()) {
    AdvanceToNextToken();
    if (done())
      break;
    Location location = GetCurrentLocation();

    Token::Type type = ClassifyCurrent();
    if (type == Token::INVALID) {
      *err_ = GetErrorForInvalidToken(location);
      break;
    }
    size_t token_begin = cur_;
    AdvanceToEndOfToken(location, type);
    if (has_error())
      break;
    size_t token_end = cur_;

    // TODO(brettw) This just strips comments from the token stream. This
    // is probably wrong, they should be removed at a later stage so we can
    // do things like rewrite the file. But this makes the parser simpler and
    // is OK for now.
    if (type != Token::COMMENT) {
      tokens.push_back(Token(
          location,
          type,
          base::StringPiece(&input_.data()[token_begin],
                            token_end - token_begin)));
    }
  }
  if (err_->has_error())
    tokens.clear();
  return tokens;
}

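// Returns the byte offset within |buf| at which 1-based line |n| begins, or
// size_t(-1) if the buffer contains fewer than |n| lines.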
// static
size_t Tokenizer::ByteOffsetOfNthLine(const base::StringPiece& buf, int n) {
  int cur_line = 1;
  size_t cur_byte = 0;

  DCHECK(n > 0);

  if (n == 1)
    return 0;

  while (cur_byte < buf.size()) {
    if (IsNewline(buf, cur_byte)) {
      cur_line++;
      if (cur_line == n)
        return cur_byte + 1;
    }
    cur_byte++;
  }
  return static_cast<size_t>(-1);
}

// static
bool Tokenizer::IsNewline(const base::StringPiece& buffer, size_t offset) {
  DCHECK(offset < buffer.size());
  // We may need more logic here to handle different line ending styles.
  return buffer[offset] == '\n';
}

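// Skips whitespace so that cur_ points at the first character of the next
// token, or at the end of input if only whitespace remains.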
void Tokenizer::AdvanceToNextToken() {
  while (!at_end() && IsCurrentWhitespace())
    Advance();
}

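// Determines the type of the token starting at the current character by
// looking at one character (two for '-', which may begin either a negative
// integer or an operator).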
Token::Type Tokenizer::ClassifyCurrent() const {
  DCHECK(!at_end());
  char next_char = cur_char();
  if (next_char >= '0' && next_char <= '9')
    return Token::INTEGER;
  if (next_char == '"')
    return Token::STRING;

  // Note: '-' handled specially below.
  if (next_char != '-' && CouldBeOperator(next_char))
    return Token::OPERATOR;

  if (IsIdentifierFirstChar(next_char))
    return Token::IDENTIFIER;

  if (IsScoperChar(next_char))
    return Token::SCOPER;

  if (IsSeparatorChar(next_char))
    return Token::SEPARATOR;

  if (next_char == '#')
    return Token::COMMENT;

  // For the case of '-', differentiate between a negative number and anything
  // else.
  if (next_char == '-') {
    if (!CanIncrement())
      return Token::OPERATOR;  // Just the minus before end of file.
    char following_char = input_[cur_ + 1];
    if (following_char >= '0' && following_char <= '9')
      return Token::INTEGER;
    return Token::OPERATOR;
  }

  return Token::INVALID;
}

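// Moves cur_ to the first character past the token of the given |type| that
// begins at the current position, recording an error if the token is
// malformed (e.g. a bad number or an unterminated string).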
void Tokenizer::AdvanceToEndOfToken(const Location& location,
                                    Token::Type type) {
  switch (type) {
    case Token::INTEGER:
      do {
        Advance();
      } while (!at_end() && IsNumberChar(cur_char()));
      if (!at_end()) {
        // Require the char after a number to be some kind of space, scope,
        // or operator.
        char c = cur_char();
        if (!IsCurrentWhitespace() && !CouldBeOperator(c) &&
            !IsScoperChar(c) && !IsSeparatorChar(c)) {
          *err_ = Err(GetCurrentLocation(),
                      "This is not a valid number.",
                      "Learn to count.");
          // Highlight the number.
          err_->AppendRange(LocationRange(location, GetCurrentLocation()));
        }
      }
      break;

    case Token::STRING: {
      char initial = cur_char();
      Advance();  // Advance past initial "
      for (;;) {
        if (at_end()) {
          *err_ = Err(LocationRange(location,
                          Location(input_file_, line_number_, char_in_line_)),
                      "Unterminated string literal.",
                      "Don't leave me hanging like this!");
          break;
        }
        if (IsCurrentStringTerminator(initial)) {
          Advance();  // Skip past last "
          break;
        } else if (cur_char() == '\n') {
          *err_ = Err(LocationRange(location, GetCurrentLocation()),
                      "Newline in string constant.");
        }
        Advance();
      }
      break;
    }

    case Token::OPERATOR:
      // Some operators are two characters, some are one.
      if (CouldBeTwoCharOperatorBegin(cur_char())) {
        if (CanIncrement() && CouldBeTwoCharOperatorEnd(input_[cur_ + 1]))
          Advance();
      }
      Advance();
      break;

    case Token::IDENTIFIER:
      while (!at_end() && IsIdentifierContinuingChar(cur_char()))
        Advance();
      break;

    case Token::SCOPER:
    case Token::SEPARATOR:
      Advance();  // All are one char.
      break;

    case Token::COMMENT:
      // Eat to EOL.
      while (!at_end() && !IsCurrentNewline())
        Advance();
      break;

    case Token::INVALID:
      *err_ = Err(location, "Everything is all messed up",
                  "Please insert system disk in drive A: and press any key.");
      NOTREACHED();
      return;
  }
}

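// Returns true if the current character is whitespace. Tab is deliberately
// excluded; it classifies as an invalid token and is reported by
// GetErrorForInvalidToken().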
bool Tokenizer::IsCurrentWhitespace() const {
  DCHECK(!at_end());
  char c = input_[cur_];
  // Note that tab (0x09) is illegal.
  return c == 0x0A || c == 0x0B || c == 0x0C || c == 0x0D || c == 0x20;
}

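// Returns true if the current character is a |quote_char| that actually ends
// the string, i.e. one that is not escaped by a backslash.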
bool Tokenizer::IsCurrentStringTerminator(char quote_char) const {
  DCHECK(!at_end());
  if (cur_char() != quote_char)
    return false;

  // Check for escaping. \" is not a string terminator, but \\" is. Count
  // the number of preceding backslashes.
  int num_backslashes = 0;
  for (int i = static_cast<int>(cur_) - 1; i >= 0 && input_[i] == '\\'; i--)
    num_backslashes++;

  // Even backslashes mean that they were escaping each other and don't count
  // as escaping this quote.
  return (num_backslashes % 2) == 0;
}

bool Tokenizer::IsCurrentNewline() const {
  return IsNewline(input_, cur_);
}

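// Moves to the next character, keeping line_number_ and char_in_line_ in
// sync so error locations stay accurate.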
void Tokenizer::Advance() {
  DCHECK(cur_ < input_.size());
  if (IsCurrentNewline()) {
    line_number_++;
    char_in_line_ = 1;
  } else {
    char_in_line_++;
  }
  cur_++;
}

Location Tokenizer::GetCurrentLocation() const {
  return Location(input_file_, line_number_, char_in_line_);
}

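// Builds the error for a character that could not be classified, with help
// text tailored to common mistakes: stray semicolons, tab characters, and
// C-style comments.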
Err Tokenizer::GetErrorForInvalidToken(const Location& location) const {
  std::string help;
  if (cur_char() == ';') {
    // Semicolon.
    help = "Semicolons are not needed, delete this one.";
  } else if (cur_char() == '\t') {
    // Tab.
    help = "You got a tab character in here. Tabs are evil. "
           "Convert to spaces.";
  } else if (cur_char() == '/' && cur_ + 1 < input_.size() &&
             (input_[cur_ + 1] == '/' || input_[cur_ + 1] == '*')) {
    // Different types of comments.
    help = "Comments should start with # instead";
  } else {
    help = "I have no idea what this is.";
  }

  return Err(location, "Invalid token.", help);
}