// Copyright 2015 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "mojom/lexer.h"

#include <map>
#include <string>

#include "base/lazy_instance.h"

namespace mojo {
namespace mojom {

namespace {

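// Maps mojom keyword strings to their corresponding token types. The table is
// built once and accessed through the Keywords() helper below.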
class KeywordsDict {
 public:
  KeywordsDict();

 private:
  std::map<std::string, mojom::TokenType> keywords_;
  friend std::map<std::string, mojom::TokenType>& Keywords();

  DISALLOW_COPY_AND_ASSIGN(KeywordsDict);
};
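// Lazily-constructed singleton instance holding the keyword table.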
static base::LazyInstance<KeywordsDict> g_keywords = LAZY_INSTANCE_INITIALIZER;

std::map<std::string, mojom::TokenType>& Keywords() {
  return g_keywords.Get().keywords_;
}

KeywordsDict::KeywordsDict() {
  keywords_["import"] = TokenType::IMPORT;
  keywords_["module"] = TokenType::MODULE;
  keywords_["struct"] = TokenType::STRUCT;
  keywords_["union"] = TokenType::UNION;
  keywords_["interface"] = TokenType::INTERFACE;
  keywords_["enum"] = TokenType::ENUM;
  keywords_["const"] = TokenType::CONST;
  keywords_["true"] = TokenType::TRUE;
  keywords_["false"] = TokenType::FALSE;
  keywords_["default"] = TokenType::DEFAULT;
}

// Non-localized version of isalpha.
bool IsAlpha(char c) {
  return (('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z'));
}

// Non-localized version of isdigit.
bool IsDigit(char c) {
  return ('0' <= c && c <= '9');
}

// Non-localized version of isxdigit.
bool IsHexDigit(char c) {
  return (IsDigit(c) || ('a' <= c && c <= 'f') || ('A' <= c && c <= 'F'));
}

// Non-localized version of isalnum.
bool IsAlnum(char c) {
  return IsAlpha(c) || IsDigit(c);
}

// MojomLexer tokenizes a mojom source file. It is NOT thread-safe.
class MojomLexer {
 public:
  explicit MojomLexer(const std::string& source);
  ~MojomLexer();

  // Returns the list of tokens in the source file.
  std::vector<Token> Tokenize();

 private:
  // The GetNextToken.* functions all return true if they could find a token
  // (even an error token) and false otherwise.
  bool GetNextToken(Token* result);
  bool GetNextTokenSingleChar(Token* result);
  bool GetNextTokenEqualsOrResponse(Token* result);
  bool GetNextTokenIdentifier(Token* result);
  bool GetNextTokenDecConst(Token* result);
  bool GetNextTokenHexConst(Token* result);
  bool GetNextTokenOrdinal(Token* result);
  bool GetNextTokenStringLiteral(Token* result);

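  // Helpers that advance the lexer's position; see the definitions below.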
  void ConsumeSkippable();
  void ConsumeDigits();
  void ConsumeEol();
  void Consume(size_t num);

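  // Returns true if offset_ + offset_plus is at or beyond the end of the
  // source.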
  bool eos(size_t offset_plus) {
    return offset_ + offset_plus >= source_.size();
  }

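  // The source text and the lexer's current position in it: the absolute
  // character offset, the zero-based line number, and the offset within the
  // current line.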
  const std::string source_;
  size_t offset_;
  size_t line_no_;
  size_t offset_in_line_;

  DISALLOW_COPY_AND_ASSIGN(MojomLexer);
};

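// Scans the source from the beginning, accumulating tokens until the end of
// the source is reached or an error token is produced.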
std::vector<Token> MojomLexer::Tokenize() {
  offset_ = 0;
  line_no_ = 0;
  offset_in_line_ = 0;

  std::vector<Token> result;
  Token cur;
  while (GetNextToken(&cur)) {
    result.push_back(cur);

    // As soon as an error token is found, stop tokenizing.
    if (cur.error()) {
      break;
    }
  }

  return result;
}

bool MojomLexer::GetNextToken(Token* result) {
  // Skip any whitespace which may be in front of the next token.
  ConsumeSkippable();

  // If we have reached the end of the source, signal that to the caller.
  if (eos(0))
    return false;

  // Save the current position in the source code.
  result->char_pos = offset_;
  result->line_no = line_no_;
  result->line_pos = offset_in_line_;

  if (GetNextTokenSingleChar(result) || GetNextTokenEqualsOrResponse(result) ||
      GetNextTokenIdentifier(result) || GetNextTokenHexConst(result) ||
      GetNextTokenDecConst(result) || GetNextTokenOrdinal(result) ||
      GetNextTokenStringLiteral(result))
    return true;

  result->token = source_.substr(offset_, 1);
  result->token_type = TokenType::ERROR_ILLEGAL_CHAR;
  return true;
}

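// Consumes spaces, tabs, carriage returns and newlines, updating the line
// counters as newlines are consumed.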
void MojomLexer::ConsumeSkippable() {
  if (eos(0))
    return;

  bool found_non_space = false;
  while (!found_non_space && !eos(0)) {
    switch (source_[offset_]) {
      case ' ':
      case '\t':
      case '\r':
        Consume(1);
        break;
      case '\n':
        ConsumeEol();
        break;
      default:
        found_non_space = true;
        break;
    }
  }
}

// Finds all single-character tokens except for '='.
bool MojomLexer::GetNextTokenSingleChar(Token* result) {
  switch (source_[offset_]) {
    case '(':
      result->token_type = TokenType::LPAREN;
      break;
    case ')':
      result->token_type = TokenType::RPAREN;
      break;
    case '[':
      result->token_type = TokenType::LBRACKET;
      break;
    case ']':
      result->token_type = TokenType::RBRACKET;
      break;
    case '{':
      result->token_type = TokenType::LBRACE;
      break;
    case '}':
      result->token_type = TokenType::RBRACE;
      break;
    case '<':
      result->token_type = TokenType::LANGLE;
      break;
    case '>':
      result->token_type = TokenType::RANGLE;
      break;
    case ';':
      result->token_type = TokenType::SEMI;
      break;
    case ',':
      result->token_type = TokenType::COMMA;
      break;
    case '.':
      result->token_type = TokenType::DOT;
      break;
    case '-':
      result->token_type = TokenType::MINUS;
      break;
    case '+':
      result->token_type = TokenType::PLUS;
      break;
    case '&':
      result->token_type = TokenType::AMP;
      break;
    case '?':
      result->token_type = TokenType::QSTN;
      break;
    default:
      return false;
  }

  result->token = source_.substr(offset_, 1);
  Consume(1);
  return true;
}

// Finds '=' or '=>'.
bool MojomLexer::GetNextTokenEqualsOrResponse(Token* result) {
  if (source_[offset_] != '=')
    return false;
  Consume(1);

  if (eos(0) || source_[offset_] != '>') {
    result->token_type = TokenType::EQUALS;
    result->token = "=";
  } else {
    result->token_type = TokenType::RESPONSE;
    result->token = "=>";
    Consume(1);
  }
  return true;
}

// Valid C identifiers (K&R2: A.2.3).
bool MojomLexer::GetNextTokenIdentifier(Token* result) {
  char c = source_[offset_];

  // Identifiers start with a letter or underscore.
  if (!(IsAlpha(c) || c == '_'))
    return false;
  size_t start_offset = offset_;

  // Identifiers contain letters, numbers and underscores.
  while (!eos(0) && (IsAlnum(source_[offset_]) || source_[offset_] == '_'))
    Consume(1);

  result->token = source_.substr(start_offset, offset_ - start_offset);
  result->token_type = TokenType::IDENTIFIER;

  if (Keywords().count(result->token))
    result->token_type = Keywords()[result->token];

  return true;
}

// Integer constants (K&R2: A.2.5.1) dec,
// floating constants (K&R2: A.2.5.3).
bool MojomLexer::GetNextTokenDecConst(Token* result) {
  if (!IsDigit(source_[offset_]))
    return false;

  result->token_type = TokenType::INT_CONST_DEC;
  // If the number starts with a zero and is not a floating point number.
  if (source_[offset_] == '0' &&
      (eos(1) || (source_[offset_ + 1] != 'e' && source_[offset_ + 1] != 'E' &&
                  source_[offset_ + 1] != '.'))) {
    // TODO(azani): Catch and error on octal.
    result->token = "0";
    Consume(1);
    return true;
  }

  size_t start_offset = offset_;

  // First, we consume all the digits.
  ConsumeDigits();

  // If there is a fractional part, we consume the . and the following digits.
  if (!eos(0) && source_[offset_] == '.') {
    result->token_type = TokenType::FLOAT_CONST;
    Consume(1);
    ConsumeDigits();
  }

  // If there is an exponent, we consume the e/E, the optional sign and the
  // following digits.
  if (!eos(0) && (source_[offset_] == 'e' || source_[offset_] == 'E')) {
    if (!eos(2) &&
        (source_[offset_ + 1] == '-' || source_[offset_ + 1] == '+') &&
        IsDigit(source_[offset_ + 2])) {
      result->token_type = TokenType::FLOAT_CONST;
      Consume(2);  // Consume e/E and +/-.
      ConsumeDigits();
    } else if (!eos(1) && IsDigit(source_[offset_ + 1])) {
      result->token_type = TokenType::FLOAT_CONST;
      Consume(1);  // Consume e/E.
      ConsumeDigits();
    }
  }

  result->token = source_.substr(start_offset, offset_ - start_offset);
  return true;
}

// Integer constants (K&R2: A.2.5.1) hex.
bool MojomLexer::GetNextTokenHexConst(Token* result) {
  // Hex numbers start with '0', then 'x' or 'X', then at least one hex digit.
  if (eos(2) || source_[offset_] != '0' ||
      (source_[offset_ + 1] != 'x' && source_[offset_ + 1] != 'X') ||
      !IsHexDigit(source_[offset_ + 2]))
    return false;

  result->token_type = TokenType::INT_CONST_HEX;
  size_t start_offset = offset_;
  Consume(2);

  while (IsHexDigit(source_[offset_]))
    Consume(1);

  result->token = source_.substr(start_offset, offset_ - start_offset);
  return true;
}

bool MojomLexer::GetNextTokenOrdinal(Token* result) {
  // Ordinals start with '@' and then some digit.
  if (eos(1) || source_[offset_] != '@' || !IsDigit(source_[offset_ + 1]))
    return false;
  size_t start_offset = offset_;
  // Consumes '@'.
  Consume(1);

  result->token_type = TokenType::ORDINAL;
  ConsumeDigits();

  result->token = source_.substr(start_offset, offset_ - start_offset);
  return true;
}

bool MojomLexer::GetNextTokenStringLiteral(Token* result) {
  // String literals start with a double quote.
  if (source_[offset_] != '"')
    return false;

  size_t start_offset = offset_;
  // Consumes '"'.
  Consume(1);

  while (source_[offset_] != '"') {
    if (source_[offset_] == '\n' || eos(0)) {
      result->token_type = TokenType::ERROR_UNTERMINATED_STRING_LITERAL;
      result->token = source_.substr(start_offset, offset_ - start_offset);
      return true;
    }

    // This block will be skipped if the backslash is at the end of the source.
    if (source_[offset_] == '\\' && !eos(1)) {
      // Consume the backslash. This will ensure \" is consumed.
      Consume(1);
    }
    Consume(1);
  }
  // Consume the closing double quote.
  Consume(1);

  result->token_type = TokenType::STRING_LITERAL;

  result->token = source_.substr(start_offset, offset_ - start_offset);
  return true;
}

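// Consumes a (possibly empty) run of decimal digits.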
void MojomLexer::ConsumeDigits() {
  while (!eos(0) && IsDigit(source_[offset_]))
    Consume(1);
}

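// Consumes a newline: advances past it, increments the line counter and
// resets the offset within the line.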
void MojomLexer::ConsumeEol() {
  ++offset_;
  ++line_no_;
  offset_in_line_ = 0;
}

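// Advances the current position by num characters within the current line.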
void MojomLexer::Consume(size_t num) {
  offset_ += num;
  offset_in_line_ += num;
}

MojomLexer::MojomLexer(const std::string& source)
    : source_(source), offset_(0), line_no_(0), offset_in_line_(0) {
}

MojomLexer::~MojomLexer() {
}

}  // namespace

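// A default-constructed Token is an ERROR_UNKNOWN token at position zero.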
Token::Token()
    : token_type(TokenType::ERROR_UNKNOWN),
      char_pos(0),
      line_no(0),
      line_pos(0) {
}

Token::~Token() {
}

// Accepts the text of a mojom file and returns the ordered list of tokens
// found in the file.
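//
// Example usage (a sketch; the mojom snippet below is hypothetical):
//   std::vector<Token> tokens = Tokenize("interface Foo { Frob(); };");
//   if (!tokens.empty() && tokens.back().error()) {
//     // The last token describes the lexing error.
//   }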
std::vector<Token> Tokenize(const std::string& source) {
  return MojomLexer(source).Tokenize();
}

}  // namespace mojom
}  // namespace mojo