Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright 2015 The Chromium Authors. All rights reserved. | 1 // Copyright 2015 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 // | 4 // |
| 5 // To use the lexer, call Tokenize with the source string to obtain | 5 // To use the lexer, call Tokenize with the source string to obtain |
| 6 // a TokenStream. The lexer is run concurrently so you should be able to | 6 // a TokenStream. The lexer is run concurrently so you should be able to |
| 7 // use the TokenStream before the lexer is done with the source. | 7 // use the TokenStream before the lexer is done with the source. |
| 8 // | 8 // |
| 9 // The lexer is implemented as a state machine. The states are represented | 9 // The lexer is implemented as a state machine. The states are represented |
| 10 // by functions (the stateFn type) which accept a lexer and return the | 10 // by functions (the stateFn type) which accept a lexer and return the |
| 11 // new state. | 11 // new state. |
| 12 // | 12 // |
| 13 // Most states also have an isFooStart function which helps determine if | 13 // Most states also have an isFooStart function which helps determine if |
| 14 // a transition to Foo is appropriate. Those functions accept a single | 14 // a transition to Foo is appropriate. Those functions accept a single |
| 15 // rune as a parameter and return true if the state machine should | 15 // rune as a parameter and return true if the state machine should |
| 16 // transition to state Foo. Some states do not have such functions on | 16 // transition to state Foo. Some states do not have such functions on |
| 17 // account of the transition condition being trivial. | 17 // account of the transition condition being trivial. |
| 18 // | 18 // |
| 19 // The lexer implementation was inspired by | 19 // The lexer implementation was inspired by |
| 20 // http://cuddle.googlecode.com/hg/talk/lex.html | 20 // http://cuddle.googlecode.com/hg/talk/lex.html |
| 21 | 21 |
| 22 package lexer | 22 package lexer |
| 23 | 23 |
| 24 import ( | 24 import ( |
| 25 "unicode/utf8" | 25 "unicode/utf8" |
| 26 ) | 26 ) |
| 27 | 27 |
| 28 // Tokenize accepts a source string and parses it into a stream of tokens which | 28 // Tokenize accepts a source string and parses it into a stream of tokens which |
| 29 // can be read from the returned TokenStream. | 29 // can be read from the returned TokenStream. |
|
rudominer
2016/01/16 01:08:58
You should comment that this function does not ret…
azani
2016/01/20 00:00:11
Done.
| |
| 30 func Tokenize(source string) TokenStream { | 30 func Tokenize(source string) TokenStream { |
| 31 return &FilteredTokenStream{ | |
| 32 tokenizeUnfiltered(source), | |
| 33 map[TokenKind]bool{ | |
| 34 SingleLineComment: true, | |
| 35 MultiLineComment: true}} | |
| 36 } | |
| 37 | |
| 38 // tokenizeUnfiltered does not filter any token. | |
|
rudominer
2016/01/16 01:08:58
I would add a slightly longer comment that explain…
azani
2016/01/20 00:00:11
Done.
| |
| 39 func tokenizeUnfiltered(source string) TokenStream { | |
| 31 tokens := make(chan Token) | 40 tokens := make(chan Token) |
| 32 l := lexer{source: source, tokens: tokens} | 41 l := lexer{source: source, tokens: tokens} |
| 33 go l.run() | 42 go l.run() |
| 34 return &TokenChan{tokenChan: tokens} | 43 return &TokenChan{tokenChan: tokens} |
| 35 } | 44 } |
| 36 | 45 |
| 37 type lexer struct { | 46 type lexer struct { |
| 38 // source is the source code to be lexed. | 47 // source is the source code to be lexed. |
| 39 source string | 48 source string |
| 40 | 49 |
| (...skipping 461 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 502 | 511 |
| 503 // lexSingleLineComment consumes a single line comment. | 512 // lexSingleLineComment consumes a single line comment. |
| 504 func lexSingleLineComment(l *lexer) stateFn { | 513 func lexSingleLineComment(l *lexer) stateFn { |
| 505 // Consume the '/' | 514 // Consume the '/' |
| 506 l.Consume() | 515 l.Consume() |
| 507 | 516 |
| 508 for !l.IsEos() && l.Peek() != '\n' { | 517 for !l.IsEos() && l.Peek() != '\n' { |
| 509 l.Consume() | 518 l.Consume() |
| 510 } | 519 } |
| 511 | 520 |
| 512 l.beginToken() | 521 l.emitToken(SingleLineComment) |
| 513 return lexRoot | 522 return lexRoot |
| 514 } | 523 } |
| 515 | 524 |
| 516 // lexMultiLineComment consumes a multi-line comment. | 525 // lexMultiLineComment consumes a multi-line comment. |
| 517 func lexMultiLineComment(l *lexer) stateFn { | 526 func lexMultiLineComment(l *lexer) stateFn { |
| 518 // Consume the '*'. | 527 // Consume the '*'. |
| 519 l.Consume() | 528 l.Consume() |
| 520 | 529 |
| 521 for !l.IsEos() { | 530 for !l.IsEos() { |
| 522 if l.Peek() == '*' { | 531 if l.Peek() == '*' { |
| (...skipping 12 matching lines...) Expand all Loading... | |
| 535 // Consume the '*' | 544 // Consume the '*' |
| 536 l.Consume() | 545 l.Consume() |
| 537 | 546 |
| 538 if l.IsEos() { | 547 if l.IsEos() { |
| 539 l.emitToken(ErrorUnterminatedComment) | 548 l.emitToken(ErrorUnterminatedComment) |
| 540 return nil | 549 return nil |
| 541 } | 550 } |
| 542 | 551 |
| 543 if l.Peek() == '/' { | 552 if l.Peek() == '/' { |
| 544 l.Consume() | 553 l.Consume() |
| 545 l.beginToken() | 554 l.emitToken(MultiLineComment) |
| 546 return lexRoot | 555 return lexRoot |
| 547 } | 556 } |
| 548 | 557 |
| 549 return lexMultiLineComment | 558 return lexMultiLineComment |
| 550 } | 559 } |
| OLD | NEW |