| OLD | NEW |
| 1 // Copyright 2015 The Chromium Authors. All rights reserved. | 1 // Copyright 2015 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 // | 4 // |
| 5 // To use the lexer, call Tokenize with the source string to obtain | 5 // To use the lexer, call Tokenize with the source string to obtain |
| 6 // a TokenStream. The lexer is run concurrently so you should be able to | 6 // a TokenStream. The lexer is run concurrently so you should be able to |
| 7 // use the TokenStream before the lexer is done with the source. | 7 // use the TokenStream before the lexer is done with the source. |
| 8 // | 8 // |
| 9 // The lexer is implemented as a state machine. The states are represented | 9 // The lexer is implemented as a state machine. The states are represented |
| 10 // by functions (the stateFn type) which accept a lexer and return the | 10 // by functions (the stateFn type) which accept a lexer and return the |
| 11 // new state. | 11 // new state. |
| 12 // | 12 // |
| 13 // Most states also have an isFooStart function which helps determine if | 13 // Most states also have an isFooStart function which helps determine if |
| 14 // a transition to Foo is appropriate. Those functions accept a single | 14 // a transition to Foo is appropriate. Those functions accept a single |
| 15 // rune as a parameter and return true if the state machine should | 15 // rune as a parameter and return true if the state machine should |
| 16 // transition to state Foo. Some states do not have such functions on | 16 // transition to state Foo. Some states do not have such functions on |
| 17 // account of the transition condition being trivial. | 17 // account of the transition condition being trivial. |
| 18 // | 18 // |
| 19 // The lexer implementation was inspired by | 19 // The lexer implementation was inspired by |
| 20 // http://cuddle.googlecode.com/hg/talk/lex.html | 20 // http://cuddle.googlecode.com/hg/talk/lex.html |
| 21 | 21 |
| 22 package lexer | 22 package lexer |
| 23 | 23 |
| 24 import ( | 24 import ( |
| 25 "unicode/utf8" | 25 "unicode/utf8" |
| 26 ) | 26 ) |
| 27 | 27 |
| 28 // Tokenize accepts a source string and parses it into a stream of tokens which | 28 // Tokenize accepts a source string and parses it into a stream of tokens which |
| 29 // can be read from the returned TokenStream. | 29 // can be read from the returned TokenStream. Comment tokens are omitted from |
| 30 // the returned stream. |
| 30 func Tokenize(source string) TokenStream { | 31 func Tokenize(source string) TokenStream { |
| 32 return NewFilteredTokenStream( |
| 33 tokenizeUnfiltered(source), |
| 34 []TokenKind{SingleLineComment, MultiLineComment}) |
| 35 } |
| 36 |
| 37 // tokenizeUnfiltered returns a TokenStream which does not filter out any of the |
| 38 // tokens in the channel. It is used for testing and by Tokenize which adds a |
| 39 // filter on top of the stream returned by tokenizeUnfiltered. |
| 40 func tokenizeUnfiltered(source string) TokenStream { |
| 31 tokens := make(chan Token) | 41 tokens := make(chan Token) |
| 32 l := lexer{source: source, tokens: tokens} | 42 l := lexer{source: source, tokens: tokens} |
| 33 go l.run() | 43 go l.run() |
| 34 return &TokenChan{tokenChan: tokens} | 44 return &TokenChan{tokenChan: tokens} |
| 35 } | 45 } |
| 36 | 46 |
| 37 type lexer struct { | 47 type lexer struct { |
| 38 // source is the source code to be lexed. | 48 // source is the source code to be lexed. |
| 39 source string | 49 source string |
| 40 | 50 |
| (...skipping 461 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 502 | 512 |
| 503 // lexSingleLineComment consumes a single line comment. | 513 // lexSingleLineComment consumes a single line comment. |
| 504 func lexSingleLineComment(l *lexer) stateFn { | 514 func lexSingleLineComment(l *lexer) stateFn { |
| 505 // Consume the '/' | 515 // Consume the '/' |
| 506 l.Consume() | 516 l.Consume() |
| 507 | 517 |
| 508 for !l.IsEos() && l.Peek() != '\n' { | 518 for !l.IsEos() && l.Peek() != '\n' { |
| 509 l.Consume() | 519 l.Consume() |
| 510 } | 520 } |
| 511 | 521 |
| 512 » l.beginToken() | 522 » l.emitToken(SingleLineComment) |
| 513 return lexRoot | 523 return lexRoot |
| 514 } | 524 } |
| 515 | 525 |
| 516 // lexMultiLineComment consumes a multi-line comment. | 526 // lexMultiLineComment consumes a multi-line comment. |
| 517 func lexMultiLineComment(l *lexer) stateFn { | 527 func lexMultiLineComment(l *lexer) stateFn { |
| 518 // Consume the '*'. | 528 // Consume the '*'. |
| 519 l.Consume() | 529 l.Consume() |
| 520 | 530 |
| 521 for !l.IsEos() { | 531 for !l.IsEos() { |
| 522 if l.Peek() == '*' { | 532 if l.Peek() == '*' { |
| (...skipping 12 matching lines...) Expand all Loading... |
| 535 // Consume the '*' | 545 // Consume the '*' |
| 536 l.Consume() | 546 l.Consume() |
| 537 | 547 |
| 538 if l.IsEos() { | 548 if l.IsEos() { |
| 539 l.emitToken(ErrorUnterminatedComment) | 549 l.emitToken(ErrorUnterminatedComment) |
| 540 return nil | 550 return nil |
| 541 } | 551 } |
| 542 | 552 |
| 543 if l.Peek() == '/' { | 553 if l.Peek() == '/' { |
| 544 l.Consume() | 554 l.Consume() |
| 545 » » l.beginToken() | 555 » » l.emitToken(MultiLineComment) |
| 546 return lexRoot | 556 return lexRoot |
| 547 } | 557 } |
| 548 | 558 |
| 549 return lexMultiLineComment | 559 return lexMultiLineComment |
| 550 } | 560 } |
| OLD | NEW |