| OLD | NEW |
| 1 // Copyright 2015 The Chromium Authors. All rights reserved. | 1 // Copyright 2015 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 // | 4 // |
| 5 // To use the lexer, call Tokenize with the source string to obtain | 5 // To use the lexer, call Tokenize with the source string to obtain |
| 6 // a TokenStream. The lexer is run concurrently so you should be able to | 6 // a TokenStream. The lexer is run concurrently so you should be able to |
| 7 // use the TokenStream before the lexer is done with the source. | 7 // use the TokenStream before the lexer is done with the source. |
| 8 // | 8 // |
| 9 // The lexer is implemented as a state machine. The states are represented | 9 // The lexer is implemented as a state machine. The states are represented |
| 10 // by functions (the stateFn type) which accept a lexer and return the | 10 // by functions (the stateFn type) which accept a lexer and return the |
| 11 // new state. | 11 // new state. |
| 12 // | 12 // |
| 13 // Most states also have an isFooStart function which helps determine if | 13 // Most states also have an isFooStart function which helps determine if |
| 14 // a transition to Foo is appropriate. Those functions accept a single | 14 // a transition to Foo is appropriate. Those functions accept a single |
| 15 // rune as a parameter and return true if the state machine should | 15 // rune as a parameter and return true if the state machine should |
| 16 // transition to state Foo. Some states do not have such functions on | 16 // transition to state Foo. Some states do not have such functions on |
| 17 // account of the transition condition being trivial. | 17 // account of the transition condition being trivial. |
| 18 // | 18 // |
| 19 // The lexer implementation was inspired by | 19 // The lexer implementation was inspired by |
| 20 // http://cuddle.googlecode.com/hg/talk/lex.html | 20 // http://cuddle.googlecode.com/hg/talk/lex.html |
| 21 | 21 |
| 22 package lexer | 22 package lexer |
| 23 | 23 |
| 24 import ( | 24 import ( |
| 25 "unicode/utf8" | 25 "unicode/utf8" |
| 26 ) | 26 ) |
| 27 | 27 |
| 28 // Tokenize accepts a source string and parses it into a stream of tokens which | 28 // Tokenize accepts a source string and parses it into a stream of tokens which |
| 29 // can be read from the returned TokenStream. | 29 // can be read from the returned TokenStream. Comment tokens are omitted from |
| 30 // the returned stream. |
| 30 func Tokenize(source string) TokenStream { | 31 func Tokenize(source string) TokenStream { |
| 32 return &FilteredTokenStream{ |
| 33 tokenizeUnfiltered(source), |
| 34 map[TokenKind]bool{ |
| 35 SingleLineComment: true, |
| 36 MultiLineComment: true}} |
| 37 } |
| 38 |
| 39 // tokenizeUnfiltered returns a TokenStream which does not filter out any of the |
| 40 // tokens in the channel. It is used for testing and by Tokenize which adds a |
| 41 // filter on top of the stream returned by tokenizeUnfiltered. |
| 42 func tokenizeUnfiltered(source string) TokenStream { |
| 31 tokens := make(chan Token) | 43 tokens := make(chan Token) |
| 32 l := lexer{source: source, tokens: tokens} | 44 l := lexer{source: source, tokens: tokens} |
| 33 go l.run() | 45 go l.run() |
| 34 return &TokenChan{tokenChan: tokens} | 46 return &TokenChan{tokenChan: tokens} |
| 35 } | 47 } |
| 36 | 48 |
| 37 type lexer struct { | 49 type lexer struct { |
| 38 // source is the source code to be lexed. | 50 // source is the source code to be lexed. |
| 39 source string | 51 source string |
| 40 | 52 |
| (...skipping 461 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 502 | 514 |
| 503 // lexSingleLineComment consumes a single line comment. | 515 // lexSingleLineComment consumes a single line comment. |
| 504 func lexSingleLineComment(l *lexer) stateFn { | 516 func lexSingleLineComment(l *lexer) stateFn { |
| 505 // Consume the '/' | 517 // Consume the '/' |
| 506 l.Consume() | 518 l.Consume() |
| 507 | 519 |
| 508 for !l.IsEos() && l.Peek() != '\n' { | 520 for !l.IsEos() && l.Peek() != '\n' { |
| 509 l.Consume() | 521 l.Consume() |
| 510 } | 522 } |
| 511 | 523 |
| 512 » l.beginToken() | 524 » l.emitToken(SingleLineComment) |
| 513 return lexRoot | 525 return lexRoot |
| 514 } | 526 } |
| 515 | 527 |
| 516 // lexMultiLineComment consumes a multi-line comment. | 528 // lexMultiLineComment consumes a multi-line comment. |
| 517 func lexMultiLineComment(l *lexer) stateFn { | 529 func lexMultiLineComment(l *lexer) stateFn { |
| 518 // Consume the '*'. | 530 // Consume the '*'. |
| 519 l.Consume() | 531 l.Consume() |
| 520 | 532 |
| 521 for !l.IsEos() { | 533 for !l.IsEos() { |
| 522 if l.Peek() == '*' { | 534 if l.Peek() == '*' { |
| (...skipping 12 matching lines...) Expand all Loading... |
| 535 // Consume the '*' | 547 // Consume the '*' |
| 536 l.Consume() | 548 l.Consume() |
| 537 | 549 |
| 538 if l.IsEos() { | 550 if l.IsEos() { |
| 539 l.emitToken(ErrorUnterminatedComment) | 551 l.emitToken(ErrorUnterminatedComment) |
| 540 return nil | 552 return nil |
| 541 } | 553 } |
| 542 | 554 |
| 543 if l.Peek() == '/' { | 555 if l.Peek() == '/' { |
| 544 l.Consume() | 556 l.Consume() |
| 545 » » l.beginToken() | 557 » » l.emitToken(MultiLineComment) |
| 546 return lexRoot | 558 return lexRoot |
| 547 } | 559 } |
| 548 | 560 |
| 549 return lexMultiLineComment | 561 return lexMultiLineComment |
| 550 } | 562 } |
| OLD | NEW |