| OLD | NEW |
| (Empty) | |
| 1 // Copyright (c) 2013 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. |
| 4 |
| 5 #ifndef TOOLS_GN_TOKENIZER_H_ |
| 6 #define TOOLS_GN_TOKENIZER_H_ |
| 7 |
| 8 #include <vector> |
| 9 |
| 10 #include "base/basictypes.h" |
| 11 #include "base/strings/string_piece.h" |
| 12 #include "tools/gn/err.h" |
| 13 #include "tools/gn/token.h" |
| 14 |
| 15 class InputFile; |
| 16 |
| 17 class Tokenizer { |
| 18 public: |
| 19 static std::vector<Token> Tokenize(const InputFile* input_file, Err* err); |
| 20 |
| 21 // Counts lines in the given buffer (the first line is "1") and returns |
| 22 // the byte offset of the beginning of that line, or (size_t)-1 if there |
| 23 // aren't that many lines in the file. Note that this will return the byte |
| 24 // one past the end of the input if the last character is a newline. |
| 25 // |
| 26 // This is a helper function for error output so that the tokenizer's |
| 27 // notion of lines can be used elsewhere. |
| 28 static size_t ByteOffsetOfNthLine(const base::StringPiece& buf, int n); |
| 29 |
| 30 // Returns true if the given offset of the string piece counts as a newline. |
| 31 // The offset must be in the buffer. |
| 32 static bool IsNewline(const base::StringPiece& buffer, size_t offset); |
| 33 |
| 34 static bool IsIdentifierFirstChar(char c) { |
| 35 return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || c == '_'; |
| 36 } |
| 37 |
| 38 static bool IsIdentifierContinuingChar(char c) { |
| 39 // Also allow digits after the first char. |
| 40 return IsIdentifierFirstChar(c) || (c >= '0' && c <= '9'); |
| 41 } |
| 42 |
| 43 private: |
| 44 // InputFile must outlive the tokenizer and all generated tokens. |
| 45 explicit Tokenizer(const InputFile* input_file, Err* err); |
| 46 ~Tokenizer(); |
| 47 |
| 48 std::vector<Token> Run(); |
| 49 |
| 50 void AdvanceToNextToken(); |
| 51 Token::Type ClassifyCurrent() const; |
| 52 void AdvanceToEndOfToken(const Location& location, Token::Type type); |
| 53 |
| 54 bool IsCurrentWhitespace() const; |
| 55 bool IsCurrentNewline() const; |
| 56 bool IsCurrentStringTerminator(char quote_char) const; |
| 57 |
| 58 bool CanIncrement() const { return cur_ < input_.size(); } |
| 59 |
| 60 // Increments the current location by one. |
| 61 void Advance(); |
| 62 |
| 63 // Returns the current character in the file as a location. |
| 64 Location GetCurrentLocation() const; |
| 65 |
| 66 Err GetErrorForInvalidToken(const Location& location) const; |
| 67 |
| 68 bool done() const { return at_end() || has_error(); } |
| 69 |
| 70 bool at_end() const { return cur_ == input_.size(); } |
| 71 char cur_char() const { return input_[cur_]; } |
| 72 |
| 73 bool has_error() const { return err_->has_error(); } |
| 74 |
| 75 const InputFile* input_file_; |
| 76 const base::StringPiece input_; |
| 77 Err* err_; |
| 78 size_t cur_; // Byte offset into input buffer. |
| 79 |
| 80 int line_number_; |
| 81 int char_in_line_; |
| 82 |
| 83 DISALLOW_COPY_AND_ASSIGN(Tokenizer); |
| 84 }; |
| 85 |
| 86 #endif // TOOLS_GN_TOKENIZER_H_ |
| OLD | NEW |