#!/usr/bin/env python
#
# Copyright 2007 The Closure Linter Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS-IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Regular expression based lexer."""

__author__ = ('robbyw@google.com (Robert Walker)',
              'ajp@google.com (Andy Perelson)')

from closure_linter.common import tokens

# Shorthand
Type = tokens.TokenType


class Tokenizer(object):
  """General purpose tokenizer.

  Attributes:
    mode: The latest mode of the tokenizer. This allows patterns to
        distinguish if they are mid-comment, mid-parameter list, etc.
    matchers: Dictionary of modes to sequences of matchers that define the
        patterns to check at any given time.
    default_types: Dictionary of modes to types, defining what type to give
        non-matched text when in the given mode. Defaults to Type.NORMAL.
  """

  def __init__(self, starting_mode, matchers, default_types):
    """Initialize the tokenizer.

    Args:
      starting_mode: Mode to start in.
      matchers: Dictionary of modes to sequences of matchers that define the
          patterns to check at any given time.
      default_types: Dictionary of modes to types, defining what type to give
          non-matched text when in the given mode. Defaults to Type.NORMAL.
    """
    self.__starting_mode = starting_mode
    self.matchers = matchers
    self.default_types = default_types

  def TokenizeFile(self, file):
    """Tokenizes the given file.

    Args:
      file: An iterable that yields one line of the file at a time.

    Returns:
      The first token in the file, or None if the file is empty.
    """
    # The current mode.
    self.mode = self.__starting_mode
    # The first token in the stream.
    self.__first_token = None
    # The last token added to the token stream.
    self.__last_token = None
    # The current line number.
    self.__line_number = 0

    for line in file:
      self.__line_number += 1
      self.__TokenizeLine(line)

    return self.__first_token

  def _CreateToken(self, string, token_type, line, line_number, values=None):
    """Creates a new Token object (or subclass).

    Args:
      string: The string of input the token represents.
      token_type: The type of token.
      line: The text of the line this token is in.
      line_number: The line number of the token.
      values: A dict of named values within the token. For instance, a
          function declaration may have a value called 'name' which captures
          the name of the function.

    Returns:
      The newly created Token object.
    """
    return tokens.Token(string, token_type, line, line_number, values,
                        line_number)

  def __TokenizeLine(self, line):
    """Tokenizes the given line.

    Args:
      line: The contents of the line.
    """
    string = line.rstrip('\n\r\f')
    line_number = self.__line_number
    self.__start_index = 0

    if not string:
      self.__AddToken(self._CreateToken('', Type.BLANK_LINE, line, line_number))
      return

    normal_token = ''
    index = 0
    while index < len(string):
      for matcher in self.matchers[self.mode]:
        if matcher.line_start and index > 0:
          continue

        match = matcher.regex.match(string, index)

        if match:
          if normal_token:
            self.__AddToken(
                self.__CreateNormalToken(self.mode, normal_token, line,
                                         line_number))
            normal_token = ''

          # Add the match.
          self.__AddToken(self._CreateToken(match.group(), matcher.type, line,
                                            line_number, match.groupdict()))

          # Change the mode to the correct one for after this match.
          self.mode = matcher.result_mode or self.mode

          # Advance the index past the matched text.
          index = match.end()

          break

      else:
        # The for loop finished without a match (no break executed), so add
        # the current character to the run of consecutive non-matching
        # characters; these runs become NORMAL tokens.
        if string:
          normal_token += string[index]
          index += 1

    if normal_token:
      self.__AddToken(
          self.__CreateNormalToken(self.mode, normal_token, line, line_number))

  def __CreateNormalToken(self, mode, string, line, line_number):
    """Creates a normal token.

    Args:
      mode: The current mode.
      string: The string to tokenize.
      line: The line of text.
      line_number: The line number within the file.

    Returns:
      A Token object, of the default type for the current mode.
    """
    token_type = Type.NORMAL
    if mode in self.default_types:
      token_type = self.default_types[mode]
    return self._CreateToken(string, token_type, line, line_number)

  def __AddToken(self, token):
    """Add the given token to the token stream.

    Args:
      token: The token to add.
    """
    # Store the first token, or point the previous token to this one.
    if not self.__first_token:
      self.__first_token = token
    else:
      self.__last_token.next = token

    # Establish the doubly linked list.
    token.previous = self.__last_token
    self.__last_token = token

    # Compute the character indices.
    token.start_index = self.__start_index
    self.__start_index += token.length
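

# Example usage, as a minimal sketch: the matcher objects are assumed to be
# closure_linter.common.matcher.Matcher instances, which provide the regex,
# type, result_mode, and line_start attributes read in __TokenizeLine; the
# mode name 'normal' and the input file name are illustrative only.
#
#   import re
#   from closure_linter.common import matcher
#   from closure_linter.common import tokenizer
#   from closure_linter.common import tokens
#
#   matchers = {
#       'normal': [matcher.Matcher(re.compile(r'\s+'),
#                                  tokens.TokenType.WHITESPACE)],
#   }
#   t = tokenizer.Tokenizer('normal', matchers, {})
#   token = t.TokenizeFile(open('example.js'))
#   while token:  # Walk the doubly linked token list.
#     print token.line_number, token.type, repr(token.string)
#     token = token.next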