# Copyright 2015 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import parser
import symbol
import sys
import token
import tokenize

from catapult_base.refactor import offset_token


class Snippet(object):
  """A node in the Python parse tree.

  The Python grammar is defined at:
  https://docs.python.org/2/reference/grammar.html

  There are two types of Snippets:
    TokenSnippets are leaf nodes containing actual text.
    Symbols are internal nodes representing higher-level groupings, and are
      defined by the left-hand sides of the BNFs in the above link.
  """
  @property
  def type(self):
    raise NotImplementedError()

  @property
  def type_name(self):
    raise NotImplementedError()

  @property
  def children(self):
    """Return a list of this node's children."""
    raise NotImplementedError()

  @property
  def tokens(self):
    """Return a tuple of the tokens this Snippet contains."""
    raise NotImplementedError()

  def PrintTree(self, indent=0, stream=sys.stdout):
    """Spew a pretty-printed parse tree. Mostly useful for debugging."""
    raise NotImplementedError()

  def __str__(self):
    return offset_token.Untokenize(self.tokens)

  def FindAll(self, snippet_type):
    if isinstance(snippet_type, int):
      if self.type == snippet_type:
        yield self
    else:
      if isinstance(self, snippet_type):
        yield self

    for child in self.children:
      for snippet in child.FindAll(snippet_type):
        yield snippet

  def FindChild(self, snippet_type, **kwargs):
    for child in self.children:
      if isinstance(snippet_type, int):
        if child.type != snippet_type:
          continue
      else:
        if not isinstance(child, snippet_type):
          continue

      for attribute, value in kwargs.iteritems():
        if getattr(child, attribute) != value:
          break
      else:
        return child
    raise ValueError('%s is not in %s. Children are: %s' %
                     (snippet_type, self, self.children))

  def FindChildren(self, snippet_type):
    if isinstance(snippet_type, int):
      for child in self.children:
        if child.type == snippet_type:
          yield child
    else:
      for child in self.children:
        if isinstance(child, snippet_type):
          yield child

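# Illustrative note (not part of the original API documentation): FindAll,
# FindChild and FindChildren all accept either a numeric grammar type (a
# constant from the token or symbol modules) or a Snippet subclass. A minimal
# sketch, commented out so importing this module stays side-effect free, and
# assuming `tree` was produced by Snippetize() below:
#
#   # Yield every function definition symbol in the tree.
#   for funcdef in tree.FindAll(symbol.funcdef):
#     funcdef.PrintTree()
#
#   # Match by Snippet subclass instead of by grammar constant.
#   for leaf in tree.FindAll(TokenSnippet):
#     print leaf.type_name, leaf.value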

class TokenSnippet(Snippet):
  """A Snippet containing a list of tokens.

  A list of tokens may start with any number of comments and non-terminating
  newlines, but must end with a syntactically meaningful token.
  """
  def __init__(self, token_type, tokens):
    # For operators and delimiters, the TokenSnippet's type may be more specific
    # than the type of the constituent token. E.g. the TokenSnippet type is
    # token.DOT, but the token type is token.OP. This is because the parser
    # has more context than the tokenizer.
    self._type = token_type
    self._tokens = tokens
    self._modified = False

  @classmethod
  def Create(cls, token_type, string, offset=(0, 0)):
    return cls(token_type,
               [offset_token.OffsetToken(token_type, string, offset)])

  @property
  def type(self):
    return self._type

  @property
  def type_name(self):
    return token.tok_name[self.type]

  @property
  def value(self):
    return self._tokens[-1].string

  @value.setter
  def value(self, value):
    self._tokens[-1].string = value
    self._modified = True

  @property
  def children(self):
    return []

  @property
  def tokens(self):
    return tuple(self._tokens)

  @property
  def modified(self):
    return self._modified

  def PrintTree(self, indent=0, stream=sys.stdout):
    stream.write(' ' * indent)
    if not self.tokens:
      print >> stream, self.type_name
      return

    print >> stream, '%-4s' % self.type_name, repr(self.tokens[0].string)
    for tok in self.tokens[1:]:
      stream.write(' ' * indent)
      print >> stream, ' ' * max(len(self.type_name), 4), repr(tok.string)

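# Illustrative sketch (commented out; the literal token text below is only an
# assumed example): editing a TokenSnippet in place goes through the value
# setter, which rewrites the trailing token and marks the snippet as modified;
# str() then untokenizes back to source text.
#
#   name = TokenSnippet.Create(token.NAME, 'foo')
#   name.value = 'bar'     # Rewrite the token's text.
#   assert name.modified   # The edit is tracked.
#   print str(name)        # Untokenizes to the edited text, e.g. 'bar'.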

class Symbol(Snippet):
  """A Snippet containing sub-Snippets.

  The possible types and type_names are defined in Python's symbol module."""
  def __init__(self, symbol_type, children):
    self._type = symbol_type
    self._children = children

  @property
  def type(self):
    return self._type

  @property
  def type_name(self):
    return symbol.sym_name[self.type]

  @property
  def children(self):
    return self._children

  @children.setter
  def children(self, value):  # pylint: disable=arguments-differ
    self._children = value

  @property
  def tokens(self):
    tokens = []
    for child in self.children:
      tokens += child.tokens
    return tuple(tokens)

  @property
  def modified(self):
    return any(child.modified for child in self.children)

  def PrintTree(self, indent=0, stream=sys.stdout):
    stream.write(' ' * indent)

    # If there's only one child, collapse it onto the same line.
    node = self
    while len(node.children) == 1 and len(node.children[0].children) == 1:
      print >> stream, node.type_name,
      node = node.children[0]

    print >> stream, node.type_name
    for child in node.children:
      child.PrintTree(indent + 2, stream)


def Snippetize(f):
  """Return the syntax tree of the given file."""
  f.seek(0)
  syntax_tree = parser.st2list(parser.suite(f.read()))
  tokens = offset_token.Tokenize(f)

  snippet = _SnippetizeNode(syntax_tree, tokens)
  assert not tokens
  return snippet

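# A hedged end-to-end sketch (commented out; the file name and symbol choice
# are illustrative only). Because Snippetize() annotates the parse tree with
# the comment and whitespace tokens, str() on the returned tree should
# reproduce the original source:
#
#   with open('example.py') as f:
#     tree = Snippetize(f)
#   tree.PrintTree()        # Dump the annotated parse tree for debugging.
#   print str(tree)         # Should round-trip the original file contents.
#   for stmt in tree.FindAll(symbol.import_stmt):
#     print stmt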

def _SnippetizeNode(node, tokens):
  # The parser module gives a syntax tree that discards comments,
  # non-terminating newlines, and whitespace information. Use the tokens given
  # by the tokenize module to annotate the syntax tree with the information
  # needed to exactly reproduce the original source code.
  node_type = node[0]

  if node_type >= token.NT_OFFSET:
    # Symbol.
    children = tuple(_SnippetizeNode(child, tokens) for child in node[1:])
    return Symbol(node_type, children)
  else:
    # Token.
    grabbed_tokens = []
    while tokens and (
        tokens[0].type == tokenize.COMMENT or tokens[0].type == tokenize.NL):
      grabbed_tokens.append(tokens.popleft())

    # parser has 2 NEWLINEs right before the end.
    # tokenize has 0 or 1, depending on whether the file ends with one.
    # Create extra nodes without consuming tokens to account for this.
    if node_type == token.NEWLINE:
      for tok in tokens:
        if tok.type == token.ENDMARKER:
          return TokenSnippet(node_type, grabbed_tokens)
        if tok.type != token.DEDENT:
          break

    assert tokens[0].type == token.OP or node_type == tokens[0].type

    grabbed_tokens.append(tokens.popleft())
    return TokenSnippet(node_type, grabbed_tokens)