# Copyright 2015 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import parser
import symbol
import sys
import token
import tokenize

from catapult_base.refactor import offset_token


class Snippet(object):
  """A node in the Python parse tree.

  The Python grammar is defined at:
  https://docs.python.org/2/reference/grammar.html

  There are two types of Snippets:
    TokenSnippets are leaf nodes containing actual text.
    Symbols are internal nodes representing higher-level groupings, and are
      defined by the left-hand sides of the BNFs in the above link.
  """
  @property
  def type(self):
    raise NotImplementedError()

  @property
  def type_name(self):
    raise NotImplementedError()

  @property
  def children(self):
    """Return a list of this node's children."""
    raise NotImplementedError()

  @property
  def tokens(self):
    """Return a tuple of the tokens this Snippet contains."""
    raise NotImplementedError()

  def PrintTree(self, indent=0, stream=sys.stdout):
    """Spew a pretty-printed parse tree. Mostly useful for debugging."""
    raise NotImplementedError()

  def __str__(self):
    return offset_token.Untokenize(self.tokens)

  def FindAll(self, snippet_type):
    if isinstance(snippet_type, int):
      if self.type == snippet_type:
        yield self
    else:
      if isinstance(self, snippet_type):
        yield self

    for child in self.children:
      for snippet in child.FindAll(snippet_type):
        yield snippet

  def FindChild(self, snippet_type, **kwargs):
    for child in self.children:
      if isinstance(snippet_type, int):
        if child.type != snippet_type:
          continue
      else:
        if not isinstance(child, snippet_type):
          continue

      for attribute, value in kwargs.items():
        if getattr(child, attribute) != value:
          break
      else:
        return child
    raise ValueError('%s is not in %s. Children are: %s' %
                     (snippet_type, self, self.children))

  def FindChildren(self, snippet_type):
    if isinstance(snippet_type, int):
      for child in self.children:
        if child.type == snippet_type:
          yield child
    else:
      for child in self.children:
        if isinstance(child, snippet_type):
          yield child


class TokenSnippet(Snippet):
  """A Snippet containing a list of tokens.

  A list of tokens may start with any number of comments and non-terminating
  newlines, but must end with a syntactically meaningful token.
  """
  def __init__(self, token_type, tokens):
    # For operators and delimiters, the TokenSnippet's type may be more specific
    # than the type of the constituent token. E.g. the TokenSnippet type is
    # token.DOT, but the token type is token.OP. This is because the parser
    # has more context than the tokenizer.
    self._type = token_type
    self._tokens = tokens
    self._modified = False

  @classmethod
  def Create(cls, token_type, string, offset=(0, 0)):
    return cls(token_type,
               [offset_token.OffsetToken(token_type, string, offset)])

  @property
  def type(self):
    return self._type

  @property
  def type_name(self):
    return token.tok_name[self.type]

  @property
  def value(self):
    return self._tokens[-1].string

  @value.setter
  def value(self, value):
    self._tokens[-1].string = value
    self._modified = True

  @property
  def children(self):
    return []

  @property
  def tokens(self):
    return tuple(self._tokens)

  @property
  def modified(self):
    return self._modified

  def PrintTree(self, indent=0, stream=sys.stdout):
    stream.write(' ' * indent)
    if not self.tokens:
      print >> stream, self.type_name
      return

    print >> stream, '%-4s' % self.type_name, repr(self.tokens[0].string)
    for tok in self.tokens[1:]:
      stream.write(' ' * indent)
      print >> stream, ' ' * max(len(self.type_name), 4), repr(tok.string)


class Symbol(Snippet):
  """A Snippet containing sub-Snippets.

  The possible types and type_names are defined in Python's symbol module."""
  def __init__(self, symbol_type, children):
    self._type = symbol_type
    self._children = children

  @property
  def type(self):
    return self._type

  @property
  def type_name(self):
    return symbol.sym_name[self.type]

  @property
  def children(self):
    return self._children

  @children.setter
  def children(self, value):  # pylint: disable=arguments-differ
    self._children = value

  @property
  def tokens(self):
    tokens = []
    for child in self.children:
      tokens += child.tokens
    return tuple(tokens)

  @property
  def modified(self):
    return any(child.modified for child in self.children)

  def PrintTree(self, indent=0, stream=sys.stdout):
    stream.write(' ' * indent)

    # If there's only one child, collapse it onto the same line.
    node = self
    while len(node.children) == 1 and len(node.children[0].children) == 1:
      print >> stream, node.type_name,
      node = node.children[0]

    print >> stream, node.type_name
    for child in node.children:
      child.PrintTree(indent + 2, stream)


def Snippetize(f):
  """Return the syntax tree of the given file."""
  f.seek(0)
  syntax_tree = parser.st2list(parser.suite(f.read()))
  tokens = offset_token.Tokenize(f)

  snippet = _SnippetizeNode(syntax_tree, tokens)
  assert not tokens
  return snippet


def _SnippetizeNode(node, tokens):
  # The parser module gives a syntax tree that discards comments,
  # non-terminating newlines, and whitespace information. Use the tokens given
  # by the tokenize module to annotate the syntax tree with the information
  # needed to exactly reproduce the original source code.
  node_type = node[0]

  if node_type >= token.NT_OFFSET:
    # Symbol.
    children = tuple(_SnippetizeNode(child, tokens) for child in node[1:])
    return Symbol(node_type, children)
  else:
    # Token.
    grabbed_tokens = []
    while tokens and (
        tokens[0].type == tokenize.COMMENT or tokens[0].type == tokenize.NL):
      grabbed_tokens.append(tokens.popleft())

    # parser has 2 NEWLINEs right before the end.
    # tokenize has 0 or 1 depending on if the file has one.
    # Create extra nodes without consuming tokens to account for this.
    if node_type == token.NEWLINE:
      for tok in tokens:
        if tok.type == token.ENDMARKER:
          return TokenSnippet(node_type, grabbed_tokens)
        if tok.type != token.DEDENT:
          break

    assert tokens[0].type == token.OP or node_type == tokens[0].type

    grabbed_tokens.append(tokens.popleft())
    return TokenSnippet(node_type, grabbed_tokens)
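
A minimal usage sketch of the API above, not part of the change itself: it assumes this module is importable as `snippet` from `catapult_base.refactor`, and that `example.py` names some syntactically valid Python 2 source file; both names, and the replacement identifier below, are hypothetical. The sketch parses the file, queries the annotated tree by grammar type number and by Snippet subclass, and shows that an edit made through the TokenSnippet.value setter re-serializes with the original comments and whitespace preserved.

import symbol
import token

from catapult_base.refactor import snippet

with open('example.py') as f:  # hypothetical input file
  tree = snippet.Snippetize(f)

# Dump the annotated parse tree; handy when exploring the grammar.
tree.PrintTree()

# Queries accept a grammar type number from the token/symbol modules...
for import_stmt in tree.FindAll(symbol.import_stmt):
  print str(import_stmt).strip()

# ...or a Snippet subclass.
leaf_count = len(list(tree.FindAll(snippet.TokenSnippet)))

# TokenSnippet.value is writable. The first NAME token may well be a keyword;
# this only demonstrates the setter, not a safe rename.
name = next(tree.FindAll(token.NAME), None)
if name is not None:
  name.value = 'renamed_identifier'  # hypothetical replacement text

# str() round-trips the (edited) source, comments and whitespace included.
print str(tree)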