Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(83)

Side by Side Diff: tools/telemetry/catapult_base/refactor/snippet.py

Issue 1620023002: Revert of Remove catapult_base from telemetry. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@perf_cb_move
Patch Set: Created 4 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 # Copyright 2015 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file.
4
5 import parser
6 import symbol
7 import sys
8 import token
9 import tokenize
10
11 from catapult_base.refactor import offset_token
12
13
14 class Snippet(object):
15 """A node in the Python parse tree.
16
17 The Python grammar is defined at:
18 https://docs.python.org/2/reference/grammar.html
19
20 There are two types of Snippets:
21 TokenSnippets are leaf nodes containing actual text.
22 Symbols are internal nodes representing higher-level groupings, and are
23 defined by the left-hand sides of the BNFs in the above link.
24 """
25 @property
26 def type(self):
27 raise NotImplementedError()
28
29 @property
30 def type_name(self):
31 raise NotImplementedError()
32
33 @property
34 def children(self):
35 """Return a list of this node's children."""
36 raise NotImplementedError()
37
38 @property
39 def tokens(self):
40 """Return a tuple of the tokens this Snippet contains."""
41 raise NotImplementedError()
42
43 def PrintTree(self, indent=0, stream=sys.stdout):
44 """Spew a pretty-printed parse tree. Mostly useful for debugging."""
45 raise NotImplementedError()
46
47 def __str__(self):
48 return offset_token.Untokenize(self.tokens)
49
50 def FindAll(self, snippet_type):
51 if isinstance(snippet_type, int):
52 if self.type == snippet_type:
53 yield self
54 else:
55 if isinstance(self, snippet_type):
56 yield self
57
58 for child in self.children:
59 for snippet in child.FindAll(snippet_type):
60 yield snippet
61
62 def FindChild(self, snippet_type, **kwargs):
63 for child in self.children:
64 if isinstance(snippet_type, int):
65 if child.type != snippet_type:
66 continue
67 else:
68 if not isinstance(child, snippet_type):
69 continue
70
71 for attribute, value in kwargs:
72 if getattr(child, attribute) != value:
73 break
74 else:
75 return child
76 raise ValueError('%s is not in %s. Children are: %s' %
77 (snippet_type, self, self.children))
78
79 def FindChildren(self, snippet_type):
80 if isinstance(snippet_type, int):
81 for child in self.children:
82 if child.type == snippet_type:
83 yield child
84 else:
85 for child in self.children:
86 if isinstance(child, snippet_type):
87 yield child
88
89
class TokenSnippet(Snippet):
  """A Snippet containing a list of tokens.

  A list of tokens may start with any number of comments and non-terminating
  newlines, but must end with a syntactically meaningful token.
  """
  def __init__(self, token_type, tokens):
    # For operators and delimiters, the TokenSnippet's type may be more specific
    # than the type of the constituent token. E.g. the TokenSnippet type is
    # token.DOT, but the token type is token.OP. This is because the parser
    # has more context than the tokenizer.
    self._type = token_type
    self._tokens = tokens
    self._modified = False

  @classmethod
  def Create(cls, token_type, string, offset=(0, 0)):
    """Build a TokenSnippet wrapping a single newly created token."""
    return cls(token_type,
               [offset_token.OffsetToken(token_type, string, offset)])

  @property
  def type(self):
    return self._type

  @property
  def type_name(self):
    return token.tok_name[self.type]

  @property
  def value(self):
    # The last token is the syntactically meaningful one; any preceding
    # tokens are comments and non-terminating newlines.
    return self._tokens[-1].string

  @value.setter
  def value(self, value):
    self._tokens[-1].string = value
    self._modified = True

  @property
  def children(self):
    return []

  @property
  def tokens(self):
    return tuple(self._tokens)

  @property
  def modified(self):
    # True once value has been overwritten via the setter.
    return self._modified

  def PrintTree(self, indent=0, stream=sys.stdout):
    """Spew a pretty-printed parse tree. Mostly useful for debugging."""
    # Use stream.write() instead of the Python 2-only "print >> stream"
    # statement so this also runs under Python 3. Output is unchanged.
    stream.write(' ' * indent)
    if not self.tokens:
      stream.write('%s\n' % self.type_name)
      return

    stream.write('%-4s %s\n' % (self.type_name, repr(self.tokens[0].string)))
    for tok in self.tokens[1:]:
      stream.write(' ' * indent)
      stream.write('%s %s\n' % (' ' * max(len(self.type_name), 4),
                                repr(tok.string)))
149
150
class Symbol(Snippet):
  """A Snippet containing sub-Snippets.

  The possible types and type_names are defined in Python's symbol module."""
  def __init__(self, symbol_type, children):
    self._type = symbol_type
    self._children = children

  @property
  def type(self):
    return self._type

  @property
  def type_name(self):
    return symbol.sym_name[self.type]

  @property
  def children(self):
    return self._children

  @children.setter
  def children(self, value):  # pylint: disable=arguments-differ
    self._children = value

  @property
  def tokens(self):
    # A Symbol owns no tokens directly; flatten the children's tokens.
    tokens = []
    for child in self.children:
      tokens += child.tokens
    return tuple(tokens)

  @property
  def modified(self):
    return any(child.modified for child in self.children)

  def PrintTree(self, indent=0, stream=sys.stdout):
    """Spew a pretty-printed parse tree. Mostly useful for debugging."""
    # Use stream.write() instead of the Python 2-only "print >> stream"
    # statement so this also runs under Python 3. The trailing-comma print
    # (name plus a separating space, no newline) is reproduced explicitly.
    stream.write(' ' * indent)

    # If there's only one child, collapse it onto the same line.
    node = self
    while len(node.children) == 1 and len(node.children[0].children) == 1:
      stream.write('%s ' % node.type_name)
      node = node.children[0]

    stream.write('%s\n' % node.type_name)
    for child in node.children:
      child.PrintTree(indent+2, stream)
198
199
def Snippetize(f):
  """Return the syntax tree of the given file.

  The file is parsed twice: once with the parser module for structure, and
  once with the tokenizer (via offset_token) so that comments and whitespace
  can be reattached to the tree.
  """
  f.seek(0)
  tree = parser.st2list(parser.suite(f.read()))
  token_queue = offset_token.Tokenize(f)

  root = _SnippetizeNode(tree, token_queue)
  # The tree walk must consume every token exactly once.
  assert not token_queue
  return root
209
210
def _SnippetizeNode(node, tokens):
  """Recursively convert one parser syntax-tree node into a Snippet.

  Args:
    node: a nested list as produced by parser.st2list(); node[0] is the
      grammar type number, and the remaining elements are child nodes.
    tokens: a queue of tokens (supports popleft — presumably a
      collections.deque from offset_token.Tokenize; confirm against caller),
      consumed in order as the tree is walked.

  Returns:
    A Symbol (internal node) or TokenSnippet (leaf) for this node.
  """
  # The parser module gives a syntax tree that discards comments,
  # non-terminating newlines, and whitespace information. Use the tokens given
  # by the tokenize module to annotate the syntax tree with the information
  # needed to exactly reproduce the original source code.
  node_type = node[0]

  if node_type >= token.NT_OFFSET:
    # Symbol.
    children = tuple(_SnippetizeNode(child, tokens) for child in node[1:])
    return Symbol(node_type, children)
  else:
    # Token.
    # Attach any leading comments and non-terminating newlines to this leaf,
    # since the parser tree has no record of them.
    grabbed_tokens = []
    while tokens and (
        tokens[0].type == tokenize.COMMENT or tokens[0].type == tokenize.NL):
      grabbed_tokens.append(tokens.popleft())

    # parser has 2 NEWLINEs right before the end.
    # tokenize has 0 or 1 depending on if the file has one.
    # Create extra nodes without consuming tokens to account for this.
    if node_type == token.NEWLINE:
      for tok in tokens:
        if tok.type == token.ENDMARKER:
          # Only DEDENTs remain before the end marker: emit an empty-ish
          # NEWLINE snippet without consuming any token.
          return TokenSnippet(node_type, grabbed_tokens)
        if tok.type != token.DEDENT:
          break

    # The parser may refine a generic OP token (e.g. to token.DOT); otherwise
    # the tokenizer and parser must agree on the type.
    assert tokens[0].type == token.OP or node_type == tokens[0].type

    grabbed_tokens.append(tokens.popleft())
    return TokenSnippet(node_type, grabbed_tokens)
OLDNEW
« no previous file with comments | « tools/telemetry/catapult_base/refactor/offset_token.py ('k') | tools/telemetry/catapult_base/refactor_util/__init__.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698