Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(173)

Unified Diff: tools/telemetry/catapult_base/refactor/snippet.py

Issue 1620023002: Revert of Remove catapult_base from telemetry. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@perf_cb_move
Patch Set: Created 4 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: tools/telemetry/catapult_base/refactor/snippet.py
diff --git a/tools/telemetry/catapult_base/refactor/snippet.py b/tools/telemetry/catapult_base/refactor/snippet.py
new file mode 100644
index 0000000000000000000000000000000000000000..ecb688af9004038866ca44a1335dd003c02356ab
--- /dev/null
+++ b/tools/telemetry/catapult_base/refactor/snippet.py
@@ -0,0 +1,242 @@
+# Copyright 2015 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+import parser
+import symbol
+import sys
+import token
+import tokenize
+
+from catapult_base.refactor import offset_token
+
+
class Snippet(object):
  """A node in the Python parse tree.

  The Python grammar is defined at:
  https://docs.python.org/2/reference/grammar.html

  There are two types of Snippets:
    TokenSnippets are leaf nodes containing actual text.
    Symbols are internal nodes representing higher-level groupings, and are
    defined by the left-hand sides of the BNFs in the above link.
  """
  @property
  def type(self):
    """Numeric node type: a token.* or symbol.* constant."""
    raise NotImplementedError()

  @property
  def type_name(self):
    """Human-readable name corresponding to self.type."""
    raise NotImplementedError()

  @property
  def children(self):
    """Return a list of this node's children."""
    raise NotImplementedError()

  @property
  def tokens(self):
    """Return a tuple of the tokens this Snippet contains."""
    raise NotImplementedError()

  def PrintTree(self, indent=0, stream=sys.stdout):
    """Spew a pretty-printed parse tree. Mostly useful for debugging."""
    raise NotImplementedError()

  def __str__(self):
    return offset_token.Untokenize(self.tokens)

  def FindAll(self, snippet_type):
    """Yield this node and all descendants matching snippet_type.

    Args:
      snippet_type: either a numeric token/symbol type, or a Snippet subclass.

    Yields:
      Matching Snippets, in pre-order (self before descendants).
    """
    if isinstance(snippet_type, int):
      if self.type == snippet_type:
        yield self
    else:
      if isinstance(self, snippet_type):
        yield self

    for child in self.children:
      for snippet in child.FindAll(snippet_type):
        yield snippet

  def FindChild(self, snippet_type, **kwargs):
    """Return the first direct child matching snippet_type and attributes.

    Args:
      snippet_type: either a numeric token/symbol type, or a Snippet subclass.
      **kwargs: attribute name/value pairs the child must also have.

    Raises:
      ValueError: if no direct child matches.
    """
    for child in self.children:
      if isinstance(snippet_type, int):
        if child.type != snippet_type:
          continue
      else:
        if not isinstance(child, snippet_type):
          continue

      # Bug fix: iterate (attribute, value) pairs. Iterating the dict
      # directly yields only the key strings, which cannot be unpacked
      # into an (attribute, value) pair.
      for attribute, value in kwargs.items():
        if getattr(child, attribute) != value:
          break
      else:
        return child
    raise ValueError('%s is not in %s. Children are: %s' %
                     (snippet_type, self, self.children))

  def FindChildren(self, snippet_type):
    """Yield every direct child matching snippet_type (type or class)."""
    if isinstance(snippet_type, int):
      for child in self.children:
        if child.type == snippet_type:
          yield child
    else:
      for child in self.children:
        if isinstance(child, snippet_type):
          yield child
+
+
class TokenSnippet(Snippet):
  """A Snippet containing a list of tokens.

  A list of tokens may start with any number of comments and non-terminating
  newlines, but must end with a syntactically meaningful token.
  """
  def __init__(self, token_type, tokens):
    # The TokenSnippet's type may be more specific than the type of its
    # constituent token (e.g. token.DOT rather than token.OP for operators
    # and delimiters), because the parser has more context than the
    # tokenizer.
    self._type = token_type
    self._tokens = tokens
    self._modified = False

  @classmethod
  def Create(cls, token_type, string, offset=(0, 0)):
    """Build a TokenSnippet wrapping a single freshly created token."""
    new_token = offset_token.OffsetToken(token_type, string, offset)
    return cls(token_type, [new_token])

  @property
  def type(self):
    return self._type

  @property
  def type_name(self):
    return token.tok_name[self.type]

  @property
  def value(self):
    # The trailing token is the syntactically meaningful one; any leading
    # tokens are comments or non-terminating newlines.
    return self._tokens[-1].string

  @value.setter
  def value(self, value):
    self._tokens[-1].string = value
    self._modified = True

  @property
  def children(self):
    return []

  @property
  def tokens(self):
    return tuple(self._tokens)

  @property
  def modified(self):
    return self._modified

  def PrintTree(self, indent=0, stream=sys.stdout):
    """Pretty-print this leaf: type name plus each token's repr."""
    stream.write(' ' * indent)
    if not self.tokens:
      stream.write(self.type_name + '\n')
      return

    stream.write('%-4s %s\n' % (self.type_name, repr(self.tokens[0].string)))
    padding = ' ' * max(len(self.type_name), 4)
    for tok in self.tokens[1:]:
      stream.write('%s%s %s\n' % (' ' * indent, padding, repr(tok.string)))
+
+
class Symbol(Snippet):
  """A Snippet containing sub-Snippets.

  The possible types and type_names are defined in Python's symbol module."""
  def __init__(self, symbol_type, children):
    self._type = symbol_type
    self._children = children

  @property
  def type(self):
    return self._type

  @property
  def type_name(self):
    return symbol.sym_name[self.type]

  @property
  def children(self):
    return self._children

  @children.setter
  def children(self, value):  # pylint: disable=arguments-differ
    self._children = value

  @property
  def tokens(self):
    # A Symbol owns no tokens directly; concatenate its children's tokens.
    result = []
    for child in self.children:
      result.extend(child.tokens)
    return tuple(result)

  @property
  def modified(self):
    for child in self.children:
      if child.modified:
        return True
    return False

  def PrintTree(self, indent=0, stream=sys.stdout):
    """Pretty-print this subtree, collapsing single-child chains."""
    # Gather the names of the chain of single-child nodes so they share
    # one output line, then recurse into the last node's children.
    names = []
    node = self
    while len(node.children) == 1 and len(node.children[0].children) == 1:
      names.append(node.type_name)
      node = node.children[0]
    names.append(node.type_name)
    stream.write(' ' * indent + ' '.join(names) + '\n')
    for child in node.children:
      child.PrintTree(indent + 2, stream)
+
+
def Snippetize(f):
  """Return the syntax tree of the given file as a Snippet tree."""
  f.seek(0)
  source = f.read()
  syntax_tree = parser.st2list(parser.suite(source))
  token_queue = offset_token.Tokenize(f)

  root = _SnippetizeNode(syntax_tree, token_queue)
  # Every token must have been consumed by the tree walk.
  assert not token_queue
  return root
+
+
def _SnippetizeNode(node, tokens):
  """Recursively convert one parser syntax-tree node into a Snippet.

  Args:
    node: a nested-list syntax tree node as produced by parser.st2list();
      node[0] is the numeric type, node[1:] are child nodes.
    tokens: a queue (supports popleft) of tokens, consumed left-to-right as
      leaves are built. Presumably offset_token.OffsetToken objects from
      offset_token.Tokenize -- confirm against Snippetize.

  Returns:
    A Symbol (internal node) or TokenSnippet (leaf).
  """
  # The parser module gives a syntax tree that discards comments,
  # non-terminating newlines, and whitespace information. Use the tokens given
  # by the tokenize module to annotate the syntax tree with the information
  # needed to exactly reproduce the original source code.
  node_type = node[0]

  if node_type >= token.NT_OFFSET:
    # Symbol. Children are snippetized in order, so each child consumes its
    # tokens from the front of the queue before the next child runs.
    children = tuple(_SnippetizeNode(child, tokens) for child in node[1:])
    return Symbol(node_type, children)
  else:
    # Token. First sweep up leading comments and non-terminating newlines,
    # which the parser tree does not represent.
    grabbed_tokens = []
    while tokens and (
        tokens[0].type == tokenize.COMMENT or tokens[0].type == tokenize.NL):
      grabbed_tokens.append(tokens.popleft())

    # parser has 2 NEWLINEs right before the end.
    # tokenize has 0 or 1 depending on if the file has one.
    # Create extra nodes without consuming tokens to account for this.
    if node_type == token.NEWLINE:
      for tok in tokens:
        if tok.type == token.ENDMARKER:
          # Only DEDENTs remain before ENDMARKER: emit a NEWLINE snippet
          # without consuming any token from the queue.
          return TokenSnippet(node_type, grabbed_tokens)
        if tok.type != token.DEDENT:
          break

    # Operators/delimiters tokenize as token.OP but may have a more specific
    # parser type; otherwise the types must agree exactly.
    assert tokens[0].type == token.OP or node_type == tokens[0].type

    grabbed_tokens.append(tokens.popleft())
    return TokenSnippet(node_type, grabbed_tokens)
« no previous file with comments | « tools/telemetry/catapult_base/refactor/offset_token.py ('k') | tools/telemetry/catapult_base/refactor_util/__init__.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698