| Index: tools/telemetry/catapult_base/refactor/snippet.py
|
| diff --git a/tools/telemetry/catapult_base/refactor/snippet.py b/tools/telemetry/catapult_base/refactor/snippet.py
|
| new file mode 100644
|
| index 0000000000000000000000000000000000000000..ecb688af9004038866ca44a1335dd003c02356ab
|
| --- /dev/null
|
| +++ b/tools/telemetry/catapult_base/refactor/snippet.py
|
| @@ -0,0 +1,242 @@
|
| +# Copyright 2015 The Chromium Authors. All rights reserved.
|
| +# Use of this source code is governed by a BSD-style license that can be
|
| +# found in the LICENSE file.
|
| +
|
| +import parser
|
| +import symbol
|
| +import sys
|
| +import token
|
| +import tokenize
|
| +
|
| +from catapult_base.refactor import offset_token
|
| +
|
| +
|
| +class Snippet(object):
|
| + """A node in the Python parse tree.
|
| +
|
| + The Python grammar is defined at:
|
| + https://docs.python.org/2/reference/grammar.html
|
| +
|
| + There are two types of Snippets:
|
| + TokenSnippets are leaf nodes containing actual text.
|
| + Symbols are internal nodes representing higher-level groupings, and are
|
| + defined by the left-hand sides of the BNFs in the above link.
|
| + """
|
| + @property
|
| + def type(self):
|
| + raise NotImplementedError()
|
| +
|
| + @property
|
| + def type_name(self):
|
| + raise NotImplementedError()
|
| +
|
| + @property
|
| + def children(self):
|
| + """Return a list of this node's children."""
|
| + raise NotImplementedError()
|
| +
|
| + @property
|
| + def tokens(self):
|
| + """Return a tuple of the tokens this Snippet contains."""
|
| + raise NotImplementedError()
|
| +
|
| + def PrintTree(self, indent=0, stream=sys.stdout):
|
| + """Spew a pretty-printed parse tree. Mostly useful for debugging."""
|
| + raise NotImplementedError()
|
| +
|
| + def __str__(self):
|
| + return offset_token.Untokenize(self.tokens)
|
| +
|
| + def FindAll(self, snippet_type):
|
| + if isinstance(snippet_type, int):
|
| + if self.type == snippet_type:
|
| + yield self
|
| + else:
|
| + if isinstance(self, snippet_type):
|
| + yield self
|
| +
|
| + for child in self.children:
|
| + for snippet in child.FindAll(snippet_type):
|
| + yield snippet
|
| +
|
| + def FindChild(self, snippet_type, **kwargs):
|
| + for child in self.children:
|
| + if isinstance(snippet_type, int):
|
| + if child.type != snippet_type:
|
| + continue
|
| + else:
|
| + if not isinstance(child, snippet_type):
|
| + continue
|
| +
|
| + for attribute, value in kwargs:
|
| + if getattr(child, attribute) != value:
|
| + break
|
| + else:
|
| + return child
|
| + raise ValueError('%s is not in %s. Children are: %s' %
|
| + (snippet_type, self, self.children))
|
| +
|
| + def FindChildren(self, snippet_type):
|
| + if isinstance(snippet_type, int):
|
| + for child in self.children:
|
| + if child.type == snippet_type:
|
| + yield child
|
| + else:
|
| + for child in self.children:
|
| + if isinstance(child, snippet_type):
|
| + yield child
|
| +
|
| +
|
class TokenSnippet(Snippet):
  """A Snippet containing a list of tokens.

  A list of tokens may start with any number of comments and non-terminating
  newlines, but must end with a syntactically meaningful token.
  """
  def __init__(self, token_type, tokens):
    # The TokenSnippet's type may be more specific than that of its underlying
    # token. E.g. the TokenSnippet type can be token.DOT while the token type
    # is token.OP, because the parser has more context than the tokenizer.
    self._type = token_type
    self._tokens = tokens
    self._modified = False

  @classmethod
  def Create(cls, token_type, string, offset=(0, 0)):
    """Build a TokenSnippet wrapping one freshly created token."""
    new_token = offset_token.OffsetToken(token_type, string, offset)
    return cls(token_type, [new_token])

  @property
  def type(self):
    return self._type

  @property
  def type_name(self):
    return token.tok_name[self.type]

  @property
  def value(self):
    # The last token is the syntactically meaningful one; any leading tokens
    # are comments or non-terminating newlines.
    return self._tokens[-1].string

  @value.setter
  def value(self, value):
    self._tokens[-1].string = value
    self._modified = True

  @property
  def children(self):
    # Leaf node: never has children.
    return []

  @property
  def tokens(self):
    return tuple(self._tokens)

  @property
  def modified(self):
    return self._modified

  def PrintTree(self, indent=0, stream=sys.stdout):
    """Pretty-print this leaf and each of its tokens to stream."""
    stream.write(' ' * indent)
    if not self.tokens:
      print >> stream, self.type_name
      return

    print >> stream, '%-4s' % self.type_name, repr(self.tokens[0].string)
    for extra_token in self.tokens[1:]:
      stream.write(' ' * indent)
      print >> stream, ' ' * max(len(self.type_name), 4), repr(extra_token.string)
|
| +
|
| +
|
class Symbol(Snippet):
  """A Snippet containing sub-Snippets.

  The possible types and type_names are defined in Python's symbol module."""
  def __init__(self, symbol_type, children):
    self._type = symbol_type
    self._children = children

  @property
  def type(self):
    return self._type

  @property
  def type_name(self):
    return symbol.sym_name[self.type]

  @property
  def children(self):
    return self._children

  @children.setter
  def children(self, value):  # pylint: disable=arguments-differ
    self._children = value

  @property
  def tokens(self):
    # Flatten the tokens of every child, left to right.
    collected = []
    for child in self.children:
      collected.extend(child.tokens)
    return tuple(collected)

  @property
  def modified(self):
    # A Symbol is modified iff any descendant TokenSnippet was modified.
    return any(child.modified for child in self.children)

  def PrintTree(self, indent=0, stream=sys.stdout):
    """Pretty-print this subtree to stream, two spaces per level."""
    stream.write(' ' * indent)

    # Chains of single children are collapsed onto the same line.
    node = self
    while len(node.children) == 1 and len(node.children[0].children) == 1:
      print >> stream, node.type_name,
      node = node.children[0]

    print >> stream, node.type_name
    for child in node.children:
      child.PrintTree(indent + 2, stream)
|
| +
|
| +
|
def Snippetize(f):
  """Return the syntax tree of the given file."""
  # Read the whole file for the parser, then hand the file object to the
  # tokenizer; consume order must stay seek -> read -> Tokenize.
  f.seek(0)
  syntax_tree = parser.st2list(parser.suite(f.read()))
  token_queue = offset_token.Tokenize(f)

  root = _SnippetizeNode(syntax_tree, token_queue)
  # Every token must have been attached to some leaf of the tree.
  assert not token_queue
  return root
|
| +
|
| +
|
def _SnippetizeNode(node, tokens):
  """Recursively convert one parser syntax-tree node into a Snippet.

  Args:
    node: A nested list as produced by parser.st2list(); node[0] is the
        type, the rest are children (for symbols).
    tokens: A left-poppable sequence of tokens (presumably a deque from
        offset_token.Tokenize — TODO confirm), consumed as leaves are built.

  Returns:
    A Symbol (internal node) or TokenSnippet (leaf).
  """
  # The parser module gives a syntax tree that discards comments,
  # non-terminating newlines, and whitespace information. Use the tokens given
  # by the tokenize module to annotate the syntax tree with the information
  # needed to exactly reproduce the original source code.
  node_type = node[0]

  if node_type >= token.NT_OFFSET:
    # Symbol.
    children = tuple(_SnippetizeNode(child, tokens) for child in node[1:])
    return Symbol(node_type, children)
  else:
    # Token.
    # Attach any leading comments and non-terminating newlines to this leaf.
    grabbed_tokens = []
    while tokens and (
        tokens[0].type == tokenize.COMMENT or tokens[0].type == tokenize.NL):
      grabbed_tokens.append(tokens.popleft())

    # parser has 2 NEWLINEs right before the end.
    # tokenize has 0 or 1 depending on if the file has one.
    # Create extra nodes without consuming tokens to account for this.
    if node_type == token.NEWLINE:
      # Peek ahead without consuming: if only DEDENTs separate us from the
      # ENDMARKER, this NEWLINE has no corresponding token in the stream.
      for tok in tokens:
        if tok.type == token.ENDMARKER:
          return TokenSnippet(node_type, grabbed_tokens)
        if tok.type != token.DEDENT:
          break

    # The leaf's type may be more specific than the token's (parser reports
    # e.g. token.DOT where the tokenizer only says token.OP).
    assert tokens[0].type == token.OP or node_type == tokens[0].type

    grabbed_tokens.append(tokens.popleft())
    return TokenSnippet(node_type, grabbed_tokens)
|
|
|