Index: tools/telemetry/catapult_base/refactor/snippet.py
diff --git a/tools/telemetry/catapult_base/refactor/snippet.py b/tools/telemetry/catapult_base/refactor/snippet.py
new file mode 100644
index 0000000000000000000000000000000000000000..ecb688af9004038866ca44a1335dd003c02356ab
--- /dev/null
+++ b/tools/telemetry/catapult_base/refactor/snippet.py
@@ -0,0 +1,265 @@
+# Copyright 2015 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+import parser
+import symbol
+import sys
+import token
+import tokenize
+
+from catapult_base.refactor import offset_token
+
+
+class Snippet(object):
+  """A node in the Python parse tree.
+
+  The Python grammar is defined at:
+  https://docs.python.org/2/reference/grammar.html
+
+  There are two types of Snippets:
+    TokenSnippets are leaf nodes containing actual text.
+    Symbols are internal nodes representing higher-level groupings, defined
+    by the left-hand sides of the BNF productions in the grammar above.
+  """
+  @property
+  def type(self):
+    raise NotImplementedError()
+
+  @property
+  def type_name(self):
+    raise NotImplementedError()
+
+  @property
+  def children(self):
+    """Return a list of this node's children."""
+    raise NotImplementedError()
+
+  @property
+  def tokens(self):
+    """Return a tuple of the tokens this Snippet contains."""
+    raise NotImplementedError()
+
+  def PrintTree(self, indent=0, stream=sys.stdout):
+    """Print a pretty-printed parse tree. Mostly useful for debugging."""
+    raise NotImplementedError()
+
+  def __str__(self):
+    return offset_token.Untokenize(self.tokens)
+
+  def FindAll(self, snippet_type):
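+    """Yield this Snippet, if it matches, and all matching descendants.
+
+    snippet_type may be a type constant from the token or symbol modules,
+    or a Snippet subclass.
+    """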
+    if isinstance(snippet_type, int):
+      if self.type == snippet_type:
+        yield self
+    else:
+      if isinstance(self, snippet_type):
+        yield self
+
+    for child in self.children:
+      for snippet in child.FindAll(snippet_type):
+        yield snippet
+
+  def FindChild(self, snippet_type, **kwargs):
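+    """Return the first direct child matching snippet_type and all kwargs.
+
+    Children are matched by type and by the attribute values given as
+    keyword arguments; raises ValueError if no child matches.
+    """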
+    for child in self.children:
+      if isinstance(snippet_type, int):
+        if child.type != snippet_type:
+          continue
+      else:
+        if not isinstance(child, snippet_type):
+          continue
+
+      for attribute, value in kwargs.iteritems():
+        if getattr(child, attribute) != value:
+          break
+      else:
+        return child
+    raise ValueError('%s is not in %s. Children are: %s' %
+                     (snippet_type, self, self.children))
+
+  def FindChildren(self, snippet_type):
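+    """Yield all direct children matching snippet_type."""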
+    if isinstance(snippet_type, int):
+      for child in self.children:
+        if child.type == snippet_type:
+          yield child
+    else:
+      for child in self.children:
+        if isinstance(child, snippet_type):
+          yield child
+
+
+class TokenSnippet(Snippet):
+  """A Snippet containing a list of tokens.
+
+  A list of tokens may start with any number of comments and non-terminating
+  newlines, but must end with a syntactically meaningful token.
+  """
+  def __init__(self, token_type, tokens):
+    # For operators and delimiters, the TokenSnippet's type may be more
+    # specific than the type of the constituent token. E.g. the TokenSnippet
+    # type is token.DOT, but the token type is token.OP. This is because the
+    # parser has more context than the tokenizer.
+    self._type = token_type
+    self._tokens = tokens
+    self._modified = False
+
+  @classmethod
+  def Create(cls, token_type, string, offset=(0, 0)):
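+    """Build a TokenSnippet containing a single token at the given offset."""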
+    return cls(token_type,
+               [offset_token.OffsetToken(token_type, string, offset)])
+
+  @property
+  def type(self):
+    return self._type
+
+  @property
+  def type_name(self):
+    return token.tok_name[self.type]
+
+  @property
+  def value(self):
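+    """The string of the last (syntactically meaningful) token."""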
+    return self._tokens[-1].string
+
+  @value.setter
+  def value(self, value):
+    self._tokens[-1].string = value
+    self._modified = True
+
+  @property
+  def children(self):
+    return []
+
+  @property
+  def tokens(self):
+    return tuple(self._tokens)
+
+  @property
+  def modified(self):
+    return self._modified
+
+  def PrintTree(self, indent=0, stream=sys.stdout):
+    stream.write(' ' * indent)
+    if not self.tokens:
+      print >> stream, self.type_name
+      return
+
+    print >> stream, '%-4s' % self.type_name, repr(self.tokens[0].string)
+    for tok in self.tokens[1:]:
+      stream.write(' ' * indent)
+      print >> stream, ' ' * max(len(self.type_name), 4), repr(tok.string)
+
+
+class Symbol(Snippet):
+  """A Snippet containing sub-Snippets.
+
+  The possible types and type_names are defined in Python's symbol module.
+  """
+  def __init__(self, symbol_type, children):
+    self._type = symbol_type
+    self._children = children
+
+  @property
+  def type(self):
+    return self._type
+
+  @property
+  def type_name(self):
+    return symbol.sym_name[self.type]
+
+  @property
+  def children(self):
+    return self._children
+
+  @children.setter
+  def children(self, value):  # pylint: disable=arguments-differ
+    self._children = value
+
+  @property
+  def tokens(self):
+    tokens = []
+    for child in self.children:
+      tokens += child.tokens
+    return tuple(tokens)
+
+  @property
+  def modified(self):
+    return any(child.modified for child in self.children)
+
+  def PrintTree(self, indent=0, stream=sys.stdout):
+    stream.write(' ' * indent)
+
+    # If there's only one child, collapse it onto the same line.
+    node = self
+    while len(node.children) == 1 and len(node.children[0].children) == 1:
+      print >> stream, node.type_name,
+      node = node.children[0]
+
+    print >> stream, node.type_name
+    for child in node.children:
+      child.PrintTree(indent + 2, stream)
+
+
+def Snippetize(f):
+  """Return the Snippet syntax tree of the given file."""
+  f.seek(0)
+  syntax_tree = parser.st2list(parser.suite(f.read()))
+  tokens = offset_token.Tokenize(f)
+
+  snippet = _SnippetizeNode(syntax_tree, tokens)
+  assert not tokens
+  return snippet
+
+
+def _SnippetizeNode(node, tokens):
+  # The parser module gives a syntax tree that discards comments,
+  # non-terminating newlines, and whitespace information. Use the tokens given
+  # by the tokenize module to annotate the syntax tree with the information
+  # needed to exactly reproduce the original source code.
+  node_type = node[0]
+
+  if node_type >= token.NT_OFFSET:
+    # Symbol.
+    children = tuple(_SnippetizeNode(child, tokens) for child in node[1:])
+    return Symbol(node_type, children)
+  else:
+    # Token.
+    grabbed_tokens = []
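+    # Comments and non-terminating newlines don't appear in the parse tree;
+    # collect them here and attach them to the next meaningful token.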
+    while tokens and (
+        tokens[0].type == tokenize.COMMENT or tokens[0].type == tokenize.NL):
+      grabbed_tokens.append(tokens.popleft())
+
+    # parser has 2 NEWLINEs right before the end.
+    # tokenize has 0 or 1, depending on whether the file ends with a newline.
+    # Create extra nodes without consuming tokens to account for this.
+    if node_type == token.NEWLINE:
+      for tok in tokens:
+        if tok.type == token.ENDMARKER:
+          return TokenSnippet(node_type, grabbed_tokens)
+        if tok.type != token.DEDENT:
+          break
+
+    assert tokens[0].type == token.OP or node_type == tokens[0].type
+
+    grabbed_tokens.append(tokens.popleft())
+    return TokenSnippet(node_type, grabbed_tokens)
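+
+# Example usage (a minimal sketch; 'example.py' stands for any Python 2
+# source file):
+#   with open('example.py') as f:
+#     snippet = Snippetize(f)
+#   snippet.PrintTree()  # Inspect the annotated parse tree.
+#   assert str(snippet) == open('example.py').read()  # Exact round trip.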