Index: tools/telemetry/catapult_base/refactor/offset_token.py
diff --git a/tools/telemetry/catapult_base/refactor/offset_token.py b/tools/telemetry/catapult_base/refactor/offset_token.py
new file mode 100644
index 0000000000000000000000000000000000000000..2578f854864e1e7d32ab117ac731167a55b634b0
--- /dev/null
+++ b/tools/telemetry/catapult_base/refactor/offset_token.py
@@ -0,0 +1,113 @@
+# Copyright 2015 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+import collections
+import itertools
+import token
+import tokenize
+
+
+def _Pairwise(iterable):
+  """s -> (None, s0), (s0, s1), (s1, s2), (s2, s3), ..."""
+  a, b = itertools.tee(iterable)
+  a = itertools.chain((None,), a)
+  return itertools.izip(a, b)
+
+
+class OffsetToken(object):
+  """A Python token with a relative position.
+
+  A token is represented by a type defined in Python's token module, a string
+  representing the content, and an offset. Using relative positions makes it
+  easy to insert and remove tokens.
+  """
+  def __init__(self, token_type, string, offset):
+    self._type = token_type
+    self._string = string
+    self._offset = offset
+
+  @property
+  def type(self):
+    return self._type
+
+  @property
+  def type_name(self):
+    return token.tok_name[self._type]
+
+  @property
+  def string(self):
+    return self._string
+
+  @string.setter
+  def string(self, value):
+    self._string = value
+
+  @property
+  def offset(self):
+    return self._offset
+
+  def __str__(self):
+    return str((self.type_name, self.string, self.offset))
+
+
+def Tokenize(f):
+  """Read tokens from a file-like object.
+
+  Args:
+    f: Any object that has a readline method.
+
+  Returns:
+    A collections.deque containing OffsetTokens. Deques are cheaper and easier
+    to manipulate sequentially than lists.
+  """
+  f.seek(0)
+  tokenize_tokens = tokenize.generate_tokens(f.readline)
+
+  offset_tokens = collections.deque()
+  for prev_token, next_token in _Pairwise(tokenize_tokens):
+    token_type, string, (srow, scol), _, _ = next_token
+    if not prev_token:
+      offset_tokens.append(OffsetToken(token_type, string, (0, 0)))
+    else:
+      erow, ecol = prev_token[3]
+      if erow == srow:
+        offset_tokens.append(OffsetToken(token_type, string, (0, scol-ecol)))
+      else:
+        offset_tokens.append(OffsetToken(token_type, string, (srow-erow, scol)))
+
+  return offset_tokens
+
+
+def Untokenize(offset_tokens):
+  """Return the string representation of an iterable of OffsetTokens."""
+  # Make a copy. Don't modify the original.
+  offset_tokens = collections.deque(offset_tokens)
+
+  # Strip leading NL tokens.
+  while offset_tokens[0].type == tokenize.NL:
+    offset_tokens.popleft()
+
+  # Strip leading vertical whitespace.
+  first_token = offset_tokens.popleft()
+  # Take care not to modify the existing token. Create a new one in its place.
+  first_token = OffsetToken(first_token.type, first_token.string,
+                            (0, first_token.offset[1]))
+  offset_tokens.appendleft(first_token)
+
+  # Convert OffsetTokens to tokenize tokens.
+  tokenize_tokens = []
+  row = 1
+  col = 0
+  for t in offset_tokens:
+    offset_row, offset_col = t.offset
+    if offset_row == 0:
+      col += offset_col
+    else:
+      row += offset_row
+      col = offset_col
+    tokenize_tokens.append((t.type, t.string, (row, col), (row, col), None))
+
+  # tokenize can't handle whitespace before line continuations.
+  # So add a space.
+  return tokenize.untokenize(tokenize_tokens).replace('\\\n', ' \\\n')
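
A minimal usage sketch of the round trip this module enables, assuming Python 2 (the module uses itertools.izip) and assuming the package is importable as catapult_base.refactor, per the file path in the diff header:

  import StringIO

  # Import path assumed from the file's location in the patch.
  from catapult_base.refactor import offset_token

  source = 'x = 1\nif x:\n  x += 2\n'
  tokens = offset_token.Tokenize(StringIO.StringIO(source))

  # Rename every occurrence of the name "x" by editing tokens in place.
  for t in tokens:
    if t.type_name == 'NAME' and t.string == 'x':
      t.string = 'count'

  print offset_token.Untokenize(tokens)

Because each OffsetToken stores only its offset from the previous token, substituting the longer name needs no position fixups: Untokenize re-derives absolute positions from the relative gaps, so the surrounding whitespace is preserved.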