Index: tools/telemetry/catapult_base/refactor/offset_token.py
diff --git a/tools/telemetry/catapult_base/refactor/offset_token.py b/tools/telemetry/catapult_base/refactor/offset_token.py
new file mode 100644
index 0000000000000000000000000000000000000000..2578f854864e1e7d32ab117ac731167a55b634b0
--- /dev/null
+++ b/tools/telemetry/catapult_base/refactor/offset_token.py
@@ -0,0 +1,149 @@
+# Copyright 2015 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+import collections
+import itertools
+import token
+import tokenize
+
+
+def _Pairwise(iterable):
+  """s -> (None, s0), (s0, s1), (s1, s2), (s2, s3), ..."""
+  a, b = itertools.tee(iterable)
+  a = itertools.chain((None,), a)
+  return itertools.izip(a, b)
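+
+# Illustrative sketch (not part of the original change): pairing a sequence
+# yields each item alongside its predecessor, with None seeding the first
+# pair.
+#
+#   >>> list(_Pairwise('abc'))
+#   [(None, 'a'), ('a', 'b'), ('b', 'c')]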
+
+
+class OffsetToken(object):
+  """A Python token with a relative position.
+
+  A token is represented by a type defined in Python's token module, a string
+  representing the content, and an offset. Using relative positions makes it
+  easy to insert and remove tokens.
+  """
+  def __init__(self, token_type, string, offset):
+    self._type = token_type
+    self._string = string
+    self._offset = offset
+
+  @property
+  def type(self):
+    return self._type
+
+  @property
+  def type_name(self):
+    return token.tok_name[self._type]
+
+  @property
+  def string(self):
+    return self._string
+
+  @string.setter
+  def string(self, value):
+    self._string = value
+
+  @property
+  def offset(self):
+    return self._offset
+
+  def __str__(self):
+    return str((self.type_name, self.string, self.offset))
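+
+# Illustrative sketch (not part of the original change): an OffsetToken two
+# columns to the right of the previous token on the same line.
+#
+#   >>> print OffsetToken(token.NAME, 'foo', (0, 2))
+#   ('NAME', 'foo', (0, 2))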
+
+
+def Tokenize(f):
+  """Read tokens from a file-like object.
+
+  Args:
+    f: Any object that has a readline method.
+
+  Returns:
+    A collections.deque containing OffsetTokens. Deques support cheap
+    insertion and removal at either end, which suits sequential manipulation.
+  """
+  f.seek(0)
+  tokenize_tokens = tokenize.generate_tokens(f.readline)
+
+  offset_tokens = collections.deque()
+  for prev_token, next_token in _Pairwise(tokenize_tokens):
+    token_type, string, (srow, scol), _, _ = next_token
+    if not prev_token:
+      offset_tokens.append(OffsetToken(token_type, string, (0, 0)))
+    else:
+      erow, ecol = prev_token[3]
+      if erow == srow:
+        offset_tokens.append(OffsetToken(token_type, string, (0, scol-ecol)))
+      else:
+        offset_tokens.append(OffsetToken(token_type, string, (srow-erow, scol)))
+
+  return offset_tokens
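+
+# Illustrative sketch (not part of the original change), assuming a
+# cStringIO buffer as the file-like object:
+#
+#   >>> import cStringIO
+#   >>> for t in Tokenize(cStringIO.StringIO('x = 1\n')):
+#   ...   print t
+#   ('NAME', 'x', (0, 0))
+#   ('OP', '=', (0, 1))
+#   ('NUMBER', '1', (0, 1))
+#   ('NEWLINE', '\n', (0, 0))
+#   ('ENDMARKER', '', (1, 0))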
+
+
+def Untokenize(offset_tokens):
+  """Return the string representation of an iterable of OffsetTokens."""
+  # Make a copy. Don't modify the original.
+  offset_tokens = collections.deque(offset_tokens)
+
+  # Strip leading NL tokens.
+  while offset_tokens[0].type == tokenize.NL:
+    offset_tokens.popleft()
+
+  # Strip leading vertical whitespace by zeroing the first row offset.
+  first_token = offset_tokens.popleft()
+  # Take care not to modify the existing token. Create a new one in its place.
+  first_token = OffsetToken(first_token.type, first_token.string,
+                            (0, first_token.offset[1]))
+  offset_tokens.appendleft(first_token)
+
+  # Convert OffsetTokens to tokenize tokens.
+  tokenize_tokens = []
+  row = 1
+  col = 0
+  for t in offset_tokens:
+    offset_row, offset_col = t.offset
+    if offset_row == 0:
+      col += offset_col
+    else:
+      row += offset_row
+      col = offset_col
+    # Each token gets zero width (end == start); the column offsets already
+    # encode the spacing between tokens.
+    tokenize_tokens.append((t.type, t.string, (row, col), (row, col), None))
+
+  # tokenize can't handle whitespace before line continuations, so add a
+  # space before each one.
+  return tokenize.untokenize(tokenize_tokens).replace('\\\n', ' \\\n')
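+
+# Illustrative round-trip sketch (not part of the original change): tokens
+# from Tokenize() can be edited in place and serialized with Untokenize().
+#
+#   >>> import cStringIO
+#   >>> tokens = Tokenize(cStringIO.StringIO('x = 1\n'))
+#   >>> tokens[2].string = '2'  # Replace the NUMBER token's content.
+#   >>> Untokenize(tokens)
+#   'x = 2\n'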