# Copyright 2015 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import collections
import itertools
import token
import tokenize


def _Pairwise(iterable):
  """s -> (None, s0), (s0, s1), (s1, s2), (s2, s3), ..."""
  a, b = itertools.tee(iterable)
  a = itertools.chain((None,), a)
  return itertools.izip(a, b)

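# A minimal sketch of _Pairwise's behavior, assuming Python 2 (this module
# uses itertools.izip):
#
#   list(_Pairwise('abc')) == [(None, 'a'), ('a', 'b'), ('b', 'c')]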

class OffsetToken(object):
  """A Python token with a relative position.

  A token is represented by a type defined in Python's token module, a string
  representing the content, and an offset. Using relative positions makes it
  easy to insert and remove tokens.
  """
  def __init__(self, token_type, string, offset):
    self._type = token_type
    self._string = string
    self._offset = offset

  @property
  def type(self):
    return self._type

  @property
  def type_name(self):
    return token.tok_name[self._type]

  @property
  def string(self):
    return self._string

  @string.setter
  def string(self, value):
    self._string = value

  @property
  def offset(self):
    return self._offset

  def __str__(self):
    return str((self.type_name, self.string, self.offset))

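# Illustration (a sketch, not part of the original module): a NAME token
# starting two columns after the previous token ends on the same row is
# OffsetToken(token.NAME, 'foo', (0, 2)), and str() of it gives
# "('NAME', 'foo', (0, 2))".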

def Tokenize(f):
  """Read tokens from a file-like object.

  Args:
    f: Any object that has readline and seek methods.

  Returns:
    A collections.deque containing OffsetTokens. Deques are cheaper and easier
    than lists to modify at both ends, which is how Untokenize consumes them.
  """
  f.seek(0)
  tokenize_tokens = tokenize.generate_tokens(f.readline)

  offset_tokens = collections.deque()
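  # Record each token's position relative to the previous token: the first
  # token is anchored at (0, 0); later tokens store either a column gap on
  # the same row or a (row delta, absolute column) pair.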
  for prev_token, next_token in _Pairwise(tokenize_tokens):
    token_type, string, (srow, scol), _, _ = next_token
    if not prev_token:
      offset_tokens.append(OffsetToken(token_type, string, (0, 0)))
    else:
      erow, ecol = prev_token[3]
      if erow == srow:
        offset_tokens.append(OffsetToken(token_type, string, (0, scol-ecol)))
      else:
        offset_tokens.append(OffsetToken(token_type, string, (srow-erow, scol)))

  return offset_tokens

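# Example usage (a sketch, assuming Python 2, where StringIO provides the
# readline and seek methods Tokenize needs):
#
#   import StringIO
#   tokens = Tokenize(StringIO.StringIO('x = 1\n'))
#   # -> deque of OffsetTokens: ('NAME', 'x', (0, 0)), ('OP', '=', (0, 1)),
#   #    ('NUMBER', '1', (0, 1)), ...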

def Untokenize(offset_tokens):
  """Return the string representation of an iterable of OffsetTokens."""
  # Make a copy. Don't modify the original.
  offset_tokens = collections.deque(offset_tokens)

  # Strip leading NL tokens.
  while offset_tokens[0].type == tokenize.NL:
    offset_tokens.popleft()

  # Strip leading vertical whitespace.
  first_token = offset_tokens.popleft()
  # Take care not to modify the existing token. Create a new one in its place.
  first_token = OffsetToken(first_token.type, first_token.string,
                            (0, first_token.offset[1]))
  offset_tokens.appendleft(first_token)

  # Convert OffsetTokens to tokenize tokens.
  tokenize_tokens = []
  row = 1
  col = 0
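  # Offsets are relative: a row offset of 0 means the token stays on the
  # current row and the column offset is a gap from the previous token;
  # otherwise the row advances and the column offset is an absolute column.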
  for t in offset_tokens:
    offset_row, offset_col = t.offset
    if offset_row == 0:
      col += offset_col
    else:
      row += offset_row
      col = offset_col
    tokenize_tokens.append((t.type, t.string, (row, col), (row, col), None))

  # tokenize can't handle whitespace before line continuations.
  # So add a space.
  return tokenize.untokenize(tokenize_tokens).replace('\\\n', ' \\\n')
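

# Round-trip sketch (hypothetical, Python 2): for simple sources the two
# functions invert each other, modulo the extra space added before line
# continuations:
#
#   import StringIO
#   source = 'x = 1\ny = 2\n'
#   assert Untokenize(Tokenize(StringIO.StringIO(source))) == source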