| OLD | NEW |
| (Empty) |
| 1 # Copyright 2015 The Chromium Authors. All rights reserved. | |
| 2 # Use of this source code is governed by a BSD-style license that can be | |
| 3 # found in the LICENSE file. | |
| 4 | |
| 5 import collections | |
| 6 import itertools | |
| 7 import token | |
| 8 import tokenize | |
| 9 | |
| 10 | |
| 11 def _Pairwise(iterable): | |
| 12 """s -> (None, s0), (s0, s1), (s1, s2), (s2, s3), ...""" | |
| 13 a, b = itertools.tee(iterable) | |
| 14 a = itertools.chain((None,), a) | |
| 15 return itertools.izip(a, b) | |
| 16 | |
| 17 | |
class OffsetToken(object):
  """A Python token positioned relative to the token before it.

  Wraps a token type (one of the constants in Python's token module), the
  token's text, and a (row, column) offset from the previous token. Storing
  relative rather than absolute positions makes it easy to insert and remove
  tokens.
  """

  def __init__(self, token_type, string, offset):
    self._token_type = token_type
    self._text = string
    self._relative_position = offset

  @property
  def type(self):
    return self._token_type

  @property
  def type_name(self):
    # Human-readable name for the type, e.g. 'NAME' or 'OP'.
    return token.tok_name[self._token_type]

  @property
  def string(self):
    return self._text

  @string.setter
  def string(self, value):
    self._text = value

  @property
  def offset(self):
    return self._relative_position

  def __str__(self):
    return str((self.type_name, self.string, self.offset))
| 52 | |
| 53 | |
def Tokenize(f):
  """Read tokens from a file-like object.

  Args:
    f: Any object that has a readline method.

  Returns:
    A collections.deque containing OffsetTokens. Deques are cheaper and easier
    to manipulate sequentially than lists.
  """
  f.seek(0)
  raw_tokens = tokenize.generate_tokens(f.readline)

  result = collections.deque()
  for previous, current in _Pairwise(raw_tokens):
    token_type, string, (start_row, start_col), _, _ = current
    if previous is None:
      # The very first token has no predecessor; anchor it at the origin.
      result.append(OffsetToken(token_type, string, (0, 0)))
      continue
    end_row, end_col = previous[3]
    if start_row == end_row:
      # Same line: the offset is the horizontal gap since the previous
      # token's end column.
      offset = (0, start_col - end_col)
    else:
      # New line: the offset is the row delta plus the absolute start column.
      offset = (start_row - end_row, start_col)
    result.append(OffsetToken(token_type, string, offset))

  return result
| 80 | |
| 81 | |
def Untokenize(offset_tokens):
  """Return the string representation of an iterable of OffsetTokens."""
  # Work on a copy so the caller's sequence is left untouched.
  tokens = collections.deque(offset_tokens)

  # Drop any NL tokens at the front.
  while tokens[0].type == tokenize.NL:
    tokens.popleft()

  # Convert relative offsets back into absolute (row, col) positions in the
  # 5-tuple shape that tokenize.untokenize expects. The first token's row
  # offset is forced to zero, which strips leading vertical whitespace.
  absolute = []
  row, col = 1, 0
  for position, tok in enumerate(tokens):
    delta_row, delta_col = tok.offset
    if position == 0:
      delta_row = 0
    if delta_row == 0:
      col += delta_col
    else:
      row += delta_row
      col = delta_col
    absolute.append((tok.type, tok.string, (row, col), (row, col), None))

  # tokenize can't handle whitespace before line continuations.
  # So add a space.
  return tokenize.untokenize(absolute).replace('\\\n', ' \\\n')
| OLD | NEW |