# Copyright 2015 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import collections
import itertools
import token
import tokenize


def _Pairwise(iterable):
  """s -> (None, s0), (s0, s1), (s1, s2), (s2, s3), ..."""
  a, b = itertools.tee(iterable)
  a = itertools.chain((None,), a)
  return itertools.izip(a, b)
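

# A minimal usage sketch (hypothetical helper, not part of the original
# module): the leading None sentinel means the first element arrives with no
# predecessor, which is how Tokenize below recognizes the first token.
def _PairwiseExample():
  assert list(_Pairwise('abc')) == [(None, 'a'), ('a', 'b'), ('b', 'c')]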


class OffsetToken(object):
  """A Python token with a relative position.

  A token is represented by a type defined in Python's token module, a string
  representing the content, and an offset. Using relative positions makes it
  easy to insert and remove tokens.
  """
  def __init__(self, token_type, string, offset):
    self._type = token_type
    self._string = string
    self._offset = offset

  @property
  def type(self):
    return self._type

  @property
  def type_name(self):
    return token.tok_name[self._type]

  @property
  def string(self):
    return self._string

  @string.setter
  def string(self, value):
    self._string = value

  @property
  def offset(self):
    return self._offset

  def __str__(self):
    return str((self.type_name, self.string, self.offset))
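

# A minimal usage sketch (hypothetical helper, not part of the original
# module): an OffsetToken for a NAME token that starts two columns after the
# end of the previous token on the same line.
def _OffsetTokenExample():
  t = OffsetToken(token.NAME, 'foo', (0, 2))
  assert t.type_name == 'NAME'
  assert str(t) == "('NAME', 'foo', (0, 2))"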


def Tokenize(f):
  """Read tokens from a file-like object.

  Args:
    f: Any object that has a readline method.

  Returns:
    A collections.deque containing OffsetTokens. Deques are cheaper and easier
    to manipulate sequentially than lists.
  """
  f.seek(0)
  tokenize_tokens = tokenize.generate_tokens(f.readline)

  offset_tokens = collections.deque()
  for prev_token, next_token in _Pairwise(tokenize_tokens):
    token_type, string, (srow, scol), _, _ = next_token
    if not prev_token:
      offset_tokens.append(OffsetToken(token_type, string, (0, 0)))
    else:
      erow, ecol = prev_token[3]
      if erow == srow:
        offset_tokens.append(OffsetToken(token_type, string, (0, scol-ecol)))
      else:
        offset_tokens.append(OffsetToken(token_type, string, (srow-erow, scol)))

  return offset_tokens
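

# A minimal usage sketch (hypothetical helper, not part of the original
# module), assuming Python 2 as the file itself does (itertools.izip): the
# first token is anchored at (0, 0), and each later offset is relative to the
# end of the previous token.
def _TokenizeExample():
  import StringIO
  tokens = Tokenize(StringIO.StringIO('x = 1\n'))
  assert tokens[0].offset == (0, 0)  # 'x' anchors the stream.
  assert tokens[1].offset == (0, 1)  # '=' starts one column after 'x' ends.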


def Untokenize(offset_tokens):
  """Return the string representation of an iterable of OffsetTokens."""
  # Make a copy. Don't modify the original.
  offset_tokens = collections.deque(offset_tokens)

  # Strip leading NL tokens.
  while offset_tokens[0].type == tokenize.NL:
    offset_tokens.popleft()

  # Strip leading vertical whitespace.
  first_token = offset_tokens.popleft()
  # Take care not to modify the existing token. Create a new one in its place.
  first_token = OffsetToken(first_token.type, first_token.string,
                            (0, first_token.offset[1]))
  offset_tokens.appendleft(first_token)

  # Convert OffsetTokens to tokenize tokens.
  tokenize_tokens = []
  row = 1
  col = 0
  for t in offset_tokens:
    offset_row, offset_col = t.offset
    if offset_row == 0:
      col += offset_col
    else:
      row += offset_row
      col = offset_col
    tokenize_tokens.append((t.type, t.string, (row, col), (row, col), None))

  # tokenize can't handle whitespace before line continuations.
  # So add a space.
  return tokenize.untokenize(tokenize_tokens).replace('\\\n', ' \\\n')
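

# A minimal round-trip sketch (hypothetical helper, not part of the original
# module): for simple inputs, Untokenize(Tokenize(f)) reproduces the source,
# modulo the leading-whitespace stripping and line-continuation tweak above.
def _RoundTripExample():
  import StringIO
  source = 'x = 1\ny = 2\n'
  assert Untokenize(Tokenize(StringIO.StringIO(source))) == source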