OLD | NEW |
(Empty) | |
| 1 # Copyright (c) 2003-2013 LOGILAB S.A. (Paris, FRANCE). |
| 2 # |
| 3 # This program is free software; you can redistribute it and/or modify it under |
| 4 # the terms of the GNU General Public License as published by the Free Software |
| 5 # Foundation; either version 2 of the License, or (at your option) any later |
| 6 # version. |
| 7 # |
| 8 # This program is distributed in the hope that it will be useful, but WITHOUT |
| 9 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
| 10 # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. |
| 11 # |
| 12 # You should have received a copy of the GNU General Public License along with |
| 13 # this program; if not, write to the Free Software Foundation, Inc., |
| 14 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
| 15 """Python code format's checker. |
| 16 |
| 17 By default try to follow Guido's style guide : |
| 18 |
| 19 http://www.python.org/doc/essays/styleguide.html |
| 20 |
| 21 Some parts of the process_token method is based from The Tab Nanny std module. |
| 22 """ |
| 23 |
| 24 import keyword |
| 25 import sys |
| 26 import tokenize |
| 27 from functools import reduce # pylint: disable=redefined-builtin |
| 28 |
| 29 import six |
| 30 from six.moves import zip, map, filter # pylint: disable=redefined-builtin |
| 31 |
| 32 from astroid import nodes |
| 33 |
| 34 from pylint.interfaces import ITokenChecker, IAstroidChecker, IRawChecker |
| 35 from pylint.checkers import BaseTokenChecker |
| 36 from pylint.checkers.utils import check_messages |
| 37 from pylint.utils import WarningScope, OPTION_RGX |
| 38 |
# Keywords that open an indented block; a continued line starting with one of
# these gets the special "before block" indentation handling.
_CONTINUATION_BLOCK_OPENERS = ['elif', 'except', 'for', 'if', 'while', 'def', 'class']
# Keyword tokens after which superfluous parentheses are checked.
_KEYWORD_TOKENS = ['assert', 'del', 'elif', 'except', 'for', 'if', 'in', 'not',
                   'raise', 'return', 'while', 'yield']
if sys.version_info < (3, 0):
    # 'print' is a statement keyword only on Python 2.
    _KEYWORD_TOKENS.append('print')

# Operators that must be surrounded by exactly one space on each side.
_SPACED_OPERATORS = ['==', '<', '>', '!=', '<>', '<=', '>=',
                     '+=', '-=', '*=', '**=', '/=', '//=', '&=', '|=', '^=',
                     '%=', '>>=', '<<=']
_OPENING_BRACKETS = ['(', '[', '{']
_CLOSING_BRACKETS = [')', ']', '}']
# Number of columns a tab counts for when measuring indentation width.
_TAB_LENGTH = 8

# Token types that end a physical or logical line.
_EOL = frozenset([tokenize.NEWLINE, tokenize.NL, tokenize.COMMENT])
# Token types carrying no code content.
_JUNK_TOKENS = (tokenize.COMMENT, tokenize.NL)

# Whitespace checking policy constants
_MUST = 0
_MUST_NOT = 1
_IGNORE = 2

# Whitespace checking config constants
_DICT_SEPARATOR = 'dict-separator'
_TRAILING_COMMA = 'trailing-comma'
_NO_SPACE_CHECK_CHOICES = [_TRAILING_COMMA, _DICT_SEPARATOR]
# Message definitions for this checker: msgid -> (template, symbol,
# description[, extra options]).  Text here is user-facing; do not edit
# casually — external configs and tests may match on it.
MSGS = {
    'C0301': ('Line too long (%s/%s)',
              'line-too-long',
              'Used when a line is longer than a given number of characters.'),
    'C0302': ('Too many lines in module (%s/%s)', # was W0302
              'too-many-lines',
              'Used when a module has too much lines, reducing its readability.'
             ),
    'C0303': ('Trailing whitespace',
              'trailing-whitespace',
              'Used when there is whitespace between the end of a line and the '
              'newline.'),
    'C0304': ('Final newline missing',
              'missing-final-newline',
              'Used when the last line in a file is missing a newline.'),
    'W0311': ('Bad indentation. Found %s %s, expected %s',
              'bad-indentation',
              'Used when an unexpected number of indentation\'s tabulations or '
              'spaces has been found.'),
    'C0330': ('Wrong %s indentation%s.\n%s%s',
              'bad-continuation',
              'TODO'),
    'W0312': ('Found indentation with %ss instead of %ss',
              'mixed-indentation',
              'Used when there are some mixed tabs and spaces in a module.'),
    'W0301': ('Unnecessary semicolon', # was W0106
              'unnecessary-semicolon',
              'Used when a statement is ended by a semi-colon (";"), which \
isn\'t necessary (that\'s python, not C ;).'),
    'C0321': ('More than one statement on a single line',
              'multiple-statements',
              'Used when more than on statement are found on the same line.',
              {'scope': WarningScope.NODE}),
    'C0325' : ('Unnecessary parens after %r keyword',
               'superfluous-parens',
               'Used when a single item in parentheses follows an if, for, or '
               'other keyword.'),
    'C0326': ('%s space %s %s %s\n%s',
              'bad-whitespace',
              ('Used when a wrong number of spaces is used around an operator, '
               'bracket or block opener.'),
              {'old_names': [('C0323', 'no-space-after-operator'),
                             ('C0324', 'no-space-after-comma'),
                             ('C0322', 'no-space-before-operator')]}),
    'W0332': ('Use of "l" as long integer identifier',
              'lowercase-l-suffix',
              'Used when a lower case "l" is used to mark a long integer. You '
              'should use a upper case "L" since the letter "l" looks too much '
              'like the digit "1"',
              {'maxversion': (3, 0)}),
    'C0327': ('Mixed line endings LF and CRLF',
              'mixed-line-endings',
              'Used when there are mixed (LF and CRLF) newline signs in a file.'),
    'C0328': ('Unexpected line ending format. There is \'%s\' while it should be \'%s\'.',
              'unexpected-line-ending-format',
              'Used when there is different newline than expected.'),
    }
| 122 |
| 123 |
| 124 def _underline_token(token): |
| 125 length = token[3][1] - token[2][1] |
| 126 offset = token[2][1] |
| 127 return token[4] + (' ' * offset) + ('^' * length) |
| 128 |
| 129 |
| 130 def _column_distance(token1, token2): |
| 131 if token1 == token2: |
| 132 return 0 |
| 133 if token2[3] < token1[3]: |
| 134 token1, token2 = token2, token1 |
| 135 if token1[3][0] != token2[2][0]: |
| 136 return None |
| 137 return token2[2][1] - token1[3][1] |
| 138 |
| 139 |
| 140 def _last_token_on_line_is(tokens, line_end, token): |
| 141 return (line_end > 0 and tokens.token(line_end-1) == token or |
| 142 line_end > 1 and tokens.token(line_end-2) == token |
| 143 and tokens.type(line_end-1) == tokenize.COMMENT) |
| 144 |
| 145 |
| 146 def _token_followed_by_eol(tokens, position): |
| 147 return (tokens.type(position+1) == tokenize.NL or |
| 148 tokens.type(position+1) == tokenize.COMMENT and |
| 149 tokens.type(position+2) == tokenize.NL) |
| 150 |
| 151 |
| 152 def _get_indent_length(line): |
| 153 """Return the length of the indentation on the given token's line.""" |
| 154 result = 0 |
| 155 for char in line: |
| 156 if char == ' ': |
| 157 result += 1 |
| 158 elif char == '\t': |
| 159 result += _TAB_LENGTH |
| 160 else: |
| 161 break |
| 162 return result |
| 163 |
| 164 |
| 165 def _get_indent_hint_line(bar_positions, bad_position): |
| 166 """Return a line with |s for each of the positions in the given lists.""" |
| 167 if not bar_positions: |
| 168 return '' |
| 169 markers = [(pos, '|') for pos in bar_positions] |
| 170 markers.append((bad_position, '^')) |
| 171 markers.sort() |
| 172 line = [' '] * (markers[-1][0] + 1) |
| 173 for position, marker in markers: |
| 174 line[position] = marker |
| 175 return ''.join(line) |
| 176 |
| 177 |
class _ContinuedIndent(object):
    """One continued-indentation context on the ContinuedLineState stack.

    Attributes:
        valid_outdent_offsets: offsets accepted for a closing bracket.
        valid_continuation_offsets: offsets accepted for continued content.
        context_type: one of the HANGING*/CONTINUED* context constants.
        token: the concrete token (bracket, 'lambda' or ':') that opened
            this context.
        position: index of that token in the token stream.
    """
    __slots__ = ('valid_outdent_offsets',
                 'valid_continuation_offsets',
                 'context_type',
                 'token',
                 'position')

    def __init__(self,
                 context_type,
                 token,
                 position,
                 valid_outdent_offsets,
                 valid_continuation_offsets):
        self.valid_outdent_offsets = valid_outdent_offsets
        self.valid_continuation_offsets = valid_continuation_offsets
        self.context_type = context_type
        self.position = position
        self.token = token
| 196 |
| 197 |
# The contexts for hanging indents.
# A hanging indented dictionary value after :
HANGING_DICT_VALUE = 'dict-value'
# Hanging indentation in an expression.
HANGING = 'hanging'
# Hanging indentation in a block header.
HANGING_BLOCK = 'hanging-block'
# Continued indentation inside an expression.
CONTINUED = 'continued'
# Continued indentation in a block header.
CONTINUED_BLOCK = 'continued-block'

# Markers stored in _BeforeBlockOffsets values: whether the block's body sits
# on the same line as its header (single) or on following lines (multi).
SINGLE_LINE = 'single'
WITH_BODY = 'multi'

# (adjective, location) pairs used to word the bad-continuation message
# for each context type.
_CONTINUATION_MSG_PARTS = {
    HANGING_DICT_VALUE: ('hanging', ' in dict value'),
    HANGING: ('hanging', ''),
    HANGING_BLOCK: ('hanging', ' before block'),
    CONTINUED: ('continued', ''),
    CONTINUED_BLOCK: ('continued', ' before block'),
    }
| 220 |
| 221 |
| 222 def _Offsets(*args): |
| 223 """Valid indentation offsets for a continued line.""" |
| 224 return dict((a, None) for a in args) |
| 225 |
| 226 |
def _BeforeBlockOffsets(single, with_body):
    """Valid alternative indent offsets for continued lines before blocks.

    :param single: Valid offset for statements on a single logical line.
    :param with_body: Valid offset for statements on several lines.
    """
    offsets = {}
    offsets[single] = SINGLE_LINE
    offsets[with_body] = WITH_BODY
    return offsets
| 234 |
| 235 |
class TokenWrapper(object):
    """A wrapper for readable access to token information."""

    def __init__(self, tokens):
        self._tokens = tokens

    def token(self, idx):
        """The token's string content."""
        record = self._tokens[idx]
        return record[1]

    def type(self, idx):
        """The token's tokenize type constant."""
        record = self._tokens[idx]
        return record[0]

    def start_line(self, idx):
        """The line number on which the token starts."""
        record = self._tokens[idx]
        return record[2][0]

    def start_col(self, idx):
        """The column at which the token starts."""
        record = self._tokens[idx]
        return record[2][1]

    def line(self, idx):
        """The full source line on which the token appears."""
        record = self._tokens[idx]
        return record[4]
| 256 |
| 257 |
class ContinuedLineState(object):
    """Tracker for continued indentation inside a logical line.

    Maintains a stack of _ContinuedIndent contexts, one per currently open
    bracket / 'lambda' / dict-value ':', and remembers whether the current
    line opens an indented block.
    """

    def __init__(self, tokens, config):
        # Index of the first non-junk token on the current line; -1 when the
        # line has not started yet.
        self._line_start = -1
        # Stack of _ContinuedIndent contexts, innermost last.
        self._cont_stack = []
        self._is_block_opener = False
        # Warnings whose validity depends on the not-yet-seen block body;
        # flushed by FormatChecker._process_retained_warnings.
        self.retained_warnings = []
        self._config = config
        self._tokens = TokenWrapper(tokens)

    @property
    def has_content(self):
        # True while at least one bracket context is open.
        return bool(self._cont_stack)

    @property
    def _block_indent_size(self):
        # Width of one configured indent unit, with tabs expanded.
        return len(self._config.indent_string.replace('\t', ' ' * _TAB_LENGTH))

    @property
    def _continuation_size(self):
        return self._config.indent_after_paren

    def handle_line_start(self, pos):
        """Record the first non-junk token at the start of a line."""
        if self._line_start > -1:
            return
        self._is_block_opener = self._tokens.token(pos) in _CONTINUATION_BLOCK_OPENERS
        self._line_start = pos

    def next_physical_line(self):
        """Prepares the tracker for a new physical line (NL)."""
        self._line_start = -1
        self._is_block_opener = False

    def next_logical_line(self):
        """Prepares the tracker for a new logical line (NEWLINE).

        A new logical line only starts with block indentation.
        """
        self.next_physical_line()
        self.retained_warnings = []
        self._cont_stack = []

    def add_block_warning(self, token_position, state, valid_offsets):
        """Defer a possible bad-continuation warning until the block body is seen."""
        self.retained_warnings.append((token_position, state, valid_offsets))

    def get_valid_offsets(self, idx):
        """Returns the valid offsets for the token at the given position."""
        # The closing brace on a dict or the 'for' in a dict comprehension may
        # reset two indent levels because the dict value is ended implicitly
        stack_top = -1
        if self._tokens.token(idx) in ('}', 'for') and self._cont_stack[-1].token == ':':
            stack_top = -2
        indent = self._cont_stack[stack_top]
        if self._tokens.token(idx) in _CLOSING_BRACKETS:
            valid_offsets = indent.valid_outdent_offsets
        else:
            valid_offsets = indent.valid_continuation_offsets
        # Copy so callers may add extra accepted offsets without mutating
        # the stored context.
        return indent, valid_offsets.copy()

    def _hanging_indent_after_bracket(self, bracket, position):
        """Extracts indentation information for a hanging indent.

        Called when *bracket* is the last token on its line, so the continued
        content "hangs" below it.
        """
        indentation = _get_indent_length(self._tokens.line(position))
        if self._is_block_opener and self._continuation_size == self._block_indent_size:
            return _ContinuedIndent(
                HANGING_BLOCK,
                bracket,
                position,
                _Offsets(indentation + self._continuation_size, indentation),
                _BeforeBlockOffsets(indentation + self._continuation_size,
                                    indentation + self._continuation_size * 2))
        elif bracket == ':':
            # If the dict key was on the same line as the open brace, the new
            # correct indent should be relative to the key instead of the
            # current indent level
            paren_align = self._cont_stack[-1].valid_outdent_offsets
            next_align = self._cont_stack[-1].valid_continuation_offsets.copy()
            next_align_keys = list(next_align.keys())
            next_align[next_align_keys[0] + self._continuation_size] = True
            # Note that the continuation of
            # d = {
            #       'a': 'b'
            #            'c'
            #     }
            # is handled by the special-casing for hanging continued string indents.
            return _ContinuedIndent(HANGING_DICT_VALUE, bracket, position, paren_align, next_align)
        else:
            return _ContinuedIndent(
                HANGING,
                bracket,
                position,
                _Offsets(indentation, indentation + self._continuation_size),
                _Offsets(indentation + self._continuation_size))

    def _continuation_inside_bracket(self, bracket, pos):
        """Extracts indentation information for a continued indent.

        Called when content follows *bracket* on the same line, so continued
        lines should align with that content.
        """
        indentation = _get_indent_length(self._tokens.line(pos))
        if self._is_block_opener and self._tokens.start_col(pos+1) - indentation == self._block_indent_size:
            return _ContinuedIndent(
                CONTINUED_BLOCK,
                bracket,
                pos,
                _Offsets(self._tokens.start_col(pos)),
                _BeforeBlockOffsets(self._tokens.start_col(pos+1),
                                    self._tokens.start_col(pos+1) + self._continuation_size))
        else:
            return _ContinuedIndent(
                CONTINUED,
                bracket,
                pos,
                _Offsets(self._tokens.start_col(pos)),
                _Offsets(self._tokens.start_col(pos+1)))

    def pop_token(self):
        """Close the innermost continued-indentation context."""
        self._cont_stack.pop()

    def push_token(self, token, position):
        """Pushes a new token for continued indentation on the stack.

        Tokens that can modify continued indentation offsets are:
          * opening brackets
          * 'lambda'
          * : inside dictionaries

        push_token relies on the caller to filter out those
        interesting tokens.

        :param token: The concrete token
        :param position: The position of the token in the stream.
        """
        if _token_followed_by_eol(self._tokens, position):
            self._cont_stack.append(
                self._hanging_indent_after_bracket(token, position))
        else:
            self._cont_stack.append(
                self._continuation_inside_bracket(token, position))
| 395 |
| 396 |
class FormatChecker(BaseTokenChecker):
    """checks for :
    * unauthorized constructions
    * strict indentation
    * line length
    """

    __implements__ = (ITokenChecker, IAstroidChecker, IRawChecker)

    # configuration section name
    name = 'format'
    # messages
    msgs = MSGS
    # configuration options
    # for available dict keys/values see the optik parser 'add_option' method
    options = (('max-line-length',
                {'default' : 100, 'type' : "int", 'metavar' : '<int>',
                 'help' : 'Maximum number of characters on a single line.'}),
               ('ignore-long-lines',
                {'type': 'regexp', 'metavar': '<regexp>',
                 'default': r'^\s*(# )?<?https?://\S+>?$',
                 'help': ('Regexp for a line that is allowed to be longer than '
                          'the limit.')}),
               ('single-line-if-stmt',
                {'default': False, 'type' : 'yn', 'metavar' : '<y_or_n>',
                 'help' : ('Allow the body of an if to be on the same '
                           'line as the test if there is no else.')}),
               ('no-space-check',
                {'default': ','.join(_NO_SPACE_CHECK_CHOICES),
                 'type': 'multiple_choice',
                 'choices': _NO_SPACE_CHECK_CHOICES,
                 'help': ('List of optional constructs for which whitespace '
                          'checking is disabled')}),
               ('max-module-lines',
                {'default' : 1000, 'type' : 'int', 'metavar' : '<int>',
                 'help': 'Maximum number of lines in a module'}
               ),
               ('indent-string',
                {'default' : '    ', 'type' : "string", 'metavar' : '<string>',
                 'help' : 'String used as indentation unit. This is usually '
                          '"    " (4 spaces) or "\\t" (1 tab).'}),
               ('indent-after-paren',
                {'type': 'int', 'metavar': '<int>', 'default': 4,
                 'help': 'Number of spaces of indent required inside a hanging '
                         ' or continued line.'}),
               ('expected-line-ending-format',
                {'type': 'choice', 'metavar': '<empty or LF or CRLF>', 'default': '',
                 'choices': ['', 'LF', 'CRLF'],
                 'help': 'Expected format of line ending, e.g. empty (any line ending), LF or CRLF.'}),
              )
| 447 |
    def __init__(self, linter=None):
        BaseTokenChecker.__init__(self, linter)
        # Mapping of line number -> line content, filled by new_line().
        self._lines = None
        # NOTE(review): used by visit_default (not fully visible here);
        # presumably tracks lines already checked for multiple-statements.
        self._visited_lines = None
        # Currently open bracket tokens, innermost last; seeded with a
        # sentinel so _inside_brackets() is safe on an empty stack.
        self._bracket_stack = [None]
| 453 |
    def _pop_token(self):
        # Close the innermost bracket context on both the bracket stack and
        # the continued-indentation tracker, keeping the two in sync.
        self._bracket_stack.pop()
        self._current_line.pop_token()
| 457 |
    def _push_token(self, token, idx):
        # Open a new bracket context, mirrored on the continued-indentation
        # tracker so both stacks stay in sync.
        self._bracket_stack.append(token)
        self._current_line.push_token(token, idx)
| 461 |
    def new_line(self, tokens, line_end, line_start):
        """a new line has been encountered, process it if necessary

        :param tokens: the TokenWrapper for the full token stream
        :param line_end: index of the last token of the previous line
        :param line_start: index of the first token of the new line
        """
        if _last_token_on_line_is(tokens, line_end, ';'):
            self.add_message('unnecessary-semicolon', line=tokens.start_line(line_end))

        line_num = tokens.start_line(line_start)
        line = tokens.line(line_start)
        if tokens.type(line_start) not in _JUNK_TOKENS:
            # Keep only the first physical line of a multi-line token.
            self._lines[line_num] = line.split('\n')[0]
        self.check_lines(line, line_num)
| 472 |
    def process_module(self, module):
        self._keywords_with_parens = set()
        if 'print_function' in module.future_imports:
            # With the future import, 'print' is a function and parentheses
            # after it are legitimate.
            self._keywords_with_parens.add('print')
| 477 |
    def _check_keyword_parentheses(self, tokens, start):
        """Check that there are not unnecessary parens after a keyword.

        Parens are unnecessary if there is exactly one balanced outer pair on a
        line, and it is followed by a colon, and contains no commas (i.e. is not a
        tuple).

        Args:
        tokens: list of Tokens; the entire list of Tokens.
        start: int; the position of the keyword in the token list.
        """
        # If the next token is not a paren, we're fine.
        if self._inside_brackets(':') and tokens[start][1] == 'for':
            # 'for' ends an implicit dict-value context opened by ':'.
            self._pop_token()
        if tokens[start+1][1] != '(':
            return

        found_and_or = False
        depth = 0
        keyword_token = tokens[start][1]
        line_num = tokens[start][2][0]

        for i in range(start, len(tokens) - 1):
            token = tokens[i]

            # If we hit a newline, then assume any parens were for continuation.
            if token[0] == tokenize.NL:
                return

            if token[1] == '(':
                depth += 1
            elif token[1] == ')':
                depth -= 1
                if not depth:
                    # ')' can't happen after if (foo), since it would be a syntax error.
                    if (tokens[i+1][1] in (':', ')', ']', '}', 'in') or
                            tokens[i+1][0] in (tokenize.NEWLINE,
                                               tokenize.ENDMARKER,
                                               tokenize.COMMENT)):
                        # The empty tuple () is always accepted.
                        if i == start + 2:
                            return
                        if keyword_token == 'not':
                            if not found_and_or:
                                self.add_message('superfluous-parens', line=line_num,
                                                 args=keyword_token)
                        elif keyword_token in ('return', 'yield'):
                            self.add_message('superfluous-parens', line=line_num,
                                             args=keyword_token)
                        elif keyword_token not in self._keywords_with_parens:
                            if not (tokens[i+1][1] == 'in' and found_and_or):
                                self.add_message('superfluous-parens', line=line_num,
                                                 args=keyword_token)
                    return
            elif depth == 1:
                # This is a tuple, which is always acceptable.
                if token[1] == ',':
                    return
                # 'and' and 'or' are the only boolean operators with lower precedence
                # than 'not', so parens are only required when they are found.
                elif token[1] in ('and', 'or'):
                    found_and_or = True
                # A yield inside an expression must always be in parentheses,
                # quit early without error.
                elif token[1] == 'yield':
                    return
                # A generator expression always has a 'for' token in it, and
                # the 'for' token is only legal inside parens when it is in a
                # generator expression.  The parens are necessary here, so bail
                # without an error.
                elif token[1] == 'for':
                    return
| 550 |
    def _opening_bracket(self, tokens, i):
        self._push_token(tokens[i][1], i)
        # Special case: ignore slices
        if tokens[i][1] == '[' and tokens[i+1][1] == ':':
            return

        # A bracket preceded by a (non-keyword) name or a closing bracket is
        # a call or subscript: no space allowed before it. Otherwise the
        # space before the bracket is unconstrained.
        if (i > 0 and (tokens[i-1][0] == tokenize.NAME and
                       not (keyword.iskeyword(tokens[i-1][1]))
                       or tokens[i-1][1] in _CLOSING_BRACKETS)):
            self._check_space(tokens, i, (_MUST_NOT, _MUST_NOT))
        else:
            self._check_space(tokens, i, (_IGNORE, _MUST_NOT))
| 563 |
    def _closing_bracket(self, tokens, i):
        if self._inside_brackets(':'):
            # A closing bracket also ends an implicit dict-value context.
            self._pop_token()
        self._pop_token()
        # Special case: ignore slices
        if tokens[i-1][1] == ':' and tokens[i][1] == ']':
            return
        policy_before = _MUST_NOT
        if tokens[i][1] in _CLOSING_BRACKETS and tokens[i-1][1] == ',':
            # A trailing comma before the bracket may be exempted by config.
            if _TRAILING_COMMA in self.config.no_space_check:
                policy_before = _IGNORE

        self._check_space(tokens, i, (policy_before, _IGNORE))
| 577 |
    def _check_equals_spacing(self, tokens, i):
        """Check the spacing of a single equals sign."""
        # '=' for a keyword argument or lambda default takes no spaces;
        # a plain assignment takes exactly one on each side.
        if self._inside_brackets('(') or self._inside_brackets('lambda'):
            self._check_space(tokens, i, (_MUST_NOT, _MUST_NOT))
        else:
            self._check_space(tokens, i, (_MUST, _MUST))
| 584 |
    def _open_lambda(self, tokens, i): # pylint:disable=unused-argument
        # Track 'lambda' like a bracket so the terminating ':' and default
        # '=' signs can be recognized.
        self._push_token('lambda', i)
| 587 |
    def _handle_colon(self, tokens, i):
        # Special case: ignore slices
        if self._inside_brackets('['):
            return
        # A dict key-value separator may be exempted from spacing checks.
        if (self._inside_brackets('{') and
                _DICT_SEPARATOR in self.config.no_space_check):
            policy = (_IGNORE, _IGNORE)
        else:
            policy = (_MUST_NOT, _MUST)
        self._check_space(tokens, i, policy)

        if self._inside_brackets('lambda'):
            # ':' terminates the lambda parameter list.
            self._pop_token()
        elif self._inside_brackets('{'):
            # ':' opens an implicit dict-value context.
            self._push_token(':', i)
| 603 |
    def _handle_comma(self, tokens, i):
        # Only require a following whitespace if this is
        # not a hanging comma before a closing bracket.
        if tokens[i+1][1] in _CLOSING_BRACKETS:
            self._check_space(tokens, i, (_MUST_NOT, _IGNORE))
        else:
            self._check_space(tokens, i, (_MUST_NOT, _MUST))
        if self._inside_brackets(':'):
            # A comma ends the implicit dict-value context opened by ':'.
            self._pop_token()
| 613 |
    def _check_surrounded_by_space(self, tokens, i):
        """Check that a binary operator is surrounded by exactly one space."""
        self._check_space(tokens, i, (_MUST, _MUST))
| 617 |
    def _check_space(self, tokens, i, policies):
        """Check spacing around tokens[i] against (before, after) *policies*.

        Each policy is _MUST, _MUST_NOT or _IGNORE; violations emit a
        bad-whitespace message with an underlined excerpt.
        """
        def _policy_string(policy):
            # Wording for the message: how many spaces, and whether they are
            # required or merely allowed.
            if policy == _MUST:
                return 'Exactly one', 'required'
            else:
                return 'No', 'allowed'

        def _name_construct(token):
            # Human-readable name of the construct being checked.
            if token[1] == ',':
                return 'comma'
            elif token[1] == ':':
                return ':'
            elif token[1] in '()[]{}':
                return 'bracket'
            elif token[1] in ('<', '>', '<=', '>=', '!=', '=='):
                return 'comparison'
            else:
                if self._inside_brackets('('):
                    return 'keyword argument assignment'
                else:
                    return 'assignment'

        good_space = [True, True]
        token = tokens[i]
        pairs = [(tokens[i-1], token), (token, tokens[i+1])]

        for other_idx, (policy, token_pair) in enumerate(zip(policies, pairs)):
            # Skip checks across line boundaries or when the policy says so.
            if token_pair[other_idx][0] in _EOL or policy == _IGNORE:
                continue

            distance = _column_distance(*token_pair)
            if distance is None:
                continue
            good_space[other_idx] = (
                (policy == _MUST and distance == 1) or
                (policy == _MUST_NOT and distance == 0))

        warnings = []
        if not any(good_space) and policies[0] == policies[1]:
            # Both sides wrong with the same policy: emit one 'around' message.
            warnings.append((policies[0], 'around'))
        else:
            for ok, policy, position in zip(good_space, policies, ('before', 'after')):
                if not ok:
                    warnings.append((policy, position))
        for policy, position in warnings:
            construct = _name_construct(token)
            count, state = _policy_string(policy)
            self.add_message('bad-whitespace', line=token[2][0],
                             args=(count, state, position, construct,
                                   _underline_token(token)))
| 668 |
    def _inside_brackets(self, left):
        # True when the innermost open bracket context is *left*.
        return self._bracket_stack[-1] == left
| 671 |
| 672 def _prepare_token_dispatcher(self): |
| 673 raw = [ |
| 674 (_KEYWORD_TOKENS, |
| 675 self._check_keyword_parentheses), |
| 676 |
| 677 (_OPENING_BRACKETS, self._opening_bracket), |
| 678 |
| 679 (_CLOSING_BRACKETS, self._closing_bracket), |
| 680 |
| 681 (['='], self._check_equals_spacing), |
| 682 |
| 683 (_SPACED_OPERATORS, self._check_surrounded_by_space), |
| 684 |
| 685 ([','], self._handle_comma), |
| 686 |
| 687 ([':'], self._handle_colon), |
| 688 |
| 689 (['lambda'], self._open_lambda), |
| 690 |
| 691 ] |
| 692 |
| 693 dispatch = {} |
| 694 for tokens, handler in raw: |
| 695 for token in tokens: |
| 696 dispatch[token] = handler |
| 697 return dispatch |
| 698 |
    def process_tokens(self, tokens):
        """process tokens and search for :

         _ non strict indentation (i.e. not always using the <indent> parameter as
           indent unit)
         _ too long lines (i.e. longer than <max_chars>)
         _ optionally bad construct (if given, bad_construct must be a compiled
           regular expression).
        """
        self._bracket_stack = [None]
        indents = [0]
        check_equal = False
        line_num = 0
        self._lines = {}
        self._visited_lines = {}
        token_handlers = self._prepare_token_dispatcher()
        self._last_line_ending = None

        self._current_line = ContinuedLineState(tokens, self.config)
        for idx, (tok_type, token, start, _, line) in enumerate(tokens):
            if start[0] != line_num:
                line_num = start[0]
                # A tokenizer oddity: if an indented line contains a multi-line
                # docstring, the line member of the INDENT token does not contain
                # the full line; therefore we check the next token on the line.
                if tok_type == tokenize.INDENT:
                    self.new_line(TokenWrapper(tokens), idx-1, idx+1)
                else:
                    self.new_line(TokenWrapper(tokens), idx-1, idx)

            if tok_type == tokenize.NEWLINE:
                # a program statement, or ENDMARKER, will eventually follow,
                # after some (possibly empty) run of tokens of the form
                #     (NL | COMMENT)* (INDENT | DEDENT+)?
                # If an INDENT appears, setting check_equal is wrong, and will
                # be undone when we see the INDENT.
                check_equal = True
                self._process_retained_warnings(TokenWrapper(tokens), idx)
                self._current_line.next_logical_line()
                self._check_line_ending(token, line_num)
            elif tok_type == tokenize.INDENT:
                check_equal = False
                self.check_indent_level(token, indents[-1]+1, line_num)
                indents.append(indents[-1]+1)
            elif tok_type == tokenize.DEDENT:
                # there's nothing we need to check here!  what's important is
                # that when the run of DEDENTs ends, the indentation of the
                # program statement (or ENDMARKER) that triggered the run is
                # equal to what's left at the top of the indents stack
                check_equal = True
                if len(indents) > 1:
                    del indents[-1]
            elif tok_type == tokenize.NL:
                self._check_continued_indentation(TokenWrapper(tokens), idx+1)
                self._current_line.next_physical_line()
            elif tok_type != tokenize.COMMENT:
                self._current_line.handle_line_start(idx)
                # This is the first concrete token following a NEWLINE, so it
                # must be the first token of the next program statement, or an
                # ENDMARKER; the "line" argument exposes the leading whitespace
                # for this statement; in the case of ENDMARKER, line is an empty
                # string, so will properly match the empty string with which the
                # "indents" stack was seeded
                if check_equal:
                    check_equal = False
                    self.check_indent_level(line, indents[-1], line_num)

            if tok_type == tokenize.NUMBER and token.endswith('l'):
                # Python 2 long literal with a lowercase suffix.
                self.add_message('lowercase-l-suffix', line=line_num)

            try:
                handler = token_handlers[token]
            except KeyError:
                pass
            else:
                handler(tokens, idx)

        line_num -= 1 # to be ok with "wc -l"
        if line_num > self.config.max_module_lines:
            # Get the line where the too-many-lines (or its message id)
            # was disabled or default to 1.
            symbol = self.linter.msgs_store.check_message_id('too-many-lines')
            names = (symbol.msgid, 'too-many-lines')
            line = next(filter(None,
                               map(self.linter._pragma_lineno.get, names)), 1)
            self.add_message('too-many-lines',
                             args=(line_num, self.config.max_module_lines),
                             line=line)
| 787 |
    def _check_line_ending(self, line_ending, line_num):
        """Check *line_ending* (the NEWLINE token's text) for consistency."""
        # check if line endings are mixed
        if self._last_line_ending is not None:
            if line_ending != self._last_line_ending:
                self.add_message('mixed-line-endings', line=line_num)

        self._last_line_ending = line_ending

        # check if line ending is as expected
        expected = self.config.expected_line_ending_format
        if expected:
            line_ending = reduce(lambda x, y: x + y if x != y else x, line_ending, "")  # reduce multiple \n\n\n\n to one \n
            line_ending = 'LF' if line_ending == '\n' else 'CRLF'
            if line_ending != expected:
                self.add_message('unexpected-line-ending-format', args=(line_ending, expected), line=line_num)
| 803 |
| 804 |
    def _process_retained_warnings(self, tokens, current_pos):
        """Flush continuation warnings deferred until the block body was seen."""
        # No trailing ':' means the block body sits on the same logical line.
        single_line_block_stmt = not _last_token_on_line_is(tokens, current_pos, ':')

        for indent_pos, state, offsets in self._current_line.retained_warnings:
            block_type = offsets[tokens.start_col(indent_pos)]
            # Offer the alternative offsets as the hint for the message.
            hints = dict((k, v) for k, v in six.iteritems(offsets)
                         if v != block_type)
            # The used offset is only wrong if it matches the other layout.
            if single_line_block_stmt and block_type == WITH_BODY:
                self._add_continuation_message(state, hints, tokens, indent_pos)
            elif not single_line_block_stmt and block_type == SINGLE_LINE:
                self._add_continuation_message(state, hints, tokens, indent_pos)
| 816 |
    def _check_continued_indentation(self, tokens, next_idx):
        """Validate the indentation of the continuation line starting at *next_idx*."""
        def same_token_around_nl(token_type):
            return (tokens.type(next_idx) == token_type and
                    tokens.type(next_idx-2) == token_type)

        # Do not issue any warnings if the next line is empty.
        if not self._current_line.has_content or tokens.type(next_idx) == tokenize.NL:
            return

        state, valid_offsets = self._current_line.get_valid_offsets(next_idx)
        # Special handling for hanging comments and strings. If the last line ended
        # with a comment (string) and the new line contains only a comment, the line
        # may also be indented to the start of the previous token.
        if same_token_around_nl(tokenize.COMMENT) or same_token_around_nl(tokenize.STRING):
            valid_offsets[tokens.start_col(next_idx-2)] = True

        # We can only decide if the indentation of a continued line before opening
        # a new block is valid once we know of the body of the block is on the
        # same line as the block opener. Since the token processing is single-pass,
        # emitting those warnings is delayed until the block opener is processed.
        if (state.context_type in (HANGING_BLOCK, CONTINUED_BLOCK)
                and tokens.start_col(next_idx) in valid_offsets):
            self._current_line.add_block_warning(next_idx, state, valid_offsets)
        elif tokens.start_col(next_idx) not in valid_offsets:
            self._add_continuation_message(state, valid_offsets, tokens, next_idx)
| 842 |
| 843 def _add_continuation_message(self, state, offsets, tokens, position): |
| 844 readable_type, readable_position = _CONTINUATION_MSG_PARTS[state.context
_type] |
| 845 hint_line = _get_indent_hint_line(offsets, tokens.start_col(position)) |
| 846 self.add_message( |
| 847 'bad-continuation', |
| 848 line=tokens.start_line(position), |
| 849 args=(readable_type, readable_position, tokens.line(position), hint_
line)) |
| 850 |
| 851 @check_messages('multiple-statements') |
| 852 def visit_default(self, node): |
| 853 """check the node line number and check it if not yet done""" |
| 854 if not node.is_statement: |
| 855 return |
| 856 if not node.root().pure_python: |
| 857 return # XXX block visit of child nodes |
| 858 prev_sibl = node.previous_sibling() |
| 859 if prev_sibl is not None: |
| 860 prev_line = prev_sibl.fromlineno |
| 861 else: |
| 862 # The line on which a finally: occurs in a try/finally |
| 863 # is not directly represented in the AST. We infer it |
| 864 # by taking the last line of the body and adding 1, which |
| 865 # should be the line of finally: |
| 866 if (isinstance(node.parent, nodes.TryFinally) |
| 867 and node in node.parent.finalbody): |
| 868 prev_line = node.parent.body[0].tolineno + 1 |
| 869 else: |
| 870 prev_line = node.parent.statement().fromlineno |
| 871 line = node.fromlineno |
| 872 assert line, node |
| 873 if prev_line == line and self._visited_lines.get(line) != 2: |
| 874 self._check_multi_statement_line(node, line) |
| 875 return |
| 876 if line in self._visited_lines: |
| 877 return |
| 878 try: |
| 879 tolineno = node.blockstart_tolineno |
| 880 except AttributeError: |
| 881 tolineno = node.tolineno |
| 882 assert tolineno, node |
| 883 lines = [] |
| 884 for line in range(line, tolineno + 1): |
| 885 self._visited_lines[line] = 1 |
| 886 try: |
| 887 lines.append(self._lines[line].rstrip()) |
| 888 except KeyError: |
| 889 lines.append('') |
| 890 |
| 891 def _check_multi_statement_line(self, node, line): |
| 892 """Check for lines containing multiple statements.""" |
| 893 # Do not warn about multiple nested context managers |
| 894 # in with statements. |
| 895 if isinstance(node, nodes.With): |
| 896 return |
| 897 # For try... except... finally..., the two nodes |
| 898 # appear to be on the same line due to how the AST is built. |
| 899 if (isinstance(node, nodes.TryExcept) and |
| 900 isinstance(node.parent, nodes.TryFinally)): |
| 901 return |
| 902 if (isinstance(node.parent, nodes.If) and not node.parent.orelse |
| 903 and self.config.single_line_if_stmt): |
| 904 return |
| 905 self.add_message('multiple-statements', node=node) |
| 906 self._visited_lines[line] = 2 |
| 907 |
| 908 def check_lines(self, lines, i): |
| 909 """check lines have less than a maximum number of characters |
| 910 """ |
| 911 max_chars = self.config.max_line_length |
| 912 ignore_long_line = self.config.ignore_long_lines |
| 913 |
| 914 for line in lines.splitlines(True): |
| 915 if not line.endswith('\n'): |
| 916 self.add_message('missing-final-newline', line=i) |
| 917 else: |
| 918 stripped_line = line.rstrip() |
| 919 if line[len(stripped_line):] not in ('\n', '\r\n'): |
| 920 self.add_message('trailing-whitespace', line=i) |
| 921 # Don't count excess whitespace in the line length. |
| 922 line = stripped_line |
| 923 mobj = OPTION_RGX.search(line) |
| 924 if mobj and mobj.group(1).split('=', 1)[0].strip() == 'disable': |
| 925 line = line.split('#')[0].rstrip() |
| 926 |
| 927 if len(line) > max_chars and not ignore_long_line.search(line): |
| 928 self.add_message('line-too-long', line=i, args=(len(line), max_c
hars)) |
| 929 i += 1 |
| 930 |
| 931 def check_indent_level(self, string, expected, line_num): |
| 932 """return the indent level of the string |
| 933 """ |
| 934 indent = self.config.indent_string |
| 935 if indent == '\\t': # \t is not interpreted in the configuration file |
| 936 indent = '\t' |
| 937 level = 0 |
| 938 unit_size = len(indent) |
| 939 while string[:unit_size] == indent: |
| 940 string = string[unit_size:] |
| 941 level += 1 |
| 942 suppl = '' |
| 943 while string and string[0] in ' \t': |
| 944 if string[0] != indent[0]: |
| 945 if string[0] == '\t': |
| 946 args = ('tab', 'space') |
| 947 else: |
| 948 args = ('space', 'tab') |
| 949 self.add_message('mixed-indentation', args=args, line=line_num) |
| 950 return level |
| 951 suppl += string[0] |
| 952 string = string[1:] |
| 953 if level != expected or suppl: |
| 954 i_type = 'spaces' |
| 955 if indent[0] == '\t': |
| 956 i_type = 'tabs' |
| 957 self.add_message('bad-indentation', line=line_num, |
| 958 args=(level * unit_size + len(suppl), i_type, |
| 959 expected * unit_size)) |
| 960 |
| 961 |
def register(linter):
    """Required entry point: auto-register this module's checker."""
    checker = FormatChecker(linter)
    linter.register_checker(checker)
OLD | NEW |