OLD | NEW |
| 1 # Copyright (C) 2010-2011 Hideo Hattori |
| 2 # Copyright (C) 2011-2013 Hideo Hattori, Steven Myint |
| 3 # Copyright (C) 2013-2014 Hideo Hattori, Steven Myint, Bill Wendling |
| 4 # |
| 5 # Permission is hereby granted, free of charge, to any person obtaining |
| 6 # a copy of this software and associated documentation files (the |
| 7 # "Software"), to deal in the Software without restriction, including |
| 8 # without limitation the rights to use, copy, modify, merge, publish, |
| 9 # distribute, sublicense, and/or sell copies of the Software, and to |
| 10 # permit persons to whom the Software is furnished to do so, subject to |
| 11 # the following conditions: |
| 12 # |
| 13 # The above copyright notice and this permission notice shall be |
| 14 # included in all copies or substantial portions of the Software. |
| 15 # |
| 16 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
| 17 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
| 18 # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND |
| 19 # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS |
| 20 # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN |
| 21 # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN |
| 22 # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
| 23 # SOFTWARE. |
| 24 |
| 25 """Automatically formats Python code to conform to the PEP 8 style guide. |
| 26 |
| 27 Fixes that only need to be done once can be implemented by adding a function of |
| 28 the form "fix_<code>(source)" to this module. They should return the fixed source code. |
| 29 These fixes are picked up by apply_global_fixes(). |
| 30 |
| 31 Fixes that depend on pep8 should be added as methods to FixPEP8. See the class |
| 32 documentation for more information. |
| 33 |
| 34 """ |
| 35 |
| 36 from __future__ import absolute_import |
| 37 from __future__ import division |
| 38 from __future__ import print_function |
| 39 from __future__ import unicode_literals |
| 40 |
| 41 import bisect |
| 42 import codecs |
| 43 import collections |
| 44 import copy |
| 45 import difflib |
| 46 import fnmatch |
| 47 import inspect |
| 48 import io |
| 49 import itertools |
| 50 import keyword |
| 51 import locale |
| 52 import os |
| 53 import re |
| 54 import signal |
| 55 import sys |
| 56 import token |
| 57 import tokenize |
| 58 |
| 59 import pep8 |
| 60 |
| 61 |
| 62 try: |
| 63 unicode |
| 64 except NameError: |
| 65 unicode = str |
| 66 |
| 67 |
| 68 __version__ = '1.0.3' |
| 69 |
| 70 |
| 71 CR = '\r' |
| 72 LF = '\n' |
| 73 CRLF = '\r\n' |
| 74 |
| 75 |
| 76 PYTHON_SHEBANG_REGEX = re.compile(r'^#!.*\bpython[23]?\b\s*$') |
| 77 |
| 78 |
| 79 # For generating line shortening candidates. |
| 80 SHORTEN_OPERATOR_GROUPS = frozenset([ |
| 81 frozenset([',']), |
| 82 frozenset(['%']), |
| 83 frozenset([',', '(', '[', '{']), |
| 84 frozenset(['%', '(', '[', '{']), |
| 85 frozenset([',', '(', '[', '{', '%', '+', '-', '*', '/', '//']), |
| 86 frozenset(['%', '+', '-', '*', '/', '//']), |
| 87 ]) |
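| |
| # Illustration (hedged sketch; 'frobnicate' is a made-up name): splitting |
| # only at ',' can turn |
| #     result = frobnicate(alpha, beta, gamma) |
| # into |
| #     result = frobnicate(alpha, |
| #                         beta, |
| #                         gamma) |
| # while the groups that include '(' also permit a break right after the |
| # opening bracket. |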
| 88 |
| 89 |
| 90 DEFAULT_IGNORE = 'E24' |
| 91 DEFAULT_INDENT_SIZE = 4 |
| 92 |
| 93 |
| 94 # W602 is handled separately due to the need to avoid "with_traceback". |
| 95 CODE_TO_2TO3 = { |
| 96 'E721': ['idioms'], |
| 97 'W601': ['has_key'], |
| 98 'W603': ['ne'], |
| 99 'W604': ['repr'], |
| 100 'W690': ['apply', |
| 101 'except', |
| 102 'exitfunc', |
| 103 'import', |
| 104 'numliterals', |
| 105 'operator', |
| 106 'paren', |
| 107 'reduce', |
| 108 'renames', |
| 109 'standarderror', |
| 110 'sys_exc', |
| 111 'throw', |
| 112 'tuple_params', |
| 113 'xreadlines']} |
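| |
| # For example, W601 maps to lib2to3's 'has_key' fixer; code_to_2to3() below |
| # turns selected codes into the corresponding set of fixer names, roughly: |
| # |
| #     code_to_2to3(select=['W601'], ignore=[])  # -> set(['has_key']) |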
| 114 |
| 115 |
| 116 def open_with_encoding(filename, encoding=None, mode='r'): |
| 117 """Return opened file with a specific encoding.""" |
| 118 if not encoding: |
| 119 encoding = detect_encoding(filename) |
| 120 |
| 121 return io.open(filename, mode=mode, encoding=encoding, |
| 122 newline='') # Preserve line endings |
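| |
| # Minimal usage sketch ('example.py' is a hypothetical path); newline='' |
| # means '\r\n' and '\r' line endings survive a read/write round trip: |
| # |
| #     with open_with_encoding('example.py') as input_file: |
| #         lines = input_file.readlines() |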
| 123 |
| 124 |
| 125 def detect_encoding(filename): |
| 126 """Return file encoding.""" |
| 127 try: |
| 128 with open(filename, 'rb') as input_file: |
| 129 from lib2to3.pgen2 import tokenize as lib2to3_tokenize |
| 130 encoding = lib2to3_tokenize.detect_encoding(input_file.readline)[0] |
| 131 |
| 132 # Check for correctness of encoding |
| 133 with open_with_encoding(filename, encoding) as test_file: |
| 134 test_file.read() |
| 135 |
| 136 return encoding |
| 137 except (LookupError, SyntaxError, UnicodeDecodeError): |
| 138 return 'latin-1' |
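| |
| # Behavior sketch ('good.py' and 'blob.py' are hypothetical): a readable |
| # coding declaration or BOM is honored, while anything undecodable falls |
| # back to 'latin-1', which accepts any byte sequence: |
| # |
| #     detect_encoding('good.py')  # e.g. 'utf-8' |
| #     detect_encoding('blob.py')  # 'latin-1' on decode failure |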
| 139 |
| 140 |
| 141 def readlines_from_file(filename): |
| 142 """Return contents of file.""" |
| 143 with open_with_encoding(filename) as input_file: |
| 144 return input_file.readlines() |
| 145 |
| 146 |
| 147 def extended_blank_lines(logical_line, |
| 148 blank_lines, |
| 149 indent_level, |
| 150 previous_logical): |
| 151 """Check for missing blank lines after class declaration.""" |
| 152 if previous_logical.startswith('class '): |
| 153 if ( |
| 154 logical_line.startswith(('def ', 'class ', '@')) or |
| 155 pep8.DOCSTRING_REGEX.match(logical_line) |
| 156 ): |
| 157 if indent_level and not blank_lines: |
| 158 yield (0, 'E309 expected 1 blank line after class declaration') |
| 159 elif previous_logical.startswith('def '): |
| 160 if blank_lines and pep8.DOCSTRING_REGEX.match(logical_line): |
| 161 yield (0, 'E303 too many blank lines ({0})'.format(blank_lines)) |
| 162 elif pep8.DOCSTRING_REGEX.match(previous_logical): |
| 163 # Missing blank line between class docstring and method declaration. |
| 164 if ( |
| 165 indent_level and |
| 166 not blank_lines and |
| 167 logical_line.startswith('def ') and |
| 168 '(self' in logical_line |
| 169 ): |
| 170 yield (0, 'E301 expected 1 blank line, found 0') |
| 171 pep8.register_check(extended_blank_lines) |
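| |
| # Sketch of input this check flags: no blank line between a class |
| # declaration and its first method yields E309, e.g. |
| # |
| #     class Foo(object): |
| #         def method(self):  # E309 expected 1 blank line ... |
| #             pass |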
| 172 |
| 173 |
| 174 def continued_indentation(logical_line, tokens, indent_level, indent_char, |
| 175 noqa): |
| 176 """Override pep8's function to provide indentation information.""" |
| 177 first_row = tokens[0][2][0] |
| 178 nrows = 1 + tokens[-1][2][0] - first_row |
| 179 if noqa or nrows == 1: |
| 180 return |
| 181 |
| 182 # indent_next tells us whether the next block is indented. If it is |
| 183 # indented by 4 spaces, then we should not allow 4-space indents on |
| 184 # the final continuation line. In turn, some other indents are |
| 185 # allowed to have an extra 4 spaces. |
| 186 indent_next = logical_line.endswith(':') |
| 187 |
| 188 row = depth = 0 |
| 189 valid_hangs = ( |
| 190 (DEFAULT_INDENT_SIZE,) |
| 191 if indent_char != '\t' else (DEFAULT_INDENT_SIZE, |
| 192 2 * DEFAULT_INDENT_SIZE) |
| 193 ) |
| 194 |
| 195 # Remember how many brackets were opened on each line. |
| 196 parens = [0] * nrows |
| 197 |
| 198 # Relative indents of physical lines. |
| 199 rel_indent = [0] * nrows |
| 200 |
| 201 # For each depth, collect a list of opening rows. |
| 202 open_rows = [[0]] |
| 203 # For each depth, memorize the hanging indentation. |
| 204 hangs = [None] |
| 205 |
| 206 # Visual indents. |
| 207 indent_chances = {} |
| 208 last_indent = tokens[0][2] |
| 209 indent = [last_indent[1]] |
| 210 |
| 211 last_token_multiline = None |
| 212 line = None |
| 213 last_line = '' |
| 214 last_line_begins_with_multiline = False |
| 215 for token_type, text, start, end, line in tokens: |
| 216 |
| 217 newline = row < start[0] - first_row |
| 218 if newline: |
| 219 row = start[0] - first_row |
| 220 newline = (not last_token_multiline and |
| 221 token_type not in (tokenize.NL, tokenize.NEWLINE)) |
| 222 last_line_begins_with_multiline = last_token_multiline |
| 223 |
| 224 if newline: |
| 225 # This is the beginning of a continuation line. |
| 226 last_indent = start |
| 227 |
| 228 # Record the initial indent. |
| 229 rel_indent[row] = pep8.expand_indent(line) - indent_level |
| 230 |
| 231 # Identify closing bracket. |
| 232 close_bracket = (token_type == tokenize.OP and text in ']})') |
| 233 |
| 234 # Is the indent relative to an opening bracket line? |
| 235 for open_row in reversed(open_rows[depth]): |
| 236 hang = rel_indent[row] - rel_indent[open_row] |
| 237 hanging_indent = hang in valid_hangs |
| 238 if hanging_indent: |
| 239 break |
| 240 if hangs[depth]: |
| 241 hanging_indent = (hang == hangs[depth]) |
| 242 |
| 243 visual_indent = (not close_bracket and hang > 0 and |
| 244 indent_chances.get(start[1])) |
| 245 |
| 246 if close_bracket and indent[depth]: |
| 247 # Closing bracket for visual indent. |
| 248 if start[1] != indent[depth]: |
| 249 yield (start, 'E124 {0}'.format(indent[depth])) |
| 250 elif close_bracket and not hang: |
| 251 pass |
| 252 elif indent[depth] and start[1] < indent[depth]: |
| 253 # Visual indent is broken. |
| 254 yield (start, 'E128 {0}'.format(indent[depth])) |
| 255 elif (hanging_indent or |
| 256 (indent_next and |
| 257 rel_indent[row] == 2 * DEFAULT_INDENT_SIZE)): |
| 258 # Hanging indent is verified. |
| 259 if close_bracket: |
| 260 yield (start, 'E123 {0}'.format(indent_level + |
| 261 rel_indent[open_row])) |
| 262 hangs[depth] = hang |
| 263 elif visual_indent is True: |
| 264 # Visual indent is verified. |
| 265 indent[depth] = start[1] |
| 266 elif visual_indent in (text, unicode): |
| 267 # Ignore token lined up with matching one from a previous line. |
| 268 pass |
| 269 else: |
| 270 one_indented = (indent_level + rel_indent[open_row] + |
| 271 DEFAULT_INDENT_SIZE) |
| 272 # Indent is broken. |
| 273 if hang <= 0: |
| 274 error = ('E122', one_indented) |
| 275 elif indent[depth]: |
| 276 error = ('E127', indent[depth]) |
| 277 elif hang > DEFAULT_INDENT_SIZE: |
| 278 error = ('E126', one_indented) |
| 279 else: |
| 280 hangs[depth] = hang |
| 281 error = ('E121', one_indented) |
| 282 |
| 283 yield (start, '{0} {1}'.format(*error)) |
| 284 |
| 285 # Look for visual indenting. |
| 286 if (parens[row] and token_type not in (tokenize.NL, tokenize.COMMENT) |
| 287 and not indent[depth]): |
| 288 indent[depth] = start[1] |
| 289 indent_chances[start[1]] = True |
| 290 # Deal with implicit string concatenation. |
| 291 elif (token_type in (tokenize.STRING, tokenize.COMMENT) or |
| 292 text in ('u', 'ur', 'b', 'br')): |
| 293 indent_chances[start[1]] = unicode |
| 294 # Special case for the "if" statement because len("if (") is equal to |
| 295 # 4. |
| 296 elif not indent_chances and not row and not depth and text == 'if': |
| 297 indent_chances[end[1] + 1] = True |
| 298 elif text == ':' and line[end[1]:].isspace(): |
| 299 open_rows[depth].append(row) |
| 300 |
| 301 # Keep track of bracket depth. |
| 302 if token_type == tokenize.OP: |
| 303 if text in '([{': |
| 304 depth += 1 |
| 305 indent.append(0) |
| 306 hangs.append(None) |
| 307 if len(open_rows) == depth: |
| 308 open_rows.append([]) |
| 309 open_rows[depth].append(row) |
| 310 parens[row] += 1 |
| 311 elif text in ')]}' and depth > 0: |
| 312 # Parent indents should not be more than this one. |
| 313 prev_indent = indent.pop() or last_indent[1] |
| 314 hangs.pop() |
| 315 for d in range(depth): |
| 316 if indent[d] > prev_indent: |
| 317 indent[d] = 0 |
| 318 for ind in list(indent_chances): |
| 319 if ind >= prev_indent: |
| 320 del indent_chances[ind] |
| 321 del open_rows[depth + 1:] |
| 322 depth -= 1 |
| 323 if depth: |
| 324 indent_chances[indent[depth]] = True |
| 325 for idx in range(row, -1, -1): |
| 326 if parens[idx]: |
| 327 parens[idx] -= 1 |
| 328 break |
| 329 assert len(indent) == depth + 1 |
| 330 if ( |
| 331 start[1] not in indent_chances and |
| 332 # This is for purposes of speeding up E121 (GitHub #90). |
| 333 not last_line.rstrip().endswith(',') |
| 334 ): |
| 335 # Allow to line up tokens. |
| 336 indent_chances[start[1]] = text |
| 337 |
| 338 last_token_multiline = (start[0] != end[0]) |
| 339 if last_token_multiline: |
| 340 rel_indent[end[0] - first_row] = rel_indent[row] |
| 341 |
| 342 last_line = line |
| 343 |
| 344 if ( |
| 345 indent_next and |
| 346 not last_line_begins_with_multiline and |
| 347 pep8.expand_indent(line) == indent_level + DEFAULT_INDENT_SIZE |
| 348 ): |
| 349 pos = (start[0], indent[0] + 4) |
| 350 yield (pos, 'E125 {0}'.format(indent_level + |
| 351 2 * DEFAULT_INDENT_SIZE)) |
| 352 del pep8._checks['logical_line'][pep8.continued_indentation] |
| 353 pep8.register_check(continued_indentation) |
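| |
| # Note that, unlike the stock pep8 check, the messages yielded above append |
| # the desired indentation column (e.g. 'E128 8'); FixPEP8._fix_reindent() |
| # later recovers it via int(result['info'].split()[1]). |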
| 354 |
| 355 |
| 356 class FixPEP8(object): |
| 357 |
| 358 """Fix invalid code. |
| 359 |
| 360 Fixer methods are prefixed "fix_". The _fix_source() method looks for these |
| 361 automatically. |
| 362 |
| 363 The fixer method can take either one or two arguments (in addition to |
| 364 self). The first argument is "result", which is the error information from |
| 365 pep8. The second argument, "logical", is required only for logical-line |
| 366 fixes. |
| 367 |
| 368 The fixer method can return the list of modified lines or None. An empty |
| 369 list would mean that no changes were made. None would mean that only the |
| 370 line reported in the pep8 error was modified. Note that the modified line |
| 371 numbers that are returned are indexed at 1. This typically would correspond |
| 372 with the line number reported in the pep8 error information. |
| 373 |
| 374 [fixed method list] |
| 375 - e121,e122,e123,e124,e125,e126,e127,e128,e129 |
| 376 - e201,e202,e203 |
| 377 - e211 |
| 378 - e221,e222,e223,e224,e225 |
| 379 - e231 |
| 380 - e251 |
| 381 - e261,e262 |
| 382 - e271,e272,e273,e274 |
| 383 - e301,e302,e303 |
| 384 - e401 |
| 385 - e502 |
| 386 - e701,e702 |
| 387 - e711 |
| 388 - w291 |
| 389 |
| 390 """ |
| 391 |
| 392 def __init__(self, filename, |
| 393 options, |
| 394 contents=None, |
| 395 long_line_ignore_cache=None): |
| 396 self.filename = filename |
| 397 if contents is None: |
| 398 self.source = readlines_from_file(filename) |
| 399 else: |
| 400 sio = io.StringIO(contents) |
| 401 self.source = sio.readlines() |
| 402 self.options = options |
| 403 self.indent_word = _get_indentword(''.join(self.source)) |
| 404 |
| 405 self.long_line_ignore_cache = ( |
| 406 set() if long_line_ignore_cache is None |
| 407 else long_line_ignore_cache) |
| 408 |
| 409 # Many fixers are the same even though pep8 categorizes them |
| 410 # differently. |
| 411 self.fix_e115 = self.fix_e112 |
| 412 self.fix_e116 = self.fix_e113 |
| 413 self.fix_e121 = self._fix_reindent |
| 414 self.fix_e122 = self._fix_reindent |
| 415 self.fix_e123 = self._fix_reindent |
| 416 self.fix_e124 = self._fix_reindent |
| 417 self.fix_e126 = self._fix_reindent |
| 418 self.fix_e127 = self._fix_reindent |
| 419 self.fix_e128 = self._fix_reindent |
| 420 self.fix_e129 = self._fix_reindent |
| 421 self.fix_e202 = self.fix_e201 |
| 422 self.fix_e203 = self.fix_e201 |
| 423 self.fix_e211 = self.fix_e201 |
| 424 self.fix_e221 = self.fix_e271 |
| 425 self.fix_e222 = self.fix_e271 |
| 426 self.fix_e223 = self.fix_e271 |
| 427 self.fix_e226 = self.fix_e225 |
| 428 self.fix_e227 = self.fix_e225 |
| 429 self.fix_e228 = self.fix_e225 |
| 430 self.fix_e241 = self.fix_e271 |
| 431 self.fix_e242 = self.fix_e224 |
| 432 self.fix_e261 = self.fix_e262 |
| 433 self.fix_e272 = self.fix_e271 |
| 434 self.fix_e273 = self.fix_e271 |
| 435 self.fix_e274 = self.fix_e271 |
| 436 self.fix_e309 = self.fix_e301 |
| 437 self.fix_e501 = ( |
| 438 self.fix_long_line_logically if |
| 439 options and (options.aggressive >= 2 or options.experimental) else |
| 440 self.fix_long_line_physically) |
| 441 self.fix_e703 = self.fix_e702 |
| 442 |
| 443 self._ws_comma_done = False |
| 444 |
| 445 def _fix_source(self, results): |
| 446 try: |
| 447 (logical_start, logical_end) = _find_logical(self.source) |
| 448 logical_support = True |
| 449 except (SyntaxError, tokenize.TokenError): # pragma: no cover |
| 450 logical_support = False |
| 451 |
| 452 completed_lines = set() |
| 453 for result in sorted(results, key=_priority_key): |
| 454 if result['line'] in completed_lines: |
| 455 continue |
| 456 |
| 457 fixed_methodname = 'fix_' + result['id'].lower() |
| 458 if hasattr(self, fixed_methodname): |
| 459 fix = getattr(self, fixed_methodname) |
| 460 |
| 461 line_index = result['line'] - 1 |
| 462 original_line = self.source[line_index] |
| 463 |
| 464 is_logical_fix = len(inspect.getargspec(fix).args) > 2 |
| 465 if is_logical_fix: |
| 466 logical = None |
| 467 if logical_support: |
| 468 logical = _get_logical(self.source, |
| 469 result, |
| 470 logical_start, |
| 471 logical_end) |
| 472 if logical and set(range( |
| 473 logical[0][0] + 1, |
| 474 logical[1][0] + 1)).intersection( |
| 475 completed_lines): |
| 476 continue |
| 477 |
| 478 modified_lines = fix(result, logical) |
| 479 else: |
| 480 modified_lines = fix(result) |
| 481 |
| 482 if modified_lines is None: |
| 483 # Force logical fixes to report what they modified. |
| 484 assert not is_logical_fix |
| 485 |
| 486 if self.source[line_index] == original_line: |
| 487 modified_lines = [] |
| 488 |
| 489 if modified_lines: |
| 490 completed_lines.update(modified_lines) |
| 491 elif modified_lines == []: # Empty list means no fix |
| 492 if self.options.verbose >= 2: |
| 493 print( |
| 494 '---> Not fixing {f} on line {l}'.format( |
| 495 f=result['id'], l=result['line']), |
| 496 file=sys.stderr) |
| 497 else: # We assume one-line fix when None. |
| 498 completed_lines.add(result['line']) |
| 499 else: |
| 500 if self.options.verbose >= 3: |
| 501 print( |
| 502 "---> '{0}' is not defined.".format(fixed_methodname), |
| 503 file=sys.stderr) |
| 504 |
| 505 info = result['info'].strip() |
| 506 print('---> {0}:{1}:{2}:{3}'.format(self.filename, |
| 507 result['line'], |
| 508 result['column'], |
| 509 info), |
| 510 file=sys.stderr) |
| 511 |
| 512 def fix(self): |
| 513 """Return a version of the source code with PEP 8 violations fixed.""" |
| 514 pep8_options = { |
| 515 'ignore': self.options.ignore, |
| 516 'select': self.options.select, |
| 517 'max_line_length': self.options.max_line_length, |
| 518 } |
| 519 results = _execute_pep8(pep8_options, self.source) |
| 520 |
| 521 if self.options.verbose: |
| 522 progress = {} |
| 523 for r in results: |
| 524 if r['id'] not in progress: |
| 525 progress[r['id']] = set() |
| 526 progress[r['id']].add(r['line']) |
| 527 print('---> {n} issue(s) to fix {progress}'.format( |
| 528 n=len(results), progress=progress), file=sys.stderr) |
| 529 |
| 530 if self.options.line_range: |
| 531 start, end = self.options.line_range |
| 532 results = [r for r in results |
| 533 if start <= r['line'] <= end] |
| 534 |
| 535 self._fix_source(filter_results(source=''.join(self.source), |
| 536 results=results, |
| 537 aggressive=self.options.aggressive)) |
| 538 |
| 539 if self.options.line_range: |
| 540 # If the number of lines has changed, then adjust line_range. |
| 541 count = sum(sline.count('\n') |
| 542 for sline in self.source[start - 1:end]) |
| 543 self.options.line_range[1] = start + count - 1 |
| 544 |
| 545 return ''.join(self.source) |
| 546 |
| 547 def _fix_reindent(self, result): |
| 548 """Fix a badly indented line. |
| 549 |
| 550 This is done by adding or removing from its initial indent only. |
| 551 |
| 552 """ |
| 553 num_indent_spaces = int(result['info'].split()[1]) |
| 554 line_index = result['line'] - 1 |
| 555 target = self.source[line_index] |
| 556 |
| 557 self.source[line_index] = ' ' * num_indent_spaces + target.lstrip() |
| 558 |
| 559 def fix_e112(self, result): |
| 560 """Fix under-indented comments.""" |
| 561 line_index = result['line'] - 1 |
| 562 target = self.source[line_index] |
| 563 |
| 564 if not target.lstrip().startswith('#'): |
| 565 # Don't screw with invalid syntax. |
| 566 return [] |
| 567 |
| 568 self.source[line_index] = self.indent_word + target |
| 569 |
| 570 def fix_e113(self, result): |
| 571 """Fix over-indented comments.""" |
| 572 line_index = result['line'] - 1 |
| 573 target = self.source[line_index] |
| 574 |
| 575 indent = _get_indentation(target) |
| 576 stripped = target.lstrip() |
| 577 |
| 578 if not stripped.startswith('#'): |
| 579 # Don't screw with invalid syntax. |
| 580 return [] |
| 581 |
| 582 self.source[line_index] = indent[1:] + stripped |
| 583 |
| 584 def fix_e125(self, result): |
| 585 """Fix indentation undistinguish from the next logical line.""" |
| 586 num_indent_spaces = int(result['info'].split()[1]) |
| 587 line_index = result['line'] - 1 |
| 588 target = self.source[line_index] |
| 589 |
| 590 spaces_to_add = num_indent_spaces - len(_get_indentation(target)) |
| 591 indent = len(_get_indentation(target)) |
| 592 modified_lines = [] |
| 593 |
| 594 while len(_get_indentation(self.source[line_index])) >= indent: |
| 595 self.source[line_index] = (' ' * spaces_to_add + |
| 596 self.source[line_index]) |
| 597 modified_lines.append(1 + line_index) # Line indexed at 1. |
| 598 line_index -= 1 |
| 599 |
| 600 return modified_lines |
| 601 |
| 602 def fix_e201(self, result): |
| 603 """Remove extraneous whitespace.""" |
| 604 line_index = result['line'] - 1 |
| 605 target = self.source[line_index] |
| 606 offset = result['column'] - 1 |
| 607 |
| 608 if is_probably_part_of_multiline(target): |
| 609 return [] |
| 610 |
| 611 fixed = fix_whitespace(target, |
| 612 offset=offset, |
| 613 replacement='') |
| 614 |
| 615 self.source[line_index] = fixed |
| 616 |
| 617 def fix_e224(self, result): |
| 618 """Remove extraneous whitespace around operator.""" |
| 619 target = self.source[result['line'] - 1] |
| 620 offset = result['column'] - 1 |
| 621 fixed = target[:offset] + target[offset:].replace('\t', ' ') |
| 622 self.source[result['line'] - 1] = fixed |
| 623 |
| 624 def fix_e225(self, result): |
| 625 """Fix missing whitespace around operator.""" |
| 626 target = self.source[result['line'] - 1] |
| 627 offset = result['column'] - 1 |
| 628 fixed = target[:offset] + ' ' + target[offset:] |
| 629 |
| 630 # Only proceed if non-whitespace characters match. |
| 631 # And make sure we don't break the indentation. |
| 632 if ( |
| 633 fixed.replace(' ', '') == target.replace(' ', '') and |
| 634 _get_indentation(fixed) == _get_indentation(target) |
| 635 ): |
| 636 self.source[result['line'] - 1] = fixed |
| 637 else: |
| 638 return [] |
| 639 |
| 640 def fix_e231(self, result): |
| 641 """Add missing whitespace.""" |
| 642 # Optimize for comma case. This will fix all commas in the full source |
| 643 # code in one pass. Don't do this more than once. If it fails the first |
| 644 # time, there is no point in trying again. |
| 645 if ',' in result['info'] and not self._ws_comma_done: |
| 646 self._ws_comma_done = True |
| 647 original = ''.join(self.source) |
| 648 new = refactor(original, ['ws_comma']) |
| 649 if original.strip() != new.strip(): |
| 650 self.source = [new] |
| 651 return range(1, 1 + len(original)) |
| 652 |
| 653 line_index = result['line'] - 1 |
| 654 target = self.source[line_index] |
| 655 offset = result['column'] |
| 656 fixed = target[:offset] + ' ' + target[offset:] |
| 657 self.source[line_index] = fixed |
| 658 |
| 659 def fix_e251(self, result): |
| 660 """Remove whitespace around parameter '=' sign.""" |
| 661 line_index = result['line'] - 1 |
| 662 target = self.source[line_index] |
| 663 |
| 664 # This is necessary since pep8 sometimes reports columns that go |
| 665 # past the end of the physical line. This happens in cases like, |
| 666 # foo(bar\n=None) |
| 667 c = min(result['column'] - 1, |
| 668 len(target) - 1) |
| 669 |
| 670 if target[c].strip(): |
| 671 fixed = target |
| 672 else: |
| 673 fixed = target[:c].rstrip() + target[c:].lstrip() |
| 674 |
| 675 # There could be an escaped newline |
| 676 # |
| 677 # def foo(a=\ |
| 678 # 1) |
| 679 if fixed.endswith(('=\\\n', '=\\\r\n', '=\\\r')): |
| 680 self.source[line_index] = fixed.rstrip('\n\r \t\\') |
| 681 self.source[line_index + 1] = self.source[line_index + 1].lstrip() |
| 682 return [line_index + 1, line_index + 2] # Line indexed at 1 |
| 683 |
| 684 self.source[result['line'] - 1] = fixed |
| 685 |
| 686 def fix_e262(self, result): |
| 687 """Fix spacing after comment hash.""" |
| 688 target = self.source[result['line'] - 1] |
| 689 offset = result['column'] |
| 690 |
| 691 code = target[:offset].rstrip(' \t#') |
| 692 comment = target[offset:].lstrip(' \t#') |
| 693 |
| 694 fixed = code + ('  # ' + comment if comment.strip() else '\n') |
| 695 |
| 696 self.source[result['line'] - 1] = fixed |
| 697 |
| 698 def fix_e271(self, result): |
| 699 """Fix extraneous whitespace around keywords.""" |
| 700 line_index = result['line'] - 1 |
| 701 target = self.source[line_index] |
| 702 offset = result['column'] - 1 |
| 703 |
| 704 if is_probably_part_of_multiline(target): |
| 705 return [] |
| 706 |
| 707 fixed = fix_whitespace(target, |
| 708 offset=offset, |
| 709 replacement=' ') |
| 710 |
| 711 if fixed == target: |
| 712 return [] |
| 713 else: |
| 714 self.source[line_index] = fixed |
| 715 |
| 716 def fix_e301(self, result): |
| 717 """Add missing blank line.""" |
| 718 cr = '\n' |
| 719 self.source[result['line'] - 1] = cr + self.source[result['line'] - 1] |
| 720 |
| 721 def fix_e302(self, result): |
| 722 """Add missing 2 blank lines.""" |
| 723 add_linenum = 2 - int(result['info'].split()[-1]) |
| 724 cr = '\n' * add_linenum |
| 725 self.source[result['line'] - 1] = cr + self.source[result['line'] - 1] |
| 726 |
| 727 def fix_e303(self, result): |
| 728 """Remove extra blank lines.""" |
| 729 delete_linenum = int(result['info'].split('(')[1].split(')')[0]) - 2 |
| 730 delete_linenum = max(1, delete_linenum) |
| 731 |
| 732 # We need to count because pep8 reports an offset line number if there |
| 733 # are comments. |
| 734 cnt = 0 |
| 735 line = result['line'] - 2 |
| 736 modified_lines = [] |
| 737 while cnt < delete_linenum and line >= 0: |
| 738 if not self.source[line].strip(): |
| 739 self.source[line] = '' |
| 740 modified_lines.append(1 + line) # Line indexed at 1 |
| 741 cnt += 1 |
| 742 line -= 1 |
| 743 |
| 744 return modified_lines |
| 745 |
| 746 def fix_e304(self, result): |
| 747 """Remove blank line following function decorator.""" |
| 748 line = result['line'] - 2 |
| 749 if not self.source[line].strip(): |
| 750 self.source[line] = '' |
| 751 |
| 752 def fix_e401(self, result): |
| 753 """Put imports on separate lines.""" |
| 754 line_index = result['line'] - 1 |
| 755 target = self.source[line_index] |
| 756 offset = result['column'] - 1 |
| 757 |
| 758 if not target.lstrip().startswith('import'): |
| 759 return [] |
| 760 |
| 761 indentation = re.split(pattern=r'\bimport\b', |
| 762 string=target, maxsplit=1)[0] |
| 763 fixed = (target[:offset].rstrip('\t ,') + '\n' + |
| 764 indentation + 'import ' + target[offset:].lstrip('\t ,')) |
| 765 self.source[line_index] = fixed |
| 766 |
| 767 def fix_long_line_logically(self, result, logical): |
| 768 """Try to make lines fit within --max-line-length characters.""" |
| 769 if ( |
| 770 not logical or |
| 771 len(logical[2]) == 1 or |
| 772 self.source[result['line'] - 1].lstrip().startswith('#') |
| 773 ): |
| 774 return self.fix_long_line_physically(result) |
| 775 |
| 776 start_line_index = logical[0][0] |
| 777 end_line_index = logical[1][0] |
| 778 logical_lines = logical[2] |
| 779 |
| 780 previous_line = get_item(self.source, start_line_index - 1, default='') |
| 781 next_line = get_item(self.source, end_line_index + 1, default='') |
| 782 |
| 783 single_line = join_logical_line(''.join(logical_lines)) |
| 784 |
| 785 try: |
| 786 fixed = self.fix_long_line( |
| 787 target=single_line, |
| 788 previous_line=previous_line, |
| 789 next_line=next_line, |
| 790 original=''.join(logical_lines)) |
| 791 except (SyntaxError, tokenize.TokenError): |
| 792 return self.fix_long_line_physically(result) |
| 793 |
| 794 if fixed: |
| 795 for line_index in range(start_line_index, end_line_index + 1): |
| 796 self.source[line_index] = '' |
| 797 self.source[start_line_index] = fixed |
| 798 return range(start_line_index + 1, end_line_index + 1) |
| 799 else: |
| 800 return [] |
| 801 |
| 802 def fix_long_line_physically(self, result): |
| 803 """Try to make lines fit within --max-line-length characters.""" |
| 804 line_index = result['line'] - 1 |
| 805 target = self.source[line_index] |
| 806 |
| 807 previous_line = get_item(self.source, line_index - 1, default='') |
| 808 next_line = get_item(self.source, line_index + 1, default='') |
| 809 |
| 810 try: |
| 811 fixed = self.fix_long_line( |
| 812 target=target, |
| 813 previous_line=previous_line, |
| 814 next_line=next_line, |
| 815 original=target) |
| 816 except (SyntaxError, tokenize.TokenError): |
| 817 return [] |
| 818 |
| 819 if fixed: |
| 820 self.source[line_index] = fixed |
| 821 return [line_index + 1] |
| 822 else: |
| 823 return [] |
| 824 |
| 825 def fix_long_line(self, target, previous_line, |
| 826 next_line, original): |
| 827 cache_entry = (target, previous_line, next_line) |
| 828 if cache_entry in self.long_line_ignore_cache: |
| 829 return [] |
| 830 |
| 831 if target.lstrip().startswith('#'): |
| 832 # Wrap commented lines. |
| 833 return shorten_comment( |
| 834 line=target, |
| 835 max_line_length=self.options.max_line_length, |
| 836 last_comment=not next_line.lstrip().startswith('#')) |
| 837 |
| 838 fixed = get_fixed_long_line( |
| 839 target=target, |
| 840 previous_line=previous_line, |
| 841 original=original, |
| 842 indent_word=self.indent_word, |
| 843 max_line_length=self.options.max_line_length, |
| 844 aggressive=self.options.aggressive, |
| 845 experimental=self.options.experimental, |
| 846 verbose=self.options.verbose) |
| 847 if fixed and not code_almost_equal(original, fixed): |
| 848 return fixed |
| 849 else: |
| 850 self.long_line_ignore_cache.add(cache_entry) |
| 851 return None |
| 852 |
| 853 def fix_e502(self, result): |
| 854 """Remove extraneous escape of newline.""" |
| 855 line_index = result['line'] - 1 |
| 856 target = self.source[line_index] |
| 857 self.source[line_index] = target.rstrip('\n\r \t\\') + '\n' |
| 858 |
| 859 def fix_e701(self, result): |
| 860 """Put colon-separated compound statement on separate lines.""" |
| 861 line_index = result['line'] - 1 |
| 862 target = self.source[line_index] |
| 863 c = result['column'] |
| 864 |
| 865 fixed_source = (target[:c] + '\n' + |
| 866 _get_indentation(target) + self.indent_word + |
| 867 target[c:].lstrip('\n\r \t\\')) |
| 868 self.source[result['line'] - 1] = fixed_source |
| 869 return [result['line'], result['line'] + 1] |
| 870 |
| 871 def fix_e702(self, result, logical): |
| 872 """Put semicolon-separated compound statement on separate lines.""" |
| 873 if not logical: |
| 874 return [] # pragma: no cover |
| 875 logical_lines = logical[2] |
| 876 |
| 877 line_index = result['line'] - 1 |
| 878 target = self.source[line_index] |
| 879 |
| 880 if target.rstrip().endswith('\\'): |
| 881 # Normalize '1; \\\n2' into '1; 2'. |
| 882 self.source[line_index] = target.rstrip('\n \r\t\\') |
| 883 self.source[line_index + 1] = self.source[line_index + 1].lstrip() |
| 884 return [line_index + 1, line_index + 2] |
| 885 |
| 886 if target.rstrip().endswith(';'): |
| 887 self.source[line_index] = target.rstrip('\n \r\t;') + '\n' |
| 888 return [line_index + 1] |
| 889 |
| 890 offset = result['column'] - 1 |
| 891 first = target[:offset].rstrip(';').rstrip() |
| 892 second = (_get_indentation(logical_lines[0]) + |
| 893 target[offset:].lstrip(';').lstrip()) |
| 894 |
| 895 self.source[line_index] = first + '\n' + second |
| 896 return [line_index + 1] |
| 897 |
| 898 def fix_e711(self, result): |
| 899 """Fix comparison with None.""" |
| 900 line_index = result['line'] - 1 |
| 901 target = self.source[line_index] |
| 902 offset = result['column'] - 1 |
| 903 |
| 904 right_offset = offset + 2 |
| 905 if right_offset >= len(target): |
| 906 return [] |
| 907 |
| 908 left = target[:offset].rstrip() |
| 909 center = target[offset:right_offset] |
| 910 right = target[right_offset:].lstrip() |
| 911 |
| 912 if not right.startswith('None'): |
| 913 return [] |
| 914 |
| 915 if center.strip() == '==': |
| 916 new_center = 'is' |
| 917 elif center.strip() == '!=': |
| 918 new_center = 'is not' |
| 919 else: |
| 920 return [] |
| 921 |
| 922 self.source[line_index] = ' '.join([left, new_center, right]) |
| 923 |
| 924 def fix_e712(self, result): |
| 925 """Fix comparison with boolean.""" |
| 926 line_index = result['line'] - 1 |
| 927 target = self.source[line_index] |
| 928 offset = result['column'] - 1 |
| 929 |
| 930 # Handle very easy "not" special cases. |
| 931 if re.match(r'^\s*if \w+ == False:$', target): |
| 932 self.source[line_index] = re.sub(r'if (\w+) == False:', |
| 933 r'if not \1:', target, count=1) |
| 934 elif re.match(r'^\s*if \w+ != True:$', target): |
| 935 self.source[line_index] = re.sub(r'if (\w+) != True:', |
| 936 r'if not \1:', target, count=1) |
| 937 else: |
| 938 right_offset = offset + 2 |
| 939 if right_offset >= len(target): |
| 940 return [] |
| 941 |
| 942 left = target[:offset].rstrip() |
| 943 center = target[offset:right_offset] |
| 944 right = target[right_offset:].lstrip() |
| 945 |
| 946 # Handle simple cases only. |
| 947 new_right = None |
| 948 if center.strip() == '==': |
| 949 if re.match(r'\bTrue\b', right): |
| 950 new_right = re.sub(r'\bTrue\b *', '', right, count=1) |
| 951 elif center.strip() == '!=': |
| 952 if re.match(r'\bFalse\b', right): |
| 953 new_right = re.sub(r'\bFalse\b *', '', right, count=1) |
| 954 |
| 955 if new_right is None: |
| 956 return [] |
| 957 |
| 958 if new_right[0].isalnum(): |
| 959 new_right = ' ' + new_right |
| 960 |
| 961 self.source[line_index] = left + new_right |
| 962 |
| 963 def fix_e713(self, result): |
| 964 """Fix non-membership check.""" |
| 965 line_index = result['line'] - 1 |
| 966 target = self.source[line_index] |
| 967 |
| 968 # Handle very easy case only. |
| 969 if re.match(r'^\s*if not \w+ in \w+:$', target): |
| 970 self.source[line_index] = re.sub(r'if not (\w+) in (\w+):', |
| 971 r'if \1 not in \2:', |
| 972 target, |
| 973 count=1) |
| 974 |
| 975 def fix_w291(self, result): |
| 976 """Remove trailing whitespace.""" |
| 977 fixed_line = self.source[result['line'] - 1].rstrip() |
| 978 self.source[result['line'] - 1] = fixed_line + '\n' |
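| |
| # Minimal driver sketch; 'opts' is a hypothetical options object carrying |
| # the attributes FixPEP8 reads (ignore, select, max_line_length, |
| # aggressive, experimental, verbose, line_range): |
| # |
| #     import argparse |
| #     opts = argparse.Namespace( |
| #         ignore='', select='', max_line_length=79, aggressive=0, |
| #         experimental=False, verbose=0, line_range=None) |
| #     FixPEP8('', opts, contents='x = 1  \n').fix()  # -> 'x = 1\n' (W291) |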
| 979 |
| 980 |
| 981 def get_fixed_long_line(target, previous_line, original, |
| 982 indent_word='    ', max_line_length=79, |
| 983 aggressive=False, experimental=False, verbose=False): |
| 984 """Break up long line and return result. |
| 985 |
| 986 Do this by generating multiple reformatted candidates and then |
| 987 ranking the candidates to heuristically select the best option. |
| 988 |
| 989 """ |
| 990 indent = _get_indentation(target) |
| 991 source = target[len(indent):] |
| 992 assert source.lstrip() == source |
| 993 |
| 994 # Check for partial multiline. |
| 995 tokens = list(generate_tokens(source)) |
| 996 |
| 997 candidates = shorten_line( |
| 998 tokens, source, indent, |
| 999 indent_word, |
| 1000 max_line_length, |
| 1001 aggressive=aggressive, |
| 1002 experimental=experimental, |
| 1003 previous_line=previous_line) |
| 1004 |
| 1005 # Also sort alphabetically as a tie breaker (for determinism). |
| 1006 candidates = sorted( |
| 1007 sorted(set(candidates).union([target, original])), |
| 1008 key=lambda x: line_shortening_rank(x, |
| 1009 indent_word, |
| 1010 max_line_length, |
| 1011 experimental)) |
| 1012 |
| 1013 if verbose >= 4: |
| 1014 print(('-' * 79 + '\n').join([''] + candidates + ['']), |
| 1015 file=codecs.getwriter('utf-8')(sys.stderr.buffer |
| 1016 if hasattr(sys.stderr, |
| 1017 'buffer') |
| 1018 else sys.stderr)) |
| 1019 |
| 1020 if candidates: |
| 1021 return candidates[0] |
| 1022 |
| 1023 |
| 1024 def join_logical_line(logical_line): |
| 1025 """Return single line based on logical line input.""" |
| 1026 indentation = _get_indentation(logical_line) |
| 1027 |
| 1028 return indentation + untokenize_without_newlines( |
| 1029 generate_tokens(logical_line.lstrip())) + '\n' |
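| |
| # Sketch: collapses a multi-line logical line onto one physical line, |
| # roughly |
| # |
| #     join_logical_line('x = (1 +\n     2)\n')  # ~ 'x = (1 + 2)\n' |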
| 1030 |
| 1031 |
| 1032 def untokenize_without_newlines(tokens): |
| 1033 """Return source code based on tokens.""" |
| 1034 text = '' |
| 1035 last_row = 0 |
| 1036 last_column = -1 |
| 1037 |
| 1038 for t in tokens: |
| 1039 token_string = t[1] |
| 1040 (start_row, start_column) = t[2] |
| 1041 (end_row, end_column) = t[3] |
| 1042 |
| 1043 if start_row > last_row: |
| 1044 last_column = 0 |
| 1045 if ( |
| 1046 (start_column > last_column or token_string == '\n') and |
| 1047 not text.endswith(' ') |
| 1048 ): |
| 1049 text += ' ' |
| 1050 |
| 1051 if token_string != '\n': |
| 1052 text += token_string |
| 1053 |
| 1054 last_row = end_row |
| 1055 last_column = end_column |
| 1056 |
| 1057 return text |
| 1058 |
| 1059 |
| 1060 def _find_logical(source_lines): |
| 1061 # Build indexes of the starts and ends of all logical lines. |
| 1062 logical_start = [] |
| 1063 logical_end = [] |
| 1064 last_newline = True |
| 1065 parens = 0 |
| 1066 for t in generate_tokens(''.join(source_lines)): |
| 1067 if t[0] in [tokenize.COMMENT, tokenize.DEDENT, |
| 1068 tokenize.INDENT, tokenize.NL, |
| 1069 tokenize.ENDMARKER]: |
| 1070 continue |
| 1071 if not parens and t[0] in [tokenize.NEWLINE, tokenize.SEMI]: |
| 1072 last_newline = True |
| 1073 logical_end.append((t[3][0] - 1, t[2][1])) |
| 1074 continue |
| 1075 if last_newline and not parens: |
| 1076 logical_start.append((t[2][0] - 1, t[2][1])) |
| 1077 last_newline = False |
| 1078 if t[0] == tokenize.OP: |
| 1079 if t[1] in '([{': |
| 1080 parens += 1 |
| 1081 elif t[1] in '}])': |
| 1082 parens -= 1 |
| 1083 return (logical_start, logical_end) |
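| |
| # Shape of the result: for ['x = 1\n', 'y = 2\n'] this returns |
| # ([(0, 0), (1, 0)], [(0, 5), (1, 5)]) -- zero-indexed (row, column) pairs |
| # marking where each logical line starts and ends. |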
| 1084 |
| 1085 |
| 1086 def _get_logical(source_lines, result, logical_start, logical_end): |
| 1087 """Return the logical line corresponding to the result. |
| 1088 |
| 1089 Assumes input is already E702-clean. |
| 1090 |
| 1091 """ |
| 1092 row = result['line'] - 1 |
| 1093 col = result['column'] - 1 |
| 1094 ls = None |
| 1095 le = None |
| 1096 for i in range(len(logical_start)): |
| 1097 assert logical_end |
| 1098 x = logical_end[i] |
| 1099 if x[0] > row or (x[0] == row and x[1] > col): |
| 1100 le = x |
| 1101 ls = logical_start[i] |
| 1102 break |
| 1103 if ls is None: |
| 1104 return None |
| 1105 original = source_lines[ls[0]:le[0] + 1] |
| 1106 return ls, le, original |
| 1107 |
| 1108 |
| 1109 def get_item(items, index, default=None): |
| 1110 if 0 <= index < len(items): |
| 1111 return items[index] |
| 1112 else: |
| 1113 return default |
| 1114 |
| 1115 |
| 1116 def reindent(source, indent_size): |
| 1117 """Reindent all lines.""" |
| 1118 reindenter = Reindenter(source) |
| 1119 return reindenter.run(indent_size) |
| 1120 |
| 1121 |
| 1122 def code_almost_equal(a, b): |
| 1123 """Return True if code is similar. |
| 1124 |
| 1125 Ignore whitespace when comparing specific line. |
| 1126 |
| 1127 """ |
| 1128 split_a = split_and_strip_non_empty_lines(a) |
| 1129 split_b = split_and_strip_non_empty_lines(b) |
| 1130 |
| 1131 if len(split_a) != len(split_b): |
| 1132 return False |
| 1133 |
| 1134 for index in range(len(split_a)): |
| 1135 if ''.join(split_a[index].split()) != ''.join(split_b[index].split()): |
| 1136 return False |
| 1137 |
| 1138 return True |
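| |
| # For instance: |
| # |
| #     code_almost_equal('f(a, b)\n', 'f(a,b)\n')     # True |
| #     code_almost_equal('f(a, b)\n', 'f(a,\n b)\n')  # False: line counts differ |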
| 1139 |
| 1140 |
| 1141 def split_and_strip_non_empty_lines(text): |
| 1142 """Return lines split by newline. |
| 1143 |
| 1144 Ignore empty lines. |
| 1145 |
| 1146 """ |
| 1147 return [line.strip() for line in text.splitlines() if line.strip()] |
| 1148 |
| 1149 |
| 1150 def fix_e265(source, aggressive=False): # pylint: disable=unused-argument |
| 1151 """Format block comments.""" |
| 1152 if '#' not in source: |
| 1153 # Optimization. |
| 1154 return source |
| 1155 |
| 1156 ignored_line_numbers = multiline_string_lines( |
| 1157 source, |
| 1158 include_docstrings=True) | set(commented_out_code_lines(source)) |
| 1159 |
| 1160 fixed_lines = [] |
| 1161 sio = io.StringIO(source) |
| 1162 for (line_number, line) in enumerate(sio.readlines(), start=1): |
| 1163 if ( |
| 1164 line.lstrip().startswith('#') and |
| 1165 line_number not in ignored_line_numbers |
| 1166 ): |
| 1167 indentation = _get_indentation(line) |
| 1168 line = line.lstrip() |
| 1169 |
| 1170 # Normalize beginning if not a shebang. |
| 1171 if len(line) > 1: |
| 1172 if ( |
| 1173 # Leave multiple spaces like '#    ' alone. |
| 1174 (line.count('#') > 1 or line[1].isalnum()) |
| 1175 # Leave stylistic outlined blocks alone. |
| 1176 and not line.rstrip().endswith('#') |
| 1177 ): |
| 1178 line = '# ' + line.lstrip('# \t') |
| 1179 |
| 1180 fixed_lines.append(indentation + line) |
| 1181 else: |
| 1182 fixed_lines.append(line) |
| 1183 |
| 1184 return ''.join(fixed_lines) |
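| |
| # Normalization sketch (shebangs and outlined '###' blocks are left alone): |
| # |
| #     fix_e265('#comment\n')  # -> '# comment\n' |
| #     fix_e265('## note\n')   # -> '# note\n' |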
| 1185 |
| 1186 |
| 1187 def refactor(source, fixer_names, ignore=None): |
| 1188 """Return refactored code using lib2to3. |
| 1189 |
| 1190 Skip if the ignore string appears in the refactored code but not in the source. |
| 1191 |
| 1192 """ |
| 1193 from lib2to3 import pgen2 |
| 1194 try: |
| 1195 new_text = refactor_with_2to3(source, |
| 1196 fixer_names=fixer_names) |
| 1197 except (pgen2.parse.ParseError, |
| 1198 SyntaxError, |
| 1199 UnicodeDecodeError, |
| 1200 UnicodeEncodeError): |
| 1201 return source |
| 1202 |
| 1203 if ignore: |
| 1204 if ignore in new_text and ignore not in source: |
| 1205 return source |
| 1206 |
| 1207 return new_text |
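| |
| # For example, lib2to3's ws_comma fixer supplies the E231 comma fix: |
| # |
| #     refactor('f(1,2)\n', ['ws_comma'])  # -> 'f(1, 2)\n' |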
| 1208 |
| 1209 |
| 1210 def code_to_2to3(select, ignore): |
| 1211 fixes = set() |
| 1212 for code, fix in CODE_TO_2TO3.items(): |
| 1213 if code_match(code, select=select, ignore=ignore): |
| 1214 fixes |= set(fix) |
| 1215 return fixes |
| 1216 |
| 1217 |
| 1218 def fix_2to3(source, aggressive=True, select=None, ignore=None): |
| 1219 """Fix various deprecated code (via lib2to3).""" |
| 1220 if not aggressive: |
| 1221 return source |
| 1222 |
| 1223 select = select or [] |
| 1224 ignore = ignore or [] |
| 1225 |
| 1226 return refactor(source, |
| 1227 code_to_2to3(select=select, |
| 1228 ignore=ignore)) |
| 1229 |
| 1230 |
| 1231 def fix_w602(source, aggressive=True): |
| 1232 """Fix deprecated form of raising exception.""" |
| 1233 if not aggressive: |
| 1234 return source |
| 1235 |
| 1236 return refactor(source, ['raise'], |
| 1237 ignore='with_traceback') |
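| |
| # Sketch, via lib2to3's 'raise' fixer: |
| # |
| #     fix_w602("raise ValueError, 'oops'\n") |
| #     # -> "raise ValueError('oops')\n" |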
| 1238 |
| 1239 |
| 1240 def find_newline(source): |
| 1241 """Return type of newline used in source. |
| 1242 |
| 1243 Input is a list of lines. |
| 1244 |
| 1245 """ |
| 1246 assert not isinstance(source, unicode) |
| 1247 |
| 1248 counter = collections.defaultdict(int) |
| 1249 for line in source: |
| 1250 if line.endswith(CRLF): |
| 1251 counter[CRLF] += 1 |
| 1252 elif line.endswith(CR): |
| 1253 counter[CR] += 1 |
| 1254 elif line.endswith(LF): |
| 1255 counter[LF] += 1 |
| 1256 |
| 1257 return (sorted(counter, key=counter.get, reverse=True) or [LF])[0] |
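| |
| # The most common line ending wins, with LF as the default for empty input: |
| # |
| #     find_newline(['a\r\n', 'b\r\n', 'c\n'])  # -> '\r\n' |
| #     find_newline([])                         # -> '\n' |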
| 1258 |
| 1259 |
| 1260 def _get_indentword(source): |
| 1261 """Return indentation type.""" |
| 1262 indent_word = '    '  # Default in case source has no indentation |
| 1263 try: |
| 1264 for t in generate_tokens(source): |
| 1265 if t[0] == token.INDENT: |
| 1266 indent_word = t[1] |
| 1267 break |
| 1268 except (SyntaxError, tokenize.TokenError): |
| 1269 pass |
| 1270 return indent_word |
| 1271 |
| 1272 |
| 1273 def _get_indentation(line): |
| 1274 """Return leading whitespace.""" |
| 1275 if line.strip(): |
| 1276 non_whitespace_index = len(line) - len(line.lstrip()) |
| 1277 return line[:non_whitespace_index] |
| 1278 else: |
| 1279 return '' |
| 1280 |
| 1281 |
| 1282 def get_diff_text(old, new, filename): |
| 1283 """Return text of unified diff between old and new.""" |
| 1284 newline = '\n' |
| 1285 diff = difflib.unified_diff( |
| 1286 old, new, |
| 1287 'original/' + filename, |
| 1288 'fixed/' + filename, |
| 1289 lineterm=newline) |
| 1290 |
| 1291 text = '' |
| 1292 for line in diff: |
| 1293 text += line |
| 1294 |
| 1295 # Work around missing newline (http://bugs.python.org/issue2142). |
| 1296 if text and not line.endswith(newline): |
| 1297 text += newline + r'\ No newline at end of file' + newline |
| 1298 |
| 1299 return text |
| 1300 |
| 1301 |
| 1302 def _priority_key(pep8_result): |
| 1303 """Key for sorting PEP8 results. |
| 1304 |
| 1305 Global fixes should be done first. This is important for things like |
| 1306 indentation. |
| 1307 |
| 1308 """ |
| 1309 priority = [ |
| 1310 # Fix multiline colon-based statements before semicolon-based ones. |
| 1311 'e701', |
| 1312 # Break multiline statements early. |
| 1313 'e702', |
| 1314 # Things that make lines longer. |
| 1315 'e225', 'e231', |
| 1316 # Remove extraneous whitespace before breaking lines. |
| 1317 'e201', |
| 1318 # Shorten whitespace in comment before resorting to wrapping. |
| 1319 'e262' |
| 1320 ] |
| 1321 middle_index = 10000 |
| 1322 lowest_priority = [ |
| 1323 # We need to shorten lines last since the logical fixer can get in a |
| 1324 # loop, which causes us to exit early. |
| 1325 'e501' |
| 1326 ] |
| 1327 key = pep8_result['id'].lower() |
| 1328 try: |
| 1329 return priority.index(key) |
| 1330 except ValueError: |
| 1331 try: |
| 1332 return middle_index + lowest_priority.index(key) + 1 |
| 1333 except ValueError: |
| 1334 return middle_index |
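| |
| # For instance, results with ids E701, E123, and E501 sort to the order |
| # E701 (listed priority), E123 (middle), E501 (forced last). |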
| 1335 |
| 1336 |
| 1337 def shorten_line(tokens, source, indentation, indent_word, max_line_length, |
| 1338 aggressive=False, experimental=False, previous_line=''): |
| 1339 """Separate line at OPERATOR. |
| 1340 |
| 1341 Multiple candidates will be yielded. |
| 1342 |
| 1343 """ |
| 1344 for candidate in _shorten_line(tokens=tokens, |
| 1345 source=source, |
| 1346 indentation=indentation, |
| 1347 indent_word=indent_word, |
| 1348 aggressive=aggressive, |
| 1349 previous_line=previous_line): |
| 1350 yield candidate |
| 1351 |
| 1352 if aggressive: |
| 1353 for key_token_strings in SHORTEN_OPERATOR_GROUPS: |
| 1354 shortened = _shorten_line_at_tokens( |
| 1355 tokens=tokens, |
| 1356 source=source, |
| 1357 indentation=indentation, |
| 1358 indent_word=indent_word, |
| 1359 key_token_strings=key_token_strings, |
| 1360 aggressive=aggressive) |
| 1361 |
| 1362 if shortened is not None and shortened != source: |
| 1363 yield shortened |
| 1364 |
| 1365 if experimental: |
| 1366 for shortened in _shorten_line_at_tokens_new( |
| 1367 tokens=tokens, |
| 1368 source=source, |
| 1369 indentation=indentation, |
| 1370 max_line_length=max_line_length): |
| 1371 |
| 1372 yield shortened |
| 1373 |
| 1374 |
| 1375 def _shorten_line(tokens, source, indentation, indent_word, |
| 1376 aggressive=False, previous_line=''): |
| 1377 """Separate line at OPERATOR. |
| 1378 |
| 1379 The input is expected to be free of newlines except for inside multiline |
| 1380 strings and at the end. |
| 1381 |
| 1382 Multiple candidates will be yielded. |
| 1383 |
| 1384 """ |
| 1385 for (token_type, |
| 1386 token_string, |
| 1387 start_offset, |
| 1388 end_offset) in token_offsets(tokens): |
| 1389 |
| 1390 if ( |
| 1391 token_type == tokenize.COMMENT and |
| 1392 not is_probably_part_of_multiline(previous_line) and |
| 1393 not is_probably_part_of_multiline(source) and |
| 1394 not source[start_offset + 1:].strip().lower().startswith( |
| 1395 ('noqa', 'pragma:', 'pylint:')) |
| 1396 ): |
| 1397 # Move inline comments to previous line. |
| 1398 first = source[:start_offset] |
| 1399 second = source[start_offset:] |
| 1400 yield (indentation + second.strip() + '\n' + |
| 1401 indentation + first.strip() + '\n') |
| 1402 elif token_type == token.OP and token_string != '=': |
| 1403 # Don't break on '=' after keyword as this violates PEP 8. |
| 1404 |
| 1405 assert token_type != token.INDENT |
| 1406 |
| 1407 first = source[:end_offset] |
| 1408 |
| 1409 second_indent = indentation |
| 1410 if first.rstrip().endswith('('): |
| 1411 second_indent += indent_word |
| 1412 elif '(' in first: |
| 1413 second_indent += ' ' * (1 + first.find('(')) |
| 1414 else: |
| 1415 second_indent += indent_word |
| 1416 |
| 1417 second = (second_indent + source[end_offset:].lstrip()) |
| 1418 if ( |
| 1419 not second.strip() or |
| 1420 second.lstrip().startswith('#') |
| 1421 ): |
| 1422 continue |
| 1423 |
| 1424 # Do not begin a line with a comma |
| 1425 if second.lstrip().startswith(','): |
| 1426 continue |
| 1427 # Do not end a line with a dot |
| 1428 if first.rstrip().endswith('.'): |
| 1429 continue |
| 1430 if token_string in '+-*/': |
| 1431 fixed = first + ' \\' + '\n' + second |
| 1432 else: |
| 1433 fixed = first + '\n' + second |
| 1434 |
| 1435 # Only fix if syntax is okay. |
| 1436 if check_syntax(normalize_multiline(fixed) |
| 1437 if aggressive else fixed): |
| 1438 yield indentation + fixed |
| 1439 |
| 1440 |
| 1441 # A convenient way to handle tokens. |
| 1442 Token = collections.namedtuple('Token', ['token_type', 'token_string', |
| 1443 'spos', 'epos', 'line']) |
| 1444 |
| 1445 |
| 1446 class ReformattedLines(object): |
| 1447 |
| 1448 """The reflowed lines of atoms. |
| 1449 |
| 1450 Each part of the line is represented as an "atom." They can be moved |
| 1451 around as needed to get the optimal formatting. |
| 1452 |
| 1453 """ |
| 1454 |
| 1455 ########################################################################### |
| 1456 # Private Classes |
| 1457 |
| 1458 class _Indent(object): |
| 1459 |
| 1460 """Represent an indentation in the atom stream.""" |
| 1461 |
| 1462 def __init__(self, indent_amt): |
| 1463 self._indent_amt = indent_amt |
| 1464 |
| 1465 def emit(self): |
| 1466 return ' ' * self._indent_amt |
| 1467 |
| 1468 @property |
| 1469 def size(self): |
| 1470 return self._indent_amt |
| 1471 |
| 1472 class _Space(object): |
| 1473 |
| 1474 """Represent a space in the atom stream.""" |
| 1475 |
| 1476 def emit(self): |
| 1477 return ' ' |
| 1478 |
| 1479 @property |
| 1480 def size(self): |
| 1481 return 1 |
| 1482 |
| 1483 class _LineBreak(object): |
| 1484 |
| 1485 """Represent a line break in the atom stream.""" |
| 1486 |
| 1487 def emit(self): |
| 1488 return '\n' |
| 1489 |
| 1490 @property |
| 1491 def size(self): |
| 1492 return 0 |
| 1493 |
| 1494 def __init__(self, max_line_length): |
| 1495 self._max_line_length = max_line_length |
| 1496 self._lines = [] |
| 1497 self._bracket_depth = 0 |
| 1498 self._prev_item = None |
| 1499 self._prev_prev_item = None |
| 1500 |
| 1501 def __repr__(self): |
| 1502 return self.emit() |
| 1503 |
| 1504 ########################################################################### |
| 1505 # Public Methods |
| 1506 |
| 1507 def add(self, obj, indent_amt, break_after_open_bracket): |
| 1508 if isinstance(obj, Atom): |
| 1509 self._add_item(obj, indent_amt) |
| 1510 return |
| 1511 |
| 1512 self._add_container(obj, indent_amt, break_after_open_bracket) |
| 1513 |
| 1514 def add_comment(self, item): |
| 1515 num_spaces = 2 |
| 1516 if len(self._lines) > 1: |
| 1517 if isinstance(self._lines[-1], self._Space): |
| 1518 num_spaces -= 1 |
| 1519 if len(self._lines) > 2: |
| 1520 if isinstance(self._lines[-2], self._Space): |
| 1521 num_spaces -= 1 |
| 1522 |
| 1523 while num_spaces > 0: |
| 1524 self._lines.append(self._Space()) |
| 1525 num_spaces -= 1 |
| 1526 self._lines.append(item) |
| 1527 |
| 1528 def add_indent(self, indent_amt): |
| 1529 self._lines.append(self._Indent(indent_amt)) |
| 1530 |
| 1531 def add_line_break(self, indent): |
| 1532 self._lines.append(self._LineBreak()) |
| 1533 self.add_indent(len(indent)) |
| 1534 |
| 1535 def add_line_break_at(self, index, indent_amt): |
| 1536 self._lines.insert(index, self._LineBreak()) |
| 1537 self._lines.insert(index + 1, self._Indent(indent_amt)) |
| 1538 |
| 1539 def add_space_if_needed(self, curr_text, equal=False): |
| 1540 if ( |
| 1541 not self._lines or isinstance( |
| 1542 self._lines[-1], (self._LineBreak, self._Indent, self._Space)) |
| 1543 ): |
| 1544 return |
| 1545 |
| 1546 prev_text = unicode(self._prev_item) |
| 1547 prev_prev_text = ( |
| 1548 unicode(self._prev_prev_item) if self._prev_prev_item else '') |
| 1549 |
| 1550 if ( |
| 1551 # The previous item was a keyword or identifier and the current |
| 1552 # item isn't an operator that doesn't require a space. |
| 1553 ((self._prev_item.is_keyword or self._prev_item.is_string or |
| 1554 self._prev_item.is_name or self._prev_item.is_number) and |
| 1555 (curr_text[0] not in '([{.,:}])' or |
| 1556 (curr_text[0] == '=' and equal))) or |
| 1557 |
| 1558 # Don't place spaces around a '.', unless it's in an 'import' |
| 1559 # statement. |
| 1560 ((prev_prev_text != 'from' and prev_text[-1] != '.' and |
| 1561 curr_text != 'import') and |
| 1562 |
| 1563 # Don't place a space before a colon. |
| 1564 curr_text[0] != ':' and |
| 1565 |
| 1566 # Don't split up ending brackets by spaces. |
| 1567 ((prev_text[-1] in '}])' and curr_text[0] not in '.,}])') or |
| 1568 |
| 1569 # Put a space after a colon or comma. |
| 1570 prev_text[-1] in ':,' or |
| 1571 |
| 1572 # Put space around '=' if asked to. |
| 1573 (equal and prev_text == '=') or |
| 1574 |
| 1575 # Put spaces around non-unary arithmetic operators. |
| 1576 ((self._prev_prev_item and |
| 1577 (prev_text not in '+-' and |
| 1578 (self._prev_prev_item.is_name or |
| 1579 self._prev_prev_item.is_number or |
| 1580 self._prev_prev_item.is_string)) and |
| 1581 prev_text in ('+', '-', '%', '*', '/', '//', '**'))))) |
| 1582 ): |
| 1583 self._lines.append(self._Space()) |
| 1584 |
| 1585 def previous_item(self): |
| 1586 """Return the previous non-whitespace item.""" |
| 1587 return self._prev_item |
| 1588 |
| 1589 def fits_on_current_line(self, item_extent): |
| 1590 return self.current_size() + item_extent <= self._max_line_length |
| 1591 |
| 1592 def current_size(self): |
| 1593 """The size of the current line minus the indentation.""" |
| 1594 size = 0 |
| 1595 for item in reversed(self._lines): |
| 1596 size += item.size |
| 1597 if isinstance(item, self._LineBreak): |
| 1598 break |
| 1599 |
| 1600 return size |
| 1601 |
| 1602 def line_empty(self): |
| 1603 return (self._lines and |
| 1604 isinstance(self._lines[-1], |
| 1605 (self._LineBreak, self._Indent))) |
| 1606 |
| 1607 def emit(self): |
| 1608 string = '' |
| 1609 for item in self._lines: |
| 1610 if isinstance(item, self._LineBreak): |
| 1611 string = string.rstrip() |
| 1612 string += item.emit() |
| 1613 |
| 1614 return string.rstrip() + '\n' |
| 1615 |
| 1616 ########################################################################### |
| 1617 # Private Methods |
| 1618 |
| 1619 def _add_item(self, item, indent_amt): |
| 1620 """Add an item to the line. |
| 1621 |
| 1622 Reflow the line to get the best formatting after the item is |
| 1623 inserted. The bracket depth indicates if the item is being |
| 1624 inserted inside of a container or not. |
| 1625 |
| 1626 """ |
| 1627 if self._prev_item and self._prev_item.is_string and item.is_string: |
| 1628 # Place consecutive string literals on separate lines. |
| 1629 self._lines.append(self._LineBreak()) |
| 1630 self._lines.append(self._Indent(indent_amt)) |
| 1631 |
| 1632 item_text = unicode(item) |
| 1633 if self._lines and self._bracket_depth: |
| 1634 # Adding the item into a container. |
| 1635 self._prevent_default_initializer_splitting(item, indent_amt) |
| 1636 |
| 1637 if item_text in '.,)]}': |
| 1638 self._split_after_delimiter(item, indent_amt) |
| 1639 |
| 1640 elif self._lines and not self.line_empty(): |
| 1641 # Adding the item outside of a container. |
| 1642 if self.fits_on_current_line(len(item_text)): |
| 1643 self._enforce_space(item) |
| 1644 |
| 1645 else: |
| 1646 # Line break for the new item. |
| 1647 self._lines.append(self._LineBreak()) |
| 1648 self._lines.append(self._Indent(indent_amt)) |
| 1649 |
| 1650 self._lines.append(item) |
| 1651 self._prev_item, self._prev_prev_item = item, self._prev_item |
| 1652 |
| 1653 if item_text in '([{': |
| 1654 self._bracket_depth += 1 |
| 1655 |
| 1656 elif item_text in '}])': |
| 1657 self._bracket_depth -= 1 |
| 1658 assert self._bracket_depth >= 0 |
| 1659 |
| 1660 def _add_container(self, container, indent_amt, break_after_open_bracket): |
| 1661 actual_indent = indent_amt + 1 |
| 1662 |
| 1663 if ( |
| 1664 unicode(self._prev_item) != '=' and |
| 1665 not self.line_empty() and |
| 1666 not self.fits_on_current_line( |
| 1667 container.size + self._bracket_depth + 2) |
| 1668 ): |
| 1669 |
| 1670 if unicode(container)[0] == '(' and self._prev_item.is_name: |
| 1671 # Don't split before the opening bracket of a call. |
| 1672 break_after_open_bracket = True |
| 1673 actual_indent = indent_amt + 4 |
| 1674 elif ( |
| 1675 break_after_open_bracket or |
| 1676 unicode(self._prev_item) not in '([{' |
| 1677 ): |
| 1678 # If the container doesn't fit on the current line and the |
| 1679 # current line isn't empty, place the container on the next |
| 1680 # line. |
| 1681 self._lines.append(self._LineBreak()) |
| 1682 self._lines.append(self._Indent(indent_amt)) |
| 1683 break_after_open_bracket = False |
| 1684 else: |
| 1685 actual_indent = self.current_size() + 1 |
| 1686 break_after_open_bracket = False |
| 1687 |
| 1688 if isinstance(container, (ListComprehension, IfExpression)): |
| 1689 actual_indent = indent_amt |
| 1690 |
| 1691 # Increase the continued indentation only if recursing on a |
| 1692 # container. |
| 1693 container.reflow(self, ' ' * actual_indent, |
| 1694 break_after_open_bracket=break_after_open_bracket) |
| 1695 |
| 1696 def _prevent_default_initializer_splitting(self, item, indent_amt): |
| 1697 """Prevent splitting between a default initializer. |
| 1698 |
| 1699 When there is a default initializer, it's best to keep it all on |
| 1700 the same line. It's nicer and more readable, even if it goes |
| 1701 over the maximum allowable line length. This goes back along the |
| 1702 current line to determine if we have a default initializer, and, |
| 1703 if so, to remove extraneous whitespaces and add a line |
| 1704 break/indent before it if needed. |
| 1705 |
| 1706 """ |
| 1707 if unicode(item) == '=': |
| 1708 # This is the assignment in the initializer. Just remove spaces for |
| 1709 # now. |
| 1710 self._delete_whitespace() |
| 1711 return |
| 1712 |
| 1713 if (not self._prev_item or not self._prev_prev_item or |
| 1714 unicode(self._prev_item) != '='): |
| 1715 return |
| 1716 |
| 1717 self._delete_whitespace() |
| 1718 prev_prev_index = self._lines.index(self._prev_prev_item) |
| 1719 |
| 1720 if ( |
| 1721 isinstance(self._lines[prev_prev_index - 1], self._Indent) or |
| 1722 self.fits_on_current_line(item.size + 1) |
| 1723 ): |
| 1724 # The default initializer is already the only item on this line. |
| 1725 # Don't insert a newline here. |
| 1726 return |
| 1727 |
| 1728 # Replace the space with a newline/indent combo. |
| 1729 if isinstance(self._lines[prev_prev_index - 1], self._Space): |
| 1730 del self._lines[prev_prev_index - 1] |
| 1731 |
| 1732 self.add_line_break_at(self._lines.index(self._prev_prev_item), |
| 1733 indent_amt) |
| 1734 |
| 1735 def _split_after_delimiter(self, item, indent_amt): |
| 1736 """Split the line only after a delimiter.""" |
| 1737 self._delete_whitespace() |
| 1738 |
| 1739 if self.fits_on_current_line(item.size): |
| 1740 return |
| 1741 |
| 1742 last_space = None |
| 1743 for item in reversed(self._lines): |
| 1744 if ( |
| 1745 last_space and |
| 1746 (not isinstance(item, Atom) or not item.is_colon) |
| 1747 ): |
| 1748 break |
| 1749 else: |
| 1750 last_space = None |
| 1751 if isinstance(item, self._Space): |
| 1752 last_space = item |
| 1753 if isinstance(item, (self._LineBreak, self._Indent)): |
| 1754 return |
| 1755 |
| 1756 if not last_space: |
| 1757 return |
| 1758 |
| 1759 self.add_line_break_at(self._lines.index(last_space), indent_amt) |
| 1760 |
| 1761 def _enforce_space(self, item): |
| 1762 """Enforce a space in certain situations. |
| 1763 |
| 1764 There are cases where we will want a space where normally we |
| 1765 wouldn't put one. This just enforces the addition of a space. |
| 1766 |
| 1767 """ |
| 1768 if isinstance(self._lines[-1], |
| 1769 (self._Space, self._LineBreak, self._Indent)): |
| 1770 return |
| 1771 |
| 1772 if not self._prev_item: |
| 1773 return |
| 1774 |
| 1775 item_text = unicode(item) |
| 1776 prev_text = unicode(self._prev_item) |
| 1777 |
| 1778 # Prefer a space around a '.' in an import statement, and between the |
| 1779 # 'import' and '('. |
| 1780 if ( |
| 1781 (item_text == '.' and prev_text == 'from') or |
| 1782 (item_text == 'import' and prev_text == '.') or |
| 1783 (item_text == '(' and prev_text == 'import') |
| 1784 ): |
| 1785 self._lines.append(self._Space()) |
| 1786 |
| 1787 def _delete_whitespace(self): |
| 1788 """Delete all whitespace from the end of the line.""" |
| 1789 while isinstance(self._lines[-1], (self._Space, self._LineBreak, |
| 1790 self._Indent)): |
| 1791 del self._lines[-1] |
| 1792 |
| 1793 |
| 1794 class Atom(object): |
| 1795 |
| 1796 """The smallest unbreakable unit that can be reflowed.""" |
| 1797 |
| 1798 def __init__(self, atom): |
| 1799 self._atom = atom |
| 1800 |
| 1801 def __repr__(self): |
| 1802 return self._atom.token_string |
| 1803 |
| 1804 def __len__(self): |
| 1805 return self.size |
| 1806 |
| 1807 def reflow( |
| 1808 self, reflowed_lines, continued_indent, extent, |
| 1809 break_after_open_bracket=False, |
| 1810 is_list_comp_or_if_expr=False, |
| 1811 next_is_dot=False |
| 1812 ): |
| 1813 if self._atom.token_type == tokenize.COMMENT: |
| 1814 reflowed_lines.add_comment(self) |
| 1815 return |
| 1816 |
| 1817 total_size = extent if extent else self.size |
| 1818 |
| 1819 if self._atom.token_string not in ',:([{}])': |
| 1820 # Some atoms will need an extra 1-sized space token after them. |
| 1821 total_size += 1 |
| 1822 |
| 1823 prev_item = reflowed_lines.previous_item() |
| 1824 if ( |
| 1825 not is_list_comp_or_if_expr and |
| 1826 not reflowed_lines.fits_on_current_line(total_size) and |
| 1827 not (next_is_dot and |
| 1828 reflowed_lines.fits_on_current_line(self.size + 1)) and |
| 1829 not reflowed_lines.line_empty() and |
| 1830 not self.is_colon and |
| 1831 not (prev_item and prev_item.is_name and |
| 1832 unicode(self) == '(') |
| 1833 ): |
| 1834 # Start a new line if there is already something on the line and |
| 1835 # adding this atom would make it go over the max line length. |
| 1836 reflowed_lines.add_line_break(continued_indent) |
| 1837 else: |
| 1838 reflowed_lines.add_space_if_needed(unicode(self)) |
| 1839 |
| 1840 reflowed_lines.add(self, len(continued_indent), |
| 1841 break_after_open_bracket) |
| 1842 |
| 1843 def emit(self): |
| 1844 return self.__repr__() |
| 1845 |
| 1846 @property |
| 1847 def is_keyword(self): |
| 1848 return keyword.iskeyword(self._atom.token_string) |
| 1849 |
| 1850 @property |
| 1851 def is_string(self): |
| 1852 return self._atom.token_type == tokenize.STRING |
| 1853 |
| 1854 @property |
| 1855 def is_name(self): |
| 1856 return self._atom.token_type == tokenize.NAME |
| 1857 |
| 1858 @property |
| 1859 def is_number(self): |
| 1860 return self._atom.token_type == tokenize.NUMBER |
| 1861 |
| 1862 @property |
| 1863 def is_comma(self): |
| 1864 return self._atom.token_string == ',' |
| 1865 |
| 1866 @property |
| 1867 def is_colon(self): |
| 1868 return self._atom.token_string == ':' |
| 1869 |
| 1870 @property |
| 1871 def size(self): |
| 1872 return len(self._atom.token_string) |
| 1873 |
| 1874 |
| 1875 class Container(object): |
| 1876 |
| 1877 """Base class for all container types.""" |
| 1878 |
| 1879 def __init__(self, items): |
| 1880 self._items = items |
| 1881 |
| 1882 def __repr__(self): |
| 1883 string = '' |
| 1884 last_was_keyword = False |
| 1885 |
| 1886 for item in self._items: |
| 1887 if item.is_comma: |
| 1888 string += ', ' |
| 1889 elif item.is_colon: |
| 1890 string += ': ' |
| 1891 else: |
| 1892 item_string = unicode(item) |
| 1893 if ( |
| 1894 string and |
| 1895 (last_was_keyword or |
| 1896 (not string.endswith(tuple('([{,.:}]) ')) and |
| 1897 not item_string.startswith(tuple('([{,.:}])')))) |
| 1898 ): |
| 1899 string += ' ' |
| 1900 string += item_string |
| 1901 |
| 1902 last_was_keyword = item.is_keyword |
| 1903 return string |
| 1904 |
| 1905 def __iter__(self): |
| 1906 for element in self._items: |
| 1907 yield element |
| 1908 |
| 1909 def __getitem__(self, idx): |
| 1910 return self._items[idx] |
| 1911 |
| 1912 def reflow(self, reflowed_lines, continued_indent, |
| 1913 break_after_open_bracket=False): |
| 1914 last_was_container = False |
| 1915 for (index, item) in enumerate(self._items): |
| 1916 next_item = get_item(self._items, index + 1) |
| 1917 |
| 1918 if isinstance(item, Atom): |
| 1919 is_list_comp_or_if_expr = ( |
| 1920 isinstance(self, (ListComprehension, IfExpression))) |
| 1921 item.reflow(reflowed_lines, continued_indent, |
| 1922 self._get_extent(index), |
| 1923 is_list_comp_or_if_expr=is_list_comp_or_if_expr, |
| 1924 next_is_dot=(next_item and |
| 1925 unicode(next_item) == '.')) |
| 1926 if last_was_container and item.is_comma: |
| 1927 reflowed_lines.add_line_break(continued_indent) |
| 1928 last_was_container = False |
| 1929 else: # isinstance(item, Container) |
| 1930 reflowed_lines.add(item, len(continued_indent), |
| 1931 break_after_open_bracket) |
| 1932 last_was_container = not isinstance(item, (ListComprehension, |
| 1933 IfExpression)) |
| 1934 |
| 1935 if ( |
| 1936 break_after_open_bracket and index == 0 and |
| 1937 # Prefer to keep empty containers together instead of |
| 1938 # separating them. |
| 1939 unicode(item) == self.open_bracket and |
| 1940 (not next_item or unicode(next_item) != self.close_bracket) and |
| 1941 (len(self._items) != 3 or not isinstance(next_item, Atom)) |
| 1942 ): |
| 1943 reflowed_lines.add_line_break(continued_indent) |
| 1944 break_after_open_bracket = False |
| 1945 else: |
| 1946 next_next_item = get_item(self._items, index + 2) |
| 1947 if ( |
| 1948 unicode(item) not in ['.', '%', 'in'] and |
| 1949 next_item and not isinstance(next_item, Container) and |
| 1950 unicode(next_item) != ':' and |
| 1951 next_next_item and (not isinstance(next_next_item, Atom) or |
| 1952 unicode(next_item) == 'not') and |
| 1953 not reflowed_lines.line_empty() and |
| 1954 not reflowed_lines.fits_on_current_line( |
| 1955 self._get_extent(index + 1) + 2) |
| 1956 ): |
| 1957 reflowed_lines.add_line_break(continued_indent) |
| 1958 |
| 1959 def _get_extent(self, index): |
| 1960 """The extent of the full element. |
| 1961 |
| 1962 E.g., the length of a function call or keyword. |
| 1963 |
| 1964 """ |
| 1965 extent = 0 |
| 1966 prev_item = get_item(self._items, index - 1) |
| 1967 seen_dot = prev_item and unicode(prev_item) == '.' |
| 1968 while index < len(self._items): |
| 1969 item = get_item(self._items, index) |
| 1970 index += 1 |
| 1971 |
| 1972 if isinstance(item, (ListComprehension, IfExpression)): |
| 1973 break |
| 1974 |
| 1975 if isinstance(item, Container): |
| 1976 if prev_item and prev_item.is_name: |
| 1977 if seen_dot: |
| 1978 extent += 1 |
| 1979 else: |
| 1980 extent += item.size |
| 1981 |
| 1982 prev_item = item |
| 1983 continue |
| 1984 elif (unicode(item) not in ['.', '=', ':', 'not'] and |
| 1985 not item.is_name and not item.is_string): |
| 1986 break |
| 1987 |
| 1988 if unicode(item) == '.': |
| 1989 seen_dot = True |
| 1990 |
| 1991 extent += item.size |
| 1992 prev_item = item |
| 1993 |
| 1994 return extent |
| 1995 |
| 1996 @property |
| 1997 def is_string(self): |
| 1998 return False |
| 1999 |
| 2000 @property |
| 2001 def size(self): |
| 2002 return len(self.__repr__()) |
| 2003 |
| 2004 @property |
| 2005 def is_keyword(self): |
| 2006 return False |
| 2007 |
| 2008 @property |
| 2009 def is_name(self): |
| 2010 return False |
| 2011 |
| 2012 @property |
| 2013 def is_comma(self): |
| 2014 return False |
| 2015 |
| 2016 @property |
| 2017 def is_colon(self): |
| 2018 return False |
| 2019 |
| 2020 @property |
| 2021 def open_bracket(self): |
| 2022 return None |
| 2023 |
| 2024 @property |
| 2025 def close_bracket(self): |
| 2026 return None |
| 2027 |
| 2028 |
| 2029 class Tuple(Container): |
| 2030 |
| 2031 """A high-level representation of a tuple.""" |
| 2032 |
| 2033 @property |
| 2034 def open_bracket(self): |
| 2035 return '(' |
| 2036 |
| 2037 @property |
| 2038 def close_bracket(self): |
| 2039 return ')' |
| 2040 |
| 2041 |
| 2042 class List(Container): |
| 2043 |
| 2044 """A high-level representation of a list.""" |
| 2045 |
| 2046 @property |
| 2047 def open_bracket(self): |
| 2048 return '[' |
| 2049 |
| 2050 @property |
| 2051 def close_bracket(self): |
| 2052 return ']' |
| 2053 |
| 2054 |
| 2055 class DictOrSet(Container): |
| 2056 |
| 2057 """A high-level representation of a dictionary or set.""" |
| 2058 |
| 2059 @property |
| 2060 def open_bracket(self): |
| 2061 return '{' |
| 2062 |
| 2063 @property |
| 2064 def close_bracket(self): |
| 2065 return '}' |
| 2066 |
| 2067 |
| 2068 class ListComprehension(Container): |
| 2069 |
| 2070 """A high-level representation of a list comprehension.""" |
| 2071 |
| 2072 @property |
| 2073 def size(self): |
| 2074 length = 0 |
| 2075 for item in self._items: |
| 2076 if isinstance(item, IfExpression): |
| 2077 break |
| 2078 length += item.size |
| 2079 return length |
| 2080 |
| 2081 |
| 2082 class IfExpression(Container): |
| 2083 |
| 2084 """A high-level representation of an if-expression.""" |
| 2085 |
| 2086 |
| 2087 def _parse_container(tokens, index, for_or_if=None): |
| 2088 """Parse a high-level container, such as a list, tuple, etc.""" |
| 2089 |
| 2090 # Store the opening bracket. |
| 2091 items = [Atom(Token(*tokens[index]))] |
| 2092 index += 1 |
| 2093 |
| 2094 num_tokens = len(tokens) |
| 2095 while index < num_tokens: |
| 2096 tok = Token(*tokens[index]) |
| 2097 |
| 2098 if tok.token_string in ',)]}': |
| 2099 # First check if we're at the end of a list comprehension or |
| 2100 # if-expression. Don't add the ending token as part of the list |
| 2101 # comprehension or if-expression, because they aren't part of those |
| 2102 # constructs. |
| 2103 if for_or_if == 'for': |
| 2104 return (ListComprehension(items), index - 1) |
| 2105 |
| 2106 elif for_or_if == 'if': |
| 2107 return (IfExpression(items), index - 1) |
| 2108 |
| 2109 # Otherwise, we've reached the end of a container; the closing
| 2110 # token belongs to the container itself.
| 2111 items.append(Atom(tok))
| 2112 
| 2113 if tok.token_string == ')': |
| 2114 # The end of a tuple. |
| 2115 return (Tuple(items), index) |
| 2116 |
| 2117 elif tok.token_string == ']': |
| 2118 # The end of a list. |
| 2119 return (List(items), index) |
| 2120 |
| 2121 elif tok.token_string == '}': |
| 2122 # The end of a dictionary or set. |
| 2123 return (DictOrSet(items), index) |
| 2124 |
| 2125 elif tok.token_string in '([{': |
| 2126 # A sub-container is being defined. |
| 2127 (container, index) = _parse_container(tokens, index) |
| 2128 items.append(container) |
| 2129 |
| 2130 elif tok.token_string == 'for': |
| 2131 (container, index) = _parse_container(tokens, index, 'for') |
| 2132 items.append(container) |
| 2133 |
| 2134 elif tok.token_string == 'if': |
| 2135 (container, index) = _parse_container(tokens, index, 'if') |
| 2136 items.append(container) |
| 2137 |
| 2138 else: |
| 2139 items.append(Atom(tok)) |
| 2140 |
| 2141 index += 1 |
| 2142 |
| 2143 return (None, None) |
| 2144 |
| 2145 |
| 2146 def _parse_tokens(tokens): |
| 2147 """Parse the tokens. |
| 2148 |
| 2149 This converts the tokens into a form where we can manipulate them |
| 2150 more easily. |
| 2151 |
| 2152 """ |
| 2153 |
| 2154 index = 0 |
| 2155 parsed_tokens = [] |
| 2156 |
| 2157 num_tokens = len(tokens) |
| 2158 while index < num_tokens: |
| 2159 tok = Token(*tokens[index]) |
| 2160 |
| 2161 assert tok.token_type != token.INDENT |
| 2162 if tok.token_type == tokenize.NEWLINE: |
| 2163 # There's only one newline and it's at the end. |
| 2164 break |
| 2165 |
| 2166 if tok.token_string in '([{': |
| 2167 (container, index) = _parse_container(tokens, index) |
| 2168 if not container: |
| 2169 return None |
| 2170 parsed_tokens.append(container) |
| 2171 else: |
| 2172 parsed_tokens.append(Atom(tok)) |
| 2173 |
| 2174 index += 1 |
| 2175 |
| 2176 return parsed_tokens |
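| 
| # Illustrative sketch (hypothetical, for exposition only): parsing the
| # tokens of 'foo(1, 2)' yields roughly
| #     [Atom(foo), Tuple([Atom((), Atom(1), Atom(,), Atom(2), Atom())])]
| # where the Tuple's items include its own bracket and comma Atoms.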
| 2177 |
| 2178 |
| 2179 def _reflow_lines(parsed_tokens, indentation, max_line_length, |
| 2180 start_on_prefix_line): |
| 2181 """Reflow the lines so that it looks nice.""" |
| 2182 |
| 2183 if unicode(parsed_tokens[0]) == 'def': |
| 2184 # A function definition gets indented a bit more. |
| 2185 continued_indent = indentation + ' ' * 2 * DEFAULT_INDENT_SIZE |
| 2186 else: |
| 2187 continued_indent = indentation + ' ' * DEFAULT_INDENT_SIZE |
| 2188 |
| 2189 break_after_open_bracket = not start_on_prefix_line |
| 2190 |
| 2191 lines = ReformattedLines(max_line_length) |
| 2192 lines.add_indent(len(indentation.lstrip('\r\n'))) |
| 2193 |
| 2194 if not start_on_prefix_line: |
| 2195 # If splitting after the opening bracket will cause the first element |
| 2196 # to be aligned weirdly, don't try it. |
| 2197 first_token = get_item(parsed_tokens, 0) |
| 2198 second_token = get_item(parsed_tokens, 1) |
| 2199 |
| 2200 if ( |
| 2201 first_token and second_token and |
| 2202 unicode(second_token)[0] == '(' and |
| 2203 len(indentation) + len(first_token) + 1 == len(continued_indent) |
| 2204 ): |
| 2205 return None |
| 2206 |
| 2207 for item in parsed_tokens: |
| 2208 lines.add_space_if_needed(unicode(item), equal=True) |
| 2209 |
| 2210 save_continued_indent = continued_indent |
| 2211 if start_on_prefix_line and isinstance(item, Container): |
| 2212 start_on_prefix_line = False |
| 2213 continued_indent = ' ' * (lines.current_size() + 1) |
| 2214 |
| 2215 item.reflow(lines, continued_indent, break_after_open_bracket) |
| 2216 continued_indent = save_continued_indent |
| 2217 |
| 2218 return lines.emit() |
| 2219 |
| 2220 |
| 2221 def _shorten_line_at_tokens_new(tokens, source, indentation, |
| 2222 max_line_length): |
| 2223 """Shorten the line taking its length into account. |
| 2224 |
| 2225 The input is expected to be free of newlines except for inside |
| 2226 multiline strings and at the end. |
| 2227 |
| 2228 """ |
| 2229 # Yield the original source so we can see if it's a better choice than
| 2230 # the shortened candidate lines we generate here.
| 2231 yield indentation + source |
| 2232 |
| 2233 parsed_tokens = _parse_tokens(tokens) |
| 2234 |
| 2235 if parsed_tokens: |
| 2236 # Perform two reflows. The first one starts on the same line as the |
| 2237 # prefix. The second starts on the line after the prefix. |
| 2238 fixed = _reflow_lines(parsed_tokens, indentation, max_line_length, |
| 2239 start_on_prefix_line=True) |
| 2240 if fixed and check_syntax(normalize_multiline(fixed.lstrip())): |
| 2241 yield fixed |
| 2242 |
| 2243 fixed = _reflow_lines(parsed_tokens, indentation, max_line_length, |
| 2244 start_on_prefix_line=False) |
| 2245 if fixed and check_syntax(normalize_multiline(fixed.lstrip())): |
| 2246 yield fixed |
| 2247 |
| 2248 |
| 2249 def _shorten_line_at_tokens(tokens, source, indentation, indent_word, |
| 2250 key_token_strings, aggressive): |
| 2251 """Separate line by breaking at tokens in key_token_strings. |
| 2252 |
| 2253 The input is expected to be free of newlines except for inside |
| 2254 multiline strings and at the end. |
| 2255 |
| 2256 """ |
| 2257 offsets = [] |
| 2258 for (index, _t) in enumerate(token_offsets(tokens)): |
| 2259 (token_type, |
| 2260 token_string, |
| 2261 start_offset, |
| 2262 end_offset) = _t |
| 2263 |
| 2264 assert token_type != token.INDENT |
| 2265 |
| 2266 if token_string in key_token_strings: |
| 2267 # Do not break in containers with zero or one items. |
| 2268 unwanted_next_token = { |
| 2269 '(': ')', |
| 2270 '[': ']', |
| 2271 '{': '}'}.get(token_string) |
| 2272 if unwanted_next_token: |
| 2273 if ( |
| 2274 get_item(tokens, |
| 2275 index + 1, |
| 2276 default=[None, None])[1] == unwanted_next_token or |
| 2277 get_item(tokens, |
| 2278 index + 2, |
| 2279 default=[None, None])[1] == unwanted_next_token |
| 2280 ): |
| 2281 continue |
| 2282 |
| 2283 if ( |
| 2284 index > 2 and token_string == '(' and |
| 2285 tokens[index - 1][1] in ',(%[' |
| 2286 ): |
| 2287 # Don't split after a tuple start, or before a tuple start if |
| 2288 # the tuple is in a list. |
| 2289 continue |
| 2290 |
| 2291 if end_offset < len(source) - 1: |
| 2292 # Don't split right before newline. |
| 2293 offsets.append(end_offset) |
| 2294 else: |
| 2295 # Break at adjacent strings. These were probably meant to be on |
| 2296 # separate lines in the first place. |
| 2297 previous_token = get_item(tokens, index - 1) |
| 2298 if ( |
| 2299 token_type == tokenize.STRING and |
| 2300 previous_token and previous_token[0] == tokenize.STRING |
| 2301 ): |
| 2302 offsets.append(start_offset) |
| 2303 |
| 2304 current_indent = None |
| 2305 fixed = None |
| 2306 for line in split_at_offsets(source, offsets): |
| 2307 if fixed: |
| 2308 fixed += '\n' + current_indent + line |
| 2309 |
| 2310 for symbol in '([{': |
| 2311 if line.endswith(symbol): |
| 2312 current_indent += indent_word |
| 2313 else: |
| 2314 # First line. |
| 2315 fixed = line |
| 2316 assert not current_indent |
| 2317 current_indent = indent_word |
| 2318 |
| 2319 assert fixed is not None |
| 2320 |
| 2321 if check_syntax(normalize_multiline(fixed) |
| 2322 if aggressive > 1 else fixed): |
| 2323 return indentation + fixed |
| 2324 else: |
| 2325 return None |
| 2326 |
| 2327 |
| 2328 def token_offsets(tokens): |
| 2329 """Yield tokens and offsets.""" |
| 2330 end_offset = 0 |
| 2331 previous_end_row = 0 |
| 2332 previous_end_column = 0 |
| 2333 for t in tokens: |
| 2334 token_type = t[0] |
| 2335 token_string = t[1] |
| 2336 (start_row, start_column) = t[2] |
| 2337 (end_row, end_column) = t[3] |
| 2338 |
| 2339 # Account for the whitespace between tokens. |
| 2340 end_offset += start_column |
| 2341 if previous_end_row == start_row: |
| 2342 end_offset -= previous_end_column |
| 2343 |
| 2344 # Record the start offset of the token. |
| 2345 start_offset = end_offset |
| 2346 |
| 2347 # Account for the length of the token itself. |
| 2348 end_offset += len(token_string) |
| 2349 |
| 2350 yield (token_type, |
| 2351 token_string, |
| 2352 start_offset, |
| 2353 end_offset) |
| 2354 |
| 2355 previous_end_row = end_row |
| 2356 previous_end_column = end_column |
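| 
| # Illustrative example (hedged): for the tokens of 'x = 1', this
| # yields offsets into the logical line:
| #     (NAME, 'x', 0, 1), (OP, '=', 2, 3), (NUMBER, '1', 4, 5)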
| 2357 |
| 2358 |
| 2359 def normalize_multiline(line): |
| 2360 """Normalize multiline-related code that will cause syntax error. |
| 2361 |
| 2362 This is for purposes of checking syntax. |
| 2363 |
| 2364 """ |
| 2365 if line.startswith('def ') and line.rstrip().endswith(':'): |
| 2366 return line + ' pass' |
| 2367 elif line.startswith('return '): |
| 2368 return 'def _(): ' + line |
| 2369 elif line.startswith('@'): |
| 2370 return line + 'def _(): pass' |
| 2371 elif line.startswith('class '): |
| 2372 return line + ' pass' |
| 2373 elif line.startswith('if '): |
| 2374 return line + ' pass' |
| 2375 else: |
| 2376 return line |
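| 
| # Illustrative examples (hedged):
| #
| #     >>> normalize_multiline('def foo():')
| #     'def foo(): pass'
| #     >>> normalize_multiline('return x')
| #     'def _(): return x'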
| 2377 |
| 2378 |
| 2379 def fix_whitespace(line, offset, replacement): |
| 2380 """Replace whitespace at offset and return fixed line.""" |
| 2381 # Replace escaped newlines too |
| 2382 left = line[:offset].rstrip('\n\r \t\\') |
| 2383 right = line[offset:].lstrip('\n\r \t\\') |
| 2384 if right.startswith('#'): |
| 2385 return line |
| 2386 else: |
| 2387 return left + replacement + right |
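| 
| # Illustrative example (hedged): replace the whitespace run at offset 1
| # with a single space:
| #
| #     >>> fix_whitespace('a  = 1', offset=1, replacement=' ')
| #     'a = 1'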
| 2388 |
| 2389 |
| 2390 def _execute_pep8(pep8_options, source): |
| 2391 """Execute pep8 via python method calls.""" |
| 2392 class QuietReport(pep8.BaseReport): |
| 2393 |
| 2394 """Version of checker that does not print.""" |
| 2395 |
| 2396 def __init__(self, options): |
| 2397 super(QuietReport, self).__init__(options) |
| 2398 self.__full_error_results = [] |
| 2399 |
| 2400 def error(self, line_number, offset, text, _): |
| 2401 """Collect errors.""" |
| 2402 code = super(QuietReport, self).error(line_number, offset, text, _) |
| 2403 if code: |
| 2404 self.__full_error_results.append( |
| 2405 {'id': code, |
| 2406 'line': line_number, |
| 2407 'column': offset + 1, |
| 2408 'info': text}) |
| 2409 |
| 2410 def full_error_results(self): |
| 2411 """Return error results in detail. |
| 2412 |
| 2413 Results are in the form of a list of dictionaries. Each |
| 2414 dictionary contains 'id', 'line', 'column', and 'info'. |
| 2415 |
| 2416 """ |
| 2417 return self.__full_error_results |
| 2418 |
| 2419 checker = pep8.Checker('', lines=source, |
| 2420 reporter=QuietReport, **pep8_options) |
| 2421 checker.check_all() |
| 2422 return checker.report.full_error_results() |
| 2423 |
| 2424 |
| 2425 def _remove_leading_and_normalize(line): |
| 2426 return line.lstrip().rstrip(CR + LF) + '\n' |
| 2427 |
| 2428 |
| 2429 class Reindenter(object): |
| 2430 |
| 2431 """Reindents badly-indented code to uniformly use four-space indentation. |
| 2432 |
| 2433 Released to the public domain, by Tim Peters, 03 October 2000. |
| 2434 |
| 2435 """ |
| 2436 |
| 2437 def __init__(self, input_text): |
| 2438 sio = io.StringIO(input_text) |
| 2439 source_lines = sio.readlines() |
| 2440 |
| 2441 self.string_content_line_numbers = multiline_string_lines(input_text) |
| 2442 |
| 2443 # File lines, rstripped & tab-expanded. Dummy at start is so |
| 2444 # that we can use tokenize's 1-based line numbering easily. |
| 2445 # Note that a line is all-blank iff it is a newline. |
| 2446 self.lines = [] |
| 2447 for line_number, line in enumerate(source_lines, start=1): |
| 2448 # Do not modify if inside a multiline string. |
| 2449 if line_number in self.string_content_line_numbers: |
| 2450 self.lines.append(line) |
| 2451 else: |
| 2452 # Only expand leading tabs. |
| 2453 self.lines.append(_get_indentation(line).expandtabs() + |
| 2454 _remove_leading_and_normalize(line)) |
| 2455 |
| 2456 self.lines.insert(0, None) |
| 2457 self.index = 1 # index into self.lines of next line |
| 2458 self.input_text = input_text |
| 2459 |
| 2460 def run(self, indent_size=DEFAULT_INDENT_SIZE): |
| 2461 """Fix indentation and return modified line numbers. |
| 2462 |
| 2463 Line numbers are indexed at 1. |
| 2464 |
| 2465 """ |
| 2466 if indent_size < 1: |
| 2467 return self.input_text |
| 2468 |
| 2469 try: |
| 2470 stats = _reindent_stats(tokenize.generate_tokens(self.getline)) |
| 2471 except (SyntaxError, tokenize.TokenError): |
| 2472 return self.input_text |
| 2473 # Remove trailing empty lines. |
| 2474 lines = self.lines |
| 2475 while lines and lines[-1] == '\n': |
| 2476 lines.pop() |
| 2477 # Sentinel. |
| 2478 stats.append((len(lines), 0)) |
| 2479 # Map count of leading spaces to the number we want.
| 2480 have2want = {} |
| 2481 # Program after transformation. |
| 2482 after = [] |
| 2483 # Copy over initial empty lines -- there's nothing to do until |
| 2484 # we see a line with *something* on it. |
| 2485 i = stats[0][0] |
| 2486 after.extend(lines[1:i]) |
| 2487 for i in range(len(stats) - 1): |
| 2488 thisstmt, thislevel = stats[i] |
| 2489 nextstmt = stats[i + 1][0] |
| 2490 have = _leading_space_count(lines[thisstmt]) |
| 2491 want = thislevel * indent_size |
| 2492 if want < 0: |
| 2493 # A comment line. |
| 2494 if have: |
| 2495 # An indented comment line. If we saw the same |
| 2496 # indentation before, reuse what it most recently |
| 2497 # mapped to. |
| 2498 want = have2want.get(have, -1) |
| 2499 if want < 0: |
| 2500 # Then it probably belongs to the next real stmt. |
| 2501 for j in range(i + 1, len(stats) - 1): |
| 2502 jline, jlevel = stats[j] |
| 2503 if jlevel >= 0: |
| 2504 if have == _leading_space_count(lines[jline]): |
| 2505 want = jlevel * indent_size |
| 2506 break |
| 2507 if want < 0: # Maybe it's a hanging |
| 2508 # comment like this one, |
| 2509 # in which case we should shift it like its base |
| 2510 # line got shifted. |
| 2511 for j in range(i - 1, -1, -1): |
| 2512 jline, jlevel = stats[j] |
| 2513 if jlevel >= 0: |
| 2514 want = (have + _leading_space_count( |
| 2515 after[jline - 1]) - |
| 2516 _leading_space_count(lines[jline])) |
| 2517 break |
| 2518 if want < 0: |
| 2519 # Still no luck -- leave it alone. |
| 2520 want = have |
| 2521 else: |
| 2522 want = 0 |
| 2523 assert want >= 0 |
| 2524 have2want[have] = want |
| 2525 diff = want - have |
| 2526 if diff == 0 or have == 0: |
| 2527 after.extend(lines[thisstmt:nextstmt]) |
| 2528 else: |
| 2529 for line_number, line in enumerate(lines[thisstmt:nextstmt], |
| 2530 start=thisstmt): |
| 2531 if line_number in self.string_content_line_numbers: |
| 2532 after.append(line) |
| 2533 elif diff > 0: |
| 2534 if line == '\n': |
| 2535 after.append(line) |
| 2536 else: |
| 2537 after.append(' ' * diff + line) |
| 2538 else: |
| 2539 remove = min(_leading_space_count(line), -diff) |
| 2540 after.append(line[remove:]) |
| 2541 |
| 2542 return ''.join(after) |
| 2543 |
| 2544 def getline(self): |
| 2545 """Line-getter for tokenize.""" |
| 2546 if self.index >= len(self.lines): |
| 2547 line = '' |
| 2548 else: |
| 2549 line = self.lines[self.index] |
| 2550 self.index += 1 |
| 2551 return line |
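| 
| # Illustrative usage (hedged):
| #
| #     >>> Reindenter('if True:\n  x = 1\n').run()
| #     'if True:\n    x = 1\n'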
| 2552 |
| 2553 |
| 2554 def _reindent_stats(tokens): |
| 2555 """Return list of (lineno, indentlevel) pairs. |
| 2556 |
| 2557 One for each stmt and comment line. indentlevel is -1 for comment lines, as |
| 2558 a signal that tokenize doesn't know what to do about them; indeed, they're |
| 2559 our headache! |
| 2560 |
| 2561 """ |
| 2562 find_stmt = 1 # Next token begins a fresh stmt? |
| 2563 level = 0 # Current indent level. |
| 2564 stats = [] |
| 2565 |
| 2566 for t in tokens: |
| 2567 token_type = t[0] |
| 2568 sline = t[2][0] |
| 2569 line = t[4] |
| 2570 |
| 2571 if token_type == tokenize.NEWLINE: |
| 2572 # A program statement, or ENDMARKER, will eventually follow, |
| 2573 # after some (possibly empty) run of tokens of the form |
| 2574 # (NL | COMMENT)* (INDENT | DEDENT+)? |
| 2575 find_stmt = 1 |
| 2576 |
| 2577 elif token_type == tokenize.INDENT: |
| 2578 find_stmt = 1 |
| 2579 level += 1 |
| 2580 |
| 2581 elif token_type == tokenize.DEDENT: |
| 2582 find_stmt = 1 |
| 2583 level -= 1 |
| 2584 |
| 2585 elif token_type == tokenize.COMMENT: |
| 2586 if find_stmt: |
| 2587 stats.append((sline, -1)) |
| 2588 # But we're still looking for a new stmt, so leave |
| 2589 # find_stmt alone. |
| 2590 |
| 2591 elif token_type == tokenize.NL: |
| 2592 pass |
| 2593 |
| 2594 elif find_stmt: |
| 2595 # This is the first "real token" following a NEWLINE, so it |
| 2596 # must be the first token of the next program statement, or an |
| 2597 # ENDMARKER. |
| 2598 find_stmt = 0 |
| 2599 if line: # Not endmarker. |
| 2600 stats.append((sline, level)) |
| 2601 |
| 2602 return stats |
| 2603 |
| 2604 |
| 2605 def _leading_space_count(line): |
| 2606 """Return number of leading spaces in line.""" |
| 2607 i = 0 |
| 2608 while i < len(line) and line[i] == ' ': |
| 2609 i += 1 |
| 2610 return i |
| 2611 |
| 2612 |
| 2613 def refactor_with_2to3(source_text, fixer_names): |
| 2614 """Use lib2to3 to refactor the source. |
| 2615 |
| 2616 Return the refactored source code. |
| 2617 |
| 2618 """ |
| 2619 from lib2to3.refactor import RefactoringTool |
| 2620 fixers = ['lib2to3.fixes.fix_' + name for name in fixer_names] |
| 2621 tool = RefactoringTool(fixer_names=fixers, explicit=fixers) |
| 2622 |
| 2623 from lib2to3.pgen2 import tokenize as lib2to3_tokenize |
| 2624 try: |
| 2625 return unicode(tool.refactor_string(source_text, name='')) |
| 2626 except lib2to3_tokenize.TokenError: |
| 2627 return source_text |
| 2628 |
| 2629 |
| 2630 def check_syntax(code): |
| 2631 """Return True if syntax is okay.""" |
| 2632 try: |
| 2633 return compile(code, '<string>', 'exec') |
| 2634 except (SyntaxError, TypeError, UnicodeDecodeError): |
| 2635 return False |
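| 
| # Illustrative examples (hedged); the value returned on success is the
| # compiled code object, which is truthy:
| #
| #     >>> bool(check_syntax('x = 1\n'))
| #     True
| #     >>> check_syntax('x = = 1\n')
| #     False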
| 2636 |
| 2637 |
| 2638 def filter_results(source, results, aggressive): |
| 2639 """Filter out spurious reports from pep8. |
| 2640 |
| 2641 If aggressive is enabled (non-zero), we allow possibly unsafe fixes (E711, E712).
| 2642 |
| 2643 """ |
| 2644 non_docstring_string_line_numbers = multiline_string_lines( |
| 2645 source, include_docstrings=False) |
| 2646 all_string_line_numbers = multiline_string_lines( |
| 2647 source, include_docstrings=True) |
| 2648 |
| 2649 commented_out_code_line_numbers = commented_out_code_lines(source) |
| 2650 |
| 2651 for r in results: |
| 2652 issue_id = r['id'].lower() |
| 2653 |
| 2654 if r['line'] in non_docstring_string_line_numbers: |
| 2655 if issue_id.startswith(('e1', 'e501', 'w191')): |
| 2656 continue |
| 2657 |
| 2658 if r['line'] in all_string_line_numbers: |
| 2659 if issue_id in ['e501']: |
| 2660 continue |
| 2661 |
| 2662 # We must offset by 1 for lines that contain the trailing contents of |
| 2663 # multiline strings. |
| 2664 if not aggressive and (r['line'] + 1) in all_string_line_numbers: |
| 2665 # Do not modify multiline strings in non-aggressive mode. Removing
| 2666 # trailing whitespace could break doctests.
| 2667 if issue_id.startswith(('w29', 'w39')): |
| 2668 continue |
| 2669 |
| 2670 if aggressive <= 0: |
| 2671 if issue_id.startswith(('e711', 'w6')): |
| 2672 continue |
| 2673 |
| 2674 if aggressive <= 1: |
| 2675 if issue_id.startswith(('e712', 'e713')): |
| 2676 continue |
| 2677 |
| 2678 if r['line'] in commented_out_code_line_numbers: |
| 2679 if issue_id.startswith(('e26', 'e501')): |
| 2680 continue |
| 2681 |
| 2682 yield r |
| 2683 |
| 2684 |
| 2685 def multiline_string_lines(source, include_docstrings=False): |
| 2686 """Return line numbers that are within multiline strings. |
| 2687 |
| 2688 The line numbers are indexed at 1. |
| 2689 |
| 2690 Docstrings are ignored unless include_docstrings is True.
| 2691 |
| 2692 """ |
| 2693 line_numbers = set() |
| 2694 previous_token_type = '' |
| 2695 try: |
| 2696 for t in generate_tokens(source): |
| 2697 token_type = t[0] |
| 2698 start_row = t[2][0] |
| 2699 end_row = t[3][0] |
| 2700 |
| 2701 if token_type == tokenize.STRING and start_row != end_row: |
| 2702 if ( |
| 2703 include_docstrings or |
| 2704 previous_token_type != tokenize.INDENT |
| 2705 ): |
| 2706 # We increment by one since we want the contents of the |
| 2707 # string. |
| 2708 line_numbers |= set(range(1 + start_row, 1 + end_row)) |
| 2709 |
| 2710 previous_token_type = token_type |
| 2711 except (SyntaxError, tokenize.TokenError): |
| 2712 pass |
| 2713 |
| 2714 return line_numbers |
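| 
| # Illustrative example (hedged): for the source
| #     x = """hello
| #     world
| #     """
| # the returned set is {2, 3}: the lines holding the string's contents
| # after the opening line.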
| 2715 |
| 2716 |
| 2717 def commented_out_code_lines(source): |
| 2718 """Return line numbers of comments that are likely code. |
| 2719 |
| 2720 Commented-out code is bad practice, but modifying it just adds even more |
| 2721 clutter. |
| 2722 |
| 2723 """ |
| 2724 line_numbers = [] |
| 2725 try: |
| 2726 for t in generate_tokens(source): |
| 2727 token_type = t[0] |
| 2728 token_string = t[1] |
| 2729 start_row = t[2][0] |
| 2730 line = t[4] |
| 2731 |
| 2732 # Ignore inline comments. |
| 2733 if not line.lstrip().startswith('#'): |
| 2734 continue |
| 2735 |
| 2736 if token_type == tokenize.COMMENT: |
| 2737 stripped_line = token_string.lstrip('#').strip() |
| 2738 if ( |
| 2739 ' ' in stripped_line and |
| 2740 '#' not in stripped_line and |
| 2741 check_syntax(stripped_line) |
| 2742 ): |
| 2743 line_numbers.append(start_row) |
| 2744 except (SyntaxError, tokenize.TokenError): |
| 2745 pass |
| 2746 |
| 2747 return line_numbers |
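| 
| # Illustrative example (hedged):
| #
| #     >>> commented_out_code_lines('# x = 1\nreal_code = 2\n')
| #     [1]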
| 2748 |
| 2749 |
| 2750 def shorten_comment(line, max_line_length, last_comment=False): |
| 2751 """Return trimmed or split long comment line. |
| 2752 |
| 2753 If there are no comments immediately following it, do a text wrap. |
| 2754 Doing this wrapping on all comments in general would lead to jagged |
| 2755 comment text. |
| 2756 |
| 2757 """ |
| 2758 assert len(line) > max_line_length |
| 2759 line = line.rstrip() |
| 2760 |
| 2761 # PEP 8 recommends 72 characters for comment text. |
| 2762 indentation = _get_indentation(line) + '# ' |
| 2763 max_line_length = min(max_line_length, |
| 2764 len(indentation) + 72) |
| 2765 |
| 2766 MIN_CHARACTER_REPEAT = 5 |
| 2767 if ( |
| 2768 len(line) - len(line.rstrip(line[-1])) >= MIN_CHARACTER_REPEAT and |
| 2769 not line[-1].isalnum() |
| 2770 ): |
| 2771 # Trim comments that end with things like --------- |
| 2772 return line[:max_line_length] + '\n' |
| 2773 elif last_comment and re.match(r'\s*#+\s*\w+', line): |
| 2774 import textwrap |
| 2775 split_lines = textwrap.wrap(line.lstrip(' \t#'), |
| 2776 initial_indent=indentation, |
| 2777 subsequent_indent=indentation, |
| 2778 width=max_line_length, |
| 2779 break_long_words=False, |
| 2780 break_on_hyphens=False) |
| 2781 return '\n'.join(split_lines) + '\n' |
| 2782 else: |
| 2783 return line + '\n' |
| 2784 |
| 2785 |
| 2786 def normalize_line_endings(lines, newline): |
| 2787 """Return fixed line endings. |
| 2788 |
| 2789 All lines will be modified to use the given newline.
| 2790 |
| 2791 """ |
| 2792 return [line.rstrip('\n\r') + newline for line in lines] |
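| 
| # Illustrative example (hedged):
| #
| #     >>> normalize_line_endings(['a\r\n', 'b\n'], '\n')
| #     ['a\n', 'b\n']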
| 2793 |
| 2794 |
| 2795 def mutual_startswith(a, b): |
| 2796 return b.startswith(a) or a.startswith(b) |
| 2797 |
| 2798 |
| 2799 def code_match(code, select, ignore): |
| 2800 if ignore: |
| 2801 assert not isinstance(ignore, unicode) |
| 2802 for ignored_code in [c.strip() for c in ignore]: |
| 2803 if mutual_startswith(code.lower(), ignored_code.lower()): |
| 2804 return False |
| 2805 |
| 2806 if select: |
| 2807 assert not isinstance(select, unicode) |
| 2808 for selected_code in [c.strip() for c in select]: |
| 2809 if mutual_startswith(code.lower(), selected_code.lower()): |
| 2810 return True |
| 2811 return False |
| 2812 |
| 2813 return True |
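| 
| # Illustrative examples (hedged); matching is prefix-based in both
| # directions, so 'E711' matches a selected 'E7' and vice versa:
| #
| #     >>> code_match('E711', select=['E7'], ignore=[])
| #     True
| #     >>> code_match('E711', select=[], ignore=['E7'])
| #     False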
| 2814 |
| 2815 |
| 2816 def fix_code(source, options=None): |
| 2817 """Return fixed source code.""" |
| 2818 if not options: |
| 2819 options = parse_args(['']) |
| 2820 |
| 2821 if not isinstance(source, unicode): |
| 2822 source = source.decode(locale.getpreferredencoding()) |
| 2823 |
| 2824 sio = io.StringIO(source) |
| 2825 return fix_lines(sio.readlines(), options=options) |
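| 
| # Example of library usage (a hedged sketch; the exact output assumes
| # default options):
| #
| #     >>> import autopep8
| #     >>> autopep8.fix_code('x=       123\n')
| #     'x = 123\n'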
| 2826 |
| 2827 |
| 2828 def fix_lines(source_lines, options, filename=''): |
| 2829 """Return fixed source code.""" |
| 2830 # Transform everything to line feeds. Then change them back to the
| 2831 # original line ending before returning the fixed source code.
| 2832 original_newline = find_newline(source_lines) |
| 2833 tmp_source = ''.join(normalize_line_endings(source_lines, '\n')) |
| 2834 |
| 2835 # Keep a history to break out of cycles. |
| 2836 previous_hashes = set() |
| 2837 |
| 2838 if options.line_range: |
| 2839 fixed_source = apply_local_fixes(tmp_source, options) |
| 2840 else: |
| 2841 # Apply global fixes only once (for efficiency). |
| 2842 fixed_source = apply_global_fixes(tmp_source, options) |
| 2843 |
| 2844 passes = 0 |
| 2845 long_line_ignore_cache = set() |
| 2846 while hash(fixed_source) not in previous_hashes: |
| 2847 if options.pep8_passes >= 0 and passes > options.pep8_passes: |
| 2848 break |
| 2849 passes += 1 |
| 2850 |
| 2851 previous_hashes.add(hash(fixed_source)) |
| 2852 |
| 2853 tmp_source = copy.copy(fixed_source) |
| 2854 |
| 2855 fix = FixPEP8( |
| 2856 filename, |
| 2857 options, |
| 2858 contents=tmp_source, |
| 2859 long_line_ignore_cache=long_line_ignore_cache) |
| 2860 |
| 2861 fixed_source = fix.fix() |
| 2862 |
| 2863 sio = io.StringIO(fixed_source) |
| 2864 return ''.join(normalize_line_endings(sio.readlines(), original_newline)) |
| 2865 |
| 2866 |
| 2867 def fix_file(filename, options=None, output=None): |
| 2868 if not options: |
| 2869 options = parse_args([filename]) |
| 2870 |
| 2871 original_source = readlines_from_file(filename) |
| 2872 |
| 2873 fixed_source = original_source |
| 2874 |
| 2875 if options.in_place or output: |
| 2876 encoding = detect_encoding(filename) |
| 2877 |
| 2878 if output: |
| 2879 output = codecs.getwriter(encoding)(output.buffer |
| 2880 if hasattr(output, 'buffer') |
| 2881 else output) |
| 2882 |
| 2883 output = LineEndingWrapper(output) |
| 2884 |
| 2885 fixed_source = fix_lines(fixed_source, options, filename=filename) |
| 2886 |
| 2887 if options.diff: |
| 2888 new = io.StringIO(fixed_source) |
| 2889 new = new.readlines() |
| 2890 diff = get_diff_text(original_source, new, filename) |
| 2891 if output: |
| 2892 output.write(diff) |
| 2893 output.flush() |
| 2894 else: |
| 2895 return diff |
| 2896 elif options.in_place: |
| 2897 fp = open_with_encoding(filename, encoding=encoding, |
| 2898 mode='w') |
| 2899 fp.write(fixed_source) |
| 2900 fp.close() |
| 2901 else: |
| 2902 if output: |
| 2903 output.write(fixed_source) |
| 2904 output.flush() |
| 2905 else: |
| 2906 return fixed_source |
| 2907 |
| 2908 |
| 2909 def global_fixes(): |
| 2910 """Yield multiple (code, function) tuples.""" |
| 2911 for function in globals().values(): |
| 2912 if inspect.isfunction(function): |
| 2913 arguments = inspect.getargspec(function)[0] |
| 2914 if arguments[:1] != ['source']: |
| 2915 continue |
| 2916 |
| 2917 code = extract_code_from_function(function) |
| 2918 if code: |
| 2919 yield (code, function) |
| 2920 |
| 2921 |
| 2922 def apply_global_fixes(source, options, where='global'): |
| 2923 """Run global fixes on source code. |
| 2924 |
| 2925 These are fixes that only need be done once (unlike those in |
| 2926 FixPEP8, which are dependent on pep8). |
| 2927 |
| 2928 """ |
| 2929 if code_match('E101', select=options.select, ignore=options.ignore): |
| 2930 source = reindent(source, |
| 2931 indent_size=options.indent_size) |
| 2932 |
| 2933 for (code, function) in global_fixes(): |
| 2934 if code_match(code, select=options.select, ignore=options.ignore): |
| 2935 if options.verbose: |
| 2936 print('---> Applying {0} fix for {1}'.format(where, |
| 2937 code.upper()), |
| 2938 file=sys.stderr) |
| 2939 source = function(source, |
| 2940 aggressive=options.aggressive) |
| 2941 |
| 2942 source = fix_2to3(source, |
| 2943 aggressive=options.aggressive, |
| 2944 select=options.select, |
| 2945 ignore=options.ignore) |
| 2946 |
| 2947 return source |
| 2948 |
| 2949 |
| 2950 def apply_local_fixes(source, options): |
| 2951 """Ananologus to apply_global_fixes, but runs only those which makes sense |
| 2952 for the given line_range. |
| 2953 |
| 2954 Do as much as we can without breaking code. |
| 2955 |
| 2956 """ |
| 2957 def find_ge(a, x): |
| 2958 """Find leftmost item greater than or equal to x.""" |
| 2959 i = bisect.bisect_left(a, x) |
| 2960 if i != len(a): |
| 2961 return i, a[i] |
| 2962 return len(a) - 1, a[-1] |
| 2963 |
| 2964 def find_le(a, x): |
| 2965 """Find rightmost value less than or equal to x.""" |
| 2966 i = bisect.bisect_right(a, x) |
| 2967 if i: |
| 2968 return i - 1, a[i - 1] |
| 2969 return 0, a[0] |
| 2970 |
| 2971 def local_fix(source, start_log, end_log, |
| 2972 start_lines, end_lines, indents, last_line): |
| 2973 """apply_global_fixes to the source between start_log and end_log. |
| 2974 |
| 2975 The subsource must be syntactically valid as a complete Python
| 2976 program (though all lines may share an indentation). The
| 2977 subsource's shared indent is removed, fixes are applied, and the
| 2978 indent is prepended back, taking care not to reindent strings.
| 2979 |
| 2980 last_line is the strict cut off (options.line_range[1]), so that |
| 2981 lines after last_line are not modified. |
| 2982 |
| 2983 """ |
| 2984 if end_log < start_log: |
| 2985 return source |
| 2986 |
| 2987 ind = indents[start_log] |
| 2988 indent = _get_indentation(source[start_lines[start_log]]) |
| 2989 |
| 2990 sl = slice(start_lines[start_log], end_lines[end_log] + 1) |
| 2991 |
| 2992 subsource = source[sl] |
| 2993 # Remove indent from subsource. |
| 2994 if ind: |
| 2995 for line_no in start_lines[start_log:end_log + 1]: |
| 2996 pos = line_no - start_lines[start_log] |
| 2997 subsource[pos] = subsource[pos][ind:] |
| 2998 |
| 2999 # Fix indentation of subsource. |
| 3000 fixed_subsource = apply_global_fixes(''.join(subsource), |
| 3001 options, |
| 3002 where='local') |
| 3003 fixed_subsource = fixed_subsource.splitlines(True) |
| 3004 |
| 3005 # Add back indent for non multi-line strings lines. |
| 3006 msl = multiline_string_lines(''.join(fixed_subsource), |
| 3007 include_docstrings=False) |
| 3008 for i, line in enumerate(fixed_subsource): |
| 3009 if i + 1 not in msl:
| 3010 fixed_subsource[i] = indent + line if line != '\n' else line |
| 3011 |
| 3012 # Special-case the final line: if it is a multiline statement *and*
| 3013 # the cut-off is somewhere inside it, take the fixed subset up
| 3014 # until last_line. This assumes that the number of lines in the
| 3015 # multiline statement does not change.
| 3016 changed_lines = len(fixed_subsource) |
| 3017 if (start_lines[end_log] != end_lines[end_log] |
| 3018 and end_lines[end_log] > last_line): |
| 3019 after_end = end_lines[end_log] - last_line |
| 3020 fixed_subsource = (fixed_subsource[:-after_end] + |
| 3021 source[sl][-after_end:]) |
| 3022 changed_lines -= after_end |
| 3023 |
| 3024 options.line_range[1] = (options.line_range[0] + |
| 3025 changed_lines - 1) |
| 3026 |
| 3027 return (source[:start_lines[start_log]] + |
| 3028 fixed_subsource + |
| 3029 source[end_lines[end_log] + 1:]) |
| 3030 |
| 3031 def is_continued_stmt(line, |
| 3032 continued_stmts=frozenset(['else', 'elif', |
| 3033 'finally', 'except'])): |
| 3034 return re.split('[ :]', line.strip(), 1)[0] in continued_stmts |
| 3035 |
| 3036 assert options.line_range |
| 3037 start, end = options.line_range |
| 3038 start -= 1 |
| 3039 end -= 1 |
| 3040 last_line = end # We shouldn't modify lines after this cut-off. |
| 3041 |
| 3042 try: |
| 3043 logical = _find_logical(source) |
| 3044 except (SyntaxError, tokenize.TokenError): |
| 3045 return ''.join(source) |
| 3046 |
| 3047 if not logical[0]: |
| 3048 # Just blank lines; presumably the result will collapse to '\n'.
| 3049 return apply_global_fixes(source, options) |
| 3050 |
| 3051 start_lines, indents = zip(*logical[0]) |
| 3052 end_lines, _ = zip(*logical[1]) |
| 3053 |
| 3054 source = source.splitlines(True) |
| 3055 |
| 3056 start_log, start = find_ge(start_lines, start) |
| 3057 end_log, end = find_le(start_lines, end) |
| 3058 |
| 3059 # Look behind one line. If it's indented less than the current
| 3060 # indent, then we can move to that previous line, knowing that its
| 3061 # indentation level will not be changed.
| 3062 if (start_log > 0 |
| 3063 and indents[start_log - 1] < indents[start_log] |
| 3064 and not is_continued_stmt(source[start_log - 1])): |
| 3065 start_log -= 1 |
| 3066 start = start_lines[start_log] |
| 3067 |
| 3068 while start < end: |
| 3069 |
| 3070 if is_continued_stmt(source[start]): |
| 3071 start_log += 1 |
| 3072 start = start_lines[start_log] |
| 3073 continue |
| 3074 |
| 3075 ind = indents[start_log] |
| 3076 for t in itertools.takewhile(lambda t: t[1][1] >= ind, |
| 3077 enumerate(logical[0][start_log:])): |
| 3078 n_log, n = start_log + t[0], t[1][0] |
| 3079 # start shares indent up to n. |
| 3080 |
| 3081 if n <= end: |
| 3082 source = local_fix(source, start_log, n_log, |
| 3083 start_lines, end_lines, |
| 3084 indents, last_line) |
| 3085 start_log = n_log if n == end else n_log + 1 |
| 3086 start = start_lines[start_log] |
| 3087 continue |
| 3088 |
| 3089 else: |
| 3090 # Look at the line after end and see if it allows us to reindent.
| 3091 after_end_log, after_end = find_ge(start_lines, end + 1) |
| 3092 |
| 3093 if indents[after_end_log] > indents[start_log]: |
| 3094 start_log, start = find_ge(start_lines, start + 1) |
| 3095 continue |
| 3096 |
| 3097 if (indents[after_end_log] == indents[start_log] |
| 3098 and is_continued_stmt(source[after_end])): |
| 3099 # Find n, the beginning of the last continued statement.
| 3100 # Apply the fix to the previous block if there is one.
| 3101 only_block = True |
| 3102 for n, n_ind in logical[0][start_log:end_log + 1][::-1]: |
| 3103 if n_ind == ind and not is_continued_stmt(source[n]): |
| 3104 n_log = start_lines.index(n) |
| 3105 source = local_fix(source, start_log, n_log - 1, |
| 3106 start_lines, end_lines, |
| 3107 indents, last_line) |
| 3108 start_log = n_log + 1 |
| 3109 start = start_lines[start_log] |
| 3110 only_block = False |
| 3111 break |
| 3112 if only_block: |
| 3113 end_log, end = find_le(start_lines, end - 1) |
| 3114 continue |
| 3115 |
| 3116 source = local_fix(source, start_log, end_log, |
| 3117 start_lines, end_lines, |
| 3118 indents, last_line) |
| 3119 break |
| 3120 |
| 3121 return ''.join(source) |
| 3122 |
| 3123 |
| 3124 def extract_code_from_function(function): |
| 3125 """Return code handled by function.""" |
| 3126 if not function.__name__.startswith('fix_'): |
| 3127 return None |
| 3128 |
| 3129 code = re.sub('^fix_', '', function.__name__) |
| 3130 if not code: |
| 3131 return None |
| 3132 |
| 3133 try: |
| 3134 int(code[1:]) |
| 3135 except ValueError: |
| 3136 return None |
| 3137 |
| 3138 return code |
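| 
| # Illustrative example (hedged, with a hypothetical fixer function):
| #
| #     >>> def fix_e501(source): pass
| #     >>> extract_code_from_function(fix_e501)
| #     'e501'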
| 3139 |
| 3140 |
| 3141 def create_parser(): |
| 3142 """Return command-line parser.""" |
| 3143 # Do import locally to be friendly to those who use autopep8 as a library |
| 3144 # and are supporting Python 2.6. |
| 3145 import argparse |
| 3146 |
| 3147 parser = argparse.ArgumentParser(description=docstring_summary(__doc__), |
| 3148 prog='autopep8') |
| 3149 parser.add_argument('--version', action='version', |
| 3150 version='%(prog)s ' + __version__) |
| 3151 parser.add_argument('-v', '--verbose', action='count', dest='verbose', |
| 3152 default=0, |
| 3153 help='print verbose messages; ' |
| 3154 'multiple -v result in more verbose messages') |
| 3155 parser.add_argument('-d', '--diff', action='store_true', dest='diff', |
| 3156 help='print the diff for the fixed source') |
| 3157 parser.add_argument('-i', '--in-place', action='store_true', |
| 3158 help='make changes to files in place') |
| 3159 parser.add_argument('-r', '--recursive', action='store_true', |
| 3160 help='run recursively over directories; ' |
| 3161 'must be used with --in-place or --diff') |
| 3162 parser.add_argument('-j', '--jobs', type=int, metavar='n', default=1, |
| 3163 help='number of parallel jobs; ' |
| 3164 'match CPU count if value is less than 1') |
| 3165 parser.add_argument('-p', '--pep8-passes', metavar='n', |
| 3166 default=-1, type=int, |
| 3167 help='maximum number of additional pep8 passes ' |
| 3168 '(default: infinite)') |
| 3169 parser.add_argument('-a', '--aggressive', action='count', default=0, |
| 3170 help='enable non-whitespace changes; ' |
| 3171 'multiple -a result in more aggressive changes') |
| 3172 parser.add_argument('--experimental', action='store_true', |
| 3173 help='enable experimental fixes') |
| 3174 parser.add_argument('--exclude', metavar='globs', |
| 3175 help='exclude file/directory names that match these ' |
| 3176 'comma-separated globs') |
| 3177 parser.add_argument('--list-fixes', action='store_true', |
| 3178 help='list codes for fixes; ' |
| 3179 'used by --ignore and --select') |
| 3180 parser.add_argument('--ignore', metavar='errors', default='', |
| 3181 help='do not fix these errors/warnings ' |
| 3182 '(default: {0})'.format(DEFAULT_IGNORE)) |
| 3183 parser.add_argument('--select', metavar='errors', default='', |
| 3184 help='fix only these errors/warnings (e.g. E4,W)') |
| 3185 parser.add_argument('--max-line-length', metavar='n', default=79, type=int, |
| 3186 help='set maximum allowed line length ' |
| 3187 '(default: %(default)s)') |
| 3188 parser.add_argument('--range', metavar='line', dest='line_range', |
| 3189 default=None, type=int, nargs=2, |
| 3190 help='only fix errors found within this inclusive ' |
| 3191 'range of line numbers (e.g. 1 99); ' |
| 3192 'line numbers are indexed at 1') |
| 3193 parser.add_argument('--indent-size', default=DEFAULT_INDENT_SIZE, |
| 3194 type=int, metavar='n', |
| 3195 help='number of spaces per indent level ' |
| 3196 '(default %(default)s)') |
| 3197 parser.add_argument('files', nargs='*', |
| 3198 help="files to format or '-' for standard in") |
| 3199 |
| 3200 return parser |
| 3201 |
| 3202 |
| 3203 def parse_args(arguments): |
| 3204 """Parse command-line options.""" |
| 3205 parser = create_parser() |
| 3206 args = parser.parse_args(arguments) |
| 3207 |
| 3208 if not args.files and not args.list_fixes: |
| 3209 parser.error('incorrect number of arguments') |
| 3210 |
| 3211 args.files = [decode_filename(name) for name in args.files] |
| 3212 |
| 3213 if '-' in args.files: |
| 3214 if len(args.files) > 1: |
| 3215 parser.error('cannot mix stdin and regular files') |
| 3216 |
| 3217 if args.diff: |
| 3218 parser.error('--diff cannot be used with standard input') |
| 3219 |
| 3220 if args.in_place: |
| 3221 parser.error('--in-place cannot be used with standard input') |
| 3222 |
| 3223 if args.recursive: |
| 3224 parser.error('--recursive cannot be used with standard input') |
| 3225 |
| 3226 if len(args.files) > 1 and not (args.in_place or args.diff): |
| 3227 parser.error('autopep8 only takes one filename as argument ' |
| 3228 'unless the "--in-place" or "--diff" args are ' |
| 3229 'used') |
| 3230 |
| 3231 if args.recursive and not (args.in_place or args.diff): |
| 3232 parser.error('--recursive must be used with --in-place or --diff') |
| 3233 |
| 3234 if args.exclude and not args.recursive: |
| 3235 parser.error('--exclude is only relevant when used with --recursive') |
| 3236 |
| 3237 if args.in_place and args.diff: |
| 3238 parser.error('--in-place and --diff are mutually exclusive') |
| 3239 |
| 3240 if args.max_line_length <= 0: |
| 3241 parser.error('--max-line-length must be greater than 0') |
| 3242 |
| 3243 if args.select: |
| 3244 args.select = args.select.split(',') |
| 3245 |
| 3246 if args.ignore: |
| 3247 args.ignore = args.ignore.split(',') |
| 3248 elif not args.select: |
| 3249 if args.aggressive: |
| 3250 # Enable everything by default if aggressive. |
| 3251 args.select = ['E', 'W'] |
| 3252 else: |
| 3253 args.ignore = DEFAULT_IGNORE.split(',') |
| 3254 |
| 3255 if args.exclude: |
| 3256 args.exclude = args.exclude.split(',') |
| 3257 else: |
| 3258 args.exclude = [] |
| 3259 |
| 3260 if args.jobs < 1: |
| 3261 # Do not import multiprocessing globally in case it is not supported |
| 3262 # on the platform. |
| 3263 import multiprocessing |
| 3264 args.jobs = multiprocessing.cpu_count() |
| 3265 |
| 3266 if args.jobs > 1 and not args.in_place: |
| 3267 parser.error('parallel jobs requires --in-place') |
| 3268 |
| 3269 if args.line_range: |
| 3270 if args.line_range[0] <= 0: |
| 3271 parser.error('--range must be positive numbers') |
| 3272 if args.line_range[0] > args.line_range[1]: |
| 3273 parser.error('First value of --range should be less than or equal ' |
| 3274 'to the second') |
| 3275 |
| 3276 return args |
| 3277 |
| 3278 |
| 3279 def decode_filename(filename): |
| 3280 """Return Unicode filename.""" |
| 3281 if isinstance(filename, unicode): |
| 3282 return filename |
| 3283 else: |
| 3284 return filename.decode(sys.getfilesystemencoding()) |
| 3285 |
| 3286 |
| 3287 def supported_fixes(): |
| 3288 """Yield pep8 error codes that autopep8 fixes. |
| 3289 |
| 3290 Each item we yield is a tuple of the code followed by its |
| 3291 description. |
| 3292 |
| 3293 """ |
| 3294 yield ('E101', docstring_summary(reindent.__doc__)) |
| 3295 |
| 3296 instance = FixPEP8(filename=None, options=None, contents='') |
| 3297 for attribute in dir(instance): |
| 3298 code = re.match('fix_([ew][0-9][0-9][0-9])', attribute) |
| 3299 if code: |
| 3300 yield ( |
| 3301 code.group(1).upper(), |
| 3302 re.sub(r'\s+', ' ', |
| 3303 docstring_summary(getattr(instance, attribute).__doc__)) |
| 3304 ) |
| 3305 |
| 3306 for (code, function) in sorted(global_fixes()): |
| 3307 yield (code.upper() + (4 - len(code)) * ' ', |
| 3308 re.sub(r'\s+', ' ', docstring_summary(function.__doc__))) |
| 3309 |
| 3310 for code in sorted(CODE_TO_2TO3): |
| 3311 yield (code.upper() + (4 - len(code)) * ' ', |
| 3312 re.sub(r'\s+', ' ', docstring_summary(fix_2to3.__doc__))) |
| 3313 |
| 3314 |
| 3315 def docstring_summary(docstring): |
| 3316 """Return summary of docstring.""" |
| 3317 return docstring.split('\n')[0] |
| 3318 |
| 3319 |
| 3320 def line_shortening_rank(candidate, indent_word, max_line_length, |
| 3321 experimental=False): |
| 3322 """Return rank of candidate. |
| 3323 |
| 3324 This is for sorting candidates. |
| 3325 |
| 3326 """ |
| 3327 if not candidate.strip(): |
| 3328 return 0 |
| 3329 |
| 3330 rank = 0 |
| 3331 lines = candidate.split('\n') |
| 3332 |
| 3333 offset = 0 |
| 3334 if ( |
| 3335 not lines[0].lstrip().startswith('#') and |
| 3336 lines[0].rstrip()[-1] not in '([{' |
| 3337 ): |
| 3338 for (opening, closing) in ('()', '[]', '{}'): |
| 3339 # Don't penalize empty containers that aren't split up. Things
| 3340 # like "foo(\n    )" aren't particularly good.
| 3341 opening_loc = lines[0].find(opening) |
| 3342 closing_loc = lines[0].find(closing) |
| 3343 if opening_loc >= 0: |
| 3344 if closing_loc < 0 or closing_loc != opening_loc + 1: |
| 3345 offset = max(offset, 1 + opening_loc) |
| 3346 |
| 3347 current_longest = max(offset + len(x.strip()) for x in lines) |
| 3348 |
| 3349 rank += 4 * max(0, current_longest - max_line_length) |
| 3350 |
| 3351 rank += len(lines) |
| 3352 |
| 3353 # Too much variation in line length is ugly. |
| 3354 rank += 2 * standard_deviation(len(line) for line in lines) |
| 3355 |
| 3356 bad_starting_symbol = {
| 3357 '(': ')',
| 3358 '[': ']',
| 3359 '{': '}'}.get(lines[0][-1])
| 3360 
| 3361 if len(lines) > 1:
| 3362 if (
| 3363 bad_starting_symbol and
| 3364 lines[1].lstrip().startswith(bad_starting_symbol)
| 3365 ):
| 3366 rank += 20
| 3367 |
| 3368 for lineno, current_line in enumerate(lines): |
| 3369 current_line = current_line.strip() |
| 3370 |
| 3371 if current_line.startswith('#'): |
| 3372 continue |
| 3373 |
| 3374 for bad_start in ['.', '%', '+', '-', '/']: |
| 3375 if current_line.startswith(bad_start): |
| 3376 rank += 100 |
| 3377 |
| 3378 # Do not tolerate operators on their own line. |
| 3379 if current_line == bad_start: |
| 3380 rank += 1000 |
| 3381 |
| 3382 if current_line.endswith(('(', '[', '{', '.')): |
            # Avoid lonely openings. They result in longer lines.
            if len(current_line) <= len(indent_word):
                rank += 100

            # Avoid the ugliness of ", (\n".
            if (
                current_line.endswith('(') and
                current_line[:-1].rstrip().endswith(',')
            ):
                rank += 100

            # Also avoid the ugliness of "foo.\nbar".
            if current_line.endswith('.'):
                rank += 100

            if has_arithmetic_operator(current_line):
                rank += 100

        if current_line.endswith(('%', '(', '[', '{')):
            rank -= 20

        # Try to break list comprehensions at the "for".
        if current_line.startswith('for '):
            rank -= 50

        if current_line.endswith('\\'):
            # If a line ends in \-newline, it may be part of a
            # multiline string. In that case, we would like to know
            # how long that line is without the \-newline. If it's
            # longer than the maximum, or has comments, then we assume
            # that the \-newline is an okay candidate and only
            # penalize it a bit.
            total_len = len(current_line)
            lineno += 1
            while lineno < len(lines):
                total_len += len(lines[lineno])

                if lines[lineno].lstrip().startswith('#'):
                    total_len = max_line_length
                    break

                if not lines[lineno].endswith('\\'):
                    break

                lineno += 1

            if total_len < max_line_length:
                rank += 10
            else:
                rank += 100 if experimental else 1

        # Prefer breaking at commas rather than at colons.
        if ',' in current_line and current_line.endswith(':'):
            rank += 10

        rank += 10 * count_unbalanced_brackets(current_line)

    return max(0, rank)
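
# Lower ranks are better. A sketch of how a caller might pick the best
# shortening candidate (illustrative; real call sites pass the configured
# indent word and maximum line length):
#
#     best = min(candidates,
#                key=lambda c: line_shortening_rank(c, '    ', 79))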


def standard_deviation(numbers):
    """Return standard deviation."""
    numbers = list(numbers)
    if not numbers:
        return 0
    mean = sum(numbers) / len(numbers)
    return (sum((n - mean) ** 2 for n in numbers) /
            len(numbers)) ** .5
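
# This is the population standard deviation, e.g.:
#
#     >>> standard_deviation([2, 4, 4, 4, 5, 5, 7, 9])
#     2.0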


def has_arithmetic_operator(line):
    """Return True if line contains any arithmetic operators."""
    for operator in pep8.ARITHMETIC_OP:
        if operator in line:
            return True

    return False
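
# For example, assuming pep8.ARITHMETIC_OP includes '+', '-', '*' and '/':
#
#     >>> has_arithmetic_operator('x = a + b')
#     True
#     >>> has_arithmetic_operator('x = a')
#     False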


def count_unbalanced_brackets(line):
    """Return number of unmatched open/close brackets."""
    count = 0
    for opening, closing in ['()', '[]', '{}']:
        count += abs(line.count(opening) - line.count(closing))

    return count
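
# For example, the unmatched '(' below counts once:
#
#     >>> count_unbalanced_brackets('foo(bar[0]')
#     1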


def split_at_offsets(line, offsets):
    """Split line at offsets.

    Return list of strings.

    """
    result = []

    previous_offset = 0
    current_offset = 0
    for current_offset in sorted(offsets):
        if current_offset < len(line) and previous_offset != current_offset:
            result.append(line[previous_offset:current_offset].strip())
        previous_offset = current_offset

    result.append(line[current_offset:])

    return result
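
# For example:
#
#     >>> split_at_offsets('abcdef', [2, 4])
#     ['ab', 'cd', 'ef']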


class LineEndingWrapper(object):

    r"""Replace line endings to work with sys.stdout.

    It seems that sys.stdout expects only '\n' as the line ending, no matter
    the platform. Otherwise, we get repeated line endings.

    """

    def __init__(self, output):
        self.__output = output

    def write(self, s):
        self.__output.write(s.replace('\r\n', '\n').replace('\r', '\n'))

    def flush(self):
        self.__output.flush()
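
# A minimal usage sketch (illustrative):
#
#     sys.stdout = LineEndingWrapper(sys.stdout)
#     sys.stdout.write('one\r\ntwo\r')  # Only '\n' endings are emitted.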


def match_file(filename, exclude):
    """Return True if file is okay for modifying/recursing."""
    base_name = os.path.basename(filename)

    if base_name.startswith('.'):
        return False

    for pattern in exclude:
        if fnmatch.fnmatch(base_name, pattern):
            return False

    if not os.path.isdir(filename) and not is_python_file(filename):
        return False

    return True
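
# The hidden-file and exclude-pattern checks need no file system access,
# e.g.:
#
#     >>> match_file('src/.hidden.py', exclude=[])
#     False
#     >>> match_file('src/notes.py', exclude=['notes*'])
#     False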


def find_files(filenames, recursive, exclude):
    """Yield filenames."""
    while filenames:
        name = filenames.pop(0)
        if recursive and os.path.isdir(name):
            for root, directories, children in os.walk(name):
                filenames += [os.path.join(root, f) for f in children
                              if match_file(os.path.join(root, f),
                                            exclude)]
                directories[:] = [d for d in directories
                                  if match_file(os.path.join(root, d),
                                                exclude)]
        else:
            yield name
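
# Illustrative use ('src' and the exclude pattern are made-up examples):
#
#     for name in find_files(['src'], recursive=True, exclude=['*_pb2.py']):
#         print(name)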


def _fix_file(parameters):
    """Helper function for optionally running fix_file() in parallel."""
    if parameters[1].verbose:
        print('[file:{0}]'.format(parameters[0]), file=sys.stderr)
    try:
        fix_file(*parameters)
    except IOError as error:
        print(unicode(error), file=sys.stderr)


def fix_multiple_files(filenames, options, output=None):
    """Fix list of files.

    Optionally fix files recursively.

    """
    filenames = find_files(filenames, options.recursive, options.exclude)
    if options.jobs > 1:
        import multiprocessing
        pool = multiprocessing.Pool(options.jobs)
        pool.map(_fix_file,
                 [(name, options) for name in filenames])
    else:
        for name in filenames:
            _fix_file((name, options, output))


def is_python_file(filename):
    """Return True if filename is a Python file."""
    if filename.endswith('.py'):
        return True

    try:
        with open_with_encoding(filename) as f:
            first_line = f.readlines(1)[0]
    except (IOError, IndexError):
        return False

    if not PYTHON_SHEBANG_REGEX.match(first_line):
        return False

    return True
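
# A '.py' extension is accepted outright; otherwise the first line must be
# a Python shebang matched by PYTHON_SHEBANG_REGEX, such as
# '#!/usr/bin/env python'.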


def is_probably_part_of_multiline(line):
    """Return True if line is likely part of a multiline string.

    When multiline strings are involved, pep8 reports the error as being
    at the start of the multiline string, which doesn't work for us.

    """
    return (
        '"""' in line or
        "'''" in line or
        line.rstrip().endswith('\\')
    )
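
# For example:
#
#     >>> is_probably_part_of_multiline('x = """start of a multiline')
#     True
#     >>> is_probably_part_of_multiline('x = 1')
#     False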


def main():
    """Tool main."""
    try:
        # Exit on broken pipe.
        signal.signal(signal.SIGPIPE, signal.SIG_DFL)
    except AttributeError:  # pragma: no cover
        # SIGPIPE is not available on Windows.
        pass

    try:
        args = parse_args(sys.argv[1:])

        if args.list_fixes:
            for code, description in sorted(supported_fixes()):
                print('{code} - {description}'.format(
                    code=code, description=description))
            return 0

        if args.files == ['-']:
            assert not args.in_place

            # LineEndingWrapper is unnecessary here due to the symmetry between
            # standard in and standard out.
            sys.stdout.write(fix_code(sys.stdin.read(), args))
        else:
            if args.in_place or args.diff:
                args.files = list(set(args.files))
            else:
                assert len(args.files) == 1
                assert not args.recursive

            fix_multiple_files(args.files, args, sys.stdout)
    except KeyboardInterrupt:
        return 1  # pragma: no cover


class CachedTokenizer(object):

    """A one-element cache around tokenize.generate_tokens().

    Original code written by Ned Batchelder, in coverage.py.

    """

    def __init__(self):
        self.last_text = None
        self.last_tokens = None

    def generate_tokens(self, text):
        """A stand-in for tokenize.generate_tokens()."""
        if text != self.last_text:
            string_io = io.StringIO(text)
            self.last_tokens = list(
                tokenize.generate_tokens(string_io.readline)
            )
            self.last_text = text
        return self.last_tokens

_cached_tokenizer = CachedTokenizer()
generate_tokens = _cached_tokenizer.generate_tokens
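
# Repeated calls with identical source text reuse the cached token list:
#
#     tokens = generate_tokens('x = 1\n')
#     assert generate_tokens('x = 1\n') is tokens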


if __name__ == '__main__':
    sys.exit(main())