| Index: Tools/Scripts/webkitpy/thirdparty/autopep8.py
|
| diff --git a/Tools/Scripts/webkitpy/thirdparty/autopep8.py b/Tools/Scripts/webkitpy/thirdparty/autopep8.py
|
| new file mode 100644
|
| index 0000000000000000000000000000000000000000..c54ebea44a0bd9c46ca98c24a672c6155eea04cf
|
| --- /dev/null
|
| +++ b/Tools/Scripts/webkitpy/thirdparty/autopep8.py
|
| @@ -0,0 +1,3664 @@
|
| +# Copyright (C) 2010-2011 Hideo Hattori
|
| +# Copyright (C) 2011-2013 Hideo Hattori, Steven Myint
|
| +# Copyright (C) 2013-2014 Hideo Hattori, Steven Myint, Bill Wendling
|
| +#
|
| +# Permission is hereby granted, free of charge, to any person obtaining
|
| +# a copy of this software and associated documentation files (the
|
| +# "Software"), to deal in the Software without restriction, including
|
| +# without limitation the rights to use, copy, modify, merge, publish,
|
| +# distribute, sublicense, and/or sell copies of the Software, and to
|
| +# permit persons to whom the Software is furnished to do so, subject to
|
| +# the following conditions:
|
| +#
|
| +# The above copyright notice and this permission notice shall be
|
| +# included in all copies or substantial portions of the Software.
|
| +#
|
| +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
| +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
| +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
| +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
| +# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
| +# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
| +# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
| +# SOFTWARE.
|
| +
|
| +"""Automatically formats Python code to conform to the PEP 8 style guide.
|
| +
|
| +Fixes that only need to be done once can be added to this module as a
|
| +function of the form "fix_<code>(source)", returning the fixed source code.
|
| +These fixes are picked up by apply_global_fixes().
|
| +
|
| +Fixes that depend on pep8 should be added as methods to FixPEP8. See the class
|
| +documentation for more information.
|
| +
|
| +"""
|
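| +# A minimal sketch (an editor's illustration, not part of upstream autopep8)
|
| +# of a one-shot global fix of the form described above; "fix_example" is a
|
| +# hypothetical name, and the code is commented out so apply_global_fixes()
|
| +# does not actually pick it up:
|
| +#
|
| +#     def fix_example(source):
|
| +#         """Return source with all tabs expanded."""
|
| +#         return source.expandtabs()
|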
| +
|
| +from __future__ import absolute_import
|
| +from __future__ import division
|
| +from __future__ import print_function
|
| +from __future__ import unicode_literals
|
| +
|
| +import bisect
|
| +import codecs
|
| +import collections
|
| +import copy
|
| +import difflib
|
| +import fnmatch
|
| +import inspect
|
| +import io
|
| +import itertools
|
| +import keyword
|
| +import locale
|
| +import os
|
| +import re
|
| +import signal
|
| +import sys
|
| +import token
|
| +import tokenize
|
| +
|
| +import pep8
|
| +
|
| +
|
| +try:
|
| + unicode
|
| +except NameError:
|
| + unicode = str
|
| +
|
| +
|
| +__version__ = '1.0.3'
|
| +
|
| +
|
| +CR = '\r'
|
| +LF = '\n'
|
| +CRLF = '\r\n'
|
| +
|
| +
|
| +PYTHON_SHEBANG_REGEX = re.compile(r'^#!.*\bpython[23]?\b\s*$')
|
| +
|
| +
|
| +# For generating line shortening candidates.
|
| +SHORTEN_OPERATOR_GROUPS = frozenset([
|
| + frozenset([',']),
|
| + frozenset(['%']),
|
| + frozenset([',', '(', '[', '{']),
|
| + frozenset(['%', '(', '[', '{']),
|
| + frozenset([',', '(', '[', '{', '%', '+', '-', '*', '/', '//']),
|
| + frozenset(['%', '+', '-', '*', '/', '//']),
|
| +])
|
| +
|
| +
|
| +DEFAULT_IGNORE = 'E24'
|
| +DEFAULT_INDENT_SIZE = 4
|
| +
|
| +
|
| +# W602 is handled separately due to the need to avoid "with_traceback".
|
| +CODE_TO_2TO3 = {
|
| + 'E721': ['idioms'],
|
| + 'W601': ['has_key'],
|
| + 'W603': ['ne'],
|
| + 'W604': ['repr'],
|
| + 'W690': ['apply',
|
| + 'except',
|
| + 'exitfunc',
|
| + 'import',
|
| + 'numliterals',
|
| + 'operator',
|
| + 'paren',
|
| + 'reduce',
|
| + 'renames',
|
| + 'standarderror',
|
| + 'sys_exc',
|
| + 'throw',
|
| + 'tuple_params',
|
| + 'xreadlines']}
|
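| +# For example (an illustration), selecting W601 maps to the lib2to3
|
| +# "has_key" fixer:
|
| +#
|
| +#     code_to_2to3(select=['W601'], ignore=[])  # -> {'has_key'}
|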
| +
|
| +
|
| +def open_with_encoding(filename, encoding=None, mode='r'):
|
| + """Return opened file with a specific encoding."""
|
| + if not encoding:
|
| + encoding = detect_encoding(filename)
|
| +
|
| + return io.open(filename, mode=mode, encoding=encoding,
|
| +                   newline='')  # Preserve line endings
|
| +
|
| +
|
| +def detect_encoding(filename):
|
| + """Return file encoding."""
|
| + try:
|
| + with open(filename, 'rb') as input_file:
|
| + from lib2to3.pgen2 import tokenize as lib2to3_tokenize
|
| + encoding = lib2to3_tokenize.detect_encoding(input_file.readline)[0]
|
| +
|
| + # Check for correctness of encoding
|
| + with open_with_encoding(filename, encoding) as test_file:
|
| + test_file.read()
|
| +
|
| + return encoding
|
| + except (LookupError, SyntaxError, UnicodeDecodeError):
|
| + return 'latin-1'
|
| +
|
| +
|
| +def readlines_from_file(filename):
|
| + """Return contents of file."""
|
| + with open_with_encoding(filename) as input_file:
|
| + return input_file.readlines()
|
| +
|
| +
|
| +def extended_blank_lines(logical_line,
|
| + blank_lines,
|
| + indent_level,
|
| + previous_logical):
|
| + """Check for missing blank lines after class declaration."""
|
| + if previous_logical.startswith('class '):
|
| + if (
|
| + logical_line.startswith(('def ', 'class ', '@')) or
|
| + pep8.DOCSTRING_REGEX.match(logical_line)
|
| + ):
|
| + if indent_level and not blank_lines:
|
| + yield (0, 'E309 expected 1 blank line after class declaration')
|
| + elif previous_logical.startswith('def '):
|
| + if blank_lines and pep8.DOCSTRING_REGEX.match(logical_line):
|
| + yield (0, 'E303 too many blank lines ({0})'.format(blank_lines))
|
| + elif pep8.DOCSTRING_REGEX.match(previous_logical):
|
| + # Missing blank line between class docstring and method declaration.
|
| + if (
|
| + indent_level and
|
| + not blank_lines and
|
| + logical_line.startswith(('def ')) and
|
| + '(self' in logical_line
|
| + ):
|
| + yield (0, 'E301 expected 1 blank line, found 0')
|
| +pep8.register_check(extended_blank_lines)
|
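| +# For example (an illustration), the E309 branch above flags a method that
|
| +# starts immediately after its class declaration:
|
| +#
|
| +#     class Foo(object):
|
| +#         def bar(self):  # E309 is reported here.
|
| +#             pass
|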
| +
|
| +
|
| +def continued_indentation(logical_line, tokens, indent_level, indent_char,
|
| + noqa):
|
| + """Override pep8's function to provide indentation information."""
|
| + first_row = tokens[0][2][0]
|
| + nrows = 1 + tokens[-1][2][0] - first_row
|
| + if noqa or nrows == 1:
|
| + return
|
| +
|
| + # indent_next tells us whether the next block is indented. Assuming
|
| + # that it is indented by 4 spaces, then we should not allow 4-space
|
| + # indents on the final continuation line. In turn, some other
|
| + # indents are allowed to have an extra 4 spaces.
|
| + indent_next = logical_line.endswith(':')
|
| +
|
| + row = depth = 0
|
| + valid_hangs = (
|
| + (DEFAULT_INDENT_SIZE,)
|
| + if indent_char != '\t' else (DEFAULT_INDENT_SIZE,
|
| + 2 * DEFAULT_INDENT_SIZE)
|
| + )
|
| +
|
| + # Remember how many brackets were opened on each line.
|
| + parens = [0] * nrows
|
| +
|
| + # Relative indents of physical lines.
|
| + rel_indent = [0] * nrows
|
| +
|
| + # For each depth, collect a list of opening rows.
|
| + open_rows = [[0]]
|
| + # For each depth, memorize the hanging indentation.
|
| + hangs = [None]
|
| +
|
| + # Visual indents.
|
| + indent_chances = {}
|
| + last_indent = tokens[0][2]
|
| + indent = [last_indent[1]]
|
| +
|
| + last_token_multiline = None
|
| + line = None
|
| + last_line = ''
|
| + last_line_begins_with_multiline = False
|
| + for token_type, text, start, end, line in tokens:
|
| +
|
| + newline = row < start[0] - first_row
|
| + if newline:
|
| + row = start[0] - first_row
|
| + newline = (not last_token_multiline and
|
| + token_type not in (tokenize.NL, tokenize.NEWLINE))
|
| + last_line_begins_with_multiline = last_token_multiline
|
| +
|
| + if newline:
|
| + # This is the beginning of a continuation line.
|
| + last_indent = start
|
| +
|
| + # Record the initial indent.
|
| + rel_indent[row] = pep8.expand_indent(line) - indent_level
|
| +
|
| + # Identify closing bracket.
|
| + close_bracket = (token_type == tokenize.OP and text in ']})')
|
| +
|
| + # Is the indent relative to an opening bracket line?
|
| + for open_row in reversed(open_rows[depth]):
|
| + hang = rel_indent[row] - rel_indent[open_row]
|
| + hanging_indent = hang in valid_hangs
|
| + if hanging_indent:
|
| + break
|
| + if hangs[depth]:
|
| + hanging_indent = (hang == hangs[depth])
|
| +
|
| + visual_indent = (not close_bracket and hang > 0 and
|
| + indent_chances.get(start[1]))
|
| +
|
| + if close_bracket and indent[depth]:
|
| + # Closing bracket for visual indent.
|
| + if start[1] != indent[depth]:
|
| + yield (start, 'E124 {0}'.format(indent[depth]))
|
| + elif close_bracket and not hang:
|
| + pass
|
| + elif indent[depth] and start[1] < indent[depth]:
|
| + # Visual indent is broken.
|
| + yield (start, 'E128 {0}'.format(indent[depth]))
|
| + elif (hanging_indent or
|
| + (indent_next and
|
| + rel_indent[row] == 2 * DEFAULT_INDENT_SIZE)):
|
| + # Hanging indent is verified.
|
| + if close_bracket:
|
| + yield (start, 'E123 {0}'.format(indent_level +
|
| + rel_indent[open_row]))
|
| + hangs[depth] = hang
|
| + elif visual_indent is True:
|
| + # Visual indent is verified.
|
| + indent[depth] = start[1]
|
| + elif visual_indent in (text, unicode):
|
| + # Ignore token lined up with matching one from a previous line.
|
| + pass
|
| + else:
|
| + one_indented = (indent_level + rel_indent[open_row] +
|
| + DEFAULT_INDENT_SIZE)
|
| + # Indent is broken.
|
| + if hang <= 0:
|
| + error = ('E122', one_indented)
|
| + elif indent[depth]:
|
| + error = ('E127', indent[depth])
|
| + elif hang > DEFAULT_INDENT_SIZE:
|
| + error = ('E126', one_indented)
|
| + else:
|
| + hangs[depth] = hang
|
| + error = ('E121', one_indented)
|
| +
|
| + yield (start, '{0} {1}'.format(*error))
|
| +
|
| + # Look for visual indenting.
|
| + if (parens[row] and token_type not in (tokenize.NL, tokenize.COMMENT)
|
| + and not indent[depth]):
|
| + indent[depth] = start[1]
|
| + indent_chances[start[1]] = True
|
| + # Deal with implicit string concatenation.
|
| + elif (token_type in (tokenize.STRING, tokenize.COMMENT) or
|
| + text in ('u', 'ur', 'b', 'br')):
|
| + indent_chances[start[1]] = unicode
|
| + # Special case for the "if" statement because len("if (") is equal to
|
| + # 4.
|
| + elif not indent_chances and not row and not depth and text == 'if':
|
| + indent_chances[end[1] + 1] = True
|
| + elif text == ':' and line[end[1]:].isspace():
|
| + open_rows[depth].append(row)
|
| +
|
| + # Keep track of bracket depth.
|
| + if token_type == tokenize.OP:
|
| + if text in '([{':
|
| + depth += 1
|
| + indent.append(0)
|
| + hangs.append(None)
|
| + if len(open_rows) == depth:
|
| + open_rows.append([])
|
| + open_rows[depth].append(row)
|
| + parens[row] += 1
|
| + elif text in ')]}' and depth > 0:
|
| + # Parent indents should not be more than this one.
|
| + prev_indent = indent.pop() or last_indent[1]
|
| + hangs.pop()
|
| + for d in range(depth):
|
| + if indent[d] > prev_indent:
|
| + indent[d] = 0
|
| + for ind in list(indent_chances):
|
| + if ind >= prev_indent:
|
| + del indent_chances[ind]
|
| + del open_rows[depth + 1:]
|
| + depth -= 1
|
| + if depth:
|
| + indent_chances[indent[depth]] = True
|
| + for idx in range(row, -1, -1):
|
| + if parens[idx]:
|
| + parens[idx] -= 1
|
| + break
|
| + assert len(indent) == depth + 1
|
| + if (
|
| + start[1] not in indent_chances and
|
| + # This is for purposes of speeding up E121 (GitHub #90).
|
| + not last_line.rstrip().endswith(',')
|
| + ):
|
| + # Allow to line up tokens.
|
| + indent_chances[start[1]] = text
|
| +
|
| + last_token_multiline = (start[0] != end[0])
|
| + if last_token_multiline:
|
| + rel_indent[end[0] - first_row] = rel_indent[row]
|
| +
|
| + last_line = line
|
| +
|
| + if (
|
| + indent_next and
|
| + not last_line_begins_with_multiline and
|
| + pep8.expand_indent(line) == indent_level + DEFAULT_INDENT_SIZE
|
| + ):
|
| + pos = (start[0], indent[0] + 4)
|
| + yield (pos, 'E125 {0}'.format(indent_level +
|
| + 2 * DEFAULT_INDENT_SIZE))
|
| +del pep8._checks['logical_line'][pep8.continued_indentation]
|
| +pep8.register_check(continued_indentation)
|
| +
|
| +
|
| +class FixPEP8(object):
|
| +
|
| + """Fix invalid code.
|
| +
|
| + Fixer methods are prefixed "fix_". The _fix_source() method looks for these
|
| + automatically.
|
| +
|
| + The fixer method can take either one or two arguments (in addition to
|
| + self). The first argument is "result", which is the error information from
|
| + pep8. The second argument, "logical", is required only for logical-line
|
| + fixes.
|
| +
|
| + The fixer method can return the list of modified lines or None. An empty
|
| + list would mean that no changes were made. None would mean that only the
|
| + line reported in the pep8 error was modified. Note that the modified line
|
| + numbers that are returned are indexed at 1. This typically would correspond
|
| + with the line number reported in the pep8 error information.
|
| +
|
| + [fixed method list]
|
| + - e121,e122,e123,e124,e125,e126,e127,e128,e129
|
| + - e201,e202,e203
|
| + - e211
|
| + - e221,e222,e223,e224,e225
|
| + - e231
|
| + - e251
|
| + - e261,e262
|
| + - e271,e272,e273,e274
|
| + - e301,e302,e303
|
| + - e401
|
| + - e502
|
| + - e701,e702
|
| + - e711
|
| + - w291
|
| +
|
| + """
|
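| +    # A minimal sketch (an illustration, not one of the real fixers) of the
|
| +    # method protocol described above; "e999" is a hypothetical error code,
|
| +    # and the code is commented out so _fix_source() does not pick it up:
|
| +    #
|
| +    #     def fix_e999(self, result):
|
| +    #         line_index = result['line'] - 1
|
| +    #         line = self.source[line_index]
|
| +    #         self.source[line_index] = line.rstrip() + '\n'
|
| +    #         # Returning None means only the reported line was modified.
|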
| +
|
| + def __init__(self, filename,
|
| + options,
|
| + contents=None,
|
| + long_line_ignore_cache=None):
|
| + self.filename = filename
|
| + if contents is None:
|
| + self.source = readlines_from_file(filename)
|
| + else:
|
| + sio = io.StringIO(contents)
|
| + self.source = sio.readlines()
|
| + self.options = options
|
| + self.indent_word = _get_indentword(''.join(self.source))
|
| +
|
| + self.long_line_ignore_cache = (
|
| + set() if long_line_ignore_cache is None
|
| + else long_line_ignore_cache)
|
| +
|
| + # Many fixers are the same even though pep8 categorizes them
|
| + # differently.
|
| + self.fix_e115 = self.fix_e112
|
| + self.fix_e116 = self.fix_e113
|
| + self.fix_e121 = self._fix_reindent
|
| + self.fix_e122 = self._fix_reindent
|
| + self.fix_e123 = self._fix_reindent
|
| + self.fix_e124 = self._fix_reindent
|
| + self.fix_e126 = self._fix_reindent
|
| + self.fix_e127 = self._fix_reindent
|
| + self.fix_e128 = self._fix_reindent
|
| + self.fix_e129 = self._fix_reindent
|
| + self.fix_e202 = self.fix_e201
|
| + self.fix_e203 = self.fix_e201
|
| + self.fix_e211 = self.fix_e201
|
| + self.fix_e221 = self.fix_e271
|
| + self.fix_e222 = self.fix_e271
|
| + self.fix_e223 = self.fix_e271
|
| + self.fix_e226 = self.fix_e225
|
| + self.fix_e227 = self.fix_e225
|
| + self.fix_e228 = self.fix_e225
|
| + self.fix_e241 = self.fix_e271
|
| + self.fix_e242 = self.fix_e224
|
| + self.fix_e261 = self.fix_e262
|
| + self.fix_e272 = self.fix_e271
|
| + self.fix_e273 = self.fix_e271
|
| + self.fix_e274 = self.fix_e271
|
| + self.fix_e309 = self.fix_e301
|
| + self.fix_e501 = (
|
| + self.fix_long_line_logically if
|
| + options and (options.aggressive >= 2 or options.experimental) else
|
| + self.fix_long_line_physically)
|
| + self.fix_e703 = self.fix_e702
|
| +
|
| + self._ws_comma_done = False
|
| +
|
| + def _fix_source(self, results):
|
| + try:
|
| + (logical_start, logical_end) = _find_logical(self.source)
|
| + logical_support = True
|
| + except (SyntaxError, tokenize.TokenError): # pragma: no cover
|
| + logical_support = False
|
| +
|
| + completed_lines = set()
|
| + for result in sorted(results, key=_priority_key):
|
| + if result['line'] in completed_lines:
|
| + continue
|
| +
|
| + fixed_methodname = 'fix_' + result['id'].lower()
|
| + if hasattr(self, fixed_methodname):
|
| + fix = getattr(self, fixed_methodname)
|
| +
|
| + line_index = result['line'] - 1
|
| + original_line = self.source[line_index]
|
| +
|
| + is_logical_fix = len(inspect.getargspec(fix).args) > 2
|
| + if is_logical_fix:
|
| + logical = None
|
| + if logical_support:
|
| + logical = _get_logical(self.source,
|
| + result,
|
| + logical_start,
|
| + logical_end)
|
| + if logical and set(range(
|
| + logical[0][0] + 1,
|
| + logical[1][0] + 1)).intersection(
|
| + completed_lines):
|
| + continue
|
| +
|
| + modified_lines = fix(result, logical)
|
| + else:
|
| + modified_lines = fix(result)
|
| +
|
| + if modified_lines is None:
|
| + # Force logical fixes to report what they modified.
|
| + assert not is_logical_fix
|
| +
|
| + if self.source[line_index] == original_line:
|
| + modified_lines = []
|
| +
|
| + if modified_lines:
|
| + completed_lines.update(modified_lines)
|
| +            elif modified_lines == []:  # Empty list means no fix
|
| + if self.options.verbose >= 2:
|
| + print(
|
| + '---> Not fixing {f} on line {l}'.format(
|
| + f=result['id'], l=result['line']),
|
| + file=sys.stderr)
|
| +            else:  # We assume one-line fix when None.
|
| + completed_lines.add(result['line'])
|
| + else:
|
| + if self.options.verbose >= 3:
|
| + print(
|
| + "---> '{0}' is not defined.".format(fixed_methodname),
|
| + file=sys.stderr)
|
| +
|
| +                    info = result['info'].strip()
|
| +                    print('---> {0}:{1}:{2}:{3}'.format(self.filename,
|
| +                                                        result['line'],
|
| +                                                        result['column'],
|
| +                                                        info),
|
| +                          file=sys.stderr)
|
| +
|
| + def fix(self):
|
| + """Return a version of the source code with PEP 8 violations fixed."""
|
| + pep8_options = {
|
| + 'ignore': self.options.ignore,
|
| + 'select': self.options.select,
|
| + 'max_line_length': self.options.max_line_length,
|
| + }
|
| + results = _execute_pep8(pep8_options, self.source)
|
| +
|
| + if self.options.verbose:
|
| + progress = {}
|
| + for r in results:
|
| + if r['id'] not in progress:
|
| + progress[r['id']] = set()
|
| + progress[r['id']].add(r['line'])
|
| + print('---> {n} issue(s) to fix {progress}'.format(
|
| + n=len(results), progress=progress), file=sys.stderr)
|
| +
|
| + if self.options.line_range:
|
| + start, end = self.options.line_range
|
| + results = [r for r in results
|
| + if start <= r['line'] <= end]
|
| +
|
| + self._fix_source(filter_results(source=''.join(self.source),
|
| + results=results,
|
| + aggressive=self.options.aggressive))
|
| +
|
| + if self.options.line_range:
|
| + # If number of lines has changed then change line_range.
|
| + count = sum(sline.count('\n')
|
| + for sline in self.source[start - 1:end])
|
| + self.options.line_range[1] = start + count - 1
|
| +
|
| + return ''.join(self.source)
|
| +
|
| + def _fix_reindent(self, result):
|
| + """Fix a badly indented line.
|
| +
|
| + This is done by adding or removing from its initial indent only.
|
| +
|
| + """
|
| + num_indent_spaces = int(result['info'].split()[1])
|
| + line_index = result['line'] - 1
|
| + target = self.source[line_index]
|
| +
|
| + self.source[line_index] = ' ' * num_indent_spaces + target.lstrip()
|
| +
|
| + def fix_e112(self, result):
|
| + """Fix under-indented comments."""
|
| + line_index = result['line'] - 1
|
| + target = self.source[line_index]
|
| +
|
| + if not target.lstrip().startswith('#'):
|
| + # Don't screw with invalid syntax.
|
| + return []
|
| +
|
| + self.source[line_index] = self.indent_word + target
|
| +
|
| + def fix_e113(self, result):
|
| + """Fix over-indented comments."""
|
| + line_index = result['line'] - 1
|
| + target = self.source[line_index]
|
| +
|
| + indent = _get_indentation(target)
|
| + stripped = target.lstrip()
|
| +
|
| + if not stripped.startswith('#'):
|
| + # Don't screw with invalid syntax.
|
| + return []
|
| +
|
| + self.source[line_index] = indent[1:] + stripped
|
| +
|
| + def fix_e125(self, result):
|
| + """Fix indentation undistinguish from the next logical line."""
|
| + num_indent_spaces = int(result['info'].split()[1])
|
| + line_index = result['line'] - 1
|
| + target = self.source[line_index]
|
| +
|
| + spaces_to_add = num_indent_spaces - len(_get_indentation(target))
|
| + indent = len(_get_indentation(target))
|
| + modified_lines = []
|
| +
|
| + while len(_get_indentation(self.source[line_index])) >= indent:
|
| + self.source[line_index] = (' ' * spaces_to_add +
|
| + self.source[line_index])
|
| +            modified_lines.append(1 + line_index)  # Line indexed at 1.
|
| + line_index -= 1
|
| +
|
| + return modified_lines
|
| +
|
| + def fix_e201(self, result):
|
| + """Remove extraneous whitespace."""
|
| + line_index = result['line'] - 1
|
| + target = self.source[line_index]
|
| + offset = result['column'] - 1
|
| +
|
| + if is_probably_part_of_multiline(target):
|
| + return []
|
| +
|
| + fixed = fix_whitespace(target,
|
| + offset=offset,
|
| + replacement='')
|
| +
|
| + self.source[line_index] = fixed
|
| +
|
| + def fix_e224(self, result):
|
| + """Remove extraneous whitespace around operator."""
|
| + target = self.source[result['line'] - 1]
|
| + offset = result['column'] - 1
|
| + fixed = target[:offset] + target[offset:].replace('\t', ' ')
|
| + self.source[result['line'] - 1] = fixed
|
| +
|
| + def fix_e225(self, result):
|
| + """Fix missing whitespace around operator."""
|
| + target = self.source[result['line'] - 1]
|
| + offset = result['column'] - 1
|
| + fixed = target[:offset] + ' ' + target[offset:]
|
| +
|
| + # Only proceed if non-whitespace characters match.
|
| + # And make sure we don't break the indentation.
|
| + if (
|
| + fixed.replace(' ', '') == target.replace(' ', '') and
|
| + _get_indentation(fixed) == _get_indentation(target)
|
| + ):
|
| + self.source[result['line'] - 1] = fixed
|
| + else:
|
| + return []
|
| +
|
| + def fix_e231(self, result):
|
| + """Add missing whitespace."""
|
| + # Optimize for comma case. This will fix all commas in the full source
|
| + # code in one pass. Don't do this more than once. If it fails the first
|
| + # time, there is no point in trying again.
|
| + if ',' in result['info'] and not self._ws_comma_done:
|
| + self._ws_comma_done = True
|
| + original = ''.join(self.source)
|
| + new = refactor(original, ['ws_comma'])
|
| + if original.strip() != new.strip():
|
| + self.source = [new]
|
| + return range(1, 1 + len(original))
|
| +
|
| + line_index = result['line'] - 1
|
| + target = self.source[line_index]
|
| + offset = result['column']
|
| + fixed = target[:offset] + ' ' + target[offset:]
|
| + self.source[line_index] = fixed
|
| +
|
| + def fix_e251(self, result):
|
| + """Remove whitespace around parameter '=' sign."""
|
| + line_index = result['line'] - 1
|
| + target = self.source[line_index]
|
| +
|
| +        # This is necessary since pep8 sometimes reports columns that go
|
| +        # past the end of the physical line. This happens in cases like
|
| +        # foo(bar\n=None)
|
| + c = min(result['column'] - 1,
|
| + len(target) - 1)
|
| +
|
| + if target[c].strip():
|
| + fixed = target
|
| + else:
|
| + fixed = target[:c].rstrip() + target[c:].lstrip()
|
| +
|
| + # There could be an escaped newline
|
| + #
|
| + # def foo(a=\
|
| + # 1)
|
| + if fixed.endswith(('=\\\n', '=\\\r\n', '=\\\r')):
|
| + self.source[line_index] = fixed.rstrip('\n\r \t\\')
|
| + self.source[line_index + 1] = self.source[line_index + 1].lstrip()
|
| +            return [line_index + 1, line_index + 2]  # Line indexed at 1
|
| +
|
| + self.source[result['line'] - 1] = fixed
|
| +
|
| + def fix_e262(self, result):
|
| + """Fix spacing after comment hash."""
|
| + target = self.source[result['line'] - 1]
|
| + offset = result['column']
|
| +
|
| + code = target[:offset].rstrip(' \t#')
|
| + comment = target[offset:].lstrip(' \t#')
|
| +
|
| +        fixed = code + ('  # ' + comment if comment.strip() else '\n')
|
| +
|
| + self.source[result['line'] - 1] = fixed
|
| +
|
| + def fix_e271(self, result):
|
| + """Fix extraneous whitespace around keywords."""
|
| + line_index = result['line'] - 1
|
| + target = self.source[line_index]
|
| + offset = result['column'] - 1
|
| +
|
| + if is_probably_part_of_multiline(target):
|
| + return []
|
| +
|
| + fixed = fix_whitespace(target,
|
| + offset=offset,
|
| + replacement=' ')
|
| +
|
| + if fixed == target:
|
| + return []
|
| + else:
|
| + self.source[line_index] = fixed
|
| +
|
| + def fix_e301(self, result):
|
| + """Add missing blank line."""
|
| + cr = '\n'
|
| + self.source[result['line'] - 1] = cr + self.source[result['line'] - 1]
|
| +
|
| + def fix_e302(self, result):
|
| + """Add missing 2 blank lines."""
|
| + add_linenum = 2 - int(result['info'].split()[-1])
|
| + cr = '\n' * add_linenum
|
| + self.source[result['line'] - 1] = cr + self.source[result['line'] - 1]
|
| +
|
| + def fix_e303(self, result):
|
| + """Remove extra blank lines."""
|
| + delete_linenum = int(result['info'].split('(')[1].split(')')[0]) - 2
|
| + delete_linenum = max(1, delete_linenum)
|
| +
|
| + # We need to count because pep8 reports an offset line number if there
|
| + # are comments.
|
| + cnt = 0
|
| + line = result['line'] - 2
|
| + modified_lines = []
|
| + while cnt < delete_linenum and line >= 0:
|
| + if not self.source[line].strip():
|
| + self.source[line] = ''
|
| +                modified_lines.append(1 + line)  # Line indexed at 1
|
| + cnt += 1
|
| + line -= 1
|
| +
|
| + return modified_lines
|
| +
|
| + def fix_e304(self, result):
|
| + """Remove blank line following function decorator."""
|
| + line = result['line'] - 2
|
| + if not self.source[line].strip():
|
| + self.source[line] = ''
|
| +
|
| + def fix_e401(self, result):
|
| + """Put imports on separate lines."""
|
| + line_index = result['line'] - 1
|
| + target = self.source[line_index]
|
| + offset = result['column'] - 1
|
| +
|
| + if not target.lstrip().startswith('import'):
|
| + return []
|
| +
|
| + indentation = re.split(pattern=r'\bimport\b',
|
| + string=target, maxsplit=1)[0]
|
| + fixed = (target[:offset].rstrip('\t ,') + '\n' +
|
| + indentation + 'import ' + target[offset:].lstrip('\t ,'))
|
| + self.source[line_index] = fixed
|
| +
|
| + def fix_long_line_logically(self, result, logical):
|
| + """Try to make lines fit within --max-line-length characters."""
|
| + if (
|
| + not logical or
|
| + len(logical[2]) == 1 or
|
| + self.source[result['line'] - 1].lstrip().startswith('#')
|
| + ):
|
| + return self.fix_long_line_physically(result)
|
| +
|
| + start_line_index = logical[0][0]
|
| + end_line_index = logical[1][0]
|
| + logical_lines = logical[2]
|
| +
|
| + previous_line = get_item(self.source, start_line_index - 1, default='')
|
| + next_line = get_item(self.source, end_line_index + 1, default='')
|
| +
|
| + single_line = join_logical_line(''.join(logical_lines))
|
| +
|
| + try:
|
| + fixed = self.fix_long_line(
|
| + target=single_line,
|
| + previous_line=previous_line,
|
| + next_line=next_line,
|
| + original=''.join(logical_lines))
|
| + except (SyntaxError, tokenize.TokenError):
|
| + return self.fix_long_line_physically(result)
|
| +
|
| + if fixed:
|
| + for line_index in range(start_line_index, end_line_index + 1):
|
| + self.source[line_index] = ''
|
| + self.source[start_line_index] = fixed
|
| + return range(start_line_index + 1, end_line_index + 1)
|
| + else:
|
| + return []
|
| +
|
| + def fix_long_line_physically(self, result):
|
| + """Try to make lines fit within --max-line-length characters."""
|
| + line_index = result['line'] - 1
|
| + target = self.source[line_index]
|
| +
|
| + previous_line = get_item(self.source, line_index - 1, default='')
|
| + next_line = get_item(self.source, line_index + 1, default='')
|
| +
|
| + try:
|
| + fixed = self.fix_long_line(
|
| + target=target,
|
| + previous_line=previous_line,
|
| + next_line=next_line,
|
| + original=target)
|
| + except (SyntaxError, tokenize.TokenError):
|
| + return []
|
| +
|
| + if fixed:
|
| + self.source[line_index] = fixed
|
| + return [line_index + 1]
|
| + else:
|
| + return []
|
| +
|
| + def fix_long_line(self, target, previous_line,
|
| + next_line, original):
|
| + cache_entry = (target, previous_line, next_line)
|
| + if cache_entry in self.long_line_ignore_cache:
|
| + return []
|
| +
|
| + if target.lstrip().startswith('#'):
|
| + # Wrap commented lines.
|
| + return shorten_comment(
|
| + line=target,
|
| + max_line_length=self.options.max_line_length,
|
| + last_comment=not next_line.lstrip().startswith('#'))
|
| +
|
| + fixed = get_fixed_long_line(
|
| + target=target,
|
| + previous_line=previous_line,
|
| + original=original,
|
| + indent_word=self.indent_word,
|
| + max_line_length=self.options.max_line_length,
|
| + aggressive=self.options.aggressive,
|
| + experimental=self.options.experimental,
|
| + verbose=self.options.verbose)
|
| + if fixed and not code_almost_equal(original, fixed):
|
| + return fixed
|
| + else:
|
| + self.long_line_ignore_cache.add(cache_entry)
|
| + return None
|
| +
|
| + def fix_e502(self, result):
|
| + """Remove extraneous escape of newline."""
|
| + line_index = result['line'] - 1
|
| + target = self.source[line_index]
|
| + self.source[line_index] = target.rstrip('\n\r \t\\') + '\n'
|
| +
|
| + def fix_e701(self, result):
|
| + """Put colon-separated compound statement on separate lines."""
|
| + line_index = result['line'] - 1
|
| + target = self.source[line_index]
|
| + c = result['column']
|
| +
|
| + fixed_source = (target[:c] + '\n' +
|
| + _get_indentation(target) + self.indent_word +
|
| + target[c:].lstrip('\n\r \t\\'))
|
| + self.source[result['line'] - 1] = fixed_source
|
| + return [result['line'], result['line'] + 1]
|
| +
|
| + def fix_e702(self, result, logical):
|
| + """Put semicolon-separated compound statement on separate lines."""
|
| + if not logical:
|
| +            return []  # pragma: no cover
|
| + logical_lines = logical[2]
|
| +
|
| + line_index = result['line'] - 1
|
| + target = self.source[line_index]
|
| +
|
| + if target.rstrip().endswith('\\'):
|
| + # Normalize '1; \\\n2' into '1; 2'.
|
| + self.source[line_index] = target.rstrip('\n \r\t\\')
|
| + self.source[line_index + 1] = self.source[line_index + 1].lstrip()
|
| + return [line_index + 1, line_index + 2]
|
| +
|
| + if target.rstrip().endswith(';'):
|
| + self.source[line_index] = target.rstrip('\n \r\t;') + '\n'
|
| + return [line_index + 1]
|
| +
|
| + offset = result['column'] - 1
|
| + first = target[:offset].rstrip(';').rstrip()
|
| + second = (_get_indentation(logical_lines[0]) +
|
| + target[offset:].lstrip(';').lstrip())
|
| +
|
| + self.source[line_index] = first + '\n' + second
|
| + return [line_index + 1]
|
| +
|
| + def fix_e711(self, result):
|
| + """Fix comparison with None."""
|
| + line_index = result['line'] - 1
|
| + target = self.source[line_index]
|
| + offset = result['column'] - 1
|
| +
|
| + right_offset = offset + 2
|
| + if right_offset >= len(target):
|
| + return []
|
| +
|
| + left = target[:offset].rstrip()
|
| + center = target[offset:right_offset]
|
| + right = target[right_offset:].lstrip()
|
| +
|
| + if not right.startswith('None'):
|
| + return []
|
| +
|
| + if center.strip() == '==':
|
| + new_center = 'is'
|
| + elif center.strip() == '!=':
|
| + new_center = 'is not'
|
| + else:
|
| + return []
|
| +
|
| + self.source[line_index] = ' '.join([left, new_center, right])
|
| +
|
| + def fix_e712(self, result):
|
| + """Fix comparison with boolean."""
|
| + line_index = result['line'] - 1
|
| + target = self.source[line_index]
|
| + offset = result['column'] - 1
|
| +
|
| + # Handle very easy "not" special cases.
|
| + if re.match(r'^\s*if \w+ == False:$', target):
|
| + self.source[line_index] = re.sub(r'if (\w+) == False:',
|
| + r'if not \1:', target, count=1)
|
| + elif re.match(r'^\s*if \w+ != True:$', target):
|
| + self.source[line_index] = re.sub(r'if (\w+) != True:',
|
| + r'if not \1:', target, count=1)
|
| + else:
|
| + right_offset = offset + 2
|
| + if right_offset >= len(target):
|
| + return []
|
| +
|
| + left = target[:offset].rstrip()
|
| + center = target[offset:right_offset]
|
| + right = target[right_offset:].lstrip()
|
| +
|
| + # Handle simple cases only.
|
| + new_right = None
|
| + if center.strip() == '==':
|
| + if re.match(r'\bTrue\b', right):
|
| + new_right = re.sub(r'\bTrue\b *', '', right, count=1)
|
| + elif center.strip() == '!=':
|
| + if re.match(r'\bFalse\b', right):
|
| + new_right = re.sub(r'\bFalse\b *', '', right, count=1)
|
| +
|
| + if new_right is None:
|
| + return []
|
| +
|
| + if new_right[0].isalnum():
|
| + new_right = ' ' + new_right
|
| +
|
| + self.source[line_index] = left + new_right
|
| +
|
| + def fix_e713(self, result):
|
| + """Fix non-membership check."""
|
| + line_index = result['line'] - 1
|
| + target = self.source[line_index]
|
| +
|
| + # Handle very easy case only.
|
| + if re.match(r'^\s*if not \w+ in \w+:$', target):
|
| + self.source[line_index] = re.sub(r'if not (\w+) in (\w+):',
|
| + r'if \1 not in \2:',
|
| + target,
|
| + count=1)
|
| +
|
| + def fix_w291(self, result):
|
| + """Remove trailing whitespace."""
|
| + fixed_line = self.source[result['line'] - 1].rstrip()
|
| + self.source[result['line'] - 1] = fixed_line + '\n'
|
| +
|
| +
|
| +def get_fixed_long_line(target, previous_line, original,
|
| + indent_word=' ', max_line_length=79,
|
| + aggressive=False, experimental=False, verbose=False):
|
| + """Break up long line and return result.
|
| +
|
| + Do this by generating multiple reformatted candidates and then
|
| + ranking the candidates to heuristically select the best option.
|
| +
|
| + """
|
| + indent = _get_indentation(target)
|
| + source = target[len(indent):]
|
| + assert source.lstrip() == source
|
| +
|
| + # Check for partial multiline.
|
| + tokens = list(generate_tokens(source))
|
| +
|
| + candidates = shorten_line(
|
| + tokens, source, indent,
|
| + indent_word,
|
| + max_line_length,
|
| + aggressive=aggressive,
|
| + experimental=experimental,
|
| + previous_line=previous_line)
|
| +
|
| + # Also sort alphabetically as a tie breaker (for determinism).
|
| + candidates = sorted(
|
| + sorted(set(candidates).union([target, original])),
|
| + key=lambda x: line_shortening_rank(x,
|
| + indent_word,
|
| + max_line_length,
|
| + experimental))
|
| +
|
| + if verbose >= 4:
|
| + print(('-' * 79 + '\n').join([''] + candidates + ['']),
|
| + file=codecs.getwriter('utf-8')(sys.stderr.buffer
|
| + if hasattr(sys.stderr,
|
| + 'buffer')
|
| + else sys.stderr))
|
| +
|
| + if candidates:
|
| + return candidates[0]
|
| +
|
| +
|
| +def join_logical_line(logical_line):
|
| + """Return single line based on logical line input."""
|
| + indentation = _get_indentation(logical_line)
|
| +
|
| + return indentation + untokenize_without_newlines(
|
| + generate_tokens(logical_line.lstrip())) + '\n'
|
| +
|
| +
|
| +def untokenize_without_newlines(tokens):
|
| + """Return source code based on tokens."""
|
| + text = ''
|
| + last_row = 0
|
| + last_column = -1
|
| +
|
| + for t in tokens:
|
| + token_string = t[1]
|
| + (start_row, start_column) = t[2]
|
| + (end_row, end_column) = t[3]
|
| +
|
| + if start_row > last_row:
|
| + last_column = 0
|
| + if (
|
| + (start_column > last_column or token_string == '\n') and
|
| + not text.endswith(' ')
|
| + ):
|
| + text += ' '
|
| +
|
| + if token_string != '\n':
|
| + text += token_string
|
| +
|
| + last_row = end_row
|
| + last_column = end_column
|
| +
|
| + return text
|
| +
|
| +
|
| +def _find_logical(source_lines):
|
| +    # Build indexes of where each logical line starts and ends.
|
| + logical_start = []
|
| + logical_end = []
|
| + last_newline = True
|
| + parens = 0
|
| + for t in generate_tokens(''.join(source_lines)):
|
| + if t[0] in [tokenize.COMMENT, tokenize.DEDENT,
|
| + tokenize.INDENT, tokenize.NL,
|
| + tokenize.ENDMARKER]:
|
| + continue
|
| + if not parens and t[0] in [tokenize.NEWLINE, tokenize.SEMI]:
|
| + last_newline = True
|
| + logical_end.append((t[3][0] - 1, t[2][1]))
|
| + continue
|
| + if last_newline and not parens:
|
| + logical_start.append((t[2][0] - 1, t[2][1]))
|
| + last_newline = False
|
| + if t[0] == tokenize.OP:
|
| + if t[1] in '([{':
|
| + parens += 1
|
| + elif t[1] in '}])':
|
| + parens -= 1
|
| + return (logical_start, logical_end)
|
| +
|
| +
|
| +def _get_logical(source_lines, result, logical_start, logical_end):
|
| + """Return the logical line corresponding to the result.
|
| +
|
| + Assumes input is already E702-clean.
|
| +
|
| + """
|
| + row = result['line'] - 1
|
| + col = result['column'] - 1
|
| + ls = None
|
| + le = None
|
| +    for i in range(len(logical_start)):
|
| + assert logical_end
|
| + x = logical_end[i]
|
| + if x[0] > row or (x[0] == row and x[1] > col):
|
| + le = x
|
| + ls = logical_start[i]
|
| + break
|
| + if ls is None:
|
| + return None
|
| + original = source_lines[ls[0]:le[0] + 1]
|
| + return ls, le, original
|
| +
|
| +
|
| +def get_item(items, index, default=None):
|
| + if 0 <= index < len(items):
|
| + return items[index]
|
| + else:
|
| + return default
|
| +
|
| +
|
| +def reindent(source, indent_size):
|
| + """Reindent all lines."""
|
| + reindenter = Reindenter(source)
|
| + return reindenter.run(indent_size)
|
| +
|
| +
|
| +def code_almost_equal(a, b):
|
| + """Return True if code is similar.
|
| +
|
| + Ignore whitespace when comparing specific line.
|
| +
|
| + """
|
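| +    # For example (an illustration):
|
| +    #
|
| +    #     code_almost_equal('1 + 1\nx\n', '1+1\nx\n')  # True
|
| +    #     code_almost_equal('1 + 1\nx\n', 'x\n1+1\n')  # False
|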
| + split_a = split_and_strip_non_empty_lines(a)
|
| + split_b = split_and_strip_non_empty_lines(b)
|
| +
|
| + if len(split_a) != len(split_b):
|
| + return False
|
| +
|
| + for index in range(len(split_a)):
|
| + if ''.join(split_a[index].split()) != ''.join(split_b[index].split()):
|
| + return False
|
| +
|
| + return True
|
| +
|
| +
|
| +def split_and_strip_non_empty_lines(text):
|
| + """Return lines split by newline.
|
| +
|
| + Ignore empty lines.
|
| +
|
| + """
|
| + return [line.strip() for line in text.splitlines() if line.strip()]
|
| +
|
| +
|
| +def fix_e265(source, aggressive=False):  # pylint: disable=unused-argument
|
| + """Format block comments."""
|
| + if '#' not in source:
|
| + # Optimization.
|
| + return source
|
| +
|
| + ignored_line_numbers = multiline_string_lines(
|
| + source,
|
| + include_docstrings=True) | set(commented_out_code_lines(source))
|
| +
|
| + fixed_lines = []
|
| + sio = io.StringIO(source)
|
| + for (line_number, line) in enumerate(sio.readlines(), start=1):
|
| + if (
|
| + line.lstrip().startswith('#') and
|
| + line_number not in ignored_line_numbers
|
| + ):
|
| + indentation = _get_indentation(line)
|
| + line = line.lstrip()
|
| +
|
| + # Normalize beginning if not a shebang.
|
| + if len(line) > 1:
|
| + if (
|
| +                        # Leave multiple spaces like '#    ' alone.
|
| + (line.count('#') > 1 or line[1].isalnum())
|
| + # Leave stylistic outlined blocks alone.
|
| + and not line.rstrip().endswith('#')
|
| + ):
|
| + line = '# ' + line.lstrip('# \t')
|
| +
|
| + fixed_lines.append(indentation + line)
|
| + else:
|
| + fixed_lines.append(line)
|
| +
|
| + return ''.join(fixed_lines)
|
| +
|
| +
|
| +def refactor(source, fixer_names, ignore=None):
|
| + """Return refactored code using lib2to3.
|
| +
|
| + Skip if ignore string is produced in the refactored code.
|
| +
|
| + """
|
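| +    # For example, fix_e231 calls refactor(source, ['ws_comma']) to add
|
| +    # missing whitespace after commas in one pass, and fix_w602 calls
|
| +    # refactor(source, ['raise'], ignore='with_traceback') to leave
|
| +    # "with_traceback" usage untouched.
|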
| + from lib2to3 import pgen2
|
| + try:
|
| + new_text = refactor_with_2to3(source,
|
| + fixer_names=fixer_names)
|
| + except (pgen2.parse.ParseError,
|
| + SyntaxError,
|
| + UnicodeDecodeError,
|
| + UnicodeEncodeError):
|
| + return source
|
| +
|
| + if ignore:
|
| + if ignore in new_text and ignore not in source:
|
| + return source
|
| +
|
| + return new_text
|
| +
|
| +
|
| +def code_to_2to3(select, ignore):
|
| + fixes = set()
|
| + for code, fix in CODE_TO_2TO3.items():
|
| + if code_match(code, select=select, ignore=ignore):
|
| + fixes |= set(fix)
|
| + return fixes
|
| +
|
| +
|
| +def fix_2to3(source, aggressive=True, select=None, ignore=None):
|
| + """Fix various deprecated code (via lib2to3)."""
|
| + if not aggressive:
|
| + return source
|
| +
|
| + select = select or []
|
| + ignore = ignore or []
|
| +
|
| + return refactor(source,
|
| + code_to_2to3(select=select,
|
| + ignore=ignore))
|
| +
|
| +
|
| +def fix_w602(source, aggressive=True):
|
| + """Fix deprecated form of raising exception."""
|
| + if not aggressive:
|
| + return source
|
| +
|
| + return refactor(source, ['raise'],
|
| + ignore='with_traceback')
|
| +
|
| +
|
| +def find_newline(source):
|
| + """Return type of newline used in source.
|
| +
|
| + Input is a list of lines.
|
| +
|
| + """
|
| + assert not isinstance(source, unicode)
|
| +
|
| + counter = collections.defaultdict(int)
|
| + for line in source:
|
| + if line.endswith(CRLF):
|
| + counter[CRLF] += 1
|
| + elif line.endswith(CR):
|
| + counter[CR] += 1
|
| + elif line.endswith(LF):
|
| + counter[LF] += 1
|
| +
|
| + return (sorted(counter, key=counter.get, reverse=True) or [LF])[0]
|
| +
|
| +
|
| +def _get_indentword(source):
|
| + """Return indentation type."""
|
| +    indent_word = '    '  # Default in case source has no indentation
|
| + try:
|
| + for t in generate_tokens(source):
|
| + if t[0] == token.INDENT:
|
| + indent_word = t[1]
|
| + break
|
| + except (SyntaxError, tokenize.TokenError):
|
| + pass
|
| + return indent_word
|
| +
|
| +
|
| +def _get_indentation(line):
|
| + """Return leading whitespace."""
|
| + if line.strip():
|
| + non_whitespace_index = len(line) - len(line.lstrip())
|
| + return line[:non_whitespace_index]
|
| + else:
|
| + return ''
|
| +
|
| +
|
| +def get_diff_text(old, new, filename):
|
| + """Return text of unified diff between old and new."""
|
| + newline = '\n'
|
| + diff = difflib.unified_diff(
|
| + old, new,
|
| + 'original/' + filename,
|
| + 'fixed/' + filename,
|
| + lineterm=newline)
|
| +
|
| + text = ''
|
| + for line in diff:
|
| + text += line
|
| +
|
| + # Work around missing newline (http://bugs.python.org/issue2142).
|
| + if text and not line.endswith(newline):
|
| + text += newline + r'\ No newline at end of file' + newline
|
| +
|
| + return text
|
| +
|
| +
|
| +def _priority_key(pep8_result):
|
| + """Key for sorting PEP8 results.
|
| +
|
| + Global fixes should be done first. This is important for things like
|
| + indentation.
|
| +
|
| + """
|
| + priority = [
|
| + # Fix multiline colon-based before semicolon based.
|
| + 'e701',
|
| + # Break multiline statements early.
|
| + 'e702',
|
| + # Things that make lines longer.
|
| + 'e225', 'e231',
|
| + # Remove extraneous whitespace before breaking lines.
|
| + 'e201',
|
| + # Shorten whitespace in comment before resorting to wrapping.
|
| + 'e262'
|
| + ]
|
| + middle_index = 10000
|
| + lowest_priority = [
|
| + # We need to shorten lines last since the logical fixer can get in a
|
| + # loop, which causes us to exit early.
|
| + 'e501'
|
| + ]
|
| + key = pep8_result['id'].lower()
|
| + try:
|
| + return priority.index(key)
|
| + except ValueError:
|
| + try:
|
| + return middle_index + lowest_priority.index(key) + 1
|
| + except ValueError:
|
| + return middle_index
|
| +
|
| +
|
| +def shorten_line(tokens, source, indentation, indent_word, max_line_length,
|
| + aggressive=False, experimental=False, previous_line=''):
|
| + """Separate line at OPERATOR.
|
| +
|
| + Multiple candidates will be yielded.
|
| +
|
| + """
|
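| +    # Candidates come from up to three sources: the default operator-based
|
| +    # splits, aggressive splits at the token groups in
|
| +    # SHORTEN_OPERATOR_GROUPS, and the experimental token-based reflowing.
|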
| + for candidate in _shorten_line(tokens=tokens,
|
| + source=source,
|
| + indentation=indentation,
|
| + indent_word=indent_word,
|
| + aggressive=aggressive,
|
| + previous_line=previous_line):
|
| + yield candidate
|
| +
|
| + if aggressive:
|
| + for key_token_strings in SHORTEN_OPERATOR_GROUPS:
|
| + shortened = _shorten_line_at_tokens(
|
| + tokens=tokens,
|
| + source=source,
|
| + indentation=indentation,
|
| + indent_word=indent_word,
|
| + key_token_strings=key_token_strings,
|
| + aggressive=aggressive)
|
| +
|
| + if shortened is not None and shortened != source:
|
| + yield shortened
|
| +
|
| + if experimental:
|
| + for shortened in _shorten_line_at_tokens_new(
|
| + tokens=tokens,
|
| + source=source,
|
| + indentation=indentation,
|
| + max_line_length=max_line_length):
|
| +
|
| + yield shortened
|
| +
|
| +
|
| +def _shorten_line(tokens, source, indentation, indent_word,
|
| + aggressive=False, previous_line=''):
|
| + """Separate line at OPERATOR.
|
| +
|
| + The input is expected to be free of newlines except for inside multiline
|
| + strings and at the end.
|
| +
|
| + Multiple candidates will be yielded.
|
| +
|
| + """
|
| + for (token_type,
|
| + token_string,
|
| + start_offset,
|
| + end_offset) in token_offsets(tokens):
|
| +
|
| + if (
|
| + token_type == tokenize.COMMENT and
|
| + not is_probably_part_of_multiline(previous_line) and
|
| + not is_probably_part_of_multiline(source) and
|
| + not source[start_offset + 1:].strip().lower().startswith(
|
| + ('noqa', 'pragma:', 'pylint:'))
|
| + ):
|
| + # Move inline comments to previous line.
|
| + first = source[:start_offset]
|
| + second = source[start_offset:]
|
| + yield (indentation + second.strip() + '\n' +
|
| + indentation + first.strip() + '\n')
|
| + elif token_type == token.OP and token_string != '=':
|
| + # Don't break on '=' after keyword as this violates PEP 8.
|
| +
|
| + assert token_type != token.INDENT
|
| +
|
| + first = source[:end_offset]
|
| +
|
| + second_indent = indentation
|
| + if first.rstrip().endswith('('):
|
| + second_indent += indent_word
|
| + elif '(' in first:
|
| + second_indent += ' ' * (1 + first.find('('))
|
| + else:
|
| + second_indent += indent_word
|
| +
|
| + second = (second_indent + source[end_offset:].lstrip())
|
| + if (
|
| + not second.strip() or
|
| + second.lstrip().startswith('#')
|
| + ):
|
| + continue
|
| +
|
| + # Do not begin a line with a comma
|
| + if second.lstrip().startswith(','):
|
| + continue
|
| + # Do end a line with a dot
|
| + if first.rstrip().endswith('.'):
|
| + continue
|
| + if token_string in '+-*/':
|
| + fixed = first + ' \\' + '\n' + second
|
| + else:
|
| + fixed = first + '\n' + second
|
| +
|
| + # Only fix if syntax is okay.
|
| + if check_syntax(normalize_multiline(fixed)
|
| + if aggressive else fixed):
|
| + yield indentation + fixed
|
| +
|
| +
|
| +# A convenient way to handle tokens.
|
| +Token = collections.namedtuple('Token', ['token_type', 'token_string',
|
| + 'spos', 'epos', 'line'])
|
| +
|
| +
|
| +class ReformattedLines(object):
|
| +
|
| + """The reflowed lines of atoms.
|
| +
|
| + Each part of the line is represented as an "atom." They can be moved
|
| + around when need be to get the optimal formatting.
|
| +
|
| + """
|
| +
|
| + ###########################################################################
|
| + # Private Classes
|
| +
|
| + class _Indent(object):
|
| +
|
| + """Represent an indentation in the atom stream."""
|
| +
|
| + def __init__(self, indent_amt):
|
| + self._indent_amt = indent_amt
|
| +
|
| + def emit(self):
|
| + return ' ' * self._indent_amt
|
| +
|
| + @property
|
| + def size(self):
|
| + return self._indent_amt
|
| +
|
| + class _Space(object):
|
| +
|
| + """Represent a space in the atom stream."""
|
| +
|
| + def emit(self):
|
| + return ' '
|
| +
|
| + @property
|
| + def size(self):
|
| + return 1
|
| +
|
| + class _LineBreak(object):
|
| +
|
| + """Represent a line break in the atom stream."""
|
| +
|
| + def emit(self):
|
| + return '\n'
|
| +
|
| + @property
|
| + def size(self):
|
| + return 0
|
| +
|
| + def __init__(self, max_line_length):
|
| + self._max_line_length = max_line_length
|
| + self._lines = []
|
| + self._bracket_depth = 0
|
| + self._prev_item = None
|
| + self._prev_prev_item = None
|
| +
|
| + def __repr__(self):
|
| + return self.emit()
|
| +
|
| + ###########################################################################
|
| + # Public Methods
|
| +
|
| + def add(self, obj, indent_amt, break_after_open_bracket):
|
| + if isinstance(obj, Atom):
|
| + self._add_item(obj, indent_amt)
|
| + return
|
| +
|
| + self._add_container(obj, indent_amt, break_after_open_bracket)
|
| +
|
| + def add_comment(self, item):
|
| + num_spaces = 2
|
| + if len(self._lines) > 1:
|
| + if isinstance(self._lines[-1], self._Space):
|
| + num_spaces -= 1
|
| + if len(self._lines) > 2:
|
| + if isinstance(self._lines[-2], self._Space):
|
| + num_spaces -= 1
|
| +
|
| + while num_spaces > 0:
|
| + self._lines.append(self._Space())
|
| + num_spaces -= 1
|
| + self._lines.append(item)
|
| +
|
| + def add_indent(self, indent_amt):
|
| + self._lines.append(self._Indent(indent_amt))
|
| +
|
| + def add_line_break(self, indent):
|
| + self._lines.append(self._LineBreak())
|
| + self.add_indent(len(indent))
|
| +
|
| + def add_line_break_at(self, index, indent_amt):
|
| + self._lines.insert(index, self._LineBreak())
|
| + self._lines.insert(index + 1, self._Indent(indent_amt))
|
| +
|
| + def add_space_if_needed(self, curr_text, equal=False):
|
| + if (
|
| + not self._lines or isinstance(
|
| + self._lines[-1], (self._LineBreak, self._Indent, self._Space))
|
| + ):
|
| + return
|
| +
|
| + prev_text = unicode(self._prev_item)
|
| + prev_prev_text = (
|
| + unicode(self._prev_prev_item) if self._prev_prev_item else '')
|
| +
|
| + if (
|
| + # The previous item was a keyword or identifier and the current
|
| + # item isn't an operator that doesn't require a space.
|
| + ((self._prev_item.is_keyword or self._prev_item.is_string or
|
| + self._prev_item.is_name or self._prev_item.is_number) and
|
| + (curr_text[0] not in '([{.,:}])' or
|
| + (curr_text[0] == '=' and equal))) or
|
| +
|
| + # Don't place spaces around a '.', unless it's in an 'import'
|
| + # statement.
|
| + ((prev_prev_text != 'from' and prev_text[-1] != '.' and
|
| + curr_text != 'import') and
|
| +
|
| + # Don't place a space before a colon.
|
| + curr_text[0] != ':' and
|
| +
|
| + # Don't split up ending brackets by spaces.
|
| + ((prev_text[-1] in '}])' and curr_text[0] not in '.,}])') or
|
| +
|
| + # Put a space after a colon or comma.
|
| + prev_text[-1] in ':,' or
|
| +
|
| + # Put space around '=' if asked to.
|
| + (equal and prev_text == '=') or
|
| +
|
| + # Put spaces around non-unary arithmetic operators.
|
| + ((self._prev_prev_item and
|
| + (prev_text not in '+-' and
|
| + (self._prev_prev_item.is_name or
|
| + self._prev_prev_item.is_number or
|
| + self._prev_prev_item.is_string)) and
|
| + prev_text in ('+', '-', '%', '*', '/', '//', '**')))))
|
| + ):
|
| + self._lines.append(self._Space())
|
| +
|
| + def previous_item(self):
|
| + """Return the previous non-whitespace item."""
|
| + return self._prev_item
|
| +
|
| + def fits_on_current_line(self, item_extent):
|
| + return self.current_size() + item_extent <= self._max_line_length
|
| +
|
| + def current_size(self):
|
| + """The size of the current line minus the indentation."""
|
| + size = 0
|
| + for item in reversed(self._lines):
|
| + size += item.size
|
| + if isinstance(item, self._LineBreak):
|
| + break
|
| +
|
| + return size
|
| +
|
| + def line_empty(self):
|
| + return (self._lines and
|
| + isinstance(self._lines[-1],
|
| + (self._LineBreak, self._Indent)))
|
| +
|
| + def emit(self):
|
| + string = ''
|
| + for item in self._lines:
|
| + if isinstance(item, self._LineBreak):
|
| + string = string.rstrip()
|
| + string += item.emit()
|
| +
|
| + return string.rstrip() + '\n'
|
| +
|
| + ###########################################################################
|
| + # Private Methods
|
| +
|
| + def _add_item(self, item, indent_amt):
|
| + """Add an item to the line.
|
| +
|
| + Reflow the line to get the best formatting after the item is
|
| + inserted. The bracket depth indicates if the item is being
|
| + inserted inside of a container or not.
|
| +
|
| + """
|
| + if self._prev_item and self._prev_item.is_string and item.is_string:
|
| + # Place consecutive string literals on separate lines.
|
| + self._lines.append(self._LineBreak())
|
| + self._lines.append(self._Indent(indent_amt))
|
| +
|
| + item_text = unicode(item)
|
| + if self._lines and self._bracket_depth:
|
| + # Adding the item into a container.
|
| + self._prevent_default_initializer_splitting(item, indent_amt)
|
| +
|
| + if item_text in '.,)]}':
|
| + self._split_after_delimiter(item, indent_amt)
|
| +
|
| + elif self._lines and not self.line_empty():
|
| + # Adding the item outside of a container.
|
| + if self.fits_on_current_line(len(item_text)):
|
| + self._enforce_space(item)
|
| +
|
| + else:
|
| + # Line break for the new item.
|
| + self._lines.append(self._LineBreak())
|
| + self._lines.append(self._Indent(indent_amt))
|
| +
|
| + self._lines.append(item)
|
| + self._prev_item, self._prev_prev_item = item, self._prev_item
|
| +
|
| + if item_text in '([{':
|
| + self._bracket_depth += 1
|
| +
|
| + elif item_text in '}])':
|
| + self._bracket_depth -= 1
|
| + assert self._bracket_depth >= 0
|
| +
|
| + def _add_container(self, container, indent_amt, break_after_open_bracket):
|
| + actual_indent = indent_amt + 1
|
| +
|
| + if (
|
| + unicode(self._prev_item) != '=' and
|
| + not self.line_empty() and
|
| + not self.fits_on_current_line(
|
| + container.size + self._bracket_depth + 2)
|
| + ):
|
| +
|
| + if unicode(container)[0] == '(' and self._prev_item.is_name:
|
| + # Don't split before the opening bracket of a call.
|
| + break_after_open_bracket = True
|
| + actual_indent = indent_amt + 4
|
| + elif (
|
| + break_after_open_bracket or
|
| + unicode(self._prev_item) not in '([{'
|
| + ):
|
| + # If the container doesn't fit on the current line and the
|
| + # current line isn't empty, place the container on the next
|
| + # line.
|
| + self._lines.append(self._LineBreak())
|
| + self._lines.append(self._Indent(indent_amt))
|
| + break_after_open_bracket = False
|
| + else:
|
| + actual_indent = self.current_size() + 1
|
| + break_after_open_bracket = False
|
| +
|
| + if isinstance(container, (ListComprehension, IfExpression)):
|
| + actual_indent = indent_amt
|
| +
|
| + # Increase the continued indentation only if recursing on a
|
| + # container.
|
| + container.reflow(self, ' ' * actual_indent,
|
| + break_after_open_bracket=break_after_open_bracket)
|
| +
|
| + def _prevent_default_initializer_splitting(self, item, indent_amt):
|
| + """Prevent splitting between a default initializer.
|
| +
|
| + When there is a default initializer, it's best to keep it all on
|
| + the same line. It's nicer and more readable, even if it goes
|
| + over the maximum allowable line length. This goes back along the
|
| + current line to determine if we have a default initializer, and,
|
| + if so, to remove extraneous whitespaces and add a line
|
| + break/indent before it if needed.
|
| +
|
| + """
|
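| +        # For example (an illustration), keep "key=1" in "foo(key=1)" on
|
| +        # one line rather than splitting around the '='.
|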
| + if unicode(item) == '=':
|
| + # This is the assignment in the initializer. Just remove spaces for
|
| + # now.
|
| + self._delete_whitespace()
|
| + return
|
| +
|
| + if (not self._prev_item or not self._prev_prev_item or
|
| + unicode(self._prev_item) != '='):
|
| + return
|
| +
|
| + self._delete_whitespace()
|
| + prev_prev_index = self._lines.index(self._prev_prev_item)
|
| +
|
| + if (
|
| + isinstance(self._lines[prev_prev_index - 1], self._Indent) or
|
| + self.fits_on_current_line(item.size + 1)
|
| + ):
|
| + # The default initializer is already the only item on this line.
|
| + # Don't insert a newline here.
|
| + return
|
| +
|
| + # Replace the space with a newline/indent combo.
|
| + if isinstance(self._lines[prev_prev_index - 1], self._Space):
|
| + del self._lines[prev_prev_index - 1]
|
| +
|
| + self.add_line_break_at(self._lines.index(self._prev_prev_item),
|
| + indent_amt)
|
| +
|
| + def _split_after_delimiter(self, item, indent_amt):
|
| + """Split the line only after a delimiter."""
|
| + self._delete_whitespace()
|
| +
|
| + if self.fits_on_current_line(item.size):
|
| + return
|
| +
|
| + last_space = None
|
| + for item in reversed(self._lines):
|
| + if (
|
| + last_space and
|
| + (not isinstance(item, Atom) or not item.is_colon)
|
| + ):
|
| + break
|
| + else:
|
| + last_space = None
|
| + if isinstance(item, self._Space):
|
| + last_space = item
|
| + if isinstance(item, (self._LineBreak, self._Indent)):
|
| + return
|
| +
|
| + if not last_space:
|
| + return
|
| +
|
| + self.add_line_break_at(self._lines.index(last_space), indent_amt)
|
| +
|
| + def _enforce_space(self, item):
|
| + """Enforce a space in certain situations.
|
| +
|
| + There are cases where we will want a space where normally we
|
| + wouldn't put one. This just enforces the addition of a space.
|
| +
|
| + """
|
| + if isinstance(self._lines[-1],
|
| + (self._Space, self._LineBreak, self._Indent)):
|
| + return
|
| +
|
| + if not self._prev_item:
|
| + return
|
| +
|
| + item_text = unicode(item)
|
| + prev_text = unicode(self._prev_item)
|
| +
|
| + # Prefer a space around a '.' in an import statement, and between the
|
| + # 'import' and '('.
|
| + if (
|
| + (item_text == '.' and prev_text == 'from') or
|
| + (item_text == 'import' and prev_text == '.') or
|
| + (item_text == '(' and prev_text == 'import')
|
| + ):
|
| + self._lines.append(self._Space())
|
| +
|
| + def _delete_whitespace(self):
|
| + """Delete all whitespace from the end of the line."""
|
| + while isinstance(self._lines[-1], (self._Space, self._LineBreak,
|
| + self._Indent)):
|
| + del self._lines[-1]
|
| +
|
| +
|
| +class Atom(object):
|
| +
|
| + """The smallest unbreakable unit that can be reflowed."""
|
| +
|
| + def __init__(self, atom):
|
| + self._atom = atom
|
| +
|
| + def __repr__(self):
|
| + return self._atom.token_string
|
| +
|
| + def __len__(self):
|
| + return self.size
|
| +
|
| + def reflow(
|
| + self, reflowed_lines, continued_indent, extent,
|
| + break_after_open_bracket=False,
|
| + is_list_comp_or_if_expr=False,
|
| + next_is_dot=False
|
| + ):
|
| + if self._atom.token_type == tokenize.COMMENT:
|
| + reflowed_lines.add_comment(self)
|
| + return
|
| +
|
| + total_size = extent if extent else self.size
|
| +
|
| + if self._atom.token_string not in ',:([{}])':
|
| + # Some atoms will need an extra 1-sized space token after them.
|
| + total_size += 1
|
| +
|
| + prev_item = reflowed_lines.previous_item()
|
| + if (
|
| + not is_list_comp_or_if_expr and
|
| + not reflowed_lines.fits_on_current_line(total_size) and
|
| + not (next_is_dot and
|
| + reflowed_lines.fits_on_current_line(self.size + 1)) and
|
| + not reflowed_lines.line_empty() and
|
| + not self.is_colon and
|
| + not (prev_item and prev_item.is_name and
|
| + unicode(self) == '(')
|
| + ):
|
| + # Start a new line if there is already something on the line and
|
| + # adding this atom would make it go over the max line length.
|
| + reflowed_lines.add_line_break(continued_indent)
|
| + else:
|
| + reflowed_lines.add_space_if_needed(unicode(self))
|
| +
|
| + reflowed_lines.add(self, len(continued_indent),
|
| + break_after_open_bracket)
|
| +
|
| + def emit(self):
|
| + return self.__repr__()
|
| +
|
| + @property
|
| + def is_keyword(self):
|
| + return keyword.iskeyword(self._atom.token_string)
|
| +
|
| + @property
|
| + def is_string(self):
|
| + return self._atom.token_type == tokenize.STRING
|
| +
|
| + @property
|
| + def is_name(self):
|
| + return self._atom.token_type == tokenize.NAME
|
| +
|
| + @property
|
| + def is_number(self):
|
| + return self._atom.token_type == tokenize.NUMBER
|
| +
|
| + @property
|
| + def is_comma(self):
|
| + return self._atom.token_string == ','
|
| +
|
| + @property
|
| + def is_colon(self):
|
| + return self._atom.token_string == ':'
|
| +
|
| + @property
|
| + def size(self):
|
| + return len(self._atom.token_string)
|
| +
|
| +
|
| +class Container(object):
|
| +
|
| + """Base class for all container types."""
|
| +
|
| + def __init__(self, items):
|
| + self._items = items
|
| +
|
| + def __repr__(self):
|
| + string = ''
|
| + last_was_keyword = False
|
| +
|
| + for item in self._items:
|
| + if item.is_comma:
|
| + string += ', '
|
| + elif item.is_colon:
|
| + string += ': '
|
| + else:
|
| + item_string = unicode(item)
|
| + if (
|
| + string and
|
| + (last_was_keyword or
|
| + (not string.endswith(tuple('([{,.:}]) ')) and
|
| + not item_string.startswith(tuple('([{,.:}])'))))
|
| + ):
|
| + string += ' '
|
| + string += item_string
|
| +
|
| + last_was_keyword = item.is_keyword
|
| + return string
|
| +
|
| + def __iter__(self):
|
| + for element in self._items:
|
| + yield element
|
| +
|
| + def __getitem__(self, idx):
|
| + return self._items[idx]
|
| +
|
| + def reflow(self, reflowed_lines, continued_indent,
|
| + break_after_open_bracket=False):
|
| + last_was_container = False
|
| + for (index, item) in enumerate(self._items):
|
| + next_item = get_item(self._items, index + 1)
|
| +
|
| + if isinstance(item, Atom):
|
| + is_list_comp_or_if_expr = (
|
| + isinstance(self, (ListComprehension, IfExpression)))
|
| + item.reflow(reflowed_lines, continued_indent,
|
| + self._get_extent(index),
|
| + is_list_comp_or_if_expr=is_list_comp_or_if_expr,
|
| + next_is_dot=(next_item and
|
| + unicode(next_item) == '.'))
|
| + if last_was_container and item.is_comma:
|
| + reflowed_lines.add_line_break(continued_indent)
|
| + last_was_container = False
|
| + else: # isinstance(item, Container)
|
| + reflowed_lines.add(item, len(continued_indent),
|
| + break_after_open_bracket)
|
| + last_was_container = not isinstance(item, (ListComprehension,
|
| + IfExpression))
|
| +
|
| + if (
|
| + break_after_open_bracket and index == 0 and
|
| + # Prefer to keep empty containers together instead of
|
| + # separating them.
|
| + unicode(item) == self.open_bracket and
|
| + (not next_item or unicode(next_item) != self.close_bracket) and
|
| + (len(self._items) != 3 or not isinstance(next_item, Atom))
|
| + ):
|
| + reflowed_lines.add_line_break(continued_indent)
|
| + break_after_open_bracket = False
|
| + else:
|
| + next_next_item = get_item(self._items, index + 2)
|
| + if (
|
| + unicode(item) not in ['.', '%', 'in'] and
|
| + next_item and not isinstance(next_item, Container) and
|
| + unicode(next_item) != ':' and
|
| + next_next_item and (not isinstance(next_next_item, Atom) or
|
| + unicode(next_item) == 'not') and
|
| + not reflowed_lines.line_empty() and
|
| + not reflowed_lines.fits_on_current_line(
|
| + self._get_extent(index + 1) + 2)
|
| + ):
|
| + reflowed_lines.add_line_break(continued_indent)
|
| +
|
| + def _get_extent(self, index):
|
| + """The extent of the full element.
|
| +
|
| + E.g., the length of a function call or keyword.
|
| +
|
| + """
|
| + extent = 0
|
| + prev_item = get_item(self._items, index - 1)
|
| + seen_dot = prev_item and unicode(prev_item) == '.'
|
| + while index < len(self._items):
|
| + item = get_item(self._items, index)
|
| + index += 1
|
| +
|
| + if isinstance(item, (ListComprehension, IfExpression)):
|
| + break
|
| +
|
| + if isinstance(item, Container):
|
| + if prev_item and prev_item.is_name:
|
| + if seen_dot:
|
| + extent += 1
|
| + else:
|
| + extent += item.size
|
| +
|
| + prev_item = item
|
| + continue
|
| + elif (unicode(item) not in ['.', '=', ':', 'not'] and
|
| + not item.is_name and not item.is_string):
|
| + break
|
| +
|
| + if unicode(item) == '.':
|
| + seen_dot = True
|
| +
|
| + extent += item.size
|
| + prev_item = item
|
| +
|
| + return extent
|
| +
|
| + @property
|
| + def is_string(self):
|
| + return False
|
| +
|
| + @property
|
| + def size(self):
|
| + return len(self.__repr__())
|
| +
|
| + @property
|
| + def is_keyword(self):
|
| + return False
|
| +
|
| + @property
|
| + def is_name(self):
|
| + return False
|
| +
|
| + @property
|
| + def is_comma(self):
|
| + return False
|
| +
|
| + @property
|
| + def is_colon(self):
|
| + return False
|
| +
|
| + @property
|
| + def open_bracket(self):
|
| + return None
|
| +
|
| + @property
|
| + def close_bracket(self):
|
| + return None
|
| +
|
| +
|
| +class Tuple(Container):
|
| +
|
| + """A high-level representation of a tuple."""
|
| +
|
| + @property
|
| + def open_bracket(self):
|
| + return '('
|
| +
|
| + @property
|
| + def close_bracket(self):
|
| + return ')'
|
| +
|
| +
|
| +class List(Container):
|
| +
|
| + """A high-level representation of a list."""
|
| +
|
| + @property
|
| + def open_bracket(self):
|
| + return '['
|
| +
|
| + @property
|
| + def close_bracket(self):
|
| + return ']'
|
| +
|
| +
|
| +class DictOrSet(Container):
|
| +
|
| + """A high-level representation of a dictionary or set."""
|
| +
|
| + @property
|
| + def open_bracket(self):
|
| + return '{'
|
| +
|
| + @property
|
| + def close_bracket(self):
|
| + return '}'
|
| +
|
| +
|
| +class ListComprehension(Container):
|
| +
|
| + """A high-level representation of a list comprehension."""
|
| +
|
| + @property
|
| + def size(self):
|
| + length = 0
|
| + for item in self._items:
|
| + if isinstance(item, IfExpression):
|
| + break
|
| + length += item.size
|
| + return length
|
| +
|
| +
|
| +class IfExpression(Container):
|
| +
|
| + """A high-level representation of an if-expression."""
|
| +
|
| +
|
| +def _parse_container(tokens, index, for_or_if=None):
|
| + """Parse a high-level container, such as a list, tuple, etc."""
|
| +
|
| + # Store the opening bracket.
|
| + items = [Atom(Token(*tokens[index]))]
|
| + index += 1
|
| +
|
| + num_tokens = len(tokens)
|
| + while index < num_tokens:
|
| + tok = Token(*tokens[index])
|
| +
|
| + if tok.token_string in ',)]}':
|
| + # First check if we're at the end of a list comprehension or
|
| + # if-expression. Don't add the ending token as part of the list
|
| + # comprehension or if-expression, because they aren't part of those
|
| + # constructs.
|
| + if for_or_if == 'for':
|
| + return (ListComprehension(items), index - 1)
|
| +
|
| + elif for_or_if == 'if':
|
| + return (IfExpression(items), index - 1)
|
| +
|
| + # We've reached the end of a container.
|
| + items.append(Atom(tok))
|
| +
|
| +            # Determine which kind of container has just ended.
|
| + if tok.token_string == ')':
|
| + # The end of a tuple.
|
| + return (Tuple(items), index)
|
| +
|
| + elif tok.token_string == ']':
|
| + # The end of a list.
|
| + return (List(items), index)
|
| +
|
| + elif tok.token_string == '}':
|
| + # The end of a dictionary or set.
|
| + return (DictOrSet(items), index)
|
| +
|
| + elif tok.token_string in '([{':
|
| + # A sub-container is being defined.
|
| + (container, index) = _parse_container(tokens, index)
|
| + items.append(container)
|
| +
|
| + elif tok.token_string == 'for':
|
| + (container, index) = _parse_container(tokens, index, 'for')
|
| + items.append(container)
|
| +
|
| + elif tok.token_string == 'if':
|
| + (container, index) = _parse_container(tokens, index, 'if')
|
| + items.append(container)
|
| +
|
| + else:
|
| + items.append(Atom(tok))
|
| +
|
| + index += 1
|
| +
|
| + return (None, None)
|
| +
|
| +
|
| +def _parse_tokens(tokens):
|
| + """Parse the tokens.
|
| +
|
| + This converts the tokens into a form where we can manipulate them
|
| + more easily.
|
| +
|
| + """
|
| +
|
| + index = 0
|
| + parsed_tokens = []
|
| +
|
| + num_tokens = len(tokens)
|
| + while index < num_tokens:
|
| + tok = Token(*tokens[index])
|
| +
|
| + assert tok.token_type != token.INDENT
|
| + if tok.token_type == tokenize.NEWLINE:
|
| + # There's only one newline and it's at the end.
|
| + break
|
| +
|
| + if tok.token_string in '([{':
|
| + (container, index) = _parse_container(tokens, index)
|
| + if not container:
|
| + return None
|
| + parsed_tokens.append(container)
|
| + else:
|
| + parsed_tokens.append(Atom(tok))
|
| +
|
| + index += 1
|
| +
|
| + return parsed_tokens
|
| +
|
| +
|
| +def _reflow_lines(parsed_tokens, indentation, max_line_length,
|
| + start_on_prefix_line):
|
| +    """Reflow the lines so that they look nice."""
|
| +
|
| + if unicode(parsed_tokens[0]) == 'def':
|
| + # A function definition gets indented a bit more.
|
| + continued_indent = indentation + ' ' * 2 * DEFAULT_INDENT_SIZE
|
| + else:
|
| + continued_indent = indentation + ' ' * DEFAULT_INDENT_SIZE
|
| +
|
| + break_after_open_bracket = not start_on_prefix_line
|
| +
|
| + lines = ReformattedLines(max_line_length)
|
| + lines.add_indent(len(indentation.lstrip('\r\n')))
|
| +
|
| + if not start_on_prefix_line:
|
| + # If splitting after the opening bracket will cause the first element
|
| + # to be aligned weirdly, don't try it.
|
| + first_token = get_item(parsed_tokens, 0)
|
| + second_token = get_item(parsed_tokens, 1)
|
| +
|
| + if (
|
| + first_token and second_token and
|
| + unicode(second_token)[0] == '(' and
|
| + len(indentation) + len(first_token) + 1 == len(continued_indent)
|
| + ):
|
| + return None
|
| +
|
| + for item in parsed_tokens:
|
| + lines.add_space_if_needed(unicode(item), equal=True)
|
| +
|
| + save_continued_indent = continued_indent
|
| + if start_on_prefix_line and isinstance(item, Container):
|
| + start_on_prefix_line = False
|
| + continued_indent = ' ' * (lines.current_size() + 1)
|
| +
|
| + item.reflow(lines, continued_indent, break_after_open_bracket)
|
| + continued_indent = save_continued_indent
|
| +
|
| + return lines.emit()
|
| +
|
| +
|
| +def _shorten_line_at_tokens_new(tokens, source, indentation,
|
| + max_line_length):
|
| + """Shorten the line taking its length into account.
|
| +
|
| + The input is expected to be free of newlines except for inside
|
| + multiline strings and at the end.
|
| +
|
| + """
|
| +    # Yield the original source so we can see if it's a better choice than
|
| +    # the shortened candidate lines we generate here.
|
| + yield indentation + source
|
| +
|
| + parsed_tokens = _parse_tokens(tokens)
|
| +
|
| + if parsed_tokens:
|
| + # Perform two reflows. The first one starts on the same line as the
|
| + # prefix. The second starts on the line after the prefix.
|
| + fixed = _reflow_lines(parsed_tokens, indentation, max_line_length,
|
| + start_on_prefix_line=True)
|
| + if fixed and check_syntax(normalize_multiline(fixed.lstrip())):
|
| + yield fixed
|
| +
|
| + fixed = _reflow_lines(parsed_tokens, indentation, max_line_length,
|
| + start_on_prefix_line=False)
|
| + if fixed and check_syntax(normalize_multiline(fixed.lstrip())):
|
| + yield fixed
|
| +
|
| +
|
| +def _shorten_line_at_tokens(tokens, source, indentation, indent_word,
|
| + key_token_strings, aggressive):
|
| + """Separate line by breaking at tokens in key_token_strings.
|
| +
|
| + The input is expected to be free of newlines except for inside
|
| + multiline strings and at the end.
|
| +
|
| + """
|
| + offsets = []
|
| + for (index, _t) in enumerate(token_offsets(tokens)):
|
| + (token_type,
|
| + token_string,
|
| + start_offset,
|
| + end_offset) = _t
|
| +
|
| + assert token_type != token.INDENT
|
| +
|
| + if token_string in key_token_strings:
|
| + # Do not break in containers with zero or one items.
|
| + unwanted_next_token = {
|
| + '(': ')',
|
| + '[': ']',
|
| + '{': '}'}.get(token_string)
|
| + if unwanted_next_token:
|
| + if (
|
| + get_item(tokens,
|
| + index + 1,
|
| + default=[None, None])[1] == unwanted_next_token or
|
| + get_item(tokens,
|
| + index + 2,
|
| + default=[None, None])[1] == unwanted_next_token
|
| + ):
|
| + continue
|
| +
|
| + if (
|
| + index > 2 and token_string == '(' and
|
| + tokens[index - 1][1] in ',(%['
|
| + ):
|
| + # Don't split after a tuple start, or before a tuple start if
|
| + # the tuple is in a list.
|
| + continue
|
| +
|
| + if end_offset < len(source) - 1:
|
| + # Don't split right before newline.
|
| + offsets.append(end_offset)
|
| + else:
|
| + # Break at adjacent strings. These were probably meant to be on
|
| + # separate lines in the first place.
|
| + previous_token = get_item(tokens, index - 1)
|
| + if (
|
| + token_type == tokenize.STRING and
|
| + previous_token and previous_token[0] == tokenize.STRING
|
| + ):
|
| + offsets.append(start_offset)
|
| +
|
| + current_indent = None
|
| + fixed = None
|
| + for line in split_at_offsets(source, offsets):
|
| + if fixed:
|
| + fixed += '\n' + current_indent + line
|
| +
|
| + for symbol in '([{':
|
| + if line.endswith(symbol):
|
| + current_indent += indent_word
|
| + else:
|
| + # First line.
|
| + fixed = line
|
| + assert not current_indent
|
| + current_indent = indent_word
|
| +
|
| + assert fixed is not None
|
| +
|
| + if check_syntax(normalize_multiline(fixed)
|
| + if aggressive > 1 else fixed):
|
| + return indentation + fixed
|
| + else:
|
| + return None
|
| +
|
| +
|
| +def token_offsets(tokens):
|
| + """Yield tokens and offsets."""
|
| + end_offset = 0
|
| + previous_end_row = 0
|
| + previous_end_column = 0
|
| + for t in tokens:
|
| + token_type = t[0]
|
| + token_string = t[1]
|
| + (start_row, start_column) = t[2]
|
| + (end_row, end_column) = t[3]
|
| +
|
| + # Account for the whitespace between tokens.
|
| + end_offset += start_column
|
| + if previous_end_row == start_row:
|
| + end_offset -= previous_end_column
|
| +
|
| + # Record the start offset of the token.
|
| + start_offset = end_offset
|
| +
|
| + # Account for the length of the token itself.
|
| + end_offset += len(token_string)
|
| +
|
| + yield (token_type,
|
| + token_string,
|
| + start_offset,
|
| + end_offset)
|
| +
|
| + previous_end_row = end_row
|
| + previous_end_column = end_column
|
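| +
|
| +# A hand-worked sketch (illustrative, not part of the original logic):
|
| +# for the line 'x = 1', token_offsets() yields (NAME, 'x', 0, 1),
|
| +# (OP, '=', 2, 3), (NUMBER, '1', 4, 5), ... where the offsets are
|
| +# column positions within the logical line.
|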
| +
|
| +
|
| +def normalize_multiline(line):
|
| +    """Normalize multiline-related code that would cause a syntax error.
|
| +
|
| + This is for purposes of checking syntax.
|
| +
|
| + """
|
| + if line.startswith('def ') and line.rstrip().endswith(':'):
|
| + return line + ' pass'
|
| + elif line.startswith('return '):
|
| + return 'def _(): ' + line
|
| + elif line.startswith('@'):
|
| + return line + 'def _(): pass'
|
| + elif line.startswith('class '):
|
| + return line + ' pass'
|
| + elif line.startswith('if '):
|
| + return line + ' pass'
|
| + else:
|
| + return line
|
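| +
|
| +# A rough sketch of the behavior on simple inputs (illustrative):
|
| +#
|
| +#     normalize_multiline('def foo():')  ->  'def foo(): pass'
|
| +#     normalize_multiline('return x')    ->  'def _(): return x'
|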
| +
|
| +
|
| +def fix_whitespace(line, offset, replacement):
|
| + """Replace whitespace at offset and return fixed line."""
|
| + # Replace escaped newlines too
|
| + left = line[:offset].rstrip('\n\r \t\\')
|
| + right = line[offset:].lstrip('\n\r \t\\')
|
| + if right.startswith('#'):
|
| + return line
|
| + else:
|
| + return left + replacement + right
|
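| +
|
| +# Illustrative sketch: replacing the whitespace run at offset 1
|
| +# collapses the extra spaces around the assignment.
|
| +#
|
| +#     fix_whitespace('a  = 1', 1, ' ')  ->  'a = 1'
|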
| +
|
| +
|
| +def _execute_pep8(pep8_options, source):
|
| + """Execute pep8 via python method calls."""
|
| + class QuietReport(pep8.BaseReport):
|
| +
|
| + """Version of checker that does not print."""
|
| +
|
| + def __init__(self, options):
|
| + super(QuietReport, self).__init__(options)
|
| + self.__full_error_results = []
|
| +
|
| + def error(self, line_number, offset, text, _):
|
| + """Collect errors."""
|
| + code = super(QuietReport, self).error(line_number, offset, text, _)
|
| + if code:
|
| + self.__full_error_results.append(
|
| + {'id': code,
|
| + 'line': line_number,
|
| + 'column': offset + 1,
|
| + 'info': text})
|
| +
|
| + def full_error_results(self):
|
| + """Return error results in detail.
|
| +
|
| + Results are in the form of a list of dictionaries. Each
|
| + dictionary contains 'id', 'line', 'column', and 'info'.
|
| +
|
| + """
|
| + return self.__full_error_results
|
| +
|
| + checker = pep8.Checker('', lines=source,
|
| + reporter=QuietReport, **pep8_options)
|
| + checker.check_all()
|
| + return checker.report.full_error_results()
|
| +
|
| +
|
| +def _remove_leading_and_normalize(line):
|
| + return line.lstrip().rstrip(CR + LF) + '\n'
|
| +
|
| +
|
| +class Reindenter(object):
|
| +
|
| + """Reindents badly-indented code to uniformly use four-space indentation.
|
| +
|
| + Released to the public domain, by Tim Peters, 03 October 2000.
|
| +
|
| + """
|
| +
|
| + def __init__(self, input_text):
|
| + sio = io.StringIO(input_text)
|
| + source_lines = sio.readlines()
|
| +
|
| + self.string_content_line_numbers = multiline_string_lines(input_text)
|
| +
|
| + # File lines, rstripped & tab-expanded. Dummy at start is so
|
| + # that we can use tokenize's 1-based line numbering easily.
|
| + # Note that a line is all-blank iff it is a newline.
|
| + self.lines = []
|
| + for line_number, line in enumerate(source_lines, start=1):
|
| + # Do not modify if inside a multiline string.
|
| + if line_number in self.string_content_line_numbers:
|
| + self.lines.append(line)
|
| + else:
|
| + # Only expand leading tabs.
|
| + self.lines.append(_get_indentation(line).expandtabs() +
|
| + _remove_leading_and_normalize(line))
|
| +
|
| + self.lines.insert(0, None)
|
| + self.index = 1 # index into self.lines of next line
|
| + self.input_text = input_text
|
| +
|
| + def run(self, indent_size=DEFAULT_INDENT_SIZE):
|
| + """Fix indentation and return modified line numbers.
|
| +
|
| + Line numbers are indexed at 1.
|
| +
|
| + """
|
| + if indent_size < 1:
|
| + return self.input_text
|
| +
|
| + try:
|
| + stats = _reindent_stats(tokenize.generate_tokens(self.getline))
|
| + except (SyntaxError, tokenize.TokenError):
|
| + return self.input_text
|
| + # Remove trailing empty lines.
|
| + lines = self.lines
|
| + while lines and lines[-1] == '\n':
|
| + lines.pop()
|
| + # Sentinel.
|
| + stats.append((len(lines), 0))
|
| + # Map count of leading spaces to # we want.
|
| + have2want = {}
|
| + # Program after transformation.
|
| + after = []
|
| + # Copy over initial empty lines -- there's nothing to do until
|
| + # we see a line with *something* on it.
|
| + i = stats[0][0]
|
| + after.extend(lines[1:i])
|
| + for i in range(len(stats) - 1):
|
| + thisstmt, thislevel = stats[i]
|
| + nextstmt = stats[i + 1][0]
|
| + have = _leading_space_count(lines[thisstmt])
|
| + want = thislevel * indent_size
|
| + if want < 0:
|
| + # A comment line.
|
| + if have:
|
| + # An indented comment line. If we saw the same
|
| + # indentation before, reuse what it most recently
|
| + # mapped to.
|
| + want = have2want.get(have, -1)
|
| + if want < 0:
|
| + # Then it probably belongs to the next real stmt.
|
| + for j in range(i + 1, len(stats) - 1):
|
| + jline, jlevel = stats[j]
|
| + if jlevel >= 0:
|
| + if have == _leading_space_count(lines[jline]):
|
| + want = jlevel * indent_size
|
| + break
|
| + if want < 0: # Maybe it's a hanging
|
| + # comment like this one,
|
| + # in which case we should shift it like its base
|
| + # line got shifted.
|
| + for j in range(i - 1, -1, -1):
|
| + jline, jlevel = stats[j]
|
| + if jlevel >= 0:
|
| + want = (have + _leading_space_count(
|
| + after[jline - 1]) -
|
| + _leading_space_count(lines[jline]))
|
| + break
|
| + if want < 0:
|
| + # Still no luck -- leave it alone.
|
| + want = have
|
| + else:
|
| + want = 0
|
| + assert want >= 0
|
| + have2want[have] = want
|
| + diff = want - have
|
| + if diff == 0 or have == 0:
|
| + after.extend(lines[thisstmt:nextstmt])
|
| + else:
|
| + for line_number, line in enumerate(lines[thisstmt:nextstmt],
|
| + start=thisstmt):
|
| + if line_number in self.string_content_line_numbers:
|
| + after.append(line)
|
| + elif diff > 0:
|
| + if line == '\n':
|
| + after.append(line)
|
| + else:
|
| + after.append(' ' * diff + line)
|
| + else:
|
| + remove = min(_leading_space_count(line), -diff)
|
| + after.append(line[remove:])
|
| +
|
| + return ''.join(after)
|
| +
|
| + def getline(self):
|
| + """Line-getter for tokenize."""
|
| + if self.index >= len(self.lines):
|
| + line = ''
|
| + else:
|
| + line = self.lines[self.index]
|
| + self.index += 1
|
| + return line
|
| +
|
| +
|
| +def _reindent_stats(tokens):
|
| + """Return list of (lineno, indentlevel) pairs.
|
| +
|
| + One for each stmt and comment line. indentlevel is -1 for comment lines, as
|
| + a signal that tokenize doesn't know what to do about them; indeed, they're
|
| + our headache!
|
| +
|
| + """
|
| + find_stmt = 1 # Next token begins a fresh stmt?
|
| + level = 0 # Current indent level.
|
| + stats = []
|
| +
|
| + for t in tokens:
|
| + token_type = t[0]
|
| + sline = t[2][0]
|
| + line = t[4]
|
| +
|
| + if token_type == tokenize.NEWLINE:
|
| + # A program statement, or ENDMARKER, will eventually follow,
|
| + # after some (possibly empty) run of tokens of the form
|
| + # (NL | COMMENT)* (INDENT | DEDENT+)?
|
| + find_stmt = 1
|
| +
|
| + elif token_type == tokenize.INDENT:
|
| + find_stmt = 1
|
| + level += 1
|
| +
|
| + elif token_type == tokenize.DEDENT:
|
| + find_stmt = 1
|
| + level -= 1
|
| +
|
| + elif token_type == tokenize.COMMENT:
|
| + if find_stmt:
|
| + stats.append((sline, -1))
|
| + # But we're still looking for a new stmt, so leave
|
| + # find_stmt alone.
|
| +
|
| + elif token_type == tokenize.NL:
|
| + pass
|
| +
|
| + elif find_stmt:
|
| + # This is the first "real token" following a NEWLINE, so it
|
| + # must be the first token of the next program statement, or an
|
| + # ENDMARKER.
|
| + find_stmt = 0
|
| + if line: # Not endmarker.
|
| + stats.append((sline, level))
|
| +
|
| + return stats
|
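| +
|
| +# A hand-worked example (illustrative): for 'if x:\n    y = 1\n' the
|
| +# stats are [(1, 0), (2, 1)] -- line 1 at indent level 0 and line 2 at
|
| +# indent level 1.
|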
| +
|
| +
|
| +def _leading_space_count(line):
|
| + """Return number of leading spaces in line."""
|
| + i = 0
|
| + while i < len(line) and line[i] == ' ':
|
| + i += 1
|
| + return i
|
| +
|
| +
|
| +def refactor_with_2to3(source_text, fixer_names):
|
| + """Use lib2to3 to refactor the source.
|
| +
|
| + Return the refactored source code.
|
| +
|
| + """
|
| + from lib2to3.refactor import RefactoringTool
|
| + fixers = ['lib2to3.fixes.fix_' + name for name in fixer_names]
|
| + tool = RefactoringTool(fixer_names=fixers, explicit=fixers)
|
| +
|
| + from lib2to3.pgen2 import tokenize as lib2to3_tokenize
|
| + try:
|
| + return unicode(tool.refactor_string(source_text, name=''))
|
| + except lib2to3_tokenize.TokenError:
|
| + return source_text
|
| +
|
| +
|
| +def check_syntax(code):
|
| +    """Return compiled code (truthy) if syntax is okay, else False."""
|
| + try:
|
| + return compile(code, '<string>', 'exec')
|
| + except (SyntaxError, TypeError, UnicodeDecodeError):
|
| + return False
|
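| +
|
| +# Illustrative: the "True" case actually returns the compiled code
|
| +# object, which is truthy.
|
| +#
|
| +#     bool(check_syntax('x = 1'))  ->  True
|
| +#     check_syntax('x ===')  ->  False
|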
| +
|
| +
|
| +def filter_results(source, results, aggressive):
|
| + """Filter out spurious reports from pep8.
|
| +
|
| + If aggressive is True, we allow possibly unsafe fixes (E711, E712).
|
| +
|
| + """
|
| + non_docstring_string_line_numbers = multiline_string_lines(
|
| + source, include_docstrings=False)
|
| + all_string_line_numbers = multiline_string_lines(
|
| + source, include_docstrings=True)
|
| +
|
| + commented_out_code_line_numbers = commented_out_code_lines(source)
|
| +
|
| + for r in results:
|
| + issue_id = r['id'].lower()
|
| +
|
| + if r['line'] in non_docstring_string_line_numbers:
|
| + if issue_id.startswith(('e1', 'e501', 'w191')):
|
| + continue
|
| +
|
| + if r['line'] in all_string_line_numbers:
|
| + if issue_id in ['e501']:
|
| + continue
|
| +
|
| + # We must offset by 1 for lines that contain the trailing contents of
|
| + # multiline strings.
|
| + if not aggressive and (r['line'] + 1) in all_string_line_numbers:
|
| +            # Do not modify multiline strings in non-aggressive mode.
|
| +            # Removing trailing whitespace could break doctests.
|
| + if issue_id.startswith(('w29', 'w39')):
|
| + continue
|
| +
|
| + if aggressive <= 0:
|
| + if issue_id.startswith(('e711', 'w6')):
|
| + continue
|
| +
|
| + if aggressive <= 1:
|
| + if issue_id.startswith(('e712', 'e713')):
|
| + continue
|
| +
|
| + if r['line'] in commented_out_code_line_numbers:
|
| + if issue_id.startswith(('e26', 'e501')):
|
| + continue
|
| +
|
| + yield r
|
| +
|
| +
|
| +def multiline_string_lines(source, include_docstrings=False):
|
| + """Return line numbers that are within multiline strings.
|
| +
|
| + The line numbers are indexed at 1.
|
| +
|
| +    Docstrings are ignored unless include_docstrings is True.
|
| +
|
| + """
|
| + line_numbers = set()
|
| + previous_token_type = ''
|
| + try:
|
| + for t in generate_tokens(source):
|
| + token_type = t[0]
|
| + start_row = t[2][0]
|
| + end_row = t[3][0]
|
| +
|
| + if token_type == tokenize.STRING and start_row != end_row:
|
| + if (
|
| + include_docstrings or
|
| + previous_token_type != tokenize.INDENT
|
| + ):
|
| + # We increment by one since we want the contents of the
|
| + # string.
|
| + line_numbers |= set(range(1 + start_row, 1 + end_row))
|
| +
|
| + previous_token_type = token_type
|
| + except (SyntaxError, tokenize.TokenError):
|
| + pass
|
| +
|
| + return line_numbers
|
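| +
|
| +# Illustrative sketch: for the source "x = '''\n1\n2\n'''\n" the string
|
| +# spans lines 1-4, so the contents are reported as {2, 3, 4}.
|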
| +
|
| +
|
| +def commented_out_code_lines(source):
|
| + """Return line numbers of comments that are likely code.
|
| +
|
| + Commented-out code is bad practice, but modifying it just adds even more
|
| + clutter.
|
| +
|
| + """
|
| + line_numbers = []
|
| + try:
|
| + for t in generate_tokens(source):
|
| + token_type = t[0]
|
| + token_string = t[1]
|
| + start_row = t[2][0]
|
| + line = t[4]
|
| +
|
| + # Ignore inline comments.
|
| + if not line.lstrip().startswith('#'):
|
| + continue
|
| +
|
| + if token_type == tokenize.COMMENT:
|
| + stripped_line = token_string.lstrip('#').strip()
|
| + if (
|
| + ' ' in stripped_line and
|
| + '#' not in stripped_line and
|
| + check_syntax(stripped_line)
|
| + ):
|
| + line_numbers.append(start_row)
|
| + except (SyntaxError, tokenize.TokenError):
|
| + pass
|
| +
|
| + return line_numbers
|
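| +
|
| +# Illustrative sketch: only the first comment below parses as code, so
|
| +# only line 1 is reported.
|
| +#
|
| +#     commented_out_code_lines('# x = 1\n# just a note\n')  ->  [1]
|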
| +
|
| +
|
| +def shorten_comment(line, max_line_length, last_comment=False):
|
| + """Return trimmed or split long comment line.
|
| +
|
| + If there are no comments immediately following it, do a text wrap.
|
| + Doing this wrapping on all comments in general would lead to jagged
|
| + comment text.
|
| +
|
| + """
|
| + assert len(line) > max_line_length
|
| + line = line.rstrip()
|
| +
|
| + # PEP 8 recommends 72 characters for comment text.
|
| + indentation = _get_indentation(line) + '# '
|
| + max_line_length = min(max_line_length,
|
| + len(indentation) + 72)
|
| +
|
| + MIN_CHARACTER_REPEAT = 5
|
| + if (
|
| + len(line) - len(line.rstrip(line[-1])) >= MIN_CHARACTER_REPEAT and
|
| + not line[-1].isalnum()
|
| + ):
|
| + # Trim comments that end with things like ---------
|
| + return line[:max_line_length] + '\n'
|
| + elif last_comment and re.match(r'\s*#+\s*\w+', line):
|
| + import textwrap
|
| + split_lines = textwrap.wrap(line.lstrip(' \t#'),
|
| + initial_indent=indentation,
|
| + subsequent_indent=indentation,
|
| + width=max_line_length,
|
| + break_long_words=False,
|
| + break_on_hyphens=False)
|
| + return '\n'.join(split_lines) + '\n'
|
| + else:
|
| + return line + '\n'
|
| +
|
| +
|
| +def normalize_line_endings(lines, newline):
|
| + """Return fixed line endings.
|
| +
|
| +    All lines will be modified to use the given line ending.
|
| +
|
| + """
|
| + return [line.rstrip('\n\r') + newline for line in lines]
|
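| +
|
| +# Illustrative:
|
| +#
|
| +#     normalize_line_endings(['a\r\n', 'b\r'], '\n')  ->  ['a\n', 'b\n']
|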
| +
|
| +
|
| +def mutual_startswith(a, b):
|
| + return b.startswith(a) or a.startswith(b)
|
| +
|
| +
|
| +def code_match(code, select, ignore):
|
| + if ignore:
|
| + assert not isinstance(ignore, unicode)
|
| + for ignored_code in [c.strip() for c in ignore]:
|
| + if mutual_startswith(code.lower(), ignored_code.lower()):
|
| + return False
|
| +
|
| + if select:
|
| + assert not isinstance(select, unicode)
|
| + for selected_code in [c.strip() for c in select]:
|
| + if mutual_startswith(code.lower(), selected_code.lower()):
|
| + return True
|
| + return False
|
| +
|
| + return True
|
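| +
|
| +# Matching is prefix-based in both directions (via mutual_startswith).
|
| +# Illustrative:
|
| +#
|
| +#     code_match('E501', select=[], ignore=['E5'])  ->  False
|
| +#     code_match('E501', select=['E'], ignore=[])   ->  True
|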
| +
|
| +
|
| +def fix_code(source, options=None):
|
| + """Return fixed source code."""
|
| + if not options:
|
| + options = parse_args([''])
|
| +
|
| + if not isinstance(source, unicode):
|
| + source = source.decode(locale.getpreferredencoding())
|
| +
|
| + sio = io.StringIO(source)
|
| + return fix_lines(sio.readlines(), options=options)
|
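| +
|
| +# Typical library usage (a sketch; the exact output depends on the
|
| +# enabled fixes):
|
| +#
|
| +#     import autopep8
|
| +#     autopep8.fix_code('x=1;  y=2\n')  # expected: 'x = 1\ny = 2\n'
|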
| +
|
| +
|
| +def fix_lines(source_lines, options, filename=''):
|
| + """Return fixed source code."""
|
| + # Transform everything to line feed. Then change them back to original
|
| + # before returning fixed source code.
|
| + original_newline = find_newline(source_lines)
|
| + tmp_source = ''.join(normalize_line_endings(source_lines, '\n'))
|
| +
|
| + # Keep a history to break out of cycles.
|
| + previous_hashes = set()
|
| +
|
| + if options.line_range:
|
| + fixed_source = apply_local_fixes(tmp_source, options)
|
| + else:
|
| + # Apply global fixes only once (for efficiency).
|
| + fixed_source = apply_global_fixes(tmp_source, options)
|
| +
|
| + passes = 0
|
| + long_line_ignore_cache = set()
|
| + while hash(fixed_source) not in previous_hashes:
|
| + if options.pep8_passes >= 0 and passes > options.pep8_passes:
|
| + break
|
| + passes += 1
|
| +
|
| + previous_hashes.add(hash(fixed_source))
|
| +
|
| + tmp_source = copy.copy(fixed_source)
|
| +
|
| + fix = FixPEP8(
|
| + filename,
|
| + options,
|
| + contents=tmp_source,
|
| + long_line_ignore_cache=long_line_ignore_cache)
|
| +
|
| + fixed_source = fix.fix()
|
| +
|
| + sio = io.StringIO(fixed_source)
|
| + return ''.join(normalize_line_endings(sio.readlines(), original_newline))
|
| +
|
| +
|
| +def fix_file(filename, options=None, output=None):
|
| + if not options:
|
| + options = parse_args([filename])
|
| +
|
| + original_source = readlines_from_file(filename)
|
| +
|
| + fixed_source = original_source
|
| +
|
| + if options.in_place or output:
|
| + encoding = detect_encoding(filename)
|
| +
|
| + if output:
|
| + output = codecs.getwriter(encoding)(output.buffer
|
| + if hasattr(output, 'buffer')
|
| + else output)
|
| +
|
| + output = LineEndingWrapper(output)
|
| +
|
| + fixed_source = fix_lines(fixed_source, options, filename=filename)
|
| +
|
| + if options.diff:
|
| + new = io.StringIO(fixed_source)
|
| + new = new.readlines()
|
| + diff = get_diff_text(original_source, new, filename)
|
| + if output:
|
| + output.write(diff)
|
| + output.flush()
|
| + else:
|
| + return diff
|
| + elif options.in_place:
|
| + fp = open_with_encoding(filename, encoding=encoding,
|
| + mode='w')
|
| + fp.write(fixed_source)
|
| + fp.close()
|
| + else:
|
| + if output:
|
| + output.write(fixed_source)
|
| + output.flush()
|
| + else:
|
| + return fixed_source
|
| +
|
| +
|
| +def global_fixes():
|
| + """Yield multiple (code, function) tuples."""
|
| + for function in globals().values():
|
| + if inspect.isfunction(function):
|
| + arguments = inspect.getargspec(function)[0]
|
| + if arguments[:1] != ['source']:
|
| + continue
|
| +
|
| + code = extract_code_from_function(function)
|
| + if code:
|
| + yield (code, function)
|
| +
|
| +
|
| +def apply_global_fixes(source, options, where='global'):
|
| + """Run global fixes on source code.
|
| +
|
| + These are fixes that only need be done once (unlike those in
|
| + FixPEP8, which are dependent on pep8).
|
| +
|
| + """
|
| + if code_match('E101', select=options.select, ignore=options.ignore):
|
| + source = reindent(source,
|
| + indent_size=options.indent_size)
|
| +
|
| + for (code, function) in global_fixes():
|
| + if code_match(code, select=options.select, ignore=options.ignore):
|
| + if options.verbose:
|
| + print('---> Applying {0} fix for {1}'.format(where,
|
| + code.upper()),
|
| + file=sys.stderr)
|
| + source = function(source,
|
| + aggressive=options.aggressive)
|
| +
|
| + source = fix_2to3(source,
|
| + aggressive=options.aggressive,
|
| + select=options.select,
|
| + ignore=options.ignore)
|
| +
|
| + return source
|
| +
|
| +
|
| +def apply_local_fixes(source, options):
|
| +    """Analogous to apply_global_fixes, but runs only those fixes that
|
| +    make sense for the given line_range.
|
| +
|
| + Do as much as we can without breaking code.
|
| +
|
| + """
|
| + def find_ge(a, x):
|
| + """Find leftmost item greater than or equal to x."""
|
| + i = bisect.bisect_left(a, x)
|
| + if i != len(a):
|
| + return i, a[i]
|
| + return len(a) - 1, a[-1]
|
| +
|
| + def find_le(a, x):
|
| + """Find rightmost value less than or equal to x."""
|
| + i = bisect.bisect_right(a, x)
|
| + if i:
|
| + return i - 1, a[i - 1]
|
| + return 0, a[0]
|
| +
|
| + def local_fix(source, start_log, end_log,
|
| + start_lines, end_lines, indents, last_line):
|
| +        """Apply global fixes to the source between start_log and end_log.
|
| +
|
| +        The subsource must be syntactically valid as a complete Python
|
| +        program (though all lines may share an indentation). The shared
|
| +        indent is removed, fixes are applied, and the indent is prepended
|
| +        back, taking care not to reindent strings.
|
| +
|
| +        last_line is the strict cut-off (options.line_range[1]); lines
|
| +        after last_line are not modified.
|
| +
|
| + """
|
| + if end_log < start_log:
|
| + return source
|
| +
|
| + ind = indents[start_log]
|
| + indent = _get_indentation(source[start_lines[start_log]])
|
| +
|
| + sl = slice(start_lines[start_log], end_lines[end_log] + 1)
|
| +
|
| + subsource = source[sl]
|
| + # Remove indent from subsource.
|
| + if ind:
|
| + for line_no in start_lines[start_log:end_log + 1]:
|
| + pos = line_no - start_lines[start_log]
|
| + subsource[pos] = subsource[pos][ind:]
|
| +
|
| +        # Apply global fixes to the dedented subsource.
|
| + fixed_subsource = apply_global_fixes(''.join(subsource),
|
| + options,
|
| + where='local')
|
| + fixed_subsource = fixed_subsource.splitlines(True)
|
| +
|
| +        # Add the indent back to lines that are not inside multiline strings.
|
| + msl = multiline_string_lines(''.join(fixed_subsource),
|
| + include_docstrings=False)
|
| + for i, line in enumerate(fixed_subsource):
|
| + if not i + 1 in msl:
|
| + fixed_subsource[i] = indent + line if line != '\n' else line
|
| +
|
| +        # Special case: if the final line is a multiline string *and* the
|
| +        # cut-off falls somewhere inside it, take only the fixed subset up
|
| +        # to last_line. This assumes that the number of lines within the
|
| +        # multiline string does not change.
|
| + changed_lines = len(fixed_subsource)
|
| + if (start_lines[end_log] != end_lines[end_log]
|
| + and end_lines[end_log] > last_line):
|
| + after_end = end_lines[end_log] - last_line
|
| + fixed_subsource = (fixed_subsource[:-after_end] +
|
| + source[sl][-after_end:])
|
| + changed_lines -= after_end
|
| +
|
| + options.line_range[1] = (options.line_range[0] +
|
| + changed_lines - 1)
|
| +
|
| + return (source[:start_lines[start_log]] +
|
| + fixed_subsource +
|
| + source[end_lines[end_log] + 1:])
|
| +
|
| + def is_continued_stmt(line,
|
| + continued_stmts=frozenset(['else', 'elif',
|
| + 'finally', 'except'])):
|
| + return re.split('[ :]', line.strip(), 1)[0] in continued_stmts
|
| +
|
| + assert options.line_range
|
| + start, end = options.line_range
|
| + start -= 1
|
| + end -= 1
|
| + last_line = end # We shouldn't modify lines after this cut-off.
|
| +
|
| + try:
|
| + logical = _find_logical(source)
|
| + except (SyntaxError, tokenize.TokenError):
|
| + return ''.join(source)
|
| +
|
| + if not logical[0]:
|
| +        # Only blank lines; just apply the global fixes to the whole source.
|
| + return apply_global_fixes(source, options)
|
| +
|
| + start_lines, indents = zip(*logical[0])
|
| + end_lines, _ = zip(*logical[1])
|
| +
|
| + source = source.splitlines(True)
|
| +
|
| + start_log, start = find_ge(start_lines, start)
|
| + end_log, end = find_le(start_lines, end)
|
| +
|
| +    # Look back one line. If it is indented less than the current indent,
|
| +    # we can move to that previous line, knowing that its indentation
|
| +    # level will not be changed.
|
| + if (start_log > 0
|
| + and indents[start_log - 1] < indents[start_log]
|
| + and not is_continued_stmt(source[start_log - 1])):
|
| + start_log -= 1
|
| + start = start_lines[start_log]
|
| +
|
| + while start < end:
|
| +
|
| + if is_continued_stmt(source[start]):
|
| + start_log += 1
|
| + start = start_lines[start_log]
|
| + continue
|
| +
|
| + ind = indents[start_log]
|
| + for t in itertools.takewhile(lambda t: t[1][1] >= ind,
|
| + enumerate(logical[0][start_log:])):
|
| + n_log, n = start_log + t[0], t[1][0]
|
| + # start shares indent up to n.
|
| +
|
| + if n <= end:
|
| + source = local_fix(source, start_log, n_log,
|
| + start_lines, end_lines,
|
| + indents, last_line)
|
| + start_log = n_log if n == end else n_log + 1
|
| + start = start_lines[start_log]
|
| + continue
|
| +
|
| + else:
|
| +            # Look at the line after end and see if it allows us to reindent.
|
| + after_end_log, after_end = find_ge(start_lines, end + 1)
|
| +
|
| + if indents[after_end_log] > indents[start_log]:
|
| + start_log, start = find_ge(start_lines, start + 1)
|
| + continue
|
| +
|
| + if (indents[after_end_log] == indents[start_log]
|
| + and is_continued_stmt(source[after_end])):
|
| +                # Find n, the beginning of the last continued statement,
|
| +                # and apply the fix to the previous block if there is one.
|
| + only_block = True
|
| + for n, n_ind in logical[0][start_log:end_log + 1][::-1]:
|
| + if n_ind == ind and not is_continued_stmt(source[n]):
|
| + n_log = start_lines.index(n)
|
| + source = local_fix(source, start_log, n_log - 1,
|
| + start_lines, end_lines,
|
| + indents, last_line)
|
| + start_log = n_log + 1
|
| + start = start_lines[start_log]
|
| + only_block = False
|
| + break
|
| + if only_block:
|
| + end_log, end = find_le(start_lines, end - 1)
|
| + continue
|
| +
|
| + source = local_fix(source, start_log, end_log,
|
| + start_lines, end_lines,
|
| + indents, last_line)
|
| + break
|
| +
|
| + return ''.join(source)
|
| +
|
| +
|
| +def extract_code_from_function(function):
|
| + """Return code handled by function."""
|
| + if not function.__name__.startswith('fix_'):
|
| + return None
|
| +
|
| + code = re.sub('^fix_', '', function.__name__)
|
| + if not code:
|
| + return None
|
| +
|
| + try:
|
| + int(code[1:])
|
| + except ValueError:
|
| + return None
|
| +
|
| + return code
|
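| +
|
| +# Illustrative: a module-level function named fix_e712 would map to
|
| +# the code 'e712'; names without the 'fix_' prefix or without a
|
| +# numeric tail map to None.
|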
| +
|
| +
|
| +def create_parser():
|
| + """Return command-line parser."""
|
| + # Do import locally to be friendly to those who use autopep8 as a library
|
| + # and are supporting Python 2.6.
|
| + import argparse
|
| +
|
| + parser = argparse.ArgumentParser(description=docstring_summary(__doc__),
|
| + prog='autopep8')
|
| + parser.add_argument('--version', action='version',
|
| + version='%(prog)s ' + __version__)
|
| + parser.add_argument('-v', '--verbose', action='count', dest='verbose',
|
| + default=0,
|
| + help='print verbose messages; '
|
| +                        'multiple -v options result in more verbose messages')
|
| + parser.add_argument('-d', '--diff', action='store_true', dest='diff',
|
| + help='print the diff for the fixed source')
|
| + parser.add_argument('-i', '--in-place', action='store_true',
|
| + help='make changes to files in place')
|
| + parser.add_argument('-r', '--recursive', action='store_true',
|
| + help='run recursively over directories; '
|
| + 'must be used with --in-place or --diff')
|
| + parser.add_argument('-j', '--jobs', type=int, metavar='n', default=1,
|
| + help='number of parallel jobs; '
|
| + 'match CPU count if value is less than 1')
|
| + parser.add_argument('-p', '--pep8-passes', metavar='n',
|
| + default=-1, type=int,
|
| + help='maximum number of additional pep8 passes '
|
| + '(default: infinite)')
|
| + parser.add_argument('-a', '--aggressive', action='count', default=0,
|
| + help='enable non-whitespace changes; '
|
| +                        'multiple -a options make more aggressive changes')
|
| + parser.add_argument('--experimental', action='store_true',
|
| + help='enable experimental fixes')
|
| + parser.add_argument('--exclude', metavar='globs',
|
| + help='exclude file/directory names that match these '
|
| + 'comma-separated globs')
|
| + parser.add_argument('--list-fixes', action='store_true',
|
| + help='list codes for fixes; '
|
| + 'used by --ignore and --select')
|
| + parser.add_argument('--ignore', metavar='errors', default='',
|
| + help='do not fix these errors/warnings '
|
| + '(default: {0})'.format(DEFAULT_IGNORE))
|
| + parser.add_argument('--select', metavar='errors', default='',
|
| + help='fix only these errors/warnings (e.g. E4,W)')
|
| + parser.add_argument('--max-line-length', metavar='n', default=79, type=int,
|
| + help='set maximum allowed line length '
|
| + '(default: %(default)s)')
|
| + parser.add_argument('--range', metavar='line', dest='line_range',
|
| + default=None, type=int, nargs=2,
|
| + help='only fix errors found within this inclusive '
|
| + 'range of line numbers (e.g. 1 99); '
|
| + 'line numbers are indexed at 1')
|
| + parser.add_argument('--indent-size', default=DEFAULT_INDENT_SIZE,
|
| + type=int, metavar='n',
|
| + help='number of spaces per indent level '
|
| + '(default %(default)s)')
|
| + parser.add_argument('files', nargs='*',
|
| + help="files to format or '-' for standard in")
|
| +
|
| + return parser
|
| +
|
| +
|
| +def parse_args(arguments):
|
| + """Parse command-line options."""
|
| + parser = create_parser()
|
| + args = parser.parse_args(arguments)
|
| +
|
| + if not args.files and not args.list_fixes:
|
| + parser.error('incorrect number of arguments')
|
| +
|
| + args.files = [decode_filename(name) for name in args.files]
|
| +
|
| + if '-' in args.files:
|
| + if len(args.files) > 1:
|
| + parser.error('cannot mix stdin and regular files')
|
| +
|
| + if args.diff:
|
| + parser.error('--diff cannot be used with standard input')
|
| +
|
| + if args.in_place:
|
| + parser.error('--in-place cannot be used with standard input')
|
| +
|
| + if args.recursive:
|
| + parser.error('--recursive cannot be used with standard input')
|
| +
|
| + if len(args.files) > 1 and not (args.in_place or args.diff):
|
| + parser.error('autopep8 only takes one filename as argument '
|
| + 'unless the "--in-place" or "--diff" args are '
|
| + 'used')
|
| +
|
| + if args.recursive and not (args.in_place or args.diff):
|
| + parser.error('--recursive must be used with --in-place or --diff')
|
| +
|
| + if args.exclude and not args.recursive:
|
| + parser.error('--exclude is only relevant when used with --recursive')
|
| +
|
| + if args.in_place and args.diff:
|
| + parser.error('--in-place and --diff are mutually exclusive')
|
| +
|
| + if args.max_line_length <= 0:
|
| + parser.error('--max-line-length must be greater than 0')
|
| +
|
| + if args.select:
|
| + args.select = args.select.split(',')
|
| +
|
| + if args.ignore:
|
| + args.ignore = args.ignore.split(',')
|
| + elif not args.select:
|
| + if args.aggressive:
|
| + # Enable everything by default if aggressive.
|
| + args.select = ['E', 'W']
|
| + else:
|
| + args.ignore = DEFAULT_IGNORE.split(',')
|
| +
|
| + if args.exclude:
|
| + args.exclude = args.exclude.split(',')
|
| + else:
|
| + args.exclude = []
|
| +
|
| + if args.jobs < 1:
|
| + # Do not import multiprocessing globally in case it is not supported
|
| + # on the platform.
|
| + import multiprocessing
|
| + args.jobs = multiprocessing.cpu_count()
|
| +
|
| + if args.jobs > 1 and not args.in_place:
|
| + parser.error('parallel jobs requires --in-place')
|
| +
|
| + if args.line_range:
|
| + if args.line_range[0] <= 0:
|
| +            parser.error('--range values must be positive')
|
| + if args.line_range[0] > args.line_range[1]:
|
| + parser.error('First value of --range should be less than or equal '
|
| + 'to the second')
|
| +
|
| + return args
|
| +
|
| +
|
| +def decode_filename(filename):
|
| + """Return Unicode filename."""
|
| + if isinstance(filename, unicode):
|
| + return filename
|
| + else:
|
| + return filename.decode(sys.getfilesystemencoding())
|
| +
|
| +
|
| +def supported_fixes():
|
| + """Yield pep8 error codes that autopep8 fixes.
|
| +
|
| + Each item we yield is a tuple of the code followed by its
|
| + description.
|
| +
|
| + """
|
| + yield ('E101', docstring_summary(reindent.__doc__))
|
| +
|
| + instance = FixPEP8(filename=None, options=None, contents='')
|
| + for attribute in dir(instance):
|
| + code = re.match('fix_([ew][0-9][0-9][0-9])', attribute)
|
| + if code:
|
| + yield (
|
| + code.group(1).upper(),
|
| + re.sub(r'\s+', ' ',
|
| + docstring_summary(getattr(instance, attribute).__doc__))
|
| + )
|
| +
|
| + for (code, function) in sorted(global_fixes()):
|
| + yield (code.upper() + (4 - len(code)) * ' ',
|
| + re.sub(r'\s+', ' ', docstring_summary(function.__doc__)))
|
| +
|
| + for code in sorted(CODE_TO_2TO3):
|
| + yield (code.upper() + (4 - len(code)) * ' ',
|
| + re.sub(r'\s+', ' ', docstring_summary(fix_2to3.__doc__)))
|
| +
|
| +
|
| +def docstring_summary(docstring):
|
| + """Return summary of docstring."""
|
| + return docstring.split('\n')[0]
|
| +
|
| +
|
| +def line_shortening_rank(candidate, indent_word, max_line_length,
|
| + experimental=False):
|
| + """Return rank of candidate.
|
| +
|
| + This is for sorting candidates.
|
| +
|
| + """
|
| + if not candidate.strip():
|
| + return 0
|
| +
|
| + rank = 0
|
| + lines = candidate.split('\n')
|
| +
|
| + offset = 0
|
| + if (
|
| + not lines[0].lstrip().startswith('#') and
|
| + lines[0].rstrip()[-1] not in '([{'
|
| + ):
|
| + for (opening, closing) in ('()', '[]', '{}'):
|
| + # Don't penalize empty containers that aren't split up. Things like
|
| + # this "foo(\n )" aren't particularly good.
|
| + opening_loc = lines[0].find(opening)
|
| + closing_loc = lines[0].find(closing)
|
| + if opening_loc >= 0:
|
| + if closing_loc < 0 or closing_loc != opening_loc + 1:
|
| + offset = max(offset, 1 + opening_loc)
|
| +
|
| + current_longest = max(offset + len(x.strip()) for x in lines)
|
| +
|
| + rank += 4 * max(0, current_longest - max_line_length)
|
| +
|
| + rank += len(lines)
|
| +
|
| + # Too much variation in line length is ugly.
|
| + rank += 2 * standard_deviation(len(line) for line in lines)
|
| +
|
| +    bad_starting_symbol = {
|
| + '(': ')',
|
| + '[': ']',
|
| + '{': '}'}.get(lines[0][-1])
|
| +
|
| + if len(lines) > 1:
|
| + if (
|
| +            bad_starting_symbol and
|
| +            lines[1].lstrip().startswith(bad_starting_symbol)
|
| + ):
|
| + rank += 20
|
| +
|
| + for lineno, current_line in enumerate(lines):
|
| + current_line = current_line.strip()
|
| +
|
| + if current_line.startswith('#'):
|
| + continue
|
| +
|
| + for bad_start in ['.', '%', '+', '-', '/']:
|
| + if current_line.startswith(bad_start):
|
| + rank += 100
|
| +
|
| + # Do not tolerate operators on their own line.
|
| + if current_line == bad_start:
|
| + rank += 1000
|
| +
|
| + if current_line.endswith(('(', '[', '{', '.')):
|
| + # Avoid lonely opening. They result in longer lines.
|
| + if len(current_line) <= len(indent_word):
|
| + rank += 100
|
| +
|
| + # Avoid the ugliness of ", (\n".
|
| + if (
|
| + current_line.endswith('(') and
|
| + current_line[:-1].rstrip().endswith(',')
|
| + ):
|
| + rank += 100
|
| +
|
| + # Also avoid the ugliness of "foo.\nbar"
|
| + if current_line.endswith('.'):
|
| + rank += 100
|
| +
|
| + if has_arithmetic_operator(current_line):
|
| + rank += 100
|
| +
|
| + if current_line.endswith(('%', '(', '[', '{')):
|
| + rank -= 20
|
| +
|
| + # Try to break list comprehensions at the "for".
|
| + if current_line.startswith('for '):
|
| + rank -= 50
|
| +
|
| + if current_line.endswith('\\'):
|
| + # If a line ends in \-newline, it may be part of a
|
| + # multiline string. In that case, we would like to know
|
| + # how long that line is without the \-newline. If it's
|
| + # longer than the maximum, or has comments, then we assume
|
| + # that the \-newline is an okay candidate and only
|
| + # penalize it a bit.
|
| + total_len = len(current_line)
|
| + lineno += 1
|
| + while lineno < len(lines):
|
| + total_len += len(lines[lineno])
|
| +
|
| + if lines[lineno].lstrip().startswith('#'):
|
| + total_len = max_line_length
|
| + break
|
| +
|
| + if not lines[lineno].endswith('\\'):
|
| + break
|
| +
|
| + lineno += 1
|
| +
|
| + if total_len < max_line_length:
|
| + rank += 10
|
| + else:
|
| + rank += 100 if experimental else 1
|
| +
|
| + # Prefer breaking at commas rather than colon.
|
| + if ',' in current_line and current_line.endswith(':'):
|
| + rank += 10
|
| +
|
| + rank += 10 * count_unbalanced_brackets(current_line)
|
| +
|
| + return max(0, rank)
|
| +
|
| +
|
| +def standard_deviation(numbers):
|
| +    """Return the population standard deviation."""
|
| + numbers = list(numbers)
|
| + if not numbers:
|
| + return 0
|
| + mean = sum(numbers) / len(numbers)
|
| + return (sum((n - mean) ** 2 for n in numbers) /
|
| + len(numbers)) ** .5
|
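| +
|
| +# Illustrative:
|
| +#
|
| +#     standard_deviation([2, 4, 4, 4, 5, 5, 7, 9])  ->  2.0
|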
| +
|
| +
|
| +def has_arithmetic_operator(line):
|
| + """Return True if line contains any arithmetic operators."""
|
| + for operator in pep8.ARITHMETIC_OP:
|
| + if operator in line:
|
| + return True
|
| +
|
| + return False
|
| +
|
| +
|
| +def count_unbalanced_brackets(line):
|
| + """Return number of unmatched open/close brackets."""
|
| + count = 0
|
| + for opening, closing in ['()', '[]', '{}']:
|
| + count += abs(line.count(opening) - line.count(closing))
|
| +
|
| + return count
|
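| +
|
| +# Illustrative:
|
| +#
|
| +#     count_unbalanced_brackets('foo(bar[0]')  ->  1
|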
| +
|
| +
|
| +def split_at_offsets(line, offsets):
|
| + """Split line at offsets.
|
| +
|
| + Return list of strings.
|
| +
|
| + """
|
| + result = []
|
| +
|
| + previous_offset = 0
|
| + current_offset = 0
|
| + for current_offset in sorted(offsets):
|
| + if current_offset < len(line) and previous_offset != current_offset:
|
| + result.append(line[previous_offset:current_offset].strip())
|
| + previous_offset = current_offset
|
| +
|
| + result.append(line[current_offset:])
|
| +
|
| + return result
|
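| +
|
| +# Illustrative (offsets index into the line; every piece except the
|
| +# last is stripped):
|
| +#
|
| +#     split_at_offsets('foo(bar, baz)', [4, 8])  ->  ['foo(', 'bar,', ' baz)']
|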
| +
|
| +
|
| +class LineEndingWrapper(object):
|
| +
|
| + r"""Replace line endings to work with sys.stdout.
|
| +
|
| + It seems that sys.stdout expects only '\n' as the line ending, no matter
|
| + the platform. Otherwise, we get repeated line endings.
|
| +
|
| + """
|
| +
|
| + def __init__(self, output):
|
| + self.__output = output
|
| +
|
| + def write(self, s):
|
| + self.__output.write(s.replace('\r\n', '\n').replace('\r', '\n'))
|
| +
|
| + def flush(self):
|
| + self.__output.flush()
|
| +
|
| +
|
| +def match_file(filename, exclude):
|
| + """Return True if file is okay for modifying/recursing."""
|
| + base_name = os.path.basename(filename)
|
| +
|
| + if base_name.startswith('.'):
|
| + return False
|
| +
|
| + for pattern in exclude:
|
| + if fnmatch.fnmatch(base_name, pattern):
|
| + return False
|
| +
|
| + if not os.path.isdir(filename) and not is_python_file(filename):
|
| + return False
|
| +
|
| + return True
|
| +
|
| +
|
| +def find_files(filenames, recursive, exclude):
|
| + """Yield filenames."""
|
| + while filenames:
|
| + name = filenames.pop(0)
|
| + if recursive and os.path.isdir(name):
|
| + for root, directories, children in os.walk(name):
|
| + filenames += [os.path.join(root, f) for f in children
|
| + if match_file(os.path.join(root, f),
|
| + exclude)]
|
| + directories[:] = [d for d in directories
|
| + if match_file(os.path.join(root, d),
|
| + exclude)]
|
| + else:
|
| + yield name
|
| +
|
| +
|
| +def _fix_file(parameters):
|
| + """Helper function for optionally running fix_file() in parallel."""
|
| + if parameters[1].verbose:
|
| + print('[file:{0}]'.format(parameters[0]), file=sys.stderr)
|
| + try:
|
| + fix_file(*parameters)
|
| + except IOError as error:
|
| + print(unicode(error), file=sys.stderr)
|
| +
|
| +
|
| +def fix_multiple_files(filenames, options, output=None):
|
| + """Fix list of files.
|
| +
|
| + Optionally fix files recursively.
|
| +
|
| + """
|
| + filenames = find_files(filenames, options.recursive, options.exclude)
|
| + if options.jobs > 1:
|
| + import multiprocessing
|
| + pool = multiprocessing.Pool(options.jobs)
|
| + pool.map(_fix_file,
|
| + [(name, options) for name in filenames])
|
| + else:
|
| + for name in filenames:
|
| + _fix_file((name, options, output))
|
| +
|
| +
|
| +def is_python_file(filename):
|
| + """Return True if filename is Python file."""
|
| + if filename.endswith('.py'):
|
| + return True
|
| +
|
| + try:
|
| + with open_with_encoding(filename) as f:
|
| + first_line = f.readlines(1)[0]
|
| + except (IOError, IndexError):
|
| + return False
|
| +
|
| + if not PYTHON_SHEBANG_REGEX.match(first_line):
|
| + return False
|
| +
|
| + return True
|
| +
|
| +
|
| +def is_probably_part_of_multiline(line):
|
| + """Return True if line is likely part of a multiline string.
|
| +
|
| + When multiline strings are involved, pep8 reports the error as being
|
| + at the start of the multiline string, which doesn't work for us.
|
| +
|
| + """
|
| + return (
|
| + '"""' in line or
|
| + "'''" in line or
|
| + line.rstrip().endswith('\\')
|
| + )
|
| +
|
| +
|
| +def main():
|
| + """Tool main."""
|
| + try:
|
| + # Exit on broken pipe.
|
| + signal.signal(signal.SIGPIPE, signal.SIG_DFL)
|
| + except AttributeError: # pragma: no cover
|
| + # SIGPIPE is not available on Windows.
|
| + pass
|
| +
|
| + try:
|
| + args = parse_args(sys.argv[1:])
|
| +
|
| + if args.list_fixes:
|
| + for code, description in sorted(supported_fixes()):
|
| + print('{code} - {description}'.format(
|
| + code=code, description=description))
|
| + return 0
|
| +
|
| + if args.files == ['-']:
|
| + assert not args.in_place
|
| +
|
| + # LineEndingWrapper is unnecessary here due to the symmetry between
|
| + # standard in and standard out.
|
| + sys.stdout.write(fix_code(sys.stdin.read(), args))
|
| + else:
|
| + if args.in_place or args.diff:
|
| + args.files = list(set(args.files))
|
| + else:
|
| + assert len(args.files) == 1
|
| + assert not args.recursive
|
| +
|
| + fix_multiple_files(args.files, args, sys.stdout)
|
| + except KeyboardInterrupt:
|
| + return 1 # pragma: no cover
|
| +
|
| +
|
| +class CachedTokenizer(object):
|
| +
|
| + """A one-element cache around tokenize.generate_tokens().
|
| +
|
| + Original code written by Ned Batchelder, in coverage.py.
|
| +
|
| + """
|
| +
|
| + def __init__(self):
|
| + self.last_text = None
|
| + self.last_tokens = None
|
| +
|
| + def generate_tokens(self, text):
|
| + """A stand-in for tokenize.generate_tokens()."""
|
| + if text != self.last_text:
|
| + string_io = io.StringIO(text)
|
| + self.last_tokens = list(
|
| + tokenize.generate_tokens(string_io.readline)
|
| + )
|
| + self.last_text = text
|
| + return self.last_tokens
|
| +
|
| +_cached_tokenizer = CachedTokenizer()
|
| +generate_tokens = _cached_tokenizer.generate_tokens
|
| +
|
| +
|
| +if __name__ == '__main__':
|
| + sys.exit(main())
|
|
|