Tools/Scripts/webkitpy/thirdparty/pep8.py - Issue 546613003: Add a new 'format-webkitpy' command that will reformat code to the style guide.

Unified Diff: Tools/Scripts/webkitpy/thirdparty/pep8.py

Issue 546613003: Add a new 'format-webkitpy' command that will reformat code to the style guide. (Closed) Base URL: svn://svn.chromium.org/blink/trunk

Patch Set: ready for review Created 6 years, 3 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: Tools/Scripts/webkitpy/thirdparty/pep8.py

diff --git a/Tools/Scripts/webkitpy/thirdparty/pep8.py b/Tools/Scripts/webkitpy/thirdparty/pep8.py

index c31937039ad137c6d2166cedacc016babc522541..f605f189fab31af0edff9b3b076104bc3e61d91c 100755

--- a/Tools/Scripts/webkitpy/thirdparty/pep8.py

+++ b/Tools/Scripts/webkitpy/thirdparty/pep8.py

@@ -1,6 +1,7 @@

-#!/usr/bin/python

+#!/usr/bin/env python

# pep8.py - Check Python source code formatting, according to PEP 8

# Permission is hereby granted, free of charge, to any person

# obtaining a copy of this software and associated documentation files

@@ -22,9 +23,8 @@

# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE

# SOFTWARE.

-"""

-Check Python source code formatting, according to PEP 8:

-http://www.python.org/dev/peps/pep-0008/

+r"""

+Check Python source code formatting, according to PEP 8.

For usage and a list of options, try this:

$ python pep8.py -h

@@ -42,87 +42,77 @@ W warnings

500 line length

600 deprecation

700 statements

-You can add checks to this program by writing plugins. Each plugin is

-a simple function that is called for each line of source code, either

-physical or logical.

-Physical line:

-- Raw line of text from the input file.

-Logical line:

-- Multi-line statements converted to a single line.

-- Stripped left and right.

-- Contents of strings replaced with 'xxx' of same length.

-- Comments removed.

-The check function requests physical or logical lines by the name of

-the first argument:

-def maximum_line_length(physical_line)

-def extraneous_whitespace(logical_line)

-def blank_lines(logical_line, blank_lines, indent_level, line_number)

-The last example above demonstrates how check plugins can request

-additional information with extra arguments. All attributes of the

-Checker object are available. Some examples:

-lines: a list of the raw lines from the input file

-tokens: the tokens that contribute to this logical line

-line_number: line number in the input file

-blank_lines: blank lines before this one

-indent_char: first indentation character in this file (' ' or '\t')

-indent_level: indentation (with tabs expanded to multiples of 8)

-previous_indent_level: indentation on previous line

-previous_logical: previous logical line

-The docstring of each check function shall be the relevant part of

-text from PEP 8. It is printed if the user enables --show-pep8.

-Several docstrings contain examples directly from the PEP 8 document.

-Okay: spam(ham[1], {eggs: 2})

-E201: spam( ham[1], {eggs: 2})

-These examples are verified automatically when pep8.py is run with the

---doctest option. You can add examples for your own check functions.

-The format is simple: "Okay" or error/warning code followed by colon

-and space, the rest of the line is example source code. If you put 'r'

-before the docstring, you can use \n for newline, \t for tab and \s

-for space.

+900 syntax error

"""

+from __future__ import with_statement

-__version__ = '0.5.0'

+__version__ = '1.5.7'

import os

import sys

import re

import time

import inspect

+import keyword

import tokenize

from optparse import OptionParser

-from keyword import iskeyword

from fnmatch import fnmatch

-DEFAULT_EXCLUDE = '.svn,CVS,.bzr,.hg,.git'

-DEFAULT_IGNORE = ['E24']

+try:

+ from configparser import RawConfigParser

+ from io import TextIOWrapper

+except ImportError:

+ from ConfigParser import RawConfigParser

+DEFAULT_EXCLUDE = '.svn,CVS,.bzr,.hg,.git,__pycache__'

+DEFAULT_IGNORE = 'E123,E226,E24'

+if sys.platform == 'win32':

+ DEFAULT_CONFIG = os.path.expanduser(r'~\.pep8')

+else:

+ DEFAULT_CONFIG = os.path.join(os.getenv('XDG_CONFIG_HOME') or

+ os.path.expanduser('~/.config'), 'pep8')

+PROJECT_CONFIG = ('setup.cfg', 'tox.ini', '.pep8')

+TESTSUITE_PATH = os.path.join(os.path.dirname(__file__), 'testsuite')

+MAX_LINE_LENGTH = 79

+REPORT_FORMAT = {

+ 'default': '%(path)s:%(row)d:%(col)d: %(code)s %(text)s',

+ 'pylint': '%(path)s:%(row)d: [%(code)s] %(text)s',

+PyCF_ONLY_AST = 1024

+SINGLETONS = frozenset(['False', 'None', 'True'])

+KEYWORDS = frozenset(keyword.kwlist + ['print']) - SINGLETONS

+UNARY_OPERATORS = frozenset(['>>', '**', '*', '+', '-'])

+ARITHMETIC_OP = frozenset(['**', '*', '/', '//', '+', '-'])

+WS_OPTIONAL_OPERATORS = ARITHMETIC_OP.union(['^', '&', '|', '<<', '>>', '%'])

+WS_NEEDED_OPERATORS = frozenset([

+ '**=', '*=', '/=', '//=', '+=', '-=', '!=', '<>', '<', '>',

+ '%=', '^=', '&=', '|=', '==', '<=', '>=', '<<=', '>>=', '='])

+WHITESPACE = frozenset(' \t')

+NEWLINE = frozenset([tokenize.NL, tokenize.NEWLINE])

+SKIP_TOKENS = NEWLINE.union([tokenize.INDENT, tokenize.DEDENT])

+# ERRORTOKEN is triggered by backticks in Python 3

+SKIP_COMMENTS = SKIP_TOKENS.union([tokenize.COMMENT, tokenize.ERRORTOKEN])

+BENCHMARK_KEYS = ['directories', 'files', 'logical lines', 'physical lines']

INDENT_REGEX = re.compile(r'([ \t]*)')

-RAISE_COMMA_REGEX = re.compile(r'raise\s+\w+\s*(,)')

-SELFTEST_REGEX = re.compile(r'(Okay|[EW]\d{3}):\s(.*)')

-ERRORCODE_REGEX = re.compile(r'[EW]\d{3}')

-E301NOT_REGEX = re.compile(r'class |def |u?r?["\']')

-WHITESPACE = ' \t'

-BINARY_OPERATORS = ['**=', '*=', '+=', '-=', '!=', '<>',

- '%=', '^=', '&=', '|=', '==', '/=', '//=', '>=', '<=', '>>=', '<<=',

- '%', '^', '&', '|', '=', '/', '//', '>', '<', '>>', '<<']

-UNARY_OPERATORS = ['**', '*', '+', '-']

-OPERATORS = BINARY_OPERATORS + UNARY_OPERATORS

-options = None

-args = None

+RAISE_COMMA_REGEX = re.compile(r'raise\s+\w+\s*,')

+RERAISE_COMMA_REGEX = re.compile(r'raise\s+\w+\s*,.*,\s*\w+\s*$')

+ERRORCODE_REGEX = re.compile(r'\b[A-Z]\d{3}\b')

+DOCSTRING_REGEX = re.compile(r'u?r?["\']')

+EXTRANEOUS_WHITESPACE_REGEX = re.compile(r'[[({] | []}),;:]')

+WHITESPACE_AFTER_COMMA_REGEX = re.compile(r'[,;:]\s*(?: |\t)')

+COMPARE_SINGLETON_REGEX = re.compile(r'([=!]=)\s*(None|False|True)')

+COMPARE_NEGATIVE_REGEX = re.compile(r'\b(not)\s+[^[({ ]+\s+(in|is)\s')

+COMPARE_TYPE_REGEX = re.compile(r'(?:[=!]=|is(?:\s+not)?)\s*type(?:s.\w+Type'

+ r'|\s*$\s*([^)]*[^ )])\s*$)')

+KEYWORD_REGEX = re.compile(r'(\s*)\b(?:%s)\b(\s*)' % r'|'.join(KEYWORDS))

+OPERATOR_REGEX = re.compile(r'(?:[^,\s])(\s*)(?:[-+*/|!<=>%&^]+)(\s*)')

+LAMBDA_REGEX = re.compile(r'\blambda\b')

+HUNK_REGEX = re.compile(r'^@@ -\d+(?:,\d+)? \+(\d+)(?:,(\d+))? @@.*$')

+# Work around Python < 2.6 behaviour, which does not generate NL after

+# a comment which is on a line by itself.

+COMMENT_WITH_NL = tokenize.generate_tokens(['#\n'].pop).send(None)[1] == '#\n'

##############################################################################

@@ -131,8 +121,7 @@ args = None

def tabs_or_spaces(physical_line, indent_char):

- r"""

- Never mix tabs and spaces.

+ r"""Never mix tabs and spaces.

The most popular way of indenting Python is with spaces only. The

second-most popular way is with tabs only. Code indented with a mixture

@@ -151,55 +140,55 @@ def tabs_or_spaces(physical_line, indent_char):

def tabs_obsolete(physical_line):

- r"""

- For new projects, spaces-only are strongly recommended over tabs. Most

- editors have features that make this easy to do.

+ r"""For new projects, spaces-only are strongly recommended over tabs.

Okay: if True:\n return

W191: if True:\n\treturn

"""

indent = INDENT_REGEX.match(physical_line).group(1)

- if indent.count('\t'):

+ if '\t' in indent:

return indent.index('\t'), "W191 indentation contains tabs"

def trailing_whitespace(physical_line):

- """

- JCR: Trailing whitespace is superfluous.

+ r"""Trailing whitespace is superfluous.

- Okay: spam(1)

- W291: spam(1)\s

+ The warning returned varies on whether the line itself is blank, for easier

+ filtering for those who want to indent their blank lines.

+ Okay: spam(1)\n#

+ W291: spam(1) \n#

+ W293: class Foo(object):\n \n bang = 12

"""

physical_line = physical_line.rstrip('\n') # chr(10), newline

physical_line = physical_line.rstrip('\r') # chr(13), carriage return

physical_line = physical_line.rstrip('\x0c') # chr(12), form feed, ^L

- stripped = physical_line.rstrip()

+ stripped = physical_line.rstrip(' \t\v')

if physical_line != stripped:

- return len(stripped), "W291 trailing whitespace"

+ if stripped:

+ return len(stripped), "W291 trailing whitespace"

+ else:

+ return 0, "W293 blank line contains whitespace"

-def trailing_blank_lines(physical_line, lines, line_number):

- r"""

- JCR: Trailing blank lines are superfluous.

+def trailing_blank_lines(physical_line, lines, line_number, total_lines):

+ r"""Trailing blank lines are superfluous.

Okay: spam(1)

W391: spam(1)\n

- """

- if physical_line.strip() == '' and line_number == len(lines):

- return 0, "W391 blank line at end of file"

-def missing_newline(physical_line):

+ However the last line should end with a new line (warning W292).

"""

- JCR: The last line should have a newline.

- """

- if physical_line.rstrip() == physical_line:

- return len(physical_line), "W292 no newline at end of file"

+ if line_number == total_lines:

+ stripped_last_line = physical_line.rstrip()

+ if not stripped_last_line:

+ return 0, "W391 blank line at end of file"

+ if stripped_last_line == physical_line:

+ return len(physical_line), "W292 no newline at end of file"

-def maximum_line_length(physical_line):

- """

- Limit all lines to a maximum of 79 characters.

+def maximum_line_length(physical_line, max_line_length, multiline):

+ r"""Limit all lines to a maximum of 79 characters.

There are still many devices around that are limited to 80 character

lines; plus, limiting windows to 80 characters makes it possible to have

@@ -207,10 +196,28 @@ def maximum_line_length(physical_line):

ugly. Therefore, please limit all lines to a maximum of 79 characters.

For flowing long blocks of text (docstrings or comments), limiting the

length to 72 characters is recommended.

- """

- length = len(physical_line.rstrip())

- if length > 79:

- return 79, "E501 line too long (%d characters)" % length

+ Reports error E501.

+ """

+ line = physical_line.rstrip()

+ length = len(line)

+ if length > max_line_length and not noqa(line):

+ # Special case for long URLs in multi-line docstrings or comments,

+ # but still report the error when the 72 first chars are whitespaces.

+ chunks = line.split()

+ if ((len(chunks) == 1 and multiline) or

+ (len(chunks) == 2 and chunks[0] == '#')) and \

+ len(line) - len(chunks[-1]) < max_line_length - 7:

+ return

+ if hasattr(line, 'decode'): # Python 2

+ # The line could contain multi-byte characters

+ try:

+ length = len(line.decode('utf-8'))

+ except UnicodeError:

+ pass

+ if length > max_line_length:

+ return (max_line_length, "E501 line too long "

+ "(%d > %d characters)" % (length, max_line_length))

##############################################################################

@@ -219,9 +226,8 @@ def maximum_line_length(physical_line):

def blank_lines(logical_line, blank_lines, indent_level, line_number,

- previous_logical, blank_lines_before_comment):

- r"""

- Separate top-level function and class definitions with two blank lines.

+ blank_before, previous_logical, previous_indent_level):

+ r"""Separate top-level function and class definitions with two blank lines.

Method definitions inside a class are separated by a single blank line.

@@ -240,30 +246,27 @@ def blank_lines(logical_line, blank_lines, indent_level, line_number,

E303: def a():\n\n\n\n pass

E304: @decorator\n\ndef a():\n pass

"""

- if line_number == 1:

+ if line_number < 3 and not previous_logical:

return # Don't expect blank lines before the first line

- max_blank_lines = max(blank_lines, blank_lines_before_comment)

if previous_logical.startswith('@'):

- if max_blank_lines:

- return 0, "E304 blank lines found after function decorator"

- elif max_blank_lines > 2 or (indent_level and max_blank_lines == 2):

- return 0, "E303 too many blank lines (%d)" % max_blank_lines

- elif (logical_line.startswith('def ') or

- logical_line.startswith('class ') or

- logical_line.startswith('@')):

+ if blank_lines:

+ yield 0, "E304 blank lines found after function decorator"

+ elif blank_lines > 2 or (indent_level and blank_lines == 2):

+ yield 0, "E303 too many blank lines (%d)" % blank_lines

+ elif logical_line.startswith(('def ', 'class ', '@')):

if indent_level:

- if not (max_blank_lines or E301NOT_REGEX.match(previous_logical)):

- return 0, "E301 expected 1 blank line, found 0"

- elif max_blank_lines != 2:

- return 0, "E302 expected 2 blank lines, found %d" % max_blank_lines

+ if not (blank_before or previous_indent_level < indent_level or

+ DOCSTRING_REGEX.match(previous_logical)):

+ yield 0, "E301 expected 1 blank line, found 0"

+ elif blank_before != 2:

+ yield 0, "E302 expected 2 blank lines, found %d" % blank_before

def extraneous_whitespace(logical_line):

- """

- Avoid extraneous whitespace in the following situations:

+ r"""Avoid extraneous whitespace.

+ Avoid extraneous whitespace in these situations:

- Immediately inside parentheses, brackets or braces.

- Immediately before a comma, semicolon, or colon.

Okay: spam(ham[1], {eggs: 2})

@@ -279,23 +282,43 @@ def extraneous_whitespace(logical_line):

E203: if x == 4 : print x, y; x, y = y, x

"""

line = logical_line

- for char in '([{':

- found = line.find(char + ' ')

- if found > -1:

- return found + 1, "E201 whitespace after '%s'" % char

- for char in '}])':

- found = line.find(' ' + char)

- if found > -1 and line[found - 1] != ',':

- return found, "E202 whitespace before '%s'" % char

- for char in ',;:':

- found = line.find(' ' + char)

- if found > -1:

- return found, "E203 whitespace before '%s'" % char

+ for match in EXTRANEOUS_WHITESPACE_REGEX.finditer(line):

+ text = match.group()

+ char = text.strip()

+ found = match.start()

+ if text == char + ' ':

+ # assert char in '([{'

+ yield found + 1, "E201 whitespace after '%s'" % char

+ elif line[found - 1] != ',':

+ code = ('E202' if char in '}])' else 'E203') # if char in ',;:'

+ yield found, "%s whitespace before '%s'" % (code, char)

-def missing_whitespace(logical_line):

+def whitespace_around_keywords(logical_line):

+ r"""Avoid extraneous whitespace around keywords.

+ Okay: True and False

+ E271: True and False

+ E272: True and False

+ E273: True and\tFalse

+ E274: True\tand False

"""

- JCR: Each comma, semicolon or colon should be followed by whitespace.

+ for match in KEYWORD_REGEX.finditer(logical_line):

+ before, after = match.groups()

+ if '\t' in before:

+ yield match.start(1), "E274 tab before keyword"

+ elif len(before) > 1:

+ yield match.start(1), "E272 multiple spaces before keyword"

+ if '\t' in after:

+ yield match.start(2), "E273 tab after keyword"

+ elif len(after) > 1:

+ yield match.start(2), "E271 multiple spaces after keyword"

+def missing_whitespace(logical_line):

+ r"""Each comma, semicolon or colon should be followed by whitespace.

Okay: [a, b]

Okay: (3,)

@@ -305,23 +328,24 @@ def missing_whitespace(logical_line):

Okay: a[1:4:2]

E231: ['a','b']

E231: foo(bar,baz)

+ E231: [{'a':'b'}]

"""

line = logical_line

for index in range(len(line) - 1):

char = line[index]

if char in ',;:' and line[index + 1] not in WHITESPACE:

before = line[:index]

- if char == ':' and before.count('[') > before.count(']'):

+ if char == ':' and before.count('[') > before.count(']') and \

+ before.rfind('{') < before.rfind('['):

continue # Slice syntax, no space required

if char == ',' and line[index + 1] == ')':

continue # Allow tuple with only one element: (3,)

- return index, "E231 missing whitespace after '%s'" % char

+ yield index, "E231 missing whitespace after '%s'" % char

def indentation(logical_line, previous_logical, indent_char,

indent_level, previous_indent_level):

- r"""

- Use 4 spaces per indentation level.

+ r"""Use 4 spaces per indentation level.

For really old code that you don't want to mess up, you can continue to

use 8-space tabs.

@@ -337,23 +361,218 @@ def indentation(logical_line, previous_logical, indent_char,

E113: a = 1\n b = 2

"""

if indent_char == ' ' and indent_level % 4:

- return 0, "E111 indentation is not a multiple of four"

+ yield 0, "E111 indentation is not a multiple of four"

indent_expect = previous_logical.endswith(':')

if indent_expect and indent_level <= previous_indent_level:

- return 0, "E112 expected an indented block"

+ yield 0, "E112 expected an indented block"

if indent_level > previous_indent_level and not indent_expect:

- return 0, "E113 unexpected indentation"

+ yield 0, "E113 unexpected indentation"

+def continued_indentation(logical_line, tokens, indent_level, hang_closing,

+ indent_char, noqa, verbose):

+ r"""Continuation lines indentation.

+ Continuation lines should align wrapped elements either vertically

+ using Python's implicit line joining inside parentheses, brackets

+ and braces, or using a hanging indent.

+ When using a hanging indent these considerations should be applied:

+ - there should be no arguments on the first line, and

+ - further indentation should be used to clearly distinguish itself as a

+ continuation line.

+ Okay: a = (\n)

+ E123: a = (\n )

+ Okay: a = (\n 42)

+ E121: a = (\n 42)

+ E122: a = (\n42)

+ E123: a = (\n 42\n )

+ E124: a = (24,\n 42\n)

+ E125: if (\n b):\n pass

+ E126: a = (\n 42)

+ E127: a = (24,\n 42)

+ E128: a = (24,\n 42)

+ E129: if (a or\n b):\n pass

+ E131: a = (\n 42\n 24)

+ """

+ first_row = tokens[0][2][0]

+ nrows = 1 + tokens[-1][2][0] - first_row

+ if noqa or nrows == 1:

+ return

+ # indent_next tells us whether the next block is indented; assuming

+ # that it is indented by 4 spaces, then we should not allow 4-space

+ # indents on the final continuation line; in turn, some other

+ # indents are allowed to have an extra 4 spaces.

+ indent_next = logical_line.endswith(':')

+ row = depth = 0

+ valid_hangs = (4,) if indent_char != '\t' else (4, 8)

+ # remember how many brackets were opened on each line

+ parens = [0] * nrows

+ # relative indents of physical lines

+ rel_indent = [0] * nrows

+ # for each depth, collect a list of opening rows

+ open_rows = [[0]]

+ # for each depth, memorize the hanging indentation

+ hangs = [None]

+ # visual indents

+ indent_chances = {}

+ last_indent = tokens[0][2]

+ visual_indent = None

+ # for each depth, memorize the visual indent column

+ indent = [last_indent[1]]

+ if verbose >= 3:

+ print(">>> " + tokens[0][4].rstrip())

-def whitespace_before_parameters(logical_line, tokens):

- """

- Avoid extraneous whitespace in the following situations:

+ for token_type, text, start, end, line in tokens:

+ newline = row < start[0] - first_row

+ if newline:

+ row = start[0] - first_row

+ newline = not last_token_multiline and token_type not in NEWLINE

+ if newline:

+ # this is the beginning of a continuation line.

+ last_indent = start

+ if verbose >= 3:

+ print("... " + line.rstrip())

- - Immediately before the open parenthesis that starts the argument

- list of a function call.

+ # record the initial indent.

+ rel_indent[row] = expand_indent(line) - indent_level

- - Immediately before the open parenthesis that starts an indexing or

- slicing.

+ # identify closing bracket

+ close_bracket = (token_type == tokenize.OP and text in ']})')

+ # is the indent relative to an opening bracket line?

+ for open_row in reversed(open_rows[depth]):

+ hang = rel_indent[row] - rel_indent[open_row]

+ hanging_indent = hang in valid_hangs

+ if hanging_indent:

+ break

+ if hangs[depth]:

+ hanging_indent = (hang == hangs[depth])

+ # is there any chance of visual indent?

+ visual_indent = (not close_bracket and hang > 0 and

+ indent_chances.get(start[1]))

+ if close_bracket and indent[depth]:

+ # closing bracket for visual indent

+ if start[1] != indent[depth]:

+ yield (start, "E124 closing bracket does not match "

+ "visual indentation")

+ elif close_bracket and not hang:

+ # closing bracket matches indentation of opening bracket's line

+ if hang_closing:

+ yield start, "E133 closing bracket is missing indentation"

+ elif indent[depth] and start[1] < indent[depth]:

+ if visual_indent is not True:

+ # visual indent is broken

+ yield (start, "E128 continuation line "

+ "under-indented for visual indent")

+ elif hanging_indent or (indent_next and rel_indent[row] == 8):

+ # hanging indent is verified

+ if close_bracket and not hang_closing:

+ yield (start, "E123 closing bracket does not match "

+ "indentation of opening bracket's line")

+ hangs[depth] = hang

+ elif visual_indent is True:

+ # visual indent is verified

+ indent[depth] = start[1]

+ elif visual_indent in (text, str):

+ # ignore token lined up with matching one from a previous line

+ pass

+ else:

+ # indent is broken

+ if hang <= 0:

+ error = "E122", "missing indentation or outdented"

+ elif indent[depth]:

+ error = "E127", "over-indented for visual indent"

+ elif not close_bracket and hangs[depth]:

+ error = "E131", "unaligned for hanging indent"

+ else:

+ hangs[depth] = hang

+ if hang > 4:

+ error = "E126", "over-indented for hanging indent"

+ else:

+ error = "E121", "under-indented for hanging indent"

+ yield start, "%s continuation line %s" % error

+ # look for visual indenting

+ if (parens[row] and token_type not in (tokenize.NL, tokenize.COMMENT)

+ and not indent[depth]):

+ indent[depth] = start[1]

+ indent_chances[start[1]] = True

+ if verbose >= 4:

+ print("bracket depth %s indent to %s" % (depth, start[1]))

+ # deal with implicit string concatenation

+ elif (token_type in (tokenize.STRING, tokenize.COMMENT) or

+ text in ('u', 'ur', 'b', 'br')):

+ indent_chances[start[1]] = str

+ # special case for the "if" statement because len("if (") == 4

+ elif not indent_chances and not row and not depth and text == 'if':

+ indent_chances[end[1] + 1] = True

+ elif text == ':' and line[end[1]:].isspace():

+ open_rows[depth].append(row)

+ # keep track of bracket depth

+ if token_type == tokenize.OP:

+ if text in '([{':

+ depth += 1

+ indent.append(0)

+ hangs.append(None)

+ if len(open_rows) == depth:

+ open_rows.append([])

+ open_rows[depth].append(row)

+ parens[row] += 1

+ if verbose >= 4:

+ print("bracket depth %s seen, col %s, visual min = %s" %

+ (depth, start[1], indent[depth]))

+ elif text in ')]}' and depth > 0:

+ # parent indents should not be more than this one

+ prev_indent = indent.pop() or last_indent[1]

+ hangs.pop()

+ for d in range(depth):

+ if indent[d] > prev_indent:

+ indent[d] = 0

+ for ind in list(indent_chances):

+ if ind >= prev_indent:

+ del indent_chances[ind]

+ del open_rows[depth + 1:]

+ depth -= 1

+ if depth:

+ indent_chances[indent[depth]] = True

+ for idx in range(row, -1, -1):

+ if parens[idx]:

+ parens[idx] -= 1

+ break

+ assert len(indent) == depth + 1

+ if start[1] not in indent_chances:

+ # allow to line up tokens

+ indent_chances[start[1]] = text

+ last_token_multiline = (start[0] != end[0])

+ if last_token_multiline:

+ rel_indent[end[0] - first_row] = rel_indent[row]

+ if indent_next and expand_indent(line) == indent_level + 4:

+ pos = (start[0], indent[0] + 4)

+ if visual_indent:

+ code = "E129 visually indented line"

+ else:

+ code = "E125 continuation line"

+ yield pos, "%s with same indent as next logical line" % code

+def whitespace_before_parameters(logical_line, tokens):

+ r"""Avoid extraneous whitespace.

+ Avoid extraneous whitespace in the following situations:

+ - before the open parenthesis that starts the argument list of a

+ function call.

+ - before the open parenthesis that starts an indexing or slicing.

Okay: spam(1)

E211: spam (1)

@@ -362,29 +581,25 @@ def whitespace_before_parameters(logical_line, tokens):

E211: dict ['key'] = list[index]

E211: dict['key'] = list [index]

"""

- prev_type = tokens[0][0]

- prev_text = tokens[0][1]

- prev_end = tokens[0][3]

+ prev_type, prev_text, __, prev_end, __ = tokens[0]

for index in range(1, len(tokens)):

- token_type, text, start, end, line = tokens[index]

+ token_type, text, start, end, __ = tokens[index]

if (token_type == tokenize.OP and

text in '([' and

start != prev_end and

- prev_type == tokenize.NAME and

+ (prev_type == tokenize.NAME or prev_text in '}])') and

+ # Syntax "class A (B):" is allowed, but avoid it

(index < 2 or tokens[index - 2][1] != 'class') and

- (not iskeyword(prev_text))):

- return prev_end, "E211 whitespace before '%s'" % text

+ # Allow "return (a.foo for a in range(5))"

+ not keyword.iskeyword(prev_text)):

+ yield prev_end, "E211 whitespace before '%s'" % text

prev_type = token_type

prev_text = text

prev_end = end

def whitespace_around_operator(logical_line):

- """

- Avoid extraneous whitespace in the following situations:

- - More than one space around an assignment (or other) operator to

- align it with another.

+ r"""Avoid extraneous whitespace around an operator.

Okay: a = 12 + 3

E221: a = 4 + 5

@@ -392,30 +607,30 @@ def whitespace_around_operator(logical_line):

E223: a = 4\t+ 5

E224: a = 4 +\t5

"""

- line = logical_line

- for operator in OPERATORS:

- found = line.find(' ' + operator)

- if found > -1:

- return found, "E221 multiple spaces before operator"

- found = line.find(operator + ' ')

- if found > -1:

- return found, "E222 multiple spaces after operator"

- found = line.find('\t' + operator)

- if found > -1:

- return found, "E223 tab before operator"

- found = line.find(operator + '\t')

- if found > -1:

- return found, "E224 tab after operator"

+ for match in OPERATOR_REGEX.finditer(logical_line):

+ before, after = match.groups()

+ if '\t' in before:

+ yield match.start(1), "E223 tab before operator"

+ elif len(before) > 1:

+ yield match.start(1), "E221 multiple spaces before operator"

+ if '\t' in after:

+ yield match.start(2), "E224 tab after operator"

+ elif len(after) > 1:

+ yield match.start(2), "E222 multiple spaces after operator"

def missing_whitespace_around_operator(logical_line, tokens):

- r"""

+ r"""Surround operators with a single space on either side.

- Always surround these binary operators with a single space on

either side: assignment (=), augmented assignment (+=, -= etc.),

- comparisons (==, <, >, !=, <>, <=, >=, in, not in, is, is not),

+ comparisons (==, <, >, !=, <=, >=, in, not in, is, is not),

Booleans (and, or, not).

- - Use spaces around arithmetic operators.

+ - If operators with different priorities are used, consider adding

+ whitespace around the operators with the lowest priorities.

Okay: i = i + 1

Okay: submitted += 1

@@ -423,64 +638,84 @@ def missing_whitespace_around_operator(logical_line, tokens):

Okay: hypot2 = x * x + y * y

Okay: c = (a + b) * (a - b)

Okay: foo(bar, key='word', *args, **kwargs)

- Okay: baz(**kwargs)

- Okay: negative = -1

- Okay: spam(-1)

Okay: alpha[:-i]

- Okay: if not -5 < x < +5:\n pass

- Okay: lambda *args, **kw: (args, kw)

E225: i=i+1

E225: submitted +=1

- E225: x = x*2 - 1

- E225: hypot2 = x*x + y*y

- E225: c = (a+b) * (a-b)

- E225: c = alpha -4

+ E225: x = x /2 - 1

E225: z = x **y

+ E226: c = (a+b) * (a-b)

+ E226: hypot2 = x*x + y*y

+ E227: c = a|b

+ E228: msg = fmt%(errno, errmsg)

"""

parens = 0

need_space = False

prev_type = tokenize.OP

prev_text = prev_end = None

for token_type, text, start, end, line in tokens:

- if token_type in (tokenize.NL, tokenize.NEWLINE, tokenize.ERRORTOKEN):

- # ERRORTOKEN is triggered by backticks in Python 3000

+ if token_type in SKIP_COMMENTS:

continue

if text in ('(', 'lambda'):

parens += 1

elif text == ')':

parens -= 1

if need_space:

- if start == prev_end:

- return prev_end, "E225 missing whitespace around operator"

- need_space = False

- elif token_type == tokenize.OP:

+ if start != prev_end:

+ # Found a (probably) needed space

+ if need_space is not True and not need_space[1]:

+ yield (need_space[0],

+ "E225 missing whitespace around operator")

+ need_space = False

+ elif text == '>' and prev_text in ('<', '-'):

+ # Tolerate the "<>" operator, even if running Python 3

+ # Deal with Python 3's annotated return value "->"

+ pass

+ else:

+ if need_space is True or need_space[1]:

+ # A needed trailing space was not found

+ yield prev_end, "E225 missing whitespace around operator"

+ else:

+ code, optype = 'E226', 'arithmetic'

+ if prev_text == '%':

+ code, optype = 'E228', 'modulo'

+ elif prev_text not in ARITHMETIC_OP:

+ code, optype = 'E227', 'bitwise or shift'

+ yield (need_space[0], "%s missing whitespace "

+ "around %s operator" % (code, optype))

+ need_space = False

+ elif token_type == tokenize.OP and prev_end is not None:

if text == '=' and parens:

# Allow keyword args or defaults: foo(bar=None).

pass

- elif text in BINARY_OPERATORS:

+ elif text in WS_NEEDED_OPERATORS:

need_space = True

elif text in UNARY_OPERATORS:

- if ((prev_type != tokenize.OP or prev_text in '}])') and not

- (prev_type == tokenize.NAME and iskeyword(prev_text))):

- # Allow unary operators: -123, -x, +1.

- # Allow argument unpacking: foo(*args, **kwargs).

- need_space = True

- if need_space and start == prev_end:

- return prev_end, "E225 missing whitespace around operator"

+ # Check if the operator is being used as a binary operator

+ # Allow unary operators: -123, -x, +1.

+ # Allow argument unpacking: foo(*args, **kwargs).

+ if (prev_text in '}])' if prev_type == tokenize.OP

+ else prev_text not in KEYWORDS):

+ need_space = None

+ elif text in WS_OPTIONAL_OPERATORS:

+ need_space = None

+ if need_space is None:

+ # Surrounding space is optional, but ensure that

+ # trailing space matches opening space

+ need_space = (prev_end, start != prev_end)

+ elif need_space and start == prev_end:

+ # A needed opening space was not found

+ yield prev_end, "E225 missing whitespace around operator"

+ need_space = False

prev_type = token_type

prev_text = text

prev_end = end

def whitespace_around_comma(logical_line):

- """

- Avoid extraneous whitespace in the following situations:

+ r"""Avoid extraneous whitespace after a comma or a colon.

- - More than one space around an assignment (or other) operator to

- align it with another.

- JCR: This should also be applied around comma etc.

Note: these checks are disabled by default

Okay: a = (1, 2)

@@ -488,17 +723,17 @@ def whitespace_around_comma(logical_line):

E242: a = (1,\t2)

"""

line = logical_line

- for separator in ',;:':

- found = line.find(separator + ' ')

- if found > -1:

- return found + 1, "E241 multiple spaces after '%s'" % separator

- found = line.find(separator + '\t')

- if found > -1:

- return found + 1, "E242 tab after '%s'" % separator

+ for m in WHITESPACE_AFTER_COMMA_REGEX.finditer(line):

+ found = m.start() + 1

+ if '\t' in m.group():

+ yield found, "E242 tab after '%s'" % m.group()[0]

+ else:

+ yield found, "E241 multiple spaces after '%s'" % m.group()[0]

-def whitespace_around_named_parameter_equals(logical_line):

- """

+def whitespace_around_named_parameter_equals(logical_line, tokens):

+ r"""Don't use spaces around the '=' sign in function arguments.

Don't use spaces around the '=' sign when used to indicate a

keyword argument or a default parameter value.

@@ -513,60 +748,68 @@ def whitespace_around_named_parameter_equals(logical_line):

E251: return magic(r = real, i = imag)

"""

parens = 0

- window = ' '

- equal_ok = ['==', '!=', '<=', '>=']

- for pos, c in enumerate(logical_line):

- window = window[1:] + c

- if parens:

- if window[0] in WHITESPACE and window[1] == '=':

- if window[1:] not in equal_ok:

- issue = "E251 no spaces around keyword / parameter equals"

- return pos, issue

- if window[2] in WHITESPACE and window[1] == '=':

- if window[:2] not in equal_ok:

- issue = "E251 no spaces around keyword / parameter equals"

- return pos, issue

- if c == '(':

- parens += 1

- elif c == ')':

- parens -= 1

+ no_space = False

+ prev_end = None

+ message = "E251 unexpected spaces around keyword / parameter equals"

+ for token_type, text, start, end, line in tokens:

+ if token_type == tokenize.NL:

+ continue

+ if no_space:

+ no_space = False

+ if start != prev_end:

+ yield (prev_end, message)

+ elif token_type == tokenize.OP:

+ if text == '(':

+ parens += 1

+ elif text == ')':

+ parens -= 1

+ elif parens and text == '=':

+ no_space = True

+ if start != prev_end:

+ yield (prev_end, message)

+ prev_end = end

-def whitespace_before_inline_comment(logical_line, tokens):

- """

- Separate inline comments by at least two spaces.

+def whitespace_before_comment(logical_line, tokens):

+ r"""Separate inline comments by at least two spaces.

An inline comment is a comment on the same line as a statement. Inline

comments should be separated by at least two spaces from the statement.

They should start with a # and a single space.

+ Each line of a block comment starts with a # and a single space

+ (unless it is indented text inside the comment).

Okay: x = x + 1 # Increment x

+ Okay: # Block comment

E261: x = x + 1 # Increment x

E262: x = x + 1 #Increment x

E262: x = x + 1 # Increment x

+ E265: #Block comment

"""

prev_end = (0, 0)

for token_type, text, start, end, line in tokens:

- if token_type == tokenize.NL:

- continue

if token_type == tokenize.COMMENT:

- if not line[:start[1]].strip():

- continue

- if prev_end[0] == start[0] and start[1] < prev_end[1] + 2:

- return (prev_end,

- "E261 at least two spaces before inline comment")

- if (len(text) > 1 and text.startswith('# ')

- or not text.startswith('# ')):

- return start, "E262 inline comment should start with '# '"

- else:

+ inline_comment = line[:start[1]].strip()

+ if inline_comment:

+ if prev_end[0] == start[0] and start[1] < prev_end[1] + 2:

+ yield (prev_end,

+ "E261 at least two spaces before inline comment")

+ symbol, sp, comment = text.partition(' ')

+ bad_prefix = symbol not in ('#', '#:')

+ if inline_comment:

+ if bad_prefix or comment[:1].isspace():

+ yield start, "E262 inline comment should start with '# '"

+ elif bad_prefix:

+ if text.rstrip('#') and (start[0] > 1 or symbol[1] != '!'):

+ yield start, "E265 block comment should start with '# '"

+ elif token_type != tokenize.NL:

prev_end = end

def imports_on_separate_lines(logical_line):

- r"""

- Imports should usually be on separate lines.

+ r"""Imports should usually be on separate lines.

Okay: import os\nimport sys

E401: import sys, os

@@ -580,18 +823,16 @@ def imports_on_separate_lines(logical_line):

line = logical_line

if line.startswith('import '):

found = line.find(',')

- if found > -1:

- return found, "E401 multiple imports on one line"

+ if -1 < found and ';' not in line[:found]:

+ yield found, "E401 multiple imports on one line"

def compound_statements(logical_line):

- r"""

- Compound statements (multiple statements on the same line) are

- generally discouraged.

+ r"""Compound statements (on the same line) are generally discouraged.

While sometimes it's okay to put an if/for/while with a small body

- on the same line, never do this for multi-clause statements. Also

- avoid folding such long lines!

+ on the same line, never do this for multi-clause statements.

+ Also avoid folding such long lines!

Okay: if foo == 'blah':\n do_blah_thing()

Okay: do_one()

@@ -608,67 +849,183 @@ def compound_statements(logical_line):

E701: if foo == 'blah': one(); two(); three()

E702: do_one(); do_two(); do_three()

+ E703: do_four(); # useless semicolon

"""

line = logical_line

+ last_char = len(line) - 1

found = line.find(':')

- if -1 < found < len(line) - 1:

+ while -1 < found < last_char:

before = line[:found]

if (before.count('{') <= before.count('}') and # {'a': 1} (dict)

before.count('[') <= before.count(']') and # [1:2] (slice)

- not re.search(r'\blambda\b', before)): # lambda x: x

- return found, "E701 multiple statements on one line (colon)"

+ before.count('(') <= before.count(')') and # (Python 3 annotation)

+ not LAMBDA_REGEX.search(before)): # lambda x: x

+ yield found, "E701 multiple statements on one line (colon)"

+ found = line.find(':', found + 1)

found = line.find(';')

- if -1 < found:

- return found, "E702 multiple statements on one line (semicolon)"

+ while -1 < found:

+ if found < last_char:

+ yield found, "E702 multiple statements on one line (semicolon)"

+ else:

+ yield found, "E703 statement ends with a semicolon"

+ found = line.find(';', found + 1)

+def explicit_line_join(logical_line, tokens):

+ r"""Avoid explicit line join between brackets.

+ The preferred way of wrapping long lines is by using Python's implied line

+ continuation inside parentheses, brackets and braces. Long lines can be

+ broken over multiple lines by wrapping expressions in parentheses. These

+ should be used in preference to using a backslash for line continuation.

+ E502: aaa = [123, \\n 123]

+ E502: aaa = ("bbb " \\n "ccc")

+ Okay: aaa = [123,\n 123]

+ Okay: aaa = ("bbb "\n "ccc")

+ Okay: aaa = "bbb " \\n "ccc"

+ """

+ prev_start = prev_end = parens = 0

+ for token_type, text, start, end, line in tokens:

+ if start[0] != prev_start and parens and backslash:

+ yield backslash, "E502 the backslash is redundant between brackets"

+ if end[0] != prev_end:

+ if line.rstrip('\r\n').endswith('\\'):

+ backslash = (end[0], len(line.splitlines()[-1]) - 1)

+ else:

+ backslash = None

+ prev_start = prev_end = end[0]

+ else:

+ prev_start = start[0]

+ if token_type == tokenize.OP:

+ if text in '([{':

+ parens += 1

+ elif text in ')]}':

+ parens -= 1

+def comparison_to_singleton(logical_line, noqa):

+ r"""Comparison to singletons should use "is" or "is not".

+ Comparisons to singletons like None should always be done

+ with "is" or "is not", never the equality operators.

+ Okay: if arg is not None:

+ E711: if arg != None:

+ E712: if arg == True:

+ Also, beware of writing if x when you really mean if x is not None --

+ e.g. when testing whether a variable or argument that defaults to None was

+ set to some other value. The other value might have a type (such as a

+ container) that could be false in a boolean context!

+ """

+ match = not noqa and COMPARE_SINGLETON_REGEX.search(logical_line)

+ if match:

+ same = (match.group(1) == '==')

+ singleton = match.group(2)

+ msg = "'if cond is %s:'" % (('' if same else 'not ') + singleton)

+ if singleton in ('None',):

+ code = 'E711'

+ else:

+ code = 'E712'

+ nonzero = ((singleton == 'True' and same) or

+ (singleton == 'False' and not same))

+ msg += " or 'if %scond:'" % ('' if nonzero else 'not ')

+ yield match.start(1), ("%s comparison to %s should be %s" %

+ (code, singleton, msg))

+def comparison_negative(logical_line):

+ r"""Negative comparison should be done using "not in" and "is not".

+ Okay: if x not in y:\n pass

+ Okay: assert (X in Y or X is Z)

+ Okay: if not (X in Y):\n pass

+ Okay: zz = x is not y

+ E713: Z = not X in Y

+ E713: if not X.B in Y:\n pass

+ E714: if not X is Y:\n pass

+ E714: Z = not X.B is Y

+ """

+ match = COMPARE_NEGATIVE_REGEX.search(logical_line)

+ if match:

+ pos = match.start(1)

+ if match.group(2) == 'in':

+ yield pos, "E713 test for membership should be 'not in'"

+ else:

+ yield pos, "E714 test for object identity should be 'is not'"

+def comparison_type(logical_line):

+ r"""Object type comparisons should always use isinstance().

+ Do not compare types directly.

+ Okay: if isinstance(obj, int):

+ E721: if type(obj) is type(1):

-def python_3000_has_key(logical_line):

+ When checking if an object is a string, keep in mind that it might be a

+ unicode string too! In Python 2.3, str and unicode have a common base

+ class, basestring, so you can do:

+ Okay: if isinstance(obj, basestring):

+ Okay: if type(a1) is type(b1):

"""

- The {}.has_key() method will be removed in the future version of

- Python. Use the 'in' operation instead, like:

- d = {"a": 1, "b": 2}

- if "b" in d:

- print d["b"]

+ match = COMPARE_TYPE_REGEX.search(logical_line)

+ if match:

+ inst = match.group(1)

+ if inst and isidentifier(inst) and inst not in SINGLETONS:

+ return # Allow comparison for types which are not obvious

+ yield match.start(), "E721 do not compare types, use 'isinstance()'"

+def python_3000_has_key(logical_line, noqa):

+ r"""The {}.has_key() method is removed in Python 3: use the 'in' operator.

+ Okay: if "alph" in d:\n print d["alph"]

+ W601: assert d.has_key('alph')

"""

pos = logical_line.find('.has_key(')

- if pos > -1:

- return pos, "W601 .has_key() is deprecated, use 'in'"

+ if pos > -1 and not noqa:

+ yield pos, "W601 .has_key() is deprecated, use 'in'"

def python_3000_raise_comma(logical_line):

- """

- When raising an exception, use "raise ValueError('message')"

- instead of the older form "raise ValueError, 'message'".

+ r"""When raising an exception, use "raise ValueError('message')".

+ The older form is removed in Python 3.

- The paren-using form is preferred because when the exception arguments

- are long or include string formatting, you don't need to use line

- continuation characters thanks to the containing parentheses. The older

- form will be removed in Python 3000.

+ Okay: raise DummyError("Message")

+ W602: raise DummyError, "Message"

"""

match = RAISE_COMMA_REGEX.match(logical_line)

- if match:

- return match.start(1), "W602 deprecated form of raising exception"

+ if match and not RERAISE_COMMA_REGEX.match(logical_line):

+ yield match.end() - 1, "W602 deprecated form of raising exception"

def python_3000_not_equal(logical_line):

- """

- != can also be written <>, but this is an obsolete usage kept for

- backwards compatibility only. New code should always use !=.

- The older syntax is removed in Python 3000.

+ r"""New code should always use != instead of <>.

+ The older syntax is removed in Python 3.

+ Okay: if a != 'no':

+ W603: if a <> 'no':

"""

pos = logical_line.find('<>')

if pos > -1:

- return pos, "W603 '<>' is deprecated, use '!='"

+ yield pos, "W603 '<>' is deprecated, use '!='"

def python_3000_backticks(logical_line):

- """

- Backticks are removed in Python 3000.

- Use repr() instead.

+ r"""Backticks are removed in Python 3: use repr() instead.

+ Okay: val = repr(1 + 2)

+ W604: val = `1 + 2`

"""

pos = logical_line.find('`')

if pos > -1:

- return pos, "W604 backticks are deprecated, use 'repr()'"

+ yield pos, "W604 backticks are deprecated, use 'repr()'"

##############################################################################

@@ -676,22 +1033,50 @@ def python_3000_backticks(logical_line):

##############################################################################

+if '' == ''.encode():

+ # Python 2: implicit encoding.

+ def readlines(filename):

+ """Read the source code."""

+ with open(filename, 'rU') as f:

+ return f.readlines()

+ isidentifier = re.compile(r'[a-zA-Z_]\w*').match

+ stdin_get_value = sys.stdin.read

+else:

+ # Python 3

+ def readlines(filename):

+ """Read the source code."""

+ try:

+ with open(filename, 'rb') as f:

+ (coding, lines) = tokenize.detect_encoding(f.readline)

+ f = TextIOWrapper(f, coding, line_buffering=True)

+ return [l.decode(coding) for l in lines] + f.readlines()

+ except (LookupError, SyntaxError, UnicodeError):

+ # Fall back if file encoding is improperly declared

+ with open(filename, encoding='latin-1') as f:

+ return f.readlines()

+ isidentifier = str.isidentifier

+ def stdin_get_value():

+ return TextIOWrapper(sys.stdin.buffer, errors='ignore').read()

+noqa = re.compile(r'# no(?:qa|pep8)\b', re.I).search

def expand_indent(line):

- """

- Return the amount of indentation.

+ r"""Return the amount of indentation.

Tabs are expanded to the next multiple of 8.

>>> expand_indent(' ')

- >>> expand_indent('\\t')

+ >>> expand_indent('\t')

- >>> expand_indent(' \\t')

+ >>> expand_indent(' \t')

- >>> expand_indent(' \\t')

- 8

- >>> expand_indent(' \\t')

+ >>> expand_indent(' \t')

"""

+ if '\t' not in line:

+ return len(line) - len(line.lstrip())

result = 0

for char in line:

if char == '\t':

@@ -704,8 +1089,7 @@ def expand_indent(line):

def mute_string(text):

- """

- Replace contents with 'xxx' to prevent syntax matching.

+ """Replace contents with 'xxx' to prevent syntax matching.

>>> mute_string('"abc"')

'"xxx"'

@@ -714,25 +1098,73 @@ def mute_string(text):

>>> mute_string("r'abc'")

"r'xxx'"

"""

- start = 1

- end = len(text) - 1

# String modifiers (e.g. u or r)

- if text.endswith('"'):

- start += text.index('"')

- elif text.endswith("'"):

- start += text.index("'")

+ start = text.index(text[-1]) + 1

+ end = len(text) - 1

# Triple quotes

- if text.endswith('"""') or text.endswith("'''"):

+ if text[-3:] in ('"""', "'''"):

start += 2

end -= 2

return text[:start] + 'x' * (end - start) + text[end:]

-def message(text):

- """Print a message."""

- # print >> sys.stderr, options.prog + ': ' + text

- # print >> sys.stderr, text

- print(text)

+def parse_udiff(diff, patterns=None, parent='.'):

+ """Return a dictionary of matching lines."""

+ # For each file of the diff, the entry key is the filename,

+ # and the value is a set of row numbers to consider.

+ rv = {}

+ path = nrows = None

+ for line in diff.splitlines():

+ if nrows:

+ if line[:1] != '-':

+ nrows -= 1

+ continue

+ if line[:3] == '@@ ':

+ hunk_match = HUNK_REGEX.match(line)

+ (row, nrows) = [int(g or '1') for g in hunk_match.groups()]

+ rv[path].update(range(row, row + nrows))

+ elif line[:3] == '+++':

+ path = line[4:].split('\t', 1)[0]

+ if path[:2] == 'b/':

+ path = path[2:]

+ rv[path] = set()

+ return dict([(os.path.join(parent, path), rows)

+ for (path, rows) in rv.items()

+ if rows and filename_match(path, patterns)])

+def normalize_paths(value, parent=os.curdir):

+ """Parse a comma-separated list of paths.

+ Return a list of absolute paths.

+ """

+ if not value or isinstance(value, list):

+ return value

+ paths = []

+ for path in value.split(','):

+ if '/' in path:

+ path = os.path.abspath(os.path.join(parent, path))

+ paths.append(path.rstrip('/'))

+ return paths

+def filename_match(filename, patterns, default=True):

+ """Check if patterns contains a pattern that matches filename.

+ If patterns is unspecified, this always returns True.

+ """

+ if not patterns:

+ return default

+ return any(fnmatch(filename, pattern) for pattern in patterns)

+if COMMENT_WITH_NL:

+ def _is_eol_token(token):

+ return (token[0] in NEWLINE or

+ (token[0] == tokenize.COMMENT and token[1] == token[4]))

+else:

+ def _is_eol_token(token):

+ return token[0] in NEWLINE

##############################################################################

@@ -740,436 +1172,603 @@ def message(text):

##############################################################################

-def find_checks(argument_name):

- """

- Find all globally visible functions where the first argument name

- starts with argument_name.

+_checks = {'physical_line': {}, 'logical_line': {}, 'tree': {}}

+def register_check(check, codes=None):

+ """Register a new check object."""

+ def _add_check(check, kind, codes, args):

+ if check in _checks[kind]:

+ _checks[kind][check][0].extend(codes or [])

+ else:

+ _checks[kind][check] = (codes or [''], args)

+ if inspect.isfunction(check):

+ args = inspect.getargspec(check)[0]

+ if args and args[0] in ('physical_line', 'logical_line'):

+ if codes is None:

+ codes = ERRORCODE_REGEX.findall(check.__doc__ or '')

+ _add_check(check, args[0], codes, args)

+ elif inspect.isclass(check):

+ if inspect.getargspec(check.__init__)[0][:2] == ['self', 'tree']:

+ _add_check(check, 'tree', codes, None)

+def init_checks_registry():

+ """Register all globally visible functions.

+ The first argument name is either 'physical_line' or 'logical_line'.

"""

- checks = []

- for name, function in globals().items():

- if not inspect.isfunction(function):

- continue

- args = inspect.getargspec(function)[0]

- if args and args[0].startswith(argument_name):

- codes = ERRORCODE_REGEX.findall(inspect.getdoc(function) or '')

- for code in codes or ['']:

- if not code or not ignore_code(code):

- checks.append((name, function, args))

- break

- checks.sort()

- return checks

+ mod = inspect.getmodule(register_check)

+ for (name, function) in inspect.getmembers(mod, inspect.isfunction):

+ register_check(function)

+init_checks_registry()

class Checker(object):

- """

- Load a Python source file, tokenize it, check coding style.

- """

+ """Load a Python source file, tokenize it, check coding style."""

- def __init__(self, filename):

- if filename:

- self.filename = filename

- try:

- self.lines = open(filename).readlines()

- except UnicodeDecodeError:

- # Errors may occur with non-UTF8 files in Python 3000

- self.lines = open(filename, errors='replace').readlines()

+ def __init__(self, filename=None, lines=None,

+ options=None, report=None, **kwargs):

+ if options is None:

+ options = StyleGuide(kwargs).options

else:

+ assert not kwargs

+ self._io_error = None

+ self._physical_checks = options.physical_checks

+ self._logical_checks = options.logical_checks

+ self._ast_checks = options.ast_checks

+ self.max_line_length = options.max_line_length

+ self.multiline = False # in a multiline string?

+ self.hang_closing = options.hang_closing

+ self.verbose = options.verbose

+ self.filename = filename

+ if filename is None:

self.filename = 'stdin'

- self.lines = []

- options.counters['physical lines'] = \

- options.counters.get('physical lines', 0) + len(self.lines)

+ self.lines = lines or []

+ elif filename == '-':

+ self.filename = 'stdin'

+ self.lines = stdin_get_value().splitlines(True)

+ elif lines is None:

+ try:

+ self.lines = readlines(filename)

+ except IOError:

+ (exc_type, exc) = sys.exc_info()[:2]

+ self._io_error = '%s: %s' % (exc_type.__name__, exc)

+ self.lines = []

+ else:

+ self.lines = lines

+ if self.lines:

+ ord0 = ord(self.lines[0][0])

+ if ord0 in (0xef, 0xfeff): # Strip the UTF-8 BOM

+ if ord0 == 0xfeff:

+ self.lines[0] = self.lines[0][1:]

+ elif self.lines[0][:3] == '\xef\xbb\xbf':

+ self.lines[0] = self.lines[0][3:]

+ self.report = report or options.report

+ self.report_error = self.report.error

+ def report_invalid_syntax(self):

+ """Check if the syntax is valid."""

+ (exc_type, exc) = sys.exc_info()[:2]

+ if len(exc.args) > 1:

+ offset = exc.args[1]

+ if len(offset) > 2:

+ offset = offset[1:3]

+ else:

+ offset = (1, 0)

+ self.report_error(offset[0], offset[1] or 0,

+ 'E901 %s: %s' % (exc_type.__name__, exc.args[0]),

+ self.report_invalid_syntax)

def readline(self):

- """

- Get the next line from the input buffer.

- """

- self.line_number += 1

- if self.line_number > len(self.lines):

+ """Get the next line from the input buffer."""

+ if self.line_number >= self.total_lines:

return ''

- return self.lines[self.line_number - 1]

- def readline_check_physical(self):

- """

- Check and return the next physical line. This method can be

- used to feed tokenize.generate_tokens.

- """

- line = self.readline()

- if line:

- self.check_physical(line)

+ line = self.lines[self.line_number]

+ self.line_number += 1

+ if self.indent_char is None and line[:1] in WHITESPACE:

+ self.indent_char = line[0]

return line

def run_check(self, check, argument_names):

- """

- Run a check plugin.

- """

+ """Run a check plugin."""

arguments = []

for name in argument_names:

arguments.append(getattr(self, name))

return check(*arguments)

def check_physical(self, line):

- """

- Run all physical checks on a raw input line.

- """

+ """Run all physical checks on a raw input line."""

self.physical_line = line

- if self.indent_char is None and len(line) and line[0] in ' \t':

- self.indent_char = line[0]

- for name, check, argument_names in options.physical_checks:

+ for name, check, argument_names in self._physical_checks:

result = self.run_check(check, argument_names)

if result is not None:

- offset, text = result

+ (offset, text) = result

self.report_error(self.line_number, offset, text, check)

+ if text[:4] == 'E101':

+ self.indent_char = line[0]

def build_tokens_line(self):

- """

- Build a logical line from tokens.

- """

- self.mapping = []

+ """Build a logical line from tokens."""

logical = []

+ comments = []

length = 0

- previous = None

- for token in self.tokens:

- token_type, text = token[0:2]

- if token_type in (tokenize.COMMENT, tokenize.NL,

- tokenize.INDENT, tokenize.DEDENT,

- tokenize.NEWLINE):

+ prev_row = prev_col = mapping = None

+ for token_type, text, start, end, line in self.tokens:

+ if token_type in SKIP_TOKENS:

+ continue

+ if not mapping:

+ mapping = [(0, start)]

+ if token_type == tokenize.COMMENT:

+ comments.append(text)

continue

if token_type == tokenize.STRING:

text = mute_string(text)

- if previous:

- end_line, end = previous[3]

- start_line, start = token[2]

- if end_line != start_line: # different row

- if self.lines[end_line - 1][end - 1] not in '{[(':

- logical.append(' ')

- length += 1

- elif end != start: # different column

- fill = self.lines[end_line - 1][end:start]

- logical.append(fill)

- length += len(fill)

- self.mapping.append((length, token))

+ if prev_row:

+ (start_row, start_col) = start

+ if prev_row != start_row: # different row

+ prev_text = self.lines[prev_row - 1][prev_col - 1]

+ if prev_text == ',' or (prev_text not in '{[('

+ and text not in '}])'):

+ text = ' ' + text

+ elif prev_col != start_col: # different column

+ text = line[prev_col:start_col] + text

logical.append(text)

length += len(text)

- previous = token

+ mapping.append((length, end))

+ (prev_row, prev_col) = end

self.logical_line = ''.join(logical)

- assert self.logical_line.lstrip() == self.logical_line

- assert self.logical_line.rstrip() == self.logical_line

+ self.noqa = comments and noqa(''.join(comments))

+ return mapping

def check_logical(self):

- """

- Build a line from tokens and run all logical checks on it.

- """

- options.counters['logical lines'] = \

- options.counters.get('logical lines', 0) + 1

- self.build_tokens_line()

- first_line = self.lines[self.mapping[0][1][2][0] - 1]

- indent = first_line[:self.mapping[0][1][2][1]]

- self.previous_indent_level = self.indent_level

- self.indent_level = expand_indent(indent)

- if options.verbose >= 2:

+ """Build a line from tokens and run all logical checks on it."""

+ self.report.increment_logical_line()

+ mapping = self.build_tokens_line()

+ (start_row, start_col) = mapping[0][1]

+ start_line = self.lines[start_row - 1]

+ self.indent_level = expand_indent(start_line[:start_col])

+ if self.blank_before < self.blank_lines:

+ self.blank_before = self.blank_lines

+ if self.verbose >= 2:

print(self.logical_line[:80].rstrip())

- for name, check, argument_names in options.logical_checks:

- if options.verbose >= 3:

- print(' ', name)

- result = self.run_check(check, argument_names)

- if result is not None:

- offset, text = result

- if isinstance(offset, tuple):

- original_number, original_offset = offset

- else:

- for token_offset, token in self.mapping:

- if offset >= token_offset:

- original_number = token[2][0]

- original_offset = (token[2][1]

- + offset - token_offset)

- self.report_error(original_number, original_offset,

- text, check)

- self.previous_logical = self.logical_line

- def check_all(self):

- """

- Run all checks on the input file.

- """

- self.file_errors = 0

+ for name, check, argument_names in self._logical_checks:

+ if self.verbose >= 4:

+ print(' ' + name)

+ for offset, text in self.run_check(check, argument_names) or ():

+ if not isinstance(offset, tuple):

+ for token_offset, pos in mapping:

+ if offset <= token_offset:

+ break

+ offset = (pos[0], pos[1] + offset - token_offset)

+ self.report_error(offset[0], offset[1], text, check)

+ if self.logical_line:

+ self.previous_indent_level = self.indent_level

+ self.previous_logical = self.logical_line

+ self.blank_lines = 0

+ self.tokens = []

+ def check_ast(self):

+ """Build the file's AST and run all AST checks."""

+ try:

+ tree = compile(''.join(self.lines), '', 'exec', PyCF_ONLY_AST)

+ except (SyntaxError, TypeError):

+ return self.report_invalid_syntax()

+ for name, cls, __ in self._ast_checks:

+ checker = cls(tree, self.filename)

+ for lineno, offset, text, check in checker.run():

+ if not self.lines or not noqa(self.lines[lineno - 1]):

+ self.report_error(lineno, offset, text, check)

+ def generate_tokens(self):

+ """Tokenize the file, run physical line checks and yield tokens."""

+ if self._io_error:

+ self.report_error(1, 0, 'E902 %s' % self._io_error, readlines)

+ tokengen = tokenize.generate_tokens(self.readline)

+ try:

+ for token in tokengen:

+ if token[2][0] > self.total_lines:

+ return

+ self.maybe_check_physical(token)

+ yield token

+ except (SyntaxError, tokenize.TokenError):

+ self.report_invalid_syntax()

+ def maybe_check_physical(self, token):

+ """If appropriate (based on token), check current physical line(s)."""

+ # Called after every token, but act only on end of line.

+ if _is_eol_token(token):

+ # Obviously, a newline token ends a single physical line.

+ self.check_physical(token[4])

+ elif token[0] == tokenize.STRING and '\n' in token[1]:

+ # Less obviously, a string that contains newlines is a

+ # multiline string, either triple-quoted or with internal

+ # newlines backslash-escaped. Check every physical line in the

+ # string *except* for the last one: its newline is outside of

+ # the multiline string, so we consider it a regular physical

+ # line, and will check it like any other physical line.

+ #

+ # Subtleties:

+ # - we don't *completely* ignore the last line; if it contains

+ # the magical "# noqa" comment, we disable all physical

+ # checks for the entire multiline string

+ # - have to wind self.line_number back because initially it

+ # points to the last line of the string, and we want

+ # check_physical() to give accurate feedback

+ if noqa(token[4]):

+ return

+ self.multiline = True

+ self.line_number = token[2][0]

+ for line in token[1].split('\n')[:-1]:

+ self.check_physical(line + '\n')

+ self.line_number += 1

+ self.multiline = False

+ def check_all(self, expected=None, line_offset=0):

+ """Run all checks on the input file."""

+ self.report.init_file(self.filename, self.lines, expected, line_offset)

+ self.total_lines = len(self.lines)

+ if self._ast_checks:

+ self.check_ast()

self.line_number = 0

self.indent_char = None

- self.indent_level = 0

+ self.indent_level = self.previous_indent_level = 0

self.previous_logical = ''

- self.blank_lines = 0

- self.blank_lines_before_comment = 0

self.tokens = []

+ self.blank_lines = self.blank_before = 0

parens = 0

- for token in tokenize.generate_tokens(self.readline_check_physical):

- # print(tokenize.tok_name[token[0]], repr(token))

+ for token in self.generate_tokens():

self.tokens.append(token)

token_type, text = token[0:2]

- if token_type == tokenize.OP and text in '([{':

- parens += 1

- if token_type == tokenize.OP and text in '}])':

- parens -= 1

- if token_type == tokenize.NEWLINE and not parens:

- self.check_logical()

- self.blank_lines = 0

- self.blank_lines_before_comment = 0

- self.tokens = []

- if token_type == tokenize.NL and not parens:

- if len(self.tokens) <= 1:

- # The physical line contains only this token.

- self.blank_lines += 1

- self.tokens = []

- if token_type == tokenize.COMMENT:

- source_line = token[4]

- token_start = token[2][1]

- if source_line[:token_start].strip() == '':

- self.blank_lines_before_comment = max(self.blank_lines,

- self.blank_lines_before_comment)

- self.blank_lines = 0

- if text.endswith('\n') and not parens:

- # The comment also ends a physical line. This works around

- # Python < 2.6 behaviour, which does not generate NL after

- # a comment which is on a line by itself.

- self.tokens = []

- return self.file_errors

+ if self.verbose >= 3:

+ if token[2][0] == token[3][0]:

+ pos = '[%s:%s]' % (token[2][1] or '', token[3][1])

+ else:

+ pos = 'l.%s' % token[3][0]

+ print('l.%s\t%s\t%s\t%r' %

+ (token[2][0], pos, tokenize.tok_name[token[0]], text))

+ if token_type == tokenize.OP:

+ if text in '([{':

+ parens += 1

+ elif text in '}])':

+ parens -= 1

+ elif not parens:

+ if token_type in NEWLINE:

+ if token_type == tokenize.NEWLINE:

+ self.check_logical()

+ self.blank_before = 0

+ elif len(self.tokens) == 1:

+ # The physical line contains only this token.

+ self.blank_lines += 1

+ del self.tokens[0]

+ else:

+ self.check_logical()

+ elif COMMENT_WITH_NL and token_type == tokenize.COMMENT:

+ if len(self.tokens) == 1:

+ # The comment also ends a physical line

+ token = list(token)

+ token[1] = text.rstrip('\r\n')

+ token[3] = (token[2][0], token[2][1] + len(token[1]))

+ self.tokens = [tuple(token)]

+ self.check_logical()

+ if self.tokens:

+ self.check_physical(self.lines[-1])

+ self.check_logical()

+ return self.report.get_file_results()

+class BaseReport(object):

+ """Collect the results of the checks."""

+ print_filename = False

+ def __init__(self, options):

+ self._benchmark_keys = options.benchmark_keys

+ self._ignore_code = options.ignore_code

+ # Results

+ self.elapsed = 0

+ self.total_errors = 0

+ self.counters = dict.fromkeys(self._benchmark_keys, 0)

+ self.messages = {}

+ def start(self):

+ """Start the timer."""

+ self._start_time = time.time()

+ def stop(self):

+ """Stop the timer."""

+ self.elapsed = time.time() - self._start_time

+ def init_file(self, filename, lines, expected, line_offset):

+ """Signal a new file."""

+ self.filename = filename

+ self.lines = lines

+ self.expected = expected or ()

+ self.line_offset = line_offset

+ self.file_errors = 0

+ self.counters['files'] += 1

+ self.counters['physical lines'] += len(lines)

- def report_error(self, line_number, offset, text, check):

- """

- Report an error, according to options.

- """

- if options.quiet == 1 and not self.file_errors:

- message(self.filename)

- self.file_errors += 1

+ def increment_logical_line(self):

+ """Signal a new logical line."""

+ self.counters['logical lines'] += 1

+ def error(self, line_number, offset, text, check):

+ """Report an error, according to options."""

code = text[:4]

- options.counters[code] = options.counters.get(code, 0) + 1

- options.messages[code] = text[5:]

- if options.quiet:

+ if self._ignore_code(code):

return

- if options.testsuite:

- basename = os.path.basename(self.filename)

- if basename[:4] != code:

- return # Don't care about other errors or warnings

- if 'not' not in basename:

- return # Don't print the expected error message

- if ignore_code(code):

+ if code in self.counters:

+ self.counters[code] += 1

+ else:

+ self.counters[code] = 1

+ self.messages[code] = text[5:]

+ # Don't care about expected errors or warnings

+ if code in self.expected:

return

- if options.counters[code] == 1 or options.repeat:

- message("%s:%s:%d: %s" %

- (self.filename, line_number, offset + 1, text))

- if options.show_source:

- line = self.lines[line_number - 1]

- message(line.rstrip())

- message(' ' * offset + '^')

- if options.show_pep8:

- message(check.__doc__.lstrip('\n').rstrip())

-def input_file(filename):

- """

- Run all checks on a Python source file.

- """

- if excluded(filename):

- return {}

- if options.verbose:

- message('checking ' + filename)

- files_counter_before = options.counters.get('files', 0)

- if options.testsuite: # Keep showing errors for multiple tests

- options.counters = {}

- options.counters['files'] = files_counter_before + 1

- errors = Checker(filename).check_all()

- if options.testsuite: # Check if the expected error was found

- basename = os.path.basename(filename)

- code = basename[:4]

- count = options.counters.get(code, 0)

- if count == 0 and 'not' not in basename:

- message("%s: error %s not found" % (filename, code))

-def input_dir(dirname):

- """

- Check all Python source files in this directory and all subdirectories.

- """

- dirname = dirname.rstrip('/')

- if excluded(dirname):

- return

- for root, dirs, files in os.walk(dirname):

- if options.verbose:

- message('directory ' + root)

- options.counters['directories'] = \

- options.counters.get('directories', 0) + 1

- dirs.sort()

- for subdir in dirs:

- if excluded(subdir):

- dirs.remove(subdir)

- files.sort()

- for filename in files:

- if filename_match(filename):

- input_file(os.path.join(root, filename))

-def excluded(filename):

- """

- Check if options.exclude contains a pattern that matches filename.

- """

- basename = os.path.basename(filename)

- for pattern in options.exclude:

- if fnmatch(basename, pattern):

- # print basename, 'excluded because it matches', pattern

- return True

-def filename_match(filename):

- """

- Check if options.filename contains a pattern that matches filename.

- If options.filename is unspecified, this always returns True.

- """

- if not options.filename:

- return True

- for pattern in options.filename:

- if fnmatch(filename, pattern):

- return True

-def ignore_code(code):

- """

- Check if options.ignore contains a prefix of the error code.

- If options.select contains a prefix of the error code, do not ignore it.

- """

- for select in options.select:

- if code.startswith(select):

- return False

- for ignore in options.ignore:

- if code.startswith(ignore):

- return True

-def get_error_statistics():

- """Get error statistics."""

- return get_statistics("E")

+ if self.print_filename and not self.file_errors:

+ print(self.filename)

+ self.file_errors += 1

+ self.total_errors += 1

+ return code

-def get_warning_statistics():

- """Get warning statistics."""

- return get_statistics("W")

+ def get_file_results(self):

+ """Return the count of errors and warnings for this file."""

+ return self.file_errors

+ def get_count(self, prefix=''):

+ """Return the total count of errors and warnings."""

+ return sum([self.counters[key]

+ for key in self.messages if key.startswith(prefix)])

-def get_statistics(prefix=''):

- """

- Get statistics for message codes that start with the prefix.

+ def get_statistics(self, prefix=''):

+ """Get statistics for message codes that start with the prefix.

- prefix='' matches all errors and warnings

- prefix='E' matches all errors

- prefix='W' matches all warnings

- prefix='E4' matches all errors that have to do with imports

- """

- stats = []

- keys = list(options.messages.keys())

- keys.sort()

- for key in keys:

- if key.startswith(prefix):

- stats.append('%-7s %s %s' %

- (options.counters[key], key, options.messages[key]))

- return stats

+ prefix='' matches all errors and warnings

+ prefix='E' matches all errors

+ prefix='W' matches all warnings

+ prefix='E4' matches all errors that have to do with imports

+ """

+ return ['%-7s %s %s' % (self.counters[key], key, self.messages[key])

+ for key in sorted(self.messages) if key.startswith(prefix)]

+ def print_statistics(self, prefix=''):

+ """Print overall statistics (number of errors and warnings)."""

+ for line in self.get_statistics(prefix):

+ print(line)

+ def print_benchmark(self):

+ """Print benchmark numbers."""

+ print('%-7.2f %s' % (self.elapsed, 'seconds elapsed'))

+ if self.elapsed:

+ for key in self._benchmark_keys:

+ print('%-7d %s per second (%d total)' %

+ (self.counters[key] / self.elapsed, key,

+ self.counters[key]))

+class FileReport(BaseReport):

+ """Collect the results of the checks and print only the filenames."""

+ print_filename = True

+class StandardReport(BaseReport):

+ """Collect and print the results of the checks."""

+ def __init__(self, options):

+ super(StandardReport, self).__init__(options)

+ self._fmt = REPORT_FORMAT.get(options.format.lower(),

+ options.format)

+ self._repeat = options.repeat

+ self._show_source = options.show_source

+ self._show_pep8 = options.show_pep8

+ def init_file(self, filename, lines, expected, line_offset):

+ """Signal a new file."""

+ self._deferred_print = []

+ return super(StandardReport, self).init_file(

+ filename, lines, expected, line_offset)

+ def error(self, line_number, offset, text, check):

+ """Report an error, according to options."""

+ code = super(StandardReport, self).error(line_number, offset,

+ text, check)

+ if code and (self.counters[code] == 1 or self._repeat):

+ self._deferred_print.append(

+ (line_number, offset, code, text[5:], check.__doc__))

+ return code

+ def get_file_results(self):

+ """Print the result and return the overall count for this file."""

+ self._deferred_print.sort()

+ for line_number, offset, code, text, doc in self._deferred_print:

+ print(self._fmt % {

+ 'path': self.filename,

+ 'row': self.line_offset + line_number, 'col': offset + 1,

+ 'code': code, 'text': text,

+ })

+ if self._show_source:

+ if line_number > len(self.lines):

+ line = ''

+ else:

+ line = self.lines[line_number - 1]

+ print(line.rstrip())

+ print(re.sub(r'\S', ' ', line[:offset]) + '^')

+ if self._show_pep8 and doc:

+ print(' ' + doc.strip())

+ return self.file_errors

-def get_count(prefix=''):

- """Return the total count of errors and warnings."""

- keys = list(options.messages.keys())

- count = 0

- for key in keys:

- if key.startswith(prefix):

- count += options.counters[key]

- return count

+class DiffReport(StandardReport):

+ """Collect and print the results for the changed lines only."""

+ def __init__(self, options):

+ super(DiffReport, self).__init__(options)

+ self._selected = options.selected_lines

-def print_statistics(prefix=''):

- """Print overall statistics (number of errors and warnings)."""

- for line in get_statistics(prefix):

- print(line)

+ def error(self, line_number, offset, text, check):

+ if line_number not in self._selected[self.filename]:

+ return

+ return super(DiffReport, self).error(line_number, offset, text, check)

+class StyleGuide(object):

+ """Initialize a PEP-8 instance with few options."""

+ def __init__(self, *args, **kwargs):

+ # build options from the command line

+ self.checker_class = kwargs.pop('checker_class', Checker)

+ parse_argv = kwargs.pop('parse_argv', False)

+ config_file = kwargs.pop('config_file', None)

+ parser = kwargs.pop('parser', None)

+ # build options from dict

+ options_dict = dict(*args, **kwargs)

+ arglist = None if parse_argv else options_dict.get('paths', None)

+ options, self.paths = process_options(

+ arglist, parse_argv, config_file, parser)

+ if options_dict:

+ options.__dict__.update(options_dict)

+ if 'paths' in options_dict:

+ self.paths = options_dict['paths']

+ self.runner = self.input_file

+ self.options = options

+ if not options.reporter:

+ options.reporter = BaseReport if options.quiet else StandardReport

+ options.select = tuple(options.select or ())

+ if not (options.select or options.ignore or

+ options.testsuite or options.doctest) and DEFAULT_IGNORE:

+ # The default choice: ignore controversial checks

+ options.ignore = tuple(DEFAULT_IGNORE.split(','))

+ else:

+ # Ignore all checks which are not explicitly selected

+ options.ignore = ('',) if options.select else tuple(options.ignore)

+ options.benchmark_keys = BENCHMARK_KEYS[:]

+ options.ignore_code = self.ignore_code

+ options.physical_checks = self.get_checks('physical_line')

+ options.logical_checks = self.get_checks('logical_line')

+ options.ast_checks = self.get_checks('tree')

+ self.init_report()

+ def init_report(self, reporter=None):

+ """Initialize the report instance."""

+ self.options.report = (reporter or self.options.reporter)(self.options)

+ return self.options.report

+ def check_files(self, paths=None):

+ """Run all checks on the paths."""

+ if paths is None:

+ paths = self.paths

+ report = self.options.report

+ runner = self.runner

+ report.start()

+ try:

+ for path in paths:

+ if os.path.isdir(path):

+ self.input_dir(path)

+ elif not self.excluded(path):

+ runner(path)

+ except KeyboardInterrupt:

+ print('... stopped')

+ report.stop()

+ return report

+ def input_file(self, filename, lines=None, expected=None, line_offset=0):

+ """Run all checks on a Python source file."""

+ if self.options.verbose:

+ print('checking %s' % filename)

+ fchecker = self.checker_class(

+ filename, lines=lines, options=self.options)

+ return fchecker.check_all(expected=expected, line_offset=line_offset)

+ def input_dir(self, dirname):

+ """Check all files in this directory and all subdirectories."""

+ dirname = dirname.rstrip('/')

+ if self.excluded(dirname):

+ return 0

+ counters = self.options.report.counters

+ verbose = self.options.verbose

+ filepatterns = self.options.filename

+ runner = self.runner

+ for root, dirs, files in os.walk(dirname):

+ if verbose:

+ print('directory ' + root)

+ counters['directories'] += 1

+ for subdir in sorted(dirs):

+ if self.excluded(subdir, root):

+ dirs.remove(subdir)

+ for filename in sorted(files):

+ # contain a pattern that matches?

+ if ((filename_match(filename, filepatterns) and

+ not self.excluded(filename, root))):

+ runner(os.path.join(root, filename))

+ def excluded(self, filename, parent=None):

+ """Check if the file should be excluded.

+ Check if 'options.exclude' contains a pattern that matches filename.

+ """

+ if not self.options.exclude:

+ return False

+ basename = os.path.basename(filename)

+ if filename_match(basename, self.options.exclude):

+ return True

+ if parent:

+ filename = os.path.join(parent, filename)

+ filename = os.path.abspath(filename)

+ return filename_match(filename, self.options.exclude)

+ def ignore_code(self, code):

+ """Check if the error code should be ignored.

-def print_benchmark(elapsed):

- """

- Print benchmark numbers.

- """

- print('%-7.2f %s' % (elapsed, 'seconds elapsed'))

- keys = ['directories', 'files',

- 'logical lines', 'physical lines']

- for key in keys:

- if key in options.counters:

- print('%-7d %s per second (%d total)' % (

- options.counters[key] / elapsed, key,

- options.counters[key]))

+ If 'options.select' contains a prefix of the error code,

+ return False. Else, if 'options.ignore' contains a prefix of

+ the error code, return True.

+ """

+ if len(code) < 4 and any(s.startswith(code)

+ for s in self.options.select):

+ return False

+ return (code.startswith(self.options.ignore) and

+ not code.startswith(self.options.select))

+ def get_checks(self, argument_name):

+ """Get all the checks for this category.

-def selftest():

- """

- Test all check functions with test cases in docstrings.

- """

- count_passed = 0

- count_failed = 0

- checks = options.physical_checks + options.logical_checks

- for name, check, argument_names in checks:

- for line in check.__doc__.splitlines():

- line = line.lstrip()

- match = SELFTEST_REGEX.match(line)

- if match is None:

- continue

- code, source = match.groups()

- checker = Checker(None)

- for part in source.split(r'\n'):

- part = part.replace(r'\t', '\t')

- part = part.replace(r'\s', ' ')

- checker.lines.append(part + '\n')

- options.quiet = 2

- options.counters = {}

- checker.check_all()

- error = None

- if code == 'Okay':

- if len(options.counters) > 1:

- codes = [key for key in options.counters.keys()

- if key != 'logical lines']

- error = "incorrectly found %s" % ', '.join(codes)

- elif options.counters.get(code, 0) == 0:

- error = "failed to find %s" % code

- if not error:

- count_passed += 1

- else:

- count_failed += 1

- if len(checker.lines) == 1:

- print("pep8.py: %s: %s" %

- (error, checker.lines[0].rstrip()))

- else:

- print("pep8.py: %s:" % error)

- for line in checker.lines:

- print(line.rstrip())

- if options.verbose:

- print("%d passed and %d failed." % (count_passed, count_failed))

- if count_failed:

- print("Test failed.")

- else:

- print("Test passed.")

+ Find all globally visible functions where the first argument name

+ starts with argument_name and which contain selected tests.

+ """

+ checks = []

+ for check, attrs in _checks[argument_name].items():

+ (codes, args) = attrs

+ if any(not (code and self.ignore_code(code)) for code in codes):

+ checks.append((check.__name__, check, args))

+ return sorted(checks)

-def process_options(arglist=None):

- """

- Process options passed either via arglist or via command line args.

- """

- global options, args

- parser = OptionParser(version=__version__,

+def get_parser(prog='pep8', version=__version__):

+ parser = OptionParser(prog=prog, version=version,

usage="%prog [options] input ...")

+ parser.config_options = [

+ 'exclude', 'filename', 'select', 'ignore', 'max-line-length',

+ 'hang-closing', 'count', 'format', 'quiet', 'show-pep8',

+ 'show-source', 'statistics', 'verbose']

parser.add_option('-v', '--verbose', default=0, action='count',

help="print status messages, or debug with -vv")

parser.add_option('-q', '--quiet', default=0, action='count',

help="report only file names, or nothing with -qq")

- parser.add_option('-r', '--repeat', action='store_true',

- help="show all occurrences of the same error")

+ parser.add_option('-r', '--repeat', default=True, action='store_true',

+ help="(obsolete) show all occurrences of the same error")

+ parser.add_option('--first', action='store_false', dest='repeat',

+ help="show first occurrence of each error")

parser.add_option('--exclude', metavar='patterns', default=DEFAULT_EXCLUDE,

help="exclude files or directories which match these "

- "comma separated patterns (default: %s)" %

- DEFAULT_EXCLUDE)

+ "comma separated patterns (default: %default)")

parser.add_option('--filename', metavar='patterns', default='*.py',

help="when parsing directories, only check filenames "

- "matching these comma separated patterns (default: "

- "*.py)")

+ "matching these comma separated patterns "

+ "(default: %default)")

parser.add_option('--select', metavar='errors', default='',

help="select errors and warnings (e.g. E,W6)")

parser.add_option('--ignore', metavar='errors', default='',

@@ -1177,78 +1776,167 @@ def process_options(arglist=None):

parser.add_option('--show-source', action='store_true',

help="show source code for each error")

parser.add_option('--show-pep8', action='store_true',

- help="show text of PEP 8 for each error")

+ help="show text of PEP 8 for each error "

+ "(implies --first)")

parser.add_option('--statistics', action='store_true',

help="count errors and warnings")

parser.add_option('--count', action='store_true',

help="print total number of errors and warnings "

- "to standard error and set exit code to 1 if "

- "total is not null")

- parser.add_option('--benchmark', action='store_true',

- help="measure processing speed")

- parser.add_option('--testsuite', metavar='dir',

- help="run regression tests from dir")

- parser.add_option('--doctest', action='store_true',

- help="run doctest on myself")

- options, args = parser.parse_args(arglist)

- if options.testsuite:

+ "to standard error and set exit code to 1 if "

+ "total is not null")

+ parser.add_option('--max-line-length', type='int', metavar='n',

+ default=MAX_LINE_LENGTH,

+ help="set maximum allowed line length "

+ "(default: %default)")

+ parser.add_option('--hang-closing', action='store_true',

+ help="hang closing bracket instead of matching "

+ "indentation of opening bracket's line")

+ parser.add_option('--format', metavar='format', default='default',

+ help="set the error format [default|pylint|<custom>]")

+ parser.add_option('--diff', action='store_true',

+ help="report only lines changed according to the "

+ "unified diff received on STDIN")

+ group = parser.add_option_group("Testing Options")

+ if os.path.exists(TESTSUITE_PATH):

+ group.add_option('--testsuite', metavar='dir',

+ help="run regression tests from dir")

+ group.add_option('--doctest', action='store_true',

+ help="run doctest on myself")

+ group.add_option('--benchmark', action='store_true',

+ help="measure processing speed")

+ return parser

+def read_config(options, args, arglist, parser):

+ """Read both user configuration and local configuration."""

+ config = RawConfigParser()

+ user_conf = options.config

+ if user_conf and os.path.isfile(user_conf):

+ if options.verbose:

+ print('user configuration: %s' % user_conf)

+ config.read(user_conf)

+ local_dir = os.curdir

+ parent = tail = args and os.path.abspath(os.path.commonprefix(args))

+ while tail:

+ if config.read([os.path.join(parent, fn) for fn in PROJECT_CONFIG]):

+ local_dir = parent

+ if options.verbose:

+ print('local configuration: in %s' % parent)

+ break

+ (parent, tail) = os.path.split(parent)

+ pep8_section = parser.prog

+ if config.has_section(pep8_section):

+ option_list = dict([(o.dest, o.type or o.action)

+ for o in parser.option_list])

+ # First, read the default values

+ (new_options, __) = parser.parse_args([])

+ # Second, parse the configuration

+ for opt in config.options(pep8_section):

+ if opt.replace('_', '-') not in parser.config_options:

+ print(" unknown option '%s' ignored" % opt)

+ continue

+ if options.verbose > 1:

+ print(" %s = %s" % (opt, config.get(pep8_section, opt)))

+ normalized_opt = opt.replace('-', '_')

+ opt_type = option_list[normalized_opt]

+ if opt_type in ('int', 'count'):

+ value = config.getint(pep8_section, opt)

+ elif opt_type == 'string':

+ value = config.get(pep8_section, opt)

+ if normalized_opt == 'exclude':

+ value = normalize_paths(value, local_dir)

+ else:

+ assert opt_type in ('store_true', 'store_false')

+ value = config.getboolean(pep8_section, opt)

+ setattr(new_options, normalized_opt, value)

+ # Third, overwrite with the command-line options

+ (options, __) = parser.parse_args(arglist, values=new_options)

+ options.doctest = options.testsuite = False

+ return options

+def process_options(arglist=None, parse_argv=False, config_file=None,

+ parser=None):

+ """Process options passed either via arglist or via command line args."""

+ if not parser:

+ parser = get_parser()

+ if not parser.has_option('--config'):

+ if config_file is True:

+ config_file = DEFAULT_CONFIG

+ group = parser.add_option_group("Configuration", description=(

+ "The project options are read from the [%s] section of the "

+ "tox.ini file or the setup.cfg file located in any parent folder "

+ "of the path(s) being processed. Allowed options are: %s." %

+ (parser.prog, ', '.join(parser.config_options))))

+ group.add_option('--config', metavar='path', default=config_file,

+ help="user config file location (default: %default)")

+ # Don't read the command line if the module is used as a library.

+ if not arglist and not parse_argv:

+ arglist = []

+ # If parse_argv is True and arglist is None, arguments are

+ # parsed from the command line (sys.argv)

+ (options, args) = parser.parse_args(arglist)

+ options.reporter = None

+ if options.ensure_value('testsuite', False):

args.append(options.testsuite)

- if len(args) == 0 and not options.doctest:

- parser.error('input not specified')

- options.prog = os.path.basename(sys.argv[0])

- options.exclude = options.exclude.split(',')

- for index in range(len(options.exclude)):

- options.exclude[index] = options.exclude[index].rstrip('/')

- if options.filename:

- options.filename = options.filename.split(',')

- if options.select:

- options.select = options.select.split(',')

- else:

- options.select = []

- if options.ignore:

- options.ignore = options.ignore.split(',')

- elif options.select:

- # Ignore all checks which are not explicitly selected

- options.ignore = ['']

- elif options.testsuite or options.doctest:

- # For doctest and testsuite, all checks are required

- options.ignore = []

- else:

- # The default choice: ignore controversial checks

- options.ignore = DEFAULT_IGNORE

- options.physical_checks = find_checks('physical_line')

- options.logical_checks = find_checks('logical_line')

- options.counters = {}

- options.messages = {}

+ elif not options.ensure_value('doctest', False):

+ if parse_argv and not args:

+ if options.diff or any(os.path.exists(name)

+ for name in PROJECT_CONFIG):

+ args = ['.']

+ else:

+ parser.error('input not specified')

+ options = read_config(options, args, arglist, parser)

+ options.reporter = parse_argv and options.quiet == 1 and FileReport

+ options.filename = options.filename and options.filename.split(',')

+ options.exclude = normalize_paths(options.exclude)

+ options.select = options.select and options.select.split(',')

+ options.ignore = options.ignore and options.ignore.split(',')

+ if options.diff:

+ options.reporter = DiffReport

+ stdin = stdin_get_value()

+ options.selected_lines = parse_udiff(stdin, options.filename, args[0])

+ args = sorted(options.selected_lines)

return options, args

def _main():

- """

- Parse options and run checks on Python source.

- """

- options, args = process_options()

- if options.doctest:

- import doctest

- doctest.testmod(verbose=options.verbose)

- selftest()

- start_time = time.time()

- for path in args:

- if os.path.isdir(path):

- input_dir(path)

- else:

- input_file(path)

- elapsed = time.time() - start_time

+ """Parse options and run checks on Python source."""

+ import signal

+ # Handle "Broken pipe" gracefully

+ try:

+ signal.signal(signal.SIGPIPE, lambda signum, frame: sys.exit(1))

+ except AttributeError:

+ pass # not supported on Windows

+ pep8style = StyleGuide(parse_argv=True, config_file=True)

+ options = pep8style.options

+ if options.doctest or options.testsuite:

+ from testsuite.support import run_tests

+ report = run_tests(pep8style)

+ else:

+ report = pep8style.check_files()

if options.statistics:

- print_statistics()

+ report.print_statistics()

if options.benchmark:

- print_benchmark(elapsed)

- if options.count:

- count = get_count()

- if count:

- sys.stderr.write(str(count) + '\n')

- sys.exit(1)

+ report.print_benchmark()

+ if options.testsuite and not options.quiet:

+ report.print_results()

+ if report.total_errors:

+ if options.count:

+ sys.stderr.write(str(report.total_errors) + '\n')

+ sys.exit(1)

if __name__ == '__main__':

_main()

« Tools/Scripts/webkitpy/test/main.py ('K') | « Tools/Scripts/webkitpy/thirdparty/autopep8.py ('k') | no next file » | no next file with comments »