Index: Tools/Scripts/webkitpy/thirdparty/pep8.py |
diff --git a/Tools/Scripts/webkitpy/thirdparty/pep8.py b/Tools/Scripts/webkitpy/thirdparty/pep8.py |
index c31937039ad137c6d2166cedacc016babc522541..f605f189fab31af0edff9b3b076104bc3e61d91c 100755 |
--- a/Tools/Scripts/webkitpy/thirdparty/pep8.py |
+++ b/Tools/Scripts/webkitpy/thirdparty/pep8.py |
@@ -1,6 +1,7 @@ |
-#!/usr/bin/python |
+#!/usr/bin/env python |
# pep8.py - Check Python source code formatting, according to PEP 8 |
-# Copyright (C) 2006 Johann C. Rocholl <johann@rocholl.net> |
+# Copyright (C) 2006-2009 Johann C. Rocholl <johann@rocholl.net> |
+# Copyright (C) 2009-2014 Florent Xicluna <florent.xicluna@gmail.com> |
# |
# Permission is hereby granted, free of charge, to any person |
# obtaining a copy of this software and associated documentation files |
@@ -22,9 +23,8 @@ |
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
# SOFTWARE. |
-""" |
-Check Python source code formatting, according to PEP 8: |
-http://www.python.org/dev/peps/pep-0008/ |
+r""" |
+Check Python source code formatting, according to PEP 8. |
For usage and a list of options, try this: |
$ python pep8.py -h |
@@ -42,87 +42,77 @@ W warnings |
500 line length |
600 deprecation |
700 statements |
- |
-You can add checks to this program by writing plugins. Each plugin is |
-a simple function that is called for each line of source code, either |
-physical or logical. |
- |
-Physical line: |
-- Raw line of text from the input file. |
- |
-Logical line: |
-- Multi-line statements converted to a single line. |
-- Stripped left and right. |
-- Contents of strings replaced with 'xxx' of same length. |
-- Comments removed. |
- |
-The check function requests physical or logical lines by the name of |
-the first argument: |
- |
-def maximum_line_length(physical_line) |
-def extraneous_whitespace(logical_line) |
-def blank_lines(logical_line, blank_lines, indent_level, line_number) |
- |
-The last example above demonstrates how check plugins can request |
-additional information with extra arguments. All attributes of the |
-Checker object are available. Some examples: |
- |
-lines: a list of the raw lines from the input file |
-tokens: the tokens that contribute to this logical line |
-line_number: line number in the input file |
-blank_lines: blank lines before this one |
-indent_char: first indentation character in this file (' ' or '\t') |
-indent_level: indentation (with tabs expanded to multiples of 8) |
-previous_indent_level: indentation on previous line |
-previous_logical: previous logical line |
- |
-The docstring of each check function shall be the relevant part of |
-text from PEP 8. It is printed if the user enables --show-pep8. |
-Several docstrings contain examples directly from the PEP 8 document. |
- |
-Okay: spam(ham[1], {eggs: 2}) |
-E201: spam( ham[1], {eggs: 2}) |
- |
-These examples are verified automatically when pep8.py is run with the |
---doctest option. You can add examples for your own check functions. |
-The format is simple: "Okay" or error/warning code followed by colon |
-and space, the rest of the line is example source code. If you put 'r' |
-before the docstring, you can use \n for newline, \t for tab and \s |
-for space. |
- |
+900 syntax error |
""" |
+from __future__ import with_statement |
-__version__ = '0.5.0' |
+__version__ = '1.5.7' |
import os |
import sys |
import re |
import time |
import inspect |
+import keyword |
import tokenize |
from optparse import OptionParser |
-from keyword import iskeyword |
from fnmatch import fnmatch |
- |
-DEFAULT_EXCLUDE = '.svn,CVS,.bzr,.hg,.git' |
-DEFAULT_IGNORE = ['E24'] |
+try: |
+ from configparser import RawConfigParser |
+ from io import TextIOWrapper |
+except ImportError: |
+ from ConfigParser import RawConfigParser |
+ |
+DEFAULT_EXCLUDE = '.svn,CVS,.bzr,.hg,.git,__pycache__' |
+DEFAULT_IGNORE = 'E123,E226,E24' |
+if sys.platform == 'win32': |
+ DEFAULT_CONFIG = os.path.expanduser(r'~\.pep8') |
+else: |
+ DEFAULT_CONFIG = os.path.join(os.getenv('XDG_CONFIG_HOME') or |
+ os.path.expanduser('~/.config'), 'pep8') |
+PROJECT_CONFIG = ('setup.cfg', 'tox.ini', '.pep8') |
+TESTSUITE_PATH = os.path.join(os.path.dirname(__file__), 'testsuite') |
+MAX_LINE_LENGTH = 79 |
+REPORT_FORMAT = { |
+ 'default': '%(path)s:%(row)d:%(col)d: %(code)s %(text)s', |
+ 'pylint': '%(path)s:%(row)d: [%(code)s] %(text)s', |
+} |
+ |
+PyCF_ONLY_AST = 1024 |
+SINGLETONS = frozenset(['False', 'None', 'True']) |
+KEYWORDS = frozenset(keyword.kwlist + ['print']) - SINGLETONS |
+UNARY_OPERATORS = frozenset(['>>', '**', '*', '+', '-']) |
+ARITHMETIC_OP = frozenset(['**', '*', '/', '//', '+', '-']) |
+WS_OPTIONAL_OPERATORS = ARITHMETIC_OP.union(['^', '&', '|', '<<', '>>', '%']) |
+WS_NEEDED_OPERATORS = frozenset([ |
+ '**=', '*=', '/=', '//=', '+=', '-=', '!=', '<>', '<', '>', |
+ '%=', '^=', '&=', '|=', '==', '<=', '>=', '<<=', '>>=', '=']) |
+WHITESPACE = frozenset(' \t') |
+NEWLINE = frozenset([tokenize.NL, tokenize.NEWLINE]) |
+SKIP_TOKENS = NEWLINE.union([tokenize.INDENT, tokenize.DEDENT]) |
+# ERRORTOKEN is triggered by backticks in Python 3 |
+SKIP_COMMENTS = SKIP_TOKENS.union([tokenize.COMMENT, tokenize.ERRORTOKEN]) |
+BENCHMARK_KEYS = ['directories', 'files', 'logical lines', 'physical lines'] |
INDENT_REGEX = re.compile(r'([ \t]*)') |
-RAISE_COMMA_REGEX = re.compile(r'raise\s+\w+\s*(,)') |
-SELFTEST_REGEX = re.compile(r'(Okay|[EW]\d{3}):\s(.*)') |
-ERRORCODE_REGEX = re.compile(r'[EW]\d{3}') |
-E301NOT_REGEX = re.compile(r'class |def |u?r?["\']') |
- |
-WHITESPACE = ' \t' |
- |
-BINARY_OPERATORS = ['**=', '*=', '+=', '-=', '!=', '<>', |
- '%=', '^=', '&=', '|=', '==', '/=', '//=', '>=', '<=', '>>=', '<<=', |
- '%', '^', '&', '|', '=', '/', '//', '>', '<', '>>', '<<'] |
-UNARY_OPERATORS = ['**', '*', '+', '-'] |
-OPERATORS = BINARY_OPERATORS + UNARY_OPERATORS |
- |
-options = None |
-args = None |
+RAISE_COMMA_REGEX = re.compile(r'raise\s+\w+\s*,') |
+RERAISE_COMMA_REGEX = re.compile(r'raise\s+\w+\s*,.*,\s*\w+\s*$') |
+ERRORCODE_REGEX = re.compile(r'\b[A-Z]\d{3}\b') |
+DOCSTRING_REGEX = re.compile(r'u?r?["\']') |
+EXTRANEOUS_WHITESPACE_REGEX = re.compile(r'[[({] | []}),;:]') |
+WHITESPACE_AFTER_COMMA_REGEX = re.compile(r'[,;:]\s*(?:  |\t)') |
+COMPARE_SINGLETON_REGEX = re.compile(r'([=!]=)\s*(None|False|True)') |
+COMPARE_NEGATIVE_REGEX = re.compile(r'\b(not)\s+[^[({ ]+\s+(in|is)\s') |
+COMPARE_TYPE_REGEX = re.compile(r'(?:[=!]=|is(?:\s+not)?)\s*type(?:s.\w+Type' |
+ r'|\s*\(\s*([^)]*[^ )])\s*\))') |
+KEYWORD_REGEX = re.compile(r'(\s*)\b(?:%s)\b(\s*)' % r'|'.join(KEYWORDS)) |
+OPERATOR_REGEX = re.compile(r'(?:[^,\s])(\s*)(?:[-+*/|!<=>%&^]+)(\s*)') |
+LAMBDA_REGEX = re.compile(r'\blambda\b') |
+HUNK_REGEX = re.compile(r'^@@ -\d+(?:,\d+)? \+(\d+)(?:,(\d+))? @@.*$') |
+ |
+# Work around Python < 2.6 behaviour, which does not generate NL after |
+# a comment which is on a line by itself. |
+COMMENT_WITH_NL = tokenize.generate_tokens(['#\n'].pop).send(None)[1] == '#\n' |
############################################################################## |
@@ -131,8 +121,7 @@ args = None |
def tabs_or_spaces(physical_line, indent_char): |
- r""" |
- Never mix tabs and spaces. |
+ r"""Never mix tabs and spaces. |
The most popular way of indenting Python is with spaces only. The |
second-most popular way is with tabs only. Code indented with a mixture |
@@ -151,55 +140,55 @@ def tabs_or_spaces(physical_line, indent_char): |
def tabs_obsolete(physical_line): |
- r""" |
- For new projects, spaces-only are strongly recommended over tabs. Most |
- editors have features that make this easy to do. |
+ r"""For new projects, spaces-only are strongly recommended over tabs. |
Okay: if True:\n    return |
W191: if True:\n\treturn |
""" |
indent = INDENT_REGEX.match(physical_line).group(1) |
- if indent.count('\t'): |
+ if '\t' in indent: |
return indent.index('\t'), "W191 indentation contains tabs" |
def trailing_whitespace(physical_line): |
- """ |
- JCR: Trailing whitespace is superfluous. |
+ r"""Trailing whitespace is superfluous. |
- Okay: spam(1) |
- W291: spam(1)\s |
+ The warning returned depends on whether the line itself is blank, for easier |
+ filtering for those who want to indent their blank lines. |
+ |
+ Okay: spam(1)\n# |
+ W291: spam(1) \n# |
+ W293: class Foo(object):\n    \n    bang = 12 |
""" |
physical_line = physical_line.rstrip('\n') # chr(10), newline |
physical_line = physical_line.rstrip('\r') # chr(13), carriage return |
physical_line = physical_line.rstrip('\x0c') # chr(12), form feed, ^L |
- stripped = physical_line.rstrip() |
+ stripped = physical_line.rstrip(' \t\v') |
if physical_line != stripped: |
- return len(stripped), "W291 trailing whitespace" |
+ if stripped: |
+ return len(stripped), "W291 trailing whitespace" |
+ else: |
+ return 0, "W293 blank line contains whitespace" |
-def trailing_blank_lines(physical_line, lines, line_number): |
- r""" |
- JCR: Trailing blank lines are superfluous. |
+def trailing_blank_lines(physical_line, lines, line_number, total_lines): |
+ r"""Trailing blank lines are superfluous. |
Okay: spam(1) |
W391: spam(1)\n |
- """ |
- if physical_line.strip() == '' and line_number == len(lines): |
- return 0, "W391 blank line at end of file" |
- |
-def missing_newline(physical_line): |
+ However the last line should end with a new line (warning W292). |
""" |
- JCR: The last line should have a newline. |
- """ |
- if physical_line.rstrip() == physical_line: |
- return len(physical_line), "W292 no newline at end of file" |
+ if line_number == total_lines: |
+ stripped_last_line = physical_line.rstrip() |
+ if not stripped_last_line: |
+ return 0, "W391 blank line at end of file" |
+ if stripped_last_line == physical_line: |
+ return len(physical_line), "W292 no newline at end of file" |
-def maximum_line_length(physical_line): |
- """ |
- Limit all lines to a maximum of 79 characters. |
+def maximum_line_length(physical_line, max_line_length, multiline): |
+ r"""Limit all lines to a maximum of 79 characters. |
There are still many devices around that are limited to 80 character |
lines; plus, limiting windows to 80 characters makes it possible to have |
@@ -207,10 +196,28 @@ def maximum_line_length(physical_line): |
ugly. Therefore, please limit all lines to a maximum of 79 characters. |
For flowing long blocks of text (docstrings or comments), limiting the |
length to 72 characters is recommended. |
- """ |
- length = len(physical_line.rstrip()) |
- if length > 79: |
- return 79, "E501 line too long (%d characters)" % length |
+ |
+ Reports error E501. |
+ """ |
+ line = physical_line.rstrip() |
+ length = len(line) |
+ if length > max_line_length and not noqa(line): |
+ # Special case for long URLs in multi-line docstrings or comments, |
+ # but still report the error when the first 72 chars are whitespace. |
+ chunks = line.split() |
+ if ((len(chunks) == 1 and multiline) or |
+ (len(chunks) == 2 and chunks[0] == '#')) and \ |
+ len(line) - len(chunks[-1]) < max_line_length - 7: |
+ return |
+ if hasattr(line, 'decode'): # Python 2 |
+ # The line could contain multi-byte characters |
+ try: |
+ length = len(line.decode('utf-8')) |
+ except UnicodeError: |
+ pass |
+ if length > max_line_length: |
+ return (max_line_length, "E501 line too long " |
+ "(%d > %d characters)" % (length, max_line_length)) |
############################################################################## |
@@ -219,9 +226,8 @@ def maximum_line_length(physical_line): |
def blank_lines(logical_line, blank_lines, indent_level, line_number, |
- previous_logical, blank_lines_before_comment): |
- r""" |
- Separate top-level function and class definitions with two blank lines. |
+ blank_before, previous_logical, previous_indent_level): |
+ r"""Separate top-level function and class definitions with two blank lines. |
Method definitions inside a class are separated by a single blank line. |
@@ -240,30 +246,27 @@ def blank_lines(logical_line, blank_lines, indent_level, line_number, |
E303: def a():\n\n\n\n    pass |
E304: @decorator\n\ndef a():\n    pass |
""" |
- if line_number == 1: |
+ if line_number < 3 and not previous_logical: |
return # Don't expect blank lines before the first line |
- max_blank_lines = max(blank_lines, blank_lines_before_comment) |
if previous_logical.startswith('@'): |
- if max_blank_lines: |
- return 0, "E304 blank lines found after function decorator" |
- elif max_blank_lines > 2 or (indent_level and max_blank_lines == 2): |
- return 0, "E303 too many blank lines (%d)" % max_blank_lines |
- elif (logical_line.startswith('def ') or |
- logical_line.startswith('class ') or |
- logical_line.startswith('@')): |
+ if blank_lines: |
+ yield 0, "E304 blank lines found after function decorator" |
+ elif blank_lines > 2 or (indent_level and blank_lines == 2): |
+ yield 0, "E303 too many blank lines (%d)" % blank_lines |
+ elif logical_line.startswith(('def ', 'class ', '@')): |
if indent_level: |
- if not (max_blank_lines or E301NOT_REGEX.match(previous_logical)): |
- return 0, "E301 expected 1 blank line, found 0" |
- elif max_blank_lines != 2: |
- return 0, "E302 expected 2 blank lines, found %d" % max_blank_lines |
+ if not (blank_before or previous_indent_level < indent_level or |
+ DOCSTRING_REGEX.match(previous_logical)): |
+ yield 0, "E301 expected 1 blank line, found 0" |
+ elif blank_before != 2: |
+ yield 0, "E302 expected 2 blank lines, found %d" % blank_before |
def extraneous_whitespace(logical_line): |
- """ |
- Avoid extraneous whitespace in the following situations: |
+ r"""Avoid extraneous whitespace. |
+ Avoid extraneous whitespace in these situations: |
- Immediately inside parentheses, brackets or braces. |
- |
- Immediately before a comma, semicolon, or colon. |
Okay: spam(ham[1], {eggs: 2}) |
@@ -279,23 +282,43 @@ def extraneous_whitespace(logical_line): |
E203: if x == 4 : print x, y; x, y = y, x |
""" |
line = logical_line |
- for char in '([{': |
- found = line.find(char + ' ') |
- if found > -1: |
- return found + 1, "E201 whitespace after '%s'" % char |
- for char in '}])': |
- found = line.find(' ' + char) |
- if found > -1 and line[found - 1] != ',': |
- return found, "E202 whitespace before '%s'" % char |
- for char in ',;:': |
- found = line.find(' ' + char) |
- if found > -1: |
- return found, "E203 whitespace before '%s'" % char |
+ for match in EXTRANEOUS_WHITESPACE_REGEX.finditer(line): |
+ text = match.group() |
+ char = text.strip() |
+ found = match.start() |
+ if text == char + ' ': |
+ # assert char in '([{' |
+ yield found + 1, "E201 whitespace after '%s'" % char |
+ elif line[found - 1] != ',': |
+ code = ('E202' if char in '}])' else 'E203') # if char in ',;:' |
+ yield found, "%s whitespace before '%s'" % (code, char) |
-def missing_whitespace(logical_line): |
+def whitespace_around_keywords(logical_line): |
+ r"""Avoid extraneous whitespace around keywords. |
+ |
+ Okay: True and False |
+ E271: True and  False |
+ E272: True  and False |
+ E273: True and\tFalse |
+ E274: True\tand False |
""" |
- JCR: Each comma, semicolon or colon should be followed by whitespace. |
+ for match in KEYWORD_REGEX.finditer(logical_line): |
+ before, after = match.groups() |
+ |
+ if '\t' in before: |
+ yield match.start(1), "E274 tab before keyword" |
+ elif len(before) > 1: |
+ yield match.start(1), "E272 multiple spaces before keyword" |
+ |
+ if '\t' in after: |
+ yield match.start(2), "E273 tab after keyword" |
+ elif len(after) > 1: |
+ yield match.start(2), "E271 multiple spaces after keyword" |
+ |
+ |
+def missing_whitespace(logical_line): |
+ r"""Each comma, semicolon or colon should be followed by whitespace. |
Okay: [a, b] |
Okay: (3,) |
@@ -305,23 +328,24 @@ def missing_whitespace(logical_line): |
Okay: a[1:4:2] |
E231: ['a','b'] |
E231: foo(bar,baz) |
+ E231: [{'a':'b'}] |
""" |
line = logical_line |
for index in range(len(line) - 1): |
char = line[index] |
if char in ',;:' and line[index + 1] not in WHITESPACE: |
before = line[:index] |
- if char == ':' and before.count('[') > before.count(']'): |
+ if char == ':' and before.count('[') > before.count(']') and \ |
+ before.rfind('{') < before.rfind('['): |
continue # Slice syntax, no space required |
if char == ',' and line[index + 1] == ')': |
continue # Allow tuple with only one element: (3,) |
- return index, "E231 missing whitespace after '%s'" % char |
+ yield index, "E231 missing whitespace after '%s'" % char |
def indentation(logical_line, previous_logical, indent_char, |
indent_level, previous_indent_level): |
- r""" |
- Use 4 spaces per indentation level. |
+ r"""Use 4 spaces per indentation level. |
For really old code that you don't want to mess up, you can continue to |
use 8-space tabs. |
@@ -337,23 +361,218 @@ def indentation(logical_line, previous_logical, indent_char, |
E113: a = 1\n    b = 2 |
""" |
if indent_char == ' ' and indent_level % 4: |
- return 0, "E111 indentation is not a multiple of four" |
+ yield 0, "E111 indentation is not a multiple of four" |
indent_expect = previous_logical.endswith(':') |
if indent_expect and indent_level <= previous_indent_level: |
- return 0, "E112 expected an indented block" |
+ yield 0, "E112 expected an indented block" |
if indent_level > previous_indent_level and not indent_expect: |
- return 0, "E113 unexpected indentation" |
+ yield 0, "E113 unexpected indentation" |
+ |
+ |
+def continued_indentation(logical_line, tokens, indent_level, hang_closing, |
+ indent_char, noqa, verbose): |
+ r"""Continuation lines indentation. |
+ |
+ Continuation lines should align wrapped elements either vertically |
+ using Python's implicit line joining inside parentheses, brackets |
+ and braces, or using a hanging indent. |
+ |
+ When using a hanging indent these considerations should be applied: |
+ - there should be no arguments on the first line, and |
+ - further indentation should be used to clearly distinguish itself as a |
+ continuation line. |
+ |
+ Okay: a = (\n) |
+ E123: a = (\n    ) |
+ |
+ Okay: a = (\n    42) |
+ E121: a = (\n   42) |
+ E122: a = (\n42) |
+ E123: a = (\n    42\n    ) |
+ E124: a = (24,\n     42\n) |
+ E125: if (\n    b):\n    pass |
+ E126: a = (\n        42) |
+ E127: a = (24,\n      42) |
+ E128: a = (24,\n    42) |
+ E129: if (a or\n    b):\n    pass |
+ E131: a = (\n    42\n 24) |
+ """ |
+ first_row = tokens[0][2][0] |
+ nrows = 1 + tokens[-1][2][0] - first_row |
+ if noqa or nrows == 1: |
+ return |
+ # indent_next tells us whether the next block is indented; assuming |
+ # that it is indented by 4 spaces, then we should not allow 4-space |
+ # indents on the final continuation line; in turn, some other |
+ # indents are allowed to have an extra 4 spaces. |
+ indent_next = logical_line.endswith(':') |
+ |
+ row = depth = 0 |
+ valid_hangs = (4,) if indent_char != '\t' else (4, 8) |
+ # remember how many brackets were opened on each line |
+ parens = [0] * nrows |
+ # relative indents of physical lines |
+ rel_indent = [0] * nrows |
+ # for each depth, collect a list of opening rows |
+ open_rows = [[0]] |
+ # for each depth, memorize the hanging indentation |
+ hangs = [None] |
+ # visual indents |
+ indent_chances = {} |
+ last_indent = tokens[0][2] |
+ visual_indent = None |
+ # for each depth, memorize the visual indent column |
+ indent = [last_indent[1]] |
+ if verbose >= 3: |
+ print(">>> " + tokens[0][4].rstrip()) |
-def whitespace_before_parameters(logical_line, tokens): |
- """ |
- Avoid extraneous whitespace in the following situations: |
+ for token_type, text, start, end, line in tokens: |
+ |
+ newline = row < start[0] - first_row |
+ if newline: |
+ row = start[0] - first_row |
+ newline = not last_token_multiline and token_type not in NEWLINE |
+ |
+ if newline: |
+ # this is the beginning of a continuation line. |
+ last_indent = start |
+ if verbose >= 3: |
+ print("... " + line.rstrip()) |
- - Immediately before the open parenthesis that starts the argument |
- list of a function call. |
+ # record the initial indent. |
+ rel_indent[row] = expand_indent(line) - indent_level |
- - Immediately before the open parenthesis that starts an indexing or |
- slicing. |
+ # identify closing bracket |
+ close_bracket = (token_type == tokenize.OP and text in ']})') |
+ |
+ # is the indent relative to an opening bracket line? |
+ for open_row in reversed(open_rows[depth]): |
+ hang = rel_indent[row] - rel_indent[open_row] |
+ hanging_indent = hang in valid_hangs |
+ if hanging_indent: |
+ break |
+ if hangs[depth]: |
+ hanging_indent = (hang == hangs[depth]) |
+ # is there any chance of visual indent? |
+ visual_indent = (not close_bracket and hang > 0 and |
+ indent_chances.get(start[1])) |
+ |
+ if close_bracket and indent[depth]: |
+ # closing bracket for visual indent |
+ if start[1] != indent[depth]: |
+ yield (start, "E124 closing bracket does not match " |
+ "visual indentation") |
+ elif close_bracket and not hang: |
+ # closing bracket matches indentation of opening bracket's line |
+ if hang_closing: |
+ yield start, "E133 closing bracket is missing indentation" |
+ elif indent[depth] and start[1] < indent[depth]: |
+ if visual_indent is not True: |
+ # visual indent is broken |
+ yield (start, "E128 continuation line " |
+ "under-indented for visual indent") |
+ elif hanging_indent or (indent_next and rel_indent[row] == 8): |
+ # hanging indent is verified |
+ if close_bracket and not hang_closing: |
+ yield (start, "E123 closing bracket does not match " |
+ "indentation of opening bracket's line") |
+ hangs[depth] = hang |
+ elif visual_indent is True: |
+ # visual indent is verified |
+ indent[depth] = start[1] |
+ elif visual_indent in (text, str): |
+ # ignore token lined up with matching one from a previous line |
+ pass |
+ else: |
+ # indent is broken |
+ if hang <= 0: |
+ error = "E122", "missing indentation or outdented" |
+ elif indent[depth]: |
+ error = "E127", "over-indented for visual indent" |
+ elif not close_bracket and hangs[depth]: |
+ error = "E131", "unaligned for hanging indent" |
+ else: |
+ hangs[depth] = hang |
+ if hang > 4: |
+ error = "E126", "over-indented for hanging indent" |
+ else: |
+ error = "E121", "under-indented for hanging indent" |
+ yield start, "%s continuation line %s" % error |
+ |
+ # look for visual indenting |
+ if (parens[row] and token_type not in (tokenize.NL, tokenize.COMMENT) |
+ and not indent[depth]): |
+ indent[depth] = start[1] |
+ indent_chances[start[1]] = True |
+ if verbose >= 4: |
+ print("bracket depth %s indent to %s" % (depth, start[1])) |
+ # deal with implicit string concatenation |
+ elif (token_type in (tokenize.STRING, tokenize.COMMENT) or |
+ text in ('u', 'ur', 'b', 'br')): |
+ indent_chances[start[1]] = str |
+ # special case for the "if" statement because len("if (") == 4 |
+ elif not indent_chances and not row and not depth and text == 'if': |
+ indent_chances[end[1] + 1] = True |
+ elif text == ':' and line[end[1]:].isspace(): |
+ open_rows[depth].append(row) |
+ |
+ # keep track of bracket depth |
+ if token_type == tokenize.OP: |
+ if text in '([{': |
+ depth += 1 |
+ indent.append(0) |
+ hangs.append(None) |
+ if len(open_rows) == depth: |
+ open_rows.append([]) |
+ open_rows[depth].append(row) |
+ parens[row] += 1 |
+ if verbose >= 4: |
+ print("bracket depth %s seen, col %s, visual min = %s" % |
+ (depth, start[1], indent[depth])) |
+ elif text in ')]}' and depth > 0: |
+ # parent indents should not be more than this one |
+ prev_indent = indent.pop() or last_indent[1] |
+ hangs.pop() |
+ for d in range(depth): |
+ if indent[d] > prev_indent: |
+ indent[d] = 0 |
+ for ind in list(indent_chances): |
+ if ind >= prev_indent: |
+ del indent_chances[ind] |
+ del open_rows[depth + 1:] |
+ depth -= 1 |
+ if depth: |
+ indent_chances[indent[depth]] = True |
+ for idx in range(row, -1, -1): |
+ if parens[idx]: |
+ parens[idx] -= 1 |
+ break |
+ assert len(indent) == depth + 1 |
+ if start[1] not in indent_chances: |
+ # allow tokens to line up |
+ indent_chances[start[1]] = text |
+ |
+ last_token_multiline = (start[0] != end[0]) |
+ if last_token_multiline: |
+ rel_indent[end[0] - first_row] = rel_indent[row] |
+ |
+ if indent_next and expand_indent(line) == indent_level + 4: |
+ pos = (start[0], indent[0] + 4) |
+ if visual_indent: |
+ code = "E129 visually indented line" |
+ else: |
+ code = "E125 continuation line" |
+ yield pos, "%s with same indent as next logical line" % code |
+ |
+ |
+def whitespace_before_parameters(logical_line, tokens): |
+ r"""Avoid extraneous whitespace. |
+ |
+ Avoid extraneous whitespace in the following situations: |
+ - before the open parenthesis that starts the argument list of a |
+ function call. |
+ - before the open parenthesis that starts an indexing or slicing. |
Okay: spam(1) |
E211: spam (1) |
@@ -362,29 +581,25 @@ def whitespace_before_parameters(logical_line, tokens): |
E211: dict ['key'] = list[index] |
E211: dict['key'] = list [index] |
""" |
- prev_type = tokens[0][0] |
- prev_text = tokens[0][1] |
- prev_end = tokens[0][3] |
+ prev_type, prev_text, __, prev_end, __ = tokens[0] |
for index in range(1, len(tokens)): |
- token_type, text, start, end, line = tokens[index] |
+ token_type, text, start, end, __ = tokens[index] |
if (token_type == tokenize.OP and |
text in '([' and |
start != prev_end and |
- prev_type == tokenize.NAME and |
+ (prev_type == tokenize.NAME or prev_text in '}])') and |
+ # Syntax "class A (B):" is allowed, but avoid it |
(index < 2 or tokens[index - 2][1] != 'class') and |
- (not iskeyword(prev_text))): |
- return prev_end, "E211 whitespace before '%s'" % text |
+ # Allow "return (a.foo for a in range(5))" |
+ not keyword.iskeyword(prev_text)): |
+ yield prev_end, "E211 whitespace before '%s'" % text |
prev_type = token_type |
prev_text = text |
prev_end = end |
def whitespace_around_operator(logical_line): |
- """ |
- Avoid extraneous whitespace in the following situations: |
- |
- - More than one space around an assignment (or other) operator to |
- align it with another. |
+ r"""Avoid extraneous whitespace around an operator. |
Okay: a = 12 + 3 |
E221: a = 4  + 5 |
@@ -392,30 +607,30 @@ def whitespace_around_operator(logical_line): |
E223: a = 4\t+ 5 |
E224: a = 4 +\t5 |
""" |
- line = logical_line |
- for operator in OPERATORS: |
- found = line.find(' ' + operator) |
- if found > -1: |
- return found, "E221 multiple spaces before operator" |
- found = line.find(operator + ' ') |
- if found > -1: |
- return found, "E222 multiple spaces after operator" |
- found = line.find('\t' + operator) |
- if found > -1: |
- return found, "E223 tab before operator" |
- found = line.find(operator + '\t') |
- if found > -1: |
- return found, "E224 tab after operator" |
+ for match in OPERATOR_REGEX.finditer(logical_line): |
+ before, after = match.groups() |
+ |
+ if '\t' in before: |
+ yield match.start(1), "E223 tab before operator" |
+ elif len(before) > 1: |
+ yield match.start(1), "E221 multiple spaces before operator" |
+ |
+ if '\t' in after: |
+ yield match.start(2), "E224 tab after operator" |
+ elif len(after) > 1: |
+ yield match.start(2), "E222 multiple spaces after operator" |
def missing_whitespace_around_operator(logical_line, tokens): |
- r""" |
+ r"""Surround operators with a single space on either side. |
+ |
- Always surround these binary operators with a single space on |
either side: assignment (=), augmented assignment (+=, -= etc.), |
- comparisons (==, <, >, !=, <>, <=, >=, in, not in, is, is not), |
+ comparisons (==, <, >, !=, <=, >=, in, not in, is, is not), |
Booleans (and, or, not). |
- - Use spaces around arithmetic operators. |
+ - If operators with different priorities are used, consider adding |
+ whitespace around the operators with the lowest priorities. |
Okay: i = i + 1 |
Okay: submitted += 1 |
@@ -423,64 +638,84 @@ def missing_whitespace_around_operator(logical_line, tokens): |
Okay: hypot2 = x * x + y * y |
Okay: c = (a + b) * (a - b) |
Okay: foo(bar, key='word', *args, **kwargs) |
- Okay: baz(**kwargs) |
- Okay: negative = -1 |
- Okay: spam(-1) |
Okay: alpha[:-i] |
- Okay: if not -5 < x < +5:\n pass |
- Okay: lambda *args, **kw: (args, kw) |
E225: i=i+1 |
E225: submitted +=1 |
- E225: x = x*2 - 1 |
- E225: hypot2 = x*x + y*y |
- E225: c = (a+b) * (a-b) |
- E225: c = alpha -4 |
+ E225: x = x /2 - 1 |
E225: z = x **y |
+ E226: c = (a+b) * (a-b) |
+ E226: hypot2 = x*x + y*y |
+ E227: c = a|b |
+ E228: msg = fmt%(errno, errmsg) |
""" |
parens = 0 |
need_space = False |
prev_type = tokenize.OP |
prev_text = prev_end = None |
for token_type, text, start, end, line in tokens: |
- if token_type in (tokenize.NL, tokenize.NEWLINE, tokenize.ERRORTOKEN): |
- # ERRORTOKEN is triggered by backticks in Python 3000 |
+ if token_type in SKIP_COMMENTS: |
continue |
if text in ('(', 'lambda'): |
parens += 1 |
elif text == ')': |
parens -= 1 |
if need_space: |
- if start == prev_end: |
- return prev_end, "E225 missing whitespace around operator" |
- need_space = False |
- elif token_type == tokenize.OP: |
+ if start != prev_end: |
+ # Found a (probably) needed space |
+ if need_space is not True and not need_space[1]: |
+ yield (need_space[0], |
+ "E225 missing whitespace around operator") |
+ need_space = False |
+ elif text == '>' and prev_text in ('<', '-'): |
+ # Tolerate the "<>" operator, even if running Python 3 |
+ # Deal with Python 3's annotated return value "->" |
+ pass |
+ else: |
+ if need_space is True or need_space[1]: |
+ # A needed trailing space was not found |
+ yield prev_end, "E225 missing whitespace around operator" |
+ else: |
+ code, optype = 'E226', 'arithmetic' |
+ if prev_text == '%': |
+ code, optype = 'E228', 'modulo' |
+ elif prev_text not in ARITHMETIC_OP: |
+ code, optype = 'E227', 'bitwise or shift' |
+ yield (need_space[0], "%s missing whitespace " |
+ "around %s operator" % (code, optype)) |
+ need_space = False |
+ elif token_type == tokenize.OP and prev_end is not None: |
if text == '=' and parens: |
# Allow keyword args or defaults: foo(bar=None). |
pass |
- elif text in BINARY_OPERATORS: |
+ elif text in WS_NEEDED_OPERATORS: |
need_space = True |
elif text in UNARY_OPERATORS: |
- if ((prev_type != tokenize.OP or prev_text in '}])') and not |
- (prev_type == tokenize.NAME and iskeyword(prev_text))): |
- # Allow unary operators: -123, -x, +1. |
- # Allow argument unpacking: foo(*args, **kwargs). |
- need_space = True |
- if need_space and start == prev_end: |
- return prev_end, "E225 missing whitespace around operator" |
+ # Check if the operator is being used as a binary operator |
+ # Allow unary operators: -123, -x, +1. |
+ # Allow argument unpacking: foo(*args, **kwargs). |
+ if (prev_text in '}])' if prev_type == tokenize.OP |
+ else prev_text not in KEYWORDS): |
+ need_space = None |
+ elif text in WS_OPTIONAL_OPERATORS: |
+ need_space = None |
+ |
+ if need_space is None: |
+ # Surrounding space is optional, but ensure that |
+ # trailing space matches opening space |
+ need_space = (prev_end, start != prev_end) |
+ elif need_space and start == prev_end: |
+ # A needed opening space was not found |
+ yield prev_end, "E225 missing whitespace around operator" |
+ need_space = False |
prev_type = token_type |
prev_text = text |
prev_end = end |
def whitespace_around_comma(logical_line): |
- """ |
- Avoid extraneous whitespace in the following situations: |
+ r"""Avoid extraneous whitespace after a comma or a colon. |
- - More than one space around an assignment (or other) operator to |
- align it with another. |
- |
- JCR: This should also be applied around comma etc. |
Note: these checks are disabled by default |
Okay: a = (1, 2) |
@@ -488,17 +723,17 @@ def whitespace_around_comma(logical_line): |
E242: a = (1,\t2) |
""" |
line = logical_line |
- for separator in ',;:': |
- found = line.find(separator + ' ') |
- if found > -1: |
- return found + 1, "E241 multiple spaces after '%s'" % separator |
- found = line.find(separator + '\t') |
- if found > -1: |
- return found + 1, "E242 tab after '%s'" % separator |
+ for m in WHITESPACE_AFTER_COMMA_REGEX.finditer(line): |
+ found = m.start() + 1 |
+ if '\t' in m.group(): |
+ yield found, "E242 tab after '%s'" % m.group()[0] |
+ else: |
+ yield found, "E241 multiple spaces after '%s'" % m.group()[0] |
-def whitespace_around_named_parameter_equals(logical_line): |
- """ |
+def whitespace_around_named_parameter_equals(logical_line, tokens): |
+ r"""Don't use spaces around the '=' sign in function arguments. |
+ |
Don't use spaces around the '=' sign when used to indicate a |
keyword argument or a default parameter value. |
@@ -513,60 +748,68 @@ def whitespace_around_named_parameter_equals(logical_line): |
E251: return magic(r = real, i = imag) |
""" |
parens = 0 |
- window = ' ' |
- equal_ok = ['==', '!=', '<=', '>='] |
- |
- for pos, c in enumerate(logical_line): |
- window = window[1:] + c |
- if parens: |
- if window[0] in WHITESPACE and window[1] == '=': |
- if window[1:] not in equal_ok: |
- issue = "E251 no spaces around keyword / parameter equals" |
- return pos, issue |
- if window[2] in WHITESPACE and window[1] == '=': |
- if window[:2] not in equal_ok: |
- issue = "E251 no spaces around keyword / parameter equals" |
- return pos, issue |
- if c == '(': |
- parens += 1 |
- elif c == ')': |
- parens -= 1 |
+ no_space = False |
+ prev_end = None |
+ message = "E251 unexpected spaces around keyword / parameter equals" |
+ for token_type, text, start, end, line in tokens: |
+ if token_type == tokenize.NL: |
+ continue |
+ if no_space: |
+ no_space = False |
+ if start != prev_end: |
+ yield (prev_end, message) |
+ elif token_type == tokenize.OP: |
+ if text == '(': |
+ parens += 1 |
+ elif text == ')': |
+ parens -= 1 |
+ elif parens and text == '=': |
+ no_space = True |
+ if start != prev_end: |
+ yield (prev_end, message) |
+ prev_end = end |
-def whitespace_before_inline_comment(logical_line, tokens): |
- """ |
- Separate inline comments by at least two spaces. |
+def whitespace_before_comment(logical_line, tokens): |
+ r"""Separate inline comments by at least two spaces. |
An inline comment is a comment on the same line as a statement. Inline |
comments should be separated by at least two spaces from the statement. |
They should start with a # and a single space. |
+ Each line of a block comment starts with a # and a single space |
+ (unless it is indented text inside the comment). |
+ |
Okay: x = x + 1 # Increment x |
Okay: x = x + 1 # Increment x |
+ Okay: # Block comment |
E261: x = x + 1 # Increment x |
E262: x = x + 1 #Increment x |
E262: x = x + 1 # Increment x |
+ E265: #Block comment |
""" |
prev_end = (0, 0) |
for token_type, text, start, end, line in tokens: |
- if token_type == tokenize.NL: |
- continue |
if token_type == tokenize.COMMENT: |
- if not line[:start[1]].strip(): |
- continue |
- if prev_end[0] == start[0] and start[1] < prev_end[1] + 2: |
- return (prev_end, |
- "E261 at least two spaces before inline comment") |
- if (len(text) > 1 and text.startswith('# ') |
- or not text.startswith('# ')): |
- return start, "E262 inline comment should start with '# '" |
- else: |
+ inline_comment = line[:start[1]].strip() |
+ if inline_comment: |
+ if prev_end[0] == start[0] and start[1] < prev_end[1] + 2: |
+ yield (prev_end, |
+ "E261 at least two spaces before inline comment") |
+ symbol, sp, comment = text.partition(' ') |
+ bad_prefix = symbol not in ('#', '#:') |
+ if inline_comment: |
+ if bad_prefix or comment[:1].isspace(): |
+ yield start, "E262 inline comment should start with '# '" |
+ elif bad_prefix: |
+ if text.rstrip('#') and (start[0] > 1 or symbol[1] != '!'): |
+ yield start, "E265 block comment should start with '# '" |
+ elif token_type != tokenize.NL: |
prev_end = end |
def imports_on_separate_lines(logical_line): |
- r""" |
- Imports should usually be on separate lines. |
+ r"""Imports should usually be on separate lines. |
Okay: import os\nimport sys |
E401: import sys, os |
@@ -580,18 +823,16 @@ def imports_on_separate_lines(logical_line): |
line = logical_line |
if line.startswith('import '): |
found = line.find(',') |
- if found > -1: |
- return found, "E401 multiple imports on one line" |
+ if -1 < found and ';' not in line[:found]: |
+ yield found, "E401 multiple imports on one line" |
def compound_statements(logical_line): |
- r""" |
- Compound statements (multiple statements on the same line) are |
- generally discouraged. |
+ r"""Compound statements (on the same line) are generally discouraged. |
While sometimes it's okay to put an if/for/while with a small body |
- on the same line, never do this for multi-clause statements. Also |
- avoid folding such long lines! |
+ on the same line, never do this for multi-clause statements. |
+ Also avoid folding such long lines! |
Okay: if foo == 'blah':\n do_blah_thing() |
Okay: do_one() |
@@ -608,67 +849,183 @@ def compound_statements(logical_line): |
E701: if foo == 'blah': one(); two(); three() |
E702: do_one(); do_two(); do_three() |
+ E703: do_four(); # useless semicolon |
""" |
line = logical_line |
+ last_char = len(line) - 1 |
found = line.find(':') |
- if -1 < found < len(line) - 1: |
+ while -1 < found < last_char: |
before = line[:found] |
if (before.count('{') <= before.count('}') and # {'a': 1} (dict) |
before.count('[') <= before.count(']') and # [1:2] (slice) |
- not re.search(r'\blambda\b', before)): # lambda x: x |
- return found, "E701 multiple statements on one line (colon)" |
+ before.count('(') <= before.count(')') and # (Python 3 annotation) |
+ not LAMBDA_REGEX.search(before)): # lambda x: x |
+ yield found, "E701 multiple statements on one line (colon)" |
+ found = line.find(':', found + 1) |
found = line.find(';') |
- if -1 < found: |
- return found, "E702 multiple statements on one line (semicolon)" |
+ while -1 < found: |
+ if found < last_char: |
+ yield found, "E702 multiple statements on one line (semicolon)" |
+ else: |
+ yield found, "E703 statement ends with a semicolon" |
+ found = line.find(';', found + 1) |
+ |
+ |
def explicit_line_join(logical_line, tokens):
    r"""Avoid explicit line join between brackets.

    The preferred way of wrapping long lines is by using Python's implied line
    continuation inside parentheses, brackets and braces.  Long lines can be
    broken over multiple lines by wrapping expressions in parentheses.  These
    should be used in preference to using a backslash for line continuation.

    A backslash that terminates a comment is part of the comment text, not a
    line join, and is therefore never reported.

    E502: aaa = [123, \\n       123]
    E502: aaa = ("bbb " \\n       "ccc")

    Okay: aaa = [123,\n       123]
    Okay: aaa = ("bbb "\n       "ccc")
    Okay: aaa = "bbb " \\n    "ccc"

    `tokens` is the sequence of 5-tuples produced by the tokenize module for
    the logical line.  Yields ((row, column), message) for every redundant
    backslash; the position points at the backslash itself.
    """
    prev_start = prev_end = parens = 0
    # Position of the backslash ending the current physical row, if any.
    backslash = None
    # True once a comment token has been seen on the current physical row.
    comment = False
    for token_type, text, start, end, line in tokens:
        if start[0] != prev_start:
            # First token of a new row: judge the row that was just left.
            if parens and backslash and not comment:
                yield (backslash,
                       "E502 the backslash is redundant between brackets")
            comment = False
        if token_type == tokenize.COMMENT:
            comment = True
        if end[0] != prev_end:
            if line.rstrip('\r\n').endswith('\\'):
                # `line` may span several rows (multiline string); the
                # backslash sits at the end of its last row.
                backslash = (end[0], len(line.splitlines()[-1]) - 1)
            else:
                backslash = None
            prev_start = prev_end = end[0]
        else:
            prev_start = start[0]
        if token_type == tokenize.OP:
            if text in '([{':
                parens += 1
            elif text in ')]}':
                parens -= 1
+ |
+ |
def comparison_to_singleton(logical_line, noqa):
    r"""Comparison to singletons should use "is" or "is not".

    Comparisons to singletons like None should always be done
    with "is" or "is not", never the equality operators.

    Okay: if arg is not None:
    E711: if arg != None:
    E712: if arg == True:

    Also, beware of writing if x when you really mean if x is not None --
    e.g. when testing whether a variable or argument that defaults to None was
    set to some other value.  The other value might have a type (such as a
    container) that could be false in a boolean context!
    """
    # A "# noqa" marker on the line silences this check entirely.
    if noqa:
        return
    found = COMPARE_SINGLETON_REGEX.search(logical_line)
    if not found:
        return
    is_equality = found.group(1) == '=='
    singleton = found.group(2)
    negation = '' if is_equality else 'not '
    advice = "'if cond is %s:'" % (negation + singleton)
    if singleton == 'None':
        code = 'E711'
    else:
        code = 'E712'
        # For booleans, also suggest the plain truth-test spelling.
        truthy = ((singleton == 'True' and is_equality) or
                  (singleton == 'False' and not is_equality))
        advice += " or 'if %scond:'" % ('' if truthy else 'not ')
    message = "%s comparison to %s should be %s" % (code, singleton, advice)
    yield found.start(1), message
+ |
+ |
def comparison_negative(logical_line):
    r"""Negative comparison should be done using "not in" and "is not".

    Okay: if x not in y:\n    pass
    Okay: assert (X in Y or X is Z)
    Okay: if not (X in Y):\n    pass
    Okay: zz = x is not y
    E713: Z = not X in Y
    E713: if not X.B in Y:\n    pass
    E714: if not X is Y:\n    pass
    E714: Z = not X.B is Y
    """
    found = COMPARE_NEGATIVE_REGEX.search(logical_line)
    if not found:
        return
    # Group 2 holds the comparison keyword; anything but 'in' is reported
    # as an identity test.
    if found.group(2) == 'in':
        message = "E713 test for membership should be 'not in'"
    else:
        message = "E714 test for object identity should be 'is not'"
    yield found.start(1), message
+ |
+ |
+def comparison_type(logical_line): |
+ r"""Object type comparisons should always use isinstance(). |
+ |
+ Do not compare types directly. |
+ Okay: if isinstance(obj, int): |
+ E721: if type(obj) is type(1): |
-def python_3000_has_key(logical_line): |
+ When checking if an object is a string, keep in mind that it might be a |
+ unicode string too! In Python 2.3, str and unicode have a common base |
+ class, basestring, so you can do: |
+ |
+ Okay: if isinstance(obj, basestring): |
+ Okay: if type(a1) is type(b1): |
""" |
- The {}.has_key() method will be removed in the future version of |
- Python. Use the 'in' operation instead, like: |
- d = {"a": 1, "b": 2} |
- if "b" in d: |
- print d["b"] |
+ match = COMPARE_TYPE_REGEX.search(logical_line) |
+ if match: |
+ inst = match.group(1) |
+ if inst and isidentifier(inst) and inst not in SINGLETONS: |
+ return # Allow comparison for types which are not obvious |
+ yield match.start(), "E721 do not compare types, use 'isinstance()'" |
+ |
+ |
+def python_3000_has_key(logical_line, noqa): |
+ r"""The {}.has_key() method is removed in Python 3: use the 'in' operator. |
+ |
+ Okay: if "alph" in d:\n print d["alph"] |
+ W601: assert d.has_key('alph') |
""" |
pos = logical_line.find('.has_key(') |
- if pos > -1: |
- return pos, "W601 .has_key() is deprecated, use 'in'" |
+ if pos > -1 and not noqa: |
+ yield pos, "W601 .has_key() is deprecated, use 'in'" |
def python_3000_raise_comma(logical_line): |
- """ |
- When raising an exception, use "raise ValueError('message')" |
- instead of the older form "raise ValueError, 'message'". |
+ r"""When raising an exception, use "raise ValueError('message')". |
+ |
+ The older form is removed in Python 3. |
- The paren-using form is preferred because when the exception arguments |
- are long or include string formatting, you don't need to use line |
- continuation characters thanks to the containing parentheses. The older |
- form will be removed in Python 3000. |
+ Okay: raise DummyError("Message") |
+ W602: raise DummyError, "Message" |
""" |
match = RAISE_COMMA_REGEX.match(logical_line) |
- if match: |
- return match.start(1), "W602 deprecated form of raising exception" |
+ if match and not RERAISE_COMMA_REGEX.match(logical_line): |
+ yield match.end() - 1, "W602 deprecated form of raising exception" |
def python_3000_not_equal(logical_line): |
- """ |
- != can also be written <>, but this is an obsolete usage kept for |
- backwards compatibility only. New code should always use !=. |
- The older syntax is removed in Python 3000. |
+ r"""New code should always use != instead of <>. |
+ |
+ The older syntax is removed in Python 3. |
+ |
+ Okay: if a != 'no': |
+ W603: if a <> 'no': |
""" |
pos = logical_line.find('<>') |
if pos > -1: |
- return pos, "W603 '<>' is deprecated, use '!='" |
+ yield pos, "W603 '<>' is deprecated, use '!='" |
def python_3000_backticks(logical_line): |
- """ |
- Backticks are removed in Python 3000. |
- Use repr() instead. |
+ r"""Backticks are removed in Python 3: use repr() instead. |
+ |
+ Okay: val = repr(1 + 2) |
+ W604: val = `1 + 2` |
""" |
pos = logical_line.find('`') |
if pos > -1: |
- return pos, "W604 backticks are deprecated, use 'repr()'" |
+ yield pos, "W604 backticks are deprecated, use 'repr()'" |
############################################################################## |
@@ -676,22 +1033,50 @@ def python_3000_backticks(logical_line): |
############################################################################## |
if '' == ''.encode():
    # Python 2: str is a byte string, so lines are returned undecoded.
    def readlines(filename):
        """Read the source code."""
        with open(filename, 'rU') as source:
            return source.readlines()
    isidentifier = re.compile(r'[a-zA-Z_]\w*').match
    stdin_get_value = sys.stdin.read
else:
    # Python 3: decode with the encoding detected by the tokenize module.
    def readlines(filename):
        """Read the source code."""
        try:
            with open(filename, 'rb') as raw:
                (encoding, head) = tokenize.detect_encoding(raw.readline)
                reader = TextIOWrapper(raw, encoding, line_buffering=True)
                # detect_encoding() already consumed the lines in `head`;
                # decode them by hand and let the wrapper read the rest.
                decoded = [chunk.decode(encoding) for chunk in head]
                return decoded + reader.readlines()
        except (LookupError, SyntaxError, UnicodeError):
            # Fall back if file encoding is improperly declared
            with open(filename, encoding='latin-1') as source:
                return source.readlines()
    isidentifier = str.isidentifier

    def stdin_get_value():
        return TextIOWrapper(sys.stdin.buffer, errors='ignore').read()
# Search function matching a "# noqa" / "# nopep8" marker (case-insensitive).
noqa = re.compile(r'# no(?:qa|pep8)\b', re.I).search
+ |
+ |
def expand_indent(line): |
- """ |
- Return the amount of indentation. |
+ r"""Return the amount of indentation. |
+ |
Tabs are expanded to the next multiple of 8. |
>>> expand_indent(' ') |
4 |
- >>> expand_indent('\\t') |
+ >>> expand_indent('\t') |
8 |
- >>> expand_indent(' \\t') |
+ >>> expand_indent(' \t') |
8 |
- >>> expand_indent(' \\t') |
- 8 |
- >>> expand_indent(' \\t') |
+ >>> expand_indent(' \t') |
16 |
""" |
+ if '\t' not in line: |
+ return len(line) - len(line.lstrip()) |
result = 0 |
for char in line: |
if char == '\t': |
@@ -704,8 +1089,7 @@ def expand_indent(line): |
def mute_string(text): |
- """ |
- Replace contents with 'xxx' to prevent syntax matching. |
+ """Replace contents with 'xxx' to prevent syntax matching. |
>>> mute_string('"abc"') |
'"xxx"' |
@@ -714,25 +1098,73 @@ def mute_string(text): |
>>> mute_string("r'abc'") |
"r'xxx'" |
""" |
- start = 1 |
- end = len(text) - 1 |
# String modifiers (e.g. u or r) |
- if text.endswith('"'): |
- start += text.index('"') |
- elif text.endswith("'"): |
- start += text.index("'") |
+ start = text.index(text[-1]) + 1 |
+ end = len(text) - 1 |
# Triple quotes |
- if text.endswith('"""') or text.endswith("'''"): |
+ if text[-3:] in ('"""', "'''"): |
start += 2 |
end -= 2 |
return text[:start] + 'x' * (end - start) + text[end:] |
-def message(text): |
- """Print a message.""" |
- # print >> sys.stderr, options.prog + ': ' + text |
- # print >> sys.stderr, text |
- print(text) |
def parse_udiff(diff, patterns=None, parent='.'):
    """Return a dictionary of matching lines.

    `diff` is the text of a unified diff.  For each file of the diff, the
    entry key is the filename (joined to `parent`), and the value is the set
    of row numbers given by the hunk headers.  Files without any row, or
    whose name is rejected by filename_match(path, patterns), are left out.

    Malformed input is tolerated: a hunk header that does not parse, or one
    that appears before any '+++' file header, is skipped instead of raising.
    """
    rv = {}
    path = None
    nrows = 0
    for line in diff.splitlines():
        if nrows:
            # Inside a hunk body: removed lines do not consume a row.
            if line[:1] != '-':
                nrows -= 1
            continue
        if line[:3] == '@@ ':
            hunk_match = HUNK_REGEX.match(line)
            if hunk_match is None:
                # Unparseable hunk header: nothing to record.
                continue
            (row, nrows) = [int(g or '1') for g in hunk_match.groups()]
            if path is not None:
                rv[path].update(range(row, row + nrows))
        elif line[:3] == '+++':
            path = line[4:].split('\t', 1)[0]
            if path[:2] == 'b/':
                path = path[2:]
            rv[path] = set()
    return dict((os.path.join(parent, filepath), rows)
                for (filepath, rows) in rv.items()
                if rows and filename_match(filepath, patterns))
+ |
+ |
def normalize_paths(value, parent=os.curdir):
    """Parse a comma-separated list of paths.

    Whitespace around each entry is ignored.  Entries containing a '/' are
    made absolute relative to `parent`; other entries are kept as given.
    Trailing slashes are removed from every entry.

    Return the list of normalized paths.  A false value (None, '') or an
    existing list is returned unchanged.
    """
    if not value or isinstance(value, list):
        return value
    paths = []
    for path in value.split(','):
        # "a, b" must yield "b", not " b", or the entry never matches.
        path = path.strip()
        if '/' in path:
            path = os.path.abspath(os.path.join(parent, path))
        paths.append(path.rstrip('/'))
    return paths
+ |
+ |
def filename_match(filename, patterns, default=True):
    """Check if patterns contains a pattern that matches filename.

    If patterns is unspecified (or empty), this returns `default`.
    """
    if patterns:
        for pattern in patterns:
            if fnmatch(filename, pattern):
                return True
        return False
    return default
+ |
+ |
if COMMENT_WITH_NL:
    def _is_eol_token(token):
        """Tell whether `token` ends a physical line."""
        token_type = token[0]
        if token_type in NEWLINE:
            return True
        # A comment whose text equals its whole source line also counts
        # as the end of that line.
        return token_type == tokenize.COMMENT and token[1] == token[4]
else:
    def _is_eol_token(token):
        """Tell whether `token` ends a physical line."""
        return token[0] in NEWLINE
############################################################################## |
@@ -740,436 +1172,603 @@ def message(text): |
############################################################################## |
-def find_checks(argument_name): |
- """ |
- Find all globally visible functions where the first argument name |
- starts with argument_name. |
# Registry of checks: kind -> {check object: (list of codes, argument names)}.
_checks = {'physical_line': {}, 'logical_line': {}, 'tree': {}}


def _get_parameters(function):
    """Return the names of the positional parameters of `function`."""
    if hasattr(inspect, 'signature'):
        # inspect.getargspec() was removed in Python 3.11.
        positional = (inspect.Parameter.POSITIONAL_ONLY,
                      inspect.Parameter.POSITIONAL_OR_KEYWORD)
        parameters = inspect.signature(function).parameters.values()
        return [parameter.name for parameter in parameters
                if parameter.kind in positional]
    return inspect.getargspec(function)[0]


def register_check(check, codes=None):
    """Register a new check object.

    A function is registered when its first parameter is named
    'physical_line' or 'logical_line'; if `codes` is None, its codes are
    extracted from its docstring.  A class is registered as a 'tree' check
    when its __init__ starts with the parameters (self, tree).  Any other
    object is ignored.  Registering the same check again extends its list
    of codes.
    """
    def _add_check(check, kind, codes, args):
        if check in _checks[kind]:
            _checks[kind][check][0].extend(codes or [])
        else:
            _checks[kind][check] = (codes or [''], args)
    if inspect.isfunction(check):
        args = _get_parameters(check)
        if args and args[0] in ('physical_line', 'logical_line'):
            if codes is None:
                codes = ERRORCODE_REGEX.findall(check.__doc__ or '')
            _add_check(check, args[0], codes, args)
    elif inspect.isclass(check):
        if _get_parameters(check.__init__)[:2] == ['self', 'tree']:
            _add_check(check, 'tree', codes, None)
+ |
+ |
+def init_checks_registry(): |
+ """Register all globally visible functions. |
+ |
+ The first argument name is either 'physical_line' or 'logical_line'. |
""" |
- checks = [] |
- for name, function in globals().items(): |
- if not inspect.isfunction(function): |
- continue |
- args = inspect.getargspec(function)[0] |
- if args and args[0].startswith(argument_name): |
- codes = ERRORCODE_REGEX.findall(inspect.getdoc(function) or '') |
- for code in codes or ['']: |
- if not code or not ignore_code(code): |
- checks.append((name, function, args)) |
- break |
- checks.sort() |
- return checks |
+ mod = inspect.getmodule(register_check) |
+ for (name, function) in inspect.getmembers(mod, inspect.isfunction): |
+ register_check(function) |
+init_checks_registry() |
class Checker(object): |
- """ |
- Load a Python source file, tokenize it, check coding style. |
- """ |
+ """Load a Python source file, tokenize it, check coding style.""" |
- def __init__(self, filename): |
- if filename: |
- self.filename = filename |
- try: |
- self.lines = open(filename).readlines() |
- except UnicodeDecodeError: |
- # Errors may occur with non-UTF8 files in Python 3000 |
- self.lines = open(filename, errors='replace').readlines() |
+ def __init__(self, filename=None, lines=None, |
+ options=None, report=None, **kwargs): |
+ if options is None: |
+ options = StyleGuide(kwargs).options |
else: |
+ assert not kwargs |
+ self._io_error = None |
+ self._physical_checks = options.physical_checks |
+ self._logical_checks = options.logical_checks |
+ self._ast_checks = options.ast_checks |
+ self.max_line_length = options.max_line_length |
+ self.multiline = False # in a multiline string? |
+ self.hang_closing = options.hang_closing |
+ self.verbose = options.verbose |
+ self.filename = filename |
+ if filename is None: |
self.filename = 'stdin' |
- self.lines = [] |
- options.counters['physical lines'] = \ |
- options.counters.get('physical lines', 0) + len(self.lines) |
+ self.lines = lines or [] |
+ elif filename == '-': |
+ self.filename = 'stdin' |
+ self.lines = stdin_get_value().splitlines(True) |
+ elif lines is None: |
+ try: |
+ self.lines = readlines(filename) |
+ except IOError: |
+ (exc_type, exc) = sys.exc_info()[:2] |
+ self._io_error = '%s: %s' % (exc_type.__name__, exc) |
+ self.lines = [] |
+ else: |
+ self.lines = lines |
+ if self.lines: |
+ ord0 = ord(self.lines[0][0]) |
+ if ord0 in (0xef, 0xfeff): # Strip the UTF-8 BOM |
+ if ord0 == 0xfeff: |
+ self.lines[0] = self.lines[0][1:] |
+ elif self.lines[0][:3] == '\xef\xbb\xbf': |
+ self.lines[0] = self.lines[0][3:] |
+ self.report = report or options.report |
+ self.report_error = self.report.error |
+ |
+ def report_invalid_syntax(self): |
+ """Check if the syntax is valid.""" |
+ (exc_type, exc) = sys.exc_info()[:2] |
+ if len(exc.args) > 1: |
+ offset = exc.args[1] |
+ if len(offset) > 2: |
+ offset = offset[1:3] |
+ else: |
+ offset = (1, 0) |
+ self.report_error(offset[0], offset[1] or 0, |
+ 'E901 %s: %s' % (exc_type.__name__, exc.args[0]), |
+ self.report_invalid_syntax) |
def readline(self): |
- """ |
- Get the next line from the input buffer. |
- """ |
- self.line_number += 1 |
- if self.line_number > len(self.lines): |
+ """Get the next line from the input buffer.""" |
+ if self.line_number >= self.total_lines: |
return '' |
- return self.lines[self.line_number - 1] |
- |
- def readline_check_physical(self): |
- """ |
- Check and return the next physical line. This method can be |
- used to feed tokenize.generate_tokens. |
- """ |
- line = self.readline() |
- if line: |
- self.check_physical(line) |
+ line = self.lines[self.line_number] |
+ self.line_number += 1 |
+ if self.indent_char is None and line[:1] in WHITESPACE: |
+ self.indent_char = line[0] |
return line |
def run_check(self, check, argument_names): |
- """ |
- Run a check plugin. |
- """ |
+ """Run a check plugin.""" |
arguments = [] |
for name in argument_names: |
arguments.append(getattr(self, name)) |
return check(*arguments) |
def check_physical(self, line): |
- """ |
- Run all physical checks on a raw input line. |
- """ |
+ """Run all physical checks on a raw input line.""" |
self.physical_line = line |
- if self.indent_char is None and len(line) and line[0] in ' \t': |
- self.indent_char = line[0] |
- for name, check, argument_names in options.physical_checks: |
+ for name, check, argument_names in self._physical_checks: |
result = self.run_check(check, argument_names) |
if result is not None: |
- offset, text = result |
+ (offset, text) = result |
self.report_error(self.line_number, offset, text, check) |
+ if text[:4] == 'E101': |
+ self.indent_char = line[0] |
def build_tokens_line(self): |
- """ |
- Build a logical line from tokens. |
- """ |
- self.mapping = [] |
+ """Build a logical line from tokens.""" |
logical = [] |
+ comments = [] |
length = 0 |
- previous = None |
- for token in self.tokens: |
- token_type, text = token[0:2] |
- if token_type in (tokenize.COMMENT, tokenize.NL, |
- tokenize.INDENT, tokenize.DEDENT, |
- tokenize.NEWLINE): |
+ prev_row = prev_col = mapping = None |
+ for token_type, text, start, end, line in self.tokens: |
+ if token_type in SKIP_TOKENS: |
+ continue |
+ if not mapping: |
+ mapping = [(0, start)] |
+ if token_type == tokenize.COMMENT: |
+ comments.append(text) |
continue |
if token_type == tokenize.STRING: |
text = mute_string(text) |
- if previous: |
- end_line, end = previous[3] |
- start_line, start = token[2] |
- if end_line != start_line: # different row |
- if self.lines[end_line - 1][end - 1] not in '{[(': |
- logical.append(' ') |
- length += 1 |
- elif end != start: # different column |
- fill = self.lines[end_line - 1][end:start] |
- logical.append(fill) |
- length += len(fill) |
- self.mapping.append((length, token)) |
+ if prev_row: |
+ (start_row, start_col) = start |
+ if prev_row != start_row: # different row |
+ prev_text = self.lines[prev_row - 1][prev_col - 1] |
+ if prev_text == ',' or (prev_text not in '{[(' |
+ and text not in '}])'): |
+ text = ' ' + text |
+ elif prev_col != start_col: # different column |
+ text = line[prev_col:start_col] + text |
logical.append(text) |
length += len(text) |
- previous = token |
+ mapping.append((length, end)) |
+ (prev_row, prev_col) = end |
self.logical_line = ''.join(logical) |
- assert self.logical_line.lstrip() == self.logical_line |
- assert self.logical_line.rstrip() == self.logical_line |
+ self.noqa = comments and noqa(''.join(comments)) |
+ return mapping |
def check_logical(self): |
- """ |
- Build a line from tokens and run all logical checks on it. |
- """ |
- options.counters['logical lines'] = \ |
- options.counters.get('logical lines', 0) + 1 |
- self.build_tokens_line() |
- first_line = self.lines[self.mapping[0][1][2][0] - 1] |
- indent = first_line[:self.mapping[0][1][2][1]] |
- self.previous_indent_level = self.indent_level |
- self.indent_level = expand_indent(indent) |
- if options.verbose >= 2: |
+ """Build a line from tokens and run all logical checks on it.""" |
+ self.report.increment_logical_line() |
+ mapping = self.build_tokens_line() |
+ (start_row, start_col) = mapping[0][1] |
+ start_line = self.lines[start_row - 1] |
+ self.indent_level = expand_indent(start_line[:start_col]) |
+ if self.blank_before < self.blank_lines: |
+ self.blank_before = self.blank_lines |
+ if self.verbose >= 2: |
print(self.logical_line[:80].rstrip()) |
- for name, check, argument_names in options.logical_checks: |
- if options.verbose >= 3: |
- print(' ', name) |
- result = self.run_check(check, argument_names) |
- if result is not None: |
- offset, text = result |
- if isinstance(offset, tuple): |
- original_number, original_offset = offset |
- else: |
- for token_offset, token in self.mapping: |
- if offset >= token_offset: |
- original_number = token[2][0] |
- original_offset = (token[2][1] |
- + offset - token_offset) |
- self.report_error(original_number, original_offset, |
- text, check) |
- self.previous_logical = self.logical_line |
- |
- def check_all(self): |
- """ |
- Run all checks on the input file. |
- """ |
- self.file_errors = 0 |
+ for name, check, argument_names in self._logical_checks: |
+ if self.verbose >= 4: |
+ print(' ' + name) |
+ for offset, text in self.run_check(check, argument_names) or (): |
+ if not isinstance(offset, tuple): |
+ for token_offset, pos in mapping: |
+ if offset <= token_offset: |
+ break |
+ offset = (pos[0], pos[1] + offset - token_offset) |
+ self.report_error(offset[0], offset[1], text, check) |
+ if self.logical_line: |
+ self.previous_indent_level = self.indent_level |
+ self.previous_logical = self.logical_line |
+ self.blank_lines = 0 |
+ self.tokens = [] |
+ |
+ def check_ast(self): |
+ """Build the file's AST and run all AST checks.""" |
+ try: |
+ tree = compile(''.join(self.lines), '', 'exec', PyCF_ONLY_AST) |
+ except (SyntaxError, TypeError): |
+ return self.report_invalid_syntax() |
+ for name, cls, __ in self._ast_checks: |
+ checker = cls(tree, self.filename) |
+ for lineno, offset, text, check in checker.run(): |
+ if not self.lines or not noqa(self.lines[lineno - 1]): |
+ self.report_error(lineno, offset, text, check) |
+ |
+ def generate_tokens(self): |
+ """Tokenize the file, run physical line checks and yield tokens.""" |
+ if self._io_error: |
+ self.report_error(1, 0, 'E902 %s' % self._io_error, readlines) |
+ tokengen = tokenize.generate_tokens(self.readline) |
+ try: |
+ for token in tokengen: |
+ if token[2][0] > self.total_lines: |
+ return |
+ self.maybe_check_physical(token) |
+ yield token |
+ except (SyntaxError, tokenize.TokenError): |
+ self.report_invalid_syntax() |
+ |
+ def maybe_check_physical(self, token): |
+ """If appropriate (based on token), check current physical line(s).""" |
+ # Called after every token, but act only on end of line. |
+ if _is_eol_token(token): |
+ # Obviously, a newline token ends a single physical line. |
+ self.check_physical(token[4]) |
+ elif token[0] == tokenize.STRING and '\n' in token[1]: |
+ # Less obviously, a string that contains newlines is a |
+ # multiline string, either triple-quoted or with internal |
+ # newlines backslash-escaped. Check every physical line in the |
+ # string *except* for the last one: its newline is outside of |
+ # the multiline string, so we consider it a regular physical |
+ # line, and will check it like any other physical line. |
+ # |
+ # Subtleties: |
+ # - we don't *completely* ignore the last line; if it contains |
+ # the magical "# noqa" comment, we disable all physical |
+ # checks for the entire multiline string |
+ # - have to wind self.line_number back because initially it |
+ # points to the last line of the string, and we want |
+ # check_physical() to give accurate feedback |
+ if noqa(token[4]): |
+ return |
+ self.multiline = True |
+ self.line_number = token[2][0] |
+ for line in token[1].split('\n')[:-1]: |
+ self.check_physical(line + '\n') |
+ self.line_number += 1 |
+ self.multiline = False |
+ |
+ def check_all(self, expected=None, line_offset=0): |
+ """Run all checks on the input file.""" |
+ self.report.init_file(self.filename, self.lines, expected, line_offset) |
+ self.total_lines = len(self.lines) |
+ if self._ast_checks: |
+ self.check_ast() |
self.line_number = 0 |
self.indent_char = None |
- self.indent_level = 0 |
+ self.indent_level = self.previous_indent_level = 0 |
self.previous_logical = '' |
- self.blank_lines = 0 |
- self.blank_lines_before_comment = 0 |
self.tokens = [] |
+ self.blank_lines = self.blank_before = 0 |
parens = 0 |
- for token in tokenize.generate_tokens(self.readline_check_physical): |
- # print(tokenize.tok_name[token[0]], repr(token)) |
+ for token in self.generate_tokens(): |
self.tokens.append(token) |
token_type, text = token[0:2] |
- if token_type == tokenize.OP and text in '([{': |
- parens += 1 |
- if token_type == tokenize.OP and text in '}])': |
- parens -= 1 |
- if token_type == tokenize.NEWLINE and not parens: |
- self.check_logical() |
- self.blank_lines = 0 |
- self.blank_lines_before_comment = 0 |
- self.tokens = [] |
- if token_type == tokenize.NL and not parens: |
- if len(self.tokens) <= 1: |
- # The physical line contains only this token. |
- self.blank_lines += 1 |
- self.tokens = [] |
- if token_type == tokenize.COMMENT: |
- source_line = token[4] |
- token_start = token[2][1] |
- if source_line[:token_start].strip() == '': |
- self.blank_lines_before_comment = max(self.blank_lines, |
- self.blank_lines_before_comment) |
- self.blank_lines = 0 |
- if text.endswith('\n') and not parens: |
- # The comment also ends a physical line. This works around |
- # Python < 2.6 behaviour, which does not generate NL after |
- # a comment which is on a line by itself. |
- self.tokens = [] |
- return self.file_errors |
+ if self.verbose >= 3: |
+ if token[2][0] == token[3][0]: |
+ pos = '[%s:%s]' % (token[2][1] or '', token[3][1]) |
+ else: |
+ pos = 'l.%s' % token[3][0] |
+ print('l.%s\t%s\t%s\t%r' % |
+ (token[2][0], pos, tokenize.tok_name[token[0]], text)) |
+ if token_type == tokenize.OP: |
+ if text in '([{': |
+ parens += 1 |
+ elif text in '}])': |
+ parens -= 1 |
+ elif not parens: |
+ if token_type in NEWLINE: |
+ if token_type == tokenize.NEWLINE: |
+ self.check_logical() |
+ self.blank_before = 0 |
+ elif len(self.tokens) == 1: |
+ # The physical line contains only this token. |
+ self.blank_lines += 1 |
+ del self.tokens[0] |
+ else: |
+ self.check_logical() |
+ elif COMMENT_WITH_NL and token_type == tokenize.COMMENT: |
+ if len(self.tokens) == 1: |
+ # The comment also ends a physical line |
+ token = list(token) |
+ token[1] = text.rstrip('\r\n') |
+ token[3] = (token[2][0], token[2][1] + len(token[1])) |
+ self.tokens = [tuple(token)] |
+ self.check_logical() |
+ if self.tokens: |
+ self.check_physical(self.lines[-1]) |
+ self.check_logical() |
+ return self.report.get_file_results() |
+ |
+ |
+class BaseReport(object): |
+ """Collect the results of the checks.""" |
+ |
+ print_filename = False |
+ |
+ def __init__(self, options): |
+ self._benchmark_keys = options.benchmark_keys |
+ self._ignore_code = options.ignore_code |
+ # Results |
+ self.elapsed = 0 |
+ self.total_errors = 0 |
+ self.counters = dict.fromkeys(self._benchmark_keys, 0) |
+ self.messages = {} |
+ |
+ def start(self): |
+ """Start the timer.""" |
+ self._start_time = time.time() |
+ |
+ def stop(self): |
+ """Stop the timer.""" |
+ self.elapsed = time.time() - self._start_time |
+ |
+ def init_file(self, filename, lines, expected, line_offset): |
+ """Signal a new file.""" |
+ self.filename = filename |
+ self.lines = lines |
+ self.expected = expected or () |
+ self.line_offset = line_offset |
+ self.file_errors = 0 |
+ self.counters['files'] += 1 |
+ self.counters['physical lines'] += len(lines) |
- def report_error(self, line_number, offset, text, check): |
- """ |
- Report an error, according to options. |
- """ |
- if options.quiet == 1 and not self.file_errors: |
- message(self.filename) |
- self.file_errors += 1 |
+ def increment_logical_line(self): |
+ """Signal a new logical line.""" |
+ self.counters['logical lines'] += 1 |
+ |
+ def error(self, line_number, offset, text, check): |
+ """Report an error, according to options.""" |
code = text[:4] |
- options.counters[code] = options.counters.get(code, 0) + 1 |
- options.messages[code] = text[5:] |
- if options.quiet: |
+ if self._ignore_code(code): |
return |
- if options.testsuite: |
- basename = os.path.basename(self.filename) |
- if basename[:4] != code: |
- return # Don't care about other errors or warnings |
- if 'not' not in basename: |
- return # Don't print the expected error message |
- if ignore_code(code): |
+ if code in self.counters: |
+ self.counters[code] += 1 |
+ else: |
+ self.counters[code] = 1 |
+ self.messages[code] = text[5:] |
+ # Don't care about expected errors or warnings |
+ if code in self.expected: |
return |
- if options.counters[code] == 1 or options.repeat: |
- message("%s:%s:%d: %s" % |
- (self.filename, line_number, offset + 1, text)) |
- if options.show_source: |
- line = self.lines[line_number - 1] |
- message(line.rstrip()) |
- message(' ' * offset + '^') |
- if options.show_pep8: |
- message(check.__doc__.lstrip('\n').rstrip()) |
- |
- |
-def input_file(filename): |
- """ |
- Run all checks on a Python source file. |
- """ |
- if excluded(filename): |
- return {} |
- if options.verbose: |
- message('checking ' + filename) |
- files_counter_before = options.counters.get('files', 0) |
- if options.testsuite: # Keep showing errors for multiple tests |
- options.counters = {} |
- options.counters['files'] = files_counter_before + 1 |
- errors = Checker(filename).check_all() |
- if options.testsuite: # Check if the expected error was found |
- basename = os.path.basename(filename) |
- code = basename[:4] |
- count = options.counters.get(code, 0) |
- if count == 0 and 'not' not in basename: |
- message("%s: error %s not found" % (filename, code)) |
- |
- |
-def input_dir(dirname): |
- """ |
- Check all Python source files in this directory and all subdirectories. |
- """ |
- dirname = dirname.rstrip('/') |
- if excluded(dirname): |
- return |
- for root, dirs, files in os.walk(dirname): |
- if options.verbose: |
- message('directory ' + root) |
- options.counters['directories'] = \ |
- options.counters.get('directories', 0) + 1 |
- dirs.sort() |
- for subdir in dirs: |
- if excluded(subdir): |
- dirs.remove(subdir) |
- files.sort() |
- for filename in files: |
- if filename_match(filename): |
- input_file(os.path.join(root, filename)) |
- |
- |
-def excluded(filename): |
- """ |
- Check if options.exclude contains a pattern that matches filename. |
- """ |
- basename = os.path.basename(filename) |
- for pattern in options.exclude: |
- if fnmatch(basename, pattern): |
- # print basename, 'excluded because it matches', pattern |
- return True |
- |
- |
-def filename_match(filename): |
- """ |
- Check if options.filename contains a pattern that matches filename. |
- If options.filename is unspecified, this always returns True. |
- """ |
- if not options.filename: |
- return True |
- for pattern in options.filename: |
- if fnmatch(filename, pattern): |
- return True |
- |
- |
-def ignore_code(code): |
- """ |
- Check if options.ignore contains a prefix of the error code. |
- If options.select contains a prefix of the error code, do not ignore it. |
- """ |
- for select in options.select: |
- if code.startswith(select): |
- return False |
- for ignore in options.ignore: |
- if code.startswith(ignore): |
- return True |
- |
- |
-def get_error_statistics(): |
- """Get error statistics.""" |
- return get_statistics("E") |
- |
+ if self.print_filename and not self.file_errors: |
+ print(self.filename) |
+ self.file_errors += 1 |
+ self.total_errors += 1 |
+ return code |
-def get_warning_statistics(): |
- """Get warning statistics.""" |
- return get_statistics("W") |
+ def get_file_results(self): |
+ """Return the count of errors and warnings for this file.""" |
+ return self.file_errors |
+ def get_count(self, prefix=''): |
+ """Return the total count of errors and warnings.""" |
+ return sum([self.counters[key] |
+ for key in self.messages if key.startswith(prefix)]) |
-def get_statistics(prefix=''): |
- """ |
- Get statistics for message codes that start with the prefix. |
+ def get_statistics(self, prefix=''): |
+ """Get statistics for message codes that start with the prefix. |
- prefix='' matches all errors and warnings |
- prefix='E' matches all errors |
- prefix='W' matches all warnings |
- prefix='E4' matches all errors that have to do with imports |
- """ |
- stats = [] |
- keys = list(options.messages.keys()) |
- keys.sort() |
- for key in keys: |
- if key.startswith(prefix): |
- stats.append('%-7s %s %s' % |
- (options.counters[key], key, options.messages[key])) |
- return stats |
+ prefix='' matches all errors and warnings |
+ prefix='E' matches all errors |
+ prefix='W' matches all warnings |
+ prefix='E4' matches all errors that have to do with imports |
+ """ |
+ return ['%-7s %s %s' % (self.counters[key], key, self.messages[key]) |
+ for key in sorted(self.messages) if key.startswith(prefix)] |
+ |
+ def print_statistics(self, prefix=''): |
+ """Print overall statistics (number of errors and warnings).""" |
+ for line in self.get_statistics(prefix): |
+ print(line) |
+ |
+ def print_benchmark(self): |
+ """Print benchmark numbers.""" |
+ print('%-7.2f %s' % (self.elapsed, 'seconds elapsed')) |
+ if self.elapsed: |
+ for key in self._benchmark_keys: |
+ print('%-7d %s per second (%d total)' % |
+ (self.counters[key] / self.elapsed, key, |
+ self.counters[key])) |
+ |
+ |
+class FileReport(BaseReport): |
+ """Collect the results of the checks and print only the filenames.""" |
+ print_filename = True |
+ |
+ |
+class StandardReport(BaseReport): |
+ """Collect and print the results of the checks.""" |
+ |
+ def __init__(self, options): |
+ super(StandardReport, self).__init__(options) |
+ self._fmt = REPORT_FORMAT.get(options.format.lower(), |
+ options.format) |
+ self._repeat = options.repeat |
+ self._show_source = options.show_source |
+ self._show_pep8 = options.show_pep8 |
+ |
+ def init_file(self, filename, lines, expected, line_offset): |
+ """Signal a new file.""" |
+ self._deferred_print = [] |
+ return super(StandardReport, self).init_file( |
+ filename, lines, expected, line_offset) |
+ |
+ def error(self, line_number, offset, text, check): |
+ """Report an error, according to options.""" |
+ code = super(StandardReport, self).error(line_number, offset, |
+ text, check) |
+ if code and (self.counters[code] == 1 or self._repeat): |
+ self._deferred_print.append( |
+ (line_number, offset, code, text[5:], check.__doc__)) |
+ return code |
+ |
+ def get_file_results(self): |
+ """Print the result and return the overall count for this file.""" |
+ self._deferred_print.sort() |
+ for line_number, offset, code, text, doc in self._deferred_print: |
+ print(self._fmt % { |
+ 'path': self.filename, |
+ 'row': self.line_offset + line_number, 'col': offset + 1, |
+ 'code': code, 'text': text, |
+ }) |
+ if self._show_source: |
+ if line_number > len(self.lines): |
+ line = '' |
+ else: |
+ line = self.lines[line_number - 1] |
+ print(line.rstrip()) |
+ print(re.sub(r'\S', ' ', line[:offset]) + '^') |
+ if self._show_pep8 and doc: |
+ print(' ' + doc.strip()) |
+ return self.file_errors |
-def get_count(prefix=''): |
- """Return the total count of errors and warnings.""" |
- keys = list(options.messages.keys()) |
- count = 0 |
- for key in keys: |
- if key.startswith(prefix): |
- count += options.counters[key] |
- return count |
+class DiffReport(StandardReport): |
+ """Collect and print the results for the changed lines only.""" |
+ def __init__(self, options): |
+ super(DiffReport, self).__init__(options) |
+ self._selected = options.selected_lines |
-def print_statistics(prefix=''): |
- """Print overall statistics (number of errors and warnings).""" |
- for line in get_statistics(prefix): |
- print(line) |
+ def error(self, line_number, offset, text, check): |
+ if line_number not in self._selected[self.filename]: |
+ return |
+ return super(DiffReport, self).error(line_number, offset, text, check) |
+ |
+ |
+class StyleGuide(object): |
+ """Initialize a PEP-8 instance with few options.""" |
+ |
+ def __init__(self, *args, **kwargs): |
+ # build options from the command line |
+ self.checker_class = kwargs.pop('checker_class', Checker) |
+ parse_argv = kwargs.pop('parse_argv', False) |
+ config_file = kwargs.pop('config_file', None) |
+ parser = kwargs.pop('parser', None) |
+ # build options from dict |
+ options_dict = dict(*args, **kwargs) |
+ arglist = None if parse_argv else options_dict.get('paths', None) |
+ options, self.paths = process_options( |
+ arglist, parse_argv, config_file, parser) |
+ if options_dict: |
+ options.__dict__.update(options_dict) |
+ if 'paths' in options_dict: |
+ self.paths = options_dict['paths'] |
+ |
+ self.runner = self.input_file |
+ self.options = options |
+ |
+ if not options.reporter: |
+ options.reporter = BaseReport if options.quiet else StandardReport |
+ |
+ options.select = tuple(options.select or ()) |
+ if not (options.select or options.ignore or |
+ options.testsuite or options.doctest) and DEFAULT_IGNORE: |
+ # The default choice: ignore controversial checks |
+ options.ignore = tuple(DEFAULT_IGNORE.split(',')) |
+ else: |
+ # Ignore all checks which are not explicitly selected |
+ options.ignore = ('',) if options.select else tuple(options.ignore) |
+ options.benchmark_keys = BENCHMARK_KEYS[:] |
+ options.ignore_code = self.ignore_code |
+ options.physical_checks = self.get_checks('physical_line') |
+ options.logical_checks = self.get_checks('logical_line') |
+ options.ast_checks = self.get_checks('tree') |
+ self.init_report() |
+ |
+ def init_report(self, reporter=None): |
+ """Initialize the report instance.""" |
+ self.options.report = (reporter or self.options.reporter)(self.options) |
+ return self.options.report |
+ |
+ def check_files(self, paths=None): |
+ """Run all checks on the paths.""" |
+ if paths is None: |
+ paths = self.paths |
+ report = self.options.report |
+ runner = self.runner |
+ report.start() |
+ try: |
+ for path in paths: |
+ if os.path.isdir(path): |
+ self.input_dir(path) |
+ elif not self.excluded(path): |
+ runner(path) |
+ except KeyboardInterrupt: |
+ print('... stopped') |
+ report.stop() |
+ return report |
+ |
+ def input_file(self, filename, lines=None, expected=None, line_offset=0): |
+ """Run all checks on a Python source file.""" |
+ if self.options.verbose: |
+ print('checking %s' % filename) |
+ fchecker = self.checker_class( |
+ filename, lines=lines, options=self.options) |
+ return fchecker.check_all(expected=expected, line_offset=line_offset) |
+ |
+ def input_dir(self, dirname): |
+ """Check all files in this directory and all subdirectories.""" |
+ dirname = dirname.rstrip('/') |
+ if self.excluded(dirname): |
+ return 0 |
+ counters = self.options.report.counters |
+ verbose = self.options.verbose |
+ filepatterns = self.options.filename |
+ runner = self.runner |
+ for root, dirs, files in os.walk(dirname): |
+ if verbose: |
+ print('directory ' + root) |
+ counters['directories'] += 1 |
+ for subdir in sorted(dirs): |
+ if self.excluded(subdir, root): |
+ dirs.remove(subdir) |
+ for filename in sorted(files): |
+ # contain a pattern that matches? |
+ if ((filename_match(filename, filepatterns) and |
+ not self.excluded(filename, root))): |
+ runner(os.path.join(root, filename)) |
+ |
+ def excluded(self, filename, parent=None): |
+ """Check if the file should be excluded. |
+ |
+ Check if 'options.exclude' contains a pattern that matches filename. |
+ """ |
+ if not self.options.exclude: |
+ return False |
+ basename = os.path.basename(filename) |
+ if filename_match(basename, self.options.exclude): |
+ return True |
+ if parent: |
+ filename = os.path.join(parent, filename) |
+ filename = os.path.abspath(filename) |
+ return filename_match(filename, self.options.exclude) |
+ def ignore_code(self, code): |
+ """Check if the error code should be ignored. |
-def print_benchmark(elapsed): |
- """ |
- Print benchmark numbers. |
- """ |
- print('%-7.2f %s' % (elapsed, 'seconds elapsed')) |
- keys = ['directories', 'files', |
- 'logical lines', 'physical lines'] |
- for key in keys: |
- if key in options.counters: |
- print('%-7d %s per second (%d total)' % ( |
- options.counters[key] / elapsed, key, |
- options.counters[key])) |
+ If 'options.select' contains a prefix of the error code, |
+ return False. Else, if 'options.ignore' contains a prefix of |
+ the error code, return True. |
+ """ |
+ if len(code) < 4 and any(s.startswith(code) |
+ for s in self.options.select): |
+ return False |
+ return (code.startswith(self.options.ignore) and |
+ not code.startswith(self.options.select)) |
+ def get_checks(self, argument_name): |
+ """Get all the checks for this category. |
-def selftest(): |
- """ |
- Test all check functions with test cases in docstrings. |
- """ |
- count_passed = 0 |
- count_failed = 0 |
- checks = options.physical_checks + options.logical_checks |
- for name, check, argument_names in checks: |
- for line in check.__doc__.splitlines(): |
- line = line.lstrip() |
- match = SELFTEST_REGEX.match(line) |
- if match is None: |
- continue |
- code, source = match.groups() |
- checker = Checker(None) |
- for part in source.split(r'\n'): |
- part = part.replace(r'\t', '\t') |
- part = part.replace(r'\s', ' ') |
- checker.lines.append(part + '\n') |
- options.quiet = 2 |
- options.counters = {} |
- checker.check_all() |
- error = None |
- if code == 'Okay': |
- if len(options.counters) > 1: |
- codes = [key for key in options.counters.keys() |
- if key != 'logical lines'] |
- error = "incorrectly found %s" % ', '.join(codes) |
- elif options.counters.get(code, 0) == 0: |
- error = "failed to find %s" % code |
- if not error: |
- count_passed += 1 |
- else: |
- count_failed += 1 |
- if len(checker.lines) == 1: |
- print("pep8.py: %s: %s" % |
- (error, checker.lines[0].rstrip())) |
- else: |
- print("pep8.py: %s:" % error) |
- for line in checker.lines: |
- print(line.rstrip()) |
- if options.verbose: |
- print("%d passed and %d failed." % (count_passed, count_failed)) |
- if count_failed: |
- print("Test failed.") |
- else: |
- print("Test passed.") |
+ Find all globally visible functions where the first argument name |
+ starts with argument_name and which contain selected tests. |
+ """ |
+ checks = [] |
+ for check, attrs in _checks[argument_name].items(): |
+ (codes, args) = attrs |
+ if any(not (code and self.ignore_code(code)) for code in codes): |
+ checks.append((check.__name__, check, args)) |
+ return sorted(checks) |
-def process_options(arglist=None): |
- """ |
- Process options passed either via arglist or via command line args. |
- """ |
- global options, args |
- parser = OptionParser(version=__version__, |
+def get_parser(prog='pep8', version=__version__): |
+ parser = OptionParser(prog=prog, version=version, |
usage="%prog [options] input ...") |
+ parser.config_options = [ |
+ 'exclude', 'filename', 'select', 'ignore', 'max-line-length', |
+ 'hang-closing', 'count', 'format', 'quiet', 'show-pep8', |
+ 'show-source', 'statistics', 'verbose'] |
parser.add_option('-v', '--verbose', default=0, action='count', |
help="print status messages, or debug with -vv") |
parser.add_option('-q', '--quiet', default=0, action='count', |
help="report only file names, or nothing with -qq") |
- parser.add_option('-r', '--repeat', action='store_true', |
- help="show all occurrences of the same error") |
+ parser.add_option('-r', '--repeat', default=True, action='store_true', |
+ help="(obsolete) show all occurrences of the same error") |
+ parser.add_option('--first', action='store_false', dest='repeat', |
+ help="show first occurrence of each error") |
parser.add_option('--exclude', metavar='patterns', default=DEFAULT_EXCLUDE, |
help="exclude files or directories which match these " |
- "comma separated patterns (default: %s)" % |
- DEFAULT_EXCLUDE) |
+ "comma separated patterns (default: %default)") |
parser.add_option('--filename', metavar='patterns', default='*.py', |
help="when parsing directories, only check filenames " |
- "matching these comma separated patterns (default: " |
- "*.py)") |
+ "matching these comma separated patterns " |
+ "(default: %default)") |
parser.add_option('--select', metavar='errors', default='', |
help="select errors and warnings (e.g. E,W6)") |
parser.add_option('--ignore', metavar='errors', default='', |
@@ -1177,78 +1776,167 @@ def process_options(arglist=None): |
parser.add_option('--show-source', action='store_true', |
help="show source code for each error") |
parser.add_option('--show-pep8', action='store_true', |
- help="show text of PEP 8 for each error") |
+ help="show text of PEP 8 for each error " |
+ "(implies --first)") |
parser.add_option('--statistics', action='store_true', |
help="count errors and warnings") |
parser.add_option('--count', action='store_true', |
help="print total number of errors and warnings " |
- "to standard error and set exit code to 1 if " |
- "total is not null") |
- parser.add_option('--benchmark', action='store_true', |
- help="measure processing speed") |
- parser.add_option('--testsuite', metavar='dir', |
- help="run regression tests from dir") |
- parser.add_option('--doctest', action='store_true', |
- help="run doctest on myself") |
- options, args = parser.parse_args(arglist) |
- if options.testsuite: |
+ "to standard error and set exit code to 1 if " |
+ "total is not null") |
+ parser.add_option('--max-line-length', type='int', metavar='n', |
+ default=MAX_LINE_LENGTH, |
+ help="set maximum allowed line length " |
+ "(default: %default)") |
+ parser.add_option('--hang-closing', action='store_true', |
+ help="hang closing bracket instead of matching " |
+ "indentation of opening bracket's line") |
+ parser.add_option('--format', metavar='format', default='default', |
+ help="set the error format [default|pylint|<custom>]") |
+ parser.add_option('--diff', action='store_true', |
+ help="report only lines changed according to the " |
+ "unified diff received on STDIN") |
+ group = parser.add_option_group("Testing Options") |
+ if os.path.exists(TESTSUITE_PATH): |
+ group.add_option('--testsuite', metavar='dir', |
+ help="run regression tests from dir") |
+ group.add_option('--doctest', action='store_true', |
+ help="run doctest on myself") |
+ group.add_option('--benchmark', action='store_true', |
+ help="measure processing speed") |
+ return parser |
+ |
+ |
+def read_config(options, args, arglist, parser): |
+ """Read both user configuration and local configuration.""" |
+ config = RawConfigParser() |
+ |
+ user_conf = options.config |
+ if user_conf and os.path.isfile(user_conf): |
+ if options.verbose: |
+ print('user configuration: %s' % user_conf) |
+ config.read(user_conf) |
+ |
+ local_dir = os.curdir |
+ parent = tail = args and os.path.abspath(os.path.commonprefix(args)) |
+ while tail: |
+ if config.read([os.path.join(parent, fn) for fn in PROJECT_CONFIG]): |
+ local_dir = parent |
+ if options.verbose: |
+ print('local configuration: in %s' % parent) |
+ break |
+ (parent, tail) = os.path.split(parent) |
+ |
+ pep8_section = parser.prog |
+ if config.has_section(pep8_section): |
+ option_list = dict([(o.dest, o.type or o.action) |
+ for o in parser.option_list]) |
+ |
+ # First, read the default values |
+ (new_options, __) = parser.parse_args([]) |
+ |
+ # Second, parse the configuration |
+ for opt in config.options(pep8_section): |
+ if opt.replace('_', '-') not in parser.config_options: |
+ print(" unknown option '%s' ignored" % opt) |
+ continue |
+ if options.verbose > 1: |
+ print(" %s = %s" % (opt, config.get(pep8_section, opt))) |
+ normalized_opt = opt.replace('-', '_') |
+ opt_type = option_list[normalized_opt] |
+ if opt_type in ('int', 'count'): |
+ value = config.getint(pep8_section, opt) |
+ elif opt_type == 'string': |
+ value = config.get(pep8_section, opt) |
+ if normalized_opt == 'exclude': |
+ value = normalize_paths(value, local_dir) |
+ else: |
+ assert opt_type in ('store_true', 'store_false') |
+ value = config.getboolean(pep8_section, opt) |
+ setattr(new_options, normalized_opt, value) |
+ |
+ # Third, overwrite with the command-line options |
+ (options, __) = parser.parse_args(arglist, values=new_options) |
+ options.doctest = options.testsuite = False |
+ return options |
+ |
+ |
+def process_options(arglist=None, parse_argv=False, config_file=None, |
+ parser=None): |
+ """Process options passed either via arglist or via command line args.""" |
+ if not parser: |
+ parser = get_parser() |
+ if not parser.has_option('--config'): |
+ if config_file is True: |
+ config_file = DEFAULT_CONFIG |
+ group = parser.add_option_group("Configuration", description=( |
+ "The project options are read from the [%s] section of the " |
+ "tox.ini file or the setup.cfg file located in any parent folder " |
+ "of the path(s) being processed. Allowed options are: %s." % |
+ (parser.prog, ', '.join(parser.config_options)))) |
+ group.add_option('--config', metavar='path', default=config_file, |
+ help="user config file location (default: %default)") |
+ # Don't read the command line if the module is used as a library. |
+ if not arglist and not parse_argv: |
+ arglist = [] |
+ # If parse_argv is True and arglist is None, arguments are |
+ # parsed from the command line (sys.argv) |
+ (options, args) = parser.parse_args(arglist) |
+ options.reporter = None |
+ |
+ if options.ensure_value('testsuite', False): |
args.append(options.testsuite) |
- if len(args) == 0 and not options.doctest: |
- parser.error('input not specified') |
- options.prog = os.path.basename(sys.argv[0]) |
- options.exclude = options.exclude.split(',') |
- for index in range(len(options.exclude)): |
- options.exclude[index] = options.exclude[index].rstrip('/') |
- if options.filename: |
- options.filename = options.filename.split(',') |
- if options.select: |
- options.select = options.select.split(',') |
- else: |
- options.select = [] |
- if options.ignore: |
- options.ignore = options.ignore.split(',') |
- elif options.select: |
- # Ignore all checks which are not explicitly selected |
- options.ignore = [''] |
- elif options.testsuite or options.doctest: |
- # For doctest and testsuite, all checks are required |
- options.ignore = [] |
- else: |
- # The default choice: ignore controversial checks |
- options.ignore = DEFAULT_IGNORE |
- options.physical_checks = find_checks('physical_line') |
- options.logical_checks = find_checks('logical_line') |
- options.counters = {} |
- options.messages = {} |
+ elif not options.ensure_value('doctest', False): |
+ if parse_argv and not args: |
+ if options.diff or any(os.path.exists(name) |
+ for name in PROJECT_CONFIG): |
+ args = ['.'] |
+ else: |
+ parser.error('input not specified') |
+ options = read_config(options, args, arglist, parser) |
+ options.reporter = parse_argv and options.quiet == 1 and FileReport |
+ |
+ options.filename = options.filename and options.filename.split(',') |
+ options.exclude = normalize_paths(options.exclude) |
+ options.select = options.select and options.select.split(',') |
+ options.ignore = options.ignore and options.ignore.split(',') |
+ |
+ if options.diff: |
+ options.reporter = DiffReport |
+ stdin = stdin_get_value() |
+ options.selected_lines = parse_udiff(stdin, options.filename, args[0]) |
+ args = sorted(options.selected_lines) |
+ |
return options, args |
def _main(): |
- """ |
- Parse options and run checks on Python source. |
- """ |
- options, args = process_options() |
- if options.doctest: |
- import doctest |
- doctest.testmod(verbose=options.verbose) |
- selftest() |
- start_time = time.time() |
- for path in args: |
- if os.path.isdir(path): |
- input_dir(path) |
- else: |
- input_file(path) |
- elapsed = time.time() - start_time |
+ """Parse options and run checks on Python source.""" |
+ import signal |
+ |
+ # Handle "Broken pipe" gracefully |
+ try: |
+ signal.signal(signal.SIGPIPE, lambda signum, frame: sys.exit(1)) |
+ except AttributeError: |
+ pass # not supported on Windows |
+ |
+ pep8style = StyleGuide(parse_argv=True, config_file=True) |
+ options = pep8style.options |
+ if options.doctest or options.testsuite: |
+ from testsuite.support import run_tests |
+ report = run_tests(pep8style) |
+ else: |
+ report = pep8style.check_files() |
if options.statistics: |
- print_statistics() |
+ report.print_statistics() |
if options.benchmark: |
- print_benchmark(elapsed) |
- if options.count: |
- count = get_count() |
- if count: |
- sys.stderr.write(str(count) + '\n') |
- sys.exit(1) |
- |
+ report.print_benchmark() |
+ if options.testsuite and not options.quiet: |
+ report.print_results() |
+ if report.total_errors: |
+ if options.count: |
+ sys.stderr.write(str(report.total_errors) + '\n') |
+ sys.exit(1) |
if __name__ == '__main__': |
_main() |