third_party/pylint/checkers/strings.py - Issue 739393004: Revert "Revert "pylint: upgrade to 1.3.1""

Unified Diff: third_party/pylint/checkers/strings.py

Issue 739393004: Revert "Revert "pylint: upgrade to 1.3.1"" (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/tools/depot_tools/

Patch Set: Created 6 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: third_party/pylint/checkers/strings.py

===================================================================

--- third_party/pylint/checkers/strings.py (revision 0)

+++ third_party/pylint/checkers/strings.py (working copy)

@@ -0,0 +1,619 @@

+# http://www.logilab.fr/ -- mailto:contact@logilab.fr

+# This program is free software; you can redistribute it and/or modify it under

+# the terms of the GNU General Public License as published by the Free Software

+# Foundation; either version 2 of the License, or (at your option) any later

+# version.

+# This program is distributed in the hope that it will be useful, but WITHOUT

+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS

+# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

+# You should have received a copy of the GNU General Public License along with

+# this program; if not, write to the Free Software Foundation, Inc.,

+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.

+"""Checker for string formatting operations.

+"""

+import sys

+import tokenize

+import string

+try:

+ import numbers

+except ImportError:

+ numbers = None

+import astroid

+from pylint.interfaces import ITokenChecker, IAstroidChecker, IRawChecker

+from pylint.checkers import BaseChecker, BaseTokenChecker

+from pylint.checkers import utils

+from pylint.checkers.utils import check_messages

# Interpreter feature flags consulted by the checkers below.
_PY3K = sys.version_info[:2] >= (3, 0)
_PY27 = sys.version_info[:2] == (2, 7)

# Message catalogue shared by the string formatting checkers.
# Each entry maps a message id to (message template, symbolic name,
# help text[, options]).
#
# The help texts are built with implicit string concatenation rather than
# backslash continuations inside the literal: a continuation inside a string
# embeds the source indentation of the following line in the text itself.
MSGS = {
    'E1300': ("Unsupported format character %r (%#02x) at index %d",
              "bad-format-character",
              "Used when an unsupported format character is used in a "
              "format string."),
    'E1301': ("Format string ends in middle of conversion specifier",
              "truncated-format-string",
              "Used when a format string terminates before the end of a "
              "conversion specifier."),
    'E1302': ("Mixing named and unnamed conversion specifiers in format string",
              "mixed-format-string",
              "Used when a format string contains both named (e.g. '%(foo)d') "
              "and unnamed (e.g. '%d') conversion specifiers. This is also "
              "used when a named conversion specifier contains * for the "
              "minimum field width and/or precision."),
    'E1303': ("Expected mapping for format string, not %s",
              "format-needs-mapping",
              "Used when a format string that uses named conversion specifiers "
              "is used with an argument that is not a mapping."),
    'W1300': ("Format string dictionary key should be a string, not %s",
              "bad-format-string-key",
              "Used when a format string that uses named conversion specifiers "
              "is used with a dictionary whose keys are not all strings."),
    'W1301': ("Unused key %r in format string dictionary",
              "unused-format-string-key",
              "Used when a format string that uses named conversion specifiers "
              "is used with a dictionary that contains keys not required by the "
              "format string."),
    'E1304': ("Missing key %r in format string dictionary",
              "missing-format-string-key",
              "Used when a format string that uses named conversion specifiers "
              "is used with a dictionary that doesn't contain all the keys "
              "required by the format string."),
    'E1305': ("Too many arguments for format string",
              "too-many-format-args",
              "Used when a format string that uses unnamed conversion "
              "specifiers is given too many arguments."),
    'E1306': ("Not enough arguments for format string",
              "too-few-format-args",
              "Used when a format string that uses unnamed conversion "
              "specifiers is given too few arguments"),

    'W1302': ("Invalid format string",
              "bad-format-string",
              "Used when a PEP 3101 format string is invalid.",
              {'minversion': (2, 7)}),
    'W1303': ("Missing keyword argument %r for format string",
              "missing-format-argument-key",
              "Used when a PEP 3101 format string that uses named fields "
              "doesn't receive one or more required keywords.",
              {'minversion': (2, 7)}),
    'W1304': ("Unused format argument %r",
              "unused-format-string-argument",
              "Used when a PEP 3101 format string that uses named "
              "fields is used with an argument that "
              "is not required by the format string.",
              {'minversion': (2, 7)}),
    'W1305': ("Format string contains both automatic field numbering "
              "and manual field specification",
              "format-combined-specification",
              "Used when a PEP 3101 format string contains both automatic "
              "field numbering (e.g. '{}') and manual field "
              "specification (e.g. '{0}').",
              {'minversion': (2, 7)}),
    'W1306': ("Missing format attribute %r in format specifier %r",
              "missing-format-attribute",
              "Used when a PEP 3101 format string uses an "
              "attribute specifier ({0.length}), but the argument "
              "passed for formatting doesn't have that attribute.",
              {'minversion': (2, 7)}),
    'W1307': ("Using invalid lookup key %r in format specifier %r",
              "invalid-format-index",
              "Used when a PEP 3101 format string uses a lookup specifier "
              "({a[1]}), but the argument passed for formatting "
              "doesn't contain or doesn't have that key as an attribute.",
              {'minversion': (2, 7)})
    }

# Node types that the '%' checks below treat as a single, non-mapping
# right-hand operand.
OTHER_NODES = (
    astroid.Const,
    astroid.List,
    astroid.Backquote,
    astroid.Lambda,
    astroid.Function,
    astroid.ListComp,
    astroid.SetComp,
    astroid.GenExpr,
)

if not _PY3K:
    def _field_iterator_convertor(iterator):
        """Yield the (is_attr, key) pairs of `iterator`, turning numeric keys
        into plain ints.
        """
        for is_attr, key in iterator:
            yield is_attr, (int(key) if isinstance(key, numbers.Number) else key)

    def split_format_field_names(format_string):
        """Split a replacement field name into its first component and an
        iterator over the remaining attribute / index accessors.
        """
        first, rest = format_string._formatter_field_name_split()
        # The accessors come back as longs instead of ints, which would
        # complicate the output, so normalise them on the way out.
        return first, _field_iterator_convertor(rest)
else:
    import _string

    def split_format_field_names(format_string):
        """Split a replacement field name into its first component and an
        iterator over the remaining attribute / index accessors.
        """
        return _string.formatter_field_name_split(format_string)

def collect_string_fields(format_string):
    """Iterate over the names of every replacement field in `format_string`.

    Fields nested inside a format specification (e.g. the ``width`` in
    ``{value:{width}}``) are yielded right after the field containing them.
    Raises utils.IncompleteFormatString when the string cannot be parsed.
    """
    chunks = string.Formatter().parse(format_string)
    try:
        for chunk in chunks:
            if not any(part is not None for part in chunk[1:]):
                # Pure literal text, there is no replacement field here.
                continue
            field_name, format_spec = chunk[1], chunk[2]
            yield field_name
            if format_spec:
                for inner_name in collect_string_fields(format_spec):
                    yield inner_name
    except ValueError:
        # The parser rejected the string, so it is most likely malformed.
        # (The ValueError's own argument is not inspected.)
        raise utils.IncompleteFormatString(format_string)

def parse_format_method_string(format_string):
    """
    Parse a PEP 3101 format string and return a tuple of
    (keys, num_args, manual_pos_arg).

    keys is a list of (name, accessors) pairs, one per field that is not a
    bare number, where accessors is the list of (is_attribute, specifier)
    lookups applied to the field. num_args is the number of automatically
    numbered fields ('{}'). manual_pos_arg is the number of distinct bare
    numeric fields ('{0}', '{1}', ...).
    """
    explicit_fields = []
    auto_numbered = 0
    explicit_positions = set()
    for field in collect_string_fields(format_string):
        if not field:
            auto_numbered += 1
            continue
        text = str(field)
        if text.isdigit():
            explicit_positions.add(text)
            continue
        root, accessors = split_format_field_names(field)
        if isinstance(root, numbers.Number):
            # Python 2 hands back a long here, which would make the
            # output differ between Python 2 and 3.
            root = int(root)
        explicit_fields.append((root, list(accessors)))
    return explicit_fields, auto_numbered, len(explicit_positions)

def get_args(callfunc):
    """Summarise the arguments of the given `CallFunc` node.

    Returns a (count, keywords) tuple: the number of positional arguments
    and a dict mapping each keyword argument name to the result of
    utils.safe_infer() on its value.
    """
    keyword_values = {}
    positional_count = 0
    for argument in callfunc.args:
        if not isinstance(argument, astroid.Keyword):
            positional_count += 1
            continue
        keyword_values[argument.arg] = utils.safe_infer(argument.value)
    return positional_count, keyword_values

def get_access_path(key, parts):
    """Render `key` followed by its accessors as an access path.

    `parts` is a sequence of (is_attribute, specifier) pairs; attribute
    lookups are rendered as ``.name`` and item lookups as ``[repr]``,
    giving e.g. ``a.b.c[0][1]``.
    """
    rendered = [
        ".{}".format(specifier) if is_attribute else "[{!r}]".format(specifier)
        for is_attribute, specifier in parts
    ]
    return str(key) + "".join(rendered)

class StringFormatChecker(BaseChecker):
    """Checks string formatting operations to ensure that the format string
    is valid and the arguments match the format string.
    """

    __implements__ = (IAstroidChecker,)
    name = 'string'
    msgs = MSGS

    @check_messages(*(MSGS.keys()))
    def visit_binop(self, node):
        """Check a ``"literal" % args`` expression.

        Only binary '%' operations whose left operand is a string constant
        are examined; everything else is ignored.
        """
        if node.op != '%':
            return
        left = node.left
        if not (isinstance(left, astroid.Const)
                and isinstance(left.value, basestring)):
            return
        format_string = left.value
        try:
            required_keys, required_num_args = \
                utils.parse_format_string(format_string)
        except utils.UnsupportedFormatCharacter as exc:
            # 'except ... as' parses on Python 2.6+ and on Python 3,
            # unlike the comma form.
            char = format_string[exc.index]
            self.add_message('bad-format-character',
                             node=node, args=(char, ord(char), exc.index))
            return
        except utils.IncompleteFormatString:
            self.add_message('truncated-format-string', node=node)
            return

        if required_keys and required_num_args:
            # The format string uses both named and unnamed format
            # specifiers.
            self.add_message('mixed-format-string', node=node)
        elif required_keys:
            self._check_named_format(node, node.right, required_keys)
        else:
            self._check_unnamed_format(node, node.right, required_num_args)

    def _check_named_format(self, node, args, required_keys):
        """Check the right operand of a format using only named specifiers.

        The operand should be a mapping that contains precisely the keys
        required by the format string.
        """
        if isinstance(args, astroid.Dict):
            keys = set()
            unknown_keys = False
            for key_node, _ in args.items:
                if not isinstance(key_node, astroid.Const):
                    # One of the keys was something other than a
                    # constant. Since we can't tell what it is,
                    # suppress checks for missing keys in the
                    # dictionary.
                    unknown_keys = True
                    continue
                key = key_node.value
                if isinstance(key, basestring):
                    keys.add(key)
                else:
                    self.add_message('bad-format-string-key',
                                     node=node, args=key)
            if not unknown_keys:
                for key in required_keys:
                    if key not in keys:
                        self.add_message('missing-format-string-key',
                                         node=node, args=key)
            # Constant keys the format never references are unused whether
            # or not other keys could be resolved.
            for key in keys:
                if key not in required_keys:
                    self.add_message('unused-format-string-key',
                                     node=node, args=key)
        elif isinstance(args, OTHER_NODES + (astroid.Tuple,)):
            self.add_message('format-needs-mapping',
                             node=node, args=type(args).__name__)
        # Otherwise the operand is a name or expression. It may be a
        # mapping object, so there's nothing we can check.

    def _check_unnamed_format(self, node, args, required_num_args):
        """Check the right operand of a format using only unnamed specifiers.

        The number of arguments supplied must match the number required by
        the format string.
        """
        if isinstance(args, astroid.Tuple):
            num_args = len(args.elts)
        elif isinstance(args, OTHER_NODES + (astroid.Dict, astroid.DictComp)):
            num_args = 1
        else:
            # The operand is a name or expression. It could be a tuple of
            # unknown size, so there's nothing we can check.
            return
        if num_args > required_num_args:
            self.add_message('too-many-format-args', node=node)
        elif num_args < required_num_args:
            self.add_message('too-few-format-args', node=node)

class StringMethodsChecker(BaseChecker):
    """Checks calls to string methods: suspicious ``strip`` arguments and
    PEP 3101 ``str.format`` calls.
    """

    __implements__ = (IAstroidChecker,)
    name = 'string'
    msgs = {
        'E1310': ("Suspicious argument in %s.%s call",
                  "bad-str-strip-call",
                  "The argument to a str.{l,r,}strip call contains a"
                  " duplicate character, "),
        }

    # 'bad-str-strip-call' is listed explicitly because it is declared on
    # this class rather than in MSGS; it is emitted by this visitor, so it
    # has to be among the messages the visitor is registered for.
    @check_messages('bad-str-strip-call', *MSGS.keys())
    def visit_callfunc(self, node):
        """Check method calls made on str / unicode / bytes instances."""
        func = utils.safe_infer(node.func)
        if not (isinstance(func, astroid.BoundMethod)
                and isinstance(func.bound, astroid.Instance)
                and func.bound.name in ('str', 'unicode', 'bytes')):
            return
        if func.name in ('strip', 'lstrip', 'rstrip') and node.args:
            arg = utils.safe_infer(node.args[0])
            if not isinstance(arg, astroid.Const):
                return
            try:
                has_duplicates = len(arg.value) != len(set(arg.value))
            except TypeError:
                # The constant is not a sized iterable, e.g. strip(None),
                # so there are no characters to inspect.
                return
            if has_duplicates:
                self.add_message('bad-str-strip-call', node=node,
                                 args=(func.bound.name, func.name))
        elif func.name == 'format':
            if _PY27 or _PY3K:
                self._check_new_format(node, func)

    def _check_new_format(self, node, func):
        """ Check the new string formatting. """
        # TODO: skip (for now) format nodes which don't have
        #       an explicit string on the left side of the format operation.
        #       We do this because our inference engine can't properly handle
        #       redefinitions of the original string.
        #       For more details, see issue 287.
        if not isinstance(node.func.expr, astroid.Const):
            return
        try:
            strnode = next(func.bound.infer())
        except astroid.InferenceError:
            return
        if not isinstance(strnode, astroid.Const):
            return
        if node.starargs or node.kwargs:
            # TODO: Don't complicate the logic, skip these for now.
            return
        try:
            positional, named = get_args(node)
        except astroid.InferenceError:
            return
        try:
            fields, num_args, manual_pos = parse_format_method_string(strnode.value)
        except utils.IncompleteFormatString:
            self.add_message('bad-format-string', node=node)
            return

        manual_fields = set(field[0] for field in fields
                            if isinstance(field[0], numbers.Number))
        named_fields = set(field[0] for field in fields
                           if isinstance(field[0], basestring))
        if num_args and manual_pos:
            self.add_message('format-combined-specification',
                             node=node)
            return

        check_args = False
        # Consider "{[0]} {[1]}" as num_args.
        num_args += sum(1 for field in named_fields
                        if field == '')
        if named_fields:
            for field in named_fields:
                if field not in named and field:
                    self.add_message('missing-format-argument-key',
                                     node=node,
                                     args=(field, ))
            for field in named:
                if field not in named_fields:
                    self.add_message('unused-format-string-argument',
                                     node=node,
                                     args=(field, ))
            # num_args can be 0 if manual_pos is not.
            num_args = num_args or manual_pos
            if positional or num_args:
                empty = any(True for field in named_fields
                            if field == '')
                if named or empty:
                    # Verify the required number of positional arguments
                    # only if the .format got at least one keyword argument.
                    # This means that the format strings accepts both
                    # positional and named fields and we should warn
                    # when one of the them is missing or is extra.
                    check_args = True
        else:
            # Without named fields the positional count is always checked.
            check_args = True
        if check_args:
            # num_args can be 0 if manual_pos is not. This is repeated here
            # because the assignment above only runs when named fields exist.
            num_args = num_args or manual_pos
            if positional > num_args:
                # We can have two possibilities:
                # * "{0} {1}".format(a, b)
                # * "{} {} {}".format(a, b, c, d)
                # We can check the manual keys for the first one.
                if len(manual_fields) != positional:
                    self.add_message('too-many-format-args', node=node)
            elif positional < num_args:
                self.add_message('too-few-format-args', node=node)

        self._check_new_format_specifiers(node, fields, named)

    def _check_new_format_specifiers(self, node, fields, named):
        """
        Check attribute and index access in the format
        string ("{0.a}" and "{0[a]}").
        """
        for key, specifiers in fields:
            # Obtain the argument. If it can't be obtained
            # or inferred, skip this check.
            if key == '':
                # {[0]} will have an unnamed argument, defaulting
                # to 0. It will not be present in `named`, so use the value
                # 0 for it.
                key = 0
            if isinstance(key, numbers.Number):
                try:
                    argname = utils.get_argument_from_call(node, key)
                except utils.NoSuchArgumentError:
                    continue
            else:
                if key not in named:
                    continue
                argname = named[key]
            if argname in (astroid.YES, None):
                continue
            try:
                argument = next(argname.infer())
            except astroid.InferenceError:
                continue
            if not specifiers or argument is astroid.YES:
                # No need to check this key if it doesn't
                # use attribute / item access
                continue
            if argument.parent and isinstance(argument.parent, astroid.Arguments):
                # Check to see if our argument is kwarg or vararg,
                # and skip the check for this argument if so, because when
                # inferring, astroid will return empty objects (dicts and
                # tuples) and that can lead to false positives.
                if argname.name in (argument.parent.kwarg, argument.parent.vararg):
                    continue
            previous = argument
            parsed = []
            for is_attribute, specifier in specifiers:
                if previous is astroid.YES:
                    break
                parsed.append((is_attribute, specifier))
                if is_attribute:
                    try:
                        previous = previous.getattr(specifier)[0]
                    except astroid.NotFoundError:
                        if (hasattr(previous, 'has_dynamic_getattr') and
                                previous.has_dynamic_getattr()):
                            # Don't warn if the object has a custom __getattr__
                            break
                        path = get_access_path(key, parsed)
                        self.add_message('missing-format-attribute',
                                         args=(specifier, path),
                                         node=node)
                        break
                else:
                    warn_error = False
                    if hasattr(previous, 'getitem'):
                        try:
                            previous = previous.getitem(specifier)
                        except (IndexError, TypeError):
                            warn_error = True
                    else:
                        try:
                            # Lookup __getitem__ in the current node,
                            # but skip further checks, because we can't
                            # retrieve the looked object
                            previous.getattr('__getitem__')
                            break
                        except astroid.NotFoundError:
                            warn_error = True
                    if warn_error:
                        path = get_access_path(key, parsed)
                        self.add_message('invalid-format-index',
                                         args=(specifier, path),
                                         node=node)
                        break

                try:
                    previous = next(previous.infer())
                except astroid.InferenceError:
                    # can't check further if we can't infer it
                    break

class StringConstantChecker(BaseTokenChecker):
    """Token-level checks on string literals: reports backslashes that do
    not start a recognised escape sequence.
    """

    __implements__ = (ITokenChecker, IRawChecker)
    name = 'string_constant'
    msgs = {
        'W1401': ('Anomalous backslash in string: \'%s\'. '
                  'String constant might be missing an r prefix.',
                  'anomalous-backslash-in-string',
                  'Used when a backslash is in a literal string but not as an '
                  'escape.'),
        'W1402': ('Anomalous Unicode escape in byte string: \'%s\'. '
                  'String constant might be missing an r or u prefix.',
                  'anomalous-unicode-escape-in-string',
                  'Used when an escape like \\u is encountered in a byte '
                  'string where it has no effect.'),
        }

    # Characters that have a special meaning after a backslash in either
    # Unicode or byte strings.
    ESCAPE_CHARACTERS = 'abfnrtvx\n\r\t\\\'\"01234567'

    # TODO(mbp): Octal characters are quite an edge case today; people may
    # prefer a separate warning where they occur.  \0 should be allowed.

    # Characters that have a special meaning after a backslash but only in
    # Unicode strings.
    UNICODE_ESCAPE_CHARACTERS = 'uUN'

    def process_module(self, module):
        """Record whether the module imports unicode_literals from __future__."""
        self._unicode_literals = 'unicode_literals' in module.future_imports

    def process_tokens(self, tokens):
        """Hand every STRING token to process_string_token()."""
        for (kind, text, (row, col), _, _) in tokens:
            if kind != tokenize.STRING:
                continue
            # 'text' is the whole un-parsed token; its start tells
            # whether it's a raw or unicode string etc.
            self.process_string_token(text, row, col)

    def process_string_token(self, token, start_row, start_col):
        """Split a string token into prefix and body, then check the body
        unless the prefix marks a raw string.
        """
        for quote_index, char in enumerate(token):
            if char in '\'\"':
                quote_char = char
                break
        prefix = token[:quote_index].lower()  # markers like u, b, r.
        literal = token[quote_index:]
        triple = 3 * quote_char
        # Triple-quoted literals lose three characters per side, others one.
        if literal[:3] == triple and literal[-3:] == triple:
            delimiter_len = 3
        else:
            delimiter_len = 1
        body = literal[delimiter_len:-delimiter_len]
        # No special checks on raw strings at the moment.
        if 'r' in prefix:
            return
        self.process_non_raw_string_token(prefix, body, start_row, start_col)

    def process_non_raw_string_token(self, prefix, string_body, start_row,
                                     start_col):
        """check for bad escapes in a non-raw string.

        prefix: lowercase string of eg 'ur' string prefix markers.
        string_body: the un-parsed body of the string, not including the quote
        marks.
        start_row: integer line number in the source.
        start_col: integer column number in the source.
        """
        # Hop from backslash to backslash. Each backslash is examined
        # together with the character after it, and the pair is then
        # skipped as a unit.
        #
        # A backslash is accepted when it escapes a backslash, a quote, an
        # end-of-line, or one of the letters that introduce a special escape
        # sequence <http://docs.python.org/reference/lexical_analysis.html>
        #
        # TODO(mbp): Maybe give a separate warning about the rarely-used
        # \a \b \v \f?
        #
        # TODO(mbp): We could give the column of the problem character, but
        # add_message doesn't seem to have a way to pass it through at present.
        pos = string_body.find('\\')
        while pos != -1:
            # There must be a next character; having a backslash at the end
            # of the string would be a SyntaxError.
            next_char = string_body[pos + 1]
            match = string_body[pos:pos + 2]
            if next_char in self.UNICODE_ESCAPE_CHARACTERS:
                # These escapes only mean something in unicode strings:
                # explicitly prefixed ones, or unprefixed ones that are
                # unicode by default.
                is_unicode = ('u' in prefix
                              or ((_PY3K or self._unicode_literals)
                                  and 'b' not in prefix))
                if not is_unicode:
                    self.add_message('anomalous-unicode-escape-in-string',
                                     line=start_row, args=(match, ))
            elif next_char not in self.ESCAPE_CHARACTERS:
                self.add_message('anomalous-backslash-in-string',
                                 line=start_row, args=(match, ))
            # Whether it was a valid escape or not, backslash followed by
            # another character can always be consumed whole: the second
            # character can never be the start of a new backslash escape.
            pos = string_body.find('\\', pos + 2)

def register(linter):
    """Entry point called to auto-register this module's checkers with `linter`."""
    checker_classes = (
        StringFormatChecker,
        StringMethodsChecker,
        StringConstantChecker,
    )
    for checker_class in checker_classes:
        linter.register_checker(checker_class(linter))

« no previous file with comments | « third_party/pylint/checkers/string_format.py ('k') | third_party/pylint/checkers/typecheck.py » ('j') | no next file with comments »