third_party/pylint/checkers/strings.py - Issue 741503002: pylint: upgrade to 1.3.1

Side by Side Diff: third_party/pylint/checkers/strings.py

Issue 741503002: pylint: upgrade to 1.3.1 (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/tools/depot_tools/

Patch Set: Created 6 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
(Empty)
	1 # Copyright (c) 2009-2010 Arista Networks, Inc. - James Lingard

	2 # Copyright (c) 2004-2013 LOGILAB S.A. (Paris, FRANCE).

	3 # Copyright 2012 Google Inc.

	4 #

	5 # http://www.logilab.fr/ -- mailto:contact@logilab.fr

	6 # This program is free software; you can redistribute it and/or modify it under

	7 # the terms of the GNU General Public License as published by the Free Software

	8 # Foundation; either version 2 of the License, or (at your option) any later

	9 # version.

	10 #

	11 # This program is distributed in the hope that it will be useful, but WITHOUT

	12 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS

	13 # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details

	14 #

	15 # You should have received a copy of the GNU General Public License along with

	16 # this program; if not, write to the Free Software Foundation, Inc.,

	17 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.

	18 """Checker for string formatting operations.

	19 """

	20

	21 import sys

	22 import tokenize

	23 import string

	24 try:

	25 import numbers

	26 except ImportError:

	27 numbers = None

	28

	29 import astroid

	30

	31 from pylint.interfaces import ITokenChecker, IAstroidChecker, IRawChecker

	32 from pylint.checkers import BaseChecker, BaseTokenChecker

	33 from pylint.checkers import utils

	34 from pylint.checkers.utils import check_messages

	35

# Interpreter flags consulted further down to select version-specific code.
# True on any Python 3.x interpreter.
_PY3K = sys.version_info[:2] >= (3, 0)
# True only on Python 2.7 exactly.
_PY27 = sys.version_info[:2] == (2, 7)

	38

	39 MSGS = {

	40 'E1300': ("Unsupported format character %r (%#02x) at index %d",

	41 "bad-format-character",

	42 "Used when a unsupported format character is used in a format\

	43 string."),

	44 'E1301': ("Format string ends in middle of conversion specifier",

	45 "truncated-format-string",

	46 "Used when a format string terminates before the end of a \

	47 conversion specifier."),

	48 'E1302': ("Mixing named and unnamed conversion specifiers in format string",

	49 "mixed-format-string",

	50 "Used when a format string contains both named (e.g. '%(foo)d') \

	51 and unnamed (e.g. '%d') conversion specifiers. This is also \

	52 used when a named conversion specifier contains * for the \

	53 minimum field width and/or precision."),

	54 'E1303': ("Expected mapping for format string, not %s",

	55 "format-needs-mapping",

	56 "Used when a format string that uses named conversion specifiers \

	57 is used with an argument that is not a mapping."),

	58 'W1300': ("Format string dictionary key should be a string, not %s",

	59 "bad-format-string-key",

	60 "Used when a format string that uses named conversion specifiers \

	61 is used with a dictionary whose keys are not all strings."),

	62 'W1301': ("Unused key %r in format string dictionary",

	63 "unused-format-string-key",

	64 "Used when a format string that uses named conversion specifiers \

	65 is used with a dictionary that conWtains keys not required by the \

	66 format string."),

	67 'E1304': ("Missing key %r in format string dictionary",

	68 "missing-format-string-key",

	69 "Used when a format string that uses named conversion specifiers \

	70 is used with a dictionary that doesn't contain all the keys \

	71 required by the format string."),

	72 'E1305': ("Too many arguments for format string",

	73 "too-many-format-args",

	74 "Used when a format string that uses unnamed conversion \

	75 specifiers is given too many arguments."),

	76 'E1306': ("Not enough arguments for format string",

	77 "too-few-format-args",

	78 "Used when a format string that uses unnamed conversion \

	79 specifiers is given too few arguments"),

	80

	81 'W1302': ("Invalid format string",

	82 "bad-format-string",

	83 "Used when a PEP 3101 format string is invalid.",

	84 {'minversion': (2, 7)}),

	85 'W1303': ("Missing keyword argument %r for format string",

	86 "missing-format-argument-key",

	87 "Used when a PEP 3101 format string that uses named fields "

	88 "doesn't receive one or more required keywords.",

	89 {'minversion': (2, 7)}),

	90 'W1304': ("Unused format argument %r",

	91 "unused-format-string-argument",

	92 "Used when a PEP 3101 format string that uses named "

	93 "fields is used with an argument that "

	94 "is not required by the format string.",

	95 {'minversion': (2, 7)}),

	96 'W1305': ("Format string contains both automatic field numbering "

	97 "and manual field specification",

	98 "format-combined-specification",

	99 "Usen when a PEP 3101 format string contains both automatic "

	100 "field numbering (e.g. '{}') and manual field "

	101 "specification (e.g. '{0}').",

	102 {'minversion': (2, 7)}),

	103 'W1306': ("Missing format attribute %r in format specifier %r",

	104 "missing-format-attribute",

	105 "Used when a PEP 3101 format string uses an "

	106 "attribute specifier ({0.length}), but the argument "

	107 "passed for formatting doesn't have that attribute.",

	108 {'minversion': (2, 7)}),

	109 'W1307': ("Using invalid lookup key %r in format specifier %r",

	110 "invalid-format-index",

	111 "Used when a PEP 3101 format string uses a lookup specifier "

	112 "({a[1]}), but the argument passed for formatting "

	113 "doesn't contain or doesn't have that key as an attribute.",

	114 {'minversion': (2, 7)})

	115 }

	116

# astroid node classes that StringFormatChecker tests the right-hand operand
# of `%` against with isinstance(): combined with astroid.Tuple they are
# reported as "not a mapping" for named specifiers, and combined with
# astroid.Dict / astroid.DictComp they count as exactly one positional
# argument for unnamed specifiers.
OTHER_NODES = (astroid.Const, astroid.List, astroid.Backquote,
               astroid.Lambda, astroid.Function,
               astroid.ListComp, astroid.SetComp, astroid.GenExpr)

	120

# split_format_field_names() is defined once per interpreter family: both
# variants take one replacement-field name (e.g. "a.b[0]") and return a
# (first_key, iterator of (is_attribute, key)) pair.
if _PY3K:
    import _string

    def split_format_field_names(format_string):
        """ Split a field name using the private `_string` helper module. """
        return _string.formatter_field_name_split(format_string)
else:
    def _field_iterator_convertor(iterator):
        """ Yield the (is_attr, key) pairs of `iterator`, converting
        numeric keys to plain ints.
        """
        for is_attr, key in iterator:
            if isinstance(key, numbers.Number):
                yield is_attr, int(key)
            else:
                yield is_attr, key

    def split_format_field_names(format_string):
        """ Split a field name using the string's own private
        `_formatter_field_name_split` method.
        """
        keyname, fielditerator = format_string._formatter_field_name_split()
        # it will return longs, instead of ints, which will complicate
        # the output
        return keyname, _field_iterator_convertor(fielditerator)

	139

	140

	141 def collect_string_fields(format_string):

	142 """ Given a format string, return an iterator

	143 of all the valid format fields. It handles nested fields

	144 as well.

	145 """

	146

	147 formatter = string.Formatter()

	148 parseiterator = formatter.parse(format_string)

	149 try:

	150 for result in parseiterator:

	151 if all(item is None for item in result[1:]):

	152 # not a replacement format

	153 continue

	154 name = result[1]

	155 nested = result[2]

	156 yield name

	157 if nested:

	158 for field in collect_string_fields(nested):

	159 yield field

	160 except ValueError:

	161 # probably the format string is invalid

	162 # should we check the argument of the ValueError?

	163 raise utils.IncompleteFormatString(format_string)

	164

	165 def parse_format_method_string(format_string):

	166 """

	167 Parses a PEP 3101 format string, returning a tuple of

	168 (keys, num_args, manual_pos_arg),

	169 where keys is the set of mapping keys in the format string, num_args

	170 is the number of arguments required by the format string and

	171 manual_pos_arg is the number of arguments passed with the position.

	172 """

	173 keys = []

	174 num_args = 0

	175 manual_pos_arg = set()

	176 for name in collect_string_fields(format_string):

	177 if name and str(name).isdigit():

	178 manual_pos_arg.add(str(name))

	179 elif name:

	180 keyname, fielditerator = split_format_field_names(name)

	181 if isinstance(keyname, numbers.Number):

	182 # In Python 2 it will return long which will lead

	183 # to different output between 2 and 3

	184 keyname = int(keyname)

	185 keys.append((keyname, list(fielditerator)))

	186 else:

	187 num_args += 1

	188 return keys, num_args, len(manual_pos_arg)

	189

	190 def get_args(callfunc):

	191 """ Get the arguments from the given `CallFunc` node.

	192 Return a tuple, where the first element is the

	193 number of positional arguments and the second element

	194 is the keyword arguments in a dict.

	195 """

	196 positional = 0

	197 named = {}

	198

	199 for arg in callfunc.args:

	200 if isinstance(arg, astroid.Keyword):

	201 named[arg.arg] = utils.safe_infer(arg.value)

	202 else:

	203 positional += 1

	204 return positional, named

	205

	206 def get_access_path(key, parts):

	207 """ Given a list of format specifiers, returns

	208 the final access path (e.g. a.b.c[0][1]).

	209 """

	210 path = []

	211 for is_attribute, specifier in parts:

	212 if is_attribute:

	213 path.append(".{}".format(specifier))

	214 else:

	215 path.append("[{!r}]".format(specifier))

	216 return str(key) + "".join(path)

	217

	218

	219 class StringFormatChecker(BaseChecker):

	220 """Checks string formatting operations to ensure that the format string

	221 is valid and the arguments match the format string.

	222 """

	223

	224 __implements__ = (IAstroidChecker,)

	225 name = 'string'

	226 msgs = MSGS

	227

	228 @check_messages(*(MSGS.keys()))

	229 def visit_binop(self, node):

	230 if node.op != '%':

	231 return

	232 left = node.left

	233 args = node.right

	234

	235 if not (isinstance(left, astroid.Const)

	236 and isinstance(left.value, basestring)):

	237 return

	238 format_string = left.value

	239 try:

	240 required_keys, required_num_args = \

	241 utils.parse_format_string(format_string)

	242 except utils.UnsupportedFormatCharacter, e:

	243 c = format_string[e.index]

	244 self.add_message('bad-format-character',

	245 node=node, args=(c, ord(c), e.index))

	246 return

	247 except utils.IncompleteFormatString:

	248 self.add_message('truncated-format-string', node=node)

	249 return

	250 if required_keys and required_num_args:

	251 # The format string uses both named and unnamed format

	252 # specifiers.

	253 self.add_message('mixed-format-string', node=node)

	254 elif required_keys:

	255 # The format string uses only named format specifiers.

	256 # Check that the RHS of the % operator is a mapping object

	257 # that contains precisely the set of keys required by the

	258 # format string.

	259 if isinstance(args, astroid.Dict):

	260 keys = set()

	261 unknown_keys = False

	262 for k, _ in args.items:

	263 if isinstance(k, astroid.Const):

	264 key = k.value

	265 if isinstance(key, basestring):

	266 keys.add(key)

	267 else:

	268 self.add_message('bad-format-string-key',

	269 node=node, args=key)

	270 else:

	271 # One of the keys was something other than a

	272 # constant. Since we can't tell what it is,

	273 # supress checks for missing keys in the

	274 # dictionary.

	275 unknown_keys = True

	276 if not unknown_keys:

	277 for key in required_keys:

	278 if key not in keys:

	279 self.add_message('missing-format-string-key',

	280 node=node, args=key)

	281 for key in keys:

	282 if key not in required_keys:

	283 self.add_message('unused-format-string-key',

	284 node=node, args=key)

	285 elif isinstance(args, OTHER_NODES + (astroid.Tuple,)):

	286 type_name = type(args).__name__

	287 self.add_message('format-needs-mapping',

	288 node=node, args=type_name)

	289 # else:

	290 # The RHS of the format specifier is a name or

	291 # expression. It may be a mapping object, so

	292 # there's nothing we can check.

	293 else:

	294 # The format string uses only unnamed format specifiers.

	295 # Check that the number of arguments passed to the RHS of

	296 # the % operator matches the number required by the format

	297 # string.

	298 if isinstance(args, astroid.Tuple):

	299 num_args = len(args.elts)

	300 elif isinstance(args, OTHER_NODES + (astroid.Dict, astroid.DictComp) ):

	301 num_args = 1

	302 else:

	303 # The RHS of the format specifier is a name or

	304 # expression. It could be a tuple of unknown size, so

	305 # there's nothing we can check.

	306 num_args = None

	307 if num_args is not None:

	308 if num_args > required_num_args:

	309 self.add_message('too-many-format-args', node=node)

	310 elif num_args < required_num_args:

	311 self.add_message('too-few-format-args', node=node)

	312

	313

	314 class StringMethodsChecker(BaseChecker):

	315 __implements__ = (IAstroidChecker,)

	316 name = 'string'

	317 msgs = {

	318 'E1310': ("Suspicious argument in %s.%s call",

	319 "bad-str-strip-call",

	320 "The argument to a str.{l,r,}strip call contains a"

	321 " duplicate character, "),

	322 }

	323

	324 @check_messages(*(MSGS.keys()))

	325 def visit_callfunc(self, node):

	326 func = utils.safe_infer(node.func)

	327 if (isinstance(func, astroid.BoundMethod)

	328 and isinstance(func.bound, astroid.Instance)

	329 and func.bound.name in ('str', 'unicode', 'bytes')):

	330 if func.name in ('strip', 'lstrip', 'rstrip') and node.args:

	331 arg = utils.safe_infer(node.args[0])

	332 if not isinstance(arg, astroid.Const):

	333 return

	334 if len(arg.value) != len(set(arg.value)):

	335 self.add_message('bad-str-strip-call', node=node,

	336 args=(func.bound.name, func.name))

	337 elif func.name == 'format':

	338 if _PY27 or _PY3K:

	339 self._check_new_format(node, func)

	340

	341 def _check_new_format(self, node, func):

	342 """ Check the new string formatting. """

	343 # TODO: skip (for now) format nodes which don't have

	344 # an explicit string on the left side of the format operation.

	345 # We do this because our inference engine can't properly handle

	346 # redefinitions of the original string.

	347 # For more details, see issue 287.

	348 if not isinstance(node.func.expr, astroid.Const):

	349 return

	350 try:

	351 strnode = func.bound.infer().next()

	352 except astroid.InferenceError:

	353 return

	354 if not isinstance(strnode, astroid.Const):

	355 return

	356 if node.starargs or node.kwargs:

	357 # TODO: Don't complicate the logic, skip these for now.

	358 return

	359 try:

	360 positional, named = get_args(node)

	361 except astroid.InferenceError:

	362 return

	363 try:

	364 fields, num_args, manual_pos = parse_format_method_string(strnode.va lue)

	365 except utils.IncompleteFormatString:

	366 self.add_message('bad-format-string', node=node)

	367 return

	368

	369 manual_fields = set(field[0] for field in fields

	370 if isinstance(field[0], numbers.Number))

	371 named_fields = set(field[0] for field in fields

	372 if isinstance(field[0], basestring))

	373 if num_args and manual_pos:

	374 self.add_message('format-combined-specification',

	375 node=node)

	376 return

	377

	378 check_args = False

	379 # Consider "{[0]} {[1]}" as num_args.

	380 num_args += sum(1 for field in named_fields

	381 if field == '')

	382 if named_fields:

	383 for field in named_fields:

	384 if field not in named and field:

	385 self.add_message('missing-format-argument-key',

	386 node=node,

	387 args=(field, ))

	388 for field in named:

	389 if field not in named_fields:

	390 self.add_message('unused-format-string-argument',

	391 node=node,

	392 args=(field, ))

	393 # num_args can be 0 if manual_pos is not.

	394 num_args = num_args or manual_pos

	395 if positional or num_args:

	396 empty = any(True for field in named_fields

	397 if field == '')

	398 if named or empty:

	399 # Verify the required number of positional arguments

	400 # only if the .format got at least one keyword argument.

	401 # This means that the format strings accepts both

	402 # positional and named fields and we should warn

	403 # when one of the them is missing or is extra.

	404 check_args = True

	405 else:

	406 check_args = True

	407 if check_args:

	408 # num_args can be 0 if manual_pos is not.

	409 num_args = num_args or manual_pos

	410 if positional > num_args:

	411 # We can have two possibilities:

	412 # * "{0} {1}".format(a, b)

	413 # * "{} {} {}".format(a, b, c, d)

	414 # We can check the manual keys for the first one.

	415 if len(manual_fields) != positional:

	416 self.add_message('too-many-format-args', node=node)

	417 elif positional < num_args:

	418 self.add_message('too-few-format-args', node=node)

	419

	420 self._check_new_format_specifiers(node, fields, named)

	421

	422 def _check_new_format_specifiers(self, node, fields, named):

	423 """

	424 Check attribute and index access in the format

	425 string ("{0.a}" and "{0[a]}").

	426 """

	427 for key, specifiers in fields:

	428 # Obtain the argument. If it can't be obtained

	429 # or infered, skip this check.

	430 if key == '':

	431 # {[0]} will have an unnamed argument, defaulting

	432 # to 0. It will not be present in `named`, so use the value

	433 # 0 for it.

	434 key = 0

	435 if isinstance(key, numbers.Number):

	436 try:

	437 argname = utils.get_argument_from_call(node, key)

	438 except utils.NoSuchArgumentError:

	439 continue

	440 else:

	441 if key not in named:

	442 continue

	443 argname = named[key]

	444 if argname in (astroid.YES, None):

	445 continue

	446 try:

	447 argument = argname.infer().next()

	448 except astroid.InferenceError:

	449 continue

	450 if not specifiers or argument is astroid.YES:

	451 # No need to check this key if it doesn't

	452 # use attribute / item access

	453 continue

	454 if argument.parent and isinstance(argument.parent, astroid.Arguments ):

	455 # Check to see if our argument is kwarg or vararg,

	456 # and skip the check for this argument if so, because when infer ring,

	457 # astroid will return empty objects (dicts and tuples) and

	458 # that can lead to false positives.

	459 if argname.name in (argument.parent.kwarg, argument.parent.varar g):

	460 continue

	461 previous = argument

	462 parsed = []

	463 for is_attribute, specifier in specifiers:

	464 if previous is astroid.YES:

	465 break

	466 parsed.append((is_attribute, specifier))

	467 if is_attribute:

	468 try:

	469 previous = previous.getattr(specifier)[0]

	470 except astroid.NotFoundError:

	471 if (hasattr(previous, 'has_dynamic_getattr') and

	472 previous.has_dynamic_getattr()):

	473 # Don't warn if the object has a custom __getattr__

	474 break

	475 path = get_access_path(key, parsed)

	476 self.add_message('missing-format-attribute',

	477 args=(specifier, path),

	478 node=node)

	479 break

	480 else:

	481 warn_error = False

	482 if hasattr(previous, 'getitem'):

	483 try:

	484 previous = previous.getitem(specifier)

	485 except (IndexError, TypeError):

	486 warn_error = True

	487 else:

	488 try:

	489 # Lookup __getitem__ in the current node,

	490 # but skip further checks, because we can't

	491 # retrieve the looked object

	492 previous.getattr('__getitem__')

	493 break

	494 except astroid.NotFoundError:

	495 warn_error = True

	496 if warn_error:

	497 path = get_access_path(key, parsed)

	498 self.add_message('invalid-format-index',

	499 args=(specifier, path),

	500 node=node)

	501 break

	502

	503 try:

	504 previous = previous.infer().next()

	505 except astroid.InferenceError:

	506 # can't check further if we can't infer it

	507 break

	508

	509

	510

	511 class StringConstantChecker(BaseTokenChecker):

	512 """Check string literals"""

	513 __implements__ = (ITokenChecker, IRawChecker)

	514 name = 'string_constant'

	515 msgs = {

	516 'W1401': ('Anomalous backslash in string: \'%s\'. '

	517 'String constant might be missing an r prefix.',

	518 'anomalous-backslash-in-string',

	519 'Used when a backslash is in a literal string but not as an '

	520 'escape.'),

	521 'W1402': ('Anomalous Unicode escape in byte string: \'%s\'. '

	522 'String constant might be missing an r or u prefix.',

	523 'anomalous-unicode-escape-in-string',

	524 'Used when an escape like \\u is encountered in a byte '

	525 'string where it has no effect.'),

	526 }

	527

	528 # Characters that have a special meaning after a backslash in either

	529 # Unicode or byte strings.

	530 ESCAPE_CHARACTERS = 'abfnrtvx\n\r\t\\\'\"01234567'

	531

	532 # TODO(mbp): Octal characters are quite an edge case today; people may

	533 # prefer a separate warning where they occur. \0 should be allowed.

	534

	535 # Characters that have a special meaning after a backslash but only in

	536 # Unicode strings.

	537 UNICODE_ESCAPE_CHARACTERS = 'uUN'

	538

	539 def process_module(self, module):

	540 self._unicode_literals = 'unicode_literals' in module.future_imports

	541

	542 def process_tokens(self, tokens):

	543 for (tok_type, token, (start_row, start_col), _, _) in tokens:

	544 if tok_type == tokenize.STRING:

	545 # 'token' is the whole un-parsed token; we can look at the start

	546 # of it to see whether it's a raw or unicode string etc.

	547 self.process_string_token(token, start_row, start_col)

	548

	549 def process_string_token(self, token, start_row, start_col):

	550 for i, c in enumerate(token):

	551 if c in '\'\"':

	552 quote_char = c

	553 break

	554 prefix = token[:i].lower() # markers like u, b, r.

	555 after_prefix = token[i:]

	556 if after_prefix[:3] == after_prefix[-3:] == 3 * quote_char:

	557 string_body = after_prefix[3:-3]

	558 else:

	559 string_body = after_prefix[1:-1] # Chop off quotes

	560 # No special checks on raw strings at the moment.

	561 if 'r' not in prefix:

	562 self.process_non_raw_string_token(prefix, string_body,

	563 start_row, start_col)

	564

	565 def process_non_raw_string_token(self, prefix, string_body, start_row,

	566 start_col):

	567 """check for bad escapes in a non-raw string.

	568

	569 prefix: lowercase string of eg 'ur' string prefix markers.

	570 string_body: the un-parsed body of the string, not including the quote

	571 marks.

	572 start_row: integer line number in the source.

	573 start_col: integer column number in the source.

	574 """

	575 # Walk through the string; if we see a backslash then escape the next

	576 # character, and skip over it. If we see a non-escaped character,

	577 # alert, and continue.

	578 #

	579 # Accept a backslash when it escapes a backslash, or a quote, or

	580 # end-of-line, or one of the letters that introduce a special escape

	581 # sequence <http://docs.python.org/reference/lexical_analysis.html>

	582 #

	583 # TODO(mbp): Maybe give a separate warning about the rarely-used

	584 # \a \b \v \f?

	585 #

	586 # TODO(mbp): We could give the column of the problem character, but

	587 # add_message doesn't seem to have a way to pass it through at present.

	588 i = 0

	589 while True:

	590 i = string_body.find('\\', i)

	591 if i == -1:

	592 break

	593 # There must be a next character; having a backslash at the end

	594 # of the string would be a SyntaxError.

	595 next_char = string_body[i+1]

	596 match = string_body[i:i+2]

	597 if next_char in self.UNICODE_ESCAPE_CHARACTERS:

	598 if 'u' in prefix:

	599 pass

	600 elif (_PY3K or self._unicode_literals) and 'b' not in prefix:

	601 pass # unicode by default

	602 else:

	603 self.add_message('anomalous-unicode-escape-in-string',

	604 line=start_row, args=(match, ))

	605 elif next_char not in self.ESCAPE_CHARACTERS:

	606 self.add_message('anomalous-backslash-in-string',

	607 line=start_row, args=(match, ))

	608 # Whether it was a valid escape or not, backslash followed by

	609 # another character can always be consumed whole: the second

	610 # character can never be the start of a new backslash escape.

	611 i += 2

	612

	613

	614

	615 def register(linter):

	616 """required method to auto register this checker """

	617 linter.register_checker(StringFormatChecker(linter))

	618 linter.register_checker(StringMethodsChecker(linter))

	619 linter.register_checker(StringConstantChecker(linter))

OLD	NEW

« pylintrc ('K') | « third_party/pylint/checkers/string_format.py ('k') | third_party/pylint/checkers/typecheck.py » ('j') | no next file with comments »