Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1)

Side by Side Diff: third_party/pylint/checkers/strings.py

Issue 739393004: Revert "Revert "pylint: upgrade to 1.3.1"" (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/tools/depot_tools/
Patch Set: Created 6 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « third_party/pylint/checkers/string_format.py ('k') | third_party/pylint/checkers/typecheck.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 # Copyright (c) 2009-2010 Arista Networks, Inc. - James Lingard
2 # Copyright (c) 2004-2013 LOGILAB S.A. (Paris, FRANCE).
3 # Copyright 2012 Google Inc.
4 #
5 # http://www.logilab.fr/ -- mailto:contact@logilab.fr
6 # This program is free software; you can redistribute it and/or modify it under
7 # the terms of the GNU General Public License as published by the Free Software
8 # Foundation; either version 2 of the License, or (at your option) any later
9 # version.
10 #
11 # This program is distributed in the hope that it will be useful, but WITHOUT
12 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
13 # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details
14 #
15 # You should have received a copy of the GNU General Public License along with
16 # this program; if not, write to the Free Software Foundation, Inc.,
17 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 """Checker for string formatting operations.
19 """
20
21 import sys
22 import tokenize
23 import string
24 try:
25 import numbers
26 except ImportError:
27 numbers = None
28
29 import astroid
30
31 from pylint.interfaces import ITokenChecker, IAstroidChecker, IRawChecker
32 from pylint.checkers import BaseChecker, BaseTokenChecker
33 from pylint.checkers import utils
34 from pylint.checkers.utils import check_messages
35
# Interpreter-version switches used to pick version-specific code paths.
_PY3K = sys.version_info[0] >= 3
_PY27 = tuple(sys.version_info[:2]) == (2, 7)
38
# Messages emitted by the string-format checks in this module.  Each entry
# maps a message id to a tuple of (message template, symbolic name,
# description) with an optional trailing options dict.
#
# Descriptions are built from adjacent string literals rather than
# backslash continuations inside a literal, so that the indentation of the
# continuation lines does not end up inside the help text.
MSGS = {
    'E1300': ("Unsupported format character %r (%#02x) at index %d",
              "bad-format-character",
              "Used when an unsupported format character is used in a format "
              "string."),
    'E1301': ("Format string ends in middle of conversion specifier",
              "truncated-format-string",
              "Used when a format string terminates before the end of a "
              "conversion specifier."),
    'E1302': ("Mixing named and unnamed conversion specifiers in format string",
              "mixed-format-string",
              "Used when a format string contains both named (e.g. '%(foo)d') "
              "and unnamed (e.g. '%d') conversion specifiers. This is also "
              "used when a named conversion specifier contains * for the "
              "minimum field width and/or precision."),
    'E1303': ("Expected mapping for format string, not %s",
              "format-needs-mapping",
              "Used when a format string that uses named conversion specifiers "
              "is used with an argument that is not a mapping."),
    'W1300': ("Format string dictionary key should be a string, not %s",
              "bad-format-string-key",
              "Used when a format string that uses named conversion specifiers "
              "is used with a dictionary whose keys are not all strings."),
    'W1301': ("Unused key %r in format string dictionary",
              "unused-format-string-key",
              "Used when a format string that uses named conversion specifiers "
              "is used with a dictionary that contains keys not required by the "
              "format string."),
    'E1304': ("Missing key %r in format string dictionary",
              "missing-format-string-key",
              "Used when a format string that uses named conversion specifiers "
              "is used with a dictionary that doesn't contain all the keys "
              "required by the format string."),
    'E1305': ("Too many arguments for format string",
              "too-many-format-args",
              "Used when a format string that uses unnamed conversion "
              "specifiers is given too many arguments."),
    'E1306': ("Not enough arguments for format string",
              "too-few-format-args",
              "Used when a format string that uses unnamed conversion "
              "specifiers is given too few arguments."),

    'W1302': ("Invalid format string",
              "bad-format-string",
              "Used when a PEP 3101 format string is invalid.",
              {'minversion': (2, 7)}),
    'W1303': ("Missing keyword argument %r for format string",
              "missing-format-argument-key",
              "Used when a PEP 3101 format string that uses named fields "
              "doesn't receive one or more required keywords.",
              {'minversion': (2, 7)}),
    'W1304': ("Unused format argument %r",
              "unused-format-string-argument",
              "Used when a PEP 3101 format string that uses named "
              "fields is used with an argument that "
              "is not required by the format string.",
              {'minversion': (2, 7)}),
    'W1305': ("Format string contains both automatic field numbering "
              "and manual field specification",
              "format-combined-specification",
              "Used when a PEP 3101 format string contains both automatic "
              "field numbering (e.g. '{}') and manual field "
              "specification (e.g. '{0}').",
              {'minversion': (2, 7)}),
    'W1306': ("Missing format attribute %r in format specifier %r",
              "missing-format-attribute",
              "Used when a PEP 3101 format string uses an "
              "attribute specifier ({0.length}), but the argument "
              "passed for formatting doesn't have that attribute.",
              {'minversion': (2, 7)}),
    'W1307': ("Using invalid lookup key %r in format specifier %r",
              "invalid-format-index",
              "Used when a PEP 3101 format string uses a lookup specifier "
              "({a[1]}), but the argument passed for formatting "
              "doesn't contain or doesn't have that key as an attribute.",
              {'minversion': (2, 7)}),
    }
116
# Node classes that StringFormatChecker.visit_binop treats as a single,
# non-mapping right-hand operand of the `%` operator: with named specifiers
# they trigger 'format-needs-mapping', with unnamed ones they count as
# exactly one argument.
OTHER_NODES = (astroid.Const, astroid.List, astroid.Backquote,
               astroid.Lambda, astroid.Function,
               astroid.ListComp, astroid.SetComp, astroid.GenExpr)
120
if _PY3K:
    import _string

    def split_format_field_names(format_string):
        """Split a replacement-field name by delegating to
        ``_string.formatter_field_name_split``.
        """
        return _string.formatter_field_name_split(format_string)
else:
    def _field_iterator_convertor(iterator):
        """Re-yield the (is_attr, key) pairs of *iterator*, converting every
        numeric key to a plain ``int``.
        """
        for is_attr, key in iterator:
            yield is_attr, (int(key) if isinstance(key, numbers.Number)
                            else key)

    def split_format_field_names(format_string):
        """Split a replacement-field name using the string's own
        ``_formatter_field_name_split`` method.
        """
        first, rest = format_string._formatter_field_name_split()
        # The method hands back longs instead of ints, which would
        # complicate the output, so the iterator is normalised first.
        return first, _field_iterator_convertor(rest)
139
140
def collect_string_fields(format_string):
    """Iterate over the replacement-field names found in *format_string*.

    Names that appear inside a nested format spec (the ``b`` of
    ``'{a:{b}}'``) are produced too, right after the field containing them.
    A ``ValueError`` raised while parsing is re-raised as
    ``utils.IncompleteFormatString``.
    """
    parsed_items = string.Formatter().parse(format_string)
    try:
        for item in parsed_items:
            if not any(part is not None for part in item[1:]):
                # Pure literal text: there is no replacement field here.
                continue
            field_name, format_spec = item[1], item[2]
            yield field_name
            if not format_spec:
                continue
            for nested_name in collect_string_fields(format_spec):
                yield nested_name
    except ValueError:
        # probably the format string is invalid
        # should we check the argument of the ValueError?
        raise utils.IncompleteFormatString(format_string)
164
def parse_format_method_string(format_string):
    """
    Parse a PEP 3101 format string.

    Returns a tuple (keys, num_args, manual_pos_arg) where:

    * keys is a list of (keyname, specifiers) pairs, one per field whose
      name is non-empty and not purely digits; specifiers is the list
      produced from the second value of split_format_field_names,
    * num_args is the number of fields with an empty name,
    * manual_pos_arg is the number of distinct all-digit field names.
    """
    keys = []
    auto_numbered = 0
    explicit_positions = set()
    for name in collect_string_fields(format_string):
        if not name:
            auto_numbered += 1
            continue
        text = str(name)
        if text.isdigit():
            explicit_positions.add(text)
            continue
        keyname, fielditerator = split_format_field_names(name)
        if isinstance(keyname, numbers.Number):
            # In Python 2 it will return long which will lead
            # to different output between 2 and 3
            keyname = int(keyname)
        keys.append((keyname, list(fielditerator)))
    return keys, auto_numbered, len(explicit_positions)
189
def get_args(callfunc):
    """ Split the arguments of the given `CallFunc` node.

    Returns a (positional, named) tuple: positional is the count of
    arguments that are not `astroid.Keyword` nodes, named is a dict mapping
    each keyword's name to the result of `utils.safe_infer` on its value.
    """
    named = {}
    positional = 0
    for argument in callfunc.args:
        if not isinstance(argument, astroid.Keyword):
            positional += 1
            continue
        named[argument.arg] = utils.safe_infer(argument.value)
    return positional, named
205
def get_access_path(key, parts):
    """ Build the textual access path (e.g. a.b.c[0][1]) for *key*
    followed by *parts*, a sequence of (is_attribute, specifier) pairs.

    Attribute accesses are rendered as ``.name`` and item accesses as
    ``[repr]``.
    """
    rendered = [
        ".{}".format(specifier) if is_attribute
        else "[{!r}]".format(specifier)
        for is_attribute, specifier in parts
    ]
    return str(key) + "".join(rendered)
217
218
class StringFormatChecker(BaseChecker):
    """Checks string formatting operations to ensure that the format string
    is valid and the arguments match the format string.
    """

    __implements__ = (IAstroidChecker,)
    name = 'string'
    msgs = MSGS

    @check_messages(*(MSGS.keys()))
    def visit_binop(self, node):
        """Check a ``%`` operation whose left operand is a string constant.

        The format string is parsed with ``utils.parse_format_string``; the
        right operand is then compared with the keys (named specifiers) or
        the argument count (unnamed specifiers) the format string requires,
        and the matching message is added for every mismatch found.
        Nothing is reported when the right operand is a node whose content
        cannot be determined here (e.g. a name or a call).
        """
        if node.op != '%':
            return
        left = node.left
        args = node.right

        # NOTE(review): `basestring` only exists on Python 2; presumably the
        # Python 3 build of this module goes through 2to3 -- confirm.
        if not (isinstance(left, astroid.Const)
                and isinstance(left.value, basestring)):
            return
        format_string = left.value
        try:
            required_keys, required_num_args = \
                utils.parse_format_string(format_string)
        except utils.UnsupportedFormatCharacter as e:
            c = format_string[e.index]
            self.add_message('bad-format-character',
                             node=node, args=(c, ord(c), e.index))
            return
        except utils.IncompleteFormatString:
            self.add_message('truncated-format-string', node=node)
            return
        if required_keys and required_num_args:
            # The format string uses both named and unnamed format
            # specifiers.
            self.add_message('mixed-format-string', node=node)
        elif required_keys:
            # The format string uses only named format specifiers.
            # Check that the RHS of the % operator is a mapping object
            # that contains precisely the set of keys required by the
            # format string.
            if isinstance(args, astroid.Dict):
                keys = set()
                unknown_keys = False
                for k, _ in args.items:
                    if isinstance(k, astroid.Const):
                        key = k.value
                        if isinstance(key, basestring):
                            keys.add(key)
                        else:
                            self.add_message('bad-format-string-key',
                                             node=node, args=key)
                    else:
                        # One of the keys was something other than a
                        # constant.  Since we can't tell what it is,
                        # suppress checks for missing keys in the
                        # dictionary.
                        unknown_keys = True
                if not unknown_keys:
                    for key in required_keys:
                        if key not in keys:
                            self.add_message('missing-format-string-key',
                                             node=node, args=key)
                for key in keys:
                    if key not in required_keys:
                        self.add_message('unused-format-string-key',
                                         node=node, args=key)
            elif isinstance(args, OTHER_NODES + (astroid.Tuple,)):
                type_name = type(args).__name__
                self.add_message('format-needs-mapping',
                                 node=node, args=type_name)
            # else:
                # The RHS of the format specifier is a name or
                # expression.  It may be a mapping object, so
                # there's nothing we can check.
        else:
            # The format string uses only unnamed format specifiers.
            # Check that the number of arguments passed to the RHS of
            # the % operator matches the number required by the format
            # string.
            if isinstance(args, astroid.Tuple):
                num_args = len(args.elts)
            elif isinstance(args,
                            OTHER_NODES + (astroid.Dict, astroid.DictComp)):
                num_args = 1
            else:
                # The RHS of the format specifier is a name or
                # expression.  It could be a tuple of unknown size, so
                # there's nothing we can check.
                num_args = None
            if num_args is not None:
                if num_args > required_num_args:
                    self.add_message('too-many-format-args', node=node)
                elif num_args < required_num_args:
                    self.add_message('too-few-format-args', node=node)
312
313
class StringMethodsChecker(BaseChecker):
    """Checks calls to methods of str / unicode / bytes instances:
    suspicious arguments to the ``strip`` family and PEP 3101 ``format``
    calls.
    """

    __implements__ = (IAstroidChecker,)
    name = 'string'
    msgs = {
        'E1310': ("Suspicious argument in %s.%s call",
                  "bad-str-strip-call",
                  "The argument to a str.{l,r,}strip call contains a"
                  " duplicate character."),
        }

    # 'bad-str-strip-call' is declared in this class's own `msgs`, not in
    # the module-level MSGS, so it is listed explicitly to associate this
    # method with that message as well.
    @check_messages('bad-str-strip-call', *(MSGS.keys()))
    def visit_callfunc(self, node):
        """Inspect a call and, when it is a bound ``strip``/``lstrip``/
        ``rstrip`` or ``format`` method of a str, unicode or bytes instance,
        run the corresponding check.
        """
        func = utils.safe_infer(node.func)
        if (isinstance(func, astroid.BoundMethod)
                and isinstance(func.bound, astroid.Instance)
                and func.bound.name in ('str', 'unicode', 'bytes')):
            if func.name in ('strip', 'lstrip', 'rstrip') and node.args:
                arg = utils.safe_infer(node.args[0])
                if not isinstance(arg, astroid.Const):
                    return
                try:
                    has_duplicates = len(arg.value) != len(set(arg.value))
                except TypeError:
                    # The constant is not a sequence of characters (for
                    # instance ``strip(None)``): nothing to check, and the
                    # checker must not crash on it.
                    return
                if has_duplicates:
                    self.add_message('bad-str-strip-call', node=node,
                                     args=(func.bound.name, func.name))
            elif func.name == 'format':
                if _PY27 or _PY3K:
                    self._check_new_format(node, func)

    def _check_new_format(self, node, func):
        """ Check the new string formatting.

        *node* is the call node and *func* the inferred bound ``format``
        method.  Reports an invalid format string, mixed automatic/manual
        numbering, missing or unused keyword arguments and a wrong number
        of positional arguments, then checks attribute / index access in
        the fields.
        """
        # TODO: skip (for now) format nodes which don't have
        #       an explicit string on the left side of the format operation.
        #       We do this because our inference engine can't properly handle
        #       redefinitions of the original string.
        #       For more details, see issue 287.
        if not isinstance(node.func.expr, astroid.Const):
            return
        try:
            strnode = next(func.bound.infer())
        except astroid.InferenceError:
            return
        if not isinstance(strnode, astroid.Const):
            return
        if node.starargs or node.kwargs:
            # TODO: Don't complicate the logic, skip these for now.
            return
        try:
            positional, named = get_args(node)
        except astroid.InferenceError:
            return
        try:
            fields, num_args, manual_pos = \
                parse_format_method_string(strnode.value)
        except utils.IncompleteFormatString:
            self.add_message('bad-format-string', node=node)
            return

        manual_fields = set(field[0] for field in fields
                            if isinstance(field[0], numbers.Number))
        named_fields = set(field[0] for field in fields
                           if isinstance(field[0], basestring))
        if num_args and manual_pos:
            self.add_message('format-combined-specification',
                             node=node)
            return

        check_args = False
        # Consider "{[0]} {[1]}" as num_args.
        num_args += sum(1 for field in named_fields
                        if field == '')
        if named_fields:
            for field in named_fields:
                if field not in named and field:
                    self.add_message('missing-format-argument-key',
                                     node=node,
                                     args=(field, ))
            for field in named:
                if field not in named_fields:
                    self.add_message('unused-format-string-argument',
                                     node=node,
                                     args=(field, ))
            # num_args can be 0 if manual_pos is not.
            num_args = num_args or manual_pos
            if positional or num_args:
                empty = any(True for field in named_fields
                            if field == '')
                if named or empty:
                    # Verify the required number of positional arguments
                    # only if the .format got at least one keyword argument.
                    # This means that the format strings accepts both
                    # positional and named fields and we should warn
                    # when one of the them is missing or is extra.
                    check_args = True
        else:
            check_args = True
        if check_args:
            # num_args can be 0 if manual_pos is not.
            num_args = num_args or manual_pos
            if positional > num_args:
                # We can have two possibilities:
                # * "{0} {1}".format(a, b)
                # * "{} {} {}".format(a, b, c, d)
                # We can check the manual keys for the first one.
                if len(manual_fields) != positional:
                    self.add_message('too-many-format-args', node=node)
            elif positional < num_args:
                self.add_message('too-few-format-args', node=node)

        self._check_new_format_specifiers(node, fields, named)

    def _check_new_format_specifiers(self, node, fields, named):
        """
        Check attribute and index access in the format
        string ("{0.a}" and "{0[a]}").

        *fields* is the list of (key, specifiers) pairs from
        parse_format_method_string and *named* the keyword arguments of
        the call as returned by get_args.
        """
        for key, specifiers in fields:
            # Obtain the argument. If it can't be obtained
            # or inferred, skip this check.
            if key == '':
                # {[0]} will have an unnamed argument, defaulting
                # to 0. It will not be present in `named`, so use the value
                # 0 for it.
                key = 0
            if isinstance(key, numbers.Number):
                try:
                    argname = utils.get_argument_from_call(node, key)
                except utils.NoSuchArgumentError:
                    continue
            else:
                if key not in named:
                    continue
                argname = named[key]
            if argname in (astroid.YES, None):
                continue
            try:
                argument = next(argname.infer())
            except astroid.InferenceError:
                continue
            if not specifiers or argument is astroid.YES:
                # No need to check this key if it doesn't
                # use attribute / item access
                continue
            if (argument.parent
                    and isinstance(argument.parent, astroid.Arguments)):
                # Check to see if our argument is kwarg or vararg,
                # and skip the check for this argument if so, because when
                # inferring, astroid will return empty objects (dicts and
                # tuples) and that can lead to false positives.
                if argname.name in (argument.parent.kwarg,
                                    argument.parent.vararg):
                    continue
            previous = argument
            parsed = []
            for is_attribute, specifier in specifiers:
                if previous is astroid.YES:
                    break
                parsed.append((is_attribute, specifier))
                if is_attribute:
                    try:
                        previous = previous.getattr(specifier)[0]
                    except astroid.NotFoundError:
                        if (hasattr(previous, 'has_dynamic_getattr') and
                                previous.has_dynamic_getattr()):
                            # Don't warn if the object has a custom __getattr__
                            break
                        path = get_access_path(key, parsed)
                        self.add_message('missing-format-attribute',
                                         args=(specifier, path),
                                         node=node)
                        break
                else:
                    warn_error = False
                    if hasattr(previous, 'getitem'):
                        try:
                            previous = previous.getitem(specifier)
                        except (IndexError, TypeError):
                            warn_error = True
                    else:
                        try:
                            # Lookup __getitem__ in the current node,
                            # but skip further checks, because we can't
                            # retrieve the looked object
                            previous.getattr('__getitem__')
                            break
                        except astroid.NotFoundError:
                            warn_error = True
                    if warn_error:
                        path = get_access_path(key, parsed)
                        self.add_message('invalid-format-index',
                                         args=(specifier, path),
                                         node=node)
                        break

                try:
                    previous = next(previous.infer())
                except astroid.InferenceError:
                    # can't check further if we can't infer it
                    break
508
509
510
class StringConstantChecker(BaseTokenChecker):
    """Check string literals"""
    __implements__ = (ITokenChecker, IRawChecker)
    name = 'string_constant'
    msgs = {
        'W1401': ('Anomalous backslash in string: \'%s\'. '
                  'String constant might be missing an r prefix.',
                  'anomalous-backslash-in-string',
                  'Used when a backslash is in a literal string but not as an '
                  'escape.'),
        'W1402': ('Anomalous Unicode escape in byte string: \'%s\'. '
                  'String constant might be missing an r or u prefix.',
                  'anomalous-unicode-escape-in-string',
                  'Used when an escape like \\u is encountered in a byte '
                  'string where it has no effect.'),
        }

    # Characters that have a special meaning after a backslash in either
    # Unicode or byte strings.
    ESCAPE_CHARACTERS = 'abfnrtvx\n\r\t\\\'\"01234567'

    # TODO(mbp): Octal characters are quite an edge case today; people may
    # prefer a separate warning where they occur.  \0 should be allowed.

    # Characters that have a special meaning after a backslash but only in
    # Unicode strings.
    UNICODE_ESCAPE_CHARACTERS = 'uUN'

    def process_module(self, module):
        """Remember whether *module* imports ``unicode_literals`` from
        ``__future__``.
        """
        self._unicode_literals = 'unicode_literals' in module.future_imports

    def process_tokens(self, tokens):
        """Run the string-literal check on every STRING token of *tokens*."""
        for (tok_type, token, (start_row, start_col), _, _) in tokens:
            if tok_type != tokenize.STRING:
                continue
            # 'token' is the whole un-parsed token; we can look at the start
            # of it to see whether it's a raw or unicode string etc.
            self.process_string_token(token, start_row, start_col)

    def process_string_token(self, token, start_row, start_col):
        """Split *token* into prefix and body, then check the body unless
        the prefix marks a raw string.
        """
        # Locate the opening quote; everything before it is the prefix.
        for i, c in enumerate(token):
            if c in '\'\"':
                quote_char = c
                break
        prefix = token[:i].lower()  # markers like u, b, r.
        quoted = token[i:]
        triple = 3 * quote_char
        if quoted[:3] == triple and quoted[-3:] == triple:
            string_body = quoted[3:-3]
        else:
            string_body = quoted[1:-1]  # Chop off quotes
        # No special checks on raw strings at the moment.
        if 'r' in prefix:
            return
        self.process_non_raw_string_token(prefix, string_body,
                                          start_row, start_col)

    def process_non_raw_string_token(self, prefix, string_body, start_row,
                                     start_col):
        """check for bad escapes in a non-raw string.

        prefix: lowercase string of eg 'ur' string prefix markers.
        string_body: the un-parsed body of the string, not including the quote
        marks.
        start_row: integer line number in the source.
        start_col: integer column number in the source.
        """
        # Jump from backslash to backslash.  A backslash is accepted when it
        # escapes a backslash, a quote, an end-of-line, or one of the letters
        # that introduce a special escape sequence
        # <http://docs.python.org/reference/lexical_analysis.html>;
        # anything else is reported.
        #
        # TODO(mbp): Maybe give a separate warning about the rarely-used
        # \a \b \v \f?
        #
        # TODO(mbp): We could give the column of the problem character, but
        # add_message doesn't seem to have a way to pass it through at present.
        position = string_body.find('\\', 0)
        while position != -1:
            # There must be a next character; having a backslash at the end
            # of the string would be a SyntaxError.
            next_char = string_body[position + 1]
            match = string_body[position:position + 2]
            if next_char in self.UNICODE_ESCAPE_CHARACTERS:
                # Fine in an explicit unicode literal, or in a literal that
                # is unicode by default and not marked as bytes.
                is_unicode = ('u' in prefix
                              or ((_PY3K or self._unicode_literals)
                                  and 'b' not in prefix))
                if not is_unicode:
                    self.add_message('anomalous-unicode-escape-in-string',
                                     line=start_row, args=(match, ))
            elif next_char not in self.ESCAPE_CHARACTERS:
                self.add_message('anomalous-backslash-in-string',
                                 line=start_row, args=(match, ))
            # Whether it was a valid escape or not, backslash followed by
            # another character can always be consumed whole: the second
            # character can never be the start of a new backslash escape.
            position = string_body.find('\\', position + 2)
612
613
614
def register(linter):
    """required method to auto register this checker """
    checker_classes = (
        StringFormatChecker,
        StringMethodsChecker,
        StringConstantChecker,
    )
    # Instantiate and register one checker at a time, in declaration order.
    for checker_class in checker_classes:
        linter.register_checker(checker_class(linter))
OLDNEW
« no previous file with comments | « third_party/pylint/checkers/string_format.py ('k') | third_party/pylint/checkers/typecheck.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698