Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(697)

Side by Side Diff: third_party/pylint/pylint/checkers/strings.py

Issue 1920403002: [content/test/gpu] Run pylint check of gpu tests in unittest instead of PRESUBMIT (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Update path to LICENSE.txt of logilab/README.chromium Created 4 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 # Copyright (c) 2009-2010 Arista Networks, Inc. - James Lingard
2 # Copyright (c) 2004-2013 LOGILAB S.A. (Paris, FRANCE).
3 # Copyright 2012 Google Inc.
4 #
5 # http://www.logilab.fr/ -- mailto:contact@logilab.fr
6 # This program is free software; you can redistribute it and/or modify it under
7 # the terms of the GNU General Public License as published by the Free Software
8 # Foundation; either version 2 of the License, or (at your option) any later
9 # version.
10 #
11 # This program is distributed in the hope that it will be useful, but WITHOUT
12 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
13 # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details
14 #
15 # You should have received a copy of the GNU General Public License along with
16 # this program; if not, write to the Free Software Foundation, Inc.,
17 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 """Checker for string formatting operations.
19 """
20
21 import sys
22 import tokenize
23 import string
24 import numbers
25
26 import astroid
27
28 from pylint.interfaces import ITokenChecker, IAstroidChecker, IRawChecker
29 from pylint.checkers import BaseChecker, BaseTokenChecker
30 from pylint.checkers import utils
31 from pylint.checkers.utils import check_messages
32
33 import six
34
35
36 _PY3K = sys.version_info[:2] >= (3, 0)
37 _PY27 = sys.version_info[:2] == (2, 7)
38
39 MSGS = {
40 'E1300': ("Unsupported format character %r (%#02x) at index %d",
41 "bad-format-character",
42 "Used when a unsupported format character is used in a format\
43 string."),
44 'E1301': ("Format string ends in middle of conversion specifier",
45 "truncated-format-string",
46 "Used when a format string terminates before the end of a \
47 conversion specifier."),
48 'E1302': ("Mixing named and unnamed conversion specifiers in format string",
49 "mixed-format-string",
50 "Used when a format string contains both named (e.g. '%(foo)d') \
51 and unnamed (e.g. '%d') conversion specifiers. This is also \
52 used when a named conversion specifier contains * for the \
53 minimum field width and/or precision."),
54 'E1303': ("Expected mapping for format string, not %s",
55 "format-needs-mapping",
56 "Used when a format string that uses named conversion specifiers \
57 is used with an argument that is not a mapping."),
58 'W1300': ("Format string dictionary key should be a string, not %s",
59 "bad-format-string-key",
60 "Used when a format string that uses named conversion specifiers \
61 is used with a dictionary whose keys are not all strings."),
62 'W1301': ("Unused key %r in format string dictionary",
63 "unused-format-string-key",
64 "Used when a format string that uses named conversion specifiers \
65 is used with a dictionary that conWtains keys not required by the \
66 format string."),
67 'E1304': ("Missing key %r in format string dictionary",
68 "missing-format-string-key",
69 "Used when a format string that uses named conversion specifiers \
70 is used with a dictionary that doesn't contain all the keys \
71 required by the format string."),
72 'E1305': ("Too many arguments for format string",
73 "too-many-format-args",
74 "Used when a format string that uses unnamed conversion \
75 specifiers is given too many arguments."),
76 'E1306': ("Not enough arguments for format string",
77 "too-few-format-args",
78 "Used when a format string that uses unnamed conversion \
79 specifiers is given too few arguments"),
80
81 'W1302': ("Invalid format string",
82 "bad-format-string",
83 "Used when a PEP 3101 format string is invalid.",
84 {'minversion': (2, 7)}),
85 'W1303': ("Missing keyword argument %r for format string",
86 "missing-format-argument-key",
87 "Used when a PEP 3101 format string that uses named fields "
88 "doesn't receive one or more required keywords.",
89 {'minversion': (2, 7)}),
90 'W1304': ("Unused format argument %r",
91 "unused-format-string-argument",
92 "Used when a PEP 3101 format string that uses named "
93 "fields is used with an argument that "
94 "is not required by the format string.",
95 {'minversion': (2, 7)}),
96 'W1305': ("Format string contains both automatic field numbering "
97 "and manual field specification",
98 "format-combined-specification",
99 "Usen when a PEP 3101 format string contains both automatic "
100 "field numbering (e.g. '{}') and manual field "
101 "specification (e.g. '{0}').",
102 {'minversion': (2, 7)}),
103 'W1306': ("Missing format attribute %r in format specifier %r",
104 "missing-format-attribute",
105 "Used when a PEP 3101 format string uses an "
106 "attribute specifier ({0.length}), but the argument "
107 "passed for formatting doesn't have that attribute.",
108 {'minversion': (2, 7)}),
109 'W1307': ("Using invalid lookup key %r in format specifier %r",
110 "invalid-format-index",
111 "Used when a PEP 3101 format string uses a lookup specifier "
112 "({a[1]}), but the argument passed for formatting "
113 "doesn't contain or doesn't have that key as an attribute.",
114 {'minversion': (2, 7)})
115 }
116
# Node classes that visit_binop treats as a single, non-mapping operand
# on the right-hand side of the ``%`` operator.
OTHER_NODES = (
    astroid.Const,
    astroid.List,
    astroid.Backquote,
    astroid.Lambda,
    astroid.Function,
    astroid.ListComp,
    astroid.SetComp,
    astroid.GenExpr,
)
120
if not _PY3K:
    def _field_iterator_convertor(iterator):
        """Yield the (is_attr, key) pairs of *iterator*, turning any
        numeric key into a plain int.
        """
        for is_attr, key in iterator:
            yield is_attr, (int(key) if isinstance(key, numbers.Number)
                            else key)

    def split_format_field_names(format_string):
        """Split a field name into its first key and an iterator over
        the remaining (is_attr, key) lookups.
        """
        first, rest = format_string._formatter_field_name_split()
        # The iterator hands back longs instead of ints, which would
        # complicate the output, so normalise them on the way out.
        return first, _field_iterator_convertor(rest)
else:
    import _string

    def split_format_field_names(format_string):
        """Split a field name into its first key and an iterator over
        the remaining (is_attr, key) lookups.
        """
        return _string.formatter_field_name_split(format_string)
139
140
def collect_string_fields(format_string):
    """Iterate over every replacement field name in *format_string*.

    Fields nested inside a format spec (``{0:{width}}``) are yielded
    right after the field that contains them.  An unparsable string
    raises ``utils.IncompleteFormatString``.
    """
    try:
        for parsed in string.Formatter().parse(format_string):
            field_name = parsed[1]
            format_spec = parsed[2]
            conversion = parsed[3]
            if (field_name is None and format_spec is None
                    and conversion is None):
                # Plain literal text, not a replacement field.
                continue
            yield field_name
            if format_spec:
                for inner_field in collect_string_fields(format_spec):
                    yield inner_field
    except ValueError:
        # Most likely the format string is invalid.
        # Should we check the argument of the ValueError?
        raise utils.IncompleteFormatString(format_string)
164
def parse_format_method_string(format_string):
    """Parse a PEP 3101 format string.

    Returns a tuple ``(keys, num_args, manual_pos_arg)`` where:

    * ``keys`` is a list of ``(key, lookups)`` pairs, one per field that
      is not a bare number; ``key`` is the leading name (or an int for
      fields such as ``{0.attr}``) and ``lookups`` is the list of
      ``(is_attribute, specifier)`` accesses that follow it;
    * ``num_args`` is the number of automatically numbered fields
      (``{}``);
    * ``manual_pos_arg`` is the number of distinct explicitly numbered
      positions (``{0}``, ``{1.attr}``, ...).

    Any exception raised by ``collect_string_fields`` for an invalid
    format string is propagated unchanged.
    """
    keys = []
    num_args = 0
    manual_pos_arg = set()
    for name in collect_string_fields(format_string):
        if name and str(name).isdigit():
            manual_pos_arg.add(str(name))
        elif name:
            keyname, fielditerator = split_format_field_names(name)
            if isinstance(keyname, numbers.Number):
                # Store the position as a string, exactly like the
                # branch above, so that "{0}" and "{0.attr}" are counted
                # as one position instead of two.
                manual_pos_arg.add(str(keyname))
                # In Python 2 the key is a long, which would lead to
                # different output between 2 and 3.
                keyname = int(keyname)
            keys.append((keyname, list(fielditerator)))
        else:
            num_args += 1
    return keys, num_args, len(manual_pos_arg)
190
def get_args(callfunc):
    """Summarise the arguments of the given `CallFunc` node.

    Returns a pair: the count of positional arguments, and a dict
    mapping each keyword name to the result of ``utils.safe_infer`` on
    its value.
    """
    keyword_values = {}
    positional_count = 0

    for argument in callfunc.args:
        if not isinstance(argument, astroid.Keyword):
            positional_count += 1
            continue
        keyword_values[argument.arg] = utils.safe_infer(argument.value)
    return positional_count, keyword_values
206
def get_access_path(key, parts):
    """Render *key* followed by its chain of format specifiers as the
    access path a reader would write (e.g. a.b.c[0][1]).
    """
    rendered = [
        ".{}".format(specifier) if is_attribute
        else "[{!r}]".format(specifier)
        for is_attribute, specifier in parts
    ]
    return "".join([str(key)] + rendered)
218
219
class StringFormatChecker(BaseChecker):
    """Validate ``%`` formatting operations: the format string must be
    well formed and the supplied arguments must match it.
    """

    __implements__ = (IAstroidChecker,)
    name = 'string'
    msgs = MSGS

    @check_messages(*(MSGS.keys()))
    def visit_binop(self, node):
        """Check a ``%`` binary operation whose left side is a string
        constant.
        """
        if node.op != '%':
            return
        template = node.left
        operand = node.right

        if not isinstance(template, astroid.Const):
            return
        if not isinstance(template.value, six.string_types):
            return
        format_string = template.value
        try:
            parsed = utils.parse_format_string(format_string)
            required_keys, required_num_args = parsed
        except utils.UnsupportedFormatCharacter as exc:
            bad_char = format_string[exc.index]
            self.add_message('bad-format-character',
                             node=node,
                             args=(bad_char, ord(bad_char), exc.index))
            return
        except utils.IncompleteFormatString:
            self.add_message('truncated-format-string', node=node)
            return

        if required_keys and required_num_args:
            # Named and unnamed specifiers are mixed in one string.
            self.add_message('mixed-format-string', node=node)
            return

        if required_keys:
            # Only named specifiers: the right-hand side has to be a
            # mapping holding exactly the keys the string asks for.
            if isinstance(operand, astroid.Dict):
                literal_keys = set()
                all_keys_known = True
                for key_node, _ in operand.items:
                    if not isinstance(key_node, astroid.Const):
                        # A non-constant key cannot be resolved here,
                        # so the missing-key check is suppressed.
                        all_keys_known = False
                        continue
                    key = key_node.value
                    if isinstance(key, six.string_types):
                        literal_keys.add(key)
                    else:
                        self.add_message('bad-format-string-key',
                                         node=node, args=key)
                if all_keys_known:
                    for key in required_keys:
                        if key not in literal_keys:
                            self.add_message('missing-format-string-key',
                                             node=node, args=key)
                for key in literal_keys:
                    if key not in required_keys:
                        self.add_message('unused-format-string-key',
                                         node=node, args=key)
            elif isinstance(operand, OTHER_NODES + (astroid.Tuple,)):
                self.add_message('format-needs-mapping',
                                 node=node, args=type(operand).__name__)
            # Otherwise the right-hand side is a name or expression that
            # may well be a mapping, so there is nothing to check.
            return

        # Only unnamed specifiers: compare the number of values supplied
        # with the number the format string requires.
        if isinstance(operand, astroid.Tuple):
            supplied = len(operand.elts)
        elif isinstance(operand,
                        OTHER_NODES + (astroid.Dict, astroid.DictComp)):
            supplied = 1
        else:
            # A name or expression could be a tuple of unknown size.
            return
        if supplied > required_num_args:
            self.add_message('too-many-format-args', node=node)
        elif supplied < required_num_args:
            self.add_message('too-few-format-args', node=node)
313
314
class StringMethodsChecker(BaseChecker):
    """Checks calls to string methods: suspicious ``strip`` arguments and
    ``str.format`` calls whose arguments do not match the format string.
    """

    __implements__ = (IAstroidChecker,)
    name = 'string'
    msgs = {
        'E1310': ("Suspicious argument in %s.%s call",
                  "bad-str-strip-call",
                  "The argument to a str.{l,r,}strip call contains a"
                  " duplicate character, "),
        }

    # 'E1310' is listed explicitly: it lives in this class' own table
    # rather than in MSGS, and the visitor must still run when it is the
    # only enabled message.
    @check_messages('E1310', *MSGS.keys())
    def visit_callfunc(self, node):
        """Dispatch calls on str/unicode/bytes instances to the strip and
        format checks.
        """
        func = utils.safe_infer(node.func)
        if (isinstance(func, astroid.BoundMethod)
                and isinstance(func.bound, astroid.Instance)
                and func.bound.name in ('str', 'unicode', 'bytes')):
            if func.name in ('strip', 'lstrip', 'rstrip') and node.args:
                arg = utils.safe_infer(node.args[0])
                if not isinstance(arg, astroid.Const):
                    return
                # A constant such as None or an int has no length;
                # only text/bytes arguments can contain duplicates.
                if not isinstance(arg.value,
                                  six.string_types + (six.binary_type,)):
                    return
                if len(arg.value) != len(set(arg.value)):
                    self.add_message('bad-str-strip-call', node=node,
                                     args=(func.bound.name, func.name))
            elif func.name == 'format':
                if _PY27 or _PY3K:
                    self._check_new_format(node, func)

    def _check_new_format(self, node, func):
        """Check a PEP 3101 ``str.format`` call against its format string.

        node is the call node and func the inferred bound ``format``
        method.
        """
        # TODO: skip (for now) format nodes which don't have
        #       an explicit string on the left side of the format operation.
        #       We do this because our inference engine can't properly handle
        #       redefinitions of the original string.
        #       For more details, see issue 287.
        #
        # Note that there may not be any left side at all, if the format method
        # has been assigned to another variable. See issue 351. For example:
        #
        #    fmt = 'some string {}'.format
        #    fmt('arg')
        if (isinstance(node.func, astroid.Getattr)
                and not isinstance(node.func.expr, astroid.Const)):
            return
        try:
            strnode = next(func.bound.infer())
        except astroid.InferenceError:
            return
        if not isinstance(strnode, astroid.Const):
            return
        if node.starargs or node.kwargs:
            # TODO: Don't complicate the logic, skip these for now.
            return
        try:
            positional, named = get_args(node)
        except astroid.InferenceError:
            return
        try:
            fields, num_args, manual_pos = parse_format_method_string(
                strnode.value)
        except utils.IncompleteFormatString:
            self.add_message('bad-format-string', node=node)
            return

        named_fields = set(field[0] for field in fields
                           if isinstance(field[0], six.string_types))
        if num_args and manual_pos:
            self.add_message('format-combined-specification',
                             node=node)
            return

        check_args = False
        # Consider "{[0]} {[1]}" as num_args.
        num_args += sum(1 for field in named_fields
                        if field == '')
        if named_fields:
            for field in named_fields:
                if field not in named and field:
                    self.add_message('missing-format-argument-key',
                                     node=node,
                                     args=(field, ))
            for field in named:
                if field not in named_fields:
                    self.add_message('unused-format-string-argument',
                                     node=node,
                                     args=(field, ))
            # num_args can be 0 if manual_pos is not.
            num_args = num_args or manual_pos
            if positional or num_args:
                empty = any(True for field in named_fields
                            if field == '')
                if named or empty:
                    # Verify the required number of positional arguments
                    # only if the .format got at least one keyword argument.
                    # This means that the format strings accepts both
                    # positional and named fields and we should warn
                    # when one of the them is missing or is extra.
                    check_args = True
        else:
            check_args = True
        if check_args:
            # num_args can be 0 if manual_pos is not.
            num_args = num_args or manual_pos
            if positional > num_args:
                self.add_message('too-many-format-args', node=node)
            elif positional < num_args:
                self.add_message('too-few-format-args', node=node)

        self._check_new_format_specifiers(node, fields, named)

    def _check_new_format_specifiers(self, node, fields, named):
        """
        Check attribute and index access in the format
        string ("{0.a}" and "{0[a]}").

        fields is the list of (key, specifiers) pairs parsed from the
        format string; named maps keyword names to their inferred values.
        """
        for key, specifiers in fields:
            # Obtain the argument. If it can't be obtained
            # or inferred, skip this check.
            if key == '':
                # {[0]} will have an unnamed argument, defaulting
                # to 0. It will not be present in `named`, so use the value
                # 0 for it.
                key = 0
            if isinstance(key, numbers.Number):
                try:
                    argname = utils.get_argument_from_call(node, key)
                except utils.NoSuchArgumentError:
                    continue
            else:
                if key not in named:
                    continue
                argname = named[key]
            if argname in (astroid.YES, None):
                continue
            try:
                argument = next(argname.infer())
            except astroid.InferenceError:
                continue
            if not specifiers or argument is astroid.YES:
                # No need to check this key if it doesn't
                # use attribute / item access
                continue
            if (argument.parent
                    and isinstance(argument.parent, astroid.Arguments)):
                # Ignore any object coming from an argument,
                # because we can't infer its value properly.
                continue
            previous = argument
            parsed = []
            for is_attribute, specifier in specifiers:
                if previous is astroid.YES:
                    break
                parsed.append((is_attribute, specifier))
                if is_attribute:
                    try:
                        previous = previous.getattr(specifier)[0]
                    except astroid.NotFoundError:
                        if (hasattr(previous, 'has_dynamic_getattr') and
                                previous.has_dynamic_getattr()):
                            # Don't warn if the object has a custom __getattr__
                            break
                        path = get_access_path(key, parsed)
                        self.add_message('missing-format-attribute',
                                         args=(specifier, path),
                                         node=node)
                        break
                else:
                    warn_error = False
                    if hasattr(previous, 'getitem'):
                        try:
                            previous = previous.getitem(specifier)
                        except (IndexError, TypeError):
                            warn_error = True
                    else:
                        try:
                            # Lookup __getitem__ in the current node,
                            # but skip further checks, because we can't
                            # retrieve the looked object
                            previous.getattr('__getitem__')
                            break
                        except astroid.NotFoundError:
                            warn_error = True
                    if warn_error:
                        path = get_access_path(key, parsed)
                        self.add_message('invalid-format-index',
                                         args=(specifier, path),
                                         node=node)
                        break

                try:
                    previous = next(previous.infer())
                except astroid.InferenceError:
                    # can't check further if we can't infer it
                    break
506
507
508
class StringConstantChecker(BaseTokenChecker):
    """Look for suspicious backslash escapes in string literals."""

    __implements__ = (ITokenChecker, IRawChecker)
    name = 'string_constant'
    msgs = {
        'W1401': ('Anomalous backslash in string: \'%s\'. '
                  'String constant might be missing an r prefix.',
                  'anomalous-backslash-in-string',
                  'Used when a backslash is in a literal string but not as an '
                  'escape.'),
        'W1402': ('Anomalous Unicode escape in byte string: \'%s\'. '
                  'String constant might be missing an r or u prefix.',
                  'anomalous-unicode-escape-in-string',
                  'Used when an escape like \\u is encountered in a byte '
                  'string where it has no effect.'),
        }

    # Characters that have a special meaning after a backslash in either
    # Unicode or byte strings.
    ESCAPE_CHARACTERS = 'abfnrtvx\n\r\t\\\'\"01234567'

    # TODO(mbp): Octal characters are quite an edge case today; people may
    # prefer a separate warning where they occur.  \0 should be allowed.

    # Characters that have a special meaning after a backslash but only in
    # Unicode strings.
    UNICODE_ESCAPE_CHARACTERS = 'uUN'

    def process_module(self, module):
        """Remember whether the module enables ``unicode_literals``."""
        self._unicode_literals = 'unicode_literals' in module.future_imports

    def process_tokens(self, tokens):
        """Hand every string token to process_string_token."""
        for token_info in tokens:
            tok_type, token, (start_row, _), _, _ = token_info
            if tok_type != tokenize.STRING:
                continue
            # 'token' is the whole un-parsed token; its start tells us
            # whether it is a raw or unicode string etc.
            self.process_string_token(token, start_row)

    def process_string_token(self, token, start_row):
        """Split a string token into prefix and body, then check the body
        unless the string is raw.
        """
        for quote_index, char in enumerate(token):
            if char in '\'\"':
                quote_char = char
                break
        # pylint: disable=undefined-loop-variable
        prefix = token[:quote_index].lower()  # markers like u, b, r.
        quoted = token[quote_index:]
        triple_quote = 3 * quote_char
        if quoted[:3] == quoted[-3:] == triple_quote:
            string_body = quoted[3:-3]
        else:
            string_body = quoted[1:-1]  # Chop off quotes
        # No special checks on raw strings at the moment.
        if 'r' in prefix:
            return
        self.process_non_raw_string_token(prefix, string_body, start_row)

    def process_non_raw_string_token(self, prefix, string_body, start_row):
        """check for bad escapes in a non-raw string.

        prefix: lowercase string of eg 'ur' string prefix markers.
        string_body: the un-parsed body of the string, not including the quote
        marks.
        start_row: integer line number in the source.
        """
        # Jump from backslash to backslash.  A backslash is fine when it
        # escapes a backslash, a quote, the end of line, or one of the
        # letters that introduce a special escape sequence
        # <http://docs.python.org/reference/lexical_analysis.html>;
        # anything else is reported.
        #
        # TODO(mbp): Maybe give a separate warning about the rarely-used
        # \a \b \v \f?
        #
        # TODO(mbp): We could give the column of the problem character, but
        # add_message doesn't seem to have a way to pass it through at present.
        position = string_body.find('\\')
        while position != -1:
            # There must be a next character; having a backslash at the end
            # of the string would be a SyntaxError.
            next_char = string_body[position + 1]
            match = string_body[position:position + 2]
            if next_char in self.UNICODE_ESCAPE_CHARACTERS:
                is_unicode_string = (
                    'u' in prefix
                    # unicode by default
                    or ((_PY3K or self._unicode_literals)
                        and 'b' not in prefix))
                if not is_unicode_string:
                    self.add_message('anomalous-unicode-escape-in-string',
                                     line=start_row, args=(match, ))
            elif next_char not in self.ESCAPE_CHARACTERS:
                self.add_message('anomalous-backslash-in-string',
                                 line=start_row, args=(match, ))
            # Whether it was a valid escape or not, backslash followed by
            # another character can always be consumed whole: the second
            # character can never be the start of a new backslash escape.
            position = string_body.find('\\', position + 2)
608
609
610
def register(linter):
    """required method to auto register this checker """
    checker_classes = (
        StringFormatChecker,
        StringMethodsChecker,
        StringConstantChecker,
    )
    for checker_class in checker_classes:
        linter.register_checker(checker_class(linter))
OLDNEW
« no previous file with comments | « third_party/pylint/pylint/checkers/stdlib.py ('k') | third_party/pylint/pylint/checkers/typecheck.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698