OLD | NEW |
1 # Copyright (c) 2009-2010 Arista Networks, Inc. - James Lingard | 1 # Copyright (c) 2009-2010 Arista Networks, Inc. - James Lingard |
2 # Copyright (c) 2004-2013 LOGILAB S.A. (Paris, FRANCE). | 2 # Copyright (c) 2004-2013 LOGILAB S.A. (Paris, FRANCE). |
3 # Copyright 2012 Google Inc. | 3 # Copyright 2012 Google Inc. |
4 # | 4 # |
5 # http://www.logilab.fr/ -- mailto:contact@logilab.fr | 5 # http://www.logilab.fr/ -- mailto:contact@logilab.fr |
6 # This program is free software; you can redistribute it and/or modify it under | 6 # This program is free software; you can redistribute it and/or modify it under |
7 # the terms of the GNU General Public License as published by the Free Software | 7 # the terms of the GNU General Public License as published by the Free Software |
8 # Foundation; either version 2 of the License, or (at your option) any later | 8 # Foundation; either version 2 of the License, or (at your option) any later |
9 # version. | 9 # version. |
10 # | 10 # |
11 # This program is distributed in the hope that it will be useful, but WITHOUT | 11 # This program is distributed in the hope that it will be useful, but WITHOUT |
12 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS | 12 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
13 # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details | 13 # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details |
14 # | 14 # |
15 # You should have received a copy of the GNU General Public License along with | 15 # You should have received a copy of the GNU General Public License along with |
16 # this program; if not, write to the Free Software Foundation, Inc., | 16 # this program; if not, write to the Free Software Foundation, Inc., |
17 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. | 17 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
18 """Checker for string formatting operations. | 18 """Checker for string formatting operations. |
19 """ | 19 """ |
20 | 20 |
21 import sys | 21 import sys |
22 import tokenize | 22 import tokenize |
23 import string | 23 import string |
24 try: | 24 import numbers |
25 import numbers | |
26 except ImportError: | |
27 numbers = None | |
28 | 25 |
29 import astroid | 26 import astroid |
30 | 27 |
31 from pylint.interfaces import ITokenChecker, IAstroidChecker, IRawChecker | 28 from pylint.interfaces import ITokenChecker, IAstroidChecker, IRawChecker |
32 from pylint.checkers import BaseChecker, BaseTokenChecker | 29 from pylint.checkers import BaseChecker, BaseTokenChecker |
33 from pylint.checkers import utils | 30 from pylint.checkers import utils |
34 from pylint.checkers.utils import check_messages | 31 from pylint.checkers.utils import check_messages |
35 | 32 |
| 33 import six |
| 34 |
| 35 |
36 _PY3K = sys.version_info[:2] >= (3, 0) | 36 _PY3K = sys.version_info[:2] >= (3, 0) |
37 _PY27 = sys.version_info[:2] == (2, 7) | 37 _PY27 = sys.version_info[:2] == (2, 7) |
38 | 38 |
39 MSGS = { | 39 MSGS = { |
40 'E1300': ("Unsupported format character %r (%#02x) at index %d", | 40 'E1300': ("Unsupported format character %r (%#02x) at index %d", |
41 "bad-format-character", | 41 "bad-format-character", |
42 "Used when a unsupported format character is used in a format\ | 42 "Used when a unsupported format character is used in a format\ |
43 string."), | 43 string."), |
44 'E1301': ("Format string ends in middle of conversion specifier", | 44 'E1301': ("Format string ends in middle of conversion specifier", |
45 "truncated-format-string", | 45 "truncated-format-string", |
(...skipping 92 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
138 return keyname, _field_iterator_convertor(fielditerator) | 138 return keyname, _field_iterator_convertor(fielditerator) |
139 | 139 |
140 | 140 |
141 def collect_string_fields(format_string): | 141 def collect_string_fields(format_string): |
142 """ Given a format string, return an iterator | 142 """ Given a format string, return an iterator |
143 of all the valid format fields. It handles nested fields | 143 of all the valid format fields. It handles nested fields |
144 as well. | 144 as well. |
145 """ | 145 """ |
146 | 146 |
147 formatter = string.Formatter() | 147 formatter = string.Formatter() |
148 parseiterator = formatter.parse(format_string) | |
149 try: | 148 try: |
| 149 parseiterator = formatter.parse(format_string) |
150 for result in parseiterator: | 150 for result in parseiterator: |
151 if all(item is None for item in result[1:]): | 151 if all(item is None for item in result[1:]): |
152 # not a replacement format | 152 # not a replacement format |
153 continue | 153 continue |
154 name = result[1] | 154 name = result[1] |
155 nested = result[2] | 155 nested = result[2] |
156 yield name | 156 yield name |
157 if nested: | 157 if nested: |
158 for field in collect_string_fields(nested): | 158 for field in collect_string_fields(nested): |
159 yield field | 159 yield field |
(...skipping 14 matching lines...) Expand all Loading... |
174 num_args = 0 | 174 num_args = 0 |
175 manual_pos_arg = set() | 175 manual_pos_arg = set() |
176 for name in collect_string_fields(format_string): | 176 for name in collect_string_fields(format_string): |
177 if name and str(name).isdigit(): | 177 if name and str(name).isdigit(): |
178 manual_pos_arg.add(str(name)) | 178 manual_pos_arg.add(str(name)) |
179 elif name: | 179 elif name: |
180 keyname, fielditerator = split_format_field_names(name) | 180 keyname, fielditerator = split_format_field_names(name) |
181 if isinstance(keyname, numbers.Number): | 181 if isinstance(keyname, numbers.Number): |
182 # In Python 2 it will return long which will lead | 182 # In Python 2 it will return long which will lead |
183 # to different output between 2 and 3 | 183 # to different output between 2 and 3 |
| 184 manual_pos_arg.add(keyname) |
184 keyname = int(keyname) | 185 keyname = int(keyname) |
185 keys.append((keyname, list(fielditerator))) | 186 keys.append((keyname, list(fielditerator))) |
186 else: | 187 else: |
187 num_args += 1 | 188 num_args += 1 |
188 return keys, num_args, len(manual_pos_arg) | 189 return keys, num_args, len(manual_pos_arg) |
189 | 190 |
190 def get_args(callfunc): | 191 def get_args(callfunc): |
191 """ Get the arguments from the given `CallFunc` node. | 192 """ Get the arguments from the given `CallFunc` node. |
192 Return a tuple, where the first element is the | 193 Return a tuple, where the first element is the |
193 number of positional arguments and the second element | 194 number of positional arguments and the second element |
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
226 msgs = MSGS | 227 msgs = MSGS |
227 | 228 |
228 @check_messages(*(MSGS.keys())) | 229 @check_messages(*(MSGS.keys())) |
229 def visit_binop(self, node): | 230 def visit_binop(self, node): |
230 if node.op != '%': | 231 if node.op != '%': |
231 return | 232 return |
232 left = node.left | 233 left = node.left |
233 args = node.right | 234 args = node.right |
234 | 235 |
235 if not (isinstance(left, astroid.Const) | 236 if not (isinstance(left, astroid.Const) |
236 and isinstance(left.value, basestring)): | 237 and isinstance(left.value, six.string_types)): |
237 return | 238 return |
238 format_string = left.value | 239 format_string = left.value |
239 try: | 240 try: |
240 required_keys, required_num_args = \ | 241 required_keys, required_num_args = \ |
241 utils.parse_format_string(format_string) | 242 utils.parse_format_string(format_string) |
242 except utils.UnsupportedFormatCharacter, e: | 243 except utils.UnsupportedFormatCharacter as e: |
243 c = format_string[e.index] | 244 c = format_string[e.index] |
244 self.add_message('bad-format-character', | 245 self.add_message('bad-format-character', |
245 node=node, args=(c, ord(c), e.index)) | 246 node=node, args=(c, ord(c), e.index)) |
246 return | 247 return |
247 except utils.IncompleteFormatString: | 248 except utils.IncompleteFormatString: |
248 self.add_message('truncated-format-string', node=node) | 249 self.add_message('truncated-format-string', node=node) |
249 return | 250 return |
250 if required_keys and required_num_args: | 251 if required_keys and required_num_args: |
251 # The format string uses both named and unnamed format | 252 # The format string uses both named and unnamed format |
252 # specifiers. | 253 # specifiers. |
253 self.add_message('mixed-format-string', node=node) | 254 self.add_message('mixed-format-string', node=node) |
254 elif required_keys: | 255 elif required_keys: |
255 # The format string uses only named format specifiers. | 256 # The format string uses only named format specifiers. |
256 # Check that the RHS of the % operator is a mapping object | 257 # Check that the RHS of the % operator is a mapping object |
257 # that contains precisely the set of keys required by the | 258 # that contains precisely the set of keys required by the |
258 # format string. | 259 # format string. |
259 if isinstance(args, astroid.Dict): | 260 if isinstance(args, astroid.Dict): |
260 keys = set() | 261 keys = set() |
261 unknown_keys = False | 262 unknown_keys = False |
262 for k, _ in args.items: | 263 for k, _ in args.items: |
263 if isinstance(k, astroid.Const): | 264 if isinstance(k, astroid.Const): |
264 key = k.value | 265 key = k.value |
265 if isinstance(key, basestring): | 266 if isinstance(key, six.string_types): |
266 keys.add(key) | 267 keys.add(key) |
267 else: | 268 else: |
268 self.add_message('bad-format-string-key', | 269 self.add_message('bad-format-string-key', |
269 node=node, args=key) | 270 node=node, args=key) |
270 else: | 271 else: |
271 # One of the keys was something other than a | 272 # One of the keys was something other than a |
272 # constant. Since we can't tell what it is, | 273 # constant. Since we can't tell what it is, |
273 # supress checks for missing keys in the | 274 # supress checks for missing keys in the |
274 # dictionary. | 275 # dictionary. |
275 unknown_keys = True | 276 unknown_keys = True |
(...skipping 62 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
338 if _PY27 or _PY3K: | 339 if _PY27 or _PY3K: |
339 self._check_new_format(node, func) | 340 self._check_new_format(node, func) |
340 | 341 |
341 def _check_new_format(self, node, func): | 342 def _check_new_format(self, node, func): |
342 """ Check the new string formatting. """ | 343 """ Check the new string formatting. """ |
343 # TODO: skip (for now) format nodes which don't have | 344 # TODO: skip (for now) format nodes which don't have |
344 # an explicit string on the left side of the format operation. | 345 # an explicit string on the left side of the format operation. |
345 # We do this because our inference engine can't properly handle | 346 # We do this because our inference engine can't properly handle |
346 # redefinitions of the original string. | 347 # redefinitions of the original string. |
347 # For more details, see issue 287. | 348 # For more details, see issue 287. |
348 if not isinstance(node.func.expr, astroid.Const): | 349 # |
| 350 # Note that there may not be any left side at all, if the format method |
| 351 # has been assigned to another variable. See issue 351. For example: |
| 352 # |
| 353 # fmt = 'some string {}'.format |
| 354 # fmt('arg') |
| 355 if (isinstance(node.func, astroid.Getattr) |
| 356 and not isinstance(node.func.expr, astroid.Const)): |
349 return | 357 return |
350 try: | 358 try: |
351 strnode = func.bound.infer().next() | 359 strnode = next(func.bound.infer()) |
352 except astroid.InferenceError: | 360 except astroid.InferenceError: |
353 return | 361 return |
354 if not isinstance(strnode, astroid.Const): | 362 if not isinstance(strnode, astroid.Const): |
355 return | 363 return |
356 if node.starargs or node.kwargs: | 364 if node.starargs or node.kwargs: |
357 # TODO: Don't complicate the logic, skip these for now. | 365 # TODO: Don't complicate the logic, skip these for now. |
358 return | 366 return |
359 try: | 367 try: |
360 positional, named = get_args(node) | 368 positional, named = get_args(node) |
361 except astroid.InferenceError: | 369 except astroid.InferenceError: |
362 return | 370 return |
363 try: | 371 try: |
364 fields, num_args, manual_pos = parse_format_method_string(strnode.value) | 372 fields, num_args, manual_pos = parse_format_method_string(strnode.value) |
365 except utils.IncompleteFormatString: | 373 except utils.IncompleteFormatString: |
366 self.add_message('bad-format-string', node=node) | 374 self.add_message('bad-format-string', node=node) |
367 return | 375 return |
368 | 376 |
369 manual_fields = set(field[0] for field in fields | |
370 if isinstance(field[0], numbers.Number)) | |
371 named_fields = set(field[0] for field in fields | 377 named_fields = set(field[0] for field in fields |
372 if isinstance(field[0], basestring)) | 378 if isinstance(field[0], six.string_types)) |
373 if num_args and manual_pos: | 379 if num_args and manual_pos: |
374 self.add_message('format-combined-specification', | 380 self.add_message('format-combined-specification', |
375 node=node) | 381 node=node) |
376 return | 382 return |
377 | 383 |
378 check_args = False | 384 check_args = False |
379 # Consider "{[0]} {[1]}" as num_args. | 385 # Consider "{[0]} {[1]}" as num_args. |
380 num_args += sum(1 for field in named_fields | 386 num_args += sum(1 for field in named_fields |
381 if field == '') | 387 if field == '') |
382 if named_fields: | 388 if named_fields: |
(...skipping 18 matching lines...) Expand all Loading... |
401 # This means that the format strings accepts both | 407 # This means that the format strings accepts both |
402 # positional and named fields and we should warn | 408 # positional and named fields and we should warn |
403 # when one of the them is missing or is extra. | 409 # when one of the them is missing or is extra. |
404 check_args = True | 410 check_args = True |
405 else: | 411 else: |
406 check_args = True | 412 check_args = True |
407 if check_args: | 413 if check_args: |
408 # num_args can be 0 if manual_pos is not. | 414 # num_args can be 0 if manual_pos is not. |
409 num_args = num_args or manual_pos | 415 num_args = num_args or manual_pos |
410 if positional > num_args: | 416 if positional > num_args: |
411 # We can have two possibilities: | 417 self.add_message('too-many-format-args', node=node) |
412 # * "{0} {1}".format(a, b) | |
413 # * "{} {} {}".format(a, b, c, d) | |
414 # We can check the manual keys for the first one. | |
415 if len(manual_fields) != positional: | |
416 self.add_message('too-many-format-args', node=node) | |
417 elif positional < num_args: | 418 elif positional < num_args: |
418 self.add_message('too-few-format-args', node=node) | 419 self.add_message('too-few-format-args', node=node) |
419 | 420 |
420 self._check_new_format_specifiers(node, fields, named) | 421 self._check_new_format_specifiers(node, fields, named) |
421 | 422 |
422 def _check_new_format_specifiers(self, node, fields, named): | 423 def _check_new_format_specifiers(self, node, fields, named): |
423 """ | 424 """ |
424 Check attribute and index access in the format | 425 Check attribute and index access in the format |
425 string ("{0.a}" and "{0[a]}"). | 426 string ("{0.a}" and "{0[a]}"). |
426 """ | 427 """ |
(...skipping 10 matching lines...) Expand all Loading... |
437 argname = utils.get_argument_from_call(node, key) | 438 argname = utils.get_argument_from_call(node, key) |
438 except utils.NoSuchArgumentError: | 439 except utils.NoSuchArgumentError: |
439 continue | 440 continue |
440 else: | 441 else: |
441 if key not in named: | 442 if key not in named: |
442 continue | 443 continue |
443 argname = named[key] | 444 argname = named[key] |
444 if argname in (astroid.YES, None): | 445 if argname in (astroid.YES, None): |
445 continue | 446 continue |
446 try: | 447 try: |
447 argument = argname.infer().next() | 448 argument = next(argname.infer()) |
448 except astroid.InferenceError: | 449 except astroid.InferenceError: |
449 continue | 450 continue |
450 if not specifiers or argument is astroid.YES: | 451 if not specifiers or argument is astroid.YES: |
451 # No need to check this key if it doesn't | 452 # No need to check this key if it doesn't |
452 # use attribute / item access | 453 # use attribute / item access |
453 continue | 454 continue |
454 if argument.parent and isinstance(argument.parent, astroid.Arguments): | 455 if argument.parent and isinstance(argument.parent, astroid.Arguments): |
455 # Check to see if our argument is kwarg or vararg, | 456 # Ignore any object coming from an argument, |
456 # and skip the check for this argument if so, because when inferring, | 457 # because we can't infer its value properly. |
457 # astroid will return empty objects (dicts and tuples) and | 458 continue |
458 # that can lead to false positives. | |
459 if argname.name in (argument.parent.kwarg, argument.parent.vararg): | |
460 continue | |
461 previous = argument | 459 previous = argument |
462 parsed = [] | 460 parsed = [] |
463 for is_attribute, specifier in specifiers: | 461 for is_attribute, specifier in specifiers: |
464 if previous is astroid.YES: | 462 if previous is astroid.YES: |
465 break | 463 break |
466 parsed.append((is_attribute, specifier)) | 464 parsed.append((is_attribute, specifier)) |
467 if is_attribute: | 465 if is_attribute: |
468 try: | 466 try: |
469 previous = previous.getattr(specifier)[0] | 467 previous = previous.getattr(specifier)[0] |
470 except astroid.NotFoundError: | 468 except astroid.NotFoundError: |
(...skipping 23 matching lines...) Expand all Loading... |
494 except astroid.NotFoundError: | 492 except astroid.NotFoundError: |
495 warn_error = True | 493 warn_error = True |
496 if warn_error: | 494 if warn_error: |
497 path = get_access_path(key, parsed) | 495 path = get_access_path(key, parsed) |
498 self.add_message('invalid-format-index', | 496 self.add_message('invalid-format-index', |
499 args=(specifier, path), | 497 args=(specifier, path), |
500 node=node) | 498 node=node) |
501 break | 499 break |
502 | 500 |
503 try: | 501 try: |
504 previous = previous.infer().next() | 502 previous = next(previous.infer()) |
505 except astroid.InferenceError: | 503 except astroid.InferenceError: |
506 # can't check further if we can't infer it | 504 # can't check further if we can't infer it |
507 break | 505 break |
508 | 506 |
509 | 507 |
510 | 508 |
511 class StringConstantChecker(BaseTokenChecker): | 509 class StringConstantChecker(BaseTokenChecker): |
512 """Check string literals""" | 510 """Check string literals""" |
513 __implements__ = (ITokenChecker, IRawChecker) | 511 __implements__ = (ITokenChecker, IRawChecker) |
514 name = 'string_constant' | 512 name = 'string_constant' |
(...skipping 18 matching lines...) Expand all Loading... |
533 # prefer a separate warning where they occur. \0 should be allowed. | 531 # prefer a separate warning where they occur. \0 should be allowed. |
534 | 532 |
535 # Characters that have a special meaning after a backslash but only in | 533 # Characters that have a special meaning after a backslash but only in |
536 # Unicode strings. | 534 # Unicode strings. |
537 UNICODE_ESCAPE_CHARACTERS = 'uUN' | 535 UNICODE_ESCAPE_CHARACTERS = 'uUN' |
538 | 536 |
539 def process_module(self, module): | 537 def process_module(self, module): |
540 self._unicode_literals = 'unicode_literals' in module.future_imports | 538 self._unicode_literals = 'unicode_literals' in module.future_imports |
541 | 539 |
542 def process_tokens(self, tokens): | 540 def process_tokens(self, tokens): |
543 for (tok_type, token, (start_row, start_col), _, _) in tokens: | 541 for (tok_type, token, (start_row, _), _, _) in tokens: |
544 if tok_type == tokenize.STRING: | 542 if tok_type == tokenize.STRING: |
545 # 'token' is the whole un-parsed token; we can look at the start | 543 # 'token' is the whole un-parsed token; we can look at the start |
546 # of it to see whether it's a raw or unicode string etc. | 544 # of it to see whether it's a raw or unicode string etc. |
547 self.process_string_token(token, start_row, start_col) | 545 self.process_string_token(token, start_row) |
548 | 546 |
549 def process_string_token(self, token, start_row, start_col): | 547 def process_string_token(self, token, start_row): |
550 for i, c in enumerate(token): | 548 for i, c in enumerate(token): |
551 if c in '\'\"': | 549 if c in '\'\"': |
552 quote_char = c | 550 quote_char = c |
553 break | 551 break |
| 552 # pylint: disable=undefined-loop-variable |
554 prefix = token[:i].lower() # markers like u, b, r. | 553 prefix = token[:i].lower() # markers like u, b, r. |
555 after_prefix = token[i:] | 554 after_prefix = token[i:] |
556 if after_prefix[:3] == after_prefix[-3:] == 3 * quote_char: | 555 if after_prefix[:3] == after_prefix[-3:] == 3 * quote_char: |
557 string_body = after_prefix[3:-3] | 556 string_body = after_prefix[3:-3] |
558 else: | 557 else: |
559 string_body = after_prefix[1:-1] # Chop off quotes | 558 string_body = after_prefix[1:-1] # Chop off quotes |
560 # No special checks on raw strings at the moment. | 559 # No special checks on raw strings at the moment. |
561 if 'r' not in prefix: | 560 if 'r' not in prefix: |
562 self.process_non_raw_string_token(prefix, string_body, | 561 self.process_non_raw_string_token(prefix, string_body, start_row) |
563 start_row, start_col) | |
564 | 562 |
565 def process_non_raw_string_token(self, prefix, string_body, start_row, | 563 def process_non_raw_string_token(self, prefix, string_body, start_row): |
566 start_col): | |
567 """check for bad escapes in a non-raw string. | 564 """check for bad escapes in a non-raw string. |
568 | 565 |
569 prefix: lowercase string of eg 'ur' string prefix markers. | 566 prefix: lowercase string of eg 'ur' string prefix markers. |
570 string_body: the un-parsed body of the string, not including the quote | 567 string_body: the un-parsed body of the string, not including the quote |
571 marks. | 568 marks. |
572 start_row: integer line number in the source. | 569 start_row: integer line number in the source. |
573 start_col: integer column number in the source. | |
574 """ | 570 """ |
575 # Walk through the string; if we see a backslash then escape the next | 571 # Walk through the string; if we see a backslash then escape the next |
576 # character, and skip over it. If we see a non-escaped character, | 572 # character, and skip over it. If we see a non-escaped character, |
577 # alert, and continue. | 573 # alert, and continue. |
578 # | 574 # |
579 # Accept a backslash when it escapes a backslash, or a quote, or | 575 # Accept a backslash when it escapes a backslash, or a quote, or |
580 # end-of-line, or one of the letters that introduce a special escape | 576 # end-of-line, or one of the letters that introduce a special escape |
581 # sequence <http://docs.python.org/reference/lexical_analysis.html> | 577 # sequence <http://docs.python.org/reference/lexical_analysis.html> |
582 # | 578 # |
583 # TODO(mbp): Maybe give a separate warning about the rarely-used | 579 # TODO(mbp): Maybe give a separate warning about the rarely-used |
(...skipping 26 matching lines...) Expand all Loading... |
610 # character can never be the start of a new backslash escape. | 606 # character can never be the start of a new backslash escape. |
611 i += 2 | 607 i += 2 |
612 | 608 |
613 | 609 |
614 | 610 |
615 def register(linter): | 611 def register(linter): |
616 """required method to auto register this checker """ | 612 """required method to auto register this checker """ |
617 linter.register_checker(StringFormatChecker(linter)) | 613 linter.register_checker(StringFormatChecker(linter)) |
618 linter.register_checker(StringMethodsChecker(linter)) | 614 linter.register_checker(StringMethodsChecker(linter)) |
619 linter.register_checker(StringConstantChecker(linter)) | 615 linter.register_checker(StringConstantChecker(linter)) |
OLD | NEW |