Chromium Code Reviews

Side by Side Diff: Tools/Scripts/webkitpy/thirdparty/autopep8.py

Issue 546613003: Add a new 'format-webkitpy' command that will reformat code to the style guide. (Closed) Base URL: svn://svn.chromium.org/blink/trunk
Patch Set: remove hack from test/main.py Created 6 years, 3 months ago
1 # Copyright (C) 2010-2011 Hideo Hattori
2 # Copyright (C) 2011-2013 Hideo Hattori, Steven Myint
3 # Copyright (C) 2013-2014 Hideo Hattori, Steven Myint, Bill Wendling
4 #
5 # Permission is hereby granted, free of charge, to any person obtaining
6 # a copy of this software and associated documentation files (the
7 # "Software"), to deal in the Software without restriction, including
8 # without limitation the rights to use, copy, modify, merge, publish,
9 # distribute, sublicense, and/or sell copies of the Software, and to
10 # permit persons to whom the Software is furnished to do so, subject to
11 # the following conditions:
12 #
13 # The above copyright notice and this permission notice shall be
14 # included in all copies or substantial portions of the Software.
15 #
16 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19 # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
20 # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
21 # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 # SOFTWARE.
24
25 """Automatically formats Python code to conform to the PEP 8 style guide.
26
27 Fixes that only need to be done once can be added as a function of the form
28 "fix_<code>(source)" in this module. It should return the fixed source code.
29 These fixes are picked up by apply_global_fixes().
30
31 Fixes that depend on pep8 should be added as methods to FixPEP8. See the class
32 documentation for more information.
33
34 """
35
36 from __future__ import absolute_import
37 from __future__ import division
38 from __future__ import print_function
39 from __future__ import unicode_literals
40
41 import bisect
42 import codecs
43 import collections
44 import copy
45 import difflib
46 import fnmatch
47 import inspect
48 import io
49 import itertools
50 import keyword
51 import locale
52 import os
53 import re
54 import signal
55 import sys
56 import token
57 import tokenize
58
59 import pep8
60
61
62 try:
63 unicode
64 except NameError:
65 unicode = str
66
67
68 __version__ = '1.0.3'
69
70
71 CR = '\r'
72 LF = '\n'
73 CRLF = '\r\n'
74
75
76 PYTHON_SHEBANG_REGEX = re.compile(r'^#!.*\bpython[23]?\b\s*$')
77
78
79 # For generating line shortening candidates.
80 SHORTEN_OPERATOR_GROUPS = frozenset([
81 frozenset([',']),
82 frozenset(['%']),
83 frozenset([',', '(', '[', '{']),
84 frozenset(['%', '(', '[', '{']),
85 frozenset([',', '(', '[', '{', '%', '+', '-', '*', '/', '//']),
86 frozenset(['%', '+', '-', '*', '/', '//']),
87 ])
88
89
90 DEFAULT_IGNORE = 'E24'
91 DEFAULT_INDENT_SIZE = 4
92
93
94 # W602 is handled separately due to the need to avoid "with_traceback".
95 CODE_TO_2TO3 = {
96 'E721': ['idioms'],
97 'W601': ['has_key'],
98 'W603': ['ne'],
99 'W604': ['repr'],
100 'W690': ['apply',
101 'except',
102 'exitfunc',
103 'import',
104 'numliterals',
105 'operator',
106 'paren',
107 'reduce',
108 'renames',
109 'standarderror',
110 'sys_exc',
111 'throw',
112 'tuple_params',
113 'xreadlines']}
114
115
116 def open_with_encoding(filename, encoding=None, mode='r'):
117 """Return opened file with a specific encoding."""
118 if not encoding:
119 encoding = detect_encoding(filename)
120
121 return io.open(filename, mode=mode, encoding=encoding,
122 newline='') # Preserve line endings
123
124
125 def detect_encoding(filename):
126 """Return file encoding."""
127 try:
128 with open(filename, 'rb') as input_file:
129 from lib2to3.pgen2 import tokenize as lib2to3_tokenize
130 encoding = lib2to3_tokenize.detect_encoding(input_file.readline)[0]
131
132 # Check for correctness of encoding
133 with open_with_encoding(filename, encoding) as test_file:
134 test_file.read()
135
136 return encoding
137 except (LookupError, SyntaxError, UnicodeDecodeError):
138 return 'latin-1'
139
140
141 def readlines_from_file(filename):
142 """Return contents of file."""
143 with open_with_encoding(filename) as input_file:
144 return input_file.readlines()
145
146
147 def extended_blank_lines(logical_line,
148 blank_lines,
149 indent_level,
150 previous_logical):
151 """Check for missing blank lines after class declaration."""
152 if previous_logical.startswith('class '):
153 if (
154 logical_line.startswith(('def ', 'class ', '@')) or
155 pep8.DOCSTRING_REGEX.match(logical_line)
156 ):
157 if indent_level and not blank_lines:
158 yield (0, 'E309 expected 1 blank line after class declaration')
159 elif previous_logical.startswith('def '):
160 if blank_lines and pep8.DOCSTRING_REGEX.match(logical_line):
161 yield (0, 'E303 too many blank lines ({0})'.format(blank_lines))
162 elif pep8.DOCSTRING_REGEX.match(previous_logical):
163 # Missing blank line between class docstring and method declaration.
164 if (
165 indent_level and
166 not blank_lines and
167 logical_line.startswith('def ') and
168 '(self' in logical_line
169 ):
170 yield (0, 'E301 expected 1 blank line, found 0')
171 pep8.register_check(extended_blank_lines)
172
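# Editor's note: a minimal sketch of the pep8 plugin convention used above.
# A logical-line check is a generator whose parameter names select the values
# pep8 passes in; it yields (offset, text) pairs and is wired in through
# pep8.register_check(). Commented out so it is not registered; 'X101' is a
# hypothetical code.
#
#     def _example_tab_check(logical_line):
#         if '\t' in logical_line:
#             yield (logical_line.index('\t'), 'X101 tab in logical line')
#     pep8.register_check(_example_tab_check)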
173
174 def continued_indentation(logical_line, tokens, indent_level, indent_char,
175 noqa):
176 """Override pep8's function to provide indentation information."""
177 first_row = tokens[0][2][0]
178 nrows = 1 + tokens[-1][2][0] - first_row
179 if noqa or nrows == 1:
180 return
181
182 # indent_next tells us whether the next block is indented. Assuming
183 # that it is indented by 4 spaces, then we should not allow 4-space
184 # indents on the final continuation line. In turn, some other
185 # indents are allowed to have an extra 4 spaces.
186 indent_next = logical_line.endswith(':')
187
188 row = depth = 0
189 valid_hangs = (
190 (DEFAULT_INDENT_SIZE,)
191 if indent_char != '\t' else (DEFAULT_INDENT_SIZE,
192 2 * DEFAULT_INDENT_SIZE)
193 )
194
195 # Remember how many brackets were opened on each line.
196 parens = [0] * nrows
197
198 # Relative indents of physical lines.
199 rel_indent = [0] * nrows
200
201 # For each depth, collect a list of opening rows.
202 open_rows = [[0]]
203 # For each depth, memorize the hanging indentation.
204 hangs = [None]
205
206 # Visual indents.
207 indent_chances = {}
208 last_indent = tokens[0][2]
209 indent = [last_indent[1]]
210
211 last_token_multiline = None
212 line = None
213 last_line = ''
214 last_line_begins_with_multiline = False
215 for token_type, text, start, end, line in tokens:
216
217 newline = row < start[0] - first_row
218 if newline:
219 row = start[0] - first_row
220 newline = (not last_token_multiline and
221 token_type not in (tokenize.NL, tokenize.NEWLINE))
222 last_line_begins_with_multiline = last_token_multiline
223
224 if newline:
225 # This is the beginning of a continuation line.
226 last_indent = start
227
228 # Record the initial indent.
229 rel_indent[row] = pep8.expand_indent(line) - indent_level
230
231 # Identify closing bracket.
232 close_bracket = (token_type == tokenize.OP and text in ']})')
233
234 # Is the indent relative to an opening bracket line?
235 for open_row in reversed(open_rows[depth]):
236 hang = rel_indent[row] - rel_indent[open_row]
237 hanging_indent = hang in valid_hangs
238 if hanging_indent:
239 break
240 if hangs[depth]:
241 hanging_indent = (hang == hangs[depth])
242
243 visual_indent = (not close_bracket and hang > 0 and
244 indent_chances.get(start[1]))
245
246 if close_bracket and indent[depth]:
247 # Closing bracket for visual indent.
248 if start[1] != indent[depth]:
249 yield (start, 'E124 {0}'.format(indent[depth]))
250 elif close_bracket and not hang:
251 pass
252 elif indent[depth] and start[1] < indent[depth]:
253 # Visual indent is broken.
254 yield (start, 'E128 {0}'.format(indent[depth]))
255 elif (hanging_indent or
256 (indent_next and
257 rel_indent[row] == 2 * DEFAULT_INDENT_SIZE)):
258 # Hanging indent is verified.
259 if close_bracket:
260 yield (start, 'E123 {0}'.format(indent_level +
261 rel_indent[open_row]))
262 hangs[depth] = hang
263 elif visual_indent is True:
264 # Visual indent is verified.
265 indent[depth] = start[1]
266 elif visual_indent in (text, unicode):
267 # Ignore token lined up with matching one from a previous line.
268 pass
269 else:
270 one_indented = (indent_level + rel_indent[open_row] +
271 DEFAULT_INDENT_SIZE)
272 # Indent is broken.
273 if hang <= 0:
274 error = ('E122', one_indented)
275 elif indent[depth]:
276 error = ('E127', indent[depth])
277 elif hang > DEFAULT_INDENT_SIZE:
278 error = ('E126', one_indented)
279 else:
280 hangs[depth] = hang
281 error = ('E121', one_indented)
282
283 yield (start, '{0} {1}'.format(*error))
284
285 # Look for visual indenting.
286 if (parens[row] and token_type not in (tokenize.NL, tokenize.COMMENT)
287 and not indent[depth]):
288 indent[depth] = start[1]
289 indent_chances[start[1]] = True
290 # Deal with implicit string concatenation.
291 elif (token_type in (tokenize.STRING, tokenize.COMMENT) or
292 text in ('u', 'ur', 'b', 'br')):
293 indent_chances[start[1]] = unicode
294 # Special case for the "if" statement because len("if (") is equal to
295 # 4.
296 elif not indent_chances and not row and not depth and text == 'if':
297 indent_chances[end[1] + 1] = True
298 elif text == ':' and line[end[1]:].isspace():
299 open_rows[depth].append(row)
300
301 # Keep track of bracket depth.
302 if token_type == tokenize.OP:
303 if text in '([{':
304 depth += 1
305 indent.append(0)
306 hangs.append(None)
307 if len(open_rows) == depth:
308 open_rows.append([])
309 open_rows[depth].append(row)
310 parens[row] += 1
311 elif text in ')]}' and depth > 0:
312 # Parent indents should not be more than this one.
313 prev_indent = indent.pop() or last_indent[1]
314 hangs.pop()
315 for d in range(depth):
316 if indent[d] > prev_indent:
317 indent[d] = 0
318 for ind in list(indent_chances):
319 if ind >= prev_indent:
320 del indent_chances[ind]
321 del open_rows[depth + 1:]
322 depth -= 1
323 if depth:
324 indent_chances[indent[depth]] = True
325 for idx in range(row, -1, -1):
326 if parens[idx]:
327 parens[idx] -= 1
328 break
329 assert len(indent) == depth + 1
330 if (
331 start[1] not in indent_chances and
332 # This is for purposes of speeding up E121 (GitHub #90).
333 not last_line.rstrip().endswith(',')
334 ):
335 # Allow lining up tokens.
336 indent_chances[start[1]] = text
337
338 last_token_multiline = (start[0] != end[0])
339 if last_token_multiline:
340 rel_indent[end[0] - first_row] = rel_indent[row]
341
342 last_line = line
343
344 if (
345 indent_next and
346 not last_line_begins_with_multiline and
347 pep8.expand_indent(line) == indent_level + DEFAULT_INDENT_SIZE
348 ):
349 pos = (start[0], indent[0] + 4)
350 yield (pos, 'E125 {0}'.format(indent_level +
351 2 * DEFAULT_INDENT_SIZE))
352 del pep8._checks['logical_line'][pep8.continued_indentation]
353 pep8.register_check(continued_indentation)
354
355
356 class FixPEP8(object):
357
358 """Fix invalid code.
359
360 Fixer methods are prefixed "fix_". The _fix_source() method looks for these
361 automatically.
362
363 The fixer method can take either one or two arguments (in addition to
364 self). The first argument is "result", which is the error information from
365 pep8. The second argument, "logical", is required only for logical-line
366 fixes.
367
368 The fixer method can return the list of modified lines or None. An empty
369 list would mean that no changes were made. None would mean that only the
370 line reported in the pep8 error was modified. Note that the modified line
371 numbers that are returned are indexed at 1. This typically would correspond
372 with the line number reported in the pep8 error information.
373
374 [fixed method list]
375 - e121,e122,e123,e124,e125,e126,e127,e128,e129
376 - e201,e202,e203
377 - e211
378 - e221,e222,e223,e224,e225
379 - e231
380 - e251
381 - e261,e262
382 - e271,e272,e273,e274
383 - e301,e302,e303
384 - e401
385 - e502
386 - e701,e702
387 - e711
388 - w291
389
390 """
391
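# Editor's note: a hedged sketch (commented out, not part of the patch) of
# the fixer-method shape described in the docstring above. A physical-line
# fixer takes only "result"; returning None means that just the reported
# line was modified. Compare fix_w291() near the end of the class.
#
#     def fix_w291(self, result):
#         line_index = result['line'] - 1
#         self.source[line_index] = self.source[line_index].rstrip() + '\n'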
392 def __init__(self, filename,
393 options,
394 contents=None,
395 long_line_ignore_cache=None):
396 self.filename = filename
397 if contents is None:
398 self.source = readlines_from_file(filename)
399 else:
400 sio = io.StringIO(contents)
401 self.source = sio.readlines()
402 self.options = options
403 self.indent_word = _get_indentword(''.join(self.source))
404
405 self.long_line_ignore_cache = (
406 set() if long_line_ignore_cache is None
407 else long_line_ignore_cache)
408
409 # Many fixers are the same even though pep8 categorizes them
410 # differently.
411 self.fix_e115 = self.fix_e112
412 self.fix_e116 = self.fix_e113
413 self.fix_e121 = self._fix_reindent
414 self.fix_e122 = self._fix_reindent
415 self.fix_e123 = self._fix_reindent
416 self.fix_e124 = self._fix_reindent
417 self.fix_e126 = self._fix_reindent
418 self.fix_e127 = self._fix_reindent
419 self.fix_e128 = self._fix_reindent
420 self.fix_e129 = self._fix_reindent
421 self.fix_e202 = self.fix_e201
422 self.fix_e203 = self.fix_e201
423 self.fix_e211 = self.fix_e201
424 self.fix_e221 = self.fix_e271
425 self.fix_e222 = self.fix_e271
426 self.fix_e223 = self.fix_e271
427 self.fix_e226 = self.fix_e225
428 self.fix_e227 = self.fix_e225
429 self.fix_e228 = self.fix_e225
430 self.fix_e241 = self.fix_e271
431 self.fix_e242 = self.fix_e224
432 self.fix_e261 = self.fix_e262
433 self.fix_e272 = self.fix_e271
434 self.fix_e273 = self.fix_e271
435 self.fix_e274 = self.fix_e271
436 self.fix_e309 = self.fix_e301
437 self.fix_e501 = (
438 self.fix_long_line_logically if
439 options and (options.aggressive >= 2 or options.experimental) else
440 self.fix_long_line_physically)
441 self.fix_e703 = self.fix_e702
442
443 self._ws_comma_done = False
444
445 def _fix_source(self, results):
446 try:
447 (logical_start, logical_end) = _find_logical(self.source)
448 logical_support = True
449 except (SyntaxError, tokenize.TokenError): # pragma: no cover
450 logical_support = False
451
452 completed_lines = set()
453 for result in sorted(results, key=_priority_key):
454 if result['line'] in completed_lines:
455 continue
456
457 fixed_methodname = 'fix_' + result['id'].lower()
458 if hasattr(self, fixed_methodname):
459 fix = getattr(self, fixed_methodname)
460
461 line_index = result['line'] - 1
462 original_line = self.source[line_index]
463
464 is_logical_fix = len(inspect.getargspec(fix).args) > 2
465 if is_logical_fix:
466 logical = None
467 if logical_support:
468 logical = _get_logical(self.source,
469 result,
470 logical_start,
471 logical_end)
472 if logical and set(range(
473 logical[0][0] + 1,
474 logical[1][0] + 1)).intersection(
475 completed_lines):
476 continue
477
478 modified_lines = fix(result, logical)
479 else:
480 modified_lines = fix(result)
481
482 if modified_lines is None:
483 # Force logical fixes to report what they modified.
484 assert not is_logical_fix
485
486 if self.source[line_index] == original_line:
487 modified_lines = []
488
489 if modified_lines:
490 completed_lines.update(modified_lines)
491 elif modified_lines == []: # Empty list means no fix
492 if self.options.verbose >= 2:
493 print(
494 '---> Not fixing {f} on line {l}'.format(
495 f=result['id'], l=result['line']),
496 file=sys.stderr)
497 else: # We assume one-line fix when None.
498 completed_lines.add(result['line'])
499 else:
500 if self.options.verbose >= 3:
501 print(
502 "---> '{0}' is not defined.".format(fixed_methodname),
503 file=sys.stderr)
504
505 info = result['info'].strip()
506 print('---> {0}:{1}:{2}:{3}'.format(self.filename,
507 result['line'],
508 result['column'],
509 info),
510 file=sys.stderr)
511
512 def fix(self):
513 """Return a version of the source code with PEP 8 violations fixed."""
514 pep8_options = {
515 'ignore': self.options.ignore,
516 'select': self.options.select,
517 'max_line_length': self.options.max_line_length,
518 }
519 results = _execute_pep8(pep8_options, self.source)
520
521 if self.options.verbose:
522 progress = {}
523 for r in results:
524 if r['id'] not in progress:
525 progress[r['id']] = set()
526 progress[r['id']].add(r['line'])
527 print('---> {n} issue(s) to fix {progress}'.format(
528 n=len(results), progress=progress), file=sys.stderr)
529
530 if self.options.line_range:
531 start, end = self.options.line_range
532 results = [r for r in results
533 if start <= r['line'] <= end]
534
535 self._fix_source(filter_results(source=''.join(self.source),
536 results=results,
537 aggressive=self.options.aggressive))
538
539 if self.options.line_range:
540 # If the number of lines has changed, then adjust line_range.
541 count = sum(sline.count('\n')
542 for sline in self.source[start - 1:end])
543 self.options.line_range[1] = start + count - 1
544
545 return ''.join(self.source)
546
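# Editor's note: typical driving code, sketched for review context (not part
# of the patch). "options" is assumed to be the parsed autopep8 argument
# namespace (ignore, select, max_line_length, aggressive, verbose, ...).
#
#     fixer = FixPEP8('example.py', options)
#     fixed_source = fixer.fix()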
547 def _fix_reindent(self, result):
548 """Fix a badly indented line.
549
550 This is done by adjusting the line's initial indent only.
551
552 """
553 num_indent_spaces = int(result['info'].split()[1])
554 line_index = result['line'] - 1
555 target = self.source[line_index]
556
557 self.source[line_index] = ' ' * num_indent_spaces + target.lstrip()
558
559 def fix_e112(self, result):
560 """Fix under-indented comments."""
561 line_index = result['line'] - 1
562 target = self.source[line_index]
563
564 if not target.lstrip().startswith('#'):
565 # Don't screw with invalid syntax.
566 return []
567
568 self.source[line_index] = self.indent_word + target
569
570 def fix_e113(self, result):
571 """Fix over-indented comments."""
572 line_index = result['line'] - 1
573 target = self.source[line_index]
574
575 indent = _get_indentation(target)
576 stripped = target.lstrip()
577
578 if not stripped.startswith('#'):
579 # Don't screw with invalid syntax.
580 return []
581
582 self.source[line_index] = indent[1:] + stripped
583
584 def fix_e125(self, result):
585 """Fix indentation undistinguish from the next logical line."""
586 num_indent_spaces = int(result['info'].split()[1])
587 line_index = result['line'] - 1
588 target = self.source[line_index]
589
590 spaces_to_add = num_indent_spaces - len(_get_indentation(target))
591 indent = len(_get_indentation(target))
592 modified_lines = []
593
594 while len(_get_indentation(self.source[line_index])) >= indent:
595 self.source[line_index] = (' ' * spaces_to_add +
596 self.source[line_index])
597 modified_lines.append(1 + line_index) # Line indexed at 1.
598 line_index -= 1
599
600 return modified_lines
601
602 def fix_e201(self, result):
603 """Remove extraneous whitespace."""
604 line_index = result['line'] - 1
605 target = self.source[line_index]
606 offset = result['column'] - 1
607
608 if is_probably_part_of_multiline(target):
609 return []
610
611 fixed = fix_whitespace(target,
612 offset=offset,
613 replacement='')
614
615 self.source[line_index] = fixed
616
617 def fix_e224(self, result):
618 """Remove extraneous whitespace around operator."""
619 target = self.source[result['line'] - 1]
620 offset = result['column'] - 1
621 fixed = target[:offset] + target[offset:].replace('\t', ' ')
622 self.source[result['line'] - 1] = fixed
623
624 def fix_e225(self, result):
625 """Fix missing whitespace around operator."""
626 target = self.source[result['line'] - 1]
627 offset = result['column'] - 1
628 fixed = target[:offset] + ' ' + target[offset:]
629
630 # Only proceed if non-whitespace characters match.
631 # And make sure we don't break the indentation.
632 if (
633 fixed.replace(' ', '') == target.replace(' ', '') and
634 _get_indentation(fixed) == _get_indentation(target)
635 ):
636 self.source[result['line'] - 1] = fixed
637 else:
638 return []
639
640 def fix_e231(self, result):
641 """Add missing whitespace."""
642 # Optimize for comma case. This will fix all commas in the full source
643 # code in one pass. Don't do this more than once. If it fails the first
644 # time, there is no point in trying again.
645 if ',' in result['info'] and not self._ws_comma_done:
646 self._ws_comma_done = True
647 original = ''.join(self.source)
648 new = refactor(original, ['ws_comma'])
649 if original.strip() != new.strip():
650 self.source = [new]
651 return range(1, 1 + len(original))
652
653 line_index = result['line'] - 1
654 target = self.source[line_index]
655 offset = result['column']
656 fixed = target[:offset] + ' ' + target[offset:]
657 self.source[line_index] = fixed
658
659 def fix_e251(self, result):
660 """Remove whitespace around parameter '=' sign."""
661 line_index = result['line'] - 1
662 target = self.source[line_index]
663
664 # This is necessary since pep8 sometimes reports columns that go
665 # past the end of the physical line. This happens in cases like:
666 # foo(bar\n=None)
667 c = min(result['column'] - 1,
668 len(target) - 1)
669
670 if target[c].strip():
671 fixed = target
672 else:
673 fixed = target[:c].rstrip() + target[c:].lstrip()
674
675 # There could be an escaped newline
676 #
677 # def foo(a=\
678 # 1)
679 if fixed.endswith(('=\\\n', '=\\\r\n', '=\\\r')):
680 self.source[line_index] = fixed.rstrip('\n\r \t\\')
681 self.source[line_index + 1] = self.source[line_index + 1].lstrip()
682 return [line_index + 1, line_index + 2] # Line indexed at 1
683
684 self.source[result['line'] - 1] = fixed
685
686 def fix_e262(self, result):
687 """Fix spacing after comment hash."""
688 target = self.source[result['line'] - 1]
689 offset = result['column']
690
691 code = target[:offset].rstrip(' \t#')
692 comment = target[offset:].lstrip(' \t#')
693
694 fixed = code + (' # ' + comment if comment.strip() else '\n')
695
696 self.source[result['line'] - 1] = fixed
697
698 def fix_e271(self, result):
699 """Fix extraneous whitespace around keywords."""
700 line_index = result['line'] - 1
701 target = self.source[line_index]
702 offset = result['column'] - 1
703
704 if is_probably_part_of_multiline(target):
705 return []
706
707 fixed = fix_whitespace(target,
708 offset=offset,
709 replacement=' ')
710
711 if fixed == target:
712 return []
713 else:
714 self.source[line_index] = fixed
715
716 def fix_e301(self, result):
717 """Add missing blank line."""
718 cr = '\n'
719 self.source[result['line'] - 1] = cr + self.source[result['line'] - 1]
720
721 def fix_e302(self, result):
722 """Add missing 2 blank lines."""
723 add_linenum = 2 - int(result['info'].split()[-1])
724 cr = '\n' * add_linenum
725 self.source[result['line'] - 1] = cr + self.source[result['line'] - 1]
726
727 def fix_e303(self, result):
728 """Remove extra blank lines."""
729 delete_linenum = int(result['info'].split('(')[1].split(')')[0]) - 2
730 delete_linenum = max(1, delete_linenum)
731
732 # We need to count because pep8 reports an offset line number if there
733 # are comments.
734 cnt = 0
735 line = result['line'] - 2
736 modified_lines = []
737 while cnt < delete_linenum and line >= 0:
738 if not self.source[line].strip():
739 self.source[line] = ''
740 modified_lines.append(1 + line) # Line indexed at 1
741 cnt += 1
742 line -= 1
743
744 return modified_lines
745
746 def fix_e304(self, result):
747 """Remove blank line following function decorator."""
748 line = result['line'] - 2
749 if not self.source[line].strip():
750 self.source[line] = ''
751
752 def fix_e401(self, result):
753 """Put imports on separate lines."""
754 line_index = result['line'] - 1
755 target = self.source[line_index]
756 offset = result['column'] - 1
757
758 if not target.lstrip().startswith('import'):
759 return []
760
761 indentation = re.split(pattern=r'\bimport\b',
762 string=target, maxsplit=1)[0]
763 fixed = (target[:offset].rstrip('\t ,') + '\n' +
764 indentation + 'import ' + target[offset:].lstrip('\t ,'))
765 self.source[line_index] = fixed
766
767 def fix_long_line_logically(self, result, logical):
768 """Try to make lines fit within --max-line-length characters."""
769 if (
770 not logical or
771 len(logical[2]) == 1 or
772 self.source[result['line'] - 1].lstrip().startswith('#')
773 ):
774 return self.fix_long_line_physically(result)
775
776 start_line_index = logical[0][0]
777 end_line_index = logical[1][0]
778 logical_lines = logical[2]
779
780 previous_line = get_item(self.source, start_line_index - 1, default='')
781 next_line = get_item(self.source, end_line_index + 1, default='')
782
783 single_line = join_logical_line(''.join(logical_lines))
784
785 try:
786 fixed = self.fix_long_line(
787 target=single_line,
788 previous_line=previous_line,
789 next_line=next_line,
790 original=''.join(logical_lines))
791 except (SyntaxError, tokenize.TokenError):
792 return self.fix_long_line_physically(result)
793
794 if fixed:
795 for line_index in range(start_line_index, end_line_index + 1):
796 self.source[line_index] = ''
797 self.source[start_line_index] = fixed
798 return range(start_line_index + 1, end_line_index + 1)
799 else:
800 return []
801
802 def fix_long_line_physically(self, result):
803 """Try to make lines fit within --max-line-length characters."""
804 line_index = result['line'] - 1
805 target = self.source[line_index]
806
807 previous_line = get_item(self.source, line_index - 1, default='')
808 next_line = get_item(self.source, line_index + 1, default='')
809
810 try:
811 fixed = self.fix_long_line(
812 target=target,
813 previous_line=previous_line,
814 next_line=next_line,
815 original=target)
816 except (SyntaxError, tokenize.TokenError):
817 return []
818
819 if fixed:
820 self.source[line_index] = fixed
821 return [line_index + 1]
822 else:
823 return []
824
825 def fix_long_line(self, target, previous_line,
826 next_line, original):
827 cache_entry = (target, previous_line, next_line)
828 if cache_entry in self.long_line_ignore_cache:
829 return []
830
831 if target.lstrip().startswith('#'):
832 # Wrap commented lines.
833 return shorten_comment(
834 line=target,
835 max_line_length=self.options.max_line_length,
836 last_comment=not next_line.lstrip().startswith('#'))
837
838 fixed = get_fixed_long_line(
839 target=target,
840 previous_line=previous_line,
841 original=original,
842 indent_word=self.indent_word,
843 max_line_length=self.options.max_line_length,
844 aggressive=self.options.aggressive,
845 experimental=self.options.experimental,
846 verbose=self.options.verbose)
847 if fixed and not code_almost_equal(original, fixed):
848 return fixed
849 else:
850 self.long_line_ignore_cache.add(cache_entry)
851 return None
852
853 def fix_e502(self, result):
854 """Remove extraneous escape of newline."""
855 line_index = result['line'] - 1
856 target = self.source[line_index]
857 self.source[line_index] = target.rstrip('\n\r \t\\') + '\n'
858
859 def fix_e701(self, result):
860 """Put colon-separated compound statement on separate lines."""
861 line_index = result['line'] - 1
862 target = self.source[line_index]
863 c = result['column']
864
865 fixed_source = (target[:c] + '\n' +
866 _get_indentation(target) + self.indent_word +
867 target[c:].lstrip('\n\r \t\\'))
868 self.source[result['line'] - 1] = fixed_source
869 return [result['line'], result['line'] + 1]
870
871 def fix_e702(self, result, logical):
872 """Put semicolon-separated compound statement on separate lines."""
873 if not logical:
874 return [] # pragma: no cover
875 logical_lines = logical[2]
876
877 line_index = result['line'] - 1
878 target = self.source[line_index]
879
880 if target.rstrip().endswith('\\'):
881 # Normalize '1; \\\n2' into '1; 2'.
882 self.source[line_index] = target.rstrip('\n \r\t\\')
883 self.source[line_index + 1] = self.source[line_index + 1].lstrip()
884 return [line_index + 1, line_index + 2]
885
886 if target.rstrip().endswith(';'):
887 self.source[line_index] = target.rstrip('\n \r\t;') + '\n'
888 return [line_index + 1]
889
890 offset = result['column'] - 1
891 first = target[:offset].rstrip(';').rstrip()
892 second = (_get_indentation(logical_lines[0]) +
893 target[offset:].lstrip(';').lstrip())
894
895 self.source[line_index] = first + '\n' + second
896 return [line_index + 1]
897
898 def fix_e711(self, result):
899 """Fix comparison with None."""
900 line_index = result['line'] - 1
901 target = self.source[line_index]
902 offset = result['column'] - 1
903
904 right_offset = offset + 2
905 if right_offset >= len(target):
906 return []
907
908 left = target[:offset].rstrip()
909 center = target[offset:right_offset]
910 right = target[right_offset:].lstrip()
911
912 if not right.startswith('None'):
913 return []
914
915 if center.strip() == '==':
916 new_center = 'is'
917 elif center.strip() == '!=':
918 new_center = 'is not'
919 else:
920 return []
921
922 self.source[line_index] = ' '.join([left, new_center, right])
923
924 def fix_e712(self, result):
925 """Fix comparison with boolean."""
926 line_index = result['line'] - 1
927 target = self.source[line_index]
928 offset = result['column'] - 1
929
930 # Handle very easy "not" special cases.
931 if re.match(r'^\s*if \w+ == False:$', target):
932 self.source[line_index] = re.sub(r'if (\w+) == False:',
933 r'if not \1:', target, count=1)
934 elif re.match(r'^\s*if \w+ != True:$', target):
935 self.source[line_index] = re.sub(r'if (\w+) != True:',
936 r'if not \1:', target, count=1)
937 else:
938 right_offset = offset + 2
939 if right_offset >= len(target):
940 return []
941
942 left = target[:offset].rstrip()
943 center = target[offset:right_offset]
944 right = target[right_offset:].lstrip()
945
946 # Handle simple cases only.
947 new_right = None
948 if center.strip() == '==':
949 if re.match(r'\bTrue\b', right):
950 new_right = re.sub(r'\bTrue\b *', '', right, count=1)
951 elif center.strip() == '!=':
952 if re.match(r'\bFalse\b', right):
953 new_right = re.sub(r'\bFalse\b *', '', right, count=1)
954
955 if new_right is None:
956 return []
957
958 if new_right[0].isalnum():
959 new_right = ' ' + new_right
960
961 self.source[line_index] = left + new_right
962
963 def fix_e713(self, result):
964 """Fix non-membership check."""
965 line_index = result['line'] - 1
966 target = self.source[line_index]
967
968 # Handle very easy case only.
969 if re.match(r'^\s*if not \w+ in \w+:$', target):
970 self.source[line_index] = re.sub(r'if not (\w+) in (\w+):',
971 r'if \1 not in \2:',
972 target,
973 count=1)
974
975 def fix_w291(self, result):
976 """Remove trailing whitespace."""
977 fixed_line = self.source[result['line'] - 1].rstrip()
978 self.source[result['line'] - 1] = fixed_line + '\n'
979
980
981 def get_fixed_long_line(target, previous_line, original,
982 indent_word=' ', max_line_length=79,
983 aggressive=False, experimental=False, verbose=False):
984 """Break up long line and return result.
985
986 Do this by generating multiple reformatted candidates and then
987 ranking the candidates to heuristically select the best option.
988
989 """
990 indent = _get_indentation(target)
991 source = target[len(indent):]
992 assert source.lstrip() == source
993
994 # Check for partial multiline.
995 tokens = list(generate_tokens(source))
996
997 candidates = shorten_line(
998 tokens, source, indent,
999 indent_word,
1000 max_line_length,
1001 aggressive=aggressive,
1002 experimental=experimental,
1003 previous_line=previous_line)
1004
1005 # Also sort alphabetically as a tie breaker (for determinism).
1006 candidates = sorted(
1007 sorted(set(candidates).union([target, original])),
1008 key=lambda x: line_shortening_rank(x,
1009 indent_word,
1010 max_line_length,
1011 experimental))
1012
1013 if verbose >= 4:
1014 print(('-' * 79 + '\n').join([''] + candidates + ['']),
1015 file=codecs.getwriter('utf-8')(sys.stderr.buffer
1016 if hasattr(sys.stderr,
1017 'buffer')
1018 else sys.stderr))
1019
1020 if candidates:
1021 return candidates[0]
1022
1023
1024 def join_logical_line(logical_line):
1025 """Return single line based on logical line input."""
1026 indentation = _get_indentation(logical_line)
1027
1028 return indentation + untokenize_without_newlines(
1029 generate_tokens(logical_line.lstrip())) + '\n'
1030
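# Editor's note: an input/output sketch for join_logical_line(), derived from
# untokenize_without_newlines() below (illustrative only):
#
#     join_logical_line('if (a and\n        b):\n')  ->  'if (a and b):\n'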
1031
1032 def untokenize_without_newlines(tokens):
1033 """Return source code based on tokens."""
1034 text = ''
1035 last_row = 0
1036 last_column = -1
1037
1038 for t in tokens:
1039 token_string = t[1]
1040 (start_row, start_column) = t[2]
1041 (end_row, end_column) = t[3]
1042
1043 if start_row > last_row:
1044 last_column = 0
1045 if (
1046 (start_column > last_column or token_string == '\n') and
1047 not text.endswith(' ')
1048 ):
1049 text += ' '
1050
1051 if token_string != '\n':
1052 text += token_string
1053
1054 last_row = end_row
1055 last_column = end_column
1056
1057 return text
1058
1059
1060 def _find_logical(source_lines):
1061 # Record the positions where logical lines start and end.
1062 logical_start = []
1063 logical_end = []
1064 last_newline = True
1065 parens = 0
1066 for t in generate_tokens(''.join(source_lines)):
1067 if t[0] in [tokenize.COMMENT, tokenize.DEDENT,
1068 tokenize.INDENT, tokenize.NL,
1069 tokenize.ENDMARKER]:
1070 continue
1071 if not parens and t[0] in [tokenize.NEWLINE, tokenize.SEMI]:
1072 last_newline = True
1073 logical_end.append((t[3][0] - 1, t[2][1]))
1074 continue
1075 if last_newline and not parens:
1076 logical_start.append((t[2][0] - 1, t[2][1]))
1077 last_newline = False
1078 if t[0] == tokenize.OP:
1079 if t[1] in '([{':
1080 parens += 1
1081 elif t[1] in '}])':
1082 parens -= 1
1083 return (logical_start, logical_end)
1084
1085
1086 def _get_logical(source_lines, result, logical_start, logical_end):
1087 """Return the logical line corresponding to the result.
1088
1089 Assumes input is already E702-clean.
1090
1091 """
1092 row = result['line'] - 1
1093 col = result['column'] - 1
1094 ls = None
1095 le = None
1096 for i in range(len(logical_start)):
1097 assert logical_end
1098 x = logical_end[i]
1099 if x[0] > row or (x[0] == row and x[1] > col):
1100 le = x
1101 ls = logical_start[i]
1102 break
1103 if ls is None:
1104 return None
1105 original = source_lines[ls[0]:le[0] + 1]
1106 return ls, le, original
1107
1108
1109 def get_item(items, index, default=None):
1110 if 0 <= index < len(items):
1111 return items[index]
1112 else:
1113 return default
1114
1115
1116 def reindent(source, indent_size):
1117 """Reindent all lines."""
1118 reindenter = Reindenter(source)
1119 return reindenter.run(indent_size)
1120
1121
1122 def code_almost_equal(a, b):
1123 """Return True if code is similar.
1124
1125 Ignore whitespace when comparing corresponding lines.
1126
1127 """
1128 split_a = split_and_strip_non_empty_lines(a)
1129 split_b = split_and_strip_non_empty_lines(b)
1130
1131 if len(split_a) != len(split_b):
1132 return False
1133
1134 for index in range(len(split_a)):
1135 if ''.join(split_a[index].split()) != ''.join(split_b[index].split()):
1136 return False
1137
1138 return True
1139
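# Editor's note: quick examples of the whitespace-insensitive comparison
# (illustrative only):
#
#     code_almost_equal('x = [1,\n 2]\n', 'x = [1,\n     2]\n')  ->  True
#     code_almost_equal('x = 1\n', 'x = 1\ny = 2\n')             ->  False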
1140
1141 def split_and_strip_non_empty_lines(text):
1142 """Return lines split by newline.
1143
1144 Ignore empty lines.
1145
1146 """
1147 return [line.strip() for line in text.splitlines() if line.strip()]
1148
1149
1150 def fix_e265(source, aggressive=False): # pylint: disable=unused-argument
1151 """Format block comments."""
1152 if '#' not in source:
1153 # Optimization.
1154 return source
1155
1156 ignored_line_numbers = multiline_string_lines(
1157 source,
1158 include_docstrings=True) | set(commented_out_code_lines(source))
1159
1160 fixed_lines = []
1161 sio = io.StringIO(source)
1162 for (line_number, line) in enumerate(sio.readlines(), start=1):
1163 if (
1164 line.lstrip().startswith('#') and
1165 line_number not in ignored_line_numbers
1166 ):
1167 indentation = _get_indentation(line)
1168 line = line.lstrip()
1169
1170 # Normalize beginning if not a shebang.
1171 if len(line) > 1:
1172 if (
1173 # Leave multiple spaces like '#    ' alone.
1174 (line.count('#') > 1 or line[1].isalnum())
1175 # Leave stylistic outlined blocks alone.
1176 and not line.rstrip().endswith('#')
1177 ):
1178 line = '# ' + line.lstrip('# \t')
1179
1180 fixed_lines.append(indentation + line)
1181 else:
1182 fixed_lines.append(line)
1183
1184 return ''.join(fixed_lines)
1185
1186
1187 def refactor(source, fixer_names, ignore=None):
1188 """Return refactored code using lib2to3.
1189
1190 Skip the refactoring if the ignore string is produced in the refactored code.
1191
1192 """
1193 from lib2to3 import pgen2
1194 try:
1195 new_text = refactor_with_2to3(source,
1196 fixer_names=fixer_names)
1197 except (pgen2.parse.ParseError,
1198 SyntaxError,
1199 UnicodeDecodeError,
1200 UnicodeEncodeError):
1201 return source
1202
1203 if ignore:
1204 if ignore in new_text and ignore not in source:
1205 return source
1206
1207 return new_text
1208
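# Editor's note: an illustrative call (not part of the patch). 'print' is a
# standard lib2to3 fixer name; on a parse error refactor() returns the input
# unchanged.
#
#     refactor("print 'hello'\n", ['print'])  ->  "print('hello')\n"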
1209
1210 def code_to_2to3(select, ignore):
1211 fixes = set()
1212 for code, fix in CODE_TO_2TO3.items():
1213 if code_match(code, select=select, ignore=ignore):
1214 fixes |= set(fix)
1215 return fixes
1216
1217
1218 def fix_2to3(source, aggressive=True, select=None, ignore=None):
1219 """Fix various deprecated code (via lib2to3)."""
1220 if not aggressive:
1221 return source
1222
1223 select = select or []
1224 ignore = ignore or []
1225
1226 return refactor(source,
1227 code_to_2to3(select=select,
1228 ignore=ignore))
1229
1230
1231 def fix_w602(source, aggressive=True):
1232 """Fix deprecated form of raising exception."""
1233 if not aggressive:
1234 return source
1235
1236 return refactor(source, ['raise'],
1237 ignore='with_traceback')
1238
1239
1240 def find_newline(source):
1241 """Return type of newline used in source.
1242
1243 Input is a list of lines.
1244
1245 """
1246 assert not isinstance(source, unicode)
1247
1248 counter = collections.defaultdict(int)
1249 for line in source:
1250 if line.endswith(CRLF):
1251 counter[CRLF] += 1
1252 elif line.endswith(CR):
1253 counter[CR] += 1
1254 elif line.endswith(LF):
1255 counter[LF] += 1
1256
1257 return (sorted(counter, key=counter.get, reverse=True) or [LF])[0]
1258
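# Editor's note: the majority vote in examples (illustrative only):
#
#     find_newline(['a\r\n', 'b\r\n', 'c\n'])  ->  '\r\n'
#     find_newline([])                         ->  '\n'  (LF is the default)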
1259
1260 def _get_indentword(source):
1261 """Return indentation type."""
1262 indent_word = ' ' # Default in case source has no indentation
1263 try:
1264 for t in generate_tokens(source):
1265 if t[0] == token.INDENT:
1266 indent_word = t[1]
1267 break
1268 except (SyntaxError, tokenize.TokenError):
1269 pass
1270 return indent_word
1271
1272
1273 def _get_indentation(line):
1274 """Return leading whitespace."""
1275 if line.strip():
1276 non_whitespace_index = len(line) - len(line.lstrip())
1277 return line[:non_whitespace_index]
1278 else:
1279 return ''
1280
1281
1282 def get_diff_text(old, new, filename):
1283 """Return text of unified diff between old and new."""
1284 newline = '\n'
1285 diff = difflib.unified_diff(
1286 old, new,
1287 'original/' + filename,
1288 'fixed/' + filename,
1289 lineterm=newline)
1290
1291 text = ''
1292 for line in diff:
1293 text += line
1294
1295 # Work around missing newline (http://bugs.python.org/issue2142).
1296 if text and not line.endswith(newline):
1297 text += newline + r'\ No newline at end of file' + newline
1298
1299 return text
1300
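# Editor's note: a usage sketch (illustrative). The returned diff labels the
# two sides 'original/<filename>' and 'fixed/<filename>':
#
#     print(get_diff_text(['x=1\n'], ['x = 1\n'], 'demo.py'))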
1301
1302 def _priority_key(pep8_result):
1303 """Key for sorting PEP8 results.
1304
1305 Global fixes should be done first. This is important for things like
1306 indentation.
1307
1308 """
1309 priority = [
1310 # Fix colon-based compound statements before semicolon-based ones.
1311 'e701',
1312 # Break multiline statements early.
1313 'e702',
1314 # Things that make lines longer.
1315 'e225', 'e231',
1316 # Remove extraneous whitespace before breaking lines.
1317 'e201',
1318 # Shorten whitespace in comment before resorting to wrapping.
1319 'e262'
1320 ]
1321 middle_index = 10000
1322 lowest_priority = [
1323 # We need to shorten lines last since the logical fixer can get in a
1324 # loop, which causes us to exit early.
1325 'e501'
1326 ]
1327 key = pep8_result['id'].lower()
1328 try:
1329 return priority.index(key)
1330 except ValueError:
1331 try:
1332 return middle_index + lowest_priority.index(key) + 1
1333 except ValueError:
1334 return middle_index
1335
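# Editor's note: the resulting order on bare ids (illustrative):
#
#     sorted([{'id': c} for c in ('E501', 'W291', 'E225', 'E701')],
#            key=_priority_key)
#     ->  E701 first, then E225, then W291 (middle), then E501 last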
1336
1337 def shorten_line(tokens, source, indentation, indent_word, max_line_length,
1338 aggressive=False, experimental=False, previous_line=''):
1339 """Separate line at OPERATOR.
1340
1341 Multiple candidates will be yielded.
1342
1343 """
1344 for candidate in _shorten_line(tokens=tokens,
1345 source=source,
1346 indentation=indentation,
1347 indent_word=indent_word,
1348 aggressive=aggressive,
1349 previous_line=previous_line):
1350 yield candidate
1351
1352 if aggressive:
1353 for key_token_strings in SHORTEN_OPERATOR_GROUPS:
1354 shortened = _shorten_line_at_tokens(
1355 tokens=tokens,
1356 source=source,
1357 indentation=indentation,
1358 indent_word=indent_word,
1359 key_token_strings=key_token_strings,
1360 aggressive=aggressive)
1361
1362 if shortened is not None and shortened != source:
1363 yield shortened
1364
1365 if experimental:
1366 for shortened in _shorten_line_at_tokens_new(
1367 tokens=tokens,
1368 source=source,
1369 indentation=indentation,
1370 max_line_length=max_line_length):
1371
1372 yield shortened
1373
1374
1375 def _shorten_line(tokens, source, indentation, indent_word,
1376 aggressive=False, previous_line=''):
1377 """Separate line at OPERATOR.
1378
1379 The input is expected to be free of newlines except for inside multiline
1380 strings and at the end.
1381
1382 Multiple candidates will be yielded.
1383
1384 """
1385 for (token_type,
1386 token_string,
1387 start_offset,
1388 end_offset) in token_offsets(tokens):
1389
1390 if (
1391 token_type == tokenize.COMMENT and
1392 not is_probably_part_of_multiline(previous_line) and
1393 not is_probably_part_of_multiline(source) and
1394 not source[start_offset + 1:].strip().lower().startswith(
1395 ('noqa', 'pragma:', 'pylint:'))
1396 ):
1397 # Move inline comments to previous line.
1398 first = source[:start_offset]
1399 second = source[start_offset:]
1400 yield (indentation + second.strip() + '\n' +
1401 indentation + first.strip() + '\n')
1402 elif token_type == token.OP and token_string != '=':
1403 # Don't break on '=' after keyword as this violates PEP 8.
1404
1405 assert token_type != token.INDENT
1406
1407 first = source[:end_offset]
1408
1409 second_indent = indentation
1410 if first.rstrip().endswith('('):
1411 second_indent += indent_word
1412 elif '(' in first:
1413 second_indent += ' ' * (1 + first.find('('))
1414 else:
1415 second_indent += indent_word
1416
1417 second = (second_indent + source[end_offset:].lstrip())
1418 if (
1419 not second.strip() or
1420 second.lstrip().startswith('#')
1421 ):
1422 continue
1423
1424 # Do not begin a line with a comma
1425 if second.lstrip().startswith(','):
1426 continue
1427 # Do not end a line with a dot
1428 if first.rstrip().endswith('.'):
1429 continue
1430 if token_string in '+-*/':
1431 fixed = first + ' \\' + '\n' + second
1432 else:
1433 fixed = first + '\n' + second
1434
1435 # Only fix if syntax is okay.
1436 if check_syntax(normalize_multiline(fixed)
1437 if aggressive else fixed):
1438 yield indentation + fixed
1439
1440
1441 # A convenient way to handle tokens.
1442 Token = collections.namedtuple('Token', ['token_type', 'token_string',
1443 'spos', 'epos', 'line'])
1444
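# Editor's note: Token mirrors the 5-tuples produced by tokenize, so one can
# be built by unpacking (illustrative; generate_tokens() is this module's
# wrapper around tokenize, defined elsewhere in the file):
#
#     tok = Token(*next(iter(generate_tokens('x = 1\n'))))
#     tok.token_string  ->  'x'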
1445
1446 class ReformattedLines(object):
1447
1448 """The reflowed lines of atoms.
1449
1450 Each part of the line is represented as an "atom." They can be moved
1451 around when need be to get the optimal formatting.
1452
1453 """
1454
1455 ###########################################################################
1456 # Private Classes
1457
1458 class _Indent(object):
1459
1460 """Represent an indentation in the atom stream."""
1461
1462 def __init__(self, indent_amt):
1463 self._indent_amt = indent_amt
1464
1465 def emit(self):
1466 return ' ' * self._indent_amt
1467
1468 @property
1469 def size(self):
1470 return self._indent_amt
1471
1472 class _Space(object):
1473
1474 """Represent a space in the atom stream."""
1475
1476 def emit(self):
1477 return ' '
1478
1479 @property
1480 def size(self):
1481 return 1
1482
1483 class _LineBreak(object):
1484
1485 """Represent a line break in the atom stream."""
1486
1487 def emit(self):
1488 return '\n'
1489
1490 @property
1491 def size(self):
1492 return 0
1493
1494 def __init__(self, max_line_length):
1495 self._max_line_length = max_line_length
1496 self._lines = []
1497 self._bracket_depth = 0
1498 self._prev_item = None
1499 self._prev_prev_item = None
1500
1501 def __repr__(self):
1502 return self.emit()
1503
1504 ###########################################################################
1505 # Public Methods
1506
1507 def add(self, obj, indent_amt, break_after_open_bracket):
1508 if isinstance(obj, Atom):
1509 self._add_item(obj, indent_amt)
1510 return
1511
1512 self._add_container(obj, indent_amt, break_after_open_bracket)
1513
1514 def add_comment(self, item):
1515 num_spaces = 2
1516 if len(self._lines) > 1:
1517 if isinstance(self._lines[-1], self._Space):
1518 num_spaces -= 1
1519 if len(self._lines) > 2:
1520 if isinstance(self._lines[-2], self._Space):
1521 num_spaces -= 1
1522
1523 while num_spaces > 0:
1524 self._lines.append(self._Space())
1525 num_spaces -= 1
1526 self._lines.append(item)
1527
1528 def add_indent(self, indent_amt):
1529 self._lines.append(self._Indent(indent_amt))
1530
1531 def add_line_break(self, indent):
1532 self._lines.append(self._LineBreak())
1533 self.add_indent(len(indent))
1534
1535 def add_line_break_at(self, index, indent_amt):
1536 self._lines.insert(index, self._LineBreak())
1537 self._lines.insert(index + 1, self._Indent(indent_amt))
1538
1539 def add_space_if_needed(self, curr_text, equal=False):
1540 if (
1541 not self._lines or isinstance(
1542 self._lines[-1], (self._LineBreak, self._Indent, self._Space))
1543 ):
1544 return
1545
1546 prev_text = unicode(self._prev_item)
1547 prev_prev_text = (
1548 unicode(self._prev_prev_item) if self._prev_prev_item else '')
1549
1550 if (
1551 # The previous item was a keyword or identifier and the current
1552 # item isn't an operator that doesn't require a space.
1553 ((self._prev_item.is_keyword or self._prev_item.is_string or
1554 self._prev_item.is_name or self._prev_item.is_number) and
1555 (curr_text[0] not in '([{.,:}])' or
1556 (curr_text[0] == '=' and equal))) or
1557
1558 # Don't place spaces around a '.', unless it's in an 'import'
1559 # statement.
1560 ((prev_prev_text != 'from' and prev_text[-1] != '.' and
1561 curr_text != 'import') and
1562
1563 # Don't place a space before a colon.
1564 curr_text[0] != ':' and
1565
1566 # Don't split up ending brackets by spaces.
1567 ((prev_text[-1] in '}])' and curr_text[0] not in '.,}])') or
1568
1569 # Put a space after a colon or comma.
1570 prev_text[-1] in ':,' or
1571
1572 # Put space around '=' if asked to.
1573 (equal and prev_text == '=') or
1574
1575 # Put spaces around non-unary arithmetic operators.
1576 ((self._prev_prev_item and
1577 (prev_text not in '+-' and
1578 (self._prev_prev_item.is_name or
1579 self._prev_prev_item.is_number or
1580 self._prev_prev_item.is_string)) and
1581 prev_text in ('+', '-', '%', '*', '/', '//', '**')))))
1582 ):
1583 self._lines.append(self._Space())
1584
1585 def previous_item(self):
1586 """Return the previous non-whitespace item."""
1587 return self._prev_item
1588
1589 def fits_on_current_line(self, item_extent):
1590 return self.current_size() + item_extent <= self._max_line_length
1591
1592 def current_size(self):
1593 """The size of the current line minus the indentation."""
1594 size = 0
1595 for item in reversed(self._lines):
1596 size += item.size
1597 if isinstance(item, self._LineBreak):
1598 break
1599
1600 return size
1601
1602 def line_empty(self):
1603 return (self._lines and
1604 isinstance(self._lines[-1],
1605 (self._LineBreak, self._Indent)))
1606
1607 def emit(self):
1608 string = ''
1609 for item in self._lines:
1610 if isinstance(item, self._LineBreak):
1611 string = string.rstrip()
1612 string += item.emit()
1613
1614 return string.rstrip() + '\n'
1615
1616 ###########################################################################
1617 # Private Methods
1618
1619 def _add_item(self, item, indent_amt):
1620 """Add an item to the line.
1621
1622 Reflow the line to get the best formatting after the item is
1623 inserted. The bracket depth indicates if the item is being
1624 inserted inside of a container or not.
1625
1626 """
1627 if self._prev_item and self._prev_item.is_string and item.is_string:
1628 # Place consecutive string literals on separate lines.
1629 self._lines.append(self._LineBreak())
1630 self._lines.append(self._Indent(indent_amt))
1631
1632 item_text = unicode(item)
1633 if self._lines and self._bracket_depth:
1634 # Adding the item into a container.
1635 self._prevent_default_initializer_splitting(item, indent_amt)
1636
1637 if item_text in '.,)]}':
1638 self._split_after_delimiter(item, indent_amt)
1639
1640 elif self._lines and not self.line_empty():
1641 # Adding the item outside of a container.
1642 if self.fits_on_current_line(len(item_text)):
1643 self._enforce_space(item)
1644
1645 else:
1646 # Line break for the new item.
1647 self._lines.append(self._LineBreak())
1648 self._lines.append(self._Indent(indent_amt))
1649
1650 self._lines.append(item)
1651 self._prev_item, self._prev_prev_item = item, self._prev_item
1652
1653 if item_text in '([{':
1654 self._bracket_depth += 1
1655
1656 elif item_text in '}])':
1657 self._bracket_depth -= 1
1658 assert self._bracket_depth >= 0
1659
1660 def _add_container(self, container, indent_amt, break_after_open_bracket):
1661 actual_indent = indent_amt + 1
1662
1663 if (
1664 unicode(self._prev_item) != '=' and
1665 not self.line_empty() and
1666 not self.fits_on_current_line(
1667 container.size + self._bracket_depth + 2)
1668 ):
1669
1670 if unicode(container)[0] == '(' and self._prev_item.is_name:
1671 # Don't split before the opening bracket of a call.
1672 break_after_open_bracket = True
1673 actual_indent = indent_amt + 4
1674 elif (
1675 break_after_open_bracket or
1676 unicode(self._prev_item) not in '([{'
1677 ):
1678 # If the container doesn't fit on the current line and the
1679 # current line isn't empty, place the container on the next
1680 # line.
1681 self._lines.append(self._LineBreak())
1682 self._lines.append(self._Indent(indent_amt))
1683 break_after_open_bracket = False
1684 else:
1685 actual_indent = self.current_size() + 1
1686 break_after_open_bracket = False
1687
1688 if isinstance(container, (ListComprehension, IfExpression)):
1689 actual_indent = indent_amt
1690
1691 # Increase the continued indentation only if recursing on a
1692 # container.
1693 container.reflow(self, ' ' * actual_indent,
1694 break_after_open_bracket=break_after_open_bracket)
1695
1696 def _prevent_default_initializer_splitting(self, item, indent_amt):
1697 """Prevent splitting between a default initializer.
1698
1699 When there is a default initializer, it's best to keep it all on
1700 the same line. It's nicer and more readable, even if it goes
1701 over the maximum allowable line length. This goes back along the
1702 current line to determine if we have a default initializer, and,
1703 if so, to remove extraneous whitespace and add a line
1704 break/indent before it if needed.
1705
1706 """
1707 if unicode(item) == '=':
1708 # This is the assignment in the initializer. Just remove spaces for
1709 # now.
1710 self._delete_whitespace()
1711 return
1712
1713 if (not self._prev_item or not self._prev_prev_item or
1714 unicode(self._prev_item) != '='):
1715 return
1716
1717 self._delete_whitespace()
1718 prev_prev_index = self._lines.index(self._prev_prev_item)
1719
1720 if (
1721 isinstance(self._lines[prev_prev_index - 1], self._Indent) or
1722 self.fits_on_current_line(item.size + 1)
1723 ):
1724 # The default initializer is already the only item on this line.
1725 # Don't insert a newline here.
1726 return
1727
1728 # Replace the space with a newline/indent combo.
1729 if isinstance(self._lines[prev_prev_index - 1], self._Space):
1730 del self._lines[prev_prev_index - 1]
1731
1732 self.add_line_break_at(self._lines.index(self._prev_prev_item),
1733 indent_amt)
1734
1735 def _split_after_delimiter(self, item, indent_amt):
1736 """Split the line only after a delimiter."""
1737 self._delete_whitespace()
1738
1739 if self.fits_on_current_line(item.size):
1740 return
1741
1742 last_space = None
1743 for item in reversed(self._lines):
1744 if (
1745 last_space and
1746 (not isinstance(item, Atom) or not item.is_colon)
1747 ):
1748 break
1749 else:
1750 last_space = None
1751 if isinstance(item, self._Space):
1752 last_space = item
1753 if isinstance(item, (self._LineBreak, self._Indent)):
1754 return
1755
1756 if not last_space:
1757 return
1758
1759 self.add_line_break_at(self._lines.index(last_space), indent_amt)
1760
1761 def _enforce_space(self, item):
1762 """Enforce a space in certain situations.
1763
1764 There are cases where we will want a space where normally we
1765 wouldn't put one. This just enforces the addition of a space.
1766
1767 """
1768 if isinstance(self._lines[-1],
1769 (self._Space, self._LineBreak, self._Indent)):
1770 return
1771
1772 if not self._prev_item:
1773 return
1774
1775 item_text = unicode(item)
1776 prev_text = unicode(self._prev_item)
1777
1778 # Prefer a space around a '.' in an import statement, and between the
1779 # 'import' and '('.
1780 if (
1781 (item_text == '.' and prev_text == 'from') or
1782 (item_text == 'import' and prev_text == '.') or
1783 (item_text == '(' and prev_text == 'import')
1784 ):
1785 self._lines.append(self._Space())
1786
1787 def _delete_whitespace(self):
1788 """Delete all whitespace from the end of the line."""
1789 while isinstance(self._lines[-1], (self._Space, self._LineBreak,
1790 self._Indent)):
1791 del self._lines[-1]
1792
1793
1794 class Atom(object):
1795
1796 """The smallest unbreakable unit that can be reflowed."""
1797
1798 def __init__(self, atom):
1799 self._atom = atom
1800
1801 def __repr__(self):
1802 return self._atom.token_string
1803
1804 def __len__(self):
1805 return self.size
1806
1807 def reflow(
1808 self, reflowed_lines, continued_indent, extent,
1809 break_after_open_bracket=False,
1810 is_list_comp_or_if_expr=False,
1811 next_is_dot=False
1812 ):
1813 if self._atom.token_type == tokenize.COMMENT:
1814 reflowed_lines.add_comment(self)
1815 return
1816
1817 total_size = extent if extent else self.size
1818
1819 if self._atom.token_string not in ',:([{}])':
1820 # Some atoms will need an extra 1-sized space token after them.
1821 total_size += 1
1822
1823 prev_item = reflowed_lines.previous_item()
1824 if (
1825 not is_list_comp_or_if_expr and
1826 not reflowed_lines.fits_on_current_line(total_size) and
1827 not (next_is_dot and
1828 reflowed_lines.fits_on_current_line(self.size + 1)) and
1829 not reflowed_lines.line_empty() and
1830 not self.is_colon and
1831 not (prev_item and prev_item.is_name and
1832 unicode(self) == '(')
1833 ):
1834 # Start a new line if there is already something on the line and
1835 # adding this atom would make it go over the max line length.
1836 reflowed_lines.add_line_break(continued_indent)
1837 else:
1838 reflowed_lines.add_space_if_needed(unicode(self))
1839
1840 reflowed_lines.add(self, len(continued_indent),
1841 break_after_open_bracket)
1842
1843 def emit(self):
1844 return self.__repr__()
1845
1846 @property
1847 def is_keyword(self):
1848 return keyword.iskeyword(self._atom.token_string)
1849
1850 @property
1851 def is_string(self):
1852 return self._atom.token_type == tokenize.STRING
1853
1854 @property
1855 def is_name(self):
1856 return self._atom.token_type == tokenize.NAME
1857
1858 @property
1859 def is_number(self):
1860 return self._atom.token_type == tokenize.NUMBER
1861
1862 @property
1863 def is_comma(self):
1864 return self._atom.token_string == ','
1865
1866 @property
1867 def is_colon(self):
1868 return self._atom.token_string == ':'
1869
1870 @property
1871 def size(self):
1872 return len(self._atom.token_string)
1873
1874
1875 class Container(object):
1876
1877 """Base class for all container types."""
1878
1879 def __init__(self, items):
1880 self._items = items
1881
1882 def __repr__(self):
1883 string = ''
1884 last_was_keyword = False
1885
1886 for item in self._items:
1887 if item.is_comma:
1888 string += ', '
1889 elif item.is_colon:
1890 string += ': '
1891 else:
1892 item_string = unicode(item)
1893 if (
1894 string and
1895 (last_was_keyword or
1896 (not string.endswith(tuple('([{,.:}]) ')) and
1897 not item_string.startswith(tuple('([{,.:}])'))))
1898 ):
1899 string += ' '
1900 string += item_string
1901
1902 last_was_keyword = item.is_keyword
1903 return string
1904
1905 def __iter__(self):
1906 for element in self._items:
1907 yield element
1908
1909 def __getitem__(self, idx):
1910 return self._items[idx]
1911
1912 def reflow(self, reflowed_lines, continued_indent,
1913 break_after_open_bracket=False):
1914 last_was_container = False
1915 for (index, item) in enumerate(self._items):
1916 next_item = get_item(self._items, index + 1)
1917
1918 if isinstance(item, Atom):
1919 is_list_comp_or_if_expr = (
1920 isinstance(self, (ListComprehension, IfExpression)))
1921 item.reflow(reflowed_lines, continued_indent,
1922 self._get_extent(index),
1923 is_list_comp_or_if_expr=is_list_comp_or_if_expr,
1924 next_is_dot=(next_item and
1925 unicode(next_item) == '.'))
1926 if last_was_container and item.is_comma:
1927 reflowed_lines.add_line_break(continued_indent)
1928 last_was_container = False
1929 else: # isinstance(item, Container)
1930 reflowed_lines.add(item, len(continued_indent),
1931 break_after_open_bracket)
1932 last_was_container = not isinstance(item, (ListComprehension,
1933 IfExpression))
1934
1935 if (
1936 break_after_open_bracket and index == 0 and
1937 # Prefer to keep empty containers together instead of
1938 # separating them.
1939 unicode(item) == self.open_bracket and
1940 (not next_item or unicode(next_item) != self.close_bracket) and
1941 (len(self._items) != 3 or not isinstance(next_item, Atom))
1942 ):
1943 reflowed_lines.add_line_break(continued_indent)
1944 break_after_open_bracket = False
1945 else:
1946 next_next_item = get_item(self._items, index + 2)
1947 if (
1948 unicode(item) not in ['.', '%', 'in'] and
1949 next_item and not isinstance(next_item, Container) and
1950 unicode(next_item) != ':' and
1951 next_next_item and (not isinstance(next_next_item, Atom) or
1952 unicode(next_item) == 'not') and
1953 not reflowed_lines.line_empty() and
1954 not reflowed_lines.fits_on_current_line(
1955 self._get_extent(index + 1) + 2)
1956 ):
1957 reflowed_lines.add_line_break(continued_indent)
1958
1959 def _get_extent(self, index):
1960 """The extent of the full element.
1961
1962 E.g., the length of a function call or keyword.
1963
1964 """
1965 extent = 0
1966 prev_item = get_item(self._items, index - 1)
1967 seen_dot = prev_item and unicode(prev_item) == '.'
1968 while index < len(self._items):
1969 item = get_item(self._items, index)
1970 index += 1
1971
1972 if isinstance(item, (ListComprehension, IfExpression)):
1973 break
1974
1975 if isinstance(item, Container):
1976 if prev_item and prev_item.is_name:
1977 if seen_dot:
1978 extent += 1
1979 else:
1980 extent += item.size
1981
1982 prev_item = item
1983 continue
1984 elif (unicode(item) not in ['.', '=', ':', 'not'] and
1985 not item.is_name and not item.is_string):
1986 break
1987
1988 if unicode(item) == '.':
1989 seen_dot = True
1990
1991 extent += item.size
1992 prev_item = item
1993
1994 return extent
1995
1996 @property
1997 def is_string(self):
1998 return False
1999
2000 @property
2001 def size(self):
2002 return len(self.__repr__())
2003
2004 @property
2005 def is_keyword(self):
2006 return False
2007
2008 @property
2009 def is_name(self):
2010 return False
2011
2012 @property
2013 def is_comma(self):
2014 return False
2015
2016 @property
2017 def is_colon(self):
2018 return False
2019
2020 @property
2021 def open_bracket(self):
2022 return None
2023
2024 @property
2025 def close_bracket(self):
2026 return None
2027
2028
2029 class Tuple(Container):
2030
2031 """A high-level representation of a tuple."""
2032
2033 @property
2034 def open_bracket(self):
2035 return '('
2036
2037 @property
2038 def close_bracket(self):
2039 return ')'
2040
2041
2042 class List(Container):
2043
2044 """A high-level representation of a list."""
2045
2046 @property
2047 def open_bracket(self):
2048 return '['
2049
2050 @property
2051 def close_bracket(self):
2052 return ']'
2053
2054
2055 class DictOrSet(Container):
2056
2057 """A high-level representation of a dictionary or set."""
2058
2059 @property
2060 def open_bracket(self):
2061 return '{'
2062
2063 @property
2064 def close_bracket(self):
2065 return '}'
2066
2067
2068 class ListComprehension(Container):
2069
2070 """A high-level representation of a list comprehension."""
2071
2072 @property
2073 def size(self):
2074 length = 0
2075 for item in self._items:
2076 if isinstance(item, IfExpression):
2077 break
2078 length += item.size
2079 return length
2080
2081
2082 class IfExpression(Container):
2083
2084 """A high-level representation of an if-expression."""
2085
2086
2087 def _parse_container(tokens, index, for_or_if=None):
2088 """Parse a high-level container, such as a list, tuple, etc."""
2089
2090 # Store the opening bracket.
2091 items = [Atom(Token(*tokens[index]))]
2092 index += 1
2093
2094 num_tokens = len(tokens)
2095 while index < num_tokens:
2096 tok = Token(*tokens[index])
2097
2098 if tok.token_string in ',)]}':
2099 # First check if we're at the end of a list comprehension or
2100 # if-expression. Don't add the ending token as part of the list
2101 # comprehension or if-expression, because they aren't part of those
2102 # constructs.
2103 if for_or_if == 'for':
2104 return (ListComprehension(items), index - 1)
2105
2106 elif for_or_if == 'if':
2107 return (IfExpression(items), index - 1)
2108
2109 # We've reached the end of a container.
2110 items.append(Atom(tok))
2111
2112 # Determine which kind of container has just ended.
2113 if tok.token_string == ')':
2114 # The end of a tuple.
2115 return (Tuple(items), index)
2116
2117 elif tok.token_string == ']':
2118 # The end of a list.
2119 return (List(items), index)
2120
2121 elif tok.token_string == '}':
2122 # The end of a dictionary or set.
2123 return (DictOrSet(items), index)
2124
2125 elif tok.token_string in '([{':
2126 # A sub-container is being defined.
2127 (container, index) = _parse_container(tokens, index)
2128 items.append(container)
2129
2130 elif tok.token_string == 'for':
2131 (container, index) = _parse_container(tokens, index, 'for')
2132 items.append(container)
2133
2134 elif tok.token_string == 'if':
2135 (container, index) = _parse_container(tokens, index, 'if')
2136 items.append(container)
2137
2138 else:
2139 items.append(Atom(tok))
2140
2141 index += 1
2142
2143 return (None, None)
2144
2145
2146 def _parse_tokens(tokens):
2147 """Parse the tokens.
2148
2149 This converts the tokens into a form where we can manipulate them
2150 more easily.
2151
2152 """
2153
2154 index = 0
2155 parsed_tokens = []
2156
2157 num_tokens = len(tokens)
2158 while index < num_tokens:
2159 tok = Token(*tokens[index])
2160
2161 assert tok.token_type != token.INDENT
2162 if tok.token_type == tokenize.NEWLINE:
2163 # There's only one newline and it's at the end.
2164 break
2165
2166 if tok.token_string in '([{':
2167 (container, index) = _parse_container(tokens, index)
2168 if not container:
2169 return None
2170 parsed_tokens.append(container)
2171 else:
2172 parsed_tokens.append(Atom(tok))
2173
2174 index += 1
2175
2176 return parsed_tokens
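# A rough sketch of the resulting shape: for the logical line
# 'x = [1, 2]', this should produce
#
#     [Atom('x'), Atom('='), List([Atom('['), Atom('1'), Atom(','),
#                                  Atom('2'), Atom(']')])]
#
# with the trailing NEWLINE token terminating the walk.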
2177
2178
2179 def _reflow_lines(parsed_tokens, indentation, max_line_length,
2180 start_on_prefix_line):
2181 """Reflow the lines so that it looks nice."""
2182
2183 if unicode(parsed_tokens[0]) == 'def':
2184 # A function definition gets indented a bit more.
2185 continued_indent = indentation + ' ' * 2 * DEFAULT_INDENT_SIZE
2186 else:
2187 continued_indent = indentation + ' ' * DEFAULT_INDENT_SIZE
2188
2189 break_after_open_bracket = not start_on_prefix_line
2190
2191 lines = ReformattedLines(max_line_length)
2192 lines.add_indent(len(indentation.lstrip('\r\n')))
2193
2194 if not start_on_prefix_line:
2195 # If splitting after the opening bracket will cause the first element
2196 # to be aligned weirdly, don't try it.
2197 first_token = get_item(parsed_tokens, 0)
2198 second_token = get_item(parsed_tokens, 1)
2199
2200 if (
2201 first_token and second_token and
2202 unicode(second_token)[0] == '(' and
2203 len(indentation) + len(first_token) + 1 == len(continued_indent)
2204 ):
2205 return None
2206
2207 for item in parsed_tokens:
2208 lines.add_space_if_needed(unicode(item), equal=True)
2209
2210 save_continued_indent = continued_indent
2211 if start_on_prefix_line and isinstance(item, Container):
2212 start_on_prefix_line = False
2213 continued_indent = ' ' * (lines.current_size() + 1)
2214
2215 item.reflow(lines, continued_indent, break_after_open_bracket)
2216 continued_indent = save_continued_indent
2217
2218 return lines.emit()
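# A note on the doubled indent for 'def' above: PEP 8 suggests an extra
# indentation level for a function definition's continuation lines, so
# that wrapped parameters do not visually merge with the function body.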
2219
2220
2221 def _shorten_line_at_tokens_new(tokens, source, indentation,
2222 max_line_length):
2223 """Shorten the line taking its length into account.
2224
2225 The input is expected to be free of newlines except for inside
2226 multiline strings and at the end.
2227
2228 """
2229 # Yield the original source so we can see if it's a better choice than
2230 # the shortened candidate lines we generate here.
2231 yield indentation + source
2232
2233 parsed_tokens = _parse_tokens(tokens)
2234
2235 if parsed_tokens:
2236 # Perform two reflows. The first one starts on the same line as the
2237 # prefix. The second starts on the line after the prefix.
2238 fixed = _reflow_lines(parsed_tokens, indentation, max_line_length,
2239 start_on_prefix_line=True)
2240 if fixed and check_syntax(normalize_multiline(fixed.lstrip())):
2241 yield fixed
2242
2243 fixed = _reflow_lines(parsed_tokens, indentation, max_line_length,
2244 start_on_prefix_line=False)
2245 if fixed and check_syntax(normalize_multiline(fixed.lstrip())):
2246 yield fixed
2247
2248
2249 def _shorten_line_at_tokens(tokens, source, indentation, indent_word,
2250 key_token_strings, aggressive):
2251 """Separate line by breaking at tokens in key_token_strings.
2252
2253 The input is expected to be free of newlines except for inside
2254 multiline strings and at the end.
2255
2256 """
2257 offsets = []
2258 for (index, _t) in enumerate(token_offsets(tokens)):
2259 (token_type,
2260 token_string,
2261 start_offset,
2262 end_offset) = _t
2263
2264 assert token_type != token.INDENT
2265
2266 if token_string in key_token_strings:
2267 # Do not break in containers with zero or one items.
2268 unwanted_next_token = {
2269 '(': ')',
2270 '[': ']',
2271 '{': '}'}.get(token_string)
2272 if unwanted_next_token:
2273 if (
2274 get_item(tokens,
2275 index + 1,
2276 default=[None, None])[1] == unwanted_next_token or
2277 get_item(tokens,
2278 index + 2,
2279 default=[None, None])[1] == unwanted_next_token
2280 ):
2281 continue
2282
2283 if (
2284 index > 2 and token_string == '(' and
2285 tokens[index - 1][1] in ',(%['
2286 ):
2287 # Don't split after a tuple start, or before a tuple start if
2288 # the tuple is in a list.
2289 continue
2290
2291 if end_offset < len(source) - 1:
2292 # Don't split right before newline.
2293 offsets.append(end_offset)
2294 else:
2295 # Break at adjacent strings. These were probably meant to be on
2296 # separate lines in the first place.
2297 previous_token = get_item(tokens, index - 1)
2298 if (
2299 token_type == tokenize.STRING and
2300 previous_token and previous_token[0] == tokenize.STRING
2301 ):
2302 offsets.append(start_offset)
2303
2304 current_indent = None
2305 fixed = None
2306 for line in split_at_offsets(source, offsets):
2307 if fixed:
2308 fixed += '\n' + current_indent + line
2309
2310 for symbol in '([{':
2311 if line.endswith(symbol):
2312 current_indent += indent_word
2313 else:
2314 # First line.
2315 fixed = line
2316 assert not current_indent
2317 current_indent = indent_word
2318
2319 assert fixed is not None
2320
2321 if check_syntax(normalize_multiline(fixed)
2322 if aggressive > 1 else fixed):
2323 return indentation + fixed
2324 else:
2325 return None
2326
2327
2328 def token_offsets(tokens):
2329 """Yield tokens and offsets."""
2330 end_offset = 0
2331 previous_end_row = 0
2332 previous_end_column = 0
2333 for t in tokens:
2334 token_type = t[0]
2335 token_string = t[1]
2336 (start_row, start_column) = t[2]
2337 (end_row, end_column) = t[3]
2338
2339 # Account for the whitespace between tokens.
2340 end_offset += start_column
2341 if previous_end_row == start_row:
2342 end_offset -= previous_end_column
2343
2344 # Record the start offset of the token.
2345 start_offset = end_offset
2346
2347 # Account for the length of the token itself.
2348 end_offset += len(token_string)
2349
2350 yield (token_type,
2351 token_string,
2352 start_offset,
2353 end_offset)
2354
2355 previous_end_row = end_row
2356 previous_end_column = end_column
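# A short worked example of the offset arithmetic: for the single
# logical line 'x = 1', this yields
#
#     (NAME, 'x', 0, 1), (OP, '=', 2, 3), (NUMBER, '1', 4, 5), ...
#
# i.e. the whitespace between tokens is folded into the start offsets.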
2357
2358
2359 def normalize_multiline(line):
2360 """Normalize multiline-related code that will cause syntax error.
2361
2362 This is for purposes of checking syntax.
2363
2364 """
2365 if line.startswith('def ') and line.rstrip().endswith(':'):
2366 return line + ' pass'
2367 elif line.startswith('return '):
2368 return 'def _(): ' + line
2369 elif line.startswith('@'):
2370 return line + 'def _(): pass'
2371 elif line.startswith('class '):
2372 return line + ' pass'
2373 elif line.startswith('if '):
2374 return line + ' pass'
2375 else:
2376 return line
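# For instance, normalize_multiline('def foo():') returns
# 'def foo(): pass', and normalize_multiline('return x') returns
# 'def _(): return x'; both results compile on their own while the
# inputs would not.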
2377
2378
2379 def fix_whitespace(line, offset, replacement):
2380 """Replace whitespace at offset and return fixed line."""
2381 # Replace escaped newlines too
2382 left = line[:offset].rstrip('\n\r \t\\')
2383 right = line[offset:].lstrip('\n\r \t\\')
2384 if right.startswith('#'):
2385 return line
2386 else:
2387 return left + replacement + right
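# For example, fix_whitespace('a  = 1', offset=1, replacement=' ')
# collapses the run of spaces and returns 'a = 1', whereas an offset
# whose right-hand side starts a comment leaves the line untouched.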
2388
2389
2390 def _execute_pep8(pep8_options, source):
2391 """Execute pep8 via python method calls."""
2392 class QuietReport(pep8.BaseReport):
2393
2394 """Version of checker that does not print."""
2395
2396 def __init__(self, options):
2397 super(QuietReport, self).__init__(options)
2398 self.__full_error_results = []
2399
2400 def error(self, line_number, offset, text, _):
2401 """Collect errors."""
2402 code = super(QuietReport, self).error(line_number, offset, text, _)
2403 if code:
2404 self.__full_error_results.append(
2405 {'id': code,
2406 'line': line_number,
2407 'column': offset + 1,
2408 'info': text})
2409
2410 def full_error_results(self):
2411 """Return error results in detail.
2412
2413 Results are in the form of a list of dictionaries. Each
2414 dictionary contains 'id', 'line', 'column', and 'info'.
2415
2416 """
2417 return self.__full_error_results
2418
2419 checker = pep8.Checker('', lines=source,
2420 reporter=QuietReport, **pep8_options)
2421 checker.check_all()
2422 return checker.report.full_error_results()
2423
2424
2425 def _remove_leading_and_normalize(line):
2426 return line.lstrip().rstrip(CR + LF) + '\n'
2427
2428
2429 class Reindenter(object):
2430
2431 """Reindents badly-indented code to uniformly use four-space indentation.
2432
2433 Released to the public domain, by Tim Peters, 03 October 2000.
2434
2435 """
2436
2437 def __init__(self, input_text):
2438 sio = io.StringIO(input_text)
2439 source_lines = sio.readlines()
2440
2441 self.string_content_line_numbers = multiline_string_lines(input_text)
2442
2443 # File lines, rstripped & tab-expanded. Dummy at start is so
2444 # that we can use tokenize's 1-based line numbering easily.
2445 # Note that a line is all-blank iff it is a newline.
2446 self.lines = []
2447 for line_number, line in enumerate(source_lines, start=1):
2448 # Do not modify if inside a multiline string.
2449 if line_number in self.string_content_line_numbers:
2450 self.lines.append(line)
2451 else:
2452 # Only expand leading tabs.
2453 self.lines.append(_get_indentation(line).expandtabs() +
2454 _remove_leading_and_normalize(line))
2455
2456 self.lines.insert(0, None)
2457 self.index = 1 # index into self.lines of next line
2458 self.input_text = input_text
2459
2460 def run(self, indent_size=DEFAULT_INDENT_SIZE):
2461 """Fix indentation and return modified line numbers.
2462
2463 Line numbers are indexed at 1.
2464
2465 """
2466 if indent_size < 1:
2467 return self.input_text
2468
2469 try:
2470 stats = _reindent_stats(tokenize.generate_tokens(self.getline))
2471 except (SyntaxError, tokenize.TokenError):
2472 return self.input_text
2473 # Remove trailing empty lines.
2474 lines = self.lines
2475 while lines and lines[-1] == '\n':
2476 lines.pop()
2477 # Sentinel.
2478 stats.append((len(lines), 0))
2479 # Map count of leading spaces to # we want.
2480 have2want = {}
2481 # Program after transformation.
2482 after = []
2483 # Copy over initial empty lines -- there's nothing to do until
2484 # we see a line with *something* on it.
2485 i = stats[0][0]
2486 after.extend(lines[1:i])
2487 for i in range(len(stats) - 1):
2488 thisstmt, thislevel = stats[i]
2489 nextstmt = stats[i + 1][0]
2490 have = _leading_space_count(lines[thisstmt])
2491 want = thislevel * indent_size
2492 if want < 0:
2493 # A comment line.
2494 if have:
2495 # An indented comment line. If we saw the same
2496 # indentation before, reuse what it most recently
2497 # mapped to.
2498 want = have2want.get(have, -1)
2499 if want < 0:
2500 # Then it probably belongs to the next real stmt.
2501 for j in range(i + 1, len(stats) - 1):
2502 jline, jlevel = stats[j]
2503 if jlevel >= 0:
2504 if have == _leading_space_count(lines[jline]):
2505 want = jlevel * indent_size
2506 break
2507 if want < 0: # Maybe it's a hanging
2508 # comment like this one,
2509 # in which case we should shift it like its base
2510 # line got shifted.
2511 for j in range(i - 1, -1, -1):
2512 jline, jlevel = stats[j]
2513 if jlevel >= 0:
2514 want = (have + _leading_space_count(
2515 after[jline - 1]) -
2516 _leading_space_count(lines[jline]))
2517 break
2518 if want < 0:
2519 # Still no luck -- leave it alone.
2520 want = have
2521 else:
2522 want = 0
2523 assert want >= 0
2524 have2want[have] = want
2525 diff = want - have
2526 if diff == 0 or have == 0:
2527 after.extend(lines[thisstmt:nextstmt])
2528 else:
2529 for line_number, line in enumerate(lines[thisstmt:nextstmt],
2530 start=thisstmt):
2531 if line_number in self.string_content_line_numbers:
2532 after.append(line)
2533 elif diff > 0:
2534 if line == '\n':
2535 after.append(line)
2536 else:
2537 after.append(' ' * diff + line)
2538 else:
2539 remove = min(_leading_space_count(line), -diff)
2540 after.append(line[remove:])
2541
2542 return ''.join(after)
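# A rough behavioural sketch: Reindenter('if x:\n  y = 1\n').run()
# should yield 'if x:\n    y = 1\n', i.e. the two-space body is
# re-indented to indent_size (four spaces by default).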
2543
2544 def getline(self):
2545 """Line-getter for tokenize."""
2546 if self.index >= len(self.lines):
2547 line = ''
2548 else:
2549 line = self.lines[self.index]
2550 self.index += 1
2551 return line
2552
2553
2554 def _reindent_stats(tokens):
2555 """Return list of (lineno, indentlevel) pairs.
2556
2557 One for each stmt and comment line. indentlevel is -1 for comment lines, as
2558 a signal that tokenize doesn't know what to do about them; indeed, they're
2559 our headache!
2560
2561 """
2562 find_stmt = 1 # Next token begins a fresh stmt?
2563 level = 0 # Current indent level.
2564 stats = []
2565
2566 for t in tokens:
2567 token_type = t[0]
2568 sline = t[2][0]
2569 line = t[4]
2570
2571 if token_type == tokenize.NEWLINE:
2572 # A program statement, or ENDMARKER, will eventually follow,
2573 # after some (possibly empty) run of tokens of the form
2574 # (NL | COMMENT)* (INDENT | DEDENT+)?
2575 find_stmt = 1
2576
2577 elif token_type == tokenize.INDENT:
2578 find_stmt = 1
2579 level += 1
2580
2581 elif token_type == tokenize.DEDENT:
2582 find_stmt = 1
2583 level -= 1
2584
2585 elif token_type == tokenize.COMMENT:
2586 if find_stmt:
2587 stats.append((sline, -1))
2588 # But we're still looking for a new stmt, so leave
2589 # find_stmt alone.
2590
2591 elif token_type == tokenize.NL:
2592 pass
2593
2594 elif find_stmt:
2595 # This is the first "real token" following a NEWLINE, so it
2596 # must be the first token of the next program statement, or an
2597 # ENDMARKER.
2598 find_stmt = 0
2599 if line: # Not endmarker.
2600 stats.append((sline, level))
2601
2602 return stats
2603
2604
2605 def _leading_space_count(line):
2606 """Return number of leading spaces in line."""
2607 i = 0
2608 while i < len(line) and line[i] == ' ':
2609 i += 1
2610 return i
2611
2612
2613 def refactor_with_2to3(source_text, fixer_names):
2614 """Use lib2to3 to refactor the source.
2615
2616 Return the refactored source code.
2617
2618 """
2619 from lib2to3.refactor import RefactoringTool
2620 fixers = ['lib2to3.fixes.fix_' + name for name in fixer_names]
2621 tool = RefactoringTool(fixer_names=fixers, explicit=fixers)
2622
2623 from lib2to3.pgen2 import tokenize as lib2to3_tokenize
2624 try:
2625 return unicode(tool.refactor_string(source_text, name=''))
2626 except lib2to3_tokenize.TokenError:
2627 return source_text
2628
2629
2630 def check_syntax(code):
2631 """Return True if syntax is okay."""
2632 try:
2633 return compile(code, '<string>', 'exec')
2634 except (SyntaxError, TypeError, UnicodeDecodeError):
2635 return False
2636
2637
2638 def filter_results(source, results, aggressive):
2639 """Filter out spurious reports from pep8.
2640
2641 If aggressive is True, we allow possibly unsafe fixes (E711, E712).
2642
2643 """
2644 non_docstring_string_line_numbers = multiline_string_lines(
2645 source, include_docstrings=False)
2646 all_string_line_numbers = multiline_string_lines(
2647 source, include_docstrings=True)
2648
2649 commented_out_code_line_numbers = commented_out_code_lines(source)
2650
2651 for r in results:
2652 issue_id = r['id'].lower()
2653
2654 if r['line'] in non_docstring_string_line_numbers:
2655 if issue_id.startswith(('e1', 'e501', 'w191')):
2656 continue
2657
2658 if r['line'] in all_string_line_numbers:
2659 if issue_id in ['e501']:
2660 continue
2661
2662 # We must offset by 1 for lines that contain the trailing contents of
2663 # multiline strings.
2664 if not aggressive and (r['line'] + 1) in all_string_line_numbers:
2665 # Do not modify multiline strings in non-aggressive mode. Removing
2666 # trailing whitespace could break doctests.
2667 if issue_id.startswith(('w29', 'w39')):
2668 continue
2669
2670 if aggressive <= 0:
2671 if issue_id.startswith(('e711', 'w6')):
2672 continue
2673
2674 if aggressive <= 1:
2675 if issue_id.startswith(('e712', 'e713')):
2676 continue
2677
2678 if r['line'] in commented_out_code_line_numbers:
2679 if issue_id.startswith(('e26', 'e501')):
2680 continue
2681
2682 yield r
2683
2684
2685 def multiline_string_lines(source, include_docstrings=False):
2686 """Return line numbers that are within multiline strings.
2687
2688 The line numbers are indexed at 1.
2689
2690 Docstrings are ignored unless include_docstrings is True.
2691
2692 """
2693 line_numbers = set()
2694 previous_token_type = ''
2695 try:
2696 for t in generate_tokens(source):
2697 token_type = t[0]
2698 start_row = t[2][0]
2699 end_row = t[3][0]
2700
2701 if token_type == tokenize.STRING and start_row != end_row:
2702 if (
2703 include_docstrings or
2704 previous_token_type != tokenize.INDENT
2705 ):
2706 # We increment by one since we want the contents of the
2707 # string.
2708 line_numbers |= set(range(1 + start_row, 1 + end_row))
2709
2710 previous_token_type = token_type
2711 except (SyntaxError, tokenize.TokenError):
2712 pass
2713
2714 return line_numbers
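# As a small illustration, for the source
#
#     x = """
#     two
#     three
#     """
#
# the STRING token spans rows 1-4, so the result should be
# set([2, 3, 4]): the lines holding the string's contents and its
# closing quotes, but not the opening line.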
2715
2716
2717 def commented_out_code_lines(source):
2718 """Return line numbers of comments that are likely code.
2719
2720 Commented-out code is bad practice, but modifying it just adds even more
2721 clutter.
2722
2723 """
2724 line_numbers = []
2725 try:
2726 for t in generate_tokens(source):
2727 token_type = t[0]
2728 token_string = t[1]
2729 start_row = t[2][0]
2730 line = t[4]
2731
2732 # Ignore inline comments.
2733 if not line.lstrip().startswith('#'):
2734 continue
2735
2736 if token_type == tokenize.COMMENT:
2737 stripped_line = token_string.lstrip('#').strip()
2738 if (
2739 ' ' in stripped_line and
2740 '#' not in stripped_line and
2741 check_syntax(stripped_line)
2742 ):
2743 line_numbers.append(start_row)
2744 except (SyntaxError, tokenize.TokenError):
2745 pass
2746
2747 return line_numbers
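# For example, given the source '# x = 1\n# hello world\n', only line
# 1 is reported: 'x = 1' compiles on its own, while 'hello world'
# fails the check_syntax() test and is treated as prose.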
2748
2749
2750 def shorten_comment(line, max_line_length, last_comment=False):
2751 """Return trimmed or split long comment line.
2752
2753 If there are no comments immediately following it, do a text wrap.
2754 Doing this wrapping on all comments in general would lead to jagged
2755 comment text.
2756
2757 """
2758 assert len(line) > max_line_length
2759 line = line.rstrip()
2760
2761 # PEP 8 recommends 72 characters for comment text.
2762 indentation = _get_indentation(line) + '# '
2763 max_line_length = min(max_line_length,
2764 len(indentation) + 72)
2765
2766 MIN_CHARACTER_REPEAT = 5
2767 if (
2768 len(line) - len(line.rstrip(line[-1])) >= MIN_CHARACTER_REPEAT and
2769 not line[-1].isalnum()
2770 ):
2771 # Trim comments that end with things like ---------
2772 return line[:max_line_length] + '\n'
2773 elif last_comment and re.match(r'\s*#+\s*\w+', line):
2774 import textwrap
2775 split_lines = textwrap.wrap(line.lstrip(' \t#'),
2776 initial_indent=indentation,
2777 subsequent_indent=indentation,
2778 width=max_line_length,
2779 break_long_words=False,
2780 break_on_hyphens=False)
2781 return '\n'.join(split_lines) + '\n'
2782 else:
2783 return line + '\n'
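# For example, an over-long trailing banner such as '# -------...' is
# simply truncated at max_line_length, while a long prose comment that
# is the last one in its run is re-wrapped at roughly
# min(max_line_length, len(indentation) + 72) columns.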
2784
2785
2786 def normalize_line_endings(lines, newline):
2787 """Return fixed line endings.
2788
2789 All lines will be modified to use the most common line ending.
2790
2791 """
2792 return [line.rstrip('\n\r') + newline for line in lines]
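# A minimal example: normalize_line_endings(['a\r\n', 'b\r'], '\n')
# returns ['a\n', 'b\n'].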
2793
2794
2795 def mutual_startswith(a, b):
2796 return b.startswith(a) or a.startswith(b)
2797
2798
2799 def code_match(code, select, ignore):
2800 if ignore:
2801 assert not isinstance(ignore, unicode)
2802 for ignored_code in [c.strip() for c in ignore]:
2803 if mutual_startswith(code.lower(), ignored_code.lower()):
2804 return False
2805
2806 if select:
2807 assert not isinstance(select, unicode)
2808 for selected_code in [c.strip() for c in select]:
2809 if mutual_startswith(code.lower(), selected_code.lower()):
2810 return True
2811 return False
2812
2813 return True
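# A couple of hand-checked examples: code_match('E501', select=[],
# ignore=['E5']) is False, since 'e501' starts with 'e5', while
# code_match('E501', select=['E50'], ignore=[]) is True. With neither
# select nor ignore given, every code matches.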
2814
2815
2816 def fix_code(source, options=None):
2817 """Return fixed source code."""
2818 if not options:
2819 options = parse_args([''])
2820
2821 if not isinstance(source, unicode):
2822 source = source.decode(locale.getpreferredencoding())
2823
2824 sio = io.StringIO(source)
2825 return fix_lines(sio.readlines(), options=options)
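# A minimal usage sketch, assuming default options:
#
#     import autopep8
#     autopep8.fix_code('print( 123 )\n')  # should return 'print(123)\n'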
2826
2827
2828 def fix_lines(source_lines, options, filename=''):
2829 """Return fixed source code."""
2830 # Convert all line endings to line feeds, then change them back to the
2831 # original endings before returning the fixed source code.
2832 original_newline = find_newline(source_lines)
2833 tmp_source = ''.join(normalize_line_endings(source_lines, '\n'))
2834
2835 # Keep a history to break out of cycles.
2836 previous_hashes = set()
2837
2838 if options.line_range:
2839 fixed_source = apply_local_fixes(tmp_source, options)
2840 else:
2841 # Apply global fixes only once (for efficiency).
2842 fixed_source = apply_global_fixes(tmp_source, options)
2843
2844 passes = 0
2845 long_line_ignore_cache = set()
2846 while hash(fixed_source) not in previous_hashes:
2847 if options.pep8_passes >= 0 and passes > options.pep8_passes:
2848 break
2849 passes += 1
2850
2851 previous_hashes.add(hash(fixed_source))
2852
2853 tmp_source = copy.copy(fixed_source)
2854
2855 fix = FixPEP8(
2856 filename,
2857 options,
2858 contents=tmp_source,
2859 long_line_ignore_cache=long_line_ignore_cache)
2860
2861 fixed_source = fix.fix()
2862
2863 sio = io.StringIO(fixed_source)
2864 return ''.join(normalize_line_endings(sio.readlines(), original_newline))
2865
2866
2867 def fix_file(filename, options=None, output=None):
2868 if not options:
2869 options = parse_args([filename])
2870
2871 original_source = readlines_from_file(filename)
2872
2873 fixed_source = original_source
2874
2875 if options.in_place or output:
2876 encoding = detect_encoding(filename)
2877
2878 if output:
2879 output = codecs.getwriter(encoding)(output.buffer
2880 if hasattr(output, 'buffer')
2881 else output)
2882
2883 output = LineEndingWrapper(output)
2884
2885 fixed_source = fix_lines(fixed_source, options, filename=filename)
2886
2887 if options.diff:
2888 new = io.StringIO(fixed_source)
2889 new = new.readlines()
2890 diff = get_diff_text(original_source, new, filename)
2891 if output:
2892 output.write(diff)
2893 output.flush()
2894 else:
2895 return diff
2896 elif options.in_place:
2897 fp = open_with_encoding(filename, encoding=encoding,
2898 mode='w')
2899 fp.write(fixed_source)
2900 fp.close()
2901 else:
2902 if output:
2903 output.write(fixed_source)
2904 output.flush()
2905 else:
2906 return fixed_source
2907
2908
2909 def global_fixes():
2910 """Yield multiple (code, function) tuples."""
2911 for function in globals().values():
2912 if inspect.isfunction(function):
2913 arguments = inspect.getargspec(function)[0]
2914 if arguments[:1] != ['source']:
2915 continue
2916
2917 code = extract_code_from_function(function)
2918 if code:
2919 yield (code, function)
2920
2921
2922 def apply_global_fixes(source, options, where='global'):
2923 """Run global fixes on source code.
2924
2925 These are fixes that only need be done once (unlike those in
2926 FixPEP8, which are dependent on pep8).
2927
2928 """
2929 if code_match('E101', select=options.select, ignore=options.ignore):
2930 source = reindent(source,
2931 indent_size=options.indent_size)
2932
2933 for (code, function) in global_fixes():
2934 if code_match(code, select=options.select, ignore=options.ignore):
2935 if options.verbose:
2936 print('---> Applying {0} fix for {1}'.format(where,
2937 code.upper()),
2938 file=sys.stderr)
2939 source = function(source,
2940 aggressive=options.aggressive)
2941
2942 source = fix_2to3(source,
2943 aggressive=options.aggressive,
2944 select=options.select,
2945 ignore=options.ignore)
2946
2947 return source
2948
2949
2950 def apply_local_fixes(source, options):
2951 """Ananologus to apply_global_fixes, but runs only those which makes sense
2952 for the given line_range.
2953
2954 Do as much as we can without breaking code.
2955
2956 """
2957 def find_ge(a, x):
2958 """Find leftmost item greater than or equal to x."""
2959 i = bisect.bisect_left(a, x)
2960 if i != len(a):
2961 return i, a[i]
2962 return len(a) - 1, a[-1]
2963
2964 def find_le(a, x):
2965 """Find rightmost value less than or equal to x."""
2966 i = bisect.bisect_right(a, x)
2967 if i:
2968 return i - 1, a[i - 1]
2969 return 0, a[0]
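# For example, with a = [1, 4, 9]: find_ge(a, 5) is (2, 9) and
# find_le(a, 5) is (1, 4); both clamp to the ends of the list when
# x falls outside its range.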
2970
2971 def local_fix(source, start_log, end_log,
2972 start_lines, end_lines, indents, last_line):
2973 """apply_global_fixes to the source between start_log and end_log.
2974
2975 The subsource must be syntactically a complete Python program
2976 (though all lines may share an indentation). The subsource's shared
2977 indent is removed, fixes are applied, and the indent is prepended
2978 back, taking care not to reindent strings.
2979
2980 last_line is the strict cut off (options.line_range[1]), so that
2981 lines after last_line are not modified.
2982
2983 """
2984 if end_log < start_log:
2985 return source
2986
2987 ind = indents[start_log]
2988 indent = _get_indentation(source[start_lines[start_log]])
2989
2990 sl = slice(start_lines[start_log], end_lines[end_log] + 1)
2991
2992 subsource = source[sl]
2993 # Remove indent from subsource.
2994 if ind:
2995 for line_no in start_lines[start_log:end_log + 1]:
2996 pos = line_no - start_lines[start_log]
2997 subsource[pos] = subsource[pos][ind:]
2998
2999 # Fix indentation of subsource.
3000 fixed_subsource = apply_global_fixes(''.join(subsource),
3001 options,
3002 where='local')
3003 fixed_subsource = fixed_subsource.splitlines(True)
3004
3005 # Add the indent back to lines that are not inside multiline strings.
3006 msl = multiline_string_lines(''.join(fixed_subsource),
3007 include_docstrings=False)
3008 for i, line in enumerate(fixed_subsource):
3009 if i + 1 not in msl:
3010 fixed_subsource[i] = indent + line if line != '\n' else line
3011
3012 # Special-case the final line: if it is a multiline statement *and*
3013 # the cut-off falls somewhere inside it, take the fixed subset only
3014 # up to last_line. This assumes that the number of lines in that
3015 # multiline statement does not change.
3016 changed_lines = len(fixed_subsource)
3017 if (start_lines[end_log] != end_lines[end_log]
3018 and end_lines[end_log] > last_line):
3019 after_end = end_lines[end_log] - last_line
3020 fixed_subsource = (fixed_subsource[:-after_end] +
3021 source[sl][-after_end:])
3022 changed_lines -= after_end
3023
3024 options.line_range[1] = (options.line_range[0] +
3025 changed_lines - 1)
3026
3027 return (source[:start_lines[start_log]] +
3028 fixed_subsource +
3029 source[end_lines[end_log] + 1:])
3030
3031 def is_continued_stmt(line,
3032 continued_stmts=frozenset(['else', 'elif',
3033 'finally', 'except'])):
3034 return re.split('[ :]', line.strip(), 1)[0] in continued_stmts
3035
3036 assert options.line_range
3037 start, end = options.line_range
3038 start -= 1
3039 end -= 1
3040 last_line = end # We shouldn't modify lines after this cut-off.
3041
3042 try:
3043 logical = _find_logical(source)
3044 except (SyntaxError, tokenize.TokenError):
3045 return ''.join(source)
3046
3047 if not logical[0]:
3048 # Only blank lines; applying the global fixes directly is safe here.
3049 return apply_global_fixes(source, options)
3050
3051 start_lines, indents = zip(*logical[0])
3052 end_lines, _ = zip(*logical[1])
3053
3054 source = source.splitlines(True)
3055
3056 start_log, start = find_ge(start_lines, start)
3057 end_log, end = find_le(start_lines, end)
3058
3059 # Look one line behind; if it is indented less than the current
3060 # indent, then we can move to that previous line, knowing that its
3061 # indentation level will not be changed.
3062 if (start_log > 0
3063 and indents[start_log - 1] < indents[start_log]
3064 and not is_continued_stmt(source[start_log - 1])):
3065 start_log -= 1
3066 start = start_lines[start_log]
3067
3068 while start < end:
3069
3070 if is_continued_stmt(source[start]):
3071 start_log += 1
3072 start = start_lines[start_log]
3073 continue
3074
3075 ind = indents[start_log]
3076 for t in itertools.takewhile(lambda t: t[1][1] >= ind,
3077 enumerate(logical[0][start_log:])):
3078 n_log, n = start_log + t[0], t[1][0]
3079 # start shares indent up to n.
3080
3081 if n <= end:
3082 source = local_fix(source, start_log, n_log,
3083 start_lines, end_lines,
3084 indents, last_line)
3085 start_log = n_log if n == end else n_log + 1
3086 start = start_lines[start_log]
3087 continue
3088
3089 else:
3090 # Look at the line after end and see if it allows us to reindent.
3091 after_end_log, after_end = find_ge(start_lines, end + 1)
3092
3093 if indents[after_end_log] > indents[start_log]:
3094 start_log, start = find_ge(start_lines, start + 1)
3095 continue
3096
3097 if (indents[after_end_log] == indents[start_log]
3098 and is_continued_stmt(source[after_end])):
3099 # Find n, the beginning of the last continued statement, and
3100 # apply the fix to the previous block if there is one.
3101 only_block = True
3102 for n, n_ind in logical[0][start_log:end_log + 1][::-1]:
3103 if n_ind == ind and not is_continued_stmt(source[n]):
3104 n_log = start_lines.index(n)
3105 source = local_fix(source, start_log, n_log - 1,
3106 start_lines, end_lines,
3107 indents, last_line)
3108 start_log = n_log + 1
3109 start = start_lines[start_log]
3110 only_block = False
3111 break
3112 if only_block:
3113 end_log, end = find_le(start_lines, end - 1)
3114 continue
3115
3116 source = local_fix(source, start_log, end_log,
3117 start_lines, end_lines,
3118 indents, last_line)
3119 break
3120
3121 return ''.join(source)
3122
3123
3124 def extract_code_from_function(function):
3125 """Return code handled by function."""
3126 if not function.__name__.startswith('fix_'):
3127 return None
3128
3129 code = re.sub('^fix_', '', function.__name__)
3130 if not code:
3131 return None
3132
3133 try:
3134 int(code[1:])
3135 except ValueError:
3136 return None
3137
3138 return code
3139
3140
3141 def create_parser():
3142 """Return command-line parser."""
3143 # Do import locally to be friendly to those who use autopep8 as a library
3144 # and are supporting Python 2.6.
3145 import argparse
3146
3147 parser = argparse.ArgumentParser(description=docstring_summary(__doc__),
3148 prog='autopep8')
3149 parser.add_argument('--version', action='version',
3150 version='%(prog)s ' + __version__)
3151 parser.add_argument('-v', '--verbose', action='count', dest='verbose',
3152 default=0,
3153 help='print verbose messages; '
3154 'multiple -v result in more verbose messages')
3155 parser.add_argument('-d', '--diff', action='store_true', dest='diff',
3156 help='print the diff for the fixed source')
3157 parser.add_argument('-i', '--in-place', action='store_true',
3158 help='make changes to files in place')
3159 parser.add_argument('-r', '--recursive', action='store_true',
3160 help='run recursively over directories; '
3161 'must be used with --in-place or --diff')
3162 parser.add_argument('-j', '--jobs', type=int, metavar='n', default=1,
3163 help='number of parallel jobs; '
3164 'match CPU count if value is less than 1')
3165 parser.add_argument('-p', '--pep8-passes', metavar='n',
3166 default=-1, type=int,
3167 help='maximum number of additional pep8 passes '
3168 '(default: infinite)')
3169 parser.add_argument('-a', '--aggressive', action='count', default=0,
3170 help='enable non-whitespace changes; '
3171 'multiple -a result in more aggressive changes')
3172 parser.add_argument('--experimental', action='store_true',
3173 help='enable experimental fixes')
3174 parser.add_argument('--exclude', metavar='globs',
3175 help='exclude file/directory names that match these '
3176 'comma-separated globs')
3177 parser.add_argument('--list-fixes', action='store_true',
3178 help='list codes for fixes; '
3179 'used by --ignore and --select')
3180 parser.add_argument('--ignore', metavar='errors', default='',
3181 help='do not fix these errors/warnings '
3182 '(default: {0})'.format(DEFAULT_IGNORE))
3183 parser.add_argument('--select', metavar='errors', default='',
3184 help='fix only these errors/warnings (e.g. E4,W)')
3185 parser.add_argument('--max-line-length', metavar='n', default=79, type=int,
3186 help='set maximum allowed line length '
3187 '(default: %(default)s)')
3188 parser.add_argument('--range', metavar='line', dest='line_range',
3189 default=None, type=int, nargs=2,
3190 help='only fix errors found within this inclusive '
3191 'range of line numbers (e.g. 1 99); '
3192 'line numbers are indexed at 1')
3193 parser.add_argument('--indent-size', default=DEFAULT_INDENT_SIZE,
3194 type=int, metavar='n',
3195 help='number of spaces per indent level '
3196 '(default %(default)s)')
3197 parser.add_argument('files', nargs='*',
3198 help="files to format or '-' for standard in")
3199
3200 return parser
3201
3202
3203 def parse_args(arguments):
3204 """Parse command-line options."""
3205 parser = create_parser()
3206 args = parser.parse_args(arguments)
3207
3208 if not args.files and not args.list_fixes:
3209 parser.error('incorrect number of arguments')
3210
3211 args.files = [decode_filename(name) for name in args.files]
3212
3213 if '-' in args.files:
3214 if len(args.files) > 1:
3215 parser.error('cannot mix stdin and regular files')
3216
3217 if args.diff:
3218 parser.error('--diff cannot be used with standard input')
3219
3220 if args.in_place:
3221 parser.error('--in-place cannot be used with standard input')
3222
3223 if args.recursive:
3224 parser.error('--recursive cannot be used with standard input')
3225
3226 if len(args.files) > 1 and not (args.in_place or args.diff):
3227 parser.error('autopep8 only takes one filename as argument '
3228 'unless the "--in-place" or "--diff" args are '
3229 'used')
3230
3231 if args.recursive and not (args.in_place or args.diff):
3232 parser.error('--recursive must be used with --in-place or --diff')
3233
3234 if args.exclude and not args.recursive:
3235 parser.error('--exclude is only relevant when used with --recursive')
3236
3237 if args.in_place and args.diff:
3238 parser.error('--in-place and --diff are mutually exclusive')
3239
3240 if args.max_line_length <= 0:
3241 parser.error('--max-line-length must be greater than 0')
3242
3243 if args.select:
3244 args.select = args.select.split(',')
3245
3246 if args.ignore:
3247 args.ignore = args.ignore.split(',')
3248 elif not args.select:
3249 if args.aggressive:
3250 # Enable everything by default if aggressive.
3251 args.select = ['E', 'W']
3252 else:
3253 args.ignore = DEFAULT_IGNORE.split(',')
3254
3255 if args.exclude:
3256 args.exclude = args.exclude.split(',')
3257 else:
3258 args.exclude = []
3259
3260 if args.jobs < 1:
3261 # Do not import multiprocessing globally in case it is not supported
3262 # on the platform.
3263 import multiprocessing
3264 args.jobs = multiprocessing.cpu_count()
3265
3266 if args.jobs > 1 and not args.in_place:
3267 parser.error('parallel jobs requires --in-place')
3268
3269 if args.line_range:
3270 if args.line_range[0] <= 0:
3271 parser.error('--range must be positive numbers')
3272 if args.line_range[0] > args.line_range[1]:
3273 parser.error('First value of --range should be less than or equal '
3274 'to the second')
3275
3276 return args
3277
3278
3279 def decode_filename(filename):
3280 """Return Unicode filename."""
3281 if isinstance(filename, unicode):
3282 return filename
3283 else:
3284 return filename.decode(sys.getfilesystemencoding())
3285
3286
3287 def supported_fixes():
3288 """Yield pep8 error codes that autopep8 fixes.
3289
3290 Each item we yield is a tuple of the code followed by its
3291 description.
3292
3293 """
3294 yield ('E101', docstring_summary(reindent.__doc__))
3295
3296 instance = FixPEP8(filename=None, options=None, contents='')
3297 for attribute in dir(instance):
3298 code = re.match('fix_([ew][0-9][0-9][0-9])', attribute)
3299 if code:
3300 yield (
3301 code.group(1).upper(),
3302 re.sub(r'\s+', ' ',
3303 docstring_summary(getattr(instance, attribute).__doc__))
3304 )
3305
3306 for (code, function) in sorted(global_fixes()):
3307 yield (code.upper() + (4 - len(code)) * ' ',
3308 re.sub(r'\s+', ' ', docstring_summary(function.__doc__)))
3309
3310 for code in sorted(CODE_TO_2TO3):
3311 yield (code.upper() + (4 - len(code)) * ' ',
3312 re.sub(r'\s+', ' ', docstring_summary(fix_2to3.__doc__)))
3313
3314
3315 def docstring_summary(docstring):
3316 """Return summary of docstring."""
3317 return docstring.split('\n')[0]
3318
3319
3320 def line_shortening_rank(candidate, indent_word, max_line_length,
3321 experimental=False):
3322 """Return rank of candidate.
3323
3324 This is for sorting candidates.
3325
3326 """
3327 if not candidate.strip():
3328 return 0
3329
3330 rank = 0
3331 lines = candidate.split('\n')
3332
3333 offset = 0
3334 if (
3335 not lines[0].lstrip().startswith('#') and
3336 lines[0].rstrip()[-1] not in '([{'
3337 ):
3338 for (opening, closing) in ('()', '[]', '{}'):
3339 # Don't penalize empty containers that aren't split up. Splitting
3340 # them, as in "foo(\n )", isn't particularly good.
3341 opening_loc = lines[0].find(opening)
3342 closing_loc = lines[0].find(closing)
3343 if opening_loc >= 0:
3344 if closing_loc < 0 or closing_loc != opening_loc + 1:
3345 offset = max(offset, 1 + opening_loc)
3346
3347 current_longest = max(offset + len(x.strip()) for x in lines)
3348
3349 rank += 4 * max(0, current_longest - max_line_length)
3350
3351 rank += len(lines)
3352
3353 # Too much variation in line length is ugly.
3354 rank += 2 * standard_deviation(len(line) for line in lines)
3355
3356 bad_starting_symbol = {
3357 '(': ')',
3358 '[': ']',
3359 '{': '}'}.get(lines[0][-1])
3360
3361 if len(lines) > 1:
3362 if (
3363 bad_starting_symbol and
3364 lines[1].lstrip().startswith(bad_starting_symbol)
3365 ):
3366 rank += 20
3367
3368 for lineno, current_line in enumerate(lines):
3369 current_line = current_line.strip()
3370
3371 if current_line.startswith('#'):
3372 continue
3373
3374 for bad_start in ['.', '%', '+', '-', '/']:
3375 if current_line.startswith(bad_start):
3376 rank += 100
3377
3378 # Do not tolerate operators on their own line.
3379 if current_line == bad_start:
3380 rank += 1000
3381
3382 if current_line.endswith(('(', '[', '{', '.')):
3383 # Avoid lonely openings. They result in longer lines.
3384 if len(current_line) <= len(indent_word):
3385 rank += 100
3386
3387 # Avoid the ugliness of ", (\n".
3388 if (
3389 current_line.endswith('(') and
3390 current_line[:-1].rstrip().endswith(',')
3391 ):
3392 rank += 100
3393
3394 # Also avoid the ugliness of "foo.\nbar"
3395 if current_line.endswith('.'):
3396 rank += 100
3397
3398 if has_arithmetic_operator(current_line):
3399 rank += 100
3400
3401 if current_line.endswith(('%', '(', '[', '{')):
3402 rank -= 20
3403
3404 # Try to break list comprehensions at the "for".
3405 if current_line.startswith('for '):
3406 rank -= 50
3407
3408 if current_line.endswith('\\'):
3409 # If a line ends in \-newline, it may be part of a
3410 # multiline string. In that case, we would like to know
3411 # how long that line is without the \-newline. If it's
3412 # longer than the maximum, or has comments, then we assume
3413 # that the \-newline is an okay candidate and only
3414 # penalize it a bit.
3415 total_len = len(current_line)
3416 lineno += 1
3417 while lineno < len(lines):
3418 total_len += len(lines[lineno])
3419
3420 if lines[lineno].lstrip().startswith('#'):
3421 total_len = max_line_length
3422 break
3423
3424 if not lines[lineno].endswith('\\'):
3425 break
3426
3427 lineno += 1
3428
3429 if total_len < max_line_length:
3430 rank += 10
3431 else:
3432 rank += 100 if experimental else 1
3433
3434 # Prefer breaking at commas rather than at colons.
3435 if ',' in current_line and current_line.endswith(':'):
3436 rank += 10
3437
3438 rank += 10 * count_unbalanced_brackets(current_line)
3439
3440 return max(0, rank)
3441
3442
3443 def standard_deviation(numbers):
3444 """Return standard devation."""
3445 numbers = list(numbers)
3446 if not numbers:
3447 return 0
3448 mean = sum(numbers) / len(numbers)
3449 return (sum((n - mean) ** 2 for n in numbers) /
3450 len(numbers)) ** .5
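# Sanity check: standard_deviation([2, 4]) is 1.0 (mean 3, squared
# deviations 1 and 1), and an empty iterable yields 0.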
3451
3452
3453 def has_arithmetic_operator(line):
3454 """Return True if line contains any arithmetic operators."""
3455 for operator in pep8.ARITHMETIC_OP:
3456 if operator in line:
3457 return True
3458
3459 return False
3460
3461
3462 def count_unbalanced_brackets(line):
3463 """Return number of unmatched open/close brackets."""
3464 count = 0
3465 for opening, closing in ['()', '[]', '{}']:
3466 count += abs(line.count(opening) - line.count(closing))
3467
3468 return count
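# For example, count_unbalanced_brackets('foo(bar[0]') is 1: the
# square brackets balance but the parenthesis does not. Note that
# ')(' counts as balanced here, since only the totals are compared.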
3469
3470
3471 def split_at_offsets(line, offsets):
3472 """Split line at offsets.
3473
3474 Return list of strings.
3475
3476 """
3477 result = []
3478
3479 previous_offset = 0
3480 current_offset = 0
3481 for current_offset in sorted(offsets):
3482 if current_offset < len(line) and previous_offset != current_offset:
3483 result.append(line[previous_offset:current_offset].strip())
3484 previous_offset = current_offset
3485
3486 result.append(line[current_offset:])
3487
3488 return result
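# For instance, split_at_offsets('foo(bar, baz)', [4, 9]) returns
# ['foo(', 'bar,', 'baz)']; every piece before the final offset is
# stripped of surrounding whitespace.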
3489
3490
3491 class LineEndingWrapper(object):
3492
3493 r"""Replace line endings to work with sys.stdout.
3494
3495 It seems that sys.stdout expects only '\n' as the line ending, no matter
3496 the platform. Otherwise, we get repeated line endings.
3497
3498 """
3499
3500 def __init__(self, output):
3501 self.__output = output
3502
3503 def write(self, s):
3504 self.__output.write(s.replace('\r\n', '\n').replace('\r', '\n'))
3505
3506 def flush(self):
3507 self.__output.flush()
3508
3509
3510 def match_file(filename, exclude):
3511 """Return True if file is okay for modifying/recursing."""
3512 base_name = os.path.basename(filename)
3513
3514 if base_name.startswith('.'):
3515 return False
3516
3517 for pattern in exclude:
3518 if fnmatch.fnmatch(base_name, pattern):
3519 return False
3520
3521 if not os.path.isdir(filename) and not is_python_file(filename):
3522 return False
3523
3524 return True
3525
3526
3527 def find_files(filenames, recursive, exclude):
3528 """Yield filenames."""
3529 while filenames:
3530 name = filenames.pop(0)
3531 if recursive and os.path.isdir(name):
3532 for root, directories, children in os.walk(name):
3533 filenames += [os.path.join(root, f) for f in children
3534 if match_file(os.path.join(root, f),
3535 exclude)]
3536 directories[:] = [d for d in directories
3537 if match_file(os.path.join(root, d),
3538 exclude)]
3539 else:
3540 yield name
3541
3542
3543 def _fix_file(parameters):
3544 """Helper function for optionally running fix_file() in parallel."""
3545 if parameters[1].verbose:
3546 print('[file:{0}]'.format(parameters[0]), file=sys.stderr)
3547 try:
3548 fix_file(*parameters)
3549 except IOError as error:
3550 print(unicode(error), file=sys.stderr)
3551
3552
3553 def fix_multiple_files(filenames, options, output=None):
3554 """Fix list of files.
3555
3556 Optionally fix files recursively.
3557
3558 """
3559 filenames = find_files(filenames, options.recursive, options.exclude)
3560 if options.jobs > 1:
3561 import multiprocessing
3562 pool = multiprocessing.Pool(options.jobs)
3563 pool.map(_fix_file,
3564 [(name, options) for name in filenames])
3565 else:
3566 for name in filenames:
3567 _fix_file((name, options, output))
3568
3569
3570 def is_python_file(filename):
3571 """Return True if filename is Python file."""
3572 if filename.endswith('.py'):
3573 return True
3574
3575 try:
3576 with open_with_encoding(filename) as f:
3577 first_line = f.readlines(1)[0]
3578 except (IOError, IndexError):
3579 return False
3580
3581 if not PYTHON_SHEBANG_REGEX.match(first_line):
3582 return False
3583
3584 return True
3585
3586
3587 def is_probably_part_of_multiline(line):
3588 """Return True if line is likely part of a multiline string.
3589
3590 When multiline strings are involved, pep8 reports the error as being
3591 at the start of the multiline string, which doesn't work for us.
3592
3593 """
3594 return (
3595 '"""' in line or
3596 "'''" in line or
3597 line.rstrip().endswith('\\')
3598 )
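# For example, both 'x = """start of a string' and a line ending in a
# backslash are flagged as probably multiline, while a plain 'x = 1'
# is not.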
3599
3600
3601 def main():
3602 """Tool main."""
3603 try:
3604 # Exit on broken pipe.
3605 signal.signal(signal.SIGPIPE, signal.SIG_DFL)
3606 except AttributeError: # pragma: no cover
3607 # SIGPIPE is not available on Windows.
3608 pass
3609
3610 try:
3611 args = parse_args(sys.argv[1:])
3612
3613 if args.list_fixes:
3614 for code, description in sorted(supported_fixes()):
3615 print('{code} - {description}'.format(
3616 code=code, description=description))
3617 return 0
3618
3619 if args.files == ['-']:
3620 assert not args.in_place
3621
3622 # LineEndingWrapper is unnecessary here due to the symmetry between
3623 # standard in and standard out.
3624 sys.stdout.write(fix_code(sys.stdin.read(), args))
3625 else:
3626 if args.in_place or args.diff:
3627 args.files = list(set(args.files))
3628 else:
3629 assert len(args.files) == 1
3630 assert not args.recursive
3631
3632 fix_multiple_files(args.files, args, sys.stdout)
3633 except KeyboardInterrupt:
3634 return 1 # pragma: no cover
3635
3636
3637 class CachedTokenizer(object):
3638
3639 """A one-element cache around tokenize.generate_tokens().
3640
3641 Original code written by Ned Batchelder, in coverage.py.
3642
3643 """
3644
3645 def __init__(self):
3646 self.last_text = None
3647 self.last_tokens = None
3648
3649 def generate_tokens(self, text):
3650 """A stand-in for tokenize.generate_tokens()."""
3651 if text != self.last_text:
3652 string_io = io.StringIO(text)
3653 self.last_tokens = list(
3654 tokenize.generate_tokens(string_io.readline)
3655 )
3656 self.last_text = text
3657 return self.last_tokens
3658
3659 _cached_tokenizer = CachedTokenizer()
3660 generate_tokens = _cached_tokenizer.generate_tokens
3661
3662
3663 if __name__ == '__main__':
3664 sys.exit(main())