Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(153)

Side by Side Diff: third_party/pylint/checkers/similar.py

Issue 753543006: pylint: upgrade to 1.4.0 (Closed) Base URL: https://chromium.googlesource.com/chromium/tools/depot_tools.git@master
Patch Set: Created 6 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « third_party/pylint/checkers/python3.py ('k') | third_party/pylint/checkers/spelling.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 # pylint: disable=W0622 1 # pylint: disable=W0622
2 # Copyright (c) 2004-2013 LOGILAB S.A. (Paris, FRANCE). 2 # Copyright (c) 2004-2013 LOGILAB S.A. (Paris, FRANCE).
3 # http://www.logilab.fr/ -- mailto:contact@logilab.fr 3 # http://www.logilab.fr/ -- mailto:contact@logilab.fr
4 # 4 #
5 # This program is free software; you can redistribute it and/or modify it under 5 # This program is free software; you can redistribute it and/or modify it under
6 # the terms of the GNU General Public License as published by the Free Software 6 # the terms of the GNU General Public License as published by the Free Software
7 # Foundation; either version 2 of the License, or (at your option) any later 7 # Foundation; either version 2 of the License, or (at your option) any later
8 # version. 8 # version.
9 # 9 #
10 # This program is distributed in the hope that it will be useful, but WITHOUT 10 # This program is distributed in the hope that it will be useful, but WITHOUT
11 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 11 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details 12 # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details
13 # 13 #
14 # You should have received a copy of the GNU General Public License along with 14 # You should have received a copy of the GNU General Public License along with
15 # this program; if not, write to the Free Software Foundation, Inc., 15 # this program; if not, write to the Free Software Foundation, Inc.,
16 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 16 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17 """a similarities / code duplication command line tool and pylint checker 17 """a similarities / code duplication command line tool and pylint checker
18 """ 18 """
19 from __future__ import print_function
19 import sys 20 import sys
20 from itertools import izip 21 from collections import defaultdict
21 22
22 from logilab.common.ureports import Table 23 from logilab.common.ureports import Table
23 24
24 from pylint.interfaces import IRawChecker 25 from pylint.interfaces import IRawChecker
25 from pylint.checkers import BaseChecker, table_lines_from_stats 26 from pylint.checkers import BaseChecker, table_lines_from_stats
26 27
28 import six
29 from six.moves import zip
30
27 31
28 class Similar(object): 32 class Similar(object):
29 """finds copy-pasted lines of code in a project""" 33 """finds copy-pasted lines of code in a project"""
30 34
31 def __init__(self, min_lines=4, ignore_comments=False, 35 def __init__(self, min_lines=4, ignore_comments=False,
32 ignore_docstrings=False, ignore_imports=False): 36 ignore_docstrings=False, ignore_imports=False):
33 self.min_lines = min_lines 37 self.min_lines = min_lines
34 self.ignore_comments = ignore_comments 38 self.ignore_comments = ignore_comments
35 self.ignore_docstrings = ignore_docstrings 39 self.ignore_docstrings = ignore_docstrings
36 self.ignore_imports = ignore_imports 40 self.ignore_imports = ignore_imports
(...skipping 14 matching lines...) Expand all
51 self.ignore_imports)) 55 self.ignore_imports))
52 except UnicodeDecodeError: 56 except UnicodeDecodeError:
53 pass 57 pass
54 58
55 def run(self): 59 def run(self):
56 """start looking for similarities and display results on stdout""" 60 """start looking for similarities and display results on stdout"""
57 self._display_sims(self._compute_sims()) 61 self._display_sims(self._compute_sims())
58 62
59 def _compute_sims(self): 63 def _compute_sims(self):
60 """compute similarities in appended files""" 64 """compute similarities in appended files"""
61 no_duplicates = {} 65 no_duplicates = defaultdict(list)
62 for num, lineset1, idx1, lineset2, idx2 in self._iter_sims(): 66 for num, lineset1, idx1, lineset2, idx2 in self._iter_sims():
63 duplicate = no_duplicates.setdefault(num, []) 67 duplicate = no_duplicates[num]
64 for couples in duplicate: 68 for couples in duplicate:
65 if (lineset1, idx1) in couples or (lineset2, idx2) in couples: 69 if (lineset1, idx1) in couples or (lineset2, idx2) in couples:
66 couples.add((lineset1, idx1)) 70 couples.add((lineset1, idx1))
67 couples.add((lineset2, idx2)) 71 couples.add((lineset2, idx2))
68 break 72 break
69 else: 73 else:
70 duplicate.append(set([(lineset1, idx1), (lineset2, idx2)])) 74 duplicate.append(set([(lineset1, idx1), (lineset2, idx2)]))
71 sims = [] 75 sims = []
72 for num, ensembles in no_duplicates.iteritems(): 76 for num, ensembles in six.iteritems(no_duplicates):
73 for couples in ensembles: 77 for couples in ensembles:
74 sims.append((num, couples)) 78 sims.append((num, couples))
75 sims.sort() 79 sims.sort()
76 sims.reverse() 80 sims.reverse()
77 return sims 81 return sims
78 82
79 def _display_sims(self, sims): 83 def _display_sims(self, sims):
80 """display computed similarities on stdout""" 84 """display computed similarities on stdout"""
81 nb_lignes_dupliquees = 0 85 nb_lignes_dupliquees = 0
82 for num, couples in sims: 86 for num, couples in sims:
83 print 87 print()
84 print num, "similar lines in", len(couples), "files" 88 print(num, "similar lines in", len(couples), "files")
85 couples = sorted(couples) 89 couples = sorted(couples)
86 for lineset, idx in couples: 90 for lineset, idx in couples:
87 print "==%s:%s" % (lineset.name, idx) 91 print("==%s:%s" % (lineset.name, idx))
88 # pylint: disable=W0631 92 # pylint: disable=W0631
89 for line in lineset._real_lines[idx:idx+num]: 93 for line in lineset._real_lines[idx:idx+num]:
90 print " ", line.rstrip() 94 print(" ", line.rstrip())
91 nb_lignes_dupliquees += num * (len(couples)-1) 95 nb_lignes_dupliquees += num * (len(couples)-1)
92 nb_total_lignes = sum([len(lineset) for lineset in self.linesets]) 96 nb_total_lignes = sum([len(lineset) for lineset in self.linesets])
93 print "TOTAL lines=%s duplicates=%s percent=%.2f" \ 97 print("TOTAL lines=%s duplicates=%s percent=%.2f" \
94 % (nb_total_lignes, nb_lignes_dupliquees, 98 % (nb_total_lignes, nb_lignes_dupliquees,
95 nb_lignes_dupliquees*100. / nb_total_lignes) 99 nb_lignes_dupliquees*100. / nb_total_lignes))
96 100
97 def _find_common(self, lineset1, lineset2): 101 def _find_common(self, lineset1, lineset2):
98 """find similarities in the two given linesets""" 102 """find similarities in the two given linesets"""
99 lines1 = lineset1.enumerate_stripped 103 lines1 = lineset1.enumerate_stripped
100 lines2 = lineset2.enumerate_stripped 104 lines2 = lineset2.enumerate_stripped
101 find = lineset2.find 105 find = lineset2.find
102 index1 = 0 106 index1 = 0
103 min_lines = self.min_lines 107 min_lines = self.min_lines
104 while index1 < len(lineset1): 108 while index1 < len(lineset1):
105 skip = 1 109 skip = 1
106 num = 0 110 num = 0
107 for index2 in find(lineset1[index1]): 111 for index2 in find(lineset1[index1]):
108 non_blank = 0 112 non_blank = 0
109 for num, ((_, line1), (_, line2)) in enumerate( 113 for num, ((_, line1), (_, line2)) in enumerate(
110 izip(lines1(index1), lines2(index2))): 114 zip(lines1(index1), lines2(index2))):
111 if line1 != line2: 115 if line1 != line2:
112 if non_blank > min_lines: 116 if non_blank > min_lines:
113 yield num, lineset1, index1, lineset2, index2 117 yield num, lineset1, index1, lineset2, index2
114 skip = max(skip, num) 118 skip = max(skip, num)
115 break 119 break
116 if line1: 120 if line1:
117 non_blank += 1 121 non_blank += 1
118 else: 122 else:
119 # we may have reach the end 123 # we may have reach the end
120 num += 1 124 num += 1
(...skipping 79 matching lines...) Expand 10 before | Expand all | Expand 10 after
200 #if line: 204 #if line:
201 yield idx, line 205 yield idx, line
202 idx += 1 206 idx += 1
203 207
204 def find(self, stripped_line): 208 def find(self, stripped_line):
205 """return positions of the given stripped line in this set""" 209 """return positions of the given stripped line in this set"""
206 return self._index.get(stripped_line, ()) 210 return self._index.get(stripped_line, ())
207 211
208 def _mk_index(self): 212 def _mk_index(self):
209 """create the index for this set""" 213 """create the index for this set"""
210 index = {} 214 index = defaultdict(list)
211 for line_no, line in enumerate(self._stripped_lines): 215 for line_no, line in enumerate(self._stripped_lines):
212 if line: 216 if line:
213 index.setdefault(line, []).append(line_no) 217 index[line].append(line_no)
214 return index 218 return index
215 219
216 220
217 MSGS = {'R0801': ('Similar lines in %s files\n%s', 221 MSGS = {'R0801': ('Similar lines in %s files\n%s',
218 'duplicate-code', 222 'duplicate-code',
219 'Indicates that a set of similar lines has been detected \ 223 'Indicates that a set of similar lines has been detected \
220 among multiple file. This usually means that the code should \ 224 among multiple file. This usually means that the code should \
221 be refactored to avoid this duplication.')} 225 be refactored to avoid this duplication.')}
222 226
223 def report_similarities(sect, stats, old_stats): 227 def report_similarities(sect, stats, old_stats):
(...skipping 92 matching lines...) Expand 10 before | Expand all | Expand 10 after
316 stats['nb_duplicated_lines'] = duplicated 320 stats['nb_duplicated_lines'] = duplicated
317 stats['percent_duplicated_lines'] = total and duplicated * 100. / total 321 stats['percent_duplicated_lines'] = total and duplicated * 100. / total
318 322
319 323
320 def register(linter): 324 def register(linter):
321 """required method to auto register this checker """ 325 """required method to auto register this checker """
322 linter.register_checker(SimilarChecker(linter)) 326 linter.register_checker(SimilarChecker(linter))
323 327
324 def usage(status=0): 328 def usage(status=0):
325 """display command line usage information""" 329 """display command line usage information"""
326 print "finds copy pasted blocks in a set of files" 330 print("finds copy pasted blocks in a set of files")
327 print 331 print()
328 print 'Usage: symilar [-d|--duplicates min_duplicated_lines] \ 332 print('Usage: symilar [-d|--duplicates min_duplicated_lines] \
329 [-i|--ignore-comments] [--ignore-docstrings] [--ignore-imports] file1...' 333 [-i|--ignore-comments] [--ignore-docstrings] [--ignore-imports] file1...')
330 sys.exit(status) 334 sys.exit(status)
331 335
332 def Run(argv=None): 336 def Run(argv=None):
333 """standalone command line access point""" 337 """standalone command line access point"""
334 if argv is None: 338 if argv is None:
335 argv = sys.argv[1:] 339 argv = sys.argv[1:]
336 from getopt import getopt 340 from getopt import getopt
337 s_opts = 'hdi' 341 s_opts = 'hdi'
338 l_opts = ('help', 'duplicates=', 'ignore-comments', 'ignore-imports', 342 l_opts = ('help', 'duplicates=', 'ignore-comments', 'ignore-imports',
339 'ignore-docstrings') 343 'ignore-docstrings')
(...skipping 16 matching lines...) Expand all
356 if not args: 360 if not args:
357 usage(1) 361 usage(1)
358 sim = Similar(min_lines, ignore_comments, ignore_docstrings, ignore_imports) 362 sim = Similar(min_lines, ignore_comments, ignore_docstrings, ignore_imports)
359 for filename in args: 363 for filename in args:
360 sim.append_stream(filename, open(filename)) 364 sim.append_stream(filename, open(filename))
361 sim.run() 365 sim.run()
362 sys.exit(0) 366 sys.exit(0)
363 367
364 if __name__ == '__main__': 368 if __name__ == '__main__':
365 Run() 369 Run()
OLDNEW
« no previous file with comments | « third_party/pylint/checkers/python3.py ('k') | third_party/pylint/checkers/spelling.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698