OLD | NEW |
1 # pylint: disable=W0622 | 1 # pylint: disable=W0622 |
2 # Copyright (c) 2004-2013 LOGILAB S.A. (Paris, FRANCE). | 2 # Copyright (c) 2004-2013 LOGILAB S.A. (Paris, FRANCE). |
3 # http://www.logilab.fr/ -- mailto:contact@logilab.fr | 3 # http://www.logilab.fr/ -- mailto:contact@logilab.fr |
4 # | 4 # |
5 # This program is free software; you can redistribute it and/or modify it under | 5 # This program is free software; you can redistribute it and/or modify it under |
6 # the terms of the GNU General Public License as published by the Free Software | 6 # the terms of the GNU General Public License as published by the Free Software |
7 # Foundation; either version 2 of the License, or (at your option) any later | 7 # Foundation; either version 2 of the License, or (at your option) any later |
8 # version. | 8 # version. |
9 # | 9 # |
10 # This program is distributed in the hope that it will be useful, but WITHOUT | 10 # This program is distributed in the hope that it will be useful, but WITHOUT |
11 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS | 11 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
12 # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details | 12 # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details |
13 # | 13 # |
14 # You should have received a copy of the GNU General Public License along with | 14 # You should have received a copy of the GNU General Public License along with |
15 # this program; if not, write to the Free Software Foundation, Inc., | 15 # this program; if not, write to the Free Software Foundation, Inc., |
16 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. | 16 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
17 """a similarities / code duplication command line tool and pylint checker | 17 """a similarities / code duplication command line tool and pylint checker |
18 """ | 18 """ |
| 19 from __future__ import print_function |
19 import sys | 20 import sys |
20 from itertools import izip | 21 from collections import defaultdict |
21 | 22 |
22 from logilab.common.ureports import Table | 23 from logilab.common.ureports import Table |
23 | 24 |
24 from pylint.interfaces import IRawChecker | 25 from pylint.interfaces import IRawChecker |
25 from pylint.checkers import BaseChecker, table_lines_from_stats | 26 from pylint.checkers import BaseChecker, table_lines_from_stats |
26 | 27 |
| 28 import six |
| 29 from six.moves import zip |
| 30 |
27 | 31 |
28 class Similar(object): | 32 class Similar(object): |
29 """finds copy-pasted lines of code in a project""" | 33 """finds copy-pasted lines of code in a project""" |
30 | 34 |
31 def __init__(self, min_lines=4, ignore_comments=False, | 35 def __init__(self, min_lines=4, ignore_comments=False, |
32 ignore_docstrings=False, ignore_imports=False): | 36 ignore_docstrings=False, ignore_imports=False): |
33 self.min_lines = min_lines | 37 self.min_lines = min_lines |
34 self.ignore_comments = ignore_comments | 38 self.ignore_comments = ignore_comments |
35 self.ignore_docstrings = ignore_docstrings | 39 self.ignore_docstrings = ignore_docstrings |
36 self.ignore_imports = ignore_imports | 40 self.ignore_imports = ignore_imports |
(...skipping 14 matching lines...) |
51 self.ignore_imports)) | 55 self.ignore_imports)) |
52 except UnicodeDecodeError: | 56 except UnicodeDecodeError: |
53 pass | 57 pass |
54 | 58 |
55 def run(self): | 59 def run(self): |
56 """start looking for similarities and display results on stdout""" | 60 """start looking for similarities and display results on stdout""" |
57 self._display_sims(self._compute_sims()) | 61 self._display_sims(self._compute_sims()) |
58 | 62 |
59 def _compute_sims(self): | 63 def _compute_sims(self): |
60 """compute similarities in appended files""" | 64 """compute similarities in appended files""" |
61 no_duplicates = {} | 65 no_duplicates = defaultdict(list) |
62 for num, lineset1, idx1, lineset2, idx2 in self._iter_sims(): | 66 for num, lineset1, idx1, lineset2, idx2 in self._iter_sims(): |
63 duplicate = no_duplicates.setdefault(num, []) | 67 duplicate = no_duplicates[num] |
64 for couples in duplicate: | 68 for couples in duplicate: |
65 if (lineset1, idx1) in couples or (lineset2, idx2) in couples: | 69 if (lineset1, idx1) in couples or (lineset2, idx2) in couples: |
66 couples.add((lineset1, idx1)) | 70 couples.add((lineset1, idx1)) |
67 couples.add((lineset2, idx2)) | 71 couples.add((lineset2, idx2)) |
68 break | 72 break |
69 else: | 73 else: |
70 duplicate.append(set([(lineset1, idx1), (lineset2, idx2)])) | 74 duplicate.append(set([(lineset1, idx1), (lineset2, idx2)])) |
71 sims = [] | 75 sims = [] |
72 for num, ensembles in no_duplicates.iteritems(): | 76 for num, ensembles in six.iteritems(no_duplicates): |
73 for couples in ensembles: | 77 for couples in ensembles: |
74 sims.append((num, couples)) | 78 sims.append((num, couples)) |
75 sims.sort() | 79 sims.sort() |
76 sims.reverse() | 80 sims.reverse() |
77 return sims | 81 return sims |
78 | 82 |
79 def _display_sims(self, sims): | 83 def _display_sims(self, sims): |
80 """display computed similarities on stdout""" | 84 """display computed similarities on stdout""" |
81 nb_lignes_dupliquees = 0 | 85 nb_lignes_dupliquees = 0 |
82 for num, couples in sims: | 86 for num, couples in sims: |
83 print | 87 print() |
84 print num, "similar lines in", len(couples), "files" | 88 print(num, "similar lines in", len(couples), "files") |
85 couples = sorted(couples) | 89 couples = sorted(couples) |
86 for lineset, idx in couples: | 90 for lineset, idx in couples: |
87 print "==%s:%s" % (lineset.name, idx) | 91 print("==%s:%s" % (lineset.name, idx)) |
88 # pylint: disable=W0631 | 92 # pylint: disable=W0631 |
89 for line in lineset._real_lines[idx:idx+num]: | 93 for line in lineset._real_lines[idx:idx+num]: |
90 print " ", line.rstrip() | 94 print(" ", line.rstrip()) |
91 nb_lignes_dupliquees += num * (len(couples)-1) | 95 nb_lignes_dupliquees += num * (len(couples)-1) |
92 nb_total_lignes = sum([len(lineset) for lineset in self.linesets]) | 96 nb_total_lignes = sum([len(lineset) for lineset in self.linesets]) |
93 print "TOTAL lines=%s duplicates=%s percent=%.2f" \ | 97 print("TOTAL lines=%s duplicates=%s percent=%.2f" \ |
94 % (nb_total_lignes, nb_lignes_dupliquees, | 98 % (nb_total_lignes, nb_lignes_dupliquees, |
95 nb_lignes_dupliquees*100. / nb_total_lignes) | 99 nb_lignes_dupliquees*100. / nb_total_lignes)) |
96 | 100 |
97 def _find_common(self, lineset1, lineset2): | 101 def _find_common(self, lineset1, lineset2): |
98 """find similarities in the two given linesets""" | 102 """find similarities in the two given linesets""" |
99 lines1 = lineset1.enumerate_stripped | 103 lines1 = lineset1.enumerate_stripped |
100 lines2 = lineset2.enumerate_stripped | 104 lines2 = lineset2.enumerate_stripped |
101 find = lineset2.find | 105 find = lineset2.find |
102 index1 = 0 | 106 index1 = 0 |
103 min_lines = self.min_lines | 107 min_lines = self.min_lines |
104 while index1 < len(lineset1): | 108 while index1 < len(lineset1): |
105 skip = 1 | 109 skip = 1 |
106 num = 0 | 110 num = 0 |
107 for index2 in find(lineset1[index1]): | 111 for index2 in find(lineset1[index1]): |
108 non_blank = 0 | 112 non_blank = 0 |
109 for num, ((_, line1), (_, line2)) in enumerate( | 113 for num, ((_, line1), (_, line2)) in enumerate( |
110 izip(lines1(index1), lines2(index2))): | 114 zip(lines1(index1), lines2(index2))): |
111 if line1 != line2: | 115 if line1 != line2: |
112 if non_blank > min_lines: | 116 if non_blank > min_lines: |
113 yield num, lineset1, index1, lineset2, index2 | 117 yield num, lineset1, index1, lineset2, index2 |
114 skip = max(skip, num) | 118 skip = max(skip, num) |
115 break | 119 break |
116 if line1: | 120 if line1: |
117 non_blank += 1 | 121 non_blank += 1 |
118 else: | 122 else: |
119 # we may have reached the end | 123 # we may have reached the end |
120 num += 1 | 124 num += 1 |
(...skipping 79 matching lines...) |
200 #if line: | 204 #if line: |
201 yield idx, line | 205 yield idx, line |
202 idx += 1 | 206 idx += 1 |
203 | 207 |
204 def find(self, stripped_line): | 208 def find(self, stripped_line): |
205 """return positions of the given stripped line in this set""" | 209 """return positions of the given stripped line in this set""" |
206 return self._index.get(stripped_line, ()) | 210 return self._index.get(stripped_line, ()) |
207 | 211 |
208 def _mk_index(self): | 212 def _mk_index(self): |
209 """create the index for this set""" | 213 """create the index for this set""" |
210 index = {} | 214 index = defaultdict(list) |
211 for line_no, line in enumerate(self._stripped_lines): | 215 for line_no, line in enumerate(self._stripped_lines): |
212 if line: | 216 if line: |
213 index.setdefault(line, []).append(line_no) | 217 index[line].append(line_no) |
214 return index | 218 return index |
215 | 219 |
216 | 220 |
217 MSGS = {'R0801': ('Similar lines in %s files\n%s', | 221 MSGS = {'R0801': ('Similar lines in %s files\n%s', |
218 'duplicate-code', | 222 'duplicate-code', |
219 'Indicates that a set of similar lines has been detected \ | 223 'Indicates that a set of similar lines has been detected \ |
220 among multiple files. This usually means that the code should \ | 224 among multiple files. This usually means that the code should \ |
221 be refactored to avoid this duplication.')} | 225 be refactored to avoid this duplication.')} |
222 | 226 |
223 def report_similarities(sect, stats, old_stats): | 227 def report_similarities(sect, stats, old_stats): |
(...skipping 92 matching lines...) |
316 stats['nb_duplicated_lines'] = duplicated | 320 stats['nb_duplicated_lines'] = duplicated |
317 stats['percent_duplicated_lines'] = total and duplicated * 100. / total | 321 stats['percent_duplicated_lines'] = total and duplicated * 100. / total |
318 | 322 |
319 | 323 |
320 def register(linter): | 324 def register(linter): |
321 """required method to auto register this checker """ | 325 """required method to auto register this checker """ |
322 linter.register_checker(SimilarChecker(linter)) | 326 linter.register_checker(SimilarChecker(linter)) |
323 | 327 |
324 def usage(status=0): | 328 def usage(status=0): |
325 """display command line usage information""" | 329 """display command line usage information""" |
326 print "finds copy pasted blocks in a set of files" | 330 print("finds copy pasted blocks in a set of files") |
327 print | 331 print() |
328 print 'Usage: symilar [-d|--duplicates min_duplicated_lines] \ | 332 print('Usage: symilar [-d|--duplicates min_duplicated_lines] \ |
329 [-i|--ignore-comments] [--ignore-docstrings] [--ignore-imports] file1...' | 333 [-i|--ignore-comments] [--ignore-docstrings] [--ignore-imports] file1...') |
330 sys.exit(status) | 334 sys.exit(status) |
331 | 335 |
332 def Run(argv=None): | 336 def Run(argv=None): |
333 """standalone command line access point""" | 337 """standalone command line access point""" |
334 if argv is None: | 338 if argv is None: |
335 argv = sys.argv[1:] | 339 argv = sys.argv[1:] |
336 from getopt import getopt | 340 from getopt import getopt |
337 s_opts = 'hdi' | 341 s_opts = 'hdi' |
338 l_opts = ('help', 'duplicates=', 'ignore-comments', 'ignore-imports', | 342 l_opts = ('help', 'duplicates=', 'ignore-comments', 'ignore-imports', |
339 'ignore-docstrings') | 343 'ignore-docstrings') |
(...skipping 16 matching lines...) |
356 if not args: | 360 if not args: |
357 usage(1) | 361 usage(1) |
358 sim = Similar(min_lines, ignore_comments, ignore_docstrings, ignore_imports) | 362 sim = Similar(min_lines, ignore_comments, ignore_docstrings, ignore_imports) |
359 for filename in args: | 363 for filename in args: |
360 sim.append_stream(filename, open(filename)) | 364 sim.append_stream(filename, open(filename)) |
361 sim.run() | 365 sim.run() |
362 sys.exit(0) | 366 sys.exit(0) |
363 | 367 |
364 if __name__ == '__main__': | 368 if __name__ == '__main__': |
365 Run() | 369 Run() |
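
For reference, the Python 2/3 compatibility idioms this patch leans on can be exercised in isolation. The snippet below is a minimal standalone sketch, not part of the diff; it assumes `six` is installed, and the names `grouped` and `pairs` are illustrative only.

```python
# Sketch of the compatibility idioms used in the patch (assumes `six` is installed).
from __future__ import print_function  # print() behaves the same on Python 2 and 3

from collections import defaultdict

import six
from six.moves import zip  # itertools.izip on Python 2, the builtin zip on Python 3

# defaultdict(list) replaces the dict.setdefault(key, []) pattern used before.
grouped = defaultdict(list)
for key, value in [('a', 1), ('a', 2), ('b', 3)]:
    grouped[key].append(value)

# six.iteritems() iterates a mapping without building a list on either interpreter.
for key, values in six.iteritems(grouped):
    print(key, values)

# zip from six.moves is lazy everywhere, matching the old izip-based loop.
pairs = zip('ab', [1, 2])
print(list(pairs))
```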
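As a usage note, the `Run()` entry point shown above can also be reproduced programmatically. The sketch below drives the `Similar` class with the same calls `Run()` makes; the file names are hypothetical, it assumes the module is importable as `pylint.checkers.similar`, and it assumes `append_stream()` consumes the stream eagerly, as the call `sim.append_stream(filename, open(filename))` in the diff suggests.

```python
# Hypothetical driver for the Similar engine, mirroring Run() above.
from pylint.checkers.similar import Similar

# Same knobs as the -d/--duplicates and -i/--ignore-comments options.
sim = Similar(min_lines=4, ignore_comments=True)

for filename in ('module_a.py', 'module_b.py'):  # hypothetical input files
    with open(filename) as stream:
        sim.append_stream(filename, stream)  # the lines are read up front
sim.run()  # prints the duplicated blocks and the TOTAL summary to stdout
```

The equivalent command line, per the usage() text above, would be something like `symilar --duplicates 4 --ignore-comments module_a.py module_b.py`.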