Chromium Code Reviews

Side by Side Diff: third_party/pylint/checkers/similar.py

Issue 719313003: Revert "pylint: upgrade to 1.3.1" (Closed) Base URL: https://chromium.googlesource.com/chromium/tools/depot_tools.git@master
Patch Set: Created 6 years, 1 month ago
OLD | NEW
1 # pylint: disable=W0622 1 # pylint: disable=W0622
2 # Copyright (c) 2004-2013 LOGILAB S.A. (Paris, FRANCE). 2 # Copyright (c) 2004-2006 LOGILAB S.A. (Paris, FRANCE).
3 # http://www.logilab.fr/ -- mailto:contact@logilab.fr 3 # http://www.logilab.fr/ -- mailto:contact@logilab.fr
4 # 4 #
5 # This program is free software; you can redistribute it and/or modify it under 5 # This program is free software; you can redistribute it and/or modify it under
6 # the terms of the GNU General Public License as published by the Free Software 6 # the terms of the GNU General Public License as published by the Free Software
7 # Foundation; either version 2 of the License, or (at your option) any later 7 # Foundation; either version 2 of the License, or (at your option) any later
8 # version. 8 # version.
9 # 9 #
10 # This program is distributed in the hope that it will be useful, but WITHOUT 10 # This program is distributed in the hope that it will be useful, but WITHOUT
11 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 11 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details 12 # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details
13 # 13 #
14 # You should have received a copy of the GNU General Public License along with 14 # You should have received a copy of the GNU General Public License along with
15 # this program; if not, write to the Free Software Foundation, Inc., 15 # this program; if not, write to the Free Software Foundation, Inc.,
16 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 16 # 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 """a similarities / code duplication command line tool and pylint checker 17 """a similarities / code duplication command line tool and pylint checker
18 """ 18 """
19 from __future__ import generators
20
19 import sys 21 import sys
20 from itertools import izip 22 from itertools import izip
21 23
22 from logilab.common.ureports import Table 24 from logilab.common.ureports import Table
23 25
24 from pylint.interfaces import IRawChecker 26 from pylint.interfaces import IRawChecker
25 from pylint.checkers import BaseChecker, table_lines_from_stats 27 from pylint.checkers import BaseChecker, table_lines_from_stats
26 28
27 29
28 class Similar(object): 30 class Similar:
29 """finds copy-pasted lines of code in a project""" 31 """finds copy-pasted lines of code in a project"""
30 32
31 def __init__(self, min_lines=4, ignore_comments=False, 33 def __init__(self, min_lines=4, ignore_comments=False,
32 ignore_docstrings=False, ignore_imports=False): 34 ignore_docstrings=False):
33 self.min_lines = min_lines 35 self.min_lines = min_lines
34 self.ignore_comments = ignore_comments 36 self.ignore_comments = ignore_comments
35 self.ignore_docstrings = ignore_docstrings 37 self.ignore_docstrings = ignore_docstrings
36 self.ignore_imports = ignore_imports
37 self.linesets = [] 38 self.linesets = []
38 39
39 def append_stream(self, streamid, stream, encoding=None): 40 def append_stream(self, streamid, stream):
40 """append a file to search for similarities""" 41 """append a file to search for similarities"""
41 stream.seek(0) # XXX may be removed with astroid > 0.23 42 stream.seek(0) # XXX may be removed with astng > 0.23
42 if encoding is None: 43 self.linesets.append(LineSet(streamid,
43 readlines = stream.readlines 44 stream.readlines(),
44 else: 45 self.ignore_comments,
45 readlines = lambda: [line.decode(encoding) for line in stream] 46 self.ignore_docstrings))
46 try:
47 self.linesets.append(LineSet(streamid,
48 readlines(),
49 self.ignore_comments,
50 self.ignore_docstrings,
51 self.ignore_imports))
52 except UnicodeDecodeError:
53 pass
54 47
55 def run(self): 48 def run(self):
56 """start looking for similarities and display results on stdout""" 49 """start looking for similarities and display results on stdout"""
57 self._display_sims(self._compute_sims()) 50 self._display_sims(self._compute_sims())
58 51
59 def _compute_sims(self): 52 def _compute_sims(self):
60 """compute similarities in appended files""" 53 """compute similarities in appended files"""
61 no_duplicates = {} 54 no_duplicates = {}
62 for num, lineset1, idx1, lineset2, idx2 in self._iter_sims(): 55 for num, lineset1, idx1, lineset2, idx2 in self._iter_sims():
63 duplicate = no_duplicates.setdefault(num, []) 56 duplicate = no_duplicates.setdefault(num, [])
64 for couples in duplicate: 57 for couples in duplicate:
65 if (lineset1, idx1) in couples or (lineset2, idx2) in couples: 58 if (lineset1, idx1) in couples or (lineset2, idx2) in couples:
66 couples.add((lineset1, idx1)) 59 couples.add( (lineset1, idx1) )
67 couples.add((lineset2, idx2)) 60 couples.add( (lineset2, idx2) )
68 break 61 break
69 else: 62 else:
70 duplicate.append(set([(lineset1, idx1), (lineset2, idx2)])) 63 duplicate.append( set([(lineset1, idx1), (lineset2, idx2)]) )
71 sims = [] 64 sims = []
72 for num, ensembles in no_duplicates.iteritems(): 65 for num, ensembles in no_duplicates.iteritems():
73 for couples in ensembles: 66 for couples in ensembles:
74 sims.append((num, couples)) 67 sims.append( (num, couples) )
75 sims.sort() 68 sims.sort()
76 sims.reverse() 69 sims.reverse()
77 return sims 70 return sims
78 71
79 def _display_sims(self, sims): 72 def _display_sims(self, sims):
80 """display computed similarities on stdout""" 73 """display computed similarities on stdout"""
81 nb_lignes_dupliquees = 0 74 nb_lignes_dupliquees = 0
82 for num, couples in sims: 75 for num, couples in sims:
83 print 76 print
84 print num, "similar lines in", len(couples), "files" 77 print num, "similar lines in", len(couples), "files"
85 couples = sorted(couples) 78 couples = sorted(couples)
86 for lineset, idx in couples: 79 for lineset, idx in couples:
87 print "==%s:%s" % (lineset.name, idx) 80 print "==%s:%s" % (lineset.name, idx)
88 # pylint: disable=W0631 81 # pylint: disable=W0631
89 for line in lineset._real_lines[idx:idx+num]: 82 for line in lineset._real_lines[idx:idx+num]:
90 print " ", line.rstrip() 83 print " ", line,
91 nb_lignes_dupliquees += num * (len(couples)-1) 84 nb_lignes_dupliquees += num * (len(couples)-1)
92 nb_total_lignes = sum([len(lineset) for lineset in self.linesets]) 85 nb_total_lignes = sum([len(lineset) for lineset in self.linesets])
93 print "TOTAL lines=%s duplicates=%s percent=%.2f" \ 86 print "TOTAL lines=%s duplicates=%s percent=%.2f" \
94 % (nb_total_lignes, nb_lignes_dupliquees, 87 % (nb_total_lignes, nb_lignes_dupliquees,
95 nb_lignes_dupliquees*100. / nb_total_lignes) 88 nb_lignes_dupliquees*100. / nb_total_lignes)
96 89
97 def _find_common(self, lineset1, lineset2): 90 def _find_common(self, lineset1, lineset2):
98 """find similarities in the two given linesets""" 91 """find similarities in the two given linesets"""
99 lines1 = lineset1.enumerate_stripped 92 lines1 = lineset1.enumerate_stripped
100 lines2 = lineset2.enumerate_stripped 93 lines2 = lineset2.enumerate_stripped
101 find = lineset2.find 94 find = lineset2.find
102 index1 = 0 95 index1 = 0
103 min_lines = self.min_lines 96 min_lines = self.min_lines
104 while index1 < len(lineset1): 97 while index1 < len(lineset1):
105 skip = 1 98 skip = 1
106 num = 0 99 num = 0
107 for index2 in find(lineset1[index1]): 100 for index2 in find( lineset1[index1] ):
108 non_blank = 0 101 non_blank = 0
109 for num, ((_, line1), (_, line2)) in enumerate( 102 for num, ((_, line1), (_, line2)) in enumerate(
110 izip(lines1(index1), lines2(index2))): 103 izip(lines1(index1), lines2(index2))):
111 if line1 != line2: 104 if line1 != line2:
112 if non_blank > min_lines: 105 if non_blank > min_lines:
113 yield num, lineset1, index1, lineset2, index2 106 yield num, lineset1, index1, lineset2, index2
114 skip = max(skip, num) 107 skip = max(skip, num)
115 break 108 break
116 if line1: 109 if line1:
117 non_blank += 1 110 non_blank += 1
118 else: 111 else:
119 # we may have reach the end 112 # we may have reach the end
120 num += 1 113 num += 1
121 if non_blank > min_lines: 114 if non_blank > min_lines:
122 yield num, lineset1, index1, lineset2, index2 115 yield num, lineset1, index1, lineset2, index2
123 skip = max(skip, num) 116 skip = max(skip, num)
124 index1 += skip 117 index1 += skip
125 118
126 def _iter_sims(self): 119 def _iter_sims(self):
127 """iterate on similarities among all files, by making a cartesian 120 """iterate on similarities among all files, by making a cartesian
128 product 121 product
129 """ 122 """
130 for idx, lineset in enumerate(self.linesets[:-1]): 123 for idx, lineset in enumerate(self.linesets[:-1]):
131 for lineset2 in self.linesets[idx+1:]: 124 for lineset2 in self.linesets[idx+1:]:
132 for sim in self._find_common(lineset, lineset2): 125 for sim in self._find_common(lineset, lineset2):
133 yield sim 126 yield sim
134 127
135 def stripped_lines(lines, ignore_comments, ignore_docstrings, ignore_imports): 128 def stripped_lines(lines, ignore_comments, ignore_docstrings):
136 """return lines with leading/trailing whitespace and any ignored code
137 features removed
138 """
139
140 strippedlines = [] 129 strippedlines = []
141 docstring = None 130 docstring = None
142 for line in lines: 131 for line in lines:
143 line = line.strip() 132 line = line.strip()
144 if ignore_docstrings: 133 if ignore_docstrings:
145 if not docstring and \ 134 if not docstring and \
146 (line.startswith('"""') or line.startswith("'''")): 135 (line.startswith('"""') or line.startswith("'''")):
147 docstring = line[:3] 136 docstring = line[:3]
148 line = line[3:] 137 line = line[3:]
149 if docstring: 138 if docstring:
150 if line.endswith(docstring): 139 if line.endswith(docstring):
151 docstring = None 140 docstring = None
152 line = '' 141 line = ''
153 if ignore_imports:
154 if line.startswith("import ") or line.startswith("from "):
155 line = ''
156 if ignore_comments: 142 if ignore_comments:
157 # XXX should use regex in checkers/format to avoid cutting 143 # XXX should use regex in checkers/format to avoid cutting
158 # at a "#" in a string 144 # at a "#" in a string
159 line = line.split('#', 1)[0].strip() 145 line = line.split('#', 1)[0].strip()
160 strippedlines.append(line) 146 strippedlines.append(line)
161 return strippedlines 147 return strippedlines
162 148
163 149 class LineSet:
164 class LineSet(object):
165 """Holds and indexes all the lines of a single source file""" 150 """Holds and indexes all the lines of a single source file"""
166 def __init__(self, name, lines, ignore_comments=False, 151 def __init__(self, name, lines, ignore_comments=False,
167 ignore_docstrings=False, ignore_imports=False): 152 ignore_docstrings=False):
168 self.name = name 153 self.name = name
169 self._real_lines = lines 154 self._real_lines = lines
170 self._stripped_lines = stripped_lines(lines, ignore_comments, 155 self._stripped_lines = stripped_lines(lines, ignore_comments,
171 ignore_docstrings, 156 ignore_docstrings)
172 ignore_imports)
173 self._index = self._mk_index() 157 self._index = self._mk_index()
174 158
175 def __str__(self): 159 def __str__(self):
176 return '<Lineset for %s>' % self.name 160 return '<Lineset for %s>' % self.name
177 161
178 def __len__(self): 162 def __len__(self):
179 return len(self._real_lines) 163 return len(self._real_lines)
180 164
181 def __getitem__(self, index): 165 def __getitem__(self, index):
182 return self._stripped_lines[index] 166 return self._stripped_lines[index]
(...skipping 20 matching lines...)
203 187
204 def find(self, stripped_line): 188 def find(self, stripped_line):
205 """return positions of the given stripped line in this set""" 189 """return positions of the given stripped line in this set"""
206 return self._index.get(stripped_line, ()) 190 return self._index.get(stripped_line, ())
207 191
208 def _mk_index(self): 192 def _mk_index(self):
209 """create the index for this set""" 193 """create the index for this set"""
210 index = {} 194 index = {}
211 for line_no, line in enumerate(self._stripped_lines): 195 for line_no, line in enumerate(self._stripped_lines):
212 if line: 196 if line:
213 index.setdefault(line, []).append(line_no) 197 index.setdefault(line, []).append( line_no )
214 return index 198 return index
215 199
216 200
217 MSGS = {'R0801': ('Similar lines in %s files\n%s', 201 MSGS = {'R0801': ('Similar lines in %s files\n%s',
218 'duplicate-code',
219 'Indicates that a set of similar lines has been detected \ 202 'Indicates that a set of similar lines has been detected \
220 among multiple file. This usually means that the code should \ 203 among multiple file. This usually means that the code should \
221 be refactored to avoid this duplication.')} 204 be refactored to avoid this duplication.')}
222 205
223 def report_similarities(sect, stats, old_stats): 206 def report_similarities(sect, stats, old_stats):
224 """make a layout with some stats about duplication""" 207 """make a layout with some stats about duplication"""
225 lines = ['', 'now', 'previous', 'difference'] 208 lines = ['', 'now', 'previous', 'difference']
226 lines += table_lines_from_stats(stats, old_stats, 209 lines += table_lines_from_stats(stats, old_stats,
227 ('nb_duplicated_lines', 210 ('nb_duplicated_lines',
228 'percent_duplicated_lines')) 211 'percent_duplicated_lines'))
(...skipping 13 matching lines...)
242 # messages 225 # messages
243 msgs = MSGS 226 msgs = MSGS
244 # configuration options 227 # configuration options
245 # for available dict keys/values see the optik parser 'add_option' method 228 # for available dict keys/values see the optik parser 'add_option' method
246 options = (('min-similarity-lines', 229 options = (('min-similarity-lines',
247 {'default' : 4, 'type' : "int", 'metavar' : '<int>', 230 {'default' : 4, 'type' : "int", 'metavar' : '<int>',
248 'help' : 'Minimum lines number of a similarity.'}), 231 'help' : 'Minimum lines number of a similarity.'}),
249 ('ignore-comments', 232 ('ignore-comments',
250 {'default' : True, 'type' : 'yn', 'metavar' : '<y or n>', 233 {'default' : True, 'type' : 'yn', 'metavar' : '<y or n>',
251 'help': 'Ignore comments when computing similarities.'} 234 'help': 'Ignore comments when computing similarities.'}
252 ), 235 ),
253 ('ignore-docstrings', 236 ('ignore-docstrings',
254 {'default' : True, 'type' : 'yn', 'metavar' : '<y or n>', 237 {'default' : True, 'type' : 'yn', 'metavar' : '<y or n>',
255 'help': 'Ignore docstrings when computing similarities.'} 238 'help': 'Ignore docstrings when computing similarities.'}
256 ), 239 ),
257 ('ignore-imports', 240 )
258 {'default' : False, 'type' : 'yn', 'metavar' : '<y or n>',
259 'help': 'Ignore imports when computing similarities.'}
260 ),
261 )
262 # reports 241 # reports
263 reports = (('RP0801', 'Duplication', report_similarities),) 242 reports = ( ('R0801', 'Duplication', report_similarities), ) # XXX actually a Refactoring message
264 243
265 def __init__(self, linter=None): 244 def __init__(self, linter=None):
266 BaseChecker.__init__(self, linter) 245 BaseChecker.__init__(self, linter)
267 Similar.__init__(self, min_lines=4, 246 Similar.__init__(self, min_lines=4,
268 ignore_comments=True, ignore_docstrings=True) 247 ignore_comments=True, ignore_docstrings=True)
269 self.stats = None 248 self.stats = None
270 249
271 def set_option(self, optname, value, action=None, optdict=None): 250 def set_option(self, optname, value, action=None, optdict=None):
272 """method called to set an option (registered in the options list) 251 """method called to set an option (registered in the options list)
273 252
274 overridden to report options setting to Similar 253 overridden to report options setting to Similar
275 """ 254 """
276 BaseChecker.set_option(self, optname, value, action, optdict) 255 BaseChecker.set_option(self, optname, value, action, optdict)
277 if optname == 'min-similarity-lines': 256 if optname == 'min-similarity-lines':
278 self.min_lines = self.config.min_similarity_lines 257 self.min_lines = self.config.min_similarity_lines
279 elif optname == 'ignore-comments': 258 elif optname == 'ignore-comments':
280 self.ignore_comments = self.config.ignore_comments 259 self.ignore_comments = self.config.ignore_comments
281 elif optname == 'ignore-docstrings': 260 elif optname == 'ignore-docstrings':
282 self.ignore_docstrings = self.config.ignore_docstrings 261 self.ignore_docstrings = self.config.ignore_docstrings
283 elif optname == 'ignore-imports':
284 self.ignore_imports = self.config.ignore_imports
285 262
286 def open(self): 263 def open(self):
287 """init the checkers: reset linesets and statistics information""" 264 """init the checkers: reset linesets and statistics information"""
288 self.linesets = [] 265 self.linesets = []
289 self.stats = self.linter.add_stats(nb_duplicated_lines=0, 266 self.stats = self.linter.add_stats(nb_duplicated_lines=0,
290 percent_duplicated_lines=0) 267 percent_duplicated_lines=0)
291 268
292 def process_module(self, node): 269 def process_module(self, node):
293 """process a module 270 """process a module
294 271
295 the module's content is accessible via the stream object 272 the module's content is accessible via the stream object
296 273
297 stream must implement the readlines method 274 stream must implement the readlines method
298 """ 275 """
299 self.append_stream(self.linter.current_name, node.file_stream, node.file_encoding) 276 self.append_stream(self.linter.current_name, node.file_stream)
300 277
301 def close(self): 278 def close(self):
302 """compute and display similarities on closing (i.e. end of parsing)""" 279 """compute and display similarities on closing (i.e. end of parsing)"""
303 total = sum([len(lineset) for lineset in self.linesets]) 280 total = sum([len(lineset) for lineset in self.linesets])
304 duplicated = 0 281 duplicated = 0
305 stats = self.stats 282 stats = self.stats
306 for num, couples in self._compute_sims(): 283 for num, couples in self._compute_sims():
307 msg = [] 284 msg = []
308 for lineset, idx in couples: 285 for lineset, idx in couples:
309 msg.append("==%s:%s" % (lineset.name, idx)) 286 msg.append("==%s:%s" % (lineset.name, idx))
310 msg.sort() 287 msg.sort()
311 # pylint: disable=W0631 288 # pylint: disable=W0631
312 for line in lineset._real_lines[idx:idx+num]: 289 for line in lineset._real_lines[idx:idx+num]:
313 msg.append(line.rstrip()) 290 msg.append(line.rstrip())
314 self.add_message('R0801', args=(len(couples), '\n'.join(msg))) 291 self.add_message('R0801', args=(len(couples), '\n'.join(msg)))
315 duplicated += num * (len(couples) - 1) 292 duplicated += num * (len(couples) - 1)
316 stats['nb_duplicated_lines'] = duplicated 293 stats['nb_duplicated_lines'] = duplicated
317 stats['percent_duplicated_lines'] = total and duplicated * 100. / total 294 stats['percent_duplicated_lines'] = total and duplicated * 100. / total
318 295
319 296
320 def register(linter): 297 def register(linter):
321 """required method to auto register this checker """ 298 """required method to auto register this checker """
322 linter.register_checker(SimilarChecker(linter)) 299 linter.register_checker(SimilarChecker(linter))
323 300
324 def usage(status=0): 301 def usage(status=0):
325 """display command line usage information""" 302 """display command line usage information"""
326 print "finds copy pasted blocks in a set of files" 303 print "finds copy pasted blocks in a set of files"
327 print 304 print
328 print 'Usage: symilar [-d|--duplicates min_duplicated_lines] \ 305 print 'Usage: symilar [-d|--duplicates min_duplicated_lines] \
329 [-i|--ignore-comments] [--ignore-docstrings] [--ignore-imports] file1...' 306 [-i|--ignore-comments] file1...'
330 sys.exit(status) 307 sys.exit(status)
331 308
332 def Run(argv=None): 309 def run(argv=None):
333 """standalone command line access point""" 310 """standalone command line access point"""
334 if argv is None: 311 if argv is None:
335 argv = sys.argv[1:] 312 argv = sys.argv[1:]
336 from getopt import getopt 313 from getopt import getopt
337 s_opts = 'hdi' 314 s_opts = 'hdi'
338 l_opts = ('help', 'duplicates=', 'ignore-comments', 'ignore-imports', 315 l_opts = ('help', 'duplicates=', 'ignore-comments')
339 'ignore-docstrings')
340 min_lines = 4 316 min_lines = 4
341 ignore_comments = False 317 ignore_comments = False
342 ignore_docstrings = False
343 ignore_imports = False
344 opts, args = getopt(argv, s_opts, l_opts) 318 opts, args = getopt(argv, s_opts, l_opts)
345 for opt, val in opts: 319 for opt, val in opts:
346 if opt in ('-d', '--duplicates'): 320 if opt in ('-d', '--duplicates'):
347 min_lines = int(val) 321 min_lines = int(val)
348 elif opt in ('-h', '--help'): 322 elif opt in ('-h', '--help'):
349 usage() 323 usage()
350 elif opt in ('-i', '--ignore-comments'): 324 elif opt in ('-i', '--ignore-comments'):
351 ignore_comments = True 325 ignore_comments = True
352 elif opt in ('--ignore-docstrings',):
353 ignore_docstrings = True
354 elif opt in ('--ignore-imports',):
355 ignore_imports = True
356 if not args: 326 if not args:
357 usage(1) 327 usage(1)
358 sim = Similar(min_lines, ignore_comments, ignore_docstrings, ignore_imports) 328 sim = Similar(min_lines, ignore_comments)
359 for filename in args: 329 for filename in args:
360 sim.append_stream(filename, open(filename)) 330 sim.append_stream(filename, open(filename))
361 sim.run() 331 sim.run()
362 sys.exit(0)
363 332
364 if __name__ == '__main__': 333 if __name__ == '__main__':
365 Run() 334 run()
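
For reference, here is a minimal usage sketch of the Similar API defined in this file. It is not part of the change; it only uses the constructor arguments and methods that appear on both sides of the diff (min_lines, ignore_comments, ignore_docstrings, append_stream, run), and the file names module_a.py and module_b.py are hypothetical.

from pylint.checkers.similar import Similar

# Collect duplicated chunks of at least 4 lines across two files,
# ignoring comments and docstrings.
sim = Similar(min_lines=4, ignore_comments=True, ignore_docstrings=True)
for filename in ('module_a.py', 'module_b.py'):  # hypothetical inputs
    stream = open(filename)
    try:
        # append_stream reads the lines immediately (readlines is called
        # inside the method on both sides of this diff), so the stream can
        # be closed right after the call.
        sim.append_stream(filename, stream)
    finally:
        stream.close()
sim.run()  # prints each group of similar lines plus a TOTAL summary to stdout

The equivalent standalone invocation, per the usage() text in the reverted version, would be: symilar -d 4 -i module_a.py module_b.py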