Chromium Code Reviews

Side by Side Diff: third_party/pylint/pylint/checkers/similar.py

Issue 1920403002: [content/test/gpu] Run pylint check of gpu tests in unittest instead of PRESUBMIT (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Update path to LICENSE.txt of logilab/README.chromium Created 4 years, 7 months ago
1 # pylint: disable=W0622
2 # Copyright (c) 2004-2013 LOGILAB S.A. (Paris, FRANCE).
3 # http://www.logilab.fr/ -- mailto:contact@logilab.fr
4 #
5 # This program is free software; you can redistribute it and/or modify it under
6 # the terms of the GNU General Public License as published by the Free Software
7 # Foundation; either version 2 of the License, or (at your option) any later
8 # version.
9 #
10 # This program is distributed in the hope that it will be useful, but WITHOUT
11 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details
13 #
14 # You should have received a copy of the GNU General Public License along with
15 # this program; if not, write to the Free Software Foundation, Inc.,
16 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17 """a similarities / code duplication command line tool and pylint checker
18 """
19 from __future__ import print_function
20 import sys
21 from collections import defaultdict
22
23 from logilab.common.ureports import Table
24
25 from pylint.interfaces import IRawChecker
26 from pylint.checkers import BaseChecker, table_lines_from_stats
27
28 import six
29 from six.moves import zip
30
31
32 class Similar(object):
33 """finds copy-pasted lines of code in a project"""
34
35 def __init__(self, min_lines=4, ignore_comments=False,
36 ignore_docstrings=False, ignore_imports=False):
37 self.min_lines = min_lines
38 self.ignore_comments = ignore_comments
39 self.ignore_docstrings = ignore_docstrings
40 self.ignore_imports = ignore_imports
41 self.linesets = []
42
43 def append_stream(self, streamid, stream, encoding=None):
44 """append a file to search for similarities"""
45 if encoding is None:
46 readlines = stream.readlines
47 else:
48 readlines = lambda: [line.decode(encoding) for line in stream]
49 try:
50 self.linesets.append(LineSet(streamid,
51 readlines(),
52 self.ignore_comments,
53 self.ignore_docstrings,
54 self.ignore_imports))
55 except UnicodeDecodeError:
56 pass
57
58 def run(self):
59 """start looking for similarities and display results on stdout"""
60 self._display_sims(self._compute_sims())
61
62 def _compute_sims(self):
63 """compute similarities in appended files"""
64 no_duplicates = defaultdict(list)
65 for num, lineset1, idx1, lineset2, idx2 in self._iter_sims():
66 duplicate = no_duplicates[num]
67 for couples in duplicate:
68 if (lineset1, idx1) in couples or (lineset2, idx2) in couples:
69 couples.add((lineset1, idx1))
70 couples.add((lineset2, idx2))
71 break
72 else:
73 duplicate.append(set([(lineset1, idx1), (lineset2, idx2)]))
74 sims = []
75 for num, ensembles in six.iteritems(no_duplicates):
76 for couples in ensembles:
77 sims.append((num, couples))
78 sims.sort()
79 sims.reverse()
80 return sims
81
82 def _display_sims(self, sims):
83 """display computed similarities on stdout"""
84 nb_lignes_dupliquees = 0
85 for num, couples in sims:
86 print()
87 print(num, "similar lines in", len(couples), "files")
88 couples = sorted(couples)
89 for lineset, idx in couples:
90 print("==%s:%s" % (lineset.name, idx))
91 # pylint: disable=W0631
92 for line in lineset._real_lines[idx:idx+num]:
93 print(" ", line.rstrip())
94 nb_lignes_dupliquees += num * (len(couples)-1)
95 nb_total_lignes = sum([len(lineset) for lineset in self.linesets])
96 print("TOTAL lines=%s duplicates=%s percent=%.2f" \
97 % (nb_total_lignes, nb_lignes_dupliquees,
98 nb_lignes_dupliquees*100. / nb_total_lignes))
99
100 def _find_common(self, lineset1, lineset2):
101 """find similarities in the two given linesets"""
102 lines1 = lineset1.enumerate_stripped
103 lines2 = lineset2.enumerate_stripped
104 find = lineset2.find
105 index1 = 0
106 min_lines = self.min_lines
107 while index1 < len(lineset1):
108 skip = 1
109 num = 0
110 for index2 in find(lineset1[index1]):
111 non_blank = 0
112 for num, ((_, line1), (_, line2)) in enumerate(
113 zip(lines1(index1), lines2(index2))):
114 if line1 != line2:
115 if non_blank > min_lines:
116 yield num, lineset1, index1, lineset2, index2
117 skip = max(skip, num)
118 break
119 if line1:
120 non_blank += 1
121 else:
122 # we may have reached the end
123 num += 1
124 if non_blank > min_lines:
125 yield num, lineset1, index1, lineset2, index2
126 skip = max(skip, num)
127 index1 += skip
128
129 def _iter_sims(self):
130 """iterate on similarities among all files, by making a cartesian
131 product
132 """
133 for idx, lineset in enumerate(self.linesets[:-1]):
134 for lineset2 in self.linesets[idx+1:]:
135 for sim in self._find_common(lineset, lineset2):
136 yield sim
137
138 def stripped_lines(lines, ignore_comments, ignore_docstrings, ignore_imports):
139 """return lines with leading/trailing whitespace and any ignored code
140 features removed
141 """
142
143 strippedlines = []
144 docstring = None
145 for line in lines:
146 line = line.strip()
147 if ignore_docstrings:
148 if not docstring and \
149 (line.startswith('"""') or line.startswith("'''")):
150 docstring = line[:3]
151 line = line[3:]
152 if docstring:
153 if line.endswith(docstring):
154 docstring = None
155 line = ''
156 if ignore_imports:
157 if line.startswith("import ") or line.startswith("from "):
158 line = ''
159 if ignore_comments:
160 # XXX should use regex in checkers/format to avoid cutting
161 # at a "#" in a string
162 line = line.split('#', 1)[0].strip()
163 strippedlines.append(line)
164 return strippedlines
165
166
167 class LineSet(object):
168 """Holds and indexes all the lines of a single source file"""
169 def __init__(self, name, lines, ignore_comments=False,
170 ignore_docstrings=False, ignore_imports=False):
171 self.name = name
172 self._real_lines = lines
173 self._stripped_lines = stripped_lines(lines, ignore_comments,
174 ignore_docstrings,
175 ignore_imports)
176 self._index = self._mk_index()
177
178 def __str__(self):
179 return '<Lineset for %s>' % self.name
180
181 def __len__(self):
182 return len(self._real_lines)
183
184 def __getitem__(self, index):
185 return self._stripped_lines[index]
186
187 def __lt__(self, other):
188 return self.name < other.name
189
190 def __hash__(self):
191 return id(self)
192
193 def enumerate_stripped(self, start_at=0):
194 """return an iterator on stripped lines, starting from a given index
195 if specified, else 0
196 """
197 idx = start_at
198 if start_at:
199 lines = self._stripped_lines[start_at:]
200 else:
201 lines = self._stripped_lines
202 for line in lines:
203 #if line:
204 yield idx, line
205 idx += 1
206
207 def find(self, stripped_line):
208 """return positions of the given stripped line in this set"""
209 return self._index.get(stripped_line, ())
210
211 def _mk_index(self):
212 """create the index for this set"""
213 index = defaultdict(list)
214 for line_no, line in enumerate(self._stripped_lines):
215 if line:
216 index[line].append(line_no)
217 return index
218
219
220 MSGS = {'R0801': ('Similar lines in %s files\n%s',
221 'duplicate-code',
222 'Indicates that a set of similar lines has been detected \
223 among multiple files. This usually means that the code should \
224 be refactored to avoid this duplication.')}
225
226 def report_similarities(sect, stats, old_stats):
227 """make a layout with some stats about duplication"""
228 lines = ['', 'now', 'previous', 'difference']
229 lines += table_lines_from_stats(stats, old_stats,
230 ('nb_duplicated_lines',
231 'percent_duplicated_lines'))
232 sect.append(Table(children=lines, cols=4, rheaders=1, cheaders=1))
233
234
235 # wrapper to get a pylint checker from the similar class
236 class SimilarChecker(BaseChecker, Similar):
237 """checks for similarities and duplicated code. This computation may be
238 memory / CPU intensive, so you should disable it if you experience
239 problems.
240 """
241
242 __implements__ = (IRawChecker,)
243 # configuration section name
244 name = 'similarities'
245 # messages
246 msgs = MSGS
247 # configuration options
248 # for available dict keys/values see the optik parser 'add_option' method
249 options = (('min-similarity-lines',
250 {'default' : 4, 'type' : "int", 'metavar' : '<int>',
251 'help' : 'Minimum number of lines in a similarity.'}),
252 ('ignore-comments',
253 {'default' : True, 'type' : 'yn', 'metavar' : '<y or n>',
254 'help': 'Ignore comments when computing similarities.'}
255 ),
256 ('ignore-docstrings',
257 {'default' : True, 'type' : 'yn', 'metavar' : '<y or n>',
258 'help': 'Ignore docstrings when computing similarities.'}
259 ),
260 ('ignore-imports',
261 {'default' : False, 'type' : 'yn', 'metavar' : '<y or n>',
262 'help': 'Ignore imports when computing similarities.'}
263 ),
264 )
265 # reports
266 reports = (('RP0801', 'Duplication', report_similarities),)
267
268 def __init__(self, linter=None):
269 BaseChecker.__init__(self, linter)
270 Similar.__init__(self, min_lines=4,
271 ignore_comments=True, ignore_docstrings=True)
272 self.stats = None
273
274 def set_option(self, optname, value, action=None, optdict=None):
275 """method called to set an option (registered in the options list)
276
277 overridden to report options setting to Similar
278 """
279 BaseChecker.set_option(self, optname, value, action, optdict)
280 if optname == 'min-similarity-lines':
281 self.min_lines = self.config.min_similarity_lines
282 elif optname == 'ignore-comments':
283 self.ignore_comments = self.config.ignore_comments
284 elif optname == 'ignore-docstrings':
285 self.ignore_docstrings = self.config.ignore_docstrings
286 elif optname == 'ignore-imports':
287 self.ignore_imports = self.config.ignore_imports
288
289 def open(self):
290 """init the checkers: reset linesets and statistics information"""
291 self.linesets = []
292 self.stats = self.linter.add_stats(nb_duplicated_lines=0,
293 percent_duplicated_lines=0)
294
295 def process_module(self, node):
296 """process a module
297
298 the module's content is accessible via the stream object
299
300 stream must implement the readlines method
301 """
302 with node.stream() as stream:
303 self.append_stream(self.linter.current_name,
304 stream,
305 node.file_encoding)
306
307 def close(self):
308 """compute and display similarities on closing (i.e. end of parsing)"""
309 total = sum([len(lineset) for lineset in self.linesets])
310 duplicated = 0
311 stats = self.stats
312 for num, couples in self._compute_sims():
313 msg = []
314 for lineset, idx in couples:
315 msg.append("==%s:%s" % (lineset.name, idx))
316 msg.sort()
317 # pylint: disable=W0631
318 for line in lineset._real_lines[idx:idx+num]:
319 msg.append(line.rstrip())
320 self.add_message('R0801', args=(len(couples), '\n'.join(msg)))
321 duplicated += num * (len(couples) - 1)
322 stats['nb_duplicated_lines'] = duplicated
323 stats['percent_duplicated_lines'] = total and duplicated * 100. / total
324
325
326 def register(linter):
327 """required method to auto register this checker """
328 linter.register_checker(SimilarChecker(linter))
329
330 def usage(status=0):
331 """display command line usage information"""
332 print("finds copy pasted blocks in a set of files")
333 print()
334 print('Usage: symilar [-d|--duplicates min_duplicated_lines] \
335 [-i|--ignore-comments] [--ignore-docstrings] [--ignore-imports] file1...')
336 sys.exit(status)
337
338 def Run(argv=None):
339 """standalone command line access point"""
340 if argv is None:
341 argv = sys.argv[1:]
342 from getopt import getopt
343 s_opts = 'hdi'
344 l_opts = ('help', 'duplicates=', 'ignore-comments', 'ignore-imports',
345 'ignore-docstrings')
346 min_lines = 4
347 ignore_comments = False
348 ignore_docstrings = False
349 ignore_imports = False
350 opts, args = getopt(argv, s_opts, l_opts)
351 for opt, val in opts:
352 if opt in ('-d', '--duplicates'):
353 min_lines = int(val)
354 elif opt in ('-h', '--help'):
355 usage()
356 elif opt in ('-i', '--ignore-comments'):
357 ignore_comments = True
358 elif opt in ('--ignore-docstrings',):
359 ignore_docstrings = True
360 elif opt in ('--ignore-imports',):
361 ignore_imports = True
362 if not args:
363 usage(1)
364 sim = Similar(min_lines, ignore_comments, ignore_docstrings, ignore_imports)
365 for filename in args:
366 with open(filename) as stream:
367 sim.append_stream(filename, stream)
368 sim.run()
369 sys.exit(0)
370
371 if __name__ == '__main__':
372 Run()
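
For context (not part of the reviewed file): a minimal sketch of driving the duplicate-line detector above programmatically, mirroring what Run() does but without the getopt and sys.exit handling. The file names a.py and b.py are placeholders for illustration only, not files from this change.

from pylint.checkers.similar import Similar

sim = Similar(min_lines=4, ignore_comments=True,
              ignore_docstrings=True, ignore_imports=False)
for filename in ("a.py", "b.py"):
    # append_stream() strips and indexes the file's lines into a LineSet
    with open(filename) as stream:
        sim.append_stream(filename, stream)
# run() prints each group of duplicated lines followed by a TOTAL summary line
sim.run()

The same check is exposed on the command line as described in usage() above, e.g. something like: symilar --duplicates 5 a.py b.py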