OLD | NEW |
(Empty) | |
| 1 # pylint: disable=W0622 |
| 2 # Copyright (c) 2004-2013 LOGILAB S.A. (Paris, FRANCE). |
| 3 # http://www.logilab.fr/ -- mailto:contact@logilab.fr |
| 4 # |
| 5 # This program is free software; you can redistribute it and/or modify it under |
| 6 # the terms of the GNU General Public License as published by the Free Software |
| 7 # Foundation; either version 2 of the License, or (at your option) any later |
| 8 # version. |
| 9 # |
| 10 # This program is distributed in the hope that it will be useful, but WITHOUT |
| 11 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
| 12 # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details |
| 13 # |
| 14 # You should have received a copy of the GNU General Public License along with |
| 15 # this program; if not, write to the Free Software Foundation, Inc., |
| 16 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
| 17 """a similarities / code duplication command line tool and pylint checker |
| 18 """ |
| 19 from __future__ import print_function |
| 20 import sys |
| 21 from collections import defaultdict |
| 22 |
| 23 from logilab.common.ureports import Table |
| 24 |
| 25 from pylint.interfaces import IRawChecker |
| 26 from pylint.checkers import BaseChecker, table_lines_from_stats |
| 27 |
| 28 import six |
| 29 from six.moves import zip |
| 30 |
| 31 |
class Similar(object):
    """finds copy-pasted lines of code in a project

    Streams are registered with :meth:`append_stream`; :meth:`run` then
    prints every group of more than `min_lines` identical (stripped,
    non-blank) lines shared by two or more registered streams.
    """

    def __init__(self, min_lines=4, ignore_comments=False,
                 ignore_docstrings=False, ignore_imports=False):
        # minimum number of successive non-blank identical lines
        # required before two chunks are reported as similar
        self.min_lines = min_lines
        # when True, "#" comment text is stripped before comparing lines
        self.ignore_comments = ignore_comments
        # when True, docstring lines are blanked before comparing lines
        self.ignore_docstrings = ignore_docstrings
        # when True, import/from lines are blanked before comparing lines
        self.ignore_imports = ignore_imports
        # one LineSet per appended stream
        self.linesets = []

    def append_stream(self, streamid, stream, encoding=None):
        """append a file to search for similarities

        `streamid` names the stream in reports; `stream` must support
        iteration / readlines().  When `encoding` is given the raw byte
        lines are decoded with it.
        """
        if encoding is None:
            readlines = stream.readlines
        else:
            # decode each byte line with the declared file encoding
            readlines = lambda: [line.decode(encoding) for line in stream]
        try:
            self.linesets.append(LineSet(streamid,
                                         readlines(),
                                         self.ignore_comments,
                                         self.ignore_docstrings,
                                         self.ignore_imports))
        except UnicodeDecodeError:
            # undecodable stream: skipped silently (best effort)
            pass

    def run(self):
        """start looking for similarities and display results on stdout"""
        self._display_sims(self._compute_sims())

    def _compute_sims(self):
        """compute similarities in appended files

        Returns a list of (num_lines, set of (lineset, start_index))
        pairs, sorted by decreasing num_lines.
        """
        no_duplicates = defaultdict(list)
        for num, lineset1, idx1, lineset2, idx2 in self._iter_sims():
            duplicate = no_duplicates[num]
            # merge this occurrence into an existing group that already
            # contains one of its two endpoints, if any
            for couples in duplicate:
                if (lineset1, idx1) in couples or (lineset2, idx2) in couples:
                    couples.add((lineset1, idx1))
                    couples.add((lineset2, idx2))
                    break
            else:
                duplicate.append(set([(lineset1, idx1), (lineset2, idx2)]))
        sims = []
        for num, ensembles in six.iteritems(no_duplicates):
            for couples in ensembles:
                sims.append((num, couples))
        # largest similarities first
        sims.sort()
        sims.reverse()
        return sims

    def _display_sims(self, sims):
        """display computed similarities on stdout"""
        nb_lignes_dupliquees = 0  # running total of duplicated lines
        for num, couples in sims:
            print()
            print(num, "similar lines in", len(couples), "files")
            couples = sorted(couples)
            for lineset, idx in couples:
                print("==%s:%s" % (lineset.name, idx))
            # pylint: disable=W0631
            # `lineset`/`idx` deliberately reused from the loop above:
            # the duplicated block is printed once, from the last couple
            for line in lineset._real_lines[idx:idx+num]:
                print("  ", line.rstrip())
            nb_lignes_dupliquees += num * (len(couples)-1)
        nb_total_lignes = sum([len(lineset) for lineset in self.linesets])
        print("TOTAL lines=%s duplicates=%s percent=%.2f" \
            % (nb_total_lignes, nb_lignes_dupliquees,
               nb_lignes_dupliquees*100. / nb_total_lignes))

    def _find_common(self, lineset1, lineset2):
        """find similarities in the two given linesets

        Yields (num_matched_lines, lineset1, start1, lineset2, start2)
        for every run of identical stripped lines containing more than
        `min_lines` non-blank lines.
        """
        lines1 = lineset1.enumerate_stripped
        lines2 = lineset2.enumerate_stripped
        find = lineset2.find
        index1 = 0
        min_lines = self.min_lines
        while index1 < len(lineset1):
            skip = 1
            num = 0
            # every index in lineset2 whose stripped line equals the
            # current line of lineset1 is a candidate match start
            for index2 in find(lineset1[index1]):
                non_blank = 0
                for num, ((_, line1), (_, line2)) in enumerate(
                        zip(lines1(index1), lines2(index2))):
                    if line1 != line2:
                        if non_blank > min_lines:
                            yield num, lineset1, index1, lineset2, index2
                        # jump past the matched run on the outer loop
                        skip = max(skip, num)
                        break
                    if line1:
                        non_blank += 1
                else:
                    # we may have reach the end
                    num += 1
                    if non_blank > min_lines:
                        yield num, lineset1, index1, lineset2, index2
                    skip = max(skip, num)
            index1 += skip

    def _iter_sims(self):
        """iterate on similarities among all files, by making a cartesian
        product
        """
        for idx, lineset in enumerate(self.linesets[:-1]):
            for lineset2 in self.linesets[idx+1:]:
                for sim in self._find_common(lineset, lineset2):
                    yield sim
| 137 |
def stripped_lines(lines, ignore_comments, ignore_docstrings, ignore_imports):
    """Return `lines` with surrounding whitespace removed and any ignored
    code features (docstrings, imports, comments) blanked out, so that
    line comparison only sees significant code.
    """
    result = []
    doc_marker = None  # quote style of the docstring currently being skipped
    for raw in lines:
        text = raw.strip()
        if ignore_docstrings:
            if doc_marker is None and \
                    (text.startswith('"""') or text.startswith("'''")):
                doc_marker = text[:3]
                text = text[3:]
            if doc_marker:
                if text.endswith(doc_marker):
                    doc_marker = None
                text = ''
        if ignore_imports and (text.startswith("import ")
                               or text.startswith("from ")):
            text = ''
        if ignore_comments:
            # XXX should use regex in checkers/format to avoid cutting
            # at a "#" in a string
            text = text.split('#', 1)[0].strip()
        result.append(text)
    return result
| 165 |
| 166 |
class LineSet(object):
    """Holds and indexes all the lines of a single source file"""

    def __init__(self, name, lines, ignore_comments=False,
                 ignore_docstrings=False, ignore_imports=False):
        self.name = name
        # raw lines, used when reporting duplicated chunks
        self._real_lines = lines
        # normalized lines, used for comparison
        self._stripped_lines = stripped_lines(lines, ignore_comments,
                                              ignore_docstrings,
                                              ignore_imports)
        # stripped line -> list of positions where it occurs
        self._index = self._mk_index()

    def __str__(self):
        return '<Lineset for %s>' % self.name

    def __len__(self):
        return len(self._real_lines)

    def __getitem__(self, index):
        return self._stripped_lines[index]

    def __lt__(self, other):
        return self.name < other.name

    def __hash__(self):
        # identity hash: each LineSet instance is its own key
        return id(self)

    def enumerate_stripped(self, start_at=0):
        """return an iterator on stripped lines, starting from a given index
        if specified, else 0
        """
        selected = self._stripped_lines[start_at:] if start_at \
            else self._stripped_lines
        for offset, line in enumerate(selected):
            yield start_at + offset, line

    def find(self, stripped_line):
        """return positions of the given stripped line in this set"""
        return self._index.get(stripped_line, ())

    def _mk_index(self):
        """create the index for this set"""
        index = defaultdict(list)
        for position, content in enumerate(self._stripped_lines):
            if content:  # blank (stripped-away) lines are not indexed
                index[content].append(position)
        return index
| 218 |
| 219 |
# Message table for SimilarChecker: R0801 ("duplicate-code") is emitted
# with args (number of files involved, joined location/line listing).
MSGS = {'R0801': ('Similar lines in %s files\n%s',
                  'duplicate-code',
                  'Indicates that a set of similar lines has been detected \
among multiple file. This usually means that the code should \
be refactored to avoid this duplication.')}
| 225 |
def report_similarities(sect, stats, old_stats):
    """Append a 4-column table of duplication statistics (current run,
    previous run, difference) to the given report section.
    """
    header = ['', 'now', 'previous', 'difference']
    rows = table_lines_from_stats(stats, old_stats,
                                  ('nb_duplicated_lines',
                                   'percent_duplicated_lines'))
    sect.append(Table(children=header + rows, cols=4, rheaders=1, cheaders=1))
| 233 |
| 234 |
| 235 # wrapper to get a pylint checker from the similar class |
class SimilarChecker(BaseChecker, Similar):
    """checks for similarities and duplicated code. This computation may be
    memory / CPU intensive, so you should disable it if you experiment some
    problems.
    """

    __implements__ = (IRawChecker,)
    # configuration section name
    name = 'similarities'
    # messages
    msgs = MSGS
    # configuration options
    # for available dict keys/values see the optik parser 'add_option' method
    options = (('min-similarity-lines',
                {'default' : 4, 'type' : "int", 'metavar' : '<int>',
                 'help' : 'Minimum lines number of a similarity.'}),
               ('ignore-comments',
                {'default' : True, 'type' : 'yn', 'metavar' : '<y or n>',
                 'help': 'Ignore comments when computing similarities.'}
               ),
               ('ignore-docstrings',
                {'default' : True, 'type' : 'yn', 'metavar' : '<y or n>',
                 'help': 'Ignore docstrings when computing similarities.'}
               ),
               ('ignore-imports',
                {'default' : False, 'type' : 'yn', 'metavar' : '<y or n>',
                 'help': 'Ignore imports when computing similarities.'}
               ),
              )
    # reports
    reports = (('RP0801', 'Duplication', report_similarities),)

    def __init__(self, linter=None):
        BaseChecker.__init__(self, linter)
        # initial values mirror the option defaults above; set_option()
        # keeps the Similar attributes in sync with configuration changes
        Similar.__init__(self, min_lines=4,
                         ignore_comments=True, ignore_docstrings=True)
        # stats dict shared with the linter; filled in open()/close()
        self.stats = None

    def set_option(self, optname, value, action=None, optdict=None):
        """method called to set an option (registered in the options list)

        overridden to report options setting to Similar
        """
        BaseChecker.set_option(self, optname, value, action, optdict)
        if optname == 'min-similarity-lines':
            self.min_lines = self.config.min_similarity_lines
        elif optname == 'ignore-comments':
            self.ignore_comments = self.config.ignore_comments
        elif optname == 'ignore-docstrings':
            self.ignore_docstrings = self.config.ignore_docstrings
        elif optname == 'ignore-imports':
            self.ignore_imports = self.config.ignore_imports

    def open(self):
        """init the checkers: reset linesets and statistics information"""
        self.linesets = []
        self.stats = self.linter.add_stats(nb_duplicated_lines=0,
                                           percent_duplicated_lines=0)

    def process_module(self, node):
        """process a module

        the module's content is accessible via the stream object

        stream must implement the readlines method
        """
        with node.stream() as stream:
            self.append_stream(self.linter.current_name,
                               stream,
                               node.file_encoding)

    def close(self):
        """compute and display similarities on closing (i.e. end of parsing)"""
        total = sum([len(lineset) for lineset in self.linesets])
        duplicated = 0
        stats = self.stats
        for num, couples in self._compute_sims():
            msg = []
            for lineset, idx in couples:
                msg.append("==%s:%s" % (lineset.name, idx))
            msg.sort()
            # pylint: disable=W0631
            # `lineset`/`idx` intentionally reused from the loop above:
            # the duplicated lines are appended once, from the last couple
            for line in lineset._real_lines[idx:idx+num]:
                msg.append(line.rstrip())
            self.add_message('R0801', args=(len(couples), '\n'.join(msg)))
            duplicated += num * (len(couples) - 1)
        stats['nb_duplicated_lines'] = duplicated
        # `total and ...` guards against division by zero on empty input
        stats['percent_duplicated_lines'] = total and duplicated * 100. / total
| 324 |
| 325 |
def register(linter):
    """Required entry point: auto-register this checker with `linter`."""
    linter.register_checker(SimilarChecker(linter))
| 329 |
def usage(status=0):
    """Print command-line usage information and exit with `status`."""
    for text in (
            "finds copy pasted blocks in a set of files",
            "",
            'Usage: symilar [-d|--duplicates min_duplicated_lines] '
            '[-i|--ignore-comments] [--ignore-docstrings] '
            '[--ignore-imports] file1...'):
        print(text)
    sys.exit(status)
| 337 |
def Run(argv=None):
    """standalone command line access point

    Parses `argv` (defaults to sys.argv[1:]), builds a Similar instance
    configured from the options, feeds it every file argument and runs
    the similarity search; exits the process when done.
    """
    if argv is None:
        argv = sys.argv[1:]
    from getopt import getopt
    # BUG FIX: '-d' takes a value (the minimum duplicated-lines count,
    # cf. the usage string and the 'duplicates=' long option), so it
    # needs a trailing colon; with plain 'hdi' getopt returned val=''
    # and int('') raised ValueError.
    s_opts = 'hd:i'
    l_opts = ('help', 'duplicates=', 'ignore-comments', 'ignore-imports',
              'ignore-docstrings')
    min_lines = 4
    ignore_comments = False
    ignore_docstrings = False
    ignore_imports = False
    opts, args = getopt(argv, s_opts, l_opts)
    for opt, val in opts:
        if opt in ('-d', '--duplicates'):
            min_lines = int(val)
        elif opt in ('-h', '--help'):
            usage()
        elif opt in ('-i', '--ignore-comments'):
            ignore_comments = True
        elif opt in ('--ignore-docstrings',):
            ignore_docstrings = True
        elif opt in ('--ignore-imports',):
            ignore_imports = True
    if not args:
        usage(1)
    sim = Similar(min_lines, ignore_comments, ignore_docstrings, ignore_imports)
    for filename in args:
        with open(filename) as stream:
            sim.append_stream(filename, stream)
    sim.run()
    sys.exit(0)
| 370 |
if __name__ == '__main__':
    # command-line entry point: run the standalone similarity tool
    Run()
OLD | NEW |