third_party/pylint/pylint/checkers/similar.py - Issue 1920403002: [content/test/gpu] Run pylint check of gpu tests in unittest instead of PRESUBMIT

Unified Diff: third_party/pylint/pylint/checkers/similar.py

Issue 1920403002: [content/test/gpu] Run pylint check of gpu tests in unittest instead of PRESUBMIT (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: Update path to LICENSE.txt of logilab/README.chromium Created 4 years, 7 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: third_party/pylint/pylint/checkers/similar.py

diff --git a/third_party/pylint/pylint/checkers/similar.py b/third_party/pylint/pylint/checkers/similar.py

new file mode 100644

index 0000000000000000000000000000000000000000..95420776110df0cf0fc5d3db79469e9b89bf6476

--- /dev/null

+++ b/third_party/pylint/pylint/checkers/similar.py

@@ -0,0 +1,372 @@

+# pylint: disable=W0622

+# http://www.logilab.fr/ -- mailto:contact@logilab.fr

+# This program is free software; you can redistribute it and/or modify it under

+# the terms of the GNU General Public License as published by the Free Software

+# Foundation; either version 2 of the License, or (at your option) any later

+# version.

+# This program is distributed in the hope that it will be useful, but WITHOUT

+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS

+# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details

+# You should have received a copy of the GNU General Public License along with

+# this program; if not, write to the Free Software Foundation, Inc.,

+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.

+"""a similarities / code duplication command line tool and pylint checker

+"""

+from __future__ import print_function

+import sys

+from collections import defaultdict

+from logilab.common.ureports import Table

+from pylint.interfaces import IRawChecker

+from pylint.checkers import BaseChecker, table_lines_from_stats

+import six

+from six.moves import zip

class Similar(object):
    """Find copy-pasted lines of code in a set of files.

    Files are added with :meth:`append_stream`; each one is stored as a
    ``LineSet`` in ``self.linesets``. :meth:`run` then compares every pair
    of line sets and prints the duplicated chunks on stdout.
    """

    def __init__(self, min_lines=4, ignore_comments=False,
                 ignore_docstrings=False, ignore_imports=False):
        """Store the comparison settings and start with no files.

        :param min_lines: threshold on the number of non-blank matching
            lines; a chunk is reported only when its non-blank line count
            is strictly greater than this value (see ``_find_common``).
        :param ignore_comments: forwarded to ``LineSet`` for each file.
        :param ignore_docstrings: forwarded to ``LineSet`` for each file.
        :param ignore_imports: forwarded to ``LineSet`` for each file.
        """
        self.min_lines = min_lines
        self.ignore_comments = ignore_comments
        self.ignore_docstrings = ignore_docstrings
        self.ignore_imports = ignore_imports
        self.linesets = []

    def append_stream(self, streamid, stream, encoding=None):
        """Append a file to the set searched for similarities.

        :param streamid: name given to the resulting ``LineSet``.
        :param stream: object providing the lines; ``stream.readlines()`` is
            used when ``encoding`` is None, otherwise the stream is iterated
            and every item is decoded with ``encoding``.
        :param encoding: optional codec name used to decode each line.
        """
        if encoding is None:
            readlines = stream.readlines
        else:
            readlines = lambda: [line.decode(encoding) for line in stream]
        try:
            self.linesets.append(LineSet(streamid,
                                         readlines(),
                                         self.ignore_comments,
                                         self.ignore_docstrings,
                                         self.ignore_imports))
        except UnicodeDecodeError:
            # Best effort: a file that cannot be decoded is left out of the
            # comparison instead of aborting the whole run.
            pass

    def run(self):
        """Look for similarities and display the results on stdout."""
        self._display_sims(self._compute_sims())

    def _compute_sims(self):
        """Compute similarities among the appended files.

        Hits produced by ``_iter_sims`` are grouped by chunk length. A hit
        joins an existing group of the same length when either of its
        ``(lineset, index)`` positions is already in that group; otherwise
        it starts a new group.

        :return: list of ``(num, couples)`` tuples, ``couples`` being a set
            of ``(lineset, index)`` positions, sorted then reversed.
        """
        no_duplicates = defaultdict(list)
        for num, lineset1, idx1, lineset2, idx2 in self._iter_sims():
            duplicate = no_duplicates[num]
            for couples in duplicate:
                if (lineset1, idx1) in couples or (lineset2, idx2) in couples:
                    couples.add((lineset1, idx1))
                    couples.add((lineset2, idx2))
                    break
            else:
                # no existing group shares a position with this hit
                duplicate.append(set([(lineset1, idx1), (lineset2, idx2)]))
        sims = []
        for num, ensembles in no_duplicates.items():
            for couples in ensembles:
                sims.append((num, couples))
        sims.sort()
        sims.reverse()
        return sims

    def _display_sims(self, sims):
        """Display computed similarities on stdout.

        :param sims: iterable of ``(num, couples)`` as returned by
            ``_compute_sims``.

        A final ``TOTAL`` line gives the number of loaded lines, the number
        of duplicated lines and the duplication percentage. The percentage
        is reported as 0 when no line was loaded at all.
        """
        nb_lignes_dupliquees = 0
        for num, couples in sims:
            print()
            print(num, "similar lines in", len(couples), "files")
            couples = sorted(couples)
            for lineset, idx in couples:
                print("==%s:%s" % (lineset.name, idx))
            # The chunk is printed once, from the last position listed above
            # (loop variables deliberately reused after the loop).
            # pylint: disable=W0631
            for line in lineset._real_lines[idx:idx+num]:
                print(" ", line.rstrip())
            nb_lignes_dupliquees += num * (len(couples)-1)
        nb_total_lignes = sum([len(lineset) for lineset in self.linesets])
        if nb_total_lignes:
            percent = nb_lignes_dupliquees*100. / nb_total_lignes
        else:
            # nothing was loaded (no files, or only empty ones): avoid a
            # ZeroDivisionError and report 0 percent
            percent = 0.
        print("TOTAL lines=%s duplicates=%s percent=%.2f" \
            % (nb_total_lignes, nb_lignes_dupliquees, percent))

    def _find_common(self, lineset1, lineset2):
        """Find similarities between the two given line sets.

        For each position in ``lineset1``, every position of ``lineset2``
        holding the same stripped line is used as a starting point; both
        files are then walked in lockstep until the stripped lines differ
        or one file ends.

        :return: generator of ``(num, lineset1, index1, lineset2, index2)``
            where ``num`` is the number of matching lines. A match is only
            yielded when it holds more than ``self.min_lines`` non-blank
            lines.
        """
        lines1 = lineset1.enumerate_stripped
        lines2 = lineset2.enumerate_stripped
        find = lineset2.find
        index1 = 0
        min_lines = self.min_lines
        while index1 < len(lineset1):
            # skip: how far to advance in lineset1 once all candidate
            # positions in lineset2 were tried (longest match, at least 1)
            skip = 1
            num = 0
            for index2 in find(lineset1[index1]):
                non_blank = 0
                for num, ((_, line1), (_, line2)) in enumerate(
                        zip(lines1(index1), lines2(index2))):
                    if line1 != line2:
                        if non_blank > min_lines:
                            yield num, lineset1, index1, lineset2, index2
                        skip = max(skip, num)
                        break
                    if line1:
                        non_blank += 1
                else:
                    # One of the files was exhausted without a mismatch:
                    # num is the last matching offset, so the match length
                    # is num + 1.
                    num += 1
                    if non_blank > min_lines:
                        yield num, lineset1, index1, lineset2, index2
                    skip = max(skip, num)
            index1 += skip

    def _iter_sims(self):
        """Iterate on similarities among all files.

        Each line set is compared once with every line set appended after
        it, and the hits of ``_find_common`` are yielded unchanged.
        """
        for idx, lineset in enumerate(self.linesets[:-1]):
            for lineset2 in self.linesets[idx+1:]:
                for sim in self._find_common(lineset, lineset2):
                    yield sim

def stripped_lines(lines, ignore_comments, ignore_docstrings, ignore_imports):
    """Return the given lines stripped of surrounding whitespace, with the
    ignored code features blanked out.

    The result has one entry per input line: a removed docstring or import
    line becomes an empty string rather than being dropped.

    :param lines: iterable of source lines.
    :param ignore_comments: when true, cut each line at its first ``#``.
    :param ignore_docstrings: when true, blank every line from one starting
        with a triple quote up to the line ending with the same triple quote.
    :param ignore_imports: when true, blank lines starting with ``import ``
        or ``from ``.
    """
    result = []
    # triple-quote delimiter of the docstring being skipped, None outside one
    open_quote = None
    for raw in lines:
        text = raw.strip()
        if ignore_docstrings:
            if not open_quote and text.startswith(('"""', "'''")):
                open_quote = text[:3]
                text = text[3:]
            if open_quote:
                if text.endswith(open_quote):
                    open_quote = None
                text = ''
        if ignore_imports and text.startswith(("import ", "from ")):
            text = ''
        if ignore_comments:
            # XXX should use regex in checkers/format to avoid cutting
            # at a "#" in a string
            text = text.partition('#')[0].strip()
        result.append(text)
    return result

class LineSet(object):
    """Hold all the lines of a single source file and index them.

    The raw lines are kept in ``_real_lines``, their stripped counterparts
    in ``_stripped_lines``, and ``_index`` maps each non-empty stripped line
    to the list of positions where it occurs.
    """

    def __init__(self, name, lines, ignore_comments=False,
                 ignore_docstrings=False, ignore_imports=False):
        """Record the raw lines, compute the stripped ones and index them.

        :param name: identifier of the file, used for display and ordering.
        :param lines: the lines of the file.
        :param ignore_comments: forwarded to ``stripped_lines``.
        :param ignore_docstrings: forwarded to ``stripped_lines``.
        :param ignore_imports: forwarded to ``stripped_lines``.
        """
        self.name = name
        self._real_lines = lines
        self._stripped_lines = stripped_lines(
            lines, ignore_comments, ignore_docstrings, ignore_imports)
        self._index = self._mk_index()

    def __str__(self):
        label = '<Lineset for %s>'
        return label % self.name

    def __len__(self):
        # the length is the number of raw lines
        return len(self._real_lines)

    def __getitem__(self, index):
        # indexing gives the stripped line, not the raw one
        return self._stripped_lines[index]

    def __lt__(self, other):
        # line sets are ordered by name
        return self.name < other.name

    def __hash__(self):
        # identity based hash
        return id(self)

    def enumerate_stripped(self, start_at=0):
        """Yield ``(index, stripped line)`` pairs, starting from the given
        index if specified, else from 0.
        """
        if start_at:
            remaining = self._stripped_lines[start_at:]
        else:
            remaining = self._stripped_lines
        for position, text in enumerate(remaining, start_at):
            yield position, text

    def find(self, stripped_line):
        """Return the positions of the given stripped line in this set
        (an empty tuple when the line is not indexed).
        """
        return self._index.get(stripped_line, ())

    def _mk_index(self):
        """Create the index for this set: stripped line -> list of line
        numbers. Empty stripped lines are not indexed.
        """
        positions = defaultdict(list)
        for line_no, text in enumerate(self._stripped_lines):
            if not text:
                continue
            positions[text].append(line_no)
        return positions

# Messages emitted by the checker, keyed by message id. Each value is a tuple
# of three strings; by their content they appear to be the message template,
# its symbolic name and the help text.
MSGS = {'R0801': ('Similar lines in %s files\n%s',
                  'duplicate-code',
                  # Adjacent literals are concatenated by the parser. A
                  # backslash continuation inside a single literal would
                  # embed the next line's indentation in the help text.
                  'Indicates that a set of similar lines has been detected '
                  'among multiple file. This usually means that the code should '
                  'be refactored to avoid this duplication.')}

def report_similarities(sect, stats, old_stats):
    """Append to ``sect`` a four-column table of duplication statistics.

    The first four cells are the column headers; the following ones come
    from ``table_lines_from_stats`` for the ``nb_duplicated_lines`` and
    ``percent_duplicated_lines`` entries of ``stats`` and ``old_stats``.
    """
    stat_names = ('nb_duplicated_lines', 'percent_duplicated_lines')
    cells = ['', 'now', 'previous', 'difference']
    cells.extend(table_lines_from_stats(stats, old_stats, stat_names))
    table = Table(children=cells, cols=4, rheaders=1, cheaders=1)
    sect.append(table)

# Adapter exposing the Similar class as a pylint checker.
class SimilarChecker(BaseChecker, Similar):
    """Check for similarities and duplicated code.

    This computation may be memory / CPU intensive, so you should disable
    it if you experience problems.
    """

    __implements__ = (IRawChecker,)

    # configuration section name
    name = 'similarities'
    # messages
    msgs = MSGS
    # configuration options
    # for available dict keys/values see the optik parser 'add_option' method
    options = (
        ('min-similarity-lines',
         {'default': 4, 'type': "int", 'metavar': '<int>',
          'help': 'Minimum lines number of a similarity.'}),
        ('ignore-comments',
         {'default': True, 'type': 'yn', 'metavar': '<y or n>',
          'help': 'Ignore comments when computing similarities.'}),
        ('ignore-docstrings',
         {'default': True, 'type': 'yn', 'metavar': '<y or n>',
          'help': 'Ignore docstrings when computing similarities.'}),
        ('ignore-imports',
         {'default': False, 'type': 'yn', 'metavar': '<y or n>',
          'help': 'Ignore imports when computing similarities.'}),
    )
    # reports
    reports = (('RP0801', 'Duplication', report_similarities),)

    def __init__(self, linter=None):
        """Initialise both base classes; statistics are created in open()."""
        BaseChecker.__init__(self, linter)
        Similar.__init__(self, min_lines=4,
                         ignore_comments=True, ignore_docstrings=True)
        self.stats = None

    def set_option(self, optname, value, action=None, optdict=None):
        """Set an option registered in the options list.

        Overridden so that, once the base class has stored the value, the
        matching ``Similar`` attribute is refreshed from ``self.config``.
        """
        BaseChecker.set_option(self, optname, value, action, optdict)
        # (option name, Similar attribute, config attribute)
        mirrored = (
            ('min-similarity-lines', 'min_lines', 'min_similarity_lines'),
            ('ignore-comments', 'ignore_comments', 'ignore_comments'),
            ('ignore-docstrings', 'ignore_docstrings', 'ignore_docstrings'),
            ('ignore-imports', 'ignore_imports', 'ignore_imports'),
        )
        for option, attribute, config_attribute in mirrored:
            if optname == option:
                setattr(self, attribute,
                        getattr(self.config, config_attribute))
                break

    def open(self):
        """Initialise the checker: reset line sets and statistics."""
        self.linesets = []
        self.stats = self.linter.add_stats(nb_duplicated_lines=0,
                                           percent_duplicated_lines=0)

    def process_module(self, node):
        """Process a module.

        The module content is read from the object returned by
        ``node.stream()`` and appended under the linter's current name,
        decoded with ``node.file_encoding``.
        """
        with node.stream() as stream:
            self.append_stream(self.linter.current_name, stream,
                               node.file_encoding)

    def close(self):
        """Compute and report similarities on closing (i.e. end of parsing).

        One R0801 message is added per group of similar chunks, then the
        duplication statistics are stored in ``self.stats``.
        """
        line_total = sum([len(lineset) for lineset in self.linesets])
        duplicated_count = 0
        stats = self.stats
        for length, occurrences in self._compute_sims():
            report = []
            for lineset, start in occurrences:
                report.append("==%s:%s" % (lineset.name, start))
            report.sort()
            # The quoted chunk comes from the occurrence iterated last
            # (loop variables deliberately reused after the loop).
            # pylint: disable=W0631
            chunk = lineset._real_lines[start:start + length]
            report.extend([text.rstrip() for text in chunk])
            self.add_message('R0801',
                             args=(len(occurrences), '\n'.join(report)))
            duplicated_count += length * (len(occurrences) - 1)
        stats['nb_duplicated_lines'] = duplicated_count
        if line_total:
            percent = duplicated_count * 100. / line_total
        else:
            # no line at all: keep the falsy total as the percentage
            percent = line_total
        stats['percent_duplicated_lines'] = percent

def register(linter):
    """Create a SimilarChecker bound to ``linter`` and register it there
    (entry point required for automatic checker registration).
    """
    checker = SimilarChecker(linter)
    linter.register_checker(checker)

def usage(status=0):
    """Display command line usage information, then exit.

    :param status: exit status passed to ``sys.exit``.
    """
    synopsis = ('Usage: symilar [-d|--duplicates min_duplicated_lines] '
                '[-i|--ignore-comments] [--ignore-docstrings] '
                '[--ignore-imports] file1...')
    print("finds copy pasted blocks in a set of files")
    print()
    print(synopsis)
    sys.exit(status)

def Run(argv=None):
    """Standalone command line access point.

    :param argv: list of command line arguments, without the program name;
        defaults to ``sys.argv[1:]``.

    Recognised options: ``-h/--help``, ``-d/--duplicates <n>``,
    ``-i/--ignore-comments``, ``--ignore-docstrings`` and
    ``--ignore-imports``. The remaining arguments are the files to compare.
    The function always ends by calling ``sys.exit`` (directly, or through
    ``usage`` when help is requested or no file is given).
    """
    if argv is None:
        argv = sys.argv[1:]
    from getopt import getopt
    # "d:" -- the short form must take a value, like its long counterpart
    # "duplicates=". Without the colon getopt hands back an empty string
    # and int(val) below fails.
    s_opts = 'hd:i'
    l_opts = ('help', 'duplicates=', 'ignore-comments', 'ignore-imports',
              'ignore-docstrings')
    min_lines = 4
    ignore_comments = False
    ignore_docstrings = False
    ignore_imports = False
    opts, args = getopt(argv, s_opts, l_opts)
    for opt, val in opts:
        if opt in ('-d', '--duplicates'):
            min_lines = int(val)
        elif opt in ('-h', '--help'):
            usage()
        elif opt in ('-i', '--ignore-comments'):
            ignore_comments = True
        elif opt in ('--ignore-docstrings',):
            ignore_docstrings = True
        elif opt in ('--ignore-imports',):
            ignore_imports = True
    if not args:
        usage(1)
    sim = Similar(min_lines, ignore_comments, ignore_docstrings, ignore_imports)
    for filename in args:
        with open(filename) as stream:
            sim.append_stream(filename, stream)
    sim.run()
    sys.exit(0)


if __name__ == '__main__':
    Run()

« no previous file with comments | « third_party/pylint/pylint/checkers/raw_metrics.py ('k') | third_party/pylint/pylint/checkers/spelling.py » ('j') | no next file with comments »