| Index: third_party/pylint/checkers/similar.py
|
| ===================================================================
|
| --- third_party/pylint/checkers/similar.py (revision 293047)
|
| +++ third_party/pylint/checkers/similar.py (working copy)
|
| @@ -1,5 +1,5 @@
|
| # pylint: disable=W0622
|
| -# Copyright (c) 2004-2006 LOGILAB S.A. (Paris, FRANCE).
|
| +# Copyright (c) 2004-2013 LOGILAB S.A. (Paris, FRANCE).
|
| # http://www.logilab.fr/ -- mailto:contact@logilab.fr
|
| #
|
| # This program is free software; you can redistribute it and/or modify it under
|
| @@ -13,11 +13,9 @@
|
| #
|
| # You should have received a copy of the GNU General Public License along with
|
| # this program; if not, write to the Free Software Foundation, Inc.,
|
| -# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
| +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
| """a similarities / code duplication command line tool and pylint checker
|
| """
|
| -from __future__ import generators
|
| -
|
| import sys
|
| from itertools import izip
|
|
|
| @@ -27,23 +25,32 @@
|
| from pylint.checkers import BaseChecker, table_lines_from_stats
|
|
|
|
|
| -class Similar:
|
| +class Similar(object):
|
| """finds copy-pasted lines of code in a project"""
|
|
|
| def __init__(self, min_lines=4, ignore_comments=False,
|
| - ignore_docstrings=False):
|
| + ignore_docstrings=False, ignore_imports=False):
|
| self.min_lines = min_lines
|
| self.ignore_comments = ignore_comments
|
| self.ignore_docstrings = ignore_docstrings
|
| + self.ignore_imports = ignore_imports
|
| self.linesets = []
|
|
|
| - def append_stream(self, streamid, stream):
|
| + def append_stream(self, streamid, stream, encoding=None):
|
| """append a file to search for similarities"""
|
| - stream.seek(0) # XXX may be removed with astng > 0.23
|
| - self.linesets.append(LineSet(streamid,
|
| - stream.readlines(),
|
| - self.ignore_comments,
|
| - self.ignore_docstrings))
|
| + stream.seek(0) # XXX may be removed with astroid > 0.23
|
| + if encoding is None:
|
| + readlines = stream.readlines
|
| + else:
|
| + readlines = lambda: [line.decode(encoding) for line in stream]
|
| + try:
|
| + self.linesets.append(LineSet(streamid,
|
| + readlines(),
|
| + self.ignore_comments,
|
| + self.ignore_docstrings,
|
| + self.ignore_imports))
|
| + except UnicodeDecodeError:
|
| + pass
|
|
|
| def run(self):
|
| """start looking for similarities and display results on stdout"""
|
| @@ -56,15 +63,15 @@
|
| duplicate = no_duplicates.setdefault(num, [])
|
| for couples in duplicate:
|
| if (lineset1, idx1) in couples or (lineset2, idx2) in couples:
|
| - couples.add( (lineset1, idx1) )
|
| - couples.add( (lineset2, idx2) )
|
| + couples.add((lineset1, idx1))
|
| + couples.add((lineset2, idx2))
|
| break
|
| else:
|
| - duplicate.append( set([(lineset1, idx1), (lineset2, idx2)]) )
|
| + duplicate.append(set([(lineset1, idx1), (lineset2, idx2)]))
|
| sims = []
|
| for num, ensembles in no_duplicates.iteritems():
|
| for couples in ensembles:
|
| - sims.append( (num, couples) )
|
| + sims.append((num, couples))
|
| sims.sort()
|
| sims.reverse()
|
| return sims
|
| @@ -80,7 +87,7 @@
|
| print "==%s:%s" % (lineset.name, idx)
|
| # pylint: disable=W0631
|
| for line in lineset._real_lines[idx:idx+num]:
|
| - print " ", line,
|
| + print " ", line.rstrip()
|
| nb_lignes_dupliquees += num * (len(couples)-1)
|
| nb_total_lignes = sum([len(lineset) for lineset in self.linesets])
|
| print "TOTAL lines=%s duplicates=%s percent=%.2f" \
|
| @@ -97,10 +104,10 @@
|
| while index1 < len(lineset1):
|
| skip = 1
|
| num = 0
|
| - for index2 in find( lineset1[index1] ):
|
| + for index2 in find(lineset1[index1]):
|
| non_blank = 0
|
| for num, ((_, line1), (_, line2)) in enumerate(
|
| - izip(lines1(index1), lines2(index2))):
|
| + izip(lines1(index1), lines2(index2))):
|
| if line1 != line2:
|
| if non_blank > min_lines:
|
| yield num, lineset1, index1, lineset2, index2
|
| @@ -125,7 +132,11 @@
|
| for sim in self._find_common(lineset, lineset2):
|
| yield sim
|
|
|
| -def stripped_lines(lines, ignore_comments, ignore_docstrings):
|
| +def stripped_lines(lines, ignore_comments, ignore_docstrings, ignore_imports):
|
| + """return lines with leading/trailing whitespace and any ignored code
|
| + features removed
|
| + """
|
| +
|
| strippedlines = []
|
| docstring = None
|
| for line in lines:
|
| @@ -139,6 +150,9 @@
|
| if line.endswith(docstring):
|
| docstring = None
|
| line = ''
|
| + if ignore_imports:
|
| + if line.startswith("import ") or line.startswith("from "):
|
| + line = ''
|
| if ignore_comments:
|
| # XXX should use regex in checkers/format to avoid cutting
|
| # at a "#" in a string
|
| @@ -146,14 +160,16 @@
|
| strippedlines.append(line)
|
| return strippedlines
|
|
|
| -class LineSet:
|
| +
|
| +class LineSet(object):
|
| """Holds and indexes all the lines of a single source file"""
|
| def __init__(self, name, lines, ignore_comments=False,
|
| - ignore_docstrings=False):
|
| + ignore_docstrings=False, ignore_imports=False):
|
| self.name = name
|
| self._real_lines = lines
|
| self._stripped_lines = stripped_lines(lines, ignore_comments,
|
| - ignore_docstrings)
|
| + ignore_docstrings,
|
| + ignore_imports)
|
| self._index = self._mk_index()
|
|
|
| def __str__(self):
|
| @@ -194,11 +210,12 @@
|
| index = {}
|
| for line_no, line in enumerate(self._stripped_lines):
|
| if line:
|
| - index.setdefault(line, []).append( line_no )
|
| + index.setdefault(line, []).append(line_no)
|
| return index
|
|
|
|
|
| MSGS = {'R0801': ('Similar lines in %s files\n%s',
|
| + 'duplicate-code',
|
| 'Indicates that a set of similar lines has been detected \
|
| among multiple file. This usually means that the code should \
|
| be refactored to avoid this duplication.')}
|
| @@ -232,14 +249,18 @@
|
| ('ignore-comments',
|
| {'default' : True, 'type' : 'yn', 'metavar' : '<y or n>',
|
| 'help': 'Ignore comments when computing similarities.'}
|
| - ),
|
| + ),
|
| ('ignore-docstrings',
|
| {'default' : True, 'type' : 'yn', 'metavar' : '<y or n>',
|
| 'help': 'Ignore docstrings when computing similarities.'}
|
| - ),
|
| - )
|
| + ),
|
| + ('ignore-imports',
|
| + {'default' : False, 'type' : 'yn', 'metavar' : '<y or n>',
|
| + 'help': 'Ignore imports when computing similarities.'}
|
| + ),
|
| + )
|
| # reports
|
| - reports = ( ('R0801', 'Duplication', report_similarities), ) # XXX actually a Refactoring message
|
| + reports = (('RP0801', 'Duplication', report_similarities),)
|
|
|
| def __init__(self, linter=None):
|
| BaseChecker.__init__(self, linter)
|
| @@ -259,6 +280,8 @@
|
| self.ignore_comments = self.config.ignore_comments
|
| elif optname == 'ignore-docstrings':
|
| self.ignore_docstrings = self.config.ignore_docstrings
|
| + elif optname == 'ignore-imports':
|
| + self.ignore_imports = self.config.ignore_imports
|
|
|
| def open(self):
|
| """init the checkers: reset linesets and statistics information"""
|
| @@ -273,7 +296,7 @@
|
|
|
| stream must implement the readlines method
|
| """
|
| - self.append_stream(self.linter.current_name, node.file_stream)
|
| + self.append_stream(self.linter.current_name, node.file_stream, node.file_encoding)
|
|
|
| def close(self):
|
| """compute and display similarities on closing (i.e. end of parsing)"""
|
| @@ -303,18 +326,21 @@
|
| print "finds copy pasted blocks in a set of files"
|
| print
|
| print 'Usage: symilar [-d|--duplicates min_duplicated_lines] \
|
| -[-i|--ignore-comments] file1...'
|
| +[-i|--ignore-comments] [--ignore-docstrings] [--ignore-imports] file1...'
|
| sys.exit(status)
|
|
|
| -def run(argv=None):
|
| +def Run(argv=None):
|
| """standalone command line access point"""
|
| if argv is None:
|
| argv = sys.argv[1:]
|
| from getopt import getopt
|
| s_opts = 'hdi'
|
| - l_opts = ('help', 'duplicates=', 'ignore-comments')
|
| + l_opts = ('help', 'duplicates=', 'ignore-comments', 'ignore-imports',
|
| + 'ignore-docstrings')
|
| min_lines = 4
|
| ignore_comments = False
|
| + ignore_docstrings = False
|
| + ignore_imports = False
|
| opts, args = getopt(argv, s_opts, l_opts)
|
| for opt, val in opts:
|
| if opt in ('-d', '--duplicates'):
|
| @@ -323,12 +349,17 @@
|
| usage()
|
| elif opt in ('-i', '--ignore-comments'):
|
| ignore_comments = True
|
| + elif opt in ('--ignore-docstrings',):
|
| + ignore_docstrings = True
|
| + elif opt in ('--ignore-imports',):
|
| + ignore_imports = True
|
| if not args:
|
| usage(1)
|
| - sim = Similar(min_lines, ignore_comments)
|
| + sim = Similar(min_lines, ignore_comments, ignore_docstrings, ignore_imports)
|
| for filename in args:
|
| sim.append_stream(filename, open(filename))
|
| sim.run()
|
| + sys.exit(0)
|
|
|
| if __name__ == '__main__':
|
| - run()
|
| + Run()
|
|
|