Index: third_party/pylint/checkers/similar.py
|
===================================================================
|
--- third_party/pylint/checkers/similar.py (revision 293047)
|
+++ third_party/pylint/checkers/similar.py (working copy)
|
@@ -1,5 +1,5 @@
|
# pylint: disable=W0622
|
-# Copyright (c) 2004-2006 LOGILAB S.A. (Paris, FRANCE).
|
+# Copyright (c) 2004-2013 LOGILAB S.A. (Paris, FRANCE).
|
# http://www.logilab.fr/ -- mailto:contact@logilab.fr
|
#
|
# This program is free software; you can redistribute it and/or modify it under
|
@@ -13,11 +13,9 @@
|
#
|
# You should have received a copy of the GNU General Public License along with
|
# this program; if not, write to the Free Software Foundation, Inc.,
|
-# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
"""a similarities / code duplication command line tool and pylint checker
|
"""
|
-from __future__ import generators
|
-
|
import sys
|
from itertools import izip
|
|
@@ -27,23 +25,32 @@
|
from pylint.checkers import BaseChecker, table_lines_from_stats
|
|
|
-class Similar:
|
+class Similar(object):
|
"""finds copy-pasted lines of code in a project"""
|
|
def __init__(self, min_lines=4, ignore_comments=False,
|
- ignore_docstrings=False):
|
+ ignore_docstrings=False, ignore_imports=False):
|
self.min_lines = min_lines
|
self.ignore_comments = ignore_comments
|
self.ignore_docstrings = ignore_docstrings
|
+ self.ignore_imports = ignore_imports
|
self.linesets = []
|
|
- def append_stream(self, streamid, stream):
|
+ def append_stream(self, streamid, stream, encoding=None):
|
"""append a file to search for similarities"""
|
- stream.seek(0) # XXX may be removed with astng > 0.23
|
- self.linesets.append(LineSet(streamid,
|
- stream.readlines(),
|
- self.ignore_comments,
|
- self.ignore_docstrings))
|
+ stream.seek(0) # XXX may be removed with astroid > 0.23
|
+ if encoding is None:
|
+ readlines = stream.readlines
|
+ else:
|
+ readlines = lambda: [line.decode(encoding) for line in stream]
|
+ try:
|
+ self.linesets.append(LineSet(streamid,
|
+ readlines(),
|
+ self.ignore_comments,
|
+ self.ignore_docstrings,
|
+ self.ignore_imports))
|
+ except UnicodeDecodeError:
|
+ pass
|
|
def run(self):
|
"""start looking for similarities and display results on stdout"""
|
@@ -56,15 +63,15 @@
|
duplicate = no_duplicates.setdefault(num, [])
|
for couples in duplicate:
|
if (lineset1, idx1) in couples or (lineset2, idx2) in couples:
|
- couples.add( (lineset1, idx1) )
|
- couples.add( (lineset2, idx2) )
|
+ couples.add((lineset1, idx1))
|
+ couples.add((lineset2, idx2))
|
break
|
else:
|
- duplicate.append( set([(lineset1, idx1), (lineset2, idx2)]) )
|
+ duplicate.append(set([(lineset1, idx1), (lineset2, idx2)]))
|
sims = []
|
for num, ensembles in no_duplicates.iteritems():
|
for couples in ensembles:
|
- sims.append( (num, couples) )
|
+ sims.append((num, couples))
|
sims.sort()
|
sims.reverse()
|
return sims
|
@@ -80,7 +87,7 @@
|
print "==%s:%s" % (lineset.name, idx)
|
# pylint: disable=W0631
|
for line in lineset._real_lines[idx:idx+num]:
|
- print " ", line,
|
+ print " ", line.rstrip()
|
nb_lignes_dupliquees += num * (len(couples)-1)
|
nb_total_lignes = sum([len(lineset) for lineset in self.linesets])
|
print "TOTAL lines=%s duplicates=%s percent=%.2f" \
|
@@ -97,10 +104,10 @@
|
while index1 < len(lineset1):
|
skip = 1
|
num = 0
|
- for index2 in find( lineset1[index1] ):
|
+ for index2 in find(lineset1[index1]):
|
non_blank = 0
|
for num, ((_, line1), (_, line2)) in enumerate(
|
- izip(lines1(index1), lines2(index2))):
|
+ izip(lines1(index1), lines2(index2))):
|
if line1 != line2:
|
if non_blank > min_lines:
|
yield num, lineset1, index1, lineset2, index2
|
@@ -125,7 +132,11 @@
|
for sim in self._find_common(lineset, lineset2):
|
yield sim
|
|
-def stripped_lines(lines, ignore_comments, ignore_docstrings):
|
+def stripped_lines(lines, ignore_comments, ignore_docstrings, ignore_imports):
|
+ """return lines with leading/trailing whitespace and any ignored code
|
+ features removed
|
+ """
|
+
|
strippedlines = []
|
docstring = None
|
for line in lines:
|
@@ -139,6 +150,9 @@
|
if line.endswith(docstring):
|
docstring = None
|
line = ''
|
+ if ignore_imports:
|
+ if line.startswith("import ") or line.startswith("from "):
|
+ line = ''
|
if ignore_comments:
|
# XXX should use regex in checkers/format to avoid cutting
|
# at a "#" in a string
|
@@ -146,14 +160,16 @@
|
strippedlines.append(line)
|
return strippedlines
|
|
-class LineSet:
|
+
|
+class LineSet(object):
|
"""Holds and indexes all the lines of a single source file"""
|
def __init__(self, name, lines, ignore_comments=False,
|
- ignore_docstrings=False):
|
+ ignore_docstrings=False, ignore_imports=False):
|
self.name = name
|
self._real_lines = lines
|
self._stripped_lines = stripped_lines(lines, ignore_comments,
|
- ignore_docstrings)
|
+ ignore_docstrings,
|
+ ignore_imports)
|
self._index = self._mk_index()
|
|
def __str__(self):
|
@@ -194,11 +210,12 @@
|
index = {}
|
for line_no, line in enumerate(self._stripped_lines):
|
if line:
|
- index.setdefault(line, []).append( line_no )
|
+ index.setdefault(line, []).append(line_no)
|
return index
|
|
|
MSGS = {'R0801': ('Similar lines in %s files\n%s',
|
+ 'duplicate-code',
|
'Indicates that a set of similar lines has been detected \
|
among multiple file. This usually means that the code should \
|
be refactored to avoid this duplication.')}
|
@@ -232,14 +249,18 @@
|
('ignore-comments',
|
{'default' : True, 'type' : 'yn', 'metavar' : '<y or n>',
|
'help': 'Ignore comments when computing similarities.'}
|
- ),
|
+ ),
|
('ignore-docstrings',
|
{'default' : True, 'type' : 'yn', 'metavar' : '<y or n>',
|
'help': 'Ignore docstrings when computing similarities.'}
|
- ),
|
- )
|
+ ),
|
+ ('ignore-imports',
|
+ {'default' : False, 'type' : 'yn', 'metavar' : '<y or n>',
|
+ 'help': 'Ignore imports when computing similarities.'}
|
+ ),
|
+ )
|
# reports
|
- reports = ( ('R0801', 'Duplication', report_similarities), ) # XXX actually a Refactoring message
|
+ reports = (('RP0801', 'Duplication', report_similarities),)
|
|
def __init__(self, linter=None):
|
BaseChecker.__init__(self, linter)
|
@@ -259,6 +280,8 @@
|
self.ignore_comments = self.config.ignore_comments
|
elif optname == 'ignore-docstrings':
|
self.ignore_docstrings = self.config.ignore_docstrings
|
+ elif optname == 'ignore-imports':
|
+ self.ignore_imports = self.config.ignore_imports
|
|
def open(self):
|
"""init the checkers: reset linesets and statistics information"""
|
@@ -273,7 +296,7 @@
|
|
stream must implement the readlines method
|
"""
|
- self.append_stream(self.linter.current_name, node.file_stream)
|
+ self.append_stream(self.linter.current_name, node.file_stream, node.file_encoding)
|
|
def close(self):
|
"""compute and display similarities on closing (i.e. end of parsing)"""
|
@@ -303,18 +326,21 @@
|
print "finds copy pasted blocks in a set of files"
|
print
|
print 'Usage: symilar [-d|--duplicates min_duplicated_lines] \
|
-[-i|--ignore-comments] file1...'
|
+[-i|--ignore-comments] [--ignore-docstrings] [--ignore-imports] file1...'
|
sys.exit(status)
|
|
-def run(argv=None):
|
+def Run(argv=None):
|
"""standalone command line access point"""
|
if argv is None:
|
argv = sys.argv[1:]
|
from getopt import getopt
|
s_opts = 'hdi'
|
- l_opts = ('help', 'duplicates=', 'ignore-comments')
|
+ l_opts = ('help', 'duplicates=', 'ignore-comments', 'ignore-imports',
|
+ 'ignore-docstrings')
|
min_lines = 4
|
ignore_comments = False
|
+ ignore_docstrings = False
|
+ ignore_imports = False
|
opts, args = getopt(argv, s_opts, l_opts)
|
for opt, val in opts:
|
if opt in ('-d', '--duplicates'):
|
@@ -323,12 +349,17 @@
|
usage()
|
elif opt in ('-i', '--ignore-comments'):
|
ignore_comments = True
|
+ elif opt in ('--ignore-docstrings',):
|
+ ignore_docstrings = True
|
+ elif opt in ('--ignore-imports',):
|
+ ignore_imports = True
|
if not args:
|
usage(1)
|
- sim = Similar(min_lines, ignore_comments)
|
+ sim = Similar(min_lines, ignore_comments, ignore_docstrings, ignore_imports)
|
for filename in args:
|
sim.append_stream(filename, open(filename))
|
sim.run()
|
+ sys.exit(0)
|
|
if __name__ == '__main__':
|
- run()
|
+ Run()
|
|