Chromium Code Reviews

Unified diff: third_party/pylint/checkers/similar.py

Issue 739393004: Revert "Revert "pylint: upgrade to 1.3.1"" (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/tools/depot_tools/
Patch Set: Created 6 years ago
 # pylint: disable=W0622
-# Copyright (c) 2004-2006 LOGILAB S.A. (Paris, FRANCE).
+# Copyright (c) 2004-2013 LOGILAB S.A. (Paris, FRANCE).
 # http://www.logilab.fr/ -- mailto:contact@logilab.fr
 #
 # This program is free software; you can redistribute it and/or modify it under
 # the terms of the GNU General Public License as published by the Free Software
 # Foundation; either version 2 of the License, or (at your option) any later
 # version.
 #
 # This program is distributed in the hope that it will be useful, but WITHOUT
 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details
 #
 # You should have received a copy of the GNU General Public License along with
 # this program; if not, write to the Free Software Foundation, Inc.,
-# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 """a similarities / code duplication command line tool and pylint checker
 """
-from __future__ import generators
-
 import sys
 from itertools import izip
 
 from logilab.common.ureports import Table
 
 from pylint.interfaces import IRawChecker
 from pylint.checkers import BaseChecker, table_lines_from_stats
 
 
-class Similar:
+class Similar(object):
     """finds copy-pasted lines of code in a project"""
 
     def __init__(self, min_lines=4, ignore_comments=False,
-                 ignore_docstrings=False):
+                 ignore_docstrings=False, ignore_imports=False):
         self.min_lines = min_lines
         self.ignore_comments = ignore_comments
         self.ignore_docstrings = ignore_docstrings
+        self.ignore_imports = ignore_imports
         self.linesets = []
 
-    def append_stream(self, streamid, stream):
+    def append_stream(self, streamid, stream, encoding=None):
         """append a file to search for similarities"""
-        stream.seek(0) # XXX may be removed with astng > 0.23
-        self.linesets.append(LineSet(streamid,
-                                     stream.readlines(),
-                                     self.ignore_comments,
-                                     self.ignore_docstrings))
+        stream.seek(0) # XXX may be removed with astroid > 0.23
+        if encoding is None:
+            readlines = stream.readlines
+        else:
+            readlines = lambda: [line.decode(encoding) for line in stream]
+        try:
+            self.linesets.append(LineSet(streamid,
+                                         readlines(),
+                                         self.ignore_comments,
+                                         self.ignore_docstrings,
+                                         self.ignore_imports))
+        except UnicodeDecodeError:
+            pass
 
     def run(self):
         """start looking for similarities and display results on stdout"""
         self._display_sims(self._compute_sims())
 
     def _compute_sims(self):
         """compute similarities in appended files"""
         no_duplicates = {}
         for num, lineset1, idx1, lineset2, idx2 in self._iter_sims():
             duplicate = no_duplicates.setdefault(num, [])
             for couples in duplicate:
                 if (lineset1, idx1) in couples or (lineset2, idx2) in couples:
-                    couples.add( (lineset1, idx1) )
-                    couples.add( (lineset2, idx2) )
+                    couples.add((lineset1, idx1))
+                    couples.add((lineset2, idx2))
                     break
             else:
-                duplicate.append( set([(lineset1, idx1), (lineset2, idx2)]) )
+                duplicate.append(set([(lineset1, idx1), (lineset2, idx2)]))
         sims = []
         for num, ensembles in no_duplicates.iteritems():
             for couples in ensembles:
-                sims.append( (num, couples) )
+                sims.append((num, couples))
         sims.sort()
         sims.reverse()
         return sims
 
     def _display_sims(self, sims):
         """display computed similarities on stdout"""
         nb_lignes_dupliquees = 0
         for num, couples in sims:
             print
             print num, "similar lines in", len(couples), "files"
             couples = sorted(couples)
             for lineset, idx in couples:
                 print "==%s:%s" % (lineset.name, idx)
             # pylint: disable=W0631
             for line in lineset._real_lines[idx:idx+num]:
-                print " ", line,
+                print " ", line.rstrip()
             nb_lignes_dupliquees += num * (len(couples)-1)
         nb_total_lignes = sum([len(lineset) for lineset in self.linesets])
         print "TOTAL lines=%s duplicates=%s percent=%.2f" \
             % (nb_total_lignes, nb_lignes_dupliquees,
                nb_lignes_dupliquees*100. / nb_total_lignes)
 
     def _find_common(self, lineset1, lineset2):
         """find similarities in the two given linesets"""
         lines1 = lineset1.enumerate_stripped
         lines2 = lineset2.enumerate_stripped
         find = lineset2.find
         index1 = 0
         min_lines = self.min_lines
         while index1 < len(lineset1):
             skip = 1
             num = 0
-            for index2 in find( lineset1[index1] ):
+            for index2 in find(lineset1[index1]):
                 non_blank = 0
                 for num, ((_, line1), (_, line2)) in enumerate(
                     izip(lines1(index1), lines2(index2))):
                     if line1 != line2:
                         if non_blank > min_lines:
                             yield num, lineset1, index1, lineset2, index2
                             skip = max(skip, num)
                         break
                     if line1:
                         non_blank += 1
                 else:
                     # we may have reach the end
                     num += 1
                     if non_blank > min_lines:
                         yield num, lineset1, index1, lineset2, index2
                         skip = max(skip, num)
             index1 += skip
 
     def _iter_sims(self):
         """iterate on similarities among all files, by making a cartesian
         product
         """
         for idx, lineset in enumerate(self.linesets[:-1]):
             for lineset2 in self.linesets[idx+1:]:
                 for sim in self._find_common(lineset, lineset2):
                     yield sim
 
-def stripped_lines(lines, ignore_comments, ignore_docstrings):
+def stripped_lines(lines, ignore_comments, ignore_docstrings, ignore_imports):
+    """return lines with leading/trailing whitespace and any ignored code
+    features removed
+    """
+
     strippedlines = []
     docstring = None
     for line in lines:
         line = line.strip()
         if ignore_docstrings:
             if not docstring and \
                (line.startswith('"""') or line.startswith("'''")):
                 docstring = line[:3]
                 line = line[3:]
             if docstring:
                 if line.endswith(docstring):
                     docstring = None
                 line = ''
+        if ignore_imports:
+            if line.startswith("import ") or line.startswith("from "):
+                line = ''
         if ignore_comments:
             # XXX should use regex in checkers/format to avoid cutting
             # at a "#" in a string
             line = line.split('#', 1)[0].strip()
         strippedlines.append(line)
     return strippedlines
 
-class LineSet:
+
+class LineSet(object):
     """Holds and indexes all the lines of a single source file"""
     def __init__(self, name, lines, ignore_comments=False,
-                 ignore_docstrings=False):
+                 ignore_docstrings=False, ignore_imports=False):
         self.name = name
         self._real_lines = lines
         self._stripped_lines = stripped_lines(lines, ignore_comments,
-                                              ignore_docstrings)
+                                              ignore_docstrings,
+                                              ignore_imports)
         self._index = self._mk_index()
 
     def __str__(self):
         return '<Lineset for %s>' % self.name
 
     def __len__(self):
         return len(self._real_lines)
 
     def __getitem__(self, index):
         return self._stripped_lines[index]
(...skipping 20 matching lines...)
 
     def find(self, stripped_line):
         """return positions of the given stripped line in this set"""
         return self._index.get(stripped_line, ())
 
     def _mk_index(self):
         """create the index for this set"""
         index = {}
         for line_no, line in enumerate(self._stripped_lines):
             if line:
-                index.setdefault(line, []).append( line_no )
+                index.setdefault(line, []).append(line_no)
         return index
 
 
 MSGS = {'R0801': ('Similar lines in %s files\n%s',
+                  'duplicate-code',
                   'Indicates that a set of similar lines has been detected \
 among multiple file. This usually means that the code should \
 be refactored to avoid this duplication.')}
 
 def report_similarities(sect, stats, old_stats):
     """make a layout with some stats about duplication"""
     lines = ['', 'now', 'previous', 'difference']
     lines += table_lines_from_stats(stats, old_stats,
                                     ('nb_duplicated_lines',
                                      'percent_duplicated_lines'))
(...skipping 13 matching lines...)
     # messages
     msgs = MSGS
     # configuration options
     # for available dict keys/values see the optik parser 'add_option' method
     options = (('min-similarity-lines',
                 {'default' : 4, 'type' : "int", 'metavar' : '<int>',
                  'help' : 'Minimum lines number of a similarity.'}),
                ('ignore-comments',
                 {'default' : True, 'type' : 'yn', 'metavar' : '<y or n>',
                  'help': 'Ignore comments when computing similarities.'}
                ),
                ('ignore-docstrings',
                 {'default' : True, 'type' : 'yn', 'metavar' : '<y or n>',
                  'help': 'Ignore docstrings when computing similarities.'}
                ),
-              )
+               ('ignore-imports',
+                {'default' : False, 'type' : 'yn', 'metavar' : '<y or n>',
+                 'help': 'Ignore imports when computing similarities.'}
+               ),
+              )
     # reports
-    reports = ( ('R0801', 'Duplication', report_similarities), ) # XXX actually a Refactoring message
+    reports = (('RP0801', 'Duplication', report_similarities),)
 
     def __init__(self, linter=None):
         BaseChecker.__init__(self, linter)
         Similar.__init__(self, min_lines=4,
                          ignore_comments=True, ignore_docstrings=True)
         self.stats = None
 
     def set_option(self, optname, value, action=None, optdict=None):
         """method called to set an option (registered in the options list)
 
         overridden to report options setting to Similar
         """
         BaseChecker.set_option(self, optname, value, action, optdict)
         if optname == 'min-similarity-lines':
             self.min_lines = self.config.min_similarity_lines
         elif optname == 'ignore-comments':
             self.ignore_comments = self.config.ignore_comments
         elif optname == 'ignore-docstrings':
             self.ignore_docstrings = self.config.ignore_docstrings
+        elif optname == 'ignore-imports':
+            self.ignore_imports = self.config.ignore_imports
 
     def open(self):
         """init the checkers: reset linesets and statistics information"""
         self.linesets = []
         self.stats = self.linter.add_stats(nb_duplicated_lines=0,
                                            percent_duplicated_lines=0)
 
     def process_module(self, node):
         """process a module
 
         the module's content is accessible via the stream object
 
         stream must implement the readlines method
         """
-        self.append_stream(self.linter.current_name, node.file_stream)
+        self.append_stream(self.linter.current_name, node.file_stream, node.file_encoding)
 
     def close(self):
         """compute and display similarities on closing (i.e. end of parsing)"""
         total = sum([len(lineset) for lineset in self.linesets])
         duplicated = 0
         stats = self.stats
         for num, couples in self._compute_sims():
             msg = []
             for lineset, idx in couples:
                 msg.append("==%s:%s" % (lineset.name, idx))
             msg.sort()
             # pylint: disable=W0631
             for line in lineset._real_lines[idx:idx+num]:
                 msg.append(line.rstrip())
             self.add_message('R0801', args=(len(couples), '\n'.join(msg)))
             duplicated += num * (len(couples) - 1)
         stats['nb_duplicated_lines'] = duplicated
         stats['percent_duplicated_lines'] = total and duplicated * 100. / total
 
 
 def register(linter):
     """required method to auto register this checker """
     linter.register_checker(SimilarChecker(linter))
 
 def usage(status=0):
     """display command line usage information"""
     print "finds copy pasted blocks in a set of files"
     print
     print 'Usage: symilar [-d|--duplicates min_duplicated_lines] \
-[-i|--ignore-comments] file1...'
+[-i|--ignore-comments] [--ignore-docstrings] [--ignore-imports] file1...'
     sys.exit(status)
 
-def run(argv=None):
+def Run(argv=None):
     """standalone command line access point"""
     if argv is None:
         argv = sys.argv[1:]
     from getopt import getopt
     s_opts = 'hdi'
-    l_opts = ('help', 'duplicates=', 'ignore-comments')
+    l_opts = ('help', 'duplicates=', 'ignore-comments', 'ignore-imports',
+              'ignore-docstrings')
     min_lines = 4
     ignore_comments = False
+    ignore_docstrings = False
+    ignore_imports = False
     opts, args = getopt(argv, s_opts, l_opts)
     for opt, val in opts:
         if opt in ('-d', '--duplicates'):
             min_lines = int(val)
         elif opt in ('-h', '--help'):
             usage()
         elif opt in ('-i', '--ignore-comments'):
             ignore_comments = True
+        elif opt in ('--ignore-docstrings',):
+            ignore_docstrings = True
+        elif opt in ('--ignore-imports',):
+            ignore_imports = True
     if not args:
         usage(1)
-    sim = Similar(min_lines, ignore_comments)
+    sim = Similar(min_lines, ignore_comments, ignore_docstrings, ignore_imports)
     for filename in args:
         sim.append_stream(filename, open(filename))
     sim.run()
+    sys.exit(0)
 
 if __name__ == '__main__':
-    run()
+    Run()
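
For context, here is a minimal sketch (not part of the patch) of driving the upgraded module programmatically, using only the API visible in the new revision above: the Similar constructor with the new ignore_imports flag, append_stream() with its optional encoding argument, and run(). The file names and the utf-8 encoding are illustrative assumptions; the module is Python 2 code (izip, print statements), so the sketch follows that style.

# Sketch only: exercises the Similar API from the new revision above.
# 'a.py' and 'b.py' are placeholder file names; 'utf-8' is an assumed encoding.
from pylint.checkers.similar import Similar

sim = Similar(min_lines=4,             # same default as min-similarity-lines
              ignore_comments=True,    # strip '#' comments before comparing
              ignore_docstrings=True,  # strip docstring lines before comparing
              ignore_imports=True)     # new in this revision: strip import/from lines
for filename in ('a.py', 'b.py'):
    stream = open(filename)
    try:
        # When an encoding is given, lines are decoded before indexing, and a
        # file that fails to decode is silently skipped (append_stream above
        # swallows UnicodeDecodeError).
        sim.append_stream(filename, stream, encoding='utf-8')
    finally:
        stream.close()
sim.run()  # prints the duplicated blocks and a TOTAL summary line to stdout

Per the usage() and Run() functions above, this roughly corresponds to running the bundled command-line tool as: symilar --duplicates=4 -i --ignore-docstrings --ignore-imports a.py b.py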