third_party/pylint/checkers/similar.py - Issue 741023003: Revert "pylint: upgrade to 1.3.1"

Side by Side Diff: third_party/pylint/checkers/similar.py

Issue 741023003: Revert "pylint: upgrade to 1.3.1" (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/tools/depot_tools

Patch Set: Created 6 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 # pylint: disable=W0622	1 # pylint: disable=W0622

2 # Copyright (c) 2004-2013 LOGILAB S.A. (Paris, FRANCE).	2 # Copyright (c) 2004-2006 LOGILAB S.A. (Paris, FRANCE).

3 # http://www.logilab.fr/ -- mailto:contact@logilab.fr	3 # http://www.logilab.fr/ -- mailto:contact@logilab.fr

4 #	4 #

5 # This program is free software; you can redistribute it and/or modify it under	5 # This program is free software; you can redistribute it and/or modify it under

6 # the terms of the GNU General Public License as published by the Free Software	6 # the terms of the GNU General Public License as published by the Free Software

7 # Foundation; either version 2 of the License, or (at your option) any later	7 # Foundation; either version 2 of the License, or (at your option) any later

8 # version.	8 # version.

9 #	9 #

10 # This program is distributed in the hope that it will be useful, but WITHOUT	10 # This program is distributed in the hope that it will be useful, but WITHOUT

11 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS	11 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS

12 # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details	12 # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details

13 #	13 #

14 # You should have received a copy of the GNU General Public License along with	14 # You should have received a copy of the GNU General Public License along with

15 # this program; if not, write to the Free Software Foundation, Inc.,	15 # this program; if not, write to the Free Software Foundation, Inc.,

16 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.	16 # 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

17 """a similarities / code duplication command line tool and pylint checker	17 """a similarities / code duplication command line tool and pylint checker

18 """	18 """

	19 from __future__ import generators

	20

19 import sys	21 import sys

20 from itertools import izip	22 from itertools import izip

21	23

22 from logilab.common.ureports import Table	24 from logilab.common.ureports import Table

23	25

24 from pylint.interfaces import IRawChecker	26 from pylint.interfaces import IRawChecker

25 from pylint.checkers import BaseChecker, table_lines_from_stats	27 from pylint.checkers import BaseChecker, table_lines_from_stats

26	28

27	29

28 class Similar(object):	30 class Similar:

29 """finds copy-pasted lines of code in a project"""	31 """finds copy-pasted lines of code in a project"""

30	32

31 def __init__(self, min_lines=4, ignore_comments=False,	33 def __init__(self, min_lines=4, ignore_comments=False,

32 ignore_docstrings=False, ignore_imports=False):	34 ignore_docstrings=False):

33 self.min_lines = min_lines	35 self.min_lines = min_lines

34 self.ignore_comments = ignore_comments	36 self.ignore_comments = ignore_comments

35 self.ignore_docstrings = ignore_docstrings	37 self.ignore_docstrings = ignore_docstrings

36 self.ignore_imports = ignore_imports

37 self.linesets = []	38 self.linesets = []

38	39

39 def append_stream(self, streamid, stream, encoding=None):	40 def append_stream(self, streamid, stream):

40 """append a file to search for similarities"""	41 """append a file to search for similarities"""

41 stream.seek(0) # XXX may be removed with astroid > 0.23	42 stream.seek(0) # XXX may be removed with astng > 0.23

42 if encoding is None:	43 self.linesets.append(LineSet(streamid,

43 readlines = stream.readlines	44 stream.readlines(),

44 else:	45 self.ignore_comments,

45 readlines = lambda: [line.decode(encoding) for line in stream]	46 self.ignore_docstrings))

46 try:

47 self.linesets.append(LineSet(streamid,

48 readlines(),

49 self.ignore_comments,

50 self.ignore_docstrings,

51 self.ignore_imports))

52 except UnicodeDecodeError:

53 pass

54	47

55 def run(self):	48 def run(self):

56 """start looking for similarities and display results on stdout"""	49 """start looking for similarities and display results on stdout"""

57 self._display_sims(self._compute_sims())	50 self._display_sims(self._compute_sims())

58	51

59 def _compute_sims(self):	52 def _compute_sims(self):

60 """compute similarities in appended files"""	53 """compute similarities in appended files"""

61 no_duplicates = {}	54 no_duplicates = {}

62 for num, lineset1, idx1, lineset2, idx2 in self._iter_sims():	55 for num, lineset1, idx1, lineset2, idx2 in self._iter_sims():

63 duplicate = no_duplicates.setdefault(num, [])	56 duplicate = no_duplicates.setdefault(num, [])

64 for couples in duplicate:	57 for couples in duplicate:

65 if (lineset1, idx1) in couples or (lineset2, idx2) in couples:	58 if (lineset1, idx1) in couples or (lineset2, idx2) in couples:

66 couples.add((lineset1, idx1))	59 couples.add( (lineset1, idx1) )

67 couples.add((lineset2, idx2))	60 couples.add( (lineset2, idx2) )

68 break	61 break

69 else:	62 else:

70 duplicate.append(set([(lineset1, idx1), (lineset2, idx2)]))	63 duplicate.append( set([(lineset1, idx1), (lineset2, idx2)]) )

71 sims = []	64 sims = []

72 for num, ensembles in no_duplicates.iteritems():	65 for num, ensembles in no_duplicates.iteritems():

73 for couples in ensembles:	66 for couples in ensembles:

74 sims.append((num, couples))	67 sims.append( (num, couples) )

75 sims.sort()	68 sims.sort()

76 sims.reverse()	69 sims.reverse()

77 return sims	70 return sims

78	71

79 def _display_sims(self, sims):	72 def _display_sims(self, sims):

80 """display computed similarities on stdout"""	73 """display computed similarities on stdout"""

81 nb_lignes_dupliquees = 0	74 nb_lignes_dupliquees = 0

82 for num, couples in sims:	75 for num, couples in sims:

83 print	76 print

84 print num, "similar lines in", len(couples), "files"	77 print num, "similar lines in", len(couples), "files"

85 couples = sorted(couples)	78 couples = sorted(couples)

86 for lineset, idx in couples:	79 for lineset, idx in couples:

87 print "==%s:%s" % (lineset.name, idx)	80 print "==%s:%s" % (lineset.name, idx)

88 # pylint: disable=W0631	81 # pylint: disable=W0631

89 for line in lineset._real_lines[idx:idx+num]:	82 for line in lineset._real_lines[idx:idx+num]:

90 print " ", line.rstrip()	83 print " ", line,

91 nb_lignes_dupliquees += num * (len(couples)-1)	84 nb_lignes_dupliquees += num * (len(couples)-1)

92 nb_total_lignes = sum([len(lineset) for lineset in self.linesets])	85 nb_total_lignes = sum([len(lineset) for lineset in self.linesets])

93 print "TOTAL lines=%s duplicates=%s percent=%.2f" \	86 print "TOTAL lines=%s duplicates=%s percent=%.2f" \

94 % (nb_total_lignes, nb_lignes_dupliquees,	87 % (nb_total_lignes, nb_lignes_dupliquees,

95 nb_lignes_dupliquees*100. / nb_total_lignes)	88 nb_lignes_dupliquees*100. / nb_total_lignes)

96	89

97 def _find_common(self, lineset1, lineset2):	90 def _find_common(self, lineset1, lineset2):

98 """find similarities in the two given linesets"""	91 """find similarities in the two given linesets"""

99 lines1 = lineset1.enumerate_stripped	92 lines1 = lineset1.enumerate_stripped

100 lines2 = lineset2.enumerate_stripped	93 lines2 = lineset2.enumerate_stripped

101 find = lineset2.find	94 find = lineset2.find

102 index1 = 0	95 index1 = 0

103 min_lines = self.min_lines	96 min_lines = self.min_lines

104 while index1 < len(lineset1):	97 while index1 < len(lineset1):

105 skip = 1	98 skip = 1

106 num = 0	99 num = 0

107 for index2 in find(lineset1[index1]):	100 for index2 in find( lineset1[index1] ):

108 non_blank = 0	101 non_blank = 0

109 for num, ((_, line1), (_, line2)) in enumerate(	102 for num, ((_, line1), (_, line2)) in enumerate(

110 izip(lines1(index1), lines2(index2))):	103 izip(lines1(index1), lines2(index2))):

111 if line1 != line2:	104 if line1 != line2:

112 if non_blank > min_lines:	105 if non_blank > min_lines:

113 yield num, lineset1, index1, lineset2, index2	106 yield num, lineset1, index1, lineset2, index2

114 skip = max(skip, num)	107 skip = max(skip, num)

115 break	108 break

116 if line1:	109 if line1:

117 non_blank += 1	110 non_blank += 1

118 else:	111 else:

119 # we may have reach the end	112 # we may have reach the end

120 num += 1	113 num += 1

121 if non_blank > min_lines:	114 if non_blank > min_lines:

122 yield num, lineset1, index1, lineset2, index2	115 yield num, lineset1, index1, lineset2, index2

123 skip = max(skip, num)	116 skip = max(skip, num)

124 index1 += skip	117 index1 += skip

125	118

126 def _iter_sims(self):	119 def _iter_sims(self):

127 """iterate on similarities among all files, by making a cartesian	120 """iterate on similarities among all files, by making a cartesian

128 product	121 product

129 """	122 """

130 for idx, lineset in enumerate(self.linesets[:-1]):	123 for idx, lineset in enumerate(self.linesets[:-1]):

131 for lineset2 in self.linesets[idx+1:]:	124 for lineset2 in self.linesets[idx+1:]:

132 for sim in self._find_common(lineset, lineset2):	125 for sim in self._find_common(lineset, lineset2):

133 yield sim	126 yield sim

134	127

135 def stripped_lines(lines, ignore_comments, ignore_docstrings, ignore_imports):	128 def stripped_lines(lines, ignore_comments, ignore_docstrings):

136 """return lines with leading/trailing whitespace and any ignored code

137 features removed

138 """

139

140 strippedlines = []	129 strippedlines = []

141 docstring = None	130 docstring = None

142 for line in lines:	131 for line in lines:

143 line = line.strip()	132 line = line.strip()

144 if ignore_docstrings:	133 if ignore_docstrings:

145 if not docstring and \	134 if not docstring and \

146 (line.startswith('"""') or line.startswith("'''")):	135 (line.startswith('"""') or line.startswith("'''")):

147 docstring = line[:3]	136 docstring = line[:3]

148 line = line[3:]	137 line = line[3:]

149 if docstring:	138 if docstring:

150 if line.endswith(docstring):	139 if line.endswith(docstring):

151 docstring = None	140 docstring = None

152 line = ''	141 line = ''

153 if ignore_imports:

154 if line.startswith("import ") or line.startswith("from "):

155 line = ''

156 if ignore_comments:	142 if ignore_comments:

157 # XXX should use regex in checkers/format to avoid cutting	143 # XXX should use regex in checkers/format to avoid cutting

158 # at a "#" in a string	144 # at a "#" in a string

159 line = line.split('#', 1)[0].strip()	145 line = line.split('#', 1)[0].strip()

160 strippedlines.append(line)	146 strippedlines.append(line)

161 return strippedlines	147 return strippedlines

162	148

163	149 class LineSet:

164 class LineSet(object):

165 """Holds and indexes all the lines of a single source file"""	150 """Holds and indexes all the lines of a single source file"""

166 def __init__(self, name, lines, ignore_comments=False,	151 def __init__(self, name, lines, ignore_comments=False,

167 ignore_docstrings=False, ignore_imports=False):	152 ignore_docstrings=False):

168 self.name = name	153 self.name = name

169 self._real_lines = lines	154 self._real_lines = lines

170 self._stripped_lines = stripped_lines(lines, ignore_comments,	155 self._stripped_lines = stripped_lines(lines, ignore_comments,

171 ignore_docstrings,	156 ignore_docstrings)

172 ignore_imports)

173 self._index = self._mk_index()	157 self._index = self._mk_index()

174	158

175 def __str__(self):	159 def __str__(self):

176 return '<Lineset for %s>' % self.name	160 return '<Lineset for %s>' % self.name

177	161

178 def __len__(self):	162 def __len__(self):

179 return len(self._real_lines)	163 return len(self._real_lines)

180	164

181 def __getitem__(self, index):	165 def __getitem__(self, index):

182 return self._stripped_lines[index]	166 return self._stripped_lines[index]

(...skipping 20 matching lines...) Expand all Loading...
203	187

204 def find(self, stripped_line):	188 def find(self, stripped_line):

205 """return positions of the given stripped line in this set"""	189 """return positions of the given stripped line in this set"""

206 return self._index.get(stripped_line, ())	190 return self._index.get(stripped_line, ())

207	191

208 def _mk_index(self):	192 def _mk_index(self):

209 """create the index for this set"""	193 """create the index for this set"""

210 index = {}	194 index = {}

211 for line_no, line in enumerate(self._stripped_lines):	195 for line_no, line in enumerate(self._stripped_lines):

212 if line:	196 if line:

213 index.setdefault(line, []).append(line_no)	197 index.setdefault(line, []).append( line_no )

214 return index	198 return index

215	199

216	200

217 MSGS = {'R0801': ('Similar lines in %s files\n%s',	201 MSGS = {'R0801': ('Similar lines in %s files\n%s',

218 'duplicate-code',

219 'Indicates that a set of similar lines has been detected \	202 'Indicates that a set of similar lines has been detected \

220 among multiple file. This usually means that the code should \	203 among multiple file. This usually means that the code should \

221 be refactored to avoid this duplication.')}	204 be refactored to avoid this duplication.')}

222	205

223 def report_similarities(sect, stats, old_stats):	206 def report_similarities(sect, stats, old_stats):

224 """make a layout with some stats about duplication"""	207 """make a layout with some stats about duplication"""

225 lines = ['', 'now', 'previous', 'difference']	208 lines = ['', 'now', 'previous', 'difference']

226 lines += table_lines_from_stats(stats, old_stats,	209 lines += table_lines_from_stats(stats, old_stats,

227 ('nb_duplicated_lines',	210 ('nb_duplicated_lines',

228 'percent_duplicated_lines'))	211 'percent_duplicated_lines'))

(...skipping 13 matching lines...) Expand all Loading...
242 # messages	225 # messages

243 msgs = MSGS	226 msgs = MSGS

244 # configuration options	227 # configuration options

245 # for available dict keys/values see the optik parser 'add_option' method	228 # for available dict keys/values see the optik parser 'add_option' method

246 options = (('min-similarity-lines',	229 options = (('min-similarity-lines',

247 {'default' : 4, 'type' : "int", 'metavar' : '<int>',	230 {'default' : 4, 'type' : "int", 'metavar' : '<int>',

248 'help' : 'Minimum lines number of a similarity.'}),	231 'help' : 'Minimum lines number of a similarity.'}),

249 ('ignore-comments',	232 ('ignore-comments',

250 {'default' : True, 'type' : 'yn', 'metavar' : '<y or n>',	233 {'default' : True, 'type' : 'yn', 'metavar' : '<y or n>',

251 'help': 'Ignore comments when computing similarities.'}	234 'help': 'Ignore comments when computing similarities.'}

252 ),	235 ),

253 ('ignore-docstrings',	236 ('ignore-docstrings',

254 {'default' : True, 'type' : 'yn', 'metavar' : '<y or n>',	237 {'default' : True, 'type' : 'yn', 'metavar' : '<y or n>',

255 'help': 'Ignore docstrings when computing similarities.'}	238 'help': 'Ignore docstrings when computing similarities.'}

256 ),	239 ),

257 ('ignore-imports',	240 )

258 {'default' : False, 'type' : 'yn', 'metavar' : '<y or n>',

259 'help': 'Ignore imports when computing similarities.'}

260 ),

261 )

262 # reports	241 # reports

263 reports = (('RP0801', 'Duplication', report_similarities),)	242 reports = ( ('R0801', 'Duplication', report_similarities), ) # XXX actually a Refactoring message

264	243

265 def __init__(self, linter=None):	244 def __init__(self, linter=None):

266 BaseChecker.__init__(self, linter)	245 BaseChecker.__init__(self, linter)

267 Similar.__init__(self, min_lines=4,	246 Similar.__init__(self, min_lines=4,

268 ignore_comments=True, ignore_docstrings=True)	247 ignore_comments=True, ignore_docstrings=True)

269 self.stats = None	248 self.stats = None

270	249

271 def set_option(self, optname, value, action=None, optdict=None):	250 def set_option(self, optname, value, action=None, optdict=None):

272 """method called to set an option (registered in the options list)	251 """method called to set an option (registered in the options list)

273	252

274 overridden to report options setting to Similar	253 overridden to report options setting to Similar

275 """	254 """

276 BaseChecker.set_option(self, optname, value, action, optdict)	255 BaseChecker.set_option(self, optname, value, action, optdict)

277 if optname == 'min-similarity-lines':	256 if optname == 'min-similarity-lines':

278 self.min_lines = self.config.min_similarity_lines	257 self.min_lines = self.config.min_similarity_lines

279 elif optname == 'ignore-comments':	258 elif optname == 'ignore-comments':

280 self.ignore_comments = self.config.ignore_comments	259 self.ignore_comments = self.config.ignore_comments

281 elif optname == 'ignore-docstrings':	260 elif optname == 'ignore-docstrings':

282 self.ignore_docstrings = self.config.ignore_docstrings	261 self.ignore_docstrings = self.config.ignore_docstrings

283 elif optname == 'ignore-imports':

284 self.ignore_imports = self.config.ignore_imports

285	262

286 def open(self):	263 def open(self):

287 """init the checkers: reset linesets and statistics information"""	264 """init the checkers: reset linesets and statistics information"""

288 self.linesets = []	265 self.linesets = []

289 self.stats = self.linter.add_stats(nb_duplicated_lines=0,	266 self.stats = self.linter.add_stats(nb_duplicated_lines=0,

290 percent_duplicated_lines=0)	267 percent_duplicated_lines=0)

291	268

292 def process_module(self, node):	269 def process_module(self, node):

293 """process a module	270 """process a module

294	271

295 the module's content is accessible via the stream object	272 the module's content is accessible via the stream object

296	273

297 stream must implement the readlines method	274 stream must implement the readlines method

298 """	275 """

299 self.append_stream(self.linter.current_name, node.file_stream, node.file_encoding)	276 self.append_stream(self.linter.current_name, node.file_stream)

300	277

301 def close(self):	278 def close(self):

302 """compute and display similarities on closing (i.e. end of parsing)"""	279 """compute and display similarities on closing (i.e. end of parsing)"""

303 total = sum([len(lineset) for lineset in self.linesets])	280 total = sum([len(lineset) for lineset in self.linesets])

304 duplicated = 0	281 duplicated = 0

305 stats = self.stats	282 stats = self.stats

306 for num, couples in self._compute_sims():	283 for num, couples in self._compute_sims():

307 msg = []	284 msg = []

308 for lineset, idx in couples:	285 for lineset, idx in couples:

309 msg.append("==%s:%s" % (lineset.name, idx))	286 msg.append("==%s:%s" % (lineset.name, idx))

310 msg.sort()	287 msg.sort()

311 # pylint: disable=W0631	288 # pylint: disable=W0631

312 for line in lineset._real_lines[idx:idx+num]:	289 for line in lineset._real_lines[idx:idx+num]:

313 msg.append(line.rstrip())	290 msg.append(line.rstrip())

314 self.add_message('R0801', args=(len(couples), '\n'.join(msg)))	291 self.add_message('R0801', args=(len(couples), '\n'.join(msg)))

315 duplicated += num * (len(couples) - 1)	292 duplicated += num * (len(couples) - 1)

316 stats['nb_duplicated_lines'] = duplicated	293 stats['nb_duplicated_lines'] = duplicated

317 stats['percent_duplicated_lines'] = total and duplicated * 100. / total	294 stats['percent_duplicated_lines'] = total and duplicated * 100. / total

318	295

319	296

320 def register(linter):	297 def register(linter):

321 """required method to auto register this checker """	298 """required method to auto register this checker """

322 linter.register_checker(SimilarChecker(linter))	299 linter.register_checker(SimilarChecker(linter))

323	300

324 def usage(status=0):	301 def usage(status=0):

325 """display command line usage information"""	302 """display command line usage information"""

326 print "finds copy pasted blocks in a set of files"	303 print "finds copy pasted blocks in a set of files"

327 print	304 print

328 print 'Usage: symilar [-d|--duplicates min_duplicated_lines] \	305 print 'Usage: symilar [-d|--duplicates min_duplicated_lines] \

329 [-i|--ignore-comments] [--ignore-docstrings] [--ignore-imports] file1...'	306 [-i|--ignore-comments] file1...'

330 sys.exit(status)	307 sys.exit(status)

331	308

332 def Run(argv=None):	309 def run(argv=None):

333 """standalone command line access point"""	310 """standalone command line access point"""

334 if argv is None:	311 if argv is None:

335 argv = sys.argv[1:]	312 argv = sys.argv[1:]

336 from getopt import getopt	313 from getopt import getopt

337 s_opts = 'hdi'	314 s_opts = 'hdi'

338 l_opts = ('help', 'duplicates=', 'ignore-comments', 'ignore-imports',	315 l_opts = ('help', 'duplicates=', 'ignore-comments')

339 'ignore-docstrings')

340 min_lines = 4	316 min_lines = 4

341 ignore_comments = False	317 ignore_comments = False

342 ignore_docstrings = False

343 ignore_imports = False

344 opts, args = getopt(argv, s_opts, l_opts)	318 opts, args = getopt(argv, s_opts, l_opts)

345 for opt, val in opts:	319 for opt, val in opts:

346 if opt in ('-d', '--duplicates'):	320 if opt in ('-d', '--duplicates'):

347 min_lines = int(val)	321 min_lines = int(val)

348 elif opt in ('-h', '--help'):	322 elif opt in ('-h', '--help'):

349 usage()	323 usage()

350 elif opt in ('-i', '--ignore-comments'):	324 elif opt in ('-i', '--ignore-comments'):

351 ignore_comments = True	325 ignore_comments = True

352 elif opt in ('--ignore-docstrings',):

353 ignore_docstrings = True

354 elif opt in ('--ignore-imports',):

355 ignore_imports = True

356 if not args:	326 if not args:

357 usage(1)	327 usage(1)

358 sim = Similar(min_lines, ignore_comments, ignore_docstrings, ignore_imports)	328 sim = Similar(min_lines, ignore_comments)

359 for filename in args:	329 for filename in args:

360 sim.append_stream(filename, open(filename))	330 sim.append_stream(filename, open(filename))

361 sim.run()	331 sim.run()

362 sys.exit(0)

363	332

364 if __name__ == '__main__':	333 if __name__ == '__main__':

365 Run()	334 run()

OLD	NEW

« no previous file with comments | « third_party/pylint/checkers/raw_metrics.py ('k') | third_party/pylint/checkers/stdlib.py » ('j') | no next file with comments »