| Index: third_party/pylint/checkers/similar.py
 | 
| diff --git a/third_party/pylint/checkers/similar.py b/third_party/pylint/checkers/similar.py
 | 
| index e5c930038c8f57eb1b09244f5b8b87ad3271f1ad..1e38ed61e80aa910c8051685cbd3c0c78d9dad93 100644
 | 
| --- a/third_party/pylint/checkers/similar.py
 | 
| +++ b/third_party/pylint/checkers/similar.py
 | 
| @@ -1,5 +1,5 @@
 | 
|  # pylint: disable=W0622
 | 
| -# Copyright (c) 2004-2013 LOGILAB S.A. (Paris, FRANCE).
 | 
| +# Copyright (c) 2004-2006 LOGILAB S.A. (Paris, FRANCE).
 | 
|  # http://www.logilab.fr/ -- mailto:contact@logilab.fr
 | 
|  #
 | 
|  # This program is free software; you can redistribute it and/or modify it under
 | 
| @@ -13,9 +13,11 @@
 | 
|  #
 | 
|  # You should have received a copy of the GNU General Public License along with
 | 
|  # this program; if not, write to the Free Software Foundation, Inc.,
 | 
| -# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 | 
| +# 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 | 
|  """a similarities / code duplication command line tool and pylint checker
 | 
|  """
 | 
| +from __future__ import generators
 | 
| +
 | 
|  import sys
 | 
|  from itertools import izip
 | 
|  
 | 
| @@ -25,32 +27,23 @@ from pylint.interfaces import IRawChecker
 | 
|  from pylint.checkers import BaseChecker, table_lines_from_stats
 | 
|  
 | 
|  
 | 
| -class Similar(object):
 | 
| +class Similar:
 | 
|      """finds copy-pasted lines of code in a project"""
 | 
|  
 | 
|      def __init__(self, min_lines=4, ignore_comments=False,
 | 
| -                 ignore_docstrings=False, ignore_imports=False):
 | 
| +                 ignore_docstrings=False):
 | 
|          self.min_lines = min_lines
 | 
|          self.ignore_comments = ignore_comments
 | 
|          self.ignore_docstrings = ignore_docstrings
 | 
| -        self.ignore_imports = ignore_imports
 | 
|          self.linesets = []
 | 
|  
 | 
| -    def append_stream(self, streamid, stream, encoding=None):
 | 
| +    def append_stream(self, streamid, stream):
 | 
|          """append a file to search for similarities"""
 | 
| -        stream.seek(0) # XXX may be removed with astroid > 0.23
 | 
| -        if encoding is None:
 | 
| -            readlines = stream.readlines
 | 
| -        else:
 | 
| -            readlines = lambda: [line.decode(encoding) for line in stream]
 | 
| -        try:
 | 
| -            self.linesets.append(LineSet(streamid,
 | 
| -                                         readlines(),
 | 
| -                                         self.ignore_comments,
 | 
| -                                         self.ignore_docstrings,
 | 
| -                                         self.ignore_imports))
 | 
| -        except UnicodeDecodeError:
 | 
| -            pass
 | 
| +        stream.seek(0) # XXX may be removed with astng > 0.23
 | 
| +        self.linesets.append(LineSet(streamid,
 | 
| +                                     stream.readlines(),
 | 
| +                                     self.ignore_comments,
 | 
| +                                     self.ignore_docstrings))
 | 
|  
 | 
|      def run(self):
 | 
|          """start looking for similarities and display results on stdout"""
 | 
| @@ -63,15 +56,15 @@ class Similar(object):
 | 
|              duplicate = no_duplicates.setdefault(num, [])
 | 
|              for couples in duplicate:
 | 
|                  if (lineset1, idx1) in couples or (lineset2, idx2) in couples:
 | 
| -                    couples.add((lineset1, idx1))
 | 
| -                    couples.add((lineset2, idx2))
 | 
| +                    couples.add( (lineset1, idx1) )
 | 
| +                    couples.add( (lineset2, idx2) )
 | 
|                      break
 | 
|              else:
 | 
| -                duplicate.append(set([(lineset1, idx1), (lineset2, idx2)]))
 | 
| +                duplicate.append( set([(lineset1, idx1), (lineset2, idx2)]) )
 | 
|          sims = []
 | 
|          for num, ensembles in no_duplicates.iteritems():
 | 
|              for couples in ensembles:
 | 
| -                sims.append((num, couples))
 | 
| +                sims.append( (num, couples) )
 | 
|          sims.sort()
 | 
|          sims.reverse()
 | 
|          return sims
 | 
| @@ -87,7 +80,7 @@ class Similar(object):
 | 
|                  print "==%s:%s" % (lineset.name, idx)
 | 
|              # pylint: disable=W0631
 | 
|              for line in lineset._real_lines[idx:idx+num]:
 | 
| -                print "  ", line.rstrip()
 | 
| +                print "  ", line,
 | 
|              nb_lignes_dupliquees += num * (len(couples)-1)
 | 
|          nb_total_lignes = sum([len(lineset) for lineset in self.linesets])
 | 
|          print "TOTAL lines=%s duplicates=%s percent=%.2f" \
 | 
| @@ -104,10 +97,10 @@ class Similar(object):
 | 
|          while index1 < len(lineset1):
 | 
|              skip = 1
 | 
|              num = 0
 | 
| -            for index2 in find(lineset1[index1]):
 | 
| +            for index2 in find( lineset1[index1] ):
 | 
|                  non_blank = 0
 | 
|                  for num, ((_, line1), (_, line2)) in enumerate(
 | 
| -                        izip(lines1(index1), lines2(index2))):
 | 
| +                    izip(lines1(index1), lines2(index2))):
 | 
|                      if line1 != line2:
 | 
|                          if non_blank > min_lines:
 | 
|                              yield num, lineset1, index1, lineset2, index2
 | 
| @@ -132,11 +125,7 @@ class Similar(object):
 | 
|                  for sim in self._find_common(lineset, lineset2):
 | 
|                      yield sim
 | 
|  
 | 
| -def stripped_lines(lines, ignore_comments, ignore_docstrings, ignore_imports):
 | 
| -    """return lines with leading/trailing whitespace and any ignored code
 | 
| -    features removed
 | 
| -    """
 | 
| -
 | 
| +def stripped_lines(lines, ignore_comments, ignore_docstrings):
 | 
|      strippedlines = []
 | 
|      docstring = None
 | 
|      for line in lines:
 | 
| @@ -150,9 +139,6 @@ def stripped_lines(lines, ignore_comments, ignore_docstrings, ignore_imports):
 | 
|                  if line.endswith(docstring):
 | 
|                      docstring = None
 | 
|                  line = ''
 | 
| -        if ignore_imports:
 | 
| -            if line.startswith("import ") or line.startswith("from "):
 | 
| -                line = ''
 | 
|          if ignore_comments:
 | 
|              # XXX should use regex in checkers/format to avoid cutting
 | 
|              # at a "#" in a string
 | 
| @@ -160,16 +146,14 @@ def stripped_lines(lines, ignore_comments, ignore_docstrings, ignore_imports):
 | 
|          strippedlines.append(line)
 | 
|      return strippedlines
 | 
|  
 | 
| -
 | 
| -class LineSet(object):
 | 
| +class LineSet:
 | 
|      """Holds and indexes all the lines of a single source file"""
 | 
|      def __init__(self, name, lines, ignore_comments=False,
 | 
| -                 ignore_docstrings=False, ignore_imports=False):
 | 
| +                 ignore_docstrings=False):
 | 
|          self.name = name
 | 
|          self._real_lines = lines
 | 
|          self._stripped_lines = stripped_lines(lines, ignore_comments,
 | 
| -                                              ignore_docstrings,
 | 
| -                                              ignore_imports)
 | 
| +                                              ignore_docstrings)
 | 
|          self._index = self._mk_index()
 | 
|  
 | 
|      def __str__(self):
 | 
| @@ -210,12 +194,11 @@ class LineSet(object):
 | 
|          index = {}
 | 
|          for line_no, line in enumerate(self._stripped_lines):
 | 
|              if line:
 | 
| -                index.setdefault(line, []).append(line_no)
 | 
| +                index.setdefault(line, []).append( line_no )
 | 
|          return index
 | 
|  
 | 
|  
 | 
|  MSGS = {'R0801': ('Similar lines in %s files\n%s',
 | 
| -                  'duplicate-code',
 | 
|                    'Indicates that a set of similar lines has been detected \
 | 
|                    among multiple file. This usually means that the code should \
 | 
|                    be refactored to avoid this duplication.')}
 | 
| @@ -249,18 +232,14 @@ class SimilarChecker(BaseChecker, Similar):
 | 
|                 ('ignore-comments',
 | 
|                  {'default' : True, 'type' : 'yn', 'metavar' : '<y or n>',
 | 
|                   'help': 'Ignore comments when computing similarities.'}
 | 
| -               ),
 | 
| +                ),
 | 
|                 ('ignore-docstrings',
 | 
|                  {'default' : True, 'type' : 'yn', 'metavar' : '<y or n>',
 | 
|                   'help': 'Ignore docstrings when computing similarities.'}
 | 
| -               ),
 | 
| -               ('ignore-imports',
 | 
| -                {'default' : False, 'type' : 'yn', 'metavar' : '<y or n>',
 | 
| -                 'help': 'Ignore imports when computing similarities.'}
 | 
| -               ),
 | 
| -              )
 | 
| +                ),
 | 
| +               )
 | 
|      # reports
 | 
| -    reports = (('RP0801', 'Duplication', report_similarities),)
 | 
| +    reports = ( ('R0801', 'Duplication', report_similarities), ) # XXX actually a Refactoring message
 | 
|  
 | 
|      def __init__(self, linter=None):
 | 
|          BaseChecker.__init__(self, linter)
 | 
| @@ -280,8 +259,6 @@ class SimilarChecker(BaseChecker, Similar):
 | 
|              self.ignore_comments = self.config.ignore_comments
 | 
|          elif optname == 'ignore-docstrings':
 | 
|              self.ignore_docstrings = self.config.ignore_docstrings
 | 
| -        elif optname == 'ignore-imports':
 | 
| -            self.ignore_imports = self.config.ignore_imports
 | 
|  
 | 
|      def open(self):
 | 
|          """init the checkers: reset linesets and statistics information"""
 | 
| @@ -296,7 +273,7 @@ class SimilarChecker(BaseChecker, Similar):
 | 
|  
 | 
|          stream must implement the readlines method
 | 
|          """
 | 
| -        self.append_stream(self.linter.current_name, node.file_stream, node.file_encoding)
 | 
| +        self.append_stream(self.linter.current_name, node.file_stream)
 | 
|  
 | 
|      def close(self):
 | 
|          """compute and display similarities on closing (i.e. end of parsing)"""
 | 
| @@ -326,21 +303,18 @@ def usage(status=0):
 | 
|      print "finds copy pasted blocks in a set of files"
 | 
|      print
 | 
|      print 'Usage: symilar [-d|--duplicates min_duplicated_lines] \
 | 
| -[-i|--ignore-comments] [--ignore-docstrings] [--ignore-imports] file1...'
 | 
| +[-i|--ignore-comments] file1...'
 | 
|      sys.exit(status)
 | 
|  
 | 
| -def Run(argv=None):
 | 
| +def run(argv=None):
 | 
|      """standalone command line access point"""
 | 
|      if argv is None:
 | 
|          argv = sys.argv[1:]
 | 
|      from getopt import getopt
 | 
|      s_opts = 'hdi'
 | 
| -    l_opts = ('help', 'duplicates=', 'ignore-comments', 'ignore-imports',
 | 
| -              'ignore-docstrings')
 | 
| +    l_opts = ('help', 'duplicates=', 'ignore-comments')
 | 
|      min_lines = 4
 | 
|      ignore_comments = False
 | 
| -    ignore_docstrings = False
 | 
| -    ignore_imports = False
 | 
|      opts, args = getopt(argv, s_opts, l_opts)
 | 
|      for opt, val in opts:
 | 
|          if opt in ('-d', '--duplicates'):
 | 
| @@ -349,17 +323,12 @@ def Run(argv=None):
 | 
|              usage()
 | 
|          elif opt in ('-i', '--ignore-comments'):
 | 
|              ignore_comments = True
 | 
| -        elif opt in ('--ignore-docstrings',):
 | 
| -            ignore_docstrings = True
 | 
| -        elif opt in ('--ignore-imports',):
 | 
| -            ignore_imports = True
 | 
|      if not args:
 | 
|          usage(1)
 | 
| -    sim = Similar(min_lines, ignore_comments, ignore_docstrings, ignore_imports)
 | 
| +    sim = Similar(min_lines, ignore_comments)
 | 
|      for filename in args:
 | 
|          sim.append_stream(filename, open(filename))
 | 
|      sim.run()
 | 
| -    sys.exit(0)
 | 
|  
 | 
|  if __name__ == '__main__':
 | 
| -    Run()
 | 
| +    run()
 | 
| 
 |