Index: third_party/pylint/checkers/misc.py |
=================================================================== |
--- third_party/pylint/checkers/misc.py (revision 292986) |
+++ third_party/pylint/checkers/misc.py (working copy) |
@@ -10,7 +10,7 @@ |
# |
# You should have received a copy of the GNU General Public License along with |
# this program; if not, write to the Free Software Foundation, Inc., |
-# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. |
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
""" Copyright (c) 2000-2010 LOGILAB S.A. (Paris, FRANCE). |
http://www.logilab.fr/ -- mailto:contact@logilab.fr |
@@ -17,7 +17,7 @@ |
Check source code is ascii only or has an encoding declaration (PEP 263) |
""" |
-import re, sys |
+import re |
from pylint.interfaces import IRawChecker |
from pylint.checkers import BaseChecker |
@@ -25,13 +25,21 @@ |
MSGS = { |
'W0511': ('%s', |
+ 'fixme', |
'Used when a warning note as FIXME or XXX is detected.'), |
- } |
+ 'W0512': ('Cannot decode using encoding "%s", unexpected byte at position %d', |
+ 'invalid-encoded-data', |
+ 'Used when a source line cannot be decoded using the specified ' |
+ 'source file encoding.', |
+ {'maxversion': (3, 0)}), |
+} |
+ |
class EncodingChecker(BaseChecker): |
+ |
"""checks for: |
* warning notes in the code like FIXME, XXX |
- * PEP 263: source code with non ascii character but no encoding declaration |
+ * encoding issues. |
""" |
__implements__ = IRawChecker |
@@ -40,36 +48,55 @@ |
msgs = MSGS |
options = (('notes', |
- {'type' : 'csv', 'metavar' : '<comma separated values>', |
- 'default' : ('FIXME', 'XXX', 'TODO'), |
- 'help' : 'List of note tags to take in consideration, \ |
-separated by a comma.' |
- }), |
- ) |
+ {'type': 'csv', 'metavar': '<comma separated values>', |
+ 'default': ('FIXME', 'XXX', 'TODO'), |
+ 'help': ('List of note tags to take in consideration, ' |
+ 'separated by a comma.')}),) |
- def __init__(self, linter=None): |
- BaseChecker.__init__(self, linter) |
+ def _check_note(self, notes, lineno, line): |
+ # First, simply check if the notes are in the line at all. This is an |
+ # optimisation: instead of running the regular expression on every |
+ # line, it is run only on lines which may actually contain a note. |
+ # This prevents a pathological problem with lines that are hundreds |
+ # of thousands of characters long. |
+ for note in self.config.notes: |
+ if note in line: |
+ break |
+ else: |
+ return |
- def process_module(self, node): |
- """inspect the source file to found encoding problem or fixmes like |
+ match = notes.search(line) |
+ if not match: |
+ return |
+ self.add_message('fixme', args=line[match.start(1):-1], line=lineno) |
+ |
+ def _check_encoding(self, lineno, line, file_encoding): |
+ try: |
+ return unicode(line, file_encoding) |
+ except UnicodeDecodeError, ex: |
+ self.add_message('invalid-encoded-data', line=lineno, |
+ args=(file_encoding, ex.args[2])) |
+ |
+ def process_module(self, module): |
+ """inspect the source file to find encoding problems or fixmes like |
notes |
""" |
- stream = node.file_stream |
- stream.seek(0) # XXX may be removed with astng > 0.23 |
- # warning notes in the code |
- notes = [] |
- for note in self.config.notes: |
- notes.append(re.compile(note)) |
- linenum = 1 |
- for line in stream.readlines(): |
- for note in notes: |
- match = note.search(line) |
- if match: |
- self.add_message('W0511', args=line[match.start():-1], |
- line=linenum) |
- break |
- linenum += 1 |
+ stream = module.file_stream |
+ stream.seek(0) # XXX may be removed with astroid > 0.23 |
+ if self.config.notes: |
+ notes = re.compile( |
+ r'.*?#\s*(%s)(:*\s*.+)' % "|".join(self.config.notes)) |
+ else: |
+ notes = None |
+ if module.file_encoding: |
+ encoding = module.file_encoding |
+ else: |
+ encoding = 'ascii' |
+ for lineno, line in enumerate(stream): |
+ line = self._check_encoding(lineno + 1, line, encoding) |
+ if line is not None and notes: |
+ self._check_note(notes, lineno + 1, line) |
def register(linter): |