OLD | NEW |
(Empty) | |
| 1 # Licensed under the Apache License: http://www.apache.org/licenses/LICENSE-2.0 |
| 2 # For details: https://bitbucket.org/ned/coveragepy/src/default/NOTICE.txt |
| 3 |
| 4 """Tests for coverage.py's improved tokenizer.""" |
| 5 |
| 6 import os.path |
| 7 import re |
| 8 |
| 9 from coverage import env |
| 10 from coverage.phystokens import source_token_lines, source_encoding |
| 11 from coverage.phystokens import neuter_encoding_declaration |
| 12 from coverage.python import get_python_source |
| 13 |
| 14 from tests.coveragetest import CoverageTest |
| 15 |
| 16 |
# A tiny source sample: a comment, a def, and a call mixing a string literal
# with the % operator — enough to exercise several token classes.
SIMPLE = u"""\
# yay!
def foo():
    say('two = %d' % 2)
"""

# A sample mixing space-indented and tab-indented lines (note the literal
# \t escape), to check that the tokenizer normalizes mixed whitespace.
MIXED_WS = u"""\
def hello():
        a="Hello world!"
\tb="indented"
"""

# Directory containing this test file; used to locate fixture files below.
HERE = os.path.dirname(__file__)
| 30 |
| 31 |
class PhysTokensTest(CoverageTest):
    """Tests for coverage.py's improved tokenizer."""

    run_in_temp_dir = False

    def check_tokenization(self, source):
        """Tokenize `source`, then put it back together, should be the same."""
        # Rebuild each physical line from its (class, text) token pairs.
        tokenized = "".join(
            "".join(tok_text for _, tok_text in token_line) + "\n"
            for token_line in source_token_lines(source)
        )
        # source_token_lines doesn't preserve trailing spaces, so trim all that
        # before comparing.
        source = re.sub(r"(?m)[ \t]+$", "", source.replace('\r\n', '\n'))
        tokenized = re.sub(r"(?m)[ \t]+$", "", tokenized)
        self.assertMultiLineEqual(source, tokenized)

    def check_file_tokenization(self, fname):
        """Use the contents of `fname` for `check_tokenization`."""
        source = get_python_source(fname)
        self.check_tokenization(source)

    def test_simple(self):
        expected = [
            [('com', "# yay!")],
            [('key', 'def'), ('ws', ' '), ('nam', 'foo'), ('op', '('),
                ('op', ')'), ('op', ':')],
            [('ws', '    '), ('nam', 'say'), ('op', '('),
                ('str', "'two = %d'"), ('ws', ' '), ('op', '%'),
                ('ws', ' '), ('num', '2'), ('op', ')')]
        ]
        self.assertEqual(list(source_token_lines(SIMPLE)), expected)
        self.check_tokenization(SIMPLE)

    def test_tab_indentation(self):
        # Mixed tabs and spaces...
        expected = [
            [('key', 'def'), ('ws', ' '), ('nam', 'hello'), ('op', '('),
                ('op', ')'), ('op', ':')],
            [('ws', '        '), ('nam', 'a'), ('op', '='),
                ('str', '"Hello world!"')],
            [('ws', '        '), ('nam', 'b'), ('op', '='),
                ('str', '"indented"')],
        ]
        self.assertEqual(list(source_token_lines(MIXED_WS)), expected)

    def test_tokenize_real_file(self):
        # Check the tokenization of a real file (large, btw).
        self.check_file_tokenization(os.path.join(HERE, "test_coverage.py"))

    def test_stress(self):
        # Check the tokenization of stress-test files (Unix and DOS endings).
        for stress_name in ("stress_phystoken.tok", "stress_phystoken_dos.tok"):
            self.check_file_tokenization(os.path.join(HERE, stress_name))
| 89 |
| 90 |
# The default encoding is different in Python 2 and Python 3.
DEF_ENCODING = "utf-8" if env.PY3 else "ascii"
| 96 |
| 97 |
# Byte strings that each declare 'cp850' as the source encoding, covering the
# declaration styles that source_encoding() must recognize.
ENCODING_DECLARATION_SOURCES = [
    # Various forms from http://www.python.org/dev/peps/pep-0263/
    b"# coding=cp850\n\n",
    b"#!/usr/bin/python\n# -*- coding: cp850 -*-\n",
    b"#!/usr/bin/python\n# vim: set fileencoding=cp850:\n",
    b"# This Python file uses this encoding: cp850\n",
    b"# This file uses a different encoding:\n# coding: cp850\n",
]
| 106 |
class SourceEncodingTest(CoverageTest):
    """Tests of source_encoding() for detecting encodings."""

    run_in_temp_dir = False

    def test_detect_source_encoding(self):
        # Every declaration style in the fixture list names cp850.
        for src in ENCODING_DECLARATION_SOURCES:
            detected = source_encoding(src)
            self.assertEqual(detected, 'cp850', "Wrong encoding in %r" % src)

    def test_detect_source_encoding_not_in_comment(self):
        if env.PY3 and env.PYPY:
            # PyPy3 gets this case wrong. Not sure what I can do about it,
            # so skip the test.
            self.skip("PyPy3 is wrong about non-comment encoding. Skip it.")
        # Should not detect anything here
        src = b'def parse(src, encoding=None):\n    pass'
        self.assertEqual(source_encoding(src), DEF_ENCODING)

    def test_detect_source_encoding_on_second_line(self):
        # A coding declaration should be found despite a first blank line.
        src = b"\n# coding=cp850\n\n"
        self.assertEqual(source_encoding(src), 'cp850')

    def test_dont_detect_source_encoding_on_third_line(self):
        # A coding declaration doesn't count on the third line.
        src = b"\n\n# coding=cp850\n\n"
        self.assertEqual(source_encoding(src), DEF_ENCODING)

    def test_detect_source_encoding_of_empty_file(self):
        # An important edge case.
        self.assertEqual(source_encoding(b""), DEF_ENCODING)

    def test_bom(self):
        # A BOM means utf-8.
        bom_source = b"\xEF\xBB\xBFtext = 'hello'\n"
        self.assertEqual(source_encoding(bom_source), 'utf-8-sig')

        # But it has to be the only authority.
        conflicting = b"\xEF\xBB\xBF# coding: cp850\n"
        with self.assertRaises(SyntaxError):
            source_encoding(conflicting)
| 152 |
| 153 |
class NeuterEncodingDeclarationTest(CoverageTest):
    """Tests of phystokens.neuter_encoding_declaration()."""

    run_in_temp_dir = False

    def test_neuter_encoding_declaration(self):
        # After neutering, only the default encoding should be detectable.
        for original in ENCODING_DECLARATION_SOURCES:
            neutered = neuter_encoding_declaration(original.decode("ascii"))
            neutered = neutered.encode("ascii")
            detected = source_encoding(neutered)
            self.assertEqual(
                detected,
                DEF_ENCODING,
                "Wrong encoding in %r" % neutered
            )
OLD | NEW |