Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1)

Side by Side Diff: tools/telemetry/third_party/coverage/tests/test_phystokens.py

Issue 1366913004: Add coverage Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Created 5 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLD | NEW
(Empty)
1 # Licensed under the Apache License: http://www.apache.org/licenses/LICENSE-2.0
2 # For details: https://bitbucket.org/ned/coveragepy/src/default/NOTICE.txt
3
4 """Tests for coverage.py's improved tokenizer."""
5
6 import os.path
7 import re
8
9 from coverage import env
10 from coverage.phystokens import source_token_lines, source_encoding
11 from coverage.phystokens import neuter_encoding_declaration
12 from coverage.python import get_python_source
13
14 from tests.coveragetest import CoverageTest
15
16
# A small source snippet used for round-trip tokenization checks.
SIMPLE = u"""\
# yay!
def foo():
    say('two = %d' % 2)
"""

# Source mixing spaces and a hard tab for indentation: the `a=` line uses
# eight spaces, the `b=` line a literal tab (\t inside the string).
MIXED_WS = u"""\
def hello():
        a="Hello world!"
\tb="indented"
"""

# Directory containing this test file; used to locate fixture files on disk.
HERE = os.path.dirname(__file__)
31
class PhysTokensTest(CoverageTest):
    """Tests for coverage.py's improved tokenizer."""

    # These tests never write files, so no temporary directory is needed.
    run_in_temp_dir = False

    def check_tokenization(self, source):
        """Tokenize `source`, then put it back together, should be the same."""
        tokenized = ""
        for line in source_token_lines(source):
            text = "".join(t for _, t in line)
            tokenized += text + "\n"
        # source_token_lines doesn't preserve trailing spaces, so trim all that
        # before comparing.
        source = source.replace('\r\n', '\n')
        source = re.sub(r"(?m)[ \t]+$", "", source)
        tokenized = re.sub(r"(?m)[ \t]+$", "", tokenized)
        self.assertMultiLineEqual(source, tokenized)

    def check_file_tokenization(self, fname):
        """Use the contents of `fname` for `check_tokenization`."""
        self.check_tokenization(get_python_source(fname))

    def test_simple(self):
        # Each physical line becomes a list of (token_class, text) pairs.
        self.assertEqual(list(source_token_lines(SIMPLE)),
            [
                [('com', "# yay!")],
                [('key', 'def'), ('ws', ' '), ('nam', 'foo'), ('op', '('),
                    ('op', ')'), ('op', ':')],
                [('ws', '    '), ('nam', 'say'), ('op', '('),
                    ('str', "'two = %d'"), ('ws', ' '), ('op', '%'),
                    ('ws', ' '), ('num', '2'), ('op', ')')]
            ])
        self.check_tokenization(SIMPLE)

    def test_tab_indentation(self):
        # Mixed tabs and spaces...
        # Both indents tokenize as eight-space 'ws' (the tab is expanded).
        self.assertEqual(list(source_token_lines(MIXED_WS)),
            [
                [('key', 'def'), ('ws', ' '), ('nam', 'hello'), ('op', '('),
                    ('op', ')'), ('op', ':')],
                [('ws', '        '), ('nam', 'a'), ('op', '='),
                    ('str', '"Hello world!"')],
                [('ws', '        '), ('nam', 'b'), ('op', '='),
                    ('str', '"indented"')],
            ])

    def test_tokenize_real_file(self):
        # Check the tokenization of a real file (large, btw).
        real_file = os.path.join(HERE, "test_coverage.py")
        self.check_file_tokenization(real_file)

    def test_stress(self):
        # Check the tokenization of a stress-test file.
        stress = os.path.join(HERE, "stress_phystoken.tok")
        self.check_file_tokenization(stress)
        # The DOS variant exercises \r\n line endings.
        stress = os.path.join(HERE, "stress_phystoken_dos.tok")
        self.check_file_tokenization(stress)
89
90
# The default encoding differs between Python 2 and Python 3: source with no
# declaration falls back to utf-8 on PY3, ascii on PY2.
DEF_ENCODING = "utf-8" if env.PY3 else "ascii"


# Byte-string sources declaring cp850, one per declaration style.
ENCODING_DECLARATION_SOURCES = [
    # Various forms from http://www.python.org/dev/peps/pep-0263/
    b"# coding=cp850\n\n",
    b"#!/usr/bin/python\n# -*- coding: cp850 -*-\n",
    b"#!/usr/bin/python\n# vim: set fileencoding=cp850:\n",
    b"# This Python file uses this encoding: cp850\n",
    b"# This file uses a different encoding:\n# coding: cp850\n",
]
106
class SourceEncodingTest(CoverageTest):
    """Tests of source_encoding() for detecting encodings."""

    # No files are written by these tests; skip the temp-dir setup.
    run_in_temp_dir = False

    def test_detect_source_encoding(self):
        """Every PEP 263 declaration form must be recognized as cp850."""
        for src in ENCODING_DECLARATION_SOURCES:
            self.assertEqual(
                source_encoding(src), 'cp850',
                "Wrong encoding in %r" % src,
            )

    def test_detect_source_encoding_not_in_comment(self):
        """A coding= inside ordinary code is not a declaration."""
        if env.PYPY and env.PY3:
            # PyPy3 gets this case wrong. Not sure what I can do about it,
            # so skip the test.
            self.skip("PyPy3 is wrong about non-comment encoding. Skip it.")
        # Should not detect anything here
        src = b'def parse(src, encoding=None):\n pass'
        self.assertEqual(source_encoding(src), DEF_ENCODING)

    def test_detect_source_encoding_on_second_line(self):
        # A coding declaration should be found despite a first blank line.
        src = b"\n# coding=cp850\n\n"
        self.assertEqual(source_encoding(src), 'cp850')

    def test_dont_detect_source_encoding_on_third_line(self):
        # A coding declaration doesn't count on the third line.
        src = b"\n\n# coding=cp850\n\n"
        self.assertEqual(source_encoding(src), DEF_ENCODING)

    def test_detect_source_encoding_of_empty_file(self):
        # An important edge case.
        self.assertEqual(source_encoding(b""), DEF_ENCODING)

    def test_bom(self):
        # A BOM means utf-8.
        bom_src = b"\xEF\xBB\xBFtext = 'hello'\n"
        self.assertEqual(source_encoding(bom_src), 'utf-8-sig')

        # But it has to be the only authority.
        conflicting = b"\xEF\xBB\xBF# coding: cp850\n"
        with self.assertRaises(SyntaxError):
            source_encoding(conflicting)
152
153
class NeuterEncodingDeclarationTest(CoverageTest):
    """Tests of phystokens.neuter_encoding_declaration()."""

    # These tests touch no files, so no temp dir is required.
    run_in_temp_dir = False

    def test_neuter_encoding_declaration(self):
        """After neutering, no declaration should be detectable."""
        for original in ENCODING_DECLARATION_SOURCES:
            text = original.decode("ascii")
            neutered = neuter_encoding_declaration(text).encode("ascii")
            self.assertEqual(
                source_encoding(neutered),
                DEF_ENCODING,
                "Wrong encoding in %r" % neutered
            )
OLD | NEW

Powered by Google App Engine
This is Rietveld 408576698