| Index: tools/telemetry/third_party/coverage/tests/test_phystokens.py
|
| diff --git a/tools/telemetry/third_party/coverage/tests/test_phystokens.py b/tools/telemetry/third_party/coverage/tests/test_phystokens.py
|
| new file mode 100644
|
| index 0000000000000000000000000000000000000000..b4a106fdcda057605d2e6622b492db15998c272f
|
| --- /dev/null
|
| +++ b/tools/telemetry/third_party/coverage/tests/test_phystokens.py
|
| @@ -0,0 +1,167 @@
|
| +# Licensed under the Apache License: http://www.apache.org/licenses/LICENSE-2.0
|
| +# For details: https://bitbucket.org/ned/coveragepy/src/default/NOTICE.txt
|
| +
|
| +"""Tests for coverage.py's improved tokenizer."""
|
| +
|
| +import os.path
|
| +import re
|
| +
|
| +from coverage import env
|
| +from coverage.phystokens import source_token_lines, source_encoding
|
| +from coverage.phystokens import neuter_encoding_declaration
|
| +from coverage.python import get_python_source
|
| +
|
| +from tests.coveragetest import CoverageTest
|
| +
|
| +
|
# A tiny source sample for the tokenizer: a comment, a `def`, and a call
# mixing a name, a string, an operator, and a number.  The 4-space indent
# on the `say` line must match the ('ws', '    ') token expected in
# test_simple below.
SIMPLE = u"""\
# yay!
def foo():
    say('two = %d' % 2)
"""
|
| +
|
# A sample whose indentation mixes spaces (the `a=` line) with a literal
# tab (the "\t" escape before `b=`), to exercise whitespace handling in
# the tokenizer.
MIXED_WS = u"""\
def hello():
    a="Hello world!"
\tb="indented"
"""
|
| +
|
# Directory containing this test module; used to locate fixture files.
HERE = os.path.dirname(__file__)
|
| +
|
| +
|
class PhysTokensTest(CoverageTest):
    """Tests for coverage.py's improved tokenizer."""

    run_in_temp_dir = False

    def check_tokenization(self, source):
        """Tokenize `source`, reassemble it, and assert a round-trip match."""
        # Rebuild the source text from the token stream, one physical line
        # at a time.
        rebuilt = "".join(
            "".join(tok_text for _, tok_text in token_line) + "\n"
            for token_line in source_token_lines(source)
        )
        # source_token_lines doesn't preserve trailing spaces, so trim all
        # that from both versions before comparing.
        source = re.sub(r"(?m)[ \t]+$", "", source.replace('\r\n', '\n'))
        rebuilt = re.sub(r"(?m)[ \t]+$", "", rebuilt)
        self.assertMultiLineEqual(source, rebuilt)

    def check_file_tokenization(self, fname):
        """Apply `check_tokenization` to the contents of the file `fname`."""
        self.check_tokenization(get_python_source(fname))

    def test_simple(self):
        # Token classes and text for the SIMPLE sample, line by line.
        expected = [
            [('com', "# yay!")],
            [('key', 'def'), ('ws', ' '), ('nam', 'foo'), ('op', '('),
                ('op', ')'), ('op', ':')],
            [('ws', '    '), ('nam', 'say'), ('op', '('),
                ('str', "'two = %d'"), ('ws', ' '), ('op', '%'),
                ('ws', ' '), ('num', '2'), ('op', ')')],
        ]
        self.assertEqual(list(source_token_lines(SIMPLE)), expected)
        self.check_tokenization(SIMPLE)

    def test_tab_indentation(self):
        # Mixed tabs and spaces: the tab-indented line appears as expanded
        # whitespace in the token stream.
        expected = [
            [('key', 'def'), ('ws', ' '), ('nam', 'hello'), ('op', '('),
                ('op', ')'), ('op', ':')],
            [('ws', '    '), ('nam', 'a'), ('op', '='),
                ('str', '"Hello world!"')],
            [('ws', '        '), ('nam', 'b'), ('op', '='),
                ('str', '"indented"')],
        ]
        self.assertEqual(list(source_token_lines(MIXED_WS)), expected)

    def test_tokenize_real_file(self):
        # Check the tokenization of a real file (large, btw).
        self.check_file_tokenization(os.path.join(HERE, "test_coverage.py"))

    def test_stress(self):
        # Check the tokenization of the stress-test fixtures, with Unix and
        # DOS line endings respectively.
        for fixture in ("stress_phystoken.tok", "stress_phystoken_dos.tok"):
            self.check_file_tokenization(os.path.join(HERE, fixture))
|
| +
|
| +
|
# The default source encoding differs between major Python versions:
# Python 3 assumes utf-8, Python 2 assumes ascii.
DEF_ENCODING = "utf-8" if env.PY3 else "ascii"
|
| +
|
| +
|
# Byte-string sources that each declare the cp850 encoding, covering the
# various declaration forms accepted by the PEP 263 regex.
ENCODING_DECLARATION_SOURCES = [
    # Various forms from http://www.python.org/dev/peps/pep-0263/
    b"# coding=cp850\n\n",
    b"#!/usr/bin/python\n# -*- coding: cp850 -*-\n",
    b"#!/usr/bin/python\n# vim: set fileencoding=cp850:\n",
    b"# This Python file uses this encoding: cp850\n",
    b"# This file uses a different encoding:\n# coding: cp850\n",
]
|
| +
|
class SourceEncodingTest(CoverageTest):
    """Tests of source_encoding() for detecting encodings."""

    run_in_temp_dir = False

    def test_detect_source_encoding(self):
        # Every declaration form in the shared samples should be detected.
        for src in ENCODING_DECLARATION_SOURCES:
            detected = source_encoding(src)
            self.assertEqual(detected, 'cp850', "Wrong encoding in %r" % src)

    def test_detect_source_encoding_not_in_comment(self):
        if env.PYPY and env.PY3:
            # PyPy3 gets this case wrong. Not sure what I can do about it,
            # so skip the test.
            self.skip("PyPy3 is wrong about non-comment encoding. Skip it.")
        # A "coding:" match outside a comment should not be detected.
        src = b'def parse(src, encoding=None):\n    pass'
        self.assertEqual(source_encoding(src), DEF_ENCODING)

    def test_detect_source_encoding_on_second_line(self):
        # A coding declaration should be found despite a first blank line.
        self.assertEqual(source_encoding(b"\n# coding=cp850\n\n"), 'cp850')

    def test_dont_detect_source_encoding_on_third_line(self):
        # A coding declaration doesn't count on the third line.
        self.assertEqual(
            source_encoding(b"\n\n# coding=cp850\n\n"), DEF_ENCODING)

    def test_detect_source_encoding_of_empty_file(self):
        # An important edge case.
        self.assertEqual(source_encoding(b""), DEF_ENCODING)

    def test_bom(self):
        # A BOM means utf-8.
        self.assertEqual(
            source_encoding(b"\xEF\xBB\xBFtext = 'hello'\n"), 'utf-8-sig')
        # But it has to be the only authority: a BOM contradicted by an
        # explicit declaration is a SyntaxError.
        with self.assertRaises(SyntaxError):
            source_encoding(b"\xEF\xBB\xBF# coding: cp850\n")
|
| +
|
| +
|
class NeuterEncodingDeclarationTest(CoverageTest):
    """Tests of phystokens.neuter_encoding_declaration()."""

    run_in_temp_dir = False

    def test_neuter_encoding_declaration(self):
        # Neutering each declaration sample should make detection fall back
        # to the platform default encoding.
        for src in ENCODING_DECLARATION_SOURCES:
            neutered_text = neuter_encoding_declaration(src.decode("ascii"))
            neutered = neutered_text.encode("ascii")
            self.assertEqual(
                source_encoding(neutered),
                DEF_ENCODING,
                "Wrong encoding in %r" % neutered,
            )
|
|
|