Chromium Code Reviews| Index: tools/traffic_annotation/auditor/traffic_annotation_file_filter.py |
| diff --git a/tools/traffic_annotation/auditor/traffic_annotation_file_filter.py b/tools/traffic_annotation/auditor/traffic_annotation_file_filter.py |
| new file mode 100755 |
| index 0000000000000000000000000000000000000000..177f8923fc8d3fbd06dd01e4f9ba660c2039fcbe |
| --- /dev/null |
| +++ b/tools/traffic_annotation/auditor/traffic_annotation_file_filter.py |
| @@ -0,0 +1,158 @@ |
| +#!/usr/bin/env python |
| +# Copyright 2017 The Chromium Authors. All rights reserved. |
| +# Use of this source code is governed by a BSD-style license that can be |
| +# found in the LICENSE file. |
| + |
| +import codecs |
| +import os.path |
| +import posixpath |
| +import re |
| +import subprocess |
| +import sys |
| + |
| + |
| +def _NormalizePath(path): |
| + """Returns a path normalized to how we write DEPS rules and compare paths.""" |
| + return os.path.normcase(path).replace(os.path.sep, posixpath.sep) |
| + |
| + |
def _GitSourceDirectories(base_directory):
  """Returns the set of normalized paths to subdirectories containing sources
  managed by git.

  Runs 'git ls-files' inside |base_directory| and records the directory of
  every listed file together with all of its parent directories up to
  |base_directory|.
  This code is mostly copied from 'buildtools/checkdeps/builddeps.py'.

  Args:
    base_directory: str Path to the root of the git checkout.

  Returns:
    set of str Normalized directory paths; the normalized |base_directory| is
        always a member.
  """
  base_dir_norm = _NormalizePath(base_directory)
  git_source_directories = set([base_dir_norm])

  git_cmd = 'git.bat' if os.name == 'nt' else 'git'
  # universal_newlines=True makes the pipe yield text lines on both Python 2
  # and Python 3. Without it Python 3 yields bytes, which cannot be joined
  # with the str |base_directory| below.
  popen = subprocess.Popen([git_cmd, 'ls-files'],
                           stdout=subprocess.PIPE,
                           cwd=base_directory,
                           universal_newlines=True)
  try:
    for line in popen.stdout:
      relative_dir = os.path.dirname(line.rstrip('\r\n'))
      dir_path_norm = _NormalizePath(
          os.path.join(base_directory, relative_dir))
      # Add the directory as well as all the parent directories,
      # stopping once we reach an already-listed directory.
      while dir_path_norm not in git_source_directories:
        git_source_directories.add(dir_path_norm)
        dir_path_norm = posixpath.dirname(dir_path_norm)
  finally:
    # Always release the pipe first and then reap the child process, even if
    # reading the output failed.
    try:
      popen.stdout.close()
    finally:
      popen.wait()

  return git_source_directories
| + |
| + |
class TrafficAnnotationFileFilter(object):
  """Finds source files that are relevant to network traffic annotations.

  A file is reported when its name ends in '.cc' or '.mm' (optionally
  excluding paths that contain 'test') and at least one of its lines contains
  one of KEYWORDS.
  """

  KEYWORDS = [
    'network_traffic_annotation',
    'network_traffic_annotation_test_helper',
    'NetworkTrafficAnnotationTag',
    'PartialNetworkTrafficAnnotationTag',
    'DefineNetworkTrafficAnnotation',
    'DefinePartialNetworkTrafficAnnotation',
    'CompleteNetworkTrafficAnnotation',
    'BranchedCompleteNetworkTrafficAnnotation',
    'NO_TRAFFIC_ANNOTATION_YET',
    'NO_PARTIAL_TRAFFIC_ANNOTATION_YET',
    'MISSING_TRAFFIC_ANNOTATION',
    'TRAFFIC_ANNOTATION_FOR_TESTS',
    'PARTIAL_TRAFFIC_ANNOTATION_FOR_TESTS',
    'SSLClientSocket',  # SSLClientSocket::
    'TCPClientSocket',  # TCPClientSocket::
    'UDPClientSocket',  # UDPClientSocket::
    'URLFetcher::Create',  # This one is used with class as it's too generic.
    'CreateDatagramClientSocket',  # ClientSocketFactory::
    'CreateSSLClientSocket',  # ClientSocketFactory::
    'CreateTransportClientSocket',  # ClientSocketFactory::
    'CreateRequest',  # URLRequestContext::
  ]

  def __init__(self,
               base_directory=None,
               skip_tests=True):
    r"""Creates a new TrafficAnnotationFileFilter.

    Args:
      base_directory: str Local or absolute path to root of checkout, e.g.
          C:\chr\src. If this value is not present, it would be estimated
          relative to the path of this file (by going two directories up).
      skip_tests: bool If True, files whose path (relative to the checkout
          root) contains 'test' are left out of the results.

    Raises:
      ValueError: if the base directory contains neither '.git' nor '.svn'.
    """
    # NOTE(review): if this file lives in tools/traffic_annotation/auditor/,
    # two levels up is tools/ rather than the checkout root -- confirm that
    # the default below is what is intended.
    base_directory = (base_directory or
                      os.path.join(os.path.dirname(__file__),
                                   os.path.pardir, os.path.pardir))
    self.base_directory = os.path.abspath(base_directory)
    self._git_source_directories = None

    if os.path.exists(os.path.join(self.base_directory, '.git')):
      self.is_git = True
    elif os.path.exists(os.path.join(self.base_directory, '.svn')):
      self.is_git = False
    else:
      # Raising a plain string is not a valid exception; use a real one.
      raise ValueError('%s is not a repository root' % base_directory)

    # The keywords are spliced into a regular expression without escaping, so
    # each one must consist solely of letters, ':' and '_'.
    assert all(re.match(r'[A-Za-z:_]*\Z', keyword)
               for keyword in self.KEYWORDS)
    self.content_matcher = re.compile('.*(' + '|'.join(self.KEYWORDS) + ').*')
    self.file_name_matcher = re.compile(
        r'(?!.*?test)^.*(\.cc|\.mm)$' if skip_tests else
        r'.*(\.cc|\.mm)$')

  def _MatchesFileName(self, full_name):
    """Returns True if |full_name| passes the file name filter.

    The filter is applied to the path relative to the checkout root, so that
    the location of the checkout itself (e.g. '/home/tester/src') cannot
    trigger the 'test' exclusion for every file.
    """
    try:
      name = os.path.relpath(full_name, self.base_directory)
    except ValueError:
      # No relative path exists (e.g. the paths are on different Windows
      # drives); fall back to the full path.
      name = full_name
    return bool(self.file_name_matcher.match(name))

  def FileHasRelevantContent(self, filename):
    """Returns True if any line of |filename| contains one of KEYWORDS.

    Args:
      filename: str Path of the file to scan.

    Returns:
      bool True as soon as a matching line is found, False otherwise.
    """
    with open(filename, 'r') as in_file:
      return any(self.content_matcher.match(line) for line in in_file)

  def GetFilteredFilesList(self, dir_name):
    """Returns the list of relevant files in given directory.

    Args:
      dir_name: str The directory to search for relevant files. All child
          directories would also be searched. A relative path is resolved
          against the checkout root; an empty value means the checkout root.

    Returns:
      list of str List of relevant files.
    """
    if self.is_git and self._git_source_directories is None:
      self._git_source_directories = _GitSourceDirectories(self.base_directory)

    if dir_name and not os.path.isabs(dir_name):
      dir_name = os.path.join(self.base_directory, dir_name)

    # Collect a list of all files and directories to check.
    files_to_check = []
    dirs_to_check = [dir_name or self.base_directory]
    while dirs_to_check:
      current_dir = dirs_to_check.pop()

      # Check that this directory is part of the source repository. This
      # prevents us from descending into third-party code or directories
      # generated by the build system.
      if self.is_git:
        if _NormalizePath(current_dir) not in self._git_source_directories:
          continue
      elif not os.path.exists(os.path.join(current_dir, '.svn')):
        continue

      file_names = []
      sub_dirs = []
      for file_name in sorted(os.listdir(current_dir)):
        full_name = os.path.join(current_dir, file_name)
        if os.path.isdir(full_name):
          sub_dirs.append(full_name)
        elif (self._MatchesFileName(full_name) and
              self.FileHasRelevantContent(full_name)):
          file_names.append(full_name)
      # The stack is popped from the end, so push the subdirectories in
      # reverse to visit them in sorted order.
      dirs_to_check.extend(reversed(sub_dirs))
      files_to_check += file_names
    return files_to_check