Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1804)

Unified Diff: tools/traffic_annotation/auditor/annotation_relevent_filter.py

Issue 2905263002: Filter added to prune files before applying network annotation extractor. (Closed)
Patch Set: Created 3 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: tools/traffic_annotation/auditor/annotation_relevent_filter.py
diff --git a/tools/traffic_annotation/auditor/annotation_relevent_filter.py b/tools/traffic_annotation/auditor/annotation_relevent_filter.py
new file mode 100755
index 0000000000000000000000000000000000000000..6186dd9f83c983ea4f8764b5f3b676b00e983ed0
--- /dev/null
+++ b/tools/traffic_annotation/auditor/annotation_relevent_filter.py
@@ -0,0 +1,152 @@
+#!/usr/bin/env python
+# Copyright (c) 2017 The Chromium Authors. All rights reserved.
msramek 2017/05/31 11:31:36 nit: We don't use (c) anymore.
Ramin Halavati 2017/05/31 12:28:25 Done.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+import codecs
+import os.path
+import posixpath
+import re
+import subprocess
+import sys
+
+
+def NormalizePath(path):
msramek 2017/05/31 11:31:35 nit: make this also private?
Ramin Halavati 2017/05/31 12:28:25 Done.
+ """Returns a path normalized to how we write DEPS rules and compare paths."""
+ return os.path.normcase(path).replace(os.path.sep, posixpath.sep)
+
+
+def _GitSourceDirectories(base_directory):
+ """Returns set of normalized paths to subdirectories containing sources
msramek 2017/05/31 11:31:35 nit: the set
Ramin Halavati 2017/05/31 12:28:24 Done.
+ managed by git."""
+ base_dir_norm = NormalizePath(base_directory)
+ git_source_directories = set([base_dir_norm])
+
+ git_cmd = 'git.bat' if os.name == 'nt' else 'git'
msramek 2017/05/31 11:31:35 Where does this come from? Can you add a comment w
Ramin Halavati 2017/05/31 12:28:24 Comment added in docstring.
msramek 2017/05/31 14:55:24 I hoped to be able to point to some documentation
Ramin Halavati 2017/05/31 19:19:13 Function Replaced!
+ git_ls_files_cmd = [git_cmd, 'ls-files']
+ popen = subprocess.Popen(git_ls_files_cmd,
+ stdout=subprocess.PIPE,
+ cwd=base_directory)
+ try:
+ try:
+ for line in popen.stdout:
+ dir_path = os.path.join(base_directory, os.path.dirname(line))
+ dir_path_norm = NormalizePath(dir_path)
+ # Add the directory as well as all the parent directories,
+ # stopping once we reach an already-listed directory.
+ while dir_path_norm not in git_source_directories:
+ git_source_directories.add(dir_path_norm)
+ dir_path_norm = posixpath.dirname(dir_path_norm)
+ finally:
+ popen.stdout.close()
+ finally:
+ popen.wait()
+
+ return git_source_directories
+
+
+class NetworkTrafficAnnotationFileFilter():
+ KEYWORDS = [
+ 'network_traffic_annotation.h',
+ 'network_traffic_annotation_test_helper.h',
+ 'NetworkTrafficAnnotationTag',
+ 'PartialNetworkTrafficAnnotationTag',
+ 'DefineNetworkTrafficAnnotation',
+ 'DefinePartialNetworkTrafficAnnotation',
+ 'CompleteNetworkTrafficAnnotation',
+ 'BranchedCompleteNetworkTrafficAnnotation',
+ 'NO_TRAFFIC_ANNOTATION_YET',
+ 'NO_PARTIAL_TRAFFIC_ANNOTATION_YET',
+ 'MISSING_TRAFFIC_ANNOTATION',
+ 'TRAFFIC_ANNOTATION_FOR_TESTS',
+ 'PARTIAL_TRAFFIC_ANNOTATION_FOR_TESTS',
+ 'SSLClientSocket', # SSLClientSocket::
msramek 2017/05/31 11:31:35 style: two spaces before comments
Ramin Halavati 2017/05/31 12:28:25 Done.
+ 'TCPClientSocket', # TCPClientSocket::
+ 'UDPClientSocket', # UDPClientSocket::
+ 'URLFetcher::Create', # <----
+ 'CreateDatagramClientSocket',# ClientSocketFactory::
+ 'CreateSSLClientSocket', # ClientSocketFactory::
+ 'CreateTransportClientSocket', # ClientSocketFactory::
+ 'CreateRequest', # URLRequestContext::
+ ]
+
+ def __init__(self,
+ base_directory=None,
+ skip_tests=True):
+ """Creates a new NetworkTrafficAnnotationFileFilter.
+
+ Args:
+ base_directory: str Local path to root of checkout, e.g. C:\chr\src.
msramek 2017/05/31 11:31:36 The description says local path, but the example i
Ramin Halavati 2017/05/31 12:28:24 Done.
+ skip_tests: bool Flag stating if test files should be returned or not.
+ """
+ base_directory = (base_directory or
+ os.path.join(os.path.dirname(__file__),
msramek 2017/05/31 11:31:35 Can you document this part (i.e. what happens if b
Ramin Halavati 2017/05/31 12:28:25 Done.
+ os.path.pardir, os.path.pardir))
msramek 2017/05/31 11:31:35 style: should be offset +4 from the beginning of t
Ramin Halavati 2017/05/31 12:28:24 Done.
+ self.base_directory = os.path.abspath(base_directory)
+ self._git_source_directories = None
+
+ if os.path.exists(os.path.join(base_directory, '.git')):
+ self.is_git = True
+ elif os.path.exists(os.path.join(base_directory, '.svn')):
msramek 2017/05/31 11:31:35 Why do we need to support SVN?
Ramin Halavati 2017/05/31 12:28:25 This part is also copied from builddeps.py. Is it e
msramek 2017/05/31 14:55:24 Chromium used to support both SVN and GIT back in
Ramin Halavati 2017/05/31 19:19:13 Acknowledged.
+ self.is_git = False
+ else:
+ raise "%s is not a repository root" % base_directory
msramek 2017/05/31 11:31:36 Ditto - please be consistent with the single and d
Ramin Halavati 2017/05/31 12:28:24 Done.
+
+ self.content_matcher = re.compile(".*(" + "|".join(self.KEYWORDS) + ").*")
msramek 2017/05/31 11:31:35 The keywords contain "." which will be interpreted
Ramin Halavati 2017/05/31 12:28:24 Done.
+ self.file_name_matcher = re.compile(
+ '(?!.*?test)^.*(\.cc|\.mm)' if skip_tests else
+ '.*(\.cc|\.mm)')
msramek 2017/05/31 11:31:35 nit: Maybe add ^$? This will currently match "file
Ramin Halavati 2017/05/31 12:28:24 $ added, ^ is required?
msramek 2017/05/31 14:55:24 I'm aware that re.match() starts matching from the
Ramin Halavati 2017/05/31 19:19:13 Done.
+
+
+ def FileHasReleventContent(self, filename):
msramek 2017/05/31 11:31:35 s/relevent/relevant/ here and elsewhere, includin
Ramin Halavati 2017/05/31 12:28:24 Done.
+ with open(filename, 'r') as in_file:
+ for line in in_file:
+ if self.content_matcher.match(line):
+ return True
+ return False
+
+
+ def GetFilteredFilesList(self, dir_name):
+ """Returns the list of relevent files in given directory.
+
+ Args:
+ dir_name: str The directory to search for relevent files. All child
+ directories would also be serached.
msramek 2017/05/31 11:31:36 typo: searched
Ramin Halavati 2017/05/31 12:28:25 Done.
+
+ Returns:
+ list of str List of relevent files.
+ """
+ if self.is_git and self._git_source_directories is None:
+ self._git_source_directories = _GitSourceDirectories(self.base_directory)
+
+ # Collect a list of all files and directories to check.
+ files_to_check = []
+ if dir_name and not os.path.isabs(dir_name):
+ dir_name = os.path.join(self.base_directory, dir_name)
+ dirs_to_check = [dir_name or self.base_directory]
+ while dirs_to_check:
+ current_dir = dirs_to_check.pop()
+
+ # Check that this directory is part of the source repository. This
+ # prevents us from descending into third-party code or directories
+ # generated by the build system.
+ if self.is_git:
+ if NormalizePath(current_dir) not in self._git_source_directories:
+ continue
+ elif not os.path.exists(os.path.join(current_dir, '.svn')):
+ continue
+
+ current_dir_contents = sorted(os.listdir(current_dir))
+ file_names = []
+ sub_dirs = []
+ for file_name in current_dir_contents:
+ full_name = os.path.join(current_dir, file_name)
+ if os.path.isdir(full_name):
+ sub_dirs.append(full_name)
+ else:
+ if self.file_name_matcher.match(full_name) and \
+ self.FileHasReleventContent(full_name):
+ file_names.append(full_name)
+ dirs_to_check.extend(reversed(sub_dirs))
+ files_to_check += file_names
+ return files_to_check

Powered by Google App Engine
This is Rietveld 408576698