tools/traffic_annotation/auditor/traffic_annotation_auditor.py - Issue 2905263002: Filter added to prune files before applying network annotation extractor.

Side by Side Diff: tools/traffic_annotation/auditor/traffic_annotation_auditor.py

Issue 2905263002: Filter added to prune files before applying network annotation extractor. (Closed)

Patch Set: Created 3 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 #!/usr/bin/env python	1 #!/usr/bin/env python

2 # Copyright (c) 2017 The Chromium Authors. All rights reserved.	2 # Copyright (c) 2017 The Chromium Authors. All rights reserved.
	msramek (2017/05/31 11:31:36): Ditto.
	Ramin Halavati (2017/05/31 12:28:25): Done.
3 # Use of this source code is governed by a BSD-style license that can be	3 # Use of this source code is governed by a BSD-style license that can be

4 # found in the LICENSE file.	4 # found in the LICENSE file.

5	5

6 """This script is used to extract network traffic annotations from Chrome.	6 """This script is used to extract network traffic annotations from Chrome.

7 Please refer to README.md for running steps."""	7 Please refer to README.md for running steps."""

8	8

9 import argparse	9 import argparse

	10 import datetime

10 import os	11 import os

11 import subprocess	12 import subprocess

12 import sys	13 import sys

	14 import tempfile

	15

	16 from annotation_relevent_filter import NetworkTrafficAnnotationFileFilter
	msramek (2017/05/31 11:31:36): How about renaming the module to traffic_annotation_file_filter, and the class to TrafficAnnotationFileFilter? That way they're consistent with each other, and with this file.
	Ramin Halavati (2017/05/31 12:28:26): Done.
	17

13	18

14 # These two lines are required to import protobuf from third_party directory	19 # These two lines are required to import protobuf from third_party directory

15 # instead of the one installed with python.	20 # instead of the one installed with python.

16 from prepare_protobuf import PrepareProtobuf	21 from prepare_protobuf import PrepareProtobuf

17 PrepareProtobuf()	22 PrepareProtobuf()

18	23

19 from google.protobuf import text_format	24 from google.protobuf import text_format

20 import traffic_annotation_pb2	25 import traffic_annotation_pb2

21	26

22	27

(...skipping 10 matching lines...) Expand all Loading...
33 'net/traffic_annotation/network_traffic_annotation.h'.	38 'net/traffic_annotation/network_traffic_annotation.h'.

34 args:	39 args:

35 unique_id: str The string to be converted to hash code.	40 unique_id: str The string to be converted to hash code.

36	41

37 Returns:	42 Returns:

38 unsigned int Hash code of the input string	43 unsigned int Hash code of the input string

39 """	44 """

40 return _RecursiveHash(unique_id) if len(unique_id) else -1	45 return _RecursiveHash(unique_id) if len(unique_id) else -1

41	46

42	47

43 def _RunClangTool(src_dir, build_dir, path_filters):	48 def _RunClangTool(src_dir, build_dir, path_filters, prefilter_files):

44 """Executes the clang tool to extract annotations.	49 """Executes the clang tool to extract annotations.

45 Args:	50 Args:

46 src_dir: str Path to the src directory of Chrome.	51 src_dir: str Path to the src directory of Chrome.

47 build_dir: str Path to the build directory.	52 build_dir: str Path to the build directory.

48 path_filters: list of str List of paths to source directories for	53 path_filters: list of str List of paths to source directories for

49 extraction.	54 extraction.

	55 prefilter_files: bool Flag stating if source files should be first filtered

	56 using annotation related keywords and then given to clang tool.

50	57

51 Returns:	58 Returns:

52 raw_annotations: str Output of clang tool (extracted content and metadata of	59 raw_annotations: str Output of clang tool (extracted content and metadata of

53 annotations).	60 annotations).

54 """	61 """

55 raw_annotations = ""	62 raw_annotations = ""

56 for path in path_filters:	63 args = [

57 args = [

58 src_dir + "/tools/clang/scripts/run_tool.py",	64 src_dir + "/tools/clang/scripts/run_tool.py",

59 "--generate-compdb",	65 "--generate-compdb",

60 "--tool=traffic_annotation_extractor",	66 "--tool=traffic_annotation_extractor",

61 "-p", build_dir,	67 "-p=" + build_dir,

62 path]	68 None]

63 if sys.platform == "win32":	69 if sys.platform == "win32":

64 args.insert(0, "python")	70 args.insert(0, "python")

	71

	72 if prefilter_files:

	73 source_filenames = tempfile.NamedTemporaryFile(mode='w+t', delete=False)
	msramek (2017/05/31 11:31:36): nit: please be consistent in the usage of single / double quotes.
	Ramin Halavati (2017/05/31 12:28:25): Done.
	74 file_filter = NetworkTrafficAnnotationFileFilter(src_dir, False)

	75

	76 for path in path_filters:

	77 files_list = file_filter.GetFilteredFilesList(path)

	78 for filename in files_list:

	79 source_filenames.write("%s\n" % filename)

	80 source_filenames.close()

	81 args[-1] = "--source-filenames=" + source_filenames.name
	msramek (2017/05/31 11:31:36): Adding None and then optionally overwriting it seems strange. Why not just append at the end?
	Ramin Halavati (2017/05/31 12:28:25): Because when prefilter_files is not selected, this value is replaced for each provided path. I can remove None and keep adding and removing it in the else part.
	msramek (2017/05/31 14:55:24): Oh, I see. Could you just comment on the line 70? Something like: `None]  # Placeholder for a path argument.`
	Ramin Halavati (2017/05/31 19:19:13): Done.
65 command = subprocess.Popen(args, stdout=subprocess.PIPE,	82 command = subprocess.Popen(args, stdout=subprocess.PIPE,

66 stderr=subprocess.PIPE)	83 stderr=subprocess.PIPE)

67 stdout_text, stderr_text = command.communicate()	84 stdout_text, stderr_text = command.communicate()

68 raw_annotations += stdout_text	85 raw_annotations += stdout_text

69 if stderr_text:	86 if stderr_text:

70 print stderr_text	87 print stderr_text

	88 os.remove(source_filenames.name)

	89 else:

	90 for path in path_filters:

	91 args[-1] = path

	92 command = subprocess.Popen(args, stdout=subprocess.PIPE,

	93 stderr=subprocess.PIPE)
	msramek (2017/05/31 11:31:36): style: offset
	Ramin Halavati (2017/05/31 12:28:26): Done.
	94 stdout_text, stderr_text = command.communicate()
	msramek (2017/05/31 11:31:36): Why is this part duplicated? It can be extracted after the if-else statement.
	Ramin Halavati (2017/05/31 12:28:25): In the else part, we call it separately for each path_filter and concatenate the results. I could add a helper function or collect them in a set and execute them without duplicate code, but don't know which is neater.
	95 raw_annotations += stdout_text

	96 if stderr_text:

	97 print stderr_text

71 return raw_annotations	98 return raw_annotations
	msramek (2017/05/31 11:31:36): Maybe return null if command exited with an error?
	Ramin Halavati (2017/05/31 12:28:25): Clang issues some errors for files that are not part of compiledb, but we don't care about them. I can remove printing stderr, but thought it may attract some attention sometime.
	msramek (2017/05/31 14:55:24): Acknowledged. Then keep it as it is :)
	Ramin Halavati (2017/05/31 19:19:13): Done.
72	99

73	100

74 def _ParsRawAnnotations(raw_annotations):	101 def _ParsRawAnnotations(raw_annotations):

75 """Parses raw annotations texts which are received from the clang tool.	102 """Parses raw annotations texts which are received from the clang tool.

76 Args:	103 Args:

77 raw_annotations: str Serialization of annotations and metadata. Each	104 raw_annotations: str Serialization of annotations and metadata. Each

78 annotation should have either of the following lines:	105 annotation should have either of the following lines:

79 1- "==== NEW ANNOTATION ===="	106 1- "==== NEW ANNOTATION ===="

80 2- File path.	107 2- File path.

81 3- Name of the function including this position.	108 3- Name of the function including this position.

(...skipping 126 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
208	235

209 def _WriteHashCodesFile(annotations, metadata, file_path):	236 def _WriteHashCodesFile(annotations, metadata, file_path):

210 """Writes unique ids and hash codes of annotations into a simple text file.	237 """Writes unique ids and hash codes of annotations into a simple text file.

211 args:	238 args:

212 annotations: ExtractedNetworkTrafficAnnotation A protobuf including all	239 annotations: ExtractedNetworkTrafficAnnotation A protobuf including all

213 extracted annotations.	240 extracted annotations.

214 metadata: list of dict Metadata for annotations, as specified in the outputs	241 metadata: list of dict Metadata for annotations, as specified in the outputs

215 of _ParsRawAnnotations function.	242 of _ParsRawAnnotations function.

216 file_path: str File path to the brief summary file.	243 file_path: str File path to the brief summary file.

217 """	244 """

218 with open(file_path, 'w') as summary_file:	245 hash_list = []

219 for annotation, meta in zip(annotations.network_traffic_annotation,	246 for annotation, meta in zip(annotations.network_traffic_annotation, metadata):

220 metadata):	247 hash_list += ["%s,%s" % (annotation.unique_id, meta['unique_id_hash'])]

221 summary_file.write(	248 for keyword in ("test", "test_partial", "undefined", "missing"):

222 "%s,%s\n" % (annotation.unique_id, meta['unique_id_hash']))	249 hash_list += ["%s,%s" % (keyword, _ComputeStringHash(keyword))]

223 for keyword in ("test", "test_partial", "undefined", "missing"):	250 open(file_path, 'w').write("\n".join(sorted(hash_list)))

224 summary_file.write(

225 "%s,%s\n" % (keyword, _ComputeStringHash(keyword)))

226	251

227	252

228 def main():	253 def main():

229 parser = argparse.ArgumentParser(description='Traffic Annotation Auditor.')	254 parser = argparse.ArgumentParser(description='Traffic Annotation Auditor.')

230 parser.add_argument('--build-dir',	255 parser.add_argument('--build-dir',

231 help='Path to the build directory.')	256 help='Path to the build directory.')

232 parser.add_argument('--extractor-output',	257 parser.add_argument('--extractor-output',

233 help='Optional path to the temporary file that extracted '	258 help='Optional path to the temporary file that extracted '

234 'annotations will be stored into.')	259 'annotations will be stored into.')

235 parser.add_argument('--extractor-input',	260 parser.add_argument('--extractor-input',

236 help='Optional path to the file that temporary extracted '	261 help='Optional path to the file that temporary extracted '

237 'annotations are already stored in. If this is '	262 'annotations are already stored in. If this is '

238 'provided, clang tool is not run and this is used '	263 'provided, clang tool is not run and this is used '

239 'as input.')	264 'as input.')

240 parser.add_argument('--summary-file',	265 parser.add_argument('--summary-file',

241 help='Path to the output file with all annotations.')	266 help='Path to the output file with all annotations.')

242 parser.add_argument('--hash-codes-file',	267 parser.add_argument('--hash-codes-file',

243 help='Path to the output file with the list of unique '	268 help='Path to the output file with the list of unique '

244 'ids and their hash codes.')	269 'ids and their hash codes.')

245 parser.add_argument('path_filters',	270 parser.add_argument('path_filters',

246 nargs='*',	271 nargs='*',

247 help='Optional paths to filter what files the tool is '	272 help='Optional paths to filter what files the tool is '

248 'run on.')	273 'run on.')

	274 parser.add_argument('--prefilter-files', action='store_true',

	275 help='Checks source files for patterns of annotations '

	276 'and network functions that may require annotation '

	277 'and limits running clang tool only on them.')

249 args = parser.parse_args()	278 args = parser.parse_args()

250	279

251 if not args.summary_file and not args.hash_codes_file:	280 if not args.summary_file and not args.hash_codes_file:

252 print "Warning: Output file not specified."	281 print "Warning: Output file not specified."

253	282

254 # If a pre-extracted input file is provided, load it.	283 # If a pre-extracted input file is provided, load it.

255 if args.extractor_input:	284 if args.extractor_input:

256 with open(args.extractor_input, 'r') as raw_file:	285 with open(args.extractor_input, 'r') as raw_file:

257 raw_annotations = raw_file.read()	286 raw_annotations = raw_file.read()

258 else:	287 else:

259 # Either extacted input file or build directory should be provided.	288 # Either extacted input file or build directory should be provided.

260 if not args.build_dir:	289 if not args.build_dir:

261 print "You must either specify the build directory to run the clang " \	290 print "You must either specify the build directory to run the clang " \

262 "tool and extract annotations, or specify the input directory " \	291 "tool and extract annotations, or specify the input directory " \

263 "where extracted annotation files already exist.\n"	292 "where extracted annotation files already exist.\n"

264 return 1	293 return 1

265	294

266 # Get Chrome source directory with relative path from this file.	295 # Get Chrome source directory with relative path from this file.

267 chrome_source = os.path.abspath(os.path.join(os.path.dirname(	296 chrome_source = os.path.abspath(os.path.join(os.path.dirname(

268 os.path.realpath(__file__)), "..", "..", ".."))	297 os.path.realpath(__file__)), "..", "..", ".."))

269 raw_annotations = _RunClangTool(chrome_source, args.build_dir,	298 raw_annotations = _RunClangTool(chrome_source, args.build_dir,

270 args.path_filters if args.path_filters else ["./"])	299 args.path_filters if args.path_filters else [""],
	msramek (2017/05/31 11:31:36): Can't this [""] be the "default" argument to argparse?
	Ramin Halavati (2017/05/31 12:28:25): Done.
	300 args.prefilter_files)

271	301

272 if args.extractor_output:	302 if args.extractor_output:

273 with open(args.extractor_output, 'w') as raw_file:	303 with open(args.extractor_output, 'w') as raw_file:

274 raw_file.write(raw_annotations)	304 raw_file.write(raw_annotations)

275	305

276 annotations, metadata, errors = _ParsRawAnnotations(raw_annotations)	306 annotations, metadata, errors = _ParsRawAnnotations(raw_annotations)

277	307

278 if not annotations:	308 if not annotations:

279 print "Could not extract any annotation."	309 print "Could not extract any annotation."

280 if errors:	310 if errors:

281 print "Errors:\n%s" % "\n".join(errors)	311 print "Errors:\n%s" % "\n".join(errors)

282 return 1	312 return 1

283	313

284 if args.summary_file:	314 if args.summary_file:

285 _WriteSummaryFile(annotations, metadata, errors, args.summary_file)	315 _WriteSummaryFile(annotations, metadata, errors, args.summary_file)

286	316

287 if args.hash_codes_file:	317 if args.hash_codes_file:

288 _WriteHashCodesFile(annotations, metadata, args.hash_codes_file)	318 _WriteHashCodesFile(annotations, metadata, args.hash_codes_file)

289	319

290 return 0	320 return 0

291	321

292	322

293 if __name__ == '__main__':	323 if __name__ == '__main__':

294 sys.exit(main())	324 sys.exit(main())

OLD	NEW

« tools/traffic_annotation/auditor/annotation_relevent_filter.py ('K') | « tools/traffic_annotation/auditor/annotation_relevent_filter.py ('k') | no next file » | no next file with comments »