OLD | NEW |
---|---|
(Empty) | |
1 #!/usr/bin/env python | |
2 # Copyright 2016 The Chromium Authors. All rights reserved. | |
3 # Use of this source code is governed by a BSD-style license that can be | |
4 # found in the LICENSE file. | |
5 | |
6 '''This script will search through the target folder specified and try to find | |
Dirk Pranke
2016/10/04 20:37:04
Use triple-double quotes (""") for docstrings, eve
skym
2016/10/04 23:45:11
Done.
| |
7 duplicate includes from h and cc files, and remove them from the cc files. The | |
8 current/working directory needs to be chromium_checkout/src/ when this tool is | |
9 run. | |
10 | |
11 Usage: remove_duplicate_includes.py --dry-run components/foo components/bar | |
Dirk Pranke
2016/10/04 20:37:04
Nit: s/Usage/Usage:/.
skym
2016/10/04 23:45:11
Done.
| |
12 ''' | |
13 | |
14 import argparse | |
15 import collections | |
16 import logging | |
17 import os | |
18 import re | |
19 import sys | |
20 | |
# This could be generalized if desired, and moved to command line arguments.
H_FILE_SUFFIX = '.h'
CC_FILE_SUFFIX = '.cc'

# Matches a line that holds nothing but a single #include directive. Group 1 is
# the directive with surrounding whitespace stripped, group 2 is the included
# path without its quotes or angle brackets. The \s at both ends lets us ignore
# any whitespace and only focus on the captured groups when comparing between
# files. A raw string is used so that \s reaches the regex engine untouched
# instead of being treated as an (invalid) string escape sequence.
INCLUDE_REGEX = re.compile(r'^\s*(#include\s+["<](.*?)[">])\s*$')
28 | |
def HasSuffix(file_name, suffix):
  """Returns True when the extension of file_name is exactly suffix.

  The extension is whatever os.path.splitext reports, so it includes the
  leading dot and only covers the last dotted component of the name.
  """
  _, extension = os.path.splitext(file_name)
  return extension == suffix
31 | |
def IsEmpty(line):
  """Returns True when line contains nothing but whitespace."""
  if line.strip():
    return False
  return True
34 | |
def FindIncludeSet(input_lines, h_path_to_include_set, cc_file_name):
  """Finds and returns the corresponding include set for the given .cc file.

  This is done by finding the first include in the file and then trying to look
  up an .h file in the passed in map.

  Args:
    input_lines: Iterable of the lines of the .cc file.
    h_path_to_include_set: Dict mapping the path of each known .h file (rooted
        at the current working directory) to the set of paths it includes.
    cc_file_name: Name of the .cc file; only used in the skip message.

  Returns:
    The include set of the header named by the first include of the file, or
    None when the file has no include at all or when its first include does
    not name a known header (a message is printed in the latter case).
  """
  for line in input_lines:
    match = INCLUDE_REGEX.search(line)
    if not match:
      continue
    # The first include match should be the corresponding .h file, else skip.
    # Include paths are relative to the current working directory; normalizing
    # keeps spellings such as "./foo.h" or mixed separators from producing a
    # key that differs only textually.
    h_file_path = os.path.normpath(os.path.join(os.getcwd(), match.group(2)))
    if h_file_path not in h_path_to_include_set:
      print('First include did not match to a known .h file, skipping ' +
            cc_file_name + ', line: ' + match.group(1))
      return None
    return h_path_to_include_set[h_file_path]
  # No include was found anywhere in the file.
  return None
51 | |
def WithoutDuplicates(input_lines, include_set, cc_file_name):
  """Returns the lines of a .cc file with its duplicate includes removed.

  Checks every input line and drops it when it is an include whose path is
  present in the given include set. When a section of includes is completely
  removed, the empty line trailing that section is dropped as well. Every
  removal is reported on stdout.

  Args:
    input_lines: Iterable of the lines of the .cc file.
    include_set: Set of include paths that should not be repeated.
    cc_file_name: Name of the .cc file; only used in the printed messages.

  Returns:
    A list with the lines that should make up the new contents of the file.
  """
  output_lines = []
  # Both flags are needed to recognize the empty line that trails a fully
  # removed section: the last line kept before the section must have been
  # empty, and everything since then must have been dropped.
  prev_kept_was_empty = False
  prev_was_omitted = False
  for line in input_lines:
    match = INCLUDE_REGEX.search(line)
    if match and match.group(2) in include_set:
      print('Removed ' + match.group(1) + ' from ' + cc_file_name)
      prev_was_omitted = True
    elif prev_kept_was_empty and prev_was_omitted and IsEmpty(line):
      print('Removed empty line from ' + cc_file_name)
      prev_was_omitted = True
    else:
      prev_kept_was_empty = IsEmpty(line)
      prev_was_omitted = False
      output_lines.append(line)
  return output_lines
75 | |
def main():
  """Removes from .cc files the includes already present in their .h file.

  Walks every target folder given on the command line, records the includes of
  each .h file, and then rewrites each .cc file whose first include names one
  of those headers. With --dry-run the removals are only reported. The current
  working directory is used as the root for both targets and include paths.

  Returns:
    0, to be used as the process exit code.
  """
  parser = argparse.ArgumentParser()
  parser.add_argument('--dry-run', action='store_true',
      help='Does not actually remove lines when specified.')
  parser.add_argument('targets', nargs='+',
      help='Relative path to folders to search for duplicate includes in.')
  args = parser.parse_args()

  # A map of header file paths to the includes they contain.
  h_path_to_include_set = {}

  # Simply collects the path of all cc files present.
  cc_file_path_set = set()

  for relative_root in args.targets:
    # Normalize so that targets spelled like "./components/foo" still produce
    # header paths equal to the ones derived from include directives.
    absolute_root = os.path.normpath(os.path.join(os.getcwd(), relative_root))
    for dir_path, _, file_name_list in os.walk(absolute_root):
      for file_name in file_name_list:
        file_path = os.path.join(dir_path, file_name)
        if HasSuffix(file_name, H_FILE_SUFFIX):
          # By manually adding the set instead of using defaultdict we can avoid
          # warning about missing .h files when the .h file has no includes.
          include_set = set()
          h_path_to_include_set[file_path] = include_set
          with open(file_path) as fh:
            for line in fh:
              match = INCLUDE_REGEX.search(line)
              if match:
                include_set.add(match.group(2))
        elif HasSuffix(file_name, CC_FILE_SUFFIX):
          cc_file_path_set.add(file_path)

  # Sorted so that the printed report is stable from run to run.
  for cc_file_path in sorted(cc_file_path_set):
    cc_file_name = os.path.basename(cc_file_path)
    with open(cc_file_path) as fh:
      input_lines = fh.readlines()
    include_set = FindIncludeSet(input_lines, h_path_to_include_set,
                                 cc_file_name)
    if not include_set:
      continue
    output_lines = WithoutDuplicates(input_lines, include_set, cc_file_name)
    # Only touch the file when something was actually removed; rewriting
    # identical contents would needlessly update its modification time.
    if not args.dry_run and output_lines != input_lines:
      with open(cc_file_path, 'w') as fh:
        fh.writelines(output_lines)
  return 0


if __name__ == '__main__':
  sys.exit(main())
OLD | NEW |