Chromium Code Reviews| OLD | NEW |
|---|---|
| (Empty) | |
| 1 #!/usr/bin/env python | |
| 2 # Copyright 2016 The Chromium Authors. All rights reserved. | |
| 3 # Use of this source code is governed by a BSD-style license that can be | |
| 4 # found in the LICENSE file. | |
| 5 | |
| 6 '''This script will search through the target folder specified and try to find | |
| 7 duplicate includes from h and cc files, and remove them from the cc files. The | |
| 8 current/working directory needs to be chromium_checkout/src/ when this tool is | |
| 9 run. | |
| 10 | |
| 11 Usage: remove_duplicate_includes.py --dry-run components/foo components/bar | |
| 12 ''' | |
| 13 | |
| 14 import argparse; | |
| 15 import collections; | |
| 16 import logging | |
| 17 import os; | |
| 18 import re; | |
| 19 import sys | |
| 20 | |
# Reuse the module docstring as the --help description so the usage example is
# shown to the user; the raw formatter keeps its line breaks intact.
parser = argparse.ArgumentParser(
    description=__doc__,
    formatter_class=argparse.RawDescriptionHelpFormatter)
|
maxbogue
2016/10/04 15:13:21
This should be down inside your main() function.
skym
2016/10/04 16:37:39
Done.
| |
# --dry-run still reports what would be removed but leaves the files untouched.
parser.add_argument(
    '--dry-run',
    action='store_true',
    help='Does not actually remove lines when specified.')
# Each target is joined onto the current working directory before it is walked.
parser.add_argument(
    'targets',
    nargs='+',
    help='Relative path to folders to search for duplicate includes in.')
args = parser.parse_args()
| 27 | |
# This could be generalized if desired, and moved to command line arguments.
H_FILE_SUFFIX = '.h'
CC_FILE_SUFFIX = '.cc'

# The \s should allow us to ignore any whitespace and only focus on the group
# captured when comparing between files. Group 1 is the whole directive and
# group 2 is the path between the quotes or angle brackets. A raw string is
# used so the backslashes reach the regex engine unchanged.
INCLUDE_REGEX = re.compile(r'^\s*(#include\s+["<](.*?)[">])\s*$')
| 35 | |
def HasSuffix(file_name, suffix):
  '''Returns True if the extension of file_name is exactly suffix.

  The extension is whatever os.path.splitext reports for file_name, so suffix
  must include the leading dot (e.g. '.cc').
  '''
  return os.path.splitext(file_name)[1] == suffix
| 38 | |
def IsEmpty(line):
  '''Returns True if line is empty or contains only whitespace.'''
  return not line.strip()
| 41 | |
def FindIncludeSet(input_lines, h_path_to_include_set):
  '''Finds the include set of the .h file that corresponds to a .cc file.

  The first #include line in input_lines is assumed to name the corresponding
  header. Its path is joined onto the current working directory and looked up
  in h_path_to_include_set.

  Args:
    input_lines: The lines of the .cc file, in order.
    h_path_to_include_set: Map from .h file path to the set of include paths
        found in that header.

  Returns:
    The set stored for the corresponding header, or None when the first
    include does not name a known header or the file has no include at all.

  Note: the skip message reads the module-level cc_file_name, so the caller
  must set it before calling this function.
  '''
  for line in input_lines:
    match = INCLUDE_REGEX.search(line)
    if not match:
      continue
    # Only the first include is considered; it should be the matching .h file.
    h_file_path = os.path.join(os.getcwd(), match.group(2))
    if h_file_path not in h_path_to_include_set:
      print('First include did not match to a known .h file, skipping ' +
            cc_file_name + ', line: ' + match.group(1))
      return None
    return h_path_to_include_set[h_file_path]
  # No include directive anywhere in the file.
  return None
| 58 | |
def WithoutDuplicates(input_lines, include_set):
  '''Returns input_lines without the includes already present in include_set.

  Every line is matched against INCLUDE_REGEX; an include whose path is in
  include_set is dropped. A blank line is dropped as well when it directly
  follows dropped lines and the last kept line was itself blank, so that a
  fully removed group of includes does not leave two blank lines behind.

  Args:
    input_lines: The lines of the .cc file, in order.
    include_set: The include paths that the corresponding header already has.

  Returns:
    A new list with the lines that should remain in the file.

  Note: the progress messages read the module-level cc_file_name, so the
  caller must set it before calling this function.
  '''
  output_lines = []
  # When a section of includes is completely removed, we want to remove the
  # trailing empty line as well.
  last_copied_line_was_empty = False
  last_line_was_omitted = False
  for line in input_lines:
    match = INCLUDE_REGEX.search(line)
    if match and match.group(2) in include_set:
      print('Removed ' + match.group(1) + ' from ' + cc_file_name)
      last_line_was_omitted = True
    elif (last_copied_line_was_empty and last_line_was_omitted and
          IsEmpty(line)):
      print('Removed empty line from ' + cc_file_name)
      last_line_was_omitted = True
    else:
      # The line is kept; remember whether it was blank for the check above.
      last_copied_line_was_empty = IsEmpty(line)
      last_line_was_omitted = False
      output_lines.append(line)
  return output_lines
| 82 | |
|
maxbogue
2016/10/04 15:13:21
All of the "doing stuff" part (instead of just def
skym
2016/10/04 16:37:39
Done.
| |
# A map of header file paths to the includes they contain.
h_path_to_include_set = {}

# Simply collects the path of all cc files present.
cc_file_path_set = set()

# First pass: walk every target, recording the includes of each .h file and
# remembering each .cc file for the second pass.
for relative_root in args.targets:
  absolute_root = os.path.join(os.getcwd(), relative_root)
  for dir_path, dir_name_list, file_name_list in os.walk(absolute_root):
    for file_name in file_name_list:
      file_path = os.path.join(dir_path, file_name)
      if HasSuffix(file_name, H_FILE_SUFFIX):
        # By manually adding the set instead of using defaultdict we can avoid
        # warning about missing .h files when the .h file contains no includes.
        h_path_to_include_set[file_path] = set()
        with open(file_path) as file_handle:
          for line in file_handle:
            match = INCLUDE_REGEX.search(line)
            if match:
              h_path_to_include_set[file_path].add(match.group(2))
      elif HasSuffix(file_name, CC_FILE_SUFFIX):
        cc_file_path_set.add(file_path)

# Second pass: rewrite each .cc file without the includes its header already
# has. cc_file_name must stay a module-level name because FindIncludeSet and
# WithoutDuplicates read it when printing their messages.
for cc_file_path in cc_file_path_set:
  cc_file_name = os.path.basename(cc_file_path)
  # Open read-only for a dry run so that the file cannot be modified.
  with open(cc_file_path, 'r' if args.dry_run else 'r+') as file_handle:
    # Read out all lines and reset file position to allow overwriting.
    input_lines = file_handle.readlines()
    file_handle.seek(0)
    include_set = FindIncludeSet(input_lines, h_path_to_include_set)
    # None (no known header) and an empty set (nothing to remove) both skip.
    if include_set:
      output_lines = WithoutDuplicates(input_lines, include_set)
      if not args.dry_run:
        file_handle.writelines(output_lines)
        # The new content is never longer; cut off what is left of the old.
        file_handle.truncate()
| OLD | NEW |