Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(562)

Unified Diff: tools/remove_duplicate_includes.py

Issue 2379993006: Created a tool to remove duplicate includes between h and cc files. (Closed)
Patch Set: Updates for dpranke Created 4 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: tools/remove_duplicate_includes.py
diff --git a/tools/remove_duplicate_includes.py b/tools/remove_duplicate_includes.py
new file mode 100755
index 0000000000000000000000000000000000000000..cdf9b0d4d14804df775d1732fc35acc92852355e
--- /dev/null
+++ b/tools/remove_duplicate_includes.py
@@ -0,0 +1,124 @@
+#!/usr/bin/env python
+# Copyright 2016 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""This script will search through the target folder specified and try to find
+duplicate includes from h and cc files, and remove them from the cc files. The
+current/working directory needs to be chromium_checkout/src/ when this tool is
+run.
+
+Usage: remove_duplicate_includes.py --dry-run components/foo components/bar
+"""
+
+import argparse
+import collections
+import logging
+import os
+import re
+import sys
+
+# File suffixes identifying headers and their implementation files. This could
+# be generalized if desired, and moved to command line arguments.
+H_FILE_SUFFIX = '.h'
+CC_FILE_SUFFIX = '.cc'
+
+# Matches a single #include line. The \s* at both ends ignores any surrounding
+# whitespace so that only the captured groups matter when comparing between
+# files: group(1) is the whole directive and group(2) is the included path.
+# The pattern is a raw string so the backslashes reach the regex engine
+# untouched instead of being treated as (invalid) string escapes.
+INCLUDE_REGEX = re.compile(r'^\s*(#include\s+["<](.*?)[">])\s*$')
+
+def HasSuffix(file_name, suffix):
+  """Returns True when the extension of |file_name| is exactly |suffix|.
+
+  The extension is whatever os.path.splitext() reports, leading dot included,
+  so |suffix| is expected in the form '.h' or '.cc'.
+  """
+  _, extension = os.path.splitext(file_name)
+  return extension == suffix
+
+def IsEmpty(line):
+  """Returns True when |line| is empty or contains only whitespace."""
+  stripped = line.strip()
+  return len(stripped) == 0
+
+def FindIncludeSet(input_lines, h_path_to_include_set, cc_file_name):
+  """Finds and returns the corresponding include set for the given .cc file.
+
+  The first #include line in the file is taken to name the matching .h file.
+  Its path is joined onto the current working directory and looked up in the
+  passed in map.
+
+  Args:
+    input_lines: Iterable of the lines of the .cc file.
+    h_path_to_include_set: Dict mapping .h file paths to the set of paths
+        included by that header.
+    cc_file_name: Name of the .cc file, used only in the skip message.
+
+  Returns:
+    The include set of the header named by the first include, or None when
+    the file contains no include at all or its first include is not a key of
+    |h_path_to_include_set|.
+  """
+  for line in input_lines:
+    match = INCLUDE_REGEX.search(line)
+    if not match:
+      continue
+    # Only the first include is considered; it should be the corresponding .h
+    # file, else the .cc file is skipped.
+    h_file_path = os.path.join(os.getcwd(), match.group(2))
+    if h_file_path not in h_path_to_include_set:
+      # A single parenthesized argument prints identically under the Python 2
+      # print statement and the Python 3 print function.
+      print('First include did not match to a known .h file, skipping ' +
+            cc_file_name + ', line: ' + match.group(1))
+      return None
+    return h_path_to_include_set[h_file_path]
+  # No include line was found anywhere in the file.
+  return None
+
+def WithoutDuplicates(input_lines, include_set, cc_file_name):
+  """Checks every input line and sees if we can remove it based on the contents
+  of the given include set.
+
+  An include line is removed when its path is in |include_set|. When a section
+  of includes is completely removed, the trailing empty line is removed as
+  well, so that two empty lines do not end up next to each other.
+
+  Args:
+    input_lines: Iterable of the lines of the .cc file.
+    include_set: Set of include paths that are already provided by the header.
+    cc_file_name: Name of the .cc file, used only in the progress messages.
+
+  Returns:
+    A new list holding what the new contents of the file should be.
+  """
+  output_lines = []
+  last_copied_line_was_empty = False
+  last_line_was_omitted = False
+  for line in input_lines:
+    match = INCLUDE_REGEX.search(line)
+    if match and match.group(2) in include_set:
+      # A single parenthesized argument prints identically under the Python 2
+      # print statement and the Python 3 print function.
+      print('Removed ' + match.group(1) + ' from ' + cc_file_name)
+      last_line_was_omitted = True
+    elif (last_copied_line_was_empty and last_line_was_omitted and
+          IsEmpty(line)):
+      # The previous kept line was empty and everything since then has been
+      # dropped, so keeping this empty line would leave a doubled gap.
+      print('Removed empty line from ' + cc_file_name)
+      last_line_was_omitted = True
+    else:
+      last_copied_line_was_empty = IsEmpty(line)
+      last_line_was_omitted = False
+      output_lines.append(line)
+  return output_lines
+
+def main():
+  """Parses the command line and removes duplicate includes from .cc files.
+
+  Every target folder is walked once. For each .h file the set of paths it
+  includes is recorded, and the path of each .cc file is collected. Then each
+  .cc file whose first include maps to a recorded header has the includes
+  already present in that header removed. With --dry-run the removals are
+  only printed and no file is modified.
+  """
+  parser = argparse.ArgumentParser()
+  parser.add_argument('--dry-run', action='store_true',
+      help='Does not actually remove lines when specified.')
+  parser.add_argument('targets', nargs='+',
+      help='Relative path to folders to search for duplicate includes in.')
+  args = parser.parse_args()
+
+  # A map of header file paths to the includes they contain.
+  h_path_to_include_set = {}
+
+  # Simply collects the path of all cc files present.
+  cc_file_path_set = set()
+
+  for relative_root in args.targets:
+    # abspath() resolves against the working directory and also normalizes
+    # spellings such as './components/foo', so the keys stored below line up
+    # with the paths FindIncludeSet() builds from the include text.
+    absolute_root = os.path.abspath(relative_root)
+    for dir_path, _, file_name_list in os.walk(absolute_root):
+      for file_name in file_name_list:
+        file_path = os.path.join(dir_path, file_name)
+        if HasSuffix(file_name, H_FILE_SUFFIX):
+          # By manually adding the set instead of using defaultdict we can avoid
+          # warning about missing .h files when the .h file has no includes.
+          header_includes = set()
+          with open(file_path) as fh:
+            for line in fh:
+              match = INCLUDE_REGEX.search(line)
+              if match:
+                header_includes.add(match.group(2))
+          h_path_to_include_set[file_path] = header_includes
+        elif HasSuffix(file_name, CC_FILE_SUFFIX):
+          cc_file_path_set.add(file_path)
+
+  # Sorted so that the printed output is in a stable order between runs.
+  for cc_file_path in sorted(cc_file_path_set):
+    cc_file_name = os.path.basename(cc_file_path)
+    with open(cc_file_path) as fh:
+      input_lines = fh.readlines()
+    include_set = FindIncludeSet(input_lines, h_path_to_include_set,
+                                 cc_file_name)
+    # None means no matching header; an empty set means nothing to remove.
+    if not include_set:
+      continue
+    output_lines = WithoutDuplicates(input_lines, include_set, cc_file_name)
+    # Only touch the file when something was actually removed, so unchanged
+    # files keep their modification time and need not be writable.
+    if args.dry_run or output_lines == input_lines:
+      continue
+    with open(cc_file_path, 'w') as fh:
+      fh.writelines(output_lines)
+
+# Only run when executed as a script. main() has no return statement, so
+# sys.exit() receives None and the process exits with status 0.
+if __name__ == '__main__':
+ sys.exit(main())
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698