Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(14)

Unified Diff: tools/remove_duplicate_includes.py

Issue 2379993006: Created a tool to remove duplicate includes between h and cc files. (Closed)
Patch Set: Making file executable and reworking dry-run. Created 4 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: tools/remove_duplicate_includes.py
diff --git a/tools/remove_duplicate_includes.py b/tools/remove_duplicate_includes.py
new file mode 100755
index 0000000000000000000000000000000000000000..21fdb5acc2d44f85beaa5c0128dbe6d658b45af9
--- /dev/null
+++ b/tools/remove_duplicate_includes.py
@@ -0,0 +1,97 @@
+#!/usr/bin/env python
+# Copyright 2016 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""This script will search through the target folders specified and try to find
+duplicate includes between h and cc files, and remove them from the cc files.
+
+Usage: remove_duplicate_includes.py --dry-run components/foo components/bar
+"""
+
+import argparse;
+import collections;
+import os;
+import re;
+
+parser = argparse.ArgumentParser()
+parser.add_argument('--dry-run', action='store_true',
+ help='Does not actually remove lines when specified.')
+parser.add_argument('--span-dirs', action='store_true',
+ help='Mapping between h and cc files will not be limited to same folders.')
+parser.add_argument('targets', nargs='+',
+ help="Folders to search for duplicate includes in.")
+args = parser.parse_args()
+
+# This could be generalized if desired, and moved to command line arguments.
maxbogue 2016/09/30 17:40:31 generalized
skym 2016/09/30 19:18:12 Done.
+H_FILE_SUFFIX = ".h"
+CC_FILE_SUFFIX = ".cc"
+
+def HasSuffix(file_name, suffix):
+ return os.path.splitext(file_name)[1] == suffix
+
+def IsEmpty(line):
+ return not line.strip()
+
+# The \s should allow us to ignore any whitespace and only focus on the group
+# captured when comparing between files.
+regex = re.compile('^\s*#include\s+(.*?)\s*$')
+
+# The key here depends on the span-dirs flag: if specified, it will only be
maxbogue 2016/09/30 17:40:31 "A map of header files to the includes they contai
skym 2016/09/30 19:18:12 Done.
+# the file name, and this allows mapping between files not in the same
+# folder. If this flag is not present then the full path is used.
+h_file_to_include_set = collections.defaultdict(set)
+
+# Each entry is always the full path to a cc file.
maxbogue 2016/09/30 17:40:31 Explain the purpose of this variable. I think it c
skym 2016/09/30 19:18:12 Done.
+cc_file_path_set = set()
+
+for relative_root in args.targets:
+ absolute_root = os.path.join(os.getcwd(), relative_root)
+ for (dir_path, dir_name_list, file_name_list) in os.walk(absolute_root):
maxbogue 2016/09/30 17:40:31 I haven't worked with python in a while but I'm pr
skym 2016/09/30 19:18:12 Done.
+ for file_name in file_name_list:
+ file_path = os.path.join(dir_path, file_name)
+ if HasSuffix(file_name, H_FILE_SUFFIX):
+ # Can be either name or path depending on flag.
+ file_key = file_name if args.span_dirs else file_path
+ with open(file_path) as file_handle:
+ for line in file_handle:
+ match = regex.search(line)
+ if match:
+ h_file_to_include_set[file_key].add(match.group(1))
+ elif HasSuffix(file_name, CC_FILE_SUFFIX):
+ cc_file_path_set.add(file_path)
+
+for cc_file_path in cc_file_path_set:
+ # The lookup key must match how h files were indexed; depends on the flag.
+ cc_file_name = os.path.basename(cc_file_path)
+ cc_file_key = cc_file_name if args.span_dirs else cc_file_path
+ h_file_key = os.path.splitext(cc_file_key)[0] + H_FILE_SUFFIX
maxbogue 2016/09/30 17:40:31 You could strip "_unittest" from the end here if i
skym 2016/09/30 19:18:13 Oooooh, actually maybe I should be looking at the
+
+ if h_file_key in h_file_to_include_set:
+ include_set = h_file_to_include_set[h_file_key]
+
+ # Read out all the data and reset file position to start overwriting.
+ file_handle = open(cc_file_path, "r" if args.dry_run else "r+")
+ data = file_handle.readlines()
maxbogue 2016/09/30 17:40:31 I'd probably just call this "lines" tbh
skym 2016/09/30 19:18:12 How about line_list?
+ file_handle.seek(0)
+
+ # When a section of includes is completely removed, we want to remove the
+ # trailing empty line as well.
+ lastCopiedLineWasEmpty = False
+ lastLineWasOmitted = False
+ for line in data:
+ match = regex.search(line)
+ if match is not None and match.group(1) in include_set:
+ print "Removed " + match.group(1) + " " + cc_file_name
+ lastLineWasOmitted = True
+ elif lastCopiedLineWasEmpty and lastLineWasOmitted and IsEmpty(line):
+ print "Removed empty line " + cc_file_name
+ lastLineWasOmitted = True
+ else:
+ lastCopiedLineWasEmpty = IsEmpty(line)
+ lastLineWasOmitted = False
+ if not args.dry_run:
+ file_handle.write(line)
+ if not args.dry_run:
+ file_handle.truncate()
+ file_handle.close()
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698