tools/remove_duplicate_includes.py - Issue 2379993006: Created a tool to remove duplicate includes between h and cc files.

Unified Diff: tools/remove_duplicate_includes.py

Issue 2379993006: Created a tool to remove duplicate includes between h and cc files. (Closed)

Patch Set: Making file executable and reworking dry-run. Created 4 years, 3 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Index: tools/remove_duplicate_includes.py

diff --git a/tools/remove_duplicate_includes.py b/tools/remove_duplicate_includes.py

new file mode 100755

index 0000000000000000000000000000000000000000..21fdb5acc2d44f85beaa5c0128dbe6d658b45af9

--- /dev/null

+++ b/tools/remove_duplicate_includes.py

@@ -0,0 +1,97 @@

+#!/usr/bin/env python

+# Use of this source code is governed by a BSD-style license that can be

+# found in the LICENSE file.

+"""This script will search through the target folder specified and try to find

+duplicate includes from h and cc files, and remove them from the cc files.

+Usage: remove_duplicate_includes.py --dry-run components/foo components/bar

+"""

+import argparse;

+import collections;

+import os;

+import re;

+parser = argparse.ArgumentParser()

+parser.add_argument('--dry-run', action='store_true',

+ help='Does not actually remove lines when specified.')

+parser.add_argument('--span-dirs', action='store_true',

+ help='Mapping between h and cc files will not be limited to same folders.')

+parser.add_argument('targets', nargs='+',

+ help="Folders to search for duplicate includes in.")

+args = parser.parse_args()

+# This could be generalized if desired, and moved to command line arguments.

maxbogue 2016/09/30 17:40:31 generalized

skym 2016/09/30 19:18:12 Done.

+H_FILE_SUFFIX = ".h"

+CC_FILE_SUFFIX = ".cc"

+def HasSuffix(file_name, suffix):  # True when file_name's extension equals suffix.

+ return os.path.splitext(file_name)[1] == suffix  # [1] is the extension part of the splitext result.

+def IsEmpty(line):  # True when line is empty or contains only whitespace.

+ return not line.strip()

+# The \s should allow us to ignore any whitespace and only focus on the group

+# captured when comparing between files.

+regex = re.compile('^\s*#include\s+(.*?)\s*$')

+# The key here depends on the span-dirs flag: if specified then it will only be

maxbogue 2016/09/30 17:40:31 "A map of header files to the includes they contai

skym 2016/09/30 19:18:12 Done.

+# the file name, and this allows mapping between files not in the same

+# folder. If this flag is not present then the full path is used.

+h_file_to_include_set = collections.defaultdict(set)

+# Key is always the full path to the cc file.

maxbogue 2016/09/30 17:40:31 Explain the purpose of this variable. I think it c

skym 2016/09/30 19:18:12 Done.

+cc_file_path_set = set()

+for relative_root in args.targets:

+ absolute_root = os.path.join(os.getcwd(), relative_root)

+ for (dir_path, dir_name_list, file_name_list) in os.walk(absolute_root):

maxbogue 2016/09/30 17:40:31 I haven't worked with python in a while but I'm pr

skym 2016/09/30 19:18:12 Done.

+ for file_name in file_name_list:

+ file_path = os.path.join(dir_path, file_name)

+ if HasSuffix(file_name, H_FILE_SUFFIX):

+ # Can be either name or path depending on flag.

+ file_key = file_name if args.span_dirs else file_path

+ with open(file_path) as file_handle:

+ for line in file_handle:

+ match = regex.search(line)

+ if match:

+ h_file_to_include_set[file_key].add(match.group(1))

+ elif HasSuffix(file_name, CC_FILE_SUFFIX):

+ cc_file_path_set.add(file_path)

+for cc_file_path in cc_file_path_set:

+ # The lookup key must match the indexing method used when adding h files, which depends on the span-dirs flag.

+ cc_file_name = os.path.basename(cc_file_path)

+ cc_file_key = cc_file_name if args.span_dirs else cc_file_path

+ h_file_key = os.path.splitext(cc_file_key)[0] + H_FILE_SUFFIX

maxbogue 2016/09/30 17:40:31 You could strip "_unittest" from the end here if i

skym 2016/09/30 19:18:13 Oooooh, actually maybe I should be looking at the

+ if h_file_key in h_file_to_include_set:

+ include_set = h_file_to_include_set[h_file_key]

+ # Read out all the data and reset file position to start overwriting.

+ file_handle = open(cc_file_path, "r" if args.dry_run else "r+")

+ data = file_handle.readlines()

maxbogue 2016/09/30 17:40:31 I'd probably just call this "lines" tbh

skym 2016/09/30 19:18:12 How about line_list?

+ file_handle.seek(0)

+ # When a section of includes is completely removed, we want to remove the

+ # trailing empty line as well.

+ lastCopiedLineWasEmpty = False

+ lastLineWasOmitted = False

+ for line in data:

+ match = regex.search(line)

+ if match is not None and match.group(1) in include_set:

+ print "Removed " + match.group(1) + " " + cc_file_name

+ lastLineWasOmitted = True

+ elif lastCopiedLineWasEmpty and lastLineWasOmitted and IsEmpty(line):

+ print "Removed empty line " + cc_file_name

+ lastLineWasOmitted = True

+ else:

+ lastCopiedLineWasEmpty = IsEmpty(line)

+ lastLineWasOmitted = False

+ if not args.dry_run:

+ file_handle.write(line)

+ if not args.dry_run:

+ file_handle.truncate()

+ file_handle.close()

« no previous file with comments | « no previous file | no next file » | no next file with comments »