tools/remove_duplicate_includes.py - Issue 2379993006: Created a tool to remove duplicate includes between h and cc files.

Unified Diff: tools/remove_duplicate_includes.py

Issue 2379993006: Created a tool to remove duplicate includes between h and cc files. (Closed)

Patch Set: Updates for Max's comments. Created 4 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Index: tools/remove_duplicate_includes.py

diff --git a/tools/remove_duplicate_includes.py b/tools/remove_duplicate_includes.py

new file mode 100755

index 0000000000000000000000000000000000000000..ef7f65cf9ac7e1c8be80a69d31576ff440c0fccf

--- /dev/null

+++ b/tools/remove_duplicate_includes.py

@@ -0,0 +1,117 @@

+#!/usr/bin/env python

+# Use of this source code is governed by a BSD-style license that can be

+# found in the LICENSE file.

+'''This script will search through the target folder specified and try to find

+duplicate includes from h and cc files, and remove them from the cc files. The

+current/working directory needs to be chromium_checkout/src/ when this tool is

+run.

+Usage: remove_duplicate_includes.py --dry-run components/foo components/bar

+'''

+import argparse;

+import collections;

+import logging

+import os;

+import re;

+import sys

# Command-line interface. `--dry-run` reports what would be removed without
# modifying any file; `targets` are one or more folders, given relative to the
# current working directory, in which to look for duplicate includes.
# TODO(review, maxbogue 2016/10/04): this parsing should move down inside a
# main() function instead of running as a module-level side effect.
parser = argparse.ArgumentParser()
parser.add_argument('--dry-run', action='store_true',
                    help='Does not actually remove lines when specified.')
parser.add_argument('targets', nargs='+',
                    help='Relative path to folders to search for duplicate '
                         'includes in.')
args = parser.parse_args()

# File suffixes that identify header and implementation files. These could be
# generalized if desired, and moved to command line arguments.
H_FILE_SUFFIX = '.h'
CC_FILE_SUFFIX = '.cc'

# Matches a whole line holding a single #include directive. Leading and
# trailing whitespace (including the newline) is skipped by the \s* parts so
# that only the captured groups are compared between files:
#   group(1): the directive itself, e.g. '#include "foo/bar.h"'
#   group(2): the included path, e.g. 'foo/bar.h'
# A raw string is used so that \s reaches the regex engine untouched instead
# of being treated as an (invalid) string escape sequence.
INCLUDE_REGEX = re.compile(r'^\s*(#include\s+["<](.*?)[">])\s*$')

def HasSuffix(file_name, suffix):
  '''Returns True if the extension of |file_name| is exactly |suffix|.

  Args:
    file_name: A file name or path.
    suffix: The extension to compare against, including the leading dot
        (for example '.h').

  Only the final extension, as split off by os.path.splitext, is compared, so
  'foo.h.cc' has suffix '.cc' and a bare dot-file such as '.h' has no suffix.
  '''
  extension = os.path.splitext(file_name)[1]
  return extension == suffix

def IsEmpty(line):
  '''Returns True if |line| is empty or contains only whitespace.'''
  return not line.strip()

def FindIncludeSet(input_lines, h_path_to_include_set, cc_file_name=None):
  '''Returns the include set of the .h file that a .cc file corresponds to.

  The first #include line found in |input_lines| is taken to name the
  corresponding header. Its path is joined onto the current working directory
  and looked up in |h_path_to_include_set|.

  Args:
    input_lines: Lines of the .cc file, in order.
    h_path_to_include_set: Dict mapping .h file paths to the set of include
        paths found in that header.
    cc_file_name: Optional name of the .cc file, used only in the message
        printed when the file is skipped. When omitted, a module-level
        |cc_file_name| is used if one exists.

  Returns:
    The include set of the matching header, or None when the file has no
    #include line or its first include is not a known .h file.
  '''
  if cc_file_name is None:
    # The name used to be read from a module-level variable set by the calling
    # script; fall back to it instead of raising NameError when it is absent.
    cc_file_name = globals().get('cc_file_name', '<unknown file>')

  for line in input_lines:
    match = INCLUDE_REGEX.search(line)
    if not match:
      continue
    # Only the first include is considered: it should be the corresponding .h
    # file, otherwise the .cc file is skipped.
    h_file_path = os.path.join(os.getcwd(), match.group(2))
    if h_file_path not in h_path_to_include_set:
      # Single parenthesized argument keeps this valid as both a Python 2
      # print statement and a Python 3 print() call.
      print('First include did not match to a known .h file, skipping ' +
            cc_file_name + ', line: ' + match.group(1))
      return None
    return h_path_to_include_set[h_file_path]
  # No #include line at all.
  return None

def WithoutDuplicates(input_lines, include_set, cc_file_name=None):
  '''Returns |input_lines| without the includes already in |include_set|.

  Every line is checked against the include regex; an #include whose path is
  in |include_set| is dropped. When a whole section of includes is removed,
  the empty line(s) trailing that section are dropped too, so no run of blank
  lines is left behind.

  Args:
    input_lines: Lines of the .cc file, in order.
    include_set: Set of include paths that are to be removed.
    cc_file_name: Optional name of the .cc file, used only in the messages
        printed for each removal. When omitted, a module-level |cc_file_name|
        is used if one exists.

  Returns:
    A new list with what the contents of the file should be.
  '''
  if cc_file_name is None:
    # The name used to be read from a module-level variable set by the calling
    # script; fall back to it instead of raising NameError when it is absent.
    cc_file_name = globals().get('cc_file_name', '<unknown file>')

  output_lines = []
  # An empty line is only dropped when the last line that was kept is empty
  # and everything between it and the current line has been omitted.
  last_copied_line_was_empty = False
  last_line_was_omitted = False
  for line in input_lines:
    match = INCLUDE_REGEX.search(line)
    if match and match.group(2) in include_set:
      print('Removed ' + match.group(1) + ' from ' + cc_file_name)
      last_line_was_omitted = True
    elif (last_copied_line_was_empty and last_line_was_omitted and
          IsEmpty(line)):
      print('Removed empty line from ' + cc_file_name)
      last_line_was_omitted = True
    else:
      last_copied_line_was_empty = IsEmpty(line)
      last_line_was_omitted = False
      output_lines.append(line)
  return output_lines

maxbogue 2016/10/04 15:13:21 All of the "doing stuff" part (instead of just def

skym 2016/10/04 16:37:39 Done.

# A map of header file paths to the set of include paths they contain.
h_path_to_include_set = {}
# Simply collects the path of all cc files present.
cc_file_path_set = set()

# Walk every target folder (resolved against the current working directory)
# and index the headers and .cc files found below it.
for relative_root in args.targets:
  absolute_root = os.path.join(os.getcwd(), relative_root)
  for dir_path, _, file_name_list in os.walk(absolute_root):
    for file_name in file_name_list:
      file_path = os.path.join(dir_path, file_name)
      if HasSuffix(file_name, H_FILE_SUFFIX):
        # By manually adding the set instead of using defaultdict we can avoid
        # warning about missing .h files when the .h file contains no
        # includes.
        h_path_to_include_set[file_path] = set()
        with open(file_path) as file_handle:
          for line in file_handle:
            match = INCLUDE_REGEX.search(line)
            if match:
              h_path_to_include_set[file_path].add(match.group(2))
      elif HasSuffix(file_name, CC_FILE_SUFFIX):
        cc_file_path_set.add(file_path)

# Rewrite each .cc file without the includes its corresponding header already
# provides. In dry-run mode the files are opened read-only and never written.
for cc_file_path in cc_file_path_set:
  # Module-level name used in the messages printed while processing this file.
  cc_file_name = os.path.basename(cc_file_path)
  with open(cc_file_path, 'r' if args.dry_run else 'r+') as file_handle:
    # Read out all lines and reset file position to allow overwriting.
    input_lines = file_handle.readlines()
    file_handle.seek(0)
    include_set = FindIncludeSet(input_lines, h_path_to_include_set)
    # Skipped when no header matched (None) or the header has no includes
    # (empty set): there is nothing to remove in either case.
    if include_set:
      output_lines = WithoutDuplicates(input_lines, include_set)
      if not args.dry_run:
        file_handle.writelines(output_lines)
        # The new contents are never longer than the old ones; cut off what
        # is left of the old contents past the current position.
        file_handle.truncate()

« no previous file with comments | « no previous file | no next file » | no next file with comments »