| Index: tools/remove_duplicate_includes.py
|
| diff --git a/tools/remove_duplicate_includes.py b/tools/remove_duplicate_includes.py
|
| new file mode 100755
|
| index 0000000000000000000000000000000000000000..cdf9b0d4d14804df775d1732fc35acc92852355e
|
| --- /dev/null
|
| +++ b/tools/remove_duplicate_includes.py
|
| @@ -0,0 +1,124 @@
|
| +#!/usr/bin/env python
|
| +# Copyright 2016 The Chromium Authors. All rights reserved.
|
| +# Use of this source code is governed by a BSD-style license that can be
|
| +# found in the LICENSE file.
|
| +
|
| +"""This script will search through the target folder specified and try to find
|
| +duplicate includes from h and cc files, and remove them from the cc files. The
|
| +current/working directory needs to be chromium_checkout/src/ when this tool is
|
| +run.
|
| +
|
| +Usage: remove_duplicate_includes.py --dry-run components/foo components/bar
|
| +"""
|
| +
|
| +import argparse
|
| +import collections
|
| +import logging
|
| +import os
|
| +import re
|
| +import sys
|
| +
|
# This could be generalized if desired, and moved to command line arguments.
H_FILE_SUFFIX = '.h'
CC_FILE_SUFFIX = '.cc'

# Matches a line consisting solely of an #include directive. Group 1 is the
# directive with surrounding whitespace stripped and group 2 is the included
# path. The \s* padding allows us to ignore any whitespace and only focus on
# the captured groups when comparing between files.
# A raw string is used so that '\s' reaches the regex engine untouched instead
# of relying on Python passing an unrecognized string escape through.
INCLUDE_REGEX = re.compile(r'^\s*(#include\s+["<](.*?)[">])\s*$')
|
| +
|
def HasSuffix(file_name, suffix):
  """Returns True when the extension of |file_name| is exactly |suffix|.

  The extension is whatever os.path.splitext() reports, so |suffix| is
  expected to include the leading dot (e.g. '.h').
  """
  _, extension = os.path.splitext(file_name)
  return extension == suffix
|
| +
|
def IsEmpty(line):
  """Returns True when |line| holds nothing but whitespace (or nothing)."""
  stripped = line.strip()
  return not stripped
|
| +
|
def FindIncludeSet(input_lines, h_path_to_include_set, cc_file_name):
  """Finds and returns the corresponding include set for the given .cc file.

  The first #include line of the file is taken to name the .h file that
  corresponds to the .cc file. Its path is resolved against the current
  working directory and looked up in the passed in map.

  Args:
    input_lines: The lines of the .cc file, in order.
    h_path_to_include_set: Map from .h file path (rooted at the current
        working directory) to the set of paths that header includes.
    cc_file_name: Name of the .cc file; only used in the skip message.

  Returns:
    The include set stored for the header named by the first include, or None
    when that header is not a key of the map or the file has no include line.
  """
  for line in input_lines:
    match = INCLUDE_REGEX.search(line)
    if not match:
      continue
    # Only the first include is ever considered. normpath keeps the lookup
    # from missing on spellings such as "./foo/bar.h" or "foo//bar.h".
    h_file_path = os.path.normpath(
        os.path.join(os.getcwd(), match.group(2)))
    if h_file_path not in h_path_to_include_set:
      # Single-argument print(...) behaves the same on Python 2 and 3.
      print('First include did not match to a known .h file, skipping ' +
            cc_file_name + ', line: ' + match.group(1))
      return None
    return h_path_to_include_set[h_file_path]
  # The file contains no include line at all.
  return None
|
| +
|
def WithoutDuplicates(input_lines, include_set, cc_file_name):
  """Checks every input line and sees if we can remove it based on the contents
  of the given include set.

  A line is dropped when it is an #include whose path is in |include_set|.
  An empty line is also dropped when it directly follows dropped lines and the
  last line that was kept is itself empty, so a fully removed section of
  includes does not leave two consecutive blank lines behind.

  Args:
    input_lines: The lines of the .cc file, in order.
    include_set: Collection of include paths that should be removed.
    cc_file_name: Name of the .cc file; only used in the printed messages.

  Returns what the new contents of the file should be, as a list of lines.
  """
  output_lines = []
  # When a section of includes is completely removed, we want to remove the
  # trailing empty line as well.
  last_copied_line_was_empty = False
  last_line_was_omitted = False
  for line in input_lines:
    match = INCLUDE_REGEX.search(line)
    if match and match.group(2) in include_set:
      # Single-argument print(...) behaves the same on Python 2 and 3.
      print('Removed ' + match.group(1) + ' from ' + cc_file_name)
      last_line_was_omitted = True
    elif (last_copied_line_was_empty and last_line_was_omitted and
          IsEmpty(line)):
      print('Removed empty line from ' + cc_file_name)
      last_line_was_omitted = True
    else:
      last_copied_line_was_empty = IsEmpty(line)
      last_line_was_omitted = False
      output_lines.append(line)
  return output_lines
|
| +
|
def main():
  """Parses the command line and strips duplicate includes from .cc files.

  Walks every target folder (given relative to the current working directory),
  records the includes of each .h file found, and then removes from each .cc
  file the includes that are already present in the header named by the .cc
  file's first include. With --dry-run the removals are only printed and no
  file is modified.
  """
  parser = argparse.ArgumentParser()
  parser.add_argument('--dry-run', action='store_true',
      help='Does not actually remove lines when specified.')
  parser.add_argument('targets', nargs='+',
      help='Relative path to folders to search for duplicate includes in.')
  args = parser.parse_args()

  # A map of header file paths to the includes they contain.
  h_path_to_include_set = {}

  # Simply collects the path of all cc files present.
  cc_file_path_set = set()

  for relative_root in args.targets:
    # Normalized so that a target spelled like "./components/foo" produces the
    # same header paths that joining the working directory with an include
    # path does.
    absolute_root = os.path.normpath(os.path.join(os.getcwd(), relative_root))
    for dir_path, _, file_name_list in os.walk(absolute_root):
      for file_name in file_name_list:
        file_path = os.path.join(dir_path, file_name)
        if HasSuffix(file_name, H_FILE_SUFFIX):
          # By manually adding the set instead of using defaultdict we can
          # avoid warning about missing .h files when the .h file has no
          # includes.
          include_set = set()
          with open(file_path) as fh:
            for line in fh:
              match = INCLUDE_REGEX.search(line)
              if match:
                include_set.add(match.group(2))
          h_path_to_include_set[file_path] = include_set
        elif HasSuffix(file_name, CC_FILE_SUFFIX):
          cc_file_path_set.add(file_path)

  # Sorted so the printed report comes out in a stable order.
  for cc_file_path in sorted(cc_file_path_set):
    cc_file_name = os.path.basename(cc_file_path)
    with open(cc_file_path) as fh:
      input_lines = fh.readlines()
    include_set = FindIncludeSet(input_lines, h_path_to_include_set,
                                 cc_file_name)
    # None means no matching header was found; an empty set means the header
    # has no includes, so there is nothing that could be removed.
    if not include_set:
      continue
    output_lines = WithoutDuplicates(input_lines, include_set, cc_file_name)
    # Only reopen for writing when something was actually removed, so files
    # that need no change are never rewritten (and need not be writable).
    if not args.dry_run and output_lines != input_lines:
      with open(cc_file_path, 'w') as fh:
        fh.writelines(output_lines)
|
| +
|
# Run the tool only when executed as a script; main()'s return value becomes
# the process exit status.
if __name__ == '__main__':
  exit_status = main()
  sys.exit(exit_status)
|
|
|