Chromium Code Reviews| Index: tools/remove_duplicate_includes.py |
| diff --git a/tools/remove_duplicate_includes.py b/tools/remove_duplicate_includes.py |
| new file mode 100755 |
| index 0000000000000000000000000000000000000000..21fdb5acc2d44f85beaa5c0128dbe6d658b45af9 |
| --- /dev/null |
| +++ b/tools/remove_duplicate_includes.py |
| @@ -0,0 +1,97 @@ |
| +#!/usr/bin/env python |
| +# Copyright 2016 The Chromium Authors. All rights reserved. |
| +# Use of this source code is governed by a BSD-style license that can be |
| +# found in the LICENSE file. |
| + |
| +"""This script will search through the target folder specified and try to find |
| +duplicate includes from h and cc files, and remove them from the cc files. |
| + |
| +Usage: remove_duplicate_includes.py --dry-run components/foo components/bar |
| +""" |
| + |
| +import argparse; |
| +import collections; |
| +import os; |
| +import re; |
| + |
| +parser = argparse.ArgumentParser() |
| +parser.add_argument('--dry-run', action='store_true', |
| + help='Does not actually remove lines when specified.') |
| +parser.add_argument('--span-dirs', action='store_true', |
| + help='Mapping between h and cc files will not be limited to same folders.') |
| +parser.add_argument('targets', nargs='+', |
| + help="Folders to search for duplicate includes in.") |
| +args = parser.parse_args() |
| + |
| +# This could be generalized if desired, and moved to command line arguments. |
|
maxbogue
2016/09/30 17:40:31
generalized
skym
2016/09/30 19:18:12
Done.
|
| +H_FILE_SUFFIX = ".h" |
| +CC_FILE_SUFFIX = ".cc" |
| + |
| +def HasSuffix(file_name, suffix): |
| + return os.path.splitext(file_name)[1] == suffix |
| + |
| +def IsEmpty(line): |
| + return not line.strip() |
| + |
| +# The \s should allow us to ignore any whitespace and only focus on the group |
| +# captured when comparing between files. |
| +regex = re.compile('^\s*#include\s+(.*?)\s*$') |
| + |
| +# The key here depends on the span-dirs flag: if specified, it will only be |
|
maxbogue
2016/09/30 17:40:31
"A map of header files to the includes they contai
skym
2016/09/30 19:18:12
Done.
|
| +# the file name, and this allows mapping between files not in the same |
| +# folder. If this flag is not present then the full path is used. |
| +h_file_to_include_set = collections.defaultdict(set) |
| + |
| +# Each entry is always the full path to a cc file, regardless of the flag. |
|
maxbogue
2016/09/30 17:40:31
Explain the purpose of this variable. I think it c
skym
2016/09/30 19:18:12
Done.
|
| +cc_file_path_set = set() |
| + |
| +for relative_root in args.targets: |
| + absolute_root = os.path.join(os.getcwd(), relative_root) |
| + for (dir_path, dir_name_list, file_name_list) in os.walk(absolute_root): |
|
maxbogue
2016/09/30 17:40:31
I haven't worked with python in a while but I'm pr
skym
2016/09/30 19:18:12
Done.
|
| + for file_name in file_name_list: |
| + file_path = os.path.join(dir_path, file_name) |
| + if HasSuffix(file_name, H_FILE_SUFFIX): |
| + # Can be either name or path depending on flag. |
| + file_key = file_name if args.span_dirs else file_path |
| + with open(file_path) as file_handle: |
| + for line in file_handle: |
| + match = regex.search(line) |
| + if match: |
| + h_file_to_include_set[file_key].add(match.group(1)) |
| + elif HasSuffix(file_name, CC_FILE_SUFFIX): |
| + cc_file_path_set.add(file_path) |
| + |
| +for cc_file_path in cc_file_path_set: |
| + # The lookup key must match how h files were indexed, which depends on the flag. |
| + cc_file_name = os.path.basename(cc_file_path) |
| + cc_file_key = cc_file_name if args.span_dirs else cc_file_path |
| + h_file_key = os.path.splitext(cc_file_key)[0] + H_FILE_SUFFIX |
|
maxbogue
2016/09/30 17:40:31
You could strip "_unittest" from the end here if i
skym
2016/09/30 19:18:13
Oooooh, actually maybe I should be looking at the
|
| + |
| + if h_file_key in h_file_to_include_set: |
| + include_set = h_file_to_include_set[h_file_key] |
| + |
| + # Read out all the data and reset file position to start overwriting. |
| + file_handle = open(cc_file_path, "r" if args.dry_run else "r+") |
| + data = file_handle.readlines() |
|
maxbogue
2016/09/30 17:40:31
I'd probably just call this "lines" tbh
skym
2016/09/30 19:18:12
How about line_list?
|
| + file_handle.seek(0) |
| + |
| + # When a section of includes is completely removed, we want to remove the |
| + # trailing empty line as well. |
| + lastCopiedLineWasEmpty = False |
| + lastLineWasOmitted = False |
| + for line in data: |
| + match = regex.search(line) |
| + if match is not None and match.group(1) in include_set: |
| + print "Removed " + match.group(1) + " " + cc_file_name |
| + lastLineWasOmitted = True |
| + elif lastCopiedLineWasEmpty and lastLineWasOmitted and IsEmpty(line): |
| + print "Removed empty line " + cc_file_name |
| + lastLineWasOmitted = True |
| + else: |
| + lastCopiedLineWasEmpty = IsEmpty(line) |
| + lastLineWasOmitted = False |
| + if not args.dry_run: |
| + file_handle.write(line) |
| + if not args.dry_run: |
| + file_handle.truncate() |
| + file_handle.close() |