tools/remove_duplicate_includes.py - Issue 2379993006: Created a tool to remove duplicate includes between h and cc files.

Side by Side Diff: tools/remove_duplicate_includes.py

Issue 2379993006: Created a tool to remove duplicate includes between h and cc files. (Closed)

Patch Set: Updates for Max's comments. Created 4 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
(Empty)
	1 #!/usr/bin/env python

	2 # Copyright 2016 The Chromium Authors. All rights reserved.

	3 # Use of this source code is governed by a BSD-style license that can be

	4 # found in the LICENSE file.

	5

	6 '''This script will search through the target folder specified and try to find

	7 duplicate includes from h and cc files, and remove them from the cc files. The

	8 current/working directory needs to be chromium_checkout/src/ when this tool is

	9 run.

	10

	11 Usage: remove_duplicate_includes.py [--dry-run] components/foo components/bar

	12 '''

	13

	14 import argparse;

	15 import collections;

	16 import logging

	17 import os;

	18 import re;

	19 import sys

	20

	21 parser = argparse.ArgumentParser()
	maxbogue 2016/10/04 15:13:21 This should be down inside your main() function. This should be down inside your main() function. skym 2016/10/04 16:37:39 Done. Show quoted text On 2016/10/04 15:13:21, maxbogue wrote: > This should be down inside your main() function. Done.
	22 parser.add_argument('--dry-run', action='store_true',

	23 help='Does not actually remove lines when specified.')

	24 parser.add_argument('targets', nargs='+',

	25 help='Relative path to folders to search for duplicate includes in.')

	26 args = parser.parse_args()

	27

	28 # This could be generalized if desired, and moved to command line arguments.

	29 H_FILE_SUFFIX = '.h'

	30 CC_FILE_SUFFIX = '.cc'

	31

	32 # The \s should allow us to ignore any whitespace and only focus on the group

	33 # captured when comparing between files.

	34 INCLUDE_REGEX = re.compile('^\s(#include\s+[\"<](.?)[\">])\s*$')

	35

	36 def HasSuffix(file_name, suffix):

	37 return os.path.splitext(file_name)[1] == suffix

	38

	39 def IsEmpty(line):

	40 return not line.strip()

	41

	42 def FindIncludeSet(input_lines, h_path_to_include_set):

	43 '''

	44 Find and returns the corresponding include set for the given .cc file. This is

	45 done by finding the first include in the file and then trying to look up an .h

	46 file in the passed in map. If not present, then None is returned immediately.

	47 '''

	48 for line in input_lines:

	49 match = INCLUDE_REGEX.search(line)

	50 # The first include match should be the corresponding .h file, else skip.

	51 if match:

	52 h_file_path = os.path.join(os.getcwd(), match.group(2))

	53 if h_file_path not in h_path_to_include_set:

	54 print 'First include did not match to a known .h file, skipping ' + \

	55 cc_file_name + ', line: ' + match.group(1)

	56 return None

	57 return h_path_to_include_set[h_file_path]

	58

	59 def WithoutDuplicates(input_lines, include_set):

	60 '''

	61 Checks every input line and sees if we can remove it based on the contents of

	62 the given include set. Returns what the new contents of the file should be.

	63 '''

	64 output_lines = []

	65 # When a section of includes are completely removed, we want to remove the

	66 # trailing empty as well.

	67 lastCopiedLineWasEmpty = False

	68 lastLineWasOmitted = False

	69 for line in input_lines:

	70 match = INCLUDE_REGEX.search(line)

	71 if match and match.group(2) in include_set:

	72 print 'Removed ' + match.group(1) + ' from ' + cc_file_name

	73 lastLineWasOmitted = True

	74 elif lastCopiedLineWasEmpty and lastLineWasOmitted and IsEmpty(line):

	75 print 'Removed empty line from ' + cc_file_name

	76 lastLineWasOmitted = True

	77 else:

	78 lastCopiedLineWasEmpty = IsEmpty(line)

	79 lastLineWasOmitted = False

	80 output_lines.append(line)

	81 return output_lines

	82
	maxbogue 2016/10/04 15:13:21 All of the "doing stuff" part (instead of just def All of the "doing stuff" part (instead of just defining functions) should be inside an if block like this, I believe: if __name__ == "__main__": <stuff> This lets people import from the file without executing stuff. Even better is to define a main() function and then call that inside such an if, like https://cs.chromium.org/chromium/src/tools/sort-headers.py does. skym 2016/10/04 16:37:39 Done. Show quoted text On 2016/10/04 15:13:21, maxbogue wrote: > All of the "doing stuff" part (instead of just defining functions) should be > inside an if block like this, I believe: > > if __name__ == "__main__": > <stuff> > > This lets people import from the file without executing stuff. Even better is to > define a main() function and then call that inside such an if, like > https://cs.chromium.org/chromium/src/tools/sort-headers.py does. Done.
	83 # A map of header file paths to the includes they contain.

	84 h_path_to_include_set = {}

	85

	86 # Simply collects the path of all cc files present.

	87 cc_file_path_set = set()

	88

	89 for relative_root in args.targets:

	90 absolute_root = os.path.join(os.getcwd(), relative_root)

	91 for dir_path, dir_name_list, file_name_list in os.walk(absolute_root):

	92 for file_name in file_name_list:

	93 file_path = os.path.join(dir_path, file_name)

	94 if HasSuffix(file_name, H_FILE_SUFFIX):

	95 # By manually adding the set instead of using defaultdict we can avoid

	96 # warning about missing .h files when the .h file contains no includes.

	97 h_path_to_include_set[file_path] = set()

	98 with open(file_path) as file_handle:

	99 for line in file_handle:

	100 match = INCLUDE_REGEX.search(line)

	101 if match:

	102 h_path_to_include_set[file_path].add(match.group(2))

	103 elif HasSuffix(file_name, CC_FILE_SUFFIX):

	104 cc_file_path_set.add(file_path)

	105

	106 for cc_file_path in cc_file_path_set:

	107 cc_file_name = os.path.basename(cc_file_path)

	108 with open(cc_file_path, 'r' if args.dry_run else 'r+') as file_handle
	maxbogue 2016/10/04 15:13:21 Shouldn't there be a : at the end of this line? Ho Shouldn't there be a : at the end of this line? How does this work without that...? skym 2016/10/04 16:37:39 You're totally right, it doesn't work! Last minute Show quoted text On 2016/10/04 15:13:21, maxbogue wrote: > Shouldn't there be a : at the end of this line? How does this work without > that...? You're totally right, it doesn't work! Last minute refactoring that didn't get tested. Done.
	109 # Read out all lines and reset file position to allow overwriting.

	110 input_lines = file_handle.readlines()

	111 file_handle.seek(0)

	112 include_set = FindIncludeSet(input_lines, h_path_to_include_set)

	113 if include_set:

	114 output_lines = WithoutDuplicates(input_lines, include_set)

	115 if not args.dry_run:

	116 file_handle.writelines(output_lines)

	117 file_handle.truncate()

OLD	NEW

« no previous file with comments | « no previous file | no next file » | no next file with comments »