tools/remove_duplicate_includes.py - Issue 2379993006: Created a tool to remove duplicate includes between h and cc files.

Side by Side Diff: tools/remove_duplicate_includes.py

Issue 2379993006: Created a tool to remove duplicate includes between h and cc files. (Closed)

Patch Set: More updates for Max. Created 4 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
(Empty)
	1 #!/usr/bin/env python

	2 # Copyright 2016 The Chromium Authors. All rights reserved.

	3 # Use of this source code is governed by a BSD-style license that can be

	4 # found in the LICENSE file.

	5

	6 '''This script will search through the target folder specified and try to find
	Dirk Pranke 2016/10/04 20:37:04: Use triple-double quotes (""") for docstrings, even though we prefer single quotes to double quotes for other strings (see PEP-257). skym 2016/10/04 23:45:11: Done.
	7 duplicate includes from h and cc files, and remove them from the cc files. The

	8 current/working directory needs to be chromium_checkout/src/ when this tool is

	9 run.

	10

	11 Usage: remove_duplicate_includes.py --dry-run components/foo components/bar
	Dirk Pranke 2016/10/04 20:37:04: Nit: s/Usage/Usage:/. skym 2016/10/04 23:45:11: Done.
	12 '''

	13

	14 import argparse

	15 import collections

	16 import logging

	17 import os

	18 import re

	19 import sys

	20

	21 # This could be generalized if desired, and moved to command line arguments.

	22 H_FILE_SUFFIX = '.h'

	23 CC_FILE_SUFFIX = '.cc'

	24

	25 # The \s should allow us to ignore any whitespace and only focus on the group

	26 # captured when comparing between files.

	27 INCLUDE_REGEX = re.compile('^\s(#include\s+[\"<](.?)[\">])\s*$')

	28

	29 def HasSuffix(file_name, suffix):

	30 return os.path.splitext(file_name)[1] == suffix

	31

	32 def IsEmpty(line):

	33 return not line.strip()

	34

	35 def FindIncludeSet(input_lines, h_path_to_include_set, cc_file_name):

	36 '''

	37 Find and returns the corresponding include set for the given .cc file. This is
	Dirk Pranke 2016/10/04 20:37:04 s/Find/Finds/. Also, add a carriage return and a b s/Find/Finds/. Also, add a carriage return and a blank line after the first sentence, to make it into two paragraphs (where possible, docstrings should have a single sentence as the first line/paragraph for easier summarization). skym 2016/10/04 23:45:11 Done. Show quoted text On 2016/10/04 20:37:04, Dirk Pranke (slow) wrote: > s/Find/Finds/. Also, add a carriage return and a blank line after the first > sentence, to make it into two paragraphs (where possible, docstrings should have > a single sentence as the first line/paragraph for easier summarization). Done.
	38 done by finding the first include in the file and then trying to look up an .h

	39 file in the passed in map. If not present, then None is returned immediately.

	40 '''

	41 for line in input_lines:

	42 match = INCLUDE_REGEX.search(line)

	43 # The first include match should be the corresponding .h file, else skip.

	44 if match:

	45 h_file_path = os.path.join(os.getcwd(), match.group(2))

	46 if h_file_path not in h_path_to_include_set:

	47 print 'First include did not match to a known .h file, skipping ' + \

	48 cc_file_name + ', line: ' + match.group(1)

	49 return None

	50 return h_path_to_include_set[h_file_path]

	51

	52 def WithoutDuplicates(input_lines, include_set, cc_file_name):

	53 '''

	54 Checks every input line and sees if we can remove it based on the contents of

	55 the given include set. Returns what the new contents of the file should be.

	56 '''

	57 output_lines = []

	58 # When a section of includes are completely removed, we want to remove the

	59 # trailing empty as well.

	60 lastCopiedLineWasEmpty = False

	61 lastLineWasOmitted = False

	62 for line in input_lines:

	63 match = INCLUDE_REGEX.search(line)

	64 if match and match.group(2) in include_set:

	65 print 'Removed ' + match.group(1) + ' from ' + cc_file_name

	66 lastLineWasOmitted = True

	67 elif lastCopiedLineWasEmpty and lastLineWasOmitted and IsEmpty(line):

	68 print 'Removed empty line from ' + cc_file_name

	69 lastLineWasOmitted = True

	70 else:

	71 lastCopiedLineWasEmpty = IsEmpty(line)

	72 lastLineWasOmitted = False

	73 output_lines.append(line)

	74 return output_lines

	75

	76 def main():

	77 parser = argparse.ArgumentParser()

	78 parser.add_argument('--dry-run', action='store_true',

	79 help='Does not actually remove lines when specified.')

	80 parser.add_argument('targets', nargs='+',

	81 help='Relative path to folders to search for duplicate includes in.')

	82 args = parser.parse_args()

	83

	84 # A map of header file paths to the includes they contain.

	85 h_path_to_include_set = {}

	86

	87 # Simply collects the path of all cc files present.

	88 cc_file_path_set = set()

	89

	90 for relative_root in args.targets:

	91 absolute_root = os.path.join(os.getcwd(), relative_root)

	92 for dir_path, dir_name_list, file_name_list in os.walk(absolute_root):

	93 for file_name in file_name_list:

	94 file_path = os.path.join(dir_path, file_name)

	95 if HasSuffix(file_name, H_FILE_SUFFIX):

	96 # By manually adding the set instead of using defaultdict we can avoid

	97 # warning about missing .h files when the .h file has no includes.

	98 h_path_to_include_set[file_path] = set()

	99 with open(file_path) as file_handle:

	100 for line in file_handle:

	101 match = INCLUDE_REGEX.search(line)

	102 if match:

	103 h_path_to_include_set[file_path].add(match.group(2))

	104 elif HasSuffix(file_name, CC_FILE_SUFFIX):

	105 cc_file_path_set.add(file_path)

	106

	107 for cc_file_path in cc_file_path_set:

	108 cc_file_name = os.path.basename(cc_file_path)

	109 with open(cc_file_path, 'r' if args.dry_run else 'r+') as file_handle:
	Dirk Pranke 2016/10/04 20:37:04 Nit: I'd just use 'fh' or 'fp' for the handle here Nit: I'd just use 'fh' or 'fp' for the handle here; both are common enough names that they are easily recognized and that makes the code shorter as a result. skym 2016/10/04 23:45:11 Done. Show quoted text On 2016/10/04 20:37:04, Dirk Pranke (slow) wrote: > Nit: I'd just use 'fh' or 'fp' for the handle here; both are common enough names > that they are easily recognized and that makes the code shorter as a result. Done.
	110 # Read out all lines and reset file position to allow overwriting.

	111 input_lines = file_handle.readlines()

	112 file_handle.seek(0)

	113 include_set = FindIncludeSet(input_lines, h_path_to_include_set,

	114 cc_file_name)

	115 if include_set:

	116 output_lines = WithoutDuplicates(input_lines, include_set, cc_file_name)

	117 if not args.dry_run:

	118 file_handle.writelines(output_lines)

	119 file_handle.truncate()

	120

	121 if __name__ == '__main__':

	122 sys.exit(main())

OLD	NEW

« no previous file with comments | « no previous file | no next file » | no next file with comments »