OLD | NEW |
(Empty) | |
| 1 #!/usr/bin/env python |
| 2 # Copyright 2015 The Chromium Authors. All rights reserved. |
| 3 # Use of this source code is governed by a BSD-style license that can be |
| 4 # found in the LICENSE file. |
| 5 |
| 6 import argparse |
| 7 import collections |
| 8 import difflib |
| 9 import os.path |
| 10 import re |
| 11 import sys |
| 12 |
# Header categories returned by ClassifyHeader(). The numeric values double as
# sort keys when includes are re-ordered, so C system headers come before C++
# system headers, which come before user headers.
# <foo.h>: angle-bracket header whose name ends in '.h'.
_HEADER_TYPE_C_SYSTEM = 0
# <foo>: angle-bracket header whose name does not end in '.h'.
_HEADER_TYPE_CXX_SYSTEM = 1
# "foo.h": double-quoted header.
_HEADER_TYPE_USER = 2
# Anything that is not a decorated header name.
_HEADER_TYPE_INVALID = -1
| 17 |
| 18 |
def ClassifyHeader(decorated_name):
  """Maps a decorated header name to one of the _HEADER_TYPE_* constants.

  Args:
    decorated_name: A header name including its delimiters, e.g. '<vector>'
        or '"base/logging.h"'.

  Returns:
    _HEADER_TYPE_C_SYSTEM, _HEADER_TYPE_CXX_SYSTEM or _HEADER_TYPE_USER for
    the first predicate that accepts the name, or _HEADER_TYPE_INVALID when
    none of them does.
  """
  # The predicates are tried in this order; the first hit wins.
  classifiers = (
      (IsCSystemHeader, _HEADER_TYPE_C_SYSTEM),
      (IsCXXSystemHeader, _HEADER_TYPE_CXX_SYSTEM),
      (IsUserHeader, _HEADER_TYPE_USER),
  )
  for predicate, header_type in classifiers:
    if predicate(decorated_name):
      return header_type
  return _HEADER_TYPE_INVALID
| 28 |
| 29 |
def UndecoratedName(decorated_name):
  """Returns decorated_name without its first and last characters.

  For a decorated header such as '<vector>' or '"a/b.h"' this strips the
  surrounding delimiters and yields the bare header path.
  """
  bare_name = decorated_name[1:-1]
  return bare_name
| 32 |
| 33 |
def IsSystemHeader(decorated_name):
  """Returns True if decorated_name looks like '<name>'.

  The name must start with '<', end with '>' and have at least one character
  in between. Empty strings and a bare '<>' are rejected instead of raising
  IndexError or being accepted as a header with an empty name (which
  _INCLUDE_RE could never parse back).

  Args:
    decorated_name: A string, normally a header name with its delimiters.

  Returns:
    A bool.
  """
  return (len(decorated_name) > 2 and decorated_name.startswith('<') and
          decorated_name.endswith('>'))
| 36 |
| 37 |
def IsCSystemHeader(decorated_name):
  """Returns True for an angle-bracket header whose name ends in '.h'."""
  if not IsSystemHeader(decorated_name):
    return False
  return UndecoratedName(decorated_name).endswith('.h')
| 41 |
| 42 |
def IsCXXSystemHeader(decorated_name):
  """Returns True for an angle-bracket header whose name lacks a '.h' suffix."""
  if not IsSystemHeader(decorated_name):
    return False
  has_c_suffix = UndecoratedName(decorated_name).endswith('.h')
  return not has_c_suffix
| 46 |
| 47 |
def IsUserHeader(decorated_name):
  """Returns True if decorated_name looks like '"name"'.

  The name must start and end with a double quote and have at least one
  character in between. This rejects the empty string (which previously
  raised IndexError) and a lone '"' (whose first and last character are the
  same quote, so it used to be accepted as a header).

  Args:
    decorated_name: A string, normally a header name with its delimiters.

  Returns:
    A bool.
  """
  return (len(decorated_name) > 2 and decorated_name.startswith('"') and
          decorated_name.endswith('"'))
| 50 |
| 51 |
# Matches a line that is empty or consists only of whitespace.
_EMPTY_LINE_RE = re.compile(r'\s*$')
# Matches a line whose first non-whitespace characters are '//'. Group 1 is
# the comment text following the '//'.
_COMMENT_RE = re.compile(r'\s*//(.*)$')
# Matches an #include or #import directive, optionally followed by a trailing
# '//' comment.
#   group 1: the directive name ('import' or 'include').
#   group 2: the decorated header name, delimiters included. Note that the
#            opening and closing delimiters are not required to pair up.
#   group 3: the text after the trailing '//', or None if there is no comment.
_INCLUDE_RE = re.compile(
    r'\s*#(import|include)\s+([<"].+?[">])\s*?(?://(.*))?$')
| 56 |
| 57 |
def FindIncludes(lines):
  """Locates the first block of #include lines in Google/Chrome style C++.

  Blank lines and '//' comment lines are skipped wherever they appear. Other
  lines are ignored until the first #include/#import is seen; after that, the
  first line that is neither blank, a comment, nor an include ends the block.

  Args:
    lines: A list of strings, one per source line.

  Returns:
    begin, end: The index of the first include line and one past the index of
    the last include line in the block. Both are negative if no include was
    found.
  """
  first = last = -1
  for index, text in enumerate(lines):
    # TODO(dcheng): #define and #undef should probably also be allowed.
    if _EMPTY_LINE_RE.match(text) or _COMMENT_RE.match(text):
      continue
    if _INCLUDE_RE.match(text):
      if first < 0:
        first = index
      last = index + 1
    elif first >= 0:
      # A non-include line after the block has started terminates it. Before
      # the block starts, such lines are simply skipped.
      break
  return first, last
| 82 |
| 83 |
class Include(object):
  """A single #include/#import directive plus the text that travels with it."""

  def __init__(self, decorated_name, directive, preamble, inline_comment):
    """Builds an Include.

    Args:
      decorated_name: The header name with its delimiters, e.g. '<vector>'.
      directive: The directive name without the '#', e.g. 'include'.
      preamble: A list of source lines that directly precede the directive
          and should stay attached to it.
      inline_comment: Text following a trailing '//' on the directive line,
          or None.
    """
    header_type = ClassifyHeader(decorated_name)
    assert header_type != _HEADER_TYPE_INVALID
    self.decorated_name = decorated_name
    self.directive = directive
    self.preamble = preamble
    self.inline_comment = inline_comment
    self.header_type = header_type
    # Flipped to True by MarkPrimaryInclude() when applicable.
    self.is_primary_header = False

  def __repr__(self):
    fields = (self.decorated_name, self.directive, self.preamble,
              self.inline_comment, self.header_type, self.is_primary_header)
    return str(fields)

  def ShouldInsertNewline(self, previous_include):
    """Returns True if a blank line belongs between previous_include and self.

    That is the case whenever the two differ in primary-header status or in
    header type.
    """
    return not (
        self.is_primary_header == previous_include.is_primary_header and
        self.header_type == previous_include.header_type)

  def ToSource(self):
    """Returns the preamble and directive as right-stripped source lines."""
    pieces = list(self.preamble)
    directive_line = '#%s %s' % (self.directive, self.decorated_name)
    if self.inline_comment:
      directive_line = '%s //%s' % (directive_line, self.inline_comment)
    pieces.append(directive_line)
    return [piece.rstrip() for piece in pieces]
| 112 |
| 113 |
def ParseIncludes(lines):
  """Converts a block of source lines into Include objects.

  Non-blank lines that are not include directives are collected as the
  preamble of the next directive.

  Args:
    lines: A list of strings representing C++ source code.

  Returns:
    A list of Include objects, or None if the structure cannot be preserved:
    either a preamble is separated from its directive by a blank line, or
    trailing preamble lines are not followed by any directive.
  """
  parsed = []
  pending = []
  for text in lines:
    if _EMPTY_LINE_RE.match(text):
      if pending:
        # The pending preamble is only flushed when a directive is matched, so
        # a blank line here means the preamble is detached from its #include.
        # Give up, since there is no way to preserve that layout.
        return None
      continue
    match = _INCLUDE_RE.match(text)
    if match:
      directive, name, comment = match.groups()
      parsed.append(Include(name, directive, pending, comment))
      pending = []
    else:
      pending.append(text)
  # Leftover preamble lines have no directive to attach to.
  return None if pending else parsed
| 140 |
| 141 |
def _DecomposePath(filename):
  """Decomposes a filename into a list of directories and the basename.

  Args:
    filename: A relative or absolute path.

  Returns:
    dirs, basename: dirs is the list of directory components from outermost
    to innermost. For an absolute path the filesystem root (e.g. '/') is the
    first element. basename is the final path component with its extension
    removed.
  """
  dirs = []
  dirname, basename = os.path.split(filename)
  while dirname:
    parent, last = os.path.split(dirname)
    if parent == dirname:
      # os.path.split() cannot reduce a filesystem root any further: it keeps
      # returning the root itself as the head. Record the root and stop,
      # otherwise this loop would never terminate on absolute paths.
      dirs.append(dirname)
      break
    dirs.append(last)
    dirname = parent
  dirs.reverse()
  # Remove the extension from the basename.
  basename = os.path.splitext(basename)[0]
  return dirs, basename
| 153 |
| 154 |
def MarkPrimaryInclude(includes, filename):
  """Finds the primary header in includes and marks it as such.

  Per the style guide, if moo.cc's main purpose is to implement or test the
  functionality in moo.h, moo.h should be ordered first in the includes.

  At most one include is marked, by setting its is_primary_header attribute to
  True. The order of the includes list itself is left untouched.

  Args:
    includes: A list of Include objects.
    filename: The filename to use as the basis for finding the primary header.
  """
  # Header files never have a primary include.
  if filename.endswith('.h'):
    return

  basis_dirs, basis_name = _DecomposePath(filename)
  PLATFORM_SUFFIX = (
      r'(?:_(?:android|aura|chromeos|ios|linux|mac|ozone|posix|win|x11))?')
  TEST_SUFFIX = r'(?:_(?:browser|interactive_ui|ui|unit)?test)?'

  # The includes are searched in reverse order of length. Even though
  # matching is fuzzy, moo_posix.h should take precedence over moo.h when
  # considering moo_posix.cc. sorted() is used rather than list.sort() so the
  # caller's list is not reordered as a side effect.
  candidates = sorted(includes, key=lambda i: -len(i.decorated_name))
  for include in candidates:
    if include.header_type != _HEADER_TYPE_USER:
      continue
    test_dirs, test_name = _DecomposePath(
        UndecoratedName(include.decorated_name))

    # If the basename to test is longer than the basis, just skip it and
    # continue. moo.c should never match against moo_posix.h.
    if len(test_name) > len(basis_name):
      continue

    # The basename in the two paths being compared need to fuzzily match.
    # This allows for situations where moo_posix.cc implements the interfaces
    # defined in moo.h. The two suffixes are accepted in either order.
    escaped_basename = re.escape(test_name)
    if not (re.match(escaped_basename + PLATFORM_SUFFIX + TEST_SUFFIX + '$',
                     basis_name) or
            re.match(escaped_basename + TEST_SUFFIX + PLATFORM_SUFFIX + '$',
                     basis_name)):
      continue

    # The topmost directory name must match, and the rest of the directory path
    # should be 'substantially similar'.
    s = difflib.SequenceMatcher(None, test_dirs, basis_dirs)
    first_matched = False
    total_matched = 0
    for match in s.get_matching_blocks():
      # Only the first block reported can satisfy total_matched == 0 with a
      # non-zero size; it counts as a topmost-directory match when it starts at
      # index 0 in both lists. (When both lists are empty, the zero-size
      # terminating block at (0, 0) satisfies this as well.)
      if total_matched == 0 and match.a == 0 and match.b == 0:
        first_matched = True
      total_matched += match.size

    if not first_matched:
      continue

    # 'Substantially similar' is defined to be:
    # - no more than two differences
    # - at least one match besides the topmost directory
    total_differences = abs(total_matched - len(test_dirs)) + abs(
        total_matched - len(basis_dirs))
    # Note: total_differences != 0 is mainly intended to allow more succinct
    # tests (otherwise tests with just a basename would always trip the
    # total_matched < 2 check).
    if total_differences != 0 and (total_differences > 2 or total_matched < 2):
      continue

    include.is_primary_header = True
    return
| 225 |
| 226 |
def SerializeIncludes(includes):
  """Renders Include objects back into C++ source lines.

  The includes are emitted in the order given, so the list is expected to be
  sorted according to Google style already. An empty line is emitted between
  two neighbours whenever the later one's ShouldInsertNewline() says so.

  Args:
    includes: a list of Include objects.

  Returns:
    A list of strings representing C++ source code.
  """
  rendered = []
  earlier = None
  for current in includes:
    # There is nothing to separate from before the first include.
    needs_separator = bool(earlier) and current.ShouldInsertNewline(earlier)
    if needs_separator:
      rendered.append('')
    rendered += current.ToSource()
    earlier = current
  return rendered
| 249 |
| 250 |
def InsertHeaderIntoSource(filename, source, decorated_name):
  """Adds an #include for decorated_name to source unless already present.

  The first block of includes in the source is located, parsed, extended with
  the new header, re-sorted (primary header first, then by header type, then
  by decorated name) and written back in place of the original block.

  Args:
    filename: The name of the source file.
    source: A string containing the contents of the source file.
    decorated_name: The decorated name of the header to insert.

  Returns:
    None on failure, the unmodified source if the header is already included,
    or the modified source text on success.
  """
  lines = source.splitlines()
  begin, end = FindIncludes(lines)

  # No #includes in this file. Just give up.
  # TODO(dcheng): Be more clever and insert it after the file-level comment or
  # include guard as appropriate.
  if begin < 0:
    return None

  includes = ParseIncludes(lines[begin:end])
  if not includes:
    return None
  if any(existing.decorated_name == decorated_name for existing in includes):
    # Nothing to do.
    return source

  MarkPrimaryInclude(includes, filename)
  includes.append(Include(decorated_name, 'include', [], None))
  includes.sort(key=lambda entry: (not entry.is_primary_header,
                                   entry.header_type, entry.decorated_name))
  lines[begin:end] = SerializeIncludes(includes)
  # The extra empty element restores the newline at the end of the file, which
  # splitlines() dropped.
  return '\n'.join(lines + [''])
| 288 |
| 289 |
def main():
  """Inserts the header given by --header into every file on the command line.

  Files that cannot be processed are reported on stdout and skipped. Files
  that already include the header are left untouched on disk.

  Returns:
    1 if --header is not a decorated header name, 0 otherwise.
  """
  parser = argparse.ArgumentParser(
      description='Mass insert a new header into a bunch of files.')
  parser.add_argument(
      '--header',
      help='The decorated filename of the header to insert (e.g. "a" or <a>)',
      required=True)
  parser.add_argument('files', nargs='+')
  args = parser.parse_args()
  # Single-argument print(...) calls behave identically under Python 2 and 3.
  if ClassifyHeader(args.header) == _HEADER_TYPE_INVALID:
    print('--header argument must be a decorated filename, e.g.')
    print(' --header "<utility>"')
    print('or')
    print(' --header \'"moo.h"\'')
    return 1
  print('Inserting #include %s...' % args.header)
  for filename in args.files:
    # open() replaces the Python-2-only file() builtin.
    with open(filename, 'r') as f:
      source = f.read()
    new_source = InsertHeaderIntoSource(
        os.path.normpath(filename), source, args.header)
    if not new_source:
      print('Failed to process file: %s' % filename)
      continue
    if new_source == source:
      # The header is already included; avoid a pointless rewrite.
      continue
    with open(filename, 'w') as f:
      f.write(new_source)
  return 0


if __name__ == '__main__':
  sys.exit(main())
OLD | NEW |