# Copyright 2014 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# This file was originally copied from the syzygy project available at
# https://github.com/google/syzygy.
| 17 """A utility script for checking out subdirectories of many GIT repositories | |
| 18 to specified locations, like is possible with SVN and gclient. This uses a | |
| 19 combination of GIT, sparse-checkout, shallow-clone and filesystem junctions. | |
| 20 | |
| 21 For each dependency in a 'gitdeps' file this script will checkout one | |
| 22 subdirectory of one repository into a specified location. The input is as | |
| 23 follows: | |
| 24 | |
| 25 - The user specifies a local destination for the checkout. | |
| 26 - The user specifies a source repository. | |
| 27 - The user specifies a list of subdirectories of the repository to get. | |
| 28 - The user specifies a revision. | |
| 29 | |
| 30 The checkout works as follows: | |
| 31 | |
| 32 - An empty git checkout is initialized in the cache directory. This will be | |
| 33 in a subfolder with an essentially random name. | |
| 34 - The specified repository is added as a remote to that repo. | |
| 35 - A sparse-checkout directive is added to select only the desired | |
| 36 subdirectories. | |
| 37 - The repository is cloned using a depth of 1 (no history, only the actual | |
| 38 contents of the desired revision). | |
| 39 - The destination directories are created as junctions pointing to the | |
| 40 desired subdirectory of the checkout in the cache directory. | |
| 41 | |
| 42 The script maintains its state in the root of the cache directory, allowing it | |
| 43 to reuse checkout directories when possible. | |
| 44 """ | |

import ast
import glob
import hashlib
import logging
import optparse
import os
import random
import re
import subprocess
import threading


_LOGGER = logging.getLogger(os.path.basename(__file__))


# Matches a SHA1 hash used as a git revision.
_GIT_SHA1_RE = re.compile('^[A-Fa-f0-9]{40}$')


def _ParseCommandLine():
  """Parses the command-line and returns an options structure."""
  option_parser = optparse.OptionParser()
  option_parser.add_option('--cache-dir', type='string',
      default='.gitdeps-cache',
      help='The directory to be used for storing cache files. Defaults to '
           '.gitdeps-cache in the current working directory.')
  option_parser.add_option('--output-dir', type='string', default='.',
      help='The directory to be used as the root of all output. Defaults to '
           'the current working directory.')
  option_parser.add_option('--dry-run', action='store_true', default=False,
      help='If true then will simply list actions that would be performed.')
  option_parser.add_option('--force', action='store_true', default=False,
      help='If true then will force the checkout to be completely rebuilt.')
  option_parser.add_option('--verbose', dest='log_level', action='store_const',
      default=logging.INFO, const=logging.DEBUG,
      help='Enables verbose logging.')
  option_parser.add_option('--quiet', dest='log_level', action='store_const',
      default=logging.INFO, const=logging.ERROR,
      help='Disables all output except for errors.')

  options, args = option_parser.parse_args()

  # Configure logging.
  logging.basicConfig(level=options.log_level)

  # Set default values.
  if not args:
    # Default to checking for a file in the current working directory.
    _LOGGER.info('Defaulting to using GITDEPS in current working directory.')
    args = ['GITDEPS']

  # Validate arguments and options.
  if not os.path.isdir(options.output_dir):
    option_parser.error('Output directory does not exist: %s' %
                        options.output_dir)
  for path in args:
    if not os.path.exists(path):
      option_parser.error('Missing dependency file: %s' % path)

  # Normalize local paths for prettier output.
  options.cache_dir = os.path.normpath(os.path.abspath(options.cache_dir))
  options.output_dir = os.path.normpath(os.path.abspath(options.output_dir))

  return options, args


class RepoOptions(object):
  """Light object used for shuttling around information about a dependency."""

  def __init__(self):
    self.repository = None
    self.revision = None
    self.output_dir = None
    self.remote_dirs = []
    self.deps_file = None
    self.checkout_dir = None
    self.recurse = False

  def __str__(self):
    """Stringifies this object for debugging."""
    return ('RepoOptions(repository=%r, revision=%r, output_dir=%r, '
            'remote_dirs=%r, deps_file=%r, checkout_dir=%r, recurse=%r)' % (
                self.repository, self.revision, self.output_dir,
                self.remote_dirs, self.deps_file, self.checkout_dir,
                self.recurse))


def _ParseRepoOptions(cache_dir, root_output_dir, deps_file_path, key, value):
  """Given the |root_output_dir| specified on the command line, a |key| and
  |value| pair from a GITDEPS file, and the path of the deps file, generates
  a corresponding RepoOptions object. The |key| is the output path of the
  checkout relative to |root_output_dir|, and |value| consists of a
  (repository URL, remote directories, revision hash) tuple, with an optional
  fourth element carrying extra settings. This can raise an Exception on
  failure.
  """
  bad = False
  if ((type(value) != list and type(value) != tuple) or len(value) < 3 or
      len(value) > 4 or (type(value[1]) != list and type(value[1]) != tuple)):
    bad = True
  if len(value) == 4 and type(value[3]) != dict:
    bad = True
  if bad:
    _LOGGER.error('Invalid dependency tuple: %s', value)
    raise Exception('Invalid dependency tuple: %s' % (value,))

  # Always use lowercase SHA1 hashes for consistency.
  refspec = value[2]
  if _GIT_SHA1_RE.match(refspec):
    refspec = refspec.lower()

  repo_options = RepoOptions()
  repo_options.output_dir = os.path.normpath(os.path.abspath(os.path.join(
      root_output_dir, key)))
  repo_options.repository = value[0]
  repo_options.remote_dirs = value[1]
  repo_options.revision = refspec
  repo_options.deps_file = deps_file_path

  # Parse additional options.
  if len(value) > 3:
    repo_options.recurse = value[3].get('recurse', False) == True

  # Create a unique name for the checkout in the cache directory. Make the
  # output directory relative to the cache directory so that they can be
  # moved around together.
  output_dir_rel = os.path.relpath(repo_options.output_dir,
                                   root_output_dir).lower()
  if output_dir_rel.startswith('..'):
    raise Exception('Invalid output directory: %s' % key)
  n = hashlib.md5(output_dir_rel).hexdigest()
  repo_options.checkout_dir = os.path.abspath(
      os.path.join(cache_dir, n, 'src'))

  return repo_options


def _EnsureDirectoryExists(path, comment_name, dry_run):
  """Ensures that the given |path| exists. Only actually creates the directory
  if |dry_run| is False. |comment_name| is used during logging of this
  operation.
  """
  if comment_name:
    comment_name += ' '
  else:
    comment_name = ''
  if not os.path.exists(path):
    _LOGGER.debug('Creating %sdirectory: %s', comment_name, path)
    if not dry_run:
      os.makedirs(path)


def _GetCasedFilename(filename):
  """Returns the full case-sensitive filename for the given |filename|. If the
  path does not exist, returns the original |filename| as is.
  """
  pattern = '%s[%s]' % (filename[:-1], filename[-1])
  filenames = glob.glob(pattern)
  if not filenames:
    return filename
  return filenames[0]


def _Shell(*cmd, **kw):
  """Runs |cmd|, returns the results from Popen(cmd).communicate(). Additional
  keyword arguments are passed on to subprocess.Popen. If |stdout| and |stderr|
  are not specified, they default to subprocess.PIPE. If |dry_run| is not
  specified it defaults to True. The command is only actually run if |dry_run|
  is False. This can raise a RuntimeError on failure.
  """
  if 'cwd' in kw:
    _LOGGER.debug('Executing %s in "%s".', cmd, kw['cwd'])
  else:
    _LOGGER.debug('Executing %s.', cmd)
  if kw.get('dry_run', True):
    return ('', '')
  kw.pop('dry_run', None)
  dump_on_error = kw.pop('dump_on_error', False)

  kw['shell'] = True
  kw.setdefault('stdout', subprocess.PIPE)
  kw.setdefault('stderr', subprocess.PIPE)
  prog = subprocess.Popen(cmd, **kw)

  stdout, stderr = prog.communicate()
  if prog.returncode != 0:
    if dump_on_error:
      print stdout
      print stderr
    raise RuntimeError('Command "%s" returned %d.' % (cmd, prog.returncode))
  return (stdout, stderr)


def _IsGitCheckoutRoot(path):
  """Return true if the given |path| is the root of a git checkout."""
  return os.path.exists(os.path.join(path, '.git'))


# Matches a GIT config file section header, and grabs the name of the section
# in the first group. Used by _GetGitOrigin.
_GIT_CONFIG_SECTION_RE = re.compile(r'^\s*\[(.*?)\]\s*$')
# Matches the URL line from a 'remote' section of a GIT config. Used by
# _GetGitOrigin.
_GIT_CONFIG_REMOTE_URL_RE = re.compile(r'^\s*url\s*=\s*(.*?)\s*$')


def _GetGitOrigin(path):
  """Returns the URL of the 'origin' remote for the git repo in |path|.
  Returns None if the 'origin' remote doesn't exist. Raises an IOError if
  |path| doesn't exist or is not a git repo.
  """
  section = None
  for line in open(os.path.join(path, '.git', 'config'), 'rb'):
    m = _GIT_CONFIG_SECTION_RE.match(line)
    if m:
      section = m.group(1)
      continue

    # We only care about the 'origin' configuration.
    if section != 'remote "origin"':
      continue

    m = _GIT_CONFIG_REMOTE_URL_RE.match(line)
    if m:
      return m.group(1).strip()

  return None


def _GetGitHead(repo):
  """Returns the hash of the head of the git repo's local checkout.

  Raises:
    IOError: if the repo's checkout directory doesn't exist or is not a git
        repository.
  """
  with open(os.path.join(repo.checkout_dir, '.git', 'HEAD'), 'rb') as head:
    return head.read().strip()


def _GetGitFetchHead(repo):
  """Returns the hash of the latest fetched revision.

  Raises:
    IOError: if the repo's checkout directory doesn't exist or is not a git
        repository.
    KeyError: if the fetched head of the remote repository is not found in the
        local checkout.
  """
  path = os.path.join(repo.checkout_dir, '.git', 'FETCH_HEAD')
  with open(path, 'rb') as heads_file:
    for line in heads_file:
      if not line.strip():
        continue
      head, repo_url = line.strip().split()
      if repo_url == repo.repository:
        return head
  raise KeyError('Did not find fetched head for %s in %s' %
                 (repo.repository, path))


def _NormalizeGitPath(path):
  """Given a |path| in a GIT repository (relative to its root), normalizes it
  so it will match only that exact path in a sparse checkout.
  """
  path = path.strip()
  if not path.startswith('/'):
    path = '/' + path
  if not path.endswith('/'):
    path += '/'
  return path


def _RenameCheckout(path, dry_run):
  """Renames the checkout in |path| so that it can be subsequently deleted.
  Only actually does the work if |dry_run| is False. Returns the path of the
  renamed checkout directory. Raises an Exception on failure.
  """

  def _RenameCheckoutImpl(path, dry_run):
    if dry_run:
      return path + '-old-dryrun'
    attempts = 0
    while attempts < 10:
      newpath = '%s-old-%04d' % (path, random.randint(0, 999))
      try:
        os.rename(path, newpath)
        return newpath
      except WindowsError:
        attempts += 1
    raise Exception('Unable to rename checkout directory: %s' % path)

  newpath = _RenameCheckoutImpl(path, dry_run)
  _LOGGER.debug('Renamed checkout directory: %s', newpath)
  return newpath


def _DeleteCheckout(path, dry_run):
  """Deletes the checkout in |path|. Only actually deletes the checkout if
  |dry_run| is False.
  """
  _LOGGER.info('Deleting checkout directory: %s', path)
  if dry_run:
    return
  _Shell('rmdir', '/S', '/Q', path, dry_run=False)


def _GenerateSparseCheckoutPathAndContents(repo):
  """Generates the path to the sparse checkout file, and the desired
  contents. Returns a tuple of (path, contents). |repo| is a RepoOptions
  object.
  """
  sparse_file = os.path.join(repo.checkout_dir, '.git', 'info',
                             'sparse-checkout')
  if not repo.remote_dirs:
    contents = '*\n'
  else:
    contents = ''.join(_NormalizeGitPath(d) + '\n'
                       for d in repo.remote_dirs)
  return (sparse_file, contents)


def _HasValidSparseCheckoutConfig(repo):
  """Determines if the GIT checkout described by the RepoOptions |repo| has a
  valid sparse-checkout configuration. Returns True or False.
  """
  (sparse_file, contents) = _GenerateSparseCheckoutPathAndContents(repo)
  try:
    return open(sparse_file, 'rb').read() == contents
  except IOError:
    return False


def _CreateCheckout(path, repo, dry_run):
  """Creates a checkout in the provided |path|. The |path| must not already
  exist. Uses the repository configuration from the provided |repo| RepoOptions
  object. Only actually creates the checkout if |dry_run| is False.
  """
  # We expect the directory not to exist, as this is a fresh checkout we are
  # creating.
  if not dry_run:
    if os.path.exists(path):
      raise Exception('Checkout directory already exists: %s' % path)

  _LOGGER.info('Creating checkout directory: %s', path)
  if not dry_run:
    os.makedirs(path)

  _LOGGER.debug('Initializing the checkout.')
  _Shell('git', 'init', cwd=path, dry_run=dry_run)
  _Shell('git', 'remote', 'add', 'origin', repo.repository, cwd=path,
         dry_run=dry_run)
  _Shell('git', 'config', 'core.sparsecheckout', 'true', cwd=path,
         dry_run=dry_run)
  if not dry_run:
    _LOGGER.debug('Creating sparse checkout configuration file for '
                  'directories: %s', repo.remote_dirs)
    (sparse_file, contents) = _GenerateSparseCheckoutPathAndContents(repo)
    with open(sparse_file, 'wb') as io:
      io.write(contents)


def _UpdateCheckout(path, repo, dry_run):
  """Updates a GIT checkout in |path| by pulling down a specific revision
  from its remote, as configured by RepoOptions |repo|. Only actually runs if
  |dry_run| is False.
  """
  try:
    # If the repo has a revision specified, try a checkout first. If this fails
    # then we'll actually need to fetch.
    if _GIT_SHA1_RE.match(repo.revision):
      _LOGGER.info('Trying to checkout revision %s.', repo.revision)
      _Shell('git', 'checkout', repo.revision, cwd=path,
             dry_run=dry_run)
      return
  except RuntimeError:
    pass

  # Fetch the revision and then check it out. Let output go to screen rather
  # than be buffered.
  _LOGGER.info('Fetching revision %s.', repo.revision)
  _Shell('git', 'fetch', '--depth=1', 'origin', repo.revision,
         cwd=path, dry_run=dry_run, stdout=None, stderr=None)
  new_rev = (_GetGitFetchHead(repo) if repo.revision == 'HEAD'
             else repo.revision)
  _LOGGER.info('Checking out revision %s.', new_rev)
  _Shell('git', 'checkout', new_rev, cwd=path,
         dry_run=dry_run, stdout=None, stderr=None)


# Used by _GetJunctionInfo to extract information about junctions.
_DIR_JUNCTION_RE = re.compile(r'^.*<JUNCTION>\s+(.+)\s+\[(.+)\]$')
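# The regex matches 'dir' output lines of the (illustrative) form:
#   03/07/2014  05:15 PM    <JUNCTION>     foo [C:\cache\...\src\foo]
# capturing the junction's name and its target.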


# TODO(chrisha): This is ugly, and there has to be a better way!
def _GetJunctionInfo(junction):
  """Returns the target of a junction, if it exists, None otherwise."""
  dirname = os.path.dirname(junction)
  basename = os.path.basename(junction)
  try:
    stdout, dummy_stderr = _Shell('dir', '/AL', '/N', dirname, dry_run=False)
  except RuntimeError:
    return None

  for line in stdout.splitlines(False):
    m = _DIR_JUNCTION_RE.match(line)
    if not m:
      continue
    if m.group(1).lower() == basename.lower():
      return m.group(2)

  return None


def _EnsureJunction(cache_dir, target_dir, options, repo):
  """Ensures that the appropriate junction exists from the configured output
  directory to the specified sub-directory of the GIT checkout.
  """
  # Ensure that the target directory was created.
  target_cache_dir = _GetCasedFilename(os.path.normpath(
      os.path.join(cache_dir, target_dir)))
  if not options.dry_run and not os.path.isdir(target_cache_dir):
    raise Exception('Checkout does not contain the desired remote folder.')

  # Ensure the parent directory exists before checking if the junction needs to
  # be created.
  output_dir = os.path.normpath(os.path.join(repo.output_dir, target_dir))
  _EnsureDirectoryExists(
      os.path.dirname(output_dir), 'junction', options.dry_run)

  # Determine if the link needs to be created.
  create_link = True
  if os.path.exists(output_dir):
    dest = _GetJunctionInfo(output_dir)

    # If the junction is valid nothing needs to be done. If it points to the
    # wrong place or isn't a junction then delete it and let it be remade.
    if dest == target_cache_dir:
      _LOGGER.debug('Junction is up to date.')
      create_link = False
    else:
      if dest:
        _LOGGER.info('Erasing existing junction: %s', output_dir)
      else:
        _LOGGER.info('Deleting existing directory: %s', output_dir)
      _Shell('rmdir', '/S', '/Q', output_dir, dry_run=options.dry_run)

  if create_link:
    _LOGGER.info('Creating output junction: %s', output_dir)
    _Shell('mklink', '/J', output_dir, target_cache_dir,
           dry_run=options.dry_run)


def _InstallRepository(options, repo):
  """Installs a repository as configured by the options. Assumes that the
  specified cache directory already exists.

  Returns True if the checkout was modified, False otherwise.
  """

  _LOGGER.debug('Processing directories "%s" from repository "%s".',
                repo.remote_dirs, repo.repository)

  # Ensure the output directory's *parent* exists.
  output_dirname = os.path.dirname(repo.output_dir)
  output_basename = os.path.basename(repo.output_dir)
  _EnsureDirectoryExists(output_dirname, 'output', options.dry_run)

  # Get the properly cased names for the output directories.
  output_dirname = _GetCasedFilename(output_dirname)
  repo.output_dir = os.path.join(output_dirname, output_basename)

  # These are the two expensive steps that may need to occur. Depending on the
  # state of the checkout we may not need to perform both of them. We assume
  # initially that everything needs to be done, unless proven otherwise.
  create_checkout = True
  update_checkout = True

  # If the cache directory exists then look up the repo and the revision and
  # see what needs to be updated.
  threads = []
  if os.path.exists(repo.checkout_dir):
    keep_cache_dir = False

    # Only run these checks if we're not in 'force' mode. Otherwise, we
    # deliberately turf the cache directory and start from scratch.
    if not options.force and _IsGitCheckoutRoot(repo.checkout_dir):
      # Get the repo origin.
      repo_url = _GetGitOrigin(repo.checkout_dir)
      if (repo_url == repo.repository and
          _HasValidSparseCheckoutConfig(repo)):
        _LOGGER.debug('Checkout is for correct repository and subdirectory.')
        keep_cache_dir = True
        create_checkout = False

        # Get the checked out revision.
        revhash = _GetGitHead(repo)
        if revhash == repo.revision:
          _LOGGER.debug('Checkout is already up to date.')
          update_checkout = False

    if not keep_cache_dir:
      # The old checkout directory is renamed and erased in a separate thread
      # so that the new checkout can start immediately.
      _LOGGER.info('Erasing stale checkout directory: %s', repo.checkout_dir)

      # Any existing junctions to this repo must be removed otherwise the
      # rename may fail.
      for d in repo.remote_dirs:
        j = os.path.abspath(os.path.join(repo.output_dir, d))
        _RemoveOrphanedJunction(options, j)

      newpath = _RenameCheckout(repo.checkout_dir, options.dry_run)
      thread = threading.Thread(target=_DeleteCheckout,
                                args=(newpath, options.dry_run))
      threads.append(thread)
      thread.start()

  # Create and update the checkout as necessary.
  if create_checkout:
    _CreateCheckout(repo.checkout_dir, repo, options.dry_run)
  else:
    _LOGGER.debug('Reusing checkout directory: %s', repo.checkout_dir)
  if update_checkout:
    _UpdateCheckout(repo.checkout_dir, repo, options.dry_run)

  # Ensure the junctions exist.
  if repo.remote_dirs:
    for remote_dir in repo.remote_dirs:
      _EnsureJunction(repo.checkout_dir, remote_dir, options, repo)
  else:
    _EnsureJunction(repo.checkout_dir, '', options, repo)

  # Join any worker threads that are ongoing.
  for thread in threads:
    thread.join()

  # Return True if any modifications were made.
  return create_checkout or update_checkout


def _WriteIfChanged(path, contents, dry_run):
  """Writes |contents| to the file at |path| if it doesn't already contain
  exactly |contents|. Only actually writes the file if |dry_run| is False.
  """
  if os.path.exists(path):
    d = open(path, 'rb').read()
    if d == contents:
      _LOGGER.debug('Contents unchanged, not writing file: %s', path)
      return

  _LOGGER.info('Writing file: %s', path)
  if not dry_run:
    open(path, 'wb').write(contents)


def _RecurseRepository(options, repo):
  """Recursively follows dependencies in the given repository."""
  # Only run if there's an appropriate DEPS file.
  deps = os.path.isfile(os.path.join(repo.checkout_dir, 'DEPS'))
  gitdeps = os.path.isfile(os.path.join(repo.checkout_dir, '.DEPS.git'))
  if not deps and not gitdeps:
    _LOGGER.debug('No deps file found in repository: %s', repo.repository)
    return

  # Generate the .gclient solution file.
  cache_dir = os.path.dirname(os.path.abspath(repo.checkout_dir))
  gclient_file = os.path.join(cache_dir, '.gclient')
  deps_file = 'DEPS'
  if gitdeps:
    deps_file = '.DEPS.git'
  solutions = [
    {
      'name': 'src',
      'url': repo.repository,
      'managed': False,
      'custom_deps': [],
      'deps_file': deps_file,
      'safesync_url': '',
    }
  ]
  solutions = 'solutions=%s' % repr(solutions)
  _WriteIfChanged(gclient_file, solutions, options.dry_run)
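  # Note: the .gclient file lands in the cache entry directory, the parent of
  # the 'src' checkout. 'gclient' searches upwards from its working directory
  # for a .gclient file, so invoking it inside the checkout should pick this
  # file up.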

  # Invoke 'gclient' on the sub-repository.
  _Shell('gclient', 'sync', cwd=repo.checkout_dir, dry_run=options.dry_run)


def _FindGlobalVariableInAstTree(tree, name, functions=None):
  """Finds and evaluates the global assignment of the variable |name| in the
  AST |tree|. Allows the evaluation of some functions, as defined in
  |functions|.
  """
  if functions is None:
    functions = {}

  class FunctionEvaluator(ast.NodeTransformer):
    """A tree transformer that evaluates permitted functions."""

    def visit_BinOp(self, binop_node):
      """Is called for BinOp nodes. We only support string additions."""
      if type(binop_node.op) != ast.Add:
        return binop_node
      left = ast.literal_eval(self.visit(binop_node.left))
      right = ast.literal_eval(self.visit(binop_node.right))
      value = left + right
      new_node = ast.Str(s=value)
      new_node = ast.copy_location(new_node, binop_node)
      return new_node

    def visit_Call(self, call_node):
      """Evaluates function calls that return a single string as output."""
      func_name = call_node.func.id
      if func_name not in functions:
        return call_node
      func = functions[func_name]

      # Evaluate the arguments. We don't care about starargs, keywords or
      # kwargs.
      args = [ast.literal_eval(self.visit(arg)) for arg in
              call_node.args]

      # Now evaluate the function.
      value = func(*args)
      new_node = ast.Str(s=value)
      new_node = ast.copy_location(new_node, call_node)
      return new_node

  # Look for assignment nodes.
  for node in tree.body:
    if type(node) != ast.Assign:
      continue
    # Look for assignment in the 'store' context, to a variable with
    # the given name.
    for target in node.targets:
      if type(target) != ast.Name:
        continue
      if type(target.ctx) != ast.Store:
        continue
      if target.id == name:
        value = FunctionEvaluator().visit(node.value)
        value = ast.fix_missing_locations(value)
        value = ast.literal_eval(value)
        return value


def _ParseDepsFile(path):
  """Parses a DEPS-like file at the given |path|."""
  # Utility function for performing variable expansions.
  vars_dict = {}
  def _Var(s):
    return vars_dict[s]

  contents = open(path, 'rb').read()
  tree = ast.parse(contents, path)
  vars_dict = _FindGlobalVariableInAstTree(tree, 'vars')
  deps_dict = _FindGlobalVariableInAstTree(
      tree, 'deps', functions={'Var': _Var})
  return deps_dict


def _RemoveFile(options, path):
  """Removes the provided file. If it doesn't exist, raises an Exception."""
  _LOGGER.debug('Removing file: %s', path)
  if not os.path.isfile(path):
    raise Exception('Path does not exist: %s' % path)

  if not options.dry_run:
    os.remove(path)


def _RemoveOrphanedJunction(options, junction):
  """Removes an orphaned junction at the path |junction|. If the path exists
  but is not a junction, raises an Exception; a path that doesn't exist at
  all is simply ignored.
  """
  _LOGGER.debug('Removing orphaned junction: %s', junction)
  absdir = os.path.join(options.output_dir, junction)
  if not os.path.exists(absdir):
    _LOGGER.debug('Junction path does not exist, ignoring.')
    return
  if not _GetJunctionInfo(absdir):
    _LOGGER.error('Path is not a junction: %s', absdir)
    raise Exception('Path is not a junction: %s' % absdir)
  _Shell('rmdir', '/S', '/Q', absdir, dry_run=options.dry_run)

  reldir = os.path.dirname(junction)
  while reldir:
    absdir = os.path.join(options.output_dir, reldir)
    if os.listdir(absdir):
      return
    _LOGGER.debug('Removing empty parent directory of junction: %s', absdir)
    _Shell('rmdir', '/S', '/Q', absdir, dry_run=options.dry_run)
    reldir = os.path.dirname(reldir)


def _GetCacheDirEntryVersion(path):
  """Returns the version of the cache directory entry, -1 if invalid."""

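  # Entry layouts, for reference:
  #   version 0: <entry>/.git/...      (checkout at the entry root)
  #   version 1: <entry>/src/.git/...  (checkout in a 'src' subdirectory,
  #              leaving room for a sibling '.gclient' used when recursing)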
  git = os.path.join(path, '.git')
  src = os.path.join(path, 'src')
  gclient = os.path.join(path, '.gclient')

  # Version 0 contains a '.git' directory and no '.gclient' entry.
  if os.path.isdir(git):
    if os.path.exists(gclient):
      return -1
    return 0

  # Version 1 contains a 'src' directory and no '.git' entry.
  if os.path.isdir(src):
    if os.path.exists(git):
      return -1
    return 1

  # Anything else is invalid.
  return -1


def _GetCacheDirEntries(cache_dir):
  """Returns the list of entries in the given |cache_dir|."""
  entries = []
  for path in os.listdir(cache_dir):
    if not re.match('^[a-z0-9]{32}$', path):
      continue
    entries.append(path)
  return entries


def _GetCacheDirVersion(cache_dir):
  """Returns the version of the cache directory."""
  # If it doesn't exist then it's clearly the latest version.
  if not os.path.exists(cache_dir):
    return 1

  cache_version = None
  for path in _GetCacheDirEntries(cache_dir):
    repo = os.path.join(cache_dir, path)
    if not os.path.isdir(repo):
      return -1

    entry_version = _GetCacheDirEntryVersion(repo)
    if entry_version == -1:
      return -1

    if cache_version is None:
      cache_version = entry_version
    elif cache_version != entry_version:
      return -1

  # If there are no entries in the cache it may as well be the latest version.
  if cache_version is None:
    return 1

  return cache_version


def _GetJunctionStatePath(options):
  """Returns the junction state file path."""
  return os.path.join(options.cache_dir, '.gitdeps_junctions')


def _ReadJunctions(options):
  """Reads the list of junctions as a dictionary."""
  state_path = _GetJunctionStatePath(options)
  old_junctions = {}
  if os.path.exists(state_path):
    _LOGGER.debug('Loading list of existing junctions.')
    for j in open(state_path, 'rb'):
      old_junctions[j.strip()] = True

  return old_junctions


def _Rename(src, dst, dry_run):
  """Renames |src| to |dst|. Only actually performs the rename if |dry_run|
  is False.
  """
  _LOGGER.debug('Renaming "%s" to "%s".', src, dst)
  if not dry_run:
    os.rename(src, dst)


def _UpgradeCacheDir(options):
  """Upgrades the cache directory format to the most modern layout.

  In dry-run mode this raises an Exception if an upgrade would be required,
  as the remainder of the run can't be meaningfully simulated.
  """
  cache_version = _GetCacheDirVersion(options.cache_dir)
  if cache_version == 1:
    _LOGGER.debug('No cache directory upgrade required.')
    return

  _LOGGER.debug('Upgrading cache directory from version 0 to 1.')

  _LOGGER.debug('Removing all junctions.')
  junctions = _ReadJunctions(options).keys()
  junctions = sorted(junctions, key=lambda j: len(j), reverse=True)
  for junction in junctions:
    _RemoveOrphanedJunction(options, junction)
  _RemoveFile(options, _GetJunctionStatePath(options))

  for entry in _GetCacheDirEntries(options.cache_dir):
    _LOGGER.debug('Upgrading cache entry "%s".', entry)
    tmp_entry = os.path.abspath(os.path.join(
        options.cache_dir,
        'TMP%d-%04d' % (os.getpid(), random.randint(0, 999))))
    abs_entry = os.path.abspath(os.path.join(options.cache_dir, entry))
    src = os.path.join(abs_entry, 'src')
    _Rename(abs_entry, tmp_entry, options.dry_run)
    _EnsureDirectoryExists(abs_entry, 'cache entry', options.dry_run)
    _Rename(tmp_entry, src, options.dry_run)

  if options.dry_run:
    _LOGGER.debug('Cache needs upgrading, unable to further simulate dry-run.')
    raise Exception('Cache upgrade required; dry-run cannot continue.')


def main():
  options, args = _ParseCommandLine()

  # Upgrade the cache directory if necessary.
  _UpgradeCacheDir(options)

  # Ensure the cache directory exists and get the full properly cased path to
  # it.
  _EnsureDirectoryExists(options.cache_dir, 'cache', options.dry_run)
  options.cache_dir = _GetCasedFilename(options.cache_dir)

  # Read junctions that have been written in previous runs.
  state_path = _GetJunctionStatePath(options)
  old_junctions = _ReadJunctions(options)

  # Parse each deps file in order, and extract the dependencies, looking for
  # conflicts in the output directories.
  output_dirs = {}
  all_deps = []
  for deps_file in args:
    deps = _ParseDepsFile(deps_file)
    for key, value in deps.iteritems():
      repo_options = _ParseRepoOptions(
          options.cache_dir, options.output_dir, deps_file, key, value)
      if repo_options.output_dir in output_dirs:
        other_repo_options = output_dirs[repo_options.output_dir]
        _LOGGER.error('Conflicting output directory: %s',
                      repo_options.output_dir)
        _LOGGER.error('First specified in file: %s',
                      other_repo_options.deps_file)
        _LOGGER.error('And then specified in file: %s', repo_options.deps_file)
        raise Exception('Conflicting output directory: %s' %
                        repo_options.output_dir)
      output_dirs[repo_options.output_dir] = repo_options
      all_deps.append(repo_options)
  output_dirs = {}

  # Handle each dependency, in order of shortest path names first. This ensures
  # that nested dependencies are handled properly.
  checkout_dirs = {}
  deps = sorted(all_deps, key=lambda x: len(x.deps_file))
  junctions = []
  for repo in deps:
    changes_made = _InstallRepository(options, repo)
    checkout_dirs[repo.checkout_dir] = changes_made

    new_junction_dirs = repo.remote_dirs if repo.remote_dirs else ['']
    for new_junction_dir in new_junction_dirs:
      junction = os.path.relpath(
          os.path.join(repo.output_dir, new_junction_dir),
          options.output_dir)
      old_junctions.pop(junction, None)
      # Write each junction as we create it. This allows for recovery from
      # partial runs.
      if not options.dry_run:
        open(state_path, 'ab').write(junction + '\n')
      junctions.append(junction)

  # Clean up orphaned junctions if there are any.
  if old_junctions:
    _LOGGER.debug('Removing orphaned junctions.')
    for j in old_junctions.iterkeys():
      _RemoveOrphanedJunction(options, j)

  # Output the final list of junctions.
  _LOGGER.debug('Writing final list of junctions.')
  if not options.dry_run:
    with open(state_path, 'wb') as io:
      for j in sorted(junctions):
        io.write(j)
        io.write('\n')

  # Iterate all directories in the cache directory. Any that we didn't
  # specifically create or update should be cleaned up. Do this in parallel
  # so things are cleaned up as soon as possible.
  threads = []
  for path in glob.glob(os.path.join(options.cache_dir, '*')):
    if os.path.join(path, 'src') not in checkout_dirs:
      _LOGGER.debug('Erasing orphaned checkout directory: %s', path)
      thread = threading.Thread(target=_DeleteCheckout,
                                args=(path, options.dry_run))
      threads.append(thread)
      thread.start()
  for thread in threads:
    thread.join()

  # Recursively process other dependencies.
  for repo in all_deps:
    if not repo.recurse:
      continue
    if not checkout_dirs[repo.checkout_dir] and not options.force:
      continue
    _RecurseRepository(options, repo)


if __name__ == '__main__':
  main()