# Copyright 2014 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# This file was originally copied from the syzygy project, available at
# https://github.com/google/syzygy.
17 """A utility script for checking out subdirectories of many GIT repositories | |
18 to specified locations, like is possible with SVN and gclient. This uses a | |
19 combination of GIT, sparse-checkout, shallow-clone and filesystem junctions. | |
20 | |
21 For each dependency in a 'gitdeps' file this script will checkout one | |
22 subdirectory of one repository into a specified location. The input is as | |
23 follows: | |
24 | |
25 - The user specifies a local destination for the checkout. | |
26 - The user specifies a source repository. | |
27 - The user specifies a list of subdirectories of the repository to get. | |
28 - The user specifies a revision. | |
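
For example, a single 'gitdeps' entry maps an output path (relative to the
output directory) to a (repository URL, subdirectories, revision[, options])
tuple. All of the values below are purely illustrative:

  deps = {
    'third_party/foo': (
      'https://example.com/foo.git',
      ['src/include', 'src/lib'],
      '0123456789abcdef0123456789abcdef01234567',
      {'recurse': True},
    ),
  }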

The checkout works as follows:

- An empty git checkout is initialized in the cache directory. This will be
  in a subfolder with an essentially random name.
- The specified repository is added as a remote to that repo.
- A sparse-checkout directive is added to select only the desired
  subdirectories.
- The repository is cloned using a depth of 1 (no history, only the actual
  contents of the desired revision).
- The destination directories are created as junctions pointing to the
  desired subdirectory of the checkout in the cache directory.

The script maintains its state in the root of the cache directory, allowing it
to reuse checkout directories when possible.
"""

import ast
import glob
import hashlib
import logging
import optparse
import os
import random
import re
import subprocess
import threading


_LOGGER = logging.getLogger(os.path.basename(__file__))


# Matches a SHA1 hash used as a git revision.
_GIT_SHA1_RE = re.compile('^[A-Fa-f0-9]{40}$')


def _ParseCommandLine():
  """Parses the command-line and returns an options structure."""
  option_parser = optparse.OptionParser()
  option_parser.add_option('--cache-dir', type='string',
      default='.gitdeps-cache',
      help='The directory to be used for storing cache files. Defaults to '
           '.gitdeps-cache in the current working directory.')
  option_parser.add_option('--output-dir', type='string', default='.',
      help='The directory to be used as the root of all output. Defaults to '
           'the current working directory.')
  option_parser.add_option('--dry-run', action='store_true', default=False,
      help='If true then will simply list actions that would be performed.')
  option_parser.add_option('--force', action='store_true', default=False,
      help='If true then will force the checkout to be completely rebuilt.')
  option_parser.add_option('--verbose', dest='log_level', action='store_const',
      default=logging.INFO, const=logging.DEBUG,
      help='Enables verbose logging.')
  option_parser.add_option('--quiet', dest='log_level', action='store_const',
      default=logging.INFO, const=logging.ERROR,
      help='Disables all output except for errors.')

  options, args = option_parser.parse_args()

  # Configure logging.
  logging.basicConfig(level=options.log_level)

  # Set default values.
  if not args:
    # Default to checking for a file in the current working directory.
    _LOGGER.info('Defaulting to using GITDEPS in current working directory.')
    args = ['GITDEPS']

  # Validate arguments and options.
  if not os.path.isdir(options.output_dir):
    option_parser.error('Output directory does not exist: %s' %
        options.output_dir)
  for path in args:
    if not os.path.exists(path):
      option_parser.error('Missing dependency file: %s' % path)

  # Normalize local paths for prettier output.
  options.cache_dir = os.path.normpath(os.path.abspath(options.cache_dir))
  options.output_dir = os.path.normpath(os.path.abspath(options.output_dir))

  return options, args


class RepoOptions(object):
  """Light object used for shuttling around information about a dependency."""

  def __init__(self):
    self.repository = None
    self.revision = None
    self.output_dir = None
    self.remote_dirs = []
    self.deps_file = None
    self.checkout_dir = None
    self.recurse = False

  def __str__(self):
    """Stringifies this object for debugging."""
    return ('RepoOptions(repository=%s, revision=%s, output_dir=%s, '
            'remote_dirs=%s, deps_file=%s, checkout_dir=%s, recurse=%s)') % (
                repr(self.repository),
                repr(self.revision),
                repr(self.output_dir),
                repr(self.remote_dirs),
                repr(self.deps_file),
                repr(self.checkout_dir),
                repr(self.recurse))


def _ParseRepoOptions(cache_dir, root_output_dir, deps_file_path, key, value):
  """Given the |root_output_dir| specified on the command line, a |key| and
  |value| pair from a GITDEPS file, and the path of the deps file, generates
  a corresponding RepoOptions object. The |key| is the output path of the
  checkout relative to |root_output_dir|, and |value| consists of a
  (repository URL, remote directories, revision hash) tuple, optionally
  followed by a dictionary of additional options. This can raise an Exception
  on failure.
  """
  bad = False
  if ((type(value) != list and type(value) != tuple) or len(value) < 3 or
      len(value) > 4 or (type(value[1]) != list and type(value[1]) != tuple)):
    bad = True
  if len(value) == 4 and type(value[3]) != dict:
    bad = True
  if bad:
    _LOGGER.error('Invalid dependency tuple: %s', value)
    raise Exception()

  # Always use lowercase SHA1 hashes for consistency.
  refspec = value[2]
  if _GIT_SHA1_RE.match(refspec):
    refspec = refspec.lower()

  repo_options = RepoOptions()
  repo_options.output_dir = os.path.normpath(os.path.abspath(os.path.join(
      root_output_dir, key)))
  repo_options.repository = value[0]
  repo_options.remote_dirs = value[1]
  repo_options.revision = refspec
  repo_options.deps_file = deps_file_path

  # Parse additional options.
  if len(value) > 3:
    repo_options.recurse = value[3].get('recurse', False) == True

  # Create a unique name for the checkout in the cache directory. Make the
  # output directory relative to the cache directory so that they can be
  # moved around together.
  output_dir_rel = os.path.relpath(repo_options.output_dir,
                                   root_output_dir).lower()
  if output_dir_rel.startswith('..'):
    raise Exception('Invalid output directory: %s' % key)
  n = hashlib.md5(output_dir_rel).hexdigest()
  repo_options.checkout_dir = os.path.abspath(os.path.join(cache_dir, n, 'src'))

  return repo_options


def _EnsureDirectoryExists(path, comment_name, dry_run):
  """Ensures that the given |path| exists. Only actually creates the directory
  if |dry_run| is False. |comment_name| is used during logging of this
  operation.
  """
  if comment_name:
    comment_name += ' '
  else:
    comment_name = ''
  if not os.path.exists(path):
    _LOGGER.debug('Creating %sdirectory: %s', comment_name, path)
    if not dry_run:
      os.makedirs(path)


def _GetCasedFilename(filename):
  """Returns the full case-sensitive filename for the given |filename|. If the
  path does not exist, returns the original |filename| as is.
  """
  pattern = '%s[%s]' % (filename[:-1], filename[-1])
  filenames = glob.glob(pattern)
  if not filenames:
    return filename
  return filenames[0]


def _Shell(*cmd, **kw):
  """Runs |cmd|, returns the results from Popen(cmd).communicate(). Additional
  keyword arguments are passed on to subprocess.Popen. If |stdout| and |stderr|
  are not specified, they default to subprocess.PIPE. If |dry_run| is not
  specified it defaults to True. The command is only actually run if |dry_run|
  is False. This can raise a RuntimeError on failure.
  """
  if 'cwd' in kw:
    _LOGGER.debug('Executing %s in "%s".', cmd, kw['cwd'])
  else:
    _LOGGER.debug('Executing %s.', cmd)
  if kw.get('dry_run', True):
    return ('', '')
  kw.pop('dry_run', None)
  dump_on_error = kw.pop('dump_on_error', False)

  kw['shell'] = True
  kw.setdefault('stdout', subprocess.PIPE)
  kw.setdefault('stderr', subprocess.PIPE)
  prog = subprocess.Popen(cmd, **kw)

  stdout, stderr = prog.communicate()
  if prog.returncode != 0:
    if dump_on_error:
      print stdout
      print stderr
    raise RuntimeError('Command "%s" returned %d.' % (cmd, prog.returncode))
  return (stdout, stderr)


def _IsGitCheckoutRoot(path):
  """Returns True if the given |path| is the root of a git checkout."""
  return os.path.exists(os.path.join(path, '.git'))


# Matches a GIT config file section header, and grabs the name of the section
# in the first group. Used by _GetGitOrigin.
_GIT_CONFIG_SECTION_RE = re.compile(r'^\s*\[(.*?)\]\s*$')
# Matches the URL line from a 'remote' section of a GIT config. Used by
# _GetGitOrigin.
_GIT_CONFIG_REMOTE_URL_RE = re.compile(r'^\s*url\s*=\s*(.*?)\s*$')


def _GetGitOrigin(path):
  """Returns the URL of the 'origin' remote for the git repo in |path|. Returns
  None if the 'origin' remote doesn't exist. Raises an IOError if |path|
  doesn't exist or is not a git repo.
  """
  section = None
  for line in open(os.path.join(path, '.git', 'config'), 'rb'):
    m = _GIT_CONFIG_SECTION_RE.match(line)
    if m:
      section = m.group(1)
      continue

    # We only care about the 'origin' configuration.
    if section != 'remote "origin"':
      continue

    m = _GIT_CONFIG_REMOTE_URL_RE.match(line)
    if m:
      return m.group(1).strip()

  return None


def _GetGitHead(repo):
  """Returns the hash of the head of the git repo local checkout.

  Raises:
    IOError: if the repo's checkout directory doesn't exist or is not a git
        repository.
  """
  with open(os.path.join(repo.checkout_dir, '.git', 'HEAD'), 'rb') as head:
    return head.read().strip()


def _GetGitFetchHead(repo):
  """Returns the hash of the latest fetched revision.

  Raises:
    IOError: if the repo's checkout directory doesn't exist or is not a git
        repository.
    KeyError: if the fetched head of the remote repository is not found in the
        local checkout.
  """
  path = os.path.join(repo.checkout_dir, '.git', 'FETCH_HEAD')
  with open(path, 'rb') as heads_file:
    for line in heads_file.readlines():
      if not line.strip():
        continue
      head, repo_url = line.strip().split()
      if repo_url == repo.repository:
        return head
  raise KeyError('Did not find fetched head for %s in %s' %
                 (repo.repository, path))


def _NormalizeGitPath(path):
  """Given a |path| in a GIT repository (relative to its root), normalizes it
  so it will match only that exact path in a sparse checkout.
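  For example, 'third_party/foo' becomes '/third_party/foo/'.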
310 """ | |
311 path = path.strip() | |
312 if not path.startswith('/'): | |
313 path = '/' + path | |
314 if not path.endswith('/'): | |
315 path += '/' | |
316 return path | |
317 | |
318 | |
def _RenameCheckout(path, dry_run):
  """Renames the checkout in |path| so that it can be subsequently deleted.
  Only actually does the work if |dry_run| is False. Returns the path of the
  renamed checkout directory. Raises an Exception on failure.
  """

  def _RenameCheckoutImpl(path, dry_run):
    if dry_run:
      return path + '-old-dryrun'
    attempts = 0
    while attempts < 10:
      newpath = '%s-old-%04d' % (path, random.randint(0, 999))
      try:
        os.rename(path, newpath)
        return newpath
      except WindowsError:
        attempts += 1
    raise Exception('Unable to rename checkout directory: %s' % path)

  newpath = _RenameCheckoutImpl(path, dry_run)
  _LOGGER.debug('Renamed checkout directory: %s', newpath)
  return newpath


def _DeleteCheckout(path, dry_run):
  """Deletes the checkout in |path|. Only actually deletes the checkout if
  |dry_run| is False.
  """
  _LOGGER.info('Deleting checkout directory: %s', path)
  if dry_run:
    return
  _Shell('rmdir', '/S', '/Q', path, dry_run=False)


def _GenerateSparseCheckoutPathAndContents(repo):
  """Generates the path to the sparse checkout file, and the desired contents.
  Returns a tuple of (path, contents). |repo| is a RepoOptions object.
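  For example (hypothetical values), remote_dirs=['src/base'] yields contents
  '/src/base/\n', while an empty remote_dirs yields '*\n' (select everything).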
356 """ | |
357 sparse_file = os.path.join(repo.checkout_dir, '.git', 'info', | |
358 'sparse-checkout') | |
359 if not repo.remote_dirs: | |
360 contents = '*\n' | |
361 else: | |
362 contents = ''.join(_NormalizeGitPath(dir) + '\n' | |
363 for dir in repo.remote_dirs) | |
364 return (sparse_file, contents) | |
365 | |
366 | |
def _HasValidSparseCheckoutConfig(repo):
  """Determines if the GIT repo in |repo.checkout_dir| has a valid
  sparse-checkout configuration as configured by the RepoOptions |repo|.
  Returns True or False.
  """
  (sparse_file, contents) = _GenerateSparseCheckoutPathAndContents(repo)
  try:
    if open(sparse_file, 'rb').read() == contents:
      return True
    return False
  except IOError:
    return False


def _CreateCheckout(path, repo, dry_run):
  """Creates a checkout in the provided |path|. The |path| must not already
  exist. Uses the repository configuration from the provided |repo| RepoOptions
  object. Only actually creates the checkout if |dry_run| is False.
  """
  # We expect the directory not to exist, as this is a fresh checkout we are
  # creating.
  if not dry_run:
    if os.path.exists(path):
      raise Exception('Checkout directory already exists: %s' % path)

  _LOGGER.info('Creating checkout directory: %s', path)
  if not dry_run:
    os.makedirs(path)

  _LOGGER.debug('Initializing the checkout.')
  _Shell('git', 'init', cwd=path, dry_run=dry_run)
  _Shell('git', 'remote', 'add', 'origin', repo.repository, cwd=path,
         dry_run=dry_run)
  _Shell('git', 'config', 'core.sparsecheckout', 'true', cwd=path,
         dry_run=dry_run)
  if not dry_run:
    _LOGGER.debug('Creating sparse checkout configuration file for '
                  'directory: %s', repo.remote_dirs)
    (sparse_path, contents) = _GenerateSparseCheckoutPathAndContents(repo)
    with open(sparse_path, 'wb') as io:
      io.write(contents)


def _UpdateCheckout(path, repo, dry_run):
  """Updates a GIT checkout in |path| by pulling down a specific revision
  from it, as configured by RepoOptions |repo|. Only actually runs if
  |dry_run| is False.
  """
  try:
    # If the repo has a revision specified, try a checkout first. If this fails
    # then we'll actually need to fetch.
    if _GIT_SHA1_RE.match(repo.revision):
      _LOGGER.info('Trying to checkout revision %s.', repo.revision)
      _Shell('git', 'checkout', repo.revision, cwd=path,
             dry_run=dry_run)
      return
  except RuntimeError:
    pass

  # Fetch the revision and then check it out. Let output go to screen rather
  # than be buffered.
  _LOGGER.info('Fetching revision %s.', repo.revision)
  _Shell('git', 'fetch', '--depth=1', 'origin', repo.revision,
         cwd=path, dry_run=dry_run, stdout=None, stderr=None)
  new_rev = _GetGitFetchHead(repo) if repo.revision == 'HEAD' else repo.revision
  _LOGGER.info('Checking out revision %s.', new_rev)
  _Shell('git', 'checkout', new_rev, cwd=path,
         dry_run=dry_run, stdout=None, stderr=None)


# Used by _GetJunctionInfo to extract information about junctions.
_DIR_JUNCTION_RE = re.compile(r'^.*<JUNCTION>\s+(.+)\s+\[(.+)\]$')
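# A junction line in the output of 'dir /AL /N' looks roughly like this
# (purely illustrative):
#   2014-01-01  12:00    <JUNCTION>     foo [C:\gitdeps-cache\1a2b\src\foo]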


# TODO(chrisha): This is ugly, and there has to be a better way!
def _GetJunctionInfo(junction):
  """Returns the target of a junction, if it exists, None otherwise."""
  dirname = os.path.dirname(junction)
  basename = os.path.basename(junction)
  try:
    stdout, dummy_stderr = _Shell('dir', '/AL', '/N', dirname, dry_run=False)
  except RuntimeError:
    return

  lines = stdout.splitlines(False)
  for line in lines:
    m = _DIR_JUNCTION_RE.match(line)
    if not m:
      continue
    if m.group(1).lower() == basename.lower():
      return m.group(2)

  return None


def _EnsureJunction(cache_dir, target_dir, options, repo):
  """Ensures that the appropriate junction exists from the configured output
  directory to the specified sub-directory of the GIT checkout.
  """
  # Ensure that the target directory was created.
  target_cache_dir = _GetCasedFilename(os.path.normpath(
      os.path.join(cache_dir, target_dir)))
  if not options.dry_run and not os.path.isdir(target_cache_dir):
    raise Exception('Checkout does not contain the desired remote folder.')

  # Ensure the parent directory exists before checking if the junction needs to
  # be created.
  output_dir = os.path.normpath(os.path.join(repo.output_dir, target_dir))
  _EnsureDirectoryExists(
      os.path.dirname(output_dir), 'junction', options.dry_run)

  # Determine if the link needs to be created.
  create_link = True
  if os.path.exists(output_dir):
    dest = _GetJunctionInfo(output_dir)

    # If the junction is valid nothing needs to be done. If it points to the
    # wrong place or isn't a junction then delete it and let it be remade.
    if dest == target_cache_dir:
      _LOGGER.debug('Junction is up to date.')
      create_link = False
    else:
      if dest:
        _LOGGER.info('Erasing existing junction: %s', output_dir)
      else:
        _LOGGER.info('Deleting existing directory: %s', output_dir)
      _Shell('rmdir', '/S', '/Q', output_dir, dry_run=options.dry_run)

  if create_link:
    _LOGGER.info('Creating output junction: %s', output_dir)
    _Shell('mklink', '/J', output_dir, target_cache_dir,
           dry_run=options.dry_run)


def _InstallRepository(options, repo):
  """Installs a repository as configured by the options. Assumes that the
  specified cache directory already exists.

  Returns True if the checkout was modified, False otherwise.
  """
  _LOGGER.debug('Processing directories "%s" from repository "%s".',
                repo.remote_dirs, repo.repository)

  # Ensure the output directory's *parent* exists.
  output_dirname = os.path.dirname(repo.output_dir)
  output_basename = os.path.basename(repo.output_dir)
  _EnsureDirectoryExists(output_dirname, 'output', options.dry_run)

  # Get the properly cased names for the output directories.
  output_dirname = _GetCasedFilename(output_dirname)
  repo.output_dir = os.path.join(output_dirname, output_basename)

  # These are the basic steps that need to occur. Depending on the state of
  # the checkout we may not need to perform all of them. We assume initially
  # that everything needs to be done, unless proven otherwise.
  create_checkout = True
  update_checkout = True

  # If the cache directory exists then look up the repo and the revision and
  # see what needs to be updated.
  threads = []
  if os.path.exists(repo.checkout_dir):
    keep_cache_dir = False

    # Only run these checks if we're not in 'force' mode. Otherwise, we
    # deliberately turf the cache directory and start from scratch.
    if not options.force and _IsGitCheckoutRoot(repo.checkout_dir):
      # Get the repo origin.
      repo_url = _GetGitOrigin(repo.checkout_dir)
      if (repo_url == repo.repository and
          _HasValidSparseCheckoutConfig(repo)):
        _LOGGER.debug('Checkout is for correct repository and subdirectory.')
        keep_cache_dir = True
        create_checkout = False

        # Get the checked out revision.
        revhash = _GetGitHead(repo)
        if revhash == repo.revision:
          _LOGGER.debug('Checkout is already up to date.')
          update_checkout = False

    if not keep_cache_dir:
      # The old checkout directory is renamed and erased in a separate thread
      # so that the new checkout can start immediately.
      _LOGGER.info('Erasing stale checkout directory: %s', repo.checkout_dir)

      # Any existing junctions to this repo must be removed otherwise the
      # rename may fail.
      for d in repo.remote_dirs:
        j = os.path.abspath(os.path.join(repo.output_dir, d))
        _RemoveOrphanedJunction(options, j)

      newpath = _RenameCheckout(repo.checkout_dir, options.dry_run)
      thread = threading.Thread(target=_DeleteCheckout,
                                args=(newpath, options.dry_run))
      threads.append(thread)
      thread.start()

  # Create and update the checkout as necessary.
  if create_checkout:
    _CreateCheckout(repo.checkout_dir, repo, options.dry_run)
  else:
    _LOGGER.debug('Reusing checkout directory: %s', repo.checkout_dir)
  if update_checkout:
    _UpdateCheckout(repo.checkout_dir, repo, options.dry_run)

  # Ensure the junctions exist.
  if repo.remote_dirs:
    for remote_dir in repo.remote_dirs:
      _EnsureJunction(repo.checkout_dir, remote_dir, options, repo)
  else:
    _EnsureJunction(repo.checkout_dir, '', options, repo)

  # Join any worker threads that are ongoing.
  for thread in threads:
    thread.join()

  # Return True if any modifications were made.
  return create_checkout or update_checkout


def _WriteIfChanged(path, contents, dry_run):
  if os.path.exists(path):
    d = open(path, 'rb').read()
    if d == contents:
      _LOGGER.debug('Contents unchanged, not writing file: %s', path)
      return

  _LOGGER.info('Writing file: %s', path)
  if not dry_run:
    open(path, 'wb').write(contents)


def _RecurseRepository(options, repo):
  """Recursively follows dependencies in the given repository."""
  # Only run if there's an appropriate DEPS file.
  deps = os.path.isfile(os.path.join(repo.checkout_dir, 'DEPS'))
  gitdeps = os.path.isfile(os.path.join(repo.checkout_dir, '.DEPS.git'))
  if not deps and not gitdeps:
    _LOGGER.debug('No deps file found in repository: %s', repo.repository)
    return

  # Generate the .gclient solution file.
  cache_dir = os.path.dirname(os.path.abspath(repo.checkout_dir))
  gclient_file = os.path.join(cache_dir, '.gclient')
  deps_file = 'DEPS'
  if gitdeps:
    deps_file = '.DEPS.git'
  solutions = [
    {
      'name': 'src',
      'url': repo.repository,
      'managed': False,
      'custom_deps': [],
      'deps_file': deps_file,
      'safesync_url': '',
    }
  ]
  solutions = 'solutions=%s' % repr(solutions)
  _WriteIfChanged(gclient_file, solutions, options.dry_run)

  # Invoke 'gclient' on the sub-repository.
  _Shell('gclient', 'sync', cwd=repo.checkout_dir, dry_run=options.dry_run)


def _FindGlobalVariableInAstTree(tree, name, functions=None):
  """Finds and evaluates the global assignment of the variable |name| in the
  AST |tree|. Allows the evaluation of a limited set of functions, as defined
  in |functions|.
  """
  if functions is None:
    functions = {}

  class FunctionEvaluator(ast.NodeTransformer):
    """A tree transformer that evaluates permitted functions."""

    def visit_BinOp(self, binop_node):
      """Is called for BinOp nodes. We only support string additions."""
      if type(binop_node.op) != ast.Add:
        return binop_node
      left = ast.literal_eval(self.visit(binop_node.left))
      right = ast.literal_eval(self.visit(binop_node.right))
      value = left + right
      new_node = ast.Str(s=value)
      new_node = ast.copy_location(new_node, binop_node)
      return new_node

    def visit_Call(self, call_node):
      """Evaluates function calls that return a single string as output."""
      func_name = call_node.func.id
      if func_name not in functions:
        return call_node
      func = functions[func_name]

      # Evaluate the arguments. We don't care about starargs, keywords or
      # kwargs.
      args = [ast.literal_eval(self.visit(arg)) for arg in
              call_node.args]

      # Now evaluate the function.
      value = func(*args)
      new_node = ast.Str(s=value)
      new_node = ast.copy_location(new_node, call_node)
      return new_node

  # Look for assignment nodes.
  for node in tree.body:
    if type(node) != ast.Assign:
      continue
    # Look for assignment in the 'store' context, to a variable with
    # the given name.
    for target in node.targets:
      if type(target) != ast.Name:
        continue
      if type(target.ctx) != ast.Store:
        continue
      if target.id == name:
        value = FunctionEvaluator().visit(node.value)
        value = ast.fix_missing_locations(value)
        value = ast.literal_eval(value)
        return value


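# A minimal DEPS-like file that _ParseDepsFile understands might look like the
# following (all values illustrative). Entries in 'vars' are expanded inside
# 'deps' via the Var() function:
#
#   vars = {'git_base': 'https://example.com'}
#   deps = {
#     'third_party/foo': (
#       Var('git_base') + '/foo.git',
#       ['some/subdir'],
#       '0123456789abcdef0123456789abcdef01234567',
#     ),
#   }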
def _ParseDepsFile(path):
  """Parses a DEPS-like file at the given |path|."""
  # Utility function for performing variable expansions.
  vars_dict = {}
  def _Var(s):
    return vars_dict[s]

  contents = open(path, 'rb').read()
  tree = ast.parse(contents, path)
  vars_dict = _FindGlobalVariableInAstTree(tree, 'vars')
  deps_dict = _FindGlobalVariableInAstTree(
      tree, 'deps', functions={'Var': _Var})
  return deps_dict


def _RemoveFile(options, path):
  """Removes the provided file. If it doesn't exist, raises an Exception."""
  _LOGGER.debug('Removing file: %s', path)
  if not os.path.isfile(path):
    raise Exception('Path does not exist: %s' % path)

  if not options.dry_run:
    os.remove(path)


def _RemoveOrphanedJunction(options, junction):
  """Removes an orphaned junction at the path |junction|, relative to the
  output directory. If the path exists but is not a junction, raises an
  Exception; if the path doesn't exist, it is silently ignored.
  """
  _LOGGER.debug('Removing orphaned junction: %s', junction)
  absdir = os.path.join(options.output_dir, junction)
  if not os.path.exists(absdir):
    _LOGGER.debug('Junction path does not exist, ignoring.')
    return
  if not _GetJunctionInfo(absdir):
    _LOGGER.error('Path is not a junction: %s', absdir)
    raise Exception()
  _Shell('rmdir', '/S', '/Q', absdir, dry_run=options.dry_run)

  reldir = os.path.dirname(junction)
  while reldir:
    absdir = os.path.join(options.output_dir, reldir)
    if os.listdir(absdir):
      return
    _LOGGER.debug('Removing empty parent directory of junction: %s', absdir)
    _Shell('rmdir', '/S', '/Q', absdir, dry_run=options.dry_run)
    reldir = os.path.dirname(reldir)


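# Cache entries are stored one per dependency, in a directory named by the
# MD5 of the dependency's relative output path. The layouts by version are
# roughly as follows (paths illustrative):
#
#   Version 0: <cache-dir>/<md5>/.git/...
#   Version 1: <cache-dir>/<md5>/src/.git/...
#              <cache-dir>/<md5>/.gclient  (present when recursion is used)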
def _GetCacheDirEntryVersion(path):
  """Returns the version of the cache directory entry, -1 if invalid."""

  git = os.path.join(path, '.git')
  src = os.path.join(path, 'src')
  gclient = os.path.join(path, '.gclient')

  # Version 0 contains a '.git' directory and no '.gclient' entry.
  if os.path.isdir(git):
    if os.path.exists(gclient):
      return -1
    return 0

  # Version 1 contains a 'src' directory and no '.git' entry.
  if os.path.isdir(src):
    if os.path.exists(git):
      return -1
    return 1

  # Neither layout was recognized.
  return -1


def _GetCacheDirEntries(cache_dir):
  """Returns the list of entries in the given |cache_dir|."""
  entries = []
  for path in os.listdir(cache_dir):
    if not re.match('^[a-z0-9]{32}$', path):
      continue
    entries.append(path)
  return entries


def _GetCacheDirVersion(cache_dir):
  """Returns the version of the cache directory."""
  # If it doesn't exist then it's clearly the latest version.
  if not os.path.exists(cache_dir):
    return 1

  cache_version = None
  for path in _GetCacheDirEntries(cache_dir):
    repo = os.path.join(cache_dir, path)
    if not os.path.isdir(repo):
      return -1

    entry_version = _GetCacheDirEntryVersion(repo)
    if entry_version == -1:
      return -1

    if cache_version is None:
      cache_version = entry_version
    elif cache_version != entry_version:
      return -1

  # If there are no entries in the cache it may as well be the latest version.
  if cache_version is None:
    return 1

  return cache_version


def _GetJunctionStatePath(options):
  """Returns the junction state file path."""
  return os.path.join(options.cache_dir, '.gitdeps_junctions')


def _ReadJunctions(options):
  """Reads the list of junctions as a dictionary."""
  state_path = _GetJunctionStatePath(options)
  old_junctions = {}
  if os.path.exists(state_path):
    _LOGGER.debug('Loading list of existing junctions.')
    for j in open(state_path, 'rb'):
      old_junctions[j.strip()] = True

  return old_junctions


def _Rename(src, dst, dry_run):
  _LOGGER.debug('Renaming "%s" to "%s".', src, dst)
  if not dry_run:
    os.rename(src, dst)


def _UpgradeCacheDir(options):
  """Upgrades the cache directory format to the most modern layout.

  Raises an Exception if the cache needs upgrading during a dry run, as the
  rest of the run can't meaningfully be simulated.
  """
  cache_version = _GetCacheDirVersion(options.cache_dir)
  if cache_version == 1:
    _LOGGER.debug('No cache directory upgrade required.')
    return

  _LOGGER.debug('Upgrading cache directory from version 0 to 1.')

  _LOGGER.debug('Removing all junctions.')
  junctions = _ReadJunctions(options).keys()
  junctions = sorted(junctions, key=lambda j: len(j), reverse=True)
  for junction in junctions:
    _RemoveOrphanedJunction(options, junction)
  _RemoveFile(options, _GetJunctionStatePath(options))

  for entry in _GetCacheDirEntries(options.cache_dir):
    _LOGGER.debug('Upgrading cache entry "%s".', entry)
    tmp_entry = os.path.abspath(os.path.join(
        options.cache_dir,
        'TMP%d-%04d' % (os.getpid(), random.randint(0, 999))))
    abs_entry = os.path.abspath(os.path.join(options.cache_dir, entry))
    src = os.path.join(abs_entry, 'src')
    _Rename(abs_entry, tmp_entry, options.dry_run)
    _EnsureDirectoryExists(abs_entry, 'cache entry', options.dry_run)
    _Rename(tmp_entry, src, options.dry_run)

  if options.dry_run:
    _LOGGER.debug('Cache needs upgrading, unable to further simulate dry-run.')
    raise Exception('Cache needs upgrading, unable to further simulate '
                    'dry-run.')


def main():
  options, args = _ParseCommandLine()

  # Upgrade the cache directory if necessary.
  _UpgradeCacheDir(options)

  # Ensure the cache directory exists and get the full properly cased path to
  # it.
  _EnsureDirectoryExists(options.cache_dir, 'cache', options.dry_run)
  options.cache_dir = _GetCasedFilename(options.cache_dir)

  # Read junctions that have been written in previous runs.
  state_path = _GetJunctionStatePath(options)
  old_junctions = _ReadJunctions(options)

  # Parse each deps file in order, and extract the dependencies, looking for
  # conflicts in the output directories.
  output_dirs = {}
  all_deps = []
  for deps_file in args:
    deps = _ParseDepsFile(deps_file)
    for key, value in deps.iteritems():
      repo_options = _ParseRepoOptions(
          options.cache_dir, options.output_dir, deps_file, key, value)
      if repo_options.output_dir in output_dirs:
        other_repo_options = output_dirs[repo_options.output_dir]
        _LOGGER.error('Conflicting output directory: %s',
                      repo_options.output_dir)
        _LOGGER.error('First specified in file: %s',
                      other_repo_options.deps_file)
        _LOGGER.error('And then specified in file: %s', repo_options.deps_file)
        raise Exception()
      output_dirs[repo_options.output_dir] = repo_options
      all_deps.append(repo_options)
  output_dirs = {}

  # Handle each dependency, in order of shortest output path names first. This
  # ensures that nested dependencies are handled properly.
  checkout_dirs = {}
  deps = sorted(all_deps, key=lambda x: len(x.output_dir))
  junctions = []
  for repo in deps:
    changes_made = _InstallRepository(options, repo)
    checkout_dirs[repo.checkout_dir] = changes_made

    new_junction_dirs = repo.remote_dirs if repo.remote_dirs else ['']
    for new_junction_dir in new_junction_dirs:
      junction = os.path.relpath(
          os.path.join(repo.output_dir, new_junction_dir),
          options.output_dir)
      old_junctions.pop(junction, None)
      # Write each junction as we create it. This allows for recovery from
      # partial runs.
      if not options.dry_run:
        open(state_path, 'ab').write(junction + '\n')
      junctions.append(junction)

  # Clean up orphaned junctions if there are any.
  if old_junctions:
    _LOGGER.debug('Removing orphaned junctions.')
    for j in old_junctions.iterkeys():
      _RemoveOrphanedJunction(options, j)

  # Output the final list of junctions.
  _LOGGER.debug('Writing final list of junctions.')
  if not options.dry_run:
    with open(state_path, 'wb') as io:
      for j in sorted(junctions):
        io.write(j)
        io.write('\n')

  # Iterate all directories in the cache directory. Any that we didn't
  # specifically create or update should be cleaned up. Do this in parallel
  # so things are cleaned up as soon as possible.
  threads = []
  for path in glob.glob(os.path.join(options.cache_dir, '*')):
    if os.path.join(path, 'src') not in checkout_dirs:
      _LOGGER.debug('Erasing orphaned checkout directory: %s', path)
      thread = threading.Thread(target=_DeleteCheckout,
                                args=(path, options.dry_run))
      threads.append(thread)
      thread.start()
  for thread in threads:
    thread.join()

  # Recursively process other dependencies.
  for repo in all_deps:
    if not repo.recurse:
      continue
    if not checkout_dirs[repo.checkout_dir] and not options.force:
      continue
    _RecurseRepository(options, repo)

  return


if __name__ == '__main__':
  main()