Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(317)

Side by Side Diff: git_cache.py

Issue 164823002: Create "git cache" command. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/tools/depot_tools
Patch Set: Shallow fetches and other improvements Created 6 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 #!/usr/bin/env python
2 # Copyright 2014 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
5
6 """A git command for managing a local cache of git repositories."""
7
8 import errno
9 import logging
10 import optparse
11 import os
12 import tempfile
13 import subprocess
14 import sys
15 import urlparse
16
17 import gclient_utils
18 import subcommand
19
20
def NormalizeUrl(url):
    """Returns |url| rewritten as an https url whose path ends in '.git'.

    Only the network location and path of the parsed url are kept; the
    original scheme, query and fragment are discarded.
    """
    pieces = urlparse.urlparse(url)
    normalized = 'https://%s%s' % (pieces.netloc, pieces.path)
    return normalized if normalized.endswith('.git') else normalized + '.git'
28
29
def UrlToCacheDir(url):
    """Converts a git url to a normalized form for the cache dir path.

    The scheme and a trailing '.git' are dropped, existing dashes are doubled
    so they stay distinguishable from path separators, every '/' becomes a
    single dash, and the result is lower-cased.

    Args:
      url: the git repository url.

    Returns:
      A single path component naming the cache directory for |url|.
    """
    parsed = urlparse.urlparse(url)
    norm_url = parsed.netloc + parsed.path
    if norm_url.endswith('.git'):
        norm_url = norm_url[:-len('.git')]
    # Lower-case so that urls differing only by case share one cache
    # directory instead of producing duplicate caches of the same repo.
    return norm_url.replace('-', '--').replace('/', '-').lower()
iannucci 2014/02/21 11:32:07 do either of these need to do case normalization?
agable 2014/02/21 19:33:44 Url no, just in case someone is dumb; cache_dir, y
37
38
def RunGit(cmd, **kwargs):
    """Runs `git <cmd>` via gclient_utils.CheckCallAndFilter.

    Args:
      cmd: list of git arguments, without the leading 'git'.
      **kwargs: forwarded to gclient_utils.CheckCallAndFilter once defaults
          are filled in. 'cwd' defaults to the current directory. When a
          truthy 'filter_fn' is supplied it is wrapped in
          gclient_utils.GitFilter, 'print_stdout' defaults to False and the
          askpass variables are defaulted in 'env'; otherwise 'print_stdout'
          defaults to True.
    """
    kwargs.setdefault('cwd', os.getcwd())
    line_filter = kwargs.get('filter_fn')
    if not line_filter:
        kwargs.setdefault('print_stdout', True)
    else:
        kwargs['filter_fn'] = gclient_utils.GitFilter(line_filter)
        kwargs.setdefault('print_stdout', False)
        env = kwargs.get('env')
        if not env:
            env = kwargs.setdefault('env', os.environ.copy())
        # 'true' is used as the askpass helper unless the caller already
        # chose one.
        for askpass_var in ('GIT_ASKPASS', 'SSH_ASKPASS'):
            env.setdefault(askpass_var, 'true')
        # Review thread (verbatim, truncated by the review UI):
        #   iannucci 2014/02/21 11:32:07 hmm... copypasta magic? Should this go in gclient_
        #   agable 2014/02/21 19:33:44 It's a tiny bit incompatible with the version of R
    stdout = kwargs.get('stdout', sys.stdout)
    banner = 'running "git %s" in "%s"' % (' '.join(cmd), kwargs['cwd'])
    print >> stdout, banner
    gclient_utils.CheckCallAndFilter(['git'] + cmd, **kwargs)
53
54
class LockError(Exception):
    """Error raised for lock acquisition and release failures."""
57
58
class Lockfile(object):
    """Class to represent a cross-platform process-specific lockfile.

    The lock for |path| is the file '<abspath of path>.lock', which contains
    the pid of the process that created it. Instances can be used as context
    managers: entering acquires the lock and exiting releases it.
    """

    def __init__(self, path):
        self.path = os.path.abspath(path)
        self.lockfile = self.path + ".lock"
        self.pid = os.getpid()

    def _read_pid(self):
        """Reads the pid stored in the lockfile.

        Returns None if the lockfile cannot be read or does not start with an
        integer.

        Note: This method is potentially racy. By the time it returns the
        lockfile may have been unlocked, removed, or stolen by some other
        process.
        """
        try:
            with open(self.lockfile, 'r') as f:
                pid = int(f.readline().strip())
        except (IOError, ValueError):
            pid = None
        return pid

    def _make_lockfile(self):
        """Safely creates a lockfile containing the current pid.

        Raises:
          OSError: if the lockfile already exists or cannot be created.
        """
        # O_EXCL makes creation fail if the file already exists.
        open_flags = (os.O_CREAT | os.O_EXCL | os.O_WRONLY)
        fd = os.open(self.lockfile, open_flags, 0o644)
        # The with-block closes the file even if writing the pid fails.
        with os.fdopen(fd, 'w') as f:
            f.write('%d\n' % self.pid)

    def _remove_lockfile(self):
        """Deletes the lockfile. Complains (implicitly) if it doesn't exist."""
        os.remove(self.lockfile)

    def lock(self):
        """Acquire the lock.

        Note: This is a NON-BLOCKING FAIL-FAST operation.
        Do. Or do not. There is no try.

        Raises:
          LockError: if the lockfile already exists or cannot be created.
        """
        try:
            self._make_lockfile()
        except OSError as e:
            if e.errno == errno.EEXIST:
                raise LockError("%s is already locked" % self.path)
            else:
                raise LockError(
                    "Failed to create %s (err %s)" % (self.path, e.errno))

    def unlock(self):
        """Release the lock.

        Raises:
          LockError: if there is no lockfile, or it holds another pid.
        """
        if not self.is_locked():
            raise LockError("%s is not locked" % self.path)
        if not self.i_am_locking():
            raise LockError("%s is locked, but not by me" % self.path)
        self._remove_lockfile()

    def break_lock(self):
        """Remove the lock, even if it was created by someone else.

        Returns:
          True if a lockfile was removed, False if there was none.
        """
        try:
            self._remove_lockfile()
            return True
        except OSError as exc:
            if exc.errno == errno.ENOENT:
                return False
            else:
                raise

    def is_locked(self):
        """Test if the file is locked by anyone.

        Note: This method is potentially racy. By the time it returns the
        lockfile may have been unlocked, removed, or stolen by some other
        process.
        """
        return os.path.exists(self.lockfile)

    def i_am_locking(self):
        """Test if the file is locked by this process."""
        return self.is_locked() and self.pid == self._read_pid()

    def __enter__(self):
        self.lock()
        return self

    def __exit__(self, *_exc):
        self.unlock()
142
143
@subcommand.usage('[url of repo to check for caching]')
def CMDexists(parser, args):
    """Checks to see if there already is a cache of the given repo.

    Prints the repo's cache directory and returns 0 when that directory
    exists and contains a 'config' file; returns 1 otherwise.
    """
    options, args = parser.parse_args(args)
    if len(args) != 1:
        parser.error('git cache exists only takes exactly one repo url.')
    cache_path = os.path.join(options.cache_dir, UrlToCacheDir(args[0]))
    if not os.path.isdir(cache_path):
        return 1
    # The 'config' file is used as the marker of a set-up cache.
    if not os.path.isfile(os.path.join(cache_path, 'config')):
        return 1
    print(cache_path)
    return 0
157
158
@subcommand.usage('[url of repo to add to or update in cache]')
def CMDpopulate(parser, args):
    """Bare clones or updates a repository in the cache.

    Takes exactly one repo url. While holding the Lockfile for the repo's
    cache directory: if '<repo_dir>/config' is missing, a bare repo is
    initialised in a temporary directory under the cache dir, configured,
    fetched and then renamed into place; otherwise the existing directory is
    reconfigured and fetched.
    """
    # type='int' makes optparse validate and convert the value. Previously
    # int() was applied unconditionally, so running without --depth or
    # --shallow crashed on int(None).
    parser.add_option('--depth', type='int',
                      help='only cache DEPTH commits of history')
    parser.add_option('--shallow', '-s', action='store_true',
                      help='only cache 10000 commits of history')
    # Review thread (verbatim, truncated by the review UI):
    #   iannucci 2014/02/21 11:32:07 Not sure I see the need for both --shallow and --d
    #   agable 2014/02/21 19:33:44 I thought it would be nice to have a simple defaul
    parser.add_option('--ref', action='append',
                      help='specify additional refs to be fetched')
    options, args = parser.parse_args(args)
    if options.shallow and not options.depth:
        options.depth = 10000
    # Review thread (verbatim, truncated by the review UI):
    #   iannucci 2014/02/21 11:32:07 http://docs.python.org/2/library/optparse.html#opt
    #   agable 2014/02/21 19:33:44 Damn, I knew that existed, but couldn't find it. T
    if len(args) != 1:
        parser.error('git cache populate only takes exactly one repo url.')
    url = args[0]

    gclient_utils.safe_makedirs(options.cache_dir)
    repo_dir = os.path.join(options.cache_dir, UrlToCacheDir(url))

    verbose_args = []
    filter_fn = lambda l: '[up to date]' not in l
    # Review thread (verbatim, truncated by the review UI):
    #   iannucci 2014/02/21 11:32:07 does filter_fn return the line to print, or does i
    #   agable 2014/02/21 19:33:44 This miniature filter function (which just does tr
    if options.verbose:
        verbose_args = ['-v', '--progress']
        filter_fn = None

    depth_args = []
    if options.depth:
        depth_args = ['--depth', '%d' % options.depth]

    def _config(directory):
        """Writes the cache's git config values into |directory|."""
        RunGit(['config', 'core.deltaBaseCacheLimit', '2g'],
               cwd=directory)
        RunGit(['config', 'remote.origin.url', NormalizeUrl(url)],
               cwd=directory)
        RunGit(['config', '--replace-all', 'remote.origin.fetch',
                '+refs/heads/*:refs/heads/*'],
               cwd=directory)
        for ref in options.ref or []:
            ref = ref.rstrip('/')
            refspec = '+refs/%s/*:refs/%s/*' % (ref, ref)
            RunGit(['config', '--add', 'remote.origin.fetch', refspec],
                   cwd=directory)

    with Lockfile(repo_dir):
        # Setup from scratch if the repo is new or is in a bad state.
        if not os.path.exists(os.path.join(repo_dir, 'config')):
            gclient_utils.rmtree(repo_dir)
            # Fetch into a temporary directory and rename it at the end, so
            # repo_dir only appears once the fetch has finished.
            tempdir = tempfile.mkdtemp(suffix=UrlToCacheDir(url),
                                       dir=options.cache_dir)
            RunGit(['init', '--bare'], cwd=tempdir)
            _config(tempdir)
            # NOTE(review): per the thread below, --update-shallow may need
            # a newer git (1.9 docs were consulted) -- confirm.
            # Review thread (verbatim, truncated by the review UI):
            #   iannucci 2014/02/21 11:32:07 update-shallow ? Where are the docs on that for gi
            #   agable 2014/02/21 19:33:44 Damn, looks like I was reading 1.9 docs. And also
            fetch_cmd = (['fetch'] + verbose_args + depth_args +
                         ['--update-shallow', '--tags', 'origin'])
            RunGit(fetch_cmd, filter_fn=filter_fn, cwd=tempdir, retry=True)
            os.rename(tempdir, repo_dir)
        else:
            _config(repo_dir)
            if options.depth:
                logging.warning(
                    'Shallow fetch requested, but repo cache already exists.')
            # Review thread (verbatim, truncated by the review UI):
            #   iannucci 2014/02/21 11:32:07 We should only print this if the existing repo isn
            #   agable 2014/02/21 19:33:44 Done.
            fetch_cmd = (['fetch'] + verbose_args +
                         ['--update-shallow', '--tags', 'origin'])
            RunGit(fetch_cmd, filter_fn=filter_fn, cwd=repo_dir, retry=True)
223
224
@subcommand.usage('[url of repo to unlock, or -a|--all]')
def CMDunlock(parser, args):
    """Unlocks one or all repos if their lock files are still around.

    Without -f|--force nothing is removed; the lockfiles that would be
    removed are only logged.
    """
    parser.add_option('--force', '-f', action='store_true',
                      help='actually perform the action')
    parser.add_option('--all', '-a', action='store_true',
                      help='unlock all repository caches')
    options, args = parser.parse_args(args)
    if len(args) > 1 or (len(args) == 0 and not options.all):
        parser.error('git cache unlock takes exactly one repo url, or --all')

    if not options.all:
        url = args[0]
        repo_dirs = [os.path.join(options.cache_dir, UrlToCacheDir(url))]
    else:
        # os.listdir yields bare names; join them with cache_dir so the isdir
        # test and the lockfile paths do not depend on the current directory.
        candidates = (os.path.join(options.cache_dir, name)
                      for name in os.listdir(options.cache_dir))
        repo_dirs = [path for path in candidates if os.path.isdir(path)]
    lockfiles = [repo_dir + '.lock' for repo_dir in repo_dirs
                 if os.path.exists(repo_dir + '.lock')]

    if not options.force:
        logging.warning('Not performing any actions. '
                        'Pass -f|--force to remove the following lockfiles: '
                        '%s' % lockfiles)
        return

    # Two separate lists: chained assignment would bind both names to the
    # same list and mix the results.
    unlocked = []
    untouched = []
    for repo_dir in repo_dirs:
        lf = Lockfile(repo_dir)
        # Review thread (verbatim, truncated by the review UI):
        #   iannucci 2014/02/21 11:32:07 should print error if the lock exists and is owned
        #   agable 2014/02/21 19:33:44 Define 'wrong pid'. Every time 'git cache X' runs,
        #   iannucci 2014/02/24 19:43:03 I guess I meant, can we print a warning/error if t
        #   agable 2014/02/24 22:11:41 Ah. No. Not on windows, especially.
        if lf.break_lock():
            unlocked.append(repo_dir)
        else:
            untouched.append(repo_dir)

    if unlocked:
        logging.info('Broke locks on these caches: %s' % unlocked)
    if untouched:
        logging.debug('Did not touch these caches: %s' % untouched)
263
264
class OptionParser(optparse.OptionParser):
    """Wrapper class for OptionParser to handle global options.

    Adds -c/--cache-dir and -v/--verbose, resolves the cache directory from
    the command line or from git's global 'cache.cachepath' setting, and
    configures the logging level from the verbosity count.
    """

    def __init__(self, *args, **kwargs):
        optparse.OptionParser.__init__(self, *args, prog='git cache', **kwargs)
        self.add_option('-c', '--cache-dir',
                        help='Path to the directory containing the cache.')
        self.add_option('-v', '--verbose', action='count', default=0,
                        help='Increase verbosity (can be passed multiple '
                             'times).')

    def parse_args(self, args=None, values=None):
        """Parses |args| and post-processes the global options.

        Returns:
          (options, args) as optparse does, with options.cache_dir made
          absolute.

        Exits via self.error() when no cache directory is given on the
        command line and none can be read from 'cache.cachepath'.
        """
        options, args = optparse.OptionParser.parse_args(self, args, values)

        # Configure logging before anything is logged: the first module-level
        # logging call installs a default root handler, after which
        # basicConfig() is a no-op and the requested level would be ignored.
        levels = [logging.WARNING, logging.INFO, logging.DEBUG]
        logging.basicConfig(
            level=levels[min(options.verbose, len(levels) - 1)])

        try:
            global_cache_dir = subprocess.check_output(
                ['git', 'config', '--global', 'cache.cachepath']).strip()
            if options.cache_dir:
                logging.warning(
                    'Overriding globally-configured cache directory.')
            else:
                options.cache_dir = global_cache_dir
        except (subprocess.CalledProcessError, OSError):
            # CalledProcessError: git exited non-zero (e.g. setting unset).
            # OSError: git could not be started. Either way there is no
            # usable global setting, so --cache-dir is required.
            if not options.cache_dir:
                self.error('No cache directory specified on command line '
                           'or in cache.cachepath.')
        options.cache_dir = os.path.abspath(options.cache_dir)

        return options, args
294
295
def main(argv):
    """Hands |argv| to a subcommand.CommandDispatcher built for this module.

    Returns whatever the dispatcher's execute() call returns.
    """
    return subcommand.CommandDispatcher(__name__).execute(OptionParser(), argv)
299
300
# Script entry point: run main() on the command-line arguments and use its
# return value as the process exit status.
if __name__ == '__main__':
    exit_code = main(sys.argv[1:])
    sys.exit(exit_code)
OLDNEW
« git-cache ('K') | « git-cache ('k') | tests/gclient_utils_test.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698