Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(140)

Side by Side Diff: recipe_engine/fetch.py

Issue 2362993002: More strategic retries in fetch. (Closed)
Patch Set: More strategic retries in fetch. Created 4 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | recipe_engine/unittests/fetch_test.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments / Hide Comments ('s')
OLD | NEW
1 # Copyright 2016 The LUCI Authors. All rights reserved. 1 # Copyright 2016 The LUCI Authors. All rights reserved.
2 # Use of this source code is governed under the Apache License, Version 2.0 2 # Use of this source code is governed under the Apache License, Version 2.0
3 # that can be found in the LICENSE file. 3 # that can be found in the LICENSE file.
4 4
5 import base64 5 import base64
6 import functools 6 import functools
7 import httplib
7 import json 8 import json
8 import logging 9 import logging
9 import os 10 import os
10 import random
11 import shutil 11 import shutil
12 import sys 12 import sys
13 import tarfile 13 import tarfile
14 import tempfile 14 import tempfile
15 import time 15 import time
16 16
17 # Add third party paths. 17 # Add third party paths.
18 from . import env 18 from . import env
19 from . import requests_ssl 19 from . import requests_ssl
20 from . import util
20 from .requests_ssl import requests 21 from .requests_ssl import requests
21 22
22 import subprocess42 23 import subprocess42
23 from google.protobuf import text_format 24 from google.protobuf import text_format
24 25
25 from . import package_pb2 26 from . import package_pb2
26 27
27 28
28 class FetchError(Exception): 29 class FetchError(Exception):
29 pass 30 pass
30 31
31 32
32 class UncleanFilesystemError(FetchError):
33 pass
34
35
36 class FetchNotAllowedError(FetchError): 33 class FetchNotAllowedError(FetchError):
37 pass 34 pass
38 35
39 36
40 def _run_git(checkout_dir, *args): 37 def _run_git(checkout_dir, *args):
41 if sys.platform.startswith(('win', 'cygwin')): 38 if sys.platform.startswith(('win', 'cygwin')):
42 cmd = ['git.bat'] 39 cmd = ['git.bat']
43 else: 40 else:
44 cmd = ['git'] 41 cmd = ['git']
45 42
46 if checkout_dir is not None: 43 if checkout_dir is not None:
47 cmd += ['-C', checkout_dir] 44 cmd += ['-C', checkout_dir]
48 cmd += list(args) 45 cmd += list(args)
49 46
50 logging.info('Running: %s', cmd) 47 logging.info('Running: %s', cmd)
51 return subprocess42.check_output(cmd) 48 return subprocess42.check_output(cmd)
52 49
53 50
54 def _retry(f):
55 @functools.wraps(f)
56 def wrapper(*args, **kwargs):
57 delay = random.uniform(2, 5)
58 for _ in range(5):
59 try:
60 return f(*args, **kwargs)
61 except (requests.exceptions.RequestException,
62 subprocess42.CalledProcessError):
63 # Only retry specific errors that may be transient.
64 logging.exception('retrying')
65 time.sleep(delay)
66 delay *= 2
67 return f(*args, **kwargs)
68 return wrapper
69
70
71 class Backend(object): 51 class Backend(object):
72 @property 52 @property
73 def repo_type(self): 53 def repo_type(self):
74 """Returns repo type (see package_pb2.DepSpec).""" 54 """Returns repo type (see package_pb2.DepSpec)."""
75 raise NotImplementedError() 55 raise NotImplementedError()
76 56
77 def branch_spec(self, branch): 57 @staticmethod
58 def branch_spec(branch):
78 """Returns branch spec for given branch suitable for given git backend.""" 59 """Returns branch spec for given branch suitable for given git backend."""
79 raise NotImplementedError() 60 raise NotImplementedError()
80 61
81 def checkout(self, repo, revision, checkout_dir, allow_fetch): 62 def checkout(self, repo, revision, checkout_dir, allow_fetch):
82 """Checks out given |repo| at |revision| to |checkout_dir|. 63 """Checks out given |repo| at |revision| to |checkout_dir|.
83 64
84 Network operations are performed only if |allow_fetch| is True. 65 Network operations are performed only if |allow_fetch| is True.
85 """ 66 """
86 raise NotImplementedError() 67 raise NotImplementedError()
87 68
88 def updates(self, repo, revision, checkout_dir, allow_fetch, 69 def updates(self, repo, revision, checkout_dir, allow_fetch,
89 other_revision, paths): 70 other_revision, paths):
90 """Returns a list of revisions between |revision| and |other_revision|. 71 """Returns a list of revisions between |revision| and |other_revision|.
91 72
92 Network operations are performed only if |allow_fetch| is True. 73 Network operations are performed only if |allow_fetch| is True.
93 74
94 If |paths| is a non-empty list, the history is scoped just to these paths. 75 If |paths| is a non-empty list, the history is scoped just to these paths.
95 """ 76 """
96 raise NotImplementedError() 77 raise NotImplementedError()
97 78
98 def commit_metadata(self, repo, revision, checkout_dir, allow_fetch): 79 def commit_metadata(self, repo, revision, checkout_dir, allow_fetch):
99 """Returns a dictionary of metadata about commit |revision|. 80 """Returns a dictionary of metadata about commit |revision|.
100 81
101 The dictionary contains the following keys: author, message. 82 The dictionary contains the following keys: author, message.
102 """ 83 """
103 raise NotImplementedError() 84 raise NotImplementedError()
104 85
105 86
87 class UncleanFilesystemError(FetchError):
88 pass
89
90
91 class GitFetchError(FetchError):
92 pass
93
94
106 class GitBackend(Backend): 95 class GitBackend(Backend):
107 """GitBackend uses a local git checkout.""" 96 """GitBackend uses a local git checkout."""
108 97
109 @property 98 @property
110 def repo_type(self): 99 def repo_type(self):
111 return package_pb2.DepSpec.GIT 100 return package_pb2.DepSpec.GIT
112 101
113 @staticmethod 102 @staticmethod
114 def branch_spec(branch): 103 def branch_spec(branch):
115 return 'origin/%s' % branch 104 return 'origin/%s' % branch
116 105
117 @_retry 106 @util.exponential_retry(condition=lambda e: isinstance(e, GitFetchError))
118 def checkout(self, repo, revision, checkout_dir, allow_fetch): 107 def checkout(self, repo, revision, checkout_dir, allow_fetch):
119 logging.info('Freshening repository %s in %s', repo, checkout_dir) 108 logging.info('Freshening repository %s in %s', repo, checkout_dir)
120 109
121 if not os.path.isdir(checkout_dir): 110 if not os.path.isdir(checkout_dir):
122 if not allow_fetch: 111 if not allow_fetch:
123 raise FetchNotAllowedError( 112 raise FetchNotAllowedError(
124 'need to clone %s but fetch not allowed' % repo) 113 'need to clone %s but fetch not allowed' % repo)
125 _run_git(None, 'clone', '-q', repo, checkout_dir) 114 _run_git(None, 'clone', '-q', repo, checkout_dir)
126 elif not os.path.isdir(os.path.join(checkout_dir, '.git')): 115 elif not os.path.isdir(os.path.join(checkout_dir, '.git')):
127 raise UncleanFilesystemError( 116 raise UncleanFilesystemError(
128 '%s exists but is not a git repo' % checkout_dir) 117 '%s exists but is not a git repo' % checkout_dir)
129 118
130 _run_git(checkout_dir, 'config', 'remote.origin.url', repo) 119 _run_git(checkout_dir, 'config', 'remote.origin.url', repo)
131 try: 120 try:
132 _run_git(checkout_dir, 'rev-parse', '-q', '--verify', 121 _run_git(checkout_dir, 'rev-parse', '-q', '--verify',
133 '%s^{commit}' % revision) 122 '%s^{commit}' % revision)
134 except subprocess42.CalledProcessError: 123 except subprocess42.CalledProcessError:
135 if not allow_fetch: 124 if not allow_fetch:
136 raise FetchNotAllowedError( 125 raise FetchNotAllowedError(
137 'need to fetch %s but fetch not allowed' % repo) 126 'need to fetch %s but fetch not allowed' % repo)
138 _run_git(checkout_dir, 'fetch') 127
128 # Fetch from the remote Git repository. Wrap this in a GitFetchError
129 # for exponential retry on failure.
130 try:
131 _run_git(checkout_dir, 'fetch')
132 except subprocess42.CalledProcessError as e:
133 raise GitFetchError(e.message)
134
139 _run_git(checkout_dir, 'reset', '-q', '--hard', revision) 135 _run_git(checkout_dir, 'reset', '-q', '--hard', revision)
140 136
141 def updates(self, repo, revision, checkout_dir, allow_fetch, 137 def updates(self, repo, revision, checkout_dir, allow_fetch,
142 other_revision, paths): 138 other_revision, paths):
143 self.checkout(repo, revision, checkout_dir, allow_fetch) 139 self.checkout(repo, revision, checkout_dir, allow_fetch)
144 if allow_fetch: 140 if allow_fetch:
145 _run_git(checkout_dir, 'fetch') 141 _run_git(checkout_dir, 'fetch')
146 args = [ 142 args = [
147 'rev-list', 143 'rev-list',
148 '--reverse', 144 '--reverse',
149 '%s..%s' % (revision, other_revision), 145 '%s..%s' % (revision, other_revision),
150 ] 146 ]
151 if paths: 147 if paths:
152 args.extend(['--'] + paths) 148 args.extend(['--'] + paths)
153 return filter(bool, _run_git(checkout_dir, *args).strip().split('\n')) 149 return filter(bool, _run_git(checkout_dir, *args).strip().split('\n'))
154 150
155 def commit_metadata(self, repo, revision, checkout_dir, allow_fetch): 151 def commit_metadata(self, repo, revision, checkout_dir, allow_fetch):
156 return { 152 return {
157 'author': _run_git(checkout_dir, 'show', '-s', '--pretty=%aE', 153 'author': _run_git(checkout_dir, 'show', '-s', '--pretty=%aE',
158 revision).strip(), 154 revision).strip(),
159 'message': _run_git(checkout_dir, 'show', '-s', '--pretty=%B', 155 'message': _run_git(checkout_dir, 'show', '-s', '--pretty=%B',
160 revision).strip(), 156 revision).strip(),
161 } 157 }
162 158
163 159
160 class GitilesFetchError(FetchError):
161 """An HTTP error that occurred during Gitiles fetching."""
162
163 def __init__(self, status, message):
164 super(GitilesFetchError, self).__init__(
165 'Gitiles error code (%d): %s' % (status, message))
166 self.status = status
167 self.message = message
168
169 @staticmethod
170 def transient(e):
171 """
172 Returns (bool): True if "e" is a GitilesFetchError with transient HTTP code.
173 """
174 return (isinstance(e, GitilesFetchError) and
175 e.status >= httplib.INTERNAL_SERVER_ERROR)
176
177
164 class GitilesBackend(Backend): 178 class GitilesBackend(Backend):
165 """GitilesBackend uses a repo served by Gitiles.""" 179 """GitilesBackend uses a repo served by Gitiles."""
166 180
181 # Header at the beginning of Gerrit/Gitiles JSON API responses.
182 _GERRIT_XSRF_HEADER = ')]}\'\n'
183
167 @property 184 @property
168 def repo_type(self): 185 def repo_type(self):
169 return package_pb2.DepSpec.GITILES 186 return package_pb2.DepSpec.GITILES
170 187
171 @staticmethod 188 @staticmethod
172 def branch_spec(branch): 189 def branch_spec(branch):
173 return branch 190 return branch
174 191
175 @_retry
176 def checkout(self, repo, revision, checkout_dir, allow_fetch): 192 def checkout(self, repo, revision, checkout_dir, allow_fetch):
177 requests_ssl.check_requests_ssl() 193 requests_ssl.check_requests_ssl()
178 logging.info('Freshening repository %s in %s', repo, checkout_dir) 194 logging.info('Freshening repository %s in %s', repo, checkout_dir)
179 195
180 # TODO(phajdan.jr): implement caching. 196 # TODO(phajdan.jr): implement caching.
181 if not allow_fetch: 197 if not allow_fetch:
182 raise FetchNotAllowedError( 198 raise FetchNotAllowedError(
183 'need to download %s from gitiles but fetch not allowed' % repo) 199 'need to download %s from gitiles but fetch not allowed' % repo)
184 200
185 revision = self._resolve_revision(repo, revision) 201 revision = self._resolve_revision(repo, revision)
186 202
187 shutil.rmtree(checkout_dir, ignore_errors=True) 203 shutil.rmtree(checkout_dir, ignore_errors=True)
188 204
189 recipes_cfg_url = '%s/+/%s/infra/config/recipes.cfg?format=TEXT' % ( 205 recipes_cfg_url = '%s/+/%s/infra/config/recipes.cfg?format=TEXT' % (
190 repo, requests.utils.quote(revision)) 206 repo, requests.utils.quote(revision))
191 logging.info('fetching %s' % recipes_cfg_url) 207 recipes_cfg_text = base64.b64decode(
192 recipes_cfg_request = requests.get(recipes_cfg_url) 208 self._fetch_gitiles(recipes_cfg_url).text)
193 recipes_cfg_text = base64.b64decode(recipes_cfg_request.text)
194 recipes_cfg_proto = package_pb2.Package() 209 recipes_cfg_proto = package_pb2.Package()
195 text_format.Merge(recipes_cfg_text, recipes_cfg_proto) 210 text_format.Merge(recipes_cfg_text, recipes_cfg_proto)
196 recipes_path_rel = recipes_cfg_proto.recipes_path 211 recipes_path_rel = recipes_cfg_proto.recipes_path
197 212
198 # Re-create recipes.cfg in |checkout_dir| so that the repo's recipes.py 213 # Re-create recipes.cfg in |checkout_dir| so that the repo's recipes.py
199 # can look it up. 214 # can look it up.
200 recipes_cfg_path = os.path.join( 215 recipes_cfg_path = os.path.join(
201 checkout_dir, 'infra', 'config', 'recipes.cfg') 216 checkout_dir, 'infra', 'config', 'recipes.cfg')
202 os.makedirs(os.path.dirname(recipes_cfg_path)) 217 os.makedirs(os.path.dirname(recipes_cfg_path))
203 with open(recipes_cfg_path, 'w') as f: 218 with open(recipes_cfg_path, 'w') as f:
204 f.write(recipes_cfg_text) 219 f.write(recipes_cfg_text)
205 220
206 recipes_path = os.path.join(checkout_dir, recipes_path_rel) 221 recipes_path = os.path.join(checkout_dir, recipes_path_rel)
207 if not os.path.exists(recipes_path): 222 if not os.path.exists(recipes_path):
208 os.makedirs(recipes_path) 223 os.makedirs(recipes_path)
209 224
210 archive_url = '%s/+archive/%s/%s.tar.gz' % ( 225 archive_url = '%s/+archive/%s/%s.tar.gz' % (
211 repo, requests.utils.quote(revision), recipes_path_rel) 226 repo, requests.utils.quote(revision), recipes_path_rel)
212 logging.info('fetching %s' % archive_url) 227 archive_response = self._fetch_gitiles(archive_url)
213 archive_request = requests.get(archive_url)
214 with tempfile.NamedTemporaryFile(delete=False) as f: 228 with tempfile.NamedTemporaryFile(delete=False) as f:
215 f.write(archive_request.content) 229 f.write(archive_response.content)
216 f.close() 230 f.close()
217 231
218 try: 232 try:
219 with tarfile.open(f.name) as archive_tarfile: 233 with tarfile.open(f.name) as archive_tarfile:
220 archive_tarfile.extractall(recipes_path) 234 archive_tarfile.extractall(recipes_path)
221 finally: 235 finally:
222 os.unlink(f.name) 236 os.unlink(f.name)
223 237
224 def updates(self, repo, revision, checkout_dir, allow_fetch, 238 def updates(self, repo, revision, checkout_dir, allow_fetch,
225 other_revision, paths): 239 other_revision, paths):
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after
267 '%s/+/%s?format=JSON' % (repo, requests.utils.quote(revision))) 281 '%s/+/%s?format=JSON' % (repo, requests.utils.quote(revision)))
268 282
269 def _resolve_revision(self, repo, revision): 283 def _resolve_revision(self, repo, revision):
270 """Returns a git sha corresponding to given revision. 284 """Returns a git sha corresponding to given revision.
271 285
272 Examples of non-sha revision: origin/master, HEAD.""" 286 Examples of non-sha revision: origin/master, HEAD."""
273 rev_json = self._revision_metadata(repo, revision) 287 rev_json = self._revision_metadata(repo, revision)
274 logging.info('resolved %s to %s', revision, rev_json['commit']) 288 logging.info('resolved %s to %s', revision, rev_json['commit'])
275 return rev_json['commit'] 289 return rev_json['commit']
276 290
277 def _fetch_gitiles_json(self, url): 291 @staticmethod
292 @util.exponential_retry(condition=GitilesFetchError.transient)
293 def _fetch_gitiles(url):
294 """Fetches a remote URL and returns the response object on success."""
295 logging.info('fetching %s' % url)
296 resp = requests.get(url)
297 if resp.status_code != httplib.OK:
298 raise GitilesFetchError(resp.status_code, resp.text)
299 return resp
300
301 @classmethod
302 @util.exponential_retry(condition=GitilesFetchError.transient)
303 def _fetch_gitiles_json(cls, url):
278 """Fetches JSON from Gitiles and returns parsed result.""" 304 """Fetches JSON from Gitiles and returns parsed result."""
279 logging.info('fetching %s', url) 305 logging.info('fetching %s', url)
280 raw = requests.get(url).text 306
281 if not raw.startswith(')]}\'\n'): 307 resp = requests.get(url)
282 raise FetchError('Unexpected gitiles response: %s' % raw) 308 if resp.status_code != httplib.OK:
283 return json.loads(raw.split('\n', 1)[1]) 309 raise GitilesFetchError(resp.status_code, resp.text)
310
311 if not resp.text.startswith(cls._GERRIT_XSRF_HEADER):
312 raise GitilesFetchError(resp.status_code, 'Missing XSRF header')
313
314 return json.loads(resp.text[len(cls._GERRIT_XSRF_HEADER):])
OLD | NEW
« no previous file with comments | « no previous file | recipe_engine/unittests/fetch_test.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698