Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(76)

Side by Side Diff: recipe_engine/fetch.py

Issue 2362993002: More strategic retries in fetch. (Closed)
Patch Set: Created 4 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | recipe_engine/unittests/fetch_test.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 # Copyright 2016 The LUCI Authors. All rights reserved. 1 # Copyright 2016 The LUCI Authors. All rights reserved.
2 # Use of this source code is governed under the Apache License, Version 2.0 2 # Use of this source code is governed under the Apache License, Version 2.0
3 # that can be found in the LICENSE file. 3 # that can be found in the LICENSE file.
4 4
5 import base64 5 import base64
6 import functools 6 import functools
7 import httplib
7 import json 8 import json
8 import logging 9 import logging
9 import os 10 import os
10 import random
11 import shutil 11 import shutil
12 import sys 12 import sys
13 import tarfile 13 import tarfile
14 import tempfile 14 import tempfile
15 import time 15 import time
16 16
17 # Add third party paths. 17 # Add third party paths.
18 from . import env 18 from . import env
19 from . import requests_ssl 19 from . import requests_ssl
20 from . import util
20 from .requests_ssl import requests 21 from .requests_ssl import requests
21 22
22 import subprocess42 23 import subprocess42
23 from google.protobuf import text_format 24 from google.protobuf import text_format
24 25
25 from . import package_pb2 26 from . import package_pb2
26 27
27 28
28 class FetchError(Exception): 29 class FetchError(Exception):
29 pass 30 pass
30 31
31 32
32 class UncleanFilesystemError(FetchError):
33 pass
34
35
36 class FetchNotAllowedError(FetchError): 33 class FetchNotAllowedError(FetchError):
37 pass 34 pass
38 35
39 36
40 def _run_git(checkout_dir, *args): 37 def _run_git(checkout_dir, *args):
41 if sys.platform.startswith(('win', 'cygwin')): 38 if sys.platform.startswith(('win', 'cygwin')):
42 cmd = ['git.bat'] 39 cmd = ['git.bat']
43 else: 40 else:
44 cmd = ['git'] 41 cmd = ['git']
45 42
46 if checkout_dir is not None: 43 if checkout_dir is not None:
47 cmd += ['-C', checkout_dir] 44 cmd += ['-C', checkout_dir]
48 cmd += list(args) 45 cmd += list(args)
49 46
50 logging.info('Running: %s', cmd) 47 logging.info('Running: %s', cmd)
51 return subprocess42.check_output(cmd) 48 return subprocess42.check_output(cmd)
52 49
53 50
54 def _retry(f):
55 @functools.wraps(f)
56 def wrapper(*args, **kwargs):
57 delay = random.uniform(2, 5)
58 for _ in range(5):
59 try:
60 return f(*args, **kwargs)
61 except (requests.exceptions.RequestException,
62 subprocess42.CalledProcessError):
63 # Only retry specific errors that may be transient.
64 logging.exception('retrying')
65 time.sleep(delay)
66 delay *= 2
67 return f(*args, **kwargs)
68 return wrapper
69
70
71 class Backend(object): 51 class Backend(object):
72 @property 52 @property
73 def repo_type(self): 53 def repo_type(self):
74 """Returns repo type (see package_pb2.DepSpec).""" 54 """Returns repo type (see package_pb2.DepSpec)."""
75 raise NotImplementedError() 55 raise NotImplementedError()
76 56
77 def branch_spec(self, branch): 57 @staticmethod
dnj 2016/09/23 00:42:46 This was a pylint failure b/c it's static in child
58 def branch_spec(branch):
78 """Returns branch spec for given branch suitable for given git backend.""" 59 """Returns branch spec for given branch suitable for given git backend."""
79 raise NotImplementedError() 60 raise NotImplementedError()
80 61
81 def checkout(self, repo, revision, checkout_dir, allow_fetch): 62 def checkout(self, repo, revision, checkout_dir, allow_fetch):
82 """Checks out given |repo| at |revision| to |checkout_dir|. 63 """Checks out given |repo| at |revision| to |checkout_dir|.
83 64
84 Network operations are performed only if |allow_fetch| is True. 65 Network operations are performed only if |allow_fetch| is True.
85 """ 66 """
86 raise NotImplementedError() 67 raise NotImplementedError()
87 68
88 def updates(self, repo, revision, checkout_dir, allow_fetch, 69 def updates(self, repo, revision, checkout_dir, allow_fetch,
89 other_revision, paths): 70 other_revision, paths):
90 """Returns a list of revisions between |revision| and |other_revision|. 71 """Returns a list of revisions between |revision| and |other_revision|.
91 72
92 Network operations are performed only if |allow_fetch| is True. 73 Network operations are performed only if |allow_fetch| is True.
93 74
94 If |paths| is a non-empty list, the history is scoped just to these paths. 75 If |paths| is a non-empty list, the history is scoped just to these paths.
95 """ 76 """
96 raise NotImplementedError() 77 raise NotImplementedError()
97 78
98 def commit_metadata(self, repo, revision, checkout_dir, allow_fetch): 79 def commit_metadata(self, repo, revision, checkout_dir, allow_fetch):
99 """Returns a dictionary of metadata about commit |revision|. 80 """Returns a dictionary of metadata about commit |revision|.
100 81
101 The dictionary contains the following keys: author, message. 82 The dictionary contains the following keys: author, message.
102 """ 83 """
103 raise NotImplementedError() 84 raise NotImplementedError()
104 85
105 86
87 class UncleanFilesystemError(FetchError):
dnj 2016/09/23 00:42:46 (Moved closer to GitBackend)
88 pass
89
90
91 class GitFetchError(FetchError):
92 pass
93
94
106 class GitBackend(Backend): 95 class GitBackend(Backend):
107 """GitBackend uses a local git checkout.""" 96 """GitBackend uses a local git checkout."""
108 97
109 @property 98 @property
110 def repo_type(self): 99 def repo_type(self):
111 return package_pb2.DepSpec.GIT 100 return package_pb2.DepSpec.GIT
112 101
113 @staticmethod 102 @staticmethod
114 def branch_spec(branch): 103 def branch_spec(branch):
115 return 'origin/%s' % branch 104 return 'origin/%s' % branch
116 105
117 @_retry 106 @util.exponential_retry(condition=lambda e: isinstance(e, GitFetchError))
118 def checkout(self, repo, revision, checkout_dir, allow_fetch): 107 def checkout(self, repo, revision, checkout_dir, allow_fetch):
119 logging.info('Freshening repository %s in %s', repo, checkout_dir) 108 logging.info('Freshening repository %s in %s', repo, checkout_dir)
120 109
121 if not os.path.isdir(checkout_dir): 110 if not os.path.isdir(checkout_dir):
122 if not allow_fetch: 111 if not allow_fetch:
123 raise FetchNotAllowedError( 112 raise FetchNotAllowedError(
124 'need to clone %s but fetch not allowed' % repo) 113 'need to clone %s but fetch not allowed' % repo)
125 _run_git(None, 'clone', '-q', repo, checkout_dir) 114 _run_git(None, 'clone', '-q', repo, checkout_dir)
126 elif not os.path.isdir(os.path.join(checkout_dir, '.git')): 115 elif not os.path.isdir(os.path.join(checkout_dir, '.git')):
127 raise UncleanFilesystemError( 116 raise UncleanFilesystemError(
128 '%s exists but is not a git repo' % checkout_dir) 117 '%s exists but is not a git repo' % checkout_dir)
129 118
130 _run_git(checkout_dir, 'config', 'remote.origin.url', repo) 119 _run_git(checkout_dir, 'config', 'remote.origin.url', repo)
131 try: 120 try:
132 _run_git(checkout_dir, 'rev-parse', '-q', '--verify', 121 _run_git(checkout_dir, 'rev-parse', '-q', '--verify',
133 '%s^{commit}' % revision) 122 '%s^{commit}' % revision)
134 except subprocess42.CalledProcessError: 123 except subprocess42.CalledProcessError:
135 if not allow_fetch: 124 if not allow_fetch:
136 raise FetchNotAllowedError( 125 raise FetchNotAllowedError(
137 'need to fetch %s but fetch not allowed' % repo) 126 'need to fetch %s but fetch not allowed' % repo)
138 _run_git(checkout_dir, 'fetch') 127
128 # Fetch from the remote Git repository. Wrap this in a GitFetchError
129 # for exponential retry on failure.
130 try:
131 _run_git(checkout_dir, 'fetch')
132 except subprocess42.CalledProcessError as e:
133 raise GitFetchError(e.message)
134
139 _run_git(checkout_dir, 'reset', '-q', '--hard', revision) 135 _run_git(checkout_dir, 'reset', '-q', '--hard', revision)
140 136
141 def updates(self, repo, revision, checkout_dir, allow_fetch, 137 def updates(self, repo, revision, checkout_dir, allow_fetch,
142 other_revision, paths): 138 other_revision, paths):
143 self.checkout(repo, revision, checkout_dir, allow_fetch) 139 self.checkout(repo, revision, checkout_dir, allow_fetch)
144 if allow_fetch: 140 if allow_fetch:
145 _run_git(checkout_dir, 'fetch') 141 _run_git(checkout_dir, 'fetch')
146 args = [ 142 args = [
147 'rev-list', 143 'rev-list',
148 '--reverse', 144 '--reverse',
149 '%s..%s' % (revision, other_revision), 145 '%s..%s' % (revision, other_revision),
150 ] 146 ]
151 if paths: 147 if paths:
152 args.extend(['--'] + paths) 148 args.extend(['--'] + paths)
153 return filter(bool, _run_git(checkout_dir, *args).strip().split('\n')) 149 return filter(bool, _run_git(checkout_dir, *args).strip().split('\n'))
154 150
155 def commit_metadata(self, repo, revision, checkout_dir, allow_fetch): 151 def commit_metadata(self, repo, revision, checkout_dir, allow_fetch):
156 return { 152 return {
157 'author': _run_git(checkout_dir, 'show', '-s', '--pretty=%aE', 153 'author': _run_git(checkout_dir, 'show', '-s', '--pretty=%aE',
158 revision).strip(), 154 revision).strip(),
159 'message': _run_git(checkout_dir, 'show', '-s', '--pretty=%B', 155 'message': _run_git(checkout_dir, 'show', '-s', '--pretty=%B',
160 revision).strip(), 156 revision).strip(),
161 } 157 }
162 158
163 159
160 class GitilesFetchError(FetchError):
161 """An HTTP error that occurred during Gitiles fetching."""
162
163 def __init__(self, status, message):
164 super(GitilesFetchError, self).__init__(
165 'Gitiles error code (%d): %s' % (status, message))
166 self.status = status
167 self.message = message
168
169 @staticmethod
170 def transient(e):
171 """Returns (bool): True if "e" is a GitilesFetchError with a transient HTTP code.
martiniss 2016/09/23 00:49:50 nit: docs formatting
dnj 2016/09/23 01:40:55 Done.
172 """
173 return (isinstance(e, GitilesFetchError) and
174 e.status >= httplib.INTERNAL_SERVER_ERROR)
175
176
164 class GitilesBackend(Backend): 177 class GitilesBackend(Backend):
165 """GitilesBackend uses a repo served by Gitiles.""" 178 """GitilesBackend uses a repo served by Gitiles."""
166 179
180 # Header at the beginning of Gerrit/Gitiles JSON API responses.
181 _GERRIT_XSRF_HEADER = ')]}\'\n'
182
167 @property 183 @property
168 def repo_type(self): 184 def repo_type(self):
169 return package_pb2.DepSpec.GITILES 185 return package_pb2.DepSpec.GITILES
170 186
171 @staticmethod 187 @staticmethod
172 def branch_spec(branch): 188 def branch_spec(branch):
173 return branch 189 return branch
174 190
175 @_retry
176 def checkout(self, repo, revision, checkout_dir, allow_fetch): 191 def checkout(self, repo, revision, checkout_dir, allow_fetch):
177 requests_ssl.check_requests_ssl() 192 requests_ssl.check_requests_ssl()
178 logging.info('Freshening repository %s in %s', repo, checkout_dir) 193 logging.info('Freshening repository %s in %s', repo, checkout_dir)
179 194
180 # TODO(phajdan.jr): implement caching. 195 # TODO(phajdan.jr): implement caching.
181 if not allow_fetch: 196 if not allow_fetch:
182 raise FetchNotAllowedError( 197 raise FetchNotAllowedError(
183 'need to download %s from gitiles but fetch not allowed' % repo) 198 'need to download %s from gitiles but fetch not allowed' % repo)
184 199
185 revision = self._resolve_revision(repo, revision) 200 revision = self._resolve_revision(repo, revision)
186 201
187 shutil.rmtree(checkout_dir, ignore_errors=True) 202 shutil.rmtree(checkout_dir, ignore_errors=True)
188 203
189 recipes_cfg_url = '%s/+/%s/infra/config/recipes.cfg?format=TEXT' % ( 204 recipes_cfg_url = '%s/+/%s/infra/config/recipes.cfg?format=TEXT' % (
190 repo, requests.utils.quote(revision)) 205 repo, requests.utils.quote(revision))
191 logging.info('fetching %s' % recipes_cfg_url) 206 recipes_cfg_text = base64.b64decode(
martiniss 2016/09/23 00:49:50 Can you add something like what I did: try:
dnj 2016/09/23 01:40:55 Does Gitiles actually return 200 w/ a UnicodeError
192 recipes_cfg_request = requests.get(recipes_cfg_url) 207 self._fetch_gitiles(recipes_cfg_url).text)
193 recipes_cfg_text = base64.b64decode(recipes_cfg_request.text)
194 recipes_cfg_proto = package_pb2.Package() 208 recipes_cfg_proto = package_pb2.Package()
195 text_format.Merge(recipes_cfg_text, recipes_cfg_proto) 209 text_format.Merge(recipes_cfg_text, recipes_cfg_proto)
196 recipes_path_rel = recipes_cfg_proto.recipes_path 210 recipes_path_rel = recipes_cfg_proto.recipes_path
197 211
198 # Re-create recipes.cfg in |checkout_dir| so that the repo's recipes.py 212 # Re-create recipes.cfg in |checkout_dir| so that the repo's recipes.py
199 # can look it up. 213 # can look it up.
200 recipes_cfg_path = os.path.join( 214 recipes_cfg_path = os.path.join(
201 checkout_dir, 'infra', 'config', 'recipes.cfg') 215 checkout_dir, 'infra', 'config', 'recipes.cfg')
202 os.makedirs(os.path.dirname(recipes_cfg_path)) 216 os.makedirs(os.path.dirname(recipes_cfg_path))
203 with open(recipes_cfg_path, 'w') as f: 217 with open(recipes_cfg_path, 'w') as f:
204 f.write(recipes_cfg_text) 218 f.write(recipes_cfg_text)
205 219
206 recipes_path = os.path.join(checkout_dir, recipes_path_rel) 220 recipes_path = os.path.join(checkout_dir, recipes_path_rel)
207 if not os.path.exists(recipes_path): 221 if not os.path.exists(recipes_path):
208 os.makedirs(recipes_path) 222 os.makedirs(recipes_path)
209 223
210 archive_url = '%s/+archive/%s/%s.tar.gz' % ( 224 archive_url = '%s/+archive/%s/%s.tar.gz' % (
211 repo, requests.utils.quote(revision), recipes_path_rel) 225 repo, requests.utils.quote(revision), recipes_path_rel)
212 logging.info('fetching %s' % archive_url) 226 archive_response = self._fetch_gitiles(archive_url)
213 archive_request = requests.get(archive_url)
214 with tempfile.NamedTemporaryFile(delete=False) as f: 227 with tempfile.NamedTemporaryFile(delete=False) as f:
215 f.write(archive_request.content) 228 f.write(archive_response.content)
216 f.close() 229 f.close()
217 230
218 try: 231 try:
219 with tarfile.open(f.name) as archive_tarfile: 232 with tarfile.open(f.name) as archive_tarfile:
220 archive_tarfile.extractall(recipes_path) 233 archive_tarfile.extractall(recipes_path)
221 finally: 234 finally:
222 os.unlink(f.name) 235 os.unlink(f.name)
223 236
224 def updates(self, repo, revision, checkout_dir, allow_fetch, 237 def updates(self, repo, revision, checkout_dir, allow_fetch,
225 other_revision, paths): 238 other_revision, paths):
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after
267 '%s/+/%s?format=JSON' % (repo, requests.utils.quote(revision))) 280 '%s/+/%s?format=JSON' % (repo, requests.utils.quote(revision)))
268 281
269 def _resolve_revision(self, repo, revision): 282 def _resolve_revision(self, repo, revision):
270 """Returns a git sha corresponding to given revision. 283 """Returns a git sha corresponding to given revision.
271 284
272 Examples of non-sha revision: origin/master, HEAD.""" 285 Examples of non-sha revision: origin/master, HEAD."""
273 rev_json = self._revision_metadata(repo, revision) 286 rev_json = self._revision_metadata(repo, revision)
274 logging.info('resolved %s to %s', revision, rev_json['commit']) 287 logging.info('resolved %s to %s', revision, rev_json['commit'])
275 return rev_json['commit'] 288 return rev_json['commit']
276 289
277 def _fetch_gitiles_json(self, url): 290 @staticmethod
291 @util.exponential_retry(condition=GitilesFetchError.transient)
292 def _fetch_gitiles(url):
293 """Fetches a remote URL and returns the response object on success."""
294 logging.info('fetching %s' % url)
295 resp = requests.get(url)
296 if resp.status_code != httplib.OK:
297 raise GitilesFetchError(resp.status_code, resp.text)
298 return resp
299
300 @classmethod
301 @util.exponential_retry(condition=GitilesFetchError.transient)
302 def _fetch_gitiles_json(cls, url):
278 """Fetches JSON from Gitiles and returns parsed result.""" 303 """Fetches JSON from Gitiles and returns parsed result."""
279 logging.info('fetching %s', url) 304 logging.info('fetching %s', url)
280 raw = requests.get(url).text 305
281 if not raw.startswith(')]}\'\n'): 306 resp = requests.get(url)
282 raise FetchError('Unexpected gitiles response: %s' % raw) 307 if resp.status_code != httplib.OK:
283 return json.loads(raw.split('\n', 1)[1]) 308 raise GitilesFetchError(resp.status_code, resp.text)
309
310 if not resp.text.startswith(cls._GERRIT_XSRF_HEADER):
311 raise GitilesFetchError(resp.status_code, 'Missing XSRF header')
312
313 return json.loads(resp.text[len(cls._GERRIT_XSRF_HEADER):])
OLDNEW
« no previous file with comments | « no previous file | recipe_engine/unittests/fetch_test.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698