Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(186)

Side by Side Diff: tools/telemetry/catapult_base/cloud_storage.py

Issue 1599413006: Remove catapult_base from telemetry. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@perf_cb_move
Patch Set: Created 4 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 # Copyright 2014 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file.
4
5 """Wrappers for gsutil, for basic interaction with Google Cloud Storage."""
6
7 import collections
8 import contextlib
9 import hashlib
10 import logging
11 import os
12 import shutil
13 import stat
14 import subprocess
15 import sys
16 import tempfile
17 import time
18
19 try:
20 import fcntl
21 except ImportError:
22 fcntl = None
23
24 from catapult_base import util
25
26
# Cloud storage bucket names used by telemetry, from most open to most
# restricted access.
PUBLIC_BUCKET = 'chromium-telemetry'
PARTNER_BUCKET = 'chrome-partner-telemetry'
INTERNAL_BUCKET = 'chrome-telemetry'
TELEMETRY_OUTPUT = 'chrome-telemetry-output'

# Uses ordered dict to make sure that bucket's key-value items are ordered from
# the most open to the most restrictive.
BUCKET_ALIASES = collections.OrderedDict((
    ('public', PUBLIC_BUCKET),
    ('partner', PARTNER_BUCKET),
    ('internal', INTERNAL_BUCKET),
    ('output', TELEMETRY_OUTPUT),
))

# Short alias names ('public', 'partner', ...) accepted wherever a bucket may
# be specified.
BUCKET_ALIAS_NAMES = BUCKET_ALIASES.keys()


# Path of the gsutil binary bundled in catapult's third_party directory.
_GSUTIL_PATH = os.path.join(util.GetCatapultDir(), 'third_party', 'gsutil',
                            'gsutil')

# TODO(tbarzic): A workaround for http://crbug.com/386416 and
# http://crbug.com/359293. See |_RunCommand|.
_CROS_GSUTIL_HOME_WAR = '/home/chromeos-test/'
50
51
52
class CloudStorageError(Exception):
  """Base class for all cloud storage related failures."""

  @staticmethod
  def _GetConfigInstructions():
    # On CrOS devices HOME must be overridden for gsutil to work; show the
    # command exactly as the user must run it.
    if util.IsRunningOnCrosDevice():
      command = 'HOME=%s %s' % (_CROS_GSUTIL_HOME_WAR, _GSUTIL_PATH)
    else:
      command = _GSUTIL_PATH
    return ('To configure your credentials:\n'
            ' 1. Run "%s config" and follow its instructions.\n'
            ' 2. If you have a @google.com account, use that account.\n'
            ' 3. For the project-id, just enter 0.' % command)
63
64
class PermissionError(CloudStorageError):
  """Raised when the user lacks permission for a cloud storage object."""

  def __init__(self):
    message = ('Attempted to access a file from Cloud Storage but you don\'t '
               'have permission. ' + self._GetConfigInstructions())
    super(PermissionError, self).__init__(message)
70
71
class CredentialsError(CloudStorageError):
  """Raised when no gsutil credentials are configured."""

  def __init__(self):
    message = ('Attempted to access a file from Cloud Storage but you have no '
               'configured credentials. ' + self._GetConfigInstructions())
    super(CredentialsError, self).__init__(message)
77
78
class NotFoundError(CloudStorageError):
  """Raised when the requested cloud storage object does not exist."""
81
82
class ServerError(CloudStorageError):
  """Raised when cloud storage reports an internal (HTTP 500) error."""
85
86
# TODO(tonyg/dtu): Can this be replaced with distutils.spawn.find_executable()?
def _FindExecutableInPath(relative_executable_path, *extra_search_paths):
  """Returns the first executable found for the given relative path.

  Searches |extra_search_paths| first, then every directory on $PATH.
  Returns None when no executable candidate exists.
  """
  candidate_dirs = list(extra_search_paths)
  candidate_dirs.extend(os.environ['PATH'].split(os.pathsep))
  for candidate_dir in candidate_dirs:
    candidate = os.path.join(candidate_dir, relative_executable_path)
    if util.IsExecutable(candidate):
      return candidate
  return None
95
96 def _EnsureExecutable(gsutil):
97 """chmod +x if gsutil is not executable."""
98 st = os.stat(gsutil)
99 if not st.st_mode & stat.S_IEXEC:
100 os.chmod(gsutil, st.st_mode | stat.S_IEXEC)
101
def _RunCommand(args):
  """Runs gsutil with |args| and returns its stdout.

  On failure, classifies gsutil's stderr into the matching CloudStorageError
  subclass and raises it.

  Raises:
    CredentialsError: gsutil has no configured credentials.
    PermissionError: the caller lacks access (HTTP 403).
    NotFoundError: the requested object/URL does not exist.
    ServerError: the server returned HTTP 500.
    CloudStorageError: any other non-zero gsutil exit.
  """
  # On cros device, as telemetry is running as root, home will be set to /root/,
  # which is not writable. gsutil will attempt to create a download tracker dir
  # in home dir and fail. To avoid this, override HOME dir to something writable
  # when running on cros device.
  #
  # TODO(tbarzic): Figure out a better way to handle gsutil on cros.
  # http://crbug.com/386416, http://crbug.com/359293.
  gsutil_env = None
  if util.IsRunningOnCrosDevice():
    gsutil_env = os.environ.copy()
    gsutil_env['HOME'] = _CROS_GSUTIL_HOME_WAR

  if os.name == 'nt':
    # If Windows, prepend python. Python scripts aren't directly executable.
    args = [sys.executable, _GSUTIL_PATH] + args
  else:
    # Don't do it on POSIX, in case someone is using a shell script to redirect.
    args = [_GSUTIL_PATH] + args
    _EnsureExecutable(_GSUTIL_PATH)

  gsutil = subprocess.Popen(args, stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE, env=gsutil_env)
  stdout, stderr = gsutil.communicate()

  if gsutil.returncode:
    # Classify the failure by substring-matching known gsutil error messages.
    # Order matters: the first matching check wins.
    if stderr.startswith((
        'You are attempting to access protected data with no configured',
        'Failure: No handler was ready to authenticate.')):
      raise CredentialsError()
    if ('status=403' in stderr or 'status 403' in stderr or
        '403 Forbidden' in stderr):
      raise PermissionError()
    if (stderr.startswith('InvalidUriError') or 'No such object' in stderr or
        'No URLs matched' in stderr or 'One or more URLs matched no' in stderr):
      raise NotFoundError(stderr)
    if '500 Internal Server Error' in stderr:
      raise ServerError(stderr)
    raise CloudStorageError(stderr)

  return stdout
143
144
def List(bucket):
  """Returns the names of all objects in gs://|bucket|/, relative to it."""
  prefix = 'gs://%s/' % bucket
  listing = _RunCommand(['ls', prefix])
  # gsutil prints one full URL per line; strip the bucket prefix from each.
  return [line[len(prefix):] for line in listing.splitlines()]
149
150
def Exists(bucket, remote_path):
  """Returns True iff gs://|bucket|/|remote_path| exists in cloud storage."""
  try:
    _RunCommand(['ls', 'gs://%s/%s' % (bucket, remote_path)])
  except NotFoundError:
    return False
  return True
157
158
def Move(bucket1, bucket2, remote_path):
  """Moves |remote_path| from |bucket1| to |bucket2|."""
  source = 'gs://%s/%s' % (bucket1, remote_path)
  destination = 'gs://%s/%s' % (bucket2, remote_path)
  logging.info('Moving %s to %s' % (source, destination))
  _RunCommand(['mv', source, destination])
164
165
def Copy(bucket_from, bucket_to, remote_path_from, remote_path_to):
  """Copy a file from one location in CloudStorage to another.

  Args:
    bucket_from: The cloud storage bucket where the file is currently located.
    bucket_to: The cloud storage bucket it is being copied to.
    remote_path_from: The file path where the file is located in bucket_from.
    remote_path_to: The file path it is being copied to in bucket_to.

  It should: cause no changes locally or to the starting file, and will
  overwrite any existing files in the destination location.
  """
  source = 'gs://%s/%s' % (bucket_from, remote_path_from)
  destination = 'gs://%s/%s' % (bucket_to, remote_path_to)
  logging.info('Copying %s to %s' % (source, destination))
  _RunCommand(['cp', source, destination])
182
183
def Delete(bucket, remote_path):
  """Removes gs://|bucket|/|remote_path| from cloud storage."""
  target = 'gs://%s/%s' % (bucket, remote_path)
  logging.info('Deleting %s' % target)
  _RunCommand(['rm', target])
188
189
def Get(bucket, remote_path, local_path):
  """Downloads gs://|bucket|/|remote_path| to |local_path| under a file lock."""
  with _PseudoFileLock(local_path):
    _GetLocked(bucket, remote_path, local_path)
193
194
@contextlib.contextmanager
def _PseudoFileLock(base_path):
  """Context manager that holds an advisory lock for |base_path|.

  Creates a sibling '<base_path>.pseudo_lock' file for the duration of the
  block; other processes using this function poll for its existence before
  proceeding. Where fcntl is available (POSIX), an exclusive flock on the
  lock file is additionally held.
  """
  pseudo_lock_path = '%s.pseudo_lock' % base_path
  _CreateDirectoryIfNecessary(os.path.dirname(pseudo_lock_path))
  # This is somewhat of a racy hack because we don't have a good
  # cross-platform file lock. If we get one, this should be refactored
  # to use it.
  while os.path.exists(pseudo_lock_path):
    time.sleep(0.1)
  fd = os.open(pseudo_lock_path, os.O_RDONLY | os.O_CREAT)
  if fcntl:
    # POSIX only: a real exclusive lock on top of the existence poll above.
    fcntl.flock(fd, fcntl.LOCK_EX)
  try:
    yield
  finally:
    if fcntl:
      fcntl.flock(fd, fcntl.LOCK_UN)
    try:
      os.close(fd)
      os.remove(pseudo_lock_path)
    except OSError:
      # We don't care if the pseudo-lock gets removed elsewhere before we have
      # a chance to do so.
      pass
219
220
221 def _CreateDirectoryIfNecessary(directory):
222 if not os.path.exists(directory):
223 os.makedirs(directory)
224
225
def _GetLocked(bucket, remote_path, local_path):
  """Downloads gs://|bucket|/|remote_path| to |local_path|.

  Caller is expected to already hold the pseudo file lock for |local_path|.
  Downloads into a temporary file in the destination directory first and
  then moves it into place, so |local_path| never holds a partial download.
  Retries the download once if the server returns a 500.
  """
  url = 'gs://%s/%s' % (bucket, remote_path)
  logging.info('Downloading %s to %s' % (url, local_path))
  _CreateDirectoryIfNecessary(os.path.dirname(local_path))
  with tempfile.NamedTemporaryFile(
      dir=os.path.dirname(local_path),
      delete=False) as partial_download_path:
    try:
      # Windows won't download to an open file.
      partial_download_path.close()
      try:
        _RunCommand(['cp', url, partial_download_path.name])
      except ServerError:
        logging.info('Cloud Storage server error, retrying download')
        _RunCommand(['cp', url, partial_download_path.name])
      shutil.move(partial_download_path.name, local_path)
    finally:
      # On success shutil.move already removed the temp file; clean up only
      # if a failure left it behind.
      if os.path.exists(partial_download_path.name):
        os.remove(partial_download_path.name)
245
246
def Insert(bucket, remote_path, local_path, publicly_readable=False):
  """ Upload file in |local_path| to cloud storage.
  Args:
    bucket: the google cloud storage bucket name.
    remote_path: the remote file path in |bucket|.
    local_path: path of the local file to be uploaded.
    publicly_readable: whether the uploaded file has publicly readable
    permission.

  Returns:
    The url where the file is uploaded to.
  """
  url = 'gs://%s/%s' % (bucket, remote_path)
  if publicly_readable:
    gsutil_args = ['cp', '-a', 'public-read', local_path, url]
    extra_info = ' (publicly readable)'
  else:
    gsutil_args = ['cp', local_path, url]
    extra_info = ''
  logging.info('Uploading %s to %s%s' % (local_path, url, extra_info))
  _RunCommand(gsutil_args)
  return 'https://console.developers.google.com/m/cloudstorage/b/%s/o/%s' % (
      bucket, remote_path)
270
271
def GetIfHashChanged(cs_path, download_path, bucket, file_hash):
  """Downloads |cs_path| from |bucket| to |download_path| if |download_path|
  doesn't exist or its hash doesn't match |file_hash|.

  Returns:
    True if the binary was changed.
  Raises:
    CredentialsError if the user has no configured credentials.
    PermissionError if the user does not have permission to access the bucket.
    NotFoundError if the file is not in the given bucket in cloud_storage.
  """
  with _PseudoFileLock(download_path):
    # Skip the download when the local copy already matches the wanted hash.
    if (os.path.exists(download_path) and
        CalculateHash(download_path) == file_hash):
      return False
    _GetLocked(bucket, cs_path, download_path)
    return True
289
290
def GetIfChanged(file_path, bucket):
  """Gets the file at file_path if it has a hash file that doesn't match or
  if there is no local copy of file_path, but there is a hash file for it.

  Returns:
    True if the binary was changed.
  Raises:
    CredentialsError if the user has no configured credentials.
    PermissionError if the user does not have permission to access the bucket.
    NotFoundError if the file is not in the given bucket in cloud_storage.
  """
  with _PseudoFileLock(file_path):
    sha1_path = file_path + '.sha1'
    if not os.path.exists(sha1_path):
      # Without a .sha1 file there is nothing to fetch.
      logging.warning('Hash file not found: %s' % sha1_path)
      return False

    remote_hash = ReadHash(sha1_path)
    if os.path.exists(file_path):
      if CalculateHash(file_path) == remote_hash:
        return False
    _GetLocked(bucket, remote_hash, file_path)
    return True
313
314
def GetFilesInDirectoryIfChanged(directory, bucket):
  """ Scan the directory for .sha1 files, and download them from the given
  bucket in cloud storage if the local and remote hash don't match or
  there is no local copy.
  """
  if not os.path.isdir(directory):
    raise ValueError('Must provide a valid directory.')
  # Don't allow the root directory to be a serving_dir.
  if directory == os.path.abspath(os.sep):
    raise ValueError('Trying to serve root directory from HTTP server.')
  for dirpath, _, filenames in os.walk(directory):
    for filename in filenames:
      stem, extension = os.path.splitext(os.path.join(dirpath, filename))
      if extension == '.sha1':
        # The .sha1 file names the target file; fetch it if out of date.
        GetIfChanged(stem, bucket)
332
def CalculateHash(file_path):
  """Returns the SHA1 hex digest of the file at |file_path|."""
  digest = hashlib.sha1()
  with open(file_path, 'rb') as f:
    # Stream in 1MB chunks so large files need not fit in memory at once.
    for chunk in iter(lambda: f.read(1024 * 1024), b''):
      digest.update(chunk)
  return digest.hexdigest()
344
345
def ReadHash(hash_path):
  """Returns the hash stored in |hash_path|.

  Reads at most the first 1024 bytes and strips trailing whitespace.
  """
  with open(hash_path, 'rb') as hash_file:
    contents = hash_file.read(1024)
  return contents.rstrip()
OLDNEW
« no previous file with comments | « tools/telemetry/catapult_base/bin/run_tests ('k') | tools/telemetry/catapult_base/dependency_manager/OWNERS » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698