OLD | NEW |
| (Empty) |
1 # Copyright 2014 The Chromium Authors. All rights reserved. | |
2 # Use of this source code is governed by a BSD-style license that can be | |
3 # found in the LICENSE file. | |
4 | |
5 """Wrappers for gsutil, for basic interaction with Google Cloud Storage.""" | |
6 | |
7 import collections | |
8 import contextlib | |
9 import hashlib | |
10 import logging | |
11 import os | |
12 import shutil | |
13 import stat | |
14 import subprocess | |
15 import sys | |
16 import tempfile | |
17 import time | |
18 | |
19 try: | |
20 import fcntl | |
21 except ImportError: | |
22 fcntl = None | |
23 | |
24 from catapult_base import util | |
25 | |
26 | |
# Cloud Storage bucket names used by Telemetry, from least to most restricted.
PUBLIC_BUCKET = 'chromium-telemetry'
PARTNER_BUCKET = 'chrome-partner-telemetry'
INTERNAL_BUCKET = 'chrome-telemetry'
TELEMETRY_OUTPUT = 'chrome-telemetry-output'

# Uses ordered dict to make sure that bucket's key-value items are ordered from
# the most open to the most restrictive.
BUCKET_ALIASES = collections.OrderedDict((
    ('public', PUBLIC_BUCKET),
    ('partner', PARTNER_BUCKET),
    ('internal', INTERNAL_BUCKET),
    ('output', TELEMETRY_OUTPUT),
))

# Short alias names ('public', 'partner', ...) in the same open-to-restrictive
# order as BUCKET_ALIASES.
BUCKET_ALIAS_NAMES = BUCKET_ALIASES.keys()


# Path to the bundled gsutil script used by _RunCommand.
_GSUTIL_PATH = os.path.join(util.GetCatapultDir(), 'third_party', 'gsutil',
                            'gsutil')

# TODO(tbarzic): A workaround for http://crbug.com/386416 and
# http://crbug.com/359293. See |_RunCommand|.
_CROS_GSUTIL_HOME_WAR = '/home/chromeos-test/'
51 | |
52 | |
class CloudStorageError(Exception):
  """Base exception for failed gsutil interactions with Cloud Storage."""

  @staticmethod
  def _GetConfigInstructions():
    """Returns human-readable steps for setting up gsutil credentials."""
    if util.IsRunningOnCrosDevice():
      # On cros, gsutil must run with a writable HOME (see _RunCommand).
      command = 'HOME=%s %s' % (_CROS_GSUTIL_HOME_WAR, _GSUTIL_PATH)
    else:
      command = _GSUTIL_PATH
    return ('To configure your credentials:\n'
            ' 1. Run "%s config" and follow its instructions.\n'
            ' 2. If you have a @google.com account, use that account.\n'
            ' 3. For the project-id, just enter 0.' % command)
63 | |
64 | |
class PermissionError(CloudStorageError):
  """Raised when access to a Cloud Storage object is denied (HTTP 403)."""

  def __init__(self):
    message = ('Attempted to access a file from Cloud Storage but you don\'t '
               'have permission. ' + self._GetConfigInstructions())
    super(PermissionError, self).__init__(message)
70 | |
71 | |
class CredentialsError(CloudStorageError):
  """Raised when gsutil has no configured credentials."""

  def __init__(self):
    message = ('Attempted to access a file from Cloud Storage but you have no '
               'configured credentials. ' + self._GetConfigInstructions())
    super(CredentialsError, self).__init__(message)
77 | |
78 | |
class NotFoundError(CloudStorageError):
  """Raised when the requested Cloud Storage object does not exist."""
81 | |
82 | |
class ServerError(CloudStorageError):
  """Raised on a Cloud Storage 500 Internal Server Error."""
85 | |
86 | |
# TODO(tonyg/dtu): Can this be replaced with distutils.spawn.find_executable()?
def _FindExecutableInPath(relative_executable_path, *extra_search_paths):
  """Searches |extra_search_paths| then $PATH for an executable.

  Returns the first matching path, or None if no executable is found.
  """
  env_paths = os.environ['PATH'].split(os.pathsep)
  for directory in list(extra_search_paths) + env_paths:
    candidate = os.path.join(directory, relative_executable_path)
    if util.IsExecutable(candidate):
      return candidate
  return None
95 | |
96 def _EnsureExecutable(gsutil): | |
97 """chmod +x if gsutil is not executable.""" | |
98 st = os.stat(gsutil) | |
99 if not st.st_mode & stat.S_IEXEC: | |
100 os.chmod(gsutil, st.st_mode | stat.S_IEXEC) | |
101 | |
def _RunCommand(args):
  """Runs gsutil with |args|; returns stdout or raises a CloudStorageError.

  Raises:
    CredentialsError, PermissionError, NotFoundError, ServerError, or a plain
    CloudStorageError, classified from gsutil's stderr on nonzero exit.
  """

  def _RaiseFromStderr(stderr):
    # Classify gsutil failures by pattern-matching its stderr output.
    if stderr.startswith((
        'You are attempting to access protected data with no configured',
        'Failure: No handler was ready to authenticate.')):
      raise CredentialsError()
    if ('status=403' in stderr or 'status 403' in stderr or
        '403 Forbidden' in stderr):
      raise PermissionError()
    if (stderr.startswith('InvalidUriError') or 'No such object' in stderr or
        'No URLs matched' in stderr or 'One or more URLs matched no' in stderr):
      raise NotFoundError(stderr)
    if '500 Internal Server Error' in stderr:
      raise ServerError(stderr)
    raise CloudStorageError(stderr)

  # On cros device, as telemetry is running as root, home will be set to
  # /root/, which is not writable. gsutil will attempt to create a download
  # tracker dir in home dir and fail. To avoid this, override HOME dir to
  # something writable when running on cros device.
  #
  # TODO(tbarzic): Figure out a better way to handle gsutil on cros.
  # http://crbug.com/386416, http://crbug.com/359293.
  env = None
  if util.IsRunningOnCrosDevice():
    env = os.environ.copy()
    env['HOME'] = _CROS_GSUTIL_HOME_WAR

  if os.name == 'nt':
    # If Windows, prepend python. Python scripts aren't directly executable.
    command = [sys.executable, _GSUTIL_PATH] + args
  else:
    # Don't do it on POSIX, in case someone is using a shell script to redirect.
    command = [_GSUTIL_PATH] + args
    _EnsureExecutable(_GSUTIL_PATH)

  process = subprocess.Popen(command, stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE, env=env)
  stdout, stderr = process.communicate()

  if process.returncode:
    _RaiseFromStderr(stderr)

  return stdout
143 | |
144 | |
def List(bucket):
  """Returns object paths in |bucket|, relative to the bucket root."""
  prefix = 'gs://%s/' % bucket
  listing = _RunCommand(['ls', prefix])
  return [line[len(prefix):] for line in listing.splitlines()]
149 | |
150 | |
def Exists(bucket, remote_path):
  """Returns True iff gs://|bucket|/|remote_path| exists."""
  try:
    _RunCommand(['ls', 'gs://%s/%s' % (bucket, remote_path)])
  except NotFoundError:
    return False
  return True
157 | |
158 | |
def Move(bucket1, bucket2, remote_path):
  """Moves |remote_path| from |bucket1| to |bucket2|."""
  source = 'gs://%s/%s' % (bucket1, remote_path)
  destination = 'gs://%s/%s' % (bucket2, remote_path)
  logging.info('Moving %s to %s' % (source, destination))
  _RunCommand(['mv', source, destination])
164 | |
165 | |
def Copy(bucket_from, bucket_to, remote_path_from, remote_path_to):
  """Copy a file from one location in CloudStorage to another.

  Causes no changes locally or to the source file, and overwrites any
  existing file at the destination.

  Args:
    bucket_from: The cloud storage bucket where the file is currently located.
    bucket_to: The cloud storage bucket it is being copied to.
    remote_path_from: The file path where the file is located in bucket_from.
    remote_path_to: The file path it is being copied to in bucket_to.
  """
  source = 'gs://%s/%s' % (bucket_from, remote_path_from)
  destination = 'gs://%s/%s' % (bucket_to, remote_path_to)
  logging.info('Copying %s to %s' % (source, destination))
  _RunCommand(['cp', source, destination])
182 | |
183 | |
def Delete(bucket, remote_path):
  """Removes gs://|bucket|/|remote_path|."""
  target = 'gs://%s/%s' % (bucket, remote_path)
  logging.info('Deleting %s' % target)
  _RunCommand(['rm', target])
188 | |
189 | |
def Get(bucket, remote_path, local_path):
  """Downloads gs://|bucket|/|remote_path| to |local_path|.

  Takes the pseudo-lock for |local_path| so concurrent downloads of the
  same file do not clobber each other.
  """
  with _PseudoFileLock(local_path):
    _GetLocked(bucket, remote_path, local_path)
193 | |
194 | |
@contextlib.contextmanager
def _PseudoFileLock(base_path):
  """Context manager approximating an exclusive lock on |base_path|.

  A sibling '<base_path>.pseudo_lock' file serves as the lock marker; where
  fcntl is available (POSIX), the open descriptor is additionally flock()ed
  for a real advisory lock. On platforms without fcntl (e.g. Windows) only
  the marker-file existence check below provides mutual exclusion, and that
  check is racy, as noted in the comment.
  """
  pseudo_lock_path = '%s.pseudo_lock' % base_path
  _CreateDirectoryIfNecessary(os.path.dirname(pseudo_lock_path))
  # This is somewhat of a racy hack because we don't have a good
  # cross-platform file lock. If we get one, this should be refactored
  # to use it.
  while os.path.exists(pseudo_lock_path):
    time.sleep(0.1)
  fd = os.open(pseudo_lock_path, os.O_RDONLY | os.O_CREAT)
  if fcntl:
    fcntl.flock(fd, fcntl.LOCK_EX)
  try:
    yield
  finally:
    # Unlock before closing/removing so waiters blocked in flock are released.
    if fcntl:
      fcntl.flock(fd, fcntl.LOCK_UN)
    try:
      os.close(fd)
      os.remove(pseudo_lock_path)
    except OSError:
      # We don't care if the pseudo-lock gets removed elsewhere before we have
      # a chance to do so.
      pass
219 | |
220 | |
221 def _CreateDirectoryIfNecessary(directory): | |
222 if not os.path.exists(directory): | |
223 os.makedirs(directory) | |
224 | |
225 | |
def _GetLocked(bucket, remote_path, local_path):
  """Downloads gs://|bucket|/|remote_path| to |local_path|.

  Caller must already hold the pseudo-lock for |local_path|. The download
  lands in a temporary file in the destination directory first and is then
  moved into place, so a partial download never appears at |local_path|.
  """
  url = 'gs://%s/%s' % (bucket, remote_path)
  logging.info('Downloading %s to %s' % (url, local_path))
  _CreateDirectoryIfNecessary(os.path.dirname(local_path))
  partial_download = tempfile.NamedTemporaryFile(
      dir=os.path.dirname(local_path), delete=False)
  try:
    # Windows won't download to an open file.
    partial_download.close()
    try:
      _RunCommand(['cp', url, partial_download.name])
    except ServerError:
      # One retry for transient server-side failures.
      logging.info('Cloud Storage server error, retrying download')
      _RunCommand(['cp', url, partial_download.name])
    shutil.move(partial_download.name, local_path)
  finally:
    if os.path.exists(partial_download.name):
      os.remove(partial_download.name)
245 | |
246 | |
def Insert(bucket, remote_path, local_path, publicly_readable=False):
  """ Upload file in |local_path| to cloud storage.
  Args:
    bucket: the google cloud storage bucket name.
    remote_path: the remote file path in |bucket|.
    local_path: path of the local file to be uploaded.
    publicly_readable: whether the uploaded file has publicly readable
    permission.

  Returns:
    The url where the file is uploaded to.
  """
  url = 'gs://%s/%s' % (bucket, remote_path)
  if publicly_readable:
    command_and_args = ['cp', '-a', 'public-read']
    extra_info = ' (publicly readable)'
  else:
    command_and_args = ['cp']
    extra_info = ''
  command_and_args += [local_path, url]
  logging.info('Uploading %s to %s%s' % (local_path, url, extra_info))
  _RunCommand(command_and_args)
  return 'https://console.developers.google.com/m/cloudstorage/b/%s/o/%s' % (
      bucket, remote_path)
270 | |
271 | |
def GetIfHashChanged(cs_path, download_path, bucket, file_hash):
  """Downloads gs://|bucket|/|cs_path| to |download_path| if |download_path|
  does not exist or its hash does not match |file_hash|.

  Returns:
    True if the binary was changed.
  Raises:
    CredentialsError if the user has no configured credentials.
    PermissionError if the user does not have permission to access the bucket.
    NotFoundError if the file is not in the given bucket in cloud_storage.
  """
  with _PseudoFileLock(download_path):
    up_to_date = (os.path.exists(download_path) and
                  CalculateHash(download_path) == file_hash)
    if up_to_date:
      return False
    _GetLocked(bucket, cs_path, download_path)
    return True
289 | |
290 | |
def GetIfChanged(file_path, bucket):
  """Downloads |file_path| from |bucket| when the local copy is missing or
  its hash differs from the expected hash in the '<file_path>.sha1' file.

  Returns:
    True if the binary was changed.
  Raises:
    CredentialsError if the user has no configured credentials.
    PermissionError if the user does not have permission to access the bucket.
    NotFoundError if the file is not in the given bucket in cloud_storage.
  """
  with _PseudoFileLock(file_path):
    hash_path = file_path + '.sha1'
    if not os.path.exists(hash_path):
      # Without a .sha1 file there is nothing to fetch; not an error.
      logging.warning('Hash file not found: %s' % hash_path)
      return False

    expected_hash = ReadHash(hash_path)
    up_to_date = (os.path.exists(file_path) and
                  CalculateHash(file_path) == expected_hash)
    if up_to_date:
      return False
    # Objects are stored in the bucket keyed by their content hash.
    _GetLocked(bucket, expected_hash, file_path)
    return True
313 | |
314 | |
def GetFilesInDirectoryIfChanged(directory, bucket):
  """Recursively scans |directory| for .sha1 files and downloads the
  corresponding objects from |bucket| wherever the local copy is missing or
  its hash does not match.
  """
  if not os.path.isdir(directory):
    raise ValueError('Must provide a valid directory.')
  # Don't allow the root directory to be a serving_dir.
  if directory == os.path.abspath(os.sep):
    raise ValueError('Trying to serve root directory from HTTP server.')
  for dirpath, _, filenames in os.walk(directory):
    for filename in filenames:
      stem, extension = os.path.splitext(os.path.join(dirpath, filename))
      if extension == '.sha1':
        GetIfChanged(stem, bucket)
332 | |
def CalculateHash(file_path):
  """Calculates and returns the hash of the file at file_path."""
  digest = hashlib.sha1()
  with open(file_path, 'rb') as stream:
    # Read in 1mb chunks, so it doesn't all have to be loaded into memory.
    for block in iter(lambda: stream.read(1024 * 1024), b''):
      digest.update(block)
  return digest.hexdigest()
344 | |
345 | |
def ReadHash(hash_path):
  """Returns the hash stored in |hash_path| with trailing whitespace removed.

  Only the first 1024 bytes are read; a sha1 hex digest fits easily.
  """
  with open(hash_path, 'rb') as hash_file:
    contents = hash_file.read(1024)
  return contents.rstrip()
OLD | NEW |