Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(214)

Side by Side Diff: slave/skia_slave_scripts/utils/old_gs_utils.py

Issue 648353002: Remove Skia's forked buildbot code (Closed) Base URL: https://skia.googlesource.com/buildbot.git@master
Patch Set: Fix launch_slaves, remove more stuff Created 6 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 #!/usr/bin/env python
2 # Copyright (c) 2012 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
5
6 """This module contains utilities related to Google Storage manipulations.
7
8 TODO(epoger): Replace this old gs_utils.py with a new one, within the common
9 repo, that uses google-api-python-client rather than the gsutil tool.
10 See http://skbug.com/2618 ('buildbot code: use google-api-python-client instead
11 of gsutil tool')
12 """
13
14 import hashlib
15 import os
16 import posixpath
17 import re
18 import shutil
19 import tempfile
20 import time
21
22 from py.utils import shell_utils
23 from slave import slave_utils
24
25 import file_utils
26
27
# Google Storage base URL (gs://BUCKETNAME) exposed as a module-level default.
DEFAULT_DEST_GSBASE = 'gs://chromium-skia-gm'

# Names of the marker files written alongside uploaded directory contents.
# The STARTED/COMPLETED pair brackets an upload; the COMPLETED file is what
# _are_timestamps_equal() compares between local disk and Google Storage.
TIMESTAMP_STARTED_FILENAME = 'TIMESTAMP_LAST_UPLOAD_STARTED'
TIMESTAMP_COMPLETED_FILENAME = 'TIMESTAMP_LAST_UPLOAD_COMPLETED'
LAST_REBASELINED_BY_FILENAME = 'LAST_REBASELINED_BY'

# Maximum number of files passed to a single "gsutil cp" call when uploading
# in chunks.
FILES_CHUNK = 500

# Number of bytes read per iteration when hashing a local file (64 KiB).
BUFSIZE = 64 * 1024

# Extracts the ETag value from the output of "gsutil ls -L".
ETAG_REGEX = re.compile(r'ETag:\s*(\S+)')
37
38
def delete_storage_object(object_name):
    """Remove an object from Google Storage using "gsutil rm -R".

    params:
      object_name: name of the object to remove; handed to gsutil unchanged

    Any failure raised by shell_utils.run() propagates to the caller.
    """
    command = [slave_utils.GSUtilSetup(), 'rm', '-R', object_name]
    print('Running command: %s' % command)
    shell_utils.run(command)
46
47
def upload_file(local_src_path, remote_dest_path, gs_acl='private',
                http_header_lines=None, only_if_modified=False):
    """Copy one local file up to Google Storage with "gsutil cp".

    params:
      local_src_path: path to file on local disk
      remote_dest_path: GS URL (gs://BUCKETNAME/PATH)
      gs_acl: which predefined ACL to apply to the file on Google Storage; see
          https://developers.google.com/storage/docs/accesscontrol#extension
      http_header_lines: a list of HTTP header strings to add, if any; each
          one is passed to gsutil as a separate "-h" option
      only_if_modified: if True, first run "gsutil ls -L" on remote_dest_path
          and skip the upload when the ETag it reports equals the MD5 hex
          digest of local_src_path. The file is uploaded if the listing
          fails, reports no ETag, or reports a different value. Note that
          this may take longer than just uploading the file without checking
          first, due to extra round-trips!

    TODO(epoger): Consider adding a do_compress parameter that would compress
    the file using gzip before upload, and add a "Content-Encoding:gzip" header
    so that HTTP downloads of the file would be unzipped automatically.
    See https://developers.google.com/storage/docs/gsutil/addlhelp/
        WorkingWithObjectMetadata#content-encoding
    """
    gsutil = slave_utils.GSUtilSetup()

    if only_if_modified:
        # Bail out before uploading if the remote copy already matches.
        try:
            ls_output = shell_utils.run(
                [gsutil, 'ls', '-L', remote_dest_path])
            etag_match = ETAG_REGEX.search(ls_output)
            if etag_match:
                # TODO(epoger): In my testing, this has always returned an
                # MD5 hash that is comparable to local_md5 below. But from my
                # reading of
                # https://developers.google.com/storage/docs/hashes-etags ,
                # this is not something we can always rely on ("composite
                # objects don't support MD5 hashes"; I'm not sure if we ever
                # encounter composite objects, though). It would be good for
                # us to find a more reliable hash, but I haven't found a way
                # to get one out of gsutil yet.
                #
                # For now: if the remote_md5 is not found, or is computed in
                # such a way that is different from local_md5, then we will
                # re-upload the file even if it did not change.
                remote_md5 = etag_match.group(1)
                md5 = hashlib.md5()
                with open(local_src_path, 'rb') as src_file:
                    # Hash in BUFSIZE pieces; an empty read marks end of file.
                    for piece in iter(lambda: src_file.read(BUFSIZE), b''):
                        md5.update(piece)
                local_md5 = md5.hexdigest()
                if local_md5 == remote_md5:
                    print(
                        'local_src_path %s and remote_dest_path %s have same '
                        'hash %s' %
                        (local_src_path, remote_dest_path, local_md5))
                    return
        except shell_utils.CommandFailedException:
            # remote_dest_path probably does not exist. Go ahead and do the
            # upload.
            pass

    command = [gsutil]
    for header in http_header_lines or []:
        command.extend(['-h', header])
    command.extend(['cp', '-a', gs_acl, local_src_path, remote_dest_path])
    print('Running command: %s' % command)
    shell_utils.run(command)
114
115
def upload_dir_contents(local_src_dir, remote_dest_dir, gs_acl='private',
                        http_header_lines=None):
    """Upload every file under a local directory to Google Storage.

    params:
      local_src_dir: directory on local disk to upload contents of
      remote_dest_dir: GS URL (gs://BUCKETNAME/PATH)
      gs_acl: which predefined ACL to apply to the files on Google Storage; see
          https://developers.google.com/storage/docs/accesscontrol#extension
      http_header_lines: a list of HTTP header strings to add, if any

    The copy operates as a "merge with overwrite": any files in src_dir will be
    "overlaid" on top of the existing content in dest_dir.  Existing files with
    the same names will be overwritten.

    We upload each file as a separate call to gsutil.  This takes longer than
    calling "gsutil -m cp -R <source> <dest>", which can perform the uploads in
    parallel... but in http://skbug.com/2618 ('The Case of the Missing
    Mandrills') we figured out that was silently failing in some cases!

    TODO(epoger): Use the google-api-python-client API, like we do in
    https://skia.googlesource.com/skia/+/master/tools/pyutils/gs_utils.py ,
    rather than calling out to the gsutil tool.  See http://skbug.com/2618

    TODO(epoger): Upload multiple files simultaneously to reduce latency.

    TODO(epoger): Add a "noclobber" mode that will not upload any files that
    would overwrite existing files in Google Storage.

    TODO(epoger): Consider adding a do_compress parameter that would compress
    the file using gzip before upload, and add a "Content-Encoding:gzip" header
    so that HTTP downloads of the file would be unzipped automatically.
    See https://developers.google.com/storage/docs/gsutil/addlhelp/
        WorkingWithObjectMetadata#content-encoding
    """
    # Everything up to (but excluding) the src/dest pair is shared by all
    # the per-file gsutil invocations.
    base_command = [slave_utils.GSUtilSetup()]
    for header in http_header_lines or []:
        base_command.extend(['-h', header])
    base_command.extend(['cp', '-a', gs_acl])

    src_root = os.path.abspath(local_src_dir)
    for (src_dirpath, _, filenames) in os.walk(src_root):
        if src_dirpath != src_root:
            # Files in a subdir: mirror the relative subdir path (converted
            # to posix separators) underneath remote_dest_dir.
            relative_subdir = os.path.relpath(src_dirpath, src_root)
            dest_dirpath = posixpath.join(
                remote_dest_dir, _convert_to_posixpath(relative_subdir))
        else:
            # Files directly within local_src_dir go straight into
            # remote_dest_dir.
            dest_dirpath = remote_dest_dir
        for filename in sorted(filenames):
            src_filepath = os.path.join(src_dirpath, filename)
            dest_filepath = posixpath.join(dest_dirpath, filename)
            shell_utils.run(base_command + [src_filepath, dest_filepath])
174
175
def download_dir_contents(remote_src_dir, local_dest_dir, multi=True):
    """Fetch a Google Storage directory onto local disk with "gsutil cp -R".

    params:
      remote_src_dir: GS URL (gs://BUCKETNAME/PATH)
      local_dest_dir: directory on local disk to write the contents into
      multi: boolean; when true, gsutil is invoked with "-m" so the copy runs
          in multithreaded mode.

    The copy operates as a "merge with overwrite": any files in src_dir will be
    "overlaid" on top of the existing content in dest_dir.  Existing files with
    the same names will be overwritten.
    """
    parallel_flag = ['-m'] if multi else []
    command = ([slave_utils.GSUtilSetup()] + parallel_flag +
               ['cp', '-R', remote_src_dir, local_dest_dir])
    print('Running command: %s' % command)
    shell_utils.run(command)
195
196
def copy_dir_contents(remote_src_dir, remote_dest_dir, gs_acl='private',
                      http_header_lines=None):
    """Copy one Google Storage directory into another ("gsutil -m cp -R").

    params:
      remote_src_dir: source GS URL (gs://BUCKETNAME/PATH)
      remote_dest_dir: dest GS URL (gs://BUCKETNAME/PATH)
      gs_acl: which predefined ACL to apply to the new files; see
          https://developers.google.com/storage/docs/accesscontrol#extension
      http_header_lines: a list of HTTP header strings to add, if any

    The copy operates as a "merge with overwrite": any files in src_dir will be
    "overlaid" on top of the existing content in dest_dir.  Existing files with
    the same names will be overwritten.

    Performs the copy in multithreaded mode, in case there are a large number
    of files.
    """
    command = [slave_utils.GSUtilSetup(), '-m']
    for header in http_header_lines or []:
        command += ['-h', header]
    command += ['cp', '-a', gs_acl, '-R', remote_src_dir, remote_dest_dir]
    print('Running command: %s' % command)
    shell_utils.run(command)
223
224
def move_storage_directory(src_dir, dest_dir):
    """Move a Google Storage directory by running "gsutil mv -p".

    params:
      src_dir: directory to move; handed to gsutil unchanged
      dest_dir: destination; handed to gsutil unchanged
    """
    command = [slave_utils.GSUtilSetup(), 'mv', '-p', src_dir, dest_dir]
    print('Running command: %s' % command)
    shell_utils.run(command)
232
233
def list_storage_directory(dest_gsbase, subdir):
    """List the contents of the specified Storage directory.

    params:
      dest_gsbase: Google Storage base, joined with subdir to form the
          directory to list
      subdir: directory relative to dest_gsbase

    Returns a sorted list of the distinct listing lines that start with the
    joined directory path, excluding the directory path itself. (Sorting
    makes the result deterministic; iterating the de-duplicating set directly
    would yield an arbitrary order.)

    Raises Exception if slave_utils.GSUtilListBucket reports a nonzero status.
    """
    gsbase_subdir = posixpath.join(dest_gsbase, subdir)
    status, output_gsutil_ls = slave_utils.GSUtilListBucket(gsbase_subdir, [])
    if status != 0:
        raise Exception(
            'Could not list contents of %s in Google Storage!' % gsbase_subdir)

    # Ignore lines with warnings and status messages: only lines that begin
    # with the listed path (and are not the path itself) are real entries.
    distinct_lines = set(output_gsutil_ls.splitlines())
    return sorted(
        line for line in distinct_lines
        if line.startswith(gsbase_subdir) and line != gsbase_subdir)
248
249
def does_storage_object_exist(object_name):
    """Report whether "gsutil ls" succeeds for an object on Google Storage.

    params:
      object_name: name of the object to look for; handed to gsutil unchanged

    Returns True if the listing command succeeds, or False if it raises
    shell_utils.CommandFailedException.
    """
    command = [slave_utils.GSUtilSetup(), 'ls', object_name]
    print('Running command: %s' % command)
    try:
        shell_utils.run(command)
    except shell_utils.CommandFailedException:
        return False
    return True
264
265
def download_directory_contents_if_changed(gs_base, gs_relative_dir,
                                           local_dir):
    """Re-download a Google Storage directory if its timestamp differs.

    Compares the local TIMESTAMP_LAST_UPLOAD_COMPLETED file with the one on
    Google Storage. If they match, nothing is downloaded. Otherwise local_dir
    is recreated empty and everything under gs_base/gs_relative_dir is
    downloaded into it.

    The goal of download_directory_contents_if_changed and
    upload_directory_contents_if_changed is to attempt to replicate directory
    level rsync functionality to the Google Storage directories we care about.

    Raises Exception if the timestamps still differ after the download.
    """
    if _are_timestamps_equal(gs_base, gs_relative_dir, local_dir):
        print('\n\n=======Local directory is current=======\n\n')
        return

    file_utils.create_clean_local_dir(local_dir)
    gs_source = posixpath.join(gs_base, gs_relative_dir, '*')
    slave_utils.GSUtilDownloadFile(src=gs_source, dst=local_dir)
    # The download should have brought the COMPLETED timestamp file with it;
    # if the timestamps still disagree, treat the download as failed.
    if not _are_timestamps_equal(gs_base, gs_relative_dir, local_dir):
        raise Exception('Failed to download from GS: %s' % gs_source)
281
282
283 def _get_chunks(seq, n):
284 """Yield successive n-sized chunks from the specified sequence."""
285 for i in xrange(0, len(seq), n):
286 yield seq[i:i+n]
287
288
def delete_directory_contents(gs_base, gs_relative_dir, files_to_delete):
    """Delete files from, or the whole of, a Google Storage directory.

    Args:
      gs_base: str - The Google Storage base. Eg: gs://rmistry.
      gs_relative_dir: str - Relative directory to the Google Storage base.
      files_to_delete: Files (named relative to the directory) that should be
          deleted from it. The files are deleted one at a time. If
          files_to_delete is None or empty then the directory itself is
          deleted, taking all of its contents with it.
    """
    gs_dest = posixpath.join(gs_base, gs_relative_dir)
    if not files_to_delete:
        delete_storage_object(gs_dest)
        return
    for file_to_delete in files_to_delete:
        delete_storage_object(
            object_name=posixpath.join(gs_dest, file_to_delete))
305
306
def upload_directory_contents_if_changed(gs_base, gs_relative_dir, gs_acl,
                                         local_dir, force_upload=False,
                                         upload_chunks=False,
                                         files_to_upload=None):
    """Compares the TIMESTAMP_LAST_UPLOAD_COMPLETED and uploads if different.

    Args:
      gs_base: str - The Google Storage base. Eg: gs://rmistry.
      gs_relative_dir: str - Relative directory to the Google Storage base.
      gs_acl: str - ACL to use when uploading to Google Storage.
      local_dir: str - The local directory to upload.
      force_upload: bool - Whether upload should be done regardless of
          timestamps matching or not.
      upload_chunks: bool - Whether upload should be done in chunks (at most
          FILES_CHUNK files per "gsutil cp" call) or in a single command.
      files_to_upload: str seq - Specific files (named relative to local_dir)
          that should be uploaded, if not specified then all files in
          local_dir are uploaded. If upload_chunks is True then files will be
          uploaded in chunks else they will be uploaded one at a time. The
          Google Storage directory is not cleaned before upload if
          files_to_upload is specified.

    Sequence of operations when an upload happens: optionally delete the
    Storage directory, write TIMESTAMP_LAST_UPLOAD_STARTED (locally and to
    Storage), upload the files, then write TIMESTAMP_LAST_UPLOAD_COMPLETED
    with the same timestamp value.

    NOTE: the chunked path runs a plain "gsutil cp" and does not pass gs_acl.

    The goal of download_directory_contents_if_changed and
    upload_directory_contents_if_changed is to attempt to replicate directory
    level rsync functionality to the Google Storage directories we care about.

    Returns True if contents were uploaded, else returns False.
    Raises Exception if any upload command fails.
    """
    if not force_upload and _are_timestamps_equal(gs_base, gs_relative_dir,
                                                  local_dir):
        print('\n\n=======Local directory is current=======\n\n')
        return False

    local_src = os.path.join(local_dir, '*')
    gs_dest = posixpath.join(gs_base, gs_relative_dir)
    # One value is shared by the STARTED and COMPLETED timestamp files.
    timestamp_value = time.time()

    if not files_to_upload:
        print('\n\n=======Delete Storage directory before uploading=======\n\n')
        delete_storage_object(gs_dest)

    print('\n\n=======Writing new TIMESTAMP_LAST_UPLOAD_STARTED=======\n\n')
    write_timestamp_file(
        timestamp_file_name=TIMESTAMP_STARTED_FILENAME,
        timestamp_value=timestamp_value, gs_base=gs_base,
        gs_relative_dir=gs_relative_dir, local_dir=local_dir, gs_acl=gs_acl)

    if upload_chunks:
        # Listing local_dir happens after the STARTED timestamp file has been
        # written into it, so that file is part of a full-directory upload.
        if files_to_upload:
            names = files_to_upload
        else:
            names = os.listdir(local_dir)
        local_files = [os.path.join(local_dir, name) for name in names]
        for files_chunk in _get_chunks(local_files, FILES_CHUNK):
            gsutil = slave_utils.GSUtilSetup()
            command = [gsutil, 'cp'] + files_chunk + [gs_dest]
            try:
                shell_utils.run(command)
            except shell_utils.CommandFailedException:
                raise Exception(
                    'Could not upload the chunk to Google Storage! '
                    'The chunk: %s' % files_chunk)
    elif files_to_upload:
        # GSUtilDownloadFile is called here with a local src and a Google
        # Storage dst, i.e. it performs the upload despite its name.
        for file_to_upload in files_to_upload:
            status = slave_utils.GSUtilDownloadFile(
                src=os.path.join(local_dir, file_to_upload), dst=gs_dest)
            if status != 0:
                raise Exception(
                    'Could not upload %s to Google Storage!' % file_to_upload)
    else:
        status = slave_utils.GSUtilDownloadFile(src=local_src, dst=gs_dest)
        if status != 0:
            raise Exception(
                'Could not upload %s to Google Storage!' % local_src)

    print('\n\n=======Writing new TIMESTAMP_LAST_UPLOAD_COMPLETED=======\n\n')
    write_timestamp_file(
        timestamp_file_name=TIMESTAMP_COMPLETED_FILENAME,
        timestamp_value=timestamp_value, gs_base=gs_base,
        gs_relative_dir=gs_relative_dir, local_dir=local_dir, gs_acl=gs_acl)
    return True
388
389
def _are_timestamps_equal(gs_base, gs_relative_dir, local_dir):
    """Compares the local TIMESTAMP with the TIMESTAMP from Google Storage.

    params:
      gs_base: Google Storage base
      gs_relative_dir: directory relative to gs_base
      local_dir: local directory expected to hold the timestamp file

    Returns False if local_dir has no TIMESTAMP_LAST_UPLOAD_COMPLETED file.
    Otherwise downloads the Storage copy of that file to a temporary file and
    returns True only if both files have the same whitespace-stripped
    contents. The result of the download call is not checked, so a failed
    download compares against whatever the temporary file holds (normally
    nothing).
    """
    local_timestamp_file = os.path.join(local_dir,
                                        TIMESTAMP_COMPLETED_FILENAME)
    # Make sure that the local TIMESTAMP file exists.
    if not os.path.exists(local_timestamp_file):
        return False

    # Get the timestamp file from Google Storage.
    src = posixpath.join(gs_base, gs_relative_dir,
                         TIMESTAMP_COMPLETED_FILENAME)
    # mkstemp() returns an open OS-level descriptor along with the path.
    # Close it right away so it does not leak; only the path is needed.
    temp_fd, temp_file = tempfile.mkstemp()
    os.close(temp_fd)
    try:
        slave_utils.GSUtilDownloadFile(src=src, dst=temp_file)

        with open(local_timestamp_file, 'r') as local_file_obj:
            local_timestamp = local_file_obj.read().strip()
        with open(temp_file, 'r') as storage_file_obj:
            storage_timestamp = storage_file_obj.read().strip()
        return local_timestamp == storage_timestamp
    finally:
        # mkstemp() leaves deletion to the caller; do not litter the temp dir.
        if os.path.exists(temp_file):
            os.remove(temp_file)
412
413
def read_timestamp_file(timestamp_file_name, gs_base, gs_relative_dir):
    """Reads the specified TIMESTAMP file from the specified GS dir.

    params:
      timestamp_file_name: name of the timestamp file within the GS dir
      gs_base: Google Storage base
      gs_relative_dir: directory relative to gs_base

    Returns the whitespace-stripped contents of the file as a string, or the
    string "0" if the downloaded contents are empty. The result of the
    download call is not checked, so a missing remote file normally ends up
    as empty contents and therefore "0".
    """
    src = posixpath.join(gs_base, gs_relative_dir, timestamp_file_name)
    # mkstemp() returns an open OS-level descriptor along with the path.
    # Close it right away so it does not leak; only the path is needed.
    temp_fd, temp_file = tempfile.mkstemp()
    os.close(temp_fd)
    try:
        slave_utils.GSUtilDownloadFile(src=src, dst=temp_file)

        with open(temp_file, 'r') as storage_file_obj:
            timestamp_value = storage_file_obj.read().strip()
        return timestamp_value or "0"
    finally:
        # mkstemp() leaves deletion to the caller; do not litter the temp dir.
        if os.path.exists(temp_file):
            os.remove(temp_file)
429
430
def write_timestamp_file(timestamp_file_name, timestamp_value, gs_base=None,
                         gs_relative_dir=None, gs_acl=None, local_dir=None):
    """Adds a timestamp file to a Google Storage and/or a Local Directory.

    The value is first written, as str(timestamp_value), to a file named
    timestamp_file_name inside the system temp directory; that file is left
    in place afterwards.

    If local_dir is provided then the file is copied into that directory.
    If gs_base, gs_relative_dir and gs_acl are all provided then the file is
    also copied to Google Storage via slave_utils.GSUtilCopyFile.
    """
    timestamp_file = os.path.join(tempfile.gettempdir(), timestamp_file_name)
    with open(timestamp_file, 'w') as staged_file:
        staged_file.write(str(timestamp_value))

    if local_dir:
        local_copy = os.path.join(local_dir, timestamp_file_name)
        shutil.copyfile(timestamp_file, local_copy)

    if gs_base and gs_relative_dir and gs_acl:
        slave_utils.GSUtilCopyFile(filename=timestamp_file, gs_base=gs_base,
                                   subdir=gs_relative_dir, gs_acl=gs_acl)
451
452
453 def _convert_to_posixpath(localpath):
454 """Convert localpath to posix format."""
455 if os.sep == '/':
456 return localpath
457 else:
458 return '/'.join(localpath.split(os.sep))
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698