Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
| 2 # Copyright (c) 2012 The Chromium Authors. All rights reserved. | 2 # Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 3 # Use of this source code is governed by a BSD-style license that can be | 3 # Use of this source code is governed by a BSD-style license that can be |
| 4 # found in the LICENSE file. | 4 # found in the LICENSE file. |
| 5 | 5 |
| 6 """Uploads files to Google Storage content addressed.""" | 6 """Uploads files to Google Storage content addressed.""" |
| 7 | 7 |
| 8 import hashlib | 8 import hashlib |
| 9 import optparse | 9 import optparse |
| 10 import os | 10 import os |
| (...skipping 50 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 61 return md5_match.group(1) | 61 return md5_match.group(1) |
| 62 else: | 62 else: |
| 63 md5_hash = get_md5(filename) | 63 md5_hash = get_md5(filename) |
| 64 with open('%s.md5' % filename, 'wb') as f: | 64 with open('%s.md5' % filename, 'wb') as f: |
| 65 f.write(md5_hash) | 65 f.write(md5_hash) |
| 66 return md5_hash | 66 return md5_hash |
| 67 | 67 |
| 68 | 68 |
| 69 def _upload_worker( | 69 def _upload_worker( |
| 70 thread_num, upload_queue, base_url, gsutil, md5_lock, force, | 70 thread_num, upload_queue, base_url, gsutil, md5_lock, force, |
| 71 use_md5, stdout_queue, ret_codes): | 71 use_md5, stdout_queue, ret_codes, public): |
| 72 while True: | 72 while True: |
| 73 filename, sha1_sum = upload_queue.get() | 73 filename, sha1_sum = upload_queue.get() |
| 74 if not filename: | 74 if not filename: |
| 75 break | 75 break |
| 76 file_url = '%s/%s' % (base_url, sha1_sum) | 76 file_url = '%s/%s' % (base_url, sha1_sum) |
| 77 if gsutil.check_call('ls', file_url)[0] == 0 and not force: | 77 if gsutil.check_call('ls', file_url)[0] == 0 and not force: |
| 78 # File exists, check MD5 hash. | 78 # File exists, check MD5 hash. |
| 79 _, out, _ = gsutil.check_call('ls', '-L', file_url) | 79 _, out, _ = gsutil.check_call('ls', '-L', file_url) |
| 80 etag_match = re.search('ETag:\s+([a-z0-9]{32})', out) | 80 etag_match = re.search('ETag:\s+([a-z0-9]{32})', out) |
| 81 if etag_match: | 81 if etag_match: |
| 82 remote_md5 = etag_match.group(1) | 82 remote_md5 = etag_match.group(1) |
| 83 # Calculate the MD5 checksum to match it to Google Storage's ETag. | 83 # Calculate the MD5 checksum to match it to Google Storage's ETag. |
| 84 with md5_lock: | 84 with md5_lock: |
| 85 if use_md5: | 85 if use_md5: |
| 86 local_md5 = get_md5_cached(filename) | 86 local_md5 = get_md5_cached(filename) |
| 87 else: | 87 else: |
| 88 local_md5 = get_md5(filename) | 88 local_md5 = get_md5(filename) |
| 89 if local_md5 == remote_md5: | 89 if local_md5 == remote_md5: |
| 90 stdout_queue.put( | 90 stdout_queue.put( |
| 91 '%d> File %s already exists and MD5 matches, upload skipped' % | 91 '%d> File %s already exists and MD5 matches, upload skipped' % |
| 92 (thread_num, filename)) | 92 (thread_num, filename)) |
| 93 continue | 93 continue |
| 94 stdout_queue.put('%d> Uploading %s...' % ( | 94 stdout_queue.put('%d> Uploading %s...' % ( |
| 95 thread_num, filename)) | 95 thread_num, filename)) |
| 96 code, _, err = gsutil.check_call('cp', filename, file_url) | 96 args = ['cp'] |
| 97 if public: | |
| 98 args.extend(['-a', 'public-read']) | |
| 99 args.extend([filename, file_url]) | |
| 100 code, _, err = gsutil.check_call(*args) | |
| 101 | |
| 97 if code != 0: | 102 if code != 0: |
| 98 ret_codes.put( | 103 ret_codes.put( |
| 99 (code, | 104 (code, |
| 100 'Encountered error on uploading %s to %s\n%s' % | 105 'Encountered error on uploading %s to %s\n%s' % |
| 101 (filename, file_url, err))) | 106 (filename, file_url, err))) |
| 102 continue | 107 continue |
| 103 | 108 |
| 104 # Mark executable files with the header "x-goog-meta-executable: 1" which | 109 # Mark executable files with the header "x-goog-meta-executable: 1" which |
| 105 # the download script will check for to preserve the executable bit. | 110 # the download script will check for to preserve the executable bit. |
| 106 if not sys.platform.startswith('win'): | 111 if not sys.platform.startswith('win'): |
| (...skipping 16 matching lines...) Expand all Loading... | |
| 123 if use_null_terminator: | 128 if use_null_terminator: |
| 124 return sys.stdin.read().split('\0') | 129 return sys.stdin.read().split('\0') |
| 125 else: | 130 else: |
| 126 return sys.stdin.read().splitlines() | 131 return sys.stdin.read().splitlines() |
| 127 else: | 132 else: |
| 128 return args | 133 return args |
| 129 | 134 |
| 130 | 135 |
| 131 def upload_to_google_storage( | 136 def upload_to_google_storage( |
| 132 input_filenames, base_url, gsutil, force, | 137 input_filenames, base_url, gsutil, force, |
| 133 use_md5, num_threads, skip_hashing): | 138 use_md5, num_threads, skip_hashing, public): |
| 134 # We only want one MD5 calculation happening at a time to avoid HD thrashing. | 139 # We only want one MD5 calculation happening at a time to avoid HD thrashing. |
| 135 md5_lock = threading.Lock() | 140 md5_lock = threading.Lock() |
| 136 | 141 |
| 137 # Start up all the worker threads plus the printer thread. | 142 # Start up all the worker threads plus the printer thread. |
| 138 all_threads = [] | 143 all_threads = [] |
| 139 ret_codes = Queue.Queue() | 144 ret_codes = Queue.Queue() |
| 140 ret_codes.put((0, None)) | 145 ret_codes.put((0, None)) |
| 141 upload_queue = Queue.Queue() | 146 upload_queue = Queue.Queue() |
| 142 upload_timer = time.time() | 147 upload_timer = time.time() |
| 143 stdout_queue = Queue.Queue() | 148 stdout_queue = Queue.Queue() |
| 144 printer_thread = threading.Thread(target=printer_worker, args=[stdout_queue]) | 149 printer_thread = threading.Thread(target=printer_worker, args=[stdout_queue]) |
| 145 printer_thread.daemon = True | 150 printer_thread.daemon = True |
| 146 printer_thread.start() | 151 printer_thread.start() |
| 147 for thread_num in range(num_threads): | 152 for thread_num in range(num_threads): |
| 148 t = threading.Thread( | 153 t = threading.Thread( |
| 149 target=_upload_worker, | 154 target=_upload_worker, |
| 150 args=[thread_num, upload_queue, base_url, gsutil, md5_lock, | 155 args=[thread_num, upload_queue, base_url, gsutil, md5_lock, |
| 151 force, use_md5, stdout_queue, ret_codes]) | 156 force, use_md5, stdout_queue, ret_codes, public]) |
| 152 t.daemon = True | 157 t.daemon = True |
| 153 t.start() | 158 t.start() |
| 154 all_threads.append(t) | 159 all_threads.append(t) |
| 155 | 160 |
| 156 # We want to hash everything in a single thread since its faster. | 161 # We want to hash everything in a single thread since its faster. |
| 157 # The bottleneck is in disk IO, not CPU. | 162 # The bottleneck is in disk IO, not CPU. |
| 158 hashing_start = time.time() | 163 hashing_start = time.time() |
| 159 for filename in input_filenames: | 164 for filename in input_filenames: |
| 160 if not os.path.exists(filename): | 165 if not os.path.exists(filename): |
| 161 stdout_queue.put('Main> Error: %s not found, skipping.' % filename) | 166 stdout_queue.put('Main> Error: %s not found, skipping.' % filename) |
| (...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 204 return max_ret_code | 209 return max_ret_code |
| 205 | 210 |
| 206 | 211 |
| 207 def main(args): | 212 def main(args): |
| 208 parser = optparse.OptionParser(USAGE_STRING) | 213 parser = optparse.OptionParser(USAGE_STRING) |
| 209 parser.add_option('-b', '--bucket', | 214 parser.add_option('-b', '--bucket', |
| 210 help='Google Storage bucket to upload to.') | 215 help='Google Storage bucket to upload to.') |
| 211 parser.add_option('-e', '--boto', help='Specify a custom boto file.') | 216 parser.add_option('-e', '--boto', help='Specify a custom boto file.') |
| 212 parser.add_option('-f', '--force', action='store_true', | 217 parser.add_option('-f', '--force', action='store_true', |
| 213 help='Force upload even if remote file exists.') | 218 help='Force upload even if remote file exists.') |
| 214 parser.add_option('-g', '--gsutil_path', default=GSUTIL_DEFAULT_PATH, | |
|
ricow1 — 2015/01/19 16:02:28:
This does not seem to be supported anyway, so removed.
hinoka — 2015/01/20 19:28:52:
Acknowledged.
| |
| 215 help='Path to the gsutil script.') | |
| 216 parser.add_option('-m', '--use_md5', action='store_true', | 219 parser.add_option('-m', '--use_md5', action='store_true', |
| 217 help='Generate MD5 files when scanning, and don\'t check ' | 220 help='Generate MD5 files when scanning, and don\'t check ' |
| 218 'the MD5 checksum if a .md5 file is found.') | 221 'the MD5 checksum if a .md5 file is found.') |
| 219 parser.add_option('-t', '--num_threads', default=1, type='int', | 222 parser.add_option('-t', '--num_threads', default=1, type='int', |
| 220 help='Number of uploader threads to run.') | 223 help='Number of uploader threads to run.') |
| 221 parser.add_option('-s', '--skip_hashing', action='store_true', | 224 parser.add_option('-s', '--skip_hashing', action='store_true', |
| 222 help='Skip hashing if .sha1 file exists.') | 225 help='Skip hashing if .sha1 file exists.') |
| 226 parser.add_option('-p', '--public', action='store_true', | |
|
hinoka — 2015/01/20 19:28:52:
Why? In general, I prefer to set this on a bucket [level — presumably "bucket-level ACL"; original comment truncated].
ricow1 — 2015/01/22 15:46:04:
Valid point; removed support for this.
| |
| 227 help='Make the uploaded file public read.') | |
| 223 parser.add_option('-0', '--use_null_terminator', action='store_true', | 228 parser.add_option('-0', '--use_null_terminator', action='store_true', |
| 224 help='Use \\0 instead of \\n when parsing ' | 229 help='Use \\0 instead of \\n when parsing ' |
| 225 'the file list from stdin. This is useful if the input ' | 230 'the file list from stdin. This is useful if the input ' |
| 226 'is coming from "find ... -print0".') | 231 'is coming from "find ... -print0".') |
| 227 (options, args) = parser.parse_args() | 232 (options, args) = parser.parse_args() |
| 228 | 233 |
| 229 # Enumerate our inputs. | 234 # Enumerate our inputs. |
| 230 input_filenames = get_targets(args, parser, options.use_null_terminator) | 235 input_filenames = get_targets(args, parser, options.use_null_terminator) |
| 231 | 236 |
| 232 # Make sure we can find a working instance of gsutil. | 237 # Make sure we can find a working instance of gsutil. |
| (...skipping 10 matching lines...) Expand all Loading... | |
| 243 | 248 |
| 244 base_url = 'gs://%s' % options.bucket | 249 base_url = 'gs://%s' % options.bucket |
| 245 | 250 |
| 246 # Check we have a valid bucket with valid permissions. | 251 # Check we have a valid bucket with valid permissions. |
| 247 code = check_bucket_permissions(base_url, gsutil) | 252 code = check_bucket_permissions(base_url, gsutil) |
| 248 if code: | 253 if code: |
| 249 return code | 254 return code |
| 250 | 255 |
| 251 return upload_to_google_storage( | 256 return upload_to_google_storage( |
| 252 input_filenames, base_url, gsutil, options.force, options.use_md5, | 257 input_filenames, base_url, gsutil, options.force, options.use_md5, |
| 253 options.num_threads, options.skip_hashing) | 258 options.num_threads, options.skip_hashing, options.public) |
| 254 | 259 |
| 255 | 260 |
| 256 if __name__ == '__main__': | 261 if __name__ == '__main__': |
| 257 sys.exit(main(sys.argv)) | 262 sys.exit(main(sys.argv)) |
| OLD | NEW |