Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(6)

Side by Side Diff: upload_to_google_storage.py

Issue 807463005: Add support for tar.gz archive files to download from download_from_google_storage (Closed) Base URL: http://src.chromium.org/svn/trunk/tools/depot_tools/
Patch Set: Created 5 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2 # Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 # Copyright (c) 2012 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be 3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file. 4 # found in the LICENSE file.
5 5
6 """Uploads files to Google Storage content addressed.""" 6 """Uploads files to Google Storage content addressed."""
7 7
8 import hashlib 8 import hashlib
9 import optparse 9 import optparse
10 import os 10 import os
(...skipping 50 matching lines...) Expand 10 before | Expand all | Expand 10 after
61 return md5_match.group(1) 61 return md5_match.group(1)
62 else: 62 else:
63 md5_hash = get_md5(filename) 63 md5_hash = get_md5(filename)
64 with open('%s.md5' % filename, 'wb') as f: 64 with open('%s.md5' % filename, 'wb') as f:
65 f.write(md5_hash) 65 f.write(md5_hash)
66 return md5_hash 66 return md5_hash
67 67
68 68
69 def _upload_worker( 69 def _upload_worker(
70 thread_num, upload_queue, base_url, gsutil, md5_lock, force, 70 thread_num, upload_queue, base_url, gsutil, md5_lock, force,
71 use_md5, stdout_queue, ret_codes): 71 use_md5, stdout_queue, ret_codes, public):
72 while True: 72 while True:
73 filename, sha1_sum = upload_queue.get() 73 filename, sha1_sum = upload_queue.get()
74 if not filename: 74 if not filename:
75 break 75 break
76 file_url = '%s/%s' % (base_url, sha1_sum) 76 file_url = '%s/%s' % (base_url, sha1_sum)
77 if gsutil.check_call('ls', file_url)[0] == 0 and not force: 77 if gsutil.check_call('ls', file_url)[0] == 0 and not force:
78 # File exists, check MD5 hash. 78 # File exists, check MD5 hash.
79 _, out, _ = gsutil.check_call('ls', '-L', file_url) 79 _, out, _ = gsutil.check_call('ls', '-L', file_url)
80 etag_match = re.search('ETag:\s+([a-z0-9]{32})', out) 80 etag_match = re.search('ETag:\s+([a-z0-9]{32})', out)
81 if etag_match: 81 if etag_match:
82 remote_md5 = etag_match.group(1) 82 remote_md5 = etag_match.group(1)
83 # Calculate the MD5 checksum to match it to Google Storage's ETag. 83 # Calculate the MD5 checksum to match it to Google Storage's ETag.
84 with md5_lock: 84 with md5_lock:
85 if use_md5: 85 if use_md5:
86 local_md5 = get_md5_cached(filename) 86 local_md5 = get_md5_cached(filename)
87 else: 87 else:
88 local_md5 = get_md5(filename) 88 local_md5 = get_md5(filename)
89 if local_md5 == remote_md5: 89 if local_md5 == remote_md5:
90 stdout_queue.put( 90 stdout_queue.put(
91 '%d> File %s already exists and MD5 matches, upload skipped' % 91 '%d> File %s already exists and MD5 matches, upload skipped' %
92 (thread_num, filename)) 92 (thread_num, filename))
93 continue 93 continue
94 stdout_queue.put('%d> Uploading %s...' % ( 94 stdout_queue.put('%d> Uploading %s...' % (
95 thread_num, filename)) 95 thread_num, filename))
96 code, _, err = gsutil.check_call('cp', filename, file_url) 96 args = ['cp']
97 if public:
98 args.extend(['-a', 'public-read'])
99 args.extend([filename, file_url])
100 code, _, err = gsutil.check_call(*args)
101
97 if code != 0: 102 if code != 0:
98 ret_codes.put( 103 ret_codes.put(
99 (code, 104 (code,
100 'Encountered error on uploading %s to %s\n%s' % 105 'Encountered error on uploading %s to %s\n%s' %
101 (filename, file_url, err))) 106 (filename, file_url, err)))
102 continue 107 continue
103 108
104 # Mark executable files with the header "x-goog-meta-executable: 1" which 109 # Mark executable files with the header "x-goog-meta-executable: 1" which
105 # the download script will check for to preserve the executable bit. 110 # the download script will check for to preserve the executable bit.
106 if not sys.platform.startswith('win'): 111 if not sys.platform.startswith('win'):
(...skipping 16 matching lines...) Expand all
123 if use_null_terminator: 128 if use_null_terminator:
124 return sys.stdin.read().split('\0') 129 return sys.stdin.read().split('\0')
125 else: 130 else:
126 return sys.stdin.read().splitlines() 131 return sys.stdin.read().splitlines()
127 else: 132 else:
128 return args 133 return args
129 134
130 135
131 def upload_to_google_storage( 136 def upload_to_google_storage(
132 input_filenames, base_url, gsutil, force, 137 input_filenames, base_url, gsutil, force,
133 use_md5, num_threads, skip_hashing): 138 use_md5, num_threads, skip_hashing, public):
134 # We only want one MD5 calculation happening at a time to avoid HD thrashing. 139 # We only want one MD5 calculation happening at a time to avoid HD thrashing.
135 md5_lock = threading.Lock() 140 md5_lock = threading.Lock()
136 141
137 # Start up all the worker threads plus the printer thread. 142 # Start up all the worker threads plus the printer thread.
138 all_threads = [] 143 all_threads = []
139 ret_codes = Queue.Queue() 144 ret_codes = Queue.Queue()
140 ret_codes.put((0, None)) 145 ret_codes.put((0, None))
141 upload_queue = Queue.Queue() 146 upload_queue = Queue.Queue()
142 upload_timer = time.time() 147 upload_timer = time.time()
143 stdout_queue = Queue.Queue() 148 stdout_queue = Queue.Queue()
144 printer_thread = threading.Thread(target=printer_worker, args=[stdout_queue]) 149 printer_thread = threading.Thread(target=printer_worker, args=[stdout_queue])
145 printer_thread.daemon = True 150 printer_thread.daemon = True
146 printer_thread.start() 151 printer_thread.start()
147 for thread_num in range(num_threads): 152 for thread_num in range(num_threads):
148 t = threading.Thread( 153 t = threading.Thread(
149 target=_upload_worker, 154 target=_upload_worker,
150 args=[thread_num, upload_queue, base_url, gsutil, md5_lock, 155 args=[thread_num, upload_queue, base_url, gsutil, md5_lock,
151 force, use_md5, stdout_queue, ret_codes]) 156 force, use_md5, stdout_queue, ret_codes, public])
152 t.daemon = True 157 t.daemon = True
153 t.start() 158 t.start()
154 all_threads.append(t) 159 all_threads.append(t)
155 160
156 # We want to hash everything in a single thread since its faster. 161 # We want to hash everything in a single thread since its faster.
157 # The bottleneck is in disk IO, not CPU. 162 # The bottleneck is in disk IO, not CPU.
158 hashing_start = time.time() 163 hashing_start = time.time()
159 for filename in input_filenames: 164 for filename in input_filenames:
160 if not os.path.exists(filename): 165 if not os.path.exists(filename):
161 stdout_queue.put('Main> Error: %s not found, skipping.' % filename) 166 stdout_queue.put('Main> Error: %s not found, skipping.' % filename)
(...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after
204 return max_ret_code 209 return max_ret_code
205 210
206 211
207 def main(args): 212 def main(args):
208 parser = optparse.OptionParser(USAGE_STRING) 213 parser = optparse.OptionParser(USAGE_STRING)
209 parser.add_option('-b', '--bucket', 214 parser.add_option('-b', '--bucket',
210 help='Google Storage bucket to upload to.') 215 help='Google Storage bucket to upload to.')
211 parser.add_option('-e', '--boto', help='Specify a custom boto file.') 216 parser.add_option('-e', '--boto', help='Specify a custom boto file.')
212 parser.add_option('-f', '--force', action='store_true', 217 parser.add_option('-f', '--force', action='store_true',
213 help='Force upload even if remote file exists.') 218 help='Force upload even if remote file exists.')
214 parser.add_option('-g', '--gsutil_path', default=GSUTIL_DEFAULT_PATH,
ricow1 2015/01/19 16:02:28 this does not seem to be supported anyway, so removing it
hinoka 2015/01/20 19:28:52 Acknowledged.
215 help='Path to the gsutil script.')
216 parser.add_option('-m', '--use_md5', action='store_true', 219 parser.add_option('-m', '--use_md5', action='store_true',
217 help='Generate MD5 files when scanning, and don\'t check ' 220 help='Generate MD5 files when scanning, and don\'t check '
218 'the MD5 checksum if a .md5 file is found.') 221 'the MD5 checksum if a .md5 file is found.')
219 parser.add_option('-t', '--num_threads', default=1, type='int', 222 parser.add_option('-t', '--num_threads', default=1, type='int',
220 help='Number of uploader threads to run.') 223 help='Number of uploader threads to run.')
221 parser.add_option('-s', '--skip_hashing', action='store_true', 224 parser.add_option('-s', '--skip_hashing', action='store_true',
222 help='Skip hashing if .sha1 file exists.') 225 help='Skip hashing if .sha1 file exists.')
226 parser.add_option('-p', '--public', action='store_true',
hinoka 2015/01/20 19:28:52 Why? In general, I prefer to set this on a bucket basis.
ricow1 2015/01/22 15:46:04 Valid point, removed support for this
227 help='Make the uploaded file public read.')
223 parser.add_option('-0', '--use_null_terminator', action='store_true', 228 parser.add_option('-0', '--use_null_terminator', action='store_true',
224 help='Use \\0 instead of \\n when parsing ' 229 help='Use \\0 instead of \\n when parsing '
225 'the file list from stdin. This is useful if the input ' 230 'the file list from stdin. This is useful if the input '
226 'is coming from "find ... -print0".') 231 'is coming from "find ... -print0".')
227 (options, args) = parser.parse_args() 232 (options, args) = parser.parse_args()
228 233
229 # Enumerate our inputs. 234 # Enumerate our inputs.
230 input_filenames = get_targets(args, parser, options.use_null_terminator) 235 input_filenames = get_targets(args, parser, options.use_null_terminator)
231 236
232 # Make sure we can find a working instance of gsutil. 237 # Make sure we can find a working instance of gsutil.
(...skipping 10 matching lines...) Expand all
243 248
244 base_url = 'gs://%s' % options.bucket 249 base_url = 'gs://%s' % options.bucket
245 250
246 # Check we have a valid bucket with valid permissions. 251 # Check we have a valid bucket with valid permissions.
247 code = check_bucket_permissions(base_url, gsutil) 252 code = check_bucket_permissions(base_url, gsutil)
248 if code: 253 if code:
249 return code 254 return code
250 255
251 return upload_to_google_storage( 256 return upload_to_google_storage(
252 input_filenames, base_url, gsutil, options.force, options.use_md5, 257 input_filenames, base_url, gsutil, options.force, options.use_md5,
253 options.num_threads, options.skip_hashing) 258 options.num_threads, options.skip_hashing, options.public)
254 259
255 260
256 if __name__ == '__main__': 261 if __name__ == '__main__':
257 sys.exit(main(sys.argv)) 262 sys.exit(main(sys.argv))
OLDNEW
« download_from_google_storage.py ('K') | « download_from_google_storage.py ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698