OLD | NEW |
---|---|
1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
2 # Copyright (c) 2012 The Chromium Authors. All rights reserved. | 2 # Copyright (c) 2012 The Chromium Authors. All rights reserved. |
3 # Use of this source code is governed by a BSD-style license that can be | 3 # Use of this source code is governed by a BSD-style license that can be |
4 # found in the LICENSE file. | 4 # found in the LICENSE file. |
5 | 5 |
6 """Uploads files to Google Storage content addressed.""" | 6 """Uploads files to Google Storage content addressed.""" |
7 | 7 |
8 import hashlib | 8 import hashlib |
9 import optparse | 9 import optparse |
10 import os | 10 import os |
(...skipping 50 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
61 return md5_match.group(1) | 61 return md5_match.group(1) |
62 else: | 62 else: |
63 md5_hash = get_md5(filename) | 63 md5_hash = get_md5(filename) |
64 with open('%s.md5' % filename, 'wb') as f: | 64 with open('%s.md5' % filename, 'wb') as f: |
65 f.write(md5_hash) | 65 f.write(md5_hash) |
66 return md5_hash | 66 return md5_hash |
67 | 67 |
68 | 68 |
69 def _upload_worker( | 69 def _upload_worker( |
70 thread_num, upload_queue, base_url, gsutil, md5_lock, force, | 70 thread_num, upload_queue, base_url, gsutil, md5_lock, force, |
71 use_md5, stdout_queue, ret_codes): | 71 use_md5, stdout_queue, ret_codes, public): |
72 while True: | 72 while True: |
73 filename, sha1_sum = upload_queue.get() | 73 filename, sha1_sum = upload_queue.get() |
74 if not filename: | 74 if not filename: |
75 break | 75 break |
76 file_url = '%s/%s' % (base_url, sha1_sum) | 76 file_url = '%s/%s' % (base_url, sha1_sum) |
77 if gsutil.check_call('ls', file_url)[0] == 0 and not force: | 77 if gsutil.check_call('ls', file_url)[0] == 0 and not force: |
78 # File exists, check MD5 hash. | 78 # File exists, check MD5 hash. |
79 _, out, _ = gsutil.check_call('ls', '-L', file_url) | 79 _, out, _ = gsutil.check_call('ls', '-L', file_url) |
80 etag_match = re.search('ETag:\s+([a-z0-9]{32})', out) | 80 etag_match = re.search('ETag:\s+([a-z0-9]{32})', out) |
81 if etag_match: | 81 if etag_match: |
82 remote_md5 = etag_match.group(1) | 82 remote_md5 = etag_match.group(1) |
83 # Calculate the MD5 checksum to match it to Google Storage's ETag. | 83 # Calculate the MD5 checksum to match it to Google Storage's ETag. |
84 with md5_lock: | 84 with md5_lock: |
85 if use_md5: | 85 if use_md5: |
86 local_md5 = get_md5_cached(filename) | 86 local_md5 = get_md5_cached(filename) |
87 else: | 87 else: |
88 local_md5 = get_md5(filename) | 88 local_md5 = get_md5(filename) |
89 if local_md5 == remote_md5: | 89 if local_md5 == remote_md5: |
90 stdout_queue.put( | 90 stdout_queue.put( |
91 '%d> File %s already exists and MD5 matches, upload skipped' % | 91 '%d> File %s already exists and MD5 matches, upload skipped' % |
92 (thread_num, filename)) | 92 (thread_num, filename)) |
93 continue | 93 continue |
94 stdout_queue.put('%d> Uploading %s...' % ( | 94 stdout_queue.put('%d> Uploading %s...' % ( |
95 thread_num, filename)) | 95 thread_num, filename)) |
96 code, _, err = gsutil.check_call('cp', filename, file_url) | 96 args = ['cp'] |
97 if public: | |
98 args.extend(['-a', 'public-read']) | |
99 args.extend([filename, file_url]) | |
100 code, _, err = gsutil.check_call(*args) | |
101 | |
97 if code != 0: | 102 if code != 0: |
98 ret_codes.put( | 103 ret_codes.put( |
99 (code, | 104 (code, |
100 'Encountered error on uploading %s to %s\n%s' % | 105 'Encountered error on uploading %s to %s\n%s' % |
101 (filename, file_url, err))) | 106 (filename, file_url, err))) |
102 continue | 107 continue |
103 | 108 |
104 # Mark executable files with the header "x-goog-meta-executable: 1" which | 109 # Mark executable files with the header "x-goog-meta-executable: 1" which |
105 # the download script will check for to preserve the executable bit. | 110 # the download script will check for to preserve the executable bit. |
106 if not sys.platform.startswith('win'): | 111 if not sys.platform.startswith('win'): |
(...skipping 16 matching lines...) Expand all Loading... | |
123 if use_null_terminator: | 128 if use_null_terminator: |
124 return sys.stdin.read().split('\0') | 129 return sys.stdin.read().split('\0') |
125 else: | 130 else: |
126 return sys.stdin.read().splitlines() | 131 return sys.stdin.read().splitlines() |
127 else: | 132 else: |
128 return args | 133 return args |
129 | 134 |
130 | 135 |
131 def upload_to_google_storage( | 136 def upload_to_google_storage( |
132 input_filenames, base_url, gsutil, force, | 137 input_filenames, base_url, gsutil, force, |
133 use_md5, num_threads, skip_hashing): | 138 use_md5, num_threads, skip_hashing, public): |
134 # We only want one MD5 calculation happening at a time to avoid HD thrashing. | 139 # We only want one MD5 calculation happening at a time to avoid HD thrashing. |
135 md5_lock = threading.Lock() | 140 md5_lock = threading.Lock() |
136 | 141 |
137 # Start up all the worker threads plus the printer thread. | 142 # Start up all the worker threads plus the printer thread. |
138 all_threads = [] | 143 all_threads = [] |
139 ret_codes = Queue.Queue() | 144 ret_codes = Queue.Queue() |
140 ret_codes.put((0, None)) | 145 ret_codes.put((0, None)) |
141 upload_queue = Queue.Queue() | 146 upload_queue = Queue.Queue() |
142 upload_timer = time.time() | 147 upload_timer = time.time() |
143 stdout_queue = Queue.Queue() | 148 stdout_queue = Queue.Queue() |
144 printer_thread = threading.Thread(target=printer_worker, args=[stdout_queue]) | 149 printer_thread = threading.Thread(target=printer_worker, args=[stdout_queue]) |
145 printer_thread.daemon = True | 150 printer_thread.daemon = True |
146 printer_thread.start() | 151 printer_thread.start() |
147 for thread_num in range(num_threads): | 152 for thread_num in range(num_threads): |
148 t = threading.Thread( | 153 t = threading.Thread( |
149 target=_upload_worker, | 154 target=_upload_worker, |
150 args=[thread_num, upload_queue, base_url, gsutil, md5_lock, | 155 args=[thread_num, upload_queue, base_url, gsutil, md5_lock, |
151 force, use_md5, stdout_queue, ret_codes]) | 156 force, use_md5, stdout_queue, ret_codes, public]) |
152 t.daemon = True | 157 t.daemon = True |
153 t.start() | 158 t.start() |
154 all_threads.append(t) | 159 all_threads.append(t) |
155 | 160 |
156 # We want to hash everything in a single thread since it's faster. | 161 # We want to hash everything in a single thread since it's faster. |
157 # The bottleneck is in disk IO, not CPU. | 162 # The bottleneck is in disk IO, not CPU. |
158 hashing_start = time.time() | 163 hashing_start = time.time() |
159 for filename in input_filenames: | 164 for filename in input_filenames: |
160 if not os.path.exists(filename): | 165 if not os.path.exists(filename): |
161 stdout_queue.put('Main> Error: %s not found, skipping.' % filename) | 166 stdout_queue.put('Main> Error: %s not found, skipping.' % filename) |
(...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
204 return max_ret_code | 209 return max_ret_code |
205 | 210 |
206 | 211 |
207 def main(args): | 212 def main(args): |
208 parser = optparse.OptionParser(USAGE_STRING) | 213 parser = optparse.OptionParser(USAGE_STRING) |
209 parser.add_option('-b', '--bucket', | 214 parser.add_option('-b', '--bucket', |
210 help='Google Storage bucket to upload to.') | 215 help='Google Storage bucket to upload to.') |
211 parser.add_option('-e', '--boto', help='Specify a custom boto file.') | 216 parser.add_option('-e', '--boto', help='Specify a custom boto file.') |
212 parser.add_option('-f', '--force', action='store_true', | 217 parser.add_option('-f', '--force', action='store_true', |
213 help='Force upload even if remote file exists.') | 218 help='Force upload even if remote file exists.') |
214 parser.add_option('-g', '--gsutil_path', default=GSUTIL_DEFAULT_PATH, | |
ricow1
2015/01/19 16:02:28
this does not seem to be supported anyway, so removing it.
hinoka
2015/01/20 19:28:52
Acknowledged.
| |
215 help='Path to the gsutil script.') | |
216 parser.add_option('-m', '--use_md5', action='store_true', | 219 parser.add_option('-m', '--use_md5', action='store_true', |
217 help='Generate MD5 files when scanning, and don\'t check ' | 220 help='Generate MD5 files when scanning, and don\'t check ' |
218 'the MD5 checksum if a .md5 file is found.') | 221 'the MD5 checksum if a .md5 file is found.') |
219 parser.add_option('-t', '--num_threads', default=1, type='int', | 222 parser.add_option('-t', '--num_threads', default=1, type='int', |
220 help='Number of uploader threads to run.') | 223 help='Number of uploader threads to run.') |
221 parser.add_option('-s', '--skip_hashing', action='store_true', | 224 parser.add_option('-s', '--skip_hashing', action='store_true', |
222 help='Skip hashing if .sha1 file exists.') | 225 help='Skip hashing if .sha1 file exists.') |
226 parser.add_option('-p', '--public', action='store_true', | |
hinoka
2015/01/20 19:28:52
Why?
In general, I prefer to set this on a bucket level.
ricow1
2015/01/22 15:46:04
Valid point, removed support for this
| |
227 help='Make the uploaded file public read.') | |
223 parser.add_option('-0', '--use_null_terminator', action='store_true', | 228 parser.add_option('-0', '--use_null_terminator', action='store_true', |
224 help='Use \\0 instead of \\n when parsing ' | 229 help='Use \\0 instead of \\n when parsing ' |
225 'the file list from stdin. This is useful if the input ' | 230 'the file list from stdin. This is useful if the input ' |
226 'is coming from "find ... -print0".') | 231 'is coming from "find ... -print0".') |
227 (options, args) = parser.parse_args() | 232 (options, args) = parser.parse_args() |
228 | 233 |
229 # Enumerate our inputs. | 234 # Enumerate our inputs. |
230 input_filenames = get_targets(args, parser, options.use_null_terminator) | 235 input_filenames = get_targets(args, parser, options.use_null_terminator) |
231 | 236 |
232 # Make sure we can find a working instance of gsutil. | 237 # Make sure we can find a working instance of gsutil. |
(...skipping 10 matching lines...) Expand all Loading... | |
243 | 248 |
244 base_url = 'gs://%s' % options.bucket | 249 base_url = 'gs://%s' % options.bucket |
245 | 250 |
246 # Check we have a valid bucket with valid permissions. | 251 # Check we have a valid bucket with valid permissions. |
247 code = check_bucket_permissions(base_url, gsutil) | 252 code = check_bucket_permissions(base_url, gsutil) |
248 if code: | 253 if code: |
249 return code | 254 return code |
250 | 255 |
251 return upload_to_google_storage( | 256 return upload_to_google_storage( |
252 input_filenames, base_url, gsutil, options.force, options.use_md5, | 257 input_filenames, base_url, gsutil, options.force, options.use_md5, |
253 options.num_threads, options.skip_hashing) | 258 options.num_threads, options.skip_hashing, options.public) |
254 | 259 |
255 | 260 |
256 if __name__ == '__main__': | 261 if __name__ == '__main__': |
257 sys.exit(main(sys.argv)) | 262 sys.exit(main(sys.argv)) |
OLD | NEW |