Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(192)

Side by Side Diff: bin/s3multiput

Issue 8386013: Merging in latest boto. (Closed) Base URL: svn://svn.chromium.org/boto
Patch Set: Redoing vendor drop by deleting and then merging. Created 9 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « bin/route53 ('k') | bin/s3put » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2 # Copyright (c) 2006,2007,2008 Mitch Garnaat http://garnaat.org/ 2 # Copyright (c) 2006,2007,2008 Mitch Garnaat http://garnaat.org/
3 # 3 #
4 # Permission is hereby granted, free of charge, to any person obtaining a 4 # Permission is hereby granted, free of charge, to any person obtaining a
5 # copy of this software and associated documentation files (the 5 # copy of this software and associated documentation files (the
6 # "Software"), to deal in the Software without restriction, including 6 # "Software"), to deal in the Software without restriction, including
7 # without limitation the rights to use, copy, modify, merge, publish, dis- 7 # without limitation the rights to use, copy, modify, merge, publish, dis-
8 # tribute, sublicense, and/or sell copies of the Software, and to permit 8 # tribute, sublicense, and/or sell copies of the Software, and to permit
9 # persons to whom the Software is furnished to do so, subject to the fol- 9 # persons to whom the Software is furnished to do so, subject to the fol-
10 # lowing conditions: 10 # lowing conditions:
11 # 11 #
12 # The above copyright notice and this permission notice shall be included 12 # The above copyright notice and this permission notice shall be included
13 # in all copies or substantial portions of the Software. 13 # in all copies or substantial portions of the Software.
14 # 14 #
15 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 15 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
16 # OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL- 16 # OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL-
17 # ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT 17 # ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
18 # SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 18 # SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
19 # WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 # WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 20 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 # IN THE SOFTWARE. 21 # IN THE SOFTWARE.
22 # 22 #
23
24 # multipart portions copyright Fabian Topfstedt
25 # https://gist.github.com/924094
26
27
28 import math
29 import mimetypes
30 from multiprocessing import Pool
23 import getopt, sys, os 31 import getopt, sys, os
32
24 import boto 33 import boto
25 from boto.exception import S3ResponseError 34 from boto.exception import S3ResponseError
26 35
36 from boto.s3.connection import S3Connection
37 from filechunkio import FileChunkIO
38
27 usage_string = """ 39 usage_string = """
28 SYNOPSIS 40 SYNOPSIS
29 s3put [-a/--access_key <access_key>] [-s/--secret_key <secret_key>] 41 s3put [-a/--access_key <access_key>] [-s/--secret_key <secret_key>]
30 -b/--bucket <bucket_name> [-c/--callback <num_cb>] 42 -b/--bucket <bucket_name> [-c/--callback <num_cb>]
31 [-d/--debug <debug_level>] [-i/--ignore <ignore_dirs>] 43 [-d/--debug <debug_level>] [-i/--ignore <ignore_dirs>]
32 [-n/--no_op] [-p/--prefix <prefix>] [-q/--quiet] 44 [-n/--no_op] [-p/--prefix <prefix>] [-q/--quiet]
33 [-g/--grant grant] [-w/--no_overwrite] path 45 [-g/--grant grant] [-w/--no_overwrite] [-r/--reduced] path
34 46
35 Where 47 Where
36 access_key - Your AWS Access Key ID. If not supplied, boto will 48 access_key - Your AWS Access Key ID. If not supplied, boto will
37 use the value of the environment variable 49 use the value of the environment variable
38 AWS_ACCESS_KEY_ID 50 AWS_ACCESS_KEY_ID
39 secret_key - Your AWS Secret Access Key. If not supplied, boto 51 secret_key - Your AWS Secret Access Key. If not supplied, boto
40 will use the value of the environment variable 52 will use the value of the environment variable
41 AWS_SECRET_ACCESS_KEY 53 AWS_SECRET_ACCESS_KEY
42 bucket_name - The name of the S3 bucket the file(s) should be 54 bucket_name - The name of the S3 bucket the file(s) should be
43 copied to. 55 copied to.
(...skipping 13 matching lines...) Expand all
57 called 10 times for each file transferred. 69 called 10 times for each file transferred.
58 prefix - A file path prefix that will be stripped from the full 70 prefix - A file path prefix that will be stripped from the full
59 path of the file when determining the key name in S3. 71 path of the file when determining the key name in S3.
60 For example, if the full path of a file is: 72 For example, if the full path of a file is:
61 /home/foo/bar/fie.baz 73 /home/foo/bar/fie.baz
62 and the prefix is specified as "-p /home/foo/" the 74 and the prefix is specified as "-p /home/foo/" the
63 resulting key name in S3 will be: 75 resulting key name in S3 will be:
64 /bar/fie.baz 76 /bar/fie.baz
65 The prefix must end in a trailing separator and if it 77 The prefix must end in a trailing separator and if it
66 does not then one will be added. 78 does not then one will be added.
79 reduced - Use Reduced Redundancy storage
67 grant - A canned ACL policy that will be granted on each file 80 grant - A canned ACL policy that will be granted on each file
68 transferred to S3. The value of provided must be one 81 transferred to S3. The value of provided must be one
69 of the "canned" ACL policies supported by S3: 82 of the "canned" ACL policies supported by S3:
70 private|public-read|public-read-write|authenticated-read 83 private|public-read|public-read-write|authenticated-read
71 no_overwrite - No files will be overwritten on S3, if the file/key 84 no_overwrite - No files will be overwritten on S3, if the file/key
72 exists on s3 it will be kept. This is useful for 85 exists on s3 it will be kept. This is useful for
73 resuming interrupted transfers. Note this is not a 86 resuming interrupted transfers. Note this is not a
74 sync, even if the file has been updated locally if 87 sync, even if the file has been updated locally if
75 the key exists on s3 the file on s3 will not be 88 the key exists on s3 the file on s3 will not be
76 updated. 89 updated.
77 90
78 If the -n option is provided, no files will be transferred to S3 but 91 If the -n option is provided, no files will be transferred to S3 but
79 informational messages will be printed about what would happen. 92 informational messages will be printed about what would happen.
80 """ 93 """
def usage():
    """Print the command-line usage message and exit the program."""
    print(usage_string)
    sys.exit()
84 97
def submit_cb(bytes_so_far, total_bytes):
    """Progress callback passed to boto: report transfer progress on stdout."""
    progress = '%d bytes transferred / %d bytes total' % (bytes_so_far,
                                                          total_bytes)
    print(progress)
87 100
def get_key_name(fullpath, prefix):
    """Derive the S3 key name for *fullpath* by stripping *prefix* and
    converting the local path separator to '/'."""
    relative = fullpath[len(prefix):]
    return '/'.join(relative.split(os.sep))
92 105
def _upload_part(bucketname, aws_key, aws_secret, multipart_id, part_num,
                 source_path, offset, bytes, debug, cb, num_cb,
                 amount_of_retries=10):
    """Upload one part of a multipart upload, retrying on failure.

    Runs in a worker process (see upload()), so it opens its own S3
    connection rather than sharing one across processes.

    bucketname/aws_key/aws_secret -- credentials and target bucket.
    multipart_id -- id of the multipart upload this part belongs to.
    part_num     -- 1-based part number.
    source_path  -- local file; the chunk is [offset, offset+bytes).
    debug/cb/num_cb -- passed through to boto for logging/progress.
    amount_of_retries -- attempts before giving up and re-raising.
    """
    # NOTE: this docstring was previously placed *after* the first
    # statement, where it was a no-op string expression, not a docstring.
    if debug == 1:
        print("_upload_part(%s, %s, %s)" % (source_path, offset, bytes))

    def _upload(retries_left=amount_of_retries):
        try:
            if debug == 1:
                print('Start uploading part #%d ...' % part_num)
            conn = S3Connection(aws_key, aws_secret)
            conn.debug = debug
            bucket = conn.get_bucket(bucketname)
            # Re-find the in-progress multipart upload by id so the
            # worker can attach its part to it.
            for mp in bucket.get_all_multipart_uploads():
                if mp.id == multipart_id:
                    with FileChunkIO(source_path, 'r', offset=offset,
                                     bytes=bytes) as fp:
                        mp.upload_part_from_file(fp=fp, part_num=part_num,
                                                 cb=cb, num_cb=num_cb)
                    break
        except Exception as exc:
            if retries_left:
                _upload(retries_left=retries_left - 1)
            else:
                print('Failed uploading part #%d' % part_num)
                raise exc
        else:
            if debug == 1:
                print('... Uploaded part #%d' % part_num)

    _upload()
def upload(bucketname, aws_key, aws_secret, source_path, keyname,
           reduced, debug, cb, num_cb,
           acl='private', headers=None, guess_mimetype=True,
           parallel_processes=4):
    """Upload *source_path* to S3 as *keyname* via parallel multipart upload.

    The file is split into sqrt-scaled chunks (never below S3's 5 MB
    minimum part size) and each part is uploaded by a worker process in
    a multiprocessing Pool.  If every part arrives, the upload is
    completed and *acl* applied; otherwise it is cancelled.

    headers -- optional extra HTTP headers; a Content-Type guessed from
               *keyname* is added when guess_mimetype is true.
    """
    # Was `headers={}`: a mutable default that headers.update() mutated,
    # leaking the guessed Content-Type across calls.  Use a None
    # sentinel and create a fresh dict per call instead.
    if headers is None:
        headers = {}

    conn = S3Connection(aws_key, aws_secret)
    conn.debug = debug
    bucket = conn.get_bucket(bucketname)

    if guess_mimetype:
        mtype = mimetypes.guess_type(keyname)[0] or 'application/octet-stream'
        headers.update({'Content-Type': mtype})

    mp = bucket.initiate_multipart_upload(keyname, headers=headers,
                                          reduced_redundancy=reduced)

    # Chunk size grows with the square root of the file size but never
    # drops below 5242880 (S3's 5 MB minimum for non-final parts).
    source_size = os.stat(source_path).st_size
    bytes_per_chunk = max(int(math.sqrt(5242880) * math.sqrt(source_size)),
                          5242880)
    chunk_amount = int(math.ceil(source_size / float(bytes_per_chunk)))

    pool = Pool(processes=parallel_processes)
    for i in range(chunk_amount):
        offset = i * bytes_per_chunk
        remaining_bytes = source_size - offset
        bytes = min([bytes_per_chunk, remaining_bytes])
        part_num = i + 1
        pool.apply_async(_upload_part, [bucketname, aws_key, aws_secret,
                                        mp.id, part_num, source_path,
                                        offset, bytes, debug, cb, num_cb])
    pool.close()
    pool.join()

    # Only complete if every part made it; otherwise cancel so S3 does
    # not keep billing for the orphaned parts.
    if len(mp.get_all_parts()) == chunk_amount:
        mp.complete_upload()
        key = bucket.get_key(keyname)
        key.set_acl(acl)
    else:
        mp.cancel_upload()
177
def main():
    """Parse command-line options and upload the given file or directory
    tree to S3 (multipart for non-empty files)."""
    # default values
    aws_access_key_id = None
    aws_secret_access_key = None
    bucket_name = ''
    ignore_dirs = []
    total = 0
    debug = 0
    cb = None
    num_cb = 0
    quiet = False
    no_op = False
    prefix = '/'
    grant = None
    no_overwrite = False
    reduced = False

    try:
        # Fixes: long options taking a value need a trailing '=' or
        # getopt treats them as flags and drops the argument; the short
        # spec previously contained 'c::' whose stray ':' registered a
        # bogus '-:' option.
        opts, args = getopt.getopt(
            sys.argv[1:], 'a:b:c:d:g:hi:np:qs:wr',
            ['access_key=', 'bucket=', 'callback=', 'debug=', 'help',
             'grant=', 'ignore=', 'no_op', 'prefix=', 'quiet',
             'secret_key=', 'no_overwrite', 'reduced'])
    except getopt.GetoptError:
        usage()

    # parse opts
    for o, a in opts:
        if o in ('-h', '--help'):
            usage()
        if o in ('-a', '--access_key'):
            aws_access_key_id = a
        if o in ('-b', '--bucket'):
            bucket_name = a
        if o in ('-c', '--callback'):
            num_cb = int(a)
            cb = submit_cb
        if o in ('-d', '--debug'):
            debug = int(a)
        if o in ('-g', '--grant'):
            grant = a
        if o in ('-i', '--ignore'):
            ignore_dirs = a.split(',')
        if o in ('-n', '--no_op'):
            no_op = True
        # Was "('w', '--no_overwrite')" -- the missing dash meant the
        # short form -w never matched.
        if o in ('-w', '--no_overwrite'):
            no_overwrite = True
        if o in ('-p', '--prefix'):
            prefix = a
            if prefix[-1] != os.sep:
                prefix = prefix + os.sep
        if o in ('-q', '--quiet'):
            quiet = True
        if o in ('-s', '--secret_key'):
            aws_secret_access_key = a
        if o in ('-r', '--reduced'):
            reduced = True

    if len(args) != 1:
        usage()

    path = os.path.expanduser(args[0])
    path = os.path.expandvars(path)
    path = os.path.abspath(path)

    if not bucket_name:
        print('bucket name is required!')
        usage()

    c = boto.connect_s3(aws_access_key_id=aws_access_key_id,
                        aws_secret_access_key=aws_secret_access_key)
    c.debug = debug
    b = c.get_bucket(bucket_name)

    # upload a directory of files recursively
    if os.path.isdir(path):
        if no_overwrite:
            if not quiet:
                print('Getting list of existing keys to check against')
            keys = []
            for key in b.list():
                keys.append(key.name)
        for root, dirs, files in os.walk(path):
            for ignore in ignore_dirs:
                if ignore in dirs:
                    dirs.remove(ignore)
            for file in files:
                fullpath = os.path.join(root, file)
                key_name = get_key_name(fullpath, prefix)
                copy_file = True
                if no_overwrite:
                    if key_name in keys:
                        copy_file = False
                        if not quiet:
                            print('Skipping %s as it exists in s3' % file)

                if copy_file:
                    if not quiet:
                        print('Copying %s to %s/%s' % (file, bucket_name,
                                                       key_name))

                    if not no_op:
                        if os.stat(fullpath).st_size == 0:
                            # 0-byte files don't work and also don't need
                            # multipart upload
                            k = b.new_key(key_name)
                            k.set_contents_from_filename(
                                fullpath, cb=cb, num_cb=num_cb,
                                policy=grant, reduced_redundancy=reduced)
                        else:
                            upload(bucket_name, aws_access_key_id,
                                   aws_secret_access_key, fullpath,
                                   key_name, reduced, debug, cb, num_cb)
                total += 1

    # upload a single file
    elif os.path.isfile(path):
        key_name = get_key_name(os.path.abspath(path), prefix)
        copy_file = True
        if no_overwrite:
            if b.get_key(key_name):
                copy_file = False
                if not quiet:
                    print('Skipping %s as it exists in s3' % path)

        if copy_file:
            if not quiet:
                print('Copying %s to %s/%s' % (path, bucket_name, key_name))

            if not no_op:
                if os.stat(path).st_size == 0:
                    # 0-byte files don't work and also don't need
                    # multipart upload
                    k = b.new_key(key_name)
                    k.set_contents_from_filename(
                        path, cb=cb, num_cb=num_cb, policy=grant,
                        reduced_redundancy=reduced)
                else:
                    upload(bucket_name, aws_access_key_id,
                           aws_secret_access_key, path, key_name,
                           reduced, debug, cb, num_cb)
# Allow use both as a script and as an importable module.
if __name__ == '__main__':
    main()
196
OLDNEW
« no previous file with comments | « bin/route53 ('k') | bin/s3put » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698