OLD | NEW |
1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
2 # Copyright (c) 2006,2007,2008 Mitch Garnaat http://garnaat.org/ | 2 # Copyright (c) 2006,2007,2008 Mitch Garnaat http://garnaat.org/ |
3 # | 3 # |
4 # Permission is hereby granted, free of charge, to any person obtaining a | 4 # Permission is hereby granted, free of charge, to any person obtaining a |
5 # copy of this software and associated documentation files (the | 5 # copy of this software and associated documentation files (the |
6 # "Software"), to deal in the Software without restriction, including | 6 # "Software"), to deal in the Software without restriction, including |
7 # without limitation the rights to use, copy, modify, merge, publish, dis- | 7 # without limitation the rights to use, copy, modify, merge, publish, dis- |
8 # tribute, sublicense, and/or sell copies of the Software, and to permit | 8 # tribute, sublicense, and/or sell copies of the Software, and to permit |
9 # persons to whom the Software is furnished to do so, subject to the fol- | 9 # persons to whom the Software is furnished to do so, subject to the fol- |
10 # lowing conditions: | 10 # lowing conditions: |
11 # | 11 # |
12 # The above copyright notice and this permission notice shall be included | 12 # The above copyright notice and this permission notice shall be included |
13 # in all copies or substantial portions of the Software. | 13 # in all copies or substantial portions of the Software. |
14 # | 14 # |
15 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | 15 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
16 # OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL- | 16 # OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL- |
17 # ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT | 17 # ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT |
18 # SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | 18 # SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, |
19 # WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | 19 # WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
20 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | 20 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
21 # IN THE SOFTWARE. | 21 # IN THE SOFTWARE. |
22 # | 22 # |
| 23 |
| 24 # multipart portions copyright Fabian Topfstedt |
| 25 # https://gist.github.com/924094 |
| 26 |
| 27 |
| 28 import math |
| 29 import mimetypes |
| 30 from multiprocessing import Pool |
23 import getopt, sys, os | 31 import getopt, sys, os |
| 32 |
24 import boto | 33 import boto |
25 from boto.exception import S3ResponseError | 34 from boto.exception import S3ResponseError |
26 | 35 |
| 36 from boto.s3.connection import S3Connection |
| 37 from filechunkio import FileChunkIO |
| 38 |
27 usage_string = """ | 39 usage_string = """ |
28 SYNOPSIS | 40 SYNOPSIS |
29 s3put [-a/--access_key <access_key>] [-s/--secret_key <secret_key>] | 41 s3put [-a/--access_key <access_key>] [-s/--secret_key <secret_key>] |
30 -b/--bucket <bucket_name> [-c/--callback <num_cb>] | 42 -b/--bucket <bucket_name> [-c/--callback <num_cb>] |
31 [-d/--debug <debug_level>] [-i/--ignore <ignore_dirs>] | 43 [-d/--debug <debug_level>] [-i/--ignore <ignore_dirs>] |
32 [-n/--no_op] [-p/--prefix <prefix>] [-q/--quiet] | 44 [-n/--no_op] [-p/--prefix <prefix>] [-q/--quiet] |
33 [-g/--grant grant] [-w/--no_overwrite] path | 45 [-g/--grant grant] [-w/--no_overwrite] [-r/--reduced] path |
34 | 46 |
35 Where | 47 Where |
36 access_key - Your AWS Access Key ID. If not supplied, boto will | 48 access_key - Your AWS Access Key ID. If not supplied, boto will |
37 use the value of the environment variable | 49 use the value of the environment variable |
38 AWS_ACCESS_KEY_ID | 50 AWS_ACCESS_KEY_ID |
39 secret_key - Your AWS Secret Access Key. If not supplied, boto | 51 secret_key - Your AWS Secret Access Key. If not supplied, boto |
40 will use the value of the environment variable | 52 will use the value of the environment variable |
41 AWS_SECRET_ACCESS_KEY | 53 AWS_SECRET_ACCESS_KEY |
42 bucket_name - The name of the S3 bucket the file(s) should be | 54 bucket_name - The name of the S3 bucket the file(s) should be |
43 copied to. | 55 copied to. |
(...skipping 13 matching lines...) Expand all Loading... |
57 called 10 times for each file transferred. | 69 called 10 times for each file transferred. |
58 prefix - A file path prefix that will be stripped from the full | 70 prefix - A file path prefix that will be stripped from the full |
59 path of the file when determining the key name in S3. | 71 path of the file when determining the key name in S3. |
60 For example, if the full path of a file is: | 72 For example, if the full path of a file is: |
61 /home/foo/bar/fie.baz | 73 /home/foo/bar/fie.baz |
62 and the prefix is specified as "-p /home/foo/" the | 74 and the prefix is specified as "-p /home/foo/" the |
63 resulting key name in S3 will be: | 75 resulting key name in S3 will be: |
64 /bar/fie.baz | 76 /bar/fie.baz |
65 The prefix must end in a trailing separator and if it | 77 The prefix must end in a trailing separator and if it |
66 does not then one will be added. | 78 does not then one will be added. |
| 79 reduced - Use Reduced Redundancy storage |
67 grant - A canned ACL policy that will be granted on each file | 80 grant - A canned ACL policy that will be granted on each file |
68 transferred to S3. The value of provided must be one | 81 transferred to S3. The value of provided must be one |
69 of the "canned" ACL policies supported by S3: | 82 of the "canned" ACL policies supported by S3: |
70 private|public-read|public-read-write|authenticated-read | 83 private|public-read|public-read-write|authenticated-read |
71 no_overwrite - No files will be overwritten on S3, if the file/key | 84 no_overwrite - No files will be overwritten on S3, if the file/key |
72 exists on s3 it will be kept. This is useful for | 85 exists on s3 it will be kept. This is useful for |
73 resuming interrupted transfers. Note this is not a | 86 resuming interrupted transfers. Note this is not a |
74 sync, even if the file has been updated locally if | 87 sync, even if the file has been updated locally if |
75 the key exists on s3 the file on s3 will not be | 88 the key exists on s3 the file on s3 will not be |
76 updated. | 89 updated. |
77 | 90 |
78 If the -n option is provided, no files will be transferred to S3 but | 91 If the -n option is provided, no files will be transferred to S3 but |
79 informational messages will be printed about what would happen. | 92 informational messages will be printed about what would happen. |
80 """ | 93 """ |
def usage():
    """Print the command-line usage message and exit the program."""
    sys.stdout.write(usage_string + '\n')
    sys.exit()
84 | 97 |
85 def submit_cb(bytes_so_far, total_bytes): | 98 def submit_cb(bytes_so_far, total_bytes): |
86 print '%d bytes transferred / %d bytes total' % (bytes_so_far, total_bytes) | 99 print '%d bytes transferred / %d bytes total' % (bytes_so_far, total_bytes) |
87 | 100 |
def get_key_name(fullpath, prefix):
    """
    Turn a local file path into an S3 key name.

    Strips the leading ``prefix`` from ``fullpath`` and converts the
    platform path separator to '/', which S3 uses as its key delimiter.
    """
    relative = fullpath[len(prefix):]
    return '/'.join(relative.split(os.sep))
92 | 105 |
| 106 def _upload_part(bucketname, aws_key, aws_secret, multipart_id, part_num, |
| 107 source_path, offset, bytes, debug, cb, num_cb, amount_of_retries=10): |
| 108 if debug == 1: |
| 109 print "_upload_part(%s, %s, %s)" % (source_path, offset, bytes) |
| 110 """ |
| 111 Uploads a part with retries. |
| 112 """ |
| 113 def _upload(retries_left=amount_of_retries): |
| 114 try: |
| 115 if debug == 1: |
| 116 print 'Start uploading part #%d ...' % part_num |
| 117 conn = S3Connection(aws_key, aws_secret) |
| 118 conn.debug = debug |
| 119 bucket = conn.get_bucket(bucketname) |
| 120 for mp in bucket.get_all_multipart_uploads(): |
| 121 if mp.id == multipart_id: |
| 122 with FileChunkIO(source_path, 'r', offset=offset, |
| 123 bytes=bytes) as fp: |
| 124 mp.upload_part_from_file(fp=fp, part_num=part_num, cb=cb
, num_cb=num_cb) |
| 125 break |
| 126 except Exception, exc: |
| 127 if retries_left: |
| 128 _upload(retries_left=retries_left - 1) |
| 129 else: |
| 130 print 'Failed uploading part #%d' % part_num |
| 131 raise exc |
| 132 else: |
| 133 if debug == 1: |
| 134 print '... Uploaded part #%d' % part_num |
| 135 |
| 136 _upload() |
| 137 |
def upload(bucketname, aws_key, aws_secret, source_path, keyname,
           reduced, debug, cb, num_cb,
           acl='private', headers=None, guess_mimetype=True,
           parallel_processes=4):
    """
    Upload ``source_path`` to S3 as a parallel multipart upload.

    The file is split into chunks (at least S3's 5 MB minimum part size)
    and the parts are uploaded concurrently by a process pool. On success
    the upload is completed and ``acl`` applied to the resulting key; if
    any part is missing the upload is aborted so S3 does not keep billing
    for orphaned parts.
    """
    # BUG FIX: headers previously defaulted to a mutable {} which was
    # mutated via headers.update() and shared across every call.
    if headers is None:
        headers = {}
    conn = S3Connection(aws_key, aws_secret)
    conn.debug = debug
    bucket = conn.get_bucket(bucketname)

    if guess_mimetype:
        mtype = mimetypes.guess_type(keyname)[0] or 'application/octet-stream'
        headers.update({'Content-Type': mtype})

    mp = bucket.initiate_multipart_upload(keyname, headers=headers,
                                          reduced_redundancy=reduced)

    # Chunk size scales with the square root of the file size but never
    # drops below 5242880 (S3's 5 MB minimum part size).
    source_size = os.stat(source_path).st_size
    bytes_per_chunk = max(int(math.sqrt(5242880) * math.sqrt(source_size)),
                          5242880)
    chunk_amount = int(math.ceil(source_size / float(bytes_per_chunk)))

    pool = Pool(processes=parallel_processes)
    for i in range(chunk_amount):
        offset = i * bytes_per_chunk
        remaining_bytes = source_size - offset
        bytes = min([bytes_per_chunk, remaining_bytes])
        part_num = i + 1
        pool.apply_async(_upload_part, [bucketname, aws_key, aws_secret,
                                        mp.id, part_num, source_path, offset,
                                        bytes, debug, cb, num_cb])
    pool.close()
    pool.join()

    # Only complete when every part arrived; otherwise cancel the upload.
    if len(mp.get_all_parts()) == chunk_amount:
        mp.complete_upload()
        key = bucket.get_key(keyname)
        key.set_acl(acl)
    else:
        mp.cancel_upload()
| 176 |
| 177 |
93 def main(): | 178 def main(): |
| 179 |
| 180 # default values |
| 181 aws_access_key_id = None |
| 182 aws_secret_access_key = None |
| 183 bucket_name = '' |
| 184 ignore_dirs = [] |
| 185 total = 0 |
| 186 debug = 0 |
| 187 cb = None |
| 188 num_cb = 0 |
| 189 quiet = False |
| 190 no_op = False |
| 191 prefix = '/' |
| 192 grant = None |
| 193 no_overwrite = False |
| 194 reduced = False |
| 195 |
94 try: | 196 try: |
95 opts, args = getopt.getopt(sys.argv[1:], 'a:b:c::d:g:hi:np:qs:vw', | 197 opts, args = getopt.getopt(sys.argv[1:], 'a:b:c::d:g:hi:np:qs:wr', |
96 ['access_key', 'bucket', 'callback', 'debug',
'help', 'grant', | 198 ['access_key', 'bucket', 'callback', 'debug',
'help', 'grant', |
97 'ignore', 'no_op', 'prefix', 'quiet', 'secre
t_key', 'no_overwrite']) | 199 'ignore', 'no_op', 'prefix', 'quiet', 'secre
t_key', 'no_overwrite', |
| 200 'reduced']) |
98 except: | 201 except: |
99 usage() | 202 usage() |
100 ignore_dirs = [] | 203 |
101 aws_access_key_id = None | 204 # parse opts |
102 aws_secret_access_key = None | |
103 bucket_name = '' | |
104 total = 0 | |
105 debug = 0 | |
106 cb = None | |
107 num_cb = 0 | |
108 quiet = False | |
109 no_op = False | |
110 prefix = '/' | |
111 grant = None | |
112 no_overwrite = False | |
113 for o, a in opts: | 205 for o, a in opts: |
114 if o in ('-h', '--help'): | 206 if o in ('-h', '--help'): |
115 usage() | 207 usage() |
116 sys.exit() | |
117 if o in ('-a', '--access_key'): | 208 if o in ('-a', '--access_key'): |
118 aws_access_key_id = a | 209 aws_access_key_id = a |
119 if o in ('-b', '--bucket'): | 210 if o in ('-b', '--bucket'): |
120 bucket_name = a | 211 bucket_name = a |
121 if o in ('-c', '--callback'): | 212 if o in ('-c', '--callback'): |
122 num_cb = int(a) | 213 num_cb = int(a) |
123 cb = submit_cb | 214 cb = submit_cb |
124 if o in ('-d', '--debug'): | 215 if o in ('-d', '--debug'): |
125 debug = int(a) | 216 debug = int(a) |
126 if o in ('-g', '--grant'): | 217 if o in ('-g', '--grant'): |
127 grant = a | 218 grant = a |
128 if o in ('-i', '--ignore'): | 219 if o in ('-i', '--ignore'): |
129 ignore_dirs = a.split(',') | 220 ignore_dirs = a.split(',') |
130 if o in ('-n', '--no_op'): | 221 if o in ('-n', '--no_op'): |
131 no_op = True | 222 no_op = True |
132 if o in ('w', '--no_overwrite'): | 223 if o in ('w', '--no_overwrite'): |
133 no_overwrite = True | 224 no_overwrite = True |
134 if o in ('-p', '--prefix'): | 225 if o in ('-p', '--prefix'): |
135 prefix = a | 226 prefix = a |
136 if prefix[-1] != os.sep: | 227 if prefix[-1] != os.sep: |
137 prefix = prefix + os.sep | 228 prefix = prefix + os.sep |
138 if o in ('-q', '--quiet'): | 229 if o in ('-q', '--quiet'): |
139 quiet = True | 230 quiet = True |
140 if o in ('-s', '--secret_key'): | 231 if o in ('-s', '--secret_key'): |
141 aws_secret_access_key = a | 232 aws_secret_access_key = a |
| 233 if o in ('-r', '--reduced'): |
| 234 reduced = True |
| 235 |
142 if len(args) != 1: | 236 if len(args) != 1: |
143 print usage() | 237 usage() |
| 238 |
| 239 |
144 path = os.path.expanduser(args[0]) | 240 path = os.path.expanduser(args[0]) |
145 path = os.path.expandvars(path) | 241 path = os.path.expandvars(path) |
146 path = os.path.abspath(path) | 242 path = os.path.abspath(path) |
147 if bucket_name: | 243 |
148 c = boto.connect_s3(aws_access_key_id=aws_access_key_id, | 244 if not bucket_name: |
149 aws_secret_access_key=aws_secret_access_key) | 245 print "bucket name is required!" |
150 c.debug = debug | 246 usage() |
151 b = c.get_bucket(bucket_name) | 247 |
152 if os.path.isdir(path): | 248 c = boto.connect_s3(aws_access_key_id=aws_access_key_id, |
153 if no_overwrite: | 249 aws_secret_access_key=aws_secret_access_key) |
| 250 c.debug = debug |
| 251 b = c.get_bucket(bucket_name) |
| 252 |
| 253 # upload a directory of files recursively |
| 254 if os.path.isdir(path): |
| 255 if no_overwrite: |
| 256 if not quiet: |
| 257 print 'Getting list of existing keys to check against' |
| 258 keys = [] |
| 259 for key in b.list(): |
| 260 keys.append(key.name) |
| 261 for root, dirs, files in os.walk(path): |
| 262 for ignore in ignore_dirs: |
| 263 if ignore in dirs: |
| 264 dirs.remove(ignore) |
| 265 for file in files: |
| 266 fullpath = os.path.join(root, file) |
| 267 key_name = get_key_name(fullpath, prefix) |
| 268 copy_file = True |
| 269 if no_overwrite: |
| 270 if key_name in keys: |
| 271 copy_file = False |
| 272 if not quiet: |
| 273 print 'Skipping %s as it exists in s3' % file |
| 274 |
| 275 if copy_file: |
| 276 if not quiet: |
| 277 print 'Copying %s to %s/%s' % (file, bucket_name, key_na
me) |
| 278 |
| 279 if not no_op: |
| 280 if os.stat(fullpath).st_size == 0: |
| 281 # 0-byte files don't work and also don't need multip
art upload |
| 282 k = b.new_key(key_name) |
| 283 k.set_contents_from_filename(fullpath, cb=cb, num_cb
=num_cb, |
| 284 policy=grant, reduced_r
edundancy=reduced) |
| 285 else: |
| 286 upload(bucket_name, aws_access_key_id, |
| 287 aws_secret_access_key, fullpath, key_name, |
| 288 reduced, debug, cb, num_cb) |
| 289 total += 1 |
| 290 |
| 291 # upload a single file |
| 292 elif os.path.isfile(path): |
| 293 key_name = get_key_name(os.path.abspath(path), prefix) |
| 294 copy_file = True |
| 295 if no_overwrite: |
| 296 if b.get_key(key_name): |
| 297 copy_file = False |
154 if not quiet: | 298 if not quiet: |
155 print 'Getting list of existing keys to check against' | 299 print 'Skipping %s as it exists in s3' % path |
156 keys = [] | 300 |
157 for key in b.list(): | 301 if copy_file: |
158 keys.append(key.name) | 302 if not quiet: |
159 for root, dirs, files in os.walk(path): | 303 print 'Copying %s to %s/%s' % (path, bucket_name, key_name) |
160 for ignore in ignore_dirs: | 304 |
161 if ignore in dirs: | 305 if not no_op: |
162 dirs.remove(ignore) | 306 if os.stat(path).st_size == 0: |
163 for file in files: | 307 # 0-byte files don't work and also don't need multipart uplo
ad |
164 fullpath = os.path.join(root, file) | 308 k = b.new_key(key_name) |
165 key_name = get_key_name(fullpath, prefix) | 309 k.set_contents_from_filename(path, cb=cb, num_cb=num_cb, pol
icy=grant, |
166 copy_file = True | 310 reduced_redundancy=reduced) |
167 if no_overwrite: | 311 else: |
168 if key_name in keys: | 312 upload(bucket_name, aws_access_key_id, |
169 copy_file = False | 313 aws_secret_access_key, path, key_name, |
170 if not quiet: | 314 reduced, debug, cb, num_cb) |
171 print 'Skipping %s as it exists in s3' % file | |
172 if copy_file: | |
173 if not quiet: | |
174 print 'Copying %s to %s/%s' % (file, bucket_name, ke
y_name) | |
175 if not no_op: | |
176 k = b.new_key(key_name) | |
177 k.set_contents_from_filename(fullpath, cb=cb, | |
178 num_cb=num_cb, polic
y=grant) | |
179 total += 1 | |
180 elif os.path.isfile(path): | |
181 key_name = os.path.split(path)[1] | |
182 copy_file = True | |
183 if no_overwrite: | |
184 if b.get_key(key_name): | |
185 copy_file = False | |
186 if not quiet: | |
187 print 'Skipping %s as it exists in s3' % path | |
188 if copy_file: | |
189 k = b.new_key(key_name) | |
190 k.set_contents_from_filename(path, cb=cb, num_cb=num_cb, policy=
grant) | |
191 else: | |
192 print usage() | |
193 | 315 |
# Run only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
196 | |
OLD | NEW |