#!/usr/bin/env python
# Copyright (c) 2006,2007,2008 Mitch Garnaat http://garnaat.org/
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish, dis-
# tribute, sublicense, and/or sell copies of the Software, and to permit
# persons to whom the Software is furnished to do so, subject to the fol-
# lowing conditions:
#
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL-
# ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
# SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
#
import getopt
import sys
import os
import boto

from boto.compat import six

try:
    # multipart portions copyright Fabian Topfstedt
    # https://gist.github.com/924094

    import math
    import mimetypes
    from multiprocessing import Pool
    from boto.s3.connection import S3Connection
    from filechunkio import FileChunkIO
    multipart_capable = True
    usage_flag_multipart_capable = """ [--multipart]"""
    usage_string_multipart_capable = """
        multipart - Upload files as multiple parts. This needs filechunkio.
                    Requires ListBucket, ListMultipartUploadParts,
                    ListBucketMultipartUploads and PutObject permissions."""
except ImportError as err:
    multipart_capable = False
    usage_flag_multipart_capable = ""
    if six.PY2:
        attribute = 'message'
    else:
        attribute = 'msg'
    usage_string_multipart_capable = '\n\n "' + \
        getattr(err, attribute)[len('No module named '):] + \
        '" is missing for multipart support '


DEFAULT_REGION = 'us-east-1'

usage_string = """
SYNOPSIS
    s3put [-a/--access_key <access_key>] [-s/--secret_key <secret_key>]
          -b/--bucket <bucket_name> [-c/--callback <num_cb>]
          [-d/--debug <debug_level>] [-i/--ignore <ignore_dirs>]
          [-n/--no_op] [-p/--prefix <prefix>] [-k/--key_prefix <key_prefix>]
          [-q/--quiet] [-g/--grant grant] [-w/--no_overwrite] [-r/--reduced]
          [--header] [--region <name>] [--host <s3_host>]""" + \
    usage_flag_multipart_capable + """ path [path...]

    Where
        access_key - Your AWS Access Key ID. If not supplied, boto will
                     use the value of the environment variable
                     AWS_ACCESS_KEY_ID
        secret_key - Your AWS Secret Access Key. If not supplied, boto
                     will use the value of the environment variable
                     AWS_SECRET_ACCESS_KEY
        bucket_name - The name of the S3 bucket the file(s) should be
                      copied to.
        path - A path to a directory or file that represents the items
               to be uploaded. If the path points to an individual file,
               that file will be uploaded to the specified bucket. If the
               path points to a directory, it will recursively traverse
               the directory and upload all files to the specified bucket.
        debug_level - 0 means no debug output (default), 1 means normal
                      debug output from boto, and 2 means boto debug output
                      plus request/response output from httplib
        ignore_dirs - a comma-separated list of directory names that will
                      be ignored and not uploaded to S3.
        num_cb - The number of progress callbacks to display. The default
                 is zero, which means no callbacks. If you supply a value
                 of "-c 10", for example, the progress callback will be
                 called 10 times for each file transferred.
        prefix - A file path prefix that will be stripped from the full
                 path of the file when determining the key name in S3.
                 For example, if the full path of a file is:
                     /home/foo/bar/fie.baz
                 and the prefix is specified as "-p /home/foo/", the
                 resulting key name in S3 will be:
                     /bar/fie.baz
                 The prefix must end in a trailing separator; if it
                 does not, one will be added.
        key_prefix - A prefix to be added to the S3 key name, after any
                     stripping of the file path is done based on the
                     "-p/--prefix" option.
        reduced - Use Reduced Redundancy storage
        grant - A canned ACL policy that will be granted on each file
                transferred to S3. The value provided must be one
                of the "canned" ACL policies supported by S3:
                private|public-read|public-read-write|authenticated-read
        no_overwrite - No files will be overwritten on S3; if the file/key
                       already exists on S3, it will be kept. This is
                       useful for resuming interrupted transfers. Note
                       this is not a sync: even if the file has been
                       updated locally, the file on S3 will not be updated
                       if the key already exists.
        header - key=value pairs of extra header(s) to pass along in the
                 request
        region - Manually set a region for buckets that are not in the US
                 classic region. Normally the region is autodetected, but
                 setting this yourself is more efficient.
        host - Hostname override, for using an endpoint other than AWS S3
""" + usage_string_multipart_capable + """


    If the -n option is provided, no files will be transferred to S3 but
    informational messages will be printed about what would happen.
"""


def usage(status=1):
    print(usage_string)
    sys.exit(status)


def submit_cb(bytes_so_far, total_bytes):
    print('%d bytes transferred / %d bytes total' % (bytes_so_far, total_bytes))


def get_key_name(fullpath, prefix, key_prefix):
    """
    Derive the S3 key name for a local file: strip the local path prefix,
    convert OS path separators to '/', and prepend the key prefix.
    """
    if fullpath.startswith(prefix):
        key_name = fullpath[len(prefix):]
    else:
        key_name = fullpath
    key_parts = key_name.split(os.sep)
    return key_prefix + '/'.join(key_parts)


def _upload_part(bucketname, aws_key, aws_secret, multipart_id, part_num,
                 source_path, offset, bytes, debug, cb, num_cb,
                 amount_of_retries=10):
    """
    Uploads a part with retries.
    """
    if debug == 1:
        print("_upload_part(%s, %s, %s)" % (source_path, offset, bytes))

    def _upload(retries_left=amount_of_retries):
        try:
            if debug == 1:
                print('Start uploading part #%d ...' % part_num)
            conn = S3Connection(aws_key, aws_secret)
            conn.debug = debug
            bucket = conn.get_bucket(bucketname)
            for mp in bucket.get_all_multipart_uploads():
                if mp.id == multipart_id:
                    with FileChunkIO(source_path, 'r', offset=offset,
                                     bytes=bytes) as fp:
                        mp.upload_part_from_file(fp=fp, part_num=part_num,
                                                 cb=cb, num_cb=num_cb)
                    break
        except Exception as exc:
            if retries_left:
                _upload(retries_left=retries_left - 1)
            else:
                print('Failed uploading part #%d' % part_num)
                raise exc
        else:
            if debug == 1:
                print('... Uploaded part #%d' % part_num)

    _upload()


def check_valid_region(conn, region):
    if conn is None:
        print('Invalid region (%s)' % region)
        sys.exit(1)


def multipart_upload(bucketname, aws_key, aws_secret, source_path, keyname,
                     reduced, debug, cb, num_cb, acl='private', headers={},
                     guess_mimetype=True, parallel_processes=4,
                     region=DEFAULT_REGION):
    """
    Parallel multipart upload.
    """
    conn = boto.s3.connect_to_region(region, aws_access_key_id=aws_key,
                                     aws_secret_access_key=aws_secret)
    check_valid_region(conn, region)
    conn.debug = debug
    bucket = conn.get_bucket(bucketname)

    if guess_mimetype:
        mtype = mimetypes.guess_type(keyname)[0] or 'application/octet-stream'
        headers.update({'Content-Type': mtype})

    mp = bucket.initiate_multipart_upload(keyname, headers=headers,
                                          reduced_redundancy=reduced)

    source_size = os.stat(source_path).st_size
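    # Scale the part size with the square root of the file size so the number
    # of parts stays manageable for large files, but never go below 5242880
    # bytes (5 MiB), the minimum part size S3 accepts for every part except
    # the last one.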
    bytes_per_chunk = max(int(math.sqrt(5242880) * math.sqrt(source_size)),
                          5242880)
    chunk_amount = int(math.ceil(source_size / float(bytes_per_chunk)))

    pool = Pool(processes=parallel_processes)
    for i in range(chunk_amount):
        offset = i * bytes_per_chunk
        remaining_bytes = source_size - offset
        bytes = min([bytes_per_chunk, remaining_bytes])
        part_num = i + 1
        pool.apply_async(_upload_part, [bucketname, aws_key, aws_secret, mp.id,
                                        part_num, source_path, offset, bytes,
                                        debug, cb, num_cb])
    pool.close()
    pool.join()

    if len(mp.get_all_parts()) == chunk_amount:
        mp.complete_upload()
        key = bucket.get_key(keyname)
        key.set_acl(acl)
    else:
        mp.cancel_upload()


def singlepart_upload(bucket, key_name, fullpath, *kargs, **kwargs):
    """
    Single upload.
    """
    k = bucket.new_key(key_name)
    k.set_contents_from_filename(fullpath, *kargs, **kwargs)


def expand_path(path):
    path = os.path.expanduser(path)
    path = os.path.expandvars(path)
    return os.path.abspath(path)


def main():

    # default values
    aws_access_key_id = None
    aws_secret_access_key = None
    bucket_name = ''
    ignore_dirs = []
    debug = 0
    cb = None
    num_cb = 0
    quiet = False
    no_op = False
    prefix = '/'
    key_prefix = ''
    grant = None
    no_overwrite = False
    reduced = False
    headers = {}
    host = None
    multipart_requested = False
    region = None

    try:
        opts, args = getopt.getopt(
            sys.argv[1:], 'a:b:c::d:g:hi:k:np:qs:wr',
            ['access_key=', 'bucket=', 'callback=', 'debug=', 'help', 'grant=',
             'ignore=', 'key_prefix=', 'no_op', 'prefix=', 'quiet',
             'secret_key=', 'no_overwrite', 'reduced', 'header=', 'multipart',
             'host=', 'region='])
    except getopt.GetoptError:
        usage(1)

    # parse opts
    for o, a in opts:
        if o in ('-h', '--help'):
            usage(0)
        if o in ('-a', '--access_key'):
            aws_access_key_id = a
        if o in ('-b', '--bucket'):
            bucket_name = a
        if o in ('-c', '--callback'):
            num_cb = int(a)
            cb = submit_cb
        if o in ('-d', '--debug'):
            debug = int(a)
        if o in ('-g', '--grant'):
            grant = a
        if o in ('-i', '--ignore'):
            ignore_dirs = a.split(',')
        if o in ('-n', '--no_op'):
            no_op = True
        if o in ('-w', '--no_overwrite'):
            no_overwrite = True
        if o in ('-p', '--prefix'):
            prefix = a
            if prefix[-1] != os.sep:
                prefix = prefix + os.sep
            prefix = expand_path(prefix)
        if o in ('-k', '--key_prefix'):
            key_prefix = a
        if o in ('-q', '--quiet'):
            quiet = True
        if o in ('-s', '--secret_key'):
            aws_secret_access_key = a
        if o in ('-r', '--reduced'):
            reduced = True
        if o == '--header':
            (k, v) = a.split("=", 1)
            headers[k] = v
        if o == '--host':
            host = a
        if o == '--multipart':
            if multipart_capable:
                multipart_requested = True
            else:
                print("multipart upload requested but not capable")
                sys.exit(4)
        if o == '--region':
            regions = boto.s3.regions()
            for region_info in regions:
                if region_info.name == a:
                    region = a
                    break
            else:
                raise ValueError('Invalid region %s specified' % a)

    if len(args) < 1:
        usage(2)

    if not bucket_name:
        print("bucket name is required!")
        usage(3)

    connect_args = {
        'aws_access_key_id': aws_access_key_id,
        'aws_secret_access_key': aws_secret_access_key
    }

    if host:
        connect_args['host'] = host

    c = boto.s3.connect_to_region(region or DEFAULT_REGION, **connect_args)
    check_valid_region(c, region or DEFAULT_REGION)
    c.debug = debug
    b = c.get_bucket(bucket_name, validate=False)

    # Attempt to determine location and warn if no --host or --region
    # arguments were passed. Then try to automagically figure out
    # what should have been passed and fix it.
    if host is None and region is None:
        try:
            location = b.get_location()

            # Classic region will be '', any other will have a name
            if location:
                print('Bucket exists in %s but no host or region given!' %
                      location)

                # Override for EU, which is really Ireland according to the docs
                if location == 'EU':
                    location = 'eu-west-1'

                print('Automatically setting region to %s' % location)

                # Here we create a new connection, and then take the existing
                # bucket and set it to use the new connection
                c = boto.s3.connect_to_region(location, **connect_args)
                c.debug = debug
                b.connection = c
        except Exception as e:
            if debug > 0:
                print(e)
            print('Could not get bucket region info, skipping...')

    existing_keys_to_check_against = []
    files_to_check_for_upload = []

    for path in args:
        path = expand_path(path)
        # upload a directory of files recursively
        if os.path.isdir(path):
            if no_overwrite:
                if not quiet:
                    print('Getting list of existing keys to check against')
                for key in b.list(get_key_name(path, prefix, key_prefix)):
                    existing_keys_to_check_against.append(key.name)
            for root, dirs, files in os.walk(path):
                for ignore in ignore_dirs:
                    if ignore in dirs:
                        dirs.remove(ignore)
                for path in files:
                    if path.startswith("."):
                        continue
                    files_to_check_for_upload.append(os.path.join(root, path))

        # upload a single file
        elif os.path.isfile(path):
            fullpath = os.path.abspath(path)
            key_name = get_key_name(fullpath, prefix, key_prefix)
            files_to_check_for_upload.append(fullpath)
            existing_keys_to_check_against.append(key_name)

        # we are trying to upload something unknown
        else:
            print("I don't know what %s is, so I can't upload it" % path)

    for fullpath in files_to_check_for_upload:
        key_name = get_key_name(fullpath, prefix, key_prefix)

        if no_overwrite and key_name in existing_keys_to_check_against:
            if b.get_key(key_name):
                if not quiet:
                    print('Skipping %s as it exists in S3' % fullpath)
                continue

        if not quiet:
            print('Copying %s to %s/%s' % (fullpath, bucket_name, key_name))

        if not no_op:
            # 0-byte files don't work and also don't need multipart upload
            if os.stat(fullpath).st_size != 0 and multipart_capable and \
                    multipart_requested:
                multipart_upload(bucket_name, aws_access_key_id,
                                 aws_secret_access_key, fullpath, key_name,
                                 reduced, debug, cb, num_cb,
                                 grant or 'private', headers,
                                 region=region or DEFAULT_REGION)
            else:
                singlepart_upload(b, key_name, fullpath, cb=cb, num_cb=num_cb,
                                  policy=grant, reduced_redundancy=reduced,
                                  headers=headers)


if __name__ == "__main__":
    main()