Index: upload_to_google_storage.py |
=================================================================== |
--- upload_to_google_storage.py (revision 293993) |
+++ upload_to_google_storage.py (working copy) |
@@ -12,6 +12,7 @@ |
import re |
import stat |
import sys |
+import tarfile |
import threading |
import time |
@@ -130,7 +131,7 @@ |
def upload_to_google_storage( |
input_filenames, base_url, gsutil, force, |
- use_md5, num_threads, skip_hashing): |
+ use_md5, num_threads, skip_hashing, archive): |
# We only want one MD5 calculation happening at a time to avoid HD thrashing. |
md5_lock = threading.Lock() |
@@ -203,16 +204,38 @@ |
return max_ret_code |
+def create_archives(dirs): |
+ archive_names = [] |
+ for name in dirs: |
+ tarname = '%s.tar.gz' % name |
+ with tarfile.open(tarname, 'w:gz') as tar: |
+ tar.add(name) |
+ archive_names.append(tarname) |
+ return archive_names |
+def validate_archive_dirs(dirs): |
+ # We don't allow .. in paths in our archives. |
[Inline review comment — hinoka, 2015/02/11 00:16:57: Each of these deserves its own error message.] |
+ if any(map(lambda x: '..' in x, dirs)): |
+ return True |
+ # We only allow dirs. |
+ if any(map(lambda x: not os.path.isdir(x), dirs)): |
+ return True |
+ # We don't allow sym links in our archives. |
+ if any(map(os.path.islink, dirs)): |
+ return True |
+  # We require that the subdirectories we are archiving are all just below |
+  # cwd. |
+ return any(map(lambda x: x not in next(os.walk('.'))[1], dirs)) |
+ |
def main(args): |
parser = optparse.OptionParser(USAGE_STRING) |
parser.add_option('-b', '--bucket', |
help='Google Storage bucket to upload to.') |
parser.add_option('-e', '--boto', help='Specify a custom boto file.') |
+ parser.add_option('-z', '--archive', action='store_true', |
+ help='Archive directory as a tar.gz file') |
parser.add_option('-f', '--force', action='store_true', |
help='Force upload even if remote file exists.') |
- parser.add_option('-g', '--gsutil_path', default=GSUTIL_DEFAULT_PATH, |
- help='Path to the gsutil script.') |
parser.add_option('-m', '--use_md5', action='store_true', |
help='Generate MD5 files when scanning, and don\'t check ' |
'the MD5 checksum if a .md5 file is found.') |
@@ -229,6 +252,16 @@ |
# Enumerate our inputs. |
input_filenames = get_targets(args, parser, options.use_null_terminator) |
+ |
+ if options.archive: |
+ if validate_archive_dirs(input_filenames): |
+    parser.error('Only directories just below cwd are valid entries when ' |
+                 'using the --archive argument. Entries cannot contain ".." ' |
+                 'and entries cannot be symlinks. Entries were %s' % |
+                 input_filenames) |
+ return 1 |
+ input_filenames = create_archives(input_filenames) |
+ |
# Make sure we can find a working instance of gsutil. |
if os.path.exists(GSUTIL_DEFAULT_PATH): |
gsutil = Gsutil(GSUTIL_DEFAULT_PATH, boto_path=options.boto) |
@@ -250,7 +283,7 @@ |
return upload_to_google_storage( |
input_filenames, base_url, gsutil, options.force, options.use_md5, |
- options.num_threads, options.skip_hashing) |
+ options.num_threads, options.skip_hashing, options.archive) |
if __name__ == '__main__': |