Index: download_from_google_storage.py |
diff --git a/download_from_google_storage.py b/download_from_google_storage.py |
index a51b16a259d4b53794342e7d4ce7074d31a02a45..4500f5f9c7ef52e1f10de3512d970e89ac5f0934 100755 |
--- a/download_from_google_storage.py |
+++ b/download_from_google_storage.py |
@@ -22,6 +22,13 @@ import subprocess2 |
GSUTIL_DEFAULT_PATH = os.path.join( |
os.path.dirname(os.path.abspath(__file__)), |
'third_party', 'gsutil', 'gsutil') |
# Maps sys.platform values to the short platform names used for the
# platform-specific directories ('linux', 'mac' or 'win'). Keys are sorted.
PLATFORM_MAPPING = {
    'cygwin': 'win',
    'darwin': 'mac',
    # Linux is reported as 'linux2' by Python 2.7.3+, may be 'linux3' on older
    # 2.x interpreters running on a 3.x kernel, and is plain 'linux' on
    # Python 3.3+. Accept all spellings so a lookup never raises KeyError on
    # Linux.
    'linux': 'linux',
    'linux2': 'linux',
    'linux3': 'linux',
    'win32': 'win',
}
class FileNotFoundError(IOError): |
@@ -32,6 +39,10 @@ class InvalidFileError(IOError): |
pass |
class InvalidPlatformError(Exception):
  """Exception type for platform-related failures.

  NOTE(review): nothing in the code visible here raises this class; confirm
  where it is meant to be used.
  """
+ |
+ |
def GetNormalizedPlatform(): |
"""Returns the result of sys.platform accounting for cygwin. |
Under cygwin, this will always return "win32" like the native Python.""" |
@@ -116,6 +127,17 @@ def check_bucket_permissions(bucket, gsutil): |
return (base_url, code) |
def check_platform(target):
  """Checks if any component of target's path matches (linux|mac|win).

  The path is made absolute, then walked from its last component up to the
  filesystem root. The last component itself is checked too, not only its
  parent directories.

  Args:
    target: A file or directory path. May be relative, empty or None.

  Returns:
    The deepest path component equal to 'linux', 'mac' or 'win', or None if
    target is empty or no component matches.
  """
  if not target:
    return None
  current = os.path.abspath(target)
  while True:
    parent, name = os.path.split(current)
    if name in ('linux', 'mac', 'win'):
      return name
    # At the root, os.path.split() returns the root unchanged with an empty
    # tail (e.g. ('/', '') or ('e:\\', '')). Stop there rather than walking
    # the same path forever.
    if not parent or parent == current:
      return None
    current = parent
M-A Ruel
2014/01/17 20:19:21
What happens if you call it with 'e:\\' ?
Ryan Tseng
2014/01/17 20:58:37
('e:\\', '')
|
+ |
+ |
def get_sha1(filename): |
sha1 = hashlib.sha1() |
with open(filename, 'rb') as f: |
@@ -131,7 +153,8 @@ def get_sha1(filename): |
# Download-specific code starts here |
def enumerate_work_queue(input_filename, work_queue, directory, |
- recursive, ignore_errors, output, sha1_file): |
+ recursive, ignore_errors, output, sha1_file, |
+ auto_platform): |
if sha1_file: |
if not os.path.exists(input_filename): |
if not ignore_errors: |
@@ -164,6 +187,21 @@ def enumerate_work_queue(input_filename, work_queue, directory, |
for filename in files: |
full_path = os.path.join(root, filename) |
if full_path.endswith('.sha1'): |
+ if auto_platform: |
+ # Skip if the platform does not match. |
+ target_platform = check_platform(full_path) |
+ if not target_platform: |
+ err = ('--auto_platform passed in but no ' |
M-A Ruel
2014/01/17 20:19:21
the wrapping seems excessive, try to extend nearer
Ryan Tseng
2014/01/17 20:58:37
Done.
|
+ 'platform name found in the path of %s' |
+ % full_path) |
+ if not ignore_errors: |
+ raise InvalidFileError(err) |
+ print >> sys.stderr, err |
+ continue |
+ current_platform = PLATFORM_MAPPING[sys.platform] |
+ if current_platform != target_platform: |
+ continue |
+ |
with open(full_path, 'rb') as f: |
sha1_match = re.match('^([A-Za-z0-9]{40})$', f.read(1024).rstrip()) |
if sha1_match: |
@@ -240,7 +278,7 @@ def printer_worker(output_queue): |
def download_from_google_storage( |
input_filename, base_url, gsutil, num_threads, directory, recursive, |
- force, output, ignore_errors, sha1_file, verbose): |
+ force, output, ignore_errors, sha1_file, verbose, auto_platform): |
# Start up all the worker threads. |
all_threads = [] |
download_start = time.time() |
@@ -263,7 +301,7 @@ def download_from_google_storage( |
# Enumerate our work queue. |
work_queue_size = enumerate_work_queue( |
input_filename, work_queue, directory, recursive, |
- ignore_errors, output, sha1_file) |
+ ignore_errors, output, sha1_file, auto_platform) |
for _ in all_threads: |
work_queue.put((None, None)) # Used to tell worker threads to stop. |
@@ -333,6 +371,11 @@ def main(args): |
help='A regular expression that is compared against ' |
'Python\'s sys.platform. If this option is specified, ' |
'the download will happen only if there is a match.') |
+ parser.add_option('-a', '--auto_platform', |
+ help='Detects if any parent folder of the target matches ' |
+ '(win|mac|linux). If so, the script will only ' |
M-A Ruel
2014/01/17 20:19:21
sort
Ryan Tseng
2014/01/17 20:58:37
Done.
|
+ 'process files that are in the paths that ' |
+ 'that matches the current platform.') |
parser.add_option('-v', '--verbose', action='store_true', |
help='Output extra diagnostic and progress information.') |
@@ -340,6 +383,9 @@ def main(args): |
# Make sure we should run at all based on platform matching. |
if options.platform: |
+ if options.auto_platform: |
+ parser.error('--platform can not be specified with --auto_platform') |
+ return 1 |
M-A Ruel
2014/01/17 20:19:21
parser.error() calls sys.exit(2), no need for return.
Ryan Tseng
2014/01/17 20:58:37
Done.
|
if not re.match(options.platform, GetNormalizedPlatform()): |
if options.verbose: |
print('The current platform doesn\'t match "%s", skipping.' % |
@@ -376,6 +422,11 @@ def main(args): |
parser.error('--recursive specified but --directory not specified.') |
if options.output and options.directory: |
parser.error('--directory is specified, so --output has no effect.') |
+ if (not (options.sha1_file or options.directory) |
+ and options.auto_platform): |
+ parser.error('--auto_platform must be specified with either ' |
+ '--sha1_file or --directory') |
+ |
input_filename = args[0] |
# Set output filename if not specified. |
@@ -408,7 +459,7 @@ def main(args): |
return download_from_google_storage( |
input_filename, base_url, gsutil, options.num_threads, options.directory, |
options.recursive, options.force, options.output, options.ignore_errors, |
- options.sha1_file, options.verbose) |
+ options.sha1_file, options.verbose, options.auto_platform) |
if __name__ == '__main__': |