Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(86)

Side by Side Diff: download_from_google_storage.py

Issue 12042069: Scripts to download files from google storage based on sha1 sums (Closed) Base URL: https://chromium.googlesource.com/chromium/tools/depot_tools.git@master
Patch Set: Removed gsutil/tests and gsutil/docs Created 7 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 #!/usr/bin/env python
2 # Copyright (c) 2012 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
5
6 """Script to download files from Google Storage."""
M-A Ruel 2013/02/22 01:15:56 """Downloads .. We know it's a script.
Ryan Tseng 2013/02/22 02:38:00 Done.
7
8
9 import optparse
10 import os
11 import Queue
12 import re
13 import sys
14 import threading
15 import time
16
17 from common import Gsutil
18 from common import CheckSHA1
19
M-A Ruel 2013/02/22 01:15:56 2 lines
Ryan Tseng 2013/02/22 02:38:00 Done.
20 # Assuming depot_tools/third_party/gsutil exists.
M-A Ruel 2013/02/22 01:15:56 Remove comment, it's fine.
Ryan Tseng 2013/02/22 02:38:00 Done.
# Default location of the gsutil script: third_party/gsutil/gsutil, resolved
# relative to the directory that contains this file.
GSUTIL_DEFAULT_PATH = os.path.join(
    os.path.dirname(os.path.abspath(__file__)),
    'third_party', 'gsutil', 'gsutil')
24
25
def _downloader_worker_thread(thread_num, q, options, base_url, gsutil, out_q):
  """Drains (sha1, output path) jobs from |q|, fetching each one with gsutil.

  Args:
    thread_num: Number identifying this worker in its final status message.
    q: Queue of (input_sha1_sum, output_filename) tuples to process.
    options: Parsed options; only options.force is read here.
    base_url: URL prefix; each object is fetched from <base_url>/<sha1>.
    gsutil: Object providing check_call(), call() and a stderr attribute.
    out_q: Queue that receives status lines for the main thread to print.

  Returns:
    None if every attempted download succeeded, otherwise the exit code of
    the first failed 'cp'.
  """
  first_failure = None
  while True:
    try:
      input_sha1_sum, output_filename = q.get_nowait()
    except Queue.Empty:
      out_q.put('Thread %d is done' % thread_num)
      return first_failure

    # An existing file with the right checksum needs no download, unless the
    # caller asked to force it.
    if os.path.exists(output_filename) and not options.force:
      if CheckSHA1(input_sha1_sum, output_filename):
        out_q.put('File %s exists and SHA1 sum (%s) matches. Skipping.' % (
            output_filename, input_sha1_sum))
        continue

    # Check if file exists.
    file_url = '%s/%s' % (base_url, input_sha1_sum)
    if gsutil.check_call('ls', file_url)[0] != 0:
      out_q.put('File %s for %s does not exist, skipping.' % (
          file_url, output_filename))
      continue

    # Fetch the file.
    out_q.put('Downloading %s to %s...' % (file_url, output_filename))
    code = gsutil.call('cp', '-q', file_url, output_filename)
    if code != 0:
      out_q.put(gsutil.stderr)
      # Keep going: one bad object must not strand the rest of the queue.
      if first_failure is None:
        first_failure = code
50
51
def _locate_gsutil(options):
  """Returns a Gsutil for options.gsutil_path or the first one on PATH."""
  if os.path.exists(options.gsutil_path):
    return Gsutil(options.gsutil_path, boto_path=options.boto)
  for path in os.environ.get('PATH', '').split(os.pathsep):
    candidate = os.path.join(path, 'gsutil')
    if os.path.exists(candidate):
      # Stop at the first hit so PATH precedence is honoured.
      return Gsutil(candidate, boto_path=options.boto)
  return None


def _read_sha1_file(path):
  """Returns the 40-character sum on the first line of |path|, or None."""
  with open(path) as f:
    first_line = f.readline(1024).strip()
  match = re.match(r'^([A-Za-z0-9]{40})$', first_line)
  return match.group(1) if match else None


def _enumerate_sha1_files(directory, recursive):
  """Yields the path of each .sha1 file in |directory| (or below it)."""
  for root, dirs, files in os.walk(directory):
    # os.walk only honours pruning done in place on |dirs|.
    if recursive:
      dirs[:] = [d for d in dirs if d not in ('.svn', '.git')]
    else:
      del dirs[:]
    for filename in files:
      if filename.endswith('.sha1'):
        yield os.path.join(root, filename)


def download_from_google_storage(input_filename, options):
  """Downloads the requested object(s) from the configured bucket.

  Args:
    input_filename: A sha1 sum, the path of a .sha1 file (options.sha1_file),
        or a directory to scan for .sha1 files (options.directory).
    options: Parsed options. Reads bucket, gsutil_path, boto, directory,
        recursive, output and num_threads, plus sha1_file when present.

  Returns:
    0 once all workers have finished, 403/404 when the bucket cannot be
    accessed/found, 1 when gsutil or the sha1 sum cannot be found, or the
    non-zero code reported by the initial 'ls' of the bucket.
  """
  base_url = 'gs://%s' % options.bucket

  # Make sure we can find a working instance of gsutil.
  gsutil = _locate_gsutil(options)
  if gsutil is None:
    sys.stderr.write(
        'gsutil not found at %s or on PATH.\n' % options.gsutil_path)
    return 1

  # Check if we have permissions to the Google Storage bucket.
  code, _, ls_err = gsutil.check_call('ls', base_url)
  if code == 403:
    code = gsutil.call('config')
    if code != 0:
      sys.stderr.write('Error while authenticating to %s.\n' % base_url)
      return 403
  elif code == 404:
    sys.stderr.write('%s not found.\n' % base_url)
    return 404
  elif code != 0:
    sys.stderr.write('%s\n' % (ls_err,))
    return code

  # Enumerate our work queue.
  work_queue = Queue.Queue()
  work_queue_size = 0
  if options.directory:
    for sha1_path in _enumerate_sha1_files(input_filename, options.recursive):
      sha1_sum = _read_sha1_file(sha1_path)
      if sha1_sum is None:
        sys.stderr.write('No sha1 sum found in %s.\n' % sha1_path)
        continue
      # Strip only the trailing extension; '.sha1' may also occur earlier in
      # the path.
      work_queue.put((sha1_sum, sha1_path[:-len('.sha1')]))
      work_queue_size += 1
  elif getattr(options, 'sha1_file', False):
    # The target names a file holding the sum, not the sum itself.
    sha1_sum = _read_sha1_file(input_filename)
    if sha1_sum is None:
      sys.stderr.write('No sha1 sum found in %s.\n' % input_filename)
      return 1
    work_queue.put((sha1_sum, options.output))
    work_queue_size += 1
  else:
    work_queue.put((input_filename, options.output))
    work_queue_size += 1

  # Start up all the worker threads.
  all_threads = []
  download_timer = time.time()
  output_queue = Queue.Queue()  # For printing out to stdio.
  for thread_num in range(options.num_threads):
    t = threading.Thread(
        target=_downloader_worker_thread,
        args=[thread_num, work_queue, options, base_url, gsutil.clone(),
              output_queue])
    t.daemon = True
    t.start()
    all_threads.append(t)

  # Wait for all downloads to finish, relaying worker output as it arrives.
  # The get() must time out: a worker can still be alive when its last message
  # is consumed, and an untimed get() would then block forever.
  while any(t.is_alive() for t in all_threads) or not output_queue.empty():
    try:
      line = output_queue.get(timeout=0.1)
    except Queue.Empty:
      continue
    print(line)

  # NOTE(review): worker results are not inspected here, so this is printed
  # even when individual downloads failed.
  print('Success.')
  print('Downloading %d files took %.1f second(s)' % (
      work_queue_size, time.time() - download_timer))
  return 0
132
133
def main(args):
  """Parses the command line, validates it and starts the download.

  Args:
    args: Full argument vector with the program name first (e.g. sys.argv).

  Returns:
    The value returned by download_from_google_storage(). Invalid command
    lines exit through parser.error() instead of returning.
  """
  usage = ('usage: %prog [options] target\nTarget must be:\n'
           '(default) a sha1 sum ([A-Za-z0-9]{40}).\n(-s or --sha1_file) a '
           '.sha1 file, containing a sha1 sum on the first line. (-d or '
           '--directory) A directory to scan for .sha1 files. ')
  parser = optparse.OptionParser(usage)
  parser.add_option('-o', '--output', default=None,
                    help='Specify the output file name. Defaults to:\n'
                         '(a) Given a SHA1 hash, the name is the SHA1 hash.\n'
                         '(b) Given a .sha1 file or directory, the name will '
                         'match (.*).sha1.')
  parser.add_option('-b', '--bucket', default='chrome-artifacts',
                    help='Google Storage bucket to fetch from.')
  parser.add_option('-e', '--boto', default=None,
                    help='Specify a custom boto file.')
  # NOTE(review): the flag name and its help text disagree. In this function
  # the flag only bypasses the "output file exists" check below.
  parser.add_option('-c', '--no_resume', action='store_true', default=False,
                    help='Resume download if file is partially downloaded.')
  parser.add_option('-f', '--force', action='store_true', default=False,
                    help='Force download even if local file exists.')
  parser.add_option('-r', '--recursive', action='store_true', default=False,
                    help='Scan folders recursively for .sha1 files. '
                         'Must be used with -d/--directory')
  parser.add_option('-t', '--num_threads', default=1, type='int',
                    help='Number of downloader threads to run.')
  parser.add_option('-d', '--directory', action='store_true', default=False,
                    help='The target is a directory. '
                         'Cannot be used with -s/--sha1_file.')
  parser.add_option('-s', '--sha1_file', action='store_true', default=False,
                    help='The target is a file containing a sha1 sum. '
                         'Cannot be used with -d/--directory.')
  # Defaults to the gsutil script under third_party/ next to this file.
  parser.add_option('-g', '--gsutil_path', default=GSUTIL_DEFAULT_PATH,
                    help='Path to the gsutil script.')

  # Parse the vector we were handed (minus the program name) rather than
  # implicitly re-reading sys.argv.
  (options, args) = parser.parse_args(args[1:])

  # parser.error() exits, so each check below is a guard clause.
  if len(args) < 1:
    parser.error('Missing target.')
  if len(args) > 1:
    parser.error('Too many targets.')
  if options.sha1_file and options.directory:
    parser.error('Both --directory and --sha1_file are specified, '
                 'can only specify one.')
  if options.recursive and not options.directory:
    parser.error('--recursive specified but --directory not specified.')
  if options.output and options.directory:
    parser.error('--directory is specified, so --output has no effect.')
  if options.num_threads < 1:
    parser.error('--num_threads must be at least 1.')
  input_filename = args[0]

  # Set output filename if not specified.
  if not options.output and not options.directory:
    if options.sha1_file:
      # Target is a .sha1 file.
      if not input_filename.endswith('.sha1'):
        parser.error('--sha1_file is specified, but the input filename '
                     'does not end with .sha1, and no --output is specified. '
                     'Either make sure the input filename has a .sha1 '
                     'extension, or specify --output.')
      options.output = input_filename[:-len('.sha1')]
    else:
      # Target is a sha1 sum, so output filename would also be the sha1 sum.
      options.output = input_filename

  # Check if output file already exists.
  if not options.directory and not options.force and not options.no_resume:
    if os.path.exists(options.output):
      parser.error('Output file %s exists and neither --force nor '
                   '--no_resume is specified.' % options.output)

  return download_from_google_storage(input_filename, options)
207
208
# Script entry point: exit with whatever status main() returns.
if __name__ == '__main__':
  sys.exit(main(sys.argv))
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698