Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(179)

Side by Side Diff: build/download_from_google_storage.py

Issue 11664024: Scripts to download files from google storage based on sha1 sums (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Created 7 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « no previous file | build/upload_to_google_storage.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 #!/usr/bin/env python
2 # Copyright (c) 2012 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
5
6 """Script to download files from Google Storage."""
7
8
9 import hashlib
10 import os
11 import Queue
12 import re
13 import subprocess
14 import sys
15 import tempfile
16 import threading
17 import time
18 import zipfile
19
20 from optparse import OptionParser
M-A Ruel 2013/01/10 02:33:08 Actually, why import this one differently from the
Ryan Tseng 2013/01/14 21:37:13 Done.
21
# Default location of the gsutil script, resolved relative to this file:
# <dir of this file>/../../third_party/gsutil/gsutil.
GSUTIL_DEFAULT_PATH = os.path.join(
    os.path.dirname(os.path.normpath(__file__)),
    '..', '..', 'third_party', 'gsutil', 'gsutil')
M-A Ruel 2013/01/10 02:33:08 Aligning at +10 seems quite arbitrary.
Ryan Tseng 2013/01/14 21:37:13 Fixed
24
25
class Gsutil(object):
  """Runs the gsutil script with a fixed path, credentials file and timeout.

  Attributes:
    path: Path to the gsutil script. It is executed with sys.executable.
    boto_path: Optional credentials file; when set it is exported to the
        child process as AWS_CREDENTIAL_FILE.
    timeout: Optional number of seconds to wait for one gsutil invocation.
        None waits indefinitely.
  """

  def __init__(self, path, boto_path=None, timeout=None):
    """Stores the settings.

    Raises:
      OSError: if |path| does not exist.
    """
    if not os.path.exists(path):
      raise OSError('GSUtil not found in %s' % path)
    self.path = path
    self.timeout = timeout
    self.boto_path = boto_path

  def _run(self, args, capture):
    """Runs gsutil with |args| and waits for it, honoring self.timeout.

    Args:
      args: Sequence of command line arguments passed to gsutil.
      capture: If True, stdout and stderr are captured and returned; if
          False they are inherited from this process.

    Returns:
      A (returncode, out, err) tuple. |out| and |err| are empty strings when
      output is not captured.

    Raises:
      RuntimeError: if gsutil did not finish within self.timeout seconds.
    """
    env = os.environ.copy()
    if self.boto_path is not None:
      env['AWS_CREDENTIAL_FILE'] = self.boto_path
    pipe = subprocess.PIPE if capture else None
    proc = subprocess.Popen(
        (sys.executable, self.path) + tuple(args),
        stdout=pipe, stderr=pipe, env=env, universal_newlines=True)
    result = {}

    def _communicate():
      # communicate() drains both pipes while waiting. Calling wait() first
      # can deadlock once the child fills a pipe buffer.
      result['out'], result['err'] = proc.communicate()

    # The wait happens on a helper thread so that join() can enforce the
    # timeout without relying on a timeout-aware subprocess API.
    waiter = threading.Thread(target=_communicate)
    waiter.daemon = True
    waiter.start()
    waiter.join(self.timeout)
    if waiter.is_alive():
      # Do not leave a runaway gsutil behind after giving up on it.
      proc.kill()
      waiter.join()
      raise RuntimeError('%s %s timed out after %d seconds.' % (
          self.path, ' '.join(args), self.timeout))
    return (proc.returncode, result.get('out') or '', result.get('err') or '')

  def call(self, *args):
    """Runs gsutil with inherited stdout/stderr.

    Returns:
      The gsutil exit code.

    Raises:
      RuntimeError: on timeout.
    """
    return self._run(args, capture=False)[0]

  def check_call(self, *args):
    """Runs gsutil with captured output and normalizes the status code.

    Returns:
      A (code, out, err) tuple. |code| is 0 on success. On failure it is the
      'status=<n>' value found in stderr if any, 403 for the missing
      credentials message, 404 for 'No such object', and otherwise the raw
      gsutil exit code.

    Raises:
      RuntimeError: on timeout.
    """
    code, out, err = self._run(args, capture=True)
    if code == 0:
      return (0, out, err)

    status_code_match = re.search(r'status=([0-9]+)', err)
    if status_code_match:
      return (int(status_code_match.group(1)), out, err)
    if ('You are attempting to access protected data with '
        'no configured credentials.' in err):
      return (403, out, err)
    if 'No such object' in err:
      return (404, out, err)
    return (code, out, err)

  def clone(self):
    """Returns a new Gsutil with the same path, boto_path and timeout."""
    return Gsutil(self.path, self.boto_path, self.timeout)
80
81
def CheckSHA1(sha1_sum, filename):
  """Returns True if the SHA1 hex digest of |filename| equals |sha1_sum|.

  Args:
    sha1_sum: Expected digest as a hex string, compared verbatim against
        hashlib's hexdigest() output.
    filename: Path of the file to hash.
  """
  sha1 = hashlib.sha1()
  # Hash in fixed-size chunks: reading the whole file at once fails on very
  # large files. Binary mode keeps the bytes untouched on every platform.
  with open(filename, 'rb') as f:
    while True:
      chunk = f.read(1024 * 1024)
      if not chunk:
        break
      sha1.update(chunk)
  return sha1_sum == sha1.hexdigest()
86
87
def _downloader_worker_thread(thread_num, q, options, base_url, gsutil):
  """Downloads queued files until the queue is empty or a download fails.

  Args:
    thread_num: Index of this worker (not used by the body).
    q: Queue of (sha1_sum, output_filename) tuples.
    options: Parsed options object; only options.force is read here.
    base_url: URL prefix; each object is fetched from '<base_url>/<sha1_sum>'.
    gsutil: Object providing check_call(*args) -> (code, out, err) and
        call(*args) -> exit code.

  Returns:
    None once the queue is drained, or the non-zero gsutil exit code of the
    first download that failed.
  """
  while True:
    # Only the queue read can raise Queue.Empty, so keep the try minimal.
    try:
      input_sha1_sum, output_filename = q.get_nowait()
    except Queue.Empty:
      return None

    if os.path.exists(output_filename) and not options.force:
      if CheckSHA1(input_sha1_sum, output_filename):
        print('File %s exists and SHA1 sum (%s) matches. Skipping.' % (
            output_filename, input_sha1_sum))
        continue

    # Check if file exists.
    file_url = '%s/%s' % (base_url, input_sha1_sum)
    ls_code = gsutil.check_call('ls', file_url)[0]
    if ls_code != 0:
      sys.stderr.write('File %s for %s does not exist, skipping.\n' % (
          file_url, output_filename))
      continue

    # Fetch the file. call() does not capture output, so gsutil's own error
    # text has already reached the terminal when the exit code is non-zero.
    print('Downloading %s to %s...' % (file_url, output_filename))
    code = gsutil.call('cp', '-q', file_url, output_filename)
    if code != 0:
      sys.stderr.write('Failed to download %s to %s (gsutil exit code %d).\n'
                       % (file_url, output_filename, code))
      return code
111
112
def _classify_target(target, output):
  """Works out what the command line target refers to.

  Args:
    target: The positional argument: a directory, a .sha1 file, an already
        downloaded file named after its own SHA1 sum, or a bare SHA1 sum.
    output: Value of --output, or None if it was not given.

  Returns:
    An (is_dir, sha1_sum, output) tuple. |sha1_sum| is None for directories.

  Raises:
    ValueError: if |target| is none of the supported kinds.
  """
  # TODO: reviewers asked for the target kind to be passed explicitly rather
  # than guessed; the guessing is kept so the command line stays compatible.
  if os.path.isdir(target):
    return (True, None, output)

  if os.path.exists(target):
    # A .sha1 file holds nothing but the hash within its first 1024 bytes.
    with open(target) as f:
      sha1_match = re.search(r'^([A-Za-z0-9]{40})\s*$', f.read(1024))
    if sha1_match:
      if not output:
        # --output only *defaults* to the stripped name; an explicit value
        # given by the user wins.
        if target.endswith('.sha1'):
          output = target[:-len('.sha1')]
        else:
          output = sha1_match.group(1)
      return (False, sha1_match.group(1), output)
    # Otherwise the file may be an already downloaded binary whose file name
    # is its own SHA1 sum; the worker re-verifies it and skips the download.
    name = os.path.basename(target)
    if CheckSHA1(name, target):
      return (False, name, output or target)
    raise ValueError(target)

  # Not on disk: the only remaining option is a bare SHA1 sum.
  if not re.match(r'[A-Za-z0-9]{40}$', target):
    raise ValueError(target)
  return (False, target, output or target)


def _find_gsutil(preferred_path):
  """Returns a Gsutil for |preferred_path|, else the first on PATH, else None."""
  if os.path.exists(preferred_path):
    return Gsutil(preferred_path)
  for directory in os.environ.get('PATH', '').split(os.pathsep):
    candidate = os.path.join(directory, 'gsutil')
    if os.path.isfile(candidate):
      return Gsutil(candidate)
  return None


def _enumerate_sha1_files(root_dir, recursive):
  """Yields (sha1_sum, output_path) for every .sha1 file under |root_dir|.

  Only |root_dir| itself is scanned unless |recursive| is set, in which case
  sub-directories other than .svn are scanned too.
  """
  for root, dirs, files in os.walk(root_dir):
    if recursive:
      if '.svn' in dirs:
        dirs.remove('.svn')
    else:
      # Emptying |dirs| in place stops os.walk from descending any further.
      del dirs[:]
    for filename in files:
      if not filename.endswith('.sha1'):
        continue
      full_path = os.path.join(root, filename)
      with open(full_path) as f:
        sha1_match = re.search(r'([A-Za-z0-9]{40})', f.read(1024))
      if sha1_match:
        # Strip only the trailing extension, not every '.sha1' in the path.
        yield (sha1_match.group(1), full_path[:-len('.sha1')])


def main(args):
  """Downloads the file(s) named by the command line from Google Storage.

  Args:
    args: Full argument vector including the program name (sys.argv).

  Returns:
    0 on success, 1 for an unrecognized target or missing gsutil, 403/404 or
    the gsutil status code if the bucket cannot be listed. Usage errors
    (missing or extra targets) exit through parser.error().
  """
  usage = ('usage: %prog [options] target\nTarget must be:\n'
           '(1) a directory.\n(2) a sha1 sum ([A-Za-z0-9]{40}).\n'
           '(3) a .sha1 file, containing a sha1 sum on the first line.')
  parser = OptionParser(usage)
  parser.add_option('-o', '--output', default=None,
                    help='Specify the output file name. Defaults to:\n'
                         '(a) Given a SHA1 hash, the name is the SHA1 hash.\n'
                         '(b) Given a .sha1 file or directory, the name will '
                         'match (.*).sha1.')
  parser.add_option('-b', '--bucket', default='chrome-artifacts',
                    help='Google Storage bucket to fetch from.')
  parser.add_option('-f', '--force', action='store_true', default=False,
                    help='Force download even if local file exists.')
  parser.add_option('-r', '--recursive', action='store_true', default=False,
                    help='Scan folders recursively for .sha1 files.')
  parser.add_option('-t', '--num_threads', default=1, type='int',
                    help='Number of downloader threads to run.')
  # This file should be stored in tools/deps_scripts/ and we want the path to
  # third_party/gsutil/gsutil
  parser.add_option('-g', '--gsutil_path', default=GSUTIL_DEFAULT_PATH,
                    help='Path to the gsutil script.')

  # Parse the vector that was handed in (minus the program name) instead of
  # silently falling back to sys.argv.
  (options, targets) = parser.parse_args(args[1:])
  if not targets:
    parser.error('Missing target.')
  if len(targets) > 1:
    # TODO(hinoka): Multi target support.
    parser.error('Too many targets.')
  input_filename = targets[0]

  try:
    dir_name, input_sha1_sum, options.output = _classify_target(
        input_filename, options.output)
  except ValueError:
    sys.stderr.write('Input %s not recognized.\n' % input_filename)
    parser.print_help()
    return 1

  base_url = 'gs://%s' % options.bucket

  gsutil = _find_gsutil(options.gsutil_path)
  if gsutil is None:
    sys.stderr.write('gsutil not found in %s or on PATH.\n'
                     % options.gsutil_path)
    return 1

  # Check if we have permissions.
  code, _, ls_err = gsutil.check_call('ls', base_url)
  if code == 403:
    # No credentials configured yet: let gsutil walk the user through it.
    if gsutil.call('config') != 0:
      sys.stderr.write('Error while authenticating to %s, exiting\n'
                       % base_url)
      return 403
  elif code == 404:
    sys.stderr.write('%s not found.\n' % base_url)
    return 404
  elif code != 0:
    sys.stderr.write('%s\n' % ls_err)
    return code

  # Enumerate our work queue.
  work_queue = Queue.Queue()
  if dir_name:
    for work_item in _enumerate_sha1_files(input_filename, options.recursive):
      work_queue.put(work_item)
  else:
    work_queue.put((input_sha1_sum, options.output))

  # Start up all the worker threads. The count is bounded by --num_threads,
  # not by the number of queued files.
  all_threads = []
  for thread_num in range(options.num_threads):
    t = threading.Thread(
        target=_downloader_worker_thread,
        args=[thread_num, work_queue, options, base_url, gsutil.clone()])
    t.daemon = True
    t.start()
    all_threads.append(t)

  # Wait for all downloads to finish.
  # NOTE: the workers' return codes are not collected, so an individual
  # failed download is reported on stderr but does not change the exit code.
  for t in all_threads:
    t.join()
  return 0
236
237
# Script entry point: hand the full argument vector to main() and use its
# return value as the process exit status.
if __name__ == '__main__':
  sys.exit(main(sys.argv))
OLDNEW
« no previous file with comments | « no previous file | build/upload_to_google_storage.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698