Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(44)

Side by Side Diff: build/download_from_google_storage.py

Issue 11664024: Scripts to download files from google storage based on sha1 sums (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: newline fixes Created 7 years, 12 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « no previous file | build/upload_to_google_storage.py » ('j') | build/upload_to_google_storage.py » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 #!/usr/bin/env python
2 # Copyright (c) 2012 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
5
6 """Script to download files from Google Storage."""
7
8
9 import os
10 import zipfile
11 import tempfile
12 import subprocess
13 import hashlib
14 import sys
15 import re
16 import zipfile
17 import threading
18 import time
19 import Queue
cmp 2013/01/04 17:42:04 these should be sorted alphabetically
Ryan Tseng 2013/01/14 21:37:12 Done.
20 from optparse import OptionParser
cmp 2013/01/04 17:42:04 insert an empty line before line 20
Ryan Tseng 2013/01/14 21:37:12 Done.
# Default location of the gsutil script: two directories above the one that
# contains this file, then third_party/gsutil/gsutil.
GSUTIL_DEFAULT_PATH = os.path.join(
    os.path.dirname(os.path.normpath(__file__)),
    os.pardir, os.pardir, 'third_party', 'gsutil', 'gsutil')
23
24
class Gsutil():
  """Runs the gsutil script as a subprocess of the current interpreter.

  Attributes:
    path: Path to the gsutil script.
    stdout: Output captured by the most recent call(), or None before any.
    stderr: Error output captured by the most recent call(), or None.
  """

  def __init__(self, path):
    """Remembers where gsutil lives.

    Args:
      path: Path to the gsutil script.

    Raises:
      IOError: If nothing exists at path.
    """
    if not os.path.exists(path):
      raise IOError('GSUtil not found in %s' % path)
    self.path = path
    # Initialised here so readers of these attributes never hit an
    # AttributeError when call() has not been invoked yet.
    self.stdout = None
    self.stderr = None

  def call_interactive(self, *args):
    """Runs gsutil attached to this process's stdin/stdout/stderr.

    Nothing is captured; self.stdout and self.stderr are left untouched.

    Args:
      *args: Command-line arguments passed to gsutil.

    Returns:
      The exit code of the gsutil process.
    """
    # sys.executable guarantees the child uses the same interpreter as this
    # script instead of whatever 'python' resolves to on PATH.
    return subprocess.call((sys.executable, self.path) + args)

  def call(self, *args):
    """Runs gsutil with its output captured and maps failures to a status.

    Args:
      *args: Command-line arguments passed to gsutil.

    Returns:
      0 on success. On failure: the number following 'status=' in the error
      output if present, 403 for the missing-credentials message, 404 for
      'No such object', otherwise the process exit code.
    """
    p = subprocess.Popen((sys.executable, self.path) + args,
                         stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                         # Text mode, so the string matching below works on
                         # the captured output under any interpreter version.
                         universal_newlines=True)
    # communicate() drains both pipes while waiting. Calling wait() first can
    # deadlock once the child fills a pipe buffer.
    out, err = p.communicate()
    code = p.returncode
    self.stdout = out
    self.stderr = err

    if code == 0:
      return 0

    status_code_match = re.search('status=([0-9]+)', err)
    if status_code_match:
      # group(1) is the captured digits; groups(1) would be a tuple.
      return int(status_code_match.group(1))
    if ('You are attempting to access protected data with '
        'no configured credentials.' in err):
      return 403
    if 'No such object' in err:
      return 404
    return code
58
59
def check_sha1(sha1_sum, filename):
  """Reports whether the SHA1 digest of a file equals an expected value.

  Args:
    sha1_sum: Expected digest as a hexadecimal string. The comparison is
        against hashlib's hexdigest(), which is lowercase.
    filename: Path of the file to hash.

  Returns:
    True if the file's SHA1 hex digest equals sha1_sum, False otherwise.

  Raises:
    IOError: If the file cannot be opened or read.
  """
  sha1 = hashlib.sha1()
  # Binary mode keeps the bytes exactly as stored (text mode rewrites line
  # endings on Windows); 'with' closes the handle; chunked reads avoid holding
  # a large artifact in memory all at once.
  with open(filename, 'rb') as f:
    for chunk in iter(lambda: f.read(1024 * 1024), b''):
      sha1.update(chunk)
  return sha1_sum == sha1.hexdigest()
64
65
def _downloader_worker_thread(thread_num, q, options, base_url, gsutil):
  """Worker loop: takes items off the queue and downloads each with gsutil.

  Args:
    thread_num: Index of this worker (currently unused).
    q: Queue of (sha1 sum, output filename) tuples.
    options: Parsed command-line options; only options.force is read here.
    base_url: URL prefix under which objects are named by their SHA1 sum.
    gsutil: Object providing call() and call_interactive().

  Returns:
    None once the queue is empty, or the non-zero gsutil exit code of the
    first failed download, which also stops this worker.
  """
  while True:
    # Only get_nowait() can raise Queue.Empty, so only it sits in the try.
    try:
      input_sha1_sum, output_filename = q.get_nowait()
    except Queue.Empty:
      return

    if os.path.exists(output_filename) and not options.force:
      if check_sha1(input_sha1_sum, output_filename):
        print('File %s exists and SHA1 sum (%s) matches. Skipping.' % (
            output_filename, input_sha1_sum))
        continue

    # Check if file exists.
    file_url = '%s/%s' % (base_url, input_sha1_sum)
    if gsutil.call('ls', file_url) != 0:
      sys.stderr.write('File %s for %s does not exist, skipping.\n' % (
          file_url, output_filename))
      continue

    # Fetch the file.
    print('Downloading %s to %s...' % (file_url, output_filename))
    code = gsutil.call_interactive('cp', '-q', file_url, output_filename)
    if code != 0:
      # call_interactive() captures nothing, so gsutil.stderr would still
      # hold the output of the 'ls' above; report the failure itself.
      sys.stderr.write('Failed to download %s to %s (gsutil exit code %s).\n'
                       % (file_url, output_filename, code))
      return code
    # TODO(hinoka): Delete and unzip (options.unzip / options.delete).
93
94
def main(args):
  """Downloads files from Google Storage, named by their SHA1 sums.

  The single positional target may be a directory to scan for .sha1 files,
  a .sha1 file, an already-downloaded file whose name is its own SHA1 sum,
  or a bare SHA1 sum.

  The --unzip and --delete options are accepted but not acted on yet.

  Args:
    args: Unused. NOTE(review): OptionParser.parse_args() is called without
        arguments, so options come from sys.argv[1:] regardless of what is
        passed here; kept as-is for existing callers.

  Returns:
    0 on success, 1 on a usage or setup error, or the status reported by the
    initial gsutil permission check (for example 403 or 404).
  """
  parser = OptionParser()
  parser.add_option('-z', '--unzip', action='store_true', default=False,
                    help='The target file is a zip file, unzip file after '
                         'the download completes.')
  parser.add_option('-d', '--delete', action='store_true', default=False,
                    help='Deletes the target file after unzip.')
  parser.add_option('-o', '--output', default=None,
                    help='Specify the output file name. '
                         'Defaults to the SHA1 hash.')
  parser.add_option('-b', '--bucket', default='chrome-artifacts',
                    help='Google Storage bucket to fetch from.')
  parser.add_option('-f', '--force', action='store_true', default=False,
                    help='Force download even if local file exists.')
  parser.add_option('-r', '--recursive', action='store_true', default=False,
                    help='Scan folders recursively for .sha1 files.')
  parser.add_option('-t', '--num_threads', default=1, type='int',
                    help='Number of downloader threads to run.')
  # This file should be stored in tools/deps_scripts/ and we want the path to
  # third_party/gsutil/gsutil
  parser.add_option('-g', '--gsutil_path', default=GSUTIL_DEFAULT_PATH,
                    help='Path to the gsutil script.')

  (options, args) = parser.parse_args()
  if len(args) < 1:
    sys.stderr.write('Missing target.\n')
    return 1
  input_filename = args[0]

  # The target could mean one of four things:
  # 1. A directory to scan for .sha1 files.
  # 2. An already downloaded binary file named after its own SHA1 sum.
  # 3. A .sha1 file (any existing file containing a 40-character sum).
  # 4. A bare SHA1 sum.
  # dir_name is assigned on every path so later code never sees it undefined.
  dir_name = False
  if os.path.isdir(input_filename):
    dir_name = True
  elif (os.path.exists(input_filename) and
        check_sha1(input_filename, input_filename)):
    pass  # Case 2: the worker will find the file already up to date.
  else:
    sha1_match = None
    if os.path.exists(input_filename):
      with open(input_filename) as f:
        sha1_match = re.search('([A-Za-z0-9]{40})', f.read(1024))
    if sha1_match:
      if input_filename.endswith('.sha1'):
        # Strip only the trailing extension; str.replace() would also remove
        # a '.sha1' that appears earlier in the path.
        options.output = input_filename[:-len('.sha1')]
      # group(1) is the matched sum; groups(1) would be a one-element tuple.
      input_filename = sha1_match.group(1)
    elif not re.match('[A-Za-z0-9]{40}', input_filename):
      sys.stderr.write('Input %s not recognized.\n' % input_filename)
      sys.stderr.write('Input must be: \n')
      sys.stderr.write('(1) a directory, (2) a .sha1 file or \n')
      sys.stderr.write('(3) a sha1 sum ([A-Za-z0-9]{40})\n')
      return 1

  if not options.output:
    options.output = input_filename
  base_url = 'gs://%s' % options.bucket

  # Locate gsutil: the configured path first, then the first hit on PATH.
  gsutil_path = options.gsutil_path
  if not os.path.exists(gsutil_path):
    gsutil_path = None
    for path in os.environ.get('PATH', '').split(os.pathsep):
      if path and os.path.exists(os.path.join(path, 'gsutil')):
        gsutil_path = os.path.join(path, 'gsutil')
        break
    if gsutil_path is None:
      sys.stderr.write('gsutil not found at %s or in PATH.\n' %
                       options.gsutil_path)
      return 1
  gsutil = Gsutil(gsutil_path)

  # Check if we have permissions.
  code = gsutil.call('ls', base_url)
  if code == 403:
    code = gsutil.call_interactive('config')
    if code != 0:
      sys.stderr.write('Error while authenticating to %s, exiting\n' %
                       base_url)
      return 403
  elif code == 404:
    sys.stderr.write('%s not found.\n' % base_url)
    return 404
  elif code != 0:
    sys.stderr.write('%s\n' % gsutil.stderr)
    return code

  # Enumerate our work queue.
  work_queue = Queue.Queue()
  if dir_name:
    for root, dirs, files in os.walk(input_filename):
      if not options.recursive:
        # Emptying dirs in place stops os.walk() from descending, so only
        # the top-level directory is scanned.
        del dirs[:]
      elif '.svn' in dirs:
        dirs.remove('.svn')
      for filename in files:
        if not filename.endswith('.sha1'):
          continue
        full_path = os.path.join(root, filename)
        with open(full_path) as f:
          sha1_match = re.search('([A-Za-z0-9]{40})', f.read(1024))
        if sha1_match:
          work_queue.put((sha1_match.group(1),
                          full_path[:-len('.sha1')]))
  else:
    work_queue.put((input_filename, options.output))

  # Start up all the worker threads. Each gets its own Gsutil instance so
  # the output captured by one worker is never overwritten by another.
  all_threads = []
  for thread_num in range(options.num_threads):
    t = threading.Thread(
        target=_downloader_worker_thread,
        args=[thread_num, work_queue, options, base_url,
              Gsutil(gsutil.path)])
    t.daemon = True
    t.start()
    all_threads.append(t)

  # Join with a timeout in a loop rather than a bare join(), so the main
  # thread wakes up regularly and ctrl + c can interrupt the wait.
  for t in all_threads:
    while t.is_alive():
      t.join(1)
  print('Now we\'re done')
  return 0
szager1 2013/01/04 17:54:02 My previous comment means this clause is unnecessa
Ryan Tseng 2013/01/14 21:37:12 Done.
222
223
# Script entry point: run main() and use its return value as the exit status.
if __name__ == '__main__':
  exit_status = main(sys.argv)
  sys.exit(exit_status)
OLDNEW
« no previous file with comments | « no previous file | build/upload_to_google_storage.py » ('j') | build/upload_to_google_storage.py » ('J')

Powered by Google App Engine
This is Rietveld 408576698