OLD | NEW |
---|---|
1 #!/usr/bin/env python | |
2 | |
3 # This Source Code Form is subject to the terms of the Mozilla Public | 1 # This Source Code Form is subject to the terms of the Mozilla Public |
4 # License, v. 2.0. If a copy of the MPL was not distributed with this | 2 # License, v. 2.0. If a copy of the MPL was not distributed with this |
5 # file, You can obtain one at http://mozilla.org/MPL/2.0/. | 3 # file, You can obtain one at http://mozilla.org/MPL/2.0/. |
6 | 4 |
7 """Module to handle downloads for different types of Firefox and Thunderbird builds.""" | |
8 | |
9 | |
10 from datetime import datetime | 5 from datetime import datetime |
11 from optparse import OptionParser, OptionGroup | 6 import logging |
12 import os | 7 import os |
13 import re | 8 import re |
9 import requests | |
14 import sys | 10 import sys |
15 import time | 11 import time |
16 import urllib | 12 import urllib |
17 import urllib2 | 13 from urlparse import urlparse |
18 | 14 |
19 import mozinfo | 15 import mozinfo |
16 import progressbar as pb | |
kjellander_chromium
2015/11/17 10:19:29
Any idea how the script can work without the progr
phoglund_chromium
2015/11/17 11:13:07
Crap, you're right. I have it installed on my mach
kjellander_chromium
2015/11/17 12:57:17
Not necessarily screwed since we can bundle those
phoglund_chromium
2015/11/17 13:47:03
I managed to pull requests from the mirror you ind
| |
17 | |
18 import errors | |
20 | 19 |
21 from parser import DirectoryParser | 20 from parser import DirectoryParser |
22 from timezones import PacificTimezone | 21 from timezones import PacificTimezone |
23 | 22 from utils import urljoin |
24 | 23 |
25 APPLICATIONS = ['b2g', 'firefox', 'thunderbird'] | 24 |
25 APPLICATIONS = ('b2g', 'firefox', 'fennec', 'thunderbird') | |
26 | |
27 # Some applications contain all locales in a single build | |
28 APPLICATIONS_MULTI_LOCALE = ('b2g', 'fennec') | |
29 | |
30 # Used if the application is named differently than the subfolder on the server | |
31 APPLICATIONS_TO_FTP_DIRECTORY = {'fennec': 'mobile'} | |
26 | 32 |
27 # Base URL for the path to all builds | 33 # Base URL for the path to all builds |
28 BASE_URL = 'https://ftp.mozilla.org/pub/mozilla.org' | 34 BASE_URL = 'https://archive.mozilla.org/pub/' |
29 | 35 |
30 PLATFORM_FRAGMENTS = {'linux': 'linux-i686', | 36 # Chunk size when downloading a file |
31 'linux64': 'linux-x86_64', | 37 CHUNK_SIZE = 16 * 1024 |
32 'mac': 'mac', | 38 |
33 'mac64': 'mac64', | 39 DEFAULT_FILE_EXTENSIONS = {'android-api-9': 'apk', |
34 'win32': 'win32', | 40 'android-api-11': 'apk', |
35 'win64': 'win64-x86_64'} | 41 'android-x86': 'apk', |
36 | 42 'linux': 'tar.bz2', |
37 DEFAULT_FILE_EXTENSIONS = {'linux': 'tar.bz2', | |
38 'linux64': 'tar.bz2', | 43 'linux64': 'tar.bz2', |
39 'mac': 'dmg', | 44 'mac': 'dmg', |
40 'mac64': 'dmg', | 45 'mac64': 'dmg', |
41 'win32': 'exe', | 46 'win32': 'exe', |
42 'win64': 'exe'} | 47 'win64': 'exe'} |
43 | 48 |
44 class NotFoundException(Exception): | 49 PLATFORM_FRAGMENTS = {'android-api-9': r'android-arm', |
45 """Exception for a resource not being found (e.g. no logs)""" | 50 'android-api-11': r'android-arm', |
46 def __init__(self, message, location): | 51 'android-x86': r'android-i386', |
47 self.location = location | 52 'linux': r'linux-i686', |
48 Exception.__init__(self, ': '.join([message, location])) | 53 'linux64': r'linux-x86_64', |
54 'mac': r'mac', | |
55 'mac64': r'mac(64)?', | |
56 'win32': r'win32', | |
57 'win64': r'win64(-x86_64)?'} | |
49 | 58 |
50 | 59 |
51 class Scraper(object): | 60 class Scraper(object): |
52 """Generic class to download an application from the Mozilla server""" | 61 """Generic class to download an application from the Mozilla server""" |
53 | 62 |
54 def __init__(self, directory, version, platform=None, | 63 def __init__(self, destination=None, platform=None, |
55 application='firefox', locale='en-US', extension=None, | 64 application='firefox', locale=None, extension=None, |
56 authentication=None, retry_attempts=3, retry_delay=10): | 65 username=None, password=None, |
66 retry_attempts=0, retry_delay=10., | |
67 is_stub_installer=False, timeout=None, | |
68 log_level='INFO', | |
69 base_url=BASE_URL): | |
57 | 70 |
58 # Private properties for caching | 71 # Private properties for caching |
59 self._target = None | 72 self._filename = None |
60 self._binary = None | 73 self._binary = None |
61 | 74 |
62 self.directory = directory | 75 self.destination = destination or os.getcwd() |
63 self.locale = locale | 76 |
77 if not locale: | |
78 if application in APPLICATIONS_MULTI_LOCALE: | |
79 self.locale = 'multi' | |
80 else: | |
81 self.locale = 'en-US' | |
82 else: | |
83 self.locale = locale | |
84 | |
64 self.platform = platform or self.detect_platform() | 85 self.platform = platform or self.detect_platform() |
65 self.version = version | 86 |
66 self.extension = extension or DEFAULT_FILE_EXTENSIONS[self.platform] | 87 self.session = requests.Session() |
67 self.authentication = authentication | 88 if (username, password) != (None, None): |
89 self.session.auth = (username, password) | |
90 | |
68 self.retry_attempts = retry_attempts | 91 self.retry_attempts = retry_attempts |
69 self.retry_delay = retry_delay | 92 self.retry_delay = retry_delay |
93 self.is_stub_installer = is_stub_installer | |
94 self.timeout_download = timeout | |
95 # this is the timeout used in requests.get. Unlike "auth", | |
96 # it does not work if we attach it on the session, so we handle | |
97 # it independently. | |
98 self.timeout_network = 60. | |
99 | |
100 logging.basicConfig(format=' %(levelname)s | %(message)s') | |
101 self.logger = logging.getLogger(self.__module__) | |
102 self.logger.setLevel(log_level) | |
70 | 103 |
71 # build the base URL | 104 # build the base URL |
72 self.application = application | 105 self.application = application |
73 self.base_url = '/'.join([BASE_URL, self.application]) | 106 self.base_url = '%s/' % urljoin( |
74 | 107 base_url, |
108 APPLICATIONS_TO_FTP_DIRECTORY.get(self.application, self.application) | |
109 ) | |
110 | |
111 if extension: | |
112 self.extension = extension | |
113 else: | |
114 if self.application in APPLICATIONS_MULTI_LOCALE and \ | |
115 self.platform in ('win32', 'win64'): | |
116 # builds for APPLICATIONS_MULTI_LOCALE only exist in zip | |
117 self.extension = 'zip' | |
118 else: | |
119 self.extension = DEFAULT_FILE_EXTENSIONS[self.platform] | |
120 | |
121 attempt = 0 | |
122 while True: | |
123 attempt += 1 | |
124 try: | |
125 self.get_build_info() | |
126 break | |
127 except (errors.NotFoundError, requests.exceptions.RequestException), e: | |
128 if self.retry_attempts > 0: | |
129 # Log only if multiple attempts are requested | |
130 self.logger.warning("Build not found: '%s'" % e.message) | |
131 self.logger.info('Will retry in %s seconds...' % | |
132 (self.retry_delay)) | |
133 time.sleep(self.retry_delay) | |
134 self.logger.info("Retrying... (attempt %s)" % attempt) | |
135 | |
136 if attempt >= self.retry_attempts: | |
137 if hasattr(e, 'response') and \ | |
138 e.response.status_code == 404: | |
139 message = "Specified build has not been found" | |
140 raise errors.NotFoundError(message, e.response.url) | |
141 else: | |
142 raise | |
143 | |
144 def _create_directory_parser(self, url): | |
145 return DirectoryParser(url, | |
146 session=self.session, | |
147 timeout=self.timeout_network) | |
75 | 148 |
76 @property | 149 @property |
77 def binary(self): | 150 def binary(self): |
78 """Return the name of the build""" | 151 """Return the name of the build""" |
79 | 152 |
80 if self._binary is None: | 153 attempt = 0 |
81 # Retrieve all entries from the remote virtual folder | 154 |
82 parser = DirectoryParser(self.path) | 155 while self._binary is None: |
83 if not parser.entries: | 156 attempt += 1 |
84 raise NotFoundException('No entries found', self.path) | 157 try: |
85 | 158 # Retrieve all entries from the remote virtual folder |
86 # Download the first matched directory entry | 159 parser = self._create_directory_parser(self.path) |
87 pattern = re.compile(self.binary_regex, re.IGNORECASE) | 160 if not parser.entries: |
88 for entry in parser.entries: | 161 raise errors.NotFoundError('No entries found', self.path) |
89 try: | 162 |
90 self._binary = pattern.match(entry).group() | 163 # Download the first matched directory entry |
91 break | 164 pattern = re.compile(self.binary_regex, re.IGNORECASE) |
92 except: | 165 for entry in parser.entries: |
93 # No match, continue with next entry | 166 try: |
94 continue | 167 self._binary = pattern.match(entry).group() |
95 | 168 break |
96 if self._binary is None: | 169 except: |
97 raise NotFoundException("Binary not found in folder", self.path) | 170 # No match, continue with next entry |
98 else: | 171 continue |
99 return self._binary | 172 else: |
100 | 173 raise errors.NotFoundError("Binary not found in folder", |
174 self.path) | |
175 except (errors.NotFoundError, requests.exceptions.RequestException), e: | |
176 if self.retry_attempts > 0: | |
177 # Log only if multiple attempts are requested | |
178 self.logger.warning("Build not found: '%s'" % e.message) | |
179 self.logger.info('Will retry in %s seconds...' % | |
180 (self.retry_delay)) | |
181 time.sleep(self.retry_delay) | |
182 self.logger.info("Retrying... (attempt %s)" % attempt) | |
183 | |
184 if attempt >= self.retry_attempts: | |
185 if hasattr(e, 'response') and \ | |
186 e.response.status_code == 404: | |
187 message = "Specified build has not been found" | |
188 raise errors.NotFoundError(message, self.path) | |
189 else: | |
190 raise | |
191 | |
192 return self._binary | |
101 | 193 |
102 @property | 194 @property |
103 def binary_regex(self): | 195 def binary_regex(self): |
104 """Return the regex for the binary filename""" | 196 """Return the regex for the binary filename""" |
105 | 197 |
106 raise NotImplementedError(sys._getframe(0).f_code.co_name) | 198 raise errors.NotImplementedError(sys._getframe(0).f_code.co_name) |
107 | 199 |
108 | 200 @property |
109 @property | 201 def url(self): |
110 def final_url(self): | 202 """Return the URL of the build""" |
111 """Return the final URL of the build""" | 203 |
112 | 204 return urljoin(self.path, self.binary) |
113 return '/'.join([self.path, self.binary]) | |
114 | |
115 | 205 |
116 @property | 206 @property |
117 def path(self): | 207 def path(self): |
118 """Return the path to the build""" | 208 """Return the path to the build folder""" |
119 | 209 |
120 return '/'.join([self.base_url, self.path_regex]) | 210 return urljoin(self.base_url, self.path_regex) |
121 | |
122 | 211 |
123 @property | 212 @property |
124 def path_regex(self): | 213 def path_regex(self): |
125 """Return the regex for the path to the build""" | 214 """Return the regex for the path to the build folder""" |
126 | 215 |
127 raise NotImplementedError(sys._getframe(0).f_code.co_name) | 216 raise errors.NotImplementedError(sys._getframe(0).f_code.co_name) |
128 | |
129 | 217 |
130 @property | 218 @property |
131 def platform_regex(self): | 219 def platform_regex(self): |
132 """Return the platform fragment of the URL""" | 220 """Return the platform fragment of the URL""" |
133 | 221 |
134 return PLATFORM_FRAGMENTS[self.platform]; | 222 return PLATFORM_FRAGMENTS[self.platform] |
135 | 223 |
136 | 224 @property |
137 @property | 225 def filename(self): |
138 def target(self): | 226 """Return the local filename of the build""" |
139 """Return the target file name of the build""" | 227 |
140 | 228 if self._filename is None: |
141 if self._target is None: | 229 if os.path.splitext(self.destination)[1]: |
142 self._target = os.path.join(self.directory, | 230 # If the filename has been given make use of it |
143 self.build_filename(self.binary)) | 231 target_file = self.destination |
144 return self._target | 232 else: |
145 | 233 # Otherwise create it from the build details |
234 target_file = os.path.join(self.destination, | |
235 self.build_filename(self.binary)) | |
236 | |
237 self._filename = os.path.abspath(target_file) | |
238 | |
239 return self._filename | |
240 | |
241 def get_build_info(self): | |
242 """Returns additional build information in subclasses if necessary""" | |
243 pass | |
146 | 244 |
147 def build_filename(self, binary): | 245 def build_filename(self, binary): |
148 """Return the proposed filename with extension for the binary""" | 246 """Return the proposed filename with extension for the binary""" |
149 | 247 |
150 raise NotImplementedError(sys._getframe(0).f_code.co_name) | 248 raise errors.NotImplementedError(sys._getframe(0).f_code.co_name) |
151 | |
152 | 249 |
153 def detect_platform(self): | 250 def detect_platform(self): |
154 """Detect the current platform""" | 251 """Detect the current platform""" |
155 | 252 |
156 # For Mac and Linux 32bit we do not need the bits appended | 253 # For Mac and Linux 32bit we do not need the bits appended |
157 if mozinfo.os == 'mac' or (mozinfo.os == 'linux' and mozinfo.bits == 32): | 254 if mozinfo.os == 'mac' or \ |
255 (mozinfo.os == 'linux' and mozinfo.bits == 32): | |
158 return mozinfo.os | 256 return mozinfo.os |
159 else: | 257 else: |
160 return "%s%d" % (mozinfo.os, mozinfo.bits) | 258 return "%s%d" % (mozinfo.os, mozinfo.bits) |
161 | 259 |
162 | |
163 def download(self): | 260 def download(self): |
164 """Download the specified file""" | 261 """Download the specified file""" |
165 | 262 |
166 attempts = 0 | 263 def total_seconds(td): |
167 | 264 # Keep backward compatibility with Python 2.6 which doesn't have |
168 if not os.path.isdir(self.directory): | 265 # this method |
169 os.makedirs(self.directory) | 266 if hasattr(td, 'total_seconds'): |
267 return td.total_seconds() | |
268 else: | |
269 return (td.microseconds + | |
270 (td.seconds + td.days * 24 * 3600) * 10 ** 6) / 10 ** 6 | |
271 | |
272 attempt = 0 | |
170 | 273 |
171 # Don't re-download the file | 274 # Don't re-download the file |
172 if os.path.isfile(os.path.abspath(self.target)): | 275 if os.path.isfile(os.path.abspath(self.filename)): |
173 print "File has already been downloaded: %s" % (self.target) | 276 self.logger.info("File has already been downloaded: %s" % |
174 return | 277 (self.filename)) |
175 | 278 return self.filename |
176 print 'Downloading from: %s' % (urllib.unquote(self.final_url)) | 279 |
177 tmp_file = self.target + ".part" | 280 directory = os.path.dirname(self.filename) |
178 | 281 if not os.path.isdir(directory): |
179 if self.authentication \ | 282 os.makedirs(directory) |
180 and self.authentication['username'] \ | 283 |
181 and self.authentication['password']: | 284 self.logger.info('Downloading from: %s' % |
182 password_mgr = urllib2.HTTPPasswordMgrWithDefaultRealm() | 285 (urllib.unquote(self.url))) |
183 password_mgr.add_password(None, | 286 self.logger.info('Saving as: %s' % self.filename) |
184 self.final_url, | 287 |
185 self.authentication['username'], | 288 tmp_file = self.filename + ".part" |
186 self.authentication['password']) | |
187 handler = urllib2.HTTPBasicAuthHandler(password_mgr) | |
188 opener = urllib2.build_opener(urllib2.HTTPHandler, handler) | |
189 urllib2.install_opener(opener) | |
190 | 289 |
191 while True: | 290 while True: |
192 attempts += 1 | 291 attempt += 1 |
193 try: | 292 try: |
194 r = urllib2.urlopen(self.final_url) | 293 start_time = datetime.now() |
195 CHUNK = 16 * 1024 | 294 |
295 # Enable streaming mode so we can download content in chunks | |
296 r = self.session.get(self.url, stream=True) | |
297 r.raise_for_status() | |
298 | |
299 content_length = r.headers.get('Content-length') | |
300 # ValueError: Value out of range if only total_size given | |
301 if content_length: | |
302 total_size = int(content_length.strip()) | |
303 max_value = ((total_size / CHUNK_SIZE) + 1) * CHUNK_SIZE | |
304 | |
305 bytes_downloaded = 0 | |
306 | |
307 log_level = self.logger.getEffectiveLevel() | |
308 if log_level <= logging.INFO and content_length: | |
309 widgets = [pb.Percentage(), ' ', pb.Bar(), ' ', pb.ETA(), | |
310 ' ', pb.FileTransferSpeed()] | |
311 pbar = pb.ProgressBar(widgets=widgets, | |
312 maxval=max_value).start() | |
313 | |
196 with open(tmp_file, 'wb') as f: | 314 with open(tmp_file, 'wb') as f: |
197 for chunk in iter(lambda: r.read(CHUNK), ''): | 315 for chunk in iter(lambda: r.raw.read(CHUNK_SIZE), ''): |
198 f.write(chunk) | 316 f.write(chunk) |
317 bytes_downloaded += CHUNK_SIZE | |
318 | |
319 if log_level <= logging.INFO and content_length: | |
320 pbar.update(bytes_downloaded) | |
321 | |
322 t1 = total_seconds(datetime.now() - start_time) | |
323 if self.timeout_download and \ | |
324 t1 >= self.timeout_download: | |
325 raise errors.TimeoutError | |
326 | |
327 if log_level <= logging.INFO and content_length: | |
328 pbar.finish() | |
199 break | 329 break |
200 except (urllib2.HTTPError, urllib2.URLError): | 330 except (requests.exceptions.RequestException, errors.TimeoutError), e: |
201 if tmp_file and os.path.isfile(tmp_file): | 331 if tmp_file and os.path.isfile(tmp_file): |
202 os.remove(tmp_file) | 332 os.remove(tmp_file) |
203 print 'Download failed! Retrying... (attempt %s)' % attempts | 333 if self.retry_attempts > 0: |
204 if attempts >= self.retry_attempts: | 334 # Log only if multiple attempts are requested |
335 self.logger.warning('Download failed: "%s"' % str(e)) | |
336 self.logger.info('Will retry in %s seconds...' % | |
337 (self.retry_delay)) | |
338 time.sleep(self.retry_delay) | |
339 self.logger.info("Retrying... (attempt %s)" % attempt) | |
340 if attempt >= self.retry_attempts: | |
205 raise | 341 raise |
206 time.sleep(self.retry_delay) | 342 time.sleep(self.retry_delay) |
207 | 343 |
208 os.rename(tmp_file, self.target) | 344 os.rename(tmp_file, self.filename) |
345 | |
346 return self.filename | |
347 | |
348 def show_matching_builds(self, builds): | |
349 """Output the matching builds""" | |
350 self.logger.info('Found %s build%s: %s' % ( | |
351 len(builds), | |
352 len(builds) > 1 and 's' or '', | |
353 len(builds) > 10 and | |
354 ' ... '.join([', '.join(builds[:5]), ', '.join(builds[-5:])]) or | |
355 ', '.join(builds))) | |
209 | 356 |
210 | 357 |
211 class DailyScraper(Scraper): | 358 class DailyScraper(Scraper): |
212 """Class to download a daily build from the Mozilla server""" | 359 """Class to download a daily build from the Mozilla server""" |
213 | 360 |
214 def __init__(self, branch='mozilla-central', build_id=None, date=None, | 361 def __init__(self, branch='mozilla-central', build_id=None, date=None, |
215 build_number=None, *args, **kwargs): | 362 build_number=None, *args, **kwargs): |
216 | 363 |
364 self.branch = branch | |
365 self.build_id = build_id | |
366 self.date = date | |
367 self.build_number = build_number | |
368 | |
217 Scraper.__init__(self, *args, **kwargs) | 369 Scraper.__init__(self, *args, **kwargs) |
218 self.branch = branch | 370 |
371 def get_build_info(self): | |
372 """Defines additional build information""" | |
219 | 373 |
220 # Internally we access builds via index | 374 # Internally we access builds via index |
221 if build_number is not None: | 375 if self.build_number is not None: |
222 self.build_index = int(build_number) - 1 | 376 self.build_index = int(self.build_number) - 1 |
223 else: | 377 else: |
224 self.build_index = None | 378 self.build_index = None |
225 | 379 |
226 if build_id: | 380 if self.build_id: |
227 # A build id has been specified. Split up its components so the date | 381 # A build id has been specified. Split up its components so the |
228 # and time can be extracted: '20111212042025' -> '2011-12-12 04:20:25' | 382 # date and time can be extracted: |
229 self.date = datetime.strptime(build_id, '%Y%m%d%H%M%S') | 383 # '20111212042025' -> '2011-12-12 04:20:25' |
230 self.builds, self.build_index = self.get_build_info_for_date(self.date, | 384 self.date = datetime.strptime(self.build_id, '%Y%m%d%H%M%S') |
231 has_time=True) | |
232 | 385 |
233 elif date: | 386 elif self.date: |
234 # A date (without time) has been specified. Use its value and the | 387 # A date (without time) has been specified. Use its value and the |
235 # build index to find the requested build for that day. | 388 # build index to find the requested build for that day. |
236 self.date = datetime.strptime(date, '%Y-%m-%d') | 389 try: |
237 self.builds, self.build_index = self.get_build_info_for_date(self.date, | 390 self.date = datetime.strptime(self.date, '%Y-%m-%d') |
238 build_index=self.build_index) | 391 except: |
239 | 392 raise ValueError('%s is not a valid date' % self.date) |
240 else: | 393 else: |
241 # If no build id nor date have been specified the lastest available | 394 # If no build id nor date have been specified the latest available |
242 # build of the given branch has to be identified. We also have to | 395 # build of the given branch has to be identified. We also have to |
243 # retrieve the date of the build via its build id. | 396 # retrieve the date of the build via its build id. |
244 url = '%s/nightly/latest-%s/' % (self.base_url, self.branch) | 397 self.date = self.get_latest_build_date() |
245 | 398 |
246 print 'Retrieving the build status file from %s' % url | 399 self.builds, self.build_index = self.get_build_info_for_date( |
247 parser = DirectoryParser(url) | 400 self.date, self.build_index) |
248 parser.entries = parser.filter(r'.*%s\.txt' % self.platform_regex) | |
249 if not parser.entries: | |
250 message = 'Status file for %s build cannot be found' % self.platform_regex | 413 if not parser.entries: |
251 raise NotFoundException(message, url) | |
252 | 401 |
253 # Read status file for the platform, retrieve build id, and convert to a date | 402 def get_latest_build_date(self): |
254 status_file = url + parser.entries[-1] | 403 """ Returns date of latest available nightly build.""" |
255 f = urllib.urlopen(status_file) | 404 if self.application not in ('fennec'): |
256 self.date = datetime.strptime(f.readline().strip(), '%Y%m%d%H%M%S') | 405 url = urljoin(self.base_url, 'nightly', 'latest-%s/' % self.branch) |
257 self.builds, self.build_index = self.get_build_info_for_date(self.date, | 406 else: |
258 has_time=True) | 407 url = urljoin(self.base_url, 'nightly', 'latest-%s-%s/' % |
408 (self.branch, self.platform)) | |
259 | 409 |
410 self.logger.info('Retrieving the build status file from %s' % url) | |
411 parser = self._create_directory_parser(url) | |
412 parser.entries = parser.filter(r'.*%s\.txt' % self.platform_regex) | |
413 if not parser.entries: | |
414 message = 'Status file for %s build cannot be found' % \ | |
415 self.platform_regex | |
416 raise errors.NotFoundError(message, url) | |
260 | 417 |
261 def get_build_info_for_date(self, date, has_time=False, build_index=None): | 418 # Read status file for the platform, retrieve build id, |
262 url = '/'.join([self.base_url, self.monthly_build_list_regex]) | 419 # and convert to a date |
420 headers = {'Cache-Control': 'max-age=0'} | |
263 | 421 |
264 print 'Retrieving list of builds from %s' % url | 422 r = self.session.get(url + parser.entries[-1], headers=headers) |
265 parser = DirectoryParser(url) | 423 try: |
266 regex = r'%(DATE)s-(\d+-)+%(BRANCH)s%(L10N)s$' % { | 424 r.raise_for_status() |
267 'DATE': date.strftime('%Y-%m-%d'), | 425 |
268 'BRANCH': self.branch, | 426 return datetime.strptime(r.text.split('\n')[0], '%Y%m%d%H%M%S') |
269 'L10N': '' if self.locale == 'en-US' else '-l10n'} | 427 finally: |
428 r.close() | |
429 | |
430 def is_build_dir(self, folder_name): | |
431 """Return whether or not the given dir contains a build.""" | |
432 | |
433 # Cannot move up to base scraper due to parser.entries call in | |
434 # get_build_info_for_date (see below) | |
435 | |
436 url = '%s/' % urljoin(self.base_url, self.monthly_build_list_regex, folder_name) | |
437 if self.application in APPLICATIONS_MULTI_LOCALE \ | |
438 and self.locale != 'multi': | |
439 url = '%s/' % urljoin(url, self.locale) | |
440 | |
441 parser = self._create_directory_parser(url) | |
442 | |
443 pattern = re.compile(self.binary_regex, re.IGNORECASE) | |
444 for entry in parser.entries: | |
445 try: | |
446 pattern.match(entry).group() | |
447 return True | |
448 except: | |
449 # No match, continue with next entry | |
450 continue | |
451 return False | |
452 | |
453 def get_build_info_for_date(self, date, build_index=None): | |
454 url = urljoin(self.base_url, self.monthly_build_list_regex) | |
455 has_time = date and date.time() | |
456 | |
457 self.logger.info('Retrieving list of builds from %s' % url) | |
458 parser = self._create_directory_parser(url) | |
459 regex = r'%(DATE)s-(\d+-)+%(BRANCH)s%(L10N)s%(PLATFORM)s$' % { | |
460 'DATE': date.strftime('%Y-%m-%d'), | |
461 'BRANCH': self.branch, | |
462 # ensure to select the correct subfolder for localized builds | |
463 'L10N': '' if self.locale in ('en-US', 'multi') else '(-l10n)?', | |
464 'PLATFORM': '' if self.application not in ( | |
465 'fennec') else '-' + self.platform | |
466 } | |
467 | |
270 parser.entries = parser.filter(regex) | 468 parser.entries = parser.filter(regex) |
271 if not parser.entries: | 469 parser.entries = parser.filter(self.is_build_dir) |
272 message = 'Folder for builds on %s has not been found' % self.date.strftime('%Y-%m-%d') | |
273 raise NotFoundException(message, url) | |
274 | 470 |
275 if has_time: | 471 if has_time: |
276 # If a time is included in the date, use it to determine the build's index | 472 # If a time is included in the date, use it to determine the |
473 # build's index | |
277 regex = r'.*%s.*' % date.strftime('%H-%M-%S') | 474 regex = r'.*%s.*' % date.strftime('%H-%M-%S') |
278 build_index = parser.entries.index(parser.filter(regex)[0]) | 475 parser.entries = parser.filter(regex) |
279 else: | 476 |
280 # If no index has been given, set it to the last build of the day. | 477 if not parser.entries: |
281 if build_index is None: | 478 date_format = '%Y-%m-%d-%H-%M-%S' if has_time else '%Y-%m-%d' |
282 build_index = len(parser.entries) - 1 | 479 message = 'Folder for builds on %s has not been found' % \ |
480 self.date.strftime(date_format) | |
481 raise errors.NotFoundError(message, url) | |
482 | |
483 # If no index has been given, set it to the last build of the day. | |
484 self.show_matching_builds(parser.entries) | |
485 # If no index has been given, set it to the last build of the day. | |
486 if build_index is None: | |
487 # Find the most recent non-empty entry. | |
488 build_index = len(parser.entries) | |
489 for build in reversed(parser.entries): | |
490 build_index -= 1 | |
491 if not build_index or self.is_build_dir(build): | |
492 break | |
493 self.logger.info('Selected build: %s' % parser.entries[build_index]) | |
283 | 494 |
284 return (parser.entries, build_index) | 495 return (parser.entries, build_index) |
285 | 496 |
286 | |
287 @property | 497 @property |
288 def binary_regex(self): | 498 def binary_regex(self): |
289 """Return the regex for the binary""" | 499 """Return the regex for the binary""" |
290 | 500 |
291 regex_base_name = r'^%(APP)s-.*\.%(LOCALE)s\.%(PLATFORM)s' | 501 regex_base_name = r'^%(APP)s-.*\.%(LOCALE)s\.%(PLATFORM)s' |
292 regex_suffix = {'linux': r'\.%(EXT)s$', | 502 regex_suffix = {'android-api-9': r'\.%(EXT)s$', |
503 'android-api-11': r'\.%(EXT)s$', | |
504 'android-x86': r'\.%(EXT)s$', | |
505 'linux': r'\.%(EXT)s$', | |
293 'linux64': r'\.%(EXT)s$', | 506 'linux64': r'\.%(EXT)s$', |
294 'mac': r'\.%(EXT)s$', | 507 'mac': r'\.%(EXT)s$', |
295 'mac64': r'\.%(EXT)s$', | 508 'mac64': r'\.%(EXT)s$', |
296 'win32': r'(\.installer)\.%(EXT)s$', | 509 'win32': r'(\.installer%(STUB)s)?\.%(EXT)s$', |
297 'win64': r'(\.installer)\.%(EXT)s$'} | 510 'win64': r'(\.installer%(STUB)s)?\.%(EXT)s$'} |
298 regex = regex_base_name + regex_suffix[self.platform] | 511 regex = regex_base_name + regex_suffix[self.platform] |
299 | 512 |
300 return regex % {'APP': self.application, | 513 return regex % {'APP': self.application, |
301 'LOCALE': self.locale, | 514 'LOCALE': self.locale, |
302 'PLATFORM': self.platform_regex, | 515 'PLATFORM': self.platform_regex, |
303 'EXT': self.extension} | 516 'EXT': self.extension, |
304 | 517 'STUB': '-stub' if self.is_stub_installer else ''} |
305 | 518 |
306 def build_filename(self, binary): | 519 def build_filename(self, binary): |
307 """Return the proposed filename with extension for the binary""" | 520 """Return the proposed filename with extension for the binary""" |
308 | 521 |
309 try: | 522 try: |
310 # Get exact timestamp of the build to build the local file name | 523 # Get exact timestamp of the build to build the local file name |
311 folder = self.builds[self.build_index] | 524 folder = self.builds[self.build_index] |
312 timestamp = re.search('([\d\-]+)-\D.*', folder).group(1) | 525 timestamp = re.search('([\d\-]+)-\D.*', folder).group(1) |
313 except: | 526 except: |
314 # If it's not available use the build's date | 527 # If it's not available use the build's date |
315 timestamp = self.date.strftime('%Y-%m-%d') | 528 timestamp = self.date.strftime('%Y-%m-%d') |
316 | 529 |
317 return '%(TIMESTAMP)s-%(BRANCH)s-%(NAME)s' % { | 530 return '%(TIMESTAMP)s-%(BRANCH)s-%(NAME)s' % { |
318 'TIMESTAMP': timestamp, | 531 'TIMESTAMP': timestamp, |
319 'BRANCH': self.branch, | 532 'BRANCH': self.branch, |
320 'NAME': binary} | 533 'NAME': binary} |
321 | |
322 | 534 |
323 @property | 535 @property |
324 def monthly_build_list_regex(self): | 536 def monthly_build_list_regex(self): |
325 """Return the regex for the folder which contains the builds of a month.""" | 537 """Return the regex for the folder containing builds of a month.""" |
326 | 538 |
327 # Regex for possible builds for the given date | 539 # Regex for possible builds for the given date |
328 return r'nightly/%(YEAR)s/%(MONTH)s/' % { | 540 return r'nightly/%(YEAR)s/%(MONTH)s/' % { |
329 'YEAR': self.date.year, | 541 'YEAR': self.date.year, |
330 'MONTH': str(self.date.month).zfill(2) } | 542 'MONTH': str(self.date.month).zfill(2)} |
331 | |
332 | 543 |
333 @property | 544 @property |
334 def path_regex(self): | 545 def path_regex(self): |
335 """Return the regex for the path""" | 546 """Return the regex for the path to the build folder""" |
336 | 547 |
337 try: | 548 try: |
338 return self.monthly_build_list_regex + self.builds[self.build_index] | 549 path = '%s/' % urljoin(self.monthly_build_list_regex, |
550 self.builds[self.build_index]) | |
551 if self.application in APPLICATIONS_MULTI_LOCALE \ | |
552 and self.locale != 'multi': | |
553 path = '%s/' % urljoin(path, self.locale) | |
554 return path | |
339 except: | 555 except: |
340 raise NotFoundException("Specified sub folder cannot be found", | 556 folder = urljoin(self.base_url, self.monthly_build_list_regex) |
341 self.base_url + self.monthly_build_list_regex) | 557 raise errors.NotFoundError("Specified sub folder cannot be found", |
558 folder) | |
342 | 559 |
343 | 560 |
344 class DirectScraper(Scraper): | 561 class DirectScraper(Scraper): |
345 """Class to download a file from a specified URL""" | 562 """Class to download a file from a specified URL""" |
346 | 563 |
347 def __init__(self, url, *args, **kwargs): | 564 def __init__(self, url, *args, **kwargs): |
565 self._url = url | |
566 | |
348 Scraper.__init__(self, *args, **kwargs) | 567 Scraper.__init__(self, *args, **kwargs) |
349 | 568 |
350 self.url = url | 569 @property |
570 def filename(self): | |
571 if os.path.splitext(self.destination)[1]: | |
572 # If the filename has been given make use of it | |
573 target_file = self.destination | |
574 else: | |
575 # Otherwise determine it from the url. | |
576 parsed_url = urlparse(self.url) | |
577 source_filename = (parsed_url.path.rpartition('/')[-1] or | |
578 parsed_url.hostname) | |
579 target_file = os.path.join(self.destination, source_filename) | |
580 | |
581 return os.path.abspath(target_file) | |
351 | 582 |
352 @property | 583 @property |
353 def target(self): | 584 def url(self): |
354 return urllib.splitquery(self.final_url)[0].rpartition('/')[-1] | 585 return self._url |
355 | |
356 @property | |
357 def final_url(self): | |
358 return self.url | |
359 | 586 |
360 | 587 |
361 class ReleaseScraper(Scraper): | 588 class ReleaseScraper(Scraper): |
362 """Class to download a release build from the Mozilla server""" | 589 """Class to download a release build from the Mozilla server""" |
363 | 590 |
364 def __init__(self, *args, **kwargs): | 591 def __init__(self, version, *args, **kwargs): |
592 self.version = version | |
593 | |
365 Scraper.__init__(self, *args, **kwargs) | 594 Scraper.__init__(self, *args, **kwargs) |
366 | 595 |
367 @property | 596 @property |
368 def binary_regex(self): | 597 def binary_regex(self): |
369 """Return the regex for the binary""" | 598 """Return the regex for the binary""" |
370 | 599 |
371 regex = {'linux': r'^%(APP)s-.*\.%(EXT)s$', | 600 regex = {'linux': r'^%(APP)s-.*\.%(EXT)s$', |
372 'linux64': r'^%(APP)s-.*\.%(EXT)s$', | 601 'linux64': r'^%(APP)s-.*\.%(EXT)s$', |
373 'mac': r'^%(APP)s.*\.%(EXT)s$', | 602 'mac': r'^%(APP)s.*\.%(EXT)s$', |
374 'mac64': r'^%(APP)s.*\.%(EXT)s$', | 603 'mac64': r'^%(APP)s.*\.%(EXT)s$', |
375 'win32': r'^%(APP)s.*\.%(EXT)s$', | 604 'win32': r'^%(APP)s.*%(STUB)s.*\.%(EXT)s$', |
376 'win64': r'^%(APP)s.*\.%(EXT)s$'} | 605 'win64': r'^%(APP)s.*%(STUB)s.*\.%(EXT)s$'} |
377 return regex[self.platform] % {'APP': self.application, | 606 return regex[self.platform] % { |
378 'EXT': self.extension} | 607 'APP': self.application, |
379 | 608 'EXT': self.extension, |
609 'STUB': 'Stub' if self.is_stub_installer else ''} | |
380 | 610 |
381 @property | 611 @property |
382 def path_regex(self): | 612 def path_regex(self): |
383 """Return the regex for the path""" | 613 """Return the regex for the path to the build folder""" |
384 | 614 |
385 regex = r'releases/%(VERSION)s/%(PLATFORM)s/%(LOCALE)s' | 615 regex = r'releases/%(VERSION)s/%(PLATFORM)s/%(LOCALE)s/' |
386 return regex % {'LOCALE': self.locale, | 616 return regex % {'LOCALE': self.locale, |
387 'PLATFORM': self.platform_regex, | 617 'PLATFORM': self.platform_regex, |
388 'VERSION': self.version} | 618 'VERSION': self.version} |
389 | 619 |
620 @property | |
621 def platform_regex(self): | |
622 """Return the platform fragment of the URL""" | |
623 | |
624 if self.platform == 'win64': | |
625 return self.platform | |
626 | |
627 return PLATFORM_FRAGMENTS[self.platform] | |
390 | 628 |
391 def build_filename(self, binary): | 629 def build_filename(self, binary): |
392 """Return the proposed filename with extension for the binary""" | 630 """Return the proposed filename with extension for the binary""" |
393 | 631 |
394 template = '%(APP)s-%(VERSION)s.%(LOCALE)s.%(PLATFORM)s.%(EXT)s' | 632 template = '%(APP)s-%(VERSION)s.%(LOCALE)s.%(PLATFORM)s%(STUB)s' \ |
633 '.%(EXT)s' | |
395 return template % {'APP': self.application, | 634 return template % {'APP': self.application, |
396 'VERSION': self.version, | 635 'VERSION': self.version, |
397 'LOCALE': self.locale, | 636 'LOCALE': self.locale, |
398 'PLATFORM': self.platform, | 637 'PLATFORM': self.platform, |
638 'STUB': '-stub' if self.is_stub_installer else '', | |
399 'EXT': self.extension} | 639 'EXT': self.extension} |
400 | 640 |
401 | 641 |
402 class ReleaseCandidateScraper(ReleaseScraper): | 642 class ReleaseCandidateScraper(ReleaseScraper): |
403 """Class to download a release candidate build from the Mozilla server""" | 643 """Class to download a release candidate build from the Mozilla server""" |
404 | 644 |
405 def __init__(self, build_number=None, no_unsigned=False, *args, **kwargs): | 645 def __init__(self, version, build_number=None, *args, **kwargs): |
646 self.version = version | |
647 self.build_number = build_number | |
648 | |
406 Scraper.__init__(self, *args, **kwargs) | 649 Scraper.__init__(self, *args, **kwargs) |
407 | 650 |
651 def get_build_info(self): | |
652 """Defines additional build information""" | |
653 | |
408 # Internally we access builds via index | 654 # Internally we access builds via index |
409 if build_number is not None: | 655 url = urljoin(self.base_url, self.candidate_build_list_regex) |
410 self.build_index = int(build_number) - 1 | 656 self.logger.info('Retrieving list of candidate builds from %s' % url) |
657 | |
658 parser = self._create_directory_parser(url) | |
659 if not parser.entries: | |
660 message = 'Folder for specific candidate builds at %s has not' \ | |
661 'been found' % url | |
662 raise errors.NotFoundError(message, url) | |
663 | |
664 self.show_matching_builds(parser.entries) | |
665 self.builds = parser.entries | |
666 self.build_index = len(parser.entries) - 1 | |
667 | |
668 if self.build_number and \ | |
669 ('build%s' % self.build_number) in self.builds: | |
670 self.builds = ['build%s' % self.build_number] | |
671 self.build_index = 0 | |
672 self.logger.info('Selected build: build%s' % self.build_number) | |
411 else: | 673 else: |
412 self.build_index = None | 674 self.logger.info('Selected build: build%d' % |
413 | 675 (self.build_index + 1)) |
414 self.builds, self.build_index = self.get_build_info_for_version(self.ver sion, self.build_index) | |
415 | |
416 self.no_unsigned = no_unsigned | |
417 self.unsigned = False | |
418 | |
419 | |
420 def get_build_info_for_version(self, version, build_index=None): | |
421 url = '/'.join([self.base_url, self.candidate_build_list_regex]) | |
422 | |
423 print 'Retrieving list of candidate builds from %s' % url | |
424 parser = DirectoryParser(url) | |
425 if not parser.entries: | |
426 message = 'Folder for specific candidate builds at has not been foun d' | |
427 raise NotFoundException(message, url) | |
428 | |
429 # If no index has been given, set it to the last build of the given vers ion. | |
430 if build_index is None: | |
431 build_index = len(parser.entries) - 1 | |
432 | |
433 return (parser.entries, build_index) | |
434 | |
435 | 676 |
436 @property | 677 @property |
437 def candidate_build_list_regex(self): | 678 def candidate_build_list_regex(self): |
438 """Return the regex for the folder which contains the builds of | 679 """Return the regex for the folder which contains the builds of |
439 a candidate build.""" | 680 a candidate build.""" |
440 | 681 |
441 # Regex for possible builds for the given date | 682 # Regex for possible builds for the given date |
442 return r'nightly/%(VERSION)s-candidates/' % { | 683 return r'candidates/%(VERSION)s-candidates/' % { |
443 'VERSION': self.version } | 684 'VERSION': self.version} |
444 | |
445 | 685 |
446 @property | 686 @property |
447 def path_regex(self): | 687 def path_regex(self): |
448 """Return the regex for the path""" | 688 """Return the regex for the path to the build folder""" |
449 | 689 |
450 regex = r'%(PREFIX)s%(BUILD)s/%(UNSIGNED)s%(PLATFORM)s/%(LOCALE)s' | 690 regex = r'%(PREFIX)s%(BUILD)s/%(PLATFORM)s/%(LOCALE)s/' |
451 return regex % {'PREFIX': self.candidate_build_list_regex, | 691 return regex % {'PREFIX': self.candidate_build_list_regex, |
452 'BUILD': self.builds[self.build_index], | 692 'BUILD': self.builds[self.build_index], |
453 'LOCALE': self.locale, | 693 'LOCALE': self.locale, |
454 'PLATFORM': self.platform_regex, | 694 'PLATFORM': self.platform_regex} |
455 'UNSIGNED': "unsigned/" if self.unsigned else ""} | |
456 | 695 |
696 @property | |
697 def platform_regex(self): | |
698 """Return the platform fragment of the URL""" | |
699 | |
700 if self.platform == 'win64': | |
701 return self.platform | |
702 | |
703 return PLATFORM_FRAGMENTS[self.platform] | |
457 | 704 |
458 def build_filename(self, binary): | 705 def build_filename(self, binary): |
459 """Return the proposed filename with extension for the binary""" | 706 """Return the proposed filename with extension for the binary""" |
460 | 707 |
461 template = '%(APP)s-%(VERSION)s-build%(BUILD)s.%(LOCALE)s.%(PLATFORM)s.% (EXT)s' | 708 template = '%(APP)s-%(VERSION)s-%(BUILD)s.%(LOCALE)s.' \ |
709 '%(PLATFORM)s%(STUB)s.%(EXT)s' | |
462 return template % {'APP': self.application, | 710 return template % {'APP': self.application, |
463 'VERSION': self.version, | 711 'VERSION': self.version, |
464 'BUILD': self.builds[self.build_index], | 712 'BUILD': self.builds[self.build_index], |
465 'LOCALE': self.locale, | 713 'LOCALE': self.locale, |
466 'PLATFORM': self.platform, | 714 'PLATFORM': self.platform, |
715 'STUB': '-stub' if self.is_stub_installer else '', | |
467 'EXT': self.extension} | 716 'EXT': self.extension} |
468 | 717 |
469 | |
470 def download(self): | 718 def download(self): |
471 """Download the specified file""" | 719 """Download the specified file""" |
472 | 720 |
473 try: | 721 try: |
474 # Try to download the signed candidate build | 722 # Try to download the signed candidate build |
475 Scraper.download(self) | 723 Scraper.download(self) |
476 except NotFoundException, e: | 724 except errors.NotFoundError, e: |
477 print str(e) | 725 self.logger.exception(str(e)) |
478 | |
479 # If the signed build cannot be downloaded and unsigned builds are | |
480 # allowed, try to download the unsigned build instead | |
481 if self.no_unsigned: | |
482 raise | |
483 else: | |
484 print "Signed build has not been found. Falling back to unsigned build." | |
485 self.unsigned = True | |
486 Scraper.download(self) | |
487 | 726 |
488 | 727 |
489 class TinderboxScraper(Scraper): | 728 class TinderboxScraper(Scraper): |
490 """Class to download a tinderbox build from the Mozilla server. | 729 """Class to download a tinderbox build from the Mozilla server. |
491 | 730 |
492 There are two ways to specify a unique build: | 731 There are two ways to specify a unique build: |
493 1. If the date (%Y-%m-%d) is given and build_number is given where | 732 1. If the date (%Y-%m-%d) is given and build_number is given where |
494 the build_number is the index of the build on the date | 733 the build_number is the index of the build on the date |
495 2. If the build timestamp (UNIX) is given, and matches a specific build. | 734 2. If the build timestamp (UNIX) is given, and matches a specific build. |
496 """ | 735 """ |
497 | 736 |
498 def __init__(self, branch='mozilla-central', build_number=None, date=None, | 737 def __init__(self, branch='mozilla-central', build_number=None, date=None, |
499 debug_build=False, *args, **kwargs): | 738 debug_build=False, *args, **kwargs): |
739 | |
740 self.branch = branch | |
741 self.build_number = build_number | |
742 self.debug_build = debug_build | |
743 self.date = date | |
744 | |
745 self.timestamp = None | |
746 # Currently any time in RelEng is based on the Pacific time zone. | |
747 self.timezone = PacificTimezone() | |
748 | |
500 Scraper.__init__(self, *args, **kwargs) | 749 Scraper.__init__(self, *args, **kwargs) |
501 | 750 |
502 self.branch = branch | 751 def get_build_info(self): |
503 self.debug_build = debug_build | 752 "Defines additional build information" |
504 self.locale_build = self.locale != 'en-US' | |
505 self.timestamp = None | |
506 | |
507 # Currently any time in RelEng is based on the Pacific time zone. | |
508 self.timezone = PacificTimezone(); | |
509 | 753 |
510 # Internally we access builds via index | 754 # Internally we access builds via index |
511 if build_number is not None: | 755 if self.build_number is not None: |
512 self.build_index = int(build_number) - 1 | 756 self.build_index = int(self.build_number) - 1 |
513 else: | 757 else: |
514 self.build_index = None | 758 self.build_index = None |
515 | 759 |
516 if date is not None: | 760 if self.date is not None: |
517 try: | 761 try: |
518 self.date = datetime.fromtimestamp(float(date), self.timezone) | 762 # date is provided in the format 2013-07-23 |
519 self.timestamp = date | 763 self.date = datetime.strptime(self.date, '%Y-%m-%d') |
520 except: | 764 except: |
521 self.date = datetime.strptime(date, '%Y-%m-%d') | 765 try: |
522 else: | 766 # date is provided as a unix timestamp |
523 self.date = None | 767 datetime.fromtimestamp(float(self.date)) |
768 self.timestamp = self.date | |
769 except: | |
770 raise ValueError('%s is not a valid date' % self.date) | |
524 | 771 |
772 self.locale_build = self.locale != 'en-US' | |
525 # For localized builds we do not have to retrieve the list of builds | 773 # For localized builds we do not have to retrieve the list of builds |
526 # because only the last build is available | 774 # because only the last build is available |
527 if not self.locale_build: | 775 if not self.locale_build: |
528 self.builds, self.build_index = self.get_build_info(self.build_index ) | 776 self.builds, self.build_index = self.get_build_info_for_index( |
529 | 777 self.build_index) |
530 try: | |
531 self.timestamp = self.builds[self.build_index] | |
532 except: | |
533 raise NotFoundException("Specified sub folder cannot be found", | |
534 self.base_url + self.monthly_build_list_ regex) | |
535 | |
536 | 778 |
537 @property | 779 @property |
538 def binary_regex(self): | 780 def binary_regex(self): |
539 """Return the regex for the binary""" | 781 """Return the regex for the binary""" |
540 | 782 |
541 regex_base_name = r'^%(APP)s-.*\.%(LOCALE)s\.' | 783 regex_base_name = r'^%(APP)s-.*\.%(LOCALE)s\.%(PLATFORM)s' |
542 regex_suffix = {'linux': r'.*\.%(EXT)s$', | 784 regex_suffix = {'linux': r'.*\.%(EXT)s$', |
543 'linux64': r'.*\.%(EXT)s$', | 785 'linux64': r'.*\.%(EXT)s$', |
544 'mac': r'.*\.%(EXT)s$', | 786 'mac': r'.*\.%(EXT)s$', |
545 'mac64': r'.*\.%(EXT)s$', | 787 'mac64': r'.*\.%(EXT)s$', |
546 'win32': r'.*(\.installer)\.%(EXT)s$', | 788 'win32': r'(\.installer%(STUB)s)?\.%(EXT)s$', |
547 'win64': r'.*(\.installer)\.%(EXT)s$'} | 789 'win64': r'(\.installer%(STUB)s)?\.%(EXT)s$'} |
548 | 790 |
549 regex = regex_base_name + regex_suffix[self.platform] | 791 regex = regex_base_name + regex_suffix[self.platform] |
550 | 792 |
551 return regex % {'APP': self.application, | 793 return regex % {'APP': self.application, |
552 'LOCALE': self.locale, | 794 'LOCALE': self.locale, |
795 'PLATFORM': PLATFORM_FRAGMENTS[self.platform], | |
796 'STUB': '-stub' if self.is_stub_installer else '', | |
553 'EXT': self.extension} | 797 'EXT': self.extension} |
554 | 798 |
555 | |
556 def build_filename(self, binary): | 799 def build_filename(self, binary): |
557 """Return the proposed filename with extension for the binary""" | 800 """Return the proposed filename with extension for the binary""" |
558 | 801 |
559 return '%(TIMESTAMP)s%(BRANCH)s%(DEBUG)s-%(NAME)s' % { | 802 return '%(TIMESTAMP)s%(BRANCH)s%(DEBUG)s-%(NAME)s' % { |
560 'TIMESTAMP': self.timestamp + '-' if self.timestamp else '', | 803 'TIMESTAMP': self.timestamp + '-' if self.timestamp else '', |
561 'BRANCH': self.branch, | 804 'BRANCH': self.branch, |
562 'DEBUG': '-debug' if self.debug_build else '', | 805 'DEBUG': '-debug' if self.debug_build else '', |
563 'NAME': binary} | 806 'NAME': binary} |
564 | |
565 | 807 |
566 @property | 808 @property |
567 def build_list_regex(self): | 809 def build_list_regex(self): |
568 """Return the regex for the folder which contains the list of builds""" | 810 """Return the regex for the folder which contains the list of builds""" |
569 | 811 |
570 regex = 'tinderbox-builds/%(BRANCH)s-%(PLATFORM)s%(L10N)s%(DEBUG)s' | 812 regex = 'tinderbox-builds/%(BRANCH)s-%(PLATFORM)s%(L10N)s%(DEBUG)s/' |
571 | 813 |
572 return regex % {'BRANCH': self.branch, | 814 return regex % { |
573 'PLATFORM': '' if self.locale_build else self.platform_r egex, | 815 'BRANCH': self.branch, |
574 'L10N': 'l10n' if self.locale_build else '', | 816 'PLATFORM': '' if self.locale_build else self.platform_regex, |
575 'DEBUG': '-debug' if self.debug_build else ''} | 817 'L10N': 'l10n' if self.locale_build else '', |
576 | 818 'DEBUG': '-debug' if self.debug_build else ''} |
577 | 819 |
578 def date_matches(self, timestamp): | 820 def date_matches(self, timestamp): |
579 """Determines whether the timestamp date is equal to the argument date"" " | 821 """ |
822 Determines whether the timestamp date is equal to the argument date | |
823 """ | |
580 | 824 |
581 if self.date is None: | 825 if self.date is None: |
582 return False | 826 return False |
583 | 827 |
584 timestamp = datetime.fromtimestamp(float(timestamp), self.timezone) | 828 timestamp = datetime.fromtimestamp(float(timestamp), self.timezone) |
585 if self.date.date() == timestamp.date(): | 829 if self.date.date() == timestamp.date(): |
586 return True | 830 return True |
587 | 831 |
588 return False | 832 return False |
589 | 833 |
590 | |
591 @property | |
592 def date_validation_regex(self): | |
593 """Return the regex for a valid date argument value""" | |
594 | |
595 return r'^\d{4}-\d{1,2}-\d{1,2}$|^\d+$' | |
596 | |
597 | |
598 def detect_platform(self): | 834 def detect_platform(self): |
599 """Detect the current platform""" | 835 """Detect the current platform""" |
600 | 836 |
601 platform = Scraper.detect_platform(self) | 837 platform = Scraper.detect_platform(self) |
602 | 838 |
603 # On OS X we have to special case the platform detection code and fallba ck | 839 # On OS X we have to special case the platform detection code and |
604 # to 64 bit builds for the en-US locale | 840 # fallback to 64 bit builds for the en-US locale |
605 if mozinfo.os == 'mac' and self.locale == 'en-US' and mozinfo.bits == 64 : | 841 if mozinfo.os == 'mac' and self.locale == 'en-US' and \ |
842 mozinfo.bits == 64: | |
606 platform = "%s%d" % (mozinfo.os, mozinfo.bits) | 843 platform = "%s%d" % (mozinfo.os, mozinfo.bits) |
607 | 844 |
608 return platform | 845 return platform |
609 | 846 |
847 def is_build_dir(self, folder_name): | |
848 """Return whether or not the given dir contains a build.""" | |
610 | 849 |
611 def get_build_info(self, build_index=None): | 850 # Cannot move up to base scraper due to parser.entries call in |
612 url = '/'.join([self.base_url, self.build_list_regex]) | 851 # get_build_info_for_index (see below) |
852 url = '%s/' % urljoin(self.base_url, self.build_list_regex, folder_name) | |
613 | 853 |
614 print 'Retrieving list of builds from %s' % url | 854 if self.application in APPLICATIONS_MULTI_LOCALE \ |
855 and self.locale != 'multi': | |
856 url = '%s/' % urljoin(url, self.locale) | |
615 | 857 |
616 # If a timestamp is given, retrieve just that build | 858 parser = self._create_directory_parser(url) |
617 regex = '^' + self.timestamp + '$' if self.timestamp else r'^\d+$' | |
618 | 859 |
619 parser = DirectoryParser(url) | 860 pattern = re.compile(self.binary_regex, re.IGNORECASE) |
620 parser.entries = parser.filter(regex) | 861 for entry in parser.entries: |
862 try: | |
863 pattern.match(entry).group() | |
864 return True | |
865 except: | |
866 # No match, continue with next entry | |
867 continue | |
868 return False | |
621 | 869 |
622 # If date is given, retrieve the subset of builds on that date | 870 def get_build_info_for_index(self, build_index=None): |
623 if self.date is not None: | 871 url = urljoin(self.base_url, self.build_list_regex) |
872 | |
873 self.logger.info('Retrieving list of builds from %s' % url) | |
874 parser = self._create_directory_parser(url) | |
875 parser.entries = parser.filter(r'^\d+$') | |
876 | |
877 if self.timestamp: | |
878 # If a timestamp is given, retrieve the folder with the timestamp | |
879 # as name | |
880 parser.entries = self.timestamp in parser.entries and \ | |
881 [self.timestamp] | |
882 | |
883 elif self.date: | |
884 # If date is given, retrieve the subset of builds on that date | |
624 parser.entries = filter(self.date_matches, parser.entries) | 885 parser.entries = filter(self.date_matches, parser.entries) |
625 | 886 |
626 if not parser.entries: | 887 if not parser.entries: |
627 message = 'No builds have been found' | 888 message = 'No builds have been found' |
628 raise NotFoundException(message, url) | 889 raise errors.NotFoundError(message, url) |
890 | |
891 self.show_matching_builds(parser.entries) | |
629 | 892 |
630 # If no index has been given, set it to the last build of the day. | 893 # If no index has been given, set it to the last build of the day. |
631 if build_index is None: | 894 if build_index is None: |
632 build_index = len(parser.entries) - 1 | 895 # Find the most recent non-empty entry. |
896 build_index = len(parser.entries) | |
897 for build in reversed(parser.entries): | |
898 build_index -= 1 | |
899 if not build_index or self.is_build_dir(build): | |
900 break | |
901 | |
902 self.logger.info('Selected build: %s' % parser.entries[build_index]) | |
633 | 903 |
634 return (parser.entries, build_index) | 904 return (parser.entries, build_index) |
635 | 905 |
636 | |
637 @property | 906 @property |
638 def path_regex(self): | 907 def path_regex(self): |
639 """Return the regex for the path""" | 908 """Return the regex for the path to the build folder""" |
640 | 909 |
641 if self.locale_build: | 910 if self.locale_build: |
642 return self.build_list_regex | 911 return self.build_list_regex |
643 | 912 |
644 return '/'.join([self.build_list_regex, self.builds[self.build_index]]) | 913 return '%s/' % urljoin(self.build_list_regex, self.builds[self.build_ind ex]) |
645 | |
646 | 914 |
647 @property | 915 @property |
648 def platform_regex(self): | 916 def platform_regex(self): |
649 """Return the platform fragment of the URL""" | 917 """Return the platform fragment of the URL""" |
650 | 918 |
651 PLATFORM_FRAGMENTS = {'linux': 'linux', | 919 PLATFORM_FRAGMENTS = {'linux': 'linux', |
652 'linux64': 'linux64', | 920 'linux64': 'linux64', |
653 'mac': 'macosx', | 921 'mac': 'macosx64', |
654 'mac64': 'macosx64', | 922 'mac64': 'macosx64', |
655 'win32': 'win32', | 923 'win32': 'win32', |
656 'win64': 'win64'} | 924 'win64': 'win64'} |
657 | 925 |
658 return PLATFORM_FRAGMENTS[self.platform] | 926 return PLATFORM_FRAGMENTS[self.platform] |
659 | 927 |
660 | 928 |
661 def cli(): | 929 class TryScraper(Scraper): |
662 """Main function for the downloader""" | 930 "Class to download a try build from the Mozilla server." |
663 | 931 |
664 BUILD_TYPES = {'release': ReleaseScraper, | 932 def __init__(self, changeset=None, debug_build=False, *args, **kwargs): |
665 'candidate': ReleaseCandidateScraper, | |
666 'daily': DailyScraper, | |
667 'tinderbox': TinderboxScraper } | |
668 | 933 |
669 usage = 'usage: %prog [options]' | 934 self.debug_build = debug_build |
670 parser = OptionParser(usage=usage, description=__doc__) | 935 self.changeset = changeset |
671 parser.add_option('--application', '-a', | |
672 dest='application', | |
673 choices=APPLICATIONS, | |
674 default='firefox', | |
675 metavar='APPLICATION', | |
676 help='The name of the application to download, ' | |
677 'default: "%default"') | |
678 parser.add_option('--directory', '-d', | |
679 dest='directory', | |
680 default=os.getcwd(), | |
681 metavar='DIRECTORY', | |
682 help='Target directory for the download, default: ' | |
683 'current working directory') | |
684 parser.add_option('--build-number', | |
685 dest='build_number', | |
686 default=None, | |
687 type="int", | |
688 metavar='BUILD_NUMBER', | |
689 help='Number of the build (for candidate, daily, ' | |
690 'and tinderbox builds)') | |
691 parser.add_option('--locale', '-l', | |
692 dest='locale', | |
693 default='en-US', | |
694 metavar='LOCALE', | |
695 help='Locale of the application, default: "%default"') | |
696 parser.add_option('--platform', '-p', | |
697 dest='platform', | |
698 choices=PLATFORM_FRAGMENTS.keys(), | |
699 metavar='PLATFORM', | |
700 help='Platform of the application') | |
701 parser.add_option('--type', '-t', | |
702 dest='type', | |
703 choices=BUILD_TYPES.keys(), | |
704 default='release', | |
705 metavar='BUILD_TYPE', | |
706 help='Type of build to download, default: "%default"') | |
707 parser.add_option('--url', | |
708 dest='url', | |
709 default=None, | |
710 metavar='URL', | |
711 help='URL to download.') | |
712 parser.add_option('--version', '-v', | |
713 dest='version', | |
714 metavar='VERSION', | |
715 help='Version of the application to be used by release and \ | |
716 candidate builds, i.e. "3.6"') | |
717 parser.add_option('--extension', | |
718 dest='extension', | |
719 default=None, | |
720 metavar='EXTENSION', | |
721 help='File extension of the build (e.g. "zip"), default:\ | |
722 the standard build extension on the platform.') | |
723 parser.add_option('--username', | |
724 dest='username', | |
725 default=None, | |
726 metavar='USERNAME', | |
727 help='Username for basic HTTP authentication.') | |
728 parser.add_option('--password', | |
729 dest='password', | |
730 default=None, | |
731 metavar='PASSWORD', | |
732 help='Password for basic HTTP authentication.') | |
733 parser.add_option('--retry-attempts', | |
734 dest='retry_attempts', | |
735 default=3, | |
736 type=int, | |
737 metavar='RETRY_ATTEMPTS', | |
738 help='Number of times the download will be attempted in ' | |
739 'the event of a failure, default: %default') | |
740 parser.add_option('--retry-delay', | |
741 dest='retry_delay', | |
742 default=10, | |
743 type=int, | |
744 metavar='RETRY_DELAY', | |
745 help='Amount of time (in seconds) to wait between retry ' | |
746 'attempts, default: %default') | |
747 | 936 |
748 # Option group for candidate builds | 937 Scraper.__init__(self, *args, **kwargs) |
749 group = OptionGroup(parser, "Candidate builds", | |
750 "Extra options for candidate builds.") | |
751 group.add_option('--no-unsigned', | |
752 dest='no_unsigned', | |
753 action="store_true", | |
754 help="Don't allow to download unsigned builds if signed\ | |
755 builds are not available") | |
756 parser.add_option_group(group) | |
757 | 938 |
758 # Option group for daily builds | 939 def get_build_info(self): |
759 group = OptionGroup(parser, "Daily builds", | 940 "Defines additional build information" |
760 "Extra options for daily builds.") | |
761 group.add_option('--branch', | |
762 dest='branch', | |
763 default='mozilla-central', | |
764 metavar='BRANCH', | |
765 help='Name of the branch, default: "%default"') | |
766 group.add_option('--build-id', | |
767 dest='build_id', | |
768 default=None, | |
769 metavar='BUILD_ID', | |
770 help='ID of the build to download') | |
771 group.add_option('--date', | |
772 dest='date', | |
773 default=None, | |
774 metavar='DATE', | |
775 help='Date of the build, default: latest build') | |
776 parser.add_option_group(group) | |
777 | 941 |
778 # Option group for tinderbox builds | 942 self.builds, self.build_index = self.get_build_info_for_index() |
779 group = OptionGroup(parser, "Tinderbox builds", | |
780 "Extra options for tinderbox builds.") | |
781 group.add_option('--debug-build', | |
782 dest='debug_build', | |
783 action="store_true", | |
784 help="Download a debug build") | |
785 parser.add_option_group(group) | |
786 | 943 |
787 # TODO: option group for nightly builds | 944 @property |
788 (options, args) = parser.parse_args() | 945 def binary_regex(self): |
946 """Return the regex for the binary""" | |
789 | 947 |
790 # Check for required options and arguments | 948 regex_base_name = r'^%(APP)s-.*\.%(LOCALE)s\.%(PLATFORM)s' |
791 # Note: Will be optional when ini file support has been landed | 949 regex_suffix = {'linux': r'.*\.%(EXT)s$', |
792 if not options.url \ | 950 'linux64': r'.*\.%(EXT)s$', |
793 and not options.type in ['daily', 'tinderbox'] \ | 951 'mac': r'.*\.%(EXT)s$', |
794 and not options.version: | 952 'mac64': r'.*\.%(EXT)s$', |
795 parser.error('The version of the application to download has not been sp ecified.') | 953 'win32': r'.*(\.installer%(STUB)s)\.%(EXT)s$', |
954 'win64': r'.*(\.installer%(STUB)s)\.%(EXT)s$'} | |
796 | 955 |
797 # Instantiate scraper and download the build | 956 regex = regex_base_name + regex_suffix[self.platform] |
798 scraper_keywords = {'application': options.application, | |
799 'locale': options.locale, | |
800 'platform': options.platform, | |
801 'version': options.version, | |
802 'directory': options.directory, | |
803 'extension': options.extension, | |
804 'authentication': { | |
805 'username': options.username, | |
806 'password': options.password}, | |
807 'retry_attempts': options.retry_attempts, | |
808 'retry_delay': options.retry_delay} | |
809 scraper_options = {'candidate': { | |
810 'build_number': options.build_number, | |
811 'no_unsigned': options.no_unsigned}, | |
812 'daily': { | |
813 'branch': options.branch, | |
814 'build_number': options.build_number, | |
815 'build_id': options.build_id, | |
816 'date': options.date}, | |
817 'tinderbox': { | |
818 'branch': options.branch, | |
819 'build_number': options.build_number, | |
820 'date': options.date, | |
821 'debug_build': options.debug_build} | |
822 } | |
823 | 957 |
824 kwargs = scraper_keywords.copy() | 958 return regex % {'APP': self.application, |
825 kwargs.update(scraper_options.get(options.type, {})) | 959 'LOCALE': self.locale, |
960 'PLATFORM': PLATFORM_FRAGMENTS[self.platform], | |
961 'STUB': '-stub' if self.is_stub_installer else '', | |
962 'EXT': self.extension} | |
826 | 963 |
827 if options.url: | 964 def build_filename(self, binary): |
828 build = DirectScraper(options.url, **kwargs) | 965 """Return the proposed filename with extension for the binary""" |
829 else: | |
830 build = BUILD_TYPES[options.type](**kwargs) | |
831 | 966 |
832 build.download() | 967 return '%(CHANGESET)s%(DEBUG)s-%(NAME)s' % { |
968 'CHANGESET': self.changeset, | |
969 'DEBUG': '-debug' if self.debug_build else '', | |
970 'NAME': binary} | |
833 | 971 |
834 if __name__ == "__main__": | 972 @property |
835 cli() | 973 def build_list_regex(self): |
974 """Return the regex for the folder which contains the list of builds""" | |
975 | |
976 return 'try-builds/' | |
977 | |
978 def detect_platform(self): | |
979 """Detect the current platform""" | |
980 | |
981 platform = Scraper.detect_platform(self) | |
982 | |
983 # On OS X we have to special case the platform detection code and | |
984 # fallback to 64 bit builds for the en-US locale | |
985 if mozinfo.os == 'mac' and self.locale == 'en-US' and \ | |
986 mozinfo.bits == 64: | |
987 platform = "%s%d" % (mozinfo.os, mozinfo.bits) | |
988 | |
989 return platform | |
990 | |
991 def get_build_info_for_index(self, build_index=None): | |
992 url = urljoin(self.base_url, self.build_list_regex) | |
993 | |
994 self.logger.info('Retrieving list of builds from %s' % url) | |
995 parser = self._create_directory_parser(url) | |
996 parser.entries = parser.filter('.*-%s$' % self.changeset) | |
997 | |
998 if not parser.entries: | |
999 raise errors.NotFoundError('No builds have been found', url) | |
1000 | |
1001 self.show_matching_builds(parser.entries) | |
1002 | |
1003 self.logger.info('Selected build: %s' % parser.entries[0]) | |
1004 | |
1005 return (parser.entries, 0) | |
1006 | |
1007 @property | |
1008 def path_regex(self): | |
1009 """Return the regex for the path to the build folder""" | |
1010 | |
1011 build_dir = 'try-%(PLATFORM)s%(DEBUG)s/' % { | |
1012 'PLATFORM': self.platform_regex, | |
1013 'DEBUG': '-debug' if self.debug_build else ''} | |
1014 return urljoin(self.build_list_regex, | |
1015 self.builds[self.build_index], | |
1016 build_dir) | |
1017 | |
1018 @property | |
1019 def platform_regex(self): | |
1020 """Return the platform fragment of the URL""" | |
1021 | |
1022 PLATFORM_FRAGMENTS = {'linux': 'linux', | |
1023 'linux64': 'linux64', | |
1024 'mac': 'macosx64', | |
1025 'mac64': 'macosx64', | |
1026 'win32': 'win32', | |
1027 'win64': 'win64'} | |
1028 | |
1029 return PLATFORM_FRAGMENTS[self.platform] | |
OLD | NEW |