Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(89)

Side by Side Diff: mozdownload/scraper.py

Issue 1451373002: Updating mozdownload (excluding tests) (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/mozdownload@master
Patch Set: Updated README.md Created 5 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « mozdownload/parser.py ('k') | mozdownload/timezones.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 #!/usr/bin/env python
2
3 # This Source Code Form is subject to the terms of the Mozilla Public 1 # This Source Code Form is subject to the terms of the Mozilla Public
4 # License, v. 2.0. If a copy of the MPL was not distributed with this 2 # License, v. 2.0. If a copy of the MPL was not distributed with this
5 # file, You can obtain one at http://mozilla.org/MPL/2.0/. 3 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
6 4
7 """Module to handle downloads for different types of Firefox and Thunderbird builds."""
8
9
10 from datetime import datetime 5 from datetime import datetime
11 from optparse import OptionParser, OptionGroup 6 import logging
12 import os 7 import os
13 import re 8 import re
9 import requests
14 import sys 10 import sys
15 import time 11 import time
16 import urllib 12 import urllib
17 import urllib2 13 from urlparse import urlparse
18 14
19 import mozinfo 15 import mozinfo
20 16
17 import errors
18
21 from parser import DirectoryParser 19 from parser import DirectoryParser
22 from timezones import PacificTimezone 20 from timezones import PacificTimezone
23 21 from utils import urljoin
24 22
25 APPLICATIONS = ['b2g', 'firefox', 'thunderbird'] 23
24 APPLICATIONS = ('b2g', 'firefox', 'fennec', 'thunderbird')
25
26 # Some applications contain all locales in a single build
27 APPLICATIONS_MULTI_LOCALE = ('b2g', 'fennec')
28
29 # Used if the application is named differently than the subfolder on the server
30 APPLICATIONS_TO_FTP_DIRECTORY = {'fennec': 'mobile'}
26 31
27 # Base URL for the path to all builds 32 # Base URL for the path to all builds
28 BASE_URL = 'https://ftp.mozilla.org/pub/mozilla.org' 33 BASE_URL = 'https://archive.mozilla.org/pub/'
29 34
30 PLATFORM_FRAGMENTS = {'linux': 'linux-i686', 35 # Chunk size when downloading a file
31 'linux64': 'linux-x86_64', 36 CHUNK_SIZE = 16 * 1024
32 'mac': 'mac', 37
33 'mac64': 'mac64', 38 DEFAULT_FILE_EXTENSIONS = {'android-api-9': 'apk',
34 'win32': 'win32', 39 'android-api-11': 'apk',
35 'win64': 'win64-x86_64'} 40 'android-x86': 'apk',
36 41 'linux': 'tar.bz2',
37 DEFAULT_FILE_EXTENSIONS = {'linux': 'tar.bz2',
38 'linux64': 'tar.bz2', 42 'linux64': 'tar.bz2',
39 'mac': 'dmg', 43 'mac': 'dmg',
40 'mac64': 'dmg', 44 'mac64': 'dmg',
41 'win32': 'exe', 45 'win32': 'exe',
42 'win64': 'exe'} 46 'win64': 'exe'}
43 47
44 class NotFoundException(Exception): 48 PLATFORM_FRAGMENTS = {'android-api-9': r'android-arm',
45 """Exception for a resource not being found (e.g. no logs)""" 49 'android-api-11': r'android-arm',
46 def __init__(self, message, location): 50 'android-x86': r'android-i386',
47 self.location = location 51 'linux': r'linux-i686',
48 Exception.__init__(self, ': '.join([message, location])) 52 'linux64': r'linux-x86_64',
53 'mac': r'mac',
54 'mac64': r'mac(64)?',
55 'win32': r'win32',
56 'win64': r'win64(-x86_64)?'}
49 57
50 58
51 class Scraper(object): 59 class Scraper(object):
52 """Generic class to download an application from the Mozilla server""" 60 """Generic class to download an application from the Mozilla server"""
53 61
54 def __init__(self, directory, version, platform=None, 62 def __init__(self, destination=None, platform=None,
55 application='firefox', locale='en-US', extension=None, 63 application='firefox', locale=None, extension=None,
56 authentication=None, retry_attempts=3, retry_delay=10): 64 username=None, password=None,
65 retry_attempts=0, retry_delay=10.,
66 is_stub_installer=False, timeout=None,
67 log_level='INFO',
68 base_url=BASE_URL):
57 69
58 # Private properties for caching 70 # Private properties for caching
59 self._target = None 71 self._filename = None
60 self._binary = None 72 self._binary = None
61 73
62 self.directory = directory 74 self.destination = destination or os.getcwd()
63 self.locale = locale 75
76 if not locale:
77 if application in APPLICATIONS_MULTI_LOCALE:
78 self.locale = 'multi'
79 else:
80 self.locale = 'en-US'
81 else:
82 self.locale = locale
83
64 self.platform = platform or self.detect_platform() 84 self.platform = platform or self.detect_platform()
65 self.version = version 85
66 self.extension = extension or DEFAULT_FILE_EXTENSIONS[self.platform] 86 self.session = requests.Session()
67 self.authentication = authentication 87 if (username, password) != (None, None):
88 self.session.auth = (username, password)
89
68 self.retry_attempts = retry_attempts 90 self.retry_attempts = retry_attempts
69 self.retry_delay = retry_delay 91 self.retry_delay = retry_delay
92 self.is_stub_installer = is_stub_installer
93 self.timeout_download = timeout
94 # this is the timeout used in requests.get. Unlike "auth",
95 # it does not work if we attach it on the session, so we handle
96 # it independently.
97 self.timeout_network = 60.
98
99 logging.basicConfig(format=' %(levelname)s | %(message)s')
100 self.logger = logging.getLogger(self.__module__)
101 self.logger.setLevel(log_level)
70 102
71 # build the base URL 103 # build the base URL
72 self.application = application 104 self.application = application
73 self.base_url = '/'.join([BASE_URL, self.application]) 105 self.base_url = '%s/' % urljoin(
74 106 base_url,
107 APPLICATIONS_TO_FTP_DIRECTORY.get(self.application, self.application)
108 )
109
110 if extension:
111 self.extension = extension
112 else:
113 if self.application in APPLICATIONS_MULTI_LOCALE and \
114 self.platform in ('win32', 'win64'):
115 # builds for APPLICATIONS_MULTI_LOCALE only exist in zip
116 self.extension = 'zip'
117 else:
118 self.extension = DEFAULT_FILE_EXTENSIONS[self.platform]
119
120 attempt = 0
121 while True:
122 attempt += 1
123 try:
124 self.get_build_info()
125 break
126 except (errors.NotFoundError, requests.exceptions.RequestException), e:
127 if self.retry_attempts > 0:
128 # Log only if multiple attempts are requested
129 self.logger.warning("Build not found: '%s'" % e.message)
130 self.logger.info('Will retry in %s seconds...' %
131 (self.retry_delay))
132 time.sleep(self.retry_delay)
133 self.logger.info("Retrying... (attempt %s)" % attempt)
134
135 if attempt >= self.retry_attempts:
136 if hasattr(e, 'response') and \
137 e.response.status_code == 404:
138 message = "Specified build has not been found"
139 raise errors.NotFoundError(message, e.response.url)
140 else:
141 raise
142
143 def _create_directory_parser(self, url):
144 return DirectoryParser(url,
145 session=self.session,
146 timeout=self.timeout_network)
75 147
76 @property 148 @property
77 def binary(self): 149 def binary(self):
78 """Return the name of the build""" 150 """Return the name of the build"""
79 151
80 if self._binary is None: 152 attempt = 0
81 # Retrieve all entries from the remote virtual folder 153
82 parser = DirectoryParser(self.path) 154 while self._binary is None:
83 if not parser.entries: 155 attempt += 1
84 raise NotFoundException('No entries found', self.path) 156 try:
85 157 # Retrieve all entries from the remote virtual folder
86 # Download the first matched directory entry 158 parser = self._create_directory_parser(self.path)
87 pattern = re.compile(self.binary_regex, re.IGNORECASE) 159 if not parser.entries:
88 for entry in parser.entries: 160 raise errors.NotFoundError('No entries found', self.path)
89 try: 161
90 self._binary = pattern.match(entry).group() 162 # Download the first matched directory entry
91 break 163 pattern = re.compile(self.binary_regex, re.IGNORECASE)
92 except: 164 for entry in parser.entries:
93 # No match, continue with next entry 165 try:
94 continue 166 self._binary = pattern.match(entry).group()
95 167 break
96 if self._binary is None: 168 except:
97 raise NotFoundException("Binary not found in folder", self.path) 169 # No match, continue with next entry
98 else: 170 continue
99 return self._binary 171 else:
100 172 raise errors.NotFoundError("Binary not found in folder",
173 self.path)
174 except (errors.NotFoundError, requests.exceptions.RequestException), e:
175 if self.retry_attempts > 0:
176 # Log only if multiple attempts are requested
177 self.logger.warning("Build not found: '%s'" % e.message)
178 self.logger.info('Will retry in %s seconds...' %
179 (self.retry_delay))
180 time.sleep(self.retry_delay)
181 self.logger.info("Retrying... (attempt %s)" % attempt)
182
183 if attempt >= self.retry_attempts:
184 if hasattr(e, 'response') and \
185 e.response.status_code == 404:
186 message = "Specified build has not been found"
187 raise errors.NotFoundError(message, self.path)
188 else:
189 raise
190
191 return self._binary
101 192
102 @property 193 @property
103 def binary_regex(self): 194 def binary_regex(self):
104 """Return the regex for the binary filename""" 195 """Return the regex for the binary filename"""
105 196
106 raise NotImplementedError(sys._getframe(0).f_code.co_name) 197 raise errors.NotImplementedError(sys._getframe(0).f_code.co_name)
107 198
108 199 @property
109 @property 200 def url(self):
110 def final_url(self): 201 """Return the URL of the build"""
111 """Return the final URL of the build""" 202
112 203 return urljoin(self.path, self.binary)
113 return '/'.join([self.path, self.binary])
114
115 204
116 @property 205 @property
117 def path(self): 206 def path(self):
118 """Return the path to the build""" 207 """Return the path to the build folder"""
119 208
120 return '/'.join([self.base_url, self.path_regex]) 209 return urljoin(self.base_url, self.path_regex)
121
122 210
123 @property 211 @property
124 def path_regex(self): 212 def path_regex(self):
125 """Return the regex for the path to the build""" 213 """Return the regex for the path to the build folder"""
126 214
127 raise NotImplementedError(sys._getframe(0).f_code.co_name) 215 raise errors.NotImplementedError(sys._getframe(0).f_code.co_name)
128
129 216
130 @property 217 @property
131 def platform_regex(self): 218 def platform_regex(self):
132 """Return the platform fragment of the URL""" 219 """Return the platform fragment of the URL"""
133 220
134 return PLATFORM_FRAGMENTS[self.platform]; 221 return PLATFORM_FRAGMENTS[self.platform]
135 222
136 223 @property
137 @property 224 def filename(self):
138 def target(self): 225 """Return the local filename of the build"""
139 """Return the target file name of the build""" 226
140 227 if self._filename is None:
141 if self._target is None: 228 if os.path.splitext(self.destination)[1]:
142 self._target = os.path.join(self.directory, 229 # If the filename has been given make use of it
143 self.build_filename(self.binary)) 230 target_file = self.destination
144 return self._target 231 else:
145 232 # Otherwise create it from the build details
233 target_file = os.path.join(self.destination,
234 self.build_filename(self.binary))
235
236 self._filename = os.path.abspath(target_file)
237
238 return self._filename
239
240 def get_build_info(self):
241 """Returns additional build information in subclasses if necessary"""
242 pass
146 243
147 def build_filename(self, binary): 244 def build_filename(self, binary):
148 """Return the proposed filename with extension for the binary""" 245 """Return the proposed filename with extension for the binary"""
149 246
150 raise NotImplementedError(sys._getframe(0).f_code.co_name) 247 raise errors.NotImplementedError(sys._getframe(0).f_code.co_name)
151
152 248
153 def detect_platform(self): 249 def detect_platform(self):
154 """Detect the current platform""" 250 """Detect the current platform"""
155 251
156 # For Mac and Linux 32bit we do not need the bits appended 252 # For Mac and Linux 32bit we do not need the bits appended
157 if mozinfo.os == 'mac' or (mozinfo.os == 'linux' and mozinfo.bits == 32): 253 if mozinfo.os == 'mac' or \
254 (mozinfo.os == 'linux' and mozinfo.bits == 32):
158 return mozinfo.os 255 return mozinfo.os
159 else: 256 else:
160 return "%s%d" % (mozinfo.os, mozinfo.bits) 257 return "%s%d" % (mozinfo.os, mozinfo.bits)
161 258
162
163 def download(self): 259 def download(self):
164 """Download the specified file""" 260 """Download the specified file"""
165 261
166 attempts = 0 262 def total_seconds(td):
167 263 # Keep backward compatibility with Python 2.6 which doesn't have
168 if not os.path.isdir(self.directory): 264 # this method
169 os.makedirs(self.directory) 265 if hasattr(td, 'total_seconds'):
266 return td.total_seconds()
267 else:
268 return (td.microseconds +
269 (td.seconds + td.days * 24 * 3600) * 10 ** 6) / 10 ** 6
270
271 attempt = 0
170 272
171 # Don't re-download the file 273 # Don't re-download the file
172 if os.path.isfile(os.path.abspath(self.target)): 274 if os.path.isfile(os.path.abspath(self.filename)):
173 print "File has already been downloaded: %s" % (self.target) 275 self.logger.info("File has already been downloaded: %s" %
174 return 276 (self.filename))
175 277 return self.filename
176 print 'Downloading from: %s' % (urllib.unquote(self.final_url)) 278
177 tmp_file = self.target + ".part" 279 directory = os.path.dirname(self.filename)
178 280 if not os.path.isdir(directory):
179 if self.authentication \ 281 os.makedirs(directory)
180 and self.authentication['username'] \ 282
181 and self.authentication['password']: 283 self.logger.info('Downloading from: %s' %
182 password_mgr = urllib2.HTTPPasswordMgrWithDefaultRealm() 284 (urllib.unquote(self.url)))
183 password_mgr.add_password(None, 285 self.logger.info('Saving as: %s' % self.filename)
184 self.final_url, 286
185 self.authentication['username'], 287 tmp_file = self.filename + ".part"
186 self.authentication['password'])
187 handler = urllib2.HTTPBasicAuthHandler(password_mgr)
188 opener = urllib2.build_opener(urllib2.HTTPHandler, handler)
189 urllib2.install_opener(opener)
190 288
191 while True: 289 while True:
192 attempts += 1 290 attempt += 1
193 try: 291 try:
194 r = urllib2.urlopen(self.final_url) 292 start_time = datetime.now()
195 CHUNK = 16 * 1024 293
294 # Enable streaming mode so we can download content in chunks
295 r = self.session.get(self.url, stream=True)
296 r.raise_for_status()
297
298 content_length = r.headers.get('Content-length')
299 # ValueError: Value out of range if only total_size given
300 if content_length:
301 total_size = int(content_length.strip())
302 max_value = ((total_size / CHUNK_SIZE) + 1) * CHUNK_SIZE
303
304 bytes_downloaded = 0
305
196 with open(tmp_file, 'wb') as f: 306 with open(tmp_file, 'wb') as f:
197 for chunk in iter(lambda: r.read(CHUNK), ''): 307 for chunk in iter(lambda: r.raw.read(CHUNK_SIZE), ''):
198 f.write(chunk) 308 f.write(chunk)
309 bytes_downloaded += CHUNK_SIZE
310
311 t1 = total_seconds(datetime.now() - start_time)
312 if self.timeout_download and \
313 t1 >= self.timeout_download:
314 raise errors.TimeoutError
199 break 315 break
200 except (urllib2.HTTPError, urllib2.URLError): 316 except (requests.exceptions.RequestException, errors.TimeoutError), e:
201 if tmp_file and os.path.isfile(tmp_file): 317 if tmp_file and os.path.isfile(tmp_file):
202 os.remove(tmp_file) 318 os.remove(tmp_file)
203 print 'Download failed! Retrying... (attempt %s)' % attempts 319 if self.retry_attempts > 0:
204 if attempts >= self.retry_attempts: 320 # Log only if multiple attempts are requested
321 self.logger.warning('Download failed: "%s"' % str(e))
322 self.logger.info('Will retry in %s seconds...' %
323 (self.retry_delay))
324 time.sleep(self.retry_delay)
325 self.logger.info("Retrying... (attempt %s)" % attempt)
326 if attempt >= self.retry_attempts:
205 raise 327 raise
206 time.sleep(self.retry_delay) 328 time.sleep(self.retry_delay)
207 329
208 os.rename(tmp_file, self.target) 330 os.rename(tmp_file, self.filename)
331
332 return self.filename
333
334 def show_matching_builds(self, builds):
335 """Output the matching builds"""
336 self.logger.info('Found %s build%s: %s' % (
337 len(builds),
338 len(builds) > 1 and 's' or '',
339 len(builds) > 10 and
340 ' ... '.join([', '.join(builds[:5]), ', '.join(builds[-5:])]) or
341 ', '.join(builds)))
209 342
210 343
211 class DailyScraper(Scraper): 344 class DailyScraper(Scraper):
212 """Class to download a daily build from the Mozilla server""" 345 """Class to download a daily build from the Mozilla server"""
213 346
214 def __init__(self, branch='mozilla-central', build_id=None, date=None, 347 def __init__(self, branch='mozilla-central', build_id=None, date=None,
215 build_number=None, *args, **kwargs): 348 build_number=None, *args, **kwargs):
216 349
350 self.branch = branch
351 self.build_id = build_id
352 self.date = date
353 self.build_number = build_number
354
217 Scraper.__init__(self, *args, **kwargs) 355 Scraper.__init__(self, *args, **kwargs)
218 self.branch = branch 356
357 def get_build_info(self):
358 """Defines additional build information"""
219 359
220 # Internally we access builds via index 360 # Internally we access builds via index
221 if build_number is not None: 361 if self.build_number is not None:
222 self.build_index = int(build_number) - 1 362 self.build_index = int(self.build_number) - 1
223 else: 363 else:
224 self.build_index = None 364 self.build_index = None
225 365
226 if build_id: 366 if self.build_id:
227 # A build id has been specified. Split up its components so the date 367 # A build id has been specified. Split up its components so the
228 # and time can be extracted: '20111212042025' -> '2011-12-12 04:20:25' 368 # date and time can be extracted:
229 self.date = datetime.strptime(build_id, '%Y%m%d%H%M%S') 369 # '20111212042025' -> '2011-12-12 04:20:25'
230 self.builds, self.build_index = self.get_build_info_for_date(self.date, 370 self.date = datetime.strptime(self.build_id, '%Y%m%d%H%M%S')
231 has_time=True)
232 371
233 elif date: 372 elif self.date:
234 # A date (without time) has been specified. Use its value and the 373 # A date (without time) has been specified. Use its value and the
235 # build index to find the requested build for that day. 374 # build index to find the requested build for that day.
236 self.date = datetime.strptime(date, '%Y-%m-%d') 375 try:
237 self.builds, self.build_index = self.get_build_info_for_date(self.date, 376 self.date = datetime.strptime(self.date, '%Y-%m-%d')
238 build_index=self.build_index) 377 except:
239 378 raise ValueError('%s is not a valid date' % self.date)
240 else: 379 else:
241 # If no build id nor date have been specified the lastest available 380 # If no build id nor date have been specified the latest available
242 # build of the given branch has to be identified. We also have to 381 # build of the given branch has to be identified. We also have to
243 # retrieve the date of the build via its build id. 382 # retrieve the date of the build via its build id.
244 url = '%s/nightly/latest-%s/' % (self.base_url, self.branch) 383 self.date = self.get_latest_build_date()
245 384
246 print 'Retrieving the build status file from %s' % url 385 self.builds, self.build_index = self.get_build_info_for_date(
247 parser = DirectoryParser(url) 386 self.date, self.build_index)
248 parser.entries = parser.filter(r'.*%s\.txt' % self.platform_regex)
249 if not parser.entries:
250 message = 'Status file for %s build cannot be found' % self.platform_regex
251 raise NotFoundException(message, url)
252 387
253 # Read status file for the platform, retrieve build id, and convert to a date 388 def get_latest_build_date(self):
254 status_file = url + parser.entries[-1] 389 """ Returns date of latest available nightly build."""
255 f = urllib.urlopen(status_file) 390 if self.application not in ('fennec'):
256 self.date = datetime.strptime(f.readline().strip(), '%Y%m%d%H%M%S') 391 url = urljoin(self.base_url, 'nightly', 'latest-%s/' % self.branch)
257 self.builds, self.build_index = self.get_build_info_for_date(self.date, 392 else:
258 has_time=True) 393 url = urljoin(self.base_url, 'nightly', 'latest-%s-%s/' %
394 (self.branch, self.platform))
259 395
396 self.logger.info('Retrieving the build status file from %s' % url)
397 parser = self._create_directory_parser(url)
398 parser.entries = parser.filter(r'.*%s\.txt' % self.platform_regex)
399 if not parser.entries:
400 message = 'Status file for %s build cannot be found' % \
401 self.platform_regex
402 raise errors.NotFoundError(message, url)
260 403
261 def get_build_info_for_date(self, date, has_time=False, build_index=None): 404 # Read status file for the platform, retrieve build id,
262 url = '/'.join([self.base_url, self.monthly_build_list_regex]) 405 # and convert to a date
406 headers = {'Cache-Control': 'max-age=0'}
263 407
264 print 'Retrieving list of builds from %s' % url 408 r = self.session.get(url + parser.entries[-1], headers=headers)
265 parser = DirectoryParser(url) 409 try:
266 regex = r'%(DATE)s-(\d+-)+%(BRANCH)s%(L10N)s$' % { 410 r.raise_for_status()
267 'DATE': date.strftime('%Y-%m-%d'), 411
268 'BRANCH': self.branch, 412 return datetime.strptime(r.text.split('\n')[0], '%Y%m%d%H%M%S')
269 'L10N': '' if self.locale == 'en-US' else '-l10n'} 413 finally:
414 r.close()
415
416 def is_build_dir(self, folder_name):
417 """Return whether or not the given dir contains a build."""
418
419 # Cannot move up to base scraper due to parser.entries call in
420 # get_build_info_for_date (see below)
421
422 url = '%s/' % urljoin(self.base_url, self.monthly_build_list_regex, folder_name)
423 if self.application in APPLICATIONS_MULTI_LOCALE \
424 and self.locale != 'multi':
425 url = '%s/' % urljoin(url, self.locale)
426
427 parser = self._create_directory_parser(url)
428
429 pattern = re.compile(self.binary_regex, re.IGNORECASE)
430 for entry in parser.entries:
431 try:
432 pattern.match(entry).group()
433 return True
434 except:
435 # No match, continue with next entry
436 continue
437 return False
438
439 def get_build_info_for_date(self, date, build_index=None):
440 url = urljoin(self.base_url, self.monthly_build_list_regex)
441 has_time = date and date.time()
442
443 self.logger.info('Retrieving list of builds from %s' % url)
444 parser = self._create_directory_parser(url)
445 regex = r'%(DATE)s-(\d+-)+%(BRANCH)s%(L10N)s%(PLATFORM)s$' % {
446 'DATE': date.strftime('%Y-%m-%d'),
447 'BRANCH': self.branch,
448 # ensure to select the correct subfolder for localized builds
449 'L10N': '' if self.locale in ('en-US', 'multi') else '(-l10n)?',
450 'PLATFORM': '' if self.application not in (
451 'fennec') else '-' + self.platform
452 }
453
270 parser.entries = parser.filter(regex) 454 parser.entries = parser.filter(regex)
271 if not parser.entries: 455 parser.entries = parser.filter(self.is_build_dir)
272 message = 'Folder for builds on %s has not been found' % self.date.strftime('%Y-%m-%d')
273 raise NotFoundException(message, url)
274 456
275 if has_time: 457 if has_time:
276 # If a time is included in the date, use it to determine the build's index 458 # If a time is included in the date, use it to determine the
459 # build's index
277 regex = r'.*%s.*' % date.strftime('%H-%M-%S') 460 regex = r'.*%s.*' % date.strftime('%H-%M-%S')
278 build_index = parser.entries.index(parser.filter(regex)[0]) 461 parser.entries = parser.filter(regex)
279 else: 462
280 # If no index has been given, set it to the last build of the day. 463 if not parser.entries:
281 if build_index is None: 464 date_format = '%Y-%m-%d-%H-%M-%S' if has_time else '%Y-%m-%d'
282 build_index = len(parser.entries) - 1 465 message = 'Folder for builds on %s has not been found' % \
466 self.date.strftime(date_format)
467 raise errors.NotFoundError(message, url)
468
469 # If no index has been given, set it to the last build of the day.
470 self.show_matching_builds(parser.entries)
471 # If no index has been given, set it to the last build of the day.
472 if build_index is None:
473 # Find the most recent non-empty entry.
474 build_index = len(parser.entries)
475 for build in reversed(parser.entries):
476 build_index -= 1
477 if not build_index or self.is_build_dir(build):
478 break
479 self.logger.info('Selected build: %s' % parser.entries[build_index])
283 480
284 return (parser.entries, build_index) 481 return (parser.entries, build_index)
285 482
286
287 @property 483 @property
288 def binary_regex(self): 484 def binary_regex(self):
289 """Return the regex for the binary""" 485 """Return the regex for the binary"""
290 486
291 regex_base_name = r'^%(APP)s-.*\.%(LOCALE)s\.%(PLATFORM)s' 487 regex_base_name = r'^%(APP)s-.*\.%(LOCALE)s\.%(PLATFORM)s'
292 regex_suffix = {'linux': r'\.%(EXT)s$', 488 regex_suffix = {'android-api-9': r'\.%(EXT)s$',
489 'android-api-11': r'\.%(EXT)s$',
490 'android-x86': r'\.%(EXT)s$',
491 'linux': r'\.%(EXT)s$',
293 'linux64': r'\.%(EXT)s$', 492 'linux64': r'\.%(EXT)s$',
294 'mac': r'\.%(EXT)s$', 493 'mac': r'\.%(EXT)s$',
295 'mac64': r'\.%(EXT)s$', 494 'mac64': r'\.%(EXT)s$',
296 'win32': r'(\.installer)\.%(EXT)s$', 495 'win32': r'(\.installer%(STUB)s)?\.%(EXT)s$',
297 'win64': r'(\.installer)\.%(EXT)s$'} 496 'win64': r'(\.installer%(STUB)s)?\.%(EXT)s$'}
298 regex = regex_base_name + regex_suffix[self.platform] 497 regex = regex_base_name + regex_suffix[self.platform]
299 498
300 return regex % {'APP': self.application, 499 return regex % {'APP': self.application,
301 'LOCALE': self.locale, 500 'LOCALE': self.locale,
302 'PLATFORM': self.platform_regex, 501 'PLATFORM': self.platform_regex,
303 'EXT': self.extension} 502 'EXT': self.extension,
304 503 'STUB': '-stub' if self.is_stub_installer else ''}
305 504
306 def build_filename(self, binary): 505 def build_filename(self, binary):
307 """Return the proposed filename with extension for the binary""" 506 """Return the proposed filename with extension for the binary"""
308 507
309 try: 508 try:
310 # Get exact timestamp of the build to build the local file name 509 # Get exact timestamp of the build to build the local file name
311 folder = self.builds[self.build_index] 510 folder = self.builds[self.build_index]
312 timestamp = re.search('([\d\-]+)-\D.*', folder).group(1) 511 timestamp = re.search('([\d\-]+)-\D.*', folder).group(1)
313 except: 512 except:
314 # If it's not available use the build's date 513 # If it's not available use the build's date
315 timestamp = self.date.strftime('%Y-%m-%d') 514 timestamp = self.date.strftime('%Y-%m-%d')
316 515
317 return '%(TIMESTAMP)s-%(BRANCH)s-%(NAME)s' % { 516 return '%(TIMESTAMP)s-%(BRANCH)s-%(NAME)s' % {
318 'TIMESTAMP': timestamp, 517 'TIMESTAMP': timestamp,
319 'BRANCH': self.branch, 518 'BRANCH': self.branch,
320 'NAME': binary} 519 'NAME': binary}
321
322 520
323 @property 521 @property
324 def monthly_build_list_regex(self): 522 def monthly_build_list_regex(self):
325 """Return the regex for the folder which contains the builds of a month.""" 523 """Return the regex for the folder containing builds of a month."""
326 524
327 # Regex for possible builds for the given date 525 # Regex for possible builds for the given date
328 return r'nightly/%(YEAR)s/%(MONTH)s/' % { 526 return r'nightly/%(YEAR)s/%(MONTH)s/' % {
329 'YEAR': self.date.year, 527 'YEAR': self.date.year,
330 'MONTH': str(self.date.month).zfill(2) } 528 'MONTH': str(self.date.month).zfill(2)}
331
332 529
333 @property 530 @property
334 def path_regex(self): 531 def path_regex(self):
335 """Return the regex for the path""" 532 """Return the regex for the path to the build folder"""
336 533
337 try: 534 try:
338 return self.monthly_build_list_regex + self.builds[self.build_index] 535 path = '%s/' % urljoin(self.monthly_build_list_regex,
536 self.builds[self.build_index])
537 if self.application in APPLICATIONS_MULTI_LOCALE \
538 and self.locale != 'multi':
539 path = '%s/' % urljoin(path, self.locale)
540 return path
339 except: 541 except:
340 raise NotFoundException("Specified sub folder cannot be found", 542 folder = urljoin(self.base_url, self.monthly_build_list_regex)
341 self.base_url + self.monthly_build_list_regex) 543 raise errors.NotFoundError("Specified sub folder cannot be found",
544 folder)
342 545
343 546
344 class DirectScraper(Scraper): 547 class DirectScraper(Scraper):
345 """Class to download a file from a specified URL""" 548 """Class to download a file from a specified URL"""
346 549
347 def __init__(self, url, *args, **kwargs): 550 def __init__(self, url, *args, **kwargs):
551 self._url = url
552
348 Scraper.__init__(self, *args, **kwargs) 553 Scraper.__init__(self, *args, **kwargs)
349 554
350 self.url = url 555 @property
556 def filename(self):
557 if os.path.splitext(self.destination)[1]:
558 # If the filename has been given make use of it
559 target_file = self.destination
560 else:
561 # Otherwise determine it from the url.
562 parsed_url = urlparse(self.url)
563 source_filename = (parsed_url.path.rpartition('/')[-1] or
564 parsed_url.hostname)
565 target_file = os.path.join(self.destination, source_filename)
566
567 return os.path.abspath(target_file)
351 568
352 @property 569 @property
353 def target(self): 570 def url(self):
354 return urllib.splitquery(self.final_url)[0].rpartition('/')[-1] 571 return self._url
355
356 @property
357 def final_url(self):
358 return self.url
359 572
360 573
361 class ReleaseScraper(Scraper): 574 class ReleaseScraper(Scraper):
362 """Class to download a release build from the Mozilla server""" 575 """Class to download a release build from the Mozilla server"""
363 576
364 def __init__(self, *args, **kwargs): 577 def __init__(self, version, *args, **kwargs):
578 self.version = version
579
365 Scraper.__init__(self, *args, **kwargs) 580 Scraper.__init__(self, *args, **kwargs)
366 581
367 @property 582 @property
368 def binary_regex(self): 583 def binary_regex(self):
369 """Return the regex for the binary""" 584 """Return the regex for the binary"""
370 585
371 regex = {'linux': r'^%(APP)s-.*\.%(EXT)s$', 586 regex = {'linux': r'^%(APP)s-.*\.%(EXT)s$',
372 'linux64': r'^%(APP)s-.*\.%(EXT)s$', 587 'linux64': r'^%(APP)s-.*\.%(EXT)s$',
373 'mac': r'^%(APP)s.*\.%(EXT)s$', 588 'mac': r'^%(APP)s.*\.%(EXT)s$',
374 'mac64': r'^%(APP)s.*\.%(EXT)s$', 589 'mac64': r'^%(APP)s.*\.%(EXT)s$',
375 'win32': r'^%(APP)s.*\.%(EXT)s$', 590 'win32': r'^%(APP)s.*%(STUB)s.*\.%(EXT)s$',
376 'win64': r'^%(APP)s.*\.%(EXT)s$'} 591 'win64': r'^%(APP)s.*%(STUB)s.*\.%(EXT)s$'}
377 return regex[self.platform] % {'APP': self.application, 592 return regex[self.platform] % {
378 'EXT': self.extension} 593 'APP': self.application,
379 594 'EXT': self.extension,
595 'STUB': 'Stub' if self.is_stub_installer else ''}
380 596
381 @property 597 @property
382 def path_regex(self): 598 def path_regex(self):
383 """Return the regex for the path""" 599 """Return the regex for the path to the build folder"""
384 600
385 regex = r'releases/%(VERSION)s/%(PLATFORM)s/%(LOCALE)s' 601 regex = r'releases/%(VERSION)s/%(PLATFORM)s/%(LOCALE)s/'
386 return regex % {'LOCALE': self.locale, 602 return regex % {'LOCALE': self.locale,
387 'PLATFORM': self.platform_regex, 603 'PLATFORM': self.platform_regex,
388 'VERSION': self.version} 604 'VERSION': self.version}
389 605
606 @property
607 def platform_regex(self):
608 """Return the platform fragment of the URL"""
609
610 if self.platform == 'win64':
611 return self.platform
612
613 return PLATFORM_FRAGMENTS[self.platform]
390 614
391 def build_filename(self, binary): 615 def build_filename(self, binary):
392 """Return the proposed filename with extension for the binary""" 616 """Return the proposed filename with extension for the binary"""
393 617
394 template = '%(APP)s-%(VERSION)s.%(LOCALE)s.%(PLATFORM)s.%(EXT)s' 618 template = '%(APP)s-%(VERSION)s.%(LOCALE)s.%(PLATFORM)s%(STUB)s' \
619 '.%(EXT)s'
395 return template % {'APP': self.application, 620 return template % {'APP': self.application,
396 'VERSION': self.version, 621 'VERSION': self.version,
397 'LOCALE': self.locale, 622 'LOCALE': self.locale,
398 'PLATFORM': self.platform, 623 'PLATFORM': self.platform,
624 'STUB': '-stub' if self.is_stub_installer else '',
399 'EXT': self.extension} 625 'EXT': self.extension}
400 626
401 627
402 class ReleaseCandidateScraper(ReleaseScraper): 628 class ReleaseCandidateScraper(ReleaseScraper):
403 """Class to download a release candidate build from the Mozilla server""" 629 """Class to download a release candidate build from the Mozilla server"""
404 630
405 def __init__(self, build_number=None, no_unsigned=False, *args, **kwargs): 631 def __init__(self, version, build_number=None, *args, **kwargs):
632 self.version = version
633 self.build_number = build_number
634
406 Scraper.__init__(self, *args, **kwargs) 635 Scraper.__init__(self, *args, **kwargs)
407 636
637 def get_build_info(self):
638 """Defines additional build information"""
639
408 # Internally we access builds via index 640 # Internally we access builds via index
409 if build_number is not None: 641 url = urljoin(self.base_url, self.candidate_build_list_regex)
410 self.build_index = int(build_number) - 1 642 self.logger.info('Retrieving list of candidate builds from %s' % url)
643
644 parser = self._create_directory_parser(url)
645 if not parser.entries:
646 message = 'Folder for specific candidate builds at %s has not' \
647 'been found' % url
648 raise errors.NotFoundError(message, url)
649
650 self.show_matching_builds(parser.entries)
651 self.builds = parser.entries
652 self.build_index = len(parser.entries) - 1
653
654 if self.build_number and \
655 ('build%s' % self.build_number) in self.builds:
656 self.builds = ['build%s' % self.build_number]
657 self.build_index = 0
658 self.logger.info('Selected build: build%s' % self.build_number)
411 else: 659 else:
412 self.build_index = None 660 self.logger.info('Selected build: build%d' %
413 661 (self.build_index + 1))
414 self.builds, self.build_index = self.get_build_info_for_version(self.ver sion, self.build_index)
415
416 self.no_unsigned = no_unsigned
417 self.unsigned = False
418
419
420 def get_build_info_for_version(self, version, build_index=None):
421 url = '/'.join([self.base_url, self.candidate_build_list_regex])
422
423 print 'Retrieving list of candidate builds from %s' % url
424 parser = DirectoryParser(url)
425 if not parser.entries:
426 message = 'Folder for specific candidate builds at has not been foun d'
427 raise NotFoundException(message, url)
428
429 # If no index has been given, set it to the last build of the given vers ion.
430 if build_index is None:
431 build_index = len(parser.entries) - 1
432
433 return (parser.entries, build_index)
434
435 662
436 @property 663 @property
437 def candidate_build_list_regex(self): 664 def candidate_build_list_regex(self):
438 """Return the regex for the folder which contains the builds of 665 """Return the regex for the folder which contains the builds of
439 a candidate build.""" 666 a candidate build."""
440 667
441 # Regex for possible builds for the given date 668 # Regex for possible builds for the given date
442 return r'nightly/%(VERSION)s-candidates/' % { 669 return r'candidates/%(VERSION)s-candidates/' % {
443 'VERSION': self.version } 670 'VERSION': self.version}
444
445 671
446 @property 672 @property
447 def path_regex(self): 673 def path_regex(self):
448 """Return the regex for the path""" 674 """Return the regex for the path to the build folder"""
449 675
450 regex = r'%(PREFIX)s%(BUILD)s/%(UNSIGNED)s%(PLATFORM)s/%(LOCALE)s' 676 regex = r'%(PREFIX)s%(BUILD)s/%(PLATFORM)s/%(LOCALE)s/'
451 return regex % {'PREFIX': self.candidate_build_list_regex, 677 return regex % {'PREFIX': self.candidate_build_list_regex,
452 'BUILD': self.builds[self.build_index], 678 'BUILD': self.builds[self.build_index],
453 'LOCALE': self.locale, 679 'LOCALE': self.locale,
454 'PLATFORM': self.platform_regex, 680 'PLATFORM': self.platform_regex}
455 'UNSIGNED': "unsigned/" if self.unsigned else ""}
456 681
682 @property
683 def platform_regex(self):
684 """Return the platform fragment of the URL"""
685
686 if self.platform == 'win64':
687 return self.platform
688
689 return PLATFORM_FRAGMENTS[self.platform]
457 690
458 def build_filename(self, binary): 691 def build_filename(self, binary):
459 """Return the proposed filename with extension for the binary""" 692 """Return the proposed filename with extension for the binary"""
460 693
461 template = '%(APP)s-%(VERSION)s-build%(BUILD)s.%(LOCALE)s.%(PLATFORM)s.% (EXT)s' 694 template = '%(APP)s-%(VERSION)s-%(BUILD)s.%(LOCALE)s.' \
695 '%(PLATFORM)s%(STUB)s.%(EXT)s'
462 return template % {'APP': self.application, 696 return template % {'APP': self.application,
463 'VERSION': self.version, 697 'VERSION': self.version,
464 'BUILD': self.builds[self.build_index], 698 'BUILD': self.builds[self.build_index],
465 'LOCALE': self.locale, 699 'LOCALE': self.locale,
466 'PLATFORM': self.platform, 700 'PLATFORM': self.platform,
701 'STUB': '-stub' if self.is_stub_installer else '',
467 'EXT': self.extension} 702 'EXT': self.extension}
468 703
469
470 def download(self): 704 def download(self):
471 """Download the specified file""" 705 """Download the specified file"""
472 706
473 try: 707 try:
474 # Try to download the signed candidate build 708 # Try to download the signed candidate build
475 Scraper.download(self) 709 Scraper.download(self)
476 except NotFoundException, e: 710 except errors.NotFoundError, e:
477 print str(e) 711 self.logger.exception(str(e))
478
479 # If the signed build cannot be downloaded and unsigned builds are
480 # allowed, try to download the unsigned build instead
481 if self.no_unsigned:
482 raise
483 else:
484 print "Signed build has not been found. Falling back to unsigned build."
485 self.unsigned = True
486 Scraper.download(self)
487 712
488 713
489 class TinderboxScraper(Scraper): 714 class TinderboxScraper(Scraper):
490 """Class to download a tinderbox build from the Mozilla server. 715 """Class to download a tinderbox build from the Mozilla server.
491 716
492 There are two ways to specify a unique build: 717 There are two ways to specify a unique build:
493 1. If the date (%Y-%m-%d) is given and build_number is given where 718 1. If the date (%Y-%m-%d) is given and build_number is given where
494 the build_number is the index of the build on the date 719 the build_number is the index of the build on the date
495 2. If the build timestamp (UNIX) is given, and matches a specific build. 720 2. If the build timestamp (UNIX) is given, and matches a specific build.
496 """ 721 """
497 722
498 def __init__(self, branch='mozilla-central', build_number=None, date=None, 723 def __init__(self, branch='mozilla-central', build_number=None, date=None,
499 debug_build=False, *args, **kwargs): 724 debug_build=False, *args, **kwargs):
725
726 self.branch = branch
727 self.build_number = build_number
728 self.debug_build = debug_build
729 self.date = date
730
731 self.timestamp = None
732 # Currently any time in RelEng is based on the Pacific time zone.
733 self.timezone = PacificTimezone()
734
500 Scraper.__init__(self, *args, **kwargs) 735 Scraper.__init__(self, *args, **kwargs)
501 736
502 self.branch = branch 737 def get_build_info(self):
503 self.debug_build = debug_build 738 "Defines additional build information"
504 self.locale_build = self.locale != 'en-US'
505 self.timestamp = None
506
507 # Currently any time in RelEng is based on the Pacific time zone.
508 self.timezone = PacificTimezone();
509 739
510 # Internally we access builds via index 740 # Internally we access builds via index
511 if build_number is not None: 741 if self.build_number is not None:
512 self.build_index = int(build_number) - 1 742 self.build_index = int(self.build_number) - 1
513 else: 743 else:
514 self.build_index = None 744 self.build_index = None
515 745
516 if date is not None: 746 if self.date is not None:
517 try: 747 try:
518 self.date = datetime.fromtimestamp(float(date), self.timezone) 748 # date is provided in the format 2013-07-23
519 self.timestamp = date 749 self.date = datetime.strptime(self.date, '%Y-%m-%d')
520 except: 750 except:
521 self.date = datetime.strptime(date, '%Y-%m-%d') 751 try:
522 else: 752 # date is provided as a unix timestamp
523 self.date = None 753 datetime.fromtimestamp(float(self.date))
754 self.timestamp = self.date
755 except:
756 raise ValueError('%s is not a valid date' % self.date)
524 757
758 self.locale_build = self.locale != 'en-US'
525 # For localized builds we do not have to retrieve the list of builds 759 # For localized builds we do not have to retrieve the list of builds
526 # because only the last build is available 760 # because only the last build is available
527 if not self.locale_build: 761 if not self.locale_build:
528 self.builds, self.build_index = self.get_build_info(self.build_index ) 762 self.builds, self.build_index = self.get_build_info_for_index(
529 763 self.build_index)
530 try:
531 self.timestamp = self.builds[self.build_index]
532 except:
533 raise NotFoundException("Specified sub folder cannot be found",
534 self.base_url + self.monthly_build_list_ regex)
535
536 764
537 @property 765 @property
538 def binary_regex(self): 766 def binary_regex(self):
539 """Return the regex for the binary""" 767 """Return the regex for the binary"""
540 768
541 regex_base_name = r'^%(APP)s-.*\.%(LOCALE)s\.' 769 regex_base_name = r'^%(APP)s-.*\.%(LOCALE)s\.%(PLATFORM)s'
542 regex_suffix = {'linux': r'.*\.%(EXT)s$', 770 regex_suffix = {'linux': r'.*\.%(EXT)s$',
543 'linux64': r'.*\.%(EXT)s$', 771 'linux64': r'.*\.%(EXT)s$',
544 'mac': r'.*\.%(EXT)s$', 772 'mac': r'.*\.%(EXT)s$',
545 'mac64': r'.*\.%(EXT)s$', 773 'mac64': r'.*\.%(EXT)s$',
546 'win32': r'.*(\.installer)\.%(EXT)s$', 774 'win32': r'(\.installer%(STUB)s)?\.%(EXT)s$',
547 'win64': r'.*(\.installer)\.%(EXT)s$'} 775 'win64': r'(\.installer%(STUB)s)?\.%(EXT)s$'}
548 776
549 regex = regex_base_name + regex_suffix[self.platform] 777 regex = regex_base_name + regex_suffix[self.platform]
550 778
551 return regex % {'APP': self.application, 779 return regex % {'APP': self.application,
552 'LOCALE': self.locale, 780 'LOCALE': self.locale,
781 'PLATFORM': PLATFORM_FRAGMENTS[self.platform],
782 'STUB': '-stub' if self.is_stub_installer else '',
553 'EXT': self.extension} 783 'EXT': self.extension}
554 784
555
556 def build_filename(self, binary): 785 def build_filename(self, binary):
557 """Return the proposed filename with extension for the binary""" 786 """Return the proposed filename with extension for the binary"""
558 787
559 return '%(TIMESTAMP)s%(BRANCH)s%(DEBUG)s-%(NAME)s' % { 788 return '%(TIMESTAMP)s%(BRANCH)s%(DEBUG)s-%(NAME)s' % {
560 'TIMESTAMP': self.timestamp + '-' if self.timestamp else '', 789 'TIMESTAMP': self.timestamp + '-' if self.timestamp else '',
561 'BRANCH': self.branch, 790 'BRANCH': self.branch,
562 'DEBUG': '-debug' if self.debug_build else '', 791 'DEBUG': '-debug' if self.debug_build else '',
563 'NAME': binary} 792 'NAME': binary}
564
565 793
566 @property 794 @property
567 def build_list_regex(self): 795 def build_list_regex(self):
568 """Return the regex for the folder which contains the list of builds""" 796 """Return the regex for the folder which contains the list of builds"""
569 797
570 regex = 'tinderbox-builds/%(BRANCH)s-%(PLATFORM)s%(L10N)s%(DEBUG)s' 798 regex = 'tinderbox-builds/%(BRANCH)s-%(PLATFORM)s%(L10N)s%(DEBUG)s/'
571 799
572 return regex % {'BRANCH': self.branch, 800 return regex % {
573 'PLATFORM': '' if self.locale_build else self.platform_r egex, 801 'BRANCH': self.branch,
574 'L10N': 'l10n' if self.locale_build else '', 802 'PLATFORM': '' if self.locale_build else self.platform_regex,
575 'DEBUG': '-debug' if self.debug_build else ''} 803 'L10N': 'l10n' if self.locale_build else '',
576 804 'DEBUG': '-debug' if self.debug_build else ''}
577 805
578 def date_matches(self, timestamp): 806 def date_matches(self, timestamp):
579 """Determines whether the timestamp date is equal to the argument date"" " 807 """
808 Determines whether the timestamp date is equal to the argument date
809 """
580 810
581 if self.date is None: 811 if self.date is None:
582 return False 812 return False
583 813
584 timestamp = datetime.fromtimestamp(float(timestamp), self.timezone) 814 timestamp = datetime.fromtimestamp(float(timestamp), self.timezone)
585 if self.date.date() == timestamp.date(): 815 if self.date.date() == timestamp.date():
586 return True 816 return True
587 817
588 return False 818 return False
589 819
590
591 @property
592 def date_validation_regex(self):
593 """Return the regex for a valid date argument value"""
594
595 return r'^\d{4}-\d{1,2}-\d{1,2}$|^\d+$'
596
597
598 def detect_platform(self): 820 def detect_platform(self):
599 """Detect the current platform""" 821 """Detect the current platform"""
600 822
601 platform = Scraper.detect_platform(self) 823 platform = Scraper.detect_platform(self)
602 824
603 # On OS X we have to special case the platform detection code and fallba ck 825 # On OS X we have to special case the platform detection code and
604 # to 64 bit builds for the en-US locale 826 # fallback to 64 bit builds for the en-US locale
605 if mozinfo.os == 'mac' and self.locale == 'en-US' and mozinfo.bits == 64 : 827 if mozinfo.os == 'mac' and self.locale == 'en-US' and \
828 mozinfo.bits == 64:
606 platform = "%s%d" % (mozinfo.os, mozinfo.bits) 829 platform = "%s%d" % (mozinfo.os, mozinfo.bits)
607 830
608 return platform 831 return platform
609 832
833 def is_build_dir(self, folder_name):
834 """Return whether or not the given dir contains a build."""
610 835
611 def get_build_info(self, build_index=None): 836 # Cannot move up to base scraper due to parser.entries call in
612 url = '/'.join([self.base_url, self.build_list_regex]) 837 # get_build_info_for_index (see below)
838 url = '%s/' % urljoin(self.base_url, self.build_list_regex, folder_name)
613 839
614 print 'Retrieving list of builds from %s' % url 840 if self.application in APPLICATIONS_MULTI_LOCALE \
841 and self.locale != 'multi':
842 url = '%s/' % urljoin(url, self.locale)
615 843
616 # If a timestamp is given, retrieve just that build 844 parser = self._create_directory_parser(url)
617 regex = '^' + self.timestamp + '$' if self.timestamp else r'^\d+$'
618 845
619 parser = DirectoryParser(url) 846 pattern = re.compile(self.binary_regex, re.IGNORECASE)
620 parser.entries = parser.filter(regex) 847 for entry in parser.entries:
848 try:
849 pattern.match(entry).group()
850 return True
851 except:
852 # No match, continue with next entry
853 continue
854 return False
621 855
622 # If date is given, retrieve the subset of builds on that date 856 def get_build_info_for_index(self, build_index=None):
623 if self.date is not None: 857 url = urljoin(self.base_url, self.build_list_regex)
858
859 self.logger.info('Retrieving list of builds from %s' % url)
860 parser = self._create_directory_parser(url)
861 parser.entries = parser.filter(r'^\d+$')
862
863 if self.timestamp:
864 # If a timestamp is given, retrieve the folder with the timestamp
865 # as name
866 parser.entries = self.timestamp in parser.entries and \
867 [self.timestamp]
868
869 elif self.date:
870 # If date is given, retrieve the subset of builds on that date
624 parser.entries = filter(self.date_matches, parser.entries) 871 parser.entries = filter(self.date_matches, parser.entries)
625 872
626 if not parser.entries: 873 if not parser.entries:
627 message = 'No builds have been found' 874 message = 'No builds have been found'
628 raise NotFoundException(message, url) 875 raise errors.NotFoundError(message, url)
876
877 self.show_matching_builds(parser.entries)
629 878
630 # If no index has been given, set it to the last build of the day. 879 # If no index has been given, set it to the last build of the day.
631 if build_index is None: 880 if build_index is None:
632 build_index = len(parser.entries) - 1 881 # Find the most recent non-empty entry.
882 build_index = len(parser.entries)
883 for build in reversed(parser.entries):
884 build_index -= 1
885 if not build_index or self.is_build_dir(build):
886 break
887
888 self.logger.info('Selected build: %s' % parser.entries[build_index])
633 889
634 return (parser.entries, build_index) 890 return (parser.entries, build_index)
635 891
636
637 @property 892 @property
638 def path_regex(self): 893 def path_regex(self):
639 """Return the regex for the path""" 894 """Return the regex for the path to the build folder"""
640 895
641 if self.locale_build: 896 if self.locale_build:
642 return self.build_list_regex 897 return self.build_list_regex
643 898
644 return '/'.join([self.build_list_regex, self.builds[self.build_index]]) 899 return '%s/' % urljoin(self.build_list_regex, self.builds[self.build_ind ex])
645
646 900
647 @property 901 @property
648 def platform_regex(self): 902 def platform_regex(self):
649 """Return the platform fragment of the URL""" 903 """Return the platform fragment of the URL"""
650 904
651 PLATFORM_FRAGMENTS = {'linux': 'linux', 905 PLATFORM_FRAGMENTS = {'linux': 'linux',
652 'linux64': 'linux64', 906 'linux64': 'linux64',
653 'mac': 'macosx', 907 'mac': 'macosx64',
654 'mac64': 'macosx64', 908 'mac64': 'macosx64',
655 'win32': 'win32', 909 'win32': 'win32',
656 'win64': 'win64'} 910 'win64': 'win64'}
657 911
658 return PLATFORM_FRAGMENTS[self.platform] 912 return PLATFORM_FRAGMENTS[self.platform]
659 913
660 914
661 def cli(): 915 class TryScraper(Scraper):
662 """Main function for the downloader""" 916 "Class to download a try build from the Mozilla server."
663 917
664 BUILD_TYPES = {'release': ReleaseScraper, 918 def __init__(self, changeset=None, debug_build=False, *args, **kwargs):
665 'candidate': ReleaseCandidateScraper,
666 'daily': DailyScraper,
667 'tinderbox': TinderboxScraper }
668 919
669 usage = 'usage: %prog [options]' 920 self.debug_build = debug_build
670 parser = OptionParser(usage=usage, description=__doc__) 921 self.changeset = changeset
671 parser.add_option('--application', '-a',
672 dest='application',
673 choices=APPLICATIONS,
674 default='firefox',
675 metavar='APPLICATION',
676 help='The name of the application to download, '
677 'default: "%default"')
678 parser.add_option('--directory', '-d',
679 dest='directory',
680 default=os.getcwd(),
681 metavar='DIRECTORY',
682 help='Target directory for the download, default: '
683 'current working directory')
684 parser.add_option('--build-number',
685 dest='build_number',
686 default=None,
687 type="int",
688 metavar='BUILD_NUMBER',
689 help='Number of the build (for candidate, daily, '
690 'and tinderbox builds)')
691 parser.add_option('--locale', '-l',
692 dest='locale',
693 default='en-US',
694 metavar='LOCALE',
695 help='Locale of the application, default: "%default"')
696 parser.add_option('--platform', '-p',
697 dest='platform',
698 choices=PLATFORM_FRAGMENTS.keys(),
699 metavar='PLATFORM',
700 help='Platform of the application')
701 parser.add_option('--type', '-t',
702 dest='type',
703 choices=BUILD_TYPES.keys(),
704 default='release',
705 metavar='BUILD_TYPE',
706 help='Type of build to download, default: "%default"')
707 parser.add_option('--url',
708 dest='url',
709 default=None,
710 metavar='URL',
711 help='URL to download.')
712 parser.add_option('--version', '-v',
713 dest='version',
714 metavar='VERSION',
715 help='Version of the application to be used by release and \
716 candidate builds, i.e. "3.6"')
717 parser.add_option('--extension',
718 dest='extension',
719 default=None,
720 metavar='EXTENSION',
721 help='File extension of the build (e.g. "zip"), default:\
722 the standard build extension on the platform.')
723 parser.add_option('--username',
724 dest='username',
725 default=None,
726 metavar='USERNAME',
727 help='Username for basic HTTP authentication.')
728 parser.add_option('--password',
729 dest='password',
730 default=None,
731 metavar='PASSWORD',
732 help='Password for basic HTTP authentication.')
733 parser.add_option('--retry-attempts',
734 dest='retry_attempts',
735 default=3,
736 type=int,
737 metavar='RETRY_ATTEMPTS',
738 help='Number of times the download will be attempted in '
739 'the event of a failure, default: %default')
740 parser.add_option('--retry-delay',
741 dest='retry_delay',
742 default=10,
743 type=int,
744 metavar='RETRY_DELAY',
745 help='Amount of time (in seconds) to wait between retry '
746 'attempts, default: %default')
747 922
748 # Option group for candidate builds 923 Scraper.__init__(self, *args, **kwargs)
749 group = OptionGroup(parser, "Candidate builds",
750 "Extra options for candidate builds.")
751 group.add_option('--no-unsigned',
752 dest='no_unsigned',
753 action="store_true",
754 help="Don't allow to download unsigned builds if signed\
755 builds are not available")
756 parser.add_option_group(group)
757 924
758 # Option group for daily builds 925 def get_build_info(self):
759 group = OptionGroup(parser, "Daily builds", 926 "Defines additional build information"
760 "Extra options for daily builds.")
761 group.add_option('--branch',
762 dest='branch',
763 default='mozilla-central',
764 metavar='BRANCH',
765 help='Name of the branch, default: "%default"')
766 group.add_option('--build-id',
767 dest='build_id',
768 default=None,
769 metavar='BUILD_ID',
770 help='ID of the build to download')
771 group.add_option('--date',
772 dest='date',
773 default=None,
774 metavar='DATE',
775 help='Date of the build, default: latest build')
776 parser.add_option_group(group)
777 927
778 # Option group for tinderbox builds 928 self.builds, self.build_index = self.get_build_info_for_index()
779 group = OptionGroup(parser, "Tinderbox builds",
780 "Extra options for tinderbox builds.")
781 group.add_option('--debug-build',
782 dest='debug_build',
783 action="store_true",
784 help="Download a debug build")
785 parser.add_option_group(group)
786 929
787 # TODO: option group for nightly builds 930 @property
788 (options, args) = parser.parse_args() 931 def binary_regex(self):
932 """Return the regex for the binary"""
789 933
790 # Check for required options and arguments 934 regex_base_name = r'^%(APP)s-.*\.%(LOCALE)s\.%(PLATFORM)s'
791 # Note: Will be optional when ini file support has been landed 935 regex_suffix = {'linux': r'.*\.%(EXT)s$',
792 if not options.url \ 936 'linux64': r'.*\.%(EXT)s$',
793 and not options.type in ['daily', 'tinderbox'] \ 937 'mac': r'.*\.%(EXT)s$',
794 and not options.version: 938 'mac64': r'.*\.%(EXT)s$',
795 parser.error('The version of the application to download has not been sp ecified.') 939 'win32': r'.*(\.installer%(STUB)s)\.%(EXT)s$',
940 'win64': r'.*(\.installer%(STUB)s)\.%(EXT)s$'}
796 941
797 # Instantiate scraper and download the build 942 regex = regex_base_name + regex_suffix[self.platform]
798 scraper_keywords = {'application': options.application,
799 'locale': options.locale,
800 'platform': options.platform,
801 'version': options.version,
802 'directory': options.directory,
803 'extension': options.extension,
804 'authentication': {
805 'username': options.username,
806 'password': options.password},
807 'retry_attempts': options.retry_attempts,
808 'retry_delay': options.retry_delay}
809 scraper_options = {'candidate': {
810 'build_number': options.build_number,
811 'no_unsigned': options.no_unsigned},
812 'daily': {
813 'branch': options.branch,
814 'build_number': options.build_number,
815 'build_id': options.build_id,
816 'date': options.date},
817 'tinderbox': {
818 'branch': options.branch,
819 'build_number': options.build_number,
820 'date': options.date,
821 'debug_build': options.debug_build}
822 }
823 943
824 kwargs = scraper_keywords.copy() 944 return regex % {'APP': self.application,
825 kwargs.update(scraper_options.get(options.type, {})) 945 'LOCALE': self.locale,
946 'PLATFORM': PLATFORM_FRAGMENTS[self.platform],
947 'STUB': '-stub' if self.is_stub_installer else '',
948 'EXT': self.extension}
826 949
827 if options.url: 950 def build_filename(self, binary):
828 build = DirectScraper(options.url, **kwargs) 951 """Return the proposed filename with extension for the binary"""
829 else:
830 build = BUILD_TYPES[options.type](**kwargs)
831 952
832 build.download() 953 return '%(CHANGESET)s%(DEBUG)s-%(NAME)s' % {
954 'CHANGESET': self.changeset,
955 'DEBUG': '-debug' if self.debug_build else '',
956 'NAME': binary}
833 957
834 if __name__ == "__main__": 958 @property
835 cli() 959 def build_list_regex(self):
960 """Return the regex for the folder which contains the list of builds"""
961
962 return 'try-builds/'
963
964 def detect_platform(self):
965 """Detect the current platform"""
966
967 platform = Scraper.detect_platform(self)
968
969 # On OS X we have to special case the platform detection code and
970 # fallback to 64 bit builds for the en-US locale
971 if mozinfo.os == 'mac' and self.locale == 'en-US' and \
972 mozinfo.bits == 64:
973 platform = "%s%d" % (mozinfo.os, mozinfo.bits)
974
975 return platform
976
977 def get_build_info_for_index(self, build_index=None):
978 url = urljoin(self.base_url, self.build_list_regex)
979
980 self.logger.info('Retrieving list of builds from %s' % url)
981 parser = self._create_directory_parser(url)
982 parser.entries = parser.filter('.*-%s$' % self.changeset)
983
984 if not parser.entries:
985 raise errors.NotFoundError('No builds have been found', url)
986
987 self.show_matching_builds(parser.entries)
988
989 self.logger.info('Selected build: %s' % parser.entries[0])
990
991 return (parser.entries, 0)
992
993 @property
994 def path_regex(self):
995 """Return the regex for the path to the build folder"""
996
997 build_dir = 'try-%(PLATFORM)s%(DEBUG)s/' % {
998 'PLATFORM': self.platform_regex,
999 'DEBUG': '-debug' if self.debug_build else ''}
1000 return urljoin(self.build_list_regex,
1001 self.builds[self.build_index],
1002 build_dir)
1003
1004 @property
1005 def platform_regex(self):
1006 """Return the platform fragment of the URL"""
1007
1008 PLATFORM_FRAGMENTS = {'linux': 'linux',
1009 'linux64': 'linux64',
1010 'mac': 'macosx64',
1011 'mac64': 'macosx64',
1012 'win32': 'win32',
1013 'win64': 'win64'}
1014
1015 return PLATFORM_FRAGMENTS[self.platform]
OLDNEW
« no previous file with comments | « mozdownload/parser.py ('k') | mozdownload/timezones.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698