| Index: mozdownload/scraper.py
|
| diff --git a/mozdownload/scraper.py b/mozdownload/scraper.py
|
| index 9011cab26c1801ea9804caff62fc60a8b812eee3..9a68b2aec9d063a5c3d39d92e7ce3eba4810e7ee 100755
|
| --- a/mozdownload/scraper.py
|
| +++ b/mozdownload/scraper.py
|
| @@ -1,211 +1,344 @@
|
| -#!/usr/bin/env python
|
| -
|
| # This Source Code Form is subject to the terms of the Mozilla Public
|
| # License, v. 2.0. If a copy of the MPL was not distributed with this
|
| # file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
|
|
| -"""Module to handle downloads for different types of Firefox and Thunderbird builds."""
|
| -
|
| -
|
| from datetime import datetime
|
| -from optparse import OptionParser, OptionGroup
|
| +import logging
|
| import os
|
| import re
|
| +import requests
|
| import sys
|
| import time
|
| import urllib
|
| -import urllib2
|
| +from urlparse import urlparse
|
|
|
| import mozinfo
|
|
|
| +import errors
|
| +
|
| from parser import DirectoryParser
|
| from timezones import PacificTimezone
|
| +from utils import urljoin
|
|
|
|
|
| -APPLICATIONS = ['b2g', 'firefox', 'thunderbird']
|
| +APPLICATIONS = ('b2g', 'firefox', 'fennec', 'thunderbird')
|
| +
|
| +# Some applications contain all locales in a single build
|
| +APPLICATIONS_MULTI_LOCALE = ('b2g', 'fennec')
|
| +
|
| +# Used if the application is named differently than the subfolder on the server
|
| +APPLICATIONS_TO_FTP_DIRECTORY = {'fennec': 'mobile'}
|
|
|
| # Base URL for the path to all builds
|
| -BASE_URL = 'https://ftp.mozilla.org/pub/mozilla.org'
|
| +BASE_URL = 'https://archive.mozilla.org/pub/'
|
|
|
| -PLATFORM_FRAGMENTS = {'linux': 'linux-i686',
|
| - 'linux64': 'linux-x86_64',
|
| - 'mac': 'mac',
|
| - 'mac64': 'mac64',
|
| - 'win32': 'win32',
|
| - 'win64': 'win64-x86_64'}
|
| +# Chunk size when downloading a file
|
| +CHUNK_SIZE = 16 * 1024
|
|
|
| -DEFAULT_FILE_EXTENSIONS = {'linux': 'tar.bz2',
|
| +DEFAULT_FILE_EXTENSIONS = {'android-api-9': 'apk',
|
| + 'android-api-11': 'apk',
|
| + 'android-x86': 'apk',
|
| + 'linux': 'tar.bz2',
|
| 'linux64': 'tar.bz2',
|
| 'mac': 'dmg',
|
| 'mac64': 'dmg',
|
| 'win32': 'exe',
|
| 'win64': 'exe'}
|
|
|
| -class NotFoundException(Exception):
|
| - """Exception for a resource not being found (e.g. no logs)"""
|
| - def __init__(self, message, location):
|
| - self.location = location
|
| - Exception.__init__(self, ': '.join([message, location]))
|
| +PLATFORM_FRAGMENTS = {'android-api-9': r'android-arm',
|
| + 'android-api-11': r'android-arm',
|
| + 'android-x86': r'android-i386',
|
| + 'linux': r'linux-i686',
|
| + 'linux64': r'linux-x86_64',
|
| + 'mac': r'mac',
|
| + 'mac64': r'mac(64)?',
|
| + 'win32': r'win32',
|
| + 'win64': r'win64(-x86_64)?'}
|
|
|
|
|
| class Scraper(object):
|
| """Generic class to download an application from the Mozilla server"""
|
|
|
| - def __init__(self, directory, version, platform=None,
|
| - application='firefox', locale='en-US', extension=None,
|
| - authentication=None, retry_attempts=3, retry_delay=10):
|
| + def __init__(self, destination=None, platform=None,
|
| + application='firefox', locale=None, extension=None,
|
| + username=None, password=None,
|
| + retry_attempts=0, retry_delay=10.,
|
| + is_stub_installer=False, timeout=None,
|
| + log_level='INFO',
|
| + base_url=BASE_URL):
|
|
|
| # Private properties for caching
|
| - self._target = None
|
| + self._filename = None
|
| self._binary = None
|
|
|
| - self.directory = directory
|
| - self.locale = locale
|
| + self.destination = destination or os.getcwd()
|
| +
|
| + if not locale:
|
| + if application in APPLICATIONS_MULTI_LOCALE:
|
| + self.locale = 'multi'
|
| + else:
|
| + self.locale = 'en-US'
|
| + else:
|
| + self.locale = locale
|
| +
|
| self.platform = platform or self.detect_platform()
|
| - self.version = version
|
| - self.extension = extension or DEFAULT_FILE_EXTENSIONS[self.platform]
|
| - self.authentication = authentication
|
| +
|
| + self.session = requests.Session()
|
| + if (username, password) != (None, None):
|
| + self.session.auth = (username, password)
|
| +
|
| self.retry_attempts = retry_attempts
|
| self.retry_delay = retry_delay
|
| + self.is_stub_installer = is_stub_installer
|
| + self.timeout_download = timeout
|
| + # this is the timeout used in requests.get. Unlike "auth",
|
| + # it does not work if we attach it on the session, so we handle
|
| + # it independently.
|
| + self.timeout_network = 60.
|
| +
|
| + logging.basicConfig(format=' %(levelname)s | %(message)s')
|
| + self.logger = logging.getLogger(self.__module__)
|
| + self.logger.setLevel(log_level)
|
|
|
| # build the base URL
|
| self.application = application
|
| - self.base_url = '/'.join([BASE_URL, self.application])
|
| + self.base_url = '%s/' % urljoin(
|
| + base_url,
|
| + APPLICATIONS_TO_FTP_DIRECTORY.get(self.application, self.application)
|
| + )
|
|
|
| + if extension:
|
| + self.extension = extension
|
| + else:
|
| + if self.application in APPLICATIONS_MULTI_LOCALE and \
|
| + self.platform in ('win32', 'win64'):
|
| + # builds for APPLICATIONS_MULTI_LOCALE only exist in zip
|
| + self.extension = 'zip'
|
| + else:
|
| + self.extension = DEFAULT_FILE_EXTENSIONS[self.platform]
|
| +
|
| + attempt = 0
|
| + while True:
|
| + attempt += 1
|
| + try:
|
| + self.get_build_info()
|
| + break
|
| + except (errors.NotFoundError, requests.exceptions.RequestException), e:
|
| + if self.retry_attempts > 0:
|
| + # Log only if multiple attempts are requested
|
| + self.logger.warning("Build not found: '%s'" % e.message)
|
| + self.logger.info('Will retry in %s seconds...' %
|
| + (self.retry_delay))
|
| + time.sleep(self.retry_delay)
|
| + self.logger.info("Retrying... (attempt %s)" % attempt)
|
| +
|
| + if attempt >= self.retry_attempts:
|
| + if hasattr(e, 'response') and \
|
| + e.response.status_code == 404:
|
| + message = "Specified build has not been found"
|
| + raise errors.NotFoundError(message, e.response.url)
|
| + else:
|
| + raise
|
| +
|
| + def _create_directory_parser(self, url):
|
| + return DirectoryParser(url,
|
| + session=self.session,
|
| + timeout=self.timeout_network)
|
|
|
| @property
|
| def binary(self):
|
| """Return the name of the build"""
|
|
|
| - if self._binary is None:
|
| - # Retrieve all entries from the remote virtual folder
|
| - parser = DirectoryParser(self.path)
|
| - if not parser.entries:
|
| - raise NotFoundException('No entries found', self.path)
|
| -
|
| - # Download the first matched directory entry
|
| - pattern = re.compile(self.binary_regex, re.IGNORECASE)
|
| - for entry in parser.entries:
|
| - try:
|
| - self._binary = pattern.match(entry).group()
|
| - break
|
| - except:
|
| - # No match, continue with next entry
|
| - continue
|
| -
|
| - if self._binary is None:
|
| - raise NotFoundException("Binary not found in folder", self.path)
|
| - else:
|
| - return self._binary
|
| + attempt = 0
|
|
|
| + while self._binary is None:
|
| + attempt += 1
|
| + try:
|
| + # Retrieve all entries from the remote virtual folder
|
| + parser = self._create_directory_parser(self.path)
|
| + if not parser.entries:
|
| + raise errors.NotFoundError('No entries found', self.path)
|
| +
|
| + # Download the first matched directory entry
|
| + pattern = re.compile(self.binary_regex, re.IGNORECASE)
|
| + for entry in parser.entries:
|
| + try:
|
| + self._binary = pattern.match(entry).group()
|
| + break
|
| + except:
|
| + # No match, continue with next entry
|
| + continue
|
| + else:
|
| + raise errors.NotFoundError("Binary not found in folder",
|
| + self.path)
|
| + except (errors.NotFoundError, requests.exceptions.RequestException), e:
|
| + if self.retry_attempts > 0:
|
| + # Log only if multiple attempts are requested
|
| + self.logger.warning("Build not found: '%s'" % e.message)
|
| + self.logger.info('Will retry in %s seconds...' %
|
| + (self.retry_delay))
|
| + time.sleep(self.retry_delay)
|
| + self.logger.info("Retrying... (attempt %s)" % attempt)
|
| +
|
| + if attempt >= self.retry_attempts:
|
| + if hasattr(e, 'response') and \
|
| + e.response.status_code == 404:
|
| + message = "Specified build has not been found"
|
| + raise errors.NotFoundError(message, self.path)
|
| + else:
|
| + raise
|
| +
|
| + return self._binary
|
|
|
| @property
|
| def binary_regex(self):
|
| """Return the regex for the binary filename"""
|
|
|
| - raise NotImplementedError(sys._getframe(0).f_code.co_name)
|
| -
|
| + raise errors.NotImplementedError(sys._getframe(0).f_code.co_name)
|
|
|
| @property
|
| - def final_url(self):
|
| - """Return the final URL of the build"""
|
| -
|
| - return '/'.join([self.path, self.binary])
|
| + def url(self):
|
| + """Return the URL of the build"""
|
|
|
| + return urljoin(self.path, self.binary)
|
|
|
| @property
|
| def path(self):
|
| - """Return the path to the build"""
|
| -
|
| - return '/'.join([self.base_url, self.path_regex])
|
| + """Return the path to the build folder"""
|
|
|
| + return urljoin(self.base_url, self.path_regex)
|
|
|
| @property
|
| def path_regex(self):
|
| - """Return the regex for the path to the build"""
|
| -
|
| - raise NotImplementedError(sys._getframe(0).f_code.co_name)
|
| + """Return the regex for the path to the build folder"""
|
|
|
| + raise errors.NotImplementedError(sys._getframe(0).f_code.co_name)
|
|
|
| @property
|
| def platform_regex(self):
|
| """Return the platform fragment of the URL"""
|
|
|
| - return PLATFORM_FRAGMENTS[self.platform];
|
| -
|
| + return PLATFORM_FRAGMENTS[self.platform]
|
|
|
| @property
|
| - def target(self):
|
| - """Return the target file name of the build"""
|
| + def filename(self):
|
| + """Return the local filename of the build"""
|
| +
|
| + if self._filename is None:
|
| + if os.path.splitext(self.destination)[1]:
|
| + # If the filename has been given make use of it
|
| + target_file = self.destination
|
| + else:
|
| + # Otherwise create it from the build details
|
| + target_file = os.path.join(self.destination,
|
| + self.build_filename(self.binary))
|
| +
|
| + self._filename = os.path.abspath(target_file)
|
|
|
| - if self._target is None:
|
| - self._target = os.path.join(self.directory,
|
| - self.build_filename(self.binary))
|
| - return self._target
|
| + return self._filename
|
|
|
| + def get_build_info(self):
|
| + """Returns additional build information in subclasses if necessary"""
|
| + pass
|
|
|
| def build_filename(self, binary):
|
| """Return the proposed filename with extension for the binary"""
|
|
|
| - raise NotImplementedError(sys._getframe(0).f_code.co_name)
|
| -
|
| + raise errors.NotImplementedError(sys._getframe(0).f_code.co_name)
|
|
|
| def detect_platform(self):
|
| """Detect the current platform"""
|
|
|
| # For Mac and Linux 32bit we do not need the bits appended
|
| - if mozinfo.os == 'mac' or (mozinfo.os == 'linux' and mozinfo.bits == 32):
|
| + if mozinfo.os == 'mac' or \
|
| + (mozinfo.os == 'linux' and mozinfo.bits == 32):
|
| return mozinfo.os
|
| else:
|
| return "%s%d" % (mozinfo.os, mozinfo.bits)
|
|
|
| -
|
| def download(self):
|
| """Download the specified file"""
|
|
|
| - attempts = 0
|
| + def total_seconds(td):
|
| + # Keep backward compatibility with Python 2.6 which doesn't have
|
| + # this method
|
| + if hasattr(td, 'total_seconds'):
|
| + return td.total_seconds()
|
| + else:
|
| + return (td.microseconds +
|
| + (td.seconds + td.days * 24 * 3600) * 10 ** 6) / 10 ** 6
|
|
|
| - if not os.path.isdir(self.directory):
|
| - os.makedirs(self.directory)
|
| + attempt = 0
|
|
|
| # Don't re-download the file
|
| - if os.path.isfile(os.path.abspath(self.target)):
|
| - print "File has already been downloaded: %s" % (self.target)
|
| - return
|
| -
|
| - print 'Downloading from: %s' % (urllib.unquote(self.final_url))
|
| - tmp_file = self.target + ".part"
|
| -
|
| - if self.authentication \
|
| - and self.authentication['username'] \
|
| - and self.authentication['password']:
|
| - password_mgr = urllib2.HTTPPasswordMgrWithDefaultRealm()
|
| - password_mgr.add_password(None,
|
| - self.final_url,
|
| - self.authentication['username'],
|
| - self.authentication['password'])
|
| - handler = urllib2.HTTPBasicAuthHandler(password_mgr)
|
| - opener = urllib2.build_opener(urllib2.HTTPHandler, handler)
|
| - urllib2.install_opener(opener)
|
| + if os.path.isfile(os.path.abspath(self.filename)):
|
| + self.logger.info("File has already been downloaded: %s" %
|
| + (self.filename))
|
| + return self.filename
|
| +
|
| + directory = os.path.dirname(self.filename)
|
| + if not os.path.isdir(directory):
|
| + os.makedirs(directory)
|
| +
|
| + self.logger.info('Downloading from: %s' %
|
| + (urllib.unquote(self.url)))
|
| + self.logger.info('Saving as: %s' % self.filename)
|
| +
|
| + tmp_file = self.filename + ".part"
|
|
|
| while True:
|
| - attempts += 1
|
| + attempt += 1
|
| try:
|
| - r = urllib2.urlopen(self.final_url)
|
| - CHUNK = 16 * 1024
|
| + start_time = datetime.now()
|
| +
|
| + # Enable streaming mode so we can download content in chunks
|
| + r = self.session.get(self.url, stream=True)
|
| + r.raise_for_status()
|
| +
|
| + content_length = r.headers.get('Content-length')
|
| + # ValueError: Value out of range if only total_size given
|
| + if content_length:
|
| + total_size = int(content_length.strip())
|
| + max_value = ((total_size / CHUNK_SIZE) + 1) * CHUNK_SIZE
|
| +
|
| + bytes_downloaded = 0
|
| +
|
| with open(tmp_file, 'wb') as f:
|
| - for chunk in iter(lambda: r.read(CHUNK), ''):
|
| + for chunk in iter(lambda: r.raw.read(CHUNK_SIZE), ''):
|
| f.write(chunk)
|
| + bytes_downloaded += CHUNK_SIZE
|
| +
|
| + t1 = total_seconds(datetime.now() - start_time)
|
| + if self.timeout_download and \
|
| + t1 >= self.timeout_download:
|
| + raise errors.TimeoutError
|
| break
|
| - except (urllib2.HTTPError, urllib2.URLError):
|
| + except (requests.exceptions.RequestException, errors.TimeoutError), e:
|
| if tmp_file and os.path.isfile(tmp_file):
|
| os.remove(tmp_file)
|
| - print 'Download failed! Retrying... (attempt %s)' % attempts
|
| - if attempts >= self.retry_attempts:
|
| + if self.retry_attempts > 0:
|
| + # Log only if multiple attempts are requested
|
| + self.logger.warning('Download failed: "%s"' % str(e))
|
| + self.logger.info('Will retry in %s seconds...' %
|
| + (self.retry_delay))
|
| + time.sleep(self.retry_delay)
|
| + self.logger.info("Retrying... (attempt %s)" % attempt)
|
| + if attempt >= self.retry_attempts:
|
| raise
|
| time.sleep(self.retry_delay)
|
|
|
| - os.rename(tmp_file, self.target)
|
| + os.rename(tmp_file, self.filename)
|
| +
|
| + return self.filename
|
| +
|
| + def show_matching_builds(self, builds):
|
| + """Output the matching builds"""
|
| + self.logger.info('Found %s build%s: %s' % (
|
| + len(builds),
|
| + len(builds) > 1 and 's' or '',
|
| + len(builds) > 10 and
|
| + ' ... '.join([', '.join(builds[:5]), ', '.join(builds[-5:])]) or
|
| + ', '.join(builds)))
|
|
|
|
|
| class DailyScraper(Scraper):
|
| @@ -214,94 +347,160 @@ class DailyScraper(Scraper):
|
| def __init__(self, branch='mozilla-central', build_id=None, date=None,
|
| build_number=None, *args, **kwargs):
|
|
|
| - Scraper.__init__(self, *args, **kwargs)
|
| self.branch = branch
|
| + self.build_id = build_id
|
| + self.date = date
|
| + self.build_number = build_number
|
| +
|
| + Scraper.__init__(self, *args, **kwargs)
|
| +
|
| + def get_build_info(self):
|
| + """Defines additional build information"""
|
|
|
| # Internally we access builds via index
|
| - if build_number is not None:
|
| - self.build_index = int(build_number) - 1
|
| + if self.build_number is not None:
|
| + self.build_index = int(self.build_number) - 1
|
| else:
|
| self.build_index = None
|
|
|
| - if build_id:
|
| - # A build id has been specified. Split up its components so the date
|
| - # and time can be extracted: '20111212042025' -> '2011-12-12 04:20:25'
|
| - self.date = datetime.strptime(build_id, '%Y%m%d%H%M%S')
|
| - self.builds, self.build_index = self.get_build_info_for_date(self.date,
|
| - has_time=True)
|
| + if self.build_id:
|
| + # A build id has been specified. Split up its components so the
|
| + # date and time can be extracted:
|
| + # '20111212042025' -> '2011-12-12 04:20:25'
|
| + self.date = datetime.strptime(self.build_id, '%Y%m%d%H%M%S')
|
|
|
| - elif date:
|
| + elif self.date:
|
| # A date (without time) has been specified. Use its value and the
|
| # build index to find the requested build for that day.
|
| - self.date = datetime.strptime(date, '%Y-%m-%d')
|
| - self.builds, self.build_index = self.get_build_info_for_date(self.date,
|
| - build_index=self.build_index)
|
| -
|
| + try:
|
| + self.date = datetime.strptime(self.date, '%Y-%m-%d')
|
| + except:
|
| + raise ValueError('%s is not a valid date' % self.date)
|
| else:
|
| - # If no build id nor date have been specified the lastest available
|
| + # If no build id nor date have been specified the latest available
|
| # build of the given branch has to be identified. We also have to
|
| # retrieve the date of the build via its build id.
|
| - url = '%s/nightly/latest-%s/' % (self.base_url, self.branch)
|
| -
|
| - print 'Retrieving the build status file from %s' % url
|
| - parser = DirectoryParser(url)
|
| - parser.entries = parser.filter(r'.*%s\.txt' % self.platform_regex)
|
| - if not parser.entries:
|
| - message = 'Status file for %s build cannot be found' % self.platform_regex
|
| - raise NotFoundException(message, url)
|
| -
|
| - # Read status file for the platform, retrieve build id, and convert to a date
|
| - status_file = url + parser.entries[-1]
|
| - f = urllib.urlopen(status_file)
|
| - self.date = datetime.strptime(f.readline().strip(), '%Y%m%d%H%M%S')
|
| - self.builds, self.build_index = self.get_build_info_for_date(self.date,
|
| - has_time=True)
|
| -
|
| -
|
| - def get_build_info_for_date(self, date, has_time=False, build_index=None):
|
| - url = '/'.join([self.base_url, self.monthly_build_list_regex])
|
| -
|
| - print 'Retrieving list of builds from %s' % url
|
| - parser = DirectoryParser(url)
|
| - regex = r'%(DATE)s-(\d+-)+%(BRANCH)s%(L10N)s$' % {
|
| - 'DATE': date.strftime('%Y-%m-%d'),
|
| - 'BRANCH': self.branch,
|
| - 'L10N': '' if self.locale == 'en-US' else '-l10n'}
|
| - parser.entries = parser.filter(regex)
|
| + self.date = self.get_latest_build_date()
|
| +
|
| + self.builds, self.build_index = self.get_build_info_for_date(
|
| + self.date, self.build_index)
|
| +
|
| + def get_latest_build_date(self):
|
| + """ Returns date of latest available nightly build."""
|
| +         if self.application not in ('fennec',):
|
| + url = urljoin(self.base_url, 'nightly', 'latest-%s/' % self.branch)
|
| + else:
|
| + url = urljoin(self.base_url, 'nightly', 'latest-%s-%s/' %
|
| + (self.branch, self.platform))
|
| +
|
| + self.logger.info('Retrieving the build status file from %s' % url)
|
| + parser = self._create_directory_parser(url)
|
| + parser.entries = parser.filter(r'.*%s\.txt' % self.platform_regex)
|
| if not parser.entries:
|
| - message = 'Folder for builds on %s has not been found' % self.date.strftime('%Y-%m-%d')
|
| - raise NotFoundException(message, url)
|
| + message = 'Status file for %s build cannot be found' % \
|
| + self.platform_regex
|
| + raise errors.NotFoundError(message, url)
|
| +
|
| + # Read status file for the platform, retrieve build id,
|
| + # and convert to a date
|
| + headers = {'Cache-Control': 'max-age=0'}
|
| +
|
| + r = self.session.get(url + parser.entries[-1], headers=headers)
|
| + try:
|
| + r.raise_for_status()
|
| +
|
| + return datetime.strptime(r.text.split('\n')[0], '%Y%m%d%H%M%S')
|
| + finally:
|
| + r.close()
|
| +
|
| + def is_build_dir(self, folder_name):
|
| + """Return whether or not the given dir contains a build."""
|
| +
|
| + # Cannot move up to base scraper due to parser.entries call in
|
| + # get_build_info_for_date (see below)
|
| +
|
| + url = '%s/' % urljoin(self.base_url, self.monthly_build_list_regex, folder_name)
|
| + if self.application in APPLICATIONS_MULTI_LOCALE \
|
| + and self.locale != 'multi':
|
| + url = '%s/' % urljoin(url, self.locale)
|
| +
|
| + parser = self._create_directory_parser(url)
|
| +
|
| + pattern = re.compile(self.binary_regex, re.IGNORECASE)
|
| + for entry in parser.entries:
|
| + try:
|
| + pattern.match(entry).group()
|
| + return True
|
| + except:
|
| + # No match, continue with next entry
|
| + continue
|
| + return False
|
| +
|
| + def get_build_info_for_date(self, date, build_index=None):
|
| + url = urljoin(self.base_url, self.monthly_build_list_regex)
|
| + has_time = date and date.time()
|
| +
|
| + self.logger.info('Retrieving list of builds from %s' % url)
|
| + parser = self._create_directory_parser(url)
|
| + regex = r'%(DATE)s-(\d+-)+%(BRANCH)s%(L10N)s%(PLATFORM)s$' % {
|
| + 'DATE': date.strftime('%Y-%m-%d'),
|
| + 'BRANCH': self.branch,
|
| + # ensure to select the correct subfolder for localized builds
|
| + 'L10N': '' if self.locale in ('en-US', 'multi') else '(-l10n)?',
|
| + 'PLATFORM': '' if self.application not in (
|
| +                 'fennec',) else '-' + self.platform
|
| + }
|
| +
|
| + parser.entries = parser.filter(regex)
|
| + parser.entries = parser.filter(self.is_build_dir)
|
|
|
| if has_time:
|
| - # If a time is included in the date, use it to determine the build's index
|
| + # If a time is included in the date, use it to determine the
|
| + # build's index
|
| regex = r'.*%s.*' % date.strftime('%H-%M-%S')
|
| - build_index = parser.entries.index(parser.filter(regex)[0])
|
| - else:
|
| - # If no index has been given, set it to the last build of the day.
|
| - if build_index is None:
|
| - build_index = len(parser.entries) - 1
|
| + parser.entries = parser.filter(regex)
|
|
|
| - return (parser.entries, build_index)
|
| + if not parser.entries:
|
| + date_format = '%Y-%m-%d-%H-%M-%S' if has_time else '%Y-%m-%d'
|
| + message = 'Folder for builds on %s has not been found' % \
|
| + self.date.strftime(date_format)
|
| + raise errors.NotFoundError(message, url)
|
| +
|
| +         # Log the builds that matched the date/branch/locale filters.
|
| + self.show_matching_builds(parser.entries)
|
| + # If no index has been given, set it to the last build of the day.
|
| + if build_index is None:
|
| + # Find the most recent non-empty entry.
|
| + build_index = len(parser.entries)
|
| + for build in reversed(parser.entries):
|
| + build_index -= 1
|
| + if not build_index or self.is_build_dir(build):
|
| + break
|
| + self.logger.info('Selected build: %s' % parser.entries[build_index])
|
|
|
| + return (parser.entries, build_index)
|
|
|
| @property
|
| def binary_regex(self):
|
| """Return the regex for the binary"""
|
|
|
| regex_base_name = r'^%(APP)s-.*\.%(LOCALE)s\.%(PLATFORM)s'
|
| - regex_suffix = {'linux': r'\.%(EXT)s$',
|
| + regex_suffix = {'android-api-9': r'\.%(EXT)s$',
|
| + 'android-api-11': r'\.%(EXT)s$',
|
| + 'android-x86': r'\.%(EXT)s$',
|
| + 'linux': r'\.%(EXT)s$',
|
| 'linux64': r'\.%(EXT)s$',
|
| 'mac': r'\.%(EXT)s$',
|
| 'mac64': r'\.%(EXT)s$',
|
| - 'win32': r'(\.installer)\.%(EXT)s$',
|
| - 'win64': r'(\.installer)\.%(EXT)s$'}
|
| + 'win32': r'(\.installer%(STUB)s)?\.%(EXT)s$',
|
| + 'win64': r'(\.installer%(STUB)s)?\.%(EXT)s$'}
|
| regex = regex_base_name + regex_suffix[self.platform]
|
|
|
| return regex % {'APP': self.application,
|
| 'LOCALE': self.locale,
|
| 'PLATFORM': self.platform_regex,
|
| - 'EXT': self.extension}
|
| -
|
| + 'EXT': self.extension,
|
| + 'STUB': '-stub' if self.is_stub_installer else ''}
|
|
|
| def build_filename(self, binary):
|
| """Return the proposed filename with extension for the binary"""
|
| @@ -315,53 +514,69 @@ class DailyScraper(Scraper):
|
| timestamp = self.date.strftime('%Y-%m-%d')
|
|
|
| return '%(TIMESTAMP)s-%(BRANCH)s-%(NAME)s' % {
|
| - 'TIMESTAMP': timestamp,
|
| - 'BRANCH': self.branch,
|
| - 'NAME': binary}
|
| -
|
| + 'TIMESTAMP': timestamp,
|
| + 'BRANCH': self.branch,
|
| + 'NAME': binary}
|
|
|
| @property
|
| def monthly_build_list_regex(self):
|
| - """Return the regex for the folder which contains the builds of a month."""
|
| + """Return the regex for the folder containing builds of a month."""
|
|
|
| # Regex for possible builds for the given date
|
| return r'nightly/%(YEAR)s/%(MONTH)s/' % {
|
| - 'YEAR': self.date.year,
|
| - 'MONTH': str(self.date.month).zfill(2) }
|
| -
|
| + 'YEAR': self.date.year,
|
| + 'MONTH': str(self.date.month).zfill(2)}
|
|
|
| @property
|
| def path_regex(self):
|
| - """Return the regex for the path"""
|
| + """Return the regex for the path to the build folder"""
|
|
|
| try:
|
| - return self.monthly_build_list_regex + self.builds[self.build_index]
|
| + path = '%s/' % urljoin(self.monthly_build_list_regex,
|
| + self.builds[self.build_index])
|
| + if self.application in APPLICATIONS_MULTI_LOCALE \
|
| + and self.locale != 'multi':
|
| + path = '%s/' % urljoin(path, self.locale)
|
| + return path
|
| except:
|
| - raise NotFoundException("Specified sub folder cannot be found",
|
| - self.base_url + self.monthly_build_list_regex)
|
| + folder = urljoin(self.base_url, self.monthly_build_list_regex)
|
| + raise errors.NotFoundError("Specified sub folder cannot be found",
|
| + folder)
|
|
|
|
|
| class DirectScraper(Scraper):
|
| """Class to download a file from a specified URL"""
|
|
|
| def __init__(self, url, *args, **kwargs):
|
| - Scraper.__init__(self, *args, **kwargs)
|
| + self._url = url
|
|
|
| - self.url = url
|
| + Scraper.__init__(self, *args, **kwargs)
|
|
|
| @property
|
| - def target(self):
|
| - return urllib.splitquery(self.final_url)[0].rpartition('/')[-1]
|
| + def filename(self):
|
| + if os.path.splitext(self.destination)[1]:
|
| + # If the filename has been given make use of it
|
| + target_file = self.destination
|
| + else:
|
| + # Otherwise determine it from the url.
|
| + parsed_url = urlparse(self.url)
|
| + source_filename = (parsed_url.path.rpartition('/')[-1] or
|
| + parsed_url.hostname)
|
| + target_file = os.path.join(self.destination, source_filename)
|
| +
|
| + return os.path.abspath(target_file)
|
|
|
| @property
|
| - def final_url(self):
|
| - return self.url
|
| + def url(self):
|
| + return self._url
|
|
|
|
|
| class ReleaseScraper(Scraper):
|
| """Class to download a release build from the Mozilla server"""
|
|
|
| - def __init__(self, *args, **kwargs):
|
| + def __init__(self, version, *args, **kwargs):
|
| + self.version = version
|
| +
|
| Scraper.__init__(self, *args, **kwargs)
|
|
|
| @property
|
| @@ -372,66 +587,78 @@ class ReleaseScraper(Scraper):
|
| 'linux64': r'^%(APP)s-.*\.%(EXT)s$',
|
| 'mac': r'^%(APP)s.*\.%(EXT)s$',
|
| 'mac64': r'^%(APP)s.*\.%(EXT)s$',
|
| - 'win32': r'^%(APP)s.*\.%(EXT)s$',
|
| - 'win64': r'^%(APP)s.*\.%(EXT)s$'}
|
| - return regex[self.platform] % {'APP': self.application,
|
| - 'EXT': self.extension}
|
| -
|
| + 'win32': r'^%(APP)s.*%(STUB)s.*\.%(EXT)s$',
|
| + 'win64': r'^%(APP)s.*%(STUB)s.*\.%(EXT)s$'}
|
| + return regex[self.platform] % {
|
| + 'APP': self.application,
|
| + 'EXT': self.extension,
|
| + 'STUB': 'Stub' if self.is_stub_installer else ''}
|
|
|
| @property
|
| def path_regex(self):
|
| - """Return the regex for the path"""
|
| + """Return the regex for the path to the build folder"""
|
|
|
| - regex = r'releases/%(VERSION)s/%(PLATFORM)s/%(LOCALE)s'
|
| + regex = r'releases/%(VERSION)s/%(PLATFORM)s/%(LOCALE)s/'
|
| return regex % {'LOCALE': self.locale,
|
| 'PLATFORM': self.platform_regex,
|
| 'VERSION': self.version}
|
|
|
| + @property
|
| + def platform_regex(self):
|
| + """Return the platform fragment of the URL"""
|
| +
|
| + if self.platform == 'win64':
|
| + return self.platform
|
| +
|
| + return PLATFORM_FRAGMENTS[self.platform]
|
|
|
| def build_filename(self, binary):
|
| """Return the proposed filename with extension for the binary"""
|
|
|
| - template = '%(APP)s-%(VERSION)s.%(LOCALE)s.%(PLATFORM)s.%(EXT)s'
|
| + template = '%(APP)s-%(VERSION)s.%(LOCALE)s.%(PLATFORM)s%(STUB)s' \
|
| + '.%(EXT)s'
|
| return template % {'APP': self.application,
|
| 'VERSION': self.version,
|
| 'LOCALE': self.locale,
|
| 'PLATFORM': self.platform,
|
| + 'STUB': '-stub' if self.is_stub_installer else '',
|
| 'EXT': self.extension}
|
|
|
|
|
| class ReleaseCandidateScraper(ReleaseScraper):
|
| """Class to download a release candidate build from the Mozilla server"""
|
|
|
| - def __init__(self, build_number=None, no_unsigned=False, *args, **kwargs):
|
| - Scraper.__init__(self, *args, **kwargs)
|
| -
|
| - # Internally we access builds via index
|
| - if build_number is not None:
|
| - self.build_index = int(build_number) - 1
|
| - else:
|
| - self.build_index = None
|
| -
|
| - self.builds, self.build_index = self.get_build_info_for_version(self.version, self.build_index)
|
| + def __init__(self, version, build_number=None, *args, **kwargs):
|
| + self.version = version
|
| + self.build_number = build_number
|
|
|
| - self.no_unsigned = no_unsigned
|
| - self.unsigned = False
|
| + Scraper.__init__(self, *args, **kwargs)
|
|
|
| + def get_build_info(self):
|
| + """Defines additional build information"""
|
|
|
| - def get_build_info_for_version(self, version, build_index=None):
|
| - url = '/'.join([self.base_url, self.candidate_build_list_regex])
|
| + # Internally we access builds via index
|
| + url = urljoin(self.base_url, self.candidate_build_list_regex)
|
| + self.logger.info('Retrieving list of candidate builds from %s' % url)
|
|
|
| - print 'Retrieving list of candidate builds from %s' % url
|
| - parser = DirectoryParser(url)
|
| + parser = self._create_directory_parser(url)
|
| if not parser.entries:
|
| - message = 'Folder for specific candidate builds at has not been found'
|
| - raise NotFoundException(message, url)
|
| -
|
| - # If no index has been given, set it to the last build of the given version.
|
| - if build_index is None:
|
| - build_index = len(parser.entries) - 1
|
| -
|
| - return (parser.entries, build_index)
|
| -
|
| +             message = 'Folder for specific candidate builds at %s has not ' \
|
| + 'been found' % url
|
| + raise errors.NotFoundError(message, url)
|
| +
|
| + self.show_matching_builds(parser.entries)
|
| + self.builds = parser.entries
|
| + self.build_index = len(parser.entries) - 1
|
| +
|
| + if self.build_number and \
|
| + ('build%s' % self.build_number) in self.builds:
|
| + self.builds = ['build%s' % self.build_number]
|
| + self.build_index = 0
|
| + self.logger.info('Selected build: build%s' % self.build_number)
|
| + else:
|
| + self.logger.info('Selected build: build%d' %
|
| + (self.build_index + 1))
|
|
|
| @property
|
| def candidate_build_list_regex(self):
|
| @@ -439,51 +666,49 @@ class ReleaseCandidateScraper(ReleaseScraper):
|
| a candidate build."""
|
|
|
| # Regex for possible builds for the given date
|
| - return r'nightly/%(VERSION)s-candidates/' % {
|
| - 'VERSION': self.version }
|
| -
|
| + return r'candidates/%(VERSION)s-candidates/' % {
|
| + 'VERSION': self.version}
|
|
|
| @property
|
| def path_regex(self):
|
| - """Return the regex for the path"""
|
| + """Return the regex for the path to the build folder"""
|
|
|
| - regex = r'%(PREFIX)s%(BUILD)s/%(UNSIGNED)s%(PLATFORM)s/%(LOCALE)s'
|
| + regex = r'%(PREFIX)s%(BUILD)s/%(PLATFORM)s/%(LOCALE)s/'
|
| return regex % {'PREFIX': self.candidate_build_list_regex,
|
| 'BUILD': self.builds[self.build_index],
|
| 'LOCALE': self.locale,
|
| - 'PLATFORM': self.platform_regex,
|
| - 'UNSIGNED': "unsigned/" if self.unsigned else ""}
|
| + 'PLATFORM': self.platform_regex}
|
| +
|
| + @property
|
| + def platform_regex(self):
|
| + """Return the platform fragment of the URL"""
|
|
|
| + if self.platform == 'win64':
|
| + return self.platform
|
| +
|
| + return PLATFORM_FRAGMENTS[self.platform]
|
|
|
| def build_filename(self, binary):
|
| """Return the proposed filename with extension for the binary"""
|
|
|
| - template = '%(APP)s-%(VERSION)s-build%(BUILD)s.%(LOCALE)s.%(PLATFORM)s.%(EXT)s'
|
| + template = '%(APP)s-%(VERSION)s-%(BUILD)s.%(LOCALE)s.' \
|
| + '%(PLATFORM)s%(STUB)s.%(EXT)s'
|
| return template % {'APP': self.application,
|
| 'VERSION': self.version,
|
| 'BUILD': self.builds[self.build_index],
|
| 'LOCALE': self.locale,
|
| 'PLATFORM': self.platform,
|
| + 'STUB': '-stub' if self.is_stub_installer else '',
|
| 'EXT': self.extension}
|
|
|
| -
|
    def download(self):
        """Download the specified file.

        Delegates to the base scraper; a missing (not found) candidate
        build is logged instead of being propagated to the caller.
        """

        try:
            # Try to download the signed candidate build
            Scraper.download(self)
        except errors.NotFoundError, e:
            # NOTE(review): the error is swallowed here, so a missing build
            # ends the download silently apart from the log entry — confirm
            # that callers do not rely on the exception being raised.
            self.logger.exception(str(e))
|
|
|
|
|
| class TinderboxScraper(Scraper):
|
| @@ -497,86 +722,91 @@ class TinderboxScraper(Scraper):
|
|
|
    def __init__(self, branch='mozilla-central', build_number=None, date=None,
                 debug_build=False, *args, **kwargs):
        # branch: name of the source branch, e.g. 'mozilla-central'
        # build_number: 1-based number of the build to select
        # date: day as 'YYYY-MM-DD' or a unix timestamp string
        # debug_build: whether to download a debug build

        self.branch = branch
        self.build_number = build_number
        self.debug_build = debug_build
        self.date = date

        self.timestamp = None
        # Currently any time in RelEng is based on the Pacific time zone.
        self.timezone = PacificTimezone()

        # The base class constructor drives get_build_info(), so all of the
        # attributes above have to be assigned before this call.
        Scraper.__init__(self, *args, **kwargs)
|
| +
|
| + def get_build_info(self):
|
| + "Defines additional build information"
|
|
|
| # Internally we access builds via index
|
| - if build_number is not None:
|
| - self.build_index = int(build_number) - 1
|
| + if self.build_number is not None:
|
| + self.build_index = int(self.build_number) - 1
|
| else:
|
| self.build_index = None
|
|
|
| - if date is not None:
|
| + if self.date is not None:
|
| try:
|
| - self.date = datetime.fromtimestamp(float(date), self.timezone)
|
| - self.timestamp = date
|
| + # date is provided in the format 2013-07-23
|
| + self.date = datetime.strptime(self.date, '%Y-%m-%d')
|
| except:
|
| - self.date = datetime.strptime(date, '%Y-%m-%d')
|
| - else:
|
| - self.date = None
|
| + try:
|
| + # date is provided as a unix timestamp
|
| + datetime.fromtimestamp(float(self.date))
|
| + self.timestamp = self.date
|
| + except:
|
| + raise ValueError('%s is not a valid date' % self.date)
|
|
|
| + self.locale_build = self.locale != 'en-US'
|
| # For localized builds we do not have to retrieve the list of builds
|
| # because only the last build is available
|
| if not self.locale_build:
|
| - self.builds, self.build_index = self.get_build_info(self.build_index)
|
| -
|
| - try:
|
| - self.timestamp = self.builds[self.build_index]
|
| - except:
|
| - raise NotFoundException("Specified sub folder cannot be found",
|
| - self.base_url + self.monthly_build_list_regex)
|
| -
|
| + self.builds, self.build_index = self.get_build_info_for_index(
|
| + self.build_index)
|
|
|
| @property
|
| def binary_regex(self):
|
| """Return the regex for the binary"""
|
|
|
| - regex_base_name = r'^%(APP)s-.*\.%(LOCALE)s\.'
|
| + regex_base_name = r'^%(APP)s-.*\.%(LOCALE)s\.%(PLATFORM)s'
|
| regex_suffix = {'linux': r'.*\.%(EXT)s$',
|
| 'linux64': r'.*\.%(EXT)s$',
|
| 'mac': r'.*\.%(EXT)s$',
|
| 'mac64': r'.*\.%(EXT)s$',
|
| - 'win32': r'.*(\.installer)\.%(EXT)s$',
|
| - 'win64': r'.*(\.installer)\.%(EXT)s$'}
|
| + 'win32': r'(\.installer%(STUB)s)?\.%(EXT)s$',
|
| + 'win64': r'(\.installer%(STUB)s)?\.%(EXT)s$'}
|
|
|
| regex = regex_base_name + regex_suffix[self.platform]
|
|
|
| return regex % {'APP': self.application,
|
| 'LOCALE': self.locale,
|
| + 'PLATFORM': PLATFORM_FRAGMENTS[self.platform],
|
| + 'STUB': '-stub' if self.is_stub_installer else '',
|
| 'EXT': self.extension}
|
|
|
| -
|
| def build_filename(self, binary):
|
| """Return the proposed filename with extension for the binary"""
|
|
|
| return '%(TIMESTAMP)s%(BRANCH)s%(DEBUG)s-%(NAME)s' % {
|
| - 'TIMESTAMP': self.timestamp + '-' if self.timestamp else '',
|
| - 'BRANCH': self.branch,
|
| - 'DEBUG': '-debug' if self.debug_build else '',
|
| - 'NAME': binary}
|
| -
|
| + 'TIMESTAMP': self.timestamp + '-' if self.timestamp else '',
|
| + 'BRANCH': self.branch,
|
| + 'DEBUG': '-debug' if self.debug_build else '',
|
| + 'NAME': binary}
|
|
|
| @property
|
| def build_list_regex(self):
|
| """Return the regex for the folder which contains the list of builds"""
|
|
|
| - regex = 'tinderbox-builds/%(BRANCH)s-%(PLATFORM)s%(L10N)s%(DEBUG)s'
|
| -
|
| - return regex % {'BRANCH': self.branch,
|
| - 'PLATFORM': '' if self.locale_build else self.platform_regex,
|
| - 'L10N': 'l10n' if self.locale_build else '',
|
| - 'DEBUG': '-debug' if self.debug_build else ''}
|
| + regex = 'tinderbox-builds/%(BRANCH)s-%(PLATFORM)s%(L10N)s%(DEBUG)s/'
|
|
|
| + return regex % {
|
| + 'BRANCH': self.branch,
|
| + 'PLATFORM': '' if self.locale_build else self.platform_regex,
|
| + 'L10N': 'l10n' if self.locale_build else '',
|
| + 'DEBUG': '-debug' if self.debug_build else ''}
|
|
|
| def date_matches(self, timestamp):
|
| - """Determines whether the timestamp date is equal to the argument date"""
|
| + """
|
| + Determines whether the timestamp date is equal to the argument date
|
| + """
|
|
|
| if self.date is None:
|
| return False
|
| @@ -584,65 +814,89 @@ class TinderboxScraper(Scraper):
|
| timestamp = datetime.fromtimestamp(float(timestamp), self.timezone)
|
| if self.date.date() == timestamp.date():
|
| return True
|
| -
|
| - return False
|
| -
|
| -
|
| - @property
|
| - def date_validation_regex(self):
|
| - """Return the regex for a valid date argument value"""
|
| -
|
| - return r'^\d{4}-\d{1,2}-\d{1,2}$|^\d+$'
|
|
|
| + return False
|
|
|
| def detect_platform(self):
|
| """Detect the current platform"""
|
|
|
| platform = Scraper.detect_platform(self)
|
|
|
| - # On OS X we have to special case the platform detection code and fallback
|
| - # to 64 bit builds for the en-US locale
|
| - if mozinfo.os == 'mac' and self.locale == 'en-US' and mozinfo.bits == 64:
|
| + # On OS X we have to special case the platform detection code and
|
| + # fallback to 64 bit builds for the en-US locale
|
| + if mozinfo.os == 'mac' and self.locale == 'en-US' and \
|
| + mozinfo.bits == 64:
|
| platform = "%s%d" % (mozinfo.os, mozinfo.bits)
|
|
|
| return platform
|
|
|
| + def is_build_dir(self, folder_name):
|
| + """Return whether or not the given dir contains a build."""
|
|
|
| - def get_build_info(self, build_index=None):
|
| - url = '/'.join([self.base_url, self.build_list_regex])
|
| + # Cannot move up to base scraper due to parser.entries call in
|
| + # get_build_info_for_index (see below)
|
| + url = '%s/' % urljoin(self.base_url, self.build_list_regex, folder_name)
|
|
|
| - print 'Retrieving list of builds from %s' % url
|
| + if self.application in APPLICATIONS_MULTI_LOCALE \
|
| + and self.locale != 'multi':
|
| + url = '%s/' % urljoin(url, self.locale)
|
|
|
| - # If a timestamp is given, retrieve just that build
|
| - regex = '^' + self.timestamp + '$' if self.timestamp else r'^\d+$'
|
| + parser = self._create_directory_parser(url)
|
|
|
| - parser = DirectoryParser(url)
|
| - parser.entries = parser.filter(regex)
|
| + pattern = re.compile(self.binary_regex, re.IGNORECASE)
|
| + for entry in parser.entries:
|
| + try:
|
| + pattern.match(entry).group()
|
| + return True
|
| + except:
|
| + # No match, continue with next entry
|
| + continue
|
| + return False
|
|
|
| - # If date is given, retrieve the subset of builds on that date
|
| - if self.date is not None:
|
    def get_build_info_for_index(self, build_index=None):
        """Return the list of tinderbox build folders and the selected index.

        :param build_index: 0-based index into the build list; when None the
            most recent folder which actually contains a build is selected.
        :raises errors.NotFoundError: when no matching build folder exists.
        """
        url = urljoin(self.base_url, self.build_list_regex)

        self.logger.info('Retrieving list of builds from %s' % url)
        parser = self._create_directory_parser(url)
        # Tinderbox build folders are named after their unix timestamp
        parser.entries = parser.filter(r'^\d+$')

        if self.timestamp:
            # If a timestamp is given, retrieve the folder with the timestamp
            # as name
            # NOTE(review): when the timestamp is absent this assigns False,
            # which the emptiness check below turns into a NotFoundError.
            parser.entries = self.timestamp in parser.entries and \
                [self.timestamp]

        elif self.date:
            # If date is given, retrieve the subset of builds on that date
            # NOTE(review): relies on Python 2 filter() returning a list —
            # reversed() below requires a sequence.
            parser.entries = filter(self.date_matches, parser.entries)

        if not parser.entries:
            message = 'No builds have been found'
            raise errors.NotFoundError(message, url)

        self.show_matching_builds(parser.entries)

        # If no index has been given, set it to the last build of the day.
        if build_index is None:
            # Find the most recent non-empty entry.
            build_index = len(parser.entries)
            for build in reversed(parser.entries):
                build_index -= 1
                if not build_index or self.is_build_dir(build):
                    break

        self.logger.info('Selected build: %s' % parser.entries[build_index])

        return (parser.entries, build_index)
|
|
|
| @property
|
| def path_regex(self):
|
| - """Return the regex for the path"""
|
| + """Return the regex for the path to the build folder"""
|
|
|
| if self.locale_build:
|
| return self.build_list_regex
|
|
|
| - return '/'.join([self.build_list_regex, self.builds[self.build_index]])
|
| -
|
| + return '%s/' % urljoin(self.build_list_regex, self.builds[self.build_index])
|
|
|
| @property
|
| def platform_regex(self):
|
| @@ -650,7 +904,7 @@ class TinderboxScraper(Scraper):
|
|
|
| PLATFORM_FRAGMENTS = {'linux': 'linux',
|
| 'linux64': 'linux64',
|
| - 'mac': 'macosx',
|
| + 'mac': 'macosx64',
|
| 'mac64': 'macosx64',
|
| 'win32': 'win32',
|
| 'win64': 'win64'}
|
| @@ -658,178 +912,104 @@ class TinderboxScraper(Scraper):
|
| return PLATFORM_FRAGMENTS[self.platform]
|
|
|
|
|
| -def cli():
|
| - """Main function for the downloader"""
|
| -
|
| - BUILD_TYPES = {'release': ReleaseScraper,
|
| - 'candidate': ReleaseCandidateScraper,
|
| - 'daily': DailyScraper,
|
| - 'tinderbox': TinderboxScraper }
|
| -
|
| - usage = 'usage: %prog [options]'
|
| - parser = OptionParser(usage=usage, description=__doc__)
|
| - parser.add_option('--application', '-a',
|
| - dest='application',
|
| - choices=APPLICATIONS,
|
| - default='firefox',
|
| - metavar='APPLICATION',
|
| - help='The name of the application to download, '
|
| - 'default: "%default"')
|
| - parser.add_option('--directory', '-d',
|
| - dest='directory',
|
| - default=os.getcwd(),
|
| - metavar='DIRECTORY',
|
| - help='Target directory for the download, default: '
|
| - 'current working directory')
|
| - parser.add_option('--build-number',
|
| - dest='build_number',
|
| - default=None,
|
| - type="int",
|
| - metavar='BUILD_NUMBER',
|
| - help='Number of the build (for candidate, daily, '
|
| - 'and tinderbox builds)')
|
| - parser.add_option('--locale', '-l',
|
| - dest='locale',
|
| - default='en-US',
|
| - metavar='LOCALE',
|
| - help='Locale of the application, default: "%default"')
|
| - parser.add_option('--platform', '-p',
|
| - dest='platform',
|
| - choices=PLATFORM_FRAGMENTS.keys(),
|
| - metavar='PLATFORM',
|
| - help='Platform of the application')
|
| - parser.add_option('--type', '-t',
|
| - dest='type',
|
| - choices=BUILD_TYPES.keys(),
|
| - default='release',
|
| - metavar='BUILD_TYPE',
|
| - help='Type of build to download, default: "%default"')
|
| - parser.add_option('--url',
|
| - dest='url',
|
| - default=None,
|
| - metavar='URL',
|
| - help='URL to download.')
|
| - parser.add_option('--version', '-v',
|
| - dest='version',
|
| - metavar='VERSION',
|
| - help='Version of the application to be used by release and\
|
| - candidate builds, i.e. "3.6"')
|
| - parser.add_option('--extension',
|
| - dest='extension',
|
| - default=None,
|
| - metavar='EXTENSION',
|
| - help='File extension of the build (e.g. "zip"), default:\
|
| - the standard build extension on the platform.')
|
| - parser.add_option('--username',
|
| - dest='username',
|
| - default=None,
|
| - metavar='USERNAME',
|
| - help='Username for basic HTTP authentication.')
|
| - parser.add_option('--password',
|
| - dest='password',
|
| - default=None,
|
| - metavar='PASSWORD',
|
| - help='Password for basic HTTP authentication.')
|
| - parser.add_option('--retry-attempts',
|
| - dest='retry_attempts',
|
| - default=3,
|
| - type=int,
|
| - metavar='RETRY_ATTEMPTS',
|
| - help='Number of times the download will be attempted in '
|
| - 'the event of a failure, default: %default')
|
| - parser.add_option('--retry-delay',
|
| - dest='retry_delay',
|
| - default=10,
|
| - type=int,
|
| - metavar='RETRY_DELAY',
|
| - help='Amount of time (in seconds) to wait between retry '
|
| - 'attempts, default: %default')
|
| -
|
| - # Option group for candidate builds
|
| - group = OptionGroup(parser, "Candidate builds",
|
| - "Extra options for candidate builds.")
|
| - group.add_option('--no-unsigned',
|
| - dest='no_unsigned',
|
| - action="store_true",
|
| - help="Don't allow to download unsigned builds if signed\
|
| - builds are not available")
|
| - parser.add_option_group(group)
|
| -
|
| - # Option group for daily builds
|
| - group = OptionGroup(parser, "Daily builds",
|
| - "Extra options for daily builds.")
|
| - group.add_option('--branch',
|
| - dest='branch',
|
| - default='mozilla-central',
|
| - metavar='BRANCH',
|
| - help='Name of the branch, default: "%default"')
|
| - group.add_option('--build-id',
|
| - dest='build_id',
|
| - default=None,
|
| - metavar='BUILD_ID',
|
| - help='ID of the build to download')
|
| - group.add_option('--date',
|
| - dest='date',
|
| - default=None,
|
| - metavar='DATE',
|
| - help='Date of the build, default: latest build')
|
| - parser.add_option_group(group)
|
| -
|
| - # Option group for tinderbox builds
|
| - group = OptionGroup(parser, "Tinderbox builds",
|
| - "Extra options for tinderbox builds.")
|
| - group.add_option('--debug-build',
|
| - dest='debug_build',
|
| - action="store_true",
|
| - help="Download a debug build")
|
| - parser.add_option_group(group)
|
| -
|
| - # TODO: option group for nightly builds
|
| - (options, args) = parser.parse_args()
|
| -
|
| - # Check for required options and arguments
|
| - # Note: Will be optional when ini file support has been landed
|
| - if not options.url \
|
| - and not options.type in ['daily', 'tinderbox'] \
|
| - and not options.version:
|
| - parser.error('The version of the application to download has not been specified.')
|
| -
|
| - # Instantiate scraper and download the build
|
| - scraper_keywords = {'application': options.application,
|
| - 'locale': options.locale,
|
| - 'platform': options.platform,
|
| - 'version': options.version,
|
| - 'directory': options.directory,
|
| - 'extension': options.extension,
|
| - 'authentication': {
|
| - 'username': options.username,
|
| - 'password': options.password},
|
| - 'retry_attempts': options.retry_attempts,
|
| - 'retry_delay': options.retry_delay}
|
| - scraper_options = {'candidate': {
|
| - 'build_number': options.build_number,
|
| - 'no_unsigned': options.no_unsigned},
|
| - 'daily': {
|
| - 'branch': options.branch,
|
| - 'build_number': options.build_number,
|
| - 'build_id': options.build_id,
|
| - 'date': options.date},
|
| - 'tinderbox': {
|
| - 'branch': options.branch,
|
| - 'build_number': options.build_number,
|
| - 'date': options.date,
|
| - 'debug_build': options.debug_build}
|
| - }
|
| -
|
| - kwargs = scraper_keywords.copy()
|
| - kwargs.update(scraper_options.get(options.type, {}))
|
| -
|
| - if options.url:
|
| - build = DirectScraper(options.url, **kwargs)
|
| - else:
|
| - build = BUILD_TYPES[options.type](**kwargs)
|
| -
|
| - build.download()
|
| -
|
| -if __name__ == "__main__":
|
| - cli()
|
class TryScraper(Scraper):
    "Class to download a try build from the Mozilla server."

    def __init__(self, changeset=None, debug_build=False, *args, **kwargs):
        # changeset: revision hash which identifies the try push
        # debug_build: whether to download a debug build

        self.debug_build = debug_build
        self.changeset = changeset

        Scraper.__init__(self, *args, **kwargs)

    def get_build_info(self):
        "Defines additional build information"

        # A try push maps to exactly one folder, so the index is fixed to 0
        self.builds, self.build_index = self.get_build_info_for_index()

    @property
    def binary_regex(self):
        """Return the regex for the binary"""

        regex_base_name = r'^%(APP)s-.*\.%(LOCALE)s\.%(PLATFORM)s'
        # Windows builds ship as installers; other platforms are archives
        regex_suffix = {'linux': r'.*\.%(EXT)s$',
                        'linux64': r'.*\.%(EXT)s$',
                        'mac': r'.*\.%(EXT)s$',
                        'mac64': r'.*\.%(EXT)s$',
                        'win32': r'.*(\.installer%(STUB)s)\.%(EXT)s$',
                        'win64': r'.*(\.installer%(STUB)s)\.%(EXT)s$'}

        regex = regex_base_name + regex_suffix[self.platform]

        return regex % {'APP': self.application,
                        'LOCALE': self.locale,
                        'PLATFORM': PLATFORM_FRAGMENTS[self.platform],
                        'STUB': '-stub' if self.is_stub_installer else '',
                        'EXT': self.extension}

    def build_filename(self, binary):
        """Return the proposed filename with extension for the binary"""

        return '%(CHANGESET)s%(DEBUG)s-%(NAME)s' % {
            'CHANGESET': self.changeset,
            'DEBUG': '-debug' if self.debug_build else '',
            'NAME': binary}

    @property
    def build_list_regex(self):
        """Return the regex for the folder which contains the list of builds"""

        return 'try-builds/'

    def detect_platform(self):
        """Detect the current platform"""

        platform = Scraper.detect_platform(self)

        # On OS X we have to special case the platform detection code and
        # fallback to 64 bit builds for the en-US locale
        if mozinfo.os == 'mac' and self.locale == 'en-US' and \
                mozinfo.bits == 64:
            platform = "%s%d" % (mozinfo.os, mozinfo.bits)

        return platform

    def get_build_info_for_index(self, build_index=None):
        # Try folders are named '<timestamp>-<changeset>', so match the
        # requested changeset at the end of the folder name.
        url = urljoin(self.base_url, self.build_list_regex)

        self.logger.info('Retrieving list of builds from %s' % url)
        parser = self._create_directory_parser(url)
        parser.entries = parser.filter('.*-%s$' % self.changeset)

        if not parser.entries:
            raise errors.NotFoundError('No builds have been found', url)

        self.show_matching_builds(parser.entries)

        # The first matching folder is always selected for a changeset
        self.logger.info('Selected build: %s' % parser.entries[0])

        return (parser.entries, 0)

    @property
    def path_regex(self):
        """Return the regex for the path to the build folder"""

        build_dir = 'try-%(PLATFORM)s%(DEBUG)s/' % {
            'PLATFORM': self.platform_regex,
            'DEBUG': '-debug' if self.debug_build else ''}
        return urljoin(self.build_list_regex,
                       self.builds[self.build_index],
                       build_dir)

    @property
    def platform_regex(self):
        """Return the platform fragment of the URL"""

        # Maps scraper platform names to the folder fragments on the server;
        # 'mac' maps to the 64 bit folder as well.
        PLATFORM_FRAGMENTS = {'linux': 'linux',
                              'linux64': 'linux64',
                              'mac': 'macosx64',
                              'mac64': 'macosx64',
                              'win32': 'win32',
                              'win64': 'win64'}

        return PLATFORM_FRAGMENTS[self.platform]
|
|
|