Chromium Code Reviews| Index: mozdownload/scraper.py |
| diff --git a/mozdownload/scraper.py b/mozdownload/scraper.py |
| index 9011cab26c1801ea9804caff62fc60a8b812eee3..6122f8c02379d154f95ae5fee3fda3920a004482 100755 |
| --- a/mozdownload/scraper.py |
| +++ b/mozdownload/scraper.py |
| @@ -1,211 +1,358 @@ |
| -#!/usr/bin/env python |
| - |
| # This Source Code Form is subject to the terms of the Mozilla Public |
| # License, v. 2.0. If a copy of the MPL was not distributed with this |
| # file, You can obtain one at http://mozilla.org/MPL/2.0/. |
| -"""Module to handle downloads for different types of Firefox and Thunderbird builds.""" |
| - |
| - |
| from datetime import datetime |
| -from optparse import OptionParser, OptionGroup |
| +import logging |
| import os |
| import re |
| +import requests |
| import sys |
| import time |
| import urllib |
| -import urllib2 |
| +from urlparse import urlparse |
| import mozinfo |
| +import progressbar as pb |
|
kjellander_chromium
2015/11/17 10:19:29
Any idea how the script can work without the progr
phoglund_chromium
2015/11/17 11:13:07
Crap, you're right. I have it installed on my mach
kjellander_chromium
2015/11/17 12:57:17
Not necessarily screwed since we can bundle those
phoglund_chromium
2015/11/17 13:47:03
I managed to pull requests from the mirror you ind
|
| + |
| +import errors |
| from parser import DirectoryParser |
| from timezones import PacificTimezone |
| +from utils import urljoin |
# Applications that can be downloaded from the Mozilla server.
APPLICATIONS = ('b2g', 'firefox', 'fennec', 'thunderbird')

# Some applications contain all locales in a single build
APPLICATIONS_MULTI_LOCALE = ('b2g', 'fennec')

# Used if the application is named differently than the subfolder on the server
APPLICATIONS_TO_FTP_DIRECTORY = {'fennec': 'mobile'}

# Base URL for the path to all builds
BASE_URL = 'https://archive.mozilla.org/pub/'

# Chunk size when downloading a file
CHUNK_SIZE = 16 * 1024

# Default archive extension per platform when none is given explicitly.
DEFAULT_FILE_EXTENSIONS = {
    'android-api-9': 'apk',
    'android-api-11': 'apk',
    'android-x86': 'apk',
    'linux': 'tar.bz2',
    'linux64': 'tar.bz2',
    'mac': 'dmg',
    'mac64': 'dmg',
    'win32': 'exe',
    'win64': 'exe',
}

# Regex fragment matching the platform part of a build's file/folder name.
PLATFORM_FRAGMENTS = {
    'android-api-9': r'android-arm',
    'android-api-11': r'android-arm',
    'android-x86': r'android-i386',
    'linux': r'linux-i686',
    'linux64': r'linux-x86_64',
    'mac': r'mac',
    'mac64': r'mac(64)?',
    'win32': r'win32',
    'win64': r'win64(-x86_64)?',
}
class Scraper(object):
    """Generic class to download an application from the Mozilla server"""

    def __init__(self, destination=None, platform=None,
                 application='firefox', locale=None, extension=None,
                 username=None, password=None,
                 retry_attempts=0, retry_delay=10.,
                 is_stub_installer=False, timeout=None,
                 log_level='INFO',
                 base_url=BASE_URL):
        """Create the scraper and immediately resolve the build info.

        :param destination: target directory or file path (default: cwd)
        :param platform: platform key (auto-detected when omitted)
        :param application: one of APPLICATIONS
        :param locale: build locale; defaults to 'multi' for multi-locale
            applications, 'en-US' otherwise
        :param extension: file extension override
        :param username, password: optional HTTP basic-auth credentials
        :param retry_attempts: how often to retry on lookup/network failure
        :param retry_delay: seconds to wait between retries
        :param is_stub_installer: select the stub installer on Windows
        :param timeout: overall download timeout in seconds
        :param log_level: logging level name for the module logger
        :param base_url: root of the archive server
        """
        # Private properties for caching
        self._filename = None
        self._binary = None

        self.destination = destination or os.getcwd()

        if not locale:
            # Multi-locale applications ship all locales in one build
            if application in APPLICATIONS_MULTI_LOCALE:
                self.locale = 'multi'
            else:
                self.locale = 'en-US'
        else:
            self.locale = locale

        self.platform = platform or self.detect_platform()

        self.session = requests.Session()
        # Attach basic auth if at least one credential was supplied
        if (username, password) != (None, None):
            self.session.auth = (username, password)

        self.retry_attempts = retry_attempts
        self.retry_delay = retry_delay
        self.is_stub_installer = is_stub_installer
        self.timeout_download = timeout
        # This is the timeout used in requests.get. Unlike "auth",
        # it does not work if we attach it on the session, so we handle
        # it independently.
        self.timeout_network = 60.

        logging.basicConfig(format=' %(levelname)s | %(message)s')
        self.logger = logging.getLogger(self.__module__)
        self.logger.setLevel(log_level)

        # build the base URL
        self.application = application
        self.base_url = '%s/' % urljoin(
            base_url,
            APPLICATIONS_TO_FTP_DIRECTORY.get(self.application,
                                              self.application)
        )

        if extension:
            self.extension = extension
        elif self.application in APPLICATIONS_MULTI_LOCALE and \
                self.platform in ('win32', 'win64'):
            # builds for APPLICATIONS_MULTI_LOCALE only exist in zip
            self.extension = 'zip'
        else:
            self.extension = DEFAULT_FILE_EXTENSIONS[self.platform]

        attempt = 0
        while True:
            attempt += 1
            try:
                self.get_build_info()
                break
            except (errors.NotFoundError,
                    requests.exceptions.RequestException) as e:
                if self.retry_attempts > 0:
                    # Log only if multiple attempts are requested
                    # FIX: str(e) instead of e.message (not available on
                    # requests exceptions / removed in later Pythons)
                    self.logger.warning("Build not found: '%s'" % str(e))
                    self.logger.info('Will retry in %s seconds...' %
                                     self.retry_delay)
                    time.sleep(self.retry_delay)
                    self.logger.info("Retrying... (attempt %s)" % attempt)

                if attempt >= self.retry_attempts:
                    # FIX: requests exceptions may carry response=None;
                    # hasattr alone would then raise AttributeError
                    response = getattr(e, 'response', None)
                    if response is not None and response.status_code == 404:
                        message = "Specified build has not been found"
                        raise errors.NotFoundError(message, response.url)
                    else:
                        raise

    def _create_directory_parser(self, url):
        """Build a DirectoryParser sharing this scraper's session/timeout."""
        return DirectoryParser(url,
                               session=self.session,
                               timeout=self.timeout_network)
| @property |
| def binary(self): |
| """Return the name of the build""" |
| - if self._binary is None: |
| - # Retrieve all entries from the remote virtual folder |
| - parser = DirectoryParser(self.path) |
| - if not parser.entries: |
| - raise NotFoundException('No entries found', self.path) |
| - |
| - # Download the first matched directory entry |
| - pattern = re.compile(self.binary_regex, re.IGNORECASE) |
| - for entry in parser.entries: |
| - try: |
| - self._binary = pattern.match(entry).group() |
| - break |
| - except: |
| - # No match, continue with next entry |
| - continue |
| - |
| - if self._binary is None: |
| - raise NotFoundException("Binary not found in folder", self.path) |
| - else: |
| - return self._binary |
| + attempt = 0 |
| + while self._binary is None: |
| + attempt += 1 |
| + try: |
| + # Retrieve all entries from the remote virtual folder |
| + parser = self._create_directory_parser(self.path) |
| + if not parser.entries: |
| + raise errors.NotFoundError('No entries found', self.path) |
| + |
| + # Download the first matched directory entry |
| + pattern = re.compile(self.binary_regex, re.IGNORECASE) |
| + for entry in parser.entries: |
| + try: |
| + self._binary = pattern.match(entry).group() |
| + break |
| + except: |
| + # No match, continue with next entry |
| + continue |
| + else: |
| + raise errors.NotFoundError("Binary not found in folder", |
| + self.path) |
| + except (errors.NotFoundError, requests.exceptions.RequestException), e: |
| + if self.retry_attempts > 0: |
| + # Log only if multiple attempts are requested |
| + self.logger.warning("Build not found: '%s'" % e.message) |
| + self.logger.info('Will retry in %s seconds...' % |
| + (self.retry_delay)) |
| + time.sleep(self.retry_delay) |
| + self.logger.info("Retrying... (attempt %s)" % attempt) |
| + |
| + if attempt >= self.retry_attempts: |
| + if hasattr(e, 'response') and \ |
| + e.response.status_code == 404: |
| + message = "Specified build has not been found" |
| + raise errors.NotFoundError(message, self.path) |
| + else: |
| + raise |
| + |
| + return self._binary |
| @property |
| def binary_regex(self): |
| """Return the regex for the binary filename""" |
| - raise NotImplementedError(sys._getframe(0).f_code.co_name) |
| - |
| + raise errors.NotImplementedError(sys._getframe(0).f_code.co_name) |
| @property |
| - def final_url(self): |
| - """Return the final URL of the build""" |
| - |
| - return '/'.join([self.path, self.binary]) |
| + def url(self): |
| + """Return the URL of the build""" |
| + return urljoin(self.path, self.binary) |
| @property |
| def path(self): |
| - """Return the path to the build""" |
| - |
| - return '/'.join([self.base_url, self.path_regex]) |
| + """Return the path to the build folder""" |
| + return urljoin(self.base_url, self.path_regex) |
| @property |
| def path_regex(self): |
| - """Return the regex for the path to the build""" |
| - |
| - raise NotImplementedError(sys._getframe(0).f_code.co_name) |
| + """Return the regex for the path to the build folder""" |
| + raise errors.NotImplementedError(sys._getframe(0).f_code.co_name) |
| @property |
| def platform_regex(self): |
| """Return the platform fragment of the URL""" |
| - return PLATFORM_FRAGMENTS[self.platform]; |
| - |
| + return PLATFORM_FRAGMENTS[self.platform] |
| @property |
| - def target(self): |
| - """Return the target file name of the build""" |
| + def filename(self): |
| + """Return the local filename of the build""" |
| + |
| + if self._filename is None: |
| + if os.path.splitext(self.destination)[1]: |
| + # If the filename has been given make use of it |
| + target_file = self.destination |
| + else: |
| + # Otherwise create it from the build details |
| + target_file = os.path.join(self.destination, |
| + self.build_filename(self.binary)) |
| + |
| + self._filename = os.path.abspath(target_file) |
| - if self._target is None: |
| - self._target = os.path.join(self.directory, |
| - self.build_filename(self.binary)) |
| - return self._target |
| + return self._filename |
| + def get_build_info(self): |
| + """Returns additional build information in subclasses if necessary""" |
| + pass |
| def build_filename(self, binary): |
| """Return the proposed filename with extension for the binary""" |
| - raise NotImplementedError(sys._getframe(0).f_code.co_name) |
| - |
| + raise errors.NotImplementedError(sys._getframe(0).f_code.co_name) |
| def detect_platform(self): |
| """Detect the current platform""" |
| # For Mac and Linux 32bit we do not need the bits appended |
| - if mozinfo.os == 'mac' or (mozinfo.os == 'linux' and mozinfo.bits == 32): |
| + if mozinfo.os == 'mac' or \ |
| + (mozinfo.os == 'linux' and mozinfo.bits == 32): |
| return mozinfo.os |
| else: |
| return "%s%d" % (mozinfo.os, mozinfo.bits) |
| - |
| def download(self): |
| """Download the specified file""" |
| - attempts = 0 |
| + def total_seconds(td): |
| + # Keep backward compatibility with Python 2.6 which doesn't have |
| + # this method |
| + if hasattr(td, 'total_seconds'): |
| + return td.total_seconds() |
| + else: |
| + return (td.microseconds + |
| + (td.seconds + td.days * 24 * 3600) * 10 ** 6) / 10 ** 6 |
| - if not os.path.isdir(self.directory): |
| - os.makedirs(self.directory) |
| + attempt = 0 |
| # Don't re-download the file |
| - if os.path.isfile(os.path.abspath(self.target)): |
| - print "File has already been downloaded: %s" % (self.target) |
| - return |
| - |
| - print 'Downloading from: %s' % (urllib.unquote(self.final_url)) |
| - tmp_file = self.target + ".part" |
| - |
| - if self.authentication \ |
| - and self.authentication['username'] \ |
| - and self.authentication['password']: |
| - password_mgr = urllib2.HTTPPasswordMgrWithDefaultRealm() |
| - password_mgr.add_password(None, |
| - self.final_url, |
| - self.authentication['username'], |
| - self.authentication['password']) |
| - handler = urllib2.HTTPBasicAuthHandler(password_mgr) |
| - opener = urllib2.build_opener(urllib2.HTTPHandler, handler) |
| - urllib2.install_opener(opener) |
| + if os.path.isfile(os.path.abspath(self.filename)): |
| + self.logger.info("File has already been downloaded: %s" % |
| + (self.filename)) |
| + return self.filename |
| + |
| + directory = os.path.dirname(self.filename) |
| + if not os.path.isdir(directory): |
| + os.makedirs(directory) |
| + |
| + self.logger.info('Downloading from: %s' % |
| + (urllib.unquote(self.url))) |
| + self.logger.info('Saving as: %s' % self.filename) |
| + |
| + tmp_file = self.filename + ".part" |
| while True: |
| - attempts += 1 |
| + attempt += 1 |
| try: |
| - r = urllib2.urlopen(self.final_url) |
| - CHUNK = 16 * 1024 |
| + start_time = datetime.now() |
| + |
| + # Enable streaming mode so we can download content in chunks |
| + r = self.session.get(self.url, stream=True) |
| + r.raise_for_status() |
| + |
| + content_length = r.headers.get('Content-length') |
| + # ValueError: Value out of range if only total_size given |
| + if content_length: |
| + total_size = int(content_length.strip()) |
| + max_value = ((total_size / CHUNK_SIZE) + 1) * CHUNK_SIZE |
| + |
| + bytes_downloaded = 0 |
| + |
| + log_level = self.logger.getEffectiveLevel() |
| + if log_level <= logging.INFO and content_length: |
| + widgets = [pb.Percentage(), ' ', pb.Bar(), ' ', pb.ETA(), |
| + ' ', pb.FileTransferSpeed()] |
| + pbar = pb.ProgressBar(widgets=widgets, |
| + maxval=max_value).start() |
| + |
| with open(tmp_file, 'wb') as f: |
| - for chunk in iter(lambda: r.read(CHUNK), ''): |
| + for chunk in iter(lambda: r.raw.read(CHUNK_SIZE), ''): |
| f.write(chunk) |
| + bytes_downloaded += CHUNK_SIZE |
| + |
| + if log_level <= logging.INFO and content_length: |
| + pbar.update(bytes_downloaded) |
| + |
| + t1 = total_seconds(datetime.now() - start_time) |
| + if self.timeout_download and \ |
| + t1 >= self.timeout_download: |
| + raise errors.TimeoutError |
| + |
| + if log_level <= logging.INFO and content_length: |
| + pbar.finish() |
| break |
| - except (urllib2.HTTPError, urllib2.URLError): |
| + except (requests.exceptions.RequestException, errors.TimeoutError), e: |
| if tmp_file and os.path.isfile(tmp_file): |
| os.remove(tmp_file) |
| - print 'Download failed! Retrying... (attempt %s)' % attempts |
| - if attempts >= self.retry_attempts: |
| + if self.retry_attempts > 0: |
| + # Log only if multiple attempts are requested |
| + self.logger.warning('Download failed: "%s"' % str(e)) |
| + self.logger.info('Will retry in %s seconds...' % |
| + (self.retry_delay)) |
| + time.sleep(self.retry_delay) |
| + self.logger.info("Retrying... (attempt %s)" % attempt) |
| + if attempt >= self.retry_attempts: |
| raise |
| time.sleep(self.retry_delay) |
| - os.rename(tmp_file, self.target) |
| + os.rename(tmp_file, self.filename) |
| + |
| + return self.filename |
| + |
| + def show_matching_builds(self, builds): |
| + """Output the matching builds""" |
| + self.logger.info('Found %s build%s: %s' % ( |
| + len(builds), |
| + len(builds) > 1 and 's' or '', |
| + len(builds) > 10 and |
| + ' ... '.join([', '.join(builds[:5]), ', '.join(builds[-5:])]) or |
| + ', '.join(builds))) |
class DailyScraper(Scraper):
    """Class to download a daily (nightly) build from the Mozilla server."""

    def __init__(self, branch='mozilla-central', build_id=None, date=None,
                 build_number=None, *args, **kwargs):
        # Store the selection criteria before the base class resolves the
        # build via get_build_info()
        self.branch = branch
        self.build_id = build_id
        self.date = date
        self.build_number = build_number

        Scraper.__init__(self, *args, **kwargs)
| + |
| + def get_build_info(self): |
| + """Defines additional build information""" |
| # Internally we access builds via index |
| - if build_number is not None: |
| - self.build_index = int(build_number) - 1 |
| + if self.build_number is not None: |
| + self.build_index = int(self.build_number) - 1 |
| else: |
| self.build_index = None |
| - if build_id: |
| - # A build id has been specified. Split up its components so the date |
| - # and time can be extracted: '20111212042025' -> '2011-12-12 04:20:25' |
| - self.date = datetime.strptime(build_id, '%Y%m%d%H%M%S') |
| - self.builds, self.build_index = self.get_build_info_for_date(self.date, |
| - has_time=True) |
| + if self.build_id: |
| + # A build id has been specified. Split up its components so the |
| + # date and time can be extracted: |
| + # '20111212042025' -> '2011-12-12 04:20:25' |
| + self.date = datetime.strptime(self.build_id, '%Y%m%d%H%M%S') |
| - elif date: |
| + elif self.date: |
| # A date (without time) has been specified. Use its value and the |
| # build index to find the requested build for that day. |
| - self.date = datetime.strptime(date, '%Y-%m-%d') |
| - self.builds, self.build_index = self.get_build_info_for_date(self.date, |
| - build_index=self.build_index) |
| - |
| + try: |
| + self.date = datetime.strptime(self.date, '%Y-%m-%d') |
| + except: |
| + raise ValueError('%s is not a valid date' % self.date) |
| else: |
| - # If no build id nor date have been specified the lastest available |
| + # If no build id nor date have been specified the latest available |
| # build of the given branch has to be identified. We also have to |
| # retrieve the date of the build via its build id. |
| - url = '%s/nightly/latest-%s/' % (self.base_url, self.branch) |
| - |
| - print 'Retrieving the build status file from %s' % url |
| - parser = DirectoryParser(url) |
| - parser.entries = parser.filter(r'.*%s\.txt' % self.platform_regex) |
| - if not parser.entries: |
| - message = 'Status file for %s build cannot be found' % self.platform_regex |
| - raise NotFoundException(message, url) |
| - |
| - # Read status file for the platform, retrieve build id, and convert to a date |
| - status_file = url + parser.entries[-1] |
| - f = urllib.urlopen(status_file) |
| - self.date = datetime.strptime(f.readline().strip(), '%Y%m%d%H%M%S') |
| - self.builds, self.build_index = self.get_build_info_for_date(self.date, |
| - has_time=True) |
| - |
| - |
| - def get_build_info_for_date(self, date, has_time=False, build_index=None): |
| - url = '/'.join([self.base_url, self.monthly_build_list_regex]) |
| - |
| - print 'Retrieving list of builds from %s' % url |
| - parser = DirectoryParser(url) |
| - regex = r'%(DATE)s-(\d+-)+%(BRANCH)s%(L10N)s$' % { |
| - 'DATE': date.strftime('%Y-%m-%d'), |
| - 'BRANCH': self.branch, |
| - 'L10N': '' if self.locale == 'en-US' else '-l10n'} |
| - parser.entries = parser.filter(regex) |
| + self.date = self.get_latest_build_date() |
| + |
| + self.builds, self.build_index = self.get_build_info_for_date( |
| + self.date, self.build_index) |
| + |
| + def get_latest_build_date(self): |
| + """ Returns date of latest available nightly build.""" |
| + if self.application not in ('fennec'): |
| + url = urljoin(self.base_url, 'nightly', 'latest-%s/' % self.branch) |
| + else: |
| + url = urljoin(self.base_url, 'nightly', 'latest-%s-%s/' % |
| + (self.branch, self.platform)) |
| + |
| + self.logger.info('Retrieving the build status file from %s' % url) |
| + parser = self._create_directory_parser(url) |
| + parser.entries = parser.filter(r'.*%s\.txt' % self.platform_regex) |
| if not parser.entries: |
| - message = 'Folder for builds on %s has not been found' % self.date.strftime('%Y-%m-%d') |
| - raise NotFoundException(message, url) |
| + message = 'Status file for %s build cannot be found' % \ |
| + self.platform_regex |
| + raise errors.NotFoundError(message, url) |
| + |
| + # Read status file for the platform, retrieve build id, |
| + # and convert to a date |
| + headers = {'Cache-Control': 'max-age=0'} |
| + |
| + r = self.session.get(url + parser.entries[-1], headers=headers) |
| + try: |
| + r.raise_for_status() |
| + |
| + return datetime.strptime(r.text.split('\n')[0], '%Y%m%d%H%M%S') |
| + finally: |
| + r.close() |
| + |
| + def is_build_dir(self, folder_name): |
| + """Return whether or not the given dir contains a build.""" |
| + |
| + # Cannot move up to base scraper due to parser.entries call in |
| + # get_build_info_for_date (see below) |
| + |
| + url = '%s/' % urljoin(self.base_url, self.monthly_build_list_regex, folder_name) |
| + if self.application in APPLICATIONS_MULTI_LOCALE \ |
| + and self.locale != 'multi': |
| + url = '%s/' % urljoin(url, self.locale) |
| + |
| + parser = self._create_directory_parser(url) |
| + |
| + pattern = re.compile(self.binary_regex, re.IGNORECASE) |
| + for entry in parser.entries: |
| + try: |
| + pattern.match(entry).group() |
| + return True |
| + except: |
| + # No match, continue with next entry |
| + continue |
| + return False |
| + |
| + def get_build_info_for_date(self, date, build_index=None): |
| + url = urljoin(self.base_url, self.monthly_build_list_regex) |
| + has_time = date and date.time() |
| + |
| + self.logger.info('Retrieving list of builds from %s' % url) |
| + parser = self._create_directory_parser(url) |
| + regex = r'%(DATE)s-(\d+-)+%(BRANCH)s%(L10N)s%(PLATFORM)s$' % { |
| + 'DATE': date.strftime('%Y-%m-%d'), |
| + 'BRANCH': self.branch, |
| + # ensure to select the correct subfolder for localized builds |
| + 'L10N': '' if self.locale in ('en-US', 'multi') else '(-l10n)?', |
| + 'PLATFORM': '' if self.application not in ( |
| + 'fennec') else '-' + self.platform |
| + } |
| + |
| + parser.entries = parser.filter(regex) |
| + parser.entries = parser.filter(self.is_build_dir) |
| if has_time: |
| - # If a time is included in the date, use it to determine the build's index |
| + # If a time is included in the date, use it to determine the |
| + # build's index |
| regex = r'.*%s.*' % date.strftime('%H-%M-%S') |
| - build_index = parser.entries.index(parser.filter(regex)[0]) |
| - else: |
| - # If no index has been given, set it to the last build of the day. |
| - if build_index is None: |
| - build_index = len(parser.entries) - 1 |
| + parser.entries = parser.filter(regex) |
| - return (parser.entries, build_index) |
| + if not parser.entries: |
| + date_format = '%Y-%m-%d-%H-%M-%S' if has_time else '%Y-%m-%d' |
| + message = 'Folder for builds on %s has not been found' % \ |
| + self.date.strftime(date_format) |
| + raise errors.NotFoundError(message, url) |
| + # If no index has been given, set it to the last build of the day. |
| + self.show_matching_builds(parser.entries) |
| + # If no index has been given, set it to the last build of the day. |
| + if build_index is None: |
| + # Find the most recent non-empty entry. |
| + build_index = len(parser.entries) |
| + for build in reversed(parser.entries): |
| + build_index -= 1 |
| + if not build_index or self.is_build_dir(build): |
| + break |
| + self.logger.info('Selected build: %s' % parser.entries[build_index]) |
| + |
| + return (parser.entries, build_index) |
| @property |
| def binary_regex(self): |
| """Return the regex for the binary""" |
| regex_base_name = r'^%(APP)s-.*\.%(LOCALE)s\.%(PLATFORM)s' |
| - regex_suffix = {'linux': r'\.%(EXT)s$', |
| + regex_suffix = {'android-api-9': r'\.%(EXT)s$', |
| + 'android-api-11': r'\.%(EXT)s$', |
| + 'android-x86': r'\.%(EXT)s$', |
| + 'linux': r'\.%(EXT)s$', |
| 'linux64': r'\.%(EXT)s$', |
| 'mac': r'\.%(EXT)s$', |
| 'mac64': r'\.%(EXT)s$', |
| - 'win32': r'(\.installer)\.%(EXT)s$', |
| - 'win64': r'(\.installer)\.%(EXT)s$'} |
| + 'win32': r'(\.installer%(STUB)s)?\.%(EXT)s$', |
| + 'win64': r'(\.installer%(STUB)s)?\.%(EXT)s$'} |
| regex = regex_base_name + regex_suffix[self.platform] |
| return regex % {'APP': self.application, |
| 'LOCALE': self.locale, |
| 'PLATFORM': self.platform_regex, |
| - 'EXT': self.extension} |
| - |
| + 'EXT': self.extension, |
| + 'STUB': '-stub' if self.is_stub_installer else ''} |
| def build_filename(self, binary): |
| """Return the proposed filename with extension for the binary""" |
| @@ -315,53 +528,69 @@ class DailyScraper(Scraper): |
| timestamp = self.date.strftime('%Y-%m-%d') |
| return '%(TIMESTAMP)s-%(BRANCH)s-%(NAME)s' % { |
| - 'TIMESTAMP': timestamp, |
| - 'BRANCH': self.branch, |
| - 'NAME': binary} |
| - |
| + 'TIMESTAMP': timestamp, |
| + 'BRANCH': self.branch, |
| + 'NAME': binary} |
| @property |
| def monthly_build_list_regex(self): |
| - """Return the regex for the folder which contains the builds of a month.""" |
| + """Return the regex for the folder containing builds of a month.""" |
| # Regex for possible builds for the given date |
| return r'nightly/%(YEAR)s/%(MONTH)s/' % { |
| - 'YEAR': self.date.year, |
| - 'MONTH': str(self.date.month).zfill(2) } |
| - |
| + 'YEAR': self.date.year, |
| + 'MONTH': str(self.date.month).zfill(2)} |
| @property |
| def path_regex(self): |
| - """Return the regex for the path""" |
| + """Return the regex for the path to the build folder""" |
| try: |
| - return self.monthly_build_list_regex + self.builds[self.build_index] |
| + path = '%s/' % urljoin(self.monthly_build_list_regex, |
| + self.builds[self.build_index]) |
| + if self.application in APPLICATIONS_MULTI_LOCALE \ |
| + and self.locale != 'multi': |
| + path = '%s/' % urljoin(path, self.locale) |
| + return path |
| except: |
| - raise NotFoundException("Specified sub folder cannot be found", |
| - self.base_url + self.monthly_build_list_regex) |
| + folder = urljoin(self.base_url, self.monthly_build_list_regex) |
| + raise errors.NotFoundError("Specified sub folder cannot be found", |
| + folder) |
class DirectScraper(Scraper):
    """Class to download a file from a specified URL"""

    def __init__(self, url, *args, **kwargs):
        self._url = url

        Scraper.__init__(self, *args, **kwargs)

    @property
    def filename(self):
        """Return the absolute local path the file will be saved to."""
        has_extension = bool(os.path.splitext(self.destination)[1])
        if has_extension:
            # If the filename has been given make use of it
            target_file = self.destination
        else:
            # Otherwise determine it from the url.
            parsed_url = urlparse(self.url)
            source_filename = (parsed_url.path.rpartition('/')[-1] or
                               parsed_url.hostname)
            target_file = os.path.join(self.destination, source_filename)

        return os.path.abspath(target_file)

    @property
    def url(self):
        """Return the URL to download from."""
        return self._url
class ReleaseScraper(Scraper):
    """Class to download a release build from the Mozilla server"""

    def __init__(self, version, *args, **kwargs):
        # Version must be known before the base class resolves build info
        self.version = version

        Scraper.__init__(self, *args, **kwargs)
| @property |
| @@ -372,66 +601,78 @@ class ReleaseScraper(Scraper): |
| 'linux64': r'^%(APP)s-.*\.%(EXT)s$', |
| 'mac': r'^%(APP)s.*\.%(EXT)s$', |
| 'mac64': r'^%(APP)s.*\.%(EXT)s$', |
| - 'win32': r'^%(APP)s.*\.%(EXT)s$', |
| - 'win64': r'^%(APP)s.*\.%(EXT)s$'} |
| - return regex[self.platform] % {'APP': self.application, |
| - 'EXT': self.extension} |
| - |
| + 'win32': r'^%(APP)s.*%(STUB)s.*\.%(EXT)s$', |
| + 'win64': r'^%(APP)s.*%(STUB)s.*\.%(EXT)s$'} |
| + return regex[self.platform] % { |
| + 'APP': self.application, |
| + 'EXT': self.extension, |
| + 'STUB': 'Stub' if self.is_stub_installer else ''} |
| @property |
| def path_regex(self): |
| - """Return the regex for the path""" |
| + """Return the regex for the path to the build folder""" |
| - regex = r'releases/%(VERSION)s/%(PLATFORM)s/%(LOCALE)s' |
| + regex = r'releases/%(VERSION)s/%(PLATFORM)s/%(LOCALE)s/' |
| return regex % {'LOCALE': self.locale, |
| 'PLATFORM': self.platform_regex, |
| 'VERSION': self.version} |
| + @property |
| + def platform_regex(self): |
| + """Return the platform fragment of the URL""" |
| + |
| + if self.platform == 'win64': |
| + return self.platform |
| + |
| + return PLATFORM_FRAGMENTS[self.platform] |
| def build_filename(self, binary): |
| """Return the proposed filename with extension for the binary""" |
| - template = '%(APP)s-%(VERSION)s.%(LOCALE)s.%(PLATFORM)s.%(EXT)s' |
| + template = '%(APP)s-%(VERSION)s.%(LOCALE)s.%(PLATFORM)s%(STUB)s' \ |
| + '.%(EXT)s' |
| return template % {'APP': self.application, |
| 'VERSION': self.version, |
| 'LOCALE': self.locale, |
| 'PLATFORM': self.platform, |
| + 'STUB': '-stub' if self.is_stub_installer else '', |
| 'EXT': self.extension} |
class ReleaseCandidateScraper(ReleaseScraper):
    """Class to download a release candidate build from the Mozilla server"""

    def __init__(self, version, build_number=None, *args, **kwargs):
        # Selection criteria used later by get_build_info()
        self.version = version
        self.build_number = build_number

        Scraper.__init__(self, *args, **kwargs)
| + def get_build_info(self): |
| + """Defines additional build information""" |
| - def get_build_info_for_version(self, version, build_index=None): |
| - url = '/'.join([self.base_url, self.candidate_build_list_regex]) |
| + # Internally we access builds via index |
| + url = urljoin(self.base_url, self.candidate_build_list_regex) |
| + self.logger.info('Retrieving list of candidate builds from %s' % url) |
| - print 'Retrieving list of candidate builds from %s' % url |
| - parser = DirectoryParser(url) |
| + parser = self._create_directory_parser(url) |
| if not parser.entries: |
| - message = 'Folder for specific candidate builds at has not been found' |
| - raise NotFoundException(message, url) |
| - |
| - # If no index has been given, set it to the last build of the given version. |
| - if build_index is None: |
| - build_index = len(parser.entries) - 1 |
| - |
| - return (parser.entries, build_index) |
| - |
| + message = 'Folder for specific candidate builds at %s has not' \ |
| + 'been found' % url |
| + raise errors.NotFoundError(message, url) |
| + |
| + self.show_matching_builds(parser.entries) |
| + self.builds = parser.entries |
| + self.build_index = len(parser.entries) - 1 |
| + |
| + if self.build_number and \ |
| + ('build%s' % self.build_number) in self.builds: |
| + self.builds = ['build%s' % self.build_number] |
| + self.build_index = 0 |
| + self.logger.info('Selected build: build%s' % self.build_number) |
| + else: |
| + self.logger.info('Selected build: build%d' % |
| + (self.build_index + 1)) |
| @property |
| def candidate_build_list_regex(self): |
| @@ -439,51 +680,49 @@ class ReleaseCandidateScraper(ReleaseScraper): |
| a candidate build.""" |
| # Regex for possible builds for the given date |
| - return r'nightly/%(VERSION)s-candidates/' % { |
| - 'VERSION': self.version } |
| - |
| + return r'candidates/%(VERSION)s-candidates/' % { |
| + 'VERSION': self.version} |
| @property |
| def path_regex(self): |
| - """Return the regex for the path""" |
| + """Return the regex for the path to the build folder""" |
| - regex = r'%(PREFIX)s%(BUILD)s/%(UNSIGNED)s%(PLATFORM)s/%(LOCALE)s' |
| + regex = r'%(PREFIX)s%(BUILD)s/%(PLATFORM)s/%(LOCALE)s/' |
| return regex % {'PREFIX': self.candidate_build_list_regex, |
| 'BUILD': self.builds[self.build_index], |
| 'LOCALE': self.locale, |
| - 'PLATFORM': self.platform_regex, |
| - 'UNSIGNED': "unsigned/" if self.unsigned else ""} |
| + 'PLATFORM': self.platform_regex} |
| + @property |
| + def platform_regex(self): |
| + """Return the platform fragment of the URL""" |
| + |
| + if self.platform == 'win64': |
| + return self.platform |
| + |
| + return PLATFORM_FRAGMENTS[self.platform] |
| def build_filename(self, binary): |
| """Return the proposed filename with extension for the binary""" |
| - template = '%(APP)s-%(VERSION)s-build%(BUILD)s.%(LOCALE)s.%(PLATFORM)s.%(EXT)s' |
| + template = '%(APP)s-%(VERSION)s-%(BUILD)s.%(LOCALE)s.' \ |
| + '%(PLATFORM)s%(STUB)s.%(EXT)s' |
| return template % {'APP': self.application, |
| 'VERSION': self.version, |
| 'BUILD': self.builds[self.build_index], |
| 'LOCALE': self.locale, |
| 'PLATFORM': self.platform, |
| + 'STUB': '-stub' if self.is_stub_installer else '', |
| 'EXT': self.extension} |
| - |
| def download(self): |
| """Download the specified file""" |
| try: |
| # Try to download the signed candidate build |
| Scraper.download(self) |
| - except NotFoundException, e: |
| - print str(e) |
| - |
| - # If the signed build cannot be downloaded and unsigned builds are |
| - # allowed, try to download the unsigned build instead |
| - if self.no_unsigned: |
| - raise |
| - else: |
| - print "Signed build has not been found. Falling back to unsigned build." |
| - self.unsigned = True |
| - Scraper.download(self) |
| + except errors.NotFoundError, e: |
| + self.logger.exception(str(e)) |
| class TinderboxScraper(Scraper): |
| @@ -497,86 +736,91 @@ class TinderboxScraper(Scraper): |
| def __init__(self, branch='mozilla-central', build_number=None, date=None, |
| debug_build=False, *args, **kwargs): |
| - Scraper.__init__(self, *args, **kwargs) |
| self.branch = branch |
| + self.build_number = build_number |
| self.debug_build = debug_build |
| - self.locale_build = self.locale != 'en-US' |
| - self.timestamp = None |
| + self.date = date |
| + self.timestamp = None |
| # Currently any time in RelEng is based on the Pacific time zone. |
| - self.timezone = PacificTimezone(); |
| + self.timezone = PacificTimezone() |
| + |
| + Scraper.__init__(self, *args, **kwargs) |
| + |
| + def get_build_info(self): |
| + "Defines additional build information" |
| # Internally we access builds via index |
| - if build_number is not None: |
| - self.build_index = int(build_number) - 1 |
| + if self.build_number is not None: |
| + self.build_index = int(self.build_number) - 1 |
| else: |
| self.build_index = None |
| - if date is not None: |
| + if self.date is not None: |
| try: |
| - self.date = datetime.fromtimestamp(float(date), self.timezone) |
| - self.timestamp = date |
| + # date is provided in the format 2013-07-23 |
| + self.date = datetime.strptime(self.date, '%Y-%m-%d') |
| except: |
| - self.date = datetime.strptime(date, '%Y-%m-%d') |
| - else: |
| - self.date = None |
| + try: |
| + # date is provided as a unix timestamp |
| + datetime.fromtimestamp(float(self.date)) |
| + self.timestamp = self.date |
| + except: |
| + raise ValueError('%s is not a valid date' % self.date) |
| + self.locale_build = self.locale != 'en-US' |
| # For localized builds we do not have to retrieve the list of builds |
| # because only the last build is available |
| if not self.locale_build: |
| - self.builds, self.build_index = self.get_build_info(self.build_index) |
| - |
| - try: |
| - self.timestamp = self.builds[self.build_index] |
| - except: |
| - raise NotFoundException("Specified sub folder cannot be found", |
| - self.base_url + self.monthly_build_list_regex) |
| - |
| + self.builds, self.build_index = self.get_build_info_for_index( |
| + self.build_index) |
| @property |
| def binary_regex(self): |
| """Return the regex for the binary""" |
| - regex_base_name = r'^%(APP)s-.*\.%(LOCALE)s\.' |
| + regex_base_name = r'^%(APP)s-.*\.%(LOCALE)s\.%(PLATFORM)s' |
| regex_suffix = {'linux': r'.*\.%(EXT)s$', |
| 'linux64': r'.*\.%(EXT)s$', |
| 'mac': r'.*\.%(EXT)s$', |
| 'mac64': r'.*\.%(EXT)s$', |
| - 'win32': r'.*(\.installer)\.%(EXT)s$', |
| - 'win64': r'.*(\.installer)\.%(EXT)s$'} |
| + 'win32': r'(\.installer%(STUB)s)?\.%(EXT)s$', |
| + 'win64': r'(\.installer%(STUB)s)?\.%(EXT)s$'} |
| regex = regex_base_name + regex_suffix[self.platform] |
| return regex % {'APP': self.application, |
| 'LOCALE': self.locale, |
| + 'PLATFORM': PLATFORM_FRAGMENTS[self.platform], |
| + 'STUB': '-stub' if self.is_stub_installer else '', |
| 'EXT': self.extension} |
| - |
| def build_filename(self, binary): |
| """Return the proposed filename with extension for the binary""" |
| return '%(TIMESTAMP)s%(BRANCH)s%(DEBUG)s-%(NAME)s' % { |
| - 'TIMESTAMP': self.timestamp + '-' if self.timestamp else '', |
| - 'BRANCH': self.branch, |
| - 'DEBUG': '-debug' if self.debug_build else '', |
| - 'NAME': binary} |
| - |
| + 'TIMESTAMP': self.timestamp + '-' if self.timestamp else '', |
| + 'BRANCH': self.branch, |
| + 'DEBUG': '-debug' if self.debug_build else '', |
| + 'NAME': binary} |
| @property |
| def build_list_regex(self): |
| """Return the regex for the folder which contains the list of builds""" |
| - regex = 'tinderbox-builds/%(BRANCH)s-%(PLATFORM)s%(L10N)s%(DEBUG)s' |
| - |
| - return regex % {'BRANCH': self.branch, |
| - 'PLATFORM': '' if self.locale_build else self.platform_regex, |
| - 'L10N': 'l10n' if self.locale_build else '', |
| - 'DEBUG': '-debug' if self.debug_build else ''} |
| + regex = 'tinderbox-builds/%(BRANCH)s-%(PLATFORM)s%(L10N)s%(DEBUG)s/' |
| + return regex % { |
| + 'BRANCH': self.branch, |
| + 'PLATFORM': '' if self.locale_build else self.platform_regex, |
| + 'L10N': 'l10n' if self.locale_build else '', |
| + 'DEBUG': '-debug' if self.debug_build else ''} |
| def date_matches(self, timestamp): |
| - """Determines whether the timestamp date is equal to the argument date""" |
| + """ |
| + Determines whether the timestamp date is equal to the argument date |
| + """ |
| if self.date is None: |
| return False |
| @@ -584,65 +828,89 @@ class TinderboxScraper(Scraper): |
| timestamp = datetime.fromtimestamp(float(timestamp), self.timezone) |
| if self.date.date() == timestamp.date(): |
| return True |
| - |
| - return False |
| - |
| - |
| - @property |
| - def date_validation_regex(self): |
| - """Return the regex for a valid date argument value""" |
| - |
| - return r'^\d{4}-\d{1,2}-\d{1,2}$|^\d+$' |
| + return False |
| def detect_platform(self): |
| """Detect the current platform""" |
| platform = Scraper.detect_platform(self) |
| - # On OS X we have to special case the platform detection code and fallback |
| - # to 64 bit builds for the en-US locale |
| - if mozinfo.os == 'mac' and self.locale == 'en-US' and mozinfo.bits == 64: |
| + # On OS X we have to special case the platform detection code and |
| + # fallback to 64 bit builds for the en-US locale |
| + if mozinfo.os == 'mac' and self.locale == 'en-US' and \ |
| + mozinfo.bits == 64: |
| platform = "%s%d" % (mozinfo.os, mozinfo.bits) |
| return platform |
| + def is_build_dir(self, folder_name): |
| + """Return whether or not the given dir contains a build.""" |
| - def get_build_info(self, build_index=None): |
| - url = '/'.join([self.base_url, self.build_list_regex]) |
| + # Cannot move up to base scraper due to parser.entries call in |
| + # get_build_info_for_index (see below) |
| + url = '%s/' % urljoin(self.base_url, self.build_list_regex, folder_name) |
| - print 'Retrieving list of builds from %s' % url |
| + if self.application in APPLICATIONS_MULTI_LOCALE \ |
| + and self.locale != 'multi': |
| + url = '%s/' % urljoin(url, self.locale) |
| - # If a timestamp is given, retrieve just that build |
| - regex = '^' + self.timestamp + '$' if self.timestamp else r'^\d+$' |
| + parser = self._create_directory_parser(url) |
| - parser = DirectoryParser(url) |
| - parser.entries = parser.filter(regex) |
| + pattern = re.compile(self.binary_regex, re.IGNORECASE) |
| + for entry in parser.entries: |
| + try: |
| + pattern.match(entry).group() |
| + return True |
| + except: |
| + # No match, continue with next entry |
| + continue |
| + return False |
| - # If date is given, retrieve the subset of builds on that date |
| - if self.date is not None: |
| + def get_build_info_for_index(self, build_index=None): |
| + url = urljoin(self.base_url, self.build_list_regex) |
| + |
| + self.logger.info('Retrieving list of builds from %s' % url) |
| + parser = self._create_directory_parser(url) |
| + parser.entries = parser.filter(r'^\d+$') |
| + |
| + if self.timestamp: |
| + # If a timestamp is given, retrieve the folder with the timestamp |
| + # as name |
| + parser.entries = self.timestamp in parser.entries and \ |
| + [self.timestamp] |
| + |
| + elif self.date: |
| + # If date is given, retrieve the subset of builds on that date |
| parser.entries = filter(self.date_matches, parser.entries) |
| if not parser.entries: |
| message = 'No builds have been found' |
| - raise NotFoundException(message, url) |
| + raise errors.NotFoundError(message, url) |
| + |
| + self.show_matching_builds(parser.entries) |
| # If no index has been given, set it to the last build of the day. |
| if build_index is None: |
| - build_index = len(parser.entries) - 1 |
| + # Find the most recent non-empty entry. |
| + build_index = len(parser.entries) |
| + for build in reversed(parser.entries): |
| + build_index -= 1 |
| + if not build_index or self.is_build_dir(build): |
| + break |
| - return (parser.entries, build_index) |
| + self.logger.info('Selected build: %s' % parser.entries[build_index]) |
| + return (parser.entries, build_index) |
| @property |
| def path_regex(self): |
| - """Return the regex for the path""" |
| + """Return the regex for the path to the build folder""" |
| if self.locale_build: |
| return self.build_list_regex |
| - return '/'.join([self.build_list_regex, self.builds[self.build_index]]) |
| - |
| + return '%s/' % urljoin(self.build_list_regex, self.builds[self.build_index]) |
| @property |
| def platform_regex(self): |
| @@ -650,7 +918,7 @@ class TinderboxScraper(Scraper): |
| PLATFORM_FRAGMENTS = {'linux': 'linux', |
| 'linux64': 'linux64', |
| - 'mac': 'macosx', |
| + 'mac': 'macosx64', |
| 'mac64': 'macosx64', |
| 'win32': 'win32', |
| 'win64': 'win64'} |
| @@ -658,178 +926,104 @@ class TinderboxScraper(Scraper): |
| return PLATFORM_FRAGMENTS[self.platform] |
| -def cli(): |
| - """Main function for the downloader""" |
| - |
| - BUILD_TYPES = {'release': ReleaseScraper, |
| - 'candidate': ReleaseCandidateScraper, |
| - 'daily': DailyScraper, |
| - 'tinderbox': TinderboxScraper } |
| - |
| - usage = 'usage: %prog [options]' |
| - parser = OptionParser(usage=usage, description=__doc__) |
| - parser.add_option('--application', '-a', |
| - dest='application', |
| - choices=APPLICATIONS, |
| - default='firefox', |
| - metavar='APPLICATION', |
| - help='The name of the application to download, ' |
| - 'default: "%default"') |
| - parser.add_option('--directory', '-d', |
| - dest='directory', |
| - default=os.getcwd(), |
| - metavar='DIRECTORY', |
| - help='Target directory for the download, default: ' |
| - 'current working directory') |
| - parser.add_option('--build-number', |
| - dest='build_number', |
| - default=None, |
| - type="int", |
| - metavar='BUILD_NUMBER', |
| - help='Number of the build (for candidate, daily, ' |
| - 'and tinderbox builds)') |
| - parser.add_option('--locale', '-l', |
| - dest='locale', |
| - default='en-US', |
| - metavar='LOCALE', |
| - help='Locale of the application, default: "%default"') |
| - parser.add_option('--platform', '-p', |
| - dest='platform', |
| - choices=PLATFORM_FRAGMENTS.keys(), |
| - metavar='PLATFORM', |
| - help='Platform of the application') |
| - parser.add_option('--type', '-t', |
| - dest='type', |
| - choices=BUILD_TYPES.keys(), |
| - default='release', |
| - metavar='BUILD_TYPE', |
| - help='Type of build to download, default: "%default"') |
| - parser.add_option('--url', |
| - dest='url', |
| - default=None, |
| - metavar='URL', |
| - help='URL to download.') |
| - parser.add_option('--version', '-v', |
| - dest='version', |
| - metavar='VERSION', |
| - help='Version of the application to be used by release and\ |
| - candidate builds, i.e. "3.6"') |
| - parser.add_option('--extension', |
| - dest='extension', |
| - default=None, |
| - metavar='EXTENSION', |
| - help='File extension of the build (e.g. "zip"), default:\ |
| - the standard build extension on the platform.') |
| - parser.add_option('--username', |
| - dest='username', |
| - default=None, |
| - metavar='USERNAME', |
| - help='Username for basic HTTP authentication.') |
| - parser.add_option('--password', |
| - dest='password', |
| - default=None, |
| - metavar='PASSWORD', |
| - help='Password for basic HTTP authentication.') |
| - parser.add_option('--retry-attempts', |
| - dest='retry_attempts', |
| - default=3, |
| - type=int, |
| - metavar='RETRY_ATTEMPTS', |
| - help='Number of times the download will be attempted in ' |
| - 'the event of a failure, default: %default') |
| - parser.add_option('--retry-delay', |
| - dest='retry_delay', |
| - default=10, |
| - type=int, |
| - metavar='RETRY_DELAY', |
| - help='Amount of time (in seconds) to wait between retry ' |
| - 'attempts, default: %default') |
| - |
| - # Option group for candidate builds |
| - group = OptionGroup(parser, "Candidate builds", |
| - "Extra options for candidate builds.") |
| - group.add_option('--no-unsigned', |
| - dest='no_unsigned', |
| - action="store_true", |
| - help="Don't allow to download unsigned builds if signed\ |
| - builds are not available") |
| - parser.add_option_group(group) |
| - |
| - # Option group for daily builds |
| - group = OptionGroup(parser, "Daily builds", |
| - "Extra options for daily builds.") |
| - group.add_option('--branch', |
| - dest='branch', |
| - default='mozilla-central', |
| - metavar='BRANCH', |
| - help='Name of the branch, default: "%default"') |
| - group.add_option('--build-id', |
| - dest='build_id', |
| - default=None, |
| - metavar='BUILD_ID', |
| - help='ID of the build to download') |
| - group.add_option('--date', |
| - dest='date', |
| - default=None, |
| - metavar='DATE', |
| - help='Date of the build, default: latest build') |
| - parser.add_option_group(group) |
| - |
| - # Option group for tinderbox builds |
| - group = OptionGroup(parser, "Tinderbox builds", |
| - "Extra options for tinderbox builds.") |
| - group.add_option('--debug-build', |
| - dest='debug_build', |
| - action="store_true", |
| - help="Download a debug build") |
| - parser.add_option_group(group) |
| - |
| - # TODO: option group for nightly builds |
| - (options, args) = parser.parse_args() |
| - |
| - # Check for required options and arguments |
| - # Note: Will be optional when ini file support has been landed |
| - if not options.url \ |
| - and not options.type in ['daily', 'tinderbox'] \ |
| - and not options.version: |
| - parser.error('The version of the application to download has not been specified.') |
| - |
| - # Instantiate scraper and download the build |
| - scraper_keywords = {'application': options.application, |
| - 'locale': options.locale, |
| - 'platform': options.platform, |
| - 'version': options.version, |
| - 'directory': options.directory, |
| - 'extension': options.extension, |
| - 'authentication': { |
| - 'username': options.username, |
| - 'password': options.password}, |
| - 'retry_attempts': options.retry_attempts, |
| - 'retry_delay': options.retry_delay} |
| - scraper_options = {'candidate': { |
| - 'build_number': options.build_number, |
| - 'no_unsigned': options.no_unsigned}, |
| - 'daily': { |
| - 'branch': options.branch, |
| - 'build_number': options.build_number, |
| - 'build_id': options.build_id, |
| - 'date': options.date}, |
| - 'tinderbox': { |
| - 'branch': options.branch, |
| - 'build_number': options.build_number, |
| - 'date': options.date, |
| - 'debug_build': options.debug_build} |
| - } |
| - |
| - kwargs = scraper_keywords.copy() |
| - kwargs.update(scraper_options.get(options.type, {})) |
| - |
| - if options.url: |
| - build = DirectScraper(options.url, **kwargs) |
| - else: |
| - build = BUILD_TYPES[options.type](**kwargs) |
| - |
| - build.download() |
| - |
| -if __name__ == "__main__": |
| - cli() |
| +class TryScraper(Scraper): |
| + "Class to download a try build from the Mozilla server." |
| + |
| + def __init__(self, changeset=None, debug_build=False, *args, **kwargs): |
| + |
| + self.debug_build = debug_build |
| + self.changeset = changeset |
| + |
| + Scraper.__init__(self, *args, **kwargs) |
| + |
| + def get_build_info(self): |
| + "Defines additional build information" |
| + |
| + self.builds, self.build_index = self.get_build_info_for_index() |
| + |
| + @property |
| + def binary_regex(self): |
| + """Return the regex for the binary""" |
| + |
| + regex_base_name = r'^%(APP)s-.*\.%(LOCALE)s\.%(PLATFORM)s' |
| + regex_suffix = {'linux': r'.*\.%(EXT)s$', |
| + 'linux64': r'.*\.%(EXT)s$', |
| + 'mac': r'.*\.%(EXT)s$', |
| + 'mac64': r'.*\.%(EXT)s$', |
| + 'win32': r'.*(\.installer%(STUB)s)\.%(EXT)s$', |
| + 'win64': r'.*(\.installer%(STUB)s)\.%(EXT)s$'} |
| + |
| + regex = regex_base_name + regex_suffix[self.platform] |
| + |
| + return regex % {'APP': self.application, |
| + 'LOCALE': self.locale, |
| + 'PLATFORM': PLATFORM_FRAGMENTS[self.platform], |
| + 'STUB': '-stub' if self.is_stub_installer else '', |
| + 'EXT': self.extension} |
| + |
| + def build_filename(self, binary): |
| + """Return the proposed filename with extension for the binary""" |
| + |
| + return '%(CHANGESET)s%(DEBUG)s-%(NAME)s' % { |
| + 'CHANGESET': self.changeset, |
| + 'DEBUG': '-debug' if self.debug_build else '', |
| + 'NAME': binary} |
| + |
| + @property |
| + def build_list_regex(self): |
| + """Return the regex for the folder which contains the list of builds""" |
| + |
| + return 'try-builds/' |
| + |
| + def detect_platform(self): |
| + """Detect the current platform""" |
| + |
| + platform = Scraper.detect_platform(self) |
| + |
| + # On OS X we have to special case the platform detection code and |
| + # fallback to 64 bit builds for the en-US locale |
| + if mozinfo.os == 'mac' and self.locale == 'en-US' and \ |
| + mozinfo.bits == 64: |
| + platform = "%s%d" % (mozinfo.os, mozinfo.bits) |
| + |
| + return platform |
| + |
| + def get_build_info_for_index(self, build_index=None): |
| + url = urljoin(self.base_url, self.build_list_regex) |
| + |
| + self.logger.info('Retrieving list of builds from %s' % url) |
| + parser = self._create_directory_parser(url) |
| + parser.entries = parser.filter('.*-%s$' % self.changeset) |
| + |
| + if not parser.entries: |
| + raise errors.NotFoundError('No builds have been found', url) |
| + |
| + self.show_matching_builds(parser.entries) |
| + |
| + self.logger.info('Selected build: %s' % parser.entries[0]) |
| + |
| + return (parser.entries, 0) |
| + |
| + @property |
| + def path_regex(self): |
| + """Return the regex for the path to the build folder""" |
| + |
| + build_dir = 'try-%(PLATFORM)s%(DEBUG)s/' % { |
| + 'PLATFORM': self.platform_regex, |
| + 'DEBUG': '-debug' if self.debug_build else ''} |
| + return urljoin(self.build_list_regex, |
| + self.builds[self.build_index], |
| + build_dir) |
| + |
| + @property |
| + def platform_regex(self): |
| + """Return the platform fragment of the URL""" |
| + |
| + PLATFORM_FRAGMENTS = {'linux': 'linux', |
| + 'linux64': 'linux64', |
| + 'mac': 'macosx64', |
| + 'mac64': 'macosx64', |
| + 'win32': 'win32', |
| + 'win64': 'win64'} |
| + |
| + return PLATFORM_FRAGMENTS[self.platform] |