Index: mozdownload/scraper.py |
diff --git a/mozdownload/scraper.py b/mozdownload/scraper.py |
index 9011cab26c1801ea9804caff62fc60a8b812eee3..6122f8c02379d154f95ae5fee3fda3920a004482 100755 |
--- a/mozdownload/scraper.py |
+++ b/mozdownload/scraper.py |
@@ -1,211 +1,358 @@ |
-#!/usr/bin/env python |
- |
# This Source Code Form is subject to the terms of the Mozilla Public |
# License, v. 2.0. If a copy of the MPL was not distributed with this |
# file, You can obtain one at http://mozilla.org/MPL/2.0/. |
-"""Module to handle downloads for different types of Firefox and Thunderbird builds.""" |
- |
- |
from datetime import datetime |
-from optparse import OptionParser, OptionGroup |
+import logging |
import os |
import re |
+import requests |
import sys |
import time |
import urllib |
-import urllib2 |
+from urlparse import urlparse |
import mozinfo |
+import progressbar as pb |
kjellander_chromium
2015/11/17 10:19:29
Any idea how the script can work without the progr
phoglund_chromium
2015/11/17 11:13:07
Crap, you're right. I have it installed on my mach
kjellander_chromium
2015/11/17 12:57:17
Not necessarily screwed since we can bundle those
phoglund_chromium
2015/11/17 13:47:03
I managed to pull requests from the mirror you ind
|
+ |
+import errors |
from parser import DirectoryParser |
from timezones import PacificTimezone |
+from utils import urljoin |
-APPLICATIONS = ['b2g', 'firefox', 'thunderbird'] |
+APPLICATIONS = ('b2g', 'firefox', 'fennec', 'thunderbird') |
+ |
+# Some applications contain all locales in a single build |
+APPLICATIONS_MULTI_LOCALE = ('b2g', 'fennec') |
+ |
+# Used if the application is named differently than the subfolder on the server |
+APPLICATIONS_TO_FTP_DIRECTORY = {'fennec': 'mobile'} |
# Base URL for the path to all builds |
-BASE_URL = 'https://ftp.mozilla.org/pub/mozilla.org' |
+BASE_URL = 'https://archive.mozilla.org/pub/' |
-PLATFORM_FRAGMENTS = {'linux': 'linux-i686', |
- 'linux64': 'linux-x86_64', |
- 'mac': 'mac', |
- 'mac64': 'mac64', |
- 'win32': 'win32', |
- 'win64': 'win64-x86_64'} |
+# Chunk size when downloading a file |
+CHUNK_SIZE = 16 * 1024 |
-DEFAULT_FILE_EXTENSIONS = {'linux': 'tar.bz2', |
+DEFAULT_FILE_EXTENSIONS = {'android-api-9': 'apk', |
+ 'android-api-11': 'apk', |
+ 'android-x86': 'apk', |
+ 'linux': 'tar.bz2', |
'linux64': 'tar.bz2', |
'mac': 'dmg', |
'mac64': 'dmg', |
'win32': 'exe', |
'win64': 'exe'} |
-class NotFoundException(Exception): |
- """Exception for a resource not being found (e.g. no logs)""" |
- def __init__(self, message, location): |
- self.location = location |
- Exception.__init__(self, ': '.join([message, location])) |
+PLATFORM_FRAGMENTS = {'android-api-9': r'android-arm', |
+ 'android-api-11': r'android-arm', |
+ 'android-x86': r'android-i386', |
+ 'linux': r'linux-i686', |
+ 'linux64': r'linux-x86_64', |
+ 'mac': r'mac', |
+ 'mac64': r'mac(64)?', |
+ 'win32': r'win32', |
+ 'win64': r'win64(-x86_64)?'} |
class Scraper(object): |
"""Generic class to download an application from the Mozilla server""" |
- def __init__(self, directory, version, platform=None, |
- application='firefox', locale='en-US', extension=None, |
- authentication=None, retry_attempts=3, retry_delay=10): |
+ def __init__(self, destination=None, platform=None, |
+ application='firefox', locale=None, extension=None, |
+ username=None, password=None, |
+ retry_attempts=0, retry_delay=10., |
+ is_stub_installer=False, timeout=None, |
+ log_level='INFO', |
+ base_url=BASE_URL): |
# Private properties for caching |
- self._target = None |
+ self._filename = None |
self._binary = None |
- self.directory = directory |
- self.locale = locale |
+ self.destination = destination or os.getcwd() |
+ |
+ if not locale: |
+ if application in APPLICATIONS_MULTI_LOCALE: |
+ self.locale = 'multi' |
+ else: |
+ self.locale = 'en-US' |
+ else: |
+ self.locale = locale |
+ |
self.platform = platform or self.detect_platform() |
- self.version = version |
- self.extension = extension or DEFAULT_FILE_EXTENSIONS[self.platform] |
- self.authentication = authentication |
+ |
+ self.session = requests.Session() |
+ if (username, password) != (None, None): |
+ self.session.auth = (username, password) |
+ |
self.retry_attempts = retry_attempts |
self.retry_delay = retry_delay |
+ self.is_stub_installer = is_stub_installer |
+ self.timeout_download = timeout |
+ # this is the timeout used in requests.get. Unlike "auth", |
+ # it does not work if we attach it on the session, so we handle |
+ # it independently. |
+ self.timeout_network = 60. |
+ |
+ logging.basicConfig(format=' %(levelname)s | %(message)s') |
+ self.logger = logging.getLogger(self.__module__) |
+ self.logger.setLevel(log_level) |
# build the base URL |
self.application = application |
- self.base_url = '/'.join([BASE_URL, self.application]) |
+ self.base_url = '%s/' % urljoin( |
+ base_url, |
+ APPLICATIONS_TO_FTP_DIRECTORY.get(self.application, self.application) |
+ ) |
+ if extension: |
+ self.extension = extension |
+ else: |
+ if self.application in APPLICATIONS_MULTI_LOCALE and \ |
+ self.platform in ('win32', 'win64'): |
+ # builds for APPLICATIONS_MULTI_LOCALE only exist in zip |
+ self.extension = 'zip' |
+ else: |
+ self.extension = DEFAULT_FILE_EXTENSIONS[self.platform] |
+ |
+ attempt = 0 |
+ while True: |
+ attempt += 1 |
+ try: |
+ self.get_build_info() |
+ break |
+ except (errors.NotFoundError, requests.exceptions.RequestException), e: |
+ if self.retry_attempts > 0: |
+ # Log only if multiple attempts are requested |
+ self.logger.warning("Build not found: '%s'" % e.message) |
+ self.logger.info('Will retry in %s seconds...' % |
+ (self.retry_delay)) |
+ time.sleep(self.retry_delay) |
+ self.logger.info("Retrying... (attempt %s)" % attempt) |
+ |
+ if attempt >= self.retry_attempts: |
+ if hasattr(e, 'response') and \ |
+ e.response.status_code == 404: |
+ message = "Specified build has not been found" |
+ raise errors.NotFoundError(message, e.response.url) |
+ else: |
+ raise |
+ |
+ def _create_directory_parser(self, url): |
+ return DirectoryParser(url, |
+ session=self.session, |
+ timeout=self.timeout_network) |
@property |
def binary(self): |
"""Return the name of the build""" |
- if self._binary is None: |
- # Retrieve all entries from the remote virtual folder |
- parser = DirectoryParser(self.path) |
- if not parser.entries: |
- raise NotFoundException('No entries found', self.path) |
- |
- # Download the first matched directory entry |
- pattern = re.compile(self.binary_regex, re.IGNORECASE) |
- for entry in parser.entries: |
- try: |
- self._binary = pattern.match(entry).group() |
- break |
- except: |
- # No match, continue with next entry |
- continue |
- |
- if self._binary is None: |
- raise NotFoundException("Binary not found in folder", self.path) |
- else: |
- return self._binary |
+ attempt = 0 |
+ while self._binary is None: |
+ attempt += 1 |
+ try: |
+ # Retrieve all entries from the remote virtual folder |
+ parser = self._create_directory_parser(self.path) |
+ if not parser.entries: |
+ raise errors.NotFoundError('No entries found', self.path) |
+ |
+ # Download the first matched directory entry |
+ pattern = re.compile(self.binary_regex, re.IGNORECASE) |
+ for entry in parser.entries: |
+ try: |
+ self._binary = pattern.match(entry).group() |
+ break |
+ except: |
+ # No match, continue with next entry |
+ continue |
+ else: |
+ raise errors.NotFoundError("Binary not found in folder", |
+ self.path) |
+ except (errors.NotFoundError, requests.exceptions.RequestException), e: |
+ if self.retry_attempts > 0: |
+ # Log only if multiple attempts are requested |
+ self.logger.warning("Build not found: '%s'" % e.message) |
+ self.logger.info('Will retry in %s seconds...' % |
+ (self.retry_delay)) |
+ time.sleep(self.retry_delay) |
+ self.logger.info("Retrying... (attempt %s)" % attempt) |
+ |
+ if attempt >= self.retry_attempts: |
+ if hasattr(e, 'response') and \ |
+ e.response.status_code == 404: |
+ message = "Specified build has not been found" |
+ raise errors.NotFoundError(message, self.path) |
+ else: |
+ raise |
+ |
+ return self._binary |
@property |
def binary_regex(self): |
"""Return the regex for the binary filename""" |
- raise NotImplementedError(sys._getframe(0).f_code.co_name) |
- |
+ raise errors.NotImplementedError(sys._getframe(0).f_code.co_name) |
@property |
- def final_url(self): |
- """Return the final URL of the build""" |
- |
- return '/'.join([self.path, self.binary]) |
+ def url(self): |
+ """Return the URL of the build""" |
+ return urljoin(self.path, self.binary) |
@property |
def path(self): |
- """Return the path to the build""" |
- |
- return '/'.join([self.base_url, self.path_regex]) |
+ """Return the path to the build folder""" |
+ return urljoin(self.base_url, self.path_regex) |
@property |
def path_regex(self): |
- """Return the regex for the path to the build""" |
- |
- raise NotImplementedError(sys._getframe(0).f_code.co_name) |
+ """Return the regex for the path to the build folder""" |
+ raise errors.NotImplementedError(sys._getframe(0).f_code.co_name) |
@property |
def platform_regex(self): |
"""Return the platform fragment of the URL""" |
- return PLATFORM_FRAGMENTS[self.platform]; |
- |
+ return PLATFORM_FRAGMENTS[self.platform] |
@property |
- def target(self): |
- """Return the target file name of the build""" |
+ def filename(self): |
+ """Return the local filename of the build""" |
+ |
+ if self._filename is None: |
+ if os.path.splitext(self.destination)[1]: |
+ # If the filename has been given make use of it |
+ target_file = self.destination |
+ else: |
+ # Otherwise create it from the build details |
+ target_file = os.path.join(self.destination, |
+ self.build_filename(self.binary)) |
+ |
+ self._filename = os.path.abspath(target_file) |
- if self._target is None: |
- self._target = os.path.join(self.directory, |
- self.build_filename(self.binary)) |
- return self._target |
+ return self._filename |
+ def get_build_info(self): |
+ """Returns additional build information in subclasses if necessary""" |
+ pass |
def build_filename(self, binary): |
"""Return the proposed filename with extension for the binary""" |
- raise NotImplementedError(sys._getframe(0).f_code.co_name) |
- |
+ raise errors.NotImplementedError(sys._getframe(0).f_code.co_name) |
def detect_platform(self): |
"""Detect the current platform""" |
# For Mac and Linux 32bit we do not need the bits appended |
- if mozinfo.os == 'mac' or (mozinfo.os == 'linux' and mozinfo.bits == 32): |
+ if mozinfo.os == 'mac' or \ |
+ (mozinfo.os == 'linux' and mozinfo.bits == 32): |
return mozinfo.os |
else: |
return "%s%d" % (mozinfo.os, mozinfo.bits) |
- |
def download(self): |
"""Download the specified file""" |
- attempts = 0 |
+ def total_seconds(td): |
+ # Keep backward compatibility with Python 2.6 which doesn't have |
+ # this method |
+ if hasattr(td, 'total_seconds'): |
+ return td.total_seconds() |
+ else: |
+ return (td.microseconds + |
+ (td.seconds + td.days * 24 * 3600) * 10 ** 6) / 10 ** 6 |
- if not os.path.isdir(self.directory): |
- os.makedirs(self.directory) |
+ attempt = 0 |
# Don't re-download the file |
- if os.path.isfile(os.path.abspath(self.target)): |
- print "File has already been downloaded: %s" % (self.target) |
- return |
- |
- print 'Downloading from: %s' % (urllib.unquote(self.final_url)) |
- tmp_file = self.target + ".part" |
- |
- if self.authentication \ |
- and self.authentication['username'] \ |
- and self.authentication['password']: |
- password_mgr = urllib2.HTTPPasswordMgrWithDefaultRealm() |
- password_mgr.add_password(None, |
- self.final_url, |
- self.authentication['username'], |
- self.authentication['password']) |
- handler = urllib2.HTTPBasicAuthHandler(password_mgr) |
- opener = urllib2.build_opener(urllib2.HTTPHandler, handler) |
- urllib2.install_opener(opener) |
+ if os.path.isfile(os.path.abspath(self.filename)): |
+ self.logger.info("File has already been downloaded: %s" % |
+ (self.filename)) |
+ return self.filename |
+ |
+ directory = os.path.dirname(self.filename) |
+ if not os.path.isdir(directory): |
+ os.makedirs(directory) |
+ |
+ self.logger.info('Downloading from: %s' % |
+ (urllib.unquote(self.url))) |
+ self.logger.info('Saving as: %s' % self.filename) |
+ |
+ tmp_file = self.filename + ".part" |
while True: |
- attempts += 1 |
+ attempt += 1 |
try: |
- r = urllib2.urlopen(self.final_url) |
- CHUNK = 16 * 1024 |
+ start_time = datetime.now() |
+ |
+ # Enable streaming mode so we can download content in chunks |
+ r = self.session.get(self.url, stream=True) |
+ r.raise_for_status() |
+ |
+ content_length = r.headers.get('Content-length') |
+ # ValueError: Value out of range if only total_size given |
+ if content_length: |
+ total_size = int(content_length.strip()) |
+ max_value = ((total_size / CHUNK_SIZE) + 1) * CHUNK_SIZE |
+ |
+ bytes_downloaded = 0 |
+ |
+ log_level = self.logger.getEffectiveLevel() |
+ if log_level <= logging.INFO and content_length: |
+ widgets = [pb.Percentage(), ' ', pb.Bar(), ' ', pb.ETA(), |
+ ' ', pb.FileTransferSpeed()] |
+ pbar = pb.ProgressBar(widgets=widgets, |
+ maxval=max_value).start() |
+ |
with open(tmp_file, 'wb') as f: |
- for chunk in iter(lambda: r.read(CHUNK), ''): |
+ for chunk in iter(lambda: r.raw.read(CHUNK_SIZE), ''): |
f.write(chunk) |
+ bytes_downloaded += CHUNK_SIZE |
+ |
+ if log_level <= logging.INFO and content_length: |
+ pbar.update(bytes_downloaded) |
+ |
+ t1 = total_seconds(datetime.now() - start_time) |
+ if self.timeout_download and \ |
+ t1 >= self.timeout_download: |
+ raise errors.TimeoutError |
+ |
+ if log_level <= logging.INFO and content_length: |
+ pbar.finish() |
break |
- except (urllib2.HTTPError, urllib2.URLError): |
+ except (requests.exceptions.RequestException, errors.TimeoutError), e: |
if tmp_file and os.path.isfile(tmp_file): |
os.remove(tmp_file) |
- print 'Download failed! Retrying... (attempt %s)' % attempts |
- if attempts >= self.retry_attempts: |
+ if self.retry_attempts > 0: |
+ # Log only if multiple attempts are requested |
+ self.logger.warning('Download failed: "%s"' % str(e)) |
+ self.logger.info('Will retry in %s seconds...' % |
+ (self.retry_delay)) |
+ time.sleep(self.retry_delay) |
+ self.logger.info("Retrying... (attempt %s)" % attempt) |
+ if attempt >= self.retry_attempts: |
raise |
time.sleep(self.retry_delay) |
- os.rename(tmp_file, self.target) |
+ os.rename(tmp_file, self.filename) |
+ |
+ return self.filename |
+ |
+ def show_matching_builds(self, builds): |
+ """Output the matching builds""" |
+ self.logger.info('Found %s build%s: %s' % ( |
+ len(builds), |
+ len(builds) > 1 and 's' or '', |
+ len(builds) > 10 and |
+ ' ... '.join([', '.join(builds[:5]), ', '.join(builds[-5:])]) or |
+ ', '.join(builds))) |
class DailyScraper(Scraper): |
@@ -214,94 +361,160 @@ class DailyScraper(Scraper): |
def __init__(self, branch='mozilla-central', build_id=None, date=None, |
build_number=None, *args, **kwargs): |
- Scraper.__init__(self, *args, **kwargs) |
self.branch = branch |
+ self.build_id = build_id |
+ self.date = date |
+ self.build_number = build_number |
+ |
+ Scraper.__init__(self, *args, **kwargs) |
+ |
+ def get_build_info(self): |
+ """Defines additional build information""" |
# Internally we access builds via index |
- if build_number is not None: |
- self.build_index = int(build_number) - 1 |
+ if self.build_number is not None: |
+ self.build_index = int(self.build_number) - 1 |
else: |
self.build_index = None |
- if build_id: |
- # A build id has been specified. Split up its components so the date |
- # and time can be extracted: '20111212042025' -> '2011-12-12 04:20:25' |
- self.date = datetime.strptime(build_id, '%Y%m%d%H%M%S') |
- self.builds, self.build_index = self.get_build_info_for_date(self.date, |
- has_time=True) |
+ if self.build_id: |
+ # A build id has been specified. Split up its components so the |
+ # date and time can be extracted: |
+ # '20111212042025' -> '2011-12-12 04:20:25' |
+ self.date = datetime.strptime(self.build_id, '%Y%m%d%H%M%S') |
- elif date: |
+ elif self.date: |
# A date (without time) has been specified. Use its value and the |
# build index to find the requested build for that day. |
- self.date = datetime.strptime(date, '%Y-%m-%d') |
- self.builds, self.build_index = self.get_build_info_for_date(self.date, |
- build_index=self.build_index) |
- |
+ try: |
+ self.date = datetime.strptime(self.date, '%Y-%m-%d') |
+ except: |
+ raise ValueError('%s is not a valid date' % self.date) |
else: |
- # If no build id nor date have been specified the lastest available |
+ # If no build id nor date have been specified the latest available |
# build of the given branch has to be identified. We also have to |
# retrieve the date of the build via its build id. |
- url = '%s/nightly/latest-%s/' % (self.base_url, self.branch) |
- |
- print 'Retrieving the build status file from %s' % url |
- parser = DirectoryParser(url) |
- parser.entries = parser.filter(r'.*%s\.txt' % self.platform_regex) |
- if not parser.entries: |
- message = 'Status file for %s build cannot be found' % self.platform_regex |
- raise NotFoundException(message, url) |
- |
- # Read status file for the platform, retrieve build id, and convert to a date |
- status_file = url + parser.entries[-1] |
- f = urllib.urlopen(status_file) |
- self.date = datetime.strptime(f.readline().strip(), '%Y%m%d%H%M%S') |
- self.builds, self.build_index = self.get_build_info_for_date(self.date, |
- has_time=True) |
- |
- |
- def get_build_info_for_date(self, date, has_time=False, build_index=None): |
- url = '/'.join([self.base_url, self.monthly_build_list_regex]) |
- |
- print 'Retrieving list of builds from %s' % url |
- parser = DirectoryParser(url) |
- regex = r'%(DATE)s-(\d+-)+%(BRANCH)s%(L10N)s$' % { |
- 'DATE': date.strftime('%Y-%m-%d'), |
- 'BRANCH': self.branch, |
- 'L10N': '' if self.locale == 'en-US' else '-l10n'} |
- parser.entries = parser.filter(regex) |
+ self.date = self.get_latest_build_date() |
+ |
+ self.builds, self.build_index = self.get_build_info_for_date( |
+ self.date, self.build_index) |
+ |
+ def get_latest_build_date(self): |
+    """Return the date of the latest available nightly build."""
+ if self.application not in ('fennec'): |
+ url = urljoin(self.base_url, 'nightly', 'latest-%s/' % self.branch) |
+ else: |
+ url = urljoin(self.base_url, 'nightly', 'latest-%s-%s/' % |
+ (self.branch, self.platform)) |
+ |
+ self.logger.info('Retrieving the build status file from %s' % url) |
+ parser = self._create_directory_parser(url) |
+ parser.entries = parser.filter(r'.*%s\.txt' % self.platform_regex) |
if not parser.entries: |
- message = 'Folder for builds on %s has not been found' % self.date.strftime('%Y-%m-%d') |
- raise NotFoundException(message, url) |
+ message = 'Status file for %s build cannot be found' % \ |
+ self.platform_regex |
+ raise errors.NotFoundError(message, url) |
+ |
+ # Read status file for the platform, retrieve build id, |
+ # and convert to a date |
+ headers = {'Cache-Control': 'max-age=0'} |
+ |
+ r = self.session.get(url + parser.entries[-1], headers=headers) |
+ try: |
+ r.raise_for_status() |
+ |
+ return datetime.strptime(r.text.split('\n')[0], '%Y%m%d%H%M%S') |
+ finally: |
+ r.close() |
+ |
+ def is_build_dir(self, folder_name): |
+ """Return whether or not the given dir contains a build.""" |
+ |
+ # Cannot move up to base scraper due to parser.entries call in |
+ # get_build_info_for_date (see below) |
+ |
+ url = '%s/' % urljoin(self.base_url, self.monthly_build_list_regex, folder_name) |
+ if self.application in APPLICATIONS_MULTI_LOCALE \ |
+ and self.locale != 'multi': |
+ url = '%s/' % urljoin(url, self.locale) |
+ |
+ parser = self._create_directory_parser(url) |
+ |
+ pattern = re.compile(self.binary_regex, re.IGNORECASE) |
+ for entry in parser.entries: |
+ try: |
+ pattern.match(entry).group() |
+ return True |
+ except: |
+ # No match, continue with next entry |
+ continue |
+ return False |
+ |
+ def get_build_info_for_date(self, date, build_index=None): |
+ url = urljoin(self.base_url, self.monthly_build_list_regex) |
+ has_time = date and date.time() |
+ |
+ self.logger.info('Retrieving list of builds from %s' % url) |
+ parser = self._create_directory_parser(url) |
+ regex = r'%(DATE)s-(\d+-)+%(BRANCH)s%(L10N)s%(PLATFORM)s$' % { |
+ 'DATE': date.strftime('%Y-%m-%d'), |
+ 'BRANCH': self.branch, |
+ # ensure to select the correct subfolder for localized builds |
+ 'L10N': '' if self.locale in ('en-US', 'multi') else '(-l10n)?', |
+ 'PLATFORM': '' if self.application not in ( |
+ 'fennec') else '-' + self.platform |
+ } |
+ |
+ parser.entries = parser.filter(regex) |
+ parser.entries = parser.filter(self.is_build_dir) |
if has_time: |
- # If a time is included in the date, use it to determine the build's index |
+ # If a time is included in the date, use it to determine the |
+ # build's index |
regex = r'.*%s.*' % date.strftime('%H-%M-%S') |
- build_index = parser.entries.index(parser.filter(regex)[0]) |
- else: |
- # If no index has been given, set it to the last build of the day. |
- if build_index is None: |
- build_index = len(parser.entries) - 1 |
+ parser.entries = parser.filter(regex) |
- return (parser.entries, build_index) |
+ if not parser.entries: |
+ date_format = '%Y-%m-%d-%H-%M-%S' if has_time else '%Y-%m-%d' |
+ message = 'Folder for builds on %s has not been found' % \ |
+ self.date.strftime(date_format) |
+ raise errors.NotFoundError(message, url) |
+        # Show the user which builds matched the given filters.
+ self.show_matching_builds(parser.entries) |
+ # If no index has been given, set it to the last build of the day. |
+ if build_index is None: |
+ # Find the most recent non-empty entry. |
+ build_index = len(parser.entries) |
+ for build in reversed(parser.entries): |
+ build_index -= 1 |
+ if not build_index or self.is_build_dir(build): |
+ break |
+ self.logger.info('Selected build: %s' % parser.entries[build_index]) |
+ |
+ return (parser.entries, build_index) |
@property |
def binary_regex(self): |
"""Return the regex for the binary""" |
regex_base_name = r'^%(APP)s-.*\.%(LOCALE)s\.%(PLATFORM)s' |
- regex_suffix = {'linux': r'\.%(EXT)s$', |
+ regex_suffix = {'android-api-9': r'\.%(EXT)s$', |
+ 'android-api-11': r'\.%(EXT)s$', |
+ 'android-x86': r'\.%(EXT)s$', |
+ 'linux': r'\.%(EXT)s$', |
'linux64': r'\.%(EXT)s$', |
'mac': r'\.%(EXT)s$', |
'mac64': r'\.%(EXT)s$', |
- 'win32': r'(\.installer)\.%(EXT)s$', |
- 'win64': r'(\.installer)\.%(EXT)s$'} |
+ 'win32': r'(\.installer%(STUB)s)?\.%(EXT)s$', |
+ 'win64': r'(\.installer%(STUB)s)?\.%(EXT)s$'} |
regex = regex_base_name + regex_suffix[self.platform] |
return regex % {'APP': self.application, |
'LOCALE': self.locale, |
'PLATFORM': self.platform_regex, |
- 'EXT': self.extension} |
- |
+ 'EXT': self.extension, |
+ 'STUB': '-stub' if self.is_stub_installer else ''} |
def build_filename(self, binary): |
"""Return the proposed filename with extension for the binary""" |
@@ -315,53 +528,69 @@ class DailyScraper(Scraper): |
timestamp = self.date.strftime('%Y-%m-%d') |
return '%(TIMESTAMP)s-%(BRANCH)s-%(NAME)s' % { |
- 'TIMESTAMP': timestamp, |
- 'BRANCH': self.branch, |
- 'NAME': binary} |
- |
+ 'TIMESTAMP': timestamp, |
+ 'BRANCH': self.branch, |
+ 'NAME': binary} |
@property |
def monthly_build_list_regex(self): |
- """Return the regex for the folder which contains the builds of a month.""" |
+ """Return the regex for the folder containing builds of a month.""" |
# Regex for possible builds for the given date |
return r'nightly/%(YEAR)s/%(MONTH)s/' % { |
- 'YEAR': self.date.year, |
- 'MONTH': str(self.date.month).zfill(2) } |
- |
+ 'YEAR': self.date.year, |
+ 'MONTH': str(self.date.month).zfill(2)} |
@property |
def path_regex(self): |
- """Return the regex for the path""" |
+ """Return the regex for the path to the build folder""" |
try: |
- return self.monthly_build_list_regex + self.builds[self.build_index] |
+ path = '%s/' % urljoin(self.monthly_build_list_regex, |
+ self.builds[self.build_index]) |
+ if self.application in APPLICATIONS_MULTI_LOCALE \ |
+ and self.locale != 'multi': |
+ path = '%s/' % urljoin(path, self.locale) |
+ return path |
except: |
- raise NotFoundException("Specified sub folder cannot be found", |
- self.base_url + self.monthly_build_list_regex) |
+ folder = urljoin(self.base_url, self.monthly_build_list_regex) |
+ raise errors.NotFoundError("Specified sub folder cannot be found", |
+ folder) |
class DirectScraper(Scraper): |
"""Class to download a file from a specified URL""" |
def __init__(self, url, *args, **kwargs): |
- Scraper.__init__(self, *args, **kwargs) |
+ self._url = url |
- self.url = url |
+ Scraper.__init__(self, *args, **kwargs) |
@property |
- def target(self): |
- return urllib.splitquery(self.final_url)[0].rpartition('/')[-1] |
+ def filename(self): |
+ if os.path.splitext(self.destination)[1]: |
+ # If the filename has been given make use of it |
+ target_file = self.destination |
+ else: |
+ # Otherwise determine it from the url. |
+ parsed_url = urlparse(self.url) |
+ source_filename = (parsed_url.path.rpartition('/')[-1] or |
+ parsed_url.hostname) |
+ target_file = os.path.join(self.destination, source_filename) |
+ |
+ return os.path.abspath(target_file) |
@property |
- def final_url(self): |
- return self.url |
+ def url(self): |
+ return self._url |
class ReleaseScraper(Scraper): |
"""Class to download a release build from the Mozilla server""" |
- def __init__(self, *args, **kwargs): |
+ def __init__(self, version, *args, **kwargs): |
+ self.version = version |
+ |
Scraper.__init__(self, *args, **kwargs) |
@property |
@@ -372,66 +601,78 @@ class ReleaseScraper(Scraper): |
'linux64': r'^%(APP)s-.*\.%(EXT)s$', |
'mac': r'^%(APP)s.*\.%(EXT)s$', |
'mac64': r'^%(APP)s.*\.%(EXT)s$', |
- 'win32': r'^%(APP)s.*\.%(EXT)s$', |
- 'win64': r'^%(APP)s.*\.%(EXT)s$'} |
- return regex[self.platform] % {'APP': self.application, |
- 'EXT': self.extension} |
- |
+ 'win32': r'^%(APP)s.*%(STUB)s.*\.%(EXT)s$', |
+ 'win64': r'^%(APP)s.*%(STUB)s.*\.%(EXT)s$'} |
+ return regex[self.platform] % { |
+ 'APP': self.application, |
+ 'EXT': self.extension, |
+ 'STUB': 'Stub' if self.is_stub_installer else ''} |
@property |
def path_regex(self): |
- """Return the regex for the path""" |
+ """Return the regex for the path to the build folder""" |
- regex = r'releases/%(VERSION)s/%(PLATFORM)s/%(LOCALE)s' |
+ regex = r'releases/%(VERSION)s/%(PLATFORM)s/%(LOCALE)s/' |
return regex % {'LOCALE': self.locale, |
'PLATFORM': self.platform_regex, |
'VERSION': self.version} |
+ @property |
+ def platform_regex(self): |
+ """Return the platform fragment of the URL""" |
+ |
+ if self.platform == 'win64': |
+ return self.platform |
+ |
+ return PLATFORM_FRAGMENTS[self.platform] |
def build_filename(self, binary): |
"""Return the proposed filename with extension for the binary""" |
- template = '%(APP)s-%(VERSION)s.%(LOCALE)s.%(PLATFORM)s.%(EXT)s' |
+ template = '%(APP)s-%(VERSION)s.%(LOCALE)s.%(PLATFORM)s%(STUB)s' \ |
+ '.%(EXT)s' |
return template % {'APP': self.application, |
'VERSION': self.version, |
'LOCALE': self.locale, |
'PLATFORM': self.platform, |
+ 'STUB': '-stub' if self.is_stub_installer else '', |
'EXT': self.extension} |
class ReleaseCandidateScraper(ReleaseScraper): |
"""Class to download a release candidate build from the Mozilla server""" |
- def __init__(self, build_number=None, no_unsigned=False, *args, **kwargs): |
- Scraper.__init__(self, *args, **kwargs) |
- |
- # Internally we access builds via index |
- if build_number is not None: |
- self.build_index = int(build_number) - 1 |
- else: |
- self.build_index = None |
- |
- self.builds, self.build_index = self.get_build_info_for_version(self.version, self.build_index) |
+ def __init__(self, version, build_number=None, *args, **kwargs): |
+ self.version = version |
+ self.build_number = build_number |
- self.no_unsigned = no_unsigned |
- self.unsigned = False |
+ Scraper.__init__(self, *args, **kwargs) |
+ def get_build_info(self): |
+ """Defines additional build information""" |
- def get_build_info_for_version(self, version, build_index=None): |
- url = '/'.join([self.base_url, self.candidate_build_list_regex]) |
+ # Internally we access builds via index |
+ url = urljoin(self.base_url, self.candidate_build_list_regex) |
+ self.logger.info('Retrieving list of candidate builds from %s' % url) |
- print 'Retrieving list of candidate builds from %s' % url |
- parser = DirectoryParser(url) |
+ parser = self._create_directory_parser(url) |
if not parser.entries: |
- message = 'Folder for specific candidate builds at has not been found' |
- raise NotFoundException(message, url) |
- |
- # If no index has been given, set it to the last build of the given version. |
- if build_index is None: |
- build_index = len(parser.entries) - 1 |
- |
- return (parser.entries, build_index) |
- |
+            message = 'Folder for specific candidate builds at %s has ' \
+                'not been found' % url
+ raise errors.NotFoundError(message, url) |
+ |
+ self.show_matching_builds(parser.entries) |
+ self.builds = parser.entries |
+ self.build_index = len(parser.entries) - 1 |
+ |
+ if self.build_number and \ |
+ ('build%s' % self.build_number) in self.builds: |
+ self.builds = ['build%s' % self.build_number] |
+ self.build_index = 0 |
+ self.logger.info('Selected build: build%s' % self.build_number) |
+ else: |
+ self.logger.info('Selected build: build%d' % |
+ (self.build_index + 1)) |
@property |
def candidate_build_list_regex(self): |
@@ -439,51 +680,49 @@ class ReleaseCandidateScraper(ReleaseScraper): |
a candidate build.""" |
# Regex for possible builds for the given date |
- return r'nightly/%(VERSION)s-candidates/' % { |
- 'VERSION': self.version } |
- |
+ return r'candidates/%(VERSION)s-candidates/' % { |
+ 'VERSION': self.version} |
@property |
def path_regex(self): |
- """Return the regex for the path""" |
+ """Return the regex for the path to the build folder""" |
- regex = r'%(PREFIX)s%(BUILD)s/%(UNSIGNED)s%(PLATFORM)s/%(LOCALE)s' |
+ regex = r'%(PREFIX)s%(BUILD)s/%(PLATFORM)s/%(LOCALE)s/' |
return regex % {'PREFIX': self.candidate_build_list_regex, |
'BUILD': self.builds[self.build_index], |
'LOCALE': self.locale, |
- 'PLATFORM': self.platform_regex, |
- 'UNSIGNED': "unsigned/" if self.unsigned else ""} |
+ 'PLATFORM': self.platform_regex} |
+ @property |
+ def platform_regex(self): |
+ """Return the platform fragment of the URL""" |
+ |
+ if self.platform == 'win64': |
+ return self.platform |
+ |
+ return PLATFORM_FRAGMENTS[self.platform] |
def build_filename(self, binary): |
"""Return the proposed filename with extension for the binary""" |
- template = '%(APP)s-%(VERSION)s-build%(BUILD)s.%(LOCALE)s.%(PLATFORM)s.%(EXT)s' |
+ template = '%(APP)s-%(VERSION)s-%(BUILD)s.%(LOCALE)s.' \ |
+ '%(PLATFORM)s%(STUB)s.%(EXT)s' |
return template % {'APP': self.application, |
'VERSION': self.version, |
'BUILD': self.builds[self.build_index], |
'LOCALE': self.locale, |
'PLATFORM': self.platform, |
+ 'STUB': '-stub' if self.is_stub_installer else '', |
'EXT': self.extension} |
- |
def download(self): |
"""Download the specified file""" |
try: |
# Try to download the signed candidate build |
Scraper.download(self) |
- except NotFoundException, e: |
- print str(e) |
- |
- # If the signed build cannot be downloaded and unsigned builds are |
- # allowed, try to download the unsigned build instead |
- if self.no_unsigned: |
- raise |
- else: |
- print "Signed build has not been found. Falling back to unsigned build." |
- self.unsigned = True |
- Scraper.download(self) |
+ except errors.NotFoundError, e: |
+ self.logger.exception(str(e)) |
class TinderboxScraper(Scraper): |
@@ -497,86 +736,91 @@ class TinderboxScraper(Scraper): |
def __init__(self, branch='mozilla-central', build_number=None, date=None, |
debug_build=False, *args, **kwargs): |
- Scraper.__init__(self, *args, **kwargs) |
self.branch = branch |
+ self.build_number = build_number |
self.debug_build = debug_build |
- self.locale_build = self.locale != 'en-US' |
- self.timestamp = None |
+ self.date = date |
+ self.timestamp = None |
# Currently any time in RelEng is based on the Pacific time zone. |
- self.timezone = PacificTimezone(); |
+ self.timezone = PacificTimezone() |
+ |
+ Scraper.__init__(self, *args, **kwargs) |
+ |
+ def get_build_info(self): |
+ "Defines additional build information" |
# Internally we access builds via index |
- if build_number is not None: |
- self.build_index = int(build_number) - 1 |
+ if self.build_number is not None: |
+ self.build_index = int(self.build_number) - 1 |
else: |
self.build_index = None |
- if date is not None: |
+ if self.date is not None: |
try: |
- self.date = datetime.fromtimestamp(float(date), self.timezone) |
- self.timestamp = date |
+ # date is provided in the format 2013-07-23 |
+ self.date = datetime.strptime(self.date, '%Y-%m-%d') |
except: |
- self.date = datetime.strptime(date, '%Y-%m-%d') |
- else: |
- self.date = None |
+ try: |
+ # date is provided as a unix timestamp |
+ datetime.fromtimestamp(float(self.date)) |
+ self.timestamp = self.date |
+ except: |
+ raise ValueError('%s is not a valid date' % self.date) |
+ self.locale_build = self.locale != 'en-US' |
# For localized builds we do not have to retrieve the list of builds |
# because only the last build is available |
if not self.locale_build: |
- self.builds, self.build_index = self.get_build_info(self.build_index) |
- |
- try: |
- self.timestamp = self.builds[self.build_index] |
- except: |
- raise NotFoundException("Specified sub folder cannot be found", |
- self.base_url + self.monthly_build_list_regex) |
- |
+ self.builds, self.build_index = self.get_build_info_for_index( |
+ self.build_index) |
@property |
def binary_regex(self): |
"""Return the regex for the binary""" |
- regex_base_name = r'^%(APP)s-.*\.%(LOCALE)s\.' |
+ regex_base_name = r'^%(APP)s-.*\.%(LOCALE)s\.%(PLATFORM)s' |
regex_suffix = {'linux': r'.*\.%(EXT)s$', |
'linux64': r'.*\.%(EXT)s$', |
'mac': r'.*\.%(EXT)s$', |
'mac64': r'.*\.%(EXT)s$', |
- 'win32': r'.*(\.installer)\.%(EXT)s$', |
- 'win64': r'.*(\.installer)\.%(EXT)s$'} |
+ 'win32': r'(\.installer%(STUB)s)?\.%(EXT)s$', |
+ 'win64': r'(\.installer%(STUB)s)?\.%(EXT)s$'} |
regex = regex_base_name + regex_suffix[self.platform] |
return regex % {'APP': self.application, |
'LOCALE': self.locale, |
+ 'PLATFORM': PLATFORM_FRAGMENTS[self.platform], |
+ 'STUB': '-stub' if self.is_stub_installer else '', |
'EXT': self.extension} |
- |
def build_filename(self, binary): |
"""Return the proposed filename with extension for the binary""" |
return '%(TIMESTAMP)s%(BRANCH)s%(DEBUG)s-%(NAME)s' % { |
- 'TIMESTAMP': self.timestamp + '-' if self.timestamp else '', |
- 'BRANCH': self.branch, |
- 'DEBUG': '-debug' if self.debug_build else '', |
- 'NAME': binary} |
- |
+ 'TIMESTAMP': self.timestamp + '-' if self.timestamp else '', |
+ 'BRANCH': self.branch, |
+ 'DEBUG': '-debug' if self.debug_build else '', |
+ 'NAME': binary} |
@property |
def build_list_regex(self): |
"""Return the regex for the folder which contains the list of builds""" |
- regex = 'tinderbox-builds/%(BRANCH)s-%(PLATFORM)s%(L10N)s%(DEBUG)s' |
- |
- return regex % {'BRANCH': self.branch, |
- 'PLATFORM': '' if self.locale_build else self.platform_regex, |
- 'L10N': 'l10n' if self.locale_build else '', |
- 'DEBUG': '-debug' if self.debug_build else ''} |
+ regex = 'tinderbox-builds/%(BRANCH)s-%(PLATFORM)s%(L10N)s%(DEBUG)s/' |
+ return regex % { |
+ 'BRANCH': self.branch, |
+ 'PLATFORM': '' if self.locale_build else self.platform_regex, |
+ 'L10N': 'l10n' if self.locale_build else '', |
+ 'DEBUG': '-debug' if self.debug_build else ''} |
def date_matches(self, timestamp): |
- """Determines whether the timestamp date is equal to the argument date""" |
+ """ |
+ Determines whether the timestamp date is equal to the argument date |
+ """ |
if self.date is None: |
return False |
@@ -584,65 +828,89 @@ class TinderboxScraper(Scraper): |
timestamp = datetime.fromtimestamp(float(timestamp), self.timezone) |
if self.date.date() == timestamp.date(): |
return True |
- |
- return False |
- |
- |
- @property |
- def date_validation_regex(self): |
- """Return the regex for a valid date argument value""" |
- |
- return r'^\d{4}-\d{1,2}-\d{1,2}$|^\d+$' |
+ return False |
def detect_platform(self): |
"""Detect the current platform""" |
platform = Scraper.detect_platform(self) |
- # On OS X we have to special case the platform detection code and fallback |
- # to 64 bit builds for the en-US locale |
- if mozinfo.os == 'mac' and self.locale == 'en-US' and mozinfo.bits == 64: |
+ # On OS X we have to special case the platform detection code and |
+ # fallback to 64 bit builds for the en-US locale |
+ if mozinfo.os == 'mac' and self.locale == 'en-US' and \ |
+ mozinfo.bits == 64: |
platform = "%s%d" % (mozinfo.os, mozinfo.bits) |
return platform |
+ def is_build_dir(self, folder_name): |
+ """Return whether or not the given dir contains a build.""" |
- def get_build_info(self, build_index=None): |
- url = '/'.join([self.base_url, self.build_list_regex]) |
+ # Cannot move up to base scraper due to parser.entries call in |
+ # get_build_info_for_index (see below) |
+ url = '%s/' % urljoin(self.base_url, self.build_list_regex, folder_name) |
- print 'Retrieving list of builds from %s' % url |
+ if self.application in APPLICATIONS_MULTI_LOCALE \ |
+ and self.locale != 'multi': |
+ url = '%s/' % urljoin(url, self.locale) |
- # If a timestamp is given, retrieve just that build |
- regex = '^' + self.timestamp + '$' if self.timestamp else r'^\d+$' |
+ parser = self._create_directory_parser(url) |
- parser = DirectoryParser(url) |
- parser.entries = parser.filter(regex) |
+ pattern = re.compile(self.binary_regex, re.IGNORECASE) |
+ for entry in parser.entries: |
+ try: |
+ pattern.match(entry).group() |
+ return True |
+ except: |
+ # No match, continue with next entry |
+ continue |
+ return False |
- # If date is given, retrieve the subset of builds on that date |
- if self.date is not None: |
+ def get_build_info_for_index(self, build_index=None): |
+ url = urljoin(self.base_url, self.build_list_regex) |
+ |
+ self.logger.info('Retrieving list of builds from %s' % url) |
+ parser = self._create_directory_parser(url) |
+ parser.entries = parser.filter(r'^\d+$') |
+ |
+ if self.timestamp: |
+ # If a timestamp is given, retrieve the folder with the timestamp |
+ # as name |
+ parser.entries = self.timestamp in parser.entries and \ |
+ [self.timestamp] |
+ |
+ elif self.date: |
+ # If date is given, retrieve the subset of builds on that date |
parser.entries = filter(self.date_matches, parser.entries) |
if not parser.entries: |
message = 'No builds have been found' |
- raise NotFoundException(message, url) |
+ raise errors.NotFoundError(message, url) |
+ |
+ self.show_matching_builds(parser.entries) |
# If no index has been given, set it to the last build of the day. |
if build_index is None: |
- build_index = len(parser.entries) - 1 |
+ # Find the most recent non-empty entry. |
+ build_index = len(parser.entries) |
+ for build in reversed(parser.entries): |
+ build_index -= 1 |
+ if not build_index or self.is_build_dir(build): |
+ break |
- return (parser.entries, build_index) |
+ self.logger.info('Selected build: %s' % parser.entries[build_index]) |
+ return (parser.entries, build_index) |
@property |
def path_regex(self): |
- """Return the regex for the path""" |
+ """Return the regex for the path to the build folder""" |
if self.locale_build: |
return self.build_list_regex |
- return '/'.join([self.build_list_regex, self.builds[self.build_index]]) |
- |
+ return '%s/' % urljoin(self.build_list_regex, self.builds[self.build_index]) |
@property |
def platform_regex(self): |
@@ -650,7 +918,7 @@ class TinderboxScraper(Scraper): |
PLATFORM_FRAGMENTS = {'linux': 'linux', |
'linux64': 'linux64', |
- 'mac': 'macosx', |
+ 'mac': 'macosx64', |
'mac64': 'macosx64', |
'win32': 'win32', |
'win64': 'win64'} |
@@ -658,178 +926,104 @@ class TinderboxScraper(Scraper): |
return PLATFORM_FRAGMENTS[self.platform] |
-def cli(): |
- """Main function for the downloader""" |
- |
- BUILD_TYPES = {'release': ReleaseScraper, |
- 'candidate': ReleaseCandidateScraper, |
- 'daily': DailyScraper, |
- 'tinderbox': TinderboxScraper } |
- |
- usage = 'usage: %prog [options]' |
- parser = OptionParser(usage=usage, description=__doc__) |
- parser.add_option('--application', '-a', |
- dest='application', |
- choices=APPLICATIONS, |
- default='firefox', |
- metavar='APPLICATION', |
- help='The name of the application to download, ' |
- 'default: "%default"') |
- parser.add_option('--directory', '-d', |
- dest='directory', |
- default=os.getcwd(), |
- metavar='DIRECTORY', |
- help='Target directory for the download, default: ' |
- 'current working directory') |
- parser.add_option('--build-number', |
- dest='build_number', |
- default=None, |
- type="int", |
- metavar='BUILD_NUMBER', |
- help='Number of the build (for candidate, daily, ' |
- 'and tinderbox builds)') |
- parser.add_option('--locale', '-l', |
- dest='locale', |
- default='en-US', |
- metavar='LOCALE', |
- help='Locale of the application, default: "%default"') |
- parser.add_option('--platform', '-p', |
- dest='platform', |
- choices=PLATFORM_FRAGMENTS.keys(), |
- metavar='PLATFORM', |
- help='Platform of the application') |
- parser.add_option('--type', '-t', |
- dest='type', |
- choices=BUILD_TYPES.keys(), |
- default='release', |
- metavar='BUILD_TYPE', |
- help='Type of build to download, default: "%default"') |
- parser.add_option('--url', |
- dest='url', |
- default=None, |
- metavar='URL', |
- help='URL to download.') |
- parser.add_option('--version', '-v', |
- dest='version', |
- metavar='VERSION', |
- help='Version of the application to be used by release and\ |
- candidate builds, i.e. "3.6"') |
- parser.add_option('--extension', |
- dest='extension', |
- default=None, |
- metavar='EXTENSION', |
- help='File extension of the build (e.g. "zip"), default:\ |
- the standard build extension on the platform.') |
- parser.add_option('--username', |
- dest='username', |
- default=None, |
- metavar='USERNAME', |
- help='Username for basic HTTP authentication.') |
- parser.add_option('--password', |
- dest='password', |
- default=None, |
- metavar='PASSWORD', |
- help='Password for basic HTTP authentication.') |
- parser.add_option('--retry-attempts', |
- dest='retry_attempts', |
- default=3, |
- type=int, |
- metavar='RETRY_ATTEMPTS', |
- help='Number of times the download will be attempted in ' |
- 'the event of a failure, default: %default') |
- parser.add_option('--retry-delay', |
- dest='retry_delay', |
- default=10, |
- type=int, |
- metavar='RETRY_DELAY', |
- help='Amount of time (in seconds) to wait between retry ' |
- 'attempts, default: %default') |
- |
- # Option group for candidate builds |
- group = OptionGroup(parser, "Candidate builds", |
- "Extra options for candidate builds.") |
- group.add_option('--no-unsigned', |
- dest='no_unsigned', |
- action="store_true", |
- help="Don't allow to download unsigned builds if signed\ |
- builds are not available") |
- parser.add_option_group(group) |
- |
- # Option group for daily builds |
- group = OptionGroup(parser, "Daily builds", |
- "Extra options for daily builds.") |
- group.add_option('--branch', |
- dest='branch', |
- default='mozilla-central', |
- metavar='BRANCH', |
- help='Name of the branch, default: "%default"') |
- group.add_option('--build-id', |
- dest='build_id', |
- default=None, |
- metavar='BUILD_ID', |
- help='ID of the build to download') |
- group.add_option('--date', |
- dest='date', |
- default=None, |
- metavar='DATE', |
- help='Date of the build, default: latest build') |
- parser.add_option_group(group) |
- |
- # Option group for tinderbox builds |
- group = OptionGroup(parser, "Tinderbox builds", |
- "Extra options for tinderbox builds.") |
- group.add_option('--debug-build', |
- dest='debug_build', |
- action="store_true", |
- help="Download a debug build") |
- parser.add_option_group(group) |
- |
- # TODO: option group for nightly builds |
- (options, args) = parser.parse_args() |
- |
- # Check for required options and arguments |
- # Note: Will be optional when ini file support has been landed |
- if not options.url \ |
- and not options.type in ['daily', 'tinderbox'] \ |
- and not options.version: |
- parser.error('The version of the application to download has not been specified.') |
- |
- # Instantiate scraper and download the build |
- scraper_keywords = {'application': options.application, |
- 'locale': options.locale, |
- 'platform': options.platform, |
- 'version': options.version, |
- 'directory': options.directory, |
- 'extension': options.extension, |
- 'authentication': { |
- 'username': options.username, |
- 'password': options.password}, |
- 'retry_attempts': options.retry_attempts, |
- 'retry_delay': options.retry_delay} |
- scraper_options = {'candidate': { |
- 'build_number': options.build_number, |
- 'no_unsigned': options.no_unsigned}, |
- 'daily': { |
- 'branch': options.branch, |
- 'build_number': options.build_number, |
- 'build_id': options.build_id, |
- 'date': options.date}, |
- 'tinderbox': { |
- 'branch': options.branch, |
- 'build_number': options.build_number, |
- 'date': options.date, |
- 'debug_build': options.debug_build} |
- } |
- |
- kwargs = scraper_keywords.copy() |
- kwargs.update(scraper_options.get(options.type, {})) |
- |
- if options.url: |
- build = DirectScraper(options.url, **kwargs) |
- else: |
- build = BUILD_TYPES[options.type](**kwargs) |
- |
- build.download() |
- |
-if __name__ == "__main__": |
- cli() |
+class TryScraper(Scraper): |
+ "Class to download a try build from the Mozilla server." |
+ |
+ def __init__(self, changeset=None, debug_build=False, *args, **kwargs): |
+ |
+ self.debug_build = debug_build |
+ self.changeset = changeset |
+ |
+ Scraper.__init__(self, *args, **kwargs) |
+ |
+ def get_build_info(self): |
+ "Defines additional build information" |
+ |
+ self.builds, self.build_index = self.get_build_info_for_index() |
+ |
+ @property |
+ def binary_regex(self): |
+ """Return the regex for the binary""" |
+ |
+ regex_base_name = r'^%(APP)s-.*\.%(LOCALE)s\.%(PLATFORM)s' |
+ regex_suffix = {'linux': r'.*\.%(EXT)s$', |
+ 'linux64': r'.*\.%(EXT)s$', |
+ 'mac': r'.*\.%(EXT)s$', |
+ 'mac64': r'.*\.%(EXT)s$', |
+ 'win32': r'.*(\.installer%(STUB)s)\.%(EXT)s$', |
+ 'win64': r'.*(\.installer%(STUB)s)\.%(EXT)s$'} |
+ |
+ regex = regex_base_name + regex_suffix[self.platform] |
+ |
+ return regex % {'APP': self.application, |
+ 'LOCALE': self.locale, |
+ 'PLATFORM': PLATFORM_FRAGMENTS[self.platform], |
+ 'STUB': '-stub' if self.is_stub_installer else '', |
+ 'EXT': self.extension} |
+ |
+ def build_filename(self, binary): |
+ """Return the proposed filename with extension for the binary""" |
+ |
+ return '%(CHANGESET)s%(DEBUG)s-%(NAME)s' % { |
+ 'CHANGESET': self.changeset, |
+ 'DEBUG': '-debug' if self.debug_build else '', |
+ 'NAME': binary} |
+ |
+ @property |
+ def build_list_regex(self): |
+ """Return the regex for the folder which contains the list of builds""" |
+ |
+ return 'try-builds/' |
+ |
+ def detect_platform(self): |
+ """Detect the current platform""" |
+ |
+ platform = Scraper.detect_platform(self) |
+ |
+ # On OS X we have to special case the platform detection code and |
+ # fallback to 64 bit builds for the en-US locale |
+ if mozinfo.os == 'mac' and self.locale == 'en-US' and \ |
+ mozinfo.bits == 64: |
+ platform = "%s%d" % (mozinfo.os, mozinfo.bits) |
+ |
+ return platform |
+ |
+ def get_build_info_for_index(self, build_index=None): |
+ url = urljoin(self.base_url, self.build_list_regex) |
+ |
+ self.logger.info('Retrieving list of builds from %s' % url) |
+ parser = self._create_directory_parser(url) |
+ parser.entries = parser.filter('.*-%s$' % self.changeset) |
+ |
+ if not parser.entries: |
+ raise errors.NotFoundError('No builds have been found', url) |
+ |
+ self.show_matching_builds(parser.entries) |
+ |
+ self.logger.info('Selected build: %s' % parser.entries[0]) |
+ |
+ return (parser.entries, 0) |
+ |
+ @property |
+ def path_regex(self): |
+ """Return the regex for the path to the build folder""" |
+ |
+ build_dir = 'try-%(PLATFORM)s%(DEBUG)s/' % { |
+ 'PLATFORM': self.platform_regex, |
+ 'DEBUG': '-debug' if self.debug_build else ''} |
+ return urljoin(self.build_list_regex, |
+ self.builds[self.build_index], |
+ build_dir) |
+ |
+ @property |
+ def platform_regex(self): |
+ """Return the platform fragment of the URL""" |
+ |
+ PLATFORM_FRAGMENTS = {'linux': 'linux', |
+ 'linux64': 'linux64', |
+ 'mac': 'macosx64', |
+ 'mac64': 'macosx64', |
+ 'win32': 'win32', |
+ 'win64': 'win64'} |
+ |
+ return PLATFORM_FRAGMENTS[self.platform] |