OLD | NEW |
1 #!/usr/bin/env python | |
2 | |
3 # This Source Code Form is subject to the terms of the Mozilla Public | 1 # This Source Code Form is subject to the terms of the Mozilla Public |
4 # License, v. 2.0. If a copy of the MPL was not distributed with this | 2 # License, v. 2.0. If a copy of the MPL was not distributed with this |
5 # file, You can obtain one at http://mozilla.org/MPL/2.0/. | 3 # file, You can obtain one at http://mozilla.org/MPL/2.0/. |
6 | 4 |
7 """Module to handle downloads for different types of Firefox and Thunderbird bui
lds.""" | |
8 | |
9 | |
10 from datetime import datetime | 5 from datetime import datetime |
11 from optparse import OptionParser, OptionGroup | 6 import logging |
12 import os | 7 import os |
13 import re | 8 import re |
| 9 import requests |
14 import sys | 10 import sys |
15 import time | 11 import time |
16 import urllib | 12 import urllib |
17 import urllib2 | 13 from urlparse import urlparse |
18 | 14 |
19 import mozinfo | 15 import mozinfo |
20 | 16 |
| 17 import errors |
| 18 |
21 from parser import DirectoryParser | 19 from parser import DirectoryParser |
22 from timezones import PacificTimezone | 20 from timezones import PacificTimezone |
23 | 21 from utils import urljoin |
24 | 22 |
25 APPLICATIONS = ['b2g', 'firefox', 'thunderbird'] | 23 |
| 24 APPLICATIONS = ('b2g', 'firefox', 'fennec', 'thunderbird') |
| 25 |
| 26 # Some applications contain all locales in a single build |
| 27 APPLICATIONS_MULTI_LOCALE = ('b2g', 'fennec') |
| 28 |
| 29 # Used if the application is named differently than the subfolder on the server |
| 30 APPLICATIONS_TO_FTP_DIRECTORY = {'fennec': 'mobile'} |
26 | 31 |
27 # Base URL for the path to all builds | 32 # Base URL for the path to all builds |
28 BASE_URL = 'https://ftp.mozilla.org/pub/mozilla.org' | 33 BASE_URL = 'https://archive.mozilla.org/pub/' |
29 | 34 |
30 PLATFORM_FRAGMENTS = {'linux': 'linux-i686', | 35 # Chunk size when downloading a file |
31 'linux64': 'linux-x86_64', | 36 CHUNK_SIZE = 16 * 1024 |
32 'mac': 'mac', | 37 |
33 'mac64': 'mac64', | 38 DEFAULT_FILE_EXTENSIONS = {'android-api-9': 'apk', |
34 'win32': 'win32', | 39 'android-api-11': 'apk', |
35 'win64': 'win64-x86_64'} | 40 'android-x86': 'apk', |
36 | 41 'linux': 'tar.bz2', |
37 DEFAULT_FILE_EXTENSIONS = {'linux': 'tar.bz2', | |
38 'linux64': 'tar.bz2', | 42 'linux64': 'tar.bz2', |
39 'mac': 'dmg', | 43 'mac': 'dmg', |
40 'mac64': 'dmg', | 44 'mac64': 'dmg', |
41 'win32': 'exe', | 45 'win32': 'exe', |
42 'win64': 'exe'} | 46 'win64': 'exe'} |
43 | 47 |
44 class NotFoundException(Exception): | 48 PLATFORM_FRAGMENTS = {'android-api-9': r'android-arm', |
45 """Exception for a resource not being found (e.g. no logs)""" | 49 'android-api-11': r'android-arm', |
46 def __init__(self, message, location): | 50 'android-x86': r'android-i386', |
47 self.location = location | 51 'linux': r'linux-i686', |
48 Exception.__init__(self, ': '.join([message, location])) | 52 'linux64': r'linux-x86_64', |
| 53 'mac': r'mac', |
| 54 'mac64': r'mac(64)?', |
| 55 'win32': r'win32', |
| 56 'win64': r'win64(-x86_64)?'} |
49 | 57 |
50 | 58 |
51 class Scraper(object): | 59 class Scraper(object): |
52 """Generic class to download an application from the Mozilla server""" | 60 """Generic class to download an application from the Mozilla server""" |
53 | 61 |
54 def __init__(self, directory, version, platform=None, | 62 def __init__(self, destination=None, platform=None, |
55 application='firefox', locale='en-US', extension=None, | 63 application='firefox', locale=None, extension=None, |
56 authentication=None, retry_attempts=3, retry_delay=10): | 64 username=None, password=None, |
| 65 retry_attempts=0, retry_delay=10., |
| 66 is_stub_installer=False, timeout=None, |
| 67 log_level='INFO', |
| 68 base_url=BASE_URL): |
57 | 69 |
58 # Private properties for caching | 70 # Private properties for caching |
59 self._target = None | 71 self._filename = None |
60 self._binary = None | 72 self._binary = None |
61 | 73 |
62 self.directory = directory | 74 self.destination = destination or os.getcwd() |
63 self.locale = locale | 75 |
| 76 if not locale: |
| 77 if application in APPLICATIONS_MULTI_LOCALE: |
| 78 self.locale = 'multi' |
| 79 else: |
| 80 self.locale = 'en-US' |
| 81 else: |
| 82 self.locale = locale |
| 83 |
64 self.platform = platform or self.detect_platform() | 84 self.platform = platform or self.detect_platform() |
65 self.version = version | 85 |
66 self.extension = extension or DEFAULT_FILE_EXTENSIONS[self.platform] | 86 self.session = requests.Session() |
67 self.authentication = authentication | 87 if (username, password) != (None, None): |
| 88 self.session.auth = (username, password) |
| 89 |
68 self.retry_attempts = retry_attempts | 90 self.retry_attempts = retry_attempts |
69 self.retry_delay = retry_delay | 91 self.retry_delay = retry_delay |
| 92 self.is_stub_installer = is_stub_installer |
| 93 self.timeout_download = timeout |
| 94 # this is the timeout used in requests.get. Unlike "auth", |
| 95 # it does not work if we attach it on the session, so we handle |
| 96 # it independently. |
| 97 self.timeout_network = 60. |
| 98 |
| 99 logging.basicConfig(format=' %(levelname)s | %(message)s') |
| 100 self.logger = logging.getLogger(self.__module__) |
| 101 self.logger.setLevel(log_level) |
70 | 102 |
71 # build the base URL | 103 # build the base URL |
72 self.application = application | 104 self.application = application |
73 self.base_url = '/'.join([BASE_URL, self.application]) | 105 self.base_url = '%s/' % urljoin( |
74 | 106 base_url, |
| 107 APPLICATIONS_TO_FTP_DIRECTORY.get(self.application, self.application
) |
| 108 ) |
| 109 |
| 110 if extension: |
| 111 self.extension = extension |
| 112 else: |
| 113 if self.application in APPLICATIONS_MULTI_LOCALE and \ |
| 114 self.platform in ('win32', 'win64'): |
| 115 # builds for APPLICATIONS_MULTI_LOCALE only exist in zip |
| 116 self.extension = 'zip' |
| 117 else: |
| 118 self.extension = DEFAULT_FILE_EXTENSIONS[self.platform] |
| 119 |
| 120 attempt = 0 |
| 121 while True: |
| 122 attempt += 1 |
| 123 try: |
| 124 self.get_build_info() |
| 125 break |
| 126 except (errors.NotFoundError, requests.exceptions.RequestException),
e: |
| 127 if self.retry_attempts > 0: |
| 128 # Log only if multiple attempts are requested |
| 129 self.logger.warning("Build not found: '%s'" % e.message) |
| 130 self.logger.info('Will retry in %s seconds...' % |
| 131 (self.retry_delay)) |
| 132 time.sleep(self.retry_delay) |
| 133 self.logger.info("Retrying... (attempt %s)" % attempt) |
| 134 |
| 135 if attempt >= self.retry_attempts: |
| 136 if hasattr(e, 'response') and \ |
| 137 e.response.status_code == 404: |
| 138 message = "Specified build has not been found" |
| 139 raise errors.NotFoundError(message, e.response.url) |
| 140 else: |
| 141 raise |
| 142 |
| 143 def _create_directory_parser(self, url): |
| 144 return DirectoryParser(url, |
| 145 session=self.session, |
| 146 timeout=self.timeout_network) |
75 | 147 |
76 @property | 148 @property |
77 def binary(self): | 149 def binary(self): |
78 """Return the name of the build""" | 150 """Return the name of the build""" |
79 | 151 |
80 if self._binary is None: | 152 attempt = 0 |
81 # Retrieve all entries from the remote virtual folder | 153 |
82 parser = DirectoryParser(self.path) | 154 while self._binary is None: |
83 if not parser.entries: | 155 attempt += 1 |
84 raise NotFoundException('No entries found', self.path) | 156 try: |
85 | 157 # Retrieve all entries from the remote virtual folder |
86 # Download the first matched directory entry | 158 parser = self._create_directory_parser(self.path) |
87 pattern = re.compile(self.binary_regex, re.IGNORECASE) | 159 if not parser.entries: |
88 for entry in parser.entries: | 160 raise errors.NotFoundError('No entries found', self.path) |
89 try: | 161 |
90 self._binary = pattern.match(entry).group() | 162 # Download the first matched directory entry |
91 break | 163 pattern = re.compile(self.binary_regex, re.IGNORECASE) |
92 except: | 164 for entry in parser.entries: |
93 # No match, continue with next entry | 165 try: |
94 continue | 166 self._binary = pattern.match(entry).group() |
95 | 167 break |
96 if self._binary is None: | 168 except: |
97 raise NotFoundException("Binary not found in folder", self.path) | 169 # No match, continue with next entry |
98 else: | 170 continue |
99 return self._binary | 171 else: |
100 | 172 raise errors.NotFoundError("Binary not found in folder", |
| 173 self.path) |
| 174 except (errors.NotFoundError, requests.exceptions.RequestException),
e: |
| 175 if self.retry_attempts > 0: |
| 176 # Log only if multiple attempts are requested |
| 177 self.logger.warning("Build not found: '%s'" % e.message) |
| 178 self.logger.info('Will retry in %s seconds...' % |
| 179 (self.retry_delay)) |
| 180 time.sleep(self.retry_delay) |
| 181 self.logger.info("Retrying... (attempt %s)" % attempt) |
| 182 |
| 183 if attempt >= self.retry_attempts: |
| 184 if hasattr(e, 'response') and \ |
| 185 e.response.status_code == 404: |
| 186 message = "Specified build has not been found" |
| 187 raise errors.NotFoundError(message, self.path) |
| 188 else: |
| 189 raise |
| 190 |
| 191 return self._binary |
101 | 192 |
102 @property | 193 @property |
103 def binary_regex(self): | 194 def binary_regex(self): |
104 """Return the regex for the binary filename""" | 195 """Return the regex for the binary filename""" |
105 | 196 |
106 raise NotImplementedError(sys._getframe(0).f_code.co_name) | 197 raise errors.NotImplementedError(sys._getframe(0).f_code.co_name) |
107 | 198 |
108 | 199 @property |
109 @property | 200 def url(self): |
110 def final_url(self): | 201 """Return the URL of the build""" |
111 """Return the final URL of the build""" | 202 |
112 | 203 return urljoin(self.path, self.binary) |
113 return '/'.join([self.path, self.binary]) | |
114 | |
115 | 204 |
116 @property | 205 @property |
117 def path(self): | 206 def path(self): |
118 """Return the path to the build""" | 207 """Return the path to the build folder""" |
119 | 208 |
120 return '/'.join([self.base_url, self.path_regex]) | 209 return urljoin(self.base_url, self.path_regex) |
121 | |
122 | 210 |
123 @property | 211 @property |
124 def path_regex(self): | 212 def path_regex(self): |
125 """Return the regex for the path to the build""" | 213 """Return the regex for the path to the build folder""" |
126 | 214 |
127 raise NotImplementedError(sys._getframe(0).f_code.co_name) | 215 raise errors.NotImplementedError(sys._getframe(0).f_code.co_name) |
128 | |
129 | 216 |
130 @property | 217 @property |
131 def platform_regex(self): | 218 def platform_regex(self): |
132 """Return the platform fragment of the URL""" | 219 """Return the platform fragment of the URL""" |
133 | 220 |
134 return PLATFORM_FRAGMENTS[self.platform]; | 221 return PLATFORM_FRAGMENTS[self.platform] |
135 | 222 |
136 | 223 @property |
137 @property | 224 def filename(self): |
138 def target(self): | 225 """Return the local filename of the build""" |
139 """Return the target file name of the build""" | 226 |
140 | 227 if self._filename is None: |
141 if self._target is None: | 228 if os.path.splitext(self.destination)[1]: |
142 self._target = os.path.join(self.directory, | 229 # If the filename has been given make use of it |
143 self.build_filename(self.binary)) | 230 target_file = self.destination |
144 return self._target | 231 else: |
145 | 232 # Otherwise create it from the build details |
| 233 target_file = os.path.join(self.destination, |
| 234 self.build_filename(self.binary)) |
| 235 |
| 236 self._filename = os.path.abspath(target_file) |
| 237 |
| 238 return self._filename |
| 239 |
| 240 def get_build_info(self): |
| 241 """Returns additional build information in subclasses if necessary""" |
| 242 pass |
146 | 243 |
147 def build_filename(self, binary): | 244 def build_filename(self, binary): |
148 """Return the proposed filename with extension for the binary""" | 245 """Return the proposed filename with extension for the binary""" |
149 | 246 |
150 raise NotImplementedError(sys._getframe(0).f_code.co_name) | 247 raise errors.NotImplementedError(sys._getframe(0).f_code.co_name) |
151 | |
152 | 248 |
153 def detect_platform(self): | 249 def detect_platform(self): |
154 """Detect the current platform""" | 250 """Detect the current platform""" |
155 | 251 |
156 # For Mac and Linux 32bit we do not need the bits appended | 252 # For Mac and Linux 32bit we do not need the bits appended |
157 if mozinfo.os == 'mac' or (mozinfo.os == 'linux' and mozinfo.bits == 32)
: | 253 if mozinfo.os == 'mac' or \ |
| 254 (mozinfo.os == 'linux' and mozinfo.bits == 32): |
158 return mozinfo.os | 255 return mozinfo.os |
159 else: | 256 else: |
160 return "%s%d" % (mozinfo.os, mozinfo.bits) | 257 return "%s%d" % (mozinfo.os, mozinfo.bits) |
161 | 258 |
162 | |
163 def download(self): | 259 def download(self): |
164 """Download the specified file""" | 260 """Download the specified file""" |
165 | 261 |
166 attempts = 0 | 262 def total_seconds(td): |
167 | 263 # Keep backward compatibility with Python 2.6 which doesn't have |
168 if not os.path.isdir(self.directory): | 264 # this method |
169 os.makedirs(self.directory) | 265 if hasattr(td, 'total_seconds'): |
| 266 return td.total_seconds() |
| 267 else: |
| 268 return (td.microseconds + |
| 269 (td.seconds + td.days * 24 * 3600) * 10 ** 6) / 10 ** 6 |
| 270 |
| 271 attempt = 0 |
170 | 272 |
171 # Don't re-download the file | 273 # Don't re-download the file |
172 if os.path.isfile(os.path.abspath(self.target)): | 274 if os.path.isfile(os.path.abspath(self.filename)): |
173 print "File has already been downloaded: %s" % (self.target) | 275 self.logger.info("File has already been downloaded: %s" % |
174 return | 276 (self.filename)) |
175 | 277 return self.filename |
176 print 'Downloading from: %s' % (urllib.unquote(self.final_url)) | 278 |
177 tmp_file = self.target + ".part" | 279 directory = os.path.dirname(self.filename) |
178 | 280 if not os.path.isdir(directory): |
179 if self.authentication \ | 281 os.makedirs(directory) |
180 and self.authentication['username'] \ | 282 |
181 and self.authentication['password']: | 283 self.logger.info('Downloading from: %s' % |
182 password_mgr = urllib2.HTTPPasswordMgrWithDefaultRealm() | 284 (urllib.unquote(self.url))) |
183 password_mgr.add_password(None, | 285 self.logger.info('Saving as: %s' % self.filename) |
184 self.final_url, | 286 |
185 self.authentication['username'], | 287 tmp_file = self.filename + ".part" |
186 self.authentication['password']) | |
187 handler = urllib2.HTTPBasicAuthHandler(password_mgr) | |
188 opener = urllib2.build_opener(urllib2.HTTPHandler, handler) | |
189 urllib2.install_opener(opener) | |
190 | 288 |
191 while True: | 289 while True: |
192 attempts += 1 | 290 attempt += 1 |
193 try: | 291 try: |
194 r = urllib2.urlopen(self.final_url) | 292 start_time = datetime.now() |
195 CHUNK = 16 * 1024 | 293 |
| 294 # Enable streaming mode so we can download content in chunks |
| 295 r = self.session.get(self.url, stream=True) |
| 296 r.raise_for_status() |
| 297 |
| 298 content_length = r.headers.get('Content-length') |
| 299 # ValueError: Value out of range if only total_size given |
| 300 if content_length: |
| 301 total_size = int(content_length.strip()) |
| 302 max_value = ((total_size / CHUNK_SIZE) + 1) * CHUNK_SIZE |
| 303 |
| 304 bytes_downloaded = 0 |
| 305 |
196 with open(tmp_file, 'wb') as f: | 306 with open(tmp_file, 'wb') as f: |
197 for chunk in iter(lambda: r.read(CHUNK), ''): | 307 for chunk in iter(lambda: r.raw.read(CHUNK_SIZE), ''): |
198 f.write(chunk) | 308 f.write(chunk) |
| 309 bytes_downloaded += CHUNK_SIZE |
| 310 |
| 311 t1 = total_seconds(datetime.now() - start_time) |
| 312 if self.timeout_download and \ |
| 313 t1 >= self.timeout_download: |
| 314 raise errors.TimeoutError |
199 break | 315 break |
200 except (urllib2.HTTPError, urllib2.URLError): | 316 except (requests.exceptions.RequestException, errors.TimeoutError),
e: |
201 if tmp_file and os.path.isfile(tmp_file): | 317 if tmp_file and os.path.isfile(tmp_file): |
202 os.remove(tmp_file) | 318 os.remove(tmp_file) |
203 print 'Download failed! Retrying... (attempt %s)' % attempts | 319 if self.retry_attempts > 0: |
204 if attempts >= self.retry_attempts: | 320 # Log only if multiple attempts are requested |
| 321 self.logger.warning('Download failed: "%s"' % str(e)) |
| 322 self.logger.info('Will retry in %s seconds...' % |
| 323 (self.retry_delay)) |
| 324 time.sleep(self.retry_delay) |
| 325 self.logger.info("Retrying... (attempt %s)" % attempt) |
| 326 if attempt >= self.retry_attempts: |
205 raise | 327 raise |
206 time.sleep(self.retry_delay) | 328 time.sleep(self.retry_delay) |
207 | 329 |
208 os.rename(tmp_file, self.target) | 330 os.rename(tmp_file, self.filename) |
| 331 |
| 332 return self.filename |
| 333 |
| 334 def show_matching_builds(self, builds): |
| 335 """Output the matching builds""" |
| 336 self.logger.info('Found %s build%s: %s' % ( |
| 337 len(builds), |
| 338 len(builds) > 1 and 's' or '', |
| 339 len(builds) > 10 and |
| 340 ' ... '.join([', '.join(builds[:5]), ', '.join(builds[-5:])]) or |
| 341 ', '.join(builds))) |
209 | 342 |
210 | 343 |
211 class DailyScraper(Scraper): | 344 class DailyScraper(Scraper): |
212 """Class to download a daily build from the Mozilla server""" | 345 """Class to download a daily build from the Mozilla server""" |
213 | 346 |
214 def __init__(self, branch='mozilla-central', build_id=None, date=None, | 347 def __init__(self, branch='mozilla-central', build_id=None, date=None, |
215 build_number=None, *args, **kwargs): | 348 build_number=None, *args, **kwargs): |
216 | 349 |
| 350 self.branch = branch |
| 351 self.build_id = build_id |
| 352 self.date = date |
| 353 self.build_number = build_number |
| 354 |
217 Scraper.__init__(self, *args, **kwargs) | 355 Scraper.__init__(self, *args, **kwargs) |
218 self.branch = branch | 356 |
| 357 def get_build_info(self): |
| 358 """Defines additional build information""" |
219 | 359 |
220 # Internally we access builds via index | 360 # Internally we access builds via index |
221 if build_number is not None: | 361 if self.build_number is not None: |
222 self.build_index = int(build_number) - 1 | 362 self.build_index = int(self.build_number) - 1 |
223 else: | 363 else: |
224 self.build_index = None | 364 self.build_index = None |
225 | 365 |
226 if build_id: | 366 if self.build_id: |
227 # A build id has been specified. Split up its components so the date | 367 # A build id has been specified. Split up its components so the |
228 # and time can be extracted: '20111212042025' -> '2011-12-12 04:20:2
5' | 368 # date and time can be extracted: |
229 self.date = datetime.strptime(build_id, '%Y%m%d%H%M%S') | 369 # '20111212042025' -> '2011-12-12 04:20:25' |
230 self.builds, self.build_index = self.get_build_info_for_date(self.da
te, | 370 self.date = datetime.strptime(self.build_id, '%Y%m%d%H%M%S') |
231 has_tim
e=True) | |
232 | 371 |
233 elif date: | 372 elif self.date: |
234 # A date (without time) has been specified. Use its value and the | 373 # A date (without time) has been specified. Use its value and the |
235 # build index to find the requested build for that day. | 374 # build index to find the requested build for that day. |
236 self.date = datetime.strptime(date, '%Y-%m-%d') | 375 try: |
237 self.builds, self.build_index = self.get_build_info_for_date(self.da
te, | 376 self.date = datetime.strptime(self.date, '%Y-%m-%d') |
238 build_i
ndex=self.build_index) | 377 except: |
239 | 378 raise ValueError('%s is not a valid date' % self.date) |
240 else: | 379 else: |
241 # If no build id nor date have been specified the lastest available | 380 # If no build id nor date have been specified the latest available |
242 # build of the given branch has to be identified. We also have to | 381 # build of the given branch has to be identified. We also have to |
243 # retrieve the date of the build via its build id. | 382 # retrieve the date of the build via its build id. |
244 url = '%s/nightly/latest-%s/' % (self.base_url, self.branch) | 383 self.date = self.get_latest_build_date() |
245 | 384 |
246 print 'Retrieving the build status file from %s' % url | 385 self.builds, self.build_index = self.get_build_info_for_date( |
247 parser = DirectoryParser(url) | 386 self.date, self.build_index) |
248 parser.entries = parser.filter(r'.*%s\.txt' % self.platform_regex) | |
249 if not parser.entries: | |
250 message = 'Status file for %s build cannot be found' % self.plat
form_regex | |
251 raise NotFoundException(message, url) | |
252 | 387 |
253 # Read status file for the platform, retrieve build id, and convert
to a date | 388 def get_latest_build_date(self): |
254 status_file = url + parser.entries[-1] | 389 """ Returns date of latest available nightly build.""" |
255 f = urllib.urlopen(status_file) | 390 if self.application not in ('fennec'): |
256 self.date = datetime.strptime(f.readline().strip(), '%Y%m%d%H%M%S') | 391 url = urljoin(self.base_url, 'nightly', 'latest-%s/' % self.branch) |
257 self.builds, self.build_index = self.get_build_info_for_date(self.da
te, | 392 else: |
258 has_tim
e=True) | 393 url = urljoin(self.base_url, 'nightly', 'latest-%s-%s/' % |
| 394 (self.branch, self.platform)) |
259 | 395 |
| 396 self.logger.info('Retrieving the build status file from %s' % url) |
| 397 parser = self._create_directory_parser(url) |
| 398 parser.entries = parser.filter(r'.*%s\.txt' % self.platform_regex) |
| 399 if not parser.entries: |
| 400 message = 'Status file for %s build cannot be found' % \ |
| 401 self.platform_regex |
| 402 raise errors.NotFoundError(message, url) |
260 | 403 |
261 def get_build_info_for_date(self, date, has_time=False, build_index=None): | 404 # Read status file for the platform, retrieve build id, |
262 url = '/'.join([self.base_url, self.monthly_build_list_regex]) | 405 # and convert to a date |
| 406 headers = {'Cache-Control': 'max-age=0'} |
263 | 407 |
264 print 'Retrieving list of builds from %s' % url | 408 r = self.session.get(url + parser.entries[-1], headers=headers) |
265 parser = DirectoryParser(url) | 409 try: |
266 regex = r'%(DATE)s-(\d+-)+%(BRANCH)s%(L10N)s$' % { | 410 r.raise_for_status() |
267 'DATE': date.strftime('%Y-%m-%d'), | 411 |
268 'BRANCH': self.branch, | 412 return datetime.strptime(r.text.split('\n')[0], '%Y%m%d%H%M%S') |
269 'L10N': '' if self.locale == 'en-US' else '-l10n'} | 413 finally: |
| 414 r.close() |
| 415 |
| 416 def is_build_dir(self, folder_name): |
| 417 """Return whether or not the given dir contains a build.""" |
| 418 |
| 419 # Cannot move up to base scraper due to parser.entries call in |
| 420 # get_build_info_for_date (see below) |
| 421 |
| 422 url = '%s/' % urljoin(self.base_url, self.monthly_build_list_regex, fold
er_name) |
| 423 if self.application in APPLICATIONS_MULTI_LOCALE \ |
| 424 and self.locale != 'multi': |
| 425 url = '%s/' % urljoin(url, self.locale) |
| 426 |
| 427 parser = self._create_directory_parser(url) |
| 428 |
| 429 pattern = re.compile(self.binary_regex, re.IGNORECASE) |
| 430 for entry in parser.entries: |
| 431 try: |
| 432 pattern.match(entry).group() |
| 433 return True |
| 434 except: |
| 435 # No match, continue with next entry |
| 436 continue |
| 437 return False |
| 438 |
| 439 def get_build_info_for_date(self, date, build_index=None): |
| 440 url = urljoin(self.base_url, self.monthly_build_list_regex) |
| 441 has_time = date and date.time() |
| 442 |
| 443 self.logger.info('Retrieving list of builds from %s' % url) |
| 444 parser = self._create_directory_parser(url) |
| 445 regex = r'%(DATE)s-(\d+-)+%(BRANCH)s%(L10N)s%(PLATFORM)s$' % { |
| 446 'DATE': date.strftime('%Y-%m-%d'), |
| 447 'BRANCH': self.branch, |
| 448 # ensure to select the correct subfolder for localized builds |
| 449 'L10N': '' if self.locale in ('en-US', 'multi') else '(-l10n)?', |
| 450 'PLATFORM': '' if self.application not in ( |
| 451 'fennec') else '-' + self.platform |
| 452 } |
| 453 |
270 parser.entries = parser.filter(regex) | 454 parser.entries = parser.filter(regex) |
271 if not parser.entries: | 455 parser.entries = parser.filter(self.is_build_dir) |
272 message = 'Folder for builds on %s has not been found' % self.date.s
trftime('%Y-%m-%d') | |
273 raise NotFoundException(message, url) | |
274 | 456 |
275 if has_time: | 457 if has_time: |
276 # If a time is included in the date, use it to determine the build's
index | 458 # If a time is included in the date, use it to determine the |
| 459 # build's index |
277 regex = r'.*%s.*' % date.strftime('%H-%M-%S') | 460 regex = r'.*%s.*' % date.strftime('%H-%M-%S') |
278 build_index = parser.entries.index(parser.filter(regex)[0]) | 461 parser.entries = parser.filter(regex) |
279 else: | 462 |
280 # If no index has been given, set it to the last build of the day. | 463 if not parser.entries: |
281 if build_index is None: | 464 date_format = '%Y-%m-%d-%H-%M-%S' if has_time else '%Y-%m-%d' |
282 build_index = len(parser.entries) - 1 | 465 message = 'Folder for builds on %s has not been found' % \ |
| 466 self.date.strftime(date_format) |
| 467 raise errors.NotFoundError(message, url) |
| 468 |
| 469 # If no index has been given, set it to the last build of the day. |
| 470 self.show_matching_builds(parser.entries) |
| 471 # If no index has been given, set it to the last build of the day. |
| 472 if build_index is None: |
| 473 # Find the most recent non-empty entry. |
| 474 build_index = len(parser.entries) |
| 475 for build in reversed(parser.entries): |
| 476 build_index -= 1 |
| 477 if not build_index or self.is_build_dir(build): |
| 478 break |
| 479 self.logger.info('Selected build: %s' % parser.entries[build_index]) |
283 | 480 |
284 return (parser.entries, build_index) | 481 return (parser.entries, build_index) |
285 | 482 |
286 | |
287 @property | 483 @property |
288 def binary_regex(self): | 484 def binary_regex(self): |
289 """Return the regex for the binary""" | 485 """Return the regex for the binary""" |
290 | 486 |
291 regex_base_name = r'^%(APP)s-.*\.%(LOCALE)s\.%(PLATFORM)s' | 487 regex_base_name = r'^%(APP)s-.*\.%(LOCALE)s\.%(PLATFORM)s' |
292 regex_suffix = {'linux': r'\.%(EXT)s$', | 488 regex_suffix = {'android-api-9': r'\.%(EXT)s$', |
| 489 'android-api-11': r'\.%(EXT)s$', |
| 490 'android-x86': r'\.%(EXT)s$', |
| 491 'linux': r'\.%(EXT)s$', |
293 'linux64': r'\.%(EXT)s$', | 492 'linux64': r'\.%(EXT)s$', |
294 'mac': r'\.%(EXT)s$', | 493 'mac': r'\.%(EXT)s$', |
295 'mac64': r'\.%(EXT)s$', | 494 'mac64': r'\.%(EXT)s$', |
296 'win32': r'(\.installer)\.%(EXT)s$', | 495 'win32': r'(\.installer%(STUB)s)?\.%(EXT)s$', |
297 'win64': r'(\.installer)\.%(EXT)s$'} | 496 'win64': r'(\.installer%(STUB)s)?\.%(EXT)s$'} |
298 regex = regex_base_name + regex_suffix[self.platform] | 497 regex = regex_base_name + regex_suffix[self.platform] |
299 | 498 |
300 return regex % {'APP': self.application, | 499 return regex % {'APP': self.application, |
301 'LOCALE': self.locale, | 500 'LOCALE': self.locale, |
302 'PLATFORM': self.platform_regex, | 501 'PLATFORM': self.platform_regex, |
303 'EXT': self.extension} | 502 'EXT': self.extension, |
304 | 503 'STUB': '-stub' if self.is_stub_installer else ''} |
305 | 504 |
306 def build_filename(self, binary): | 505 def build_filename(self, binary): |
307 """Return the proposed filename with extension for the binary""" | 506 """Return the proposed filename with extension for the binary""" |
308 | 507 |
309 try: | 508 try: |
310 # Get exact timestamp of the build to build the local file name | 509 # Get exact timestamp of the build to build the local file name |
311 folder = self.builds[self.build_index] | 510 folder = self.builds[self.build_index] |
312 timestamp = re.search('([\d\-]+)-\D.*', folder).group(1) | 511 timestamp = re.search('([\d\-]+)-\D.*', folder).group(1) |
313 except: | 512 except: |
314 # If it's not available use the build's date | 513 # If it's not available use the build's date |
315 timestamp = self.date.strftime('%Y-%m-%d') | 514 timestamp = self.date.strftime('%Y-%m-%d') |
316 | 515 |
317 return '%(TIMESTAMP)s-%(BRANCH)s-%(NAME)s' % { | 516 return '%(TIMESTAMP)s-%(BRANCH)s-%(NAME)s' % { |
318 'TIMESTAMP': timestamp, | 517 'TIMESTAMP': timestamp, |
319 'BRANCH': self.branch, | 518 'BRANCH': self.branch, |
320 'NAME': binary} | 519 'NAME': binary} |
321 | |
322 | 520 |
323 @property | 521 @property |
324 def monthly_build_list_regex(self): | 522 def monthly_build_list_regex(self): |
325 """Return the regex for the folder which contains the builds of a month.
""" | 523 """Return the regex for the folder containing builds of a month.""" |
326 | 524 |
327 # Regex for possible builds for the given date | 525 # Regex for possible builds for the given date |
328 return r'nightly/%(YEAR)s/%(MONTH)s/' % { | 526 return r'nightly/%(YEAR)s/%(MONTH)s/' % { |
329 'YEAR': self.date.year, | 527 'YEAR': self.date.year, |
330 'MONTH': str(self.date.month).zfill(2) } | 528 'MONTH': str(self.date.month).zfill(2)} |
331 | |
332 | 529 |
333 @property | 530 @property |
334 def path_regex(self): | 531 def path_regex(self): |
335 """Return the regex for the path""" | 532 """Return the regex for the path to the build folder""" |
336 | 533 |
337 try: | 534 try: |
338 return self.monthly_build_list_regex + self.builds[self.build_index] | 535 path = '%s/' % urljoin(self.monthly_build_list_regex, |
| 536 self.builds[self.build_index]) |
| 537 if self.application in APPLICATIONS_MULTI_LOCALE \ |
| 538 and self.locale != 'multi': |
| 539 path = '%s/' % urljoin(path, self.locale) |
| 540 return path |
339 except: | 541 except: |
340 raise NotFoundException("Specified sub folder cannot be found", | 542 folder = urljoin(self.base_url, self.monthly_build_list_regex) |
341 self.base_url + self.monthly_build_list_rege
x) | 543 raise errors.NotFoundError("Specified sub folder cannot be found", |
| 544 folder) |
342 | 545 |
343 | 546 |
344 class DirectScraper(Scraper): | 547 class DirectScraper(Scraper): |
345 """Class to download a file from a specified URL""" | 548 """Class to download a file from a specified URL""" |
346 | 549 |
347 def __init__(self, url, *args, **kwargs): | 550 def __init__(self, url, *args, **kwargs): |
| 551 self._url = url |
| 552 |
348 Scraper.__init__(self, *args, **kwargs) | 553 Scraper.__init__(self, *args, **kwargs) |
349 | 554 |
350 self.url = url | 555 @property |
| 556 def filename(self): |
| 557 if os.path.splitext(self.destination)[1]: |
| 558 # If the filename has been given make use of it |
| 559 target_file = self.destination |
| 560 else: |
| 561 # Otherwise determine it from the url. |
| 562 parsed_url = urlparse(self.url) |
| 563 source_filename = (parsed_url.path.rpartition('/')[-1] or |
| 564 parsed_url.hostname) |
| 565 target_file = os.path.join(self.destination, source_filename) |
| 566 |
| 567 return os.path.abspath(target_file) |
351 | 568 |
352 @property | 569 @property |
353 def target(self): | 570 def url(self): |
354 return urllib.splitquery(self.final_url)[0].rpartition('/')[-1] | 571 return self._url |
355 | |
356 @property | |
357 def final_url(self): | |
358 return self.url | |
359 | 572 |
360 | 573 |
361 class ReleaseScraper(Scraper): | 574 class ReleaseScraper(Scraper): |
362 """Class to download a release build from the Mozilla server""" | 575 """Class to download a release build from the Mozilla server""" |
363 | 576 |
364 def __init__(self, *args, **kwargs): | 577 def __init__(self, version, *args, **kwargs): |
| 578 self.version = version |
| 579 |
365 Scraper.__init__(self, *args, **kwargs) | 580 Scraper.__init__(self, *args, **kwargs) |
366 | 581 |
367 @property | 582 @property |
368 def binary_regex(self): | 583 def binary_regex(self): |
369 """Return the regex for the binary""" | 584 """Return the regex for the binary""" |
370 | 585 |
371 regex = {'linux': r'^%(APP)s-.*\.%(EXT)s$', | 586 regex = {'linux': r'^%(APP)s-.*\.%(EXT)s$', |
372 'linux64': r'^%(APP)s-.*\.%(EXT)s$', | 587 'linux64': r'^%(APP)s-.*\.%(EXT)s$', |
373 'mac': r'^%(APP)s.*\.%(EXT)s$', | 588 'mac': r'^%(APP)s.*\.%(EXT)s$', |
374 'mac64': r'^%(APP)s.*\.%(EXT)s$', | 589 'mac64': r'^%(APP)s.*\.%(EXT)s$', |
375 'win32': r'^%(APP)s.*\.%(EXT)s$', | 590 'win32': r'^%(APP)s.*%(STUB)s.*\.%(EXT)s$', |
376 'win64': r'^%(APP)s.*\.%(EXT)s$'} | 591 'win64': r'^%(APP)s.*%(STUB)s.*\.%(EXT)s$'} |
377 return regex[self.platform] % {'APP': self.application, | 592 return regex[self.platform] % { |
378 'EXT': self.extension} | 593 'APP': self.application, |
379 | 594 'EXT': self.extension, |
| 595 'STUB': 'Stub' if self.is_stub_installer else ''} |
380 | 596 |
381 @property | 597 @property |
382 def path_regex(self): | 598 def path_regex(self): |
383 """Return the regex for the path""" | 599 """Return the regex for the path to the build folder""" |
384 | 600 |
385 regex = r'releases/%(VERSION)s/%(PLATFORM)s/%(LOCALE)s' | 601 regex = r'releases/%(VERSION)s/%(PLATFORM)s/%(LOCALE)s/' |
386 return regex % {'LOCALE': self.locale, | 602 return regex % {'LOCALE': self.locale, |
387 'PLATFORM': self.platform_regex, | 603 'PLATFORM': self.platform_regex, |
388 'VERSION': self.version} | 604 'VERSION': self.version} |
389 | 605 |
| 606 @property |
| 607 def platform_regex(self): |
| 608 """Return the platform fragment of the URL""" |
| 609 |
| 610 if self.platform == 'win64': |
| 611 return self.platform |
| 612 |
| 613 return PLATFORM_FRAGMENTS[self.platform] |
390 | 614 |
391 def build_filename(self, binary): | 615 def build_filename(self, binary): |
392 """Return the proposed filename with extension for the binary""" | 616 """Return the proposed filename with extension for the binary""" |
393 | 617 |
394 template = '%(APP)s-%(VERSION)s.%(LOCALE)s.%(PLATFORM)s.%(EXT)s' | 618 template = '%(APP)s-%(VERSION)s.%(LOCALE)s.%(PLATFORM)s%(STUB)s' \ |
| 619 '.%(EXT)s' |
395 return template % {'APP': self.application, | 620 return template % {'APP': self.application, |
396 'VERSION': self.version, | 621 'VERSION': self.version, |
397 'LOCALE': self.locale, | 622 'LOCALE': self.locale, |
398 'PLATFORM': self.platform, | 623 'PLATFORM': self.platform, |
| 624 'STUB': '-stub' if self.is_stub_installer else '', |
399 'EXT': self.extension} | 625 'EXT': self.extension} |
400 | 626 |
401 | 627 |
402 class ReleaseCandidateScraper(ReleaseScraper): | 628 class ReleaseCandidateScraper(ReleaseScraper): |
403 """Class to download a release candidate build from the Mozilla server""" | 629 """Class to download a release candidate build from the Mozilla server""" |
404 | 630 |
405 def __init__(self, build_number=None, no_unsigned=False, *args, **kwargs): | 631 def __init__(self, version, build_number=None, *args, **kwargs): |
| 632 self.version = version |
| 633 self.build_number = build_number |
| 634 |
406 Scraper.__init__(self, *args, **kwargs) | 635 Scraper.__init__(self, *args, **kwargs) |
407 | 636 |
| 637 def get_build_info(self): |
| 638 """Defines additional build information""" |
| 639 |
408 # Internally we access builds via index | 640 # Internally we access builds via index |
409 if build_number is not None: | 641 url = urljoin(self.base_url, self.candidate_build_list_regex) |
410 self.build_index = int(build_number) - 1 | 642 self.logger.info('Retrieving list of candidate builds from %s' % url) |
| 643 |
| 644 parser = self._create_directory_parser(url) |
| 645 if not parser.entries: |
| 646 message = 'Folder for specific candidate builds at %s has not' \ |
| 647 'been found' % url |
| 648 raise errors.NotFoundError(message, url) |
| 649 |
| 650 self.show_matching_builds(parser.entries) |
| 651 self.builds = parser.entries |
| 652 self.build_index = len(parser.entries) - 1 |
| 653 |
| 654 if self.build_number and \ |
| 655 ('build%s' % self.build_number) in self.builds: |
| 656 self.builds = ['build%s' % self.build_number] |
| 657 self.build_index = 0 |
| 658 self.logger.info('Selected build: build%s' % self.build_number) |
411 else: | 659 else: |
412 self.build_index = None | 660 self.logger.info('Selected build: build%d' % |
413 | 661 (self.build_index + 1)) |
414 self.builds, self.build_index = self.get_build_info_for_version(self.ver
sion, self.build_index) | |
415 | |
416 self.no_unsigned = no_unsigned | |
417 self.unsigned = False | |
418 | |
419 | |
420 def get_build_info_for_version(self, version, build_index=None): | |
421 url = '/'.join([self.base_url, self.candidate_build_list_regex]) | |
422 | |
423 print 'Retrieving list of candidate builds from %s' % url | |
424 parser = DirectoryParser(url) | |
425 if not parser.entries: | |
426 message = 'Folder for specific candidate builds at has not been foun
d' | |
427 raise NotFoundException(message, url) | |
428 | |
429 # If no index has been given, set it to the last build of the given vers
ion. | |
430 if build_index is None: | |
431 build_index = len(parser.entries) - 1 | |
432 | |
433 return (parser.entries, build_index) | |
434 | |
435 | 662 |
436 @property | 663 @property |
437 def candidate_build_list_regex(self): | 664 def candidate_build_list_regex(self): |
438 """Return the regex for the folder which contains the builds of | 665 """Return the regex for the folder which contains the builds of |
439 a candidate build.""" | 666 a candidate build.""" |
440 | 667 |
441 # Regex for possible builds for the given date | 668 # Regex for possible builds for the given date |
442 return r'nightly/%(VERSION)s-candidates/' % { | 669 return r'candidates/%(VERSION)s-candidates/' % { |
443 'VERSION': self.version } | 670 'VERSION': self.version} |
444 | |
445 | 671 |
446 @property | 672 @property |
447 def path_regex(self): | 673 def path_regex(self): |
448 """Return the regex for the path""" | 674 """Return the regex for the path to the build folder""" |
449 | 675 |
450 regex = r'%(PREFIX)s%(BUILD)s/%(UNSIGNED)s%(PLATFORM)s/%(LOCALE)s' | 676 regex = r'%(PREFIX)s%(BUILD)s/%(PLATFORM)s/%(LOCALE)s/' |
451 return regex % {'PREFIX': self.candidate_build_list_regex, | 677 return regex % {'PREFIX': self.candidate_build_list_regex, |
452 'BUILD': self.builds[self.build_index], | 678 'BUILD': self.builds[self.build_index], |
453 'LOCALE': self.locale, | 679 'LOCALE': self.locale, |
454 'PLATFORM': self.platform_regex, | 680 'PLATFORM': self.platform_regex} |
455 'UNSIGNED': "unsigned/" if self.unsigned else ""} | |
456 | 681 |
| 682 @property |
| 683 def platform_regex(self): |
| 684 """Return the platform fragment of the URL""" |
| 685 |
| 686 if self.platform == 'win64': |
| 687 return self.platform |
| 688 |
| 689 return PLATFORM_FRAGMENTS[self.platform] |
457 | 690 |
458 def build_filename(self, binary): | 691 def build_filename(self, binary): |
459 """Return the proposed filename with extension for the binary""" | 692 """Return the proposed filename with extension for the binary""" |
460 | 693 |
461 template = '%(APP)s-%(VERSION)s-build%(BUILD)s.%(LOCALE)s.%(PLATFORM)s.%
(EXT)s' | 694 template = '%(APP)s-%(VERSION)s-%(BUILD)s.%(LOCALE)s.' \ |
| 695 '%(PLATFORM)s%(STUB)s.%(EXT)s' |
462 return template % {'APP': self.application, | 696 return template % {'APP': self.application, |
463 'VERSION': self.version, | 697 'VERSION': self.version, |
464 'BUILD': self.builds[self.build_index], | 698 'BUILD': self.builds[self.build_index], |
465 'LOCALE': self.locale, | 699 'LOCALE': self.locale, |
466 'PLATFORM': self.platform, | 700 'PLATFORM': self.platform, |
| 701 'STUB': '-stub' if self.is_stub_installer else '', |
467 'EXT': self.extension} | 702 'EXT': self.extension} |
468 | 703 |
469 | |
470 def download(self): | 704 def download(self): |
471 """Download the specified file""" | 705 """Download the specified file""" |
472 | 706 |
473 try: | 707 try: |
474 # Try to download the signed candidate build | 708 # Try to download the signed candidate build |
475 Scraper.download(self) | 709 Scraper.download(self) |
476 except NotFoundException, e: | 710 except errors.NotFoundError, e: |
477 print str(e) | 711 self.logger.exception(str(e)) |
478 | |
479 # If the signed build cannot be downloaded and unsigned builds are | |
480 # allowed, try to download the unsigned build instead | |
481 if self.no_unsigned: | |
482 raise | |
483 else: | |
484 print "Signed build has not been found. Falling back to unsigned
build." | |
485 self.unsigned = True | |
486 Scraper.download(self) | |
487 | 712 |
488 | 713 |
489 class TinderboxScraper(Scraper): | 714 class TinderboxScraper(Scraper): |
490 """Class to download a tinderbox build from the Mozilla server. | 715 """Class to download a tinderbox build from the Mozilla server. |
491 | 716 |
492 There are two ways to specify a unique build: | 717 There are two ways to specify a unique build: |
493 1. If the date (%Y-%m-%d) is given and build_number is given where | 718 1. If the date (%Y-%m-%d) is given and build_number is given where |
494 the build_number is the index of the build on the date | 719 the build_number is the index of the build on the date |
495 2. If the build timestamp (UNIX) is given, and matches a specific build. | 720 2. If the build timestamp (UNIX) is given, and matches a specific build. |
496 """ | 721 """ |
497 | 722 |
498 def __init__(self, branch='mozilla-central', build_number=None, date=None, | 723 def __init__(self, branch='mozilla-central', build_number=None, date=None, |
499 debug_build=False, *args, **kwargs): | 724 debug_build=False, *args, **kwargs): |
| 725 |
| 726 self.branch = branch |
| 727 self.build_number = build_number |
| 728 self.debug_build = debug_build |
| 729 self.date = date |
| 730 |
| 731 self.timestamp = None |
| 732 # Currently any time in RelEng is based on the Pacific time zone. |
| 733 self.timezone = PacificTimezone() |
| 734 |
500 Scraper.__init__(self, *args, **kwargs) | 735 Scraper.__init__(self, *args, **kwargs) |
501 | 736 |
502 self.branch = branch | 737 def get_build_info(self): |
503 self.debug_build = debug_build | 738 "Defines additional build information" |
504 self.locale_build = self.locale != 'en-US' | |
505 self.timestamp = None | |
506 | |
507 # Currently any time in RelEng is based on the Pacific time zone. | |
508 self.timezone = PacificTimezone(); | |
509 | 739 |
510 # Internally we access builds via index | 740 # Internally we access builds via index |
511 if build_number is not None: | 741 if self.build_number is not None: |
512 self.build_index = int(build_number) - 1 | 742 self.build_index = int(self.build_number) - 1 |
513 else: | 743 else: |
514 self.build_index = None | 744 self.build_index = None |
515 | 745 |
516 if date is not None: | 746 if self.date is not None: |
517 try: | 747 try: |
518 self.date = datetime.fromtimestamp(float(date), self.timezone) | 748 # date is provided in the format 2013-07-23 |
519 self.timestamp = date | 749 self.date = datetime.strptime(self.date, '%Y-%m-%d') |
520 except: | 750 except: |
521 self.date = datetime.strptime(date, '%Y-%m-%d') | 751 try: |
522 else: | 752 # date is provided as a unix timestamp |
523 self.date = None | 753 datetime.fromtimestamp(float(self.date)) |
| 754 self.timestamp = self.date |
| 755 except: |
| 756 raise ValueError('%s is not a valid date' % self.date) |
524 | 757 |
| 758 self.locale_build = self.locale != 'en-US' |
525 # For localized builds we do not have to retrieve the list of builds | 759 # For localized builds we do not have to retrieve the list of builds |
526 # because only the last build is available | 760 # because only the last build is available |
527 if not self.locale_build: | 761 if not self.locale_build: |
528 self.builds, self.build_index = self.get_build_info(self.build_index
) | 762 self.builds, self.build_index = self.get_build_info_for_index( |
529 | 763 self.build_index) |
530 try: | |
531 self.timestamp = self.builds[self.build_index] | |
532 except: | |
533 raise NotFoundException("Specified sub folder cannot be found", | |
534 self.base_url + self.monthly_build_list_
regex) | |
535 | |
536 | 764 |
537 @property | 765 @property |
538 def binary_regex(self): | 766 def binary_regex(self): |
539 """Return the regex for the binary""" | 767 """Return the regex for the binary""" |
540 | 768 |
541 regex_base_name = r'^%(APP)s-.*\.%(LOCALE)s\.' | 769 regex_base_name = r'^%(APP)s-.*\.%(LOCALE)s\.%(PLATFORM)s' |
542 regex_suffix = {'linux': r'.*\.%(EXT)s$', | 770 regex_suffix = {'linux': r'.*\.%(EXT)s$', |
543 'linux64': r'.*\.%(EXT)s$', | 771 'linux64': r'.*\.%(EXT)s$', |
544 'mac': r'.*\.%(EXT)s$', | 772 'mac': r'.*\.%(EXT)s$', |
545 'mac64': r'.*\.%(EXT)s$', | 773 'mac64': r'.*\.%(EXT)s$', |
546 'win32': r'.*(\.installer)\.%(EXT)s$', | 774 'win32': r'(\.installer%(STUB)s)?\.%(EXT)s$', |
547 'win64': r'.*(\.installer)\.%(EXT)s$'} | 775 'win64': r'(\.installer%(STUB)s)?\.%(EXT)s$'} |
548 | 776 |
549 regex = regex_base_name + regex_suffix[self.platform] | 777 regex = regex_base_name + regex_suffix[self.platform] |
550 | 778 |
551 return regex % {'APP': self.application, | 779 return regex % {'APP': self.application, |
552 'LOCALE': self.locale, | 780 'LOCALE': self.locale, |
| 781 'PLATFORM': PLATFORM_FRAGMENTS[self.platform], |
| 782 'STUB': '-stub' if self.is_stub_installer else '', |
553 'EXT': self.extension} | 783 'EXT': self.extension} |
554 | 784 |
555 | |
556 def build_filename(self, binary): | 785 def build_filename(self, binary): |
557 """Return the proposed filename with extension for the binary""" | 786 """Return the proposed filename with extension for the binary""" |
558 | 787 |
559 return '%(TIMESTAMP)s%(BRANCH)s%(DEBUG)s-%(NAME)s' % { | 788 return '%(TIMESTAMP)s%(BRANCH)s%(DEBUG)s-%(NAME)s' % { |
560 'TIMESTAMP': self.timestamp + '-' if self.timestamp else '', | 789 'TIMESTAMP': self.timestamp + '-' if self.timestamp else '', |
561 'BRANCH': self.branch, | 790 'BRANCH': self.branch, |
562 'DEBUG': '-debug' if self.debug_build else '', | 791 'DEBUG': '-debug' if self.debug_build else '', |
563 'NAME': binary} | 792 'NAME': binary} |
564 | |
565 | 793 |
566 @property | 794 @property |
567 def build_list_regex(self): | 795 def build_list_regex(self): |
568 """Return the regex for the folder which contains the list of builds""" | 796 """Return the regex for the folder which contains the list of builds""" |
569 | 797 |
570 regex = 'tinderbox-builds/%(BRANCH)s-%(PLATFORM)s%(L10N)s%(DEBUG)s' | 798 regex = 'tinderbox-builds/%(BRANCH)s-%(PLATFORM)s%(L10N)s%(DEBUG)s/' |
571 | 799 |
572 return regex % {'BRANCH': self.branch, | 800 return regex % { |
573 'PLATFORM': '' if self.locale_build else self.platform_r
egex, | 801 'BRANCH': self.branch, |
574 'L10N': 'l10n' if self.locale_build else '', | 802 'PLATFORM': '' if self.locale_build else self.platform_regex, |
575 'DEBUG': '-debug' if self.debug_build else ''} | 803 'L10N': 'l10n' if self.locale_build else '', |
576 | 804 'DEBUG': '-debug' if self.debug_build else ''} |
577 | 805 |
578 def date_matches(self, timestamp): | 806 def date_matches(self, timestamp): |
579 """Determines whether the timestamp date is equal to the argument date""
" | 807 """ |
| 808 Determines whether the timestamp date is equal to the argument date |
| 809 """ |
580 | 810 |
581 if self.date is None: | 811 if self.date is None: |
582 return False | 812 return False |
583 | 813 |
584 timestamp = datetime.fromtimestamp(float(timestamp), self.timezone) | 814 timestamp = datetime.fromtimestamp(float(timestamp), self.timezone) |
585 if self.date.date() == timestamp.date(): | 815 if self.date.date() == timestamp.date(): |
586 return True | 816 return True |
587 | 817 |
588 return False | 818 return False |
589 | 819 |
590 | |
591 @property | |
592 def date_validation_regex(self): | |
593 """Return the regex for a valid date argument value""" | |
594 | |
595 return r'^\d{4}-\d{1,2}-\d{1,2}$|^\d+$' | |
596 | |
597 | |
598 def detect_platform(self): | 820 def detect_platform(self): |
599 """Detect the current platform""" | 821 """Detect the current platform""" |
600 | 822 |
601 platform = Scraper.detect_platform(self) | 823 platform = Scraper.detect_platform(self) |
602 | 824 |
603 # On OS X we have to special case the platform detection code and fallba
ck | 825 # On OS X we have to special case the platform detection code and |
604 # to 64 bit builds for the en-US locale | 826 # fallback to 64 bit builds for the en-US locale |
605 if mozinfo.os == 'mac' and self.locale == 'en-US' and mozinfo.bits == 64
: | 827 if mozinfo.os == 'mac' and self.locale == 'en-US' and \ |
| 828 mozinfo.bits == 64: |
606 platform = "%s%d" % (mozinfo.os, mozinfo.bits) | 829 platform = "%s%d" % (mozinfo.os, mozinfo.bits) |
607 | 830 |
608 return platform | 831 return platform |
609 | 832 |
| 833 def is_build_dir(self, folder_name): |
| 834 """Return whether or not the given dir contains a build.""" |
610 | 835 |
611 def get_build_info(self, build_index=None): | 836 # Cannot move up to base scraper due to parser.entries call in |
612 url = '/'.join([self.base_url, self.build_list_regex]) | 837 # get_build_info_for_index (see below) |
| 838 url = '%s/' % urljoin(self.base_url, self.build_list_regex, folder_name) |
613 | 839 |
614 print 'Retrieving list of builds from %s' % url | 840 if self.application in APPLICATIONS_MULTI_LOCALE \ |
| 841 and self.locale != 'multi': |
| 842 url = '%s/' % urljoin(url, self.locale) |
615 | 843 |
616 # If a timestamp is given, retrieve just that build | 844 parser = self._create_directory_parser(url) |
617 regex = '^' + self.timestamp + '$' if self.timestamp else r'^\d+$' | |
618 | 845 |
619 parser = DirectoryParser(url) | 846 pattern = re.compile(self.binary_regex, re.IGNORECASE) |
620 parser.entries = parser.filter(regex) | 847 for entry in parser.entries: |
| 848 try: |
| 849 pattern.match(entry).group() |
| 850 return True |
| 851 except: |
| 852 # No match, continue with next entry |
| 853 continue |
| 854 return False |
621 | 855 |
622 # If date is given, retrieve the subset of builds on that date | 856 def get_build_info_for_index(self, build_index=None): |
623 if self.date is not None: | 857 url = urljoin(self.base_url, self.build_list_regex) |
| 858 |
| 859 self.logger.info('Retrieving list of builds from %s' % url) |
| 860 parser = self._create_directory_parser(url) |
| 861 parser.entries = parser.filter(r'^\d+$') |
| 862 |
| 863 if self.timestamp: |
| 864 # If a timestamp is given, retrieve the folder with the timestamp |
| 865 # as name |
| 866 parser.entries = self.timestamp in parser.entries and \ |
| 867 [self.timestamp] |
| 868 |
| 869 elif self.date: |
| 870 # If date is given, retrieve the subset of builds on that date |
624 parser.entries = filter(self.date_matches, parser.entries) | 871 parser.entries = filter(self.date_matches, parser.entries) |
625 | 872 |
626 if not parser.entries: | 873 if not parser.entries: |
627 message = 'No builds have been found' | 874 message = 'No builds have been found' |
628 raise NotFoundException(message, url) | 875 raise errors.NotFoundError(message, url) |
| 876 |
| 877 self.show_matching_builds(parser.entries) |
629 | 878 |
630 # If no index has been given, set it to the last build of the day. | 879 # If no index has been given, set it to the last build of the day. |
631 if build_index is None: | 880 if build_index is None: |
632 build_index = len(parser.entries) - 1 | 881 # Find the most recent non-empty entry. |
| 882 build_index = len(parser.entries) |
| 883 for build in reversed(parser.entries): |
| 884 build_index -= 1 |
| 885 if not build_index or self.is_build_dir(build): |
| 886 break |
| 887 |
| 888 self.logger.info('Selected build: %s' % parser.entries[build_index]) |
633 | 889 |
634 return (parser.entries, build_index) | 890 return (parser.entries, build_index) |
635 | 891 |
636 | |
637 @property | 892 @property |
638 def path_regex(self): | 893 def path_regex(self): |
639 """Return the regex for the path""" | 894 """Return the regex for the path to the build folder""" |
640 | 895 |
641 if self.locale_build: | 896 if self.locale_build: |
642 return self.build_list_regex | 897 return self.build_list_regex |
643 | 898 |
644 return '/'.join([self.build_list_regex, self.builds[self.build_index]]) | 899 return '%s/' % urljoin(self.build_list_regex, self.builds[self.build_ind
ex]) |
645 | |
646 | 900 |
647 @property | 901 @property |
648 def platform_regex(self): | 902 def platform_regex(self): |
649 """Return the platform fragment of the URL""" | 903 """Return the platform fragment of the URL""" |
650 | 904 |
651 PLATFORM_FRAGMENTS = {'linux': 'linux', | 905 PLATFORM_FRAGMENTS = {'linux': 'linux', |
652 'linux64': 'linux64', | 906 'linux64': 'linux64', |
653 'mac': 'macosx', | 907 'mac': 'macosx64', |
654 'mac64': 'macosx64', | 908 'mac64': 'macosx64', |
655 'win32': 'win32', | 909 'win32': 'win32', |
656 'win64': 'win64'} | 910 'win64': 'win64'} |
657 | 911 |
658 return PLATFORM_FRAGMENTS[self.platform] | 912 return PLATFORM_FRAGMENTS[self.platform] |
659 | 913 |
660 | 914 |
661 def cli(): | 915 class TryScraper(Scraper): |
662 """Main function for the downloader""" | 916 "Class to download a try build from the Mozilla server." |
663 | 917 |
664 BUILD_TYPES = {'release': ReleaseScraper, | 918 def __init__(self, changeset=None, debug_build=False, *args, **kwargs): |
665 'candidate': ReleaseCandidateScraper, | |
666 'daily': DailyScraper, | |
667 'tinderbox': TinderboxScraper } | |
668 | 919 |
669 usage = 'usage: %prog [options]' | 920 self.debug_build = debug_build |
670 parser = OptionParser(usage=usage, description=__doc__) | 921 self.changeset = changeset |
671 parser.add_option('--application', '-a', | |
672 dest='application', | |
673 choices=APPLICATIONS, | |
674 default='firefox', | |
675 metavar='APPLICATION', | |
676 help='The name of the application to download, ' | |
677 'default: "%default"') | |
678 parser.add_option('--directory', '-d', | |
679 dest='directory', | |
680 default=os.getcwd(), | |
681 metavar='DIRECTORY', | |
682 help='Target directory for the download, default: ' | |
683 'current working directory') | |
684 parser.add_option('--build-number', | |
685 dest='build_number', | |
686 default=None, | |
687 type="int", | |
688 metavar='BUILD_NUMBER', | |
689 help='Number of the build (for candidate, daily, ' | |
690 'and tinderbox builds)') | |
691 parser.add_option('--locale', '-l', | |
692 dest='locale', | |
693 default='en-US', | |
694 metavar='LOCALE', | |
695 help='Locale of the application, default: "%default"') | |
696 parser.add_option('--platform', '-p', | |
697 dest='platform', | |
698 choices=PLATFORM_FRAGMENTS.keys(), | |
699 metavar='PLATFORM', | |
700 help='Platform of the application') | |
701 parser.add_option('--type', '-t', | |
702 dest='type', | |
703 choices=BUILD_TYPES.keys(), | |
704 default='release', | |
705 metavar='BUILD_TYPE', | |
706 help='Type of build to download, default: "%default"') | |
707 parser.add_option('--url', | |
708 dest='url', | |
709 default=None, | |
710 metavar='URL', | |
711 help='URL to download.') | |
712 parser.add_option('--version', '-v', | |
713 dest='version', | |
714 metavar='VERSION', | |
715 help='Version of the application to be used by release and
\ | |
716 candidate builds, i.e. "3.6"') | |
717 parser.add_option('--extension', | |
718 dest='extension', | |
719 default=None, | |
720 metavar='EXTENSION', | |
721 help='File extension of the build (e.g. "zip"), default:\ | |
722 the standard build extension on the platform.') | |
723 parser.add_option('--username', | |
724 dest='username', | |
725 default=None, | |
726 metavar='USERNAME', | |
727 help='Username for basic HTTP authentication.') | |
728 parser.add_option('--password', | |
729 dest='password', | |
730 default=None, | |
731 metavar='PASSWORD', | |
732 help='Password for basic HTTP authentication.') | |
733 parser.add_option('--retry-attempts', | |
734 dest='retry_attempts', | |
735 default=3, | |
736 type=int, | |
737 metavar='RETRY_ATTEMPTS', | |
738 help='Number of times the download will be attempted in ' | |
739 'the event of a failure, default: %default') | |
740 parser.add_option('--retry-delay', | |
741 dest='retry_delay', | |
742 default=10, | |
743 type=int, | |
744 metavar='RETRY_DELAY', | |
745 help='Amount of time (in seconds) to wait between retry ' | |
746 'attempts, default: %default') | |
747 | 922 |
748 # Option group for candidate builds | 923 Scraper.__init__(self, *args, **kwargs) |
749 group = OptionGroup(parser, "Candidate builds", | |
750 "Extra options for candidate builds.") | |
751 group.add_option('--no-unsigned', | |
752 dest='no_unsigned', | |
753 action="store_true", | |
754 help="Don't allow to download unsigned builds if signed\ | |
755 builds are not available") | |
756 parser.add_option_group(group) | |
757 | 924 |
758 # Option group for daily builds | 925 def get_build_info(self): |
759 group = OptionGroup(parser, "Daily builds", | 926 "Defines additional build information" |
760 "Extra options for daily builds.") | |
761 group.add_option('--branch', | |
762 dest='branch', | |
763 default='mozilla-central', | |
764 metavar='BRANCH', | |
765 help='Name of the branch, default: "%default"') | |
766 group.add_option('--build-id', | |
767 dest='build_id', | |
768 default=None, | |
769 metavar='BUILD_ID', | |
770 help='ID of the build to download') | |
771 group.add_option('--date', | |
772 dest='date', | |
773 default=None, | |
774 metavar='DATE', | |
775 help='Date of the build, default: latest build') | |
776 parser.add_option_group(group) | |
777 | 927 |
778 # Option group for tinderbox builds | 928 self.builds, self.build_index = self.get_build_info_for_index() |
779 group = OptionGroup(parser, "Tinderbox builds", | |
780 "Extra options for tinderbox builds.") | |
781 group.add_option('--debug-build', | |
782 dest='debug_build', | |
783 action="store_true", | |
784 help="Download a debug build") | |
785 parser.add_option_group(group) | |
786 | 929 |
787 # TODO: option group for nightly builds | 930 @property |
788 (options, args) = parser.parse_args() | 931 def binary_regex(self): |
| 932 """Return the regex for the binary""" |
789 | 933 |
790 # Check for required options and arguments | 934 regex_base_name = r'^%(APP)s-.*\.%(LOCALE)s\.%(PLATFORM)s' |
791 # Note: Will be optional when ini file support has been landed | 935 regex_suffix = {'linux': r'.*\.%(EXT)s$', |
792 if not options.url \ | 936 'linux64': r'.*\.%(EXT)s$', |
793 and not options.type in ['daily', 'tinderbox'] \ | 937 'mac': r'.*\.%(EXT)s$', |
794 and not options.version: | 938 'mac64': r'.*\.%(EXT)s$', |
795 parser.error('The version of the application to download has not been sp
ecified.') | 939 'win32': r'.*(\.installer%(STUB)s)\.%(EXT)s$', |
| 940 'win64': r'.*(\.installer%(STUB)s)\.%(EXT)s$'} |
796 | 941 |
797 # Instantiate scraper and download the build | 942 regex = regex_base_name + regex_suffix[self.platform] |
798 scraper_keywords = {'application': options.application, | |
799 'locale': options.locale, | |
800 'platform': options.platform, | |
801 'version': options.version, | |
802 'directory': options.directory, | |
803 'extension': options.extension, | |
804 'authentication': { | |
805 'username': options.username, | |
806 'password': options.password}, | |
807 'retry_attempts': options.retry_attempts, | |
808 'retry_delay': options.retry_delay} | |
809 scraper_options = {'candidate': { | |
810 'build_number': options.build_number, | |
811 'no_unsigned': options.no_unsigned}, | |
812 'daily': { | |
813 'branch': options.branch, | |
814 'build_number': options.build_number, | |
815 'build_id': options.build_id, | |
816 'date': options.date}, | |
817 'tinderbox': { | |
818 'branch': options.branch, | |
819 'build_number': options.build_number, | |
820 'date': options.date, | |
821 'debug_build': options.debug_build} | |
822 } | |
823 | 943 |
824 kwargs = scraper_keywords.copy() | 944 return regex % {'APP': self.application, |
825 kwargs.update(scraper_options.get(options.type, {})) | 945 'LOCALE': self.locale, |
| 946 'PLATFORM': PLATFORM_FRAGMENTS[self.platform], |
| 947 'STUB': '-stub' if self.is_stub_installer else '', |
| 948 'EXT': self.extension} |
826 | 949 |
827 if options.url: | 950 def build_filename(self, binary): |
828 build = DirectScraper(options.url, **kwargs) | 951 """Return the proposed filename with extension for the binary""" |
829 else: | |
830 build = BUILD_TYPES[options.type](**kwargs) | |
831 | 952 |
832 build.download() | 953 return '%(CHANGESET)s%(DEBUG)s-%(NAME)s' % { |
| 954 'CHANGESET': self.changeset, |
| 955 'DEBUG': '-debug' if self.debug_build else '', |
| 956 'NAME': binary} |
833 | 957 |
834 if __name__ == "__main__": | 958 @property |
835 cli() | 959 def build_list_regex(self): |
| 960 """Return the regex for the folder which contains the list of builds""" |
| 961 |
| 962 return 'try-builds/' |
| 963 |
| 964 def detect_platform(self): |
| 965 """Detect the current platform""" |
| 966 |
| 967 platform = Scraper.detect_platform(self) |
| 968 |
| 969 # On OS X we have to special case the platform detection code and |
| 970 # fallback to 64 bit builds for the en-US locale |
| 971 if mozinfo.os == 'mac' and self.locale == 'en-US' and \ |
| 972 mozinfo.bits == 64: |
| 973 platform = "%s%d" % (mozinfo.os, mozinfo.bits) |
| 974 |
| 975 return platform |
| 976 |
| 977 def get_build_info_for_index(self, build_index=None): |
| 978 url = urljoin(self.base_url, self.build_list_regex) |
| 979 |
| 980 self.logger.info('Retrieving list of builds from %s' % url) |
| 981 parser = self._create_directory_parser(url) |
| 982 parser.entries = parser.filter('.*-%s$' % self.changeset) |
| 983 |
| 984 if not parser.entries: |
| 985 raise errors.NotFoundError('No builds have been found', url) |
| 986 |
| 987 self.show_matching_builds(parser.entries) |
| 988 |
| 989 self.logger.info('Selected build: %s' % parser.entries[0]) |
| 990 |
| 991 return (parser.entries, 0) |
| 992 |
| 993 @property |
| 994 def path_regex(self): |
| 995 """Return the regex for the path to the build folder""" |
| 996 |
| 997 build_dir = 'try-%(PLATFORM)s%(DEBUG)s/' % { |
| 998 'PLATFORM': self.platform_regex, |
| 999 'DEBUG': '-debug' if self.debug_build else ''} |
| 1000 return urljoin(self.build_list_regex, |
| 1001 self.builds[self.build_index], |
| 1002 build_dir) |
| 1003 |
| 1004 @property |
| 1005 def platform_regex(self): |
| 1006 """Return the platform fragment of the URL""" |
| 1007 |
| 1008 PLATFORM_FRAGMENTS = {'linux': 'linux', |
| 1009 'linux64': 'linux64', |
| 1010 'mac': 'macosx64', |
| 1011 'mac64': 'macosx64', |
| 1012 'win32': 'win32', |
| 1013 'win64': 'win64'} |
| 1014 |
| 1015 return PLATFORM_FRAGMENTS[self.platform] |
OLD | NEW |