Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(949)

Side by Side Diff: mozdownload/scraper.py

Issue 1451373002: Updating mozdownload (excluding tests) (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/mozdownload@master
Patch Set: Created 5 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 #!/usr/bin/env python
2
3 # This Source Code Form is subject to the terms of the Mozilla Public 1 # This Source Code Form is subject to the terms of the Mozilla Public
4 # License, v. 2.0. If a copy of the MPL was not distributed with this 2 # License, v. 2.0. If a copy of the MPL was not distributed with this
5 # file, You can obtain one at http://mozilla.org/MPL/2.0/. 3 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
6 4
7 """Module to handle downloads for different types of Firefox and Thunderbird builds."""
8
9
10 from datetime import datetime 5 from datetime import datetime
11 from optparse import OptionParser, OptionGroup 6 import logging
12 import os 7 import os
13 import re 8 import re
9 import requests
14 import sys 10 import sys
15 import time 11 import time
16 import urllib 12 import urllib
17 import urllib2 13 from urlparse import urlparse
18 14
19 import mozinfo 15 import mozinfo
16 import progressbar as pb
kjellander_chromium 2015/11/17 10:19:29 Any idea how the script can work without the progr
phoglund_chromium 2015/11/17 11:13:07 Crap, you're right. I have it installed on my mach
kjellander_chromium 2015/11/17 12:57:17 Not necessarily screwed since we can bundle those
phoglund_chromium 2015/11/17 13:47:03 I managed to pull requests from the mirror you ind
17
18 import errors
20 19
21 from parser import DirectoryParser 20 from parser import DirectoryParser
22 from timezones import PacificTimezone 21 from timezones import PacificTimezone
23 22 from utils import urljoin
24 23
25 APPLICATIONS = ['b2g', 'firefox', 'thunderbird'] 24
25 APPLICATIONS = ('b2g', 'firefox', 'fennec', 'thunderbird')
26
27 # Some applications contain all locales in a single build
28 APPLICATIONS_MULTI_LOCALE = ('b2g', 'fennec')
29
30 # Used if the application is named differently than the subfolder on the server
31 APPLICATIONS_TO_FTP_DIRECTORY = {'fennec': 'mobile'}
26 32
27 # Base URL for the path to all builds 33 # Base URL for the path to all builds
28 BASE_URL = 'https://ftp.mozilla.org/pub/mozilla.org' 34 BASE_URL = 'https://archive.mozilla.org/pub/'
29 35
30 PLATFORM_FRAGMENTS = {'linux': 'linux-i686', 36 # Chunk size when downloading a file
31 'linux64': 'linux-x86_64', 37 CHUNK_SIZE = 16 * 1024
32 'mac': 'mac', 38
33 'mac64': 'mac64', 39 DEFAULT_FILE_EXTENSIONS = {'android-api-9': 'apk',
34 'win32': 'win32', 40 'android-api-11': 'apk',
35 'win64': 'win64-x86_64'} 41 'android-x86': 'apk',
36 42 'linux': 'tar.bz2',
37 DEFAULT_FILE_EXTENSIONS = {'linux': 'tar.bz2',
38 'linux64': 'tar.bz2', 43 'linux64': 'tar.bz2',
39 'mac': 'dmg', 44 'mac': 'dmg',
40 'mac64': 'dmg', 45 'mac64': 'dmg',
41 'win32': 'exe', 46 'win32': 'exe',
42 'win64': 'exe'} 47 'win64': 'exe'}
43 48
44 class NotFoundException(Exception): 49 PLATFORM_FRAGMENTS = {'android-api-9': r'android-arm',
45 """Exception for a resource not being found (e.g. no logs)""" 50 'android-api-11': r'android-arm',
46 def __init__(self, message, location): 51 'android-x86': r'android-i386',
47 self.location = location 52 'linux': r'linux-i686',
48 Exception.__init__(self, ': '.join([message, location])) 53 'linux64': r'linux-x86_64',
54 'mac': r'mac',
55 'mac64': r'mac(64)?',
56 'win32': r'win32',
57 'win64': r'win64(-x86_64)?'}
49 58
50 59
51 class Scraper(object): 60 class Scraper(object):
52 """Generic class to download an application from the Mozilla server""" 61 """Generic class to download an application from the Mozilla server"""
53 62
54 def __init__(self, directory, version, platform=None, 63 def __init__(self, destination=None, platform=None,
55 application='firefox', locale='en-US', extension=None, 64 application='firefox', locale=None, extension=None,
56 authentication=None, retry_attempts=3, retry_delay=10): 65 username=None, password=None,
66 retry_attempts=0, retry_delay=10.,
67 is_stub_installer=False, timeout=None,
68 log_level='INFO',
69 base_url=BASE_URL):
57 70
58 # Private properties for caching 71 # Private properties for caching
59 self._target = None 72 self._filename = None
60 self._binary = None 73 self._binary = None
61 74
62 self.directory = directory 75 self.destination = destination or os.getcwd()
63 self.locale = locale 76
77 if not locale:
78 if application in APPLICATIONS_MULTI_LOCALE:
79 self.locale = 'multi'
80 else:
81 self.locale = 'en-US'
82 else:
83 self.locale = locale
84
64 self.platform = platform or self.detect_platform() 85 self.platform = platform or self.detect_platform()
65 self.version = version 86
66 self.extension = extension or DEFAULT_FILE_EXTENSIONS[self.platform] 87 self.session = requests.Session()
67 self.authentication = authentication 88 if (username, password) != (None, None):
89 self.session.auth = (username, password)
90
68 self.retry_attempts = retry_attempts 91 self.retry_attempts = retry_attempts
69 self.retry_delay = retry_delay 92 self.retry_delay = retry_delay
93 self.is_stub_installer = is_stub_installer
94 self.timeout_download = timeout
95 # this is the timeout used in requests.get. Unlike "auth",
96 # it does not work if we attach it on the session, so we handle
97 # it independently.
98 self.timeout_network = 60.
99
100 logging.basicConfig(format=' %(levelname)s | %(message)s')
101 self.logger = logging.getLogger(self.__module__)
102 self.logger.setLevel(log_level)
70 103
71 # build the base URL 104 # build the base URL
72 self.application = application 105 self.application = application
73 self.base_url = '/'.join([BASE_URL, self.application]) 106 self.base_url = '%s/' % urljoin(
74 107 base_url,
108 APPLICATIONS_TO_FTP_DIRECTORY.get(self.application, self.application )
109 )
110
111 if extension:
112 self.extension = extension
113 else:
114 if self.application in APPLICATIONS_MULTI_LOCALE and \
115 self.platform in ('win32', 'win64'):
116 # builds for APPLICATIONS_MULTI_LOCALE only exist in zip
117 self.extension = 'zip'
118 else:
119 self.extension = DEFAULT_FILE_EXTENSIONS[self.platform]
120
121 attempt = 0
122 while True:
123 attempt += 1
124 try:
125 self.get_build_info()
126 break
127 except (errors.NotFoundError, requests.exceptions.RequestException), e:
128 if self.retry_attempts > 0:
129 # Log only if multiple attempts are requested
130 self.logger.warning("Build not found: '%s'" % e.message)
131 self.logger.info('Will retry in %s seconds...' %
132 (self.retry_delay))
133 time.sleep(self.retry_delay)
134 self.logger.info("Retrying... (attempt %s)" % attempt)
135
136 if attempt >= self.retry_attempts:
137 if hasattr(e, 'response') and \
138 e.response.status_code == 404:
139 message = "Specified build has not been found"
140 raise errors.NotFoundError(message, e.response.url)
141 else:
142 raise
143
144 def _create_directory_parser(self, url):
145 return DirectoryParser(url,
146 session=self.session,
147 timeout=self.timeout_network)
75 148
76 @property 149 @property
77 def binary(self): 150 def binary(self):
78 """Return the name of the build""" 151 """Return the name of the build"""
79 152
80 if self._binary is None: 153 attempt = 0
81 # Retrieve all entries from the remote virtual folder 154
82 parser = DirectoryParser(self.path) 155 while self._binary is None:
83 if not parser.entries: 156 attempt += 1
84 raise NotFoundException('No entries found', self.path) 157 try:
85 158 # Retrieve all entries from the remote virtual folder
86 # Download the first matched directory entry 159 parser = self._create_directory_parser(self.path)
87 pattern = re.compile(self.binary_regex, re.IGNORECASE) 160 if not parser.entries:
88 for entry in parser.entries: 161 raise errors.NotFoundError('No entries found', self.path)
89 try: 162
90 self._binary = pattern.match(entry).group() 163 # Download the first matched directory entry
91 break 164 pattern = re.compile(self.binary_regex, re.IGNORECASE)
92 except: 165 for entry in parser.entries:
93 # No match, continue with next entry 166 try:
94 continue 167 self._binary = pattern.match(entry).group()
95 168 break
96 if self._binary is None: 169 except:
97 raise NotFoundException("Binary not found in folder", self.path) 170 # No match, continue with next entry
98 else: 171 continue
99 return self._binary 172 else:
100 173 raise errors.NotFoundError("Binary not found in folder",
174 self.path)
175 except (errors.NotFoundError, requests.exceptions.RequestException), e:
176 if self.retry_attempts > 0:
177 # Log only if multiple attempts are requested
178 self.logger.warning("Build not found: '%s'" % e.message)
179 self.logger.info('Will retry in %s seconds...' %
180 (self.retry_delay))
181 time.sleep(self.retry_delay)
182 self.logger.info("Retrying... (attempt %s)" % attempt)
183
184 if attempt >= self.retry_attempts:
185 if hasattr(e, 'response') and \
186 e.response.status_code == 404:
187 message = "Specified build has not been found"
188 raise errors.NotFoundError(message, self.path)
189 else:
190 raise
191
192 return self._binary
101 193
102 @property 194 @property
103 def binary_regex(self): 195 def binary_regex(self):
104 """Return the regex for the binary filename""" 196 """Return the regex for the binary filename"""
105 197
106 raise NotImplementedError(sys._getframe(0).f_code.co_name) 198 raise errors.NotImplementedError(sys._getframe(0).f_code.co_name)
107 199
108 200 @property
109 @property 201 def url(self):
110 def final_url(self): 202 """Return the URL of the build"""
111 """Return the final URL of the build""" 203
112 204 return urljoin(self.path, self.binary)
113 return '/'.join([self.path, self.binary])
114
115 205
116 @property 206 @property
117 def path(self): 207 def path(self):
118 """Return the path to the build""" 208 """Return the path to the build folder"""
119 209
120 return '/'.join([self.base_url, self.path_regex]) 210 return urljoin(self.base_url, self.path_regex)
121
122 211
123 @property 212 @property
124 def path_regex(self): 213 def path_regex(self):
125 """Return the regex for the path to the build""" 214 """Return the regex for the path to the build folder"""
126 215
127 raise NotImplementedError(sys._getframe(0).f_code.co_name) 216 raise errors.NotImplementedError(sys._getframe(0).f_code.co_name)
128
129 217
130 @property 218 @property
131 def platform_regex(self): 219 def platform_regex(self):
132 """Return the platform fragment of the URL""" 220 """Return the platform fragment of the URL"""
133 221
134 return PLATFORM_FRAGMENTS[self.platform]; 222 return PLATFORM_FRAGMENTS[self.platform]
135 223
136 224 @property
137 @property 225 def filename(self):
138 def target(self): 226 """Return the local filename of the build"""
139 """Return the target file name of the build""" 227
140 228 if self._filename is None:
141 if self._target is None: 229 if os.path.splitext(self.destination)[1]:
142 self._target = os.path.join(self.directory, 230 # If the filename has been given make use of it
143 self.build_filename(self.binary)) 231 target_file = self.destination
144 return self._target 232 else:
145 233 # Otherwise create it from the build details
234 target_file = os.path.join(self.destination,
235 self.build_filename(self.binary))
236
237 self._filename = os.path.abspath(target_file)
238
239 return self._filename
240
241 def get_build_info(self):
242 """Returns additional build information in subclasses if necessary"""
243 pass
146 244
147 def build_filename(self, binary): 245 def build_filename(self, binary):
148 """Return the proposed filename with extension for the binary""" 246 """Return the proposed filename with extension for the binary"""
149 247
150 raise NotImplementedError(sys._getframe(0).f_code.co_name) 248 raise errors.NotImplementedError(sys._getframe(0).f_code.co_name)
151
152 249
153 def detect_platform(self): 250 def detect_platform(self):
154 """Detect the current platform""" 251 """Detect the current platform"""
155 252
156 # For Mac and Linux 32bit we do not need the bits appended 253 # For Mac and Linux 32bit we do not need the bits appended
157 if mozinfo.os == 'mac' or (mozinfo.os == 'linux' and mozinfo.bits == 32) : 254 if mozinfo.os == 'mac' or \
255 (mozinfo.os == 'linux' and mozinfo.bits == 32):
158 return mozinfo.os 256 return mozinfo.os
159 else: 257 else:
160 return "%s%d" % (mozinfo.os, mozinfo.bits) 258 return "%s%d" % (mozinfo.os, mozinfo.bits)
161 259
162
163 def download(self): 260 def download(self):
164 """Download the specified file""" 261 """Download the specified file"""
165 262
166 attempts = 0 263 def total_seconds(td):
167 264 # Keep backward compatibility with Python 2.6 which doesn't have
168 if not os.path.isdir(self.directory): 265 # this method
169 os.makedirs(self.directory) 266 if hasattr(td, 'total_seconds'):
267 return td.total_seconds()
268 else:
269 return (td.microseconds +
270 (td.seconds + td.days * 24 * 3600) * 10 ** 6) / 10 ** 6
271
272 attempt = 0
170 273
171 # Don't re-download the file 274 # Don't re-download the file
172 if os.path.isfile(os.path.abspath(self.target)): 275 if os.path.isfile(os.path.abspath(self.filename)):
173 print "File has already been downloaded: %s" % (self.target) 276 self.logger.info("File has already been downloaded: %s" %
174 return 277 (self.filename))
175 278 return self.filename
176 print 'Downloading from: %s' % (urllib.unquote(self.final_url)) 279
177 tmp_file = self.target + ".part" 280 directory = os.path.dirname(self.filename)
178 281 if not os.path.isdir(directory):
179 if self.authentication \ 282 os.makedirs(directory)
180 and self.authentication['username'] \ 283
181 and self.authentication['password']: 284 self.logger.info('Downloading from: %s' %
182 password_mgr = urllib2.HTTPPasswordMgrWithDefaultRealm() 285 (urllib.unquote(self.url)))
183 password_mgr.add_password(None, 286 self.logger.info('Saving as: %s' % self.filename)
184 self.final_url, 287
185 self.authentication['username'], 288 tmp_file = self.filename + ".part"
186 self.authentication['password'])
187 handler = urllib2.HTTPBasicAuthHandler(password_mgr)
188 opener = urllib2.build_opener(urllib2.HTTPHandler, handler)
189 urllib2.install_opener(opener)
190 289
191 while True: 290 while True:
192 attempts += 1 291 attempt += 1
193 try: 292 try:
194 r = urllib2.urlopen(self.final_url) 293 start_time = datetime.now()
195 CHUNK = 16 * 1024 294
295 # Enable streaming mode so we can download content in chunks
296 r = self.session.get(self.url, stream=True)
297 r.raise_for_status()
298
299 content_length = r.headers.get('Content-length')
300 # ValueError: Value out of range if only total_size given
301 if content_length:
302 total_size = int(content_length.strip())
303 max_value = ((total_size / CHUNK_SIZE) + 1) * CHUNK_SIZE
304
305 bytes_downloaded = 0
306
307 log_level = self.logger.getEffectiveLevel()
308 if log_level <= logging.INFO and content_length:
309 widgets = [pb.Percentage(), ' ', pb.Bar(), ' ', pb.ETA(),
310 ' ', pb.FileTransferSpeed()]
311 pbar = pb.ProgressBar(widgets=widgets,
312 maxval=max_value).start()
313
196 with open(tmp_file, 'wb') as f: 314 with open(tmp_file, 'wb') as f:
197 for chunk in iter(lambda: r.read(CHUNK), ''): 315 for chunk in iter(lambda: r.raw.read(CHUNK_SIZE), ''):
198 f.write(chunk) 316 f.write(chunk)
317 bytes_downloaded += CHUNK_SIZE
318
319 if log_level <= logging.INFO and content_length:
320 pbar.update(bytes_downloaded)
321
322 t1 = total_seconds(datetime.now() - start_time)
323 if self.timeout_download and \
324 t1 >= self.timeout_download:
325 raise errors.TimeoutError
326
327 if log_level <= logging.INFO and content_length:
328 pbar.finish()
199 break 329 break
200 except (urllib2.HTTPError, urllib2.URLError): 330 except (requests.exceptions.RequestException, errors.TimeoutError), e:
201 if tmp_file and os.path.isfile(tmp_file): 331 if tmp_file and os.path.isfile(tmp_file):
202 os.remove(tmp_file) 332 os.remove(tmp_file)
203 print 'Download failed! Retrying... (attempt %s)' % attempts 333 if self.retry_attempts > 0:
204 if attempts >= self.retry_attempts: 334 # Log only if multiple attempts are requested
335 self.logger.warning('Download failed: "%s"' % str(e))
336 self.logger.info('Will retry in %s seconds...' %
337 (self.retry_delay))
338 time.sleep(self.retry_delay)
339 self.logger.info("Retrying... (attempt %s)" % attempt)
340 if attempt >= self.retry_attempts:
205 raise 341 raise
206 time.sleep(self.retry_delay) 342 time.sleep(self.retry_delay)
207 343
208 os.rename(tmp_file, self.target) 344 os.rename(tmp_file, self.filename)
345
346 return self.filename
347
348 def show_matching_builds(self, builds):
349 """Output the matching builds"""
350 self.logger.info('Found %s build%s: %s' % (
351 len(builds),
352 len(builds) > 1 and 's' or '',
353 len(builds) > 10 and
354 ' ... '.join([', '.join(builds[:5]), ', '.join(builds[-5:])]) or
355 ', '.join(builds)))
209 356
210 357
211 class DailyScraper(Scraper): 358 class DailyScraper(Scraper):
212 """Class to download a daily build from the Mozilla server""" 359 """Class to download a daily build from the Mozilla server"""
213 360
214 def __init__(self, branch='mozilla-central', build_id=None, date=None, 361 def __init__(self, branch='mozilla-central', build_id=None, date=None,
215 build_number=None, *args, **kwargs): 362 build_number=None, *args, **kwargs):
216 363
364 self.branch = branch
365 self.build_id = build_id
366 self.date = date
367 self.build_number = build_number
368
217 Scraper.__init__(self, *args, **kwargs) 369 Scraper.__init__(self, *args, **kwargs)
218 self.branch = branch 370
371 def get_build_info(self):
372 """Defines additional build information"""
219 373
220 # Internally we access builds via index 374 # Internally we access builds via index
221 if build_number is not None: 375 if self.build_number is not None:
222 self.build_index = int(build_number) - 1 376 self.build_index = int(self.build_number) - 1
223 else: 377 else:
224 self.build_index = None 378 self.build_index = None
225 379
226 if build_id: 380 if self.build_id:
227 # A build id has been specified. Split up its components so the date 381 # A build id has been specified. Split up its components so the
228 # and time can be extracted: '20111212042025' -> '2011-12-12 04:20:25' 382 # date and time can be extracted:
229 self.date = datetime.strptime(build_id, '%Y%m%d%H%M%S') 383 # '20111212042025' -> '2011-12-12 04:20:25'
230 self.builds, self.build_index = self.get_build_info_for_date(self.da te, 384 self.date = datetime.strptime(self.build_id, '%Y%m%d%H%M%S')
231 has_tim e=True)
232 385
233 elif date: 386 elif self.date:
234 # A date (without time) has been specified. Use its value and the 387 # A date (without time) has been specified. Use its value and the
235 # build index to find the requested build for that day. 388 # build index to find the requested build for that day.
236 self.date = datetime.strptime(date, '%Y-%m-%d') 389 try:
237 self.builds, self.build_index = self.get_build_info_for_date(self.da te, 390 self.date = datetime.strptime(self.date, '%Y-%m-%d')
238 build_i ndex=self.build_index) 391 except:
239 392 raise ValueError('%s is not a valid date' % self.date)
240 else: 393 else:
241 # If no build id nor date have been specified the lastest available 394 # If no build id nor date have been specified the latest available
242 # build of the given branch has to be identified. We also have to 395 # build of the given branch has to be identified. We also have to
243 # retrieve the date of the build via its build id. 396 # retrieve the date of the build via its build id.
244 url = '%s/nightly/latest-%s/' % (self.base_url, self.branch) 397 self.date = self.get_latest_build_date()
245 398
246 print 'Retrieving the build status file from %s' % url 399 self.builds, self.build_index = self.get_build_info_for_date(
247 parser = DirectoryParser(url) 400 self.date, self.build_index)
248 parser.entries = parser.filter(r'.*%s\.txt' % self.platform_regex)
249 if not parser.entries:
250 message = 'Status file for %s build cannot be found' % self.plat form_regex
251 raise NotFoundException(message, url)
252 401
253 # Read status file for the platform, retrieve build id, and convert to a date 402 def get_latest_build_date(self):
254 status_file = url + parser.entries[-1] 403 """ Returns date of latest available nightly build."""
255 f = urllib.urlopen(status_file) 404 if self.application not in ('fennec'):
256 self.date = datetime.strptime(f.readline().strip(), '%Y%m%d%H%M%S') 405 url = urljoin(self.base_url, 'nightly', 'latest-%s/' % self.branch)
257 self.builds, self.build_index = self.get_build_info_for_date(self.da te, 406 else:
258 has_tim e=True) 407 url = urljoin(self.base_url, 'nightly', 'latest-%s-%s/' %
408 (self.branch, self.platform))
259 409
410 self.logger.info('Retrieving the build status file from %s' % url)
411 parser = self._create_directory_parser(url)
412 parser.entries = parser.filter(r'.*%s\.txt' % self.platform_regex)
413 if not parser.entries:
414 message = 'Status file for %s build cannot be found' % \
415 self.platform_regex
416 raise errors.NotFoundError(message, url)
260 417
261 def get_build_info_for_date(self, date, has_time=False, build_index=None): 418 # Read status file for the platform, retrieve build id,
262 url = '/'.join([self.base_url, self.monthly_build_list_regex]) 419 # and convert to a date
420 headers = {'Cache-Control': 'max-age=0'}
263 421
264 print 'Retrieving list of builds from %s' % url 422 r = self.session.get(url + parser.entries[-1], headers=headers)
265 parser = DirectoryParser(url) 423 try:
266 regex = r'%(DATE)s-(\d+-)+%(BRANCH)s%(L10N)s$' % { 424 r.raise_for_status()
267 'DATE': date.strftime('%Y-%m-%d'), 425
268 'BRANCH': self.branch, 426 return datetime.strptime(r.text.split('\n')[0], '%Y%m%d%H%M%S')
269 'L10N': '' if self.locale == 'en-US' else '-l10n'} 427 finally:
428 r.close()
429
430 def is_build_dir(self, folder_name):
431 """Return whether or not the given dir contains a build."""
432
433 # Cannot move up to base scraper due to parser.entries call in
434 # get_build_info_for_date (see below)
435
436 url = '%s/' % urljoin(self.base_url, self.monthly_build_list_regex, fold er_name)
437 if self.application in APPLICATIONS_MULTI_LOCALE \
438 and self.locale != 'multi':
439 url = '%s/' % urljoin(url, self.locale)
440
441 parser = self._create_directory_parser(url)
442
443 pattern = re.compile(self.binary_regex, re.IGNORECASE)
444 for entry in parser.entries:
445 try:
446 pattern.match(entry).group()
447 return True
448 except:
449 # No match, continue with next entry
450 continue
451 return False
452
453 def get_build_info_for_date(self, date, build_index=None):
454 url = urljoin(self.base_url, self.monthly_build_list_regex)
455 has_time = date and date.time()
456
457 self.logger.info('Retrieving list of builds from %s' % url)
458 parser = self._create_directory_parser(url)
459 regex = r'%(DATE)s-(\d+-)+%(BRANCH)s%(L10N)s%(PLATFORM)s$' % {
460 'DATE': date.strftime('%Y-%m-%d'),
461 'BRANCH': self.branch,
462 # ensure to select the correct subfolder for localized builds
463 'L10N': '' if self.locale in ('en-US', 'multi') else '(-l10n)?',
464 'PLATFORM': '' if self.application not in (
465 'fennec') else '-' + self.platform
466 }
467
270 parser.entries = parser.filter(regex) 468 parser.entries = parser.filter(regex)
271 if not parser.entries: 469 parser.entries = parser.filter(self.is_build_dir)
272 message = 'Folder for builds on %s has not been found' % self.date.s trftime('%Y-%m-%d')
273 raise NotFoundException(message, url)
274 470
275 if has_time: 471 if has_time:
276 # If a time is included in the date, use it to determine the build's index 472 # If a time is included in the date, use it to determine the
473 # build's index
277 regex = r'.*%s.*' % date.strftime('%H-%M-%S') 474 regex = r'.*%s.*' % date.strftime('%H-%M-%S')
278 build_index = parser.entries.index(parser.filter(regex)[0]) 475 parser.entries = parser.filter(regex)
279 else: 476
280 # If no index has been given, set it to the last build of the day. 477 if not parser.entries:
281 if build_index is None: 478 date_format = '%Y-%m-%d-%H-%M-%S' if has_time else '%Y-%m-%d'
282 build_index = len(parser.entries) - 1 479 message = 'Folder for builds on %s has not been found' % \
480 self.date.strftime(date_format)
481 raise errors.NotFoundError(message, url)
482
483 # If no index has been given, set it to the last build of the day.
484 self.show_matching_builds(parser.entries)
485 # If no index has been given, set it to the last build of the day.
486 if build_index is None:
487 # Find the most recent non-empty entry.
488 build_index = len(parser.entries)
489 for build in reversed(parser.entries):
490 build_index -= 1
491 if not build_index or self.is_build_dir(build):
492 break
493 self.logger.info('Selected build: %s' % parser.entries[build_index])
283 494
284 return (parser.entries, build_index) 495 return (parser.entries, build_index)
285 496
286
287 @property 497 @property
288 def binary_regex(self): 498 def binary_regex(self):
289 """Return the regex for the binary""" 499 """Return the regex for the binary"""
290 500
291 regex_base_name = r'^%(APP)s-.*\.%(LOCALE)s\.%(PLATFORM)s' 501 regex_base_name = r'^%(APP)s-.*\.%(LOCALE)s\.%(PLATFORM)s'
292 regex_suffix = {'linux': r'\.%(EXT)s$', 502 regex_suffix = {'android-api-9': r'\.%(EXT)s$',
503 'android-api-11': r'\.%(EXT)s$',
504 'android-x86': r'\.%(EXT)s$',
505 'linux': r'\.%(EXT)s$',
293 'linux64': r'\.%(EXT)s$', 506 'linux64': r'\.%(EXT)s$',
294 'mac': r'\.%(EXT)s$', 507 'mac': r'\.%(EXT)s$',
295 'mac64': r'\.%(EXT)s$', 508 'mac64': r'\.%(EXT)s$',
296 'win32': r'(\.installer)\.%(EXT)s$', 509 'win32': r'(\.installer%(STUB)s)?\.%(EXT)s$',
297 'win64': r'(\.installer)\.%(EXT)s$'} 510 'win64': r'(\.installer%(STUB)s)?\.%(EXT)s$'}
298 regex = regex_base_name + regex_suffix[self.platform] 511 regex = regex_base_name + regex_suffix[self.platform]
299 512
300 return regex % {'APP': self.application, 513 return regex % {'APP': self.application,
301 'LOCALE': self.locale, 514 'LOCALE': self.locale,
302 'PLATFORM': self.platform_regex, 515 'PLATFORM': self.platform_regex,
303 'EXT': self.extension} 516 'EXT': self.extension,
304 517 'STUB': '-stub' if self.is_stub_installer else ''}
305 518
306 def build_filename(self, binary): 519 def build_filename(self, binary):
307 """Return the proposed filename with extension for the binary""" 520 """Return the proposed filename with extension for the binary"""
308 521
309 try: 522 try:
310 # Get exact timestamp of the build to build the local file name 523 # Get exact timestamp of the build to build the local file name
311 folder = self.builds[self.build_index] 524 folder = self.builds[self.build_index]
312 timestamp = re.search('([\d\-]+)-\D.*', folder).group(1) 525 timestamp = re.search('([\d\-]+)-\D.*', folder).group(1)
313 except: 526 except:
314 # If it's not available use the build's date 527 # If it's not available use the build's date
315 timestamp = self.date.strftime('%Y-%m-%d') 528 timestamp = self.date.strftime('%Y-%m-%d')
316 529
317 return '%(TIMESTAMP)s-%(BRANCH)s-%(NAME)s' % { 530 return '%(TIMESTAMP)s-%(BRANCH)s-%(NAME)s' % {
318 'TIMESTAMP': timestamp, 531 'TIMESTAMP': timestamp,
319 'BRANCH': self.branch, 532 'BRANCH': self.branch,
320 'NAME': binary} 533 'NAME': binary}
321
322 534
323 @property 535 @property
324 def monthly_build_list_regex(self): 536 def monthly_build_list_regex(self):
325 """Return the regex for the folder which contains the builds of a month.""" 537 """Return the regex for the folder containing builds of a month."""
326 538
327 # Regex for possible builds for the given date 539 # Regex for possible builds for the given date
328 return r'nightly/%(YEAR)s/%(MONTH)s/' % { 540 return r'nightly/%(YEAR)s/%(MONTH)s/' % {
329 'YEAR': self.date.year, 541 'YEAR': self.date.year,
330 'MONTH': str(self.date.month).zfill(2) } 542 'MONTH': str(self.date.month).zfill(2)}
331
332 543
333 @property 544 @property
334 def path_regex(self): 545 def path_regex(self):
335 """Return the regex for the path""" 546 """Return the regex for the path to the build folder"""
336 547
337 try: 548 try:
338 return self.monthly_build_list_regex + self.builds[self.build_index] 549 path = '%s/' % urljoin(self.monthly_build_list_regex,
550 self.builds[self.build_index])
551 if self.application in APPLICATIONS_MULTI_LOCALE \
552 and self.locale != 'multi':
553 path = '%s/' % urljoin(path, self.locale)
554 return path
339 except: 555 except:
340 raise NotFoundException("Specified sub folder cannot be found", 556 folder = urljoin(self.base_url, self.monthly_build_list_regex)
341 self.base_url + self.monthly_build_list_rege x) 557 raise errors.NotFoundError("Specified sub folder cannot be found",
558 folder)
342 559
343 560
344 class DirectScraper(Scraper): 561 class DirectScraper(Scraper):
345 """Class to download a file from a specified URL""" 562 """Class to download a file from a specified URL"""
346 563
347 def __init__(self, url, *args, **kwargs): 564 def __init__(self, url, *args, **kwargs):
565 self._url = url
566
348 Scraper.__init__(self, *args, **kwargs) 567 Scraper.__init__(self, *args, **kwargs)
349 568
350 self.url = url 569 @property
570 def filename(self):
571 if os.path.splitext(self.destination)[1]:
572 # If the filename has been given make use of it
573 target_file = self.destination
574 else:
575 # Otherwise determine it from the url.
576 parsed_url = urlparse(self.url)
577 source_filename = (parsed_url.path.rpartition('/')[-1] or
578 parsed_url.hostname)
579 target_file = os.path.join(self.destination, source_filename)
580
581 return os.path.abspath(target_file)
351 582
352 @property 583 @property
353 def target(self): 584 def url(self):
354 return urllib.splitquery(self.final_url)[0].rpartition('/')[-1] 585 return self._url
355
356 @property
357 def final_url(self):
358 return self.url
359 586
360 587
361 class ReleaseScraper(Scraper): 588 class ReleaseScraper(Scraper):
362 """Class to download a release build from the Mozilla server""" 589 """Class to download a release build from the Mozilla server"""
363 590
364 def __init__(self, *args, **kwargs): 591 def __init__(self, version, *args, **kwargs):
592 self.version = version
593
365 Scraper.__init__(self, *args, **kwargs) 594 Scraper.__init__(self, *args, **kwargs)
366 595
367 @property 596 @property
368 def binary_regex(self): 597 def binary_regex(self):
369 """Return the regex for the binary""" 598 """Return the regex for the binary"""
370 599
371 regex = {'linux': r'^%(APP)s-.*\.%(EXT)s$', 600 regex = {'linux': r'^%(APP)s-.*\.%(EXT)s$',
372 'linux64': r'^%(APP)s-.*\.%(EXT)s$', 601 'linux64': r'^%(APP)s-.*\.%(EXT)s$',
373 'mac': r'^%(APP)s.*\.%(EXT)s$', 602 'mac': r'^%(APP)s.*\.%(EXT)s$',
374 'mac64': r'^%(APP)s.*\.%(EXT)s$', 603 'mac64': r'^%(APP)s.*\.%(EXT)s$',
375 'win32': r'^%(APP)s.*\.%(EXT)s$', 604 'win32': r'^%(APP)s.*%(STUB)s.*\.%(EXT)s$',
376 'win64': r'^%(APP)s.*\.%(EXT)s$'} 605 'win64': r'^%(APP)s.*%(STUB)s.*\.%(EXT)s$'}
377 return regex[self.platform] % {'APP': self.application, 606 return regex[self.platform] % {
378 'EXT': self.extension} 607 'APP': self.application,
379 608 'EXT': self.extension,
609 'STUB': 'Stub' if self.is_stub_installer else ''}
380 610
381 @property 611 @property
382 def path_regex(self): 612 def path_regex(self):
383 """Return the regex for the path""" 613 """Return the regex for the path to the build folder"""
384 614
385 regex = r'releases/%(VERSION)s/%(PLATFORM)s/%(LOCALE)s' 615 regex = r'releases/%(VERSION)s/%(PLATFORM)s/%(LOCALE)s/'
386 return regex % {'LOCALE': self.locale, 616 return regex % {'LOCALE': self.locale,
387 'PLATFORM': self.platform_regex, 617 'PLATFORM': self.platform_regex,
388 'VERSION': self.version} 618 'VERSION': self.version}
389 619
620 @property
621 def platform_regex(self):
622 """Return the platform fragment of the URL"""
623
624 if self.platform == 'win64':
625 return self.platform
626
627 return PLATFORM_FRAGMENTS[self.platform]
390 628
391 def build_filename(self, binary): 629 def build_filename(self, binary):
392 """Return the proposed filename with extension for the binary""" 630 """Return the proposed filename with extension for the binary"""
393 631
394 template = '%(APP)s-%(VERSION)s.%(LOCALE)s.%(PLATFORM)s.%(EXT)s' 632 template = '%(APP)s-%(VERSION)s.%(LOCALE)s.%(PLATFORM)s%(STUB)s' \
633 '.%(EXT)s'
395 return template % {'APP': self.application, 634 return template % {'APP': self.application,
396 'VERSION': self.version, 635 'VERSION': self.version,
397 'LOCALE': self.locale, 636 'LOCALE': self.locale,
398 'PLATFORM': self.platform, 637 'PLATFORM': self.platform,
638 'STUB': '-stub' if self.is_stub_installer else '',
399 'EXT': self.extension} 639 'EXT': self.extension}
400 640
401 641
402 class ReleaseCandidateScraper(ReleaseScraper): 642 class ReleaseCandidateScraper(ReleaseScraper):
403 """Class to download a release candidate build from the Mozilla server""" 643 """Class to download a release candidate build from the Mozilla server"""
404 644
405 def __init__(self, build_number=None, no_unsigned=False, *args, **kwargs): 645 def __init__(self, version, build_number=None, *args, **kwargs):
646 self.version = version
647 self.build_number = build_number
648
406 Scraper.__init__(self, *args, **kwargs) 649 Scraper.__init__(self, *args, **kwargs)
407 650
651 def get_build_info(self):
652 """Defines additional build information"""
653
408 # Internally we access builds via index 654 # Internally we access builds via index
409 if build_number is not None: 655 url = urljoin(self.base_url, self.candidate_build_list_regex)
410 self.build_index = int(build_number) - 1 656 self.logger.info('Retrieving list of candidate builds from %s' % url)
657
658 parser = self._create_directory_parser(url)
659 if not parser.entries:
660 message = 'Folder for specific candidate builds at %s has not' \
661 'been found' % url
662 raise errors.NotFoundError(message, url)
663
664 self.show_matching_builds(parser.entries)
665 self.builds = parser.entries
666 self.build_index = len(parser.entries) - 1
667
668 if self.build_number and \
669 ('build%s' % self.build_number) in self.builds:
670 self.builds = ['build%s' % self.build_number]
671 self.build_index = 0
672 self.logger.info('Selected build: build%s' % self.build_number)
411 else: 673 else:
412 self.build_index = None 674 self.logger.info('Selected build: build%d' %
413 675 (self.build_index + 1))
414 self.builds, self.build_index = self.get_build_info_for_version(self.ver sion, self.build_index)
415
416 self.no_unsigned = no_unsigned
417 self.unsigned = False
418
419
420 def get_build_info_for_version(self, version, build_index=None):
421 url = '/'.join([self.base_url, self.candidate_build_list_regex])
422
423 print 'Retrieving list of candidate builds from %s' % url
424 parser = DirectoryParser(url)
425 if not parser.entries:
426 message = 'Folder for specific candidate builds at has not been foun d'
427 raise NotFoundException(message, url)
428
429 # If no index has been given, set it to the last build of the given vers ion.
430 if build_index is None:
431 build_index = len(parser.entries) - 1
432
433 return (parser.entries, build_index)
434
435 676
436 @property 677 @property
437 def candidate_build_list_regex(self): 678 def candidate_build_list_regex(self):
438 """Return the regex for the folder which contains the builds of 679 """Return the regex for the folder which contains the builds of
439 a candidate build.""" 680 a candidate build."""
440 681
441 # Regex for possible builds for the given date 682 # Regex for possible builds for the given date
442 return r'nightly/%(VERSION)s-candidates/' % { 683 return r'candidates/%(VERSION)s-candidates/' % {
443 'VERSION': self.version } 684 'VERSION': self.version}
444
445 685
446 @property 686 @property
447 def path_regex(self): 687 def path_regex(self):
448 """Return the regex for the path""" 688 """Return the regex for the path to the build folder"""
449 689
450 regex = r'%(PREFIX)s%(BUILD)s/%(UNSIGNED)s%(PLATFORM)s/%(LOCALE)s' 690 regex = r'%(PREFIX)s%(BUILD)s/%(PLATFORM)s/%(LOCALE)s/'
451 return regex % {'PREFIX': self.candidate_build_list_regex, 691 return regex % {'PREFIX': self.candidate_build_list_regex,
452 'BUILD': self.builds[self.build_index], 692 'BUILD': self.builds[self.build_index],
453 'LOCALE': self.locale, 693 'LOCALE': self.locale,
454 'PLATFORM': self.platform_regex, 694 'PLATFORM': self.platform_regex}
455 'UNSIGNED': "unsigned/" if self.unsigned else ""}
456 695
696 @property
697 def platform_regex(self):
698 """Return the platform fragment of the URL"""
699
700 if self.platform == 'win64':
701 return self.platform
702
703 return PLATFORM_FRAGMENTS[self.platform]
457 704
458 def build_filename(self, binary): 705 def build_filename(self, binary):
459 """Return the proposed filename with extension for the binary""" 706 """Return the proposed filename with extension for the binary"""
460 707
461 template = '%(APP)s-%(VERSION)s-build%(BUILD)s.%(LOCALE)s.%(PLATFORM)s.% (EXT)s' 708 template = '%(APP)s-%(VERSION)s-%(BUILD)s.%(LOCALE)s.' \
709 '%(PLATFORM)s%(STUB)s.%(EXT)s'
462 return template % {'APP': self.application, 710 return template % {'APP': self.application,
463 'VERSION': self.version, 711 'VERSION': self.version,
464 'BUILD': self.builds[self.build_index], 712 'BUILD': self.builds[self.build_index],
465 'LOCALE': self.locale, 713 'LOCALE': self.locale,
466 'PLATFORM': self.platform, 714 'PLATFORM': self.platform,
715 'STUB': '-stub' if self.is_stub_installer else '',
467 'EXT': self.extension} 716 'EXT': self.extension}
468 717
469
470 def download(self): 718 def download(self):
471 """Download the specified file""" 719 """Download the specified file"""
472 720
473 try: 721 try:
474 # Try to download the signed candidate build 722 # Try to download the signed candidate build
475 Scraper.download(self) 723 Scraper.download(self)
476 except NotFoundException, e: 724 except errors.NotFoundError, e:
477 print str(e) 725 self.logger.exception(str(e))
478
479 # If the signed build cannot be downloaded and unsigned builds are
480 # allowed, try to download the unsigned build instead
481 if self.no_unsigned:
482 raise
483 else:
484 print "Signed build has not been found. Falling back to unsigned build."
485 self.unsigned = True
486 Scraper.download(self)
487 726
488 727
489 class TinderboxScraper(Scraper): 728 class TinderboxScraper(Scraper):
490 """Class to download a tinderbox build from the Mozilla server. 729 """Class to download a tinderbox build from the Mozilla server.
491 730
492 There are two ways to specify a unique build: 731 There are two ways to specify a unique build:
493 1. If the date (%Y-%m-%d) is given and build_number is given where 732 1. If the date (%Y-%m-%d) is given and build_number is given where
494 the build_number is the index of the build on the date 733 the build_number is the index of the build on the date
495 2. If the build timestamp (UNIX) is given, and matches a specific build. 734 2. If the build timestamp (UNIX) is given, and matches a specific build.
496 """ 735 """
497 736
498 def __init__(self, branch='mozilla-central', build_number=None, date=None, 737 def __init__(self, branch='mozilla-central', build_number=None, date=None,
499 debug_build=False, *args, **kwargs): 738 debug_build=False, *args, **kwargs):
739
740 self.branch = branch
741 self.build_number = build_number
742 self.debug_build = debug_build
743 self.date = date
744
745 self.timestamp = None
746 # Currently any time in RelEng is based on the Pacific time zone.
747 self.timezone = PacificTimezone()
748
500 Scraper.__init__(self, *args, **kwargs) 749 Scraper.__init__(self, *args, **kwargs)
501 750
502 self.branch = branch 751 def get_build_info(self):
503 self.debug_build = debug_build 752 "Defines additional build information"
504 self.locale_build = self.locale != 'en-US'
505 self.timestamp = None
506
507 # Currently any time in RelEng is based on the Pacific time zone.
508 self.timezone = PacificTimezone();
509 753
510 # Internally we access builds via index 754 # Internally we access builds via index
511 if build_number is not None: 755 if self.build_number is not None:
512 self.build_index = int(build_number) - 1 756 self.build_index = int(self.build_number) - 1
513 else: 757 else:
514 self.build_index = None 758 self.build_index = None
515 759
516 if date is not None: 760 if self.date is not None:
517 try: 761 try:
518 self.date = datetime.fromtimestamp(float(date), self.timezone) 762 # date is provided in the format 2013-07-23
519 self.timestamp = date 763 self.date = datetime.strptime(self.date, '%Y-%m-%d')
520 except: 764 except:
521 self.date = datetime.strptime(date, '%Y-%m-%d') 765 try:
522 else: 766 # date is provided as a unix timestamp
523 self.date = None 767 datetime.fromtimestamp(float(self.date))
768 self.timestamp = self.date
769 except:
770 raise ValueError('%s is not a valid date' % self.date)
524 771
772 self.locale_build = self.locale != 'en-US'
525 # For localized builds we do not have to retrieve the list of builds 773 # For localized builds we do not have to retrieve the list of builds
526 # because only the last build is available 774 # because only the last build is available
527 if not self.locale_build: 775 if not self.locale_build:
528 self.builds, self.build_index = self.get_build_info(self.build_index ) 776 self.builds, self.build_index = self.get_build_info_for_index(
529 777 self.build_index)
530 try:
531 self.timestamp = self.builds[self.build_index]
532 except:
533 raise NotFoundException("Specified sub folder cannot be found",
534 self.base_url + self.monthly_build_list_ regex)
535
536 778
537 @property 779 @property
538 def binary_regex(self): 780 def binary_regex(self):
539 """Return the regex for the binary""" 781 """Return the regex for the binary"""
540 782
541 regex_base_name = r'^%(APP)s-.*\.%(LOCALE)s\.' 783 regex_base_name = r'^%(APP)s-.*\.%(LOCALE)s\.%(PLATFORM)s'
542 regex_suffix = {'linux': r'.*\.%(EXT)s$', 784 regex_suffix = {'linux': r'.*\.%(EXT)s$',
543 'linux64': r'.*\.%(EXT)s$', 785 'linux64': r'.*\.%(EXT)s$',
544 'mac': r'.*\.%(EXT)s$', 786 'mac': r'.*\.%(EXT)s$',
545 'mac64': r'.*\.%(EXT)s$', 787 'mac64': r'.*\.%(EXT)s$',
546 'win32': r'.*(\.installer)\.%(EXT)s$', 788 'win32': r'(\.installer%(STUB)s)?\.%(EXT)s$',
547 'win64': r'.*(\.installer)\.%(EXT)s$'} 789 'win64': r'(\.installer%(STUB)s)?\.%(EXT)s$'}
548 790
549 regex = regex_base_name + regex_suffix[self.platform] 791 regex = regex_base_name + regex_suffix[self.platform]
550 792
551 return regex % {'APP': self.application, 793 return regex % {'APP': self.application,
552 'LOCALE': self.locale, 794 'LOCALE': self.locale,
795 'PLATFORM': PLATFORM_FRAGMENTS[self.platform],
796 'STUB': '-stub' if self.is_stub_installer else '',
553 'EXT': self.extension} 797 'EXT': self.extension}
554 798
555
556 def build_filename(self, binary): 799 def build_filename(self, binary):
557 """Return the proposed filename with extension for the binary""" 800 """Return the proposed filename with extension for the binary"""
558 801
559 return '%(TIMESTAMP)s%(BRANCH)s%(DEBUG)s-%(NAME)s' % { 802 return '%(TIMESTAMP)s%(BRANCH)s%(DEBUG)s-%(NAME)s' % {
560 'TIMESTAMP': self.timestamp + '-' if self.timestamp else '', 803 'TIMESTAMP': self.timestamp + '-' if self.timestamp else '',
561 'BRANCH': self.branch, 804 'BRANCH': self.branch,
562 'DEBUG': '-debug' if self.debug_build else '', 805 'DEBUG': '-debug' if self.debug_build else '',
563 'NAME': binary} 806 'NAME': binary}
564
565 807
566 @property 808 @property
567 def build_list_regex(self): 809 def build_list_regex(self):
568 """Return the regex for the folder which contains the list of builds""" 810 """Return the regex for the folder which contains the list of builds"""
569 811
570 regex = 'tinderbox-builds/%(BRANCH)s-%(PLATFORM)s%(L10N)s%(DEBUG)s' 812 regex = 'tinderbox-builds/%(BRANCH)s-%(PLATFORM)s%(L10N)s%(DEBUG)s/'
571 813
572 return regex % {'BRANCH': self.branch, 814 return regex % {
573 'PLATFORM': '' if self.locale_build else self.platform_r egex, 815 'BRANCH': self.branch,
574 'L10N': 'l10n' if self.locale_build else '', 816 'PLATFORM': '' if self.locale_build else self.platform_regex,
575 'DEBUG': '-debug' if self.debug_build else ''} 817 'L10N': 'l10n' if self.locale_build else '',
576 818 'DEBUG': '-debug' if self.debug_build else ''}
577 819
578 def date_matches(self, timestamp): 820 def date_matches(self, timestamp):
579 """Determines whether the timestamp date is equal to the argument date"" " 821 """
822 Determines whether the timestamp date is equal to the argument date
823 """
580 824
581 if self.date is None: 825 if self.date is None:
582 return False 826 return False
583 827
584 timestamp = datetime.fromtimestamp(float(timestamp), self.timezone) 828 timestamp = datetime.fromtimestamp(float(timestamp), self.timezone)
585 if self.date.date() == timestamp.date(): 829 if self.date.date() == timestamp.date():
586 return True 830 return True
587 831
588 return False 832 return False
589 833
590
591 @property
592 def date_validation_regex(self):
593 """Return the regex for a valid date argument value"""
594
595 return r'^\d{4}-\d{1,2}-\d{1,2}$|^\d+$'
596
597
598 def detect_platform(self): 834 def detect_platform(self):
599 """Detect the current platform""" 835 """Detect the current platform"""
600 836
601 platform = Scraper.detect_platform(self) 837 platform = Scraper.detect_platform(self)
602 838
603 # On OS X we have to special case the platform detection code and fallba ck 839 # On OS X we have to special case the platform detection code and
604 # to 64 bit builds for the en-US locale 840 # fallback to 64 bit builds for the en-US locale
605 if mozinfo.os == 'mac' and self.locale == 'en-US' and mozinfo.bits == 64 : 841 if mozinfo.os == 'mac' and self.locale == 'en-US' and \
842 mozinfo.bits == 64:
606 platform = "%s%d" % (mozinfo.os, mozinfo.bits) 843 platform = "%s%d" % (mozinfo.os, mozinfo.bits)
607 844
608 return platform 845 return platform
609 846
847 def is_build_dir(self, folder_name):
848 """Return whether or not the given dir contains a build."""
610 849
611 def get_build_info(self, build_index=None): 850 # Cannot move up to base scraper due to parser.entries call in
612 url = '/'.join([self.base_url, self.build_list_regex]) 851 # get_build_info_for_index (see below)
852 url = '%s/' % urljoin(self.base_url, self.build_list_regex, folder_name)
613 853
614 print 'Retrieving list of builds from %s' % url 854 if self.application in APPLICATIONS_MULTI_LOCALE \
855 and self.locale != 'multi':
856 url = '%s/' % urljoin(url, self.locale)
615 857
616 # If a timestamp is given, retrieve just that build 858 parser = self._create_directory_parser(url)
617 regex = '^' + self.timestamp + '$' if self.timestamp else r'^\d+$'
618 859
619 parser = DirectoryParser(url) 860 pattern = re.compile(self.binary_regex, re.IGNORECASE)
620 parser.entries = parser.filter(regex) 861 for entry in parser.entries:
862 try:
863 pattern.match(entry).group()
864 return True
865 except:
866 # No match, continue with next entry
867 continue
868 return False
621 869
622 # If date is given, retrieve the subset of builds on that date 870 def get_build_info_for_index(self, build_index=None):
623 if self.date is not None: 871 url = urljoin(self.base_url, self.build_list_regex)
872
873 self.logger.info('Retrieving list of builds from %s' % url)
874 parser = self._create_directory_parser(url)
875 parser.entries = parser.filter(r'^\d+$')
876
877 if self.timestamp:
878 # If a timestamp is given, retrieve the folder with the timestamp
879 # as name
880 parser.entries = self.timestamp in parser.entries and \
881 [self.timestamp]
882
883 elif self.date:
884 # If date is given, retrieve the subset of builds on that date
624 parser.entries = filter(self.date_matches, parser.entries) 885 parser.entries = filter(self.date_matches, parser.entries)
625 886
626 if not parser.entries: 887 if not parser.entries:
627 message = 'No builds have been found' 888 message = 'No builds have been found'
628 raise NotFoundException(message, url) 889 raise errors.NotFoundError(message, url)
890
891 self.show_matching_builds(parser.entries)
629 892
630 # If no index has been given, set it to the last build of the day. 893 # If no index has been given, set it to the last build of the day.
631 if build_index is None: 894 if build_index is None:
632 build_index = len(parser.entries) - 1 895 # Find the most recent non-empty entry.
896 build_index = len(parser.entries)
897 for build in reversed(parser.entries):
898 build_index -= 1
899 if not build_index or self.is_build_dir(build):
900 break
901
902 self.logger.info('Selected build: %s' % parser.entries[build_index])
633 903
634 return (parser.entries, build_index) 904 return (parser.entries, build_index)
635 905
636
637 @property 906 @property
638 def path_regex(self): 907 def path_regex(self):
639 """Return the regex for the path""" 908 """Return the regex for the path to the build folder"""
640 909
641 if self.locale_build: 910 if self.locale_build:
642 return self.build_list_regex 911 return self.build_list_regex
643 912
644 return '/'.join([self.build_list_regex, self.builds[self.build_index]]) 913 return '%s/' % urljoin(self.build_list_regex, self.builds[self.build_ind ex])
645
646 914
647 @property 915 @property
648 def platform_regex(self): 916 def platform_regex(self):
649 """Return the platform fragment of the URL""" 917 """Return the platform fragment of the URL"""
650 918
651 PLATFORM_FRAGMENTS = {'linux': 'linux', 919 PLATFORM_FRAGMENTS = {'linux': 'linux',
652 'linux64': 'linux64', 920 'linux64': 'linux64',
653 'mac': 'macosx', 921 'mac': 'macosx64',
654 'mac64': 'macosx64', 922 'mac64': 'macosx64',
655 'win32': 'win32', 923 'win32': 'win32',
656 'win64': 'win64'} 924 'win64': 'win64'}
657 925
658 return PLATFORM_FRAGMENTS[self.platform] 926 return PLATFORM_FRAGMENTS[self.platform]
659 927
660 928
661 def cli(): 929 class TryScraper(Scraper):
662 """Main function for the downloader""" 930 "Class to download a try build from the Mozilla server."
663 931
664 BUILD_TYPES = {'release': ReleaseScraper, 932 def __init__(self, changeset=None, debug_build=False, *args, **kwargs):
665 'candidate': ReleaseCandidateScraper,
666 'daily': DailyScraper,
667 'tinderbox': TinderboxScraper }
668 933
669 usage = 'usage: %prog [options]' 934 self.debug_build = debug_build
670 parser = OptionParser(usage=usage, description=__doc__) 935 self.changeset = changeset
671 parser.add_option('--application', '-a',
672 dest='application',
673 choices=APPLICATIONS,
674 default='firefox',
675 metavar='APPLICATION',
676 help='The name of the application to download, '
677 'default: "%default"')
678 parser.add_option('--directory', '-d',
679 dest='directory',
680 default=os.getcwd(),
681 metavar='DIRECTORY',
682 help='Target directory for the download, default: '
683 'current working directory')
684 parser.add_option('--build-number',
685 dest='build_number',
686 default=None,
687 type="int",
688 metavar='BUILD_NUMBER',
689 help='Number of the build (for candidate, daily, '
690 'and tinderbox builds)')
691 parser.add_option('--locale', '-l',
692 dest='locale',
693 default='en-US',
694 metavar='LOCALE',
695 help='Locale of the application, default: "%default"')
696 parser.add_option('--platform', '-p',
697 dest='platform',
698 choices=PLATFORM_FRAGMENTS.keys(),
699 metavar='PLATFORM',
700 help='Platform of the application')
701 parser.add_option('--type', '-t',
702 dest='type',
703 choices=BUILD_TYPES.keys(),
704 default='release',
705 metavar='BUILD_TYPE',
706 help='Type of build to download, default: "%default"')
707 parser.add_option('--url',
708 dest='url',
709 default=None,
710 metavar='URL',
711 help='URL to download.')
712 parser.add_option('--version', '-v',
713 dest='version',
714 metavar='VERSION',
715 help='Version of the application to be used by release and \
716 candidate builds, i.e. "3.6"')
717 parser.add_option('--extension',
718 dest='extension',
719 default=None,
720 metavar='EXTENSION',
721 help='File extension of the build (e.g. "zip"), default:\
722 the standard build extension on the platform.')
723 parser.add_option('--username',
724 dest='username',
725 default=None,
726 metavar='USERNAME',
727 help='Username for basic HTTP authentication.')
728 parser.add_option('--password',
729 dest='password',
730 default=None,
731 metavar='PASSWORD',
732 help='Password for basic HTTP authentication.')
733 parser.add_option('--retry-attempts',
734 dest='retry_attempts',
735 default=3,
736 type=int,
737 metavar='RETRY_ATTEMPTS',
738 help='Number of times the download will be attempted in '
739 'the event of a failure, default: %default')
740 parser.add_option('--retry-delay',
741 dest='retry_delay',
742 default=10,
743 type=int,
744 metavar='RETRY_DELAY',
745 help='Amount of time (in seconds) to wait between retry '
746 'attempts, default: %default')
747 936
748 # Option group for candidate builds 937 Scraper.__init__(self, *args, **kwargs)
749 group = OptionGroup(parser, "Candidate builds",
750 "Extra options for candidate builds.")
751 group.add_option('--no-unsigned',
752 dest='no_unsigned',
753 action="store_true",
754 help="Don't allow to download unsigned builds if signed\
755 builds are not available")
756 parser.add_option_group(group)
757 938
758 # Option group for daily builds 939 def get_build_info(self):
759 group = OptionGroup(parser, "Daily builds", 940 "Defines additional build information"
760 "Extra options for daily builds.")
761 group.add_option('--branch',
762 dest='branch',
763 default='mozilla-central',
764 metavar='BRANCH',
765 help='Name of the branch, default: "%default"')
766 group.add_option('--build-id',
767 dest='build_id',
768 default=None,
769 metavar='BUILD_ID',
770 help='ID of the build to download')
771 group.add_option('--date',
772 dest='date',
773 default=None,
774 metavar='DATE',
775 help='Date of the build, default: latest build')
776 parser.add_option_group(group)
777 941
778 # Option group for tinderbox builds 942 self.builds, self.build_index = self.get_build_info_for_index()
779 group = OptionGroup(parser, "Tinderbox builds",
780 "Extra options for tinderbox builds.")
781 group.add_option('--debug-build',
782 dest='debug_build',
783 action="store_true",
784 help="Download a debug build")
785 parser.add_option_group(group)
786 943
787 # TODO: option group for nightly builds 944 @property
788 (options, args) = parser.parse_args() 945 def binary_regex(self):
946 """Return the regex for the binary"""
789 947
790 # Check for required options and arguments 948 regex_base_name = r'^%(APP)s-.*\.%(LOCALE)s\.%(PLATFORM)s'
791 # Note: Will be optional when ini file support has been landed 949 regex_suffix = {'linux': r'.*\.%(EXT)s$',
792 if not options.url \ 950 'linux64': r'.*\.%(EXT)s$',
793 and not options.type in ['daily', 'tinderbox'] \ 951 'mac': r'.*\.%(EXT)s$',
794 and not options.version: 952 'mac64': r'.*\.%(EXT)s$',
795 parser.error('The version of the application to download has not been sp ecified.') 953 'win32': r'.*(\.installer%(STUB)s)\.%(EXT)s$',
954 'win64': r'.*(\.installer%(STUB)s)\.%(EXT)s$'}
796 955
797 # Instantiate scraper and download the build 956 regex = regex_base_name + regex_suffix[self.platform]
798 scraper_keywords = {'application': options.application,
799 'locale': options.locale,
800 'platform': options.platform,
801 'version': options.version,
802 'directory': options.directory,
803 'extension': options.extension,
804 'authentication': {
805 'username': options.username,
806 'password': options.password},
807 'retry_attempts': options.retry_attempts,
808 'retry_delay': options.retry_delay}
809 scraper_options = {'candidate': {
810 'build_number': options.build_number,
811 'no_unsigned': options.no_unsigned},
812 'daily': {
813 'branch': options.branch,
814 'build_number': options.build_number,
815 'build_id': options.build_id,
816 'date': options.date},
817 'tinderbox': {
818 'branch': options.branch,
819 'build_number': options.build_number,
820 'date': options.date,
821 'debug_build': options.debug_build}
822 }
823 957
824 kwargs = scraper_keywords.copy() 958 return regex % {'APP': self.application,
825 kwargs.update(scraper_options.get(options.type, {})) 959 'LOCALE': self.locale,
960 'PLATFORM': PLATFORM_FRAGMENTS[self.platform],
961 'STUB': '-stub' if self.is_stub_installer else '',
962 'EXT': self.extension}
826 963
827 if options.url: 964 def build_filename(self, binary):
828 build = DirectScraper(options.url, **kwargs) 965 """Return the proposed filename with extension for the binary"""
829 else:
830 build = BUILD_TYPES[options.type](**kwargs)
831 966
832 build.download() 967 return '%(CHANGESET)s%(DEBUG)s-%(NAME)s' % {
968 'CHANGESET': self.changeset,
969 'DEBUG': '-debug' if self.debug_build else '',
970 'NAME': binary}
833 971
834 if __name__ == "__main__": 972 @property
835 cli() 973 def build_list_regex(self):
974 """Return the regex for the folder which contains the list of builds"""
975
976 return 'try-builds/'
977
978 def detect_platform(self):
979 """Detect the current platform"""
980
981 platform = Scraper.detect_platform(self)
982
983 # On OS X we have to special case the platform detection code and
984 # fallback to 64 bit builds for the en-US locale
985 if mozinfo.os == 'mac' and self.locale == 'en-US' and \
986 mozinfo.bits == 64:
987 platform = "%s%d" % (mozinfo.os, mozinfo.bits)
988
989 return platform
990
991 def get_build_info_for_index(self, build_index=None):
992 url = urljoin(self.base_url, self.build_list_regex)
993
994 self.logger.info('Retrieving list of builds from %s' % url)
995 parser = self._create_directory_parser(url)
996 parser.entries = parser.filter('.*-%s$' % self.changeset)
997
998 if not parser.entries:
999 raise errors.NotFoundError('No builds have been found', url)
1000
1001 self.show_matching_builds(parser.entries)
1002
1003 self.logger.info('Selected build: %s' % parser.entries[0])
1004
1005 return (parser.entries, 0)
1006
1007 @property
1008 def path_regex(self):
1009 """Return the regex for the path to the build folder"""
1010
1011 build_dir = 'try-%(PLATFORM)s%(DEBUG)s/' % {
1012 'PLATFORM': self.platform_regex,
1013 'DEBUG': '-debug' if self.debug_build else ''}
1014 return urljoin(self.build_list_regex,
1015 self.builds[self.build_index],
1016 build_dir)
1017
1018 @property
1019 def platform_regex(self):
1020 """Return the platform fragment of the URL"""
1021
1022 PLATFORM_FRAGMENTS = {'linux': 'linux',
1023 'linux64': 'linux64',
1024 'mac': 'macosx64',
1025 'mac64': 'macosx64',
1026 'win32': 'win32',
1027 'win64': 'win64'}
1028
1029 return PLATFORM_FRAGMENTS[self.platform]
OLDNEW
« no previous file with comments | « mozdownload/parser.py ('k') | mozdownload/timezones.py » ('j') | setup.py » ('J')

Powered by Google App Engine
This is Rietveld 408576698