OLD | NEW |
(Empty) | |
| 1 # Copyright (c) 2010 The Chromium OS Authors. All rights reserved. |
| 2 # Use of this source code is governed by a BSD-style license that can be |
| 3 # found in the LICENSE file. |
| 4 # |
| 5 # Adapted from portage/getbinpkg.py -- Portage binary-package helper functions |
| 6 # Copyright 2003-2004 Gentoo Foundation |
| 7 # Distributed under the terms of the GNU General Public License v2 |
| 8 |
| 9 import operator |
| 10 import os |
| 11 import tempfile |
| 12 import time |
| 13 import urllib2 |
| 14 import urlparse |
| 15 |
class PackageIndex(object):
  """A parser for the Portage Packages index file.

  The Portage Packages index file serves to keep track of what packages are
  included in a tree. It contains the following sections:
    1) The header. The header tracks general key/value pairs that don't apply
       to any specific package. E.g., it tracks the base URL of the packages
       file, and the number of packages included in the file. The header is
       terminated by a blank line.
    2) The body. The body is a list of packages. Each package contains a list
       of key/value pairs. Packages are either terminated by a blank line or
       by the end of the file. Every package has a CPV entry, which serves as
       a unique identifier for the package.
  """

  def __init__(self):
    """Constructor."""

    # The header tracks general key/value pairs that don't apply to any
    # specific package. E.g., it tracks the base URL of the packages.
    self.header = {}

    # A list of packages (stored as a list of dictionaries).
    self.packages = []

    # Whether or not the PackageIndex has been modified since the last time it
    # was written.
    self.modified = False

  def _PopulateDuplicateDB(self, db):
    """Populate db with SHA1 -> URL mapping for packages.

    Packages that have no SHA1 entry cannot be matched against other uploads,
    so they are skipped instead of raising KeyError.

    Args:
      db: Dictionary to populate with SHA1 -> URL mapping for packages.
    """

    uri = self.header['URI']
    for pkg in self.packages:
      sha1 = pkg.get('SHA1')
      if not sha1:
        continue
      # Packages without an explicit PATH live at <CPV>.tbz2 under the URI.
      path = pkg.get('PATH', pkg['CPV'] + '.tbz2')
      db[sha1] = urlparse.urljoin(uri, path)

  def _ReadPkgIndex(self, pkgfile):
    """Read a list of key/value pairs from the Packages file into a dictionary.

    Both header entries and package entries are lists of key/value pairs, so
    they can both be read by this function. Entries can be terminated by empty
    lines or by the end of the file.

    This function will read lines from the specified file until it encounters
    a blank line or the end of the file.

    Keys and values in the Packages file are separated by a colon and a space.
    Keys may contain capital letters, numbers, and underscores, but may not
    contain colons. Values may contain any character except a newline. In
    particular, it is normal for values to contain colons.

    Lines that have content, and do not contain a valid key/value pair, are
    ignored. This is for compatibility with the Portage package parser, and
    to allow for future extensions to the Packages file format.

    All entries must contain at least one key/value pair. If the end of the
    file is reached before any pair was read, an empty dictionary is returned.

    Args:
      pkgfile: A python file object.

    Returns the dictionary of key-value pairs that was read from the file.

    Raises:
      AssertionError: A blank line was found before any key/value pair.
    """
    d = {}
    for line in pkgfile:
      line = line.rstrip('\n')
      if not line:
        assert d, 'Packages entry must contain at least one key/value pair'
        break
      # Split on the first ': ' only, since values may contain colons.
      line = line.split(': ', 1)
      if len(line) == 2:
        k, v = line
        d[k] = v
    return d

  def _WritePkgIndex(self, pkgfile, entry):
    """Write header entry or package entry to packages file.

    The keys and values will be separated by a colon and a space. The entry
    will be terminated by a blank line. Keys are written in sorted order, and
    keys whose value is empty are omitted.

    Args:
      pkgfile: A python file object.
      entry: A dictionary of the key/value pairs to write.
    """
    lines = ['%s: %s' % (k, v) for k, v in sorted(entry.items()) if v]
    pkgfile.write('%s\n\n' % '\n'.join(lines))

  def _ReadHeader(self, pkgfile):
    """Read header of packages file.

    Args:
      pkgfile: A python file object.
    """
    assert not self.header, 'Should only read header once.'
    self.header = self._ReadPkgIndex(pkgfile)

  def _ReadBody(self, pkgfile):
    """Read body of packages file.

    Before calling this function, you must first read the header (using
    _ReadHeader). Entries that have no CPV key are discarded.

    Args:
      pkgfile: A python file object.
    """
    assert self.header, 'Should read header first.'
    assert not self.packages, 'Should only read body once.'

    # Read all of the sections in the body by looping until we reach the end
    # of the file.
    while True:
      d = self._ReadPkgIndex(pkgfile)
      if not d:
        break
      if 'CPV' in d:
        self.packages.append(d)

  def Read(self, pkgfile):
    """Read the entire packages file.

    Args:
      pkgfile: A python file object.
    """
    self._ReadHeader(pkgfile)
    self._ReadBody(pkgfile)

  def RemoveFilteredPackages(self, filter_fn):
    """Remove packages which match filter_fn.

    The 'modified' flag is set if at least one package was removed.

    Args:
      filter_fn: A function which operates on packages. If it returns True,
        the package should be removed.
    """

    filtered = [p for p in self.packages if not filter_fn(p)]
    if filtered != self.packages:
      self.modified = True
      self.packages = filtered

  def ResolveDuplicateUploads(self, pkgindexes):
    """Point packages at files that have already been uploaded.

    For each package in our index, check if there is an existing package that
    has already been uploaded to the same base URI. If so, point that package
    at the existing file, so that we don't have to upload the file.

    Packages that have no SHA1 entry cannot be matched, so they are always
    reported as needing an upload.

    Args:
      pkgindexes: A list of PackageIndex objects containing info about packages
        that have already been uploaded.

    Returns:
      A list of the packages that still need to be uploaded.
    """
    db = {}
    for pkgindex in pkgindexes:
      pkgindex._PopulateDuplicateDB(db)

    base_uri = self.header['URI']
    # Match on a directory boundary. A bare startswith(base_uri) would treat
    # 'http://host/basement/x' as living under 'http://host/base'.
    if base_uri.endswith('/'):
      prefix = base_uri
    else:
      prefix = base_uri + '/'

    uploads = []
    for pkg in self.packages:
      sha1 = pkg.get('SHA1')
      uri = db.get(sha1) if sha1 else None
      if uri and uri.startswith(prefix):
        pkg['PATH'] = uri[len(prefix):].lstrip('/')
      else:
        uploads.append(pkg)
    return uploads

  def SetUploadLocation(self, base_uri, path_prefix):
    """Set upload location to base_uri + path_prefix.

    Args:
      base_uri: Base URI for all packages in the file. We set
        self.header['URI'] to this value, so all packages must live under
        this directory.
      path_prefix: Path prefix to use for all current packages in the file.
        This will be added to the beginning of the path for every package.
        A trailing slash is optional.
    """
    self.header['URI'] = base_uri
    # urljoin() replaces the last component of a base that does not end in a
    # slash, which would silently drop part of the prefix.
    if path_prefix and not path_prefix.endswith('/'):
      path_prefix += '/'
    for pkg in self.packages:
      pkg['PATH'] = urlparse.urljoin(path_prefix, pkg['CPV'] + '.tbz2')

  def Write(self, pkgfile):
    """Write a packages file to disk.

    If 'modified' flag is set, the TIMESTAMP and PACKAGES fields in the header
    will be updated before writing to disk. Packages are written sorted by
    their CPV.

    Args:
      pkgfile: A python file object.
    """
    if self.modified:
      # int() yields the same decimal string as long() did.
      self.header['TIMESTAMP'] = str(int(time.time()))
      self.header['PACKAGES'] = str(len(self.packages))
      self.modified = False
    self._WritePkgIndex(pkgfile, self.header)
    for metadata in sorted(self.packages, key=operator.itemgetter('CPV')):
      self._WritePkgIndex(pkgfile, metadata)

  def WriteToNamedTemporaryFile(self):
    """Write this package index to a temporary file.

    Returns:
      A tempfile.NamedTemporaryFile object containing the packages file,
      flushed and rewound to the beginning.
    """
    f = tempfile.NamedTemporaryFile()
    self.Write(f)
    f.flush()
    f.seek(0)
    return f
| 236 |
| 237 |
def _RetryUrlOpen(url, tries=3):
  """Open the specified url, retrying if we run into temporary errors.

  Network errors (urllib2.URLError) and HTTP 5xx responses are treated as
  temporary and retried after a 10 second pause. HTTP errors with a code
  below 500 are re-raised right away, as is any error on the final attempt.

  Args:
    url: The specified url.
    tries: The number of times to try.

  Returns:
    The result of urllib2.urlopen(url).
  """
  for attempt in range(1, tries + 1):
    try:
      return urllib2.urlopen(url)
    except urllib2.URLError as err:
      # HTTPError is a subclass of URLError, so both kinds arrive here.
      is_permanent = isinstance(err, urllib2.HTTPError) and err.code < 500
      if attempt >= tries or is_permanent:
        raise
      print('Cannot GET %s: %s' % (url, str(err)))
    print('Sleeping for 10 seconds before retrying...')
    time.sleep(10)
| 266 |
| 267 |
def GrabRemotePackageIndex(binhost_url):
  """Grab the latest binary package database from the specified URL.

  The index is fetched from the 'Packages' file resolved relative to
  binhost_url with urlparse.urljoin. If the downloaded header has no URI
  entry, binhost_url is recorded as the URI.

  Args:
    binhost_url: Base URL of remote packages (PORTAGE_BINHOST).

  Returns:
    A PackageIndex object, if the Packages file can be retrieved. If the
    server returns status code 404, None is returned.

  Raises:
    urllib2.HTTPError: The server returned an error other than 404.
    urllib2.URLError: The server could not be reached.
  """

  url = urlparse.urljoin(binhost_url, 'Packages')
  try:
    f = _RetryUrlOpen(url)
  except urllib2.HTTPError as e:
    if e.code == 404:
      return None
    raise

  # Close the response even if parsing fails, so the connection is not leaked.
  try:
    pkgindex = PackageIndex()
    pkgindex.Read(f)
  finally:
    f.close()
  pkgindex.header.setdefault('URI', binhost_url)
  return pkgindex
| 292 |
| 293 |
def GrabLocalPackageIndex(package_path):
  """Read a local packages file from disk into a PackageIndex() object.

  Args:
    package_path: Directory containing Packages file.

  Returns:
    A PackageIndex object.

  Raises:
    IOError: The Packages file could not be opened.
  """
  # The 'with' block closes the file even if parsing raises.
  with open(os.path.join(package_path, 'Packages')) as packages_file:
    pkgindex = PackageIndex()
    pkgindex.Read(packages_file)
  return pkgindex
OLD | NEW |