Chromium Code Reviews| Index: chromite/lib/binpkg.py |
| diff --git a/chromite/lib/binpkg.py b/chromite/lib/binpkg.py |
| new file mode 100644 |
| index 0000000000000000000000000000000000000000..bef26fe2b6d5914a340f5c3e7a0d771e9221634f |
| --- /dev/null |
| +++ b/chromite/lib/binpkg.py |
| @@ -0,0 +1,299 @@ |
| +# Copyright (c) 2010 The Chromium OS Authors. All rights reserved. |
| +# Use of this source code is governed by a BSD-style license that can be |
| +# found in the LICENSE file. |
| +# |
| +# Adapted from portage/getbinpkg.py -- Portage binary-package helper functions |
| +# Copyright 2003-2004 Gentoo Foundation |
| +# Distributed under the terms of the GNU General Public License v2 |
| + |
| +import operator |
| +import os |
| +import tempfile |
| +import time |
| +import urllib2 |
| +import urlparse |
| + |
| +class PackageIndex(object): |
| + """A parser for the Portage Packages index file. |
| + |
| + The Portage Packages index file serves to keep track of what packages are |
| + included in a tree. It contains the following sections: |
| + 1) The header. The header tracks general key/value pairs that don't apply |
| + to any specific package. E.g., it tracks the base URL of the packages |
| + file, and the number of packages included in the file. The header is |
| + terminated by a blank line. |
| + 2) The body. The body is a list of packages. Each package contains a list |
| + of key/value pairs. Packages are either terminated by a blank line or |
| + by the end of the file. Every package has a CPV entry, which serves as |
| + a unique identifier for the package. |
|
diandersAtChromium
2010/11/25 00:45:37
Thank you for desc! Very helpful! :)
BTW: havin
|
| + """ |
| + |
| + def __init__(self): |
| + """Constructor.""" |
| + |
| + # The header tracks general key/value pairs that don't apply to any |
| + # specific package. E.g., it tracks the base URL of the packages. |
| + self.header = {} |
| + |
| + # A list of packages (stored as a list of dictionaries). |
| + self.packages = [] |
| + |
| + # Whether or not the PackageIndex has been modified since the last time it |
| + # was written. |
| + self.modified = False |
| + |
| + def _PopulateDuplicateDB(self, db): |
| + """Populate db with SHA1 -> URL mapping for packages. |
| + |
| + Args: |
| + db: Dictionary to populate with SHA1 -> URL mapping for packages. |
| + """ |
| + |
| + uri = self.header['URI'] |
| + for pkg in self.packages: |
| + cpv, sha1 = pkg['CPV'], pkg['SHA1'] |
| + path = pkg.get('PATH', cpv + '.tbz2') |
| + db[sha1] = urlparse.urljoin(uri, path) |
| + |
| + def _ReadPkgIndex(self, pkgfile): |
| + """Read a list of key/value pairs from the Packages file into a dictionary. |
| + |
| + Both header entries and package entries are lists of key/value pairs, so |
| + they can both be read by this function. Entries can be terminated by empty |
| + lines or by the end of the file. |
| + |
| + This function will read lines from the specified file until it encounters |
| + a blank line or the end of the file. |
| + |
| + Keys and values in the Packages file are separated by a colon and a space. |
| + Keys may contain capital letters, numbers, and underscores, but may not |
| + contain colons. Values may contain any character except a newline. In |
| + particular, it is normal for values to contain colons. |
| + |
| + Lines that have content, and do not contain a valid key/value pair, are |
| + ignored. This is for compatibility with the Portage package parser, and |
| + to allow for future extensions to the Packages file format. |
| + |
| + All entries must contain at least one key/value pair. If the end of the |
| + file is reached, an empty dictionary is returned. |
| + |
| + Args: |
| + pkgfile: A python file object. |
| + |
| + Returns the dictionary of key-value pairs that was read from the file. |
| + """ |
| + d = {} |
| + for line in pkgfile: |
| + line = line.rstrip('\n') |
| + if not line: |
| + assert d, 'Packages entry must contain at least one key/value pair' |
|
diandersAtChromium
2010/11/25 00:45:37
Could happen if file ever has two blank lines (eve
davidjames
2010/11/29 21:54:17
The parser that comes with Portage does not output
|
| + break |
| + line = line.split(': ', 1) |
| + if len(line) == 2: |
| + k, v = line |
| + d[k] = v |
| + return d |
| + |
| + def _WritePkgIndex(self, pkgfile, entry): |
| + """Write header entry or package entry to packages file. |
| + |
| + The keys and values will be separated by a colon and a space. The entry |
| + will be terminated by a blank line. Key/value pairs whose value is empty |
| + are not written. |
| + |
| + Args: |
| + pkgfile: A python file object. |
| + entry: A dictionary of the key/value pairs to write. |
|
diandersAtChromium
2010/11/25 00:45:37
Any dictionary items with a blank value will not b
davidjames
2010/11/29 21:54:17
This behavior is copied from Portage. Not sure if
|
| + """ |
| + lines = ['%s: %s' % (k, v) for k, v in sorted(entry.items()) if v] |
| + pkgfile.write('%s\n\n' % '\n'.join(lines)) |
| + |
| + def _ReadHeader(self, pkgfile): |
| + """Read header of packages file. |
| + |
| + Args: |
| + pkgfile: A python file object. |
| + """ |
|
diandersAtChromium
2010/11/25 00:45:37
Why does this update the existing header dictionar
davidjames
2010/11/29 21:54:17
Done.
|
| + self.header.update(self._ReadPkgIndex(pkgfile)) |
| + |
| + def _ReadBody(self, pkgfile): |
| + """Read body of packages file. |
| + |
| + Before calling this function, you must first read the header (using |
| + _ReadHeader). |
| + |
| + Args: |
| + pkgfile: A python file object. |
| + """ |
|
diandersAtChromium
2010/11/25 00:45:37
assert not self.packages, "Should only call _ReadB
|
| + |
| + # Read all of the sections in the body by looping until we reach the end |
| + # of the file. |
| + while True: |
| + d = self._ReadPkgIndex(pkgfile) |
| + if not d: |
| + break |
| + if 'CPV' in d: |
| + self.packages.append(d) |
| + |
| + def Read(self, pkgfile): |
| + """Read the entire packages file. |
| + |
| + Args: |
| + pkgfile: A python file object. |
| + """ |
| + self._ReadHeader(pkgfile) |
| + self._ReadBody(pkgfile) |
| + |
| + def RemoveFilteredPackages(self, filter_fn): |
| + """Remove packages which match filter_fn. |
| + |
| + Args: |
| + filter_fn: A function which operates on packages. If it returns True, |
| + the package should be removed. |
| + """ |
| + |
| + filtered = [p for p in self.packages if not filter_fn(p)] |
| + if filtered != self.packages: |
| + self.modified = True |
| + self.packages = filtered |
| + |
| + def ResolveDuplicateUploads(self, pkgindexes): |
| + """Point packages at files that have already been uploaded. |
| + |
| + For each package in our index, check if there is an existing package that |
| + has already been uploaded to the same base URI. If so, point that package |
| + at the existing file, so that we don't have to upload the file. |
| + |
| + Args: |
| + pkgindexes: A list of PackageIndex objects for package indexes that have |
| + already been uploaded. |
|
diandersAtChromium
2010/11/25 00:45:37
Not a list of files. A list of other PackageIndex
|
| + |
| + Returns: |
| + A list of the packages that still need to be uploaded. |
| + """ |
| + db = {} |
| + for pkgindex in pkgindexes: |
| + pkgindex._PopulateDuplicateDB(db) |
| + |
| + uploads = [] |
| + base_uri = self.header['URI'] |
| + for pkg in self.packages: |
| + sha1 = pkg['SHA1'] |
| + uri = db.get(sha1) |
| + if uri and uri.startswith(base_uri): |
| + pkg['PATH'] = uri[len(base_uri):].lstrip('/') |
| + else: |
| + uploads.append(pkg) |
| + return uploads |
| + |
| + def SetUploadLocation(self, base_uri, path_prefix): |
| + """Set upload location to base_uri + path_prefix. |
| + |
| + Args: |
| + base_uri: Base URI for all packages in the file. We set |
| + self.header['URI'] to this value, so all packages must live under |
| + this directory. |
| + path_prefix: Path prefix to use for all current packages in the file. |
| + This will be added to the beginning of the path for every package. |
| + """ |
| + self.header['URI'] = base_uri |
| + for pkg in self.packages: |
| + pkg['PATH'] = urlparse.urljoin(path_prefix, pkg['CPV'] + '.tbz2') |
| + |
| + def Write(self, pkgfile): |
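| + Write a packages file to disk. |
| + |
| + If the index has been modified since it was last written, the TIMESTAMP |
| + and PACKAGES header entries are refreshed and the modified flag is |
| + cleared before writing. |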
| + |
|
diandersAtChromium
2010/11/25 00:45:37
NOTE: Has a side effect of updating TIMESTAMP and
|
| + Args: |
| + pkgfile: A python file object. |
| + """ |
| + if self.modified: |
| + self.header['TIMESTAMP'] = str(long(time.time())) |
| + self.header['PACKAGES'] = str(len(self.packages)) |
| + self.modified = False |
| + self._WritePkgIndex(pkgfile, self.header) |
| + for metadata in sorted(self.packages, key=operator.itemgetter('CPV')): |
| + self._WritePkgIndex(pkgfile, metadata) |
| + |
| + def WriteToNamedTemporaryFile(self): |
| + Write this PackageIndex to a temporary file. |
| + |
| + Returns: |
| + A temporary file containing the packages from this PackageIndex. The |
| + file has been flushed and rewound to the beginning. |
| + """ |
| + f = tempfile.NamedTemporaryFile() |
| + self.Write(f) |
| + f.flush() |
| + f.seek(0) |
| + return f |
| + |
| + |
| +def _RetryUrlOpen(url, tries=3): |
| + """Open the specified url, retrying if we run into temporary errors. |
| + |
| + We retry for both network errors and 5xx Server Errors. We do not retry |
| + for HTTP errors with a non-5xx code. |
| + |
| + Args: |
| + url: The specified url. |
| + tries: The number of times to try. |
| + |
| + Returns: |
| + The result of urllib2.urlopen(url). |
| + """ |
| + for i in range(tries): |
| + try: |
| + return urllib2.urlopen(url) |
| + except urllib2.HTTPError as e: |
| + if i + 1 >= tries or e.code < 500: |
| + raise |
| + else: |
| + print 'Cannot GET %s: %s' % (url, str(e)) |
| + except urllib2.URLError as e: |
| + if i + 1 >= tries: |
| + raise |
| + else: |
| + print 'Cannot GET %s: %s' % (url, str(e)) |
| + print 'Sleeping for 10 seconds before retrying...' |
| + time.sleep(10) |
| + |
| + |
| +def GrabRemotePackageIndex(binhost_url): |
| + """Grab the latest binary package database from the specified URL. |
| + |
| + Args: |
| + binhost_url: Base URL of remote packages (PORTAGE_BINHOST). |
| + |
| + Returns: |
| + A PackageIndex object, or None if the Packages file is missing from the |
| + binhost (HTTP 404). |
|
diandersAtChromium
2010/11/25 00:45:37
...or None if the database is missing.
davidjames
2010/11/29 21:54:17
Done.
|
| + """ |
| + |
| + url = urlparse.urljoin(binhost_url, 'Packages') |
| + try: |
| + f = _RetryUrlOpen(url) |
| + except urllib2.HTTPError as e: |
| + if e.code == 404: |
| + return None |
| + raise |
| + |
| + pkgindex = PackageIndex() |
| + pkgindex.Read(f) |
| + pkgindex.header.setdefault('URI', binhost_url) |
| + f.close() |
| + return pkgindex |
| + |
| + |
| +def GrabLocalPackageIndex(package_path): |
| + """Read a local packages file from disk into a PackageIndex() object. |
| + |
| + Args: |
| + package_path: Directory containing Packages file. |
| + |
| + Returns: |
| + A PackageIndex object. |
| + """ |
| + packages_file = file(os.path.join(package_path, 'Packages')) |
| + pkgindex = PackageIndex() |
| + pkgindex.Read(packages_file) |
| + packages_file.close() |
| + return pkgindex |