chromite/lib/binpkg.py - Issue 5344002: Update cbuildbot.py and prebuilt.py to deduplicate preflight prebuilts.

Unified Diff: chromite/lib/binpkg.py

Issue 5344002: Update cbuildbot.py and prebuilt.py to deduplicate preflight prebuilts. (Closed) Base URL: None@preflight_upload

Patch Set: Address comments by dianders. Created 10 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: chromite/lib/binpkg.py

diff --git a/chromite/lib/binpkg.py b/chromite/lib/binpkg.py

new file mode 100644

index 0000000000000000000000000000000000000000..bef26fe2b6d5914a340f5c3e7a0d771e9221634f

--- /dev/null

+++ b/chromite/lib/binpkg.py

@@ -0,0 +1,299 @@

+# Use of this source code is governed by a BSD-style license that can be

+# found in the LICENSE file.

+# Adapted from portage/getbinpkg.py -- Portage binary-package helper functions

+# Distributed under the terms of the GNU General Public License v2

+import operator

+import os

+import tempfile

+import time

+import urllib2

+import urlparse

class PackageIndex(object):
  """A parser for the Portage Packages index file.

  The Portage Packages index file serves to keep track of what packages are
  included in a tree. It contains the following sections:
    1) The header. The header tracks general key/value pairs that don't apply
       to any specific package. E.g., it tracks the base URL of the packages
       file, and the number of packages included in the file. The header is
       terminated by a blank line.
    2) The body. The body is a list of packages. Each package contains a list
       of key/value pairs. Packages are either terminated by a blank line or
       by the end of the file. Every package has a CPV entry, which serves as
       a unique identifier for the package.
  """

  def __init__(self):
    """Constructor."""

    # The header tracks general key/value pairs that don't apply to any
    # specific package. E.g., it tracks the base URL of the packages.
    self.header = {}

    # A list of packages (stored as a list of dictionaries).
    self.packages = []

    # Whether or not the PackageIndex has been modified since the last time it
    # was written.
    self.modified = False

  @staticmethod
  def _JoinUri(base, path):
    """Join a base URI (or path prefix) and a relative path with a slash.

    urlparse.urljoin is deliberately not used here: it drops the last path
    segment of a base that has no trailing slash, and for schemes it does not
    treat as hierarchical (e.g. gs://) it returns the relative path unchanged.

    Args:
      base: Base URI or path prefix. May or may not end with a slash.
      path: Relative path to append to base.

    Returns:
      base and path separated by a single slash, or just path if base is
      empty.
    """
    if not base:
      return path
    return '%s/%s' % (base.rstrip('/'), path)

  def _PopulateDuplicateDB(self, db):
    """Populate db with SHA1 -> URL mapping for packages.

    Packages without a (non-empty) SHA1 entry cannot be matched by content and
    are skipped.

    Args:
      db: Dictionary to populate with SHA1 -> URL mapping for packages.
    """

    uri = self.header['URI']
    for pkg in self.packages:
      sha1 = pkg.get('SHA1')
      if not sha1:
        continue
      # Packages that carry no explicit PATH live at <CPV>.tbz2.
      path = pkg.get('PATH', pkg['CPV'] + '.tbz2')
      db[sha1] = self._JoinUri(uri, path)

  def _ReadPkgIndex(self, pkgfile):
    """Read a list of key/value pairs from the Packages file into a dictionary.

    Both header entries and package entries are lists of key/value pairs, so
    they can both be read by this function. Entries can be terminated by empty
    lines or by the end of the file.

    This function will read lines from the specified file until it encounters
    a blank line or the end of the file.

    Keys and values in the Packages file are separated by a colon and a space.
    Keys may contain capital letters, numbers, and underscores, but may not
    contain colons. Values may contain any character except a newline. In
    particular, it is normal for values to contain colons.

    Lines that have content, and do not contain a valid key/value pair, are
    ignored. This is for compatibility with the Portage package parser, and
    to allow for future extensions to the Packages file format.

    All entries must contain at least one key/value pair. If the end of the
    file is reached, an empty dictionary is returned.

    Args:
      pkgfile: A python file object.

    Returns:
      The dictionary of key-value pairs that was read from the file.
    """
    d = {}
    for line in pkgfile:
      line = line.rstrip('\n')
      if not line:
        # An empty dictionary is what signals end-of-file to _ReadBody, so a
        # blank line must never terminate an entry that has no pairs in it.
        assert d, 'Packages entry must contain at least one key/value pair'
        break
      # Split on the first ': ' only, so that values may contain colons.
      line = line.split(': ', 1)
      if len(line) == 2:
        k, v = line
        d[k] = v
    return d

  def _WritePkgIndex(self, pkgfile, entry):
    """Write header entry or package entry to packages file.

    The keys and values will be separated by a colon and a space. The entry
    will be terminated by a blank line. Keys are written in sorted order.
    Items whose value is empty (or otherwise false) are not written.

    Args:
      pkgfile: A python file object.
      entry: A dictionary of the key/value pairs to write.
    """
    lines = ['%s: %s' % (k, v) for k, v in sorted(entry.items()) if v]
    pkgfile.write('%s\n\n' % '\n'.join(lines))

  def _ReadHeader(self, pkgfile):
    """Read header of packages file.

    The key/value pairs that are read are merged into self.header; keys that
    are already present are overwritten.

    Args:
      pkgfile: A python file object.
    """
    self.header.update(self._ReadPkgIndex(pkgfile))

  def _ReadBody(self, pkgfile):
    """Read body of packages file.

    Before calling this function, you must first read the header (using
    _ReadHeader).

    Entries that have no CPV key are dropped. The packages that are read are
    appended to self.packages.

    Args:
      pkgfile: A python file object.
    """
    # Read all of the sections in the body by looping until we reach the end
    # of the file.
    while True:
      d = self._ReadPkgIndex(pkgfile)
      if not d:
        break
      if 'CPV' in d:
        self.packages.append(d)

  def Read(self, pkgfile):
    """Read the entire packages file.

    Args:
      pkgfile: A python file object.
    """
    self._ReadHeader(pkgfile)
    self._ReadBody(pkgfile)

  def RemoveFilteredPackages(self, filter_fn):
    """Remove packages which match filter_fn.

    Marks the index as modified if any package was removed.

    Args:
      filter_fn: A function which operates on packages. If it returns True,
                 the package should be removed.
    """
    filtered = [p for p in self.packages if not filter_fn(p)]
    if filtered != self.packages:
      self.modified = True
      self.packages = filtered

  def ResolveDuplicateUploads(self, pkgindexes):
    """Point packages at files that have already been uploaded.

    For each package in our index, check if there is an existing package that
    has already been uploaded to the same base URI. If so, point that package
    at the existing file, so that we don't have to upload the file.

    Packages are matched by their SHA1 entry. A package without a SHA1 entry
    cannot be matched and is always reported as needing an upload.

    Args:
      pkgindexes: A list of PackageIndex objects describing packages that have
                  already been uploaded.

    Returns:
      A list of the packages that still need to be uploaded.
    """
    db = {}
    for pkgindex in pkgindexes:
      pkgindex._PopulateDuplicateDB(db)

    uploads = []
    base_uri = self.header['URI']
    for pkg in self.packages:
      sha1 = pkg.get('SHA1')
      uri = db.get(sha1) if sha1 else None
      if uri and uri.startswith(base_uri):
        # PATH is stored relative to the base URI.
        pkg['PATH'] = uri[len(base_uri):].lstrip('/')
      else:
        uploads.append(pkg)
    return uploads

  def SetUploadLocation(self, base_uri, path_prefix):
    """Set upload location to base_uri + path_prefix.

    Args:
      base_uri: Base URI for all packages in the file. We set
                self.header['URI'] to this value, so all packages must live
                under this directory.
      path_prefix: Path prefix to use for all current packages in the file.
                   This will be added to the beginning of the path for every
                   package. A trailing slash is optional.
    """
    self.header['URI'] = base_uri
    for pkg in self.packages:
      pkg['PATH'] = self._JoinUri(path_prefix, pkg['CPV'] + '.tbz2')

  def Write(self, pkgfile):
    """Write a packages file to disk.

    The header is written first, followed by the packages sorted by CPV.

    Side effect: if the index has been modified since it was last written,
    the TIMESTAMP and PACKAGES header entries are refreshed and the modified
    flag is cleared.

    Args:
      pkgfile: A python file object.
    """
    if self.modified:
      # int() yields the same string as long() and also auto-promotes.
      self.header['TIMESTAMP'] = str(int(time.time()))
      self.header['PACKAGES'] = str(len(self.packages))
      self.modified = False
    self._WritePkgIndex(pkgfile, self.header)
    for metadata in sorted(self.packages, key=operator.itemgetter('CPV')):
      self._WritePkgIndex(pkgfile, metadata)

  def WriteToNamedTemporaryFile(self):
    """Write this package index to a temporary file.

    Returns:
      An open temporary file containing the index, flushed and rewound to the
      start. The file is a tempfile.NamedTemporaryFile, so it is the caller's
      responsibility to keep the returned object alive for as long as the
      file is needed.
    """
    f = tempfile.NamedTemporaryFile()
    self.Write(f)
    f.flush()
    f.seek(0)
    return f

def _RetryUrlOpen(url, tries=3):
  """Open the specified url, retrying if we run into temporary errors.

  Network errors and HTTP 5xx server errors are retried after a 10 second
  pause. HTTP errors with a non-5xx code are raised immediately, as is any
  error hit on the final attempt.

  Args:
    url: The specified url.
    tries: The number of times to try.

  Returns:
    The result of urllib2.urlopen(url).
  """
  for attempt in range(1, tries + 1):
    try:
      return urllib2.urlopen(url)
    except urllib2.URLError as e:
      # HTTPError is a subclass of URLError, so this one handler sees both;
      # only HTTP errors carry a status code that can rule out a retry.
      permanent = isinstance(e, urllib2.HTTPError) and e.code < 500
      if attempt >= tries or permanent:
        raise
      print('Cannot GET %s: %s' % (url, str(e)))
    print('Sleeping for 10 seconds before retrying...')
    time.sleep(10)

def GrabRemotePackageIndex(binhost_url):
  """Grab the latest binary package database from the specified URL.

  Args:
    binhost_url: Base URL of remote packages (PORTAGE_BINHOST). A trailing
                 slash is optional.

  Returns:
    A PackageIndex object, or None if the server answers 404 for the
    Packages file. If the downloaded header has no URI entry, it is set to
    binhost_url.

  Raises:
    urllib2.HTTPError: for any HTTP error other than 404.
  """
  # Join by hand: urlparse.urljoin would drop the last path segment of a
  # binhost_url that does not end with a slash.
  url = '%s/Packages' % binhost_url.rstrip('/')

  try:
    f = _RetryUrlOpen(url)
  except urllib2.HTTPError as e:
    if e.code == 404:
      return None
    raise

  pkgindex = PackageIndex()
  try:
    pkgindex.Read(f)
  finally:
    # Release the connection even if parsing fails.
    f.close()
  pkgindex.header.setdefault('URI', binhost_url)
  return pkgindex

def GrabLocalPackageIndex(package_path):
  """Read a local packages file from disk into a PackageIndex() object.

  Args:
    package_path: Directory containing Packages file.

  Returns:
    A PackageIndex object.
  """
  pkgindex = PackageIndex()
  # The with-block closes the file even if parsing raises.
  with open(os.path.join(package_path, 'Packages')) as packages_file:
    pkgindex.Read(packages_file)
  return pkgindex

« no previous file with comments | « bin/cbuildbot_unittest.py ('k') | prebuilt.py » ('j') | prebuilt.py » ('J')