Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(158)

Unified Diff: chromite/lib/binpkg.py

Issue 5344002: Update cbuildbot.py and prebuilt.py to deduplicate preflight prebuilts. (Closed) Base URL: None@preflight_upload
Patch Set: Reset against cros/master Created 10 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « bin/cbuildbot_unittest.py ('k') | prebuilt.py » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: chromite/lib/binpkg.py
diff --git a/chromite/lib/binpkg.py b/chromite/lib/binpkg.py
new file mode 100644
index 0000000000000000000000000000000000000000..40a29dbcd22904dcd9d1c90af568178928085a51
--- /dev/null
+++ b/chromite/lib/binpkg.py
@@ -0,0 +1,307 @@
+# Copyright (c) 2010 The Chromium OS Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+#
+# Adapted from portage/getbinpkg.py -- Portage binary-package helper functions
+# Copyright 2003-2004 Gentoo Foundation
+# Distributed under the terms of the GNU General Public License v2
+
+import operator
+import os
+import tempfile
+import time
+import urllib2
+import urlparse
+
class PackageIndex(object):
  """A parser for the Portage Packages index file.

  The Portage Packages index file serves to keep track of what packages are
  included in a tree. It contains the following sections:
    1) The header. The header tracks general key/value pairs that don't apply
       to any specific package. E.g., it tracks the base URL of the packages
       file, and the number of packages included in the file. The header is
       terminated by a blank line.
    2) The body. The body is a list of packages. Each package contains a list
       of key/value pairs. Packages are either terminated by a blank line or
       by the end of the file. Every package has a CPV entry, which serves as
       a unique identifier for the package.
  """

  def __init__(self):
    """Create an empty index (no header entries, no packages)."""

    # The header tracks general key/value pairs that don't apply to any
    # specific package. E.g., it tracks the base URL of the packages.
    self.header = {}

    # A list of packages (stored as a list of dictionaries).
    self.packages = []

    # Whether or not the PackageIndex has been modified since the last time it
    # was written.
    self.modified = False

  def _PopulateDuplicateDB(self, db):
    """Populate db with SHA1 -> URL mapping for packages.

    The URL of a package is the header URI joined with the package's PATH
    entry (or '<CPV>.tbz2' when the package has no PATH). Packages without a
    SHA1 entry cannot be matched by checksum and are skipped.

    Args:
      db: Dictionary to populate with SHA1 -> URL mapping for packages.

    Raises:
      KeyError: If the header has no URI entry.
    """
    # Join with an explicit '/' instead of urljoin(): urljoin() drops the last
    # path segment of a base without a trailing slash and returns only the
    # relative part for schemes it does not know to be hierarchical (gs://).
    base = self.header['URI'].rstrip('/')
    for pkg in self.packages:
      sha1 = pkg.get('SHA1')
      if not sha1:
        continue
      path = pkg.get('PATH', pkg['CPV'] + '.tbz2')
      db[sha1] = '%s/%s' % (base, path.lstrip('/'))

  def _ReadPkgIndex(self, pkgfile):
    """Read a list of key/value pairs from the Packages file into a dictionary.

    Both header entries and package entries are lists of key/value pairs, so
    they can both be read by this function. Entries can be terminated by empty
    lines or by the end of the file.

    This function will read lines from the specified file until it encounters
    a blank line or the end of the file.

    Keys and values in the Packages file are separated by a colon and a space.
    Keys may contain capital letters, numbers, and underscores, but may not
    contain colons. Values may contain any character except a newline. In
    particular, it is normal for values to contain colons.

    Lines that have content, and do not contain a valid key/value pair, are
    ignored. This is for compatibility with the Portage package parser, and
    to allow for future extensions to the Packages file format.

    All entries must contain at least one key/value pair. If the end of the
    file is reached, an empty dictionary is returned.

    Args:
      pkgfile: A python file object.

    Returns:
      The dictionary of key-value pairs that was read from the file.
    """
    entry = {}
    for raw in pkgfile:
      line = raw.rstrip('\n')
      if not line:
        # A blank line terminates the entry; one that terminates nothing
        # means the file is malformed.
        assert entry, 'Packages entry must contain at least one key/value pair'
        break
      # Split on the first ': ' only, so values may themselves contain colons.
      key, sep, value = line.partition(': ')
      if sep:
        entry[key] = value
    return entry

  def _WritePkgIndex(self, pkgfile, entry):
    """Write header entry or package entry to packages file.

    The keys and values will be separated by a colon and a space. Keys are
    written in sorted order, keys with empty values are omitted, and the entry
    is terminated by a blank line.

    Args:
      pkgfile: A python file object.
      entry: A dictionary of the key/value pairs to write.
    """
    lines = ['%s: %s' % (k, v) for k, v in sorted(entry.items()) if v]
    pkgfile.write('%s\n\n' % '\n'.join(lines))

  def _ReadHeader(self, pkgfile):
    """Read header of packages file.

    Args:
      pkgfile: A python file object.
    """
    assert not self.header, 'Should only read header once.'
    self.header = self._ReadPkgIndex(pkgfile)

  def _ReadBody(self, pkgfile):
    """Read body of packages file.

    Before calling this function, you must first read the header (using
    _ReadHeader). Entries that have no CPV key are dropped.

    Args:
      pkgfile: A python file object.
    """
    assert self.header, 'Should read header first.'
    assert not self.packages, 'Should only read body once.'

    # An empty dictionary means the end of the file was reached.
    while True:
      entry = self._ReadPkgIndex(pkgfile)
      if not entry:
        break
      if 'CPV' in entry:
        self.packages.append(entry)

  def Read(self, pkgfile):
    """Read the entire packages file (header, then body).

    Args:
      pkgfile: A python file object.
    """
    self._ReadHeader(pkgfile)
    self._ReadBody(pkgfile)

  def RemoveFilteredPackages(self, filter_fn):
    """Remove packages which match filter_fn.

    Sets the 'modified' flag when at least one package was removed.

    Args:
      filter_fn: A function which operates on packages. If it returns True,
        the package should be removed.
    """
    kept = [p for p in self.packages if not filter_fn(p)]
    # 'kept' is an ordered subset of self.packages, so comparing lengths is
    # enough to detect a removal.
    if len(kept) != len(self.packages):
      self.modified = True
      self.packages = kept

  def ResolveDuplicateUploads(self, pkgindexes):
    """Point packages at files that have already been uploaded.

    For each package in our index, check if there is an existing package that
    has already been uploaded to the same base URI. If so, point that package
    at the existing file, so that we don't have to upload the file.

    Args:
      pkgindexes: A list of PackageIndex objects containing info about packages
        that have already been uploaded.

    Returns:
      A list of the packages that still need to be uploaded. Packages without
      a SHA1 entry can never be matched and are always returned.

    Raises:
      KeyError: If this index or one of pkgindexes has no URI header entry.
    """
    db = {}
    for pkgindex in pkgindexes:
      pkgindex._PopulateDuplicateDB(db)

    # Match against the base with exactly one trailing slash, so that
    # 'http://host/a' does not claim files stored under 'http://host/ab/'.
    base_prefix = self.header['URI'].rstrip('/') + '/'
    uploads = []
    for pkg in self.packages:
      sha1 = pkg.get('SHA1')
      uri = db.get(sha1) if sha1 else None
      if uri and uri.startswith(base_prefix):
        pkg['PATH'] = uri[len(base_prefix):].lstrip('/')
      else:
        uploads.append(pkg)
    return uploads

  def SetUploadLocation(self, base_uri, path_prefix):
    """Set upload location to base_uri + path_prefix.

    Args:
      base_uri: Base URI for all packages in the file. We set
        self.header['URI'] to this value, so all packages must live under
        this directory.
      path_prefix: Path prefix to use for all current packages in the file.
        This will be added to the beginning of the path for every package.
        A trailing slash is optional; an empty prefix leaves the path as
        '<CPV>.tbz2'.
    """
    self.header['URI'] = base_uri
    # Build the prefix by hand: urljoin() would silently drop the last
    # component of a prefix that lacks a trailing slash.
    prefix = path_prefix.rstrip('/') + '/' if path_prefix else ''
    for pkg in self.packages:
      pkg['PATH'] = prefix + pkg['CPV'] + '.tbz2'

  def Write(self, pkgfile):
    """Write a packages file to disk.

    If 'modified' flag is set, the TIMESTAMP and PACKAGES fields in the header
    will be updated before writing to disk, and the flag is cleared. Packages
    are written sorted by CPV.

    Args:
      pkgfile: A python file object.
    """
    if self.modified:
      self.header['TIMESTAMP'] = str(int(time.time()))
      self.header['PACKAGES'] = str(len(self.packages))
      self.modified = False
    self._WritePkgIndex(pkgfile, self.header)
    for metadata in sorted(self.packages, key=operator.itemgetter('CPV')):
      self._WritePkgIndex(pkgfile, metadata)

  def WriteToNamedTemporaryFile(self):
    """Write this index to a temporary file.

    Returns:
      An open tempfile.NamedTemporaryFile containing the serialized index,
      flushed and rewound to the beginning. The file is deleted when it is
      closed, so the caller must keep it open while it is in use.
    """
    f = tempfile.NamedTemporaryFile()
    self.Write(f)
    f.flush()
    f.seek(0)
    return f
+
+
+def _RetryUrlOpen(url, tries=3):
+ """Open the specified url, retrying if we run into temporary errors.
+
+ We retry for both network errors and 5xx Server Errors. We do not retry
+ for HTTP errors with a non-5xx code.
+
+ Args:
+ url: The specified url.
+ tries: The number of times to try.
+
+ Returns:
+ The result of urllib2.urlopen(url).
+ """
+ for i in range(tries):
+ try:
+ return urllib2.urlopen(url)
+ except urllib2.HTTPError as e:
+ if i + 1 >= tries or e.code < 500:
+ raise
+ else:
+ print 'Cannot GET %s: %s' % (url, str(e))
+ except urllib2.URLError as e:
+ if i + 1 >= tries:
+ raise
+ else:
+ print 'Cannot GET %s: %s' % (url, str(e))
+ print 'Sleeping for 10 seconds before retrying...'
+ time.sleep(10)
+
+
def GrabRemotePackageIndex(binhost_url):
  """Grab the latest binary package database from the specified URL.

  Args:
    binhost_url: Base URL of remote packages (PORTAGE_BINHOST). A trailing
      slash is optional.

  Returns:
    A PackageIndex object, if the Packages file can be retrieved. If the
    server returns status code 404, None is returned. When the index header
    has no URI entry, it is set to binhost_url.

  Raises:
    urllib2.HTTPError: For any HTTP error other than 404.
    urllib2.URLError: For other errors raised while opening the URL.
  """
  # Join with an explicit '/': urljoin() would replace the last path segment
  # of a base without a trailing slash, and returns only 'Packages' for
  # schemes it does not know to be hierarchical (e.g. gs://).
  url = '%s/Packages' % binhost_url.rstrip('/')
  try:
    f = _RetryUrlOpen(url)
  except urllib2.HTTPError as e:
    if e.code == 404:
      return None
    raise

  # Close the response even if parsing the index fails.
  try:
    pkgindex = PackageIndex()
    pkgindex.Read(f)
  finally:
    f.close()
  pkgindex.header.setdefault('URI', binhost_url)
  return pkgindex
+
+
def GrabLocalPackageIndex(package_path):
  """Read a local packages file from disk into a PackageIndex() object.

  Args:
    package_path: Directory containing Packages file.

  Returns:
    A PackageIndex object.

  Raises:
    IOError: If the Packages file cannot be opened.
  """
  pkgindex = PackageIndex()
  # The with-block closes the file even if parsing the index fails.
  with open(os.path.join(package_path, 'Packages')) as packages_file:
    pkgindex.Read(packages_file)
  return pkgindex
« no previous file with comments | « bin/cbuildbot_unittest.py ('k') | prebuilt.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698