Chromium Code Reviews| OLD | NEW |
|---|---|
| (Empty) | |
| 1 # Copyright (c) 2010 The Chromium OS Authors. All rights reserved. | |
| 2 # Use of this source code is governed by a BSD-style license that can be | |
| 3 # found in the LICENSE file. | |
| 4 # | |
| 5 # Adapted from portage/getbinpkg.py -- Portage binary-package helper functions | |
| 6 # Copyright 2003-2004 Gentoo Foundation | |
| 7 # Distributed under the terms of the GNU General Public License v2 | |
| 8 | |
| 9 import operator | |
| 10 import os | |
| 11 import tempfile | |
| 12 import time | |
| 13 import urllib2 | |
| 14 import urlparse | |
| 15 | |
class PackageIndex(object):
  """A parser for the Portage Packages index file.

  The Portage Packages index file serves to keep track of what packages are
  included in a tree. It contains the following sections:
    1) The header. The header tracks general key/value pairs that don't apply
       to any specific package. E.g., it tracks the base URL of the packages
       file, and the number of packages included in the file. The header is
       terminated by a blank line.
    2) The body. The body is a list of packages. Each package contains a list
       of key/value pairs. Packages are either terminated by a blank line or
       by the end of the file. Every package has a CPV entry, which serves as
       a unique identifier for the package.
  """

  def __init__(self):
    """Constructor."""

    # The header tracks general key/value pairs that don't apply to any
    # specific package. E.g., it tracks the base URL of the packages.
    self.header = {}

    # A list of packages (stored as a list of dictionaries).
    self.packages = []

    # Whether or not the PackageIndex has been modified since the last time it
    # was written.
    self.modified = False

  @staticmethod
  def _AsDirectoryUri(uri):
    """Return uri with a trailing slash, so urljoin treats it as a directory.

    urlparse.urljoin() replaces the last path component of a base that does
    not end in '/', so 'http://host/dir' + 'x' would become 'http://host/x'.
    Empty strings are returned unchanged so that joining against them still
    yields a relative path.

    Args:
      uri: A base URI or relative path prefix (may be empty).

    Returns:
      uri, with '/' appended if it was non-empty and did not end in '/'.
    """
    if uri and not uri.endswith('/'):
      return uri + '/'
    return uri

  def _PopulateDuplicateDB(self, db):
    """Populate db with SHA1 -> URL mapping for packages.

    Packages without a SHA1 entry cannot be matched against anything, so they
    are skipped.

    Args:
      db: Dictionary to populate with SHA1 -> URL mapping for packages.
    """
    base_uri = self._AsDirectoryUri(self.header['URI'])
    for pkg in self.packages:
      default_path = pkg['CPV'] + '.tbz2'
      sha1 = pkg.get('SHA1')
      if not sha1:
        continue
      db[sha1] = urlparse.urljoin(base_uri, pkg.get('PATH', default_path))

  def _ReadPkgIndex(self, pkgfile):
    """Read a list of key/value pairs from the Packages file into a dictionary.

    Both header entries and package entries are lists of key/value pairs, so
    they can both be read by this function. Entries can be terminated by empty
    lines or by the end of the file.

    This function will read lines from the specified file until it encounters
    a blank line or the end of the file.

    Keys and values in the Packages file are separated by a colon and a space.
    Keys may contain capital letters, numbers, and underscores, but may not
    contain colons. Values may contain any character except a newline. In
    particular, it is normal for values to contain colons.

    Lines that have content, and do not contain a valid key/value pair, are
    ignored. This is for compatibility with the Portage package parser, and
    to allow for future extensions to the Packages file format.

    All entries must contain at least one key/value pair. If the end of the
    file is reached, an empty dictionary is returned.

    Args:
      pkgfile: A python file object.

    Returns the dictionary of key-value pairs that was read from the file.
    """
    d = {}
    for line in pkgfile:
      line = line.rstrip('\n')
      if not line:
        # A blank line terminates the entry. An empty dictionary is reserved
        # for signalling end-of-file, so a blank line with nothing before it
        # (e.g. two consecutive blank lines) is rejected.
        assert d, 'Packages entry must contain at least one key/value pair'
        break
      # Split on the first ': ' only, because values may contain colons.
      fields = line.split(': ', 1)
      if len(fields) == 2:
        k, v = fields
        d[k] = v
    return d

  def _WritePkgIndex(self, pkgfile, entry):
    """Write header entry or package entry to packages file.

    The keys and values will be separated by a colon and a space. The entry
    will be terminated by a blank line. Keys are written in sorted order.
    Items whose value is empty (or otherwise false) are not written.

    Args:
      pkgfile: A python file object.
      entry: A dictionary of the key/value pairs to write.
    """
    lines = ['%s: %s' % (k, v) for k, v in sorted(entry.items()) if v]
    pkgfile.write('%s\n\n' % '\n'.join(lines))

  def _ReadHeader(self, pkgfile):
    """Read header of packages file.

    The key/value pairs that are read are merged into self.header; keys that
    are already present are overwritten.

    Args:
      pkgfile: A python file object.
    """
    self.header.update(self._ReadPkgIndex(pkgfile))

  def _ReadBody(self, pkgfile):
    """Read body of packages file.

    Before calling this function, you must first read the header (using
    _ReadHeader). Packages that are read are appended to self.packages.
    Entries without a CPV key are ignored.

    Args:
      pkgfile: A python file object.
    """
    # Read all of the sections in the body by looping until we reach the end
    # of the file (signalled by an empty dictionary).
    while True:
      d = self._ReadPkgIndex(pkgfile)
      if not d:
        break
      if 'CPV' in d:
        self.packages.append(d)

  def Read(self, pkgfile):
    """Read the entire packages file.

    Args:
      pkgfile: A python file object.
    """
    self._ReadHeader(pkgfile)
    self._ReadBody(pkgfile)

  def RemoveFilteredPackages(self, filter_fn):
    """Remove packages which match filter_fn.

    Marks the index as modified if any package was removed.

    Args:
      filter_fn: A function which operates on packages. If it returns True,
        the package should be removed.
    """
    filtered = [p for p in self.packages if not filter_fn(p)]
    if filtered != self.packages:
      self.modified = True
      self.packages = filtered

  def ResolveDuplicateUploads(self, pkgindexes):
    """Point packages at files that have already been uploaded.

    For each package in our index, check if there is an existing package that
    has already been uploaded to the same base URI. If so, point that package
    at the existing file, so that we don't have to upload the file.

    Args:
      pkgindexes: A list of PackageIndex objects describing packages that have
        already been uploaded.

    Returns:
      A list of the packages that still need to be uploaded. Packages without
      a SHA1 entry are always included, since they cannot be matched.
    """
    db = {}
    for pkgindex in pkgindexes:
      pkgindex._PopulateDuplicateDB(db)

    # Compare against the base URI as a directory, so that 'http://host/a'
    # does not match files that live under 'http://host/ab/'.
    base_uri = self._AsDirectoryUri(self.header['URI'])
    uploads = []
    for pkg in self.packages:
      uri = db.get(pkg.get('SHA1'))
      if uri and uri.startswith(base_uri):
        pkg['PATH'] = uri[len(base_uri):].lstrip('/')
      else:
        uploads.append(pkg)
    return uploads

  def SetUploadLocation(self, base_uri, path_prefix):
    """Set upload location to base_uri + path_prefix.

    Args:
      base_uri: Base URI for all packages in the file. We set
        self.header['URI'] to this value, so all packages must live under
        this directory.
      path_prefix: Path prefix to use for all current packages in the file.
        This will be added to the beginning of the path for every package.
        It is treated as a directory whether or not it ends in a slash.
    """
    self.header['URI'] = base_uri
    # Without the trailing slash, urljoin would drop the last component of
    # the prefix ('board/x86' + 'cat/pkg.tbz2' -> 'board/cat/pkg.tbz2').
    prefix = self._AsDirectoryUri(path_prefix)
    for pkg in self.packages:
      pkg['PATH'] = urlparse.urljoin(prefix, pkg['CPV'] + '.tbz2')

  def Write(self, pkgfile):
    """Write a packages file to disk.

    Side effects: if the index has been modified since it was last written,
    the TIMESTAMP and PACKAGES header entries are refreshed before writing,
    and the modified flag is cleared.

    Packages are written sorted by CPV.

    Args:
      pkgfile: A python file object.
    """
    if self.modified:
      self.header['TIMESTAMP'] = str(int(time.time()))
      self.header['PACKAGES'] = str(len(self.packages))
      self.modified = False
    self._WritePkgIndex(pkgfile, self.header)
    for metadata in sorted(self.packages, key=operator.itemgetter('CPV')):
      self._WritePkgIndex(pkgfile, metadata)

  def WriteToNamedTemporaryFile(self):
    """Write this package index to a temporary file.

    Returns:
      An open tempfile.NamedTemporaryFile containing the serialized index,
      flushed and rewound to the beginning so it can be read immediately.
      The file is removed when the returned object is closed.
    """
    f = tempfile.NamedTemporaryFile()
    self.Write(f)
    f.flush()
    f.seek(0)
    return f
| 229 | |
| 230 | |
def _RetryUrlOpen(url, tries=3, delay=10):
  """Open the specified url, retrying if we run into temporary errors.

  We retry for both network errors and 5xx Server Errors. We do not retry
  for HTTP errors with a non-5xx code.

  Args:
    url: The specified url.
    tries: The number of times to try.
    delay: Number of seconds to sleep between attempts.

  Returns:
    The result of urllib2.urlopen(url). If tries is not positive, no attempt
    is made and None is returned.

  Raises:
    urllib2.HTTPError: On a non-5xx HTTP error, or a 5xx error on the final
      attempt.
    urllib2.URLError: On a network error on the final attempt.
  """
  for attempt in range(1, tries + 1):
    try:
      return urllib2.urlopen(url)
    except urllib2.URLError as e:
      # HTTPError is a subclass of URLError and is the only one carrying an
      # HTTP status code; anything below 500 is treated as permanent.
      code = getattr(e, 'code', None)
      permanent = code is not None and code < 500
      if permanent or attempt >= tries:
        raise
      # Single-argument print(...) behaves the same as the print statement.
      print('Cannot GET %s: %s' % (url, str(e)))
    print('Sleeping for %s seconds before retrying...' % delay)
    time.sleep(delay)
| 259 | |
| 260 | |
def GrabRemotePackageIndex(binhost_url):
  """Grab the latest binary package database from the specified URL.

  Args:
    binhost_url: Base URL of remote packages (PORTAGE_BINHOST). It is treated
      as a directory whether or not it ends in a slash.

  Returns:
    A PackageIndex object, or None if the remote Packages file does not exist
    (HTTP 404). If the remote index has no URI header entry, it is set to
    binhost_url.

  Raises:
    urllib2.HTTPError: On any HTTP error other than 404.
  """
  # urljoin replaces the last path component of a base without a trailing
  # slash, so 'http://host/board' would otherwise yield 'http://host/Packages'.
  base_url = binhost_url
  if base_url and not base_url.endswith('/'):
    base_url += '/'
  url = urlparse.urljoin(base_url, 'Packages')

  try:
    f = _RetryUrlOpen(url)
  except urllib2.HTTPError as e:
    if e.code == 404:
      return None
    raise

  # Close the response even if parsing fails.
  try:
    pkgindex = PackageIndex()
    pkgindex.Read(f)
  finally:
    f.close()
  pkgindex.header.setdefault('URI', binhost_url)
  return pkgindex
| 284 | |
| 285 | |
def GrabLocalPackageIndex(package_path):
  """Read a local packages file from disk into a PackageIndex() object.

  Args:
    package_path: Directory containing Packages file.

  Returns:
    A PackageIndex object.

  Raises:
    IOError: If the Packages file cannot be opened.
  """
  pkgindex = PackageIndex()
  # The with-statement closes the file even if parsing raises.
  with open(os.path.join(package_path, 'Packages')) as packages_file:
    pkgindex.Read(packages_file)
  return pkgindex
| OLD | NEW |