| OLD | NEW |
| (Empty) |
| 1 # Copyright (c) 2010 The Chromium OS Authors. All rights reserved. | |
| 2 # Use of this source code is governed by a BSD-style license that can be | |
| 3 # found in the LICENSE file. | |
| 4 # | |
| 5 # Adapted from portage/getbinpkg.py -- Portage binary-package helper functions | |
| 6 # Copyright 2003-2004 Gentoo Foundation | |
| 7 # Distributed under the terms of the GNU General Public License v2 | |
| 8 | |
| 9 import operator | |
| 10 import os | |
| 11 import tempfile | |
| 12 import time | |
| 13 import urllib | |
| 14 import urllib2 | |
| 15 | |
class PackageIndex(object):
  """A parser for the Portage Packages index file.

  The Portage Packages index file serves to keep track of what packages are
  included in a tree. It contains the following sections:
    1) The header. The header tracks general key/value pairs that don't apply
       to any specific package. E.g., it tracks the base URL of the packages
       file, and the number of packages included in the file. The header is
       terminated by a blank line.
    2) The body. The body is a list of packages. Each package contains a list
       of key/value pairs. Packages are either terminated by a blank line or
       by the end of the file. Every package has a CPV entry, which serves as
       a unique identifier for the package.
  """

  def __init__(self):
    """Create an empty index with no header entries and no packages."""

    # The header tracks general key/value pairs that don't apply to any
    # specific package. E.g., it tracks the base URL of the packages.
    self.header = {}

    # A list of packages (stored as a list of dictionaries).
    self.packages = []

    # Whether or not the PackageIndex has been modified since the last time it
    # was written.
    self.modified = False

  def _PopulateDuplicateDB(self, db):
    """Populate db with SHA1 -> URL mapping for packages.

    Packages without a (non-empty) SHA1 entry are skipped. If two packages
    share a SHA1, the later one overwrites the earlier one in db.

    Args:
      db: Dictionary to populate with SHA1 -> URL mapping for packages.

    Raises:
      KeyError: If the header has no 'URI' entry or a package has no 'CPV'.
    """
    # The base URI is the same for every package, so normalize it once.
    base = self.header['URI'].rstrip('/')
    for pkg in self.packages:
      cpv, sha1 = pkg['CPV'], pkg.get('SHA1')
      if not sha1:
        continue
      if 'PATH' in pkg:
        path = pkg['PATH']
      else:
        # Only build the default location when no explicit PATH is recorded.
        path = urllib.quote(cpv + '.tbz2')
      db[sha1] = '%s/%s' % (base, path)

  def _ReadPkgIndex(self, pkgfile):
    """Read a list of key/value pairs from the Packages file into a dictionary.

    Both header entries and package entries are lists of key/value pairs, so
    they can both be read by this function. Entries can be terminated by empty
    lines or by the end of the file.

    This function will read lines from the specified file until it encounters
    a blank line or the end of the file.

    Keys and values in the Packages file are separated by a colon and a space.
    Keys may contain capital letters, numbers, and underscores, but may not
    contain colons. Values may contain any character except a newline. In
    particular, it is normal for values to contain colons.

    Lines that have content, and do not contain a valid key/value pair, are
    ignored. This is for compatibility with the Portage package parser, and
    to allow for future extensions to the Packages file format.

    All entries must contain at least one key/value pair. If the end of the
    file is reached, an empty dictionary is returned.

    Args:
      pkgfile: A python file object.

    Returns:
      The dictionary of key-value pairs that was read from the file.

    Raises:
      AssertionError: If a blank line is found before any key/value pair.
    """
    entry = {}
    for line in pkgfile:
      line = line.rstrip('\n')
      if not line:
        assert entry, 'Packages entry must contain at least one key/value pair'
        break
      # Split on the first ': ' only, since values may themselves contain it.
      fields = line.split(': ', 1)
      if len(fields) == 2:
        key, value = fields
        entry[key] = value
    return entry

  def _WritePkgIndex(self, pkgfile, entry):
    """Write header entry or package entry to packages file.

    The keys and values will be separated by a colon and a space, and written
    in sorted key order. Pairs whose value is empty (falsy) are omitted. The
    entry will be terminated by a blank line.

    Args:
      pkgfile: A python file object.
      entry: A dictionary of the key/value pairs to write.
    """
    lines = ['%s: %s' % (k, v) for k, v in sorted(entry.items()) if v]
    pkgfile.write('%s\n\n' % '\n'.join(lines))

  def _ReadHeader(self, pkgfile):
    """Read header of packages file.

    Args:
      pkgfile: A python file object.
    """
    assert not self.header, 'Should only read header once.'
    self.header = self._ReadPkgIndex(pkgfile)

  def _ReadBody(self, pkgfile):
    """Read body of packages file.

    Before calling this function, you must first read the header (using
    _ReadHeader). Entries that have no CPV key are skipped.

    Args:
      pkgfile: A python file object.
    """
    assert self.header, 'Should read header first.'
    assert not self.packages, 'Should only read body once.'

    # Read all of the sections in the body by looping until we reach the end
    # of the file (signalled by an empty dictionary).
    while True:
      entry = self._ReadPkgIndex(pkgfile)
      if not entry:
        break
      if 'CPV' in entry:
        self.packages.append(entry)

  def Read(self, pkgfile):
    """Read the entire packages file.

    Args:
      pkgfile: A python file object.
    """
    self._ReadHeader(pkgfile)
    self._ReadBody(pkgfile)

  def RemoveFilteredPackages(self, filter_fn):
    """Remove packages which match filter_fn.

    The 'modified' flag is set if at least one package was removed.

    Args:
      filter_fn: A function which operates on packages. If it returns True,
        the package should be removed.
    """
    filtered = [p for p in self.packages if not filter_fn(p)]
    if filtered != self.packages:
      self.modified = True
      self.packages = filtered

  def ResolveDuplicateUploads(self, pkgindexes):
    """Point packages at files that have already been uploaded.

    For each package in our index, check if there is an existing package that
    has already been uploaded to the same base URI. If so, point that package
    at the existing file, so that we don't have to upload the file.

    Matching packages get their 'PATH' entry rewritten in place; the
    'modified' flag is not touched by this method.

    Args:
      pkgindexes: A list of PackageIndex objects containing info about packages
        that have already been uploaded.

    Returns:
      A list of the packages that still need to be uploaded.
    """
    db = {}
    for pkgindex in pkgindexes:
      pkgindex._PopulateDuplicateDB(db)

    # Compare against the base URI with exactly one trailing slash, so that
    # 'http://host/foo' does not claim files under 'http://host/foobar/'.
    prefix = self.header['URI'].rstrip('/') + '/'

    uploads = []
    for pkg in self.packages:
      sha1 = pkg.get('SHA1')
      uri = db.get(sha1)
      if sha1 and uri and uri.startswith(prefix):
        pkg['PATH'] = uri[len(prefix):].lstrip('/')
      else:
        uploads.append(pkg)
    return uploads

  def SetUploadLocation(self, base_uri, path_prefix):
    """Set upload location to base_uri + path_prefix.

    Args:
      base_uri: Base URI for all packages in the file. We set
        self.header['URI'] to this value, so all packages must live under
        this directory.
      path_prefix: Path prefix to use for all current packages in the file.
        This will be added to the beginning of the path for every package.
    """
    self.header['URI'] = base_uri
    prefix = path_prefix.rstrip('/')
    for pkg in self.packages:
      path = urllib.quote(pkg['CPV'] + '.tbz2')
      pkg['PATH'] = '%s/%s' % (prefix, path)

  def Write(self, pkgfile):
    """Write a packages file to disk.

    If 'modified' flag is set, the TIMESTAMP and PACKAGES fields in the header
    will be updated before writing to disk, and the flag is cleared. Packages
    are written sorted by CPV.

    Args:
      pkgfile: A python file object.
    """
    if self.modified:
      # int() yields the same decimal string as long() did, without relying on
      # a builtin that only exists in Python 2.
      self.header['TIMESTAMP'] = str(int(time.time()))
      self.header['PACKAGES'] = str(len(self.packages))
      self.modified = False
    self._WritePkgIndex(pkgfile, self.header)
    for metadata in sorted(self.packages, key=operator.itemgetter('CPV')):
      self._WritePkgIndex(pkgfile, metadata)

  def WriteToNamedTemporaryFile(self):
    """Write this index to a temporary file.

    Returns:
      A temporary file containing this index, flushed and rewound to the
      start so that it can be read back immediately.
    """
    f = tempfile.NamedTemporaryFile()
    self.Write(f)
    f.flush()
    f.seek(0)
    return f
| 238 | |
| 239 | |
def _RetryUrlOpen(url, tries=3):
  """Open the specified url, retrying if we run into temporary errors.

  We retry for both network errors and 5xx Server Errors. We do not retry
  for HTTP errors with a non-5xx code. Between attempts we sleep for 10
  seconds.

  Args:
    url: The specified url.
    tries: The number of times to try. Must be at least 1.

  Returns:
    The result of urllib2.urlopen(url).

  Raises:
    ValueError: If tries is less than 1 (no attempt could be made).
    urllib2.HTTPError: On a non-5xx HTTP error, or a 5xx error on the last try.
    urllib2.URLError: On a network error on the last try.
  """
  if tries < 1:
    # Without this check the loop body never runs and None is returned, which
    # callers would mistake for an opened URL.
    raise ValueError('tries must be at least 1, got %r' % (tries,))

  for i in range(tries):
    try:
      return urllib2.urlopen(url)
    except urllib2.URLError as e:
      # HTTPError is a subclass of URLError, so one handler covers both; only
      # HTTP errors carry a status code that can mark the failure as permanent.
      permanent = isinstance(e, urllib2.HTTPError) and e.code < 500
      if permanent or i + 1 >= tries:
        raise
      print('Cannot GET %s: %s' % (url, str(e)))
    print('Sleeping for 10 seconds before retrying...')
    time.sleep(10)
| 268 | |
| 269 | |
def GrabRemotePackageIndex(binhost_url):
  """Grab the latest binary package database from the specified URL.

  Args:
    binhost_url: Base URL of remote packages (PORTAGE_BINHOST).

  Returns:
    A PackageIndex object, if the Packages file can be retrieved. If the
    server returns status code 404, None is returned. If the retrieved header
    has no URI entry, it is set to binhost_url.

  Raises:
    urllib2.HTTPError: For HTTP errors other than 404.
  """
  url = '%s/Packages' % binhost_url.rstrip('/')
  try:
    f = _RetryUrlOpen(url)
  except urllib2.HTTPError as e:
    if e.code == 404:
      return None
    raise

  # Close the response even if parsing fails, so the connection is not leaked.
  try:
    pkgindex = PackageIndex()
    pkgindex.Read(f)
  finally:
    f.close()
  pkgindex.header.setdefault('URI', binhost_url)
  return pkgindex
| 294 | |
| 295 | |
def GrabLocalPackageIndex(package_path):
  """Read a local packages file from disk into a PackageIndex() object.

  Args:
    package_path: Directory containing Packages file.

  Returns:
    A PackageIndex object.
  """
  pkgindex = PackageIndex()
  # open() replaces the Python-2-only file() alias, and the with-block closes
  # the file even if parsing raises.
  with open(os.path.join(package_path, 'Packages')) as packages_file:
    pkgindex.Read(packages_file)
  return pkgindex
| OLD | NEW |