OLD | NEW |
| (Empty) |
1 # Copyright (c) 2010 The Chromium OS Authors. All rights reserved. | |
2 # Use of this source code is governed by a BSD-style license that can be | |
3 # found in the LICENSE file. | |
4 # | |
5 # Adapted from portage/getbinpkg.py -- Portage binary-package helper functions | |
6 # Copyright 2003-2004 Gentoo Foundation | |
7 # Distributed under the terms of the GNU General Public License v2 | |
8 | |
9 import operator | |
10 import os | |
11 import tempfile | |
12 import time | |
13 import urllib | |
14 import urllib2 | |
15 | |
class PackageIndex(object):
  """A parser for the Portage Packages index file.

  The Portage Packages index file serves to keep track of what packages are
  included in a tree. It contains the following sections:
    1) The header. The header tracks general key/value pairs that don't apply
       to any specific package. E.g., it tracks the base URL of the packages
       file, and the number of packages included in the file. The header is
       terminated by a blank line.
    2) The body. The body is a list of packages. Each package contains a list
       of key/value pairs. Packages are either terminated by a blank line or
       by the end of the file. Every package has a CPV entry, which serves as
       a unique identifier for the package.
  """

  def __init__(self):
    """Constructor."""

    # The header tracks general key/value pairs that don't apply to any
    # specific package. E.g., it tracks the base URL of the packages.
    self.header = {}

    # A list of packages (stored as a list of dictionaries).
    self.packages = []

    # Whether or not the PackageIndex has been modified since the last time it
    # was written.
    self.modified = False

  def _PopulateDuplicateDB(self, db):
    """Populate db with SHA1 -> URL mapping for packages.

    Packages without a SHA1 entry are skipped. The URL is the header URI
    joined with the package's PATH entry; when a package has no PATH entry,
    the URL-quoted '<CPV>.tbz2' is used instead.

    Args:
      db: Dictionary to populate with SHA1 -> URL mapping for packages.
    """
    base = self.header['URI'].rstrip('/')
    for pkg in self.packages:
      sha1 = pkg.get('SHA1')
      if not sha1:
        continue
      # Only build the default path when the package does not carry one.
      if 'PATH' in pkg:
        path = pkg['PATH']
      else:
        path = urllib.quote(pkg['CPV'] + '.tbz2')
      db[sha1] = '%s/%s' % (base, path)

  def _ReadPkgIndex(self, pkgfile):
    """Read a list of key/value pairs from the Packages file into a dictionary.

    Both header entries and package entries are lists of key/value pairs, so
    they can both be read by this function. Entries can be terminated by empty
    lines or by the end of the file.

    This function will read lines from the specified file until it encounters
    a blank line or the end of the file.

    Keys and values in the Packages file are separated by a colon and a space.
    Keys may contain capital letters, numbers, and underscores, but may not
    contain colons. Values may contain any character except a newline. In
    particular, it is normal for values to contain colons.

    Lines that have content, and do not contain a valid key/value pair, are
    ignored. This is for compatibility with the Portage package parser, and
    to allow for future extensions to the Packages file format.

    All entries must contain at least one key/value pair. If the end of the
    file is reached, an empty dictionary is returned.

    Args:
      pkgfile: A python file object.

    Returns the dictionary of key-value pairs that was read from the file.
    """
    d = {}
    for line in pkgfile:
      line = line.rstrip('\n')
      if not line:
        # A blank line terminates the entry; it must not be the first line.
        assert d, 'Packages entry must contain at least one key/value pair'
        break
      # Split on the first ': ' only, since values may themselves contain it.
      fields = line.split(': ', 1)
      if len(fields) == 2:
        k, v = fields
        d[k] = v
    return d

  def _WritePkgIndex(self, pkgfile, entry):
    """Write header entry or package entry to packages file.

    The keys and values will be separated by a colon and a space. The entry
    will be terminated by a blank line. Keys are written in sorted order and
    keys whose value is empty are omitted.

    Args:
      pkgfile: A python file object.
      entry: A dictionary of the key/value pairs to write.
    """
    lines = ['%s: %s' % (k, v) for k, v in sorted(entry.items()) if v]
    pkgfile.write('%s\n\n' % '\n'.join(lines))

  def _ReadHeader(self, pkgfile):
    """Read header of packages file.

    Args:
      pkgfile: A python file object.
    """
    assert not self.header, 'Should only read header once.'
    self.header = self._ReadPkgIndex(pkgfile)

  def _ReadBody(self, pkgfile):
    """Read body of packages file.

    Before calling this function, you must first read the header (using
    _ReadHeader).

    Args:
      pkgfile: A python file object.
    """
    assert self.header, 'Should read header first.'
    assert not self.packages, 'Should only read body once.'

    # Read all of the sections in the body by looping until we reach the end
    # of the file. Sections without a CPV entry are dropped.
    while True:
      d = self._ReadPkgIndex(pkgfile)
      if not d:
        break
      if 'CPV' in d:
        self.packages.append(d)

  def Read(self, pkgfile):
    """Read the entire packages file.

    Args:
      pkgfile: A python file object.
    """
    self._ReadHeader(pkgfile)
    self._ReadBody(pkgfile)

  def RemoveFilteredPackages(self, filter_fn):
    """Remove packages which match filter_fn.

    Sets the 'modified' flag if any package was removed.

    Args:
      filter_fn: A function which operates on packages. If it returns True,
        the package should be removed.
    """

    filtered = [p for p in self.packages if not filter_fn(p)]
    if filtered != self.packages:
      self.modified = True
      self.packages = filtered

  def ResolveDuplicateUploads(self, pkgindexes):
    """Point packages at files that have already been uploaded.

    For each package in our index, check if there is an existing package that
    has already been uploaded to the same base URI. If so, point that package
    at the existing file, so that we don't have to upload the file.

    Args:
      pkgindexes: A list of PackageIndex objects containing info about packages
        that have already been uploaded.

    Returns:
      A list of the packages that still need to be uploaded.
    """
    db = {}
    for pkgindex in pkgindexes:
      pkgindex._PopulateDuplicateDB(db)

    # Match on a whole path component: a plain startswith(base_uri) would also
    # accept sibling locations such as '<base_uri>-old/...' and then compute a
    # PATH that does not exist under base_uri.
    prefix = self.header['URI'].rstrip('/') + '/'

    uploads = []
    for pkg in self.packages:
      sha1 = pkg.get('SHA1')
      uri = db.get(sha1) if sha1 else None
      if uri and uri.startswith(prefix):
        pkg['PATH'] = uri[len(prefix):].lstrip('/')
      else:
        uploads.append(pkg)
    return uploads

  def SetUploadLocation(self, base_uri, path_prefix):
    """Set upload location to base_uri + path_prefix.

    Args:
      base_uri: Base URI for all packages in the file. We set
        self.header['URI'] to this value, so all packages must live under
        this directory.
      path_prefix: Path prefix to use for all current packages in the file.
        This will be added to the beginning of the path for every package.
    """
    self.header['URI'] = base_uri
    prefix = path_prefix.rstrip('/')
    for pkg in self.packages:
      path = urllib.quote(pkg['CPV'] + '.tbz2')
      pkg['PATH'] = '%s/%s' % (prefix, path)

  def Write(self, pkgfile):
    """Write a packages file to disk.

    If 'modified' flag is set, the TIMESTAMP and PACKAGES fields in the header
    will be updated before writing to disk, and the flag is cleared. Packages
    are written sorted by their CPV entry.

    Args:
      pkgfile: A python file object.
    """
    if self.modified:
      # int() rather than long(): the value is only stringified, and long()
      # does not exist on Python 3.
      self.header['TIMESTAMP'] = str(int(time.time()))
      self.header['PACKAGES'] = str(len(self.packages))
      self.modified = False
    self._WritePkgIndex(pkgfile, self.header)
    for metadata in sorted(self.packages, key=operator.itemgetter('CPV')):
      self._WritePkgIndex(pkgfile, metadata)

  def WriteToNamedTemporaryFile(self):
    """Write this package index to a temporary file.

    Returns:
      A tempfile.NamedTemporaryFile containing the serialized index, flushed
      and rewound to the beginning.
    """
    f = tempfile.NamedTemporaryFile()
    self.Write(f)
    f.flush()
    f.seek(0)
    return f
238 | |
239 | |
def _RetryUrlOpen(url, tries=3, delay=10):
  """Open the specified url, retrying if we run into temporary errors.

  We retry for both network errors and 5xx Server Errors. We do not retry
  for HTTP errors with a non-5xx code.

  Args:
    url: The specified url.
    tries: The number of times to try. Must be at least 1.
    delay: Number of seconds to sleep between attempts.

  Returns:
    The result of urllib2.urlopen(url).

  Raises:
    ValueError: If tries is less than 1.
    urllib2.HTTPError: On an HTTP error with a code below 500, or on any HTTP
      error once all tries are used up.
    urllib2.URLError: On a network error once all tries are used up.
  """
  if tries < 1:
    # Without this check the loop body never runs and None would be returned
    # as if it were an open response.
    raise ValueError('tries must be at least 1, got %r' % (tries,))

  for attempt in range(tries):
    try:
      return urllib2.urlopen(url)
    except urllib2.URLError as e:
      # HTTPError is a subclass of URLError, so this single handler covers
      # both; only HTTP errors carry a status code to inspect.
      permanent = isinstance(e, urllib2.HTTPError) and e.code < 500
      if permanent or attempt + 1 >= tries:
        raise
      print('Cannot GET %s: %s' % (url, str(e)))
    print('Sleeping for %s seconds before retrying...' % delay)
    time.sleep(delay)
268 | |
269 | |
def GrabRemotePackageIndex(binhost_url):
  """Grab the latest binary package database from the specified URL.

  Args:
    binhost_url: Base URL of remote packages (PORTAGE_BINHOST).

  Returns:
    A PackageIndex object, if the Packages file can be retrieved. If the
    server returns status code 404, None is returned. If the retrieved header
    has no URI entry, it is set to binhost_url.

  Raises:
    urllib2.HTTPError: On any HTTP error other than 404.
  """

  url = '%s/Packages' % binhost_url.rstrip('/')
  try:
    f = _RetryUrlOpen(url)
  except urllib2.HTTPError as e:
    if e.code == 404:
      return None
    raise

  # Close the response even if parsing fails, so the connection is not leaked.
  try:
    pkgindex = PackageIndex()
    pkgindex.Read(f)
  finally:
    f.close()
  pkgindex.header.setdefault('URI', binhost_url)
  return pkgindex
294 | |
295 | |
def GrabLocalPackageIndex(package_path):
  """Read a local packages file from disk into a PackageIndex() object.

  Args:
    package_path: Directory containing Packages file.

  Returns:
    A PackageIndex object.
  """
  pkgindex = PackageIndex()
  # The with-statement closes the file even if parsing raises.
  with open(os.path.join(package_path, 'Packages')) as packages_file:
    pkgindex.Read(packages_file)
  return pkgindex
OLD | NEW |