OLD | NEW |
---|---|
(Empty) | |
1 # Copyright (c) 2010 The Chromium OS Authors. All rights reserved. | |
2 # Use of this source code is governed by a BSD-style license that can be | |
3 # found in the LICENSE file. | |
4 # | |
5 # Adapted from portage/getbinpkg.py -- Portage binary-package helper functions | |
6 # Copyright 2003-2004 Gentoo Foundation | |
7 # Distributed under the terms of the GNU General Public License v2 | |
8 | |
9 import operator | |
10 import os | |
11 import tempfile | |
12 import time | |
13 import urllib2 | |
14 import urlparse | |
15 | |
class PackageIndex(object):
  """A parser for the Portage Packages index file.

  The Portage Packages index file serves to keep track of what packages are
  included in a tree. It contains the following sections:
    1) The header. The header tracks general key/value pairs that don't apply
       to any specific package. E.g., it tracks the base URL of the packages
       file, and the number of packages included in the file. The header is
       terminated by a blank line.
    2) The body. The body is a list of packages. Each package contains a list
       of key/value pairs. Packages are either terminated by a blank line or
       by the end of the file. Every package has a CPV entry, which serves as
       a unique identifier for the package.
  """

  def __init__(self):
    """Create an empty, unmodified index."""

    # The header tracks general key/value pairs that don't apply to any
    # specific package. E.g., it tracks the base URL of the packages.
    self.header = {}

    # A list of packages (stored as a list of dictionaries).
    self.packages = []

    # Whether or not the PackageIndex has been modified since the last time it
    # was written.
    self.modified = False

  @staticmethod
  def _AsDirectoryUri(uri):
    """Return uri with a trailing slash appended if it lacks one.

    urlparse.urljoin() treats a base that does not end in '/' as naming a
    file and replaces its last path component, so
    urljoin('http://host/a/b', 'c') yields 'http://host/a/c'. Normalizing the
    base first makes the join keep every component of the base.

    Args:
      uri: Base URI or relative path prefix. An empty string is returned
        unchanged so that joining against it still yields a relative path.

    Returns:
      The uri, guaranteed to end in '/' unless it is empty.
    """
    if uri and not uri.endswith('/'):
      return uri + '/'
    return uri

  def _PopulateDuplicateDB(self, db):
    """Populate db with SHA1 -> URL mapping for packages.

    Packages without a (non-empty) SHA1 entry cannot be matched against
    anything and are skipped.

    Args:
      db: Dictionary to populate with SHA1 -> URL mapping for packages.
    """
    base_uri = self._AsDirectoryUri(self.header['URI'])
    for pkg in self.packages:
      # A package without an explicit PATH lives at <CPV>.tbz2.
      path = pkg.get('PATH', pkg['CPV'] + '.tbz2')
      sha1 = pkg.get('SHA1')
      if sha1:
        db[sha1] = urlparse.urljoin(base_uri, path)

  def _ReadPkgIndex(self, pkgfile):
    """Read a list of key/value pairs from the Packages file into a dictionary.

    Both header entries and package entries are lists of key/value pairs, so
    they can both be read by this function. Entries can be terminated by empty
    lines or by the end of the file.

    This function will read lines from the specified file until it encounters
    a blank line or the end of the file.

    Keys and values in the Packages file are separated by a colon and a space.
    Keys may contain capital letters, numbers, and underscores, but may not
    contain colons. Values may contain any character except a newline. In
    particular, it is normal for values to contain colons.

    Lines that have content, and do not contain a valid key/value pair, are
    ignored. This is for compatibility with the Portage package parser, and
    to allow for future extensions to the Packages file format.

    All entries must contain at least one key/value pair. If the end of the
    file is reached, an empty dictionary is returned.

    Args:
      pkgfile: A python file object.

    Returns:
      The dictionary of key-value pairs that was read from the file.

    Raises:
      AssertionError: A blank line was found before any key/value pair was
        read (for example, two consecutive blank lines in the file).
    """
    entry = {}
    for raw_line in pkgfile:
      line = raw_line.rstrip('\n')
      if not line:
        assert entry, 'Packages entry must contain at least one key/value pair'
        break
      # Split on the first ': ' only; values may themselves contain colons.
      key, separator, value = line.partition(': ')
      if separator:
        entry[key] = value
    return entry

  def _WritePkgIndex(self, pkgfile, entry):
    """Write header entry or package entry to packages file.

    The keys and values will be separated by a colon and a space. The entry
    will be terminated by a blank line. Keys are written in sorted order.
    Items whose value is empty are not written at all.

    Args:
      pkgfile: A python file object.
      entry: A dictionary of the key/value pairs to write.
    """
    lines = ['%s: %s' % (k, v) for k, v in sorted(entry.items()) if v]
    pkgfile.write('%s\n\n' % '\n'.join(lines))

  def _ReadHeader(self, pkgfile):
    """Read header of packages file.

    The key/value pairs that are read are merged into self.header; keys that
    are already present are overwritten and other existing keys are kept.

    Args:
      pkgfile: A python file object.
    """
    self.header.update(self._ReadPkgIndex(pkgfile))

  def _ReadBody(self, pkgfile):
    """Read body of packages file.

    Before calling this function, you must first read the header (using
    _ReadHeader). Packages are appended to self.packages, so any packages
    already held by this object are kept.

    Args:
      pkgfile: A python file object.
    """
    # Read all of the sections in the body by looping until we reach the end
    # of the file, which _ReadPkgIndex reports as an empty dictionary.
    while True:
      entry = self._ReadPkgIndex(pkgfile)
      if not entry:
        break
      # Sections without a CPV are not packages; drop them.
      if 'CPV' in entry:
        self.packages.append(entry)

  def Read(self, pkgfile):
    """Read the entire packages file.

    Args:
      pkgfile: A python file object.
    """
    self._ReadHeader(pkgfile)
    self._ReadBody(pkgfile)

  def RemoveFilteredPackages(self, filter_fn):
    """Remove packages which match filter_fn.

    Marks the index as modified if at least one package was removed.

    Args:
      filter_fn: A function which operates on packages. If it returns True,
        the package should be removed.
    """
    filtered = [p for p in self.packages if not filter_fn(p)]
    if filtered != self.packages:
      self.modified = True
      self.packages = filtered

  def ResolveDuplicateUploads(self, pkgindexes):
    """Point packages at files that have already been uploaded.

    For each package in our index, check if there is an existing package that
    has already been uploaded to the same base URI. If so, point that package
    at the existing file, so that we don't have to upload the file.

    Args:
      pkgindexes: A list of PackageIndex objects describing packages that have
        already been uploaded.

    Returns:
      A list of the packages that still need to be uploaded. Packages without
      a SHA1 entry cannot be matched and are always included.
    """
    db = {}
    for pkgindex in pkgindexes:
      pkgindex._PopulateDuplicateDB(db)

    uploads = []
    # Compare against the base URI as a directory so that 'http://h/base' does
    # not accidentally match files stored under 'http://h/base2/'.
    base_uri = self._AsDirectoryUri(self.header['URI'])
    for pkg in self.packages:
      sha1 = pkg.get('SHA1')
      uri = db.get(sha1) if sha1 else None
      if uri and uri.startswith(base_uri):
        pkg['PATH'] = uri[len(base_uri):].lstrip('/')
      else:
        uploads.append(pkg)
    return uploads

  def SetUploadLocation(self, base_uri, path_prefix):
    """Set upload location to base_uri + path_prefix.

    Args:
      base_uri: Base URI for all packages in the file. We set
        self.header['URI'] to this value, so all packages must live under
        this directory.
      path_prefix: Path prefix to use for all current packages in the file.
        This will be added to the beginning of the path for every package.
        A trailing slash is optional.
    """
    self.header['URI'] = base_uri
    prefix = self._AsDirectoryUri(path_prefix)
    for pkg in self.packages:
      pkg['PATH'] = urlparse.urljoin(prefix, pkg['CPV'] + '.tbz2')

  def Write(self, pkgfile):
    """Write the header and all packages to a packages file.

    Packages are written sorted by CPV.

    Side effect: if the index has been modified since it was last written,
    the TIMESTAMP and PACKAGES header entries are refreshed before writing
    and the modified flag is cleared.

    Args:
      pkgfile: A python file object.
    """
    if self.modified:
      self.header['TIMESTAMP'] = str(int(time.time()))
      self.header['PACKAGES'] = str(len(self.packages))
      self.modified = False
    self._WritePkgIndex(pkgfile, self.header)
    for metadata in sorted(self.packages, key=operator.itemgetter('CPV')):
      self._WritePkgIndex(pkgfile, metadata)

  def WriteToNamedTemporaryFile(self):
    """Write this index to a new named temporary file.

    Returns:
      The tempfile.NamedTemporaryFile object holding the serialized index,
      flushed and rewound to the beginning so that it can be read directly.
    """
    f = tempfile.NamedTemporaryFile()
    self.Write(f)
    f.flush()
    f.seek(0)
    return f
229 | |
230 | |
def _RetryUrlOpen(url, tries=3, delay=10):
  """Open the specified url, retrying if we run into temporary errors.

  We retry for both network errors and 5xx Server Errors. We do not retry
  for HTTP errors with a non-5xx code.

  Args:
    url: The specified url.
    tries: The number of times to try. Must be at least 1.
    delay: Number of seconds to sleep between attempts.

  Returns:
    The result of urllib2.urlopen(url).

  Raises:
    ValueError: tries is less than 1, so no attempt could be made.
    urllib2.HTTPError: The server answered with a non-5xx error code, or with
      a 5xx code on the final attempt.
    urllib2.URLError: A network error occurred on the final attempt.
  """
  if tries < 1:
    raise ValueError('tries must be at least 1, got %r' % (tries,))

  for attempt in range(1, tries + 1):
    try:
      return urllib2.urlopen(url)
    except urllib2.URLError as e:
      # HTTPError is a subclass of URLError. Only its 5xx codes are treated
      # as temporary; every other HTTP error is reported immediately.
      permanent = isinstance(e, urllib2.HTTPError) and e.code < 500
      if permanent or attempt >= tries:
        raise
      print('Cannot GET %s: %s' % (url, str(e)))
    print('Sleeping for %s seconds before retrying...' % delay)
    time.sleep(delay)
259 | |
260 | |
def GrabRemotePackageIndex(binhost_url):
  """Grab the latest binary package database from the specified URL.

  Args:
    binhost_url: Base URL of remote packages (PORTAGE_BINHOST). A trailing
      slash is optional.

  Returns:
    A PackageIndex object, or None if the server reports that the Packages
    file does not exist (HTTP 404).

  Raises:
    urllib2.HTTPError: The server answered with an error other than 404.
  """
  # urljoin() replaces the last path component of a base that lacks a trailing
  # slash, so treat the binhost URL as a directory before appending the name.
  base_url = binhost_url
  if base_url and not base_url.endswith('/'):
    base_url += '/'
  url = urlparse.urljoin(base_url, 'Packages')

  try:
    f = _RetryUrlOpen(url)
  except urllib2.HTTPError as e:
    if e.code == 404:
      return None
    raise

  # Close the response even if the index cannot be parsed.
  try:
    pkgindex = PackageIndex()
    pkgindex.Read(f)
  finally:
    f.close()

  # Indexes that do not name their own base URI live at the binhost itself.
  pkgindex.header.setdefault('URI', binhost_url)
  return pkgindex
284 | |
285 | |
def GrabLocalPackageIndex(package_path):
  """Read a local packages file from disk into a PackageIndex() object.

  Args:
    package_path: Directory containing Packages file.

  Returns:
    A PackageIndex object.
  """
  pkgindex = PackageIndex()
  # The with-block closes the file even if parsing the index fails.
  with open(os.path.join(package_path, 'Packages')) as packages_file:
    pkgindex.Read(packages_file)
  return pkgindex
OLD | NEW |