Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(244)

Side by Side Diff: chromite/lib/binpkg.py

Issue 5344002: Update cbuildbot.py and prebuilt.py to deduplicate preflight prebuilts. (Closed) Base URL: None@preflight_upload
Patch Set: Address comments by dianders. Created 10 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 # Copyright (c) 2010 The Chromium OS Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file.
4 #
5 # Adapted from portage/getbinpkg.py -- Portage binary-package helper functions
6 # Copyright 2003-2004 Gentoo Foundation
7 # Distributed under the terms of the GNU General Public License v2
8
9 import operator
10 import os
11 import tempfile
12 import time
13 import urllib2
14 import urlparse
15
class PackageIndex(object):
    """A parser for the Portage Packages index file.

    The Portage Packages index file serves to keep track of what packages are
    included in a tree. It contains the following sections:
      1) The header. The header tracks general key/value pairs that don't
         apply to any specific package. E.g., it tracks the base URL of the
         packages file, and the number of packages included in the file. The
         header is terminated by a blank line.
      2) The body. The body is a list of packages. Each package contains a
         list of key/value pairs. Packages are either terminated by a blank
         line or by the end of the file. Every package has a CPV entry, which
         serves as a unique identifier for the package.
    """

    def __init__(self):
        """Create an empty index with no header entries and no packages."""

        # The header tracks general key/value pairs that don't apply to any
        # specific package. E.g., it tracks the base URL of the packages.
        self.header = {}

        # A list of packages (stored as a list of dictionaries).
        self.packages = []

        # Whether or not the PackageIndex has been modified since the last
        # time it was written.
        self.modified = False

    @staticmethod
    def _JoinPath(base, path):
        """Return path placed underneath base, treating base as a directory.

        Unlike urljoin, the last component of base is kept whether or not
        base ends with a slash.

        Args:
          base: Directory-like URI or relative path. May be empty.
          path: Relative path to append.
        """
        if not base:
            return path
        return '%s/%s' % (base.rstrip('/'), path)

    def _PopulateDuplicateDB(self, db):
        """Populate db with SHA1 -> URL mapping for packages.

        Packages without a SHA1 entry cannot be matched by checksum and are
        skipped.

        Args:
          db: Dictionary to populate with SHA1 -> URL mapping for packages.
        """
        uri = self.header['URI']
        for pkg in self.packages:
            sha1 = pkg.get('SHA1')
            if not sha1:
                continue
            # Packages that were never relocated live at <CPV>.tbz2.
            path = pkg.get('PATH', pkg['CPV'] + '.tbz2')
            db[sha1] = self._JoinPath(uri, path)

    def _ReadPkgIndex(self, pkgfile):
        """Read one entry (a run of key/value lines) from the Packages file.

        Both header entries and package entries are lists of key/value pairs,
        so they can both be read by this function. Entries can be terminated
        by empty lines or by the end of the file.

        This function will read lines from the specified file until it
        encounters a blank line or the end of the file.

        Keys and values in the Packages file are separated by a colon and a
        space. Values may contain any character except a newline. In
        particular, it is normal for values to contain colons; only the first
        colon-space pair on a line is treated as the separator.

        Lines that have content, and do not contain a valid key/value pair,
        are ignored. This is for compatibility with the Portage package
        parser, and to allow for future extensions to the Packages file
        format.

        All entries must contain at least one key/value pair: a blank line
        seen before any pair has been read trips an assertion. If the end of
        the file is reached, an empty dictionary is returned.

        Args:
          pkgfile: A python file object.

        Returns the dictionary of key-value pairs that was read from the file.
        """
        d = {}
        for line in pkgfile:
            line = line.rstrip('\n')
            if not line:
                assert d, ('Packages entry must contain at least one '
                           'key/value pair')
                break
            parts = line.split(': ', 1)
            if len(parts) == 2:
                k, v = parts
                d[k] = v
        return d

    def _WritePkgIndex(self, pkgfile, entry):
        """Write header entry or package entry to packages file.

        The keys and values will be separated by a colon and a space, keys
        are written in sorted order, and the entry is terminated by a blank
        line. Items whose value is empty (falsy) are not written.

        Args:
          pkgfile: A python file object.
          entry: A dictionary of the key/value pairs to write.
        """
        lines = ['%s: %s' % (k, v) for k, v in sorted(entry.items()) if v]
        pkgfile.write('%s\n\n' % '\n'.join(lines))

    def _ReadHeader(self, pkgfile):
        """Read header of packages file.

        The key/value pairs that are read are merged into the existing
        header dictionary; keys already present are overwritten.

        Args:
          pkgfile: A python file object.
        """
        self.header.update(self._ReadPkgIndex(pkgfile))

    def _ReadBody(self, pkgfile):
        """Read body of packages file.

        Before calling this function, you must first read the header (using
        _ReadHeader). Entries are appended to self.packages; entries without
        a CPV key are dropped.

        Args:
          pkgfile: A python file object.
        """
        # Read all of the sections in the body by looping until we reach the
        # end of the file (signalled by an empty dictionary).
        while True:
            d = self._ReadPkgIndex(pkgfile)
            if not d:
                break
            if 'CPV' in d:
                self.packages.append(d)

    def Read(self, pkgfile):
        """Read the entire packages file.

        Args:
          pkgfile: A python file object.
        """
        self._ReadHeader(pkgfile)
        self._ReadBody(pkgfile)

    def RemoveFilteredPackages(self, filter_fn):
        """Remove packages which match filter_fn.

        Marks the index as modified if any package was removed.

        Args:
          filter_fn: A function which operates on packages. If it returns
            True, the package should be removed.
        """
        filtered = [p for p in self.packages if not filter_fn(p)]
        if filtered != self.packages:
            self.modified = True
            self.packages = filtered

    def ResolveDuplicateUploads(self, pkgindexes):
        """Point packages at files that have already been uploaded.

        For each package in our index, check if there is an existing package
        (same SHA1) that has already been uploaded underneath the same base
        URI. If so, point that package at the existing file, so that we don't
        have to upload the file.

        Args:
          pkgindexes: A list of PackageIndex objects describing packages that
            have already been uploaded.

        Returns:
          A list of the packages that still need to be uploaded.
        """
        db = {}
        for pkgindex in pkgindexes:
            pkgindex._PopulateDuplicateDB(db)

        base_uri = self.header['URI']
        # Compare against the base URI as a directory so that a base of
        # ".../foo" does not accidentally match ".../foobar/...".
        base_dir = base_uri.rstrip('/') + '/' if base_uri else ''

        uploads = []
        for pkg in self.packages:
            sha1 = pkg.get('SHA1')
            uri = db.get(sha1) if sha1 else None
            if uri and uri.startswith(base_dir):
                pkg['PATH'] = uri[len(base_dir):].lstrip('/')
            else:
                uploads.append(pkg)
        return uploads

    def SetUploadLocation(self, base_uri, path_prefix):
        """Set upload location to base_uri + path_prefix.

        Args:
          base_uri: Base URI for all packages in the file. We set
            self.header['URI'] to this value, so all packages must live under
            this directory.
          path_prefix: Path prefix to use for all current packages in the
            file. This will be added to the beginning of the path for every
            package, whether or not it ends with a slash.
        """
        self.header['URI'] = base_uri
        for pkg in self.packages:
            pkg['PATH'] = self._JoinPath(path_prefix, pkg['CPV'] + '.tbz2')

    def Write(self, pkgfile):
        """Write a packages file to disk.

        Packages are written sorted by CPV. If the index has been modified
        since it was last written, this has the side effect of refreshing the
        TIMESTAMP and PACKAGES header fields and clearing the modified flag.

        Args:
          pkgfile: A python file object.
        """
        if self.modified:
            self.header['TIMESTAMP'] = str(int(time.time()))
            self.header['PACKAGES'] = str(len(self.packages))
            self.modified = False
        self._WritePkgIndex(pkgfile, self.header)
        for metadata in sorted(self.packages, key=operator.itemgetter('CPV')):
            self._WritePkgIndex(pkgfile, metadata)

    def WriteToNamedTemporaryFile(self):
        """Write this index to a temporary file.

        Returns:
          A tempfile.NamedTemporaryFile object containing the serialized
          index, flushed and rewound to the beginning.
        """
        f = tempfile.NamedTemporaryFile()
        self.Write(f)
        f.flush()
        f.seek(0)
        return f
229
230
def _RetryUrlOpen(url, tries=3):
    """Open the specified url, retrying if we run into temporary errors.

    Network errors (urllib2.URLError) and HTTP errors with a status code of
    500 or above are retried after a 10 second pause. HTTP errors with a
    lower status code, and any error on the final attempt, are re-raised.

    Args:
      url: The specified url.
      tries: The number of times to try.

    Returns:
      The result of urllib2.urlopen(url).
    """
    attempt = 0
    while attempt < tries:
        attempt += 1
        out_of_tries = attempt >= tries
        try:
            return urllib2.urlopen(url)
        except urllib2.HTTPError as err:
            # HTTPError must be handled before URLError (its base class) so
            # that the status code can be inspected.
            if out_of_tries or err.code < 500:
                raise
            print('Cannot GET %s: %s' % (url, str(err)))
        except urllib2.URLError as err:
            if out_of_tries:
                raise
            print('Cannot GET %s: %s' % (url, str(err)))
        print('Sleeping for 10 seconds before retrying...')
        time.sleep(10)
259
260
def GrabRemotePackageIndex(binhost_url):
    """Grab the latest binary package database from the specified URL.

    The index is fetched from the 'Packages' file relative to binhost_url.
    If the fetched header does not specify a URI, binhost_url is used.

    Args:
      binhost_url: Base URL of remote packages (PORTAGE_BINHOST).

    Returns:
      A PackageIndex object, or None if the server reports that the Packages
      file is missing (HTTP 404).

    Raises:
      urllib2.HTTPError: For any HTTP failure other than a 404.
    """
    url = urlparse.urljoin(binhost_url, 'Packages')
    try:
        f = _RetryUrlOpen(url)
    except urllib2.HTTPError as e:
        if e.code == 404:
            return None
        raise

    # Always release the connection, even if the index fails to parse.
    try:
        pkgindex = PackageIndex()
        pkgindex.Read(f)
    finally:
        f.close()
    pkgindex.header.setdefault('URI', binhost_url)
    return pkgindex
284
285
def GrabLocalPackageIndex(package_path):
    """Read a local packages file from disk into a PackageIndex() object.

    Args:
      package_path: Directory containing Packages file.

    Returns:
      A PackageIndex object.
    """
    pkgindex = PackageIndex()
    # The context manager closes the file even if parsing raises.
    with open(os.path.join(package_path, 'Packages')) as packages_file:
        pkgindex.Read(packages_file)
    return pkgindex
OLDNEW
« no previous file with comments | « bin/cbuildbot_unittest.py ('k') | prebuilt.py » ('j') | prebuilt.py » ('J')

Powered by Google App Engine
This is Rietveld 408576698