Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(175)

Side by Side Diff: chromite/lib/binpkg.py

Issue 5344002: Update cbuildbot.py and prebuilt.py to deduplicate preflight prebuilts. (Closed) Base URL: None@preflight_upload
Patch Set: Reset against cros/master Created 10 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « bin/cbuildbot_unittest.py ('k') | prebuilt.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 # Copyright (c) 2010 The Chromium OS Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file.
4 #
5 # Adapted from portage/getbinpkg.py -- Portage binary-package helper functions
6 # Copyright 2003-2004 Gentoo Foundation
7 # Distributed under the terms of the GNU General Public License v2
8
9 import operator
10 import os
11 import tempfile
12 import time
13 import urllib2
14 import urlparse
15
class PackageIndex(object):
  """A parser for the Portage Packages index file.

  The Portage Packages index file serves to keep track of what packages are
  included in a tree. It contains the following sections:
    1) The header. The header tracks general key/value pairs that don't apply
       to any specific package. E.g., it tracks the base URL of the packages
       file, and the number of packages included in the file. The header is
       terminated by a blank line.
    2) The body. The body is a list of packages. Each package contains a list
       of key/value pairs. Packages are either terminated by a blank line or
       by the end of the file. Every package has a CPV entry, which serves as
       a unique identifier for the package.
  """

  def __init__(self):
    """Create an empty index with no header entries and no packages."""

    # The header tracks general key/value pairs that don't apply to any
    # specific package. E.g., it tracks the base URL of the packages.
    self.header = {}

    # A list of packages (stored as a list of dictionaries).
    self.packages = []

    # Whether or not the PackageIndex has been modified since the last time it
    # was written.
    self.modified = False

  @staticmethod
  def _JoinUri(base, path):
    """Return path placed underneath base, joined by exactly one slash.

    urlparse.urljoin is deliberately not used here: it discards the last
    segment of a base that lacks a trailing slash, and for schemes it does
    not know to be hierarchical (e.g. gs://) it returns only the relative
    part.

    Args:
      base: Base URI or path prefix. May be empty, with or without a trailing
        slash.
      path: Relative path to append.

    Returns:
      The joined string, or path unchanged if base is empty.
    """
    if not base:
      return path
    return '%s/%s' % (base.rstrip('/'), path)

  def _PopulateDuplicateDB(self, db):
    """Populate db with SHA1 -> URL mapping for packages.

    Packages that have no SHA1 entry cannot be matched against anything, so
    they are skipped.

    Args:
      db: Dictionary to populate with SHA1 -> URL mapping for packages.
    """
    uri = self.header['URI']
    for pkg in self.packages:
      sha1 = pkg.get('SHA1')
      if not sha1:
        continue
      # Packages without an explicit PATH live at <CPV>.tbz2 under the URI.
      path = pkg.get('PATH', pkg['CPV'] + '.tbz2')
      db[sha1] = self._JoinUri(uri, path)

  def _ReadPkgIndex(self, pkgfile):
    """Read a list of key/value pairs from the Packages file into a dictionary.

    Both header entries and package entries are lists of key/value pairs, so
    they can both be read by this function. Entries can be terminated by empty
    lines or by the end of the file.

    This function will read lines from the specified file until it encounters
    a blank line or the end of the file.

    Keys and values in the Packages file are separated by a colon and a space.
    Keys may contain capital letters, numbers, and underscores, but may not
    contain colons. Values may contain any character except a newline. In
    particular, it is normal for values to contain colons.

    Lines that have content, and do not contain a valid key/value pair, are
    ignored. This is for compatibility with the Portage package parser, and
    to allow for future extensions to the Packages file format.

    All entries must contain at least one key/value pair. If the end of the
    file is reached, an empty dictionary is returned.

    Args:
      pkgfile: A python file object.

    Returns:
      The dictionary of key-value pairs that was read from the file.
    """
    d = {}
    for line in pkgfile:
      line = line.rstrip('\n')
      if not line:
        # A blank line terminates the entry; an entry may not be empty.
        assert d, 'Packages entry must contain at least one key/value pair'
        break
      # Split on the first ': ' only, since values may contain colons.
      fields = line.split(': ', 1)
      if len(fields) == 2:
        k, v = fields
        d[k] = v
    return d

  def _WritePkgIndex(self, pkgfile, entry):
    """Write header entry or package entry to packages file.

    The keys and values will be separated by a colon and a space. Keys are
    written in sorted order, and keys whose value is empty are omitted. The
    entry will be terminated by a blank line.

    Args:
      pkgfile: A python file object.
      entry: A dictionary of the key/value pairs to write.
    """
    lines = ['%s: %s' % (k, v) for k, v in sorted(entry.items()) if v]
    pkgfile.write('%s\n\n' % '\n'.join(lines))

  def _ReadHeader(self, pkgfile):
    """Read header of packages file.

    Args:
      pkgfile: A python file object.
    """
    assert not self.header, 'Should only read header once.'
    self.header = self._ReadPkgIndex(pkgfile)

  def _ReadBody(self, pkgfile):
    """Read body of packages file.

    Before calling this function, you must first read the header (using
    _ReadHeader).

    Args:
      pkgfile: A python file object.
    """
    assert self.header, 'Should read header first.'
    assert not self.packages, 'Should only read body once.'

    # Read all of the sections in the body by looping until we reach the end
    # of the file (signalled by an empty dictionary).
    while True:
      d = self._ReadPkgIndex(pkgfile)
      if not d:
        break
      # Sections without a CPV do not describe a package; drop them.
      if 'CPV' in d:
        self.packages.append(d)

  def Read(self, pkgfile):
    """Read the entire packages file.

    Args:
      pkgfile: A python file object.
    """
    self._ReadHeader(pkgfile)
    self._ReadBody(pkgfile)

  def RemoveFilteredPackages(self, filter_fn):
    """Remove packages which match filter_fn.

    Sets the 'modified' flag if any package was removed.

    Args:
      filter_fn: A function which operates on packages. If it returns True,
        the package should be removed.
    """
    filtered = [p for p in self.packages if not filter_fn(p)]
    if filtered != self.packages:
      self.modified = True
      self.packages = filtered

  def ResolveDuplicateUploads(self, pkgindexes):
    """Point packages at files that have already been uploaded.

    For each package in our index, check if there is an existing package that
    has already been uploaded to the same base URI. If so, point that package
    at the existing file, so that we don't have to upload the file.

    Args:
      pkgindexes: A list of PackageIndex objects containing info about packages
        that have already been uploaded.

    Returns:
      A list of the packages that still need to be uploaded.
    """
    db = {}
    for pkgindex in pkgindexes:
      pkgindex._PopulateDuplicateDB(db)

    # Require a directory boundary after the base URI, so that a base of
    # '.../board' does not claim files uploaded under '.../board2'.
    prefix = self.header['URI'].rstrip('/') + '/'

    uploads = []
    for pkg in self.packages:
      # A package without a SHA1 can never be matched; it must be uploaded.
      uri = db.get(pkg.get('SHA1'))
      if uri and uri.startswith(prefix):
        pkg['PATH'] = uri[len(prefix):].lstrip('/')
      else:
        uploads.append(pkg)
    return uploads

  def SetUploadLocation(self, base_uri, path_prefix):
    """Set upload location to base_uri + path_prefix.

    Args:
      base_uri: Base URI for all packages in the file. We set
        self.header['URI'] to this value, so all packages must live under
        this directory.
      path_prefix: Path prefix to use for all current packages in the file.
        This will be added to the beginning of the path for every package.
        A trailing slash is optional.
    """
    self.header['URI'] = base_uri
    for pkg in self.packages:
      pkg['PATH'] = self._JoinUri(path_prefix, pkg['CPV'] + '.tbz2')

  def Write(self, pkgfile):
    """Write a packages file to disk.

    If 'modified' flag is set, the TIMESTAMP and PACKAGES fields in the header
    will be updated before writing to disk, and the flag is cleared. Packages
    are written in order of their CPV.

    Args:
      pkgfile: A python file object.
    """
    if self.modified:
      self.header['TIMESTAMP'] = str(int(time.time()))
      self.header['PACKAGES'] = str(len(self.packages))
      self.modified = False
    self._WritePkgIndex(pkgfile, self.header)
    for metadata in sorted(self.packages, key=operator.itemgetter('CPV')):
      self._WritePkgIndex(pkgfile, metadata)

  def WriteToNamedTemporaryFile(self):
    """Write this package index to a temporary file.

    Returns:
      A temporary file object containing the serialized index, flushed and
      rewound to the beginning so that it can be read immediately.
    """
    f = tempfile.NamedTemporaryFile()
    self.Write(f)
    f.flush()
    f.seek(0)
    return f
236
237
def _RetryUrlOpen(url, tries=3):
  """Open the given url, trying again after transient failures.

  Network-level errors and HTTP 5xx server errors are retried; any other
  HTTP error is raised immediately. Between attempts the function reports
  the failure and sleeps for ten seconds. The error from the final attempt
  is always propagated to the caller.

  Args:
    url: The url to open.
    tries: The maximum number of attempts to make.

  Returns:
    The result of urllib2.urlopen(url).
  """
  attempt = 0
  while attempt < tries:
    attempt += 1
    out_of_tries = attempt >= tries
    try:
      return urllib2.urlopen(url)
    except urllib2.HTTPError as e:
      # HTTPError must be handled before URLError, which is its base class.
      if out_of_tries or e.code < 500:
        raise
      print('Cannot GET %s: %s' % (url, str(e)))
    except urllib2.URLError as e:
      if out_of_tries:
        raise
      print('Cannot GET %s: %s' % (url, str(e)))
    print('Sleeping for 10 seconds before retrying...')
    time.sleep(10)
266
267
def GrabRemotePackageIndex(binhost_url):
  """Grab the latest binary package database from the specified URL.

  Args:
    binhost_url: Base URL of remote packages (PORTAGE_BINHOST). A trailing
      slash is optional.

  Returns:
    A PackageIndex object, if the Packages file can be retrieved. If the
    server returns status code 404, None is returned. If the retrieved index
    has no URI header entry, it is set to binhost_url.
  """
  # Join by hand rather than with urlparse.urljoin, which would replace the
  # last path segment of a binhost_url that lacks a trailing slash.
  url = '%s/Packages' % binhost_url.rstrip('/')
  try:
    f = _RetryUrlOpen(url)
  except urllib2.HTTPError as e:
    if e.code == 404:
      return None
    raise

  # Make sure the connection is released even if parsing fails.
  try:
    pkgindex = PackageIndex()
    pkgindex.Read(f)
  finally:
    f.close()
  pkgindex.header.setdefault('URI', binhost_url)
  return pkgindex
292
293
def GrabLocalPackageIndex(package_path):
  """Read a local packages file from disk into a PackageIndex() object.

  Args:
    package_path: Directory containing Packages file.

  Returns:
    A PackageIndex object.
  """
  pkgindex = PackageIndex()
  # The with-statement closes the file even if parsing raises.
  with open(os.path.join(package_path, 'Packages')) as packages_file:
    pkgindex.Read(packages_file)
  return pkgindex
OLDNEW
« no previous file with comments | « bin/cbuildbot_unittest.py ('k') | prebuilt.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698