Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(259)

Side by Side Diff: parallel_emerge

Issue 2827037: Add --fast to build_image (Closed) Base URL: ssh://gitrw.chromium.org/crosutils.git
Patch Set: .. Created 10 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « build_image ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 #!/usr/bin/python2.6
sosa 2010/07/01 18:56:23 Please refactor this to cros_parallel_emerge and m
2 # Copyright (c) 2010 The Chromium OS Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
5
6 """Program to run emerge in parallel, for significant speedup.
7
8 Usage:
9 ./parallel_emerge --board=BOARD [emerge args] package
10
11 Basic operation:
12 Runs 'emerge -p --debug' to display dependencies, and stores a
13 dependency graph. All non-blocked packages are launched in parallel,
14 as 'emerge --nodeps package' with any blocked packages being emerged
15 immediately upon deps being met.
16
17 For this to work effectively, /usr/lib/portage/pym/portage/locks.py
18 must be stubbed out, preventing portage from slowing itself with
19 unnecessary locking, as this script ensures that emerge is run in such
20 a way that common resources are never in conflict. This is controlled
21 by an environment variable PORTAGE_LOCKS set in parallel emerge
22 subprocesses.
23
24 Parallel Emerge unlocks two things during operation, here's what you
25 must do to keep this safe:
26 * Storage dir containing binary packages. - Don't emerge new
27 packages while installing the existing ones.
28 * Portage database - You must not examine deps while modifying the
29 database. Therefore you may only parallelize "-p" read only access,
30 or "--nodeps" write only access.
31 Caveats:
32 * Some ebuild packages have incorrectly specified deps, and running
33 them in parallel is more likely to bring out these failures.
34 * Portage "world" is a record of explicitly installed packages. In
35 this parallel scheme, explicitly installed packages are installed
36 twice, once for the real install, and once for world file addition.
37 * Some ebuilds (especially the build part) have complex dependencies
38 that are not captured well by this script (it may be necessary to
39 install an old package to build, but then install a newer version
40 of the same package for a runtime dep). This script is only
41 currently stable for binpkg installs.
42 """
43
44 import os
sosa 2010/07/01 18:56:23 These are not in alphabetical order.
45 import re
46 import shlex
47 import subprocess
48 import sys
49 import tempfile
50 import time
51
52
def Usage():
  """Print command-line usage instructions, then exit unsuccessfully."""
  usage_lines = [
      "Usage:",
      " ./parallel_emerge --board=BOARD [emerge args] package",
  ]
  for line in usage_lines:
    print(line)
  sys.exit(1)
57
58
# Dependencies that are not specified in the package metadata, but will
# nevertheless prevent the package from installing.  Used for debugging
# dependency problems; consumed by GenDependencyGraph's AddSecretDeps.
secret_deps = {}

# Globals: package we are building, board we are targeting,
# emerge args we are passing through.  Set from the command line at startup.
PACKAGE = None
EMERGE_ARGS = ""
BOARD = None

# Runtime flags.  TODO(): maybe make these command-line options or
# environment variables.
VERBOSE = False     # Dump dep trees, emerge output, and debug chatter.
AUTOCLEAN = False   # Let portage uninstall old packages (not parallel safe).
73
74
def ParseArgs(argv):
  """Scrape the board, emerge passthrough args, and packages from argv.

  We need to be compatible with emerge arg format.  We scrape --board=XXX,
  and distinguish between emerge arguments and package names.
  TODO(): robustify argument processing; it is possible to pass in
  two-token options that are difficult to programmatically identify,
  although we don't currently use any besides "--bdeps <y|n>".

  Args:
    argv: arguments list; argv[0] is skipped, as with sys.argv.
  Returns:
    Triplet of (package string, emerge passthrough args, board string).
    Exits via Usage() if no packages were given.
  """
  if VERBOSE:
    print(argv)
  board_arg = None
  package_args = []
  emerge_passthru_args = ""
  re_board = re.compile(r"--board=(?P<board>.*)")
  for arg in argv[1:]:
    # Options start with '-'; bare "y"/"n" are values for "--bdeps".
    if arg[0] == "-" or arg == "y" or arg == "n":
      # Specifically match "--board=".
      m = re_board.match(arg)
      if m:
        board_arg = m.group("board")
      else:
        # Not one of ours; pass through to emerge.
        emerge_passthru_args = emerge_passthru_args + " " + arg
    else:
      # Only non-dashed args should be the target packages.
      package_args.append(arg)

  if not package_args:
    # Usage() exits the process; the original's trailing sys.exit(1)
    # was unreachable and has been removed.
    Usage()

  return " ".join(package_args), emerge_passthru_args, board_arg
116
117
def EmergeCommand():
  """Build the base emerge command line for the configured board.

  Uses the BOARD and EMERGE_ARGS globals: the binary is "emerge-<board>"
  when a board is set, plain "emerge" otherwise, with passthrough args
  appended.  TODO(): unglobalfy.

  Returns:
    string containing emerge command.
  """
  binary = "emerge-" + BOARD if BOARD else "emerge"
  return binary + " " + EMERGE_ARGS
130
131
def GetDepsFromPortage(package):
  """Get dependency tree info by running emerge.

  Run 'emerge -p --debug package', and get a text output of all deps.
  TODO(): Put dep calculation in a library, as cros_extract_deps
  also uses this code.

  Args:
    package: string containing the packages to build.
  Returns:
    Text output of 'emerge -p --debug' split into lines, which can be
    processed elsewhere.  Exits on emerge failure.
  """
  print("Calculating deps for package %s" % package)
  cmdline = EmergeCommand() + " -p --debug " + package
  print("+ %s" % cmdline)

  # Store output in a temp file as it is too big for a unix pipe.
  stderr_buffer = tempfile.TemporaryFile()
  stdout_buffer = tempfile.TemporaryFile()
  # Launch the subprocess.
  depsproc = subprocess.Popen(shlex.split(cmdline), stderr=stderr_buffer,
                              stdout=stdout_buffer, bufsize=64*1024)

  # Poll until emerge finishes, printing a progress dot every 5 seconds.
  # BUG FIX: the original tested "poll() is not None", which is false
  # while the process is still running, so the loop exited immediately
  # and the elapsed-time counter never advanced.
  seconds = 0
  while depsproc.poll() is None:
    seconds += 1
    time.sleep(1)
    if seconds % 5 == 0:
      sys.stdout.write(".")
      sys.stdout.flush()
  print(" done")

  print("Deps calculated in %d:%02ds" % (seconds / 60, seconds % 60))

  depsproc.wait()
  # The "digraph" section of emerge's debug output lands on stderr;
  # prefer it when present, otherwise fall back to everything captured.
  stderr_buffer.seek(0)
  stderr_raw = stderr_buffer.read()
  info_start = stderr_raw.find("digraph")
  if info_start != -1:
    stdout = stderr_raw[info_start:]
  else:
    stdout_buffer.seek(0)
    stdout_raw = stdout_buffer.read()
    stdout = stderr_raw + stdout_raw
  if VERBOSE or depsproc.returncode != 0:
    print(stdout)
  if depsproc.returncode != 0:
    print("Failed to generate deps")
    sys.exit(1)

  lines = stdout.split("\n")
  return lines
183
184
def DepsToTree(lines):
  """Regex the emerge --tree output to generate a nested dict of dependencies.

  Args:
    lines: text dump from 'emerge -p --tree package'
  Returns:
    dep_tree: nested dict of dependencies, as specified by emerge.
    there may be dupes, or circular deps.  Each node maps a fully
    versioned package name to {"deps": {...}, "action": ..., "deptype": ...}.

  We need to regex lines as follows:
  hard-host-depends depends on
  ('ebuild', '/', 'dev-lang/swig-1.3.36', 'merge') depends on
  ('ebuild', '/', 'dev-lang/perl-5.8.8-r8', 'merge') (buildtime)
  ('binary', '/.../rootfs/', 'sys-auth/policykit-0.9-r1', 'merge') depends on
  ('binary', '/.../rootfs/', 'x11-misc/xbitmaps-1.1.0', 'merge') (no children)
  """

  # Matches one full dependency tuple line; named groups pull out the
  # category (pkgdir), package name, version, install action and dep type.
  re_deps = re.compile(r"(?P<indent>\W*)\(\'(?P<pkgtype>\w+)\', "
                       r"\'(?P<destination>[\w/\.-]+)\',"
                       r" \'(?P<pkgdir>[\w\+-]+)/(?P<pkgname>[\w\+-]+)-"
                       r"(?P<version>\d+[\w\.-]*)\', \'(?P<action>\w+)\'\) "
                       r"(?P<deptype>(depends on|\(.*\)))")
  # Matches the bare "<name> depends on" lines emitted for the packages
  # the user asked for on the command line.
  re_origdeps = re.compile(r"(?P<pkgname>[\w\+/-]+) depends on")
  # Catch-all: any remaining "depends on" line means our regexes missed it.
  re_failed = re.compile(r".*depends on.*")

  deps_tree = {}
  # Stack of package names from the root down to the most recent match;
  # its length tracks the current nesting depth of the emerge output.
  deps_stack = []
  for line in lines:
    m = re_deps.match(line)
    m_orig = re_origdeps.match(line)
    if m:
      pkgname = m.group("pkgname")
      pkgdir = m.group("pkgdir")
      pkgtype = m.group("pkgtype")
      indent = m.group("indent")
      doins = m.group("action")
      deptype = m.group("deptype")
      # Any leading non-word characters put this entry at depth 1;
      # an unindented line is a new depth-0 root.
      depth = 1
      if not indent:
        depth = 0
      version = m.group("version")

      # If we are indented, we should have
      # found a "depends on" previously.
      if len(deps_stack) < depth:
        print "FAIL: corrupt input at:"
        print line
        print "No Parent."
        sys.exit(1)

      # Go step by step through stack and tree
      # until we find our parent. Generate
      updatedep = deps_tree
      for i in range(0, depth):
        updatedep = updatedep[deps_stack[i]]["deps"]

      # Pretty print what we've captured.
      indent = "|" + "".ljust(depth, "_")
      fullpkg = "%s/%s-%s" % (pkgdir, pkgname, version)
      if VERBOSE:
        print ("" + indent + " " + pkgdir + "/" + pkgname + " - " +
               version + " (" + pkgtype + ", " + doins +
               ", " + deptype + ")")

      # Add our new package into the tree, if it's not already there.
      updatedep.setdefault(fullpkg, {})
      # Add an empty deps for this new package.
      updatedep[fullpkg].setdefault("deps", {})
      # Add the action we should take (merge, nomerge).
      updatedep[fullpkg].setdefault("action", doins)
      # Add the type of dep.
      updatedep[fullpkg].setdefault("deptype", deptype)

      # Drop any stack entries below our depth.
      deps_stack = deps_stack[0:depth]
      # Add ourselves to the end of the stack.
      deps_stack.append(fullpkg)
    elif m_orig:
      # Also capture "pseudo packages", which are the freeform text
      # we requested to be installed. These are generic package names
      # like "chromeos" rather than chromeos/chromeos-0.0.1
      depth = 0
      # Tag these with "original" in case they overlap with real packages.
      pkgname = "original-%s" % m_orig.group("pkgname")
      # Insert this into the deps tree so we can stick it in "world"
      updatedep = deps_tree
      for i in range(0, depth):
        updatedep = updatedep[deps_stack[i]]["deps"]
      if VERBOSE:
        print pkgname
      # Add our new package into the tree, if it's not already there.
      updatedep.setdefault(pkgname, {})
      updatedep[pkgname].setdefault("deps", {})
      # Add the type of dep.
      updatedep[pkgname].setdefault("action", "world")
      updatedep[pkgname].setdefault("deptype", "normal")

      # Drop any obsolete stack entries.
      deps_stack = deps_stack[0:depth]
      # Add ourselves to the end of the stack.
      deps_stack.append(pkgname)
    else:
      # Is this a package that failed to match our huge regex?
      m = re_failed.match(line)
      if m:
        print "FAIL: Couldn't understand line:"
        print line
        sys.exit(1)

  return deps_tree
295
296
def PrintTree(deps, depth=""):
  """Recursively dump the dependency tree captured from emerge output.

  Args:
    deps: dependency tree structure (dict of package -> info dict).
    depth: indentation prefix; grows by one space per recursion level.
  """
  for entry, info in deps.items():
    print("%s %s (%s)" % (depth, entry, info["action"]))
    PrintTree(info["deps"], depth=depth + " ")
308
309
def GenDependencyGraph(deps_tree):
  """Generate a doubly linked dependency graph.

  Args:
    deps_tree: dependency tree structure.
  Returns:
    Deps graph in the form of a dict of packages, with each package
    specifying a "needs" list and "provides" list.
  """
  deps_map = {}

  def ReverseTree(packages):
    """Convert tree to digraph.

    Take the tree of package -> requirements and reverse it to a digraph of
    buildable packages -> packages they unblock
    Args:
      packages: tree(s) of dependencies
    Returns:
      unsanitized digraph
    """
    for pkg in packages:
      action = packages[pkg]["action"]
      # First sighting of a package defaults to "nomerge"; a later
      # sighting with a real action overrides it.
      this_pkg = deps_map.setdefault(
          pkg, {"needs": {}, "provides": set(), "action": "nomerge"})
      if action != "nomerge":
        this_pkg["action"] = action
      ReverseTree(packages[pkg]["deps"])
      for dep, dep_item in packages[pkg]["deps"].items():
        dep_pkg = deps_map[dep]
        dep_type = dep_item["deptype"]
        # A "(runtime_post)" dep gets its edge inverted: the dependency
        # is allowed to install after this package.
        if dep_type == "(runtime_post)":
          dep_pkg["needs"][pkg] = dep_type
          this_pkg["provides"].add(dep)
        else:
          dep_pkg["provides"].add(pkg)
          this_pkg["needs"][dep] = dep_type

  def SanitizeDep(basedep, currdep, oldstack, limit):
    """Remove any circular dependencies between basedep, currdep, then recurse.

    Args:
      basedep: original dependency, top of stack.
      currdep: bottom of our current recursion, bottom of stack.
      oldstack: current dependency chain.
      limit: how many more levels of recursion to go through, max.
    TODO(): Break PDEPEND preferentially, then RDEPEND. Also extract emerge
    linear ordering and break cycles on default emerge linear order.
    """
    if limit == 0:
      return
    for dep in deps_map[currdep]["needs"]:
      stack = oldstack + [dep]
      # If dep needs basedep then basedep -> ... -> dep -> basedep is a
      # cycle; break it by dropping dep's back-edge to basedep.
      if basedep in deps_map[dep]["needs"]:
        print "Remove cyclic dependency from:"
        for i in xrange(0, len(stack) - 1):
          print " %s (%s)-> %s " % (
              stack[i], deps_map[stack[i]]["needs"][stack[i+1]], stack[i+1])
        del deps_map[dep]["needs"][basedep]
        deps_map[basedep]["provides"].remove(dep)
      SanitizeDep(basedep, dep, stack, limit - 1)

  def SanitizeTree():
    """Remove circular dependencies up to cycle length 8."""
    for dep in deps_map:
      SanitizeDep(dep, dep, [dep], 8)

  def AddSecretDeps():
    """Find these tagged packages and add extra dependencies.

    For debugging dependency problems.
    """
    for bad in secret_deps:
      needed = secret_deps[bad]
      bad_pkg = None
      needed_pkg = None
      # Match by substring: secret_deps holds short names, while deps_map
      # keys are fully versioned package strings.
      for dep in deps_map:
        if dep.find(bad) != -1:
          bad_pkg = dep
        if dep.find(needed) != -1:
          needed_pkg = dep
      if bad_pkg and needed_pkg:
        deps_map[needed_pkg]["provides"].add(bad_pkg)
        deps_map[bad_pkg]["needs"][needed_pkg] = "(manually forced)"

  ReverseTree(deps_tree)
  AddSecretDeps()
  SanitizeTree()
  return deps_map
399
400
def PrintDepsMap(deps_map):
  """Print dependency graph, for each package list it's prerequisites."""
  for pkg, info in deps_map.items():
    print("%s: (%s) needs" % (pkg, info["action"]))
    for prereq, dep_type in info["needs"].items():
      print(" %s ( %s )" % (prereq, dep_type))
407
408
409 class EmergeQueue(object):
410 """Class to schedule emerge jobs according to a dependency graph."""
411
412 def __init__(self, deps_map):
413 # Store the dependency graph.
414 self._deps_map = deps_map
415 # Initialize the runnable queue to empty.
416 self._jobs = []
417 # List of total package installs represented in deps_map.
418 install_jobs = [x for x in deps_map if deps_map[x]["action"] == "merge"]
419 self._total_jobs = len(install_jobs)
420
421 # Initialize the ready queue, these are jobs with no unmet dependencies.
422 self._emerge_queue = [x for x in deps_map if not deps_map[x]["needs"]]
423 # Initialize the failed queue to empty.
424 self._retry_queue = []
425 self._failed = {}
426
427 def _Status(self):
428 """Print status."""
429 print "Pending %s, Ready %s, Running %s, Failed %s, Total %s" % (
430 len(self._deps_map), len(self._emerge_queue),
431 len(self._jobs), len(self._failed), self._total_jobs)
432
433 def _LaunchOneEmerge(self, target):
434 """Run emerge --nodeps to do a single package install.
435
436 If this is a pseudopackage, that means we're done, and can select in in the
437 world file.
438 Args:
439 target: the full package name of the package to install.
440 eg. "sys-apps/portage-2.17"
441 Returns:
442 triplet containing (target name, subprocess object, output buffer object)
443 """
444 if target.startswith("original-"):
445 # "original-" signifies one of the packages we originally requested.
446 # Since we have explicitly installed the versioned package as a dep of
447 # this, we only need to tag in "world" that we are done with this
448 # install request. "--select -n" indicates an addition to "world"
449 # without an actual install.
450 newtarget = target.replace("original-", "")
451 cmdline = EmergeCommand() + " --nodeps --select --noreplace " + newtarget
452 else:
453 # This package is a dependency of something we specifically
454 # requested. Therefore we should install it but not allow it
455 # in the "world" file, which represents explicit intalls.
456 # "--oneshot" here will prevent it from being tagged in world.
457 cmdline = EmergeCommand() + " --nodeps --oneshot =" + target
458 if VERBOSE:
459 print "running %s" % cmdline
460
461 # Store output in a temp file as it is too big for a unix pipe.
462 stdout_buffer = tempfile.TemporaryFile()
463 # Modify the environment to disable locking.
464 portage_env = os.environ.copy()
465 portage_env["PORTAGE_LOCKS"] = "false"
466 # Autoclean rummages around in the portage database and uninstalls
467 # old packages. Definitely not necessary for build_image. However
468 # it may be necessary for incremental build_packages. It may also
469 # not be parallel safe.
470 if not AUTOCLEAN:
471 portage_env["AUTOCLEAN"] = "no"
472 # Launch the subprocess.
473 emerge_proc = subprocess.Popen(
474 shlex.split(cmdline), stdout=stdout_buffer,
475 stderr=subprocess.STDOUT, bufsize=64*1024, env=portage_env)
476
477 return (target, emerge_proc, stdout_buffer)
478
479 def _Finish(self, target):
480 """Mark a target as completed and unblock dependecies."""
sosa 2010/07/01 18:56:23 misspelling of dependencies
481 for dep in self._deps_map[target]["provides"]:
482 del self._deps_map[dep]["needs"][target]
483 if not self._deps_map[dep]["needs"]:
484 if VERBOSE:
485 print "Unblocking %s" % dep
486 self._emerge_queue.append(dep)
487 self._deps_map.pop(target)
488
489 def _Retry(self):
490 if self._retry_queue:
491 target = self._retry_queue.pop(0)
492 self._emerge_queue.append(target)
493 print "Retrying emerge of %s." % target
494
495 def Run(self):
496 """Run through the scheduled ebuilds.
497
498 Keep running so long as we have uninstalled packages in the
499 dependency graph to merge.
500 """
501 while self._deps_map:
502 # If we have packages that are ready, kick them off.
503 if self._emerge_queue:
504 target = self._emerge_queue.pop(0)
505 action = self._deps_map[target]["action"]
506 # We maintain a tree of all deps, if this doesn't need
507 # to be installed just free up it's children and continue.
508 # It is possible to reinstall deps of deps, without reinstalling
509 # first level deps, like so:
510 # chromeos (merge) -> eselect (nomerge) -> python (merge)
511 if action == "nomerge":
512 self._Finish(target)
513 else:
514 # Kick off the build if it's marked to be built.
515 print "Emerging %s (%s)" % (target, action)
516 job = self._LaunchOneEmerge(target)
517 # Append it to the active jobs list.
518 self._jobs.append(job)
519 continue
520 # Wait a bit to see if maybe some jobs finish. You can't
521 # wait on a set of jobs in python, so we'll just poll.
522 time.sleep(1)
523
524 # Check here that we are actually waiting for something.
525 if (not self._emerge_queue and
526 not self._jobs and
527 self._deps_map):
528 # If we have failed on a package retry it now.
529 if self._retry_queue:
530 self._Retry()
531 # If we have failed a package twice, just give up.
532 elif self._failed:
533 for failure, output in self._failed.items():
534 print "Package failed: %s" % failure
535 print output
536 PrintDepsMap(self._deps_map)
537 print "Packages failed: %s" % " ,".join(self._failed.keys())
538 sys.exit(1)
539 # If we have dependency cycles.
540 else:
541 print "Deadlock! Circular dependencies!"
542 PrintDepsMap(self._deps_map)
543 sys.exit(1)
544
545 # Check every running job to see if we've finished any jobs.
546 for target, job, stdout in self._jobs:
547 # Is it done?
548 if job.poll() is not None:
549 # Clean up the subprocess.
550 job.wait()
551 # Get the output if we want to print it.
552 stdout.seek(0)
553 output = stdout.read()
554
555 # Remove from active jobs list, we are done with this process.
556 self._jobs.remove((target, job, stdout))
557
558 # Print if necessary.
559 if VERBOSE:
560 print output
561 if job.returncode != 0:
562 # Handle job failure.
563 if target in self._failed:
564 # If this job has failed previously, give up.
565 print "Failed %s. Your build has failed." % target
566 else:
567 # Queue up this build to try again after a long while.
568 self._retry_queue.append(target)
569 self._failed[target] = output
570 print "Failed %s, retrying later." % target
571 else:
572 if target in self._failed and self._retry_queue:
573 # If we have successfully retried a failed package, and there
574 # are more failed packages, try the next one. We will only have
575 # one retrying package actively running at a time.
576 self._Retry()
577
578 print "Completed %s" % target
579 # Mark as completed and unblock waiting ebuilds.
580 self._Finish(target)
581
582 # Print an update.
583 self._Status()
584
585
def main():
  """Main control code: compute the dep graph, then run parallel emerges."""
  # EmergeCommand() and friends read these module-level globals, so keep
  # assigning them even though the flow now lives in a function.
  global PACKAGE, EMERGE_ARGS, BOARD

  print("Starting fast-emerge.")
  PACKAGE, EMERGE_ARGS, BOARD = ParseArgs(sys.argv)
  print(" Building package %s on %s (%s)" % (PACKAGE, EMERGE_ARGS, BOARD))

  print("Running emerge to generate deps")
  deps_output = GetDepsFromPortage(PACKAGE)
  print("Processing emerge output")
  dependency_tree = DepsToTree(deps_output)
  if VERBOSE:
    print("Print tree")
    PrintTree(dependency_tree)

  print("Generate dependency graph.")
  dependency_graph = GenDependencyGraph(dependency_tree)

  if VERBOSE:
    PrintDepsMap(dependency_graph)

  # Run the queued emerges.
  scheduler = EmergeQueue(dependency_graph)
  scheduler.Run()

  print("Done")


if __name__ == "__main__":
  main()
OLDNEW
« no previous file with comments | « build_image ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698