Index: parallel_emerge |
diff --git a/parallel_emerge b/parallel_emerge |
new file mode 100755 |
index 0000000000000000000000000000000000000000..72fcc27c6d7d5a1d11ea6f2cb83c33c4e7315acd |
--- /dev/null |
+++ b/parallel_emerge |
@@ -0,0 +1,610 @@ |
+#!/usr/bin/python2.6 |
sosa
2010/07/01 18:56:23
Please refactor this to cros_parallel_emerge and m
|
+# Copyright (c) 2010 The Chromium OS Authors. All rights reserved. |
+# Use of this source code is governed by a BSD-style license that can be |
+# found in the LICENSE file. |
+ |
+"""Program to run emerge in parallel, for significant speedup. |
+ |
+Usage: |
+ ./parallel_emerge --board=BOARD [emerge args] package |
+ |
+Basic operation: |
+ Runs 'emerge -p --debug' to display dependencies, and stores a |
+ dependency graph. All non-blocked packages are launched in parallel, |
+ as 'emerge --nodeps package' with any blocked packages being emerged |
+ immediately upon deps being met. |
+ |
+ For this to work effectively, /usr/lib/portage/pym/portage/locks.py |
+ must be stubbed out, preventing portage from slowing itself with |
+  unnecessary locking, as this script ensures that emerge is run in such |
+ a way that common resources are never in conflict. This is controlled |
+ by an environment variable PORTAGE_LOCKS set in parallel emerge |
+ subprocesses. |
+ |
+ Parallel Emerge unlocks two things during operation, here's what you |
+ must do to keep this safe: |
+ * Storage dir containing binary packages. - Don't emerge new |
+ packages while installing the existing ones. |
+ * Portage database - You must not examine deps while modifying the |
+ database. Therefore you may only parallelize "-p" read only access, |
+ or "--nodeps" write only access. |
+ Caveats: |
+ * Some ebuild packages have incorrectly specified deps, and running |
+ them in parallel is more likely to bring out these failures. |
+ * Portage "world" is a record of explicitly installed packages. In |
+ this parallel scheme, explicitly installed packages are installed |
+ twice, once for the real install, and once for world file addition. |
+ * Some ebuilds (especially the build part) have complex dependencies |
+ that are not captured well by this script (it may be necessary to |
+ install an old package to build, but then install a newer version |
+ of the same package for a runtime dep). This script is only |
+ currently stable for binpkg installs. |
+""" |
+ |
+import os |
sosa
2010/07/01 18:56:23
These are not in alphabetical order.
|
+import re |
+import shlex |
+import subprocess |
+import sys |
+import tempfile |
+import time |
+ |
+ |
def Usage():
  """Print a short usage message, then exit with a failure status."""
  print("Usage:")
  print(" ./parallel_emerge --board=BOARD [emerge args] package")
  sys.exit(1)
+ |
+ |
# Dependencies that are not specified in the package metadata, but will
# prevent the package from installing.  Populated for debugging dependency
# problems; consumed by AddSecretDeps in GenDependencyGraph.
secret_deps = {}

# Globals: package we are building, board we are targeting,
# emerge args we are passing through.  Set once by the main control code
# and read by EmergeCommand().
PACKAGE = None
EMERGE_ARGS = ""
BOARD = None

# Runtime flags. TODO(): maybe make these commandline options or
# environment variables.
# VERBOSE: dump parsed trees, graphs and emerge output as we go.
VERBOSE = False
# AUTOCLEAN: let portage uninstall old packages after each merge; see the
# caveat in _LaunchOneEmerge about parallel safety.
AUTOCLEAN = False
+ |
+ |
def ParseArgs(argv):
  """Split the command line into package names, emerge args, and board.

  We need to be compatible with the emerge arg format: we scrape
  --board=XXX for ourselves and distinguish between pass-through
  emerge flags and package names.
  TODO(): robustify argument processing, as it's possible to
  pass in many two-argument parameters that are difficult
  to programmatically identify, although we don't currently
  use any besides --bdeps <y|n>.
  Args:
    argv: full argument list, including the program name in argv[0].
  Returns:
    Triplet of (package list string, emerge argument string, board string
    or None).  Exits via Usage() if no package was given.
  """
  if VERBOSE:
    print(argv)
  board_arg = None
  package_args = []
  emerge_passthru_args = ""
  re_board = re.compile(r"--board=(?P<board>.*)")
  for arg in argv[1:]:
    # Flags start with '-'.  Bare "y"/"n" are the values of two-argument
    # options such as "--bdeps y" and must also pass through to emerge.
    # (startswith avoids the IndexError arg[0] raised on an empty string.)
    if arg.startswith("-") or arg == "y" or arg == "n":
      # Specifically match "--board=".
      m = re_board.match(arg)
      if m:
        board_arg = m.group("board")
      else:
        # Pass through to emerge.
        emerge_passthru_args = emerge_passthru_args + " " + arg
    else:
      # Any non-flag argument is a target package.
      package_args.append(arg)

  if not package_args:
    # Usage() exits the process; no further exit call is needed.
    Usage()

  return " ".join(package_args), emerge_passthru_args, board_arg
+ |
+ |
def EmergeCommand():
  """Build the base emerge command-line string.

  Picks the board-specific emerge wrapper (e.g. "emerge-<board>") when the
  global BOARD is set, and appends the global pass-through EMERGE_ARGS.
  TODO(): unglobalfy.
  Returns:
    String containing the emerge command.
  """
  base = "emerge-%s" % BOARD if BOARD else "emerge"
  return "%s %s" % (base, EMERGE_ARGS)
+ |
+ |
def GetDepsFromPortage(package):
  """Get dependency tree info by running emerge.

  Run 'emerge -p --debug package', and capture a text dump of all deps.
  TODO(): Put dep calculation in a library, as cros_extract_deps
  also uses this code.
  Args:
    package: string containing the packages to build.
  Returns:
    List of output lines from 'emerge -p --debug', for processing elsewhere.
    Exits with status 1 if emerge fails.
  """
  print("Calculating deps for package %s" % package)
  cmdline = EmergeCommand() + " -p --debug " + package
  print("+ %s" % cmdline)

  # Store output in temp files as it is too big for a unix pipe.
  stderr_buffer = tempfile.TemporaryFile()
  stdout_buffer = tempfile.TemporaryFile()
  # Launch the subprocess.
  depsproc = subprocess.Popen(shlex.split(cmdline), stderr=stderr_buffer,
                              stdout=stdout_buffer, bufsize=64*1024)

  # Poll until the child exits, printing a progress dot every 5 seconds.
  # BUGFIX: the original tested "poll() is not None", which is only true
  # after exit, so the loop never ran, no dots were printed, and the
  # elapsed-time report always read 0:00.
  seconds = 0
  while depsproc.poll() is None:
    seconds += 1
    time.sleep(1)
    if seconds % 5 == 0:
      # Flush so the dot is visible immediately despite no newline.
      sys.stdout.write(".")
      sys.stdout.flush()
  print(" done")

  minutes, secs = divmod(seconds, 60)
  print("Deps calculated in %d:%02ds" % (minutes, secs))

  depsproc.wait()
  stderr_buffer.seek(0)
  stderr_raw = stderr_buffer.read()
  # The dependency digraph is emitted on stderr; everything before the
  # "digraph" marker is noise we can discard.
  info_start = stderr_raw.find("digraph")
  if info_start != -1:
    stdout = stderr_raw[info_start:]
  else:
    # No digraph marker: keep all output so the failure is diagnosable.
    stdout_buffer.seek(0)
    stdout_raw = stdout_buffer.read()
    stdout = stderr_raw + stdout_raw
  if VERBOSE or depsproc.returncode != 0:
    print(stdout)
  if depsproc.returncode != 0:
    print("Failed to generate deps")
    sys.exit(1)

  return stdout.split("\n")
+ |
+ |
def DepsToTree(lines):
  """Regex the emerge --tree output to generate a nested dict of dependencies.

  Args:
    lines: text dump from 'emerge -p --tree package'
  Returns:
    dep_tree: nested dict of dependencies, as specified by emerge.
      Each entry maps a full package name ("category/name-version") to a
      dict with keys "deps" (nested tree), "action" (e.g. merge/nomerge)
      and "deptype".  There may be dupes, or circular deps.

  We need to regex lines as follows:
  hard-host-depends depends on
  ('ebuild', '/', 'dev-lang/swig-1.3.36', 'merge') depends on
  ('ebuild', '/', 'dev-lang/perl-5.8.8-r8', 'merge') (buildtime)
  ('binary', '/.../rootfs/', 'sys-auth/policykit-0.9-r1', 'merge') depends on
  ('binary', '/.../rootfs/', 'x11-misc/xbitmaps-1.1.0', 'merge') (no children)
  """

  # Matches one emerge tuple line; "indent" captures leading non-word
  # characters so we can tell nested entries from top-level ones.
  re_deps = re.compile(r"(?P<indent>\W*)\(\'(?P<pkgtype>\w+)\', "
                       r"\'(?P<destination>[\w/\.-]+)\',"
                       r" \'(?P<pkgdir>[\w\+-]+)/(?P<pkgname>[\w\+-]+)-"
                       r"(?P<version>\d+[\w\.-]*)\', \'(?P<action>\w+)\'\) "
                       r"(?P<deptype>(depends on|\(.*\)))")
  # Matches the freeform "pseudo package" header lines, e.g.
  # "hard-host-depends depends on".
  re_origdeps = re.compile(r"(?P<pkgname>[\w\+/-]+) depends on")
  # Catch-all used to detect dependency lines our main regex missed.
  re_failed = re.compile(r".*depends on.*")

  deps_tree = {}
  # Stack of package names from the root down to the current nesting level.
  deps_stack = []
  for line in lines:
    m = re_deps.match(line)
    m_orig = re_origdeps.match(line)
    if m:
      pkgname = m.group("pkgname")
      pkgdir = m.group("pkgdir")
      pkgtype = m.group("pkgtype")
      indent = m.group("indent")
      doins = m.group("action")
      deptype = m.group("deptype")
      # NOTE(review): depth only distinguishes indented (1) from
      # top-level (0); deeper indentation is not differentiated, so the
      # resulting tree is at most two levels deep below each root --
      # confirm this matches emerge --debug output structure.
      depth = 1
      if not indent:
        depth = 0
      version = m.group("version")

      # If we are indented, we should have
      # found a "depends on" previously.
      if len(deps_stack) < depth:
        print "FAIL: corrupt input at:"
        print line
        print "No Parent."
        sys.exit(1)

      # Go step by step through stack and tree
      # until we find our parent's "deps" dict to insert into.
      updatedep = deps_tree
      for i in range(0, depth):
        updatedep = updatedep[deps_stack[i]]["deps"]

      # Pretty print what we've captured.
      indent = "|" + "".ljust(depth, "_")
      fullpkg = "%s/%s-%s" % (pkgdir, pkgname, version)
      if VERBOSE:
        print ("" + indent + " " + pkgdir + "/" + pkgname + " - " +
               version + " (" + pkgtype + ", " + doins +
               ", " + deptype + ")")

      # Add our new package into the tree, if it's not already there.
      updatedep.setdefault(fullpkg, {})
      # Add an empty deps for this new package.
      updatedep[fullpkg].setdefault("deps", {})
      # Add the action we should take (merge, nomerge).
      updatedep[fullpkg].setdefault("action", doins)
      # Add the type of dep.
      updatedep[fullpkg].setdefault("deptype", deptype)

      # Drop any stack entries below our depth.
      deps_stack = deps_stack[0:depth]
      # Add ourselves to the end of the stack.
      deps_stack.append(fullpkg)
    elif m_orig:
      # Also capture "pseudo packages", which are the freeform text
      # we requested to be installed. These are generic package names
      # like "chromeos" rather than chromeos/chromeos-0.0.1.
      depth = 0
      # Tag these with "original" in case they overlap with real packages.
      pkgname = "original-%s" % m_orig.group("pkgname")
      # Insert this into the deps tree so we can stick it in "world".
      updatedep = deps_tree
      for i in range(0, depth):
        updatedep = updatedep[deps_stack[i]]["deps"]
      if VERBOSE:
        print pkgname
      # Add our new package into the tree, if it's not already there.
      updatedep.setdefault(pkgname, {})
      updatedep[pkgname].setdefault("deps", {})
      # "world" action: marks an explicit install request (see
      # _LaunchOneEmerge for how these are handled).
      updatedep[pkgname].setdefault("action", "world")
      updatedep[pkgname].setdefault("deptype", "normal")

      # Drop any obsolete stack entries.
      deps_stack = deps_stack[0:depth]
      # Add ourselves to the end of the stack.
      deps_stack.append(pkgname)
    else:
      # Is this a dependency line that failed to match our huge regex?
      m = re_failed.match(line)
      if m:
        print "FAIL: Couldn't understand line:"
        print line
        sys.exit(1)

  return deps_tree
+ |
+ |
def PrintTree(deps, depth=""):
  """Recursively pretty-print the parsed dependency tree.

  Args:
    deps: dependency tree structure (package name -> info dict).
    depth: indentation prefix, grown by one space per nesting level.
  """
  for entry, info in deps.items():
    print("%s %s (%s)" % (depth, entry, info["action"]))
    PrintTree(info["deps"], depth=depth + " ")
+ |
+ |
def GenDependencyGraph(deps_tree):
  """Generate a doubly linked dependency graph.

  Flattens the nested tree from DepsToTree() into a map keyed by package,
  linking each package both to its prerequisites and to the packages it
  unblocks, then breaks dependency cycles.
  Args:
    deps_tree: dependency tree structure.
  Returns:
    Deps graph in the form of a dict of packages, with each package
    specifying a "needs" dict (package -> dep type) and a "provides" set
    of packages it unblocks, plus the "action" to take for it.
  """
  deps_map = {}

  def ReverseTree(packages):
    """Convert tree to digraph.

    Take the tree of package -> requirements and reverse it to a digraph of
    buildable packages -> packages they unblock.
    Args:
      packages: tree(s) of dependencies.
    Returns:
      None; populates the enclosing deps_map (unsanitized digraph).
    """
    for pkg in packages:
      action = packages[pkg]["action"]
      # Default new entries to "nomerge"; any occurrence of the package
      # with a real action upgrades it.
      this_pkg = deps_map.setdefault(
          pkg, {"needs": {}, "provides": set(), "action": "nomerge"})
      if action != "nomerge":
        this_pkg["action"] = action
      # Recurse first so every child already has a deps_map entry below.
      ReverseTree(packages[pkg]["deps"])
      for dep, dep_item in packages[pkg]["deps"].items():
        dep_pkg = deps_map[dep]
        dep_type = dep_item["deptype"]
        if dep_type == "(runtime_post)":
          # Post-runtime deps install after the parent, so the edge is
          # reversed: the parent unblocks the dep.
          dep_pkg["needs"][pkg] = dep_type
          this_pkg["provides"].add(dep)
        else:
          dep_pkg["provides"].add(pkg)
          this_pkg["needs"][dep] = dep_type

  def SanitizeDep(basedep, currdep, oldstack, limit):
    """Remove any circular dependencies between basedep, currdep, then recurse.

    Walks the "needs" edges from currdep; whenever a package is found that
    needs basedep back, that closing edge is deleted (and the matching
    "provides" entry removed), breaking the cycle.
    Args:
      basedep: original dependency, top of stack.
      currdep: bottom of our current recursion, bottom of stack.
      oldstack: current dependency chain.
      limit: how many more levels of recursion to go through, max.
    TODO(): Break PDEPEND preferentially, then RDEPEND. Also extract emerge
    linear ordering and break cycles on default emerge linear order.
    """
    if limit == 0:
      return
    for dep in deps_map[currdep]["needs"]:
      stack = oldstack + [dep]
      if basedep in deps_map[dep]["needs"]:
        # Found a cycle: report the chain, then cut the closing edge.
        print "Remove cyclic dependency from:"
        for i in xrange(0, len(stack) - 1):
          print "  %s (%s)-> %s " % (
              stack[i], deps_map[stack[i]]["needs"][stack[i+1]], stack[i+1])
        del deps_map[dep]["needs"][basedep]
        deps_map[basedep]["provides"].remove(dep)
      SanitizeDep(basedep, dep, stack, limit - 1)

  def SanitizeTree():
    """Remove circular dependencies up to cycle length 8."""
    for dep in deps_map:
      SanitizeDep(dep, dep, [dep], 8)

  def AddSecretDeps():
    """Find these tagged packages and add extra dependencies.

    For debugging dependency problems.  Each (bad -> needed) pair in the
    module-level secret_deps dict is matched by substring against the
    graph, and a forced edge is added from bad to needed.
    """
    for bad in secret_deps:
      needed = secret_deps[bad]
      bad_pkg = None
      needed_pkg = None
      for dep in deps_map:
        if dep.find(bad) != -1:
          bad_pkg = dep
        if dep.find(needed) != -1:
          needed_pkg = dep
      if bad_pkg and needed_pkg:
        deps_map[needed_pkg]["provides"].add(bad_pkg)
        deps_map[bad_pkg]["needs"][needed_pkg] = "(manually forced)"

  ReverseTree(deps_tree)
  AddSecretDeps()
  SanitizeTree()
  return deps_map
+ |
+ |
def PrintDepsMap(deps_map):
  """Dump the dependency graph: each package with its prerequisites."""
  for pkg, info in deps_map.items():
    print("%s: (%s) needs" % (pkg, info["action"]))
    for need, dep_type in info["needs"].items():
      print(" %s ( %s )" % (need, dep_type))
+ |
+ |
class EmergeQueue(object):
  """Class to schedule emerge jobs according to a dependency graph."""

  def __init__(self, deps_map):
    # Store the dependency graph (package -> {"needs", "provides",
    # "action"}); entries are removed as packages complete.
    self._deps_map = deps_map
    # Currently running jobs: (target, Popen, output buffer) triplets.
    self._jobs = []
    # Count of total package installs represented in deps_map.
    install_jobs = [x for x in deps_map if deps_map[x]["action"] == "merge"]
    self._total_jobs = len(install_jobs)

    # Initialize the ready queue, these are jobs with no unmet dependencies.
    self._emerge_queue = [x for x in deps_map if not deps_map[x]["needs"]]
    # Failed targets awaiting one retry, and failed target -> output map.
    self._retry_queue = []
    self._failed = {}

  def _Status(self):
    """Print a one-line scheduler status summary."""
    print "Pending %s, Ready %s, Running %s, Failed %s, Total %s" % (
        len(self._deps_map), len(self._emerge_queue),
        len(self._jobs), len(self._failed), self._total_jobs)

  def _LaunchOneEmerge(self, target):
    """Run emerge --nodeps to do a single package install.

    If this is a pseudopackage, that means we're done, and can select it in
    the world file.
    Args:
      target: the full package name of the package to install.
        eg. "sys-apps/portage-2.17"
    Returns:
      triplet containing (target name, subprocess object, output buffer object)
    """
    if target.startswith("original-"):
      # "original-" signifies one of the packages we originally requested.
      # Since we have explicitly installed the versioned package as a dep of
      # this, we only need to tag in "world" that we are done with this
      # install request. "--select --noreplace" indicates an addition to
      # "world" without an actual install.
      newtarget = target.replace("original-", "")
      cmdline = EmergeCommand() + " --nodeps --select --noreplace " + newtarget
    else:
      # This package is a dependency of something we specifically
      # requested. Therefore we should install it but not allow it
      # in the "world" file, which represents explicit installs.
      # "--oneshot" here will prevent it from being tagged in world.
      cmdline = EmergeCommand() + " --nodeps --oneshot =" + target
    if VERBOSE:
      print "running %s" % cmdline

    # Store output in a temp file as it is too big for a unix pipe.
    stdout_buffer = tempfile.TemporaryFile()
    # Modify the environment to disable locking (see module docstring:
    # this scheduler guarantees no resource conflicts between jobs).
    portage_env = os.environ.copy()
    portage_env["PORTAGE_LOCKS"] = "false"
    # Autoclean rummages around in the portage database and uninstalls
    # old packages. Definitely not necessary for build_image. However
    # it may be necessary for incremental build_packages. It may also
    # not be parallel safe.
    if not AUTOCLEAN:
      portage_env["AUTOCLEAN"] = "no"
    # Launch the subprocess; stderr is folded into the stdout buffer.
    emerge_proc = subprocess.Popen(
        shlex.split(cmdline), stdout=stdout_buffer,
        stderr=subprocess.STDOUT, bufsize=64*1024, env=portage_env)

    return (target, emerge_proc, stdout_buffer)

  def _Finish(self, target):
    """Mark a target as completed and unblock dependencies."""
    # Remove this target from each dependent's "needs"; any dependent
    # with no remaining needs becomes ready to emerge.
    for dep in self._deps_map[target]["provides"]:
      del self._deps_map[dep]["needs"][target]
      if not self._deps_map[dep]["needs"]:
        if VERBOSE:
          print "Unblocking %s" % dep
        self._emerge_queue.append(dep)
    self._deps_map.pop(target)

  def _Retry(self):
    # Requeue the oldest failed target for one more attempt.
    if self._retry_queue:
      target = self._retry_queue.pop(0)
      self._emerge_queue.append(target)
      print "Retrying emerge of %s." % target

  def Run(self):
    """Run through the scheduled ebuilds.

    Keep running so long as we have uninstalled packages in the
    dependency graph to merge.  Exits the process with status 1 on a
    repeated package failure or a dependency deadlock.
    """
    while self._deps_map:
      # If we have packages that are ready, kick them off.
      if self._emerge_queue:
        target = self._emerge_queue.pop(0)
        action = self._deps_map[target]["action"]
        # We maintain a tree of all deps, if this doesn't need
        # to be installed just free up its children and continue.
        # It is possible to reinstall deps of deps, without reinstalling
        # first level deps, like so:
        # chromeos (merge) -> eselect (nomerge) -> python (merge)
        if action == "nomerge":
          self._Finish(target)
        else:
          # Kick off the build if it's marked to be built.
          print "Emerging %s (%s)" % (target, action)
          job = self._LaunchOneEmerge(target)
          # Append it to the active jobs list.
          self._jobs.append(job)
        # Drain the whole ready queue before sleeping or polling.
        continue
      # Wait a bit to see if maybe some jobs finish. You can't
      # wait on a set of jobs in python, so we'll just poll.
      time.sleep(1)

      # Check here that we are actually waiting for something.
      if (not self._emerge_queue and
          not self._jobs and
          self._deps_map):
        # If we have failed on a package retry it now.
        if self._retry_queue:
          self._Retry()
        # If we have failed a package twice, just give up.
        # (A second failure is only reported when it happens, below;
        # this is where the process finally exits for it.)
        elif self._failed:
          for failure, output in self._failed.items():
            print "Package failed: %s" % failure
            print output
          PrintDepsMap(self._deps_map)
          print "Packages failed: %s" % " ,".join(self._failed.keys())
          sys.exit(1)
        # If we have dependency cycles.
        else:
          print "Deadlock! Circular dependencies!"
          PrintDepsMap(self._deps_map)
          sys.exit(1)

      # Check every running job to see if we've finished any jobs.
      # NOTE(review): self._jobs is mutated (remove below) while being
      # iterated, which can skip the next job this pass; it is picked up
      # on a later loop iteration -- consider iterating a copy.
      for target, job, stdout in self._jobs:
        # Is it done?
        if job.poll() is not None:
          # Clean up the subprocess.
          job.wait()
          # Get the output if we want to print it.
          stdout.seek(0)
          output = stdout.read()

          # Remove from active jobs list, we are done with this process.
          self._jobs.remove((target, job, stdout))

          # Print if necessary.
          if VERBOSE:
            print output
          if job.returncode != 0:
            # Handle job failure.
            if target in self._failed:
              # If this job has failed previously, give up.
              print "Failed %s. Your build has failed." % target
            else:
              # Queue up this build to try again after a long while.
              self._retry_queue.append(target)
              self._failed[target] = output
              print "Failed %s, retrying later." % target
          else:
            if target in self._failed and self._retry_queue:
              # If we have successfully retried a failed package, and there
              # are more failed packages, try the next one. We will only have
              # one retrying package actively running at a time.
              self._Retry()

            print "Completed %s" % target
            # Mark as completed and unblock waiting ebuilds.
            self._Finish(target)

          # Print an update.
          self._Status()
+ |
+ |
def main(argv):
  """Main control code: parse args, build the dep graph, run the merges.

  Args:
    argv: full command line, including the program name in argv[0].
  """
  # EmergeCommand() reads these module globals; set them here rather than
  # at import time so importing this module has no side effects.
  # TODO(): pass these explicitly instead of using globals.
  global PACKAGE, EMERGE_ARGS, BOARD

  print("Starting fast-emerge.")
  PACKAGE, EMERGE_ARGS, BOARD = ParseArgs(argv)
  print(" Building package %s on %s (%s)" % (PACKAGE, EMERGE_ARGS, BOARD))

  print("Running emerge to generate deps")
  deps_output = GetDepsFromPortage(PACKAGE)
  print("Processing emerge output")
  dependency_tree = DepsToTree(deps_output)
  if VERBOSE:
    print("Print tree")
    PrintTree(dependency_tree)

  print("Generate dependency graph.")
  dependency_graph = GenDependencyGraph(dependency_tree)

  if VERBOSE:
    PrintDepsMap(dependency_graph)

  # Run the queued emerges.
  scheduler = EmergeQueue(dependency_graph)
  scheduler.Run()

  print("Done")


if __name__ == "__main__":
  main(sys.argv)