Chromium Code Reviews| Index: owners.py |
| diff --git a/owners.py b/owners.py |
| index 1b1775f08c48ecbbb720c7d622c30d5932e7f84e..9049f2a7d02342e9fd103236691198b0ab760ec2 100644 |
| --- a/owners.py |
| +++ b/owners.py |
| @@ -49,6 +49,7 @@ Examples for all of these combinations can be found in tests/owners_unittest.py. |
| """ |
| import collections |
| +import random |
| import re |
| @@ -252,60 +253,62 @@ class Database(object): |
| ('%s is not a "set" directive, "*", ' |
| 'or an email address: "%s"' % (line_type, directive))) |
| - |
| def _covering_set_of_owners_for(self, files): |
| - # Get the set of directories from the files. |
| - dirs = set() |
| - for f in files: |
| - dirs.add(self._enclosing_dir_with_owners(f)) |
| - |
| - |
| - owned_dirs = {} |
| - dir_owners = {} |
| - |
| + dirs_remaining = set(self._enclosing_dir_with_owners(f) for f in files) |
| + all_possible_owners = self._all_possible_owners(dirs_remaining) |
| + suggested_owners = set() |
| + while dirs_remaining: |
| + owner = self.lowest_cost_owner(all_possible_owners, dirs_remaining) |
| + suggested_owners.add(owner) |
| + for dirname, _ in all_possible_owners[owner]: |
| + dirs_remaining.remove(dirname) |
| + return suggested_owners |
| + |
| + def _all_possible_owners(self, dirs): |
| + |
| + """Returns a list of (potential owner, distance-from-dir) tuples; a |
| + distance of 1 is the lowest/closest possible distance (which makes the |
| + subsequent math easier).""" |
| + |
| + all_possible_owners = {} |
| for current_dir in dirs: |
| - # Get the list of owners for each directory. |
| - current_owners = set() |
| dirname = current_dir |
| - while dirname in self.owners_for: |
| - current_owners |= self.owners_for[dirname] |
| + distance = 1 |
| + while True: |
| + for owner in self.owners_for.get(dirname, []): |
| + all_possible_owners.setdefault(owner, []) |
| + # It's possible the same owner might match a directory from |
| + # multiple files, and we only want the closest entry. |
| + if not current_dir in [el[0] for el in all_possible_owners[owner]]: |
|
M-A Ruel
2012/12/19 01:41:45
if would be efficient to do:
if not any(current_di
|
| + all_possible_owners[owner].append((current_dir, distance)) |
| if self._stop_looking(dirname): |
| break |
| - prev_parent = dirname |
| dirname = self.os_path.dirname(dirname) |
| - if prev_parent == dirname: |
| - break |
| - |
| - # Map each directory to a list of its owners. |
| - dir_owners[current_dir] = current_owners |
| - |
| - # Add the directory to the list of each owner. |
| - for owner in current_owners: |
| - owned_dirs.setdefault(owner, set()).add(current_dir) |
| - |
| - final_owners = set() |
| - while dirs: |
| - # Find the owner that has the most directories. |
| - max_count = 0 |
| - max_owner = None |
| - owner_count = {} |
| - for dirname in dirs: |
| - for owner in dir_owners[dirname]: |
| - count = owner_count.get(owner, 0) + 1 |
| - owner_count[owner] = count |
| - if count >= max_count: |
| - max_owner = owner |
| - max_count = count |
| - |
| - # If no more directories have OWNERS, we're done. |
| - if not max_owner: |
| - break |
| - |
| - final_owners.add(max_owner) |
| - |
| - # Remove all directories owned by the current owner from the remaining |
| - # list. |
| - for dirname in owned_dirs[max_owner]: |
| - dirs.discard(dirname) |
| - |
| - return final_owners |
| + distance += 1 |
| + return all_possible_owners |
| + |
| + @staticmethod |
| + def lowest_cost_owner(all_possible_owners, dirs): |
| + # We want to minimize both the number of reviewers and the distance |
| + # from the files/dirs needing reviews. The "pow(X, 1.75)" below is |
| + # an arbitrarily-selected scaling factor that seems to work well - it |
| + # will select one reviewer in the parent directory over three reviewers |
| + # in subdirs, but not one reviewer over just two. |
| + total_costs_by_owner = {} |
| + for owner in all_possible_owners: |
| + total_distance = 0 |
| + num_directories_owned = 0 |
| + for dirname, distance in all_possible_owners[owner]: |
| + if dirname in dirs: |
| + total_distance += distance |
| + num_directories_owned += 1 |
| + if num_directories_owned: |
| + total_costs_by_owner[owner] = (total_distance / |
| + pow(num_directories_owned, 1.75)) |
| + |
| + # Return the lowest cost owner. In the case of a tie, pick one randomly. |
| + lowest_cost = min(total_costs_by_owner.itervalues()) |
| + lowest_cost_owners = filter( |
| + lambda owner: total_costs_by_owner[owner] == lowest_cost, |
| + total_costs_by_owner) |
| + return random.Random().choice(lowest_cost_owners) |