| OLD | NEW |
| (Empty) | |
| 1 # Copyright 2016 The Chromium Authors. All rights reserved. |
| 2 # Use of this source code is governed by a BSD-style license that can be |
| 3 # found in the LICENSE file. |
| 4 |
| 5 class Occurrence(list): |
| 6 """A list of indices where something occurs in a list. |
| 7 |
| 8 The list of indices can be accessed directly, since this class is a |
| 9 subclass of |list|. In addition to this list, we also have a |name| |
| 10 property which specifies what thing is occurring in those positions. For |
| 11 our uses here, the name is a string denoting either a project name |
| 12 (e.g., 'chromium' or 'chromium-skia') or a component name (e.g., |
| 13 'Blink>API' or 'Blink>DOM'). |
| 14 """ |
| 15 def __init__(self, name, indices=None): |
| 16 super(Occurrence, self).__init__(indices or []) |
| 17 self.name = name |
| 18 |
| 19 |
| 20 def GetOccurrences(names): |
| 21 """Return a concordance of elements in a list. |
| 22 |
| 23 Args: |
| 24 names (list): a list of "names". Typically names are strings, but |
| 25 they're actually allowed to be anything which can serve as a key |
| 26 in a dict. |
| 27 |
| 28 Returns: |
| 29 A dict mapping each "name" in |names| to an Occurrence object, |
| 30 each of which contains a list of the indices where that name occurs |
| 31 in |names|. |
| 32 """ |
| 33 occurrences = {} |
| 34 for index, name in enumerate(names or []): |
| 35 if name not in occurrences: |
| 36 occurrences[name] = Occurrence(name, [index]) |
| 37 else: |
| 38 occurrences[name].append(index) |
| 39 |
| 40 return occurrences |
| 41 |
| 42 |
| 43 def DefaultOccurrenceRanking(occurrence): |
| 44 """Default function for ranking an occurrence. |
| 45 |
| 46 Note: The default behavior works for component classifier and for |
| 47 project classifier, it works for cpp callstack class ranking. |
| 48 |
| 49 Args: |
| 50 occurrence (Occurrence): a collection of indices where some "name" |
| 51 occured in a sequence. |
| 52 |
| 53 Returns: |
| 54 A pair of the weight/priority for this |occurrence|, and the index |
| 55 of the first time its name appeared in the sequence the |occurrence| |
| 56 came from. |
| 57 """ |
| 58 # If the first two elements in the sequence are in this class, then |
| 59 # give it highest priority. |
| 60 if 0 in occurrence and 1 in occurrence: |
| 61 return -float('inf'), occurrence[0] |
| 62 |
| 63 return -len(occurrence), occurrence[0] |
| 64 |
| 65 |
| 66 # TODO(wrengr): it'd be nice to have the ranking function decide how |
| 67 # much of the input sequence to look at, rather than the caller deciding |
| 68 # once and for all. Of course, doing that would mean having the |
| 69 # |Occurrence| class lazily traverse the sequence, with some sort of |
| 70 # productivity guarantee. |
| 71 def RankByOccurrence(names, top_n, rank_function=None): |
| 72 """Rank the things occurring in a sequence according to some function. |
| 73 |
| 74 Given any sequence of "names", construct a concordance and return |
| 75 the few highest-ranking names according to a function for ranking |
| 76 |Occurrence|s. N.B., this function is generic in the length of the |
| 77 input sequence, so it's up to callers to truncate the sequence if |
| 78 they so desire. |
| 79 |
| 80 Args: |
| 81 names (list): a list of "names". Typically names are strings, but |
| 82 they're actually allowed to be anything which can serve as a key |
| 83 in a dict. |
| 84 top_n (int): how many results to return. |
| 85 rank_function (callable): what rank value to give an occurrence. If |
| 86 you don't supply this argument, or if you provide a falsy value, |
| 87 then we will fall back to using the |DefaultOccurrenceRanking|. |
| 88 |
| 89 Returns: |
| 90 A length-|top_n| list of "names" ordered by the |rank_function|. |
| 91 """ |
| 92 # TODO(wrengr): can't we do this sorting/filtering/truncation in one pass? |
| 93 if not rank_function: # pragma: no cover. |
| 94 rank_function = DefaultOccurrenceRanking |
| 95 occurrences = sorted(GetOccurrences(names).values(), key=rank_function) |
| 96 |
| 97 # Filter out unnamed classes. Alas, we can't sort these out before |
| 98 # constructing the concordance, because then our indices would be off. |
| 99 classes = [occurrence.name for occurrence in occurrences if occurrence.name] |
| 100 |
| 101 return classes[:top_n] |
| OLD | NEW |