| OLD | NEW |
| (Empty) | |
| 1 # Copyright 2016 The Chromium Authors. All rights reserved. |
| 2 # Use of this source code is governed by a BSD-style license that can be |
| 3 # found in the LICENSE file. |
| 4 |
| 5 class Occurrence(list): |
| 6 """A list of indices where something occurs in a list. |
| 7 |
| 8 The list of indices can be accessed directly, since this class is a |
| 9 subclass of |list|. In addition to this list, we also have a |name| |
| 10 property which specifies what thing is occurring in those positions. For |
| 11 our uses here, the name is a string denoting either a project name |
| 12 (e.g., 'chromium' or 'chromium-skia') or a component name (e.g., |
| 13 'Blink>API' or 'Blink>DOM'). |
| 14 """ |
| 15 def __init__(self, name, indices=None): |
| 16 super(Occurrence, self).__init__(indices or []) |
| 17 self._name = name |
| 18 |
| 19 # TODO(http://crbug.com/644476): even though "name" is fine for our |
| 20 # use case (strings identifying Projects & Components), we should |
| 21 # probably use a more general name for this. |
| 22 @property |
| 23 def name(self): |
| 24 """The \"name\" this object is a list of occurrence indices for.""" |
| 25 return self._name |
| 26 |
| 27 |
| 28 def GetOccurrences(names): |
| 29 """Return a concordance of elements in a list. |
| 30 |
| 31 Args: |
| 32 names (list): a list of "names". Typically names are strings, but |
| 33 they're actually allowed to be anything which can serve as a key |
| 34 in a dict. |
| 35 |
| 36 Returns: |
| 37 A dict mapping each "name" in |names| to an Occurrence object, |
| 38 each of which contains a list of the indices where that name occurs |
| 39 in |names|. |
| 40 """ |
| 41 occurrences = {} |
| 42 for index, name in enumerate(names or []): |
| 43 if name not in occurrences: |
| 44 occurrences[name] = Occurrence(name, [index]) |
| 45 else: |
| 46 occurrences[name].append(index) |
| 47 |
| 48 return occurrences |
| 49 |
| 50 |
| 51 def DefaultOccurrenceRanking(occurrence): |
| 52 """Default function for ranking an occurrence. |
| 53 |
| 54 Note: The default behavior works for component classifier and for |
| 55 project classifier, it works for cpp callstack class ranking. |
| 56 |
| 57 Args: |
| 58 occurrence (Occurrence): a collection of indices where some "name" |
| 59 occured in a sequence. |
| 60 |
| 61 Returns: |
| 62 A pair of the weight/priority for this |occurrence|, and the index |
| 63 of the first time its name appeared in the sequence the |occurrence| |
| 64 came from. |
| 65 """ |
| 66 # If the first two elements in the sequence are in this class, then |
| 67 # give it highest priority. |
| 68 if 0 in occurrence and 1 in occurrence: |
| 69 return -float('inf'), occurrence[0] |
| 70 |
| 71 return -len(occurrence), occurrence[0] |
| 72 |
| 73 |
| 74 # TODO(wrengr): it'd be nice to have the ranking function decide how |
| 75 # much of the input sequence to look at, rather than the caller deciding |
| 76 # once and for all. Of course, doing that would mean having the |
| 77 # |Occurrence| class lazily traverse the sequence, with some sort of |
| 78 # productivity guarantee. |
| 79 def RankByOccurrence(names, top_n, rank_function=None): |
| 80 """Rank the things occurring in a sequence according to some function. |
| 81 |
| 82 Given any sequence of "names", construct a concordance and return |
| 83 the few highest-ranking names according to a function for ranking |
| 84 |Occurrence|s. N.B., this function is generic in the length of the |
| 85 input sequence, so it's up to callers to truncate the sequence if |
| 86 they so desire. |
| 87 |
| 88 Args: |
| 89 names (list): a list of "names". Typically names are strings, but |
| 90 they're actually allowed to be anything which can serve as a key |
| 91 in a dict. |
| 92 top_n (int): how many results to return. |
| 93 rank_function (callable): what rank value to give an occurrence. If |
| 94 you don't supply this argument, or if you provide a falsy value, |
| 95 then we will fall back to using the |DefaultOccurrenceRanking|. |
| 96 |
| 97 Returns: |
| 98 A length-|top_n| list of "names" ordered by the |rank_function|. |
| 99 """ |
| 100 if not rank_function: # pragma: no cover. |
| 101 rank_function = DefaultOccurrenceRanking |
| 102 |
| 103 # TODO(wrengr): can't we do this sorting/filtering/truncation in one pass? |
| 104 occurrences = sorted(GetOccurrences(names).values(), key=rank_function) |
| 105 |
| 106 # Filter out unnamed classes. Alas, we can't sort these out before |
| 107 # constructing the concordance, because then our indices would be off. |
| 108 # TODO(wrengr): generalize the filter function into another parameter. |
| 109 classes = [occurrence.name for occurrence in occurrences if occurrence.name] |
| 110 |
| 111 return classes[:top_n] |
| OLD | NEW |