Chromium Code Reviews| OLD | NEW |
|---|---|
| (Empty) | |
| 1 // Copyright (c) 2013 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 #ifndef TOOLS_GN_UNIQUIFY_H_ | |
| 6 #define TOOLS_GN_UNIQUIFY_H_ | |
| 7 | |
| 8 #include <algorithm> | |
| 9 | |
| 10 #include "base/containers/hash_tables.h" | |
| 11 | |
| 12 namespace internal { | |
| 13 | |
| 14 // This struct allows us to insert things by reference into a hash table. This | |
| 15 // is used to avoid copying strings just to uniquify them. The hash function | |
| 16 // of a UniquifyRef is that of the object it points to. | |
| 17 // | |
| 18 // It also caches the hash value which allows us to query and then insert | |
| 19 // without recomputing the hash. | |
| 20 template<typename T> | |
| 21 struct UniquifyRef { | |
| 22 UniquifyRef() : value(NULL), hash_val(0) {} | |
| 23 UniquifyRef(T* v) : value(v) { | |
| 24 #if defined(COMPILER_GCC) | |
| 25 BASE_HASH_NAMESPACE::hash<T> h; | |
| 26 hash_val = h(*value); | |
| 27 #elif defined(COMPILER_MSVC) | |
| 28 hash_val = BASE_HASH_NAMESPACE::hash_value(*value); | |
| 29 #else | |
| 30 #error write me | |
| 31 #endif // COMPILER... | |
| 32 } | |
| 33 | |
| 34 T* value; | |
| 35 size_t hash_val; | |
| 36 | |
| 37 bool operator==(const UniquifyRef<T>& other) const { | |
| 38 return *value == *other.value; | |
| 39 } | |
| 40 bool operator<(const UniquifyRef<T>& other) const { | |
| 41 return *value < *other.value; | |
| 42 } | |
| 43 }; | |
| 44 | |
| 45 } // namespace internal | |
| 46 | |
| 47 namespace BASE_HASH_NAMESPACE { | |
| 48 | |
| 49 #if defined(COMPILER_GCC) | |
| 50 template<typename T> struct hash< internal::UniquifyRef<T> > { | |
| 51 std::size_t operator()(const internal::UniquifyRef<T>& v) const { | |
| 52 return v.hash_val; | |
| 53 } | |
| 54 }; | |
| 55 #elif defined(COMPILER_MSVC) | |
| 56 template<typename T> | |
| 57 inline size_t hash_value(const internal::UniquifyRef<T>& v) { | |
| 58 return v.hash_val; | |
| 59 } | |
| 60 #endif // COMPILER... | |
| 61 | |
| 62 } // namespace BASE_HASH_NAMESPACE | |
| 63 | |
| 64 // Uniquifies based on brute-force equality comparisons. | |
| 65 template<typename T> | |
| 66 inline void UniquifyBruteForce(std::vector<T>* container) { | |
| 67 if (container->size() <= 1) | |
| 68 return; | |
| 69 | |
| 70 size_t dest_i = 1; | |
| 71 for (size_t source_i = 1; source_i < container->size(); source_i++) { | |
| 72 std::vector<T>::iterator last = container->begin() + dest_i; | |
| 73 T& cur = (*container)[source_i]; | |
| 74 if (std::find(container->begin(), last, cur) == last) { | |
| 75 // Not seen yet. | |
| 76 if (source_i != dest_i) | |
| 77 std::swap(cur, (*container)[dest_i]); | |
|
viettrungluu
2013/10/08 23:39:41
Having to do a swap is a little unfortunate (since
| |
| 78 dest_i++; | |
| 79 } | |
| 80 } | |
| 81 container->resize(dest_i); | |
| 82 } | |
| 83 | |
| 84 // Uniquifies based on a hash-table comparison. | |
| 85 template<typename T> | |
| 86 inline void UniquifyHash(std::vector<T>* container) { | |
| 87 if (container->size() <= 1) | |
| 88 return; | |
| 89 | |
| 90 typedef internal::UniquifyRef<T> Ref; | |
| 91 typedef base::hash_set<Ref> HashSet; | |
| 92 | |
| 93 HashSet hash; | |
| 94 hash.insert(Ref(&(*container)[0])); | |
| 95 | |
| 96 size_t dest_i = 1; | |
| 97 for (size_t source_i = 1; source_i < container->size(); source_i++) { | |
| 98 Ref source_ref(&(*container)[source_i]); | |
| 99 if (hash.find(source_ref) == hash.end()) { | |
| 100 // Not seen yet. | |
| 101 hash.insert(source_ref); | |
| 102 if (source_i != dest_i) | |
| 103 std::swap((*container)[source_i], (*container)[dest_i]); | |
| 104 dest_i++; | |
| 105 } | |
| 106 // Otherwise we've seen this one before and we can skip it. | |
| 107 } | |
| 108 container->resize(dest_i); | |
| 109 } | |
| 110 | |
| 111 // Uniqifies items in the given vector. The first occurance of a given item | |
| 112 // will be kept, any remaining ones will be dropped. | |
| 113 // | |
| 114 // This function is somewhat like std::unique. std::unique only removes | |
| 115 // sequential duplicates, and just moves duplicates to the end since it can't | |
| 116 // resize the container via iteratos alone. | |
| 117 // | |
| 118 // Since this version assumes the input is a vector, we can actually do the | |
| 119 // resize. We also remove duplicates no matter what the ordering is. | |
| 120 // | |
| 121 // The objects in the vecotr must support hashing and equality. | |
| 122 template<typename T> | |
| 123 inline void Uniquify(std::vector<T>* container) { | |
| 124 // Containers this size or smaller get brute-force compares rather than | |
| 125 // using a hash table. This saves a bunch of allocations for the hash table. | |
| 126 // This number was picked based on measurements for 32-byte strings which | |
| 127 // seem like a reasonable average compare for our workload (this test is in | |
| 128 // the unittest file). | |
| 129 // | |
| 130 // Interestingly, for longer strings (assuming they're random and there are | |
| 131 // mostly no duplicates) the threshold will actually be higher because a | |
| 132 // string compare will terminate before checking all the characters, while a | |
| 133 // hash has to walk the whole string. | |
| 134 const size_t kBruteForceThreshold = 75; | |
| 135 if (container->size() <= kBruteForceThreshold) | |
| 136 UniquifyBruteForce(container); | |
| 137 else | |
| 138 UniquifyHash(container); | |
| 139 } | |
| 140 | |
| 141 #endif // TOOLS_GN_UNIQUIFY_H_ | |
| OLD | NEW |