OLD | NEW |
---|---|
(Empty) | |
1 // Copyright (c) 2013 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 #ifndef TOOLS_GN_UNIQUIFY_H_ | |
6 #define TOOLS_GN_UNIQUIFY_H_ | |
7 | |
8 #include <algorithm> | |
9 | |
10 #include "base/containers/hash_tables.h" | |
11 | |
12 namespace internal { | |
13 | |
14 // This struct allows us to insert things by reference into a hash table. This | |
15 // is used to avoid copying strings just to uniquify them. The hash function | |
16 // of a UniquifyRef is that of the object it points to. | |
17 // | |
18 // It also caches the hash value which allows us to query and then insert | |
19 // without recomputing the hash. | |
20 template<typename T> | |
21 struct UniquifyRef { | |
22 UniquifyRef() : value(NULL), hash_val(0) {} | |
23 UniquifyRef(T* v) : value(v) { | |
24 #if defined(COMPILER_GCC) | |
25 BASE_HASH_NAMESPACE::hash<T> h; | |
26 hash_val = h(*value); | |
27 #elif defined(COMPILER_MSVC) | |
28 hash_val = BASE_HASH_NAMESPACE::hash_value(*value); | |
29 #else | |
30 #error write me | |
31 #endif // COMPILER... | |
32 } | |
33 | |
34 T* value; | |
35 size_t hash_val; | |
36 | |
37 bool operator==(const UniquifyRef<T>& other) const { | |
38 return *value == *other.value; | |
39 } | |
40 bool operator<(const UniquifyRef<T>& other) const { | |
41 return *value < *other.value; | |
42 } | |
43 }; | |
44 | |
45 } // namespace internal | |
46 | |
47 namespace BASE_HASH_NAMESPACE { | |
48 | |
49 #if defined(COMPILER_GCC) | |
50 template<typename T> struct hash< internal::UniquifyRef<T> > { | |
51 std::size_t operator()(const internal::UniquifyRef<T>& v) const { | |
52 return v.hash_val; | |
53 } | |
54 }; | |
55 #elif defined(COMPILER_MSVC) | |
56 template<typename T> | |
57 inline size_t hash_value(const internal::UniquifyRef<T>& v) { | |
58 return v.hash_val; | |
59 } | |
60 #endif // COMPILER... | |
61 | |
62 } // namespace BASE_HASH_NAMESPACE | |
63 | |
64 // Uniquifies based on brute-force equality comparisons. | |
65 template<typename T> | |
66 inline void UniquifyBruteForce(std::vector<T>* container) { | |
67 if (container->size() <= 1) | |
68 return; | |
69 | |
70 size_t dest_i = 1; | |
71 for (size_t source_i = 1; source_i < container->size(); source_i++) { | |
72 std::vector<T>::iterator last = container->begin() + dest_i; | |
73 T& cur = (*container)[source_i]; | |
74 if (std::find(container->begin(), last, cur) == last) { | |
75 // Not seen yet. | |
76 if (source_i != dest_i) | |
77 std::swap(cur, (*container)[dest_i]); | |
viettrungluu
2013/10/08 23:39:41
Having to do a swap is a little unfortunate (since
| |
78 dest_i++; | |
79 } | |
80 } | |
81 container->resize(dest_i); | |
82 } | |
83 | |
84 // Uniquifies based on a hash-table comparison. | |
85 template<typename T> | |
86 inline void UniquifyHash(std::vector<T>* container) { | |
87 if (container->size() <= 1) | |
88 return; | |
89 | |
90 typedef internal::UniquifyRef<T> Ref; | |
91 typedef base::hash_set<Ref> HashSet; | |
92 | |
93 HashSet hash; | |
94 hash.insert(Ref(&(*container)[0])); | |
95 | |
96 size_t dest_i = 1; | |
97 for (size_t source_i = 1; source_i < container->size(); source_i++) { | |
98 Ref source_ref(&(*container)[source_i]); | |
99 if (hash.find(source_ref) == hash.end()) { | |
100 // Not seen yet. | |
101 hash.insert(source_ref); | |
102 if (source_i != dest_i) | |
103 std::swap((*container)[source_i], (*container)[dest_i]); | |
104 dest_i++; | |
105 } | |
106 // Otherwise we've seen this one before and we can skip it. | |
107 } | |
108 container->resize(dest_i); | |
109 } | |
110 | |
111 // Uniqifies items in the given vector. The first occurance of a given item | |
112 // will be kept, any remaining ones will be dropped. | |
113 // | |
114 // This function is somewhat like std::unique. std::unique only removes | |
115 // sequential duplicates, and just moves duplicates to the end since it can't | |
116 // resize the container via iteratos alone. | |
117 // | |
118 // Since this version assumes the input is a vector, we can actually do the | |
119 // resize. We also remove duplicates no matter what the ordering is. | |
120 // | |
121 // The objects in the vecotr must support hashing and equality. | |
122 template<typename T> | |
123 inline void Uniquify(std::vector<T>* container) { | |
124 // Containers this size or smaller get brute-force compares rather than | |
125 // using a hash table. This saves a bunch of allocations for the hash table. | |
126 // This number was picked based on measurements for 32-byte strings which | |
127 // seem like a reasonable average compare for our workload (this test is in | |
128 // the unittest file). | |
129 // | |
130 // Interestingly, for longer strings (assuming they're random and there are | |
131 // mostly no duplicates) the threshold will actually be higher because a | |
132 // string compare will terminate before checking all the characters, while a | |
133 // hash has to walk the whole string. | |
134 const size_t kBruteForceThreshold = 75; | |
135 if (container->size() <= kBruteForceThreshold) | |
136 UniquifyBruteForce(container); | |
137 else | |
138 UniquifyHash(container); | |
139 } | |
140 | |
141 #endif // TOOLS_GN_UNIQUIFY_H_ | |
OLD | NEW |