Index: tools/gn/uniquify.h |
diff --git a/tools/gn/uniquify.h b/tools/gn/uniquify.h |
new file mode 100644 |
index 0000000000000000000000000000000000000000..ae8e4300dd3205bc08d6a7b7b3b6130fe0888266 |
--- /dev/null |
+++ b/tools/gn/uniquify.h |
@@ -0,0 +1,141 @@ |
+// Copyright (c) 2013 The Chromium Authors. All rights reserved. |
+// Use of this source code is governed by a BSD-style license that can be |
+// found in the LICENSE file. |
+ |
+#ifndef TOOLS_GN_UNIQUIFY_H_ |
+#define TOOLS_GN_UNIQUIFY_H_ |
+ |
+#include <algorithm> |
+ |
+#include "base/containers/hash_tables.h" |
+ |
+namespace internal { |
+ |
+// This struct allows us to insert things by reference into a hash table. This |
+// is used to avoid copying strings just to uniquify them. The hash function |
+// of a UniquifyRef is that of the object it points to. |
+// |
+// It also caches the hash value which allows us to query and then insert |
+// without recomputing the hash. |
+template<typename T> |
+struct UniquifyRef { |
+ UniquifyRef() : value(NULL), hash_val(0) {} |
+ UniquifyRef(T* v) : value(v) { |
+#if defined(COMPILER_GCC) |
+ BASE_HASH_NAMESPACE::hash<T> h; |
+ hash_val = h(*value); |
+#elif defined(COMPILER_MSVC) |
+ hash_val = BASE_HASH_NAMESPACE::hash_value(*value); |
+#else |
+ #error write me |
+#endif // COMPILER... |
+ } |
+ |
+ T* value; |
+ size_t hash_val; |
+ |
+ bool operator==(const UniquifyRef<T>& other) const { |
+ return *value == *other.value; |
+ } |
+ bool operator<(const UniquifyRef<T>& other) const { |
+ return *value < *other.value; |
+ } |
+}; |
+ |
+} // namespace internal |
+ |
+namespace BASE_HASH_NAMESPACE { |
+ |
+#if defined(COMPILER_GCC) |
+template<typename T> struct hash< internal::UniquifyRef<T> > { |
+ std::size_t operator()(const internal::UniquifyRef<T>& v) const { |
+ return v.hash_val; |
+ } |
+}; |
+#elif defined(COMPILER_MSVC) |
+template<typename T> |
+inline size_t hash_value(const internal::UniquifyRef<T>& v) { |
+ return v.hash_val; |
+} |
+#endif // COMPILER... |
+ |
+} // namespace BASE_HASH_NAMESPACE |
+ |
+// Uniquifies based on brute-force equality comparisons. |
+template<typename T> |
+inline void UniquifyBruteForce(std::vector<T>* container) { |
+ if (container->size() <= 1) |
+ return; |
+ |
+ size_t dest_i = 1; |
+ for (size_t source_i = 1; source_i < container->size(); source_i++) { |
+ std::vector<T>::iterator last = container->begin() + dest_i; |
+ T& cur = (*container)[source_i]; |
+ if (std::find(container->begin(), last, cur) == last) { |
+ // Not seen yet. |
+ if (source_i != dest_i) |
+ std::swap(cur, (*container)[dest_i]); |
viettrungluu
2013/10/08 23:39:41
Having to do a swap is a little unfortunate (since
|
+ dest_i++; |
+ } |
+ } |
+ container->resize(dest_i); |
+} |
+ |
+// Uniquifies based on a hash-table comparison. |
+template<typename T> |
+inline void UniquifyHash(std::vector<T>* container) { |
+ if (container->size() <= 1) |
+ return; |
+ |
+ typedef internal::UniquifyRef<T> Ref; |
+ typedef base::hash_set<Ref> HashSet; |
+ |
+ HashSet hash; |
+ hash.insert(Ref(&(*container)[0])); |
+ |
+ size_t dest_i = 1; |
+ for (size_t source_i = 1; source_i < container->size(); source_i++) { |
+ Ref source_ref(&(*container)[source_i]); |
+ if (hash.find(source_ref) == hash.end()) { |
+ // Not seen yet. |
+ hash.insert(source_ref); |
+ if (source_i != dest_i) |
+ std::swap((*container)[source_i], (*container)[dest_i]); |
+ dest_i++; |
+ } |
+ // Otherwise we've seen this one before and we can skip it. |
+ } |
+ container->resize(dest_i); |
+} |
+ |
+// Uniqifies items in the given vector. The first occurance of a given item |
+// will be kept, any remaining ones will be dropped. |
+// |
+// This function is somewhat like std::unique. std::unique only removes |
+// sequential duplicates, and just moves duplicates to the end since it can't |
+// resize the container via iteratos alone. |
+// |
+// Since this version assumes the input is a vector, we can actually do the |
+// resize. We also remove duplicates no matter what the ordering is. |
+// |
+// The objects in the vecotr must support hashing and equality. |
+template<typename T> |
+inline void Uniquify(std::vector<T>* container) { |
+ // Containers this size or smaller get brute-force compares rather than |
+ // using a hash table. This saves a bunch of allocations for the hash table. |
+ // This number was picked based on measurements for 32-byte strings which |
+ // seem like a reasonable average compare for our workload (this test is in |
+ // the unittest file). |
+ // |
+ // Interestingly, for longer strings (assuming they're random and there are |
+ // mostly no duplicates) the threshold will actually be higher because a |
+ // string compare will terminate before checking all the characters, while a |
+ // hash has to walk the whole string. |
+ const size_t kBruteForceThreshold = 75; |
+ if (container->size() <= kBruteForceThreshold) |
+ UniquifyBruteForce(container); |
+ else |
+ UniquifyHash(container); |
+} |
+ |
+#endif // TOOLS_GN_UNIQUIFY_H_ |