 Chromium Code Reviews
 Chromium Code Reviews Issue 26537002:
  Add a UniqueVector class to GN  (Closed) 
  Base URL: http://git.chromium.org/chromium/src.git@master
    
  
    Issue 26537002:
  Add a UniqueVector class to GN  (Closed) 
  Base URL: http://git.chromium.org/chromium/src.git@master| Index: tools/gn/uniquify.h | 
| diff --git a/tools/gn/uniquify.h b/tools/gn/uniquify.h | 
| new file mode 100644 | 
| index 0000000000000000000000000000000000000000..ae8e4300dd3205bc08d6a7b7b3b6130fe0888266 | 
| --- /dev/null | 
| +++ b/tools/gn/uniquify.h | 
| @@ -0,0 +1,141 @@ | 
| +// Copyright (c) 2013 The Chromium Authors. All rights reserved. | 
| +// Use of this source code is governed by a BSD-style license that can be | 
| +// found in the LICENSE file. | 
| + | 
| +#ifndef TOOLS_GN_UNIQUIFY_H_ | 
| +#define TOOLS_GN_UNIQUIFY_H_ | 
| + | 
| +#include <algorithm> | 
| + | 
| +#include "base/containers/hash_tables.h" | 
| + | 
| +namespace internal { | 
| + | 
| +// This struct allows us to insert things by reference into a hash table. This | 
| +// is used to avoid copying strings just to uniquify them. The hash function | 
| +// of a UniquifyRef is that of the object it points to. | 
| +// | 
| +// It also caches the hash value which allows us to query and then insert | 
| +// without recomputing the hash. | 
| +template<typename T> | 
| +struct UniquifyRef { | 
| + UniquifyRef() : value(NULL), hash_val(0) {} | 
| + UniquifyRef(T* v) : value(v) { | 
| +#if defined(COMPILER_GCC) | 
| + BASE_HASH_NAMESPACE::hash<T> h; | 
| + hash_val = h(*value); | 
| +#elif defined(COMPILER_MSVC) | 
| + hash_val = BASE_HASH_NAMESPACE::hash_value(*value); | 
| +#else | 
| + #error write me | 
| +#endif // COMPILER... | 
| + } | 
| + | 
| + T* value; | 
| + size_t hash_val; | 
| + | 
| + bool operator==(const UniquifyRef<T>& other) const { | 
| + return *value == *other.value; | 
| + } | 
| + bool operator<(const UniquifyRef<T>& other) const { | 
| + return *value < *other.value; | 
| + } | 
| +}; | 
| + | 
| +} // namespace internal | 
| + | 
| +namespace BASE_HASH_NAMESPACE { | 
| + | 
| +#if defined(COMPILER_GCC) | 
| +template<typename T> struct hash< internal::UniquifyRef<T> > { | 
| + std::size_t operator()(const internal::UniquifyRef<T>& v) const { | 
| + return v.hash_val; | 
| + } | 
| +}; | 
| +#elif defined(COMPILER_MSVC) | 
| +template<typename T> | 
| +inline size_t hash_value(const internal::UniquifyRef<T>& v) { | 
| + return v.hash_val; | 
| +} | 
| +#endif // COMPILER... | 
| + | 
| +} // namespace BASE_HASH_NAMESPACE | 
| + | 
| +// Uniquifies based on brute-force equality comparisons. | 
| +template<typename T> | 
| +inline void UniquifyBruteForce(std::vector<T>* container) { | 
| + if (container->size() <= 1) | 
| + return; | 
| + | 
| + size_t dest_i = 1; | 
| + for (size_t source_i = 1; source_i < container->size(); source_i++) { | 
| + std::vector<T>::iterator last = container->begin() + dest_i; | 
| + T& cur = (*container)[source_i]; | 
| + if (std::find(container->begin(), last, cur) == last) { | 
| + // Not seen yet. | 
| + if (source_i != dest_i) | 
| + std::swap(cur, (*container)[dest_i]); | 
| 
viettrungluu
2013/10/08 23:39:41
Having to do a swap is a little unfortunate (since
 | 
| + dest_i++; | 
| + } | 
| + } | 
| + container->resize(dest_i); | 
| +} | 
| + | 
| +// Uniquifies based on a hash-table comparison. | 
| +template<typename T> | 
| +inline void UniquifyHash(std::vector<T>* container) { | 
| + if (container->size() <= 1) | 
| + return; | 
| + | 
| + typedef internal::UniquifyRef<T> Ref; | 
| + typedef base::hash_set<Ref> HashSet; | 
| + | 
| + HashSet hash; | 
| + hash.insert(Ref(&(*container)[0])); | 
| + | 
| + size_t dest_i = 1; | 
| + for (size_t source_i = 1; source_i < container->size(); source_i++) { | 
| + Ref source_ref(&(*container)[source_i]); | 
| + if (hash.find(source_ref) == hash.end()) { | 
| + // Not seen yet. | 
| + hash.insert(source_ref); | 
| + if (source_i != dest_i) | 
| + std::swap((*container)[source_i], (*container)[dest_i]); | 
| + dest_i++; | 
| + } | 
| + // Otherwise we've seen this one before and we can skip it. | 
| + } | 
| + container->resize(dest_i); | 
| +} | 
| + | 
| +// Uniqifies items in the given vector. The first occurance of a given item | 
| +// will be kept, any remaining ones will be dropped. | 
| +// | 
| +// This function is somewhat like std::unique. std::unique only removes | 
| +// sequential duplicates, and just moves duplicates to the end since it can't | 
| +// resize the container via iteratos alone. | 
| +// | 
| +// Since this version assumes the input is a vector, we can actually do the | 
| +// resize. We also remove duplicates no matter what the ordering is. | 
| +// | 
| +// The objects in the vecotr must support hashing and equality. | 
| +template<typename T> | 
| +inline void Uniquify(std::vector<T>* container) { | 
| + // Containers this size or smaller get brute-force compares rather than | 
| + // using a hash table. This saves a bunch of allocations for the hash table. | 
| + // This number was picked based on measurements for 32-byte strings which | 
| + // seem like a reasonable average compare for our workload (this test is in | 
| + // the unittest file). | 
| + // | 
| + // Interestingly, for longer strings (assuming they're random and there are | 
| + // mostly no duplicates) the threshold will actually be higher because a | 
| + // string compare will terminate before checking all the characters, while a | 
| + // hash has to walk the whole string. | 
| + const size_t kBruteForceThreshold = 75; | 
| + if (container->size() <= kBruteForceThreshold) | 
| + UniquifyBruteForce(container); | 
| + else | 
| + UniquifyHash(container); | 
| +} | 
| + | 
| +#endif // TOOLS_GN_UNIQUIFY_H_ |