tools/gn/uniquify.h - Issue 26537002: Add a UniqueVector class to GN

Unified Diff: tools/gn/uniquify.h

Issue 26537002: Add a UniqueVector class to GN (Closed) Base URL: http://git.chromium.org/chromium/src.git@master

Patch Set: comments Created 7 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Index: tools/gn/uniquify.h

diff --git a/tools/gn/uniquify.h b/tools/gn/uniquify.h

new file mode 100644

index 0000000000000000000000000000000000000000..ae8e4300dd3205bc08d6a7b7b3b6130fe0888266

--- /dev/null

+++ b/tools/gn/uniquify.h

@@ -0,0 +1,141 @@

+// Use of this source code is governed by a BSD-style license that can be

+// found in the LICENSE file.

+#ifndef TOOLS_GN_UNIQUIFY_H_

+#define TOOLS_GN_UNIQUIFY_H_

+#include <algorithm>

+#include "base/containers/hash_tables.h"

+namespace internal {

+// This struct allows us to insert things by reference into a hash table. This

+// is used to avoid copying strings just to uniquify them. The hash function

+// of a UniquifyRef is that of the object it points to.

+//

+// It also caches the hash value which allows us to query and then insert

+// without recomputing the hash.

+template<typename T>

+struct UniquifyRef {

+ UniquifyRef() : value(NULL), hash_val(0) {}

+ UniquifyRef(T* v) : value(v) {

+#if defined(COMPILER_GCC)

+ BASE_HASH_NAMESPACE::hash<T> h;

+ hash_val = h(*value);

+#elif defined(COMPILER_MSVC)

+ hash_val = BASE_HASH_NAMESPACE::hash_value(*value);

+#else

+ #error write me

+#endif // COMPILER...

+ }

+ T* value;

+ size_t hash_val;

+ bool operator==(const UniquifyRef<T>& other) const {

+ return *value == *other.value;

+ }

+ bool operator<(const UniquifyRef<T>& other) const {

+ return *value < *other.value;

+ }

+};

+} // namespace internal

+namespace BASE_HASH_NAMESPACE {

+#if defined(COMPILER_GCC)

+template<typename T> struct hash< internal::UniquifyRef<T> > {

+ std::size_t operator()(const internal::UniquifyRef<T>& v) const {

+ return v.hash_val;

+ }

+};

+#elif defined(COMPILER_MSVC)

+template<typename T>

+inline size_t hash_value(const internal::UniquifyRef<T>& v) {

+ return v.hash_val;

+#endif // COMPILER...

+} // namespace BASE_HASH_NAMESPACE

+// Uniquifies based on brute-force equality comparisons.

+template<typename T>

+inline void UniquifyBruteForce(std::vector<T>* container) {

+ if (container->size() <= 1)

+ return;

+ size_t dest_i = 1;

+ for (size_t source_i = 1; source_i < container->size(); source_i++) {

+ std::vector<T>::iterator last = container->begin() + dest_i;

+ T& cur = (*container)[source_i];

+ if (std::find(container->begin(), last, cur) == last) {

+ // Not seen yet.

+ if (source_i != dest_i)

+ std::swap(cur, (*container)[dest_i]);

viettrungluu 2013/10/08 23:39:41 Having to do a swap is a little unfortunate (since

+ dest_i++;

+ }

+ container->resize(dest_i);

+// Uniquifies based on a hash-table comparison.

+template<typename T>

+inline void UniquifyHash(std::vector<T>* container) {

+ if (container->size() <= 1)

+ return;

+ typedef internal::UniquifyRef<T> Ref;

+ typedef base::hash_set<Ref> HashSet;

+ HashSet hash;

+ hash.insert(Ref(&(*container)[0]));

+ size_t dest_i = 1;

+ for (size_t source_i = 1; source_i < container->size(); source_i++) {

+ Ref source_ref(&(*container)[source_i]);

+ if (hash.find(source_ref) == hash.end()) {

+ // Not seen yet.

+ hash.insert(source_ref);

+ if (source_i != dest_i)

+ std::swap((*container)[source_i], (*container)[dest_i]);

+ dest_i++;

+ }

+ // Otherwise we've seen this one before and we can skip it.

+ }

+ container->resize(dest_i);

+// Uniqifies items in the given vector. The first occurance of a given item

+// will be kept, any remaining ones will be dropped.

+//

+// This function is somewhat like std::unique. std::unique only removes

+// sequential duplicates, and just moves duplicates to the end since it can't

+// resize the container via iteratos alone.

+//

+// Since this version assumes the input is a vector, we can actually do the

+// resize. We also remove duplicates no matter what the ordering is.

+//

+// The objects in the vecotr must support hashing and equality.

+template<typename T>

+inline void Uniquify(std::vector<T>* container) {

+ // Containers this size or smaller get brute-force compares rather than

+ // using a hash table. This saves a bunch of allocations for the hash table.

+ // This number was picked based on measurements for 32-byte strings which

+ // seem like a reasonable average compare for our workload (this test is in

+ // the unittest file).

+ //

+ // Interestingly, for longer strings (assuming they're random and there are

+ // mostly no duplicates) the threshold will actually be higher because a

+ // string compare will terminate before checking all the characters, while a

+ // hash has to walk the whole string.

+ const size_t kBruteForceThreshold = 75;

+ if (container->size() <= kBruteForceThreshold)

+ UniquifyBruteForce(container);

+ else

+ UniquifyHash(container);

+#endif // TOOLS_GN_UNIQUIFY_H_

« no previous file with comments | « tools/gn/source_file.h ('k') | tools/gn/uniquify_unittest.cc » ('j') | tools/gn/uniquify_unittest.cc » ('J')