Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(6)

Unified Diff: tools/gn/uniquify.h

Issue 26537002: Add a UniqueVector class to GN (Closed) Base URL: http://git.chromium.org/chromium/src.git@master
Patch Set: comments Created 7 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: tools/gn/uniquify.h
diff --git a/tools/gn/uniquify.h b/tools/gn/uniquify.h
new file mode 100644
index 0000000000000000000000000000000000000000..ae8e4300dd3205bc08d6a7b7b3b6130fe0888266
--- /dev/null
+++ b/tools/gn/uniquify.h
@@ -0,0 +1,141 @@
+// Copyright (c) 2013 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef TOOLS_GN_UNIQUIFY_H_
+#define TOOLS_GN_UNIQUIFY_H_
+
+#include <algorithm>
+
+#include "base/containers/hash_tables.h"
+
+namespace internal {
+
+// This struct allows us to insert things by reference into a hash table. This
+// is used to avoid copying strings just to uniquify them. The hash function
+// of a UniquifyRef is that of the object it points to.
+//
+// It also caches the hash value which allows us to query and then insert
+// without recomputing the hash.
+template<typename T>
+struct UniquifyRef {
+ UniquifyRef() : value(NULL), hash_val(0) {}
+ UniquifyRef(T* v) : value(v) {
+#if defined(COMPILER_GCC)
+ BASE_HASH_NAMESPACE::hash<T> h;
+ hash_val = h(*value);
+#elif defined(COMPILER_MSVC)
+ hash_val = BASE_HASH_NAMESPACE::hash_value(*value);
+#else
+ #error write me
+#endif // COMPILER...
+ }
+
+ T* value;
+ size_t hash_val;
+
+ bool operator==(const UniquifyRef<T>& other) const {
+ return *value == *other.value;
+ }
+ bool operator<(const UniquifyRef<T>& other) const {
+ return *value < *other.value;
+ }
+};
+
+} // namespace internal
+
+namespace BASE_HASH_NAMESPACE {
+
+#if defined(COMPILER_GCC)
+template<typename T> struct hash< internal::UniquifyRef<T> > {
+ std::size_t operator()(const internal::UniquifyRef<T>& v) const {
+ return v.hash_val;
+ }
+};
+#elif defined(COMPILER_MSVC)
+template<typename T>
+inline size_t hash_value(const internal::UniquifyRef<T>& v) {
+ return v.hash_val;
+}
+#endif // COMPILER...
+
+} // namespace BASE_HASH_NAMESPACE
+
+// Uniquifies based on brute-force equality comparisons.
+template<typename T>
+inline void UniquifyBruteForce(std::vector<T>* container) {
+ if (container->size() <= 1)
+ return;
+
+ size_t dest_i = 1;
+ for (size_t source_i = 1; source_i < container->size(); source_i++) {
+ std::vector<T>::iterator last = container->begin() + dest_i;
+ T& cur = (*container)[source_i];
+ if (std::find(container->begin(), last, cur) == last) {
+ // Not seen yet.
+ if (source_i != dest_i)
+ std::swap(cur, (*container)[dest_i]);
viettrungluu 2013/10/08 23:39:41 Having to do a swap is a little unfortunate (since
+ dest_i++;
+ }
+ }
+ container->resize(dest_i);
+}
+
+// Uniquifies based on a hash-table comparison.
+template<typename T>
+inline void UniquifyHash(std::vector<T>* container) {
+ if (container->size() <= 1)
+ return;
+
+ typedef internal::UniquifyRef<T> Ref;
+ typedef base::hash_set<Ref> HashSet;
+
+ HashSet hash;
+ hash.insert(Ref(&(*container)[0]));
+
+ size_t dest_i = 1;
+ for (size_t source_i = 1; source_i < container->size(); source_i++) {
+ Ref source_ref(&(*container)[source_i]);
+ if (hash.find(source_ref) == hash.end()) {
+ // Not seen yet.
+ hash.insert(source_ref);
+ if (source_i != dest_i)
+ std::swap((*container)[source_i], (*container)[dest_i]);
+ dest_i++;
+ }
+ // Otherwise we've seen this one before and we can skip it.
+ }
+ container->resize(dest_i);
+}
+
+// Uniqifies items in the given vector. The first occurance of a given item
+// will be kept, any remaining ones will be dropped.
+//
+// This function is somewhat like std::unique. std::unique only removes
+// sequential duplicates, and just moves duplicates to the end since it can't
+// resize the container via iteratos alone.
+//
+// Since this version assumes the input is a vector, we can actually do the
+// resize. We also remove duplicates no matter what the ordering is.
+//
+// The objects in the vecotr must support hashing and equality.
+template<typename T>
+inline void Uniquify(std::vector<T>* container) {
+ // Containers this size or smaller get brute-force compares rather than
+ // using a hash table. This saves a bunch of allocations for the hash table.
+ // This number was picked based on measurements for 32-byte strings which
+ // seem like a reasonable average compare for our workload (this test is in
+ // the unittest file).
+ //
+ // Interestingly, for longer strings (assuming they're random and there are
+ // mostly no duplicates) the threshold will actually be higher because a
+ // string compare will terminate before checking all the characters, while a
+ // hash has to walk the whole string.
+ const size_t kBruteForceThreshold = 75;
+ if (container->size() <= kBruteForceThreshold)
+ UniquifyBruteForce(container);
+ else
+ UniquifyHash(container);
+}
+
+#endif // TOOLS_GN_UNIQUIFY_H_

Powered by Google App Engine
This is Rietveld 408576698