Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(731)

Side by Side Diff: tools/gn/uniquify.h

Issue 26537002: Add a UniqueVector class to GN (Closed) Base URL: http://git.chromium.org/chromium/src.git@master
Patch Set: comments Created 7 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #ifndef TOOLS_GN_UNIQUIFY_H_
6 #define TOOLS_GN_UNIQUIFY_H_
7
8 #include <algorithm>
9
10 #include "base/containers/hash_tables.h"
11
12 namespace internal {
13
14 // This struct allows us to insert things by reference into a hash table. This
15 // is used to avoid copying strings just to uniquify them. The hash function
16 // of a UniquifyRef is that of the object it points to.
17 //
18 // It also caches the hash value which allows us to query and then insert
19 // without recomputing the hash.
20 template<typename T>
21 struct UniquifyRef {
22 UniquifyRef() : value(NULL), hash_val(0) {}
23 UniquifyRef(T* v) : value(v) {
24 #if defined(COMPILER_GCC)
25 BASE_HASH_NAMESPACE::hash<T> h;
26 hash_val = h(*value);
27 #elif defined(COMPILER_MSVC)
28 hash_val = BASE_HASH_NAMESPACE::hash_value(*value);
29 #else
30 #error write me
31 #endif // COMPILER...
32 }
33
34 T* value;
35 size_t hash_val;
36
37 bool operator==(const UniquifyRef<T>& other) const {
38 return *value == *other.value;
39 }
40 bool operator<(const UniquifyRef<T>& other) const {
41 return *value < *other.value;
42 }
43 };
44
45 } // namespace internal
46
47 namespace BASE_HASH_NAMESPACE {
48
49 #if defined(COMPILER_GCC)
50 template<typename T> struct hash< internal::UniquifyRef<T> > {
51 std::size_t operator()(const internal::UniquifyRef<T>& v) const {
52 return v.hash_val;
53 }
54 };
55 #elif defined(COMPILER_MSVC)
56 template<typename T>
57 inline size_t hash_value(const internal::UniquifyRef<T>& v) {
58 return v.hash_val;
59 }
60 #endif // COMPILER...
61
62 } // namespace BASE_HASH_NAMESPACE
63
64 // Uniquifies based on brute-force equality comparisons.
65 template<typename T>
66 inline void UniquifyBruteForce(std::vector<T>* container) {
67 if (container->size() <= 1)
68 return;
69
70 size_t dest_i = 1;
71 for (size_t source_i = 1; source_i < container->size(); source_i++) {
72 std::vector<T>::iterator last = container->begin() + dest_i;
73 T& cur = (*container)[source_i];
74 if (std::find(container->begin(), last, cur) == last) {
75 // Not seen yet.
76 if (source_i != dest_i)
77 std::swap(cur, (*container)[dest_i]);
viettrungluu 2013/10/08 23:39:41 Having to do a swap is a little unfortunate (since
78 dest_i++;
79 }
80 }
81 container->resize(dest_i);
82 }
83
84 // Uniquifies based on a hash-table comparison.
85 template<typename T>
86 inline void UniquifyHash(std::vector<T>* container) {
87 if (container->size() <= 1)
88 return;
89
90 typedef internal::UniquifyRef<T> Ref;
91 typedef base::hash_set<Ref> HashSet;
92
93 HashSet hash;
94 hash.insert(Ref(&(*container)[0]));
95
96 size_t dest_i = 1;
97 for (size_t source_i = 1; source_i < container->size(); source_i++) {
98 Ref source_ref(&(*container)[source_i]);
99 if (hash.find(source_ref) == hash.end()) {
100 // Not seen yet.
101 hash.insert(source_ref);
102 if (source_i != dest_i)
103 std::swap((*container)[source_i], (*container)[dest_i]);
104 dest_i++;
105 }
106 // Otherwise we've seen this one before and we can skip it.
107 }
108 container->resize(dest_i);
109 }
110
111 // Uniqifies items in the given vector. The first occurance of a given item
112 // will be kept, any remaining ones will be dropped.
113 //
114 // This function is somewhat like std::unique. std::unique only removes
115 // sequential duplicates, and just moves duplicates to the end since it can't
116 // resize the container via iteratos alone.
117 //
118 // Since this version assumes the input is a vector, we can actually do the
119 // resize. We also remove duplicates no matter what the ordering is.
120 //
121 // The objects in the vecotr must support hashing and equality.
122 template<typename T>
123 inline void Uniquify(std::vector<T>* container) {
124 // Containers this size or smaller get brute-force compares rather than
125 // using a hash table. This saves a bunch of allocations for the hash table.
126 // This number was picked based on measurements for 32-byte strings which
127 // seem like a reasonable average compare for our workload (this test is in
128 // the unittest file).
129 //
130 // Interestingly, for longer strings (assuming they're random and there are
131 // mostly no duplicates) the threshold will actually be higher because a
132 // string compare will terminate before checking all the characters, while a
133 // hash has to walk the whole string.
134 const size_t kBruteForceThreshold = 75;
135 if (container->size() <= kBruteForceThreshold)
136 UniquifyBruteForce(container);
137 else
138 UniquifyHash(container);
139 }
140
141 #endif // TOOLS_GN_UNIQUIFY_H_
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698