Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(78)

Side by Side Diff: third_party/brotli/enc/cluster.h

Issue 2537133002: Update brotli to v1.0.0-snapshot. (Closed)
Patch Set: Fixed typo Created 4 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « third_party/brotli/enc/brotli_bit_stream.cc ('k') | third_party/brotli/enc/cluster.c » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* Copyright 2013 Google Inc. All Rights Reserved. 1 /* Copyright 2013 Google Inc. All Rights Reserved.
2 2
3 Distributed under MIT license. 3 Distributed under MIT license.
4 See file LICENSE for detail or copy at https://opensource.org/licenses/MIT 4 See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5 */ 5 */
6 6
7 // Functions for clustering similar histograms together. 7 /* Functions for clustering similar histograms together. */
8 8
9 #ifndef BROTLI_ENC_CLUSTER_H_ 9 #ifndef BROTLI_ENC_CLUSTER_H_
10 #define BROTLI_ENC_CLUSTER_H_ 10 #define BROTLI_ENC_CLUSTER_H_
11 11
12 #include <math.h> 12 #include <brotli/types.h>
13 #include <algorithm> 13 #include "./histogram.h"
14 #include <utility> 14 #include "./memory.h"
15 #include <vector> 15 #include "./port.h"
16 16
17 #include "./bit_cost.h" 17 #if defined(__cplusplus) || defined(c_plusplus)
18 #include "./entropy_encode.h" 18 extern "C" {
19 #include "./fast_log.h" 19 #endif
20 #include "./histogram.h"
21 #include "./port.h"
22 #include "./types.h"
23 20
24 namespace brotli { 21 typedef struct HistogramPair {
25
26 struct HistogramPair {
27 uint32_t idx1; 22 uint32_t idx1;
28 uint32_t idx2; 23 uint32_t idx2;
29 double cost_combo; 24 double cost_combo;
30 double cost_diff; 25 double cost_diff;
31 }; 26 } HistogramPair;
32 27
33 inline bool operator<(const HistogramPair& p1, const HistogramPair& p2) { 28 #define CODE(X) /* Declaration */;
34 if (p1.cost_diff != p2.cost_diff) {
35 return p1.cost_diff > p2.cost_diff;
36 }
37 return (p1.idx2 - p1.idx1) > (p2.idx2 - p2.idx1);
38 }
39 29
40 // Returns entropy reduction of the context map when we combine two clusters. 30 #define FN(X) X ## Literal
41 inline double ClusterCostDiff(size_t size_a, size_t size_b) { 31 #include "./cluster_inc.h" /* NOLINT(build/include) */
42 size_t size_c = size_a + size_b; 32 #undef FN
43 return static_cast<double>(size_a) * FastLog2(size_a) +
44 static_cast<double>(size_b) * FastLog2(size_b) -
45 static_cast<double>(size_c) * FastLog2(size_c);
46 }
47 33
48 // Computes the bit cost reduction by combining out[idx1] and out[idx2] and if 34 #define FN(X) X ## Command
49 // it is below a threshold, stores the pair (idx1, idx2) in the *pairs queue. 35 #include "./cluster_inc.h" /* NOLINT(build/include) */
50 template<typename HistogramType> 36 #undef FN
51 void CompareAndPushToQueue(const HistogramType* out,
52 const uint32_t* cluster_size,
53 uint32_t idx1, uint32_t idx2,
54 size_t max_num_pairs,
55 HistogramPair* pairs,
56 size_t* num_pairs) {
57 if (idx1 == idx2) {
58 return;
59 }
60 if (idx2 < idx1) {
61 uint32_t t = idx2;
62 idx2 = idx1;
63 idx1 = t;
64 }
65 bool store_pair = false;
66 HistogramPair p = {};
67 p.idx1 = idx1;
68 p.idx2 = idx2;
69 p.cost_diff = 0.5 * ClusterCostDiff(cluster_size[idx1], cluster_size[idx2]);
70 p.cost_diff -= out[idx1].bit_cost_;
71 p.cost_diff -= out[idx2].bit_cost_;
72 37
73 if (out[idx1].total_count_ == 0) { 38 #define FN(X) X ## Distance
74 p.cost_combo = out[idx2].bit_cost_; 39 #include "./cluster_inc.h" /* NOLINT(build/include) */
75 store_pair = true; 40 #undef FN
76 } else if (out[idx2].total_count_ == 0) {
77 p.cost_combo = out[idx1].bit_cost_;
78 store_pair = true;
79 } else {
80 double threshold = *num_pairs == 0 ? 1e99 :
81 std::max(0.0, pairs[0].cost_diff);
82 HistogramType combo = out[idx1];
83 combo.AddHistogram(out[idx2]);
84 double cost_combo = PopulationCost(combo);
85 if (cost_combo < threshold - p.cost_diff) {
86 p.cost_combo = cost_combo;
87 store_pair = true;
88 }
89 }
90 if (store_pair) {
91 p.cost_diff += p.cost_combo;
92 if (*num_pairs > 0 && pairs[0] < p) {
93 // Replace the top of the queue if needed.
94 if (*num_pairs < max_num_pairs) {
95 pairs[*num_pairs] = pairs[0];
96 ++(*num_pairs);
97 }
98 pairs[0] = p;
99 } else if (*num_pairs < max_num_pairs) {
100 pairs[*num_pairs] = p;
101 ++(*num_pairs);
102 }
103 }
104 }
105 41
106 template<typename HistogramType> 42 #undef CODE
107 size_t HistogramCombine(HistogramType* out,
108 uint32_t* cluster_size,
109 uint32_t* symbols,
110 uint32_t* clusters,
111 HistogramPair* pairs,
112 size_t num_clusters,
113 size_t symbols_size,
114 size_t max_clusters,
115 size_t max_num_pairs) {
116 double cost_diff_threshold = 0.0;
117 size_t min_cluster_size = 1;
118 43
119 // We maintain a vector of histogram pairs, with the property that the pair 44 #if defined(__cplusplus) || defined(c_plusplus)
120 // with the maximum bit cost reduction is the first. 45 } /* extern "C" */
121 size_t num_pairs = 0; 46 #endif
122 for (size_t idx1 = 0; idx1 < num_clusters; ++idx1) {
123 for (size_t idx2 = idx1 + 1; idx2 < num_clusters; ++idx2) {
124 CompareAndPushToQueue(out, cluster_size, clusters[idx1], clusters[idx2],
125 max_num_pairs, &pairs[0], &num_pairs);
126 }
127 }
128 47
129 while (num_clusters > min_cluster_size) { 48 #endif /* BROTLI_ENC_CLUSTER_H_ */
130 if (pairs[0].cost_diff >= cost_diff_threshold) {
131 cost_diff_threshold = 1e99;
132 min_cluster_size = max_clusters;
133 continue;
134 }
135 // Take the best pair from the top of heap.
136 uint32_t best_idx1 = pairs[0].idx1;
137 uint32_t best_idx2 = pairs[0].idx2;
138 out[best_idx1].AddHistogram(out[best_idx2]);
139 out[best_idx1].bit_cost_ = pairs[0].cost_combo;
140 cluster_size[best_idx1] += cluster_size[best_idx2];
141 for (size_t i = 0; i < symbols_size; ++i) {
142 if (symbols[i] == best_idx2) {
143 symbols[i] = best_idx1;
144 }
145 }
146 for (size_t i = 0; i < num_clusters; ++i) {
147 if (clusters[i] == best_idx2) {
148 memmove(&clusters[i], &clusters[i + 1],
149 (num_clusters - i - 1) * sizeof(clusters[0]));
150 break;
151 }
152 }
153 --num_clusters;
154 // Remove pairs intersecting the just combined best pair.
155 size_t copy_to_idx = 0;
156 for (size_t i = 0; i < num_pairs; ++i) {
157 HistogramPair& p = pairs[i];
158 if (p.idx1 == best_idx1 || p.idx2 == best_idx1 ||
159 p.idx1 == best_idx2 || p.idx2 == best_idx2) {
160 // Remove invalid pair from the queue.
161 continue;
162 }
163 if (pairs[0] < p) {
164 // Replace the top of the queue if needed.
165 HistogramPair front = pairs[0];
166 pairs[0] = p;
167 pairs[copy_to_idx] = front;
168 } else {
169 pairs[copy_to_idx] = p;
170 }
171 ++copy_to_idx;
172 }
173 num_pairs = copy_to_idx;
174
175 // Push new pairs formed with the combined histogram to the heap.
176 for (size_t i = 0; i < num_clusters; ++i) {
177 CompareAndPushToQueue(out, cluster_size, best_idx1, clusters[i],
178 max_num_pairs, &pairs[0], &num_pairs);
179 }
180 }
181 return num_clusters;
182 }
183
184 // -----------------------------------------------------------------------------
185 // Histogram refinement
186
187 // What is the bit cost of moving histogram from cur_symbol to candidate.
188 template<typename HistogramType>
189 double HistogramBitCostDistance(const HistogramType& histogram,
190 const HistogramType& candidate) {
191 if (histogram.total_count_ == 0) {
192 return 0.0;
193 }
194 HistogramType tmp = histogram;
195 tmp.AddHistogram(candidate);
196 return PopulationCost(tmp) - candidate.bit_cost_;
197 }
198
199 // Find the best 'out' histogram for each of the 'in' histograms.
200 // When called, clusters[0..num_clusters) contains the unique values from
201 // symbols[0..in_size), but this property is not preserved in this function.
202 // Note: we assume that out[]->bit_cost_ is already up-to-date.
203 template<typename HistogramType>
204 void HistogramRemap(const HistogramType* in, size_t in_size,
205 const uint32_t* clusters, size_t num_clusters,
206 HistogramType* out, uint32_t* symbols) {
207 for (size_t i = 0; i < in_size; ++i) {
208 uint32_t best_out = i == 0 ? symbols[0] : symbols[i - 1];
209 double best_bits = HistogramBitCostDistance(in[i], out[best_out]);
210 for (size_t j = 0; j < num_clusters; ++j) {
211 const double cur_bits = HistogramBitCostDistance(in[i], out[clusters[j]]);
212 if (cur_bits < best_bits) {
213 best_bits = cur_bits;
214 best_out = clusters[j];
215 }
216 }
217 symbols[i] = best_out;
218 }
219
220 // Recompute each out based on raw and symbols.
221 for (size_t j = 0; j < num_clusters; ++j) {
222 out[clusters[j]].Clear();
223 }
224 for (size_t i = 0; i < in_size; ++i) {
225 out[symbols[i]].AddHistogram(in[i]);
226 }
227 }
228
229 // Reorders elements of the out[0..length) array and changes values in
230 // symbols[0..length) array in the following way:
231 // * when called, symbols[] contains indexes into out[], and has N unique
232 // values (possibly N < length)
233 // * on return, symbols'[i] = f(symbols[i]) and
234 // out'[symbols'[i]] = out[symbols[i]], for each 0 <= i < length,
235 // where f is a bijection between the range of symbols[] and [0..N), and
236 // the first occurrences of values in symbols'[i] come in consecutive
237 // increasing order.
238 // Returns N, the number of unique values in symbols[].
239 template<typename HistogramType>
240 size_t HistogramReindex(HistogramType* out, uint32_t* symbols, size_t length) {
241 static const uint32_t kInvalidIndex = std::numeric_limits<uint32_t>::max();
242 std::vector<uint32_t> new_index(length, kInvalidIndex);
243 uint32_t next_index = 0;
244 for (size_t i = 0; i < length; ++i) {
245 if (new_index[symbols[i]] == kInvalidIndex) {
246 new_index[symbols[i]] = next_index;
247 ++next_index;
248 }
249 }
250 std::vector<HistogramType> tmp(next_index);
251 next_index = 0;
252 for (size_t i = 0; i < length; ++i) {
253 if (new_index[symbols[i]] == next_index) {
254 tmp[next_index] = out[symbols[i]];
255 ++next_index;
256 }
257 symbols[i] = new_index[symbols[i]];
258 }
259 for (size_t i = 0; i < next_index; ++i) {
260 out[i] = tmp[i];
261 }
262 return next_index;
263 }
264
265 // Clusters similar histograms in 'in' together, the selected histograms are
266 // placed in 'out', and for each index in 'in', *histogram_symbols will
267 // indicate which of the 'out' histograms is the best approximation.
268 template<typename HistogramType>
269 void ClusterHistograms(const std::vector<HistogramType>& in,
270 size_t num_contexts, size_t num_blocks,
271 size_t max_histograms,
272 std::vector<HistogramType>* out,
273 std::vector<uint32_t>* histogram_symbols) {
274 const size_t in_size = num_contexts * num_blocks;
275 assert(in_size == in.size());
276 std::vector<uint32_t> cluster_size(in_size, 1);
277 std::vector<uint32_t> clusters(in_size);
278 size_t num_clusters = 0;
279 out->resize(in_size);
280 histogram_symbols->resize(in_size);
281 for (size_t i = 0; i < in_size; ++i) {
282 (*out)[i] = in[i];
283 (*out)[i].bit_cost_ = PopulationCost(in[i]);
284 (*histogram_symbols)[i] = static_cast<uint32_t>(i);
285 }
286
287 const size_t max_input_histograms = 64;
288 // For the first pass of clustering, we allow all pairs.
289 size_t max_num_pairs = max_input_histograms * max_input_histograms / 2;
290 std::vector<HistogramPair> pairs(max_num_pairs + 1);
291
292 for (size_t i = 0; i < in_size; i += max_input_histograms) {
293 size_t num_to_combine = std::min(in_size - i, max_input_histograms);
294 for (size_t j = 0; j < num_to_combine; ++j) {
295 clusters[num_clusters + j] = static_cast<uint32_t>(i + j);
296 }
297 size_t num_new_clusters =
298 HistogramCombine(&(*out)[0], &cluster_size[0],
299 &(*histogram_symbols)[i],
300 &clusters[num_clusters], &pairs[0],
301 num_to_combine, num_to_combine,
302 max_histograms, max_num_pairs);
303 num_clusters += num_new_clusters;
304 }
305
306 // For the second pass, we limit the total number of histogram pairs.
307 // After this limit is reached, we only keep searching for the best pair.
308 max_num_pairs =
309 std::min(64 * num_clusters, (num_clusters / 2) * num_clusters);
310 pairs.resize(max_num_pairs + 1);
311
312 // Collapse similar histograms.
313 num_clusters = HistogramCombine(&(*out)[0], &cluster_size[0],
314 &(*histogram_symbols)[0], &clusters[0],
315 &pairs[0], num_clusters, in_size,
316 max_histograms, max_num_pairs);
317
318 // Find the optimal map from original histograms to the final ones.
319 HistogramRemap(&in[0], in_size, &clusters[0], num_clusters,
320 &(*out)[0], &(*histogram_symbols)[0]);
321
322 // Convert the context map to a canonical form.
323 size_t num_histograms =
324 HistogramReindex(&(*out)[0], &(*histogram_symbols)[0], in_size);
325 out->resize(num_histograms);
326 }
327
328 } // namespace brotli
329
330 #endif // BROTLI_ENC_CLUSTER_H_
OLDNEW
« no previous file with comments | « third_party/brotli/enc/brotli_bit_stream.cc ('k') | third_party/brotli/enc/cluster.c » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698