OLD | NEW |
| 1 /* NOLINT(build/header_guard) */ |
1 /* Copyright 2013 Google Inc. All Rights Reserved. | 2 /* Copyright 2013 Google Inc. All Rights Reserved. |
2 | 3 |
3 Distributed under MIT license. | 4 Distributed under MIT license. |
4 See file LICENSE for detail or copy at https://opensource.org/licenses/MIT | 5 See file LICENSE for detail or copy at https://opensource.org/licenses/MIT |
5 */ | 6 */ |
6 | 7 |
7 // Functions for clustering similar histograms together. | 8 /* template parameters: FN, CODE */ |
8 | 9 |
9 #ifndef BROTLI_ENC_CLUSTER_H_ | 10 #define HistogramType FN(Histogram) |
10 #define BROTLI_ENC_CLUSTER_H_ | |
11 | 11 |
12 #include <math.h> | 12 /* Computes the bit cost reduction by combining out[idx1] and out[idx2] and if |
13 #include <algorithm> | 13 it is below a threshold, stores the pair (idx1, idx2) in the *pairs queue. */ |
14 #include <utility> | 14 BROTLI_INTERNAL void FN(BrotliCompareAndPushToQueue)( |
15 #include <vector> | 15 const HistogramType* out, const uint32_t* cluster_size, uint32_t idx1, |
16 | 16 uint32_t idx2, size_t max_num_pairs, HistogramPair* pairs, |
17 #include "./bit_cost.h" | 17 size_t* num_pairs) CODE({ |
18 #include "./entropy_encode.h" | 18 BROTLI_BOOL is_good_pair = BROTLI_FALSE; |
19 #include "./fast_log.h" | 19 HistogramPair p = {0}; |
20 #include "./histogram.h" | |
21 #include "./port.h" | |
22 #include "./types.h" | |
23 | |
24 namespace brotli { | |
25 | |
26 struct HistogramPair { | |
27 uint32_t idx1; | |
28 uint32_t idx2; | |
29 double cost_combo; | |
30 double cost_diff; | |
31 }; | |
32 | |
33 inline bool operator<(const HistogramPair& p1, const HistogramPair& p2) { | |
34 if (p1.cost_diff != p2.cost_diff) { | |
35 return p1.cost_diff > p2.cost_diff; | |
36 } | |
37 return (p1.idx2 - p1.idx1) > (p2.idx2 - p2.idx1); | |
38 } | |
39 | |
40 // Returns entropy reduction of the context map when we combine two clusters. | |
41 inline double ClusterCostDiff(size_t size_a, size_t size_b) { | |
42 size_t size_c = size_a + size_b; | |
43 return static_cast<double>(size_a) * FastLog2(size_a) + | |
44 static_cast<double>(size_b) * FastLog2(size_b) - | |
45 static_cast<double>(size_c) * FastLog2(size_c); | |
46 } | |
47 | |
48 // Computes the bit cost reduction by combining out[idx1] and out[idx2] and if | |
49 // it is below a threshold, stores the pair (idx1, idx2) in the *pairs queue. | |
50 template<typename HistogramType> | |
51 void CompareAndPushToQueue(const HistogramType* out, | |
52 const uint32_t* cluster_size, | |
53 uint32_t idx1, uint32_t idx2, | |
54 size_t max_num_pairs, | |
55 HistogramPair* pairs, | |
56 size_t* num_pairs) { | |
57 if (idx1 == idx2) { | 20 if (idx1 == idx2) { |
58 return; | 21 return; |
59 } | 22 } |
60 if (idx2 < idx1) { | 23 if (idx2 < idx1) { |
61 uint32_t t = idx2; | 24 uint32_t t = idx2; |
62 idx2 = idx1; | 25 idx2 = idx1; |
63 idx1 = t; | 26 idx1 = t; |
64 } | 27 } |
65 bool store_pair = false; | |
66 HistogramPair p = {}; | |
67 p.idx1 = idx1; | 28 p.idx1 = idx1; |
68 p.idx2 = idx2; | 29 p.idx2 = idx2; |
69 p.cost_diff = 0.5 * ClusterCostDiff(cluster_size[idx1], cluster_size[idx2]); | 30 p.cost_diff = 0.5 * ClusterCostDiff(cluster_size[idx1], cluster_size[idx2]); |
70 p.cost_diff -= out[idx1].bit_cost_; | 31 p.cost_diff -= out[idx1].bit_cost_; |
71 p.cost_diff -= out[idx2].bit_cost_; | 32 p.cost_diff -= out[idx2].bit_cost_; |
72 | 33 |
73 if (out[idx1].total_count_ == 0) { | 34 if (out[idx1].total_count_ == 0) { |
74 p.cost_combo = out[idx2].bit_cost_; | 35 p.cost_combo = out[idx2].bit_cost_; |
75 store_pair = true; | 36 is_good_pair = BROTLI_TRUE; |
76 } else if (out[idx2].total_count_ == 0) { | 37 } else if (out[idx2].total_count_ == 0) { |
77 p.cost_combo = out[idx1].bit_cost_; | 38 p.cost_combo = out[idx1].bit_cost_; |
78 store_pair = true; | 39 is_good_pair = BROTLI_TRUE; |
79 } else { | 40 } else { |
80 double threshold = *num_pairs == 0 ? 1e99 : | 41 double threshold = *num_pairs == 0 ? 1e99 : |
81 std::max(0.0, pairs[0].cost_diff); | 42 BROTLI_MAX(double, 0.0, pairs[0].cost_diff); |
82 HistogramType combo = out[idx1]; | 43 HistogramType combo = out[idx1]; |
83 combo.AddHistogram(out[idx2]); | 44 double cost_combo; |
84 double cost_combo = PopulationCost(combo); | 45 FN(HistogramAddHistogram)(&combo, &out[idx2]); |
| 46 cost_combo = FN(BrotliPopulationCost)(&combo); |
85 if (cost_combo < threshold - p.cost_diff) { | 47 if (cost_combo < threshold - p.cost_diff) { |
86 p.cost_combo = cost_combo; | 48 p.cost_combo = cost_combo; |
87 store_pair = true; | 49 is_good_pair = BROTLI_TRUE; |
88 } | 50 } |
89 } | 51 } |
90 if (store_pair) { | 52 if (is_good_pair) { |
91 p.cost_diff += p.cost_combo; | 53 p.cost_diff += p.cost_combo; |
92 if (*num_pairs > 0 && pairs[0] < p) { | 54 if (*num_pairs > 0 && HistogramPairIsLess(&pairs[0], &p)) { |
93 // Replace the top of the queue if needed. | 55 /* Replace the top of the queue if needed. */ |
94 if (*num_pairs < max_num_pairs) { | 56 if (*num_pairs < max_num_pairs) { |
95 pairs[*num_pairs] = pairs[0]; | 57 pairs[*num_pairs] = pairs[0]; |
96 ++(*num_pairs); | 58 ++(*num_pairs); |
97 } | 59 } |
98 pairs[0] = p; | 60 pairs[0] = p; |
99 } else if (*num_pairs < max_num_pairs) { | 61 } else if (*num_pairs < max_num_pairs) { |
100 pairs[*num_pairs] = p; | 62 pairs[*num_pairs] = p; |
101 ++(*num_pairs); | 63 ++(*num_pairs); |
102 } | 64 } |
103 } | 65 } |
104 } | 66 }) |
105 | 67 |
106 template<typename HistogramType> | 68 BROTLI_INTERNAL size_t FN(BrotliHistogramCombine)(HistogramType* out, |
107 size_t HistogramCombine(HistogramType* out, | 69 uint32_t* cluster_size, |
108 uint32_t* cluster_size, | 70 uint32_t* symbols, |
109 uint32_t* symbols, | 71 uint32_t* clusters, |
110 uint32_t* clusters, | 72 HistogramPair* pairs, |
111 HistogramPair* pairs, | 73 size_t num_clusters, |
112 size_t num_clusters, | 74 size_t symbols_size, |
113 size_t symbols_size, | 75 size_t max_clusters, |
114 size_t max_clusters, | 76 size_t max_num_pairs) CODE({ |
115 size_t max_num_pairs) { | |
116 double cost_diff_threshold = 0.0; | 77 double cost_diff_threshold = 0.0; |
117 size_t min_cluster_size = 1; | 78 size_t min_cluster_size = 1; |
| 79 size_t num_pairs = 0; |
118 | 80 |
119 // We maintain a vector of histogram pairs, with the property that the pair | 81 { |
120 // with the maximum bit cost reduction is the first. | 82 /* We maintain a vector of histogram pairs, with the property that the pair |
121 size_t num_pairs = 0; | 83 with the maximum bit cost reduction is the first. */ |
122 for (size_t idx1 = 0; idx1 < num_clusters; ++idx1) { | 84 size_t idx1; |
123 for (size_t idx2 = idx1 + 1; idx2 < num_clusters; ++idx2) { | 85 for (idx1 = 0; idx1 < num_clusters; ++idx1) { |
124 CompareAndPushToQueue(out, cluster_size, clusters[idx1], clusters[idx2], | 86 size_t idx2; |
125 max_num_pairs, &pairs[0], &num_pairs); | 87 for (idx2 = idx1 + 1; idx2 < num_clusters; ++idx2) { |
| 88 FN(BrotliCompareAndPushToQueue)(out, cluster_size, clusters[idx1], |
| 89 clusters[idx2], max_num_pairs, &pairs[0], &num_pairs); |
| 90 } |
126 } | 91 } |
127 } | 92 } |
128 | 93 |
129 while (num_clusters > min_cluster_size) { | 94 while (num_clusters > min_cluster_size) { |
| 95 uint32_t best_idx1; |
| 96 uint32_t best_idx2; |
| 97 size_t i; |
130 if (pairs[0].cost_diff >= cost_diff_threshold) { | 98 if (pairs[0].cost_diff >= cost_diff_threshold) { |
131 cost_diff_threshold = 1e99; | 99 cost_diff_threshold = 1e99; |
132 min_cluster_size = max_clusters; | 100 min_cluster_size = max_clusters; |
133 continue; | 101 continue; |
134 } | 102 } |
135 // Take the best pair from the top of heap. | 103 /* Take the best pair from the top of heap. */ |
136 uint32_t best_idx1 = pairs[0].idx1; | 104 best_idx1 = pairs[0].idx1; |
137 uint32_t best_idx2 = pairs[0].idx2; | 105 best_idx2 = pairs[0].idx2; |
138 out[best_idx1].AddHistogram(out[best_idx2]); | 106 FN(HistogramAddHistogram)(&out[best_idx1], &out[best_idx2]); |
139 out[best_idx1].bit_cost_ = pairs[0].cost_combo; | 107 out[best_idx1].bit_cost_ = pairs[0].cost_combo; |
140 cluster_size[best_idx1] += cluster_size[best_idx2]; | 108 cluster_size[best_idx1] += cluster_size[best_idx2]; |
141 for (size_t i = 0; i < symbols_size; ++i) { | 109 for (i = 0; i < symbols_size; ++i) { |
142 if (symbols[i] == best_idx2) { | 110 if (symbols[i] == best_idx2) { |
143 symbols[i] = best_idx1; | 111 symbols[i] = best_idx1; |
144 } | 112 } |
145 } | 113 } |
146 for (size_t i = 0; i < num_clusters; ++i) { | 114 for (i = 0; i < num_clusters; ++i) { |
147 if (clusters[i] == best_idx2) { | 115 if (clusters[i] == best_idx2) { |
148 memmove(&clusters[i], &clusters[i + 1], | 116 memmove(&clusters[i], &clusters[i + 1], |
149 (num_clusters - i - 1) * sizeof(clusters[0])); | 117 (num_clusters - i - 1) * sizeof(clusters[0])); |
150 break; | 118 break; |
151 } | 119 } |
152 } | 120 } |
153 --num_clusters; | 121 --num_clusters; |
154 // Remove pairs intersecting the just combined best pair. | 122 { |
155 size_t copy_to_idx = 0; | 123 /* Remove pairs intersecting the just combined best pair. */ |
156 for (size_t i = 0; i < num_pairs; ++i) { | 124 size_t copy_to_idx = 0; |
157 HistogramPair& p = pairs[i]; | 125 for (i = 0; i < num_pairs; ++i) { |
158 if (p.idx1 == best_idx1 || p.idx2 == best_idx1 || | 126 HistogramPair* p = &pairs[i]; |
159 p.idx1 == best_idx2 || p.idx2 == best_idx2) { | 127 if (p->idx1 == best_idx1 || p->idx2 == best_idx1 || |
160 // Remove invalid pair from the queue. | 128 p->idx1 == best_idx2 || p->idx2 == best_idx2) { |
161 continue; | 129 /* Remove invalid pair from the queue. */ |
| 130 continue; |
| 131 } |
| 132 if (HistogramPairIsLess(&pairs[0], p)) { |
| 133 /* Replace the top of the queue if needed. */ |
| 134 HistogramPair front = pairs[0]; |
| 135 pairs[0] = *p; |
| 136 pairs[copy_to_idx] = front; |
| 137 } else { |
| 138 pairs[copy_to_idx] = *p; |
| 139 } |
| 140 ++copy_to_idx; |
162 } | 141 } |
163 if (pairs[0] < p) { | 142 num_pairs = copy_to_idx; |
164 // Replace the top of the queue if needed. | |
165 HistogramPair front = pairs[0]; | |
166 pairs[0] = p; | |
167 pairs[copy_to_idx] = front; | |
168 } else { | |
169 pairs[copy_to_idx] = p; | |
170 } | |
171 ++copy_to_idx; | |
172 } | 143 } |
173 num_pairs = copy_to_idx; | |
174 | 144 |
175 // Push new pairs formed with the combined histogram to the heap. | 145 /* Push new pairs formed with the combined histogram to the heap. */ |
176 for (size_t i = 0; i < num_clusters; ++i) { | 146 for (i = 0; i < num_clusters; ++i) { |
177 CompareAndPushToQueue(out, cluster_size, best_idx1, clusters[i], | 147 FN(BrotliCompareAndPushToQueue)(out, cluster_size, best_idx1, clusters[i], |
178 max_num_pairs, &pairs[0], &num_pairs); | 148 max_num_pairs, &pairs[0], &num_pairs); |
179 } | 149 } |
180 } | 150 } |
181 return num_clusters; | 151 return num_clusters; |
182 } | 152 }) |
183 | 153 |
184 // ----------------------------------------------------------------------------- | 154 /* What is the bit cost of moving histogram from cur_symbol to candidate. */ |
185 // Histogram refinement | 155 BROTLI_INTERNAL double FN(BrotliHistogramBitCostDistance)( |
| 156 const HistogramType* histogram, const HistogramType* candidate) CODE({ |
| 157 if (histogram->total_count_ == 0) { |
| 158 return 0.0; |
| 159 } else { |
| 160 HistogramType tmp = *histogram; |
| 161 FN(HistogramAddHistogram)(&tmp, candidate); |
| 162 return FN(BrotliPopulationCost)(&tmp) - candidate->bit_cost_; |
| 163 } |
| 164 }) |
186 | 165 |
187 // What is the bit cost of moving histogram from cur_symbol to candidate. | 166 /* Find the best 'out' histogram for each of the 'in' histograms. |
188 template<typename HistogramType> | 167 When called, clusters[0..num_clusters) contains the unique values from |
189 double HistogramBitCostDistance(const HistogramType& histogram, | 168 symbols[0..in_size), but this property is not preserved in this function. |
190 const HistogramType& candidate) { | 169 Note: we assume that out[]->bit_cost_ is already up-to-date. */ |
191 if (histogram.total_count_ == 0) { | 170 BROTLI_INTERNAL void FN(BrotliHistogramRemap)(const HistogramType* in, |
192 return 0.0; | 171 size_t in_size, const uint32_t* clusters, size_t num_clusters, |
193 } | 172 HistogramType* out, uint32_t* symbols) CODE({ |
194 HistogramType tmp = histogram; | 173 size_t i; |
195 tmp.AddHistogram(candidate); | 174 for (i = 0; i < in_size; ++i) { |
196 return PopulationCost(tmp) - candidate.bit_cost_; | |
197 } | |
198 | |
199 // Find the best 'out' histogram for each of the 'in' histograms. | |
200 // When called, clusters[0..num_clusters) contains the unique values from | |
201 // symbols[0..in_size), but this property is not preserved in this function. | |
202 // Note: we assume that out[]->bit_cost_ is already up-to-date. | |
203 template<typename HistogramType> | |
204 void HistogramRemap(const HistogramType* in, size_t in_size, | |
205 const uint32_t* clusters, size_t num_clusters, | |
206 HistogramType* out, uint32_t* symbols) { | |
207 for (size_t i = 0; i < in_size; ++i) { | |
208 uint32_t best_out = i == 0 ? symbols[0] : symbols[i - 1]; | 175 uint32_t best_out = i == 0 ? symbols[0] : symbols[i - 1]; |
209 double best_bits = HistogramBitCostDistance(in[i], out[best_out]); | 176 double best_bits = |
210 for (size_t j = 0; j < num_clusters; ++j) { | 177 FN(BrotliHistogramBitCostDistance)(&in[i], &out[best_out]); |
211 const double cur_bits = HistogramBitCostDistance(in[i], out[clusters[j]]); | 178 size_t j; |
| 179 for (j = 0; j < num_clusters; ++j) { |
| 180 const double cur_bits = |
| 181 FN(BrotliHistogramBitCostDistance)(&in[i], &out[clusters[j]]); |
212 if (cur_bits < best_bits) { | 182 if (cur_bits < best_bits) { |
213 best_bits = cur_bits; | 183 best_bits = cur_bits; |
214 best_out = clusters[j]; | 184 best_out = clusters[j]; |
215 } | 185 } |
216 } | 186 } |
217 symbols[i] = best_out; | 187 symbols[i] = best_out; |
218 } | 188 } |
219 | 189 |
220 // Recompute each out based on raw and symbols. | 190 /* Recompute each out based on raw and symbols. */ |
221 for (size_t j = 0; j < num_clusters; ++j) { | 191 for (i = 0; i < num_clusters; ++i) { |
222 out[clusters[j]].Clear(); | 192 FN(HistogramClear)(&out[clusters[i]]); |
223 } | 193 } |
224 for (size_t i = 0; i < in_size; ++i) { | 194 for (i = 0; i < in_size; ++i) { |
225 out[symbols[i]].AddHistogram(in[i]); | 195 FN(HistogramAddHistogram)(&out[symbols[i]], &in[i]); |
226 } | 196 } |
227 } | 197 }) |
228 | 198 |
229 // Reorders elements of the out[0..length) array and changes values in | 199 /* Reorders elements of the out[0..length) array and changes values in |
230 // symbols[0..length) array in the following way: | 200 symbols[0..length) array in the following way: |
231 // * when called, symbols[] contains indexes into out[], and has N unique | 201 * when called, symbols[] contains indexes into out[], and has N unique |
232 // values (possibly N < length) | 202 values (possibly N < length) |
233 // * on return, symbols'[i] = f(symbols[i]) and | 203 * on return, symbols'[i] = f(symbols[i]) and |
234 // out'[symbols'[i]] = out[symbols[i]], for each 0 <= i < length, | 204 out'[symbols'[i]] = out[symbols[i]], for each 0 <= i < length, |
235 // where f is a bijection between the range of symbols[] and [0..N), and | 205 where f is a bijection between the range of symbols[] and [0..N), and |
236 // the first occurrences of values in symbols'[i] come in consecutive | 206 the first occurrences of values in symbols'[i] come in consecutive |
237 // increasing order. | 207 increasing order. |
238 // Returns N, the number of unique values in symbols[]. | 208 Returns N, the number of unique values in symbols[]. */ |
239 template<typename HistogramType> | 209 BROTLI_INTERNAL size_t FN(BrotliHistogramReindex)(MemoryManager* m, |
240 size_t HistogramReindex(HistogramType* out, uint32_t* symbols, size_t length) { | 210 HistogramType* out, uint32_t* symbols, size_t length) CODE({ |
241 static const uint32_t kInvalidIndex = std::numeric_limits<uint32_t>::max(); | 211 static const uint32_t kInvalidIndex = BROTLI_UINT32_MAX; |
242 std::vector<uint32_t> new_index(length, kInvalidIndex); | 212 uint32_t* new_index = BROTLI_ALLOC(m, uint32_t, length); |
243 uint32_t next_index = 0; | 213 uint32_t next_index; |
244 for (size_t i = 0; i < length; ++i) { | 214 HistogramType* tmp; |
| 215 size_t i; |
| 216 if (BROTLI_IS_OOM(m)) return 0; |
| 217 for (i = 0; i < length; ++i) { |
| 218 new_index[i] = kInvalidIndex; |
| 219 } |
| 220 next_index = 0; |
| 221 for (i = 0; i < length; ++i) { |
245 if (new_index[symbols[i]] == kInvalidIndex) { | 222 if (new_index[symbols[i]] == kInvalidIndex) { |
246 new_index[symbols[i]] = next_index; | 223 new_index[symbols[i]] = next_index; |
247 ++next_index; | 224 ++next_index; |
248 } | 225 } |
249 } | 226 } |
250 std::vector<HistogramType> tmp(next_index); | 227 /* TODO: by using idea of "cycle-sort" we can avoid allocation of |
| 228 tmp and reduce the number of copying by the factor of 2. */ |
| 229 tmp = BROTLI_ALLOC(m, HistogramType, next_index); |
| 230 if (BROTLI_IS_OOM(m)) return 0; |
251 next_index = 0; | 231 next_index = 0; |
252 for (size_t i = 0; i < length; ++i) { | 232 for (i = 0; i < length; ++i) { |
253 if (new_index[symbols[i]] == next_index) { | 233 if (new_index[symbols[i]] == next_index) { |
254 tmp[next_index] = out[symbols[i]]; | 234 tmp[next_index] = out[symbols[i]]; |
255 ++next_index; | 235 ++next_index; |
256 } | 236 } |
257 symbols[i] = new_index[symbols[i]]; | 237 symbols[i] = new_index[symbols[i]]; |
258 } | 238 } |
259 for (size_t i = 0; i < next_index; ++i) { | 239 BROTLI_FREE(m, new_index); |
| 240 for (i = 0; i < next_index; ++i) { |
260 out[i] = tmp[i]; | 241 out[i] = tmp[i]; |
261 } | 242 } |
| 243 BROTLI_FREE(m, tmp); |
262 return next_index; | 244 return next_index; |
263 } | 245 }) |
264 | 246 |
265 // Clusters similar histograms in 'in' together, the selected histograms are | 247 BROTLI_INTERNAL void FN(BrotliClusterHistograms)( |
266 // placed in 'out', and for each index in 'in', *histogram_symbols will | 248 MemoryManager* m, const HistogramType* in, const size_t in_size, |
267 // indicate which of the 'out' histograms is the best approximation. | 249 size_t max_histograms, HistogramType* out, size_t* out_size, |
268 template<typename HistogramType> | 250 uint32_t* histogram_symbols) CODE({ |
269 void ClusterHistograms(const std::vector<HistogramType>& in, | 251 uint32_t* cluster_size = BROTLI_ALLOC(m, uint32_t, in_size); |
270 size_t num_contexts, size_t num_blocks, | 252 uint32_t* clusters = BROTLI_ALLOC(m, uint32_t, in_size); |
271 size_t max_histograms, | |
272 std::vector<HistogramType>* out, | |
273 std::vector<uint32_t>* histogram_symbols) { | |
274 const size_t in_size = num_contexts * num_blocks; | |
275 assert(in_size == in.size()); | |
276 std::vector<uint32_t> cluster_size(in_size, 1); | |
277 std::vector<uint32_t> clusters(in_size); | |
278 size_t num_clusters = 0; | 253 size_t num_clusters = 0; |
279 out->resize(in_size); | 254 const size_t max_input_histograms = 64; |
280 histogram_symbols->resize(in_size); | 255 size_t pairs_capacity = max_input_histograms * max_input_histograms / 2; |
281 for (size_t i = 0; i < in_size; ++i) { | 256 /* For the first pass of clustering, we allow all pairs. */ |
282 (*out)[i] = in[i]; | 257 HistogramPair* pairs = BROTLI_ALLOC(m, HistogramPair, pairs_capacity + 1); |
283 (*out)[i].bit_cost_ = PopulationCost(in[i]); | 258 size_t i; |
284 (*histogram_symbols)[i] = static_cast<uint32_t>(i); | 259 |
| 260 if (BROTLI_IS_OOM(m)) return; |
| 261 |
| 262 for (i = 0; i < in_size; ++i) { |
| 263 cluster_size[i] = 1; |
285 } | 264 } |
286 | 265 |
287 const size_t max_input_histograms = 64; | 266 for (i = 0; i < in_size; ++i) { |
288 // For the first pass of clustering, we allow all pairs. | 267 out[i] = in[i]; |
289 size_t max_num_pairs = max_input_histograms * max_input_histograms / 2; | 268 out[i].bit_cost_ = FN(BrotliPopulationCost)(&in[i]); |
290 std::vector<HistogramPair> pairs(max_num_pairs + 1); | 269 histogram_symbols[i] = (uint32_t)i; |
| 270 } |
291 | 271 |
292 for (size_t i = 0; i < in_size; i += max_input_histograms) { | 272 for (i = 0; i < in_size; i += max_input_histograms) { |
293 size_t num_to_combine = std::min(in_size - i, max_input_histograms); | 273 size_t num_to_combine = |
294 for (size_t j = 0; j < num_to_combine; ++j) { | 274 BROTLI_MIN(size_t, in_size - i, max_input_histograms); |
295 clusters[num_clusters + j] = static_cast<uint32_t>(i + j); | 275 size_t num_new_clusters; |
| 276 size_t j; |
| 277 for (j = 0; j < num_to_combine; ++j) { |
| 278 clusters[num_clusters + j] = (uint32_t)(i + j); |
296 } | 279 } |
297 size_t num_new_clusters = | 280 num_new_clusters = |
298 HistogramCombine(&(*out)[0], &cluster_size[0], | 281 FN(BrotliHistogramCombine)(out, cluster_size, |
299 &(*histogram_symbols)[i], | 282 &histogram_symbols[i], |
300 &clusters[num_clusters], &pairs[0], | 283 &clusters[num_clusters], pairs, |
301 num_to_combine, num_to_combine, | 284 num_to_combine, num_to_combine, |
302 max_histograms, max_num_pairs); | 285 max_histograms, pairs_capacity); |
303 num_clusters += num_new_clusters; | 286 num_clusters += num_new_clusters; |
304 } | 287 } |
305 | 288 |
306 // For the second pass, we limit the total number of histogram pairs. | 289 { |
307 // After this limit is reached, we only keep searching for the best pair. | 290 /* For the second pass, we limit the total number of histogram pairs. |
308 max_num_pairs = | 291 After this limit is reached, we only keep searching for the best pair. */ |
309 std::min(64 * num_clusters, (num_clusters / 2) * num_clusters); | 292 size_t max_num_pairs = BROTLI_MIN(size_t, |
310 pairs.resize(max_num_pairs + 1); | 293 64 * num_clusters, (num_clusters / 2) * num_clusters); |
| 294 BROTLI_ENSURE_CAPACITY( |
| 295 m, HistogramPair, pairs, pairs_capacity, max_num_pairs + 1); |
| 296 if (BROTLI_IS_OOM(m)) return; |
311 | 297 |
312 // Collapse similar histograms. | 298 /* Collapse similar histograms. */ |
313 num_clusters = HistogramCombine(&(*out)[0], &cluster_size[0], | 299 num_clusters = FN(BrotliHistogramCombine)(out, cluster_size, |
314 &(*histogram_symbols)[0], &clusters[0], | 300 histogram_symbols, clusters, |
315 &pairs[0], num_clusters, in_size, | 301 pairs, num_clusters, in_size, |
316 max_histograms, max_num_pairs); | 302 max_histograms, max_num_pairs); |
| 303 } |
| 304 BROTLI_FREE(m, pairs); |
| 305 BROTLI_FREE(m, cluster_size); |
| 306 /* Find the optimal map from original histograms to the final ones. */ |
| 307 FN(BrotliHistogramRemap)(in, in_size, clusters, num_clusters, |
| 308 out, histogram_symbols); |
| 309 BROTLI_FREE(m, clusters); |
| 310 /* Convert the context map to a canonical form. */ |
| 311 *out_size = FN(BrotliHistogramReindex)(m, out, histogram_symbols, in_size); |
| 312 if (BROTLI_IS_OOM(m)) return; |
| 313 }) |
317 | 314 |
318 // Find the optimal map from original histograms to the final ones. | 315 #undef HistogramType |
319 HistogramRemap(&in[0], in_size, &clusters[0], num_clusters, | |
320 &(*out)[0], &(*histogram_symbols)[0]); | |
321 | |
322 // Convert the context map to a canonical form. | |
323 size_t num_histograms = | |
324 HistogramReindex(&(*out)[0], &(*histogram_symbols)[0], in_size); | |
325 out->resize(num_histograms); | |
326 } | |
327 | |
328 } // namespace brotli | |
329 | |
330 #endif // BROTLI_ENC_CLUSTER_H_ | |
OLD | NEW |