/* Copyright 2010 Google Inc. All Rights Reserved.

   Distributed under MIT license.
   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/

// Entropy encoding (Huffman) utilities.

#include "./entropy_encode.h"

#include <algorithm>
#include <cassert>  // assert()
#include <cstdlib>
#include <cstring>  // memset()
#include <limits>

#include "./histogram.h"
#include "./port.h"
#include "./types.h"

namespace brotli {

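// Walks the subtree rooted at p and records each leaf's depth (its code
// length) in depth[]. Interior nodes have index_left_ >= 0; leaves store
// their symbol in index_right_or_value_.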
void SetDepth(const HuffmanTree &p,
              HuffmanTree *pool,
              uint8_t *depth,
              uint8_t level) {
  if (p.index_left_ >= 0) {
    ++level;
    SetDepth(pool[p.index_left_], pool, depth, level);
    SetDepth(pool[p.index_right_or_value_], pool, depth, level);
  } else {
    depth[p.index_right_or_value_] = level;
  }
}

// Sort the root nodes, least popular first; ties are broken by the symbol
// value so that the sort is deterministic.
static inline bool SortHuffmanTree(const HuffmanTree& v0,
                                   const HuffmanTree& v1) {
  if (v0.total_count_ != v1.total_count_) {
    return v0.total_count_ < v1.total_count_;
  }
  return v0.index_right_or_value_ > v1.index_right_or_value_;
}

// This function creates a Huffman tree.
//
// The catch here is that the tree cannot be arbitrarily deep.
// Brotli specifies a maximum depth of 15 bits for "code trees"
// and 7 bits for "code length code trees".
//
// count_limit is the value that is faked as the minimum population count;
// it is doubled until the resulting tree satisfies the maximum depth
// requirement.
//
// This algorithm does not perform well for very long data blocks,
// especially when population counts exceed 2**tree_limit, but
// we are not planning to use it with extremely long blocks.
//
// See http://en.wikipedia.org/wiki/Huffman_coding
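//
// A minimal usage sketch (kSize and histogram are illustrative names, not
// defined here); tree must have room for 2 * length + 1 entries:
//
//   HuffmanTree tree[2 * kSize + 1];
//   uint8_t depth[kSize] = { 0 };
//   CreateHuffmanTree(histogram, kSize, 15, tree, depth);
//   // depth[i] now holds the code length of symbol i, 0 if unused.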
void CreateHuffmanTree(const uint32_t *data,
                       const size_t length,
                       const int tree_limit,
                       HuffmanTree* tree,
                       uint8_t *depth) {
  // For block sizes below 64 kB, we never need to do a second iteration
  // of this loop. Probably all of our block sizes will be smaller than
  // that, so this loop is mostly of academic interest. If we actually
  // would need this, we would be better off with the Katajainen algorithm.
  for (uint32_t count_limit = 1; ; count_limit *= 2) {
    size_t n = 0;
    for (size_t i = length; i != 0;) {
      --i;
      if (data[i]) {
        const uint32_t count = std::max(data[i], count_limit);
        tree[n++] = HuffmanTree(count, -1, static_cast<int16_t>(i));
      }
    }

    if (n == 1) {
      depth[tree[0].index_right_or_value_] = 1;  // Only one element.
      break;
    }

    std::sort(tree, tree + n, SortHuffmanTree);

    // The nodes are:
    // [0, n): the sorted leaf nodes that we start with.
    // [n]: we add a sentinel here.
    // [n + 1, 2n): new parent nodes are added here, starting from
    //              (n + 1). These are naturally in ascending order.
    // [2n]: we add a sentinel at the end as well.
    // There will be (2n + 1) elements at the end.
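    // For example, with n == 4 the array right before the loop is
    //   [leaf0 leaf1 leaf2 leaf3 | sentinel sentinel]
    // and the loop overwrites slots 5..7 with parent nodes, always keeping
    // one sentinel in the slot just past the last parent written.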
    const HuffmanTree sentinel(std::numeric_limits<uint32_t>::max(), -1, -1);
    tree[n] = sentinel;
    tree[n + 1] = sentinel;

    size_t i = 0;      // Points to the next leaf node.
    size_t j = n + 1;  // Points to the next non-leaf node.
    for (size_t k = n - 1; k != 0; --k) {
      size_t left, right;
      if (tree[i].total_count_ <= tree[j].total_count_) {
        left = i;
        ++i;
      } else {
        left = j;
        ++j;
      }
      if (tree[i].total_count_ <= tree[j].total_count_) {
        right = i;
        ++i;
      } else {
        right = j;
        ++j;
      }

      // The sentinel node becomes the parent node.
      size_t j_end = 2 * n - k;
      tree[j_end].total_count_ =
          tree[left].total_count_ + tree[right].total_count_;
      tree[j_end].index_left_ = static_cast<int16_t>(left);
      tree[j_end].index_right_or_value_ = static_cast<int16_t>(right);

      // Add back the last sentinel node.
      tree[j_end + 1] = sentinel;
    }
    SetDepth(tree[2 * n - 1], &tree[0], depth, 0);

    // We need to pack the Huffman tree in tree_limit bits. If this was not
    // successful, double the faked minimum population count (count_limit)
    // and retry.
    if (*std::max_element(&depth[0], &depth[length]) <= tree_limit) {
      break;
    }
  }
}

static void Reverse(uint8_t* v, size_t start, size_t end) {
  --end;
  while (start < end) {
    uint8_t tmp = v[start];
    v[start] = v[end];
    v[end] = tmp;
    ++start;
    --end;
  }
}

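// Writes a run of `value` into the code-length stream. Code 16 repeats the
// previous non-zero code length (2 extra bits, runs of 3..6), code 17
// repeats zero (3 extra bits, runs of 3..10). In this encoder, longer runs
// are expressed by chaining several such codes whose extra bits in effect
// form base-4 (or base-8) digits; the digits are produced least significant
// first and then flipped, hence the Reverse calls below.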
static void WriteHuffmanTreeRepetitions(
    const uint8_t previous_value,
    const uint8_t value,
    size_t repetitions,
    size_t* tree_size,
    uint8_t* tree,
    uint8_t* extra_bits_data) {
  assert(repetitions > 0);
  if (previous_value != value) {
    tree[*tree_size] = value;
    extra_bits_data[*tree_size] = 0;
    ++(*tree_size);
    --repetitions;
  }
  if (repetitions == 7) {
    tree[*tree_size] = value;
    extra_bits_data[*tree_size] = 0;
    ++(*tree_size);
    --repetitions;
  }
  if (repetitions < 3) {
    for (size_t i = 0; i < repetitions; ++i) {
      tree[*tree_size] = value;
      extra_bits_data[*tree_size] = 0;
      ++(*tree_size);
    }
  } else {
    repetitions -= 3;
    size_t start = *tree_size;
    while (true) {
      tree[*tree_size] = 16;
      extra_bits_data[*tree_size] = static_cast<uint8_t>(repetitions & 0x3);
      ++(*tree_size);
      repetitions >>= 2;
      if (repetitions == 0) {
        break;
      }
      --repetitions;
    }
    Reverse(tree, start, *tree_size);
    Reverse(extra_bits_data, start, *tree_size);
  }
}

static void WriteHuffmanTreeRepetitionsZeros(
    size_t repetitions,
    size_t* tree_size,
    uint8_t* tree,
    uint8_t* extra_bits_data) {
  if (repetitions == 11) {
    tree[*tree_size] = 0;
    extra_bits_data[*tree_size] = 0;
    ++(*tree_size);
    --repetitions;
  }
  if (repetitions < 3) {
    for (size_t i = 0; i < repetitions; ++i) {
      tree[*tree_size] = 0;
      extra_bits_data[*tree_size] = 0;
      ++(*tree_size);
    }
  } else {
    repetitions -= 3;
    size_t start = *tree_size;
    while (true) {
      tree[*tree_size] = 17;
      extra_bits_data[*tree_size] = static_cast<uint8_t>(repetitions & 0x7);
      ++(*tree_size);
      repetitions >>= 3;
      if (repetitions == 0) {
        break;
      }
      --repetitions;
    }
    Reverse(tree, start, *tree_size);
    Reverse(extra_bits_data, start, *tree_size);
  }
}

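// Smooths the population counts so that the code lengths computed from them
// contain longer runs and therefore compress better with the rle codes
// above. Strides of similar counts are collapsed to their rounded average;
// good_for_rle must have room for `length` flags.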
void OptimizeHuffmanCountsForRle(size_t length, uint32_t* counts,
                                 uint8_t* good_for_rle) {
  size_t nonzero_count = 0;
  size_t stride;
  size_t limit;
  size_t sum;
  const size_t streak_limit = 1240;
  // Let's make the Huffman code more compatible with rle encoding.
  size_t i;
  for (i = 0; i < length; i++) {
    if (counts[i]) {
      ++nonzero_count;
    }
  }
  if (nonzero_count < 16) {
    return;
  }
  while (length != 0 && counts[length - 1] == 0) {
    --length;
  }
  if (length == 0) {
    return;  // All zeros.
  }
  // Now counts[0..length - 1] does not have trailing zeros.
  // 1) Let's fill in isolated zeros between nonzero counts (only when the
  //    counts are small and zeros are rare), so that a lone zero does not
  //    break an otherwise good rle run.
  {
    size_t nonzeros = 0;
    uint32_t smallest_nonzero = 1 << 30;
    for (i = 0; i < length; ++i) {
      if (counts[i] != 0) {
        ++nonzeros;
        if (smallest_nonzero > counts[i]) {
          smallest_nonzero = counts[i];
        }
      }
    }
    if (nonzeros < 5) {
      // Small histogram will model it well.
      return;
    }
    size_t zeros = length - nonzeros;
    if (smallest_nonzero < 4) {
      if (zeros < 6) {
        for (i = 1; i < length - 1; ++i) {
          if (counts[i - 1] != 0 && counts[i] == 0 && counts[i + 1] != 0) {
            counts[i] = 1;
          }
        }
      }
    }
    if (nonzeros < 28) {
      return;
    }
  }
  // 2) Let's mark all population counts that already can be encoded
  //    with an rle code.
  memset(good_for_rle, 0, length);
  {
    // Let's not spoil any of the existing good rle codes.
    // Mark any seq of 0's that is 5 or longer as good_for_rle.
    // Mark any seq of non-0's that is 7 or longer as good_for_rle.
    uint32_t symbol = counts[0];
    size_t step = 0;
    for (i = 0; i <= length; ++i) {
      if (i == length || counts[i] != symbol) {
        if ((symbol == 0 && step >= 5) ||
            (symbol != 0 && step >= 7)) {
          size_t k;
          for (k = 0; k < step; ++k) {
            good_for_rle[i - k - 1] = 1;
          }
        }
        step = 1;
        if (i != length) {
          symbol = counts[i];
        }
      } else {
        ++step;
      }
    }
  }
  // 3) Let's replace those population counts that lead to more rle codes.
  //    Math here is in 24.8 fixed point representation.
  stride = 0;
  limit = 256 * (counts[0] + counts[1] + counts[2]) / 3 + 420;
  sum = 0;
  for (i = 0; i <= length; ++i) {
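    // Note: the arithmetic below is unsigned, so the third test is in
    // effect |256 * counts[i] - limit| >= streak_limit; a count that is too
    // far from the local average ends the current stride.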
    if (i == length || good_for_rle[i] ||
        (i != 0 && good_for_rle[i - 1]) ||
        (256 * counts[i] - limit + streak_limit) >= 2 * streak_limit) {
      if (stride >= 4 || (stride >= 3 && sum == 0)) {
        size_t k;
        // The stride must end; collapse it into its average count if it is
        // long enough (4, or 3 when all-zero).
        size_t count = (sum + stride / 2) / stride;
        if (count == 0) {
          count = 1;
        }
        if (sum == 0) {
          // Don't let an all-zeros stride be upgraded to ones.
          count = 0;
        }
        for (k = 0; k < stride; ++k) {
          // We don't want to change the value at counts[i], which already
          // belongs to the next stride. Thus the - 1.
          counts[i - k - 1] = static_cast<uint32_t>(count);
        }
      }
      stride = 0;
      sum = 0;
      if (i < length - 2) {
        // All interesting strides have a count of at least 4,
        // at least when non-zeros.
        limit = 256 * (counts[i] + counts[i + 1] + counts[i + 2]) / 3 + 420;
      } else if (i < length) {
        limit = 256 * counts[i];
      } else {
        limit = 0;
      }
    }
    ++stride;
    if (i != length) {
      sum += counts[i];
      if (stride >= 4) {
        limit = (256 * sum + stride / 2) / stride;
      }
      if (stride == 4) {
        limit += 120;
      }
    }
  }
}

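// Decides, separately for zero and non-zero code lengths, whether runs are
// frequent enough for the rle codes to pay off: rle is enabled when the
// repeated symbols outnumber the runs by more than a factor of two.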
static void DecideOverRleUse(const uint8_t* depth, const size_t length,
                             bool *use_rle_for_non_zero,
                             bool *use_rle_for_zero) {
  size_t total_reps_zero = 0;
  size_t total_reps_non_zero = 0;
  size_t count_reps_zero = 1;
  size_t count_reps_non_zero = 1;
  for (size_t i = 0; i < length;) {
    const uint8_t value = depth[i];
    size_t reps = 1;
    for (size_t k = i + 1; k < length && depth[k] == value; ++k) {
      ++reps;
    }
    if (reps >= 3 && value == 0) {
      total_reps_zero += reps;
      ++count_reps_zero;
    }
    if (reps >= 4 && value != 0) {
      total_reps_non_zero += reps;
      ++count_reps_non_zero;
    }
    i += reps;
  }
  *use_rle_for_non_zero = total_reps_non_zero > count_reps_non_zero * 2;
  *use_rle_for_zero = total_reps_zero > count_reps_zero * 2;
}

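// Serializes the code lengths in depth[0..length) into the code-length
// alphabet (tree / extra_bits_data), trimming trailing zeros and applying
// the rle codes 16 and 17 where DecideOverRleUse finds them profitable.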
void WriteHuffmanTree(const uint8_t* depth,
                      size_t length,
                      size_t* tree_size,
                      uint8_t* tree,
                      uint8_t* extra_bits_data) {
  uint8_t previous_value = 8;

  // Throw away trailing zeros.
  size_t new_length = length;
  for (size_t i = 0; i < length; ++i) {
    if (depth[length - i - 1] == 0) {
      --new_length;
    } else {
      break;
    }
  }

  // First gather statistics on whether it is a good idea to do rle.
  bool use_rle_for_non_zero = false;
  bool use_rle_for_zero = false;
  if (length > 50) {
    // Find rle coding for longer codes.
    // Shorter codes seem not to benefit from rle.
    DecideOverRleUse(depth, new_length,
                     &use_rle_for_non_zero, &use_rle_for_zero);
  }

  // Actual rle coding.
  for (size_t i = 0; i < new_length;) {
    const uint8_t value = depth[i];
    size_t reps = 1;
    if ((value != 0 && use_rle_for_non_zero) ||
        (value == 0 && use_rle_for_zero)) {
      for (size_t k = i + 1; k < new_length && depth[k] == value; ++k) {
        ++reps;
      }
    }
    if (value == 0) {
      WriteHuffmanTreeRepetitionsZeros(reps, tree_size, tree, extra_bits_data);
    } else {
      WriteHuffmanTreeRepetitions(previous_value,
                                  value, reps, tree_size,
                                  tree, extra_bits_data);
      previous_value = value;
    }
    i += reps;
  }
}

namespace {

uint16_t ReverseBits(int num_bits, uint16_t bits) {
  static const size_t kLut[16] = {  // Pre-reversed 4-bit values.
    0x0, 0x8, 0x4, 0xc, 0x2, 0xa, 0x6, 0xe,
    0x1, 0x9, 0x5, 0xd, 0x3, 0xb, 0x7, 0xf
  };
  size_t retval = kLut[bits & 0xf];
  for (int i = 4; i < num_bits; i += 4) {
    retval <<= 4;
    bits = static_cast<uint16_t>(bits >> 4);
    retval |= kLut[bits & 0xf];
  }
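  // We reversed a multiple of 4 bits; drop the surplus low bits when
  // num_bits is not a multiple of 4 (e.g. 8 - 5 = 3 bits for num_bits == 5).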
  retval >>= (-num_bits & 0x3);
  return static_cast<uint16_t>(retval);
}

}  // namespace

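// Builds the canonical Huffman codes for the given code lengths, assigning
// consecutive codes within each bit depth in symbol order; the code bits
// are reversed so that they can be emitted least-significant-bit first,
// matching how Brotli packs its bit stream.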
void ConvertBitDepthsToSymbols(const uint8_t *depth,
                               size_t len,
                               uint16_t *bits) {
  // In Brotli, all bit depths are [1..15].
  // 0 bit depth means that the symbol does not exist.
  const int kMaxBits = 16;  // 0..15 are values for bits.
  uint16_t bl_count[kMaxBits] = { 0 };
  {
    for (size_t i = 0; i < len; ++i) {
      ++bl_count[depth[i]];
    }
    bl_count[0] = 0;
  }
  uint16_t next_code[kMaxBits];
  next_code[0] = 0;
  {
    int code = 0;
    for (int bit_depth = 1; bit_depth < kMaxBits; ++bit_depth) {
      code = (code + bl_count[bit_depth - 1]) << 1;
      next_code[bit_depth] = static_cast<uint16_t>(code);
    }
  }
  for (size_t i = 0; i < len; ++i) {
    if (depth[i]) {
      bits[i] = ReverseBits(depth[i], next_code[depth[i]]++);
    }
  }
}

}  // namespace brotli