OLD | NEW |
| 1 /* NOLINT(build/header_guard) */ |
1 /* Copyright 2013 Google Inc. All Rights Reserved. | 2 /* Copyright 2013 Google Inc. All Rights Reserved. |
2 | 3 |
3 Distributed under MIT license. | 4 Distributed under MIT license. |
4 See file LICENSE for detail or copy at https://opensource.org/licenses/MIT | 5 See file LICENSE for detail or copy at https://opensource.org/licenses/MIT |
5 */ | 6 */ |
6 | 7 |
7 // Functions to estimate the bit cost of Huffman trees. | 8 /* template parameters: FN */ |
8 | 9 |
9 #ifndef BROTLI_ENC_BIT_COST_H_ | 10 #define HistogramType FN(Histogram) |
10 #define BROTLI_ENC_BIT_COST_H_ | |
11 | 11 |
12 #include "./entropy_encode.h" | 12 double FN(BrotliPopulationCost)(const HistogramType* histogram) { |
13 #include "./fast_log.h" | |
14 #include "./types.h" | |
15 | |
16 namespace brotli { | |
17 | |
18 static inline double ShannonEntropy(const uint32_t *population, size_t size, | |
19 size_t *total) { | |
20 size_t sum = 0; | |
21 double retval = 0; | |
22 const uint32_t *population_end = population + size; | |
23 size_t p; | |
24 if (size & 1) { | |
25 goto odd_number_of_elements_left; | |
26 } | |
27 while (population < population_end) { | |
28 p = *population++; | |
29 sum += p; | |
30 retval -= static_cast<double>(p) * FastLog2(p); | |
31 odd_number_of_elements_left: | |
32 p = *population++; | |
33 sum += p; | |
34 retval -= static_cast<double>(p) * FastLog2(p); | |
35 } | |
36 if (sum) retval += static_cast<double>(sum) * FastLog2(sum); | |
37 *total = sum; | |
38 return retval; | |
39 } | |
40 | |
41 static inline double BitsEntropy(const uint32_t *population, size_t size) { | |
42 size_t sum; | |
43 double retval = ShannonEntropy(population, size, &sum); | |
44 if (retval < sum) { | |
45 // At least one bit per literal is needed. | |
46 retval = static_cast<double>(sum); | |
47 } | |
48 return retval; | |
49 } | |
50 | |
51 template<int kSize> | |
52 double PopulationCost(const Histogram<kSize>& histogram) { | |
53 static const double kOneSymbolHistogramCost = 12; | 13 static const double kOneSymbolHistogramCost = 12; |
54 static const double kTwoSymbolHistogramCost = 20; | 14 static const double kTwoSymbolHistogramCost = 20; |
55 static const double kThreeSymbolHistogramCost = 28; | 15 static const double kThreeSymbolHistogramCost = 28; |
56 static const double kFourSymbolHistogramCost = 37; | 16 static const double kFourSymbolHistogramCost = 37; |
57 if (histogram.total_count_ == 0) { | 17 const size_t data_size = FN(HistogramDataSize)(); |
| 18 int count = 0; |
| 19 size_t s[5]; |
| 20 double bits = 0.0; |
| 21 size_t i; |
| 22 if (histogram->total_count_ == 0) { |
58 return kOneSymbolHistogramCost; | 23 return kOneSymbolHistogramCost; |
59 } | 24 } |
60 int count = 0; | 25 for (i = 0; i < data_size; ++i) { |
61 int s[5]; | 26 if (histogram->data_[i] > 0) { |
62 for (int i = 0; i < kSize; ++i) { | |
63 if (histogram.data_[i] > 0) { | |
64 s[count] = i; | 27 s[count] = i; |
65 ++count; | 28 ++count; |
66 if (count > 4) break; | 29 if (count > 4) break; |
67 } | 30 } |
68 } | 31 } |
69 if (count == 1) { | 32 if (count == 1) { |
70 return kOneSymbolHistogramCost; | 33 return kOneSymbolHistogramCost; |
71 } | 34 } |
72 if (count == 2) { | 35 if (count == 2) { |
73 return (kTwoSymbolHistogramCost + | 36 return (kTwoSymbolHistogramCost + (double)histogram->total_count_); |
74 static_cast<double>(histogram.total_count_)); | |
75 } | 37 } |
76 if (count == 3) { | 38 if (count == 3) { |
77 const uint32_t histo0 = histogram.data_[s[0]]; | 39 const uint32_t histo0 = histogram->data_[s[0]]; |
78 const uint32_t histo1 = histogram.data_[s[1]]; | 40 const uint32_t histo1 = histogram->data_[s[1]]; |
79 const uint32_t histo2 = histogram.data_[s[2]]; | 41 const uint32_t histo2 = histogram->data_[s[2]]; |
80 const uint32_t histomax = std::max(histo0, std::max(histo1, histo2)); | 42 const uint32_t histomax = |
| 43 BROTLI_MAX(uint32_t, histo0, BROTLI_MAX(uint32_t, histo1, histo2)); |
81 return (kThreeSymbolHistogramCost + | 44 return (kThreeSymbolHistogramCost + |
82 2 * (histo0 + histo1 + histo2) - histomax); | 45 2 * (histo0 + histo1 + histo2) - histomax); |
83 } | 46 } |
84 if (count == 4) { | 47 if (count == 4) { |
85 uint32_t histo[4]; | 48 uint32_t histo[4]; |
86 for (int i = 0; i < 4; ++i) { | 49 uint32_t h23; |
87 histo[i] = histogram.data_[s[i]]; | 50 uint32_t histomax; |
| 51 for (i = 0; i < 4; ++i) { |
| 52 histo[i] = histogram->data_[s[i]]; |
88 } | 53 } |
89 // Sort | 54 /* Sort */ |
90 for (int i = 0; i < 4; ++i) { | 55 for (i = 0; i < 4; ++i) { |
91 for (int j = i + 1; j < 4; ++j) { | 56 size_t j; |
| 57 for (j = i + 1; j < 4; ++j) { |
92 if (histo[j] > histo[i]) { | 58 if (histo[j] > histo[i]) { |
93 std::swap(histo[j], histo[i]); | 59 BROTLI_SWAP(uint32_t, histo, j, i); |
94 } | 60 } |
95 } | 61 } |
96 } | 62 } |
97 const uint32_t h23 = histo[2] + histo[3]; | 63 h23 = histo[2] + histo[3]; |
98 const uint32_t histomax = std::max(h23, histo[0]); | 64 histomax = BROTLI_MAX(uint32_t, h23, histo[0]); |
99 return (kFourSymbolHistogramCost + | 65 return (kFourSymbolHistogramCost + |
100 3 * h23 + 2 * (histo[0] + histo[1]) - histomax); | 66 3 * h23 + 2 * (histo[0] + histo[1]) - histomax); |
101 } | 67 } |
102 | 68 |
103 // In this loop we compute the entropy of the histogram and simultaneously | 69 { |
104 // build a simplified histogram of the code length codes where we use the | 70 /* In this loop we compute the entropy of the histogram and simultaneously |
105 // zero repeat code 17, but we don't use the non-zero repeat code 16. | 71 build a simplified histogram of the code length codes where we use the |
106 double bits = 0; | 72 zero repeat code 17, but we don't use the non-zero repeat code 16. */ |
107 size_t max_depth = 1; | 73 size_t max_depth = 1; |
108 uint32_t depth_histo[kCodeLengthCodes] = { 0 }; | 74 uint32_t depth_histo[BROTLI_CODE_LENGTH_CODES] = { 0 }; |
109 const double log2total = FastLog2(histogram.total_count_); | 75 const double log2total = FastLog2(histogram->total_count_); |
110 for (size_t i = 0; i < kSize;) { | 76 for (i = 0; i < data_size;) { |
111 if (histogram.data_[i] > 0) { | 77 if (histogram->data_[i] > 0) { |
112 // Compute -log2(P(symbol)) = -log2(count(symbol)/total_count) = | 78 /* Compute -log2(P(symbol)) = -log2(count(symbol)/total_count) = |
113 // = log2(total_count) - log2(count(symbol)) | 79 = log2(total_count) - log2(count(symbol)) */ |
114 double log2p = log2total - FastLog2(histogram.data_[i]); | 80 double log2p = log2total - FastLog2(histogram->data_[i]); |
115 // Approximate the bit depth by round(-log2(P(symbol))) | 81 /* Approximate the bit depth by round(-log2(P(symbol))) */ |
116 size_t depth = static_cast<size_t>(log2p + 0.5); | 82 size_t depth = (size_t)(log2p + 0.5); |
117 bits += histogram.data_[i] * log2p; | 83 bits += histogram->data_[i] * log2p; |
118 if (depth > 15) { | 84 if (depth > 15) { |
119 depth = 15; | 85 depth = 15; |
120 } | 86 } |
121 if (depth > max_depth) { | 87 if (depth > max_depth) { |
122 max_depth = depth; | 88 max_depth = depth; |
123 } | 89 } |
124 ++depth_histo[depth]; | 90 ++depth_histo[depth]; |
125 ++i; | 91 ++i; |
126 } else { | |
127 // Compute the run length of zeros and add the appropriate number of 0 and | |
128 // 17 code length codes to the code length code histogram. | |
129 uint32_t reps = 1; | |
130 for (size_t k = i + 1; k < kSize && histogram.data_[k] == 0; ++k) { | |
131 ++reps; | |
132 } | |
133 i += reps; | |
134 if (i == kSize) { | |
135 // Don't add any cost for the last zero run, since these are encoded | |
136 // only implicitly. | |
137 break; | |
138 } | |
139 if (reps < 3) { | |
140 depth_histo[0] += reps; | |
141 } else { | 92 } else { |
142 reps -= 2; | 93 /* Compute the run length of zeros and add the appropriate number of 0 |
143 while (reps > 0) { | 94 and 17 code length codes to the code length code histogram. */ |
144 ++depth_histo[17]; | 95 uint32_t reps = 1; |
145 // Add the 3 extra bits for the 17 code length code. | 96 size_t k; |
146 bits += 3; | 97 for (k = i + 1; k < data_size && histogram->data_[k] == 0; ++k) { |
147 reps >>= 3; | 98 ++reps; |
| 99 } |
| 100 i += reps; |
| 101 if (i == data_size) { |
| 102 /* Don't add any cost for the last zero run, since these are encoded |
| 103 only implicitly. */ |
| 104 break; |
| 105 } |
| 106 if (reps < 3) { |
| 107 depth_histo[0] += reps; |
| 108 } else { |
| 109 reps -= 2; |
| 110 while (reps > 0) { |
| 111 ++depth_histo[BROTLI_REPEAT_ZERO_CODE_LENGTH]; |
| 112 /* Add the 3 extra bits for the 17 code length code. */ |
| 113 bits += 3; |
| 114 reps >>= 3; |
| 115 } |
148 } | 116 } |
149 } | 117 } |
150 } | 118 } |
| 119 /* Add the estimated encoding cost of the code length code histogram. */ |
| 120 bits += (double)(18 + 2 * max_depth); |
| 121 /* Add the entropy of the code length code histogram. */ |
| 122 bits += BitsEntropy(depth_histo, BROTLI_CODE_LENGTH_CODES); |
151 } | 123 } |
152 // Add the estimated encoding cost of the code length code histogram. | |
153 bits += static_cast<double>(18 + 2 * max_depth); | |
154 // Add the entropy of the code length code histogram. | |
155 bits += BitsEntropy(depth_histo, kCodeLengthCodes); | |
156 return bits; | 124 return bits; |
157 } | 125 } |
158 | 126 |
159 } // namespace brotli | 127 #undef HistogramType |
160 | |
161 #endif // BROTLI_ENC_BIT_COST_H_ | |
OLD | NEW |