OLD | NEW |
1 /* Copyright 2013 Google Inc. All Rights Reserved. | 1 /* Copyright 2013 Google Inc. All Rights Reserved. |
2 | 2 |
3 Distributed under MIT license. | 3 Distributed under MIT license. |
4 See file LICENSE for detail or copy at https://opensource.org/licenses/MIT | 4 See file LICENSE for detail or copy at https://opensource.org/licenses/MIT |
5 */ | 5 */ |
6 | 6 |
7 // Functions to estimate the bit cost of Huffman trees. | 7 /* Functions to estimate the bit cost of Huffman trees. */ |
8 | 8 |
9 #ifndef BROTLI_ENC_BIT_COST_H_ | 9 #ifndef BROTLI_ENC_BIT_COST_H_ |
10 #define BROTLI_ENC_BIT_COST_H_ | 10 #define BROTLI_ENC_BIT_COST_H_ |
11 | 11 |
12 #include "./entropy_encode.h" | 12 #include <brotli/types.h> |
13 #include "./fast_log.h" | 13 #include "./fast_log.h" |
14 #include "./types.h" | 14 #include "./histogram.h" |
| 15 #include "./port.h" |
15 | 16 |
16 namespace brotli { | 17 #if defined(__cplusplus) || defined(c_plusplus) |
| 18 extern "C" { |
| 19 #endif |
17 | 20 |
/* ShannonEntropy -- both revisions are shown side by side (OLD | NEW).
   Walks `size` entries of `population`; for every entry p it adds p to a
   running sum and subtracts p * FastLog2(p) from the result. After the walk,
   if the sum is non-zero, sum * FastLog2(sum) is added. The sum is stored
   through `total` and the accumulated value is returned.
   The visible differences between the columns are `inline` ->
   `BROTLI_INLINE`, `static_cast<double>(x)` -> `(double)x`, and the line
   wrapping of the signature.
   NOTE(review): zero entries are passed to FastLog2 as well; presumably
   FastLog2(0) yields a finite value so that they contribute nothing --
   confirm in fast_log.h, which is not visible here. */
18 static inline double ShannonEntropy(const uint32_t *population, size_t size, | 21 static BROTLI_INLINE double ShannonEntropy(const uint32_t *population, |
19 size_t *total) { | 22 size_t size, size_t *total) { |
20 size_t sum = 0; | 23 size_t sum = 0; |
21 double retval = 0; | 24 double retval = 0; |
22 const uint32_t *population_end = population + size; | 25 const uint32_t *population_end = population + size; |
23 size_t p; | 26 size_t p; |
/* The loop body below handles two entries per iteration. For an odd `size`
   the function jumps straight to the second half of the body, so exactly one
   entry is consumed first and the remaining count is even. */
24 if (size & 1) { | 27 if (size & 1) { |
25 goto odd_number_of_elements_left; | 28 goto odd_number_of_elements_left; |
26 } | 29 } |
27 while (population < population_end) { | 30 while (population < population_end) { |
28 p = *population++; | 31 p = *population++; |
29 sum += p; | 32 sum += p; |
30 retval -= static_cast<double>(p) * FastLog2(p); | 33 retval -= (double)p * FastLog2(p); |
31 odd_number_of_elements_left: | 34 odd_number_of_elements_left: |
32 p = *population++; | 35 p = *population++; |
33 sum += p; | 36 sum += p; |
34 retval -= static_cast<double>(p) * FastLog2(p); | 37 retval -= (double)p * FastLog2(p); |
35 } | 38 } |
/* The sum term is skipped entirely when every entry was zero. */
36 if (sum) retval += static_cast<double>(sum) * FastLog2(sum); | 39 if (sum) retval += (double)sum * FastLog2(sum); |
37 *total = sum; | 40 *total = sum; |
38 return retval; | 41 return retval; |
39 } | 42 } |
40 | 43 |
/* BitsEntropy -- both revisions are shown side by side (OLD | NEW).
   Returns ShannonEntropy(population, size), raised to the population sum
   whenever the entropy value is smaller than that sum. The two columns
   differ only in `inline` -> `BROTLI_INLINE`, the cast syntax, the comment
   style and the wrapping of the signature. */
41 static inline double BitsEntropy(const uint32_t *population, size_t size) { | 44 static BROTLI_INLINE double BitsEntropy( |
| 45 const uint32_t *population, size_t size) { |
42 size_t sum; | 46 size_t sum; |
43 double retval = ShannonEntropy(population, size, &sum); | 47 double retval = ShannonEntropy(population, size, &sum); |
/* `sum` is converted to double for this comparison; it acts as a lower bound
   on the returned value. */
44 if (retval < sum) { | 48 if (retval < sum) { |
45 // At least one bit per literal is needed. | 49 /* At least one bit per literal is needed. */ |
46 retval = static_cast<double>(sum); | 50 retval = (double)sum; |
47 } | 51 } |
48 return retval; | 52 return retval; |
49 } | 53 } |
50 | 54 |
/* OLD column: the template PopulationCost<kSize>, which estimates a bit cost
   for `histogram` from its data_[0..kSize) counts and its total_count_.
     - total_count_ == 0, or exactly one non-zero count: returns 12.
     - two non-zero counts:   20 + total_count_.
     - three non-zero counts: 28 + 2 * (sum of the three) - (largest).
     - four non-zero counts:  37 + 3 * (two smallest) + 2 * (two largest)
                              - max(sum of two smallest, largest).
     - otherwise: an entropy-style estimate over all counts plus an estimate
       for a simplified code-length-code histogram (see the loop below).
   NEW column: the template body is gone from this header; only three
   BROTLI_INTERNAL declarations (literal / command / distance histograms)
   remain, followed by the closing of the extern "C" block. Their definitions
   are not visible here.
   NOTE(review): the OLD code uses std::max, std::swap and kCodeLengthCodes,
   none of which are declared in the visible part of this header; presumably
   they come in through the included headers -- confirm. */
51 template<int kSize> | 55 BROTLI_INTERNAL double BrotliPopulationCostLiteral(const HistogramLiteral*); |
52 double PopulationCost(const Histogram<kSize>& histogram) { | 56 BROTLI_INTERNAL double BrotliPopulationCostCommand(const HistogramCommand*); |
53 static const double kOneSymbolHistogramCost = 12; | 57 BROTLI_INTERNAL double BrotliPopulationCostDistance(const HistogramDistance*); |
54 static const double kTwoSymbolHistogramCost = 20; | |
55 static const double kThreeSymbolHistogramCost = 28; | |
56 static const double kFourSymbolHistogramCost = 37; | |
/* An empty histogram is charged the same as a single-symbol one. */
57 if (histogram.total_count_ == 0) { | |
58 return kOneSymbolHistogramCost; | |
59 } | |
/* Record the indices of the first non-zero counts in s[]. The scan stops as
   soon as a fifth one is found, so count == 5 means "five or more" and s[]
   never receives more than five entries. */
60 int count = 0; | |
61 int s[5]; | |
62 for (int i = 0; i < kSize; ++i) { | |
63 if (histogram.data_[i] > 0) { | |
64 s[count] = i; | |
65 ++count; | |
66 if (count > 4) break; | |
67 } | |
68 } | |
69 if (count == 1) { | |
70 return kOneSymbolHistogramCost; | |
71 } | |
72 if (count == 2) { | |
73 return (kTwoSymbolHistogramCost + | |
74 static_cast<double>(histogram.total_count_)); | |
75 } | |
76 if (count == 3) { | |
77 const uint32_t histo0 = histogram.data_[s[0]]; | |
78 const uint32_t histo1 = histogram.data_[s[1]]; | |
79 const uint32_t histo2 = histogram.data_[s[2]]; | |
80 const uint32_t histomax = std::max(histo0, std::max(histo1, histo2)); | |
/* The count arithmetic on the next rows is carried out in uint32_t before
   being added to the double constant. */
81 return (kThreeSymbolHistogramCost + | |
82 2 * (histo0 + histo1 + histo2) - histomax); | |
83 } | |
84 if (count == 4) { | |
85 uint32_t histo[4]; | |
86 for (int i = 0; i < 4; ++i) { | |
87 histo[i] = histogram.data_[s[i]]; | |
88 } | |
/* The exchange sort below orders histo[] from largest to smallest, so
   histo[0] is the maximum and histo[2], histo[3] are the two smallest. */
89 // Sort | |
90 for (int i = 0; i < 4; ++i) { | |
91 for (int j = i + 1; j < 4; ++j) { | |
92 if (histo[j] > histo[i]) { | |
93 std::swap(histo[j], histo[i]); | |
94 } | |
95 } | |
96 } | |
97 const uint32_t h23 = histo[2] + histo[3]; | |
98 const uint32_t histomax = std::max(h23, histo[0]); | |
99 return (kFourSymbolHistogramCost + | |
100 3 * h23 + 2 * (histo[0] + histo[1]) - histomax); | |
101 } | |
102 | 58 |
/* General case (five or more non-zero counts) starts here in the OLD column.
   The NEW column on the next three rows only closes the extern "C" block. */
103 // In this loop we compute the entropy of the histogram and simultaneously | 59 #if defined(__cplusplus) || defined(c_plusplus) |
104 // build a simplified histogram of the code length codes where we use the | 60 } /* extern "C" */ |
105 // zero repeat code 17, but we don't use the non-zero repeat code 16. | 61 #endif |
106 double bits = 0; | |
107 size_t max_depth = 1; | |
/* depth_histo is indexed with depths 0..15 and with 17 below, so
   kCodeLengthCodes must be at least 18 -- its definition is not visible
   here. */
108 uint32_t depth_histo[kCodeLengthCodes] = { 0 }; | |
109 const double log2total = FastLog2(histogram.total_count_); | |
/* `i` is advanced inside the body: by one for a non-zero count, by the whole
   run length for a run of zeros. */
110 for (size_t i = 0; i < kSize;) { | |
111 if (histogram.data_[i] > 0) { | |
112 // Compute -log2(P(symbol)) = -log2(count(symbol)/total_count) = | |
113 // = log2(total_count) - log2(count(symbol)) | |
114 double log2p = log2total - FastLog2(histogram.data_[i]); | |
115 // Approximate the bit depth by round(-log2(P(symbol))) | |
116 size_t depth = static_cast<size_t>(log2p + 0.5); | |
117 bits += histogram.data_[i] * log2p; | |
/* Depths above 15 are clamped to 15 (presumably the longest permitted code
   length -- confirm against the format specification). */
118 if (depth > 15) { | |
119 depth = 15; | |
120 } | |
121 if (depth > max_depth) { | |
122 max_depth = depth; | |
123 } | |
124 ++depth_histo[depth]; | |
125 ++i; | |
126 } else { | |
127 // Compute the run length of zeros and add the appropriate number of 0 and | |
128 // 17 code length codes to the code length code histogram. | |
129 uint32_t reps = 1; | |
130 for (size_t k = i + 1; k < kSize && histogram.data_[k] == 0; ++k) { | |
131 ++reps; | |
132 } | |
133 i += reps; | |
134 if (i == kSize) { | |
135 // Don't add any cost for the last zero run, since these are encoded | |
136 // only implicitly. | |
137 break; | |
138 } | |
/* Runs of one or two zeros are counted as individual depth-0 entries. Longer
   runs add one code-17 entry (and 3 bits) per loop iteration, with the
   remaining run length shifted right by 3 each time. */
139 if (reps < 3) { | |
140 depth_histo[0] += reps; | |
141 } else { | |
142 reps -= 2; | |
143 while (reps > 0) { | |
144 ++depth_histo[17]; | |
145 // Add the 3 extra bits for the 17 code length code. | |
146 bits += 3; | |
147 reps >>= 3; | |
148 } | |
149 } | |
150 } | |
151 } | |
152 // Add the estimated encoding cost of the code length code histogram. | |
153 bits += static_cast<double>(18 + 2 * max_depth); | |
154 // Add the entropy of the code length code histogram. | |
155 bits += BitsEntropy(depth_histo, kCodeLengthCodes); | |
156 return bits; | |
157 } | |
158 | 62 |
159 } // namespace brotli | 63 #endif /* BROTLI_ENC_BIT_COST_H_ */ |
160 | |
161 #endif // BROTLI_ENC_BIT_COST_H_ | |
OLD | NEW |