| OLD | NEW |
| (Empty) | |
| 1 /* Copyright 2013 Google Inc. All Rights Reserved. |
| 2 |
| 3 Distributed under MIT license. |
| 4 See file LICENSE for detail or copy at https://opensource.org/licenses/MIT |
| 5 */ |
| 6 |
| 7 // Functions to estimate the bit cost of Huffman trees. |
| 8 |
| 9 #ifndef BROTLI_ENC_BIT_COST_H_ |
| 10 #define BROTLI_ENC_BIT_COST_H_ |
| 11 |
| 12 #include "./entropy_encode.h" |
| 13 #include "./fast_log.h" |
| 14 #include "./types.h" |
| 15 |
| 16 namespace brotli { |
| 17 |
| 18 static inline double ShannonEntropy(const uint32_t *population, size_t size, |
| 19 size_t *total) { |
| 20 size_t sum = 0; |
| 21 double retval = 0; |
| 22 const uint32_t *population_end = population + size; |
| 23 size_t p; |
| 24 if (size & 1) { |
| 25 goto odd_number_of_elements_left; |
| 26 } |
| 27 while (population < population_end) { |
| 28 p = *population++; |
| 29 sum += p; |
| 30 retval -= static_cast<double>(p) * FastLog2(p); |
| 31 odd_number_of_elements_left: |
| 32 p = *population++; |
| 33 sum += p; |
| 34 retval -= static_cast<double>(p) * FastLog2(p); |
| 35 } |
| 36 if (sum) retval += static_cast<double>(sum) * FastLog2(sum); |
| 37 *total = sum; |
| 38 return retval; |
| 39 } |
| 40 |
| 41 static inline double BitsEntropy(const uint32_t *population, size_t size) { |
| 42 size_t sum; |
| 43 double retval = ShannonEntropy(population, size, &sum); |
| 44 if (retval < sum) { |
| 45 // At least one bit per literal is needed. |
| 46 retval = static_cast<double>(sum); |
| 47 } |
| 48 return retval; |
| 49 } |
| 50 |
| 51 template<int kSize> |
| 52 double PopulationCost(const Histogram<kSize>& histogram) { |
| 53 static const double kOneSymbolHistogramCost = 12; |
| 54 static const double kTwoSymbolHistogramCost = 20; |
| 55 static const double kThreeSymbolHistogramCost = 28; |
| 56 static const double kFourSymbolHistogramCost = 37; |
| 57 if (histogram.total_count_ == 0) { |
| 58 return kOneSymbolHistogramCost; |
| 59 } |
| 60 int count = 0; |
| 61 int s[5]; |
| 62 for (int i = 0; i < kSize; ++i) { |
| 63 if (histogram.data_[i] > 0) { |
| 64 s[count] = i; |
| 65 ++count; |
| 66 if (count > 4) break; |
| 67 } |
| 68 } |
| 69 if (count == 1) { |
| 70 return kOneSymbolHistogramCost; |
| 71 } |
| 72 if (count == 2) { |
| 73 return (kTwoSymbolHistogramCost + |
| 74 static_cast<double>(histogram.total_count_)); |
| 75 } |
| 76 if (count == 3) { |
| 77 const uint32_t histo0 = histogram.data_[s[0]]; |
| 78 const uint32_t histo1 = histogram.data_[s[1]]; |
| 79 const uint32_t histo2 = histogram.data_[s[2]]; |
| 80 const uint32_t histomax = std::max(histo0, std::max(histo1, histo2)); |
| 81 return (kThreeSymbolHistogramCost + |
| 82 2 * (histo0 + histo1 + histo2) - histomax); |
| 83 } |
| 84 if (count == 4) { |
| 85 uint32_t histo[4]; |
| 86 for (int i = 0; i < 4; ++i) { |
| 87 histo[i] = histogram.data_[s[i]]; |
| 88 } |
| 89 // Sort |
| 90 for (int i = 0; i < 4; ++i) { |
| 91 for (int j = i + 1; j < 4; ++j) { |
| 92 if (histo[j] > histo[i]) { |
| 93 std::swap(histo[j], histo[i]); |
| 94 } |
| 95 } |
| 96 } |
| 97 const uint32_t h23 = histo[2] + histo[3]; |
| 98 const uint32_t histomax = std::max(h23, histo[0]); |
| 99 return (kFourSymbolHistogramCost + |
| 100 3 * h23 + 2 * (histo[0] + histo[1]) - histomax); |
| 101 } |
| 102 |
| 103 // In this loop we compute the entropy of the histogram and simultaneously |
| 104 // build a simplified histogram of the code length codes where we use the |
| 105 // zero repeat code 17, but we don't use the non-zero repeat code 16. |
| 106 double bits = 0; |
| 107 size_t max_depth = 1; |
| 108 uint32_t depth_histo[kCodeLengthCodes] = { 0 }; |
| 109 const double log2total = FastLog2(histogram.total_count_); |
| 110 for (size_t i = 0; i < kSize;) { |
| 111 if (histogram.data_[i] > 0) { |
| 112 // Compute -log2(P(symbol)) = -log2(count(symbol)/total_count) = |
| 113 // = log2(total_count) - log2(count(symbol)) |
| 114 double log2p = log2total - FastLog2(histogram.data_[i]); |
| 115 // Approximate the bit depth by round(-log2(P(symbol))) |
| 116 size_t depth = static_cast<size_t>(log2p + 0.5); |
| 117 bits += histogram.data_[i] * log2p; |
| 118 if (depth > 15) { |
| 119 depth = 15; |
| 120 } |
| 121 if (depth > max_depth) { |
| 122 max_depth = depth; |
| 123 } |
| 124 ++depth_histo[depth]; |
| 125 ++i; |
| 126 } else { |
| 127 // Compute the run length of zeros and add the appropriate number of 0 and |
| 128 // 17 code length codes to the code length code histogram. |
| 129 uint32_t reps = 1; |
| 130 for (size_t k = i + 1; k < kSize && histogram.data_[k] == 0; ++k) { |
| 131 ++reps; |
| 132 } |
| 133 i += reps; |
| 134 if (i == kSize) { |
| 135 // Don't add any cost for the last zero run, since these are encoded |
| 136 // only implicitly. |
| 137 break; |
| 138 } |
| 139 if (reps < 3) { |
| 140 depth_histo[0] += reps; |
| 141 } else { |
| 142 reps -= 2; |
| 143 while (reps > 0) { |
| 144 ++depth_histo[17]; |
| 145 // Add the 3 extra bits for the 17 code length code. |
| 146 bits += 3; |
| 147 reps >>= 3; |
| 148 } |
| 149 } |
| 150 } |
| 151 } |
| 152 // Add the estimated encoding cost of the code length code histogram. |
| 153 bits += static_cast<double>(18 + 2 * max_depth); |
| 154 // Add the entropy of the code length code histogram. |
| 155 bits += BitsEntropy(depth_histo, kCodeLengthCodes); |
| 156 return bits; |
| 157 } |
| 158 |
| 159 } // namespace brotli |
| 160 |
| 161 #endif // BROTLI_ENC_BIT_COST_H_ |
| OLD | NEW |