Index: third_party/brotli/enc/backward_references.cc |
diff --git a/third_party/brotli/enc/backward_references.cc b/third_party/brotli/enc/backward_references.cc |
deleted file mode 100644 |
index 539c1e7623780f948e6cbfdaa97c8804eab2d994..0000000000000000000000000000000000000000 |
--- a/third_party/brotli/enc/backward_references.cc |
+++ /dev/null |
@@ -1,858 +0,0 @@ |
-/* Copyright 2013 Google Inc. All Rights Reserved. |
- |
- Distributed under MIT license. |
- See file LICENSE for detail or copy at https://opensource.org/licenses/MIT |
-*/ |
- |
-// Function to find backward reference copies. |
- |
-#include "./backward_references.h" |
- |
-#include <algorithm> |
-#include <limits> |
-#include <vector> |
- |
-#include "./command.h" |
-#include "./fast_log.h" |
-#include "./literal_cost.h" |
- |
-namespace brotli { |
- |
-// The maximum length for which the zopflification uses distinct distances. |
-static const uint16_t kMaxZopfliLen = 325; |
- |
-// Histogram based cost model for zopflification. |
-class ZopfliCostModel { |
- public: |
- ZopfliCostModel(void) : min_cost_cmd_(kInfinity) {} |
- |
- void SetFromCommands(size_t num_bytes, |
- size_t position, |
- const uint8_t* ringbuffer, |
- size_t ringbuffer_mask, |
- const Command* commands, |
- size_t num_commands, |
- size_t last_insert_len) { |
- std::vector<uint32_t> histogram_literal(256, 0); |
- std::vector<uint32_t> histogram_cmd(kNumCommandPrefixes, 0); |
- std::vector<uint32_t> histogram_dist(kNumDistancePrefixes, 0); |
- |
- size_t pos = position - last_insert_len; |
- for (size_t i = 0; i < num_commands; i++) { |
- size_t inslength = commands[i].insert_len_; |
- size_t copylength = commands[i].copy_len(); |
- size_t distcode = commands[i].dist_prefix_; |
- size_t cmdcode = commands[i].cmd_prefix_; |
- |
- histogram_cmd[cmdcode]++; |
- if (cmdcode >= 128) histogram_dist[distcode]++; |
- |
- for (size_t j = 0; j < inslength; j++) { |
- histogram_literal[ringbuffer[(pos + j) & ringbuffer_mask]]++; |
- } |
- |
- pos += inslength + copylength; |
- } |
- |
- std::vector<float> cost_literal; |
- Set(histogram_literal, &cost_literal); |
- Set(histogram_cmd, &cost_cmd_); |
- Set(histogram_dist, &cost_dist_); |
- |
- for (uint32_t i = 0; i < kNumCommandPrefixes; ++i) { |
- min_cost_cmd_ = std::min(min_cost_cmd_, cost_cmd_[i]); |
- } |
- |
- literal_costs_.resize(num_bytes + 1); |
- literal_costs_[0] = 0.0; |
- for (size_t i = 0; i < num_bytes; ++i) { |
- literal_costs_[i + 1] = literal_costs_[i] + |
- cost_literal[ringbuffer[(position + i) & ringbuffer_mask]]; |
- } |
- } |
- |
- void SetFromLiteralCosts(size_t num_bytes, |
- size_t position, |
- const uint8_t* ringbuffer, |
- size_t ringbuffer_mask) { |
- literal_costs_.resize(num_bytes + 2); |
- EstimateBitCostsForLiterals(position, num_bytes, ringbuffer_mask, |
- ringbuffer, &literal_costs_[1]); |
- literal_costs_[0] = 0.0; |
- for (size_t i = 0; i < num_bytes; ++i) { |
- literal_costs_[i + 1] += literal_costs_[i]; |
- } |
- cost_cmd_.resize(kNumCommandPrefixes); |
- cost_dist_.resize(kNumDistancePrefixes); |
- for (uint32_t i = 0; i < kNumCommandPrefixes; ++i) { |
- cost_cmd_[i] = static_cast<float>(FastLog2(11 + i)); |
- } |
- for (uint32_t i = 0; i < kNumDistancePrefixes; ++i) { |
- cost_dist_[i] = static_cast<float>(FastLog2(20 + i)); |
- } |
- min_cost_cmd_ = static_cast<float>(FastLog2(11)); |
- } |
- |
- float GetCommandCost( |
- size_t dist_code, size_t length_code, size_t insert_length) const { |
- uint16_t inscode = GetInsertLengthCode(insert_length); |
- uint16_t copycode = GetCopyLengthCode(length_code); |
- uint16_t cmdcode = CombineLengthCodes(inscode, copycode, dist_code == 0); |
- uint16_t dist_symbol; |
- uint32_t distextra; |
- PrefixEncodeCopyDistance(dist_code, 0, 0, &dist_symbol, &distextra); |
- uint32_t distnumextra = distextra >> 24; |
- |
- float result = static_cast<float>( |
- GetInsertExtra(inscode) + GetCopyExtra(copycode) + distnumextra); |
- result += cost_cmd_[cmdcode]; |
- if (cmdcode >= 128) result += cost_dist_[dist_symbol]; |
- return result; |
- } |
- |
- float GetLiteralCosts(size_t from, size_t to) const { |
- return literal_costs_[to] - literal_costs_[from]; |
- } |
- |
- float GetMinCostCmd(void) const { |
- return min_cost_cmd_; |
- } |
- |
- private: |
- void Set(const std::vector<uint32_t>& histogram, std::vector<float>* cost) { |
- cost->resize(histogram.size()); |
- size_t sum = 0; |
- for (size_t i = 0; i < histogram.size(); i++) { |
- sum += histogram[i]; |
- } |
- float log2sum = static_cast<float>(FastLog2(sum)); |
- for (size_t i = 0; i < histogram.size(); i++) { |
- if (histogram[i] == 0) { |
- (*cost)[i] = log2sum + 2; |
- continue; |
- } |
- |
- // Shannon bits for this symbol. |
- (*cost)[i] = log2sum - static_cast<float>(FastLog2(histogram[i])); |
- |
- // Cannot be coded with less than 1 bit |
- if ((*cost)[i] < 1) (*cost)[i] = 1; |
- } |
- } |
- |
- std::vector<float> cost_cmd_; // The insert and copy length symbols. |
- std::vector<float> cost_dist_; |
- // Cumulative costs of literals per position in the stream. |
- std::vector<float> literal_costs_; |
- float min_cost_cmd_; |
-}; |
- |
-inline size_t ComputeDistanceCode(size_t distance, |
- size_t max_distance, |
- int quality, |
- const int* dist_cache) { |
- if (distance <= max_distance) { |
- if (distance == static_cast<size_t>(dist_cache[0])) { |
- return 0; |
- } else if (distance == static_cast<size_t>(dist_cache[1])) { |
- return 1; |
- } else if (distance == static_cast<size_t>(dist_cache[2])) { |
- return 2; |
- } else if (distance == static_cast<size_t>(dist_cache[3])) { |
- return 3; |
- } else if (quality > 3 && distance >= 6) { |
- for (size_t k = 4; k < kNumDistanceShortCodes; ++k) { |
- size_t idx = kDistanceCacheIndex[k]; |
- size_t candidate = |
- static_cast<size_t>(dist_cache[idx] + kDistanceCacheOffset[k]); |
- static const size_t kLimits[16] = { 0, 0, 0, 0, |
- 6, 6, 11, 11, |
- 11, 11, 11, 11, |
- 12, 12, 12, 12 }; |
- if (distance == candidate && distance >= kLimits[k]) { |
- return k; |
- } |
- } |
- } |
- } |
- return distance + 15; |
-} |
- |
-// REQUIRES: len >= 2, start_pos <= pos |
-// REQUIRES: cost < kInfinity, nodes[start_pos].cost < kInfinity |
-// Maintains the "ZopfliNode array invariant". |
-inline void UpdateZopfliNode(ZopfliNode* nodes, size_t pos, size_t start_pos, |
- size_t len, size_t len_code, size_t dist, |
- size_t short_code, float cost) { |
- ZopfliNode& next = nodes[pos + len]; |
- next.length = static_cast<uint32_t>(len | ((len + 9u - len_code) << 24)); |
- next.distance = static_cast<uint32_t>(dist | (short_code << 25)); |
- next.insert_length = static_cast<uint32_t>(pos - start_pos); |
- next.cost = cost; |
-} |
- |
-// Maintains the smallest 2^k cost difference together with their positions |
-class StartPosQueue { |
- public: |
- struct PosData { |
- size_t pos; |
- int distance_cache[4]; |
- float costdiff; |
- }; |
- |
- explicit StartPosQueue(int bits) |
- : mask_((1u << bits) - 1), q_(1 << bits), idx_(0) {} |
- |
- void Clear(void) { |
- idx_ = 0; |
- } |
- |
- void Push(const StartPosQueue::PosData& posdata) { |
- size_t offset = ~idx_ & mask_; |
- ++idx_; |
- size_t len = size(); |
- q_[offset] = posdata; |
- /* Restore the sorted order. In the list of |len| items at most |len - 1| |
- adjacent element comparisons / swaps are required. */ |
- for (size_t i = 1; i < len; ++i) { |
- if (q_[offset & mask_].costdiff > q_[(offset + 1) & mask_].costdiff) { |
- std::swap(q_[offset & mask_], q_[(offset + 1) & mask_]); |
- } |
- ++offset; |
- } |
- } |
- |
- size_t size(void) const { return std::min(idx_, mask_ + 1); } |
- |
- const StartPosQueue::PosData& GetStartPosData(size_t k) const { |
- return q_[(k - idx_) & mask_]; |
- } |
- |
- private: |
- const size_t mask_; |
- std::vector<PosData> q_; |
- size_t idx_; |
-}; |
- |
-// Returns the minimum possible copy length that can improve the cost of any |
-// future position. |
-static size_t ComputeMinimumCopyLength(const StartPosQueue& queue, |
- const ZopfliNode* nodes, |
- const ZopfliCostModel& model, |
- const size_t num_bytes, |
- const size_t pos) { |
- // Compute the minimum possible cost of reaching any future position. |
- const size_t start0 = queue.GetStartPosData(0).pos; |
- float min_cost = (nodes[start0].cost + |
- model.GetLiteralCosts(start0, pos) + |
- model.GetMinCostCmd()); |
- size_t len = 2; |
- size_t next_len_bucket = 4; |
- size_t next_len_offset = 10; |
- while (pos + len <= num_bytes && nodes[pos + len].cost <= min_cost) { |
- // We already reached (pos + len) with no more cost than the minimum |
- // possible cost of reaching anything from this pos, so there is no point in |
- // looking for lengths <= len. |
- ++len; |
- if (len == next_len_offset) { |
- // We reached the next copy length code bucket, so we add one more |
- // extra bit to the minimum cost. |
- min_cost += static_cast<float>(1.0); |
- next_len_offset += next_len_bucket; |
- next_len_bucket *= 2; |
- } |
- } |
- return len; |
-} |
- |
-// Fills in dist_cache[0..3] with the last four distances (as defined by |
-// Section 4. of the Spec) that would be used at (block_start + pos) if we |
-// used the shortest path of commands from block_start, computed from |
-// nodes[0..pos]. The last four distances at block_start are in |
-// starting_dist_cach[0..3]. |
-// REQUIRES: nodes[pos].cost < kInfinity |
-// REQUIRES: nodes[0..pos] satisfies that "ZopfliNode array invariant". |
-static void ComputeDistanceCache(const size_t block_start, |
- const size_t pos, |
- const size_t max_backward, |
- const int* starting_dist_cache, |
- const ZopfliNode* nodes, |
- int* dist_cache) { |
- int idx = 0; |
- size_t p = pos; |
- // Because of prerequisite, does at most (pos + 1) / 2 iterations. |
- while (idx < 4 && p > 0) { |
- const size_t clen = nodes[p].copy_length(); |
- const size_t ilen = nodes[p].insert_length; |
- const size_t dist = nodes[p].copy_distance(); |
- // Since block_start + p is the end position of the command, the copy part |
- // starts from block_start + p - clen. Distances that are greater than this |
- // or greater than max_backward are static dictionary references, and do |
- // not update the last distances. Also distance code 0 (last distance) |
- // does not update the last distances. |
- if (dist + clen <= block_start + p && dist <= max_backward && |
- nodes[p].distance_code() > 0) { |
- dist_cache[idx++] = static_cast<int>(dist); |
- } |
- // Because of prerequisite, p >= clen + ilen >= 2. |
- p -= clen + ilen; |
- } |
- for (; idx < 4; ++idx) { |
- dist_cache[idx] = *starting_dist_cache++; |
- } |
-} |
- |
-static void UpdateNodes(const size_t num_bytes, |
- const size_t block_start, |
- const size_t pos, |
- const uint8_t* ringbuffer, |
- const size_t ringbuffer_mask, |
- const size_t max_backward_limit, |
- const int* starting_dist_cache, |
- const size_t num_matches, |
- const BackwardMatch* matches, |
- const ZopfliCostModel* model, |
- StartPosQueue* queue, |
- ZopfliNode* nodes) { |
- size_t cur_ix = block_start + pos; |
- size_t cur_ix_masked = cur_ix & ringbuffer_mask; |
- size_t max_distance = std::min(cur_ix, max_backward_limit); |
- |
- if (nodes[pos].cost <= model->GetLiteralCosts(0, pos)) { |
- StartPosQueue::PosData posdata; |
- posdata.pos = pos; |
- posdata.costdiff = nodes[pos].cost - model->GetLiteralCosts(0, pos); |
- ComputeDistanceCache(block_start, pos, max_backward_limit, |
- starting_dist_cache, nodes, posdata.distance_cache); |
- queue->Push(posdata); |
- } |
- |
- const size_t min_len = ComputeMinimumCopyLength( |
- *queue, nodes, *model, num_bytes, pos); |
- |
- // Go over the command starting positions in order of increasing cost |
- // difference. |
- for (size_t k = 0; k < 5 && k < queue->size(); ++k) { |
- const StartPosQueue::PosData& posdata = queue->GetStartPosData(k); |
- const size_t start = posdata.pos; |
- const float start_costdiff = posdata.costdiff; |
- |
- // Look for last distance matches using the distance cache from this |
- // starting position. |
- size_t best_len = min_len - 1; |
- for (size_t j = 0; j < kNumDistanceShortCodes; ++j) { |
- const size_t idx = kDistanceCacheIndex[j]; |
- const size_t backward = static_cast<size_t>(posdata.distance_cache[idx] + |
- kDistanceCacheOffset[j]); |
- size_t prev_ix = cur_ix - backward; |
- if (prev_ix >= cur_ix) { |
- continue; |
- } |
- if (PREDICT_FALSE(backward > max_distance)) { |
- continue; |
- } |
- prev_ix &= ringbuffer_mask; |
- |
- if (cur_ix_masked + best_len > ringbuffer_mask || |
- prev_ix + best_len > ringbuffer_mask || |
- ringbuffer[cur_ix_masked + best_len] != |
- ringbuffer[prev_ix + best_len]) { |
- continue; |
- } |
- const size_t len = |
- FindMatchLengthWithLimit(&ringbuffer[prev_ix], |
- &ringbuffer[cur_ix_masked], |
- num_bytes - pos); |
- for (size_t l = best_len + 1; l <= len; ++l) { |
- const size_t inslen = pos - start; |
- float cmd_cost = model->GetCommandCost(j, l, inslen); |
- float cost = start_costdiff + cmd_cost + model->GetLiteralCosts(0, pos); |
- if (cost < nodes[pos + l].cost) { |
- UpdateZopfliNode(&nodes[0], pos, start, l, l, backward, j + 1, cost); |
- } |
- best_len = l; |
- } |
- } |
- |
- // At higher iterations look only for new last distance matches, since |
- // looking only for new command start positions with the same distances |
- // does not help much. |
- if (k >= 2) continue; |
- |
- // Loop through all possible copy lengths at this position. |
- size_t len = min_len; |
- for (size_t j = 0; j < num_matches; ++j) { |
- BackwardMatch match = matches[j]; |
- size_t dist = match.distance; |
- bool is_dictionary_match = dist > max_distance; |
- // We already tried all possible last distance matches, so we can use |
- // normal distance code here. |
- size_t dist_code = dist + 15; |
- // Try all copy lengths up until the maximum copy length corresponding |
- // to this distance. If the distance refers to the static dictionary, or |
- // the maximum length is long enough, try only one maximum length. |
- size_t max_len = match.length(); |
- if (len < max_len && (is_dictionary_match || max_len > kMaxZopfliLen)) { |
- len = max_len; |
- } |
- for (; len <= max_len; ++len) { |
- size_t len_code = is_dictionary_match ? match.length_code() : len; |
- const size_t inslen = pos - start; |
- float cmd_cost = model->GetCommandCost(dist_code, len_code, inslen); |
- float cost = start_costdiff + cmd_cost + model->GetLiteralCosts(0, pos); |
- if (cost < nodes[pos + len].cost) { |
- UpdateZopfliNode(&nodes[0], pos, start, len, len_code, dist, 0, cost); |
- } |
- } |
- } |
- } |
-} |
- |
-static void ComputeShortestPathFromNodes(size_t num_bytes, |
- const ZopfliNode* nodes, |
- std::vector<uint32_t>* path) { |
- std::vector<uint32_t> backwards(num_bytes / 2 + 1); |
- size_t index = num_bytes; |
- while (nodes[index].cost == kInfinity) --index; |
- size_t num_commands = 0; |
- while (index != 0) { |
- size_t len = nodes[index].command_length(); |
- backwards[num_commands++] = static_cast<uint32_t>(len); |
- index -= len; |
- } |
- path->resize(num_commands); |
- for (size_t i = num_commands, j = 0; i > 0; --i, ++j) { |
- (*path)[j] = backwards[i - 1]; |
- } |
-} |
- |
-void ZopfliCreateCommands(const size_t num_bytes, |
- const size_t block_start, |
- const size_t max_backward_limit, |
- const std::vector<uint32_t>& path, |
- const ZopfliNode* nodes, |
- int* dist_cache, |
- size_t* last_insert_len, |
- Command* commands, |
- size_t* num_literals) { |
- size_t pos = 0; |
- for (size_t i = 0; i < path.size(); i++) { |
- const ZopfliNode& next = nodes[pos + path[i]]; |
- size_t copy_length = next.copy_length(); |
- size_t insert_length = next.insert_length; |
- pos += insert_length; |
- if (i == 0) { |
- insert_length += *last_insert_len; |
- *last_insert_len = 0; |
- } |
- size_t distance = next.copy_distance(); |
- size_t len_code = next.length_code(); |
- size_t max_distance = std::min(block_start + pos, max_backward_limit); |
- bool is_dictionary = (distance > max_distance); |
- size_t dist_code = next.distance_code(); |
- |
- Command cmd(insert_length, copy_length, len_code, dist_code); |
- commands[i] = cmd; |
- |
- if (!is_dictionary && dist_code > 0) { |
- dist_cache[3] = dist_cache[2]; |
- dist_cache[2] = dist_cache[1]; |
- dist_cache[1] = dist_cache[0]; |
- dist_cache[0] = static_cast<int>(distance); |
- } |
- |
- *num_literals += insert_length; |
- pos += copy_length; |
- } |
- *last_insert_len += num_bytes - pos; |
-} |
- |
-static void ZopfliIterate(size_t num_bytes, |
- size_t position, |
- const uint8_t* ringbuffer, |
- size_t ringbuffer_mask, |
- const size_t max_backward_limit, |
- const int* dist_cache, |
- const ZopfliCostModel& model, |
- const std::vector<uint32_t>& num_matches, |
- const std::vector<BackwardMatch>& matches, |
- ZopfliNode* nodes, |
- std::vector<uint32_t>* path) { |
- nodes[0].length = 0; |
- nodes[0].cost = 0; |
- StartPosQueue queue(3); |
- size_t cur_match_pos = 0; |
- for (size_t i = 0; i + 3 < num_bytes; i++) { |
- UpdateNodes(num_bytes, position, i, ringbuffer, ringbuffer_mask, |
- max_backward_limit, dist_cache, num_matches[i], |
- &matches[cur_match_pos], &model, &queue, &nodes[0]); |
- cur_match_pos += num_matches[i]; |
- // The zopflification can be too slow in case of very long lengths, so in |
- // such case skip it all, it does not cost a lot of compression ratio. |
- if (num_matches[i] == 1 && |
- matches[cur_match_pos - 1].length() > kMaxZopfliLen) { |
- i += matches[cur_match_pos - 1].length() - 1; |
- queue.Clear(); |
- } |
- } |
- ComputeShortestPathFromNodes(num_bytes, &nodes[0], path); |
-} |
- |
- |
-void ZopfliComputeShortestPath(size_t num_bytes, |
- size_t position, |
- const uint8_t* ringbuffer, |
- size_t ringbuffer_mask, |
- const size_t max_backward_limit, |
- const int* dist_cache, |
- Hashers::H10* hasher, |
- ZopfliNode* nodes, |
- std::vector<uint32_t>* path) { |
- nodes[0].length = 0; |
- nodes[0].cost = 0; |
- ZopfliCostModel* model = new ZopfliCostModel; |
- model->SetFromLiteralCosts(num_bytes, position, |
- ringbuffer, ringbuffer_mask); |
- StartPosQueue queue(3); |
- BackwardMatch matches[Hashers::H10::kMaxNumMatches]; |
- for (size_t i = 0; i + 3 < num_bytes; i++) { |
- const size_t max_distance = std::min(position + i, max_backward_limit); |
- size_t num_matches = hasher->FindAllMatches( |
- ringbuffer, ringbuffer_mask, position + i, num_bytes - i, max_distance, |
- matches); |
- if (num_matches > 0 && |
- matches[num_matches - 1].length() > kMaxZopfliLen) { |
- matches[0] = matches[num_matches - 1]; |
- num_matches = 1; |
- } |
- UpdateNodes(num_bytes, position, i, ringbuffer, ringbuffer_mask, |
- max_backward_limit, dist_cache, num_matches, matches, |
- model, &queue, nodes); |
- if (num_matches == 1 && matches[0].length() > kMaxZopfliLen) { |
- for (size_t j = 1; j < matches[0].length() && i + 4 < num_bytes; ++j) { |
- ++i; |
- if (matches[0].length() - j < 64 && |
- num_bytes - i >= kMaxTreeCompLength) { |
- hasher->Store(ringbuffer, ringbuffer_mask, position + i); |
- } |
- } |
- queue.Clear(); |
- } |
- } |
- delete model; |
- ComputeShortestPathFromNodes(num_bytes, nodes, path); |
-} |
- |
-template<typename Hasher> |
-void CreateBackwardReferences(size_t num_bytes, |
- size_t position, |
- bool is_last, |
- const uint8_t* ringbuffer, |
- size_t ringbuffer_mask, |
- const int quality, |
- const int lgwin, |
- Hasher* hasher, |
- int* dist_cache, |
- size_t* last_insert_len, |
- Command* commands, |
- size_t* num_commands, |
- size_t* num_literals) { |
- // Set maximum distance, see section 9.1. of the spec. |
- const size_t max_backward_limit = (1 << lgwin) - 16; |
- |
- // Choose which init method is faster. |
- // memset is about 100 times faster than hasher->InitForData(). |
- const size_t kMaxBytesForPartialHashInit = Hasher::kHashMapSize >> 7; |
- if (position == 0 && is_last && num_bytes <= kMaxBytesForPartialHashInit) { |
- hasher->InitForData(ringbuffer, num_bytes); |
- } else { |
- hasher->Init(); |
- } |
- if (num_bytes >= 3 && position >= 3) { |
- // Prepare the hashes for three last bytes of the last write. |
- // These could not be calculated before, since they require knowledge |
- // of both the previous and the current block. |
- hasher->Store(&ringbuffer[(position - 3) & ringbuffer_mask], |
- static_cast<uint32_t>(position - 3)); |
- hasher->Store(&ringbuffer[(position - 2) & ringbuffer_mask], |
- static_cast<uint32_t>(position - 2)); |
- hasher->Store(&ringbuffer[(position - 1) & ringbuffer_mask], |
- static_cast<uint32_t>(position - 1)); |
- } |
- const Command * const orig_commands = commands; |
- size_t insert_length = *last_insert_len; |
- size_t i = position & ringbuffer_mask; |
- const size_t i_diff = position - i; |
- const size_t i_end = i + num_bytes; |
- |
- // For speed up heuristics for random data. |
- const size_t random_heuristics_window_size = quality < 9 ? 64 : 512; |
- size_t apply_random_heuristics = i + random_heuristics_window_size; |
- |
- // Minimum score to accept a backward reference. |
- const double kMinScore = 4.0; |
- |
- while (i + Hasher::kHashTypeLength - 1 < i_end) { |
- size_t max_length = i_end - i; |
- size_t max_distance = std::min(i + i_diff, max_backward_limit); |
- size_t best_len = 0; |
- size_t best_len_code = 0; |
- size_t best_dist = 0; |
- double best_score = kMinScore; |
- bool match_found = hasher->FindLongestMatch( |
- ringbuffer, ringbuffer_mask, |
- dist_cache, static_cast<uint32_t>(i + i_diff), max_length, max_distance, |
- &best_len, &best_len_code, &best_dist, &best_score); |
- if (match_found) { |
- // Found a match. Let's look for something even better ahead. |
- int delayed_backward_references_in_row = 0; |
- for (;;) { |
- --max_length; |
- size_t best_len_2 = |
- quality < 5 ? std::min(best_len - 1, max_length) : 0; |
- size_t best_len_code_2 = 0; |
- size_t best_dist_2 = 0; |
- double best_score_2 = kMinScore; |
- max_distance = std::min(i + i_diff + 1, max_backward_limit); |
- match_found = hasher->FindLongestMatch( |
- ringbuffer, ringbuffer_mask, |
- dist_cache, static_cast<uint32_t>(i + i_diff + 1), |
- max_length, max_distance, |
- &best_len_2, &best_len_code_2, &best_dist_2, &best_score_2); |
- double cost_diff_lazy = 7.0; |
- if (match_found && best_score_2 >= best_score + cost_diff_lazy) { |
- // Ok, let's just write one byte for now and start a match from the |
- // next byte. |
- ++i; |
- ++insert_length; |
- best_len = best_len_2; |
- best_len_code = best_len_code_2; |
- best_dist = best_dist_2; |
- best_score = best_score_2; |
- if (++delayed_backward_references_in_row < 4) { |
- continue; |
- } |
- } |
- break; |
- } |
- apply_random_heuristics = |
- i + 2 * best_len + random_heuristics_window_size; |
- max_distance = std::min(i + i_diff, max_backward_limit); |
- // The first 16 codes are special shortcodes, and the minimum offset is 1. |
- size_t distance_code = |
- ComputeDistanceCode(best_dist, max_distance, quality, dist_cache); |
- if (best_dist <= max_distance && distance_code > 0) { |
- dist_cache[3] = dist_cache[2]; |
- dist_cache[2] = dist_cache[1]; |
- dist_cache[1] = dist_cache[0]; |
- dist_cache[0] = static_cast<int>(best_dist); |
- } |
- Command cmd(insert_length, best_len, best_len_code, distance_code); |
- *commands++ = cmd; |
- *num_literals += insert_length; |
- insert_length = 0; |
- // Put the hash keys into the table, if there are enough |
- // bytes left. |
- for (size_t j = 2; j < best_len; ++j) { |
- hasher->Store(&ringbuffer[i + j], |
- static_cast<uint32_t>(i + i_diff + j)); |
- } |
- i += best_len; |
- } else { |
- ++insert_length; |
- ++i; |
- // If we have not seen matches for a long time, we can skip some |
- // match lookups. Unsuccessful match lookups are very very expensive |
- // and this kind of a heuristic speeds up compression quite |
- // a lot. |
- if (i > apply_random_heuristics) { |
- // Going through uncompressible data, jump. |
- if (i > apply_random_heuristics + 4 * random_heuristics_window_size) { |
- // It is quite a long time since we saw a copy, so we assume |
- // that this data is not compressible, and store hashes less |
- // often. Hashes of non compressible data are less likely to |
- // turn out to be useful in the future, too, so we store less of |
- // them to not to flood out the hash table of good compressible |
- // data. |
- size_t i_jump = std::min(i + 16, i_end - 4); |
- for (; i < i_jump; i += 4) { |
- hasher->Store(ringbuffer + i, static_cast<uint32_t>(i + i_diff)); |
- insert_length += 4; |
- } |
- } else { |
- size_t i_jump = std::min(i + 8, i_end - 3); |
- for (; i < i_jump; i += 2) { |
- hasher->Store(ringbuffer + i, static_cast<uint32_t>(i + i_diff)); |
- insert_length += 2; |
- } |
- } |
- } |
- } |
- } |
- insert_length += i_end - i; |
- *last_insert_len = insert_length; |
- *num_commands += static_cast<size_t>(commands - orig_commands); |
-} |
- |
-void CreateBackwardReferences(size_t num_bytes, |
- size_t position, |
- bool is_last, |
- const uint8_t* ringbuffer, |
- size_t ringbuffer_mask, |
- const int quality, |
- const int lgwin, |
- Hashers* hashers, |
- int hash_type, |
- int* dist_cache, |
- size_t* last_insert_len, |
- Command* commands, |
- size_t* num_commands, |
- size_t* num_literals) { |
- bool zopflify = quality > 9; |
- if (zopflify) { |
- Hashers::H10* hasher = hashers->hash_h10; |
- hasher->Init(lgwin, position, num_bytes, is_last); |
- hasher->StitchToPreviousBlock(num_bytes, position, |
- ringbuffer, ringbuffer_mask); |
- // Set maximum distance, see section 9.1. of the spec. |
- const size_t max_backward_limit = (1 << lgwin) - 16; |
- if (quality == 10) { |
- std::vector<ZopfliNode> nodes(num_bytes + 1); |
- std::vector<uint32_t> path; |
- ZopfliComputeShortestPath(num_bytes, position, |
- ringbuffer, ringbuffer_mask, |
- max_backward_limit, dist_cache, hasher, |
- &nodes[0], &path); |
- ZopfliCreateCommands(num_bytes, position, max_backward_limit, path, |
- &nodes[0], dist_cache, last_insert_len, commands, |
- num_literals); |
- *num_commands += path.size(); |
- return; |
- } |
- std::vector<uint32_t> num_matches(num_bytes); |
- std::vector<BackwardMatch> matches(4 * num_bytes); |
- size_t cur_match_pos = 0; |
- for (size_t i = 0; i + 3 < num_bytes; ++i) { |
- size_t max_distance = std::min(position + i, max_backward_limit); |
- size_t max_length = num_bytes - i; |
- // Ensure that we have enough free slots. |
- if (matches.size() < cur_match_pos + Hashers::H10::kMaxNumMatches) { |
- matches.resize(cur_match_pos + Hashers::H10::kMaxNumMatches); |
- } |
- size_t num_found_matches = hasher->FindAllMatches( |
- ringbuffer, ringbuffer_mask, position + i, max_length, max_distance, |
- &matches[cur_match_pos]); |
- const size_t cur_match_end = cur_match_pos + num_found_matches; |
- for (size_t j = cur_match_pos; j + 1 < cur_match_end; ++j) { |
- assert(matches[j].length() < matches[j + 1].length()); |
- assert(matches[j].distance > max_distance || |
- matches[j].distance <= matches[j + 1].distance); |
- } |
- num_matches[i] = static_cast<uint32_t>(num_found_matches); |
- if (num_found_matches > 0) { |
- const size_t match_len = matches[cur_match_end - 1].length(); |
- if (match_len > kMaxZopfliLen) { |
- matches[cur_match_pos++] = matches[cur_match_end - 1]; |
- num_matches[i] = 1; |
- for (size_t j = 1; j < match_len; ++j) { |
- ++i; |
- if (match_len - j < 64 && num_bytes - i >= kMaxTreeCompLength) { |
- hasher->Store(ringbuffer, ringbuffer_mask, position + i); |
- } |
- num_matches[i] = 0; |
- } |
- } else { |
- cur_match_pos = cur_match_end; |
- } |
- } |
- } |
- size_t orig_num_literals = *num_literals; |
- size_t orig_last_insert_len = *last_insert_len; |
- int orig_dist_cache[4] = { |
- dist_cache[0], dist_cache[1], dist_cache[2], dist_cache[3] |
- }; |
- size_t orig_num_commands = *num_commands; |
- static const size_t kIterations = 2; |
- for (size_t i = 0; i < kIterations; i++) { |
- ZopfliCostModel model; |
- if (i == 0) { |
- model.SetFromLiteralCosts(num_bytes, position, |
- ringbuffer, ringbuffer_mask); |
- } else { |
- model.SetFromCommands(num_bytes, position, |
- ringbuffer, ringbuffer_mask, |
- commands, *num_commands - orig_num_commands, |
- orig_last_insert_len); |
- } |
- *num_commands = orig_num_commands; |
- *num_literals = orig_num_literals; |
- *last_insert_len = orig_last_insert_len; |
- memcpy(dist_cache, orig_dist_cache, 4 * sizeof(dist_cache[0])); |
- std::vector<ZopfliNode> nodes(num_bytes + 1); |
- std::vector<uint32_t> path; |
- ZopfliIterate(num_bytes, position, ringbuffer, ringbuffer_mask, |
- max_backward_limit, dist_cache, model, num_matches, matches, |
- &nodes[0], &path); |
- ZopfliCreateCommands(num_bytes, position, max_backward_limit, path, |
- &nodes[0], dist_cache, last_insert_len, commands, |
- num_literals); |
- *num_commands += path.size(); |
- } |
- return; |
- } |
- |
- switch (hash_type) { |
- case 2: |
- CreateBackwardReferences<Hashers::H2>( |
- num_bytes, position, is_last, ringbuffer, ringbuffer_mask, |
- quality, lgwin, hashers->hash_h2, dist_cache, |
- last_insert_len, commands, num_commands, num_literals); |
- break; |
- case 3: |
- CreateBackwardReferences<Hashers::H3>( |
- num_bytes, position, is_last, ringbuffer, ringbuffer_mask, |
- quality, lgwin, hashers->hash_h3, dist_cache, |
- last_insert_len, commands, num_commands, num_literals); |
- break; |
- case 4: |
- CreateBackwardReferences<Hashers::H4>( |
- num_bytes, position, is_last, ringbuffer, ringbuffer_mask, |
- quality, lgwin, hashers->hash_h4, dist_cache, |
- last_insert_len, commands, num_commands, num_literals); |
- break; |
- case 5: |
- CreateBackwardReferences<Hashers::H5>( |
- num_bytes, position, is_last, ringbuffer, ringbuffer_mask, |
- quality, lgwin, hashers->hash_h5, dist_cache, |
- last_insert_len, commands, num_commands, num_literals); |
- break; |
- case 6: |
- CreateBackwardReferences<Hashers::H6>( |
- num_bytes, position, is_last, ringbuffer, ringbuffer_mask, |
- quality, lgwin, hashers->hash_h6, dist_cache, |
- last_insert_len, commands, num_commands, num_literals); |
- break; |
- case 7: |
- CreateBackwardReferences<Hashers::H7>( |
- num_bytes, position, is_last, ringbuffer, ringbuffer_mask, |
- quality, lgwin, hashers->hash_h7, dist_cache, |
- last_insert_len, commands, num_commands, num_literals); |
- break; |
- case 8: |
- CreateBackwardReferences<Hashers::H8>( |
- num_bytes, position, is_last, ringbuffer, ringbuffer_mask, |
- quality, lgwin, hashers->hash_h8, dist_cache, |
- last_insert_len, commands, num_commands, num_literals); |
- break; |
- case 9: |
- CreateBackwardReferences<Hashers::H9>( |
- num_bytes, position, is_last, ringbuffer, ringbuffer_mask, |
- quality, lgwin, hashers->hash_h9, dist_cache, |
- last_insert_len, commands, num_commands, num_literals); |
- break; |
- default: |
- break; |
- } |
-} |
- |
-} // namespace brotli |