Chromium Code Reviews
Side by Side Diff: third_party/brotli/enc/backward_references.cc

Issue 2537133002: Update brotli to v1.0.0-snapshot. (Closed)
Patch Set: Fixed typo (created 4 years ago)
/* Copyright 2013 Google Inc. All Rights Reserved.

   Distributed under MIT license.
   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/

// Function to find backward reference copies.

#include "./backward_references.h"

#include <algorithm>
#include <limits>
#include <vector>

#include "./command.h"
#include "./fast_log.h"
#include "./literal_cost.h"

namespace brotli {
// The maximum length for which the zopflification uses distinct distances.
static const uint16_t kMaxZopfliLen = 325;

// Histogram based cost model for zopflification.
class ZopfliCostModel {
 public:
  ZopfliCostModel(void) : min_cost_cmd_(kInfinity) {}

  void SetFromCommands(size_t num_bytes,
                       size_t position,
                       const uint8_t* ringbuffer,
                       size_t ringbuffer_mask,
                       const Command* commands,
                       size_t num_commands,
                       size_t last_insert_len) {
    std::vector<uint32_t> histogram_literal(256, 0);
    std::vector<uint32_t> histogram_cmd(kNumCommandPrefixes, 0);
    std::vector<uint32_t> histogram_dist(kNumDistancePrefixes, 0);

    size_t pos = position - last_insert_len;
    for (size_t i = 0; i < num_commands; i++) {
      size_t inslength = commands[i].insert_len_;
      size_t copylength = commands[i].copy_len();
      size_t distcode = commands[i].dist_prefix_;
      size_t cmdcode = commands[i].cmd_prefix_;

      histogram_cmd[cmdcode]++;
      if (cmdcode >= 128) histogram_dist[distcode]++;

      for (size_t j = 0; j < inslength; j++) {
        histogram_literal[ringbuffer[(pos + j) & ringbuffer_mask]]++;
      }

      pos += inslength + copylength;
    }

    std::vector<float> cost_literal;
    Set(histogram_literal, &cost_literal);
    Set(histogram_cmd, &cost_cmd_);
    Set(histogram_dist, &cost_dist_);

    for (uint32_t i = 0; i < kNumCommandPrefixes; ++i) {
      min_cost_cmd_ = std::min(min_cost_cmd_, cost_cmd_[i]);
    }

    literal_costs_.resize(num_bytes + 1);
    literal_costs_[0] = 0.0;
    for (size_t i = 0; i < num_bytes; ++i) {
      literal_costs_[i + 1] = literal_costs_[i] +
          cost_literal[ringbuffer[(position + i) & ringbuffer_mask]];
    }
  }

  void SetFromLiteralCosts(size_t num_bytes,
                           size_t position,
                           const uint8_t* ringbuffer,
                           size_t ringbuffer_mask) {
    literal_costs_.resize(num_bytes + 2);
    EstimateBitCostsForLiterals(position, num_bytes, ringbuffer_mask,
                                ringbuffer, &literal_costs_[1]);
    literal_costs_[0] = 0.0;
    for (size_t i = 0; i < num_bytes; ++i) {
      literal_costs_[i + 1] += literal_costs_[i];
    }
    cost_cmd_.resize(kNumCommandPrefixes);
    cost_dist_.resize(kNumDistancePrefixes);
    for (uint32_t i = 0; i < kNumCommandPrefixes; ++i) {
      cost_cmd_[i] = static_cast<float>(FastLog2(11 + i));
    }
    for (uint32_t i = 0; i < kNumDistancePrefixes; ++i) {
      cost_dist_[i] = static_cast<float>(FastLog2(20 + i));
    }
    min_cost_cmd_ = static_cast<float>(FastLog2(11));
  }

  float GetCommandCost(
      size_t dist_code, size_t length_code, size_t insert_length) const {
    uint16_t inscode = GetInsertLengthCode(insert_length);
    uint16_t copycode = GetCopyLengthCode(length_code);
    uint16_t cmdcode = CombineLengthCodes(inscode, copycode, dist_code == 0);
    uint16_t dist_symbol;
    uint32_t distextra;
    PrefixEncodeCopyDistance(dist_code, 0, 0, &dist_symbol, &distextra);
    uint32_t distnumextra = distextra >> 24;

    float result = static_cast<float>(
        GetInsertExtra(inscode) + GetCopyExtra(copycode) + distnumextra);
    result += cost_cmd_[cmdcode];
    if (cmdcode >= 128) result += cost_dist_[dist_symbol];
    return result;
  }

  float GetLiteralCosts(size_t from, size_t to) const {
    return literal_costs_[to] - literal_costs_[from];
  }

  float GetMinCostCmd(void) const {
    return min_cost_cmd_;
  }

 private:
  void Set(const std::vector<uint32_t>& histogram, std::vector<float>* cost) {
    cost->resize(histogram.size());
    size_t sum = 0;
    for (size_t i = 0; i < histogram.size(); i++) {
      sum += histogram[i];
    }
    float log2sum = static_cast<float>(FastLog2(sum));
    for (size_t i = 0; i < histogram.size(); i++) {
      if (histogram[i] == 0) {
        (*cost)[i] = log2sum + 2;
        continue;
      }

      // Shannon bits for this symbol.
      (*cost)[i] = log2sum - static_cast<float>(FastLog2(histogram[i]));

      // Cannot be coded with less than 1 bit.
      if ((*cost)[i] < 1) (*cost)[i] = 1;
    }
  }

  std::vector<float> cost_cmd_;  // The insert and copy length symbols.
  std::vector<float> cost_dist_;
  // Cumulative costs of literals per position in the stream.
  std::vector<float> literal_costs_;
  float min_cost_cmd_;
};
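For intuition, Set() above charges each symbol its Shannon information content relative to the histogram total, clamps the result to at least one bit, and gives unseen symbols a fixed penalty of log2(sum) + 2 bits. A standalone sketch with hypothetical counts (illustration only, not part of this patch):

#include <cmath>
#include <cstdio>

int main(void) {
  const unsigned histogram[4] = {4, 2, 1, 0};  // hypothetical symbol counts
  unsigned sum = 0;
  for (unsigned i = 0; i < 4; ++i) sum += histogram[i];  // sum == 7
  for (unsigned i = 0; i < 4; ++i) {
    double bits = histogram[i] == 0
        ? std::log2(sum) + 2                         // unseen symbol penalty
        : std::log2(sum) - std::log2(histogram[i]);  // Shannon bits
    if (histogram[i] != 0 && bits < 1) bits = 1;     // 1-bit floor
    std::printf("count %u -> %.2f bits\n", histogram[i], bits);
  }
  return 0;  // prints 1.00 (clamped from 0.81), 1.81, 2.81, 4.81
}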
inline size_t ComputeDistanceCode(size_t distance,
                                  size_t max_distance,
                                  int quality,
                                  const int* dist_cache) {
  if (distance <= max_distance) {
    if (distance == static_cast<size_t>(dist_cache[0])) {
      return 0;
    } else if (distance == static_cast<size_t>(dist_cache[1])) {
      return 1;
    } else if (distance == static_cast<size_t>(dist_cache[2])) {
      return 2;
    } else if (distance == static_cast<size_t>(dist_cache[3])) {
      return 3;
    } else if (quality > 3 && distance >= 6) {
      for (size_t k = 4; k < kNumDistanceShortCodes; ++k) {
        size_t idx = kDistanceCacheIndex[k];
        size_t candidate =
            static_cast<size_t>(dist_cache[idx] + kDistanceCacheOffset[k]);
        static const size_t kLimits[16] = {  0,  0,  0,  0,
                                             6,  6, 11, 11,
                                            11, 11, 11, 11,
                                            12, 12, 12, 12 };
        if (distance == candidate && distance >= kLimits[k]) {
          return k;
        }
      }
    }
  }
  return distance + 15;
}
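The return value is a distance code in the format's terms: codes 0-3 reuse one of the four cached last distances verbatim, codes 4-15 address small fixed offsets from a cached distance (with kLimits filtering out short distances where the offset encoding would be wasteful), and everything else is transmitted directly as distance + 15, skipping past the 16 short codes. A hypothetical trace with dist_cache = {11, 4, 25, 16} and a large max_distance:

// ComputeDistanceCode(11, 1 << 18, 9, dist_cache)   == 0    (dist_cache[0])
// ComputeDistanceCode(25, 1 << 18, 9, dist_cache)   == 2    (dist_cache[2])
// ComputeDistanceCode(12, 1 << 18, 9, dist_cache)   may yield a short code
//   in 4..15 if some cached distance plus kDistanceCacheOffset[k] equals 12.
// ComputeDistanceCode(1000, 1 << 18, 9, dist_cache) == 1015 (1000 + 15)
// ComputeDistanceCode(1000, 512, 9, dist_cache)     == 1015 as well: a
//   distance beyond max_distance is a dictionary reference and never maps
//   to a short code.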
// REQUIRES: len >= 2, start_pos <= pos
// REQUIRES: cost < kInfinity, nodes[start_pos].cost < kInfinity
// Maintains the "ZopfliNode array invariant".
inline void UpdateZopfliNode(ZopfliNode* nodes, size_t pos, size_t start_pos,
                             size_t len, size_t len_code, size_t dist,
                             size_t short_code, float cost) {
  ZopfliNode& next = nodes[pos + len];
  next.length = static_cast<uint32_t>(len | ((len + 9u - len_code) << 24));
  next.distance = static_cast<uint32_t>(dist | (short_code << 25));
  next.insert_length = static_cast<uint32_t>(pos - start_pos);
  next.cost = cost;
}
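The node fields are bit-packed: the low 24 bits of length hold the copy length and the high byte holds len + 9 - len_code (9 for an ordinary match, where len_code == len); distance keeps the short-code index, offset by one, above bit 24. A sketch of how the accessors in backward_references.h presumably invert the length packing, valid because len < 2^24 and len + 9 - len_code fits in a byte at every call site here:

uint32_t packed = static_cast<uint32_t>(len | ((len + 9u - len_code) << 24));
size_t copy_length = packed & 0xffffff;                  // recovers len
size_t length_code = copy_length + 9u - (packed >> 24);  // recovers len_code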
// Maintains the start positions with the smallest cost differences
// (at most 2^bits of them).
class StartPosQueue {
 public:
  struct PosData {
    size_t pos;
    int distance_cache[4];
    float costdiff;
  };

  explicit StartPosQueue(int bits)
      : mask_((1u << bits) - 1), q_(1 << bits), idx_(0) {}

  void Clear(void) {
    idx_ = 0;
  }

  void Push(const StartPosQueue::PosData& posdata) {
    size_t offset = ~idx_ & mask_;
    ++idx_;
    size_t len = size();
    q_[offset] = posdata;
    /* Restore the sorted order. In the list of |len| items at most |len - 1|
       adjacent element comparisons / swaps are required. */
    for (size_t i = 1; i < len; ++i) {
      if (q_[offset & mask_].costdiff > q_[(offset + 1) & mask_].costdiff) {
        std::swap(q_[offset & mask_], q_[(offset + 1) & mask_]);
      }
      ++offset;
    }
  }

  size_t size(void) const { return std::min(idx_, mask_ + 1); }

  const StartPosQueue::PosData& GetStartPosData(size_t k) const {
    return q_[(k - idx_) & mask_];
  }

 private:
  const size_t mask_;
  std::vector<PosData> q_;
  size_t idx_;
};
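A new entry always lands at the logical front of the ring buffer, and since the remaining entries are already sorted, the single bubble pass in Push() restores order; GetStartPosData(0) is then the cheapest known start position. A hypothetical usage:

StartPosQueue queue(3);  // 2^3 = 8 slots
StartPosQueue::PosData a, b;
a.pos = 0; a.costdiff = 2.5f;
b.pos = 7; b.costdiff = 1.0f;
queue.Push(a);
queue.Push(b);
// queue.size() == 2, and queue.GetStartPosData(0).pos == 7: the entry
// with the smaller costdiff is returned first.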
// Returns the minimum possible copy length that can improve the cost of any
// future position.
static size_t ComputeMinimumCopyLength(const StartPosQueue& queue,
                                       const ZopfliNode* nodes,
                                       const ZopfliCostModel& model,
                                       const size_t num_bytes,
                                       const size_t pos) {
  // Compute the minimum possible cost of reaching any future position.
  const size_t start0 = queue.GetStartPosData(0).pos;
  float min_cost = (nodes[start0].cost +
                    model.GetLiteralCosts(start0, pos) +
                    model.GetMinCostCmd());
  size_t len = 2;
  size_t next_len_bucket = 4;
  size_t next_len_offset = 10;
  while (pos + len <= num_bytes && nodes[pos + len].cost <= min_cost) {
    // We already reached (pos + len) with no more cost than the minimum
    // possible cost of reaching anything from this pos, so there is no
    // point in looking for lengths <= len.
    ++len;
    if (len == next_len_offset) {
      // We reached the next copy length code bucket, so we add one more
      // extra bit to the minimum cost.
      min_cost += static_cast<float>(1.0);
      next_len_offset += next_len_bucket;
      next_len_bucket *= 2;
    }
  }
  return len;
}
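The bucket walk approximates the copy-length prefix codes: lengths below 10 share a baseline cost, and each later bucket (starting at 10, 14, 22, 38, ... with doubling widths) is assumed to cost one more extra bit. Running the same walk in isolation:

size_t next_len_bucket = 4, next_len_offset = 10;
float extra_bits = 0.0f;
for (size_t len = 2; len <= 23; ++len) {
  if (len == next_len_offset) {
    extra_bits += 1.0f;                  // one more extra bit per bucket
    next_len_offset += next_len_bucket;  // 10 -> 14 -> 22 -> 38 -> ...
    next_len_bucket *= 2;
  }
}
// extra_bits == 3.0f: a copy of length 23 has crossed the buckets that
// begin at lengths 10, 14 and 22.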
// Fills in dist_cache[0..3] with the last four distances (as defined by
// Section 4. of the Spec) that would be used at (block_start + pos) if we
// used the shortest path of commands from block_start, computed from
// nodes[0..pos]. The last four distances at block_start are in
// starting_dist_cache[0..3].
// REQUIRES: nodes[pos].cost < kInfinity
// REQUIRES: nodes[0..pos] satisfies the "ZopfliNode array invariant".
static void ComputeDistanceCache(const size_t block_start,
                                 const size_t pos,
                                 const size_t max_backward,
                                 const int* starting_dist_cache,
                                 const ZopfliNode* nodes,
                                 int* dist_cache) {
  int idx = 0;
  size_t p = pos;
  // Because of the prerequisite, this does at most (pos + 1) / 2 iterations.
  while (idx < 4 && p > 0) {
    const size_t clen = nodes[p].copy_length();
    const size_t ilen = nodes[p].insert_length;
    const size_t dist = nodes[p].copy_distance();
    // Since block_start + p is the end position of the command, the copy part
    // starts from block_start + p - clen. Distances that are greater than
    // this or greater than max_backward are static dictionary references,
    // and do not update the last distances. Also distance code 0 (last
    // distance) does not update the last distances.
    if (dist + clen <= block_start + p && dist <= max_backward &&
        nodes[p].distance_code() > 0) {
      dist_cache[idx++] = static_cast<int>(dist);
    }
    // Because of the prerequisite, p >= clen + ilen >= 2.
    p -= clen + ilen;
  }
  for (; idx < 4; ++idx) {
    dist_cache[idx] = *starting_dist_cache++;
  }
}
static void UpdateNodes(const size_t num_bytes,
                        const size_t block_start,
                        const size_t pos,
                        const uint8_t* ringbuffer,
                        const size_t ringbuffer_mask,
                        const size_t max_backward_limit,
                        const int* starting_dist_cache,
                        const size_t num_matches,
                        const BackwardMatch* matches,
                        const ZopfliCostModel* model,
                        StartPosQueue* queue,
                        ZopfliNode* nodes) {
  size_t cur_ix = block_start + pos;
  size_t cur_ix_masked = cur_ix & ringbuffer_mask;
  size_t max_distance = std::min(cur_ix, max_backward_limit);

  if (nodes[pos].cost <= model->GetLiteralCosts(0, pos)) {
    StartPosQueue::PosData posdata;
    posdata.pos = pos;
    posdata.costdiff = nodes[pos].cost - model->GetLiteralCosts(0, pos);
    ComputeDistanceCache(block_start, pos, max_backward_limit,
                         starting_dist_cache, nodes, posdata.distance_cache);
    queue->Push(posdata);
  }

  const size_t min_len = ComputeMinimumCopyLength(
      *queue, nodes, *model, num_bytes, pos);

  // Go over the command starting positions in order of increasing cost
  // difference.
  for (size_t k = 0; k < 5 && k < queue->size(); ++k) {
    const StartPosQueue::PosData& posdata = queue->GetStartPosData(k);
    const size_t start = posdata.pos;
    const float start_costdiff = posdata.costdiff;

    // Look for last distance matches using the distance cache from this
    // starting position.
    size_t best_len = min_len - 1;
    for (size_t j = 0; j < kNumDistanceShortCodes; ++j) {
      const size_t idx = kDistanceCacheIndex[j];
      const size_t backward = static_cast<size_t>(posdata.distance_cache[idx] +
                                                  kDistanceCacheOffset[j]);
      size_t prev_ix = cur_ix - backward;
      if (prev_ix >= cur_ix) {
        continue;
      }
      if (PREDICT_FALSE(backward > max_distance)) {
        continue;
      }
      prev_ix &= ringbuffer_mask;

      if (cur_ix_masked + best_len > ringbuffer_mask ||
          prev_ix + best_len > ringbuffer_mask ||
          ringbuffer[cur_ix_masked + best_len] !=
              ringbuffer[prev_ix + best_len]) {
        continue;
      }
      const size_t len =
          FindMatchLengthWithLimit(&ringbuffer[prev_ix],
                                   &ringbuffer[cur_ix_masked],
                                   num_bytes - pos);
      for (size_t l = best_len + 1; l <= len; ++l) {
        const size_t inslen = pos - start;
        float cmd_cost = model->GetCommandCost(j, l, inslen);
        float cost = start_costdiff + cmd_cost + model->GetLiteralCosts(0, pos);
        if (cost < nodes[pos + l].cost) {
          UpdateZopfliNode(&nodes[0], pos, start, l, l, backward, j + 1, cost);
        }
        best_len = l;
      }
    }

    // At larger values of k, look only for new last-distance matches, since
    // trying new command start positions with the same distances does not
    // help much.
    if (k >= 2) continue;

    // Loop through all possible copy lengths at this position.
    size_t len = min_len;
    for (size_t j = 0; j < num_matches; ++j) {
      BackwardMatch match = matches[j];
      size_t dist = match.distance;
      bool is_dictionary_match = dist > max_distance;
      // We already tried all possible last distance matches, so we can use
      // the normal distance code here.
      size_t dist_code = dist + 15;
      // Try all copy lengths up to the maximum copy length corresponding
      // to this distance. If the distance refers to the static dictionary,
      // or the maximum length is long enough, try only one maximum length.
      size_t max_len = match.length();
      if (len < max_len && (is_dictionary_match || max_len > kMaxZopfliLen)) {
        len = max_len;
      }
      for (; len <= max_len; ++len) {
        size_t len_code = is_dictionary_match ? match.length_code() : len;
        const size_t inslen = pos - start;
        float cmd_cost = model->GetCommandCost(dist_code, len_code, inslen);
        float cost = start_costdiff + cmd_cost + model->GetLiteralCosts(0, pos);
        if (cost < nodes[pos + len].cost) {
          UpdateZopfliNode(&nodes[0], pos, start, len, len_code, dist, 0, cost);
        }
      }
    }
  }
}
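One subtlety: queue entries hold a cost difference rather than an absolute cost, which is what makes candidates from different start positions comparable with a single GetLiteralCosts(0, pos) term. Expanding the definitions shows the candidate cost above is exactly the full path cost:

// start_costdiff = nodes[start].cost - model->GetLiteralCosts(0, start), so
//
//   cost = start_costdiff + cmd_cost + model->GetLiteralCosts(0, pos)
//        = nodes[start].cost                   // cheapest way to reach start
//        + model->GetLiteralCosts(start, pos)  // literals inserted since
//        + cmd_cost;                           // the new command itself
//
// i.e. the cost of extending the best parse ending at `start` by one more
// command that ends at pos + len.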
static void ComputeShortestPathFromNodes(size_t num_bytes,
                                         const ZopfliNode* nodes,
                                         std::vector<uint32_t>* path) {
  std::vector<uint32_t> backwards(num_bytes / 2 + 1);
  size_t index = num_bytes;
  while (nodes[index].cost == kInfinity) --index;
  size_t num_commands = 0;
  while (index != 0) {
    size_t len = nodes[index].command_length();
    backwards[num_commands++] = static_cast<uint32_t>(len);
    index -= len;
  }
  path->resize(num_commands);
  for (size_t i = num_commands, j = 0; i > 0; --i, ++j) {
    (*path)[j] = backwards[i - 1];
  }
}
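Each reachable node records the total (insert + copy) length of the command that reaches it, so the cheapest parse is replayed backwards from the end of the block and then reversed. A simplified stand-in with command_length() replaced by a plain array (illustration only):

#include <cstddef>
#include <cstdint>
#include <vector>

std::vector<uint32_t> PathFromLengths(const std::vector<uint32_t>& len_at,
                                      size_t num_bytes) {
  std::vector<uint32_t> backwards;
  size_t index = num_bytes;
  while (index != 0) {
    backwards.push_back(len_at[index]);  // length of the command ending here
    index -= len_at[index];
  }
  return std::vector<uint32_t>(backwards.rbegin(), backwards.rend());
}
// With len_at[10] == 4 and len_at[6] == 6, a 10-byte block yields the
// forward path {6, 4}.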
void ZopfliCreateCommands(const size_t num_bytes,
                          const size_t block_start,
                          const size_t max_backward_limit,
                          const std::vector<uint32_t>& path,
                          const ZopfliNode* nodes,
                          int* dist_cache,
                          size_t* last_insert_len,
                          Command* commands,
                          size_t* num_literals) {
  size_t pos = 0;
  for (size_t i = 0; i < path.size(); i++) {
    const ZopfliNode& next = nodes[pos + path[i]];
    size_t copy_length = next.copy_length();
    size_t insert_length = next.insert_length;
    pos += insert_length;
    if (i == 0) {
      insert_length += *last_insert_len;
      *last_insert_len = 0;
    }
    size_t distance = next.copy_distance();
    size_t len_code = next.length_code();
    size_t max_distance = std::min(block_start + pos, max_backward_limit);
    bool is_dictionary = (distance > max_distance);
    size_t dist_code = next.distance_code();

    Command cmd(insert_length, copy_length, len_code, dist_code);
    commands[i] = cmd;

    if (!is_dictionary && dist_code > 0) {
      dist_cache[3] = dist_cache[2];
      dist_cache[2] = dist_cache[1];
      dist_cache[1] = dist_cache[0];
      dist_cache[0] = static_cast<int>(distance);
    }

    *num_literals += insert_length;
    pos += copy_length;
  }
  *last_insert_len += num_bytes - pos;
}
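The four-entry distance cache behaves as a most-recently-used list: each command with a real, non-reused distance shifts the older entries down, letting later copies at the same distance use the cheap short codes. With hypothetical values:

int dist_cache[4] = {4, 11, 15, 16};
// A command with distance 42 (dist_code > 0, not a dictionary match):
dist_cache[3] = dist_cache[2];
dist_cache[2] = dist_cache[1];
dist_cache[1] = dist_cache[0];
dist_cache[0] = 42;
// dist_cache is now {42, 4, 11, 15}; a following copy at distance 42 can
// be encoded as distance code 0.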
static void ZopfliIterate(size_t num_bytes,
                          size_t position,
                          const uint8_t* ringbuffer,
                          size_t ringbuffer_mask,
                          const size_t max_backward_limit,
                          const int* dist_cache,
                          const ZopfliCostModel& model,
                          const std::vector<uint32_t>& num_matches,
                          const std::vector<BackwardMatch>& matches,
                          ZopfliNode* nodes,
                          std::vector<uint32_t>* path) {
  nodes[0].length = 0;
  nodes[0].cost = 0;
  StartPosQueue queue(3);
  size_t cur_match_pos = 0;
  for (size_t i = 0; i + 3 < num_bytes; i++) {
    UpdateNodes(num_bytes, position, i, ringbuffer, ringbuffer_mask,
                max_backward_limit, dist_cache, num_matches[i],
                &matches[cur_match_pos], &model, &queue, &nodes[0]);
    cur_match_pos += num_matches[i];
    // Zopflification can be too slow for very long match lengths; in that
    // case we skip it entirely, which costs very little compression ratio.
    if (num_matches[i] == 1 &&
        matches[cur_match_pos - 1].length() > kMaxZopfliLen) {
      i += matches[cur_match_pos - 1].length() - 1;
      queue.Clear();
    }
  }
  ComputeShortestPathFromNodes(num_bytes, &nodes[0], path);
}
void ZopfliComputeShortestPath(size_t num_bytes,
                               size_t position,
                               const uint8_t* ringbuffer,
                               size_t ringbuffer_mask,
                               const size_t max_backward_limit,
                               const int* dist_cache,
                               Hashers::H10* hasher,
                               ZopfliNode* nodes,
                               std::vector<uint32_t>* path) {
  nodes[0].length = 0;
  nodes[0].cost = 0;
  ZopfliCostModel* model = new ZopfliCostModel;
  model->SetFromLiteralCosts(num_bytes, position,
                             ringbuffer, ringbuffer_mask);
  StartPosQueue queue(3);
  BackwardMatch matches[Hashers::H10::kMaxNumMatches];
  for (size_t i = 0; i + 3 < num_bytes; i++) {
    const size_t max_distance = std::min(position + i, max_backward_limit);
    size_t num_matches = hasher->FindAllMatches(
        ringbuffer, ringbuffer_mask, position + i, num_bytes - i, max_distance,
        matches);
    if (num_matches > 0 &&
        matches[num_matches - 1].length() > kMaxZopfliLen) {
      matches[0] = matches[num_matches - 1];
      num_matches = 1;
    }
    UpdateNodes(num_bytes, position, i, ringbuffer, ringbuffer_mask,
                max_backward_limit, dist_cache, num_matches, matches,
                model, &queue, nodes);
    if (num_matches == 1 && matches[0].length() > kMaxZopfliLen) {
      for (size_t j = 1; j < matches[0].length() && i + 4 < num_bytes; ++j) {
        ++i;
        if (matches[0].length() - j < 64 &&
            num_bytes - i >= kMaxTreeCompLength) {
          hasher->Store(ringbuffer, ringbuffer_mask, position + i);
        }
      }
      queue.Clear();
    }
  }
  delete model;
  ComputeShortestPathFromNodes(num_bytes, nodes, path);
}
template<typename Hasher>
void CreateBackwardReferences(size_t num_bytes,
                              size_t position,
                              bool is_last,
                              const uint8_t* ringbuffer,
                              size_t ringbuffer_mask,
                              const int quality,
                              const int lgwin,
                              Hasher* hasher,
                              int* dist_cache,
                              size_t* last_insert_len,
                              Command* commands,
                              size_t* num_commands,
                              size_t* num_literals) {
  // Set maximum distance, see section 9.1. of the spec.
  const size_t max_backward_limit = (1 << lgwin) - 16;

  // Choose which init method is faster.
  // memset is about 100 times faster than hasher->InitForData().
  const size_t kMaxBytesForPartialHashInit = Hasher::kHashMapSize >> 7;
  if (position == 0 && is_last && num_bytes <= kMaxBytesForPartialHashInit) {
    hasher->InitForData(ringbuffer, num_bytes);
  } else {
    hasher->Init();
  }
  if (num_bytes >= 3 && position >= 3) {
    // Prepare the hashes for the last three bytes of the previous write.
    // These could not be calculated before, since they require knowledge
    // of both the previous and the current block.
    hasher->Store(&ringbuffer[(position - 3) & ringbuffer_mask],
                  static_cast<uint32_t>(position - 3));
    hasher->Store(&ringbuffer[(position - 2) & ringbuffer_mask],
                  static_cast<uint32_t>(position - 2));
    hasher->Store(&ringbuffer[(position - 1) & ringbuffer_mask],
                  static_cast<uint32_t>(position - 1));
  }
  const Command* const orig_commands = commands;
  size_t insert_length = *last_insert_len;
  size_t i = position & ringbuffer_mask;
  const size_t i_diff = position - i;
  const size_t i_end = i + num_bytes;

  // Window size for the heuristics that speed up processing of random data.
  const size_t random_heuristics_window_size = quality < 9 ? 64 : 512;
  size_t apply_random_heuristics = i + random_heuristics_window_size;

  // Minimum score to accept a backward reference.
  const double kMinScore = 4.0;

  while (i + Hasher::kHashTypeLength - 1 < i_end) {
    size_t max_length = i_end - i;
    size_t max_distance = std::min(i + i_diff, max_backward_limit);
    size_t best_len = 0;
    size_t best_len_code = 0;
    size_t best_dist = 0;
    double best_score = kMinScore;
    bool match_found = hasher->FindLongestMatch(
        ringbuffer, ringbuffer_mask,
        dist_cache, static_cast<uint32_t>(i + i_diff), max_length, max_distance,
        &best_len, &best_len_code, &best_dist, &best_score);
    if (match_found) {
      // Found a match. Let's look for something even better ahead.
      int delayed_backward_references_in_row = 0;
      for (;;) {
        --max_length;
        size_t best_len_2 =
            quality < 5 ? std::min(best_len - 1, max_length) : 0;
        size_t best_len_code_2 = 0;
        size_t best_dist_2 = 0;
        double best_score_2 = kMinScore;
        max_distance = std::min(i + i_diff + 1, max_backward_limit);
        match_found = hasher->FindLongestMatch(
            ringbuffer, ringbuffer_mask,
            dist_cache, static_cast<uint32_t>(i + i_diff + 1),
            max_length, max_distance,
            &best_len_2, &best_len_code_2, &best_dist_2, &best_score_2);
        double cost_diff_lazy = 7.0;
        if (match_found && best_score_2 >= best_score + cost_diff_lazy) {
          // OK, let's just write one byte for now and start a match from
          // the next byte.
          ++i;
          ++insert_length;
          best_len = best_len_2;
          best_len_code = best_len_code_2;
          best_dist = best_dist_2;
          best_score = best_score_2;
          if (++delayed_backward_references_in_row < 4) {
            continue;
          }
        }
        break;
      }
      apply_random_heuristics =
          i + 2 * best_len + random_heuristics_window_size;
      max_distance = std::min(i + i_diff, max_backward_limit);
      // The first 16 codes are special short codes, and the minimum offset
      // is 1.
      size_t distance_code =
          ComputeDistanceCode(best_dist, max_distance, quality, dist_cache);
      if (best_dist <= max_distance && distance_code > 0) {
        dist_cache[3] = dist_cache[2];
        dist_cache[2] = dist_cache[1];
        dist_cache[1] = dist_cache[0];
        dist_cache[0] = static_cast<int>(best_dist);
      }
      Command cmd(insert_length, best_len, best_len_code, distance_code);
      *commands++ = cmd;
      *num_literals += insert_length;
      insert_length = 0;
      // Put the hash keys into the table, if there are enough bytes left.
      for (size_t j = 2; j < best_len; ++j) {
        hasher->Store(&ringbuffer[i + j],
                      static_cast<uint32_t>(i + i_diff + j));
      }
      i += best_len;
    } else {
      ++insert_length;
      ++i;
      // If we have not seen matches for a long time, we can skip some match
      // lookups. Unsuccessful match lookups are very expensive, and this
      // kind of heuristic speeds up compression quite a lot.
      if (i > apply_random_heuristics) {
        // We are going through incompressible data; jump ahead.
        if (i > apply_random_heuristics + 4 * random_heuristics_window_size) {
          // It has been quite a long time since we saw a copy, so we assume
          // that this data is not compressible, and store hashes less
          // often. Hashes of incompressible data are less likely to turn
          // out to be useful in the future, too, so we store fewer of them
          // to avoid flooding out the hash table entries of good,
          // compressible data.
          size_t i_jump = std::min(i + 16, i_end - 4);
          for (; i < i_jump; i += 4) {
            hasher->Store(ringbuffer + i, static_cast<uint32_t>(i + i_diff));
            insert_length += 4;
          }
        } else {
          size_t i_jump = std::min(i + 8, i_end - 3);
          for (; i < i_jump; i += 2) {
            hasher->Store(ringbuffer + i, static_cast<uint32_t>(i + i_diff));
            insert_length += 2;
          }
        }
      }
    }
  }
  insert_length += i_end - i;
  *last_insert_len = insert_length;
  *num_commands += static_cast<size_t>(commands - orig_commands);
}
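The inner for (;;) loop is the classic lazy-matching heuristic: before committing to the match at i, probe i + 1; if that match scores at least cost_diff_lazy (7.0) better, emit one literal instead and adopt the later match, deferring at most four times in a row. A hypothetical trace with assumed scores:

// Scores as FindLongestMatch might report them, for illustration only:
const double best_score = 20.0;    // match found starting at i
const double best_score_2 = 28.0;  // match found starting at i + 1
const double cost_diff_lazy = 7.0;
const bool defer = best_score_2 >= best_score + cost_diff_lazy;  // true:
// byte i becomes a literal and the match at i + 1 wins. Had the second
// score been, say, 25.0, the original match at i would have been kept.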
void CreateBackwardReferences(size_t num_bytes,
                              size_t position,
                              bool is_last,
                              const uint8_t* ringbuffer,
                              size_t ringbuffer_mask,
                              const int quality,
                              const int lgwin,
                              Hashers* hashers,
                              int hash_type,
                              int* dist_cache,
                              size_t* last_insert_len,
                              Command* commands,
                              size_t* num_commands,
                              size_t* num_literals) {
  bool zopflify = quality > 9;
  if (zopflify) {
    Hashers::H10* hasher = hashers->hash_h10;
    hasher->Init(lgwin, position, num_bytes, is_last);
    hasher->StitchToPreviousBlock(num_bytes, position,
                                  ringbuffer, ringbuffer_mask);
    // Set maximum distance, see section 9.1. of the spec.
    const size_t max_backward_limit = (1 << lgwin) - 16;
    if (quality == 10) {
      std::vector<ZopfliNode> nodes(num_bytes + 1);
      std::vector<uint32_t> path;
      ZopfliComputeShortestPath(num_bytes, position,
                                ringbuffer, ringbuffer_mask,
                                max_backward_limit, dist_cache, hasher,
                                &nodes[0], &path);
      ZopfliCreateCommands(num_bytes, position, max_backward_limit, path,
                           &nodes[0], dist_cache, last_insert_len, commands,
                           num_literals);
      *num_commands += path.size();
      return;
    }
    std::vector<uint32_t> num_matches(num_bytes);
    std::vector<BackwardMatch> matches(4 * num_bytes);
    size_t cur_match_pos = 0;
    for (size_t i = 0; i + 3 < num_bytes; ++i) {
      size_t max_distance = std::min(position + i, max_backward_limit);
      size_t max_length = num_bytes - i;
      // Ensure that we have enough free slots.
      if (matches.size() < cur_match_pos + Hashers::H10::kMaxNumMatches) {
        matches.resize(cur_match_pos + Hashers::H10::kMaxNumMatches);
      }
      size_t num_found_matches = hasher->FindAllMatches(
          ringbuffer, ringbuffer_mask, position + i, max_length, max_distance,
          &matches[cur_match_pos]);
      const size_t cur_match_end = cur_match_pos + num_found_matches;
      for (size_t j = cur_match_pos; j + 1 < cur_match_end; ++j) {
        assert(matches[j].length() < matches[j + 1].length());
        assert(matches[j].distance > max_distance ||
               matches[j].distance <= matches[j + 1].distance);
      }
      num_matches[i] = static_cast<uint32_t>(num_found_matches);
      if (num_found_matches > 0) {
        const size_t match_len = matches[cur_match_end - 1].length();
        if (match_len > kMaxZopfliLen) {
          matches[cur_match_pos++] = matches[cur_match_end - 1];
          num_matches[i] = 1;
          for (size_t j = 1; j < match_len; ++j) {
            ++i;
            if (match_len - j < 64 && num_bytes - i >= kMaxTreeCompLength) {
              hasher->Store(ringbuffer, ringbuffer_mask, position + i);
            }
            num_matches[i] = 0;
          }
        } else {
          cur_match_pos = cur_match_end;
        }
      }
    }
    size_t orig_num_literals = *num_literals;
    size_t orig_last_insert_len = *last_insert_len;
    int orig_dist_cache[4] = {
      dist_cache[0], dist_cache[1], dist_cache[2], dist_cache[3]
    };
    size_t orig_num_commands = *num_commands;
    static const size_t kIterations = 2;
    for (size_t i = 0; i < kIterations; i++) {
      ZopfliCostModel model;
      if (i == 0) {
        model.SetFromLiteralCosts(num_bytes, position,
                                  ringbuffer, ringbuffer_mask);
      } else {
        model.SetFromCommands(num_bytes, position,
                              ringbuffer, ringbuffer_mask,
                              commands, *num_commands - orig_num_commands,
                              orig_last_insert_len);
      }
      *num_commands = orig_num_commands;
      *num_literals = orig_num_literals;
      *last_insert_len = orig_last_insert_len;
      memcpy(dist_cache, orig_dist_cache, 4 * sizeof(dist_cache[0]));
      std::vector<ZopfliNode> nodes(num_bytes + 1);
      std::vector<uint32_t> path;
      ZopfliIterate(num_bytes, position, ringbuffer, ringbuffer_mask,
                    max_backward_limit, dist_cache, model, num_matches,
                    matches, &nodes[0], &path);
      ZopfliCreateCommands(num_bytes, position, max_backward_limit, path,
                           &nodes[0], dist_cache, last_insert_len, commands,
                           num_literals);
      *num_commands += path.size();
    }
    return;
  }
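For quality 11, the parse is refined iteratively: pass 0 prices commands with the generic literal-cost estimate, while pass 1 rebuilds the cost model from the commands pass 0 actually emitted and re-runs the shortest-path search. The rollback of num_commands, num_literals, last_insert_len and dist_cache ensures each pass starts from identical encoder state, so the later parse replaces rather than appends to the earlier one. Schematically (assumed flow, matching the loop above):

// pass 0: model.SetFromLiteralCosts(...)          -> parse A
// pass 1: model.SetFromCommands(parse A's output) -> parse B, typically
//         cheaper, since symbol costs now reflect the real histograms of
//         the commands being emitted.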
  switch (hash_type) {
    case 2:
      CreateBackwardReferences<Hashers::H2>(
          num_bytes, position, is_last, ringbuffer, ringbuffer_mask,
          quality, lgwin, hashers->hash_h2, dist_cache,
          last_insert_len, commands, num_commands, num_literals);
      break;
    case 3:
      CreateBackwardReferences<Hashers::H3>(
          num_bytes, position, is_last, ringbuffer, ringbuffer_mask,
          quality, lgwin, hashers->hash_h3, dist_cache,
          last_insert_len, commands, num_commands, num_literals);
      break;
    case 4:
      CreateBackwardReferences<Hashers::H4>(
          num_bytes, position, is_last, ringbuffer, ringbuffer_mask,
          quality, lgwin, hashers->hash_h4, dist_cache,
          last_insert_len, commands, num_commands, num_literals);
      break;
    case 5:
      CreateBackwardReferences<Hashers::H5>(
          num_bytes, position, is_last, ringbuffer, ringbuffer_mask,
          quality, lgwin, hashers->hash_h5, dist_cache,
          last_insert_len, commands, num_commands, num_literals);
      break;
    case 6:
      CreateBackwardReferences<Hashers::H6>(
          num_bytes, position, is_last, ringbuffer, ringbuffer_mask,
          quality, lgwin, hashers->hash_h6, dist_cache,
          last_insert_len, commands, num_commands, num_literals);
      break;
    case 7:
      CreateBackwardReferences<Hashers::H7>(
          num_bytes, position, is_last, ringbuffer, ringbuffer_mask,
          quality, lgwin, hashers->hash_h7, dist_cache,
          last_insert_len, commands, num_commands, num_literals);
      break;
    case 8:
      CreateBackwardReferences<Hashers::H8>(
          num_bytes, position, is_last, ringbuffer, ringbuffer_mask,
          quality, lgwin, hashers->hash_h8, dist_cache,
          last_insert_len, commands, num_commands, num_literals);
      break;
    case 9:
      CreateBackwardReferences<Hashers::H9>(
          num_bytes, position, is_last, ringbuffer, ringbuffer_mask,
          quality, lgwin, hashers->hash_h9, dist_cache,
          last_insert_len, commands, num_commands, num_literals);
      break;
    default:
      break;
  }
}

}  // namespace brotli
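For context, a hedged sketch of how a caller might drive the dispatcher above. The real call site lives in encode.cc and manages ring-buffer and hasher state that is only approximated here; in particular the Hashers::Init(int) signature and the command-buffer sizing are assumptions, not part of this patch:

#include <cstddef>
#include <cstdint>
#include <vector>

#include "./backward_references.h"
#include "./hash.h"

void CompressBlockSketch(const uint8_t* data, size_t num_bytes, size_t mask,
                         std::vector<brotli::Command>* out) {
  brotli::Hashers hashers;
  hashers.Init(9);                        // assumed: hasher family for q9
  int dist_cache[4] = { 4, 11, 15, 16 };  // initial last distances (spec)
  size_t last_insert_len = 0, num_commands = 0, num_literals = 0;
  out->resize(num_bytes / 2 + 2);  // generous: >= one command per 2 bytes
  brotli::CreateBackwardReferences(
      num_bytes, /* position */ 0, /* is_last */ true, data, mask,
      /* quality */ 9, /* lgwin */ 22, &hashers, /* hash_type */ 9,
      dist_cache, &last_insert_len, &(*out)[0], &num_commands, &num_literals);
  out->resize(num_commands);
}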