Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(91)

Side by Side Diff: third_party/brotli/enc/metablock.cc

Issue 2537133002: Update brotli to v1.0.0-snapshot. (Closed)
Patch Set: Fixed typo Created 4 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « third_party/brotli/enc/metablock.c ('k') | third_party/brotli/enc/metablock_inc.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 /* Copyright 2015 Google Inc. All Rights Reserved.
2
3 Distributed under MIT license.
4 See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5 */
6
7 // Algorithms for distributing the literals and commands of a metablock between
8 // block types and contexts.
9
10 #include "./metablock.h"
11
12 #include "./block_splitter.h"
13 #include "./context.h"
14 #include "./cluster.h"
15 #include "./histogram.h"
16
17 namespace brotli {
18
19 void BuildMetaBlock(const uint8_t* ringbuffer,
20 const size_t pos,
21 const size_t mask,
22 uint8_t prev_byte,
23 uint8_t prev_byte2,
24 const Command* cmds,
25 size_t num_commands,
26 ContextType literal_context_mode,
27 MetaBlockSplit* mb) {
28 SplitBlock(cmds, num_commands,
29 ringbuffer, pos, mask,
30 &mb->literal_split,
31 &mb->command_split,
32 &mb->distance_split);
33
34 std::vector<ContextType> literal_context_modes(mb->literal_split.num_types,
35 literal_context_mode);
36
37 size_t num_literal_contexts =
38 mb->literal_split.num_types << kLiteralContextBits;
39 size_t num_distance_contexts =
40 mb->distance_split.num_types << kDistanceContextBits;
41 std::vector<HistogramLiteral> literal_histograms(num_literal_contexts);
42 mb->command_histograms.resize(mb->command_split.num_types);
43 std::vector<HistogramDistance> distance_histograms(num_distance_contexts);
44 BuildHistograms(cmds, num_commands,
45 mb->literal_split,
46 mb->command_split,
47 mb->distance_split,
48 ringbuffer,
49 pos,
50 mask,
51 prev_byte,
52 prev_byte2,
53 literal_context_modes,
54 &literal_histograms,
55 &mb->command_histograms,
56 &distance_histograms);
57
58 // Histogram ids need to fit in one byte.
59 static const size_t kMaxNumberOfHistograms = 256;
60
61 ClusterHistograms(literal_histograms,
62 1u << kLiteralContextBits,
63 mb->literal_split.num_types,
64 kMaxNumberOfHistograms,
65 &mb->literal_histograms,
66 &mb->literal_context_map);
67
68 ClusterHistograms(distance_histograms,
69 1u << kDistanceContextBits,
70 mb->distance_split.num_types,
71 kMaxNumberOfHistograms,
72 &mb->distance_histograms,
73 &mb->distance_context_map);
74 }
75
76 // Greedy block splitter for one block category (literal, command or distance).
77 template<typename HistogramType>
78 class BlockSplitter {
79 public:
80 BlockSplitter(size_t alphabet_size,
81 size_t min_block_size,
82 double split_threshold,
83 size_t num_symbols,
84 BlockSplit* split,
85 std::vector<HistogramType>* histograms)
86 : alphabet_size_(alphabet_size),
87 min_block_size_(min_block_size),
88 split_threshold_(split_threshold),
89 num_blocks_(0),
90 split_(split),
91 histograms_(histograms),
92 target_block_size_(min_block_size),
93 block_size_(0),
94 curr_histogram_ix_(0),
95 merge_last_count_(0) {
96 size_t max_num_blocks = num_symbols / min_block_size + 1;
97 // We have to allocate one more histogram than the maximum number of block
98 // types for the current histogram when the meta-block is too big.
99 size_t max_num_types = std::min<size_t>(max_num_blocks, kMaxBlockTypes + 1);
100 split_->lengths.resize(max_num_blocks);
101 split_->types.resize(max_num_blocks);
102 histograms_->resize(max_num_types);
103 last_histogram_ix_[0] = last_histogram_ix_[1] = 0;
104 }
105
106 // Adds the next symbol to the current histogram. When the current histogram
107 // reaches the target size, decides on merging the block.
108 void AddSymbol(size_t symbol) {
109 (*histograms_)[curr_histogram_ix_].Add(symbol);
110 ++block_size_;
111 if (block_size_ == target_block_size_) {
112 FinishBlock(/* is_final = */ false);
113 }
114 }
115
116 // Does either of three things:
117 // (1) emits the current block with a new block type;
118 // (2) emits the current block with the type of the second last block;
119 // (3) merges the current block with the last block.
120 void FinishBlock(bool is_final) {
121 if (block_size_ < min_block_size_) {
122 block_size_ = min_block_size_;
123 }
124 if (num_blocks_ == 0) {
125 // Create first block.
126 split_->lengths[0] = static_cast<uint32_t>(block_size_);
127 split_->types[0] = 0;
128 last_entropy_[0] =
129 BitsEntropy(&(*histograms_)[0].data_[0], alphabet_size_);
130 last_entropy_[1] = last_entropy_[0];
131 ++num_blocks_;
132 ++split_->num_types;
133 ++curr_histogram_ix_;
134 block_size_ = 0;
135 } else if (block_size_ > 0) {
136 double entropy = BitsEntropy(&(*histograms_)[curr_histogram_ix_].data_[0],
137 alphabet_size_);
138 HistogramType combined_histo[2];
139 double combined_entropy[2];
140 double diff[2];
141 for (size_t j = 0; j < 2; ++j) {
142 size_t last_histogram_ix = last_histogram_ix_[j];
143 combined_histo[j] = (*histograms_)[curr_histogram_ix_];
144 combined_histo[j].AddHistogram((*histograms_)[last_histogram_ix]);
145 combined_entropy[j] = BitsEntropy(
146 &combined_histo[j].data_[0], alphabet_size_);
147 diff[j] = combined_entropy[j] - entropy - last_entropy_[j];
148 }
149
150 if (split_->num_types < kMaxBlockTypes &&
151 diff[0] > split_threshold_ &&
152 diff[1] > split_threshold_) {
153 // Create new block.
154 split_->lengths[num_blocks_] = static_cast<uint32_t>(block_size_);
155 split_->types[num_blocks_] = static_cast<uint8_t>(split_->num_types);
156 last_histogram_ix_[1] = last_histogram_ix_[0];
157 last_histogram_ix_[0] = static_cast<uint8_t>(split_->num_types);
158 last_entropy_[1] = last_entropy_[0];
159 last_entropy_[0] = entropy;
160 ++num_blocks_;
161 ++split_->num_types;
162 ++curr_histogram_ix_;
163 block_size_ = 0;
164 merge_last_count_ = 0;
165 target_block_size_ = min_block_size_;
166 } else if (diff[1] < diff[0] - 20.0) {
167 // Combine this block with second last block.
168 split_->lengths[num_blocks_] = static_cast<uint32_t>(block_size_);
169 split_->types[num_blocks_] = split_->types[num_blocks_ - 2];
170 std::swap(last_histogram_ix_[0], last_histogram_ix_[1]);
171 (*histograms_)[last_histogram_ix_[0]] = combined_histo[1];
172 last_entropy_[1] = last_entropy_[0];
173 last_entropy_[0] = combined_entropy[1];
174 ++num_blocks_;
175 block_size_ = 0;
176 (*histograms_)[curr_histogram_ix_].Clear();
177 merge_last_count_ = 0;
178 target_block_size_ = min_block_size_;
179 } else {
180 // Combine this block with last block.
181 split_->lengths[num_blocks_ - 1] += static_cast<uint32_t>(block_size_);
182 (*histograms_)[last_histogram_ix_[0]] = combined_histo[0];
183 last_entropy_[0] = combined_entropy[0];
184 if (split_->num_types == 1) {
185 last_entropy_[1] = last_entropy_[0];
186 }
187 block_size_ = 0;
188 (*histograms_)[curr_histogram_ix_].Clear();
189 if (++merge_last_count_ > 1) {
190 target_block_size_ += min_block_size_;
191 }
192 }
193 }
194 if (is_final) {
195 (*histograms_).resize(split_->num_types);
196 split_->types.resize(num_blocks_);
197 split_->lengths.resize(num_blocks_);
198 }
199 }
200
201 private:
202 static const uint16_t kMaxBlockTypes = 256;
203
204 // Alphabet size of particular block category.
205 const size_t alphabet_size_;
206 // We collect at least this many symbols for each block.
207 const size_t min_block_size_;
208 // We merge histograms A and B if
209 // entropy(A+B) < entropy(A) + entropy(B) + split_threshold_,
210 // where A is the current histogram and B is the histogram of the last or the
211 // second last block type.
212 const double split_threshold_;
213
214 size_t num_blocks_;
215 BlockSplit* split_; // not owned
216 std::vector<HistogramType>* histograms_; // not owned
217
218 // The number of symbols that we want to collect before deciding on whether
219 // or not to merge the block with a previous one or emit a new block.
220 size_t target_block_size_;
221 // The number of symbols in the current histogram.
222 size_t block_size_;
223 // Offset of the current histogram.
224 size_t curr_histogram_ix_;
225 // Offset of the histograms of the previous two block types.
226 size_t last_histogram_ix_[2];
227 // Entropy of the previous two block types.
228 double last_entropy_[2];
229 // The number of times we merged the current block with the last one.
230 size_t merge_last_count_;
231 };
232
233 void BuildMetaBlockGreedy(const uint8_t* ringbuffer,
234 size_t pos,
235 size_t mask,
236 const Command *commands,
237 size_t n_commands,
238 MetaBlockSplit* mb) {
239 size_t num_literals = 0;
240 for (size_t i = 0; i < n_commands; ++i) {
241 num_literals += commands[i].insert_len_;
242 }
243
244 BlockSplitter<HistogramLiteral> lit_blocks(
245 256, 512, 400.0, num_literals,
246 &mb->literal_split, &mb->literal_histograms);
247 BlockSplitter<HistogramCommand> cmd_blocks(
248 kNumCommandPrefixes, 1024, 500.0, n_commands,
249 &mb->command_split, &mb->command_histograms);
250 BlockSplitter<HistogramDistance> dist_blocks(
251 64, 512, 100.0, n_commands,
252 &mb->distance_split, &mb->distance_histograms);
253
254 for (size_t i = 0; i < n_commands; ++i) {
255 const Command cmd = commands[i];
256 cmd_blocks.AddSymbol(cmd.cmd_prefix_);
257 for (size_t j = cmd.insert_len_; j != 0; --j) {
258 lit_blocks.AddSymbol(ringbuffer[pos & mask]);
259 ++pos;
260 }
261 pos += cmd.copy_len();
262 if (cmd.copy_len() && cmd.cmd_prefix_ >= 128) {
263 dist_blocks.AddSymbol(cmd.dist_prefix_);
264 }
265 }
266
267 lit_blocks.FinishBlock(/* is_final = */ true);
268 cmd_blocks.FinishBlock(/* is_final = */ true);
269 dist_blocks.FinishBlock(/* is_final = */ true);
270 }
271
272 // Greedy block splitter for one block category (literal, command or distance).
273 // Gathers histograms for all context buckets.
274 template<typename HistogramType>
275 class ContextBlockSplitter {
276 public:
277 ContextBlockSplitter(size_t alphabet_size,
278 size_t num_contexts,
279 size_t min_block_size,
280 double split_threshold,
281 size_t num_symbols,
282 BlockSplit* split,
283 std::vector<HistogramType>* histograms)
284 : alphabet_size_(alphabet_size),
285 num_contexts_(num_contexts),
286 max_block_types_(kMaxBlockTypes / num_contexts),
287 min_block_size_(min_block_size),
288 split_threshold_(split_threshold),
289 num_blocks_(0),
290 split_(split),
291 histograms_(histograms),
292 target_block_size_(min_block_size),
293 block_size_(0),
294 curr_histogram_ix_(0),
295 last_entropy_(2 * num_contexts),
296 merge_last_count_(0) {
297 size_t max_num_blocks = num_symbols / min_block_size + 1;
298 // We have to allocate one more histogram than the maximum number of block
299 // types for the current histogram when the meta-block is too big.
300 size_t max_num_types = std::min(max_num_blocks, max_block_types_ + 1);
301 split_->lengths.resize(max_num_blocks);
302 split_->types.resize(max_num_blocks);
303 histograms_->resize(max_num_types * num_contexts);
304 last_histogram_ix_[0] = last_histogram_ix_[1] = 0;
305 }
306
307 // Adds the next symbol to the current block type and context. When the
308 // current block reaches the target size, decides on merging the block.
309 void AddSymbol(size_t symbol, size_t context) {
310 (*histograms_)[curr_histogram_ix_ + context].Add(symbol);
311 ++block_size_;
312 if (block_size_ == target_block_size_) {
313 FinishBlock(/* is_final = */ false);
314 }
315 }
316
317 // Does either of three things:
318 // (1) emits the current block with a new block type;
319 // (2) emits the current block with the type of the second last block;
320 // (3) merges the current block with the last block.
321 void FinishBlock(bool is_final) {
322 if (block_size_ < min_block_size_) {
323 block_size_ = min_block_size_;
324 }
325 if (num_blocks_ == 0) {
326 // Create first block.
327 split_->lengths[0] = static_cast<uint32_t>(block_size_);
328 split_->types[0] = 0;
329 for (size_t i = 0; i < num_contexts_; ++i) {
330 last_entropy_[i] =
331 BitsEntropy(&(*histograms_)[i].data_[0], alphabet_size_);
332 last_entropy_[num_contexts_ + i] = last_entropy_[i];
333 }
334 ++num_blocks_;
335 ++split_->num_types;
336 curr_histogram_ix_ += num_contexts_;
337 block_size_ = 0;
338 } else if (block_size_ > 0) {
339 // Try merging the set of histograms for the current block type with the
340 // respective set of histograms for the last and second last block types.
341 // Decide over the split based on the total reduction of entropy across
342 // all contexts.
343 std::vector<double> entropy(num_contexts_);
344 std::vector<HistogramType> combined_histo(2 * num_contexts_);
345 std::vector<double> combined_entropy(2 * num_contexts_);
346 double diff[2] = { 0.0 };
347 for (size_t i = 0; i < num_contexts_; ++i) {
348 size_t curr_histo_ix = curr_histogram_ix_ + i;
349 entropy[i] = BitsEntropy(&(*histograms_)[curr_histo_ix].data_[0],
350 alphabet_size_);
351 for (size_t j = 0; j < 2; ++j) {
352 size_t jx = j * num_contexts_ + i;
353 size_t last_histogram_ix = last_histogram_ix_[j] + i;
354 combined_histo[jx] = (*histograms_)[curr_histo_ix];
355 combined_histo[jx].AddHistogram((*histograms_)[last_histogram_ix]);
356 combined_entropy[jx] = BitsEntropy(
357 &combined_histo[jx].data_[0], alphabet_size_);
358 diff[j] += combined_entropy[jx] - entropy[i] - last_entropy_[jx];
359 }
360 }
361
362 if (split_->num_types < max_block_types_ &&
363 diff[0] > split_threshold_ &&
364 diff[1] > split_threshold_) {
365 // Create new block.
366 split_->lengths[num_blocks_] = static_cast<uint32_t>(block_size_);
367 split_->types[num_blocks_] = static_cast<uint8_t>(split_->num_types);
368 last_histogram_ix_[1] = last_histogram_ix_[0];
369 last_histogram_ix_[0] = split_->num_types * num_contexts_;
370 for (size_t i = 0; i < num_contexts_; ++i) {
371 last_entropy_[num_contexts_ + i] = last_entropy_[i];
372 last_entropy_[i] = entropy[i];
373 }
374 ++num_blocks_;
375 ++split_->num_types;
376 curr_histogram_ix_ += num_contexts_;
377 block_size_ = 0;
378 merge_last_count_ = 0;
379 target_block_size_ = min_block_size_;
380 } else if (diff[1] < diff[0] - 20.0) {
381 // Combine this block with second last block.
382 split_->lengths[num_blocks_] = static_cast<uint32_t>(block_size_);
383 split_->types[num_blocks_] = split_->types[num_blocks_ - 2];
384 std::swap(last_histogram_ix_[0], last_histogram_ix_[1]);
385 for (size_t i = 0; i < num_contexts_; ++i) {
386 (*histograms_)[last_histogram_ix_[0] + i] =
387 combined_histo[num_contexts_ + i];
388 last_entropy_[num_contexts_ + i] = last_entropy_[i];
389 last_entropy_[i] = combined_entropy[num_contexts_ + i];
390 (*histograms_)[curr_histogram_ix_ + i].Clear();
391 }
392 ++num_blocks_;
393 block_size_ = 0;
394 merge_last_count_ = 0;
395 target_block_size_ = min_block_size_;
396 } else {
397 // Combine this block with last block.
398 split_->lengths[num_blocks_ - 1] += static_cast<uint32_t>(block_size_);
399 for (size_t i = 0; i < num_contexts_; ++i) {
400 (*histograms_)[last_histogram_ix_[0] + i] = combined_histo[i];
401 last_entropy_[i] = combined_entropy[i];
402 if (split_->num_types == 1) {
403 last_entropy_[num_contexts_ + i] = last_entropy_[i];
404 }
405 (*histograms_)[curr_histogram_ix_ + i].Clear();
406 }
407 block_size_ = 0;
408 if (++merge_last_count_ > 1) {
409 target_block_size_ += min_block_size_;
410 }
411 }
412 }
413 if (is_final) {
414 (*histograms_).resize(split_->num_types * num_contexts_);
415 split_->types.resize(num_blocks_);
416 split_->lengths.resize(num_blocks_);
417 }
418 }
419
420 private:
421 static const int kMaxBlockTypes = 256;
422
423 // Alphabet size of particular block category.
424 const size_t alphabet_size_;
425 const size_t num_contexts_;
426 const size_t max_block_types_;
427 // We collect at least this many symbols for each block.
428 const size_t min_block_size_;
429 // We merge histograms A and B if
430 // entropy(A+B) < entropy(A) + entropy(B) + split_threshold_,
431 // where A is the current histogram and B is the histogram of the last or the
432 // second last block type.
433 const double split_threshold_;
434
435 size_t num_blocks_;
436 BlockSplit* split_; // not owned
437 std::vector<HistogramType>* histograms_; // not owned
438
439 // The number of symbols that we want to collect before deciding on whether
440 // or not to merge the block with a previous one or emit a new block.
441 size_t target_block_size_;
442 // The number of symbols in the current histogram.
443 size_t block_size_;
444 // Offset of the current histogram.
445 size_t curr_histogram_ix_;
446 // Offset of the histograms of the previous two block types.
447 size_t last_histogram_ix_[2];
448 // Entropy of the previous two block types.
449 std::vector<double> last_entropy_;
450 // The number of times we merged the current block with the last one.
451 size_t merge_last_count_;
452 };
453
454 void BuildMetaBlockGreedyWithContexts(const uint8_t* ringbuffer,
455 size_t pos,
456 size_t mask,
457 uint8_t prev_byte,
458 uint8_t prev_byte2,
459 ContextType literal_context_mode,
460 size_t num_contexts,
461 const uint32_t* static_context_map,
462 const Command *commands,
463 size_t n_commands,
464 MetaBlockSplit* mb) {
465 size_t num_literals = 0;
466 for (size_t i = 0; i < n_commands; ++i) {
467 num_literals += commands[i].insert_len_;
468 }
469
470 ContextBlockSplitter<HistogramLiteral> lit_blocks(
471 256, num_contexts, 512, 400.0, num_literals,
472 &mb->literal_split, &mb->literal_histograms);
473 BlockSplitter<HistogramCommand> cmd_blocks(
474 kNumCommandPrefixes, 1024, 500.0, n_commands,
475 &mb->command_split, &mb->command_histograms);
476 BlockSplitter<HistogramDistance> dist_blocks(
477 64, 512, 100.0, n_commands,
478 &mb->distance_split, &mb->distance_histograms);
479
480 for (size_t i = 0; i < n_commands; ++i) {
481 const Command cmd = commands[i];
482 cmd_blocks.AddSymbol(cmd.cmd_prefix_);
483 for (size_t j = cmd.insert_len_; j != 0; --j) {
484 size_t context = Context(prev_byte, prev_byte2, literal_context_mode);
485 uint8_t literal = ringbuffer[pos & mask];
486 lit_blocks.AddSymbol(literal, static_context_map[context]);
487 prev_byte2 = prev_byte;
488 prev_byte = literal;
489 ++pos;
490 }
491 pos += cmd.copy_len();
492 if (cmd.copy_len()) {
493 prev_byte2 = ringbuffer[(pos - 2) & mask];
494 prev_byte = ringbuffer[(pos - 1) & mask];
495 if (cmd.cmd_prefix_ >= 128) {
496 dist_blocks.AddSymbol(cmd.dist_prefix_);
497 }
498 }
499 }
500
501 lit_blocks.FinishBlock(/* is_final = */ true);
502 cmd_blocks.FinishBlock(/* is_final = */ true);
503 dist_blocks.FinishBlock(/* is_final = */ true);
504
505 mb->literal_context_map.resize(
506 mb->literal_split.num_types << kLiteralContextBits);
507 for (size_t i = 0; i < mb->literal_split.num_types; ++i) {
508 for (size_t j = 0; j < (1u << kLiteralContextBits); ++j) {
509 mb->literal_context_map[(i << kLiteralContextBits) + j] =
510 static_cast<uint32_t>(i * num_contexts) + static_context_map[j];
511 }
512 }
513 }
514
515 void OptimizeHistograms(size_t num_direct_distance_codes,
516 size_t distance_postfix_bits,
517 MetaBlockSplit* mb) {
518 uint8_t* good_for_rle = new uint8_t[kNumCommandPrefixes];
519 for (size_t i = 0; i < mb->literal_histograms.size(); ++i) {
520 OptimizeHuffmanCountsForRle(256, &mb->literal_histograms[i].data_[0],
521 good_for_rle);
522 }
523 for (size_t i = 0; i < mb->command_histograms.size(); ++i) {
524 OptimizeHuffmanCountsForRle(kNumCommandPrefixes,
525 &mb->command_histograms[i].data_[0],
526 good_for_rle);
527 }
528 size_t num_distance_codes =
529 kNumDistanceShortCodes + num_direct_distance_codes +
530 (48u << distance_postfix_bits);
531 for (size_t i = 0; i < mb->distance_histograms.size(); ++i) {
532 OptimizeHuffmanCountsForRle(num_distance_codes,
533 &mb->distance_histograms[i].data_[0],
534 good_for_rle);
535 }
536 delete[] good_for_rle;
537 }
538
539 } // namespace brotli
OLDNEW
« no previous file with comments | « third_party/brotli/enc/metablock.c ('k') | third_party/brotli/enc/metablock_inc.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698