Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1)

Side by Side Diff: third_party/brotli/enc/compress_fragment.c

Issue 2537133002: Update brotli to v1.0.0-snapshot. (Closed)
Patch Set: Created 4 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 /* Copyright 2015 Google Inc. All Rights Reserved.
2
3 Distributed under MIT license.
4 See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5 */
6
7 /* Function for fast encoding of an input fragment, independently from the input
8 history. This function uses one-pass processing: when we find a backward
9 match, we immediately emit the corresponding command and literal codes to
10 the bit stream.
11
12 Adapted from the CompressFragment() function in
13 https://github.com/google/snappy/blob/master/snappy.cc */
14
15 #include "./compress_fragment.h"
16
17 #include <string.h> /* memcmp, memcpy, memset */
18
19 #include "../common/constants.h"
20 #include <brotli/types.h>
21 #include "./brotli_bit_stream.h"
22 #include "./entropy_encode.h"
23 #include "./fast_log.h"
24 #include "./find_match_length.h"
25 #include "./memory.h"
26 #include "./port.h"
27 #include "./write_bits.h"
28
29
30 #if defined(__cplusplus) || defined(c_plusplus)
31 extern "C" {
32 #endif
33
/* Same as MaxBackwardLimit(18): largest usable backward-copy distance for an
   18-bit window, with BROTLI_WINDOW_GAP bytes reserved at the end. */
#define MAX_DISTANCE ((1 << 18) - BROTLI_WINDOW_GAP)

/* kHashMul32 multiplier has these properties:
   * The multiplier must be odd. Otherwise we may lose the highest bit.
   * No long streaks of ones or zeros.
   * There is no effort to ensure that it is a prime, the oddity is enough
     for this use.
   * The number has been tuned heuristically against compression benchmarks. */
static const uint32_t kHashMul32 = 0x1e35a7bd;
44
45 static BROTLI_INLINE uint32_t Hash(const uint8_t* p, size_t shift) {
46 const uint64_t h = (BROTLI_UNALIGNED_LOAD64(p) << 24) * kHashMul32;
47 return (uint32_t)(h >> shift);
48 }
49
50 static BROTLI_INLINE uint32_t HashBytesAtOffset(
51 uint64_t v, int offset, size_t shift) {
52 assert(offset >= 0);
53 assert(offset <= 3);
54 {
55 const uint64_t h = ((v >> (8 * offset)) << 24) * kHashMul32;
56 return (uint32_t)(h >> shift);
57 }
58 }
59
60 static BROTLI_INLINE BROTLI_BOOL IsMatch(const uint8_t* p1, const uint8_t* p2) {
61 return TO_BROTLI_BOOL(
62 BROTLI_UNALIGNED_LOAD32(p1) == BROTLI_UNALIGNED_LOAD32(p2) &&
63 p1[4] == p2[4]);
64 }
65
66 /* Builds a literal prefix code into "depths" and "bits" based on the statistics
67 of the "input" string and stores it into the bit stream.
68 Note that the prefix code here is built from the pre-LZ77 input, therefore
69 we can only approximate the statistics of the actual literal stream.
70 Moreover, for long inputs we build a histogram from a sample of the input
71 and thus have to assign a non-zero depth for each literal.
72 Returns estimated compression ratio millibytes/char for encoding given input
73 with generated code. */
74 static size_t BuildAndStoreLiteralPrefixCode(MemoryManager* m,
75 const uint8_t* input,
76 const size_t input_size,
77 uint8_t depths[256],
78 uint16_t bits[256],
79 size_t* storage_ix,
80 uint8_t* storage) {
81 uint32_t histogram[256] = { 0 };
82 size_t histogram_total;
83 size_t i;
84 if (input_size < (1 << 15)) {
85 for (i = 0; i < input_size; ++i) {
86 ++histogram[input[i]];
87 }
88 histogram_total = input_size;
89 for (i = 0; i < 256; ++i) {
90 /* We weigh the first 11 samples with weight 3 to account for the
91 balancing effect of the LZ77 phase on the histogram. */
92 const uint32_t adjust = 2 * BROTLI_MIN(uint32_t, histogram[i], 11u);
93 histogram[i] += adjust;
94 histogram_total += adjust;
95 }
96 } else {
97 static const size_t kSampleRate = 29;
98 for (i = 0; i < input_size; i += kSampleRate) {
99 ++histogram[input[i]];
100 }
101 histogram_total = (input_size + kSampleRate - 1) / kSampleRate;
102 for (i = 0; i < 256; ++i) {
103 /* We add 1 to each population count to avoid 0 bit depths (since this is
104 only a sample and we don't know if the symbol appears or not), and we
105 weigh the first 11 samples with weight 3 to account for the balancing
106 effect of the LZ77 phase on the histogram (more frequent symbols are
107 more likely to be in backward references instead as literals). */
108 const uint32_t adjust = 1 + 2 * BROTLI_MIN(uint32_t, histogram[i], 11u);
109 histogram[i] += adjust;
110 histogram_total += adjust;
111 }
112 }
113 BrotliBuildAndStoreHuffmanTreeFast(m, histogram, histogram_total,
114 /* max_bits = */ 8,
115 depths, bits, storage_ix, storage);
116 if (BROTLI_IS_OOM(m)) return 0;
117 {
118 size_t literal_ratio = 0;
119 for (i = 0; i < 256; ++i) {
120 if (histogram[i]) literal_ratio += histogram[i] * depths[i];
121 }
122 /* Estimated encoding ratio, millibytes per symbol. */
123 return (literal_ratio * 125) / histogram_total;
124 }
125 }
126
/* Builds a command and distance prefix code (each 64 symbols) into "depth" and
   "bits" based on "histogram" and stores it into the bit stream.
   The first 64 histogram entries are the command symbols in the reordered
   layout used by the Emit* helpers; the second 64 are the distance symbols.
   NOTE(review): the third argument of BrotliCreateHuffmanTree (15 resp. 14)
   appears to be the depth limit -- confirm against entropy_encode.h. */
static void BuildAndStoreCommandPrefixCode(const uint32_t histogram[128],
    uint8_t depth[128], uint16_t bits[128], size_t* storage_ix,
    uint8_t* storage) {
  /* Tree size for building a tree over 64 symbols is 2 * 64 + 1. */
  HuffmanTree tree[129];
  uint8_t cmd_depth[BROTLI_NUM_COMMAND_SYMBOLS] = { 0 };
  uint16_t cmd_bits[64];

  BrotliCreateHuffmanTree(histogram, 64, 15, tree, depth);
  BrotliCreateHuffmanTree(&histogram[64], 64, 14, tree, &depth[64]);
  /* We have to jump through a few hoops here in order to compute
     the command bits because the symbols are in a different order than in
     the full alphabet. This looks complicated, but having the symbols
     in this order in the command bits saves a few branches in the Emit*
     functions. */
  /* Permute the 64 command depths (byte counts). */
  memcpy(cmd_depth, depth, 24);
  memcpy(cmd_depth + 24, depth + 40, 8);
  memcpy(cmd_depth + 32, depth + 24, 8);
  memcpy(cmd_depth + 40, depth + 48, 8);
  memcpy(cmd_depth + 48, depth + 32, 8);
  memcpy(cmd_depth + 56, depth + 56, 8);
  BrotliConvertBitDepthsToSymbols(cmd_depth, 64, cmd_bits);
  /* Apply the inverse permutation to the bits; sizes are in bytes, so each
     count is twice the number of uint16_t symbols moved. */
  memcpy(bits, cmd_bits, 48);
  memcpy(bits + 24, cmd_bits + 32, 16);
  memcpy(bits + 32, cmd_bits + 48, 16);
  memcpy(bits + 40, cmd_bits + 24, 16);
  memcpy(bits + 48, cmd_bits + 40, 16);
  memcpy(bits + 56, cmd_bits + 56, 16);
  BrotliConvertBitDepthsToSymbols(&depth[64], 64, &bits[64]);
  {
    /* Create the bit length array for the full command alphabet. */
    size_t i;
    memset(cmd_depth, 0, 64);  /* only 64 first values were used */
    memcpy(cmd_depth, depth, 8);
    memcpy(cmd_depth + 64, depth + 8, 8);
    memcpy(cmd_depth + 128, depth + 16, 8);
    memcpy(cmd_depth + 192, depth + 24, 8);
    memcpy(cmd_depth + 384, depth + 32, 8);
    for (i = 0; i < 8; ++i) {
      cmd_depth[128 + 8 * i] = depth[40 + i];
      cmd_depth[256 + 8 * i] = depth[48 + i];
      cmd_depth[448 + 8 * i] = depth[56 + i];
    }
    BrotliStoreHuffmanTree(
        cmd_depth, BROTLI_NUM_COMMAND_SYMBOLS, tree, storage_ix, storage);
  }
  BrotliStoreHuffmanTree(&depth[64], 64, tree, storage_ix, storage);
}
177
/* Emits the insert-length prefix code for "insertlen" (plus extra bits) to
   the bit stream and records the emitted symbol in "histo".
   REQUIRES: insertlen < 6210 */
static BROTLI_INLINE void EmitInsertLen(size_t insertlen,
                                        const uint8_t depth[128],
                                        const uint16_t bits[128],
                                        uint32_t histo[128],
                                        size_t* storage_ix,
                                        uint8_t* storage) {
  if (insertlen < 6) {
    /* Lengths 0..5 map directly to codes 40..45; no extra bits. */
    const size_t code = insertlen + 40;
    BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
    ++histo[code];
  } else if (insertlen < 130) {
    const size_t tail = insertlen - 2;
    const uint32_t nbits = Log2FloorNonZero(tail) - 1u;
    const size_t prefix = tail >> nbits;
    const size_t inscode = (nbits << 1) + prefix + 42;
    BrotliWriteBits(depth[inscode], bits[inscode], storage_ix, storage);
    BrotliWriteBits(nbits, tail - (prefix << nbits), storage_ix, storage);
    ++histo[inscode];
  } else if (insertlen < 2114) {
    const size_t tail = insertlen - 66;
    const uint32_t nbits = Log2FloorNonZero(tail);
    const size_t code = nbits + 50;
    BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
    BrotliWriteBits(nbits, tail - ((size_t)1 << nbits), storage_ix, storage);
    ++histo[code];
  } else {
    BrotliWriteBits(depth[61], bits[61], storage_ix, storage);
    BrotliWriteBits(12, insertlen - 2114, storage_ix, storage);
    /* NOTE(review): code 61 is emitted but histogram slot 21 is bumped.
       The histogram only shapes the next block's prefix code, so this cannot
       corrupt the stream -- but confirm the index is intentional. */
    ++histo[21];
  }
}
210
/* Emits the insert-length code for long inserts; complements EmitInsertLen.
   Callers pass insertlen >= 6210 (the subtraction below underflows
   otherwise). */
static BROTLI_INLINE void EmitLongInsertLen(size_t insertlen,
                                            const uint8_t depth[128],
                                            const uint16_t bits[128],
                                            uint32_t histo[128],
                                            size_t* storage_ix,
                                            uint8_t* storage) {
  if (insertlen < 22594) {
    BrotliWriteBits(depth[62], bits[62], storage_ix, storage);
    BrotliWriteBits(14, insertlen - 6210, storage_ix, storage);
    /* NOTE(review): slot 22 is bumped for emitted code 62 (and 23 for 63
       below); harmless for stream validity, but confirm it is intentional. */
    ++histo[22];
  } else {
    BrotliWriteBits(depth[63], bits[63], storage_ix, storage);
    BrotliWriteBits(24, insertlen - 22594, storage_ix, storage);
    ++histo[23];
  }
}
227
/* Emits the copy-length prefix code for "copylen" (plus extra bits) and
   records the emitted symbol in "histo". Used for copies after the first one
   of a command sequence (see the call site in the main loop). */
static BROTLI_INLINE void EmitCopyLen(size_t copylen,
                                      const uint8_t depth[128],
                                      const uint16_t bits[128],
                                      uint32_t histo[128],
                                      size_t* storage_ix,
                                      uint8_t* storage) {
  if (copylen < 10) {
    /* Short copies: code is copylen + 14, no extra bits. */
    BrotliWriteBits(
        depth[copylen + 14], bits[copylen + 14], storage_ix, storage);
    ++histo[copylen + 14];
  } else if (copylen < 134) {
    const size_t tail = copylen - 6;
    const uint32_t nbits = Log2FloorNonZero(tail) - 1u;
    const size_t prefix = tail >> nbits;
    const size_t code = (nbits << 1) + prefix + 20;
    BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
    BrotliWriteBits(nbits, tail - (prefix << nbits), storage_ix, storage);
    ++histo[code];
  } else if (copylen < 2118) {
    const size_t tail = copylen - 70;
    const uint32_t nbits = Log2FloorNonZero(tail);
    const size_t code = nbits + 28;
    BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
    BrotliWriteBits(nbits, tail - ((size_t)1 << nbits), storage_ix, storage);
    ++histo[code];
  } else {
    BrotliWriteBits(depth[39], bits[39], storage_ix, storage);
    BrotliWriteBits(24, copylen - 2118, storage_ix, storage);
    /* NOTE(review): code 39 is emitted but histogram slot 47 is bumped; the
       histogram only shapes the next block's code, so the stream stays
       valid -- confirm the index is intentional. */
    ++histo[47];
  }
}
259
/* Emits the copy-length part of the first copy of a command. Copies shorter
   than 72 use the low command codes with no separate distance-context symbol;
   longer copies additionally emit symbol 64 (NOTE(review): presumably the
   "reuse last distance" command symbol -- confirm against the command
   alphabet in the format spec). */
static BROTLI_INLINE void EmitCopyLenLastDistance(size_t copylen,
                                                  const uint8_t depth[128],
                                                  const uint16_t bits[128],
                                                  uint32_t histo[128],
                                                  size_t* storage_ix,
                                                  uint8_t* storage) {
  if (copylen < 12) {
    /* Code is copylen - 4; no extra bits, no explicit symbol 64. */
    BrotliWriteBits(depth[copylen - 4], bits[copylen - 4], storage_ix, storage);
    ++histo[copylen - 4];
  } else if (copylen < 72) {
    const size_t tail = copylen - 8;
    const uint32_t nbits = Log2FloorNonZero(tail) - 1;
    const size_t prefix = tail >> nbits;
    const size_t code = (nbits << 1) + prefix + 4;
    BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
    BrotliWriteBits(nbits, tail - (prefix << nbits), storage_ix, storage);
    ++histo[code];
  } else if (copylen < 136) {
    const size_t tail = copylen - 8;
    const size_t code = (tail >> 5) + 30;
    BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
    BrotliWriteBits(5, tail & 31, storage_ix, storage);
    BrotliWriteBits(depth[64], bits[64], storage_ix, storage);
    ++histo[code];
    ++histo[64];
  } else if (copylen < 2120) {
    const size_t tail = copylen - 72;
    const uint32_t nbits = Log2FloorNonZero(tail);
    const size_t code = nbits + 28;
    BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
    BrotliWriteBits(nbits, tail - ((size_t)1 << nbits), storage_ix, storage);
    BrotliWriteBits(depth[64], bits[64], storage_ix, storage);
    ++histo[code];
    ++histo[64];
  } else {
    BrotliWriteBits(depth[39], bits[39], storage_ix, storage);
    BrotliWriteBits(24, copylen - 2120, storage_ix, storage);
    BrotliWriteBits(depth[64], bits[64], storage_ix, storage);
    /* NOTE(review): slot 47 bumped for emitted code 39, mirroring the same
       pattern in EmitCopyLen above -- confirm it is intentional. */
    ++histo[47];
    ++histo[64];
  }
}
302
303 static BROTLI_INLINE void EmitDistance(size_t distance,
304 const uint8_t depth[128],
305 const uint16_t bits[128],
306 uint32_t histo[128],
307 size_t* storage_ix, uint8_t* storage) {
308 const size_t d = distance + 3;
309 const uint32_t nbits = Log2FloorNonZero(d) - 1u;
310 const size_t prefix = (d >> nbits) & 1;
311 const size_t offset = (2 + prefix) << nbits;
312 const size_t distcode = 2 * (nbits - 1) + prefix + 80;
313 BrotliWriteBits(depth[distcode], bits[distcode], storage_ix, storage);
314 BrotliWriteBits(nbits, d - offset, storage_ix, storage);
315 ++histo[distcode];
316 }
317
318 static BROTLI_INLINE void EmitLiterals(const uint8_t* input, const size_t len,
319 const uint8_t depth[256],
320 const uint16_t bits[256],
321 size_t* storage_ix, uint8_t* storage) {
322 size_t j;
323 for (j = 0; j < len; j++) {
324 const uint8_t lit = input[j];
325 BrotliWriteBits(depth[lit], bits[lit], storage_ix, storage);
326 }
327 }
328
/* Stores a non-last meta-block header with length "len" and the given
   ISUNCOMPRESSED flag (1 bit ISLAST=0, 2 bits MNIBBLES selector, then the
   MLEN field as len - 1).
   REQUIRES: len <= 1 << 20. */
static void BrotliStoreMetaBlockHeader(
    size_t len, BROTLI_BOOL is_uncompressed, size_t* storage_ix,
    uint8_t* storage) {
  /* ISLAST */
  BrotliWriteBits(1, 0, storage_ix, storage);
  if (len <= (1U << 16)) {
    /* MNIBBLES is 4 */
    BrotliWriteBits(2, 0, storage_ix, storage);
    BrotliWriteBits(16, len - 1, storage_ix, storage);
  } else {
    /* MNIBBLES is 5 */
    BrotliWriteBits(2, 1, storage_ix, storage);
    BrotliWriteBits(20, len - 1, storage_ix, storage);
  }
  /* ISUNCOMPRESSED */
  BrotliWriteBits(1, (uint64_t)is_uncompressed, storage_ix, storage);
}
347
348 static void UpdateBits(size_t n_bits, uint32_t bits, size_t pos,
349 uint8_t *array) {
350 while (n_bits > 0) {
351 size_t byte_pos = pos >> 3;
352 size_t n_unchanged_bits = pos & 7;
353 size_t n_changed_bits = BROTLI_MIN(size_t, n_bits, 8 - n_unchanged_bits);
354 size_t total_bits = n_unchanged_bits + n_changed_bits;
355 uint32_t mask =
356 (~((1u << total_bits) - 1u)) | ((1u << n_unchanged_bits) - 1u);
357 uint32_t unchanged_bits = array[byte_pos] & mask;
358 uint32_t changed_bits = bits & ((1u << n_changed_bits) - 1u);
359 array[byte_pos] =
360 (uint8_t)((changed_bits << n_unchanged_bits) | unchanged_bits);
361 n_bits -= n_changed_bits;
362 bits >>= n_changed_bits;
363 pos += n_changed_bits;
364 }
365 }
366
/* Truncates the bit stream back to "new_storage_ix": clears the bits above
   the new position within its byte so that subsequent writers can OR fresh
   bits into it, then resets the write cursor. */
static void RewindBitPosition(const size_t new_storage_ix,
                              size_t* storage_ix, uint8_t* storage) {
  const size_t bits_in_last_byte = new_storage_ix & 7;
  storage[new_storage_ix >> 3] &= (uint8_t)((1u << bits_in_last_byte) - 1u);
  *storage_ix = new_storage_ix;
}
374
375 static BROTLI_BOOL ShouldMergeBlock(
376 const uint8_t* data, size_t len, const uint8_t* depths) {
377 size_t histo[256] = { 0 };
378 static const size_t kSampleRate = 43;
379 size_t i;
380 for (i = 0; i < len; i += kSampleRate) {
381 ++histo[data[i]];
382 }
383 {
384 const size_t total = (len + kSampleRate - 1) / kSampleRate;
385 double r = (FastLog2(total) + 0.5) * (double)total + 200;
386 for (i = 0; i < 256; ++i) {
387 r -= (double)histo[i] * (depths[i] + FastLog2(histo[i]));
388 }
389 return TO_BROTLI_BOOL(r >= 0.0);
390 }
391 }
392
393 /* Acceptable loss for uncompressible speedup is 2% */
394 #define MIN_RATIO 980
395
396 static BROTLI_INLINE BROTLI_BOOL ShouldUseUncompressedMode(
397 const uint8_t* metablock_start, const uint8_t* next_emit,
398 const size_t insertlen, const size_t literal_ratio) {
399 const size_t compressed = (size_t)(next_emit - metablock_start);
400 if (compressed * 50 > insertlen) {
401 return BROTLI_FALSE;
402 } else {
403 return TO_BROTLI_BOOL(literal_ratio > MIN_RATIO);
404 }
405 }
406
/* Rewinds the bit stream to "storage_ix_start" (discarding everything written
   for the current meta-block) and re-emits the bytes [begin, end) as a single
   uncompressed meta-block. */
static void EmitUncompressedMetaBlock(const uint8_t* begin, const uint8_t* end,
                                      const size_t storage_ix_start,
                                      size_t* storage_ix, uint8_t* storage) {
  const size_t len = (size_t)(end - begin);
  RewindBitPosition(storage_ix_start, storage_ix, storage);
  BrotliStoreMetaBlockHeader(len, 1, storage_ix, storage);
  /* Uncompressed contents are byte-aligned. */
  *storage_ix = (*storage_ix + 7u) & ~7u;
  memcpy(&storage[*storage_ix >> 3], begin, len);
  *storage_ix += len << 3;
  /* Clear the next byte so later bit writes can OR into it. */
  storage[*storage_ix >> 3] = 0;
}
418
/* Initial command/distance histogram for each block. Symbols that the Emit*
   helpers can never produce (e.g. insert code 40 for zero-length inserts,
   the unused range 65..79 between the last-distance symbol and the first
   distance code) are seeded with 0; every other symbol gets a count of 1 so
   it receives a finite code length in the next block's prefix code. */
static uint32_t kCmdHistoSeed[128] = {
  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 0, 0, 0, 0,
};
427
/* Compresses "input" (of "input_size" bytes) into the bit stream at
   *storage_ix of "storage", splitting it into one or more meta-blocks.
   "table" is the match-finding hash table with 1 << table_bits entries
   (Hash() with shift = 64 - table_bits indexes it). "cmd_depth"/"cmd_bits"
   carry the command+distance prefix code over from the previous fragment;
   "cmd_code"/"cmd_code_numbits" hold its pre-compressed header form. Both
   are updated at the end for the next invocation unless "is_last" is set,
   in which case a final empty meta-block is emitted instead. */
static BROTLI_INLINE void BrotliCompressFragmentFastImpl(
    MemoryManager* m, const uint8_t* input, size_t input_size,
    BROTLI_BOOL is_last, int* table, size_t table_bits, uint8_t cmd_depth[128],
    uint16_t cmd_bits[128], size_t* cmd_code_numbits, uint8_t* cmd_code,
    size_t* storage_ix, uint8_t* storage) {
  uint32_t cmd_histo[128];
  const uint8_t* ip_end;

  /* "next_emit" is a pointer to the first byte that is not covered by a
     previous copy. Bytes between "next_emit" and the start of the next copy or
     the end of the input will be emitted as literal bytes. */
  const uint8_t* next_emit = input;
  /* Save the start of the first block for position and distance computations.
  */
  const uint8_t* base_ip = input;

  static const size_t kFirstBlockSize = 3 << 15;
  static const size_t kMergeBlockSize = 1 << 16;

  const size_t kInputMarginBytes = BROTLI_WINDOW_GAP;
  const size_t kMinMatchLen = 5;

  const uint8_t* metablock_start = input;
  size_t block_size = BROTLI_MIN(size_t, input_size, kFirstBlockSize);
  size_t total_block_size = block_size;
  /* Save the bit position of the MLEN field of the meta-block header, so that
     we can update it later if we decide to extend this meta-block. */
  size_t mlen_storage_ix = *storage_ix + 3;

  uint8_t lit_depth[256];
  uint16_t lit_bits[256];

  size_t literal_ratio;

  const uint8_t* ip;
  int last_distance;

  const size_t shift = 64u - table_bits;

  if (input_size == 0) {
    assert(is_last);
    BrotliWriteBits(1, 1, storage_ix, storage);  /* islast */
    BrotliWriteBits(1, 1, storage_ix, storage);  /* isempty */
    *storage_ix = (*storage_ix + 7u) & ~7u;
    return;
  }

  BrotliStoreMetaBlockHeader(block_size, 0, storage_ix, storage);
  /* No block splits, no contexts. */
  BrotliWriteBits(13, 0, storage_ix, storage);

  literal_ratio = BuildAndStoreLiteralPrefixCode(
      m, input, block_size, lit_depth, lit_bits, storage_ix, storage);
  if (BROTLI_IS_OOM(m)) return;

  {
    /* Store the pre-compressed command and distance prefix codes. */
    size_t i;
    for (i = 0; i + 7 < *cmd_code_numbits; i += 8) {
      BrotliWriteBits(8, cmd_code[i >> 3], storage_ix, storage);
    }
  }
  /* Flush the trailing 0..7 bits of the pre-compressed code. */
  BrotliWriteBits(*cmd_code_numbits & 7, cmd_code[*cmd_code_numbits >> 3],
                  storage_ix, storage);

 emit_commands:
  /* Initialize the command and distance histograms. We will gather
     statistics of command and distance codes during the processing
     of this block and use it to update the command and distance
     prefix codes for the next block. */
  memcpy(cmd_histo, kCmdHistoSeed, sizeof(kCmdHistoSeed));

  /* "ip" is the input pointer. */
  ip = input;
  last_distance = -1;
  ip_end = input + block_size;

  if (BROTLI_PREDICT_TRUE(block_size >= kInputMarginBytes)) {
    /* For the last block, we need to keep a 16 bytes margin so that we can be
       sure that all distances are at most window size - 16.
       For all other blocks, we only need to keep a margin of 5 bytes so that
       we don't go over the block size with a copy. */
    const size_t len_limit = BROTLI_MIN(size_t, block_size - kMinMatchLen,
                                        input_size - kInputMarginBytes);
    const uint8_t* ip_limit = input + len_limit;

    uint32_t next_hash;
    for (next_hash = Hash(++ip, shift); ; ) {
      /* Step 1: Scan forward in the input looking for a 5-byte-long match.
         If we get close to exhausting the input then goto emit_remainder.

         Heuristic match skipping: If 32 bytes are scanned with no matches
         found, start looking only at every other byte. If 32 more bytes are
         scanned, look at every third byte, etc.. When a match is found,
         immediately go back to looking at every byte. This is a small loss
         (~5% performance, ~0.1% density) for compressible data due to more
         bookkeeping, but for non-compressible data (such as JPEG) it's a huge
         win since the compressor quickly "realizes" the data is incompressible
         and doesn't bother looking for matches everywhere.

         The "skip" variable keeps track of how many bytes there are since the
         last match; dividing it by 32 (i.e. right-shifting by five) gives the
         number of bytes to move ahead for each iteration. */
      uint32_t skip = 32;

      const uint8_t* next_ip = ip;
      const uint8_t* candidate;
      assert(next_emit < ip);
trawl:
      do {
        uint32_t hash = next_hash;
        uint32_t bytes_between_hash_lookups = skip++ >> 5;
        assert(hash == Hash(next_ip, shift));
        ip = next_ip;
        next_ip = ip + bytes_between_hash_lookups;
        if (BROTLI_PREDICT_FALSE(next_ip > ip_limit)) {
          goto emit_remainder;
        }
        next_hash = Hash(next_ip, shift);
        /* Prefer a match at the last used distance: it needs no new
           distance code. */
        candidate = ip - last_distance;
        if (IsMatch(ip, candidate)) {
          if (BROTLI_PREDICT_TRUE(candidate < ip)) {
            table[hash] = (int)(ip - base_ip);
            break;
          }
        }
        candidate = base_ip + table[hash];
        assert(candidate >= base_ip);
        assert(candidate < ip);

        table[hash] = (int)(ip - base_ip);
      } while (BROTLI_PREDICT_TRUE(!IsMatch(ip, candidate)));

      /* Check copy distance. If candidate is not feasible, continue search.
         Checking is done outside of hot loop to reduce overhead. */
      if (ip - candidate > MAX_DISTANCE) goto trawl;

      /* Step 2: Emit the found match together with the literal bytes from
         "next_emit" to the bit stream, and then see if we can find a next match
         immediately afterwards. Repeat until we find no match for the input
         without emitting some literal bytes. */

      {
        /* We have a 5-byte match at ip, and we need to emit bytes in
           [next_emit, ip). */
        const uint8_t* base = ip;
        size_t matched = 5 + FindMatchLengthWithLimit(
            candidate + 5, ip + 5, (size_t)(ip_end - ip) - 5);
        int distance = (int)(base - candidate);  /* > 0 */
        size_t insert = (size_t)(base - next_emit);
        ip += matched;
        assert(0 == memcmp(base, candidate, matched));
        if (BROTLI_PREDICT_TRUE(insert < 6210)) {
          EmitInsertLen(insert, cmd_depth, cmd_bits, cmd_histo,
                        storage_ix, storage);
        } else if (ShouldUseUncompressedMode(metablock_start, next_emit, insert,
                                             literal_ratio)) {
          /* Long incompressible insert: discard this meta-block's output and
             restart it as an uncompressed meta-block ending at "base". */
          EmitUncompressedMetaBlock(metablock_start, base, mlen_storage_ix - 3,
                                    storage_ix, storage);
          input_size -= (size_t)(base - input);
          input = base;
          next_emit = input;
          goto next_block;
        } else {
          EmitLongInsertLen(insert, cmd_depth, cmd_bits, cmd_histo,
                            storage_ix, storage);
        }
        EmitLiterals(next_emit, insert, lit_depth, lit_bits,
                     storage_ix, storage);
        if (distance == last_distance) {
          /* Symbol 64 re-uses the previous distance. */
          BrotliWriteBits(cmd_depth[64], cmd_bits[64], storage_ix, storage);
          ++cmd_histo[64];
        } else {
          EmitDistance((size_t)distance, cmd_depth, cmd_bits,
                       cmd_histo, storage_ix, storage);
          last_distance = distance;
        }
        EmitCopyLenLastDistance(matched, cmd_depth, cmd_bits, cmd_histo,
                                storage_ix, storage);

        next_emit = ip;
        if (BROTLI_PREDICT_FALSE(ip >= ip_limit)) {
          goto emit_remainder;
        }
        /* We could immediately start working at ip now, but to improve
           compression we first update "table" with the hashes of some positions
           within the last copy. */
        {
          uint64_t input_bytes = BROTLI_UNALIGNED_LOAD64(ip - 3);
          uint32_t prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
          uint32_t cur_hash = HashBytesAtOffset(input_bytes, 3, shift);
          table[prev_hash] = (int)(ip - base_ip - 3);
          prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
          table[prev_hash] = (int)(ip - base_ip - 2);
          prev_hash = HashBytesAtOffset(input_bytes, 2, shift);
          table[prev_hash] = (int)(ip - base_ip - 1);

          candidate = base_ip + table[cur_hash];
          table[cur_hash] = (int)(ip - base_ip);
        }
      }

      while (IsMatch(ip, candidate)) {
        /* We have a 5-byte match at ip, and no need to emit any literal bytes
           prior to ip. */
        const uint8_t* base = ip;
        size_t matched = 5 + FindMatchLengthWithLimit(
            candidate + 5, ip + 5, (size_t)(ip_end - ip) - 5);
        if (ip - candidate > MAX_DISTANCE) break;
        ip += matched;
        last_distance = (int)(base - candidate);  /* > 0 */
        assert(0 == memcmp(base, candidate, matched));
        EmitCopyLen(matched, cmd_depth, cmd_bits, cmd_histo,
                    storage_ix, storage);
        EmitDistance((size_t)last_distance, cmd_depth, cmd_bits,
                     cmd_histo, storage_ix, storage);

        next_emit = ip;
        if (BROTLI_PREDICT_FALSE(ip >= ip_limit)) {
          goto emit_remainder;
        }
        /* We could immediately start working at ip now, but to improve
           compression we first update "table" with the hashes of some positions
           within the last copy. */
        {
          uint64_t input_bytes = BROTLI_UNALIGNED_LOAD64(ip - 3);
          uint32_t prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
          uint32_t cur_hash = HashBytesAtOffset(input_bytes, 3, shift);
          table[prev_hash] = (int)(ip - base_ip - 3);
          prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
          table[prev_hash] = (int)(ip - base_ip - 2);
          prev_hash = HashBytesAtOffset(input_bytes, 2, shift);
          table[prev_hash] = (int)(ip - base_ip - 1);

          candidate = base_ip + table[cur_hash];
          table[cur_hash] = (int)(ip - base_ip);
        }
      }

      next_hash = Hash(++ip, shift);
    }
  }

emit_remainder:
  assert(next_emit <= ip_end);
  input += block_size;
  input_size -= block_size;
  block_size = BROTLI_MIN(size_t, input_size, kMergeBlockSize);

  /* Decide if we want to continue this meta-block instead of emitting the
     last insert-only command. */
  if (input_size > 0 &&
      total_block_size + block_size <= (1 << 20) &&
      ShouldMergeBlock(input, block_size, lit_depth)) {
    assert(total_block_size > (1 << 16));
    /* Update the size of the current meta-block and continue emitting commands.
       We can do this because the current size and the new size both have 5
       nibbles. */
    total_block_size += block_size;
    UpdateBits(20, (uint32_t)(total_block_size - 1), mlen_storage_ix, storage);
    goto emit_commands;
  }

  /* Emit the remaining bytes as literals. */
  if (next_emit < ip_end) {
    const size_t insert = (size_t)(ip_end - next_emit);
    if (BROTLI_PREDICT_TRUE(insert < 6210)) {
      EmitInsertLen(insert, cmd_depth, cmd_bits, cmd_histo,
                    storage_ix, storage);
      EmitLiterals(next_emit, insert, lit_depth, lit_bits, storage_ix, storage);
    } else if (ShouldUseUncompressedMode(metablock_start, next_emit, insert,
                                         literal_ratio)) {
      EmitUncompressedMetaBlock(metablock_start, ip_end, mlen_storage_ix - 3,
                                storage_ix, storage);
    } else {
      EmitLongInsertLen(insert, cmd_depth, cmd_bits, cmd_histo,
                        storage_ix, storage);
      EmitLiterals(next_emit, insert, lit_depth, lit_bits,
                   storage_ix, storage);
    }
  }
  next_emit = ip_end;

next_block:
  /* If we have more data, write a new meta-block header and prefix codes and
     then continue emitting commands. */
  if (input_size > 0) {
    metablock_start = input;
    block_size = BROTLI_MIN(size_t, input_size, kFirstBlockSize);
    total_block_size = block_size;
    /* Save the bit position of the MLEN field of the meta-block header, so that
       we can update it later if we decide to extend this meta-block. */
    mlen_storage_ix = *storage_ix + 3;
    BrotliStoreMetaBlockHeader(block_size, 0, storage_ix, storage);
    /* No block splits, no contexts. */
    BrotliWriteBits(13, 0, storage_ix, storage);
    literal_ratio = BuildAndStoreLiteralPrefixCode(
        m, input, block_size, lit_depth, lit_bits, storage_ix, storage);
    if (BROTLI_IS_OOM(m)) return;
    BuildAndStoreCommandPrefixCode(cmd_histo, cmd_depth, cmd_bits,
                                   storage_ix, storage);
    goto emit_commands;
  }

  if (is_last) {
    BrotliWriteBits(1, 1, storage_ix, storage);  /* islast */
    BrotliWriteBits(1, 1, storage_ix, storage);  /* isempty */
    *storage_ix = (*storage_ix + 7u) & ~7u;
  } else {
    /* If this is not the last block, update the command and distance prefix
       codes for the next block and store the compressed forms. */
    cmd_code[0] = 0;
    *cmd_code_numbits = 0;
    BuildAndStoreCommandPrefixCode(cmd_histo, cmd_depth, cmd_bits,
                                   cmd_code_numbits, cmd_code);
  }
}
745
/* The set of table_bits values for which specializations are generated. */
#define FOR_TABLE_BITS_(X) X(9) X(11) X(13) X(15)

/* Instantiates a BROTLI_NOINLINE wrapper around BrotliCompressFragmentFastImpl
   with table_bits baked in as the compile-time constant B, so "shift" and the
   hash computations specialize per table size. */
#define BAKE_METHOD_PARAM_(B) \
static BROTLI_NOINLINE void BrotliCompressFragmentFastImpl ## B( \
    MemoryManager* m, const uint8_t* input, size_t input_size, \
    BROTLI_BOOL is_last, int* table, uint8_t cmd_depth[128], \
    uint16_t cmd_bits[128], size_t* cmd_code_numbits, uint8_t* cmd_code, \
    size_t* storage_ix, uint8_t* storage) { \
  BrotliCompressFragmentFastImpl(m, input, input_size, is_last, table, B, \
      cmd_depth, cmd_bits, cmd_code_numbits, cmd_code, storage_ix, storage); \
}
FOR_TABLE_BITS_(BAKE_METHOD_PARAM_)
#undef BAKE_METHOD_PARAM_
759
/* Public entry point: dispatches to the specialization matching "table_size".
   table_size must be a power of two whose log2 is one of 9, 11, 13, 15;
   any other value falls through to assert(0). */
void BrotliCompressFragmentFast(
    MemoryManager* m, const uint8_t* input, size_t input_size,
    BROTLI_BOOL is_last, int* table, size_t table_size, uint8_t cmd_depth[128],
    uint16_t cmd_bits[128], size_t* cmd_code_numbits, uint8_t* cmd_code,
    size_t* storage_ix, uint8_t* storage) {
  const size_t table_bits = Log2FloorNonZero(table_size);
  switch (table_bits) {
/* One case per supported table_bits value, expanded via FOR_TABLE_BITS_. */
#define CASE_(B)                                                     \
    case B:                                                          \
      BrotliCompressFragmentFastImpl ## B(                           \
          m, input, input_size, is_last, table, cmd_depth, cmd_bits, \
          cmd_code_numbits, cmd_code, storage_ix, storage);          \
      break;
    FOR_TABLE_BITS_(CASE_)
#undef CASE_
    default: assert(0); break;
  }
}
778
779 #undef FOR_TABLE_BITS_
780
781 #if defined(__cplusplus) || defined(c_plusplus)
782 } /* extern "C" */
783 #endif
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698