OLD | NEW |
1 /* Copyright 2013 Google Inc. All Rights Reserved. | 1 /* Copyright 2013 Google Inc. All Rights Reserved. |
2 | 2 |
3 Distributed under MIT license. | 3 Distributed under MIT license. |
4 See file LICENSE for detail or copy at https://opensource.org/licenses/MIT | 4 See file LICENSE for detail or copy at https://opensource.org/licenses/MIT |
5 */ | 5 */ |
6 | 6 |
7 // Implementation of Brotli compressor. | 7 /* Implementation of Brotli compressor. */ |
8 | 8 |
9 #include "./encode.h" | 9 #include <brotli/encode.h> |
10 | 10 |
11 #include <algorithm> | 11 #include <stdlib.h> /* free, malloc */ |
12 #include <cstdlib> /* free, malloc */ | 12 #include <string.h> /* memcpy, memset */ |
13 #include <cstring> /* memset */ | |
14 #include <limits> | |
15 | 13 |
| 14 #include "../common/version.h" |
16 #include "./backward_references.h" | 15 #include "./backward_references.h" |
17 #include "./bit_cost.h" | 16 #include "./bit_cost.h" |
18 #include "./block_splitter.h" | |
19 #include "./brotli_bit_stream.h" | 17 #include "./brotli_bit_stream.h" |
20 #include "./cluster.h" | |
21 #include "./context.h" | |
22 #include "./metablock.h" | |
23 #include "./transform.h" | |
24 #include "./compress_fragment.h" | 18 #include "./compress_fragment.h" |
25 #include "./compress_fragment_two_pass.h" | 19 #include "./compress_fragment_two_pass.h" |
| 20 #include "./context.h" |
26 #include "./entropy_encode.h" | 21 #include "./entropy_encode.h" |
27 #include "./fast_log.h" | 22 #include "./fast_log.h" |
28 #include "./hash.h" | 23 #include "./hash.h" |
29 #include "./histogram.h" | 24 #include "./histogram.h" |
| 25 #include "./memory.h" |
| 26 #include "./metablock.h" |
| 27 #include "./port.h" |
30 #include "./prefix.h" | 28 #include "./prefix.h" |
| 29 #include "./quality.h" |
| 30 #include "./ringbuffer.h" |
31 #include "./utf8_util.h" | 31 #include "./utf8_util.h" |
32 #include "./write_bits.h" | 32 #include "./write_bits.h" |
33 | 33 |
34 namespace brotli { | 34 #if defined(__cplusplus) || defined(c_plusplus) |
35 | 35 extern "C" { |
36 static const int kMinQualityForBlockSplit = 4; | 36 #endif |
37 static const int kMinQualityForContextModeling = 5; | |
38 static const int kMinQualityForOptimizeHistograms = 4; | |
39 // For quality 2 there is no block splitting, so we buffer at most this much | |
40 // literals and commands. | |
41 static const size_t kMaxNumDelayedSymbols = 0x2fff; | |
42 | 37 |
43 #define COPY_ARRAY(dst, src) memcpy(dst, src, sizeof(src)); | 38 #define COPY_ARRAY(dst, src) memcpy(dst, src, sizeof(src)); |
44 | 39 |
| 40 typedef enum BrotliEncoderStreamState { |
| 41 /* Default state. */ |
| 42 BROTLI_STREAM_PROCESSING = 0, |
| 43 /* Intermediate state; after next block is emitted, byte-padding should be |
| 44 performed before getting back to default state. */ |
| 45 BROTLI_STREAM_FLUSH_REQUESTED = 1, |
| 46 /* Last metablock was produced; no more input is acceptable. */ |
| 47 BROTLI_STREAM_FINISHED = 2, |
| 48 /* Flushing compressed block and writing meta-data block header. */ |
| 49 BROTLI_STREAM_METADATA_HEAD = 3, |
| 50 /* Writing metadata block body. */ |
| 51 BROTLI_STREAM_METADATA_BODY = 4 |
| 52 } BrotliEncoderStreamState; |
| 53 |
| 54 typedef struct BrotliEncoderStateStruct { |
| 55 BrotliEncoderParams params; |
| 56 |
| 57 MemoryManager memory_manager_; |
| 58 |
| 59 Hashers hashers_; |
| 60 uint64_t input_pos_; |
| 61 RingBuffer ringbuffer_; |
| 62 size_t cmd_alloc_size_; |
| 63 Command* commands_; |
| 64 size_t num_commands_; |
| 65 size_t num_literals_; |
| 66 size_t last_insert_len_; |
| 67 uint64_t last_flush_pos_; |
| 68 uint64_t last_processed_pos_; |
| 69 int dist_cache_[4]; |
| 70 int saved_dist_cache_[4]; |
| 71 uint8_t last_byte_; |
| 72 uint8_t last_byte_bits_; |
| 73 uint8_t prev_byte_; |
| 74 uint8_t prev_byte2_; |
| 75 size_t storage_size_; |
| 76 uint8_t* storage_; |
| 77 /* Hash table for FAST_ONE_PASS_COMPRESSION_QUALITY mode. */ |
| 78 int small_table_[1 << 10]; /* 4KiB */ |
| 79 int* large_table_; /* Allocated only when needed */ |
| 80 size_t large_table_size_; |
| 81 /* Command and distance prefix codes (each 64 symbols, stored back-to-back) |
| 82 used for the next block in FAST_ONE_PASS_COMPRESSION_QUALITY. The command |
| 83 prefix code is over a smaller alphabet with the following 64 symbols: |
| 84 0 - 15: insert length code 0, copy length code 0 - 15, same distance |
| 85 16 - 39: insert length code 0, copy length code 0 - 23 |
| 86 40 - 63: insert length code 0 - 23, copy length code 0 |
| 87 Note that symbols 16 and 40 represent the same code in the full alphabet, |
| 88 but we do not use either of them in FAST_ONE_PASS_COMPRESSION_QUALITY. */ |
| 89 uint8_t cmd_depths_[128]; |
| 90 uint16_t cmd_bits_[128]; |
| 91 /* The compressed form of the command and distance prefix codes for the next |
| 92 block in FAST_ONE_PASS_COMPRESSION_QUALITY. */ |
| 93 uint8_t cmd_code_[512]; |
| 94 size_t cmd_code_numbits_; |
| 95 /* Command and literal buffers for FAST_TWO_PASS_COMPRESSION_QUALITY. */ |
| 96 uint32_t* command_buf_; |
| 97 uint8_t* literal_buf_; |
| 98 |
| 99 uint8_t* next_out_; |
| 100 size_t available_out_; |
| 101 size_t total_out_; |
| 102 /* Temporary buffer for padding flush bits or metadata block header / body. */ |
| 103 union { |
| 104 uint64_t u64[2]; |
| 105 uint8_t u8[16]; |
| 106 } tiny_buf_; |
| 107 uint32_t remaining_metadata_bytes_; |
| 108 BrotliEncoderStreamState stream_state_; |
| 109 |
| 110 BROTLI_BOOL is_last_block_emitted_; |
| 111 BROTLI_BOOL is_initialized_; |
| 112 } BrotliEncoderStateStruct; |
| 113 |
| 114 static BROTLI_BOOL EnsureInitialized(BrotliEncoderState* s); |
| 115 |
| 116 static size_t InputBlockSize(BrotliEncoderState* s) { |
| 117 if (!EnsureInitialized(s)) return 0; |
| 118 return (size_t)1 << s->params.lgblock; |
| 119 } |
| 120 |
| 121 static uint64_t UnprocessedInputSize(BrotliEncoderState* s) { |
| 122 return s->input_pos_ - s->last_processed_pos_; |
| 123 } |
| 124 |
| 125 static size_t RemainingInputBlockSize(BrotliEncoderState* s) { |
| 126 const uint64_t delta = UnprocessedInputSize(s); |
| 127 size_t block_size = InputBlockSize(s); |
| 128 if (delta >= block_size) return 0; |
| 129 return block_size - (size_t)delta; |
| 130 } |
| 131 |
| 132 BROTLI_BOOL BrotliEncoderSetParameter( |
| 133 BrotliEncoderState* state, BrotliEncoderParameter p, uint32_t value) { |
| 134 /* Changing parameters on the fly is not implemented yet. */ |
| 135 if (state->is_initialized_) return BROTLI_FALSE; |
| 136 /* TODO: Validate/clamp parameters here. */ |
| 137 switch (p) { |
| 138 case BROTLI_PARAM_MODE: |
| 139 state->params.mode = (BrotliEncoderMode)value; |
| 140 return BROTLI_TRUE; |
| 141 |
| 142 case BROTLI_PARAM_QUALITY: |
| 143 state->params.quality = (int)value; |
| 144 return BROTLI_TRUE; |
| 145 |
| 146 case BROTLI_PARAM_LGWIN: |
| 147 state->params.lgwin = (int)value; |
| 148 return BROTLI_TRUE; |
| 149 |
| 150 case BROTLI_PARAM_LGBLOCK: |
| 151 state->params.lgblock = (int)value; |
| 152 return BROTLI_TRUE; |
| 153 |
| 154 default: return BROTLI_FALSE; |
| 155 } |
| 156 } |
| 157 |
45 static void RecomputeDistancePrefixes(Command* cmds, | 158 static void RecomputeDistancePrefixes(Command* cmds, |
46 size_t num_commands, | 159 size_t num_commands, |
47 uint32_t num_direct_distance_codes, | 160 uint32_t num_direct_distance_codes, |
48 uint32_t distance_postfix_bits) { | 161 uint32_t distance_postfix_bits) { |
| 162 size_t i; |
49 if (num_direct_distance_codes == 0 && distance_postfix_bits == 0) { | 163 if (num_direct_distance_codes == 0 && distance_postfix_bits == 0) { |
50 return; | 164 return; |
51 } | 165 } |
52 for (size_t i = 0; i < num_commands; ++i) { | 166 for (i = 0; i < num_commands; ++i) { |
53 Command* cmd = &cmds[i]; | 167 Command* cmd = &cmds[i]; |
54 if (cmd->copy_len() && cmd->cmd_prefix_ >= 128) { | 168 if (CommandCopyLen(cmd) && cmd->cmd_prefix_ >= 128) { |
55 PrefixEncodeCopyDistance(cmd->DistanceCode(), | 169 PrefixEncodeCopyDistance(CommandRestoreDistanceCode(cmd), |
56 num_direct_distance_codes, | 170 num_direct_distance_codes, |
57 distance_postfix_bits, | 171 distance_postfix_bits, |
58 &cmd->dist_prefix_, | 172 &cmd->dist_prefix_, |
59 &cmd->dist_extra_); | 173 &cmd->dist_extra_); |
60 } | 174 } |
61 } | 175 } |
62 } | 176 } |
63 | 177 |
64 /* Wraps 64-bit input position to 32-bit ringbuffer position preserving | 178 /* Wraps 64-bit input position to 32-bit ring-buffer position preserving |
65 "not-a-first-lap" feature. */ | 179 "not-a-first-lap" feature. */ |
66 static uint32_t WrapPosition(uint64_t position) { | 180 static uint32_t WrapPosition(uint64_t position) { |
67 uint32_t result = static_cast<uint32_t>(position); | 181 uint32_t result = (uint32_t)position; |
68 if (position > (1u << 30)) { | 182 uint64_t gb = position >> 30; |
69 result = (result & ((1u << 30) - 1)) | (1u << 30); | 183 if (gb > 2) { |
 | 184 /* Wrap every 2GiB; the first 3GiB are continuous. */ |
| 185 result = (result & ((1u << 30) - 1)) | ((uint32_t)((gb - 1) & 1) + 1) << 30; |
70 } | 186 } |
71 return result; | 187 return result; |
72 } | 188 } |
73 | 189 |
74 uint8_t* BrotliCompressor::GetBrotliStorage(size_t size) { | 190 static uint8_t* GetBrotliStorage(BrotliEncoderState* s, size_t size) { |
75 if (storage_size_ < size) { | 191 MemoryManager* m = &s->memory_manager_; |
76 delete[] storage_; | 192 if (s->storage_size_ < size) { |
77 storage_ = new uint8_t[size]; | 193 BROTLI_FREE(m, s->storage_); |
78 storage_size_ = size; | 194 s->storage_ = BROTLI_ALLOC(m, uint8_t, size); |
| 195 if (BROTLI_IS_OOM(m)) return NULL; |
| 196 s->storage_size_ = size; |
79 } | 197 } |
80 return storage_; | 198 return s->storage_; |
81 } | |
82 | |
83 static size_t MaxHashTableSize(int quality) { | |
84 return quality == 0 ? 1 << 15 : 1 << 17; | |
85 } | 199 } |
86 | 200 |
87 static size_t HashTableSize(size_t max_table_size, size_t input_size) { | 201 static size_t HashTableSize(size_t max_table_size, size_t input_size) { |
88 size_t htsize = 256; | 202 size_t htsize = 256; |
89 while (htsize < max_table_size && htsize < input_size) { | 203 while (htsize < max_table_size && htsize < input_size) { |
90 htsize <<= 1; | 204 htsize <<= 1; |
91 } | 205 } |
92 return htsize; | 206 return htsize; |
93 } | 207 } |
94 | 208 |
95 int* BrotliCompressor::GetHashTable(int quality, | 209 static int* GetHashTable(BrotliEncoderState* s, int quality, |
96 size_t input_size, | 210 size_t input_size, size_t* table_size) { |
97 size_t* table_size) { | 211 /* Use smaller hash table when input.size() is smaller, since we |
98 // Use smaller hash table when input.size() is smaller, since we | 211 /* Use smaller hash table when input size is smaller, since we |
99 // fill the table, incurring O(hash table size) overhead for | 213 compression, and if the input is short, we won't need that |
100 // compression, and if the input is short, we won't need that | 214 many hash table entries anyway. */ |
101 // many hash table entries anyway. | 215 MemoryManager* m = &s->memory_manager_; |
102 const size_t max_table_size = MaxHashTableSize(quality); | 216 const size_t max_table_size = MaxHashTableSize(quality); |
| 217 size_t htsize = HashTableSize(max_table_size, input_size); |
| 218 int* table; |
103 assert(max_table_size >= 256); | 219 assert(max_table_size >= 256); |
104 size_t htsize = HashTableSize(max_table_size, input_size); | 220 if (quality == FAST_ONE_PASS_COMPRESSION_QUALITY) { |
| 221 /* Only odd shifts are supported by fast-one-pass. */ |
| 222 if ((htsize & 0xAAAAA) == 0) { |
| 223 htsize <<= 1; |
| 224 } |
| 225 } |
105 | 226 |
106 int* table; | 227 if (htsize <= sizeof(s->small_table_) / sizeof(s->small_table_[0])) { |
107 if (htsize <= sizeof(small_table_) / sizeof(small_table_[0])) { | 228 table = s->small_table_; |
108 table = small_table_; | |
109 } else { | 229 } else { |
110 if (large_table_ == NULL) { | 230 if (htsize > s->large_table_size_) { |
111 large_table_ = new int[max_table_size]; | 231 s->large_table_size_ = htsize; |
| 232 BROTLI_FREE(m, s->large_table_); |
| 233 s->large_table_ = BROTLI_ALLOC(m, int, htsize); |
| 234 if (BROTLI_IS_OOM(m)) return 0; |
112 } | 235 } |
113 table = large_table_; | 236 table = s->large_table_; |
114 } | 237 } |
115 | 238 |
116 *table_size = htsize; | 239 *table_size = htsize; |
117 memset(table, 0, htsize * sizeof(*table)); | 240 memset(table, 0, htsize * sizeof(*table)); |
118 return table; | 241 return table; |
119 } | 242 } |
120 | 243 |
121 static void EncodeWindowBits(int lgwin, uint8_t* last_byte, | 244 static void EncodeWindowBits(int lgwin, uint8_t* last_byte, |
122 uint8_t* last_byte_bits) { | 245 uint8_t* last_byte_bits) { |
123 if (lgwin == 16) { | 246 if (lgwin == 16) { |
124 *last_byte = 0; | 247 *last_byte = 0; |
125 *last_byte_bits = 1; | 248 *last_byte_bits = 1; |
126 } else if (lgwin == 17) { | 249 } else if (lgwin == 17) { |
127 *last_byte = 1; | 250 *last_byte = 1; |
128 *last_byte_bits = 7; | 251 *last_byte_bits = 7; |
129 } else if (lgwin > 17) { | 252 } else if (lgwin > 17) { |
130 *last_byte = static_cast<uint8_t>(((lgwin - 17) << 1) | 1); | 253 *last_byte = (uint8_t)(((lgwin - 17) << 1) | 1); |
131 *last_byte_bits = 4; | 254 *last_byte_bits = 4; |
132 } else { | 255 } else { |
133 *last_byte = static_cast<uint8_t>(((lgwin - 8) << 4) | 1); | 256 *last_byte = (uint8_t)(((lgwin - 8) << 4) | 1); |
134 *last_byte_bits = 7; | 257 *last_byte_bits = 7; |
135 } | 258 } |
136 } | 259 } |
137 | 260 |
138 // Initializes the command and distance prefix codes for the first block. | 261 /* Initializes the command and distance prefix codes for the first block. */ |
139 static void InitCommandPrefixCodes(uint8_t cmd_depths[128], | 262 static void InitCommandPrefixCodes(uint8_t cmd_depths[128], |
140 uint16_t cmd_bits[128], | 263 uint16_t cmd_bits[128], |
141 uint8_t cmd_code[512], | 264 uint8_t cmd_code[512], |
142 size_t* cmd_code_numbits) { | 265 size_t* cmd_code_numbits) { |
143 static const uint8_t kDefaultCommandDepths[128] = { | 266 static const uint8_t kDefaultCommandDepths[128] = { |
144 0, 4, 4, 5, 6, 6, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, | 267 0, 4, 4, 5, 6, 6, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, |
145 0, 0, 0, 4, 4, 4, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, | 268 0, 0, 0, 4, 4, 4, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, |
146 7, 7, 10, 10, 10, 10, 10, 10, 0, 4, 4, 5, 5, 5, 6, 6, | 269 7, 7, 10, 10, 10, 10, 10, 10, 0, 4, 4, 5, 5, 5, 6, 6, |
147 7, 8, 8, 9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, | 270 7, 8, 8, 9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, |
148 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | 271 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
149 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5, 4, 4, 4, 4, | 272 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5, 4, 4, 4, 4, |
150 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 7, 7, 7, 8, 10, | 273 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 7, 7, 7, 8, 10, |
151 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, | 274 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, |
152 }; | 275 }; |
153 static const uint16_t kDefaultCommandBits[128] = { | 276 static const uint16_t kDefaultCommandBits[128] = { |
154 0, 0, 8, 9, 3, 35, 7, 71, | 277 0, 0, 8, 9, 3, 35, 7, 71, |
155 39, 103, 23, 47, 175, 111, 239, 31, | 278 39, 103, 23, 47, 175, 111, 239, 31, |
156 0, 0, 0, 4, 12, 2, 10, 6, | 279 0, 0, 0, 4, 12, 2, 10, 6, |
157 13, 29, 11, 43, 27, 59, 87, 55, | 280 13, 29, 11, 43, 27, 59, 87, 55, |
158 15, 79, 319, 831, 191, 703, 447, 959, | 281 15, 79, 319, 831, 191, 703, 447, 959, |
159 0, 14, 1, 25, 5, 21, 19, 51, | 282 0, 14, 1, 25, 5, 21, 19, 51, |
160 119, 159, 95, 223, 479, 991, 63, 575, | 283 119, 159, 95, 223, 479, 991, 63, 575, |
161 127, 639, 383, 895, 255, 767, 511, 1023, | 284 127, 639, 383, 895, 255, 767, 511, 1023, |
162 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | 285 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
163 27, 59, 7, 39, 23, 55, 30, 1, 17, 9, 25, 5, 0, 8, 4, 12, | 286 27, 59, 7, 39, 23, 55, 30, 1, 17, 9, 25, 5, 0, 8, 4, 12, |
164 2, 10, 6, 21, 13, 29, 3, 19, 11, 15, 47, 31, 95, 63, 127, 255, | 287 2, 10, 6, 21, 13, 29, 3, 19, 11, 15, 47, 31, 95, 63, 127, 255, |
165 767, 2815, 1791, 3839, 511, 2559, 1535, 3583, 1023, 3071, 2047, 4095, | 288 767, 2815, 1791, 3839, 511, 2559, 1535, 3583, 1023, 3071, 2047, 4095, |
166 }; | 289 }; |
167 COPY_ARRAY(cmd_depths, kDefaultCommandDepths); | |
168 COPY_ARRAY(cmd_bits, kDefaultCommandBits); | |
169 | |
170 // Initialize the pre-compressed form of the command and distance prefix | |
171 // codes. | |
172 static const uint8_t kDefaultCommandCode[] = { | 290 static const uint8_t kDefaultCommandCode[] = { |
173 0xff, 0x77, 0xd5, 0xbf, 0xe7, 0xde, 0xea, 0x9e, 0x51, 0x5d, 0xde, 0xc6, | 291 0xff, 0x77, 0xd5, 0xbf, 0xe7, 0xde, 0xea, 0x9e, 0x51, 0x5d, 0xde, 0xc6, |
174 0x70, 0x57, 0xbc, 0x58, 0x58, 0x58, 0xd8, 0xd8, 0x58, 0xd5, 0xcb, 0x8c, | 292 0x70, 0x57, 0xbc, 0x58, 0x58, 0x58, 0xd8, 0xd8, 0x58, 0xd5, 0xcb, 0x8c, |
175 0xea, 0xe0, 0xc3, 0x87, 0x1f, 0x83, 0xc1, 0x60, 0x1c, 0x67, 0xb2, 0xaa, | 293 0xea, 0xe0, 0xc3, 0x87, 0x1f, 0x83, 0xc1, 0x60, 0x1c, 0x67, 0xb2, 0xaa, |
176 0x06, 0x83, 0xc1, 0x60, 0x30, 0x18, 0xcc, 0xa1, 0xce, 0x88, 0x54, 0x94, | 294 0x06, 0x83, 0xc1, 0x60, 0x30, 0x18, 0xcc, 0xa1, 0xce, 0x88, 0x54, 0x94, |
177 0x46, 0xe1, 0xb0, 0xd0, 0x4e, 0xb2, 0xf7, 0x04, 0x00, | 295 0x46, 0xe1, 0xb0, 0xd0, 0x4e, 0xb2, 0xf7, 0x04, 0x00, |
178 }; | 296 }; |
179 static const int kDefaultCommandCodeNumBits = 448; | 297 static const size_t kDefaultCommandCodeNumBits = 448; |
| 298 COPY_ARRAY(cmd_depths, kDefaultCommandDepths); |
| 299 COPY_ARRAY(cmd_bits, kDefaultCommandBits); |
| 300 |
| 301 /* Initialize the pre-compressed form of the command and distance prefix |
| 302 codes. */ |
180 COPY_ARRAY(cmd_code, kDefaultCommandCode); | 303 COPY_ARRAY(cmd_code, kDefaultCommandCode); |
181 *cmd_code_numbits = kDefaultCommandCodeNumBits; | 304 *cmd_code_numbits = kDefaultCommandCodeNumBits; |
182 } | 305 } |
183 | 306 |
184 // Decide about the context map based on the ability of the prediction | 307 /* Decide about the context map based on the prediction ability of |
185 // ability of the previous byte UTF8-prefix on the next byte. The | 308 the previous byte UTF8-prefix on the next byte. The |
186 // prediction ability is calculated as shannon entropy. Here we need | 309 prediction ability is calculated as Shannon entropy. Here we need |
187 // shannon entropy instead of 'BitsEntropy' since the prefix will be | 310 Shannon entropy instead of 'BitsEntropy' since the prefix will be |
188 // encoded with the remaining 6 bits of the following byte, and | 311 encoded with the remaining 6 bits of the following byte, and |
189 // BitsEntropy will assume that symbol to be stored alone using Huffman | 312 BitsEntropy will assume that symbol to be stored alone using Huffman |
190 // coding. | 313 coding. */ |
191 static void ChooseContextMap(int quality, | 314 static void ChooseContextMap(int quality, |
192 uint32_t* bigram_histo, | 315 uint32_t* bigram_histo, |
193 size_t* num_literal_contexts, | 316 size_t* num_literal_contexts, |
194 const uint32_t** literal_context_map) { | 317 const uint32_t** literal_context_map) { |
195 uint32_t monogram_histo[3] = { 0 }; | |
196 uint32_t two_prefix_histo[6] = { 0 }; | |
197 size_t total = 0; | |
198 for (size_t i = 0; i < 9; ++i) { | |
199 total += bigram_histo[i]; | |
200 monogram_histo[i % 3] += bigram_histo[i]; | |
201 size_t j = i; | |
202 if (j >= 6) { | |
203 j -= 6; | |
204 } | |
205 two_prefix_histo[j] += bigram_histo[i]; | |
206 } | |
207 size_t dummy; | |
208 double entropy1 = ShannonEntropy(monogram_histo, 3, &dummy); | |
209 double entropy2 = (ShannonEntropy(two_prefix_histo, 3, &dummy) + | |
210 ShannonEntropy(two_prefix_histo + 3, 3, &dummy)); | |
211 double entropy3 = 0; | |
212 for (size_t k = 0; k < 3; ++k) { | |
213 entropy3 += ShannonEntropy(bigram_histo + 3 * k, 3, &dummy); | |
214 } | |
215 | |
216 assert(total != 0); | |
217 double scale = 1.0 / static_cast<double>(total); | |
218 entropy1 *= scale; | |
219 entropy2 *= scale; | |
220 entropy3 *= scale; | |
221 | |
222 static const uint32_t kStaticContextMapContinuation[64] = { | 318 static const uint32_t kStaticContextMapContinuation[64] = { |
223 1, 1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | 319 1, 1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
224 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | 320 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
225 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | 321 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
226 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | 322 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
227 }; | 323 }; |
228 static const uint32_t kStaticContextMapSimpleUTF8[64] = { | 324 static const uint32_t kStaticContextMapSimpleUTF8[64] = { |
229 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | 325 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
230 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | 326 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
231 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | 327 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
232 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | 328 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
233 }; | 329 }; |
234 if (quality < 7) { | 330 |
235 // 3 context models is a bit slower, don't use it at lower qualities. | 331 uint32_t monogram_histo[3] = { 0 }; |
236 entropy3 = entropy1 * 10; | 332 uint32_t two_prefix_histo[6] = { 0 }; |
| 333 size_t total = 0; |
| 334 size_t i; |
| 335 size_t dummy; |
| 336 double entropy[4]; |
| 337 for (i = 0; i < 9; ++i) { |
| 338 size_t j = i; |
| 339 total += bigram_histo[i]; |
| 340 monogram_histo[i % 3] += bigram_histo[i]; |
| 341 if (j >= 6) { |
| 342 j -= 6; |
| 343 } |
| 344 two_prefix_histo[j] += bigram_histo[i]; |
237 } | 345 } |
238 // If expected savings by symbol are less than 0.2 bits, skip the | 346 entropy[1] = ShannonEntropy(monogram_histo, 3, &dummy); |
239 // context modeling -- in exchange for faster decoding speed. | 347 entropy[2] = (ShannonEntropy(two_prefix_histo, 3, &dummy) + |
240 if (entropy1 - entropy2 < 0.2 && | 348 ShannonEntropy(two_prefix_histo + 3, 3, &dummy)); |
241 entropy1 - entropy3 < 0.2) { | 349 entropy[3] = 0; |
| 350 for (i = 0; i < 3; ++i) { |
| 351 entropy[3] += ShannonEntropy(bigram_histo + 3 * i, 3, &dummy); |
| 352 } |
| 353 |
| 354 assert(total != 0); |
| 355 entropy[0] = 1.0 / (double)total; |
| 356 entropy[1] *= entropy[0]; |
| 357 entropy[2] *= entropy[0]; |
| 358 entropy[3] *= entropy[0]; |
| 359 |
| 360 if (quality < MIN_QUALITY_FOR_HQ_CONTEXT_MODELING) { |
| 361 /* 3 context models is a bit slower, don't use it at lower qualities. */ |
| 362 entropy[3] = entropy[1] * 10; |
| 363 } |
| 364 /* If expected savings by symbol are less than 0.2 bits, skip the |
| 365 context modeling -- in exchange for faster decoding speed. */ |
| 366 if (entropy[1] - entropy[2] < 0.2 && |
| 367 entropy[1] - entropy[3] < 0.2) { |
242 *num_literal_contexts = 1; | 368 *num_literal_contexts = 1; |
243 } else if (entropy2 - entropy3 < 0.02) { | 369 } else if (entropy[2] - entropy[3] < 0.02) { |
244 *num_literal_contexts = 2; | 370 *num_literal_contexts = 2; |
245 *literal_context_map = kStaticContextMapSimpleUTF8; | 371 *literal_context_map = kStaticContextMapSimpleUTF8; |
246 } else { | 372 } else { |
247 *num_literal_contexts = 3; | 373 *num_literal_contexts = 3; |
248 *literal_context_map = kStaticContextMapContinuation; | 374 *literal_context_map = kStaticContextMapContinuation; |
249 } | 375 } |
250 } | 376 } |
251 | 377 |
252 static void DecideOverLiteralContextModeling( | 378 static void DecideOverLiteralContextModeling(const uint8_t* input, |
253 const uint8_t* input, | 379 size_t start_pos, size_t length, size_t mask, int quality, |
254 size_t start_pos, | 380 ContextType* literal_context_mode, size_t* num_literal_contexts, |
255 size_t length, | |
256 size_t mask, | |
257 int quality, | |
258 ContextType* literal_context_mode, | |
259 size_t* num_literal_contexts, | |
260 const uint32_t** literal_context_map) { | 381 const uint32_t** literal_context_map) { |
261 if (quality < kMinQualityForContextModeling || length < 64) { | 382 if (quality < MIN_QUALITY_FOR_CONTEXT_MODELING || length < 64) { |
262 return; | 383 return; |
| 384 } else { |
| 385 /* Gather bi-gram data of the UTF8 byte prefixes. To make the analysis of |
| 386 UTF8 data faster we only examine 64 byte long strides at every 4kB |
| 387 intervals. */ |
| 388 const size_t end_pos = start_pos + length; |
| 389 uint32_t bigram_prefix_histo[9] = { 0 }; |
| 390 for (; start_pos + 64 <= end_pos; start_pos += 4096) { |
| 391 static const int lut[4] = { 0, 0, 1, 2 }; |
| 392 const size_t stride_end_pos = start_pos + 64; |
| 393 int prev = lut[input[start_pos & mask] >> 6] * 3; |
| 394 size_t pos; |
| 395 for (pos = start_pos + 1; pos < stride_end_pos; ++pos) { |
| 396 const uint8_t literal = input[pos & mask]; |
| 397 ++bigram_prefix_histo[prev + lut[literal >> 6]]; |
| 398 prev = lut[literal >> 6] * 3; |
| 399 } |
| 400 } |
| 401 *literal_context_mode = CONTEXT_UTF8; |
| 402 ChooseContextMap(quality, &bigram_prefix_histo[0], num_literal_contexts, |
| 403 literal_context_map); |
263 } | 404 } |
264 // Gather bigram data of the UTF8 byte prefixes. To make the analysis of | |
265 // UTF8 data faster we only examine 64 byte long strides at every 4kB | |
266 // intervals. | |
267 const size_t end_pos = start_pos + length; | |
268 uint32_t bigram_prefix_histo[9] = { 0 }; | |
269 for (; start_pos + 64 <= end_pos; start_pos += 4096) { | |
270 static const int lut[4] = { 0, 0, 1, 2 }; | |
271 const size_t stride_end_pos = start_pos + 64; | |
272 int prev = lut[input[start_pos & mask] >> 6] * 3; | |
273 for (size_t pos = start_pos + 1; pos < stride_end_pos; ++pos) { | |
274 const uint8_t literal = input[pos & mask]; | |
275 ++bigram_prefix_histo[prev + lut[literal >> 6]]; | |
276 prev = lut[literal >> 6] * 3; | |
277 } | |
278 } | |
279 *literal_context_mode = CONTEXT_UTF8; | |
280 ChooseContextMap(quality, &bigram_prefix_histo[0], num_literal_contexts, | |
281 literal_context_map); | |
282 } | 405 } |
283 | 406 |
284 static bool ShouldCompress(const uint8_t* data, | 407 static BROTLI_BOOL ShouldCompress( |
285 const size_t mask, | 408 const uint8_t* data, const size_t mask, const uint64_t last_flush_pos, |
286 const uint64_t last_flush_pos, | 409 const size_t bytes, const size_t num_literals, const size_t num_commands) { |
287 const size_t bytes, | |
288 const size_t num_literals, | |
289 const size_t num_commands) { | |
290 if (num_commands < (bytes >> 8) + 2) { | 410 if (num_commands < (bytes >> 8) + 2) { |
291 if (num_literals > 0.99 * static_cast<double>(bytes)) { | 411 if (num_literals > 0.99 * (double)bytes) { |
292 uint32_t literal_histo[256] = { 0 }; | 412 uint32_t literal_histo[256] = { 0 }; |
293 static const uint32_t kSampleRate = 13; | 413 static const uint32_t kSampleRate = 13; |
294 static const double kMinEntropy = 7.92; | 414 static const double kMinEntropy = 7.92; |
295 const double bit_cost_threshold = | 415 const double bit_cost_threshold = |
296 static_cast<double>(bytes) * kMinEntropy / kSampleRate; | 416 (double)bytes * kMinEntropy / kSampleRate; |
297 size_t t = (bytes + kSampleRate - 1) / kSampleRate; | 417 size_t t = (bytes + kSampleRate - 1) / kSampleRate; |
298 uint32_t pos = static_cast<uint32_t>(last_flush_pos); | 418 uint32_t pos = (uint32_t)last_flush_pos; |
299 for (size_t i = 0; i < t; i++) { | 419 size_t i; |
| 420 for (i = 0; i < t; i++) { |
300 ++literal_histo[data[pos & mask]]; | 421 ++literal_histo[data[pos & mask]]; |
301 pos += kSampleRate; | 422 pos += kSampleRate; |
302 } | 423 } |
303 if (BitsEntropy(literal_histo, 256) > bit_cost_threshold) { | 424 if (BitsEntropy(literal_histo, 256) > bit_cost_threshold) { |
304 return false; | 425 return BROTLI_FALSE; |
305 } | 426 } |
306 } | 427 } |
307 } | 428 } |
308 return true; | 429 return BROTLI_TRUE; |
309 } | 430 } |
310 | 431 |
311 static void WriteMetaBlockInternal(const uint8_t* data, | 432 static void WriteMetaBlockInternal(MemoryManager* m, |
| 433 const uint8_t* data, |
312 const size_t mask, | 434 const size_t mask, |
313 const uint64_t last_flush_pos, | 435 const uint64_t last_flush_pos, |
314 const size_t bytes, | 436 const size_t bytes, |
315 const bool is_last, | 437 const BROTLI_BOOL is_last, |
316 const int quality, | 438 const BrotliEncoderParams* params, |
317 const bool font_mode, | |
318 const uint8_t prev_byte, | 439 const uint8_t prev_byte, |
319 const uint8_t prev_byte2, | 440 const uint8_t prev_byte2, |
320 const size_t num_literals, | 441 const size_t num_literals, |
321 const size_t num_commands, | 442 const size_t num_commands, |
322 Command* commands, | 443 Command* commands, |
323 const int* saved_dist_cache, | 444 const int* saved_dist_cache, |
324 int* dist_cache, | 445 int* dist_cache, |
325 size_t* storage_ix, | 446 size_t* storage_ix, |
326 uint8_t* storage) { | 447 uint8_t* storage) { |
| 448 const uint32_t wrapped_last_flush_pos = WrapPosition(last_flush_pos); |
| 449 uint8_t last_byte; |
| 450 uint8_t last_byte_bits; |
| 451 uint32_t num_direct_distance_codes = 0; |
| 452 uint32_t distance_postfix_bits = 0; |
| 453 |
327 if (bytes == 0) { | 454 if (bytes == 0) { |
328 // Write the ISLAST and ISEMPTY bits. | 455 /* Write the ISLAST and ISEMPTY bits. */ |
329 WriteBits(2, 3, storage_ix, storage); | 456 BrotliWriteBits(2, 3, storage_ix, storage); |
330 *storage_ix = (*storage_ix + 7u) & ~7u; | 457 *storage_ix = (*storage_ix + 7u) & ~7u; |
331 return; | 458 return; |
332 } | 459 } |
333 | 460 |
334 if (!ShouldCompress(data, mask, last_flush_pos, bytes, | 461 if (!ShouldCompress(data, mask, last_flush_pos, bytes, |
335 num_literals, num_commands)) { | 462 num_literals, num_commands)) { |
336 // Restore the distance cache, as its last update by | 463 /* Restore the distance cache, as its last update by |
337 // CreateBackwardReferences is now unused. | 464 CreateBackwardReferences is now unused. */ |
338 memcpy(dist_cache, saved_dist_cache, 4 * sizeof(dist_cache[0])); | 465 memcpy(dist_cache, saved_dist_cache, 4 * sizeof(dist_cache[0])); |
339 StoreUncompressedMetaBlock(is_last, data, | 466 BrotliStoreUncompressedMetaBlock(is_last, data, |
340 WrapPosition(last_flush_pos), mask, bytes, | 467 wrapped_last_flush_pos, mask, bytes, |
341 storage_ix, storage); | 468 storage_ix, storage); |
342 return; | 469 return; |
343 } | 470 } |
344 | 471 |
345 const uint8_t last_byte = storage[0]; | 472 last_byte = storage[0]; |
346 const uint8_t last_byte_bits = static_cast<uint8_t>(*storage_ix & 0xff); | 473 last_byte_bits = (uint8_t)(*storage_ix & 0xff); |
347 uint32_t num_direct_distance_codes = 0; | 474 if (params->quality >= MIN_QUALITY_FOR_RECOMPUTE_DISTANCE_PREFIXES && |
348 uint32_t distance_postfix_bits = 0; | 475 params->mode == BROTLI_MODE_FONT) { |
349 if (quality > 9 && font_mode) { | |
350 num_direct_distance_codes = 12; | 476 num_direct_distance_codes = 12; |
351 distance_postfix_bits = 1; | 477 distance_postfix_bits = 1; |
352 RecomputeDistancePrefixes(commands, | 478 RecomputeDistancePrefixes(commands, |
353 num_commands, | 479 num_commands, |
354 num_direct_distance_codes, | 480 num_direct_distance_codes, |
355 distance_postfix_bits); | 481 distance_postfix_bits); |
356 } | 482 } |
357 if (quality == 2) { | 483 if (params->quality <= MAX_QUALITY_FOR_STATIC_ENRTOPY_CODES) { |
358 StoreMetaBlockFast(data, WrapPosition(last_flush_pos), | 484 BrotliStoreMetaBlockFast(m, data, wrapped_last_flush_pos, |
359 bytes, mask, is_last, | 485 bytes, mask, is_last, |
360 commands, num_commands, | 486 commands, num_commands, |
361 storage_ix, storage); | 487 storage_ix, storage); |
362 } else if (quality < kMinQualityForBlockSplit) { | 488 if (BROTLI_IS_OOM(m)) return; |
363 StoreMetaBlockTrivial(data, WrapPosition(last_flush_pos), | 489 } else if (params->quality < MIN_QUALITY_FOR_BLOCK_SPLIT) { |
364 bytes, mask, is_last, | 490 BrotliStoreMetaBlockTrivial(m, data, wrapped_last_flush_pos, |
365 commands, num_commands, | 491 bytes, mask, is_last, |
366 storage_ix, storage); | 492 commands, num_commands, |
| 493 storage_ix, storage); |
| 494 if (BROTLI_IS_OOM(m)) return; |
367 } else { | 495 } else { |
| 496 ContextType literal_context_mode = CONTEXT_UTF8; |
368 MetaBlockSplit mb; | 497 MetaBlockSplit mb; |
369 ContextType literal_context_mode = CONTEXT_UTF8; | 498 InitMetaBlockSplit(&mb); |
370 if (quality <= 9) { | 499 if (params->quality < MIN_QUALITY_FOR_HQ_BLOCK_SPLITTING) { |
371 size_t num_literal_contexts = 1; | 500 size_t num_literal_contexts = 1; |
372 const uint32_t* literal_context_map = NULL; | 501 const uint32_t* literal_context_map = NULL; |
373 DecideOverLiteralContextModeling(data, WrapPosition(last_flush_pos), | 502 DecideOverLiteralContextModeling(data, wrapped_last_flush_pos, |
374 bytes, mask, | 503 bytes, mask, |
375 quality, | 504 params->quality, |
376 &literal_context_mode, | 505 &literal_context_mode, |
377 &num_literal_contexts, | 506 &num_literal_contexts, |
378 &literal_context_map); | 507 &literal_context_map); |
379 if (literal_context_map == NULL) { | 508 if (literal_context_map == NULL) { |
380 BuildMetaBlockGreedy(data, WrapPosition(last_flush_pos), mask, | 509 BrotliBuildMetaBlockGreedy(m, data, wrapped_last_flush_pos, mask, |
381 commands, num_commands, &mb); | 510 commands, num_commands, &mb); |
| 511 if (BROTLI_IS_OOM(m)) return; |
382 } else { | 512 } else { |
383 BuildMetaBlockGreedyWithContexts(data, WrapPosition(last_flush_pos), | 513 BrotliBuildMetaBlockGreedyWithContexts(m, data, |
384 mask, | 514 wrapped_last_flush_pos, |
385 prev_byte, prev_byte2, | 515 mask, |
386 literal_context_mode, | 516 prev_byte, prev_byte2, |
387 num_literal_contexts, | 517 literal_context_mode, |
388 literal_context_map, | 518 num_literal_contexts, |
389 commands, num_commands, | 519 literal_context_map, |
390 &mb); | 520 commands, num_commands, |
| 521 &mb); |
| 522 if (BROTLI_IS_OOM(m)) return; |
391 } | 523 } |
392 } else { | 524 } else { |
393 if (!IsMostlyUTF8(data, WrapPosition(last_flush_pos), mask, bytes, | 525 if (!BrotliIsMostlyUTF8(data, wrapped_last_flush_pos, mask, bytes, |
394 kMinUTF8Ratio)) { | 526 kMinUTF8Ratio)) { |
395 literal_context_mode = CONTEXT_SIGNED; | 527 literal_context_mode = CONTEXT_SIGNED; |
396 } | 528 } |
397 BuildMetaBlock(data, WrapPosition(last_flush_pos), mask, | 529 BrotliBuildMetaBlock(m, data, wrapped_last_flush_pos, mask, params, |
398 prev_byte, prev_byte2, | 530 prev_byte, prev_byte2, |
399 commands, num_commands, | 531 commands, num_commands, |
400 literal_context_mode, | 532 literal_context_mode, |
401 &mb); | 533 &mb); |
402 } | 534 if (BROTLI_IS_OOM(m)) return; |
403 if (quality >= kMinQualityForOptimizeHistograms) { | 535 } |
404 OptimizeHistograms(num_direct_distance_codes, | 536 if (params->quality >= MIN_QUALITY_FOR_OPTIMIZE_HISTOGRAMS) { |
| 537 BrotliOptimizeHistograms(num_direct_distance_codes, |
| 538 distance_postfix_bits, |
| 539 &mb); |
| 540 } |
| 541 BrotliStoreMetaBlock(m, data, wrapped_last_flush_pos, bytes, mask, |
| 542 prev_byte, prev_byte2, |
| 543 is_last, |
| 544 num_direct_distance_codes, |
405 distance_postfix_bits, | 545 distance_postfix_bits, |
406 &mb); | 546 literal_context_mode, |
407 } | 547 commands, num_commands, |
408 StoreMetaBlock(data, WrapPosition(last_flush_pos), bytes, mask, | 548 &mb, |
409 prev_byte, prev_byte2, | 549 storage_ix, storage); |
410 is_last, | 550 if (BROTLI_IS_OOM(m)) return; |
411 num_direct_distance_codes, | 551 DestroyMetaBlockSplit(m, &mb); |
412 distance_postfix_bits, | |
413 literal_context_mode, | |
414 commands, num_commands, | |
415 mb, | |
416 storage_ix, storage); | |
417 } | 552 } |
418 if (bytes + 4 < (*storage_ix >> 3)) { | 553 if (bytes + 4 < (*storage_ix >> 3)) { |
419 // Restore the distance cache and last byte. | 554 /* Restore the distance cache and last byte. */ |
420 memcpy(dist_cache, saved_dist_cache, 4 * sizeof(dist_cache[0])); | 555 memcpy(dist_cache, saved_dist_cache, 4 * sizeof(dist_cache[0])); |
421 storage[0] = last_byte; | 556 storage[0] = last_byte; |
422 *storage_ix = last_byte_bits; | 557 *storage_ix = last_byte_bits; |
423 StoreUncompressedMetaBlock(is_last, data, | 558 BrotliStoreUncompressedMetaBlock(is_last, data, |
424 WrapPosition(last_flush_pos), mask, | 559 wrapped_last_flush_pos, mask, |
425 bytes, storage_ix, storage); | 560 bytes, storage_ix, storage); |
426 } | 561 } |
427 } | 562 } |
428 | 563 |
429 BrotliCompressor::BrotliCompressor(BrotliParams params) | 564 static BROTLI_BOOL EnsureInitialized(BrotliEncoderState* s) { |
430 : params_(params), | 565 if (BROTLI_IS_OOM(&s->memory_manager_)) return BROTLI_FALSE; |
431 hashers_(new Hashers()), | 566 if (s->is_initialized_) return BROTLI_TRUE; |
432 input_pos_(0), | 567 |
433 num_commands_(0), | 568 SanitizeParams(&s->params); |
434 num_literals_(0), | 569 s->params.lgblock = ComputeLgBlock(&s->params); |
435 last_insert_len_(0), | 570 |
436 last_flush_pos_(0), | 571 s->remaining_metadata_bytes_ = BROTLI_UINT32_MAX; |
437 last_processed_pos_(0), | 572 |
438 prev_byte_(0), | 573 RingBufferSetup(&s->params, &s->ringbuffer_); |
439 prev_byte2_(0), | 574 |
440 storage_size_(0), | 575 /* Initialize last byte with stream header. */ |
441 storage_(0), | 576 { |
442 large_table_(NULL), | 577 int lgwin = s->params.lgwin; |
443 cmd_code_numbits_(0), | 578 if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY || |
444 command_buf_(NULL), | 579 s->params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY) { |
445 literal_buf_(NULL), | 580 lgwin = BROTLI_MAX(int, lgwin, 18); |
446 is_last_block_emitted_(0) { | 581 } |
447 // Sanitize params. | 582 EncodeWindowBits(lgwin, &s->last_byte_, &s->last_byte_bits_); |
448 params_.quality = std::max(0, params_.quality); | 583 } |
449 if (params_.lgwin < kMinWindowBits) { | 584 |
450 params_.lgwin = kMinWindowBits; | 585 if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY) { |
451 } else if (params_.lgwin > kMaxWindowBits) { | 586 InitCommandPrefixCodes(s->cmd_depths_, s->cmd_bits_, |
452 params_.lgwin = kMaxWindowBits; | 587 s->cmd_code_, &s->cmd_code_numbits_); |
453 } | 588 } |
454 if (params_.quality <= 1) { | 589 |
455 params_.lgblock = params_.lgwin; | 590 /* Initialize hashers. */ |
456 } else if (params_.quality < kMinQualityForBlockSplit) { | 591 HashersSetup(&s->memory_manager_, &s->hashers_, ChooseHasher(&s->params)); |
457 params_.lgblock = 14; | 592 if (BROTLI_IS_OOM(&s->memory_manager_)) return BROTLI_FALSE; |
458 } else if (params_.lgblock == 0) { | 593 |
459 params_.lgblock = 16; | 594 s->is_initialized_ = BROTLI_TRUE; |
460 if (params_.quality >= 9 && params_.lgwin > params_.lgblock) { | 595 return BROTLI_TRUE; |
461 params_.lgblock = std::min(18, params_.lgwin); | 596 } |
462 } | 597 |
| 598 static void BrotliEncoderInitState(BrotliEncoderState* s) { |
| 599 s->params.mode = BROTLI_DEFAULT_MODE; |
| 600 s->params.quality = BROTLI_DEFAULT_QUALITY; |
| 601 s->params.lgwin = BROTLI_DEFAULT_WINDOW; |
| 602 s->params.lgblock = 0; |
| 603 |
| 604 s->input_pos_ = 0; |
| 605 s->num_commands_ = 0; |
| 606 s->num_literals_ = 0; |
| 607 s->last_insert_len_ = 0; |
| 608 s->last_flush_pos_ = 0; |
| 609 s->last_processed_pos_ = 0; |
| 610 s->prev_byte_ = 0; |
| 611 s->prev_byte2_ = 0; |
| 612 s->storage_size_ = 0; |
| 613 s->storage_ = 0; |
| 614 s->large_table_ = NULL; |
| 615 s->large_table_size_ = 0; |
| 616 s->cmd_code_numbits_ = 0; |
| 617 s->command_buf_ = NULL; |
| 618 s->literal_buf_ = NULL; |
| 619 s->next_out_ = NULL; |
| 620 s->available_out_ = 0; |
| 621 s->total_out_ = 0; |
| 622 s->stream_state_ = BROTLI_STREAM_PROCESSING; |
| 623 s->is_last_block_emitted_ = BROTLI_FALSE; |
| 624 s->is_initialized_ = BROTLI_FALSE; |
| 625 |
| 626 InitHashers(&s->hashers_); |
| 627 |
| 628 RingBufferInit(&s->ringbuffer_); |
| 629 |
| 630 s->commands_ = 0; |
| 631 s->cmd_alloc_size_ = 0; |
| 632 |
| 633 /* Initialize distance cache. */ |
| 634 s->dist_cache_[0] = 4; |
| 635 s->dist_cache_[1] = 11; |
| 636 s->dist_cache_[2] = 15; |
| 637 s->dist_cache_[3] = 16; |
| 638 /* Save the state of the distance cache in case we need to restore it for |
| 639 emitting an uncompressed block. */ |
| 640 memcpy(s->saved_dist_cache_, s->dist_cache_, sizeof(s->dist_cache_)); |
| 641 } |
| 642 |
| 643 BrotliEncoderState* BrotliEncoderCreateInstance(brotli_alloc_func alloc_func, |
| 644 brotli_free_func free_func, |
| 645 void* opaque) { |
| 646 BrotliEncoderState* state = 0; |
| 647 if (!alloc_func && !free_func) { |
| 648 state = (BrotliEncoderState*)malloc(sizeof(BrotliEncoderState)); |
| 649 } else if (alloc_func && free_func) { |
| 650 state = (BrotliEncoderState*)alloc_func(opaque, sizeof(BrotliEncoderState)); |
| 651 } |
| 652 if (state == 0) { |
| 653 /* BROTLI_DUMP(); */ |
| 654 return 0; |
| 655 } |
| 656 BrotliInitMemoryManager( |
| 657 &state->memory_manager_, alloc_func, free_func, opaque); |
| 658 BrotliEncoderInitState(state); |
| 659 return state; |
| 660 } |
| 661 |
| 662 static void BrotliEncoderCleanupState(BrotliEncoderState* s) { |
| 663 MemoryManager* m = &s->memory_manager_; |
| 664 if (BROTLI_IS_OOM(m)) { |
| 665 BrotliWipeOutMemoryManager(m); |
| 666 return; |
| 667 } |
| 668 BROTLI_FREE(m, s->storage_); |
| 669 BROTLI_FREE(m, s->commands_); |
| 670 RingBufferFree(m, &s->ringbuffer_); |
| 671 DestroyHashers(m, &s->hashers_); |
| 672 BROTLI_FREE(m, s->large_table_); |
| 673 BROTLI_FREE(m, s->command_buf_); |
| 674 BROTLI_FREE(m, s->literal_buf_); |
| 675 } |
| 676 |
| 677 /* Deinitializes and frees BrotliEncoderState instance. */ |
| 678 void BrotliEncoderDestroyInstance(BrotliEncoderState* state) { |
| 679 if (!state) { |
| 680 return; |
463 } else { | 681 } else { |
464 params_.lgblock = std::min(kMaxInputBlockBits, | 682 MemoryManager* m = &state->memory_manager_; |
465 std::max(kMinInputBlockBits, params_.lgblock)); | 683 brotli_free_func free_func = m->free_func; |
466 } | 684 void* opaque = m->opaque; |
467 | 685 BrotliEncoderCleanupState(state); |
468 // Initialize input and literal cost ring buffers. | 686 free_func(opaque, state); |
469 // We allocate at least lgwin + 1 bits for the ring buffer so that the newly | 687 } |
470 // added block fits there completely and we still get lgwin bits and at least | 688 } |
471 // read_block_size_bits + 1 bits because the copy tail length needs to be | 689 |
472 // smaller than ringbuffer size. | 690 /* |
473 int ringbuffer_bits = std::max(params_.lgwin + 1, params_.lgblock + 1); | 691 Copies the given input data to the internal ring buffer of the compressor. |
474 ringbuffer_ = new RingBuffer(ringbuffer_bits, params_.lgblock); | 692 No processing of the data occurs at this time and this function can be |
475 | 693 called multiple times before calling WriteBrotliData() to process the |
476 commands_ = 0; | 694 accumulated input. At most input_block_size() bytes of input data can be |
477 cmd_alloc_size_ = 0; | 695 copied to the ring buffer, otherwise the next WriteBrotliData() will fail. |
478 | 696 */ |
479 // Initialize last byte with stream header. | 697 static void CopyInputToRingBuffer(BrotliEncoderState* s, |
480 EncodeWindowBits(params_.lgwin, &last_byte_, &last_byte_bits_); | 698 const size_t input_size, |
481 | 699 const uint8_t* input_buffer) { |
482 // Initialize distance cache. | 700 RingBuffer* ringbuffer_ = &s->ringbuffer_; |
483 dist_cache_[0] = 4; | 701 MemoryManager* m = &s->memory_manager_; |
484 dist_cache_[1] = 11; | 702 if (!EnsureInitialized(s)) return; |
485 dist_cache_[2] = 15; | 703 RingBufferWrite(m, input_buffer, input_size, ringbuffer_); |
486 dist_cache_[3] = 16; | 704 if (BROTLI_IS_OOM(m)) return; |
487 // Save the state of the distance cache in case we need to restore it for | 705 s->input_pos_ += input_size; |
488 // emitting an uncompressed block. | 706 |
489 memcpy(saved_dist_cache_, dist_cache_, sizeof(dist_cache_)); | 707 /* TL;DR: If needed, initialize 7 more bytes in the ring buffer to make the |
490 | 708 hashing not depend on uninitialized data. This makes compression |
491 if (params_.quality == 0) { | 709 deterministic and it prevents uninitialized memory warnings in Valgrind. |
492 InitCommandPrefixCodes(cmd_depths_, cmd_bits_, | 710 Even without erasing, the output would be valid (but nondeterministic). |
493 cmd_code_, &cmd_code_numbits_); | 711 |
494 } else if (params_.quality == 1) { | 712 Background information: The compressor stores short (at most 8 bytes) |
495 command_buf_ = new uint32_t[kCompressFragmentTwoPassBlockSize]; | 713 substrings of the input already read in a hash table, and detects |
496 literal_buf_ = new uint8_t[kCompressFragmentTwoPassBlockSize]; | 714 repetitions by looking up such substrings in the hash table. If it |
497 } | 715 can find a substring, it checks whether the substring is really there |
498 | 716 in the ring buffer (or it's just a hash collision). Should the hash |
499 // Initialize hashers. | 717 table become corrupt, this check makes sure that the output is |
500 hash_type_ = std::min(10, params_.quality); | 718 still valid, albeit the compression ratio would be bad. |
501 hashers_->Init(hash_type_); | 719 |
502 } | 720 The compressor populates the hash table from the ring buffer as it's |
503 | 721 reading new bytes from the input. However, at the last few indexes of |
504 BrotliCompressor::~BrotliCompressor(void) { | 722 the ring buffer, there are not enough bytes to build full-length |
505 delete[] storage_; | 723 substrings from. Since the hash table always contains full-length |
506 free(commands_); | 724 substrings, we erase with dummy zeros here to make sure that those |
507 delete ringbuffer_; | 725 substrings will contain zeros at the end instead of uninitialized |
508 delete hashers_; | 726 data. |
509 delete[] large_table_; | 727 |
510 delete[] command_buf_; | 728 Please note that erasing is not necessary (because the |
511 delete[] literal_buf_; | 729 memory region is already initialized since he ring buffer |
512 } | 730 has a `tail' that holds a copy of the beginning,) so we |
513 | 731 skip erasing if we have already gone around at least once in |
514 void BrotliCompressor::CopyInputToRingBuffer(const size_t input_size, | 732 the ring buffer. |
515 const uint8_t* input_buffer) { | 733 |
516 ringbuffer_->Write(input_buffer, input_size); | 734 Only clear during the first round of ring-buffer writes. On |
517 input_pos_ += input_size; | 735 subsequent rounds data in the ring-buffer would be affected. */ |
518 | 736 if (ringbuffer_->pos_ <= ringbuffer_->mask_) { |
519 // TL;DR: If needed, initialize 7 more bytes in the ring buffer to make the | 737 /* This is the first time when the ring buffer is being written. |
520 // hashing not depend on uninitialized data. This makes compression | 738 We clear 7 bytes just after the bytes that have been copied from |
521 // deterministic and it prevents uninitialized memory warnings in Valgrind. | 739 the input buffer. |
522 // Even without erasing, the output would be valid (but nondeterministic). | 740 |
523 // | 741 The ring-buffer has a "tail" that holds a copy of the beginning, |
524 // Background information: The compressor stores short (at most 8 bytes) | 742 but only once the ring buffer has been fully written once, i.e., |
525 // substrings of the input already read in a hash table, and detects | 743 pos <= mask. For the first time, we need to write values |
526 // repetitions by looking up such substrings in the hash table. If it | 744 in this tail (where index may be larger than mask), so that |
527 // can find a substring, it checks whether the substring is really there | 745 we have exactly defined behavior and don't read uninitialized |
528 // in the ring buffer (or it's just a hash collision). Should the hash | 746 memory. Due to performance reasons, hashing reads data using a |
529 // table become corrupt, this check makes sure that the output is | 747 LOAD64, which can go 7 bytes beyond the bytes written in the |
530 // still valid, albeit the compression ratio would be bad. | 748 ring-buffer. */ |
531 // | 749 memset(ringbuffer_->buffer_ + ringbuffer_->pos_, 0, 7); |
532 // The compressor populates the hash table from the ring buffer as it's | 750 } |
533 // reading new bytes from the input. However, at the last few indexes of | 751 } |
534 // the ring buffer, there are not enough bytes to build full-length | 752 |
535 // substrings from. Since the hash table always contains full-length | 753 void BrotliEncoderSetCustomDictionary(BrotliEncoderState* s, size_t size, |
536 // substrings, we erase with dummy 0s here to make sure that those | 754 const uint8_t* dict) { |
537 // substrings will contain 0s at the end instead of uninitialized | 755 size_t max_dict_size = MaxBackwardLimit(s->params.lgwin); |
538 // data. | 756 size_t dict_size = size; |
539 // | 757 MemoryManager* m = &s->memory_manager_; |
540 // Please note that erasing is not necessary (because the | 758 |
541 // memory region is already initialized since he ring buffer | 729 memory region is already initialized since the ring buffer |
542 // has a `tail' that holds a copy of the beginning,) so we | 760 |
543 // skip erasing if we have already gone around at least once in | 761 if (dict_size == 0 || |
544 // the ring buffer. | 762 s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY || |
545 size_t pos = ringbuffer_->position(); | 763 s->params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY) { |
546 // Only clear during the first round of ringbuffer writes. On | 764 return; |
547 // subsequent rounds data in the ringbuffer would be affected. | 765 } |
548 if (pos <= ringbuffer_->mask()) { | 766 if (size > max_dict_size) { |
549 // This is the first time when the ring buffer is being written. | 767 dict += size - max_dict_size; |
550 // We clear 7 bytes just after the bytes that have been copied from | 768 dict_size = max_dict_size; |
551 // the input buffer. | 769 } |
552 // | 770 CopyInputToRingBuffer(s, dict_size, dict); |
553 // The ringbuffer has a "tail" that holds a copy of the beginning, | 771 s->last_flush_pos_ = dict_size; |
554 // but only once the ring buffer has been fully written once, i.e., | 772 s->last_processed_pos_ = dict_size; |
555 // pos <= mask. For the first time, we need to write values | 773 if (dict_size > 0) { |
556 // in this tail (where index may be larger than mask), so that | 774 s->prev_byte_ = dict[dict_size - 1]; |
557 // we have exactly defined behavior and don't read un-initialized | 775 } |
558 // memory. Due to performance reasons, hashing reads data using a | 776 if (dict_size > 1) { |
559 // LOAD64, which can go 7 bytes beyond the bytes written in the | 777 s->prev_byte2_ = dict[dict_size - 2]; |
560 // ringbuffer. | 778 } |
561 memset(ringbuffer_->start() + pos, 0, 7); | 779 HashersPrependCustomDictionary(m, &s->hashers_, &s->params, dict_size, dict); |
562 } | 780 if (BROTLI_IS_OOM(m)) return; |
563 } | 781 } |
564 | 782 |
565 void BrotliCompressor::BrotliSetCustomDictionary( | 783 /* Marks all input as processed. |
566 const size_t size, const uint8_t* dict) { | 784 Returns true if position wrapping occurs. */ |
567 CopyInputToRingBuffer(size, dict); | 785 static BROTLI_BOOL UpdateLastProcessedPos(BrotliEncoderState* s) { |
568 last_flush_pos_ = size; | 786 uint32_t wrapped_last_processed_pos = WrapPosition(s->last_processed_pos_); |
569 last_processed_pos_ = size; | 787 uint32_t wrapped_input_pos = WrapPosition(s->input_pos_); |
570 if (size > 0) { | 788 s->last_processed_pos_ = s->input_pos_; |
571 prev_byte_ = dict[size - 1]; | 789 return TO_BROTLI_BOOL(wrapped_input_pos < wrapped_last_processed_pos); |
572 } | 790 } |
573 if (size > 1) { | 791 |
574 prev_byte2_ = dict[size - 2]; | 792 /* |
575 } | 793 Processes the accumulated input data and sets |*out_size| to the length of |
576 hashers_->PrependCustomDictionary(hash_type_, params_.lgwin, size, dict); | 794 the new output meta-block, or to zero if no new output meta-block has been |
577 } | 795 created (in this case the processed input data is buffered internally). |
578 | 796 If |*out_size| is positive, |*output| points to the start of the output |
579 bool BrotliCompressor::WriteBrotliData(const bool is_last, | 797 data. If |is_last| or |force_flush| is BROTLI_TRUE, an output meta-block is |
580 const bool force_flush, | 798 always created. However, until |is_last| is BROTLI_TRUE encoder may retain up |
581 size_t* out_size, | 799 to 7 bits of the last byte of output. To force encoder to dump the remaining |
582 uint8_t** output) { | 800 bits use WriteMetadata() to append an empty meta-data block. |
583 const uint64_t delta = input_pos_ - last_processed_pos_; | 801 Returns BROTLI_FALSE if the size of the input data is larger than |
584 const uint8_t* data = ringbuffer_->start(); | 802 input_block_size(). |
585 const uint32_t mask = ringbuffer_->mask(); | 803 */ |
| 804 static BROTLI_BOOL EncodeData( |
| 805 BrotliEncoderState* s, const BROTLI_BOOL is_last, |
| 806 const BROTLI_BOOL force_flush, size_t* out_size, uint8_t** output) { |
| 807 const uint64_t delta = UnprocessedInputSize(s); |
| 808 const uint32_t bytes = (uint32_t)delta; |
| 809 const uint32_t wrapped_last_processed_pos = |
| 810 WrapPosition(s->last_processed_pos_); |
| 811 uint8_t* data; |
| 812 uint32_t mask; |
| 813 MemoryManager* m = &s->memory_manager_; |
| 814 |
| 815 if (!EnsureInitialized(s)) return BROTLI_FALSE; |
| 816 data = s->ringbuffer_.buffer_; |
| 817 mask = s->ringbuffer_.mask_; |
586 | 818 |
587 /* Adding more blocks after "last" block is forbidden. */ | 819 /* Adding more blocks after "last" block is forbidden. */ |
588 if (is_last_block_emitted_) return false; | 820 if (s->is_last_block_emitted_) return BROTLI_FALSE; |
589 if (is_last) is_last_block_emitted_ = 1; | 821 if (is_last) s->is_last_block_emitted_ = BROTLI_TRUE; |
590 | 822 |
591 if (delta > input_block_size()) { | 823 if (delta > InputBlockSize(s)) { |
592 return false; | 824 return BROTLI_FALSE; |
593 } | 825 } |
594 const uint32_t bytes = static_cast<uint32_t>(delta); | 826 if (s->params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY && |
595 | 827 !s->command_buf_) { |
596 if (params_.quality <= 1) { | 828 s->command_buf_ = |
| 829 BROTLI_ALLOC(m, uint32_t, kCompressFragmentTwoPassBlockSize); |
| 830 s->literal_buf_ = |
| 831 BROTLI_ALLOC(m, uint8_t, kCompressFragmentTwoPassBlockSize); |
| 832 if (BROTLI_IS_OOM(m)) return BROTLI_FALSE; |
| 833 } |
| 834 |
| 835 if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY || |
| 836 s->params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY) { |
| 837 uint8_t* storage; |
| 838 size_t storage_ix = s->last_byte_bits_; |
| 839 size_t table_size; |
| 840 int* table; |
| 841 |
597 if (delta == 0 && !is_last) { | 842 if (delta == 0 && !is_last) { |
598 // We have no new input data and we don't have to finish the stream, so | 843 /* We have no new input data and we don't have to finish the stream, so |
599 // nothing to do. | 844 nothing to do. */ |
600 *out_size = 0; | 845 *out_size = 0; |
601 return true; | 846 return BROTLI_TRUE; |
602 } | 847 } |
603 const size_t max_out_size = 2 * bytes + 500; | 848 storage = GetBrotliStorage(s, 2 * bytes + 502); |
604 uint8_t* storage = GetBrotliStorage(max_out_size); | 849 if (BROTLI_IS_OOM(m)) return BROTLI_FALSE; |
605 storage[0] = last_byte_; | 850 storage[0] = s->last_byte_; |
606 size_t storage_ix = last_byte_bits_; | 851 table = GetHashTable(s, s->params.quality, bytes, &table_size); |
607 size_t table_size; | 852 if (BROTLI_IS_OOM(m)) return BROTLI_FALSE; |
608 int* table = GetHashTable(params_.quality, bytes, &table_size); | 853 if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY) { |
609 if (params_.quality == 0) { | |
610 BrotliCompressFragmentFast( | 854 BrotliCompressFragmentFast( |
611 &data[WrapPosition(last_processed_pos_) & mask], | 855 m, &data[wrapped_last_processed_pos & mask], |
612 bytes, is_last, | 856 bytes, is_last, |
613 table, table_size, | 857 table, table_size, |
614 cmd_depths_, cmd_bits_, | 858 s->cmd_depths_, s->cmd_bits_, |
615 &cmd_code_numbits_, cmd_code_, | 859 &s->cmd_code_numbits_, s->cmd_code_, |
616 &storage_ix, storage); | 860 &storage_ix, storage); |
| 861 if (BROTLI_IS_OOM(m)) return BROTLI_FALSE; |
617 } else { | 862 } else { |
618 BrotliCompressFragmentTwoPass( | 863 BrotliCompressFragmentTwoPass( |
619 &data[WrapPosition(last_processed_pos_) & mask], | 864 m, &data[wrapped_last_processed_pos & mask], |
620 bytes, is_last, | 865 bytes, is_last, |
621 command_buf_, literal_buf_, | 866 s->command_buf_, s->literal_buf_, |
622 table, table_size, | 867 table, table_size, |
623 &storage_ix, storage); | 868 &storage_ix, storage); |
624 } | 869 if (BROTLI_IS_OOM(m)) return BROTLI_FALSE; |
625 last_byte_ = storage[storage_ix >> 3]; | 870 } |
626 last_byte_bits_ = storage_ix & 7u; | 871 s->last_byte_ = storage[storage_ix >> 3]; |
627 last_processed_pos_ = input_pos_; | 872 s->last_byte_bits_ = storage_ix & 7u; |
| 873 UpdateLastProcessedPos(s); |
628 *output = &storage[0]; | 874 *output = &storage[0]; |
629 *out_size = storage_ix >> 3; | 875 *out_size = storage_ix >> 3; |
630 return true; | 876 return BROTLI_TRUE; |
631 } | 877 } |
632 | 878 |
633 // Theoretical max number of commands is 1 per 2 bytes. | 879 { |
634 size_t newsize = num_commands_ + bytes / 2 + 1; | 880 /* Theoretical max number of commands is 1 per 2 bytes. */ |
635 if (newsize > cmd_alloc_size_) { | 881 size_t newsize = s->num_commands_ + bytes / 2 + 1; |
636 // Reserve a bit more memory to allow merging with a next block | 882 if (newsize > s->cmd_alloc_size_) { |
637 // without realloc: that would impact speed. | 883 Command* new_commands; |
638 newsize += (bytes / 4) + 16; | 884 /* Reserve a bit more memory to allow merging with a next block |
639 cmd_alloc_size_ = newsize; | 885 without reallocation: that would impact speed. */ |
640 commands_ = | 886 newsize += (bytes / 4) + 16; |
641 static_cast<Command*>(realloc(commands_, sizeof(Command) * newsize)); | 887 s->cmd_alloc_size_ = newsize; |
642 } | 888 new_commands = BROTLI_ALLOC(m, Command, newsize); |
643 | 889 if (BROTLI_IS_OOM(m)) return BROTLI_FALSE; |
644 CreateBackwardReferences(bytes, WrapPosition(last_processed_pos_), | 890 if (s->commands_) { |
645 is_last, data, mask, | 891 memcpy(new_commands, s->commands_, sizeof(Command) * s->num_commands_); |
646 params_.quality, | 892 BROTLI_FREE(m, s->commands_); |
647 params_.lgwin, | 893 } |
648 hashers_, | 894 s->commands_ = new_commands; |
649 hash_type_, | 895 } |
650 dist_cache_, | 896 } |
651 &last_insert_len_, | 897 |
652 &commands_[num_commands_], | 898 BrotliCreateBackwardReferences(m, bytes, wrapped_last_processed_pos, |
653 &num_commands_, | 899 is_last, data, mask, |
654 &num_literals_); | 900 &s->params, |
655 | 901 &s->hashers_, |
656 size_t max_length = std::min<size_t>(mask + 1, 1u << kMaxInputBlockBits); | 902 s->dist_cache_, |
657 const size_t max_literals = max_length / 8; | 903 &s->last_insert_len_, |
658 const size_t max_commands = max_length / 8; | 904 &s->commands_[s->num_commands_], |
659 if (!is_last && !force_flush && | 905 &s->num_commands_, |
660 (params_.quality >= kMinQualityForBlockSplit || | 906 &s->num_literals_); |
661 (num_literals_ + num_commands_ < kMaxNumDelayedSymbols)) && | 907 if (BROTLI_IS_OOM(m)) return BROTLI_FALSE; |
662 num_literals_ < max_literals && | 908 |
663 num_commands_ < max_commands && | 909 { |
664 input_pos_ + input_block_size() <= last_flush_pos_ + max_length) { | 910 const size_t max_length = MaxMetablockSize(&s->params); |
665 // Merge with next input block. Everything will happen later. | 911 const size_t max_literals = max_length / 8; |
666 last_processed_pos_ = input_pos_; | 912 const size_t max_commands = max_length / 8; |
| 913 const size_t processed_bytes = (size_t)(s->input_pos_ - s->last_flush_pos_); |
| 914 /* If maximal possible additional block doesn't fit metablock, flush now. */ |
| 915 /* TODO: Postpone decision until next block arrives? */ |
| 916 const BROTLI_BOOL next_input_fits_metablock = TO_BROTLI_BOOL( |
| 917 processed_bytes + InputBlockSize(s) <= max_length); |
| 918 /* If block splitting is not used, then flush as soon as there is some |
| 919 amount of commands / literals produced. */ |
| 920 const BROTLI_BOOL should_flush = TO_BROTLI_BOOL( |
| 921 s->params.quality < MIN_QUALITY_FOR_BLOCK_SPLIT && |
| 922 s->num_literals_ + s->num_commands_ >= MAX_NUM_DELAYED_SYMBOLS); |
| 923 if (!is_last && !force_flush && !should_flush && |
| 924 next_input_fits_metablock && |
| 925 s->num_literals_ < max_literals && |
| 926 s->num_commands_ < max_commands) { |
| 927 /* Merge with next input block. Everything will happen later. */ |
| 928 if (UpdateLastProcessedPos(s)) { |
| 929 HashersReset(&s->hashers_, ChooseHasher(&s->params)); |
| 930 } |
| 931 *out_size = 0; |
| 932 return BROTLI_TRUE; |
| 933 } |
| 934 } |
| 935 |
| 936 /* Create the last insert-only command. */ |
| 937 if (s->last_insert_len_ > 0) { |
| 938 InitInsertCommand(&s->commands_[s->num_commands_++], s->last_insert_len_); |
| 939 s->num_literals_ += s->last_insert_len_; |
| 940 s->last_insert_len_ = 0; |
| 941 } |
| 942 |
| 943 if (!is_last && s->input_pos_ == s->last_flush_pos_) { |
| 944 /* We have no new input data and we don't have to finish the stream, so |
| 945 nothing to do. */ |
667 *out_size = 0; | 946 *out_size = 0; |
668 return true; | 947 return BROTLI_TRUE; |
669 } | 948 } |
670 | 949 assert(s->input_pos_ >= s->last_flush_pos_); |
671 // Create the last insert-only command. | 950 assert(s->input_pos_ > s->last_flush_pos_ || is_last); |
672 if (last_insert_len_ > 0) { | 951 assert(s->input_pos_ - s->last_flush_pos_ <= 1u << 24); |
673 brotli::Command cmd(last_insert_len_); | 952 { |
674 commands_[num_commands_++] = cmd; | 953 const uint32_t metablock_size = |
675 num_literals_ += last_insert_len_; | 954 (uint32_t)(s->input_pos_ - s->last_flush_pos_); |
676 last_insert_len_ = 0; | 955 uint8_t* storage = GetBrotliStorage(s, 2 * metablock_size + 502); |
677 } | 956 size_t storage_ix = s->last_byte_bits_; |
678 | 957 if (BROTLI_IS_OOM(m)) return BROTLI_FALSE; |
679 if (!is_last && input_pos_ == last_flush_pos_) { | 958 storage[0] = s->last_byte_; |
680 // We have no new input data and we don't have to finish the stream, so | 959 WriteMetaBlockInternal( |
681 // nothing to do. | 960 m, data, mask, s->last_flush_pos_, metablock_size, is_last, |
682 *out_size = 0; | 961 &s->params, s->prev_byte_, s->prev_byte2_, |
683 return true; | 962 s->num_literals_, s->num_commands_, s->commands_, s->saved_dist_cache_, |
684 } | 963 s->dist_cache_, &storage_ix, storage); |
685 assert(input_pos_ >= last_flush_pos_); | 964 if (BROTLI_IS_OOM(m)) return BROTLI_FALSE; |
686 assert(input_pos_ > last_flush_pos_ || is_last); | 965 s->last_byte_ = storage[storage_ix >> 3]; |
687 assert(input_pos_ - last_flush_pos_ <= 1u << 24); | 966 s->last_byte_bits_ = storage_ix & 7u; |
688 const uint32_t metablock_size = | 967 s->last_flush_pos_ = s->input_pos_; |
689 static_cast<uint32_t>(input_pos_ - last_flush_pos_); | 968 if (UpdateLastProcessedPos(s)) { |
690 const size_t max_out_size = 2 * metablock_size + 500; | 969 HashersReset(&s->hashers_, ChooseHasher(&s->params)); |
691 uint8_t* storage = GetBrotliStorage(max_out_size); | 970 } |
692 storage[0] = last_byte_; | 971 if (s->last_flush_pos_ > 0) { |
693 size_t storage_ix = last_byte_bits_; | 972 s->prev_byte_ = data[((uint32_t)s->last_flush_pos_ - 1) & mask]; |
694 bool font_mode = params_.mode == BrotliParams::MODE_FONT; | 973 } |
695 WriteMetaBlockInternal( | 974 if (s->last_flush_pos_ > 1) { |
696 data, mask, last_flush_pos_, metablock_size, is_last, params_.quality, | 975 s->prev_byte2_ = data[(uint32_t)(s->last_flush_pos_ - 2) & mask]; |
697 font_mode, prev_byte_, prev_byte2_, num_literals_, num_commands_, | 976 } |
698 commands_, saved_dist_cache_, dist_cache_, &storage_ix, storage); | 977 s->num_commands_ = 0; |
699 last_byte_ = storage[storage_ix >> 3]; | 978 s->num_literals_ = 0; |
700 last_byte_bits_ = storage_ix & 7u; | 979 /* Save the state of the distance cache in case we need to restore it for |
701 last_flush_pos_ = input_pos_; | 980 emitting an uncompressed block. */ |
702 last_processed_pos_ = input_pos_; | 981 memcpy(s->saved_dist_cache_, s->dist_cache_, sizeof(s->dist_cache_)); |
703 if (last_flush_pos_ > 0) { | 982 *output = &storage[0]; |
704 prev_byte_ = data[(static_cast<uint32_t>(last_flush_pos_) - 1) & mask]; | 983 *out_size = storage_ix >> 3; |
705 } | 984 return BROTLI_TRUE; |
706 if (last_flush_pos_ > 1) { | 985 } |
707 prev_byte2_ = data[(static_cast<uint32_t>(last_flush_pos_) - 2) & mask]; | 986 } |
708 } | 987 |
709 num_commands_ = 0; | 988 /* Dumps remaining output bits and metadata header to |header|. |
710 num_literals_ = 0; | 989 Returns number of produced bytes. |
711 // Save the state of the distance cache in case we need to restore it for | 990 REQUIRED: |header| should be 8-byte aligned and at least 16 bytes long. |
712 // emitting an uncompressed block. | 991 REQUIRED: |block_size| <= (1 << 24). */ |
713 memcpy(saved_dist_cache_, dist_cache_, sizeof(dist_cache_)); | 992 static size_t WriteMetadataHeader( |
714 *output = &storage[0]; | 993 BrotliEncoderState* s, const size_t block_size, uint8_t* header) { |
715 *out_size = storage_ix >> 3; | 994 size_t storage_ix; |
716 return true; | 995 storage_ix = s->last_byte_bits_; |
717 } | 996 header[0] = s->last_byte_; |
718 | 997 s->last_byte_ = 0; |
719 bool BrotliCompressor::WriteMetaBlock(const size_t input_size, | 998 s->last_byte_bits_ = 0; |
720 const uint8_t* input_buffer, | 999 |
721 const bool is_last, | 1000 BrotliWriteBits(1, 0, &storage_ix, header); |
722 size_t* encoded_size, | 1001 BrotliWriteBits(2, 3, &storage_ix, header); |
723 uint8_t* encoded_buffer) { | 1002 BrotliWriteBits(1, 0, &storage_ix, header); |
724 CopyInputToRingBuffer(input_size, input_buffer); | 1003 if (block_size == 0) { |
725 size_t out_size = 0; | 1004 BrotliWriteBits(2, 0, &storage_ix, header); |
726 uint8_t* output; | |
727 if (!WriteBrotliData(is_last, /* force_flush = */ true, &out_size, &output) || | |
728 out_size > *encoded_size) { | |
729 return false; | |
730 } | |
731 if (out_size > 0) { | |
732 memcpy(encoded_buffer, output, out_size); | |
733 } | |
734 *encoded_size = out_size; | |
735 return true; | |
736 } | |
737 | |
738 bool BrotliCompressor::WriteMetadata(const size_t input_size, | |
739 const uint8_t* input_buffer, | |
740 const bool is_last, | |
741 size_t* encoded_size, | |
742 uint8_t* encoded_buffer) { | |
743 if (input_size > (1 << 24) || input_size + 6 > *encoded_size) { | |
744 return false; | |
745 } | |
746 uint64_t hdr_buffer_data[2]; | |
747 uint8_t* hdr_buffer = reinterpret_cast<uint8_t*>(&hdr_buffer_data[0]); | |
748 size_t storage_ix = last_byte_bits_; | |
749 hdr_buffer[0] = last_byte_; | |
750 WriteBits(1, 0, &storage_ix, hdr_buffer); | |
751 WriteBits(2, 3, &storage_ix, hdr_buffer); | |
752 WriteBits(1, 0, &storage_ix, hdr_buffer); | |
753 if (input_size == 0) { | |
754 WriteBits(2, 0, &storage_ix, hdr_buffer); | |
755 *encoded_size = (storage_ix + 7u) >> 3; | |
756 memcpy(encoded_buffer, hdr_buffer, *encoded_size); | |
757 } else { | 1005 } else { |
758 uint32_t nbits = (input_size == 1) ? 0 : (Log2FloorNonZero( | 1006 uint32_t nbits = (block_size == 1) ? 0 : |
759 static_cast<uint32_t>(input_size) - 1) + 1); | 1007 (Log2FloorNonZero((uint32_t)block_size - 1) + 1); |
760 uint32_t nbytes = (nbits + 7) / 8; | 1008 uint32_t nbytes = (nbits + 7) / 8; |
761 WriteBits(2, nbytes, &storage_ix, hdr_buffer); | 1009 BrotliWriteBits(2, nbytes, &storage_ix, header); |
762 WriteBits(8 * nbytes, input_size - 1, &storage_ix, hdr_buffer); | 1010 BrotliWriteBits(8 * nbytes, block_size - 1, &storage_ix, header); |
763 size_t hdr_size = (storage_ix + 7u) >> 3; | 1011 } |
764 memcpy(encoded_buffer, hdr_buffer, hdr_size); | 1012 return (storage_ix + 7u) >> 3; |
765 memcpy(&encoded_buffer[hdr_size], input_buffer, input_size); | 1013 } |
766 *encoded_size = hdr_size + input_size; | 1014 |
767 } | 1015 static BROTLI_BOOL BrotliCompressBufferQuality10( |
768 if (is_last) { | 1016 int lgwin, size_t input_size, const uint8_t* input_buffer, |
769 encoded_buffer[(*encoded_size)++] = 3; | |
770 } | |
771 last_byte_ = 0; | |
772 last_byte_bits_ = 0; | |
773 return true; | |
774 } | |
775 | |
776 bool BrotliCompressor::FinishStream( | |
777 size_t* encoded_size, uint8_t* encoded_buffer) { | 1017 size_t* encoded_size, uint8_t* encoded_buffer) { |
778 return WriteMetaBlock(0, NULL, true, encoded_size, encoded_buffer); | 1018 MemoryManager memory_manager; |
779 } | 1019 MemoryManager* m = &memory_manager; |
780 | 1020 |
781 static int BrotliCompressBufferQuality10(int lgwin, | 1021 const size_t mask = BROTLI_SIZE_MAX >> 1; |
782 size_t input_size, | 1022 const size_t max_backward_limit = MaxBackwardLimit(lgwin); |
783 const uint8_t* input_buffer, | |
784 size_t* encoded_size, | |
785 uint8_t* encoded_buffer) { | |
786 const size_t mask = std::numeric_limits<size_t>::max() >> 1; | |
787 assert(input_size <= mask + 1); | |
788 const size_t max_backward_limit = (1 << lgwin) - 16; | |
789 int dist_cache[4] = { 4, 11, 15, 16 }; | 1023 int dist_cache[4] = { 4, 11, 15, 16 }; |
790 int saved_dist_cache[4] = { 4, 11, 15, 16 }; | 1024 int saved_dist_cache[4] = { 4, 11, 15, 16 }; |
791 int ok = 1; | 1025 BROTLI_BOOL ok = BROTLI_TRUE; |
792 const size_t max_out_size = *encoded_size; | 1026 const size_t max_out_size = *encoded_size; |
793 size_t total_out_size = 0; | 1027 size_t total_out_size = 0; |
794 uint8_t last_byte; | 1028 uint8_t last_byte; |
795 uint8_t last_byte_bits; | 1029 uint8_t last_byte_bits; |
796 EncodeWindowBits(lgwin, &last_byte, &last_byte_bits); | 1030 H10* hasher; |
797 | 1031 |
798 Hashers::H10* hasher = new Hashers::H10; | 1032 const size_t hasher_eff_size = |
799 const size_t hasher_eff_size = std::min(input_size, max_backward_limit + 16); | 1033 BROTLI_MIN(size_t, input_size, max_backward_limit + BROTLI_WINDOW_GAP); |
800 hasher->Init(lgwin, 0, hasher_eff_size, true); | 1034 |
801 | 1035 BrotliEncoderParams params; |
802 const int lgblock = std::min(18, lgwin); | 1036 |
803 const int lgmetablock = std::min(24, lgwin + 1); | 1037 const int lgmetablock = BROTLI_MIN(int, 24, lgwin + 1); |
804 const size_t max_block_size = static_cast<size_t>(1) << lgblock; | 1038 size_t max_block_size; |
805 const size_t max_metablock_size = static_cast<size_t>(1) << lgmetablock; | 1039 const size_t max_metablock_size = (size_t)1 << lgmetablock; |
806 const size_t max_literals_per_metablock = max_metablock_size / 8; | 1040 const size_t max_literals_per_metablock = max_metablock_size / 8; |
807 const size_t max_commands_per_metablock = max_metablock_size / 8; | 1041 const size_t max_commands_per_metablock = max_metablock_size / 8; |
808 size_t metablock_start = 0; | 1042 size_t metablock_start = 0; |
809 uint8_t prev_byte = 0; | 1043 uint8_t prev_byte = 0; |
810 uint8_t prev_byte2 = 0; | 1044 uint8_t prev_byte2 = 0; |
| 1045 |
| 1046 params.mode = BROTLI_DEFAULT_MODE; |
| 1047 params.quality = 10; |
| 1048 params.lgwin = lgwin; |
| 1049 params.lgblock = 0; |
| 1050 SanitizeParams(¶ms); |
| 1051 params.lgblock = ComputeLgBlock(¶ms); |
| 1052 max_block_size = (size_t)1 << params.lgblock; |
| 1053 |
| 1054 BrotliInitMemoryManager(m, 0, 0, 0); |
| 1055 |
| 1056 assert(input_size <= mask + 1); |
| 1057 EncodeWindowBits(lgwin, &last_byte, &last_byte_bits); |
| 1058 hasher = BROTLI_ALLOC(m, H10, 1); |
| 1059 if (BROTLI_IS_OOM(m)) goto oom; |
| 1060 InitializeH10(hasher); |
| 1061 InitH10(m, hasher, input_buffer, ¶ms, 0, hasher_eff_size, 1); |
| 1062 if (BROTLI_IS_OOM(m)) goto oom; |
| 1063 |
811 while (ok && metablock_start < input_size) { | 1064 while (ok && metablock_start < input_size) { |
812 const size_t metablock_end = | 1065 const size_t metablock_end = |
813 std::min(input_size, metablock_start + max_metablock_size); | 1066 BROTLI_MIN(size_t, input_size, metablock_start + max_metablock_size); |
814 const size_t expected_num_commands = | 1067 const size_t expected_num_commands = |
815 (metablock_end - metablock_start) / 12 + 16; | 1068 (metablock_end - metablock_start) / 12 + 16; |
816 Command* commands = 0; | 1069 Command* commands = 0; |
817 size_t num_commands = 0; | 1070 size_t num_commands = 0; |
818 size_t last_insert_len = 0; | 1071 size_t last_insert_len = 0; |
819 size_t num_literals = 0; | 1072 size_t num_literals = 0; |
820 size_t metablock_size = 0; | 1073 size_t metablock_size = 0; |
821 size_t cmd_alloc_size = 0; | 1074 size_t cmd_alloc_size = 0; |
| 1075 BROTLI_BOOL is_last; |
| 1076 uint8_t* storage; |
| 1077 size_t storage_ix; |
822 | 1078 |
823 for (size_t block_start = metablock_start; block_start < metablock_end; ) { | 1079 size_t block_start; |
824 size_t block_size = std::min(metablock_end - block_start, max_block_size); | 1080 for (block_start = metablock_start; block_start < metablock_end; ) { |
825 ZopfliNode* nodes = new ZopfliNode[block_size + 1]; | 1081 size_t block_size = |
826 std::vector<uint32_t> path; | 1082 BROTLI_MIN(size_t, metablock_end - block_start, max_block_size); |
827 hasher->StitchToPreviousBlock(block_size, block_start, | 1083 ZopfliNode* nodes = BROTLI_ALLOC(m, ZopfliNode, block_size + 1); |
828 input_buffer, mask); | 1084 size_t path_size; |
829 ZopfliComputeShortestPath(block_size, block_start, input_buffer, mask, | 1085 size_t new_cmd_alloc_size; |
830 max_backward_limit, dist_cache, | 1086 if (BROTLI_IS_OOM(m)) goto oom; |
831 hasher, nodes, &path); | 1087 BrotliInitZopfliNodes(nodes, block_size + 1); |
832 // We allocate a command buffer in the first iteration of this loop that | 1088 StitchToPreviousBlockH10(hasher, block_size, block_start, |
833 // will be likely big enough for the whole metablock, so that for most | 1089 input_buffer, mask); |
834 // inputs we will not have to reallocate in later iterations. We do the | 1090 path_size = BrotliZopfliComputeShortestPath( |
835 // allocation here and not before the loop, because if the input is small, | 1091 m, block_size, block_start, input_buffer, mask, ¶ms, |
836 // this will be allocated after the zopfli cost model is freed, so this | 1092 max_backward_limit, dist_cache, hasher, nodes); |
837 // will not increase peak memory usage. | 1093 if (BROTLI_IS_OOM(m)) goto oom; |
838 // TODO: If the first allocation is too small, increase command | 1094 /* We allocate a command buffer in the first iteration of this loop that |
839 // buffer size exponentially. | 1095 will be likely big enough for the whole metablock, so that for most |
840 size_t new_cmd_alloc_size = std::max(expected_num_commands, | 1096 inputs we will not have to reallocate in later iterations. We do the |
841 num_commands + path.size() + 1); | 1097 allocation here and not before the loop, because if the input is small, |
| 1098 this will be allocated after the Zopfli cost model is freed, so this |
| 1099 will not increase peak memory usage. |
| 1100 TODO: If the first allocation is too small, increase command |
| 1101 buffer size exponentially. */ |
| 1102 new_cmd_alloc_size = BROTLI_MAX(size_t, expected_num_commands, |
| 1103 num_commands + path_size + 1); |
842 if (cmd_alloc_size != new_cmd_alloc_size) { | 1104 if (cmd_alloc_size != new_cmd_alloc_size) { |
| 1105 Command* new_commands = BROTLI_ALLOC(m, Command, new_cmd_alloc_size); |
| 1106 if (BROTLI_IS_OOM(m)) goto oom; |
843 cmd_alloc_size = new_cmd_alloc_size; | 1107 cmd_alloc_size = new_cmd_alloc_size; |
844 commands = static_cast<Command*>( | 1108 if (commands) { |
845 realloc(commands, cmd_alloc_size * sizeof(Command))); | 1109 memcpy(new_commands, commands, sizeof(Command) * num_commands); |
| 1110 BROTLI_FREE(m, commands); |
| 1111 } |
| 1112 commands = new_commands; |
846 } | 1113 } |
847 ZopfliCreateCommands(block_size, block_start, max_backward_limit, path, | 1114 BrotliZopfliCreateCommands(block_size, block_start, max_backward_limit, |
848 &nodes[0], dist_cache, &last_insert_len, | 1115 &nodes[0], dist_cache, &last_insert_len, |
849 &commands[num_commands], &num_literals); | 1116 &commands[num_commands], &num_literals); |
850 num_commands += path.size(); | 1117 num_commands += path_size; |
851 block_start += block_size; | 1118 block_start += block_size; |
852 metablock_size += block_size; | 1119 metablock_size += block_size; |
853 delete[] nodes; | 1120 BROTLI_FREE(m, nodes); |
854 if (num_literals > max_literals_per_metablock || | 1121 if (num_literals > max_literals_per_metablock || |
855 num_commands > max_commands_per_metablock) { | 1122 num_commands > max_commands_per_metablock) { |
856 break; | 1123 break; |
857 } | 1124 } |
858 } | 1125 } |
859 | 1126 |
860 if (last_insert_len > 0) { | 1127 if (last_insert_len > 0) { |
861 Command cmd(last_insert_len); | 1128 InitInsertCommand(&commands[num_commands++], last_insert_len); |
862 commands[num_commands++] = cmd; | |
863 num_literals += last_insert_len; | 1129 num_literals += last_insert_len; |
864 } | 1130 } |
865 | 1131 |
866 const bool is_last = (metablock_start + metablock_size == input_size); | 1132 is_last = TO_BROTLI_BOOL(metablock_start + metablock_size == input_size); |
867 uint8_t* storage = NULL; | 1133 storage = NULL; |
868 size_t storage_ix = last_byte_bits; | 1134 storage_ix = last_byte_bits; |
869 | 1135 |
870 if (metablock_size == 0) { | 1136 if (metablock_size == 0) { |
871 // Write the ISLAST and ISEMPTY bits. | 1137 /* Write the ISLAST and ISEMPTY bits. */ |
872 storage = new uint8_t[16]; | 1138 storage = BROTLI_ALLOC(m, uint8_t, 16); |
| 1139 if (BROTLI_IS_OOM(m)) goto oom; |
873 storage[0] = last_byte; | 1140 storage[0] = last_byte; |
874 WriteBits(2, 3, &storage_ix, storage); | 1141 BrotliWriteBits(2, 3, &storage_ix, storage); |
875 storage_ix = (storage_ix + 7u) & ~7u; | 1142 storage_ix = (storage_ix + 7u) & ~7u; |
876 } else if (!ShouldCompress(input_buffer, mask, metablock_start, | 1143 } else if (!ShouldCompress(input_buffer, mask, metablock_start, |
877 metablock_size, num_literals, num_commands)) { | 1144 metablock_size, num_literals, num_commands)) { |
878 // Restore the distance cache, as its last update by | 1145 /* Restore the distance cache, as its last update by |
879 // CreateBackwardReferences is now unused. | 1146 CreateBackwardReferences is now unused. */ |
880 memcpy(dist_cache, saved_dist_cache, 4 * sizeof(dist_cache[0])); | 1147 memcpy(dist_cache, saved_dist_cache, 4 * sizeof(dist_cache[0])); |
881 storage = new uint8_t[metablock_size + 16]; | 1148 storage = BROTLI_ALLOC(m, uint8_t, metablock_size + 16); |
| 1149 if (BROTLI_IS_OOM(m)) goto oom; |
882 storage[0] = last_byte; | 1150 storage[0] = last_byte; |
883 StoreUncompressedMetaBlock(is_last, input_buffer, | 1151 BrotliStoreUncompressedMetaBlock(is_last, input_buffer, |
884 metablock_start, mask, metablock_size, | 1152 metablock_start, mask, metablock_size, |
885 &storage_ix, storage); | 1153 &storage_ix, storage); |
886 } else { | 1154 } else { |
887 uint32_t num_direct_distance_codes = 0; | 1155 uint32_t num_direct_distance_codes = 0; |
888 uint32_t distance_postfix_bits = 0; | 1156 uint32_t distance_postfix_bits = 0; |
| 1157 ContextType literal_context_mode = CONTEXT_UTF8; |
889 MetaBlockSplit mb; | 1158 MetaBlockSplit mb; |
890 ContextType literal_context_mode = CONTEXT_UTF8; | 1159 InitMetaBlockSplit(&mb); |
891 if (!IsMostlyUTF8( | 1160 if (!BrotliIsMostlyUTF8(input_buffer, metablock_start, mask, |
892 input_buffer, metablock_start, mask, metablock_size, | 1161 metablock_size, kMinUTF8Ratio)) { |
893 kMinUTF8Ratio)) { | |
894 literal_context_mode = CONTEXT_SIGNED; | 1162 literal_context_mode = CONTEXT_SIGNED; |
895 } | 1163 } |
896 BuildMetaBlock(input_buffer, metablock_start, mask, | 1164 BrotliBuildMetaBlock(m, input_buffer, metablock_start, mask, ¶ms, |
897 prev_byte, prev_byte2, | 1165 prev_byte, prev_byte2, |
898 commands, num_commands, | 1166 commands, num_commands, |
899 literal_context_mode, | 1167 literal_context_mode, |
900 &mb); | 1168 &mb); |
901 OptimizeHistograms(num_direct_distance_codes, | 1169 if (BROTLI_IS_OOM(m)) goto oom; |
902 distance_postfix_bits, | 1170 BrotliOptimizeHistograms(num_direct_distance_codes, |
903 &mb); | 1171 distance_postfix_bits, |
904 const size_t max_out_metablock_size = 2 * metablock_size + 500; | 1172 &mb); |
905 storage = new uint8_t[max_out_metablock_size]; | 1173 storage = BROTLI_ALLOC(m, uint8_t, 2 * metablock_size + 502); |
| 1174 if (BROTLI_IS_OOM(m)) goto oom; |
906 storage[0] = last_byte; | 1175 storage[0] = last_byte; |
907 StoreMetaBlock(input_buffer, metablock_start, metablock_size, mask, | 1176 BrotliStoreMetaBlock(m, input_buffer, metablock_start, metablock_size, |
908 prev_byte, prev_byte2, | 1177 mask, prev_byte, prev_byte2, |
909 is_last, | 1178 is_last, |
910 num_direct_distance_codes, | 1179 num_direct_distance_codes, |
911 distance_postfix_bits, | 1180 distance_postfix_bits, |
912 literal_context_mode, | 1181 literal_context_mode, |
913 commands, num_commands, | 1182 commands, num_commands, |
914 mb, | 1183 &mb, |
915 &storage_ix, storage); | 1184 &storage_ix, storage); |
| 1185 if (BROTLI_IS_OOM(m)) goto oom; |
916 if (metablock_size + 4 < (storage_ix >> 3)) { | 1186 if (metablock_size + 4 < (storage_ix >> 3)) { |
917 // Restore the distance cache and last byte. | 1187 /* Restore the distance cache and last byte. */ |
918 memcpy(dist_cache, saved_dist_cache, 4 * sizeof(dist_cache[0])); | 1188 memcpy(dist_cache, saved_dist_cache, 4 * sizeof(dist_cache[0])); |
919 storage[0] = last_byte; | 1189 storage[0] = last_byte; |
920 storage_ix = last_byte_bits; | 1190 storage_ix = last_byte_bits; |
921 StoreUncompressedMetaBlock(is_last, input_buffer, | 1191 BrotliStoreUncompressedMetaBlock(is_last, input_buffer, |
922 metablock_start, mask, | 1192 metablock_start, mask, |
923 metablock_size, &storage_ix, storage); | 1193 metablock_size, &storage_ix, storage); |
924 } | 1194 } |
| 1195 DestroyMetaBlockSplit(m, &mb); |
925 } | 1196 } |
926 last_byte = storage[storage_ix >> 3]; | 1197 last_byte = storage[storage_ix >> 3]; |
927 last_byte_bits = storage_ix & 7u; | 1198 last_byte_bits = storage_ix & 7u; |
928 metablock_start += metablock_size; | 1199 metablock_start += metablock_size; |
929 prev_byte = input_buffer[metablock_start - 1]; | 1200 prev_byte = input_buffer[metablock_start - 1]; |
930 prev_byte2 = input_buffer[metablock_start - 2]; | 1201 prev_byte2 = input_buffer[metablock_start - 2]; |
931 // Save the state of the distance cache in case we need to restore it for | 1202 /* Save the state of the distance cache in case we need to restore it for |
932 // emitting an uncompressed block. | 1203 emitting an uncompressed block. */ |
933 memcpy(saved_dist_cache, dist_cache, 4 * sizeof(dist_cache[0])); | 1204 memcpy(saved_dist_cache, dist_cache, 4 * sizeof(dist_cache[0])); |
934 | 1205 |
935 const size_t out_size = storage_ix >> 3; | 1206 { |
936 total_out_size += out_size; | 1207 const size_t out_size = storage_ix >> 3; |
937 if (total_out_size <= max_out_size) { | 1208 total_out_size += out_size; |
938 memcpy(encoded_buffer, storage, out_size); | 1209 if (total_out_size <= max_out_size) { |
939 encoded_buffer += out_size; | 1210 memcpy(encoded_buffer, storage, out_size); |
940 } else { | 1211 encoded_buffer += out_size; |
941 ok = 0; | 1212 } else { |
942 } | 1213 ok = BROTLI_FALSE; |
943 delete[] storage; | 1214 } |
944 free(commands); | 1215 } |
| 1216 BROTLI_FREE(m, storage); |
| 1217 BROTLI_FREE(m, commands); |
945 } | 1218 } |
946 | 1219 |
947 *encoded_size = total_out_size; | 1220 *encoded_size = total_out_size; |
948 delete hasher; | 1221 CleanupH10(m, hasher); |
| 1222 BROTLI_FREE(m, hasher); |
949 return ok; | 1223 return ok; |
950 } | 1224 |
951 | 1225 oom: |
952 int BrotliCompressBuffer(BrotliParams params, | 1226 BrotliWipeOutMemoryManager(m); |
953 size_t input_size, | 1227 return BROTLI_FALSE; |
954 const uint8_t* input_buffer, | 1228 } |
955 size_t* encoded_size, | 1229 |
956 uint8_t* encoded_buffer) { | 1230 size_t BrotliEncoderMaxCompressedSize(size_t input_size) { |
957 if (*encoded_size == 0) { | 1231 /* [window bits / empty metadata] + N * [uncompressed] + [last empty] */ |
958 // Output buffer needs at least one byte. | 1232 size_t num_large_blocks = input_size >> 24; |
959 return 0; | 1233 size_t tail = input_size - (num_large_blocks << 24); |
960 } | 1234 size_t tail_overhead = (tail > (1 << 20)) ? 4 : 3; |
| 1235 size_t overhead = 2 + (4 * num_large_blocks) + tail_overhead + 1; |
| 1236 size_t result = input_size + overhead; |
| 1237 if (input_size == 0) return 1; |
| 1238 return (result < input_size) ? 0 : result; |
| 1239 } |
| 1240 |
| 1241 /* Wraps data to uncompressed brotli stream with minimal window size. |
| 1242 |output| should point at region with at least BrotliEncoderMaxCompressedSize |
| 1243 addressable bytes. |
| 1244 Returns the length of stream. */ |
| 1245 static size_t MakeUncompressedStream( |
| 1246 const uint8_t* input, size_t input_size, uint8_t* output) { |
| 1247 size_t size = input_size; |
| 1248 size_t result = 0; |
| 1249 size_t offset = 0; |
961 if (input_size == 0) { | 1250 if (input_size == 0) { |
962 // Handle the special case of empty input. | 1251 output[0] = 6; |
| 1252 return 1; |
| 1253 } |
| 1254 output[result++] = 0x21; /* window bits = 10, is_last = false */ |
| 1255 output[result++] = 0x03; /* empty metadata, padding */ |
| 1256 while (size > 0) { |
| 1257 uint32_t nibbles = 0; |
| 1258 uint32_t chunk_size; |
| 1259 uint32_t bits; |
| 1260 chunk_size = (size > (1u << 24)) ? (1u << 24) : (uint32_t)size; |
| 1261 if (chunk_size > (1u << 16)) nibbles = (chunk_size > (1u << 20)) ? 2 : 1; |
| 1262 bits = |
| 1263 (nibbles << 1) | ((chunk_size - 1) << 3) | (1u << (19 + 4 * nibbles)); |
| 1264 output[result++] = (uint8_t)bits; |
| 1265 output[result++] = (uint8_t)(bits >> 8); |
| 1266 output[result++] = (uint8_t)(bits >> 16); |
| 1267 if (nibbles == 2) output[result++] = (uint8_t)(bits >> 24); |
| 1268 memcpy(&output[result], &input[offset], chunk_size); |
| 1269 result += chunk_size; |
| 1270 offset += chunk_size; |
| 1271 size -= chunk_size; |
| 1272 } |
| 1273 output[result++] = 3; |
| 1274 return result; |
| 1275 } |
| 1276 |
| 1277 BROTLI_BOOL BrotliEncoderCompress( |
| 1278 int quality, int lgwin, BrotliEncoderMode mode, size_t input_size, |
| 1279 const uint8_t* input_buffer, size_t* encoded_size, |
| 1280 uint8_t* encoded_buffer) { |
| 1281 BrotliEncoderState* s; |
| 1282 size_t out_size = *encoded_size; |
| 1283 const uint8_t* input_start = input_buffer; |
| 1284 uint8_t* output_start = encoded_buffer; |
| 1285 size_t max_out_size = BrotliEncoderMaxCompressedSize(input_size); |
| 1286 if (out_size == 0) { |
| 1287 /* Output buffer needs at least one byte. */ |
| 1288 return BROTLI_FALSE; |
| 1289 } |
| 1290 if (input_size == 0) { |
| 1291 /* Handle the special case of empty input. */ |
963 *encoded_size = 1; | 1292 *encoded_size = 1; |
964 *encoded_buffer = 6; | 1293 *encoded_buffer = 6; |
965 return 1; | 1294 return BROTLI_TRUE; |
966 } | 1295 } |
967 if (params.quality == 10) { | 1296 if (quality == 10) { |
968 // TODO: Implement this direct path for all quality levels. | 1297 /* TODO: Implement this direct path for all quality levels. */ |
969 const int lgwin = std::min(24, std::max(16, params.lgwin)); | 1298 const int lg_win = BROTLI_MIN(int, BROTLI_MAX_WINDOW_BITS, |
970 return BrotliCompressBufferQuality10(lgwin, input_size, input_buffer, | 1299 BROTLI_MAX(int, 16, lgwin)); |
971 encoded_size, encoded_buffer); | 1300 int ok = BrotliCompressBufferQuality10(lg_win, input_size, input_buffer, |
972 } | 1301 encoded_size, encoded_buffer); |
973 BrotliMemIn in(input_buffer, input_size); | 1302 if (!ok || (max_out_size && *encoded_size > max_out_size)) { |
974 BrotliMemOut out(encoded_buffer, *encoded_size); | 1303 goto fallback; |
975 if (!BrotliCompress(params, &in, &out)) { | 1304 } |
976 return 0; | 1305 return BROTLI_TRUE; |
977 } | 1306 } |
978 *encoded_size = out.position(); | 1307 |
979 return 1; | 1308 s = BrotliEncoderCreateInstance(0, 0, 0); |
980 } | 1309 if (!s) { |
981 | 1310 return BROTLI_FALSE; |
982 static bool BrotliInIsFinished(BrotliIn* r) { | 1311 } else { |
983 size_t read_bytes; | 1312 size_t available_in = input_size; |
984 return r->Read(0, &read_bytes) == NULL; | 1313 const uint8_t* next_in = input_buffer; |
985 } | 1314 size_t available_out = *encoded_size; |
986 | 1315 uint8_t* next_out = encoded_buffer; |
987 static const uint8_t* BrotliInReadAndCheckEnd(const size_t block_size, | 1316 size_t total_out = 0; |
988 BrotliIn* r, | 1317 BROTLI_BOOL result = BROTLI_FALSE; |
989 size_t* bytes_read, | 1318 BrotliEncoderSetParameter(s, BROTLI_PARAM_QUALITY, (uint32_t)quality); |
990 bool* is_last) { | 1319 BrotliEncoderSetParameter(s, BROTLI_PARAM_LGWIN, (uint32_t)lgwin); |
991 *bytes_read = 0; | 1320 BrotliEncoderSetParameter(s, BROTLI_PARAM_MODE, (uint32_t)mode); |
992 const uint8_t* data = reinterpret_cast<const uint8_t*>( | 1321 result = BrotliEncoderCompressStream(s, BROTLI_OPERATION_FINISH, |
993 r->Read(block_size, bytes_read)); | 1322 &available_in, &next_in, &available_out, &next_out, &total_out); |
994 assert((data == NULL) == (*bytes_read == 0)); | 1323 if (!BrotliEncoderIsFinished(s)) result = 0; |
995 *is_last = BrotliInIsFinished(r); | 1324 *encoded_size = total_out; |
996 return data; | 1325 BrotliEncoderDestroyInstance(s); |
997 } | 1326 if (!result || (max_out_size && *encoded_size > max_out_size)) { |
998 | 1327 goto fallback; |
999 static bool CopyOneBlockToRingBuffer(BrotliIn* r, | 1328 } |
1000 BrotliCompressor* compressor, | 1329 return BROTLI_TRUE; |
1001 size_t* bytes_read, | 1330 } |
1002 bool* is_last) { | 1331 fallback: |
1003 const size_t block_size = compressor->input_block_size(); | 1332 *encoded_size = 0; |
1004 const uint8_t* data = BrotliInReadAndCheckEnd(block_size, r, | 1333 if (!max_out_size) return BROTLI_FALSE; |
1005 bytes_read, is_last); | 1334 if (out_size >= max_out_size) { |
1006 if (data == NULL) { | 1335 *encoded_size = |
1007 return *is_last; | 1336 MakeUncompressedStream(input_start, input_size, output_start); |
1008 } | 1337 return BROTLI_TRUE; |
1009 compressor->CopyInputToRingBuffer(*bytes_read, data); | 1338 } |
1010 | 1339 return BROTLI_FALSE; |
1011 // Read more bytes until block_size is filled or an EOF (data == NULL) is | 1340 } |
1012 // received. This is useful to get deterministic compressed output for the | 1341 |
1013 // same input no matter how r->Read splits the input to chunks. | 1342 static void InjectBytePaddingBlock(BrotliEncoderState* s) { |
1014 for (size_t remaining = block_size - *bytes_read; remaining > 0; ) { | 1343 uint32_t seal = s->last_byte_; |
1015 size_t more_bytes_read = 0; | 1344 size_t seal_bits = s->last_byte_bits_; |
1016 data = BrotliInReadAndCheckEnd(remaining, r, &more_bytes_read, is_last); | 1345 uint8_t* destination; |
1017 if (data == NULL) { | 1346 s->last_byte_ = 0; |
1018 return *is_last; | 1347 s->last_byte_bits_ = 0; |
1019 } | 1348 /* is_last = 0, data_nibbles = 11, reserved = 0, meta_nibbles = 00 */ |
1020 compressor->CopyInputToRingBuffer(more_bytes_read, data); | 1349 seal |= 0x6u << seal_bits; |
1021 *bytes_read += more_bytes_read; | 1350 seal_bits += 6; |
1022 remaining -= more_bytes_read; | 1351 /* If we have already created storage, then append to it. |
1023 } | 1352 Storage is valid until next block is being compressed. */ |
1024 return true; | 1353 if (s->next_out_) { |
1025 } | 1354 destination = s->next_out_ + s->available_out_; |
1026 | 1355 } else { |
1027 | 1356 destination = s->tiny_buf_.u8; |
1028 int BrotliCompress(BrotliParams params, BrotliIn* in, BrotliOut* out) { | 1357 s->next_out_ = destination; |
1029 return BrotliCompressWithCustomDictionary(0, 0, params, in, out); | 1358 } |
1030 } | 1359 destination[0] = (uint8_t)seal; |
1031 | 1360 if (seal_bits > 8) destination[1] = (uint8_t)(seal >> 8); |
1032 // Reads the provided input in 'block_size' blocks. Only the last read can be | 1361 s->available_out_ += (seal_bits + 7) >> 3; |
1033 // smaller than 'block_size'. | 1362 } |
1034 class BrotliBlockReader { | 1363 |
1035 public: | 1364 /* Injects padding bits or pushes compressed data to output. |
1036 explicit BrotliBlockReader(size_t block_size) | 1365 Returns false if nothing is done. */ |
1037 : block_size_(block_size), buf_(NULL) {} | 1366 static BROTLI_BOOL InjectFlushOrPushOutput(BrotliEncoderState* s, |
1038 ~BrotliBlockReader(void) { delete[] buf_; } | 1367 size_t* available_out, uint8_t** next_out, size_t* total_out) { |
1039 | 1368 if (s->stream_state_ == BROTLI_STREAM_FLUSH_REQUESTED && |
1040 const uint8_t* Read(BrotliIn* in, size_t* bytes_read, bool* is_last) { | 1369 s->last_byte_bits_ != 0) { |
1041 *bytes_read = 0; | 1370 InjectBytePaddingBlock(s); |
1042 const uint8_t* data = BrotliInReadAndCheckEnd(block_size_, in, | 1371 return BROTLI_TRUE; |
1043 bytes_read, is_last); | 1372 } |
1044 if (data == NULL || *bytes_read == block_size_ || *is_last) { | 1373 |
1045 // If we could get the whole block in one read, or it is the last block, | 1374 if (s->available_out_ != 0 && *available_out != 0) { |
1046 // we just return the pointer to the data without copying. | 1375 size_t copy_output_size = |
1047 return data; | 1376 BROTLI_MIN(size_t, s->available_out_, *available_out); |
1048 } | 1377 memcpy(*next_out, s->next_out_, copy_output_size); |
1049 // If the data comes in smaller chunks, we need to copy it into an internal | 1378 *next_out += copy_output_size; |
1050 // buffer until we get a whole block or reach the last chunk. | 1379 *available_out -= copy_output_size; |
1051 if (buf_ == NULL) { | 1380 s->next_out_ += copy_output_size; |
1052 buf_ = new uint8_t[block_size_]; | 1381 s->available_out_ -= copy_output_size; |
1053 } | 1382 s->total_out_ += copy_output_size; |
1054 memcpy(buf_, data, *bytes_read); | 1383 if (total_out) *total_out = s->total_out_; |
1055 do { | 1384 return BROTLI_TRUE; |
1056 size_t cur_bytes_read = 0; | 1385 } |
1057 data = BrotliInReadAndCheckEnd(block_size_ - *bytes_read, in, | 1386 |
1058 &cur_bytes_read, is_last); | 1387 return BROTLI_FALSE; |
1059 if (data == NULL) { | 1388 } |
1060 return *is_last ? buf_ : NULL; | 1389 |
1061 } | 1390 static void CheckFlushComplete(BrotliEncoderState* s) { |
1062 memcpy(&buf_[*bytes_read], data, cur_bytes_read); | 1391 if (s->stream_state_ == BROTLI_STREAM_FLUSH_REQUESTED && |
1063 *bytes_read += cur_bytes_read; | 1392 s->available_out_ == 0) { |
1064 } while (*bytes_read < block_size_ && !*is_last); | 1393 s->stream_state_ = BROTLI_STREAM_PROCESSING; |
1065 return buf_; | 1394 s->next_out_ = 0; |
1066 } | 1395 } |
1067 | 1396 } |
1068 private: | 1397 |
1069 const size_t block_size_; | 1398 static BROTLI_BOOL BrotliEncoderCompressStreamFast( |
1070 uint8_t* buf_; | 1399 BrotliEncoderState* s, BrotliEncoderOperation op, size_t* available_in, |
1071 }; | 1400 const uint8_t** next_in, size_t* available_out, uint8_t** next_out, |
1072 | 1401 size_t* total_out) { |
1073 int BrotliCompressWithCustomDictionary(size_t dictsize, const uint8_t* dict, | 1402 const size_t block_size_limit = (size_t)1 << s->params.lgwin; |
1074 BrotliParams params, | 1403 const size_t buf_size = BROTLI_MIN(size_t, kCompressFragmentTwoPassBlockSize, |
1075 BrotliIn* in, BrotliOut* out) { | 1404 BROTLI_MIN(size_t, *available_in, block_size_limit)); |
1076 if (params.quality <= 1) { | 1405 uint32_t* tmp_command_buf = NULL; |
1077 const int quality = std::max(0, params.quality); | 1406 uint32_t* command_buf = NULL; |
1078 const int lgwin = std::min(kMaxWindowBits, | 1407 uint8_t* tmp_literal_buf = NULL; |
1079 std::max(kMinWindowBits, params.lgwin)); | 1408 uint8_t* literal_buf = NULL; |
1080 uint8_t* storage = NULL; | 1409 MemoryManager* m = &s->memory_manager_; |
1081 int* table = NULL; | 1410 if (s->params.quality != FAST_ONE_PASS_COMPRESSION_QUALITY && |
1082 uint32_t* command_buf = NULL; | 1411 s->params.quality != FAST_TWO_PASS_COMPRESSION_QUALITY) { |
1083 uint8_t* literal_buf = NULL; | 1412 return BROTLI_FALSE; |
1084 uint8_t cmd_depths[128]; | 1413 } |
1085 uint16_t cmd_bits[128]; | 1414 if (s->params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY) { |
1086 uint8_t cmd_code[512]; | 1415 if (!s->command_buf_ && buf_size == kCompressFragmentTwoPassBlockSize) { |
1087 size_t cmd_code_numbits; | 1416 s->command_buf_ = |
1088 if (quality == 0) { | 1417 BROTLI_ALLOC(m, uint32_t, kCompressFragmentTwoPassBlockSize); |
1089 InitCommandPrefixCodes(cmd_depths, cmd_bits, cmd_code, &cmd_code_numbits); | 1418 s->literal_buf_ = |
1090 } | 1419 BROTLI_ALLOC(m, uint8_t, kCompressFragmentTwoPassBlockSize); |
1091 uint8_t last_byte; | 1420 if (BROTLI_IS_OOM(m)) return BROTLI_FALSE; |
1092 uint8_t last_byte_bits; | 1421 } |
1093 EncodeWindowBits(lgwin, &last_byte, &last_byte_bits); | 1422 if (s->command_buf_) { |
1094 BrotliBlockReader r(1u << lgwin); | 1423 command_buf = s->command_buf_; |
1095 int ok = 1; | 1424 literal_buf = s->literal_buf_; |
1096 bool is_last = false; | 1425 } else { |
1097 while (ok && !is_last) { | 1426 tmp_command_buf = BROTLI_ALLOC(m, uint32_t, buf_size); |
1098 // Read next block of input. | 1427 tmp_literal_buf = BROTLI_ALLOC(m, uint8_t, buf_size); |
1099 size_t bytes; | 1428 if (BROTLI_IS_OOM(m)) return BROTLI_FALSE; |
1100 const uint8_t* data = r.Read(in, &bytes, &is_last); | 1429 command_buf = tmp_command_buf; |
1101 if (data == NULL) { | 1430 literal_buf = tmp_literal_buf; |
1102 if (!is_last) { | 1431 } |
1103 ok = 0; | 1432 } |
1104 break; | 1433 |
1105 } | 1434 while (BROTLI_TRUE) { |
1106 assert(bytes == 0); | 1435 if (InjectFlushOrPushOutput(s, available_out, next_out, total_out)) { |
1107 } | 1436 continue; |
1108 // Set up output storage. | 1437 } |
1109 const size_t max_out_size = 2 * bytes + 500; | 1438 |
1110 if (storage == NULL) { | 1439 /* Compress block only when internal output buffer is empty, stream is not |
1111 storage = new uint8_t[max_out_size]; | 1440 finished, there is no pending flush request, and there is either |
1112 } | 1441 additional input or pending operation. */ |
1113 storage[0] = last_byte; | 1442 if (s->available_out_ == 0 && |
1114 size_t storage_ix = last_byte_bits; | 1443 s->stream_state_ == BROTLI_STREAM_PROCESSING && |
1115 // Set up hash table. | 1444 (*available_in != 0 || op != BROTLI_OPERATION_PROCESS)) { |
1116 size_t htsize = HashTableSize(MaxHashTableSize(quality), bytes); | 1445 size_t block_size = BROTLI_MIN(size_t, block_size_limit, *available_in); |
1117 if (table == NULL) { | 1446 BROTLI_BOOL is_last = |
1118 table = new int[htsize]; | 1447 (*available_in == block_size) && (op == BROTLI_OPERATION_FINISH); |
1119 } | 1448 BROTLI_BOOL force_flush = |
1120 memset(table, 0, htsize * sizeof(table[0])); | 1449 (*available_in == block_size) && (op == BROTLI_OPERATION_FLUSH); |
1121 // Set up command and literal buffers for two pass mode. | 1450 size_t max_out_size = 2 * block_size + 502; |
1122 if (quality == 1 && command_buf == NULL) { | 1451 BROTLI_BOOL inplace = BROTLI_TRUE; |
1123 size_t buf_size = std::min(bytes, kCompressFragmentTwoPassBlockSize); | 1452 uint8_t* storage = NULL; |
1124 command_buf = new uint32_t[buf_size]; | 1453 size_t storage_ix = s->last_byte_bits_; |
1125 literal_buf = new uint8_t[buf_size]; | 1454 size_t table_size; |
1126 } | 1455 int* table; |
1127 // Do the actual compression. | 1456 |
1128 if (quality == 0) { | 1457 if (force_flush && block_size == 0) { |
1129 BrotliCompressFragmentFast(data, bytes, is_last, table, htsize, | 1458 s->stream_state_ = BROTLI_STREAM_FLUSH_REQUESTED; |
1130 cmd_depths, cmd_bits, | 1459 continue; |
1131 &cmd_code_numbits, cmd_code, | 1460 } |
1132 &storage_ix, storage); | 1461 if (max_out_size <= *available_out) { |
| 1462 storage = *next_out; |
1133 } else { | 1463 } else { |
1134 BrotliCompressFragmentTwoPass(data, bytes, is_last, | 1464 inplace = BROTLI_FALSE; |
1135 command_buf, literal_buf, | 1465 storage = GetBrotliStorage(s, max_out_size); |
1136 table, htsize, | 1466 if (BROTLI_IS_OOM(m)) return BROTLI_FALSE; |
1137 &storage_ix, storage); | 1467 } |
1138 } | 1468 storage[0] = s->last_byte_; |
1139 // Save last bytes to stitch it together with the next output block. | 1469 table = GetHashTable(s, s->params.quality, block_size, &table_size); |
1140 last_byte = storage[storage_ix >> 3]; | 1470 if (BROTLI_IS_OOM(m)) return BROTLI_FALSE; |
1141 last_byte_bits = storage_ix & 7u; | 1471 |
1142 // Write output block. | 1472 if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY) { |
1143 size_t out_bytes = storage_ix >> 3; | 1473 BrotliCompressFragmentFast(m, *next_in, block_size, is_last, table, |
1144 if (out_bytes > 0 && !out->Write(storage, out_bytes)) { | 1474 table_size, s->cmd_depths_, s->cmd_bits_, &s->cmd_code_numbits_, |
1145 ok = 0; | 1475 s->cmd_code_, &storage_ix, storage); |
| 1476 if (BROTLI_IS_OOM(m)) return BROTLI_FALSE; |
| 1477 } else { |
| 1478 BrotliCompressFragmentTwoPass(m, *next_in, block_size, is_last, |
| 1479 command_buf, literal_buf, table, table_size, |
| 1480 &storage_ix, storage); |
| 1481 if (BROTLI_IS_OOM(m)) return BROTLI_FALSE; |
| 1482 } |
| 1483 *next_in += block_size; |
| 1484 *available_in -= block_size; |
| 1485 if (inplace) { |
| 1486 size_t out_bytes = storage_ix >> 3; |
| 1487 assert(out_bytes <= *available_out); |
| 1488 assert((storage_ix & 7) == 0 || out_bytes < *available_out); |
| 1489 *next_out += out_bytes; |
| 1490 *available_out -= out_bytes; |
| 1491 s->total_out_ += out_bytes; |
| 1492 if (total_out) *total_out = s->total_out_; |
| 1493 } else { |
| 1494 size_t out_bytes = storage_ix >> 3; |
| 1495 s->next_out_ = storage; |
| 1496 s->available_out_ = out_bytes; |
| 1497 } |
| 1498 s->last_byte_ = storage[storage_ix >> 3]; |
| 1499 s->last_byte_bits_ = storage_ix & 7u; |
| 1500 |
| 1501 if (force_flush) s->stream_state_ = BROTLI_STREAM_FLUSH_REQUESTED; |
| 1502 if (is_last) s->stream_state_ = BROTLI_STREAM_FINISHED; |
| 1503 continue; |
| 1504 } |
| 1505 break; |
| 1506 } |
| 1507 BROTLI_FREE(m, tmp_command_buf); |
| 1508 BROTLI_FREE(m, tmp_literal_buf); |
| 1509 CheckFlushComplete(s); |
| 1510 return BROTLI_TRUE; |
| 1511 } |
| 1512 |
| 1513 static BROTLI_BOOL ProcessMetadata( |
| 1514 BrotliEncoderState* s, size_t* available_in, const uint8_t** next_in, |
| 1515 size_t* available_out, uint8_t** next_out, size_t* total_out) { |
| 1516 if (*available_in > (1u << 24)) return BROTLI_FALSE; |
| 1517 /* Switch to metadata block workflow, if required. */ |
| 1518 if (s->stream_state_ == BROTLI_STREAM_PROCESSING) { |
| 1519 s->remaining_metadata_bytes_ = (uint32_t)*available_in; |
| 1520 s->stream_state_ = BROTLI_STREAM_METADATA_HEAD; |
| 1521 } |
| 1522 if (s->stream_state_ != BROTLI_STREAM_METADATA_HEAD && |
| 1523 s->stream_state_ != BROTLI_STREAM_METADATA_BODY) { |
| 1524 return BROTLI_FALSE; |
| 1525 } |
| 1526 |
| 1527 while (BROTLI_TRUE) { |
| 1528 if (InjectFlushOrPushOutput(s, available_out, next_out, total_out)) { |
| 1529 continue; |
| 1530 } |
| 1531 if (s->available_out_ != 0) break; |
| 1532 |
| 1533 if (s->input_pos_ != s->last_flush_pos_) { |
| 1534 BROTLI_BOOL result = EncodeData(s, BROTLI_FALSE, BROTLI_TRUE, |
| 1535 &s->available_out_, &s->next_out_); |
| 1536 if (!result) return BROTLI_FALSE; |
| 1537 continue; |
| 1538 } |
| 1539 |
| 1540 if (s->stream_state_ == BROTLI_STREAM_METADATA_HEAD) { |
| 1541 s->next_out_ = s->tiny_buf_.u8; |
| 1542 s->available_out_ = |
| 1543 WriteMetadataHeader(s, s->remaining_metadata_bytes_, s->next_out_); |
| 1544 s->stream_state_ = BROTLI_STREAM_METADATA_BODY; |
| 1545 continue; |
| 1546 } else { |
| 1547 /* Exit workflow only when there is no more input and no more output. |
| 1548 Otherwise client may continue producing empty metadata blocks. */ |
| 1549 if (s->remaining_metadata_bytes_ == 0) { |
| 1550 s->remaining_metadata_bytes_ = BROTLI_UINT32_MAX; |
| 1551 s->stream_state_ = BROTLI_STREAM_PROCESSING; |
1146 break; | 1552 break; |
1147 } | 1553 } |
1148 } | 1554 if (*available_out) { |
1149 delete[] storage; | 1555 /* Directly copy input to output. */ |
1150 delete[] table; | 1556 uint32_t copy = (uint32_t)BROTLI_MIN( |
1151 delete[] command_buf; | 1557 size_t, s->remaining_metadata_bytes_, *available_out); |
1152 delete[] literal_buf; | 1558 memcpy(*next_out, *next_in, copy); |
1153 return ok; | 1559 *next_in += copy; |
1154 } | 1560 *available_in -= copy; |
1155 | 1561 s->remaining_metadata_bytes_ -= copy; |
1156 size_t in_bytes = 0; | 1562 *next_out += copy; |
1157 size_t out_bytes = 0; | 1563 *available_out -= copy; |
1158 uint8_t* output = NULL; | 1564 } else { |
1159 bool final_block = false; | 1565 /* This guarantees progress in "TakeOutput" workflow. */ |
1160 BrotliCompressor compressor(params); | 1566 uint32_t copy = BROTLI_MIN(uint32_t, s->remaining_metadata_bytes_, 16); |
1161 if (dictsize != 0) compressor.BrotliSetCustomDictionary(dictsize, dict); | 1567 s->next_out_ = s->tiny_buf_.u8; |
1162 while (!final_block) { | 1568 memcpy(s->next_out_, *next_in, copy); |
1163 if (!CopyOneBlockToRingBuffer(in, &compressor, &in_bytes, &final_block)) { | 1569 *next_in += copy; |
1164 return false; | 1570 *available_in -= copy; |
1165 } | 1571 s->remaining_metadata_bytes_ -= copy; |
1166 out_bytes = 0; | 1572 s->available_out_ = copy; |
1167 if (!compressor.WriteBrotliData(final_block, | 1573 } |
1168 /* force_flush = */ false, | 1574 continue; |
1169 &out_bytes, &output)) { | 1575 } |
1170 return false; | 1576 } |
1171 } | 1577 |
1172 if (out_bytes > 0 && !out->Write(output, out_bytes)) { | 1578 return BROTLI_TRUE; |
1173 return false; | 1579 } |
1174 } | 1580 |
1175 } | 1581 BROTLI_BOOL BrotliEncoderCompressStream( |
1176 return true; | 1582 BrotliEncoderState* s, BrotliEncoderOperation op, size_t* available_in, |
1177 } | 1583 const uint8_t** next_in, size_t* available_out,uint8_t** next_out, |
1178 | 1584 size_t* total_out) { |
1179 | 1585 if (!EnsureInitialized(s)) return BROTLI_FALSE; |
1180 } // namespace brotli | 1586 |
| 1587 /* Unfinished metadata block; check requirements. */ |
| 1588 if (s->remaining_metadata_bytes_ != BROTLI_UINT32_MAX) { |
| 1589 if (*available_in != s->remaining_metadata_bytes_) return BROTLI_FALSE; |
| 1590 if (op != BROTLI_OPERATION_EMIT_METADATA) return BROTLI_FALSE; |
| 1591 } |
| 1592 |
| 1593 if (op == BROTLI_OPERATION_EMIT_METADATA) { |
| 1594 return ProcessMetadata( |
| 1595 s, available_in, next_in, available_out, next_out, total_out); |
| 1596 } |
| 1597 |
| 1598 if (s->stream_state_ == BROTLI_STREAM_METADATA_HEAD || |
| 1599 s->stream_state_ == BROTLI_STREAM_METADATA_BODY) { |
| 1600 return BROTLI_FALSE; |
| 1601 } |
| 1602 |
| 1603 if (s->stream_state_ != BROTLI_STREAM_PROCESSING && *available_in != 0) { |
| 1604 return BROTLI_FALSE; |
| 1605 } |
| 1606 if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY || |
| 1607 s->params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY) { |
| 1608 return BrotliEncoderCompressStreamFast(s, op, available_in, next_in, |
| 1609 available_out, next_out, total_out); |
| 1610 } |
| 1611 while (BROTLI_TRUE) { |
| 1612 size_t remaining_block_size = RemainingInputBlockSize(s); |
| 1613 |
| 1614 if (remaining_block_size != 0 && *available_in != 0) { |
| 1615 size_t copy_input_size = |
| 1616 BROTLI_MIN(size_t, remaining_block_size, *available_in); |
| 1617 CopyInputToRingBuffer(s, copy_input_size, *next_in); |
| 1618 *next_in += copy_input_size; |
| 1619 *available_in -= copy_input_size; |
| 1620 continue; |
| 1621 } |
| 1622 |
| 1623 if (InjectFlushOrPushOutput(s, available_out, next_out, total_out)) { |
| 1624 continue; |
| 1625 } |
| 1626 |
| 1627 /* Compress data only when internal output buffer is empty, stream is not |
| 1628 finished and there is no pending flush request. */ |
| 1629 if (s->available_out_ == 0 && |
| 1630 s->stream_state_ == BROTLI_STREAM_PROCESSING) { |
| 1631 if (remaining_block_size == 0 || op != BROTLI_OPERATION_PROCESS) { |
| 1632 BROTLI_BOOL is_last = TO_BROTLI_BOOL( |
| 1633 (*available_in == 0) && op == BROTLI_OPERATION_FINISH); |
| 1634 BROTLI_BOOL force_flush = TO_BROTLI_BOOL( |
| 1635 (*available_in == 0) && op == BROTLI_OPERATION_FLUSH); |
| 1636 BROTLI_BOOL result = EncodeData(s, is_last, force_flush, |
| 1637 &s->available_out_, &s->next_out_); |
| 1638 if (!result) return BROTLI_FALSE; |
| 1639 if (force_flush) s->stream_state_ = BROTLI_STREAM_FLUSH_REQUESTED; |
| 1640 if (is_last) s->stream_state_ = BROTLI_STREAM_FINISHED; |
| 1641 continue; |
| 1642 } |
| 1643 } |
| 1644 break; |
| 1645 } |
| 1646 CheckFlushComplete(s); |
| 1647 return BROTLI_TRUE; |
| 1648 } |
| 1649 |
| 1650 BROTLI_BOOL BrotliEncoderIsFinished(BrotliEncoderState* s) { |
| 1651 return TO_BROTLI_BOOL(s->stream_state_ == BROTLI_STREAM_FINISHED && |
| 1652 !BrotliEncoderHasMoreOutput(s)); |
| 1653 } |
| 1654 |
| 1655 BROTLI_BOOL BrotliEncoderHasMoreOutput(BrotliEncoderState* s) { |
| 1656 return TO_BROTLI_BOOL(s->available_out_ != 0); |
| 1657 } |
| 1658 |
| 1659 const uint8_t* BrotliEncoderTakeOutput(BrotliEncoderState* s, size_t* size) { |
| 1660 size_t consumed_size = s->available_out_; |
| 1661 uint8_t* result = s->next_out_; |
| 1662 if (*size) { |
| 1663 consumed_size = BROTLI_MIN(size_t, *size, s->available_out_); |
| 1664 } |
| 1665 if (consumed_size) { |
| 1666 s->next_out_ += consumed_size; |
| 1667 s->available_out_ -= consumed_size; |
| 1668 s->total_out_ += consumed_size; |
| 1669 CheckFlushComplete(s); |
| 1670 *size = consumed_size; |
| 1671 } else { |
| 1672 *size = 0; |
| 1673 result = 0; |
| 1674 } |
| 1675 return result; |
| 1676 } |
| 1677 |
| 1678 uint32_t BrotliEncoderVersion(void) { |
| 1679 return BROTLI_VERSION; |
| 1680 } |
| 1681 |
| 1682 |
| 1683 /* DEPRECATED >>> */ |
| 1684 size_t BrotliEncoderInputBlockSize(BrotliEncoderState* s) { |
| 1685 return InputBlockSize(s); |
| 1686 } |
| 1687 void BrotliEncoderCopyInputToRingBuffer(BrotliEncoderState* s, |
| 1688 const size_t input_size, |
| 1689 const uint8_t* input_buffer) { |
| 1690 CopyInputToRingBuffer(s, input_size, input_buffer); |
| 1691 } |
| 1692 BROTLI_BOOL BrotliEncoderWriteData( |
| 1693 BrotliEncoderState* s, const BROTLI_BOOL is_last, |
| 1694 const BROTLI_BOOL force_flush, size_t* out_size, uint8_t** output) { |
| 1695 return EncodeData(s, is_last, force_flush, out_size, output); |
| 1696 } |
| 1697 /* <<< DEPRECATED */ |
| 1698 |
| 1699 #if defined(__cplusplus) || defined(c_plusplus) |
| 1700 } /* extern "C" */ |
| 1701 #endif |
OLD | NEW |