OLD | NEW |
| (Empty) |
1 // Copyright 2016 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 #include "net/http2/hpack/huffman/http2_hpack_huffman_decoder.h" | |
6 | |
7 #include <bitset> | |
8 #include <limits> | |
9 | |
10 #include "base/logging.h" | |
11 | |
12 using base::StringPiece; | |
13 using std::string; | |
14 | |
15 // Terminology: | |
16 // | |
17 // Symbol - a plain text (unencoded) character (uint8), or the End-of-String | |
18 // (EOS) symbol, 256. | |
19 // | |
20 // Code - the sequence of bits used to encode a symbol, varying in length from | |
21 // 5 bits for the most common symbols (e.g. '0', '1', and 'a'), to | |
22 // 30 bits for the least common (e.g. the EOS symbol). | |
23 // For those symbols whose codes have the same length, their code values | |
24 // are sorted such that the lower symbol value has a lower code value. | |
25 // | |
26 // Canonical - a symbol's cardinal value when sorted first by code length, and | |
27 // then by symbol value. For example, canonical 0 is for ASCII '0' | |
28 // (uint8 value 0x30), which is the first of the symbols whose code | |
29 // is 5 bits long, and the last canonical is EOS, which is the last | |
30 // of the symbols whose code is 30 bits long. | |
31 | |
32 // TODO(jamessynge): Remove use of binary literals, that is a C++ 14 feature. | |
33 | |
34 namespace net { | |
35 namespace { | |
36 | |
37 // HuffmanCode is used to store the codes associated with symbols (a pattern of | |
38 // from 5 to 30 bits). | |
39 typedef uint32_t HuffmanCode; | |
40 | |
41 // HuffmanCodeBitCount is used to store a count of bits in a code. | |
42 typedef uint16_t HuffmanCodeBitCount; | |
43 | |
44 // HuffmanCodeBitSet is used for producing a string version of a code because | |
45 // std::bitset logs nicely. | |
46 typedef std::bitset<32> HuffmanCodeBitSet; | |
47 typedef std::bitset<64> HuffmanAccumulatorBitSet; | |
48 | |
49 static constexpr HuffmanCodeBitCount kMinCodeBitCount = 5; | |
50 static constexpr HuffmanCodeBitCount kMaxCodeBitCount = 30; | |
51 static constexpr HuffmanCodeBitCount kHuffmanCodeBitCount = | |
52 std::numeric_limits<HuffmanCode>::digits; | |
53 | |
54 static_assert(std::numeric_limits<HuffmanCode>::digits >= kMaxCodeBitCount, | |
55 "HuffmanCode isn't big enough."); | |
56 | |
57 static_assert(std::numeric_limits<HuffmanAccumulator>::digits >= | |
58 kMaxCodeBitCount, | |
59 "HuffmanAccumulator isn't big enough."); | |
60 | |
61 static constexpr HuffmanAccumulatorBitCount kHuffmanAccumulatorBitCount = | |
62 std::numeric_limits<HuffmanAccumulator>::digits; | |
63 static constexpr HuffmanAccumulatorBitCount kExtraAccumulatorBitCount = | |
64 kHuffmanAccumulatorBitCount - kHuffmanCodeBitCount; | |
65 | |
66 // PrefixInfo holds info about a group of codes that are all of the same length. | |
67 struct PrefixInfo { | |
68 // Given the leading bits (32 in this case) of the encoded string, and that | |
69 // they start with a code of length |code_length|, return the corresponding | |
70 // canonical for that leading code. | |
71 uint32_t DecodeToCanonical(HuffmanCode bits) const { | |
72 // What is the position of the canonical symbol being decoded within | |
73 // the canonical symbols of |length|? | |
74 HuffmanCode ordinal_in_length = | |
75 ((bits - first_code) >> (kHuffmanCodeBitCount - code_length)); | |
76 | |
77 // Combined with |canonical| to produce the position of the canonical symbol | |
78 // being decoded within all of the canonical symbols. | |
79 return first_canonical + ordinal_in_length; | |
80 } | |
81 | |
82 const HuffmanCode first_code; // First code of this length, left justified in | |
83 // the field (i.e. the first bit of the code is | |
84 // the high-order bit). | |
85 const uint16_t code_length; // Length of the prefix code |base|. | |
86 const uint16_t first_canonical; // First canonical symbol of this length. | |
87 }; | |
88 | |
89 inline std::ostream& operator<<(std::ostream& out, const PrefixInfo& v) { | |
90 return out << "{first_code: " << HuffmanCodeBitSet(v.first_code) | |
91 << ", code_length: " << v.code_length | |
92 << ", first_canonical: " << v.first_canonical << "}"; | |
93 } | |
94 | |
95 // Given |value|, a sequence of the leading bits remaining to be decoded, | |
96 // figure out which group of canonicals (by code length) that value starts | |
97 // with. This function was generated. | |
98 PrefixInfo PrefixToInfo(HuffmanCode value) { | |
99 if (value < 0b10111000000000000000000000000000) { | |
100 if (value < 0b01010000000000000000000000000000) { | |
101 return {0b00000000000000000000000000000000, 5, 0}; | |
102 } else { | |
103 return {0b01010000000000000000000000000000, 6, 10}; | |
104 } | |
105 } else { | |
106 if (value < 0b11111110000000000000000000000000) { | |
107 if (value < 0b11111000000000000000000000000000) { | |
108 return {0b10111000000000000000000000000000, 7, 36}; | |
109 } else { | |
110 return {0b11111000000000000000000000000000, 8, 68}; | |
111 } | |
112 } else { | |
113 if (value < 0b11111111110000000000000000000000) { | |
114 if (value < 0b11111111101000000000000000000000) { | |
115 if (value < 0b11111111010000000000000000000000) { | |
116 return {0b11111110000000000000000000000000, 10, 74}; | |
117 } else { | |
118 return {0b11111111010000000000000000000000, 11, 79}; | |
119 } | |
120 } else { | |
121 return {0b11111111101000000000000000000000, 12, 82}; | |
122 } | |
123 } else { | |
124 if (value < 0b11111111111111100000000000000000) { | |
125 if (value < 0b11111111111110000000000000000000) { | |
126 if (value < 0b11111111111100000000000000000000) { | |
127 return {0b11111111110000000000000000000000, 13, 84}; | |
128 } else { | |
129 return {0b11111111111100000000000000000000, 14, 90}; | |
130 } | |
131 } else { | |
132 return {0b11111111111110000000000000000000, 15, 92}; | |
133 } | |
134 } else { | |
135 if (value < 0b11111111111111110100100000000000) { | |
136 if (value < 0b11111111111111101110000000000000) { | |
137 if (value < 0b11111111111111100110000000000000) { | |
138 return {0b11111111111111100000000000000000, 19, 95}; | |
139 } else { | |
140 return {0b11111111111111100110000000000000, 20, 98}; | |
141 } | |
142 } else { | |
143 return {0b11111111111111101110000000000000, 21, 106}; | |
144 } | |
145 } else { | |
146 if (value < 0b11111111111111111110101000000000) { | |
147 if (value < 0b11111111111111111011000000000000) { | |
148 return {0b11111111111111110100100000000000, 22, 119}; | |
149 } else { | |
150 return {0b11111111111111111011000000000000, 23, 145}; | |
151 } | |
152 } else { | |
153 if (value < 0b11111111111111111111101111000000) { | |
154 if (value < 0b11111111111111111111100000000000) { | |
155 if (value < 0b11111111111111111111011000000000) { | |
156 return {0b11111111111111111110101000000000, 24, 174}; | |
157 } else { | |
158 return {0b11111111111111111111011000000000, 25, 186}; | |
159 } | |
160 } else { | |
161 return {0b11111111111111111111100000000000, 26, 190}; | |
162 } | |
163 } else { | |
164 if (value < 0b11111111111111111111111111110000) { | |
165 if (value < 0b11111111111111111111111000100000) { | |
166 return {0b11111111111111111111101111000000, 27, 205}; | |
167 } else { | |
168 return {0b11111111111111111111111000100000, 28, 224}; | |
169 } | |
170 } else { | |
171 return {0b11111111111111111111111111110000, 30, 253}; | |
172 } | |
173 } | |
174 } | |
175 } | |
176 } | |
177 } | |
178 } | |
179 } | |
180 } | |
181 | |
182 // Mapping from canonical symbol (0 to 255) to actual symbol. | |
183 // clang-format off | |
184 constexpr unsigned char kCanonicalToSymbol[] = { | |
185 '0', '1', '2', 'a', 'c', 'e', 'i', 'o', | |
186 's', 't', 0x20, '%', '-', '.', '/', '3', | |
187 '4', '5', '6', '7', '8', '9', '=', 'A', | |
188 '_', 'b', 'd', 'f', 'g', 'h', 'l', 'm', | |
189 'n', 'p', 'r', 'u', ':', 'B', 'C', 'D', | |
190 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', | |
191 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', | |
192 'U', 'V', 'W', 'Y', 'j', 'k', 'q', 'v', | |
193 'w', 'x', 'y', 'z', '&', '*', ',', ';', | |
194 'X', 'Z', '!', '\"', '(', ')', '?', '\'', | |
195 '+', '|', '#', '>', 0x00, '$', '@', '[', | |
196 ']', '~', '^', '}', '<', '`', '{', '\\', | |
197 0xc3, 0xd0, 0x80, 0x82, 0x83, 0xa2, 0xb8, 0xc2, | |
198 0xe0, 0xe2, 0x99, 0xa1, 0xa7, 0xac, 0xb0, 0xb1, | |
199 0xb3, 0xd1, 0xd8, 0xd9, 0xe3, 0xe5, 0xe6, 0x81, | |
200 0x84, 0x85, 0x86, 0x88, 0x92, 0x9a, 0x9c, 0xa0, | |
201 0xa3, 0xa4, 0xa9, 0xaa, 0xad, 0xb2, 0xb5, 0xb9, | |
202 0xba, 0xbb, 0xbd, 0xbe, 0xc4, 0xc6, 0xe4, 0xe8, | |
203 0xe9, 0x01, 0x87, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, | |
204 0x8f, 0x93, 0x95, 0x96, 0x97, 0x98, 0x9b, 0x9d, | |
205 0x9e, 0xa5, 0xa6, 0xa8, 0xae, 0xaf, 0xb4, 0xb6, | |
206 0xb7, 0xbc, 0xbf, 0xc5, 0xe7, 0xef, 0x09, 0x8e, | |
207 0x90, 0x91, 0x94, 0x9f, 0xab, 0xce, 0xd7, 0xe1, | |
208 0xec, 0xed, 0xc7, 0xcf, 0xea, 0xeb, 0xc0, 0xc1, | |
209 0xc8, 0xc9, 0xca, 0xcd, 0xd2, 0xd5, 0xda, 0xdb, | |
210 0xee, 0xf0, 0xf2, 0xf3, 0xff, 0xcb, 0xcc, 0xd3, | |
211 0xd4, 0xd6, 0xdd, 0xde, 0xdf, 0xf1, 0xf4, 0xf5, | |
212 0xf6, 0xf7, 0xf8, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, | |
213 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x0b, | |
214 0x0c, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, | |
215 0x15, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, | |
216 0x1e, 0x1f, 0x7f, 0xdc, 0xf9, 0x0a, 0x0d, 0x16, | |
217 }; | |
218 // clang-format on | |
219 | |
220 constexpr size_t kShortCodeTableSize = 124; | |
221 struct ShortCodeInfo { | |
222 uint8_t symbol; | |
223 uint8_t length; | |
224 } kShortCodeTable[kShortCodeTableSize] = { | |
225 {0x30, 5}, // Match: 0b0000000, Symbol: 0 | |
226 {0x30, 5}, // Match: 0b0000001, Symbol: 0 | |
227 {0x30, 5}, // Match: 0b0000010, Symbol: 0 | |
228 {0x30, 5}, // Match: 0b0000011, Symbol: 0 | |
229 {0x31, 5}, // Match: 0b0000100, Symbol: 1 | |
230 {0x31, 5}, // Match: 0b0000101, Symbol: 1 | |
231 {0x31, 5}, // Match: 0b0000110, Symbol: 1 | |
232 {0x31, 5}, // Match: 0b0000111, Symbol: 1 | |
233 {0x32, 5}, // Match: 0b0001000, Symbol: 2 | |
234 {0x32, 5}, // Match: 0b0001001, Symbol: 2 | |
235 {0x32, 5}, // Match: 0b0001010, Symbol: 2 | |
236 {0x32, 5}, // Match: 0b0001011, Symbol: 2 | |
237 {0x61, 5}, // Match: 0b0001100, Symbol: a | |
238 {0x61, 5}, // Match: 0b0001101, Symbol: a | |
239 {0x61, 5}, // Match: 0b0001110, Symbol: a | |
240 {0x61, 5}, // Match: 0b0001111, Symbol: a | |
241 {0x63, 5}, // Match: 0b0010000, Symbol: c | |
242 {0x63, 5}, // Match: 0b0010001, Symbol: c | |
243 {0x63, 5}, // Match: 0b0010010, Symbol: c | |
244 {0x63, 5}, // Match: 0b0010011, Symbol: c | |
245 {0x65, 5}, // Match: 0b0010100, Symbol: e | |
246 {0x65, 5}, // Match: 0b0010101, Symbol: e | |
247 {0x65, 5}, // Match: 0b0010110, Symbol: e | |
248 {0x65, 5}, // Match: 0b0010111, Symbol: e | |
249 {0x69, 5}, // Match: 0b0011000, Symbol: i | |
250 {0x69, 5}, // Match: 0b0011001, Symbol: i | |
251 {0x69, 5}, // Match: 0b0011010, Symbol: i | |
252 {0x69, 5}, // Match: 0b0011011, Symbol: i | |
253 {0x6f, 5}, // Match: 0b0011100, Symbol: o | |
254 {0x6f, 5}, // Match: 0b0011101, Symbol: o | |
255 {0x6f, 5}, // Match: 0b0011110, Symbol: o | |
256 {0x6f, 5}, // Match: 0b0011111, Symbol: o | |
257 {0x73, 5}, // Match: 0b0100000, Symbol: s | |
258 {0x73, 5}, // Match: 0b0100001, Symbol: s | |
259 {0x73, 5}, // Match: 0b0100010, Symbol: s | |
260 {0x73, 5}, // Match: 0b0100011, Symbol: s | |
261 {0x74, 5}, // Match: 0b0100100, Symbol: t | |
262 {0x74, 5}, // Match: 0b0100101, Symbol: t | |
263 {0x74, 5}, // Match: 0b0100110, Symbol: t | |
264 {0x74, 5}, // Match: 0b0100111, Symbol: t | |
265 {0x20, 6}, // Match: 0b0101000, Symbol: (space) | |
266 {0x20, 6}, // Match: 0b0101001, Symbol: (space) | |
267 {0x25, 6}, // Match: 0b0101010, Symbol: % | |
268 {0x25, 6}, // Match: 0b0101011, Symbol: % | |
269 {0x2d, 6}, // Match: 0b0101100, Symbol: - | |
270 {0x2d, 6}, // Match: 0b0101101, Symbol: - | |
271 {0x2e, 6}, // Match: 0b0101110, Symbol: . | |
272 {0x2e, 6}, // Match: 0b0101111, Symbol: . | |
273 {0x2f, 6}, // Match: 0b0110000, Symbol: / | |
274 {0x2f, 6}, // Match: 0b0110001, Symbol: / | |
275 {0x33, 6}, // Match: 0b0110010, Symbol: 3 | |
276 {0x33, 6}, // Match: 0b0110011, Symbol: 3 | |
277 {0x34, 6}, // Match: 0b0110100, Symbol: 4 | |
278 {0x34, 6}, // Match: 0b0110101, Symbol: 4 | |
279 {0x35, 6}, // Match: 0b0110110, Symbol: 5 | |
280 {0x35, 6}, // Match: 0b0110111, Symbol: 5 | |
281 {0x36, 6}, // Match: 0b0111000, Symbol: 6 | |
282 {0x36, 6}, // Match: 0b0111001, Symbol: 6 | |
283 {0x37, 6}, // Match: 0b0111010, Symbol: 7 | |
284 {0x37, 6}, // Match: 0b0111011, Symbol: 7 | |
285 {0x38, 6}, // Match: 0b0111100, Symbol: 8 | |
286 {0x38, 6}, // Match: 0b0111101, Symbol: 8 | |
287 {0x39, 6}, // Match: 0b0111110, Symbol: 9 | |
288 {0x39, 6}, // Match: 0b0111111, Symbol: 9 | |
289 {0x3d, 6}, // Match: 0b1000000, Symbol: = | |
290 {0x3d, 6}, // Match: 0b1000001, Symbol: = | |
291 {0x41, 6}, // Match: 0b1000010, Symbol: A | |
292 {0x41, 6}, // Match: 0b1000011, Symbol: A | |
293 {0x5f, 6}, // Match: 0b1000100, Symbol: _ | |
294 {0x5f, 6}, // Match: 0b1000101, Symbol: _ | |
295 {0x62, 6}, // Match: 0b1000110, Symbol: b | |
296 {0x62, 6}, // Match: 0b1000111, Symbol: b | |
297 {0x64, 6}, // Match: 0b1001000, Symbol: d | |
298 {0x64, 6}, // Match: 0b1001001, Symbol: d | |
299 {0x66, 6}, // Match: 0b1001010, Symbol: f | |
300 {0x66, 6}, // Match: 0b1001011, Symbol: f | |
301 {0x67, 6}, // Match: 0b1001100, Symbol: g | |
302 {0x67, 6}, // Match: 0b1001101, Symbol: g | |
303 {0x68, 6}, // Match: 0b1001110, Symbol: h | |
304 {0x68, 6}, // Match: 0b1001111, Symbol: h | |
305 {0x6c, 6}, // Match: 0b1010000, Symbol: l | |
306 {0x6c, 6}, // Match: 0b1010001, Symbol: l | |
307 {0x6d, 6}, // Match: 0b1010010, Symbol: m | |
308 {0x6d, 6}, // Match: 0b1010011, Symbol: m | |
309 {0x6e, 6}, // Match: 0b1010100, Symbol: n | |
310 {0x6e, 6}, // Match: 0b1010101, Symbol: n | |
311 {0x70, 6}, // Match: 0b1010110, Symbol: p | |
312 {0x70, 6}, // Match: 0b1010111, Symbol: p | |
313 {0x72, 6}, // Match: 0b1011000, Symbol: r | |
314 {0x72, 6}, // Match: 0b1011001, Symbol: r | |
315 {0x75, 6}, // Match: 0b1011010, Symbol: u | |
316 {0x75, 6}, // Match: 0b1011011, Symbol: u | |
317 {0x3a, 7}, // Match: 0b1011100, Symbol: : | |
318 {0x42, 7}, // Match: 0b1011101, Symbol: B | |
319 {0x43, 7}, // Match: 0b1011110, Symbol: C | |
320 {0x44, 7}, // Match: 0b1011111, Symbol: D | |
321 {0x45, 7}, // Match: 0b1100000, Symbol: E | |
322 {0x46, 7}, // Match: 0b1100001, Symbol: F | |
323 {0x47, 7}, // Match: 0b1100010, Symbol: G | |
324 {0x48, 7}, // Match: 0b1100011, Symbol: H | |
325 {0x49, 7}, // Match: 0b1100100, Symbol: I | |
326 {0x4a, 7}, // Match: 0b1100101, Symbol: J | |
327 {0x4b, 7}, // Match: 0b1100110, Symbol: K | |
328 {0x4c, 7}, // Match: 0b1100111, Symbol: L | |
329 {0x4d, 7}, // Match: 0b1101000, Symbol: M | |
330 {0x4e, 7}, // Match: 0b1101001, Symbol: N | |
331 {0x4f, 7}, // Match: 0b1101010, Symbol: O | |
332 {0x50, 7}, // Match: 0b1101011, Symbol: P | |
333 {0x51, 7}, // Match: 0b1101100, Symbol: Q | |
334 {0x52, 7}, // Match: 0b1101101, Symbol: R | |
335 {0x53, 7}, // Match: 0b1101110, Symbol: S | |
336 {0x54, 7}, // Match: 0b1101111, Symbol: T | |
337 {0x55, 7}, // Match: 0b1110000, Symbol: U | |
338 {0x56, 7}, // Match: 0b1110001, Symbol: V | |
339 {0x57, 7}, // Match: 0b1110010, Symbol: W | |
340 {0x59, 7}, // Match: 0b1110011, Symbol: Y | |
341 {0x6a, 7}, // Match: 0b1110100, Symbol: j | |
342 {0x6b, 7}, // Match: 0b1110101, Symbol: k | |
343 {0x71, 7}, // Match: 0b1110110, Symbol: q | |
344 {0x76, 7}, // Match: 0b1110111, Symbol: v | |
345 {0x77, 7}, // Match: 0b1111000, Symbol: w | |
346 {0x78, 7}, // Match: 0b1111001, Symbol: x | |
347 {0x79, 7}, // Match: 0b1111010, Symbol: y | |
348 {0x7a, 7}, // Match: 0b1111011, Symbol: z | |
349 }; | |
350 | |
351 } // namespace | |
352 | |
353 HuffmanBitBuffer::HuffmanBitBuffer() { | |
354 Reset(); | |
355 } | |
356 | |
357 void HuffmanBitBuffer::Reset() { | |
358 accumulator_ = 0; | |
359 count_ = 0; | |
360 } | |
361 | |
362 size_t HuffmanBitBuffer::AppendBytes(StringPiece input) { | |
363 HuffmanAccumulatorBitCount free_cnt = free_count(); | |
364 size_t bytes_available = input.size(); | |
365 if (free_cnt < 8 || bytes_available == 0) { | |
366 return 0; | |
367 } | |
368 | |
369 // Top up |accumulator_| until there isn't room for a whole byte. | |
370 size_t bytes_used = 0; | |
371 auto ptr = reinterpret_cast<const uint8_t*>(input.data()); | |
372 do { | |
373 auto b = static_cast<HuffmanAccumulator>(*ptr++); | |
374 free_cnt -= 8; | |
375 accumulator_ |= (b << free_cnt); | |
376 ++bytes_used; | |
377 } while (free_cnt >= 8 && bytes_used < bytes_available); | |
378 count_ += (bytes_used * 8); | |
379 return bytes_used; | |
380 } | |
381 | |
382 HuffmanAccumulatorBitCount HuffmanBitBuffer::free_count() const { | |
383 return kHuffmanAccumulatorBitCount - count_; | |
384 } | |
385 | |
386 void HuffmanBitBuffer::ConsumeBits(HuffmanAccumulatorBitCount code_length) { | |
387 DCHECK_LE(code_length, count_); | |
388 accumulator_ <<= code_length; | |
389 count_ -= code_length; | |
390 } | |
391 | |
392 bool HuffmanBitBuffer::InputProperlyTerminated() const { | |
393 auto cnt = count(); | |
394 if (cnt < 8) { | |
395 if (cnt == 0) { | |
396 return true; | |
397 } | |
398 HuffmanAccumulator expected = ~(~HuffmanAccumulator() >> cnt); | |
399 // We expect all the bits below the high order |cnt| bits of accumulator_ | |
400 // to be cleared as we perform left shift operations while decoding. | |
401 DCHECK_EQ(accumulator_ & ~expected, 0u) | |
402 << "\n expected: " << HuffmanAccumulatorBitSet(expected) << "\n " | |
403 << *this; | |
404 return accumulator_ == expected; | |
405 } | |
406 return false; | |
407 } | |
408 | |
409 string HuffmanBitBuffer::DebugString() const { | |
410 std::stringstream ss; | |
411 ss << "{accumulator: " << HuffmanAccumulatorBitSet(accumulator_) | |
412 << "; count: " << count_ << "}"; | |
413 return ss.str(); | |
414 } | |
415 | |
416 HpackHuffmanDecoder::HpackHuffmanDecoder() {} | |
417 | |
418 HpackHuffmanDecoder::~HpackHuffmanDecoder() {} | |
419 | |
420 bool HpackHuffmanDecoder::Decode(StringPiece input, string* output) { | |
421 return DecodeShortCodesFirst(input, output); | |
422 } | |
423 | |
424 // "Legacy" decoder, used until cl/129771019 submitted, which added | |
425 // DecodeShortCodesFirst() as primary decoder method. | |
426 // TODO(jamessynge): Remove this once satisfied that there is no going back. | |
427 bool HpackHuffmanDecoder::DecodeWithIfTreeAndStruct(StringPiece input, | |
428 string* output) { | |
429 DVLOG(1) << "HpackHuffmanDecoder::DecodeWithIfTreeAndStruct"; | |
430 | |
431 // Fill bit_buffer_ from input. | |
432 input.remove_prefix(bit_buffer_.AppendBytes(input)); | |
433 | |
434 while (true) { | |
435 DVLOG(3) << "Enter Decode Loop, bit_buffer_: " << bit_buffer_; | |
436 | |
437 HuffmanCode code_prefix = bit_buffer_.value() >> kExtraAccumulatorBitCount; | |
438 DVLOG(3) << "code_prefix: " << HuffmanCodeBitSet(code_prefix); | |
439 | |
440 PrefixInfo prefix_info = PrefixToInfo(code_prefix); | |
441 DVLOG(3) << "prefix_info: " << prefix_info; | |
442 DCHECK_LE(kMinCodeBitCount, prefix_info.code_length); | |
443 DCHECK_LE(prefix_info.code_length, kMaxCodeBitCount); | |
444 | |
445 if (prefix_info.code_length <= bit_buffer_.count()) { | |
446 // We have enough bits for one code. | |
447 uint32_t canonical = prefix_info.DecodeToCanonical(code_prefix); | |
448 if (canonical < 256) { | |
449 // Valid code. | |
450 char c = kCanonicalToSymbol[canonical]; | |
451 output->push_back(c); | |
452 bit_buffer_.ConsumeBits(prefix_info.code_length); | |
453 continue; | |
454 } | |
455 // Encoder is not supposed to explicity encode the EOS symbol. | |
456 DLOG(ERROR) << "EOS explicitly encoded!\n " << bit_buffer_ << "\n " | |
457 << prefix_info; | |
458 return false; | |
459 } | |
460 // bit_buffer_ doesn't have enough bits in it to decode the next symbol. | |
461 // Append to it as many bytes as are available AND fit. | |
462 size_t byte_count = bit_buffer_.AppendBytes(input); | |
463 if (byte_count == 0) { | |
464 DCHECK_EQ(input.size(), 0u); | |
465 return true; | |
466 } | |
467 input.remove_prefix(byte_count); | |
468 } | |
469 } | |
470 | |
471 bool HpackHuffmanDecoder::DecodeShortCodesFirst(StringPiece input, | |
472 string* output) { | |
473 DVLOG(1) << "HpackHuffmanDecoder::DecodeShortCodesFirst"; | |
474 | |
475 // Fill bit_buffer_ from input. | |
476 input.remove_prefix(bit_buffer_.AppendBytes(input)); | |
477 | |
478 while (true) { | |
479 DVLOG(3) << "Enter Decode Loop, bit_buffer_: " << bit_buffer_; | |
480 if (bit_buffer_.count() >= 7) { | |
481 // Get high 7 bits of the bit buffer, see if that contains a complete | |
482 // code of 5, 6 or 7 bits. | |
483 uint8_t short_code = | |
484 bit_buffer_.value() >> (kHuffmanAccumulatorBitCount - 7); | |
485 DCHECK_LT(short_code, 128); | |
486 if (short_code < kShortCodeTableSize) { | |
487 ShortCodeInfo info = kShortCodeTable[short_code]; | |
488 bit_buffer_.ConsumeBits(info.length); | |
489 output->push_back(static_cast<char>(info.symbol)); | |
490 continue; | |
491 } | |
492 // The code is more than 7 bits long. Use PrefixToInfo, etc. to decode | |
493 // longer codes. | |
494 } else { | |
495 // We may have (mostly) drained bit_buffer_. If we can top it up, try | |
496 // using the table decoder above. | |
497 size_t byte_count = bit_buffer_.AppendBytes(input); | |
498 if (byte_count > 0) { | |
499 input.remove_prefix(byte_count); | |
500 continue; | |
501 } | |
502 } | |
503 | |
504 HuffmanCode code_prefix = bit_buffer_.value() >> kExtraAccumulatorBitCount; | |
505 DVLOG(3) << "code_prefix: " << HuffmanCodeBitSet(code_prefix); | |
506 | |
507 PrefixInfo prefix_info = PrefixToInfo(code_prefix); | |
508 DVLOG(3) << "prefix_info: " << prefix_info; | |
509 DCHECK_LE(kMinCodeBitCount, prefix_info.code_length); | |
510 DCHECK_LE(prefix_info.code_length, kMaxCodeBitCount); | |
511 | |
512 if (prefix_info.code_length <= bit_buffer_.count()) { | |
513 // We have enough bits for one code. | |
514 uint32_t canonical = prefix_info.DecodeToCanonical(code_prefix); | |
515 if (canonical < 256) { | |
516 // Valid code. | |
517 char c = kCanonicalToSymbol[canonical]; | |
518 output->push_back(c); | |
519 bit_buffer_.ConsumeBits(prefix_info.code_length); | |
520 continue; | |
521 } | |
522 // Encoder is not supposed to explicity encode the EOS symbol. | |
523 DLOG(ERROR) << "EOS explicitly encoded!\n " << bit_buffer_ << "\n " | |
524 << prefix_info; | |
525 return false; | |
526 } | |
527 // bit_buffer_ doesn't have enough bits in it to decode the next symbol. | |
528 // Append to it as many bytes as are available AND fit. | |
529 size_t byte_count = bit_buffer_.AppendBytes(input); | |
530 if (byte_count == 0) { | |
531 DCHECK_EQ(input.size(), 0u); | |
532 return true; | |
533 } | |
534 input.remove_prefix(byte_count); | |
535 } | |
536 } | |
537 | |
538 string HpackHuffmanDecoder::DebugString() const { | |
539 return bit_buffer_.DebugString(); | |
540 } | |
541 | |
542 } // namespace net | |
OLD | NEW |