Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(306)

Side by Side Diff: net/spdy/hpack/hpack_huffman_decoder.cc

Issue 2832973003: Split net/spdy into core and chromium subdirectories. (Closed)
Patch Set: Fix some more build rules. Created 3 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « net/spdy/hpack/hpack_huffman_decoder.h ('k') | net/spdy/hpack/hpack_huffman_decoder_test.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 // Copyright 2016 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 //
5 // Decoder for strings encoded using the HPACK Huffman Code (see
6 // https://httpwg.github.io/specs/rfc7541.html#huffman.code).
7 //
8 // This implementation is inspired by the One-Shift algorithm described in
9 // "On the Implementation of Minimum Redundancy Prefix Codes", by Alistair
10 // Moffat and Andrew Turpin, 1997.
11 // See also https://en.wikipedia.org/wiki/Canonical_Huffman_code for background
12 // on canonical Huffman codes.
13 //
14 // This decoder differs from that in .../spdy/hpack/hpack_huffman_table.cc
15 // as follows:
16 // 1) It decodes only the code described in RFC7541, where as the older
17 // implementation supported any canonical Huffman code provided at run
18 // time.
19 // 2) It uses a fixed amount of memory allocated at build time; it doesn't
20 // construct a tree of of decoding tables based on an encoding
21 // table provided at run time.
22 // 3) In benchmarks it runs from 10% to 70% faster, based on the length
23 // of the strings (faster for longer strings). Some of the improvements
24 // could be back ported, but others are fundamental to the approach.
25
26 #include "net/spdy/hpack/hpack_huffman_decoder.h"
27
28 #include <bitset>
29 #include <limits>
30 #include <utility>
31
32 #include "base/logging.h"
33 #include "net/spdy/hpack/hpack_input_stream.h"
34
35 namespace net {
36 namespace {
37
38 typedef HpackHuffmanDecoder::HuffmanWord HuffmanWord;
39 typedef HpackHuffmanDecoder::HuffmanCodeLength HuffmanCodeLength;
40
41 const HuffmanCodeLength kHuffmanWordLength =
42 std::numeric_limits<HuffmanWord>::digits;
43
44 const HuffmanCodeLength kMinCodeLength = 5;
45 const HuffmanCodeLength kMaxCodeLength = 30;
46
47 const HuffmanWord kInvalidLJCode = ~static_cast<HuffmanWord>(0);
48 // Length of a code in bits to the first code with that length, left-justified.
49 // Note that this can be computed from kLengthToFirstCanonical.
50 const HuffmanWord kLengthToFirstLJCode[] = {
51 kInvalidLJCode, // There are no codes of length 0.
52 kInvalidLJCode, // There are no codes of length 1.
53 kInvalidLJCode, // There are no codes of length 2.
54 kInvalidLJCode, // There are no codes of length 3.
55 kInvalidLJCode, // There are no codes of length 4.
56 0x00000000, // Length 5.
57 0x50000000, // Length 6.
58 0xb8000000, // Length 7.
59 0xf8000000, // Length 8.
60 kInvalidLJCode, // There are no codes of length 9.
61 0xfe000000, // Length 10.
62 0xff400000, // Length 11.
63 0xffa00000, // Length 12.
64 0xffc00000, // Length 13.
65 0xfff00000, // Length 14.
66 0xfff80000, // Length 15.
67 kInvalidLJCode, // There are no codes of length 16.
68 kInvalidLJCode, // There are no codes of length 17.
69 kInvalidLJCode, // There are no codes of length 18.
70 0xfffe0000, // Length 19.
71 0xfffe6000, // Length 20.
72 0xfffee000, // Length 21.
73 0xffff4800, // Length 22.
74 0xffffb000, // Length 23.
75 0xffffea00, // Length 24.
76 0xfffff600, // Length 25.
77 0xfffff800, // Length 26.
78 0xfffffbc0, // Length 27.
79 0xfffffe20, // Length 28.
80 kInvalidLJCode, // There are no codes of length 29.
81 0xfffffff0, // Length 30.
82 };
83
84 // TODO(jamessynge): Determine the performance impact of different types for
85 // the elements of this array (i.e. a larger type uses more cache, yet might
86 // better on some architectures).
87 const uint8_t kInvalidCanonical = 255;
88 // Maps from length of a code to the first 'canonical symbol' with that length.
89 const uint8_t kLengthToFirstCanonical[] = {
90 kInvalidCanonical, // Length 0, 0 codes.
91 kInvalidCanonical, // Length 1, 0 codes.
92 kInvalidCanonical, // Length 2, 0 codes.
93 kInvalidCanonical, // Length 3, 0 codes.
94 kInvalidCanonical, // Length 4, 0 codes.
95 0, // Length 5, 10 codes.
96 10, // Length 6, 26 codes.
97 36, // Length 7, 32 codes.
98 68, // Length 8, 6 codes.
99 kInvalidCanonical, // Length 9, 0 codes.
100 74, // Length 10, 5 codes.
101 79, // Length 11, 3 codes.
102 82, // Length 12, 2 codes.
103 84, // Length 13, 6 codes.
104 90, // Length 14, 2 codes.
105 92, // Length 15, 3 codes.
106 kInvalidCanonical, // Length 16, 0 codes.
107 kInvalidCanonical, // Length 17, 0 codes.
108 kInvalidCanonical, // Length 18, 0 codes.
109 95, // Length 19, 3 codes.
110 98, // Length 20, 8 codes.
111 106, // Length 21, 13 codes.
112 119, // Length 22, 26 codes.
113 145, // Length 23, 29 codes.
114 174, // Length 24, 12 codes.
115 186, // Length 25, 4 codes.
116 190, // Length 26, 15 codes.
117 205, // Length 27, 19 codes.
118 224, // Length 28, 29 codes.
119 kInvalidCanonical, // Length 29, 0 codes.
120 253, // Length 30, 4 codes.
121 };
122
123 // Mapping from canonical symbol (0 to 255) to actual symbol.
124 // clang-format off
125 const uint8_t kCanonicalToSymbol[] = {
126 '0', '1', '2', 'a', 'c', 'e', 'i', 'o',
127 's', 't', 0x20, '%', '-', '.', '/', '3',
128 '4', '5', '6', '7', '8', '9', '=', 'A',
129 '_', 'b', 'd', 'f', 'g', 'h', 'l', 'm',
130 'n', 'p', 'r', 'u', ':', 'B', 'C', 'D',
131 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L',
132 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T',
133 'U', 'V', 'W', 'Y', 'j', 'k', 'q', 'v',
134 'w', 'x', 'y', 'z', '&', '*', ',', ';',
135 'X', 'Z', '!', '\"', '(', ')', '?', '\'',
136 '+', '|', '#', '>', 0x00, '$', '@', '[',
137 ']', '~', '^', '}', '<', '`', '{', '\\',
138 0xc3, 0xd0, 0x80, 0x82, 0x83, 0xa2, 0xb8, 0xc2,
139 0xe0, 0xe2, 0x99, 0xa1, 0xa7, 0xac, 0xb0, 0xb1,
140 0xb3, 0xd1, 0xd8, 0xd9, 0xe3, 0xe5, 0xe6, 0x81,
141 0x84, 0x85, 0x86, 0x88, 0x92, 0x9a, 0x9c, 0xa0,
142 0xa3, 0xa4, 0xa9, 0xaa, 0xad, 0xb2, 0xb5, 0xb9,
143 0xba, 0xbb, 0xbd, 0xbe, 0xc4, 0xc6, 0xe4, 0xe8,
144 0xe9, 0x01, 0x87, 0x89, 0x8a, 0x8b, 0x8c, 0x8d,
145 0x8f, 0x93, 0x95, 0x96, 0x97, 0x98, 0x9b, 0x9d,
146 0x9e, 0xa5, 0xa6, 0xa8, 0xae, 0xaf, 0xb4, 0xb6,
147 0xb7, 0xbc, 0xbf, 0xc5, 0xe7, 0xef, 0x09, 0x8e,
148 0x90, 0x91, 0x94, 0x9f, 0xab, 0xce, 0xd7, 0xe1,
149 0xec, 0xed, 0xc7, 0xcf, 0xea, 0xeb, 0xc0, 0xc1,
150 0xc8, 0xc9, 0xca, 0xcd, 0xd2, 0xd5, 0xda, 0xdb,
151 0xee, 0xf0, 0xf2, 0xf3, 0xff, 0xcb, 0xcc, 0xd3,
152 0xd4, 0xd6, 0xdd, 0xde, 0xdf, 0xf1, 0xf4, 0xf5,
153 0xf6, 0xf7, 0xf8, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe,
154 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x0b,
155 0x0c, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14,
156 0x15, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d,
157 0x1e, 0x1f, 0x7f, 0xdc, 0xf9, 0x0a, 0x0d, 0x16,
158 };
159 // clang-format on
160
161 #if DCHECK_IS_ON()
162
163 // Only used in DLOG.
164 bool IsEOSPrefix(HuffmanWord bits, HuffmanCodeLength bits_available) {
165 if (bits_available == 0) {
166 return true;
167 }
168 // We expect all the bits below the high order |bits_available| bits
169 // to be cleared.
170 HuffmanWord expected = HuffmanWord(0xffffffff) << (32 - bits_available);
171 return bits == expected;
172 }
173
174 #endif // DCHECK_IS_ON()
175
176 } // namespace
177
178 // TODO(jamessynge): Should we read these magic numbers from
179 // kLengthToFirstLJCode? Would that reduce cache consumption? Slow decoding?
180 // TODO(jamessynge): Is this being inlined by the compiler? Should we inline
181 // into DecodeString the tests for code lengths 5 through 8 (> 99% of codes
182 // according to the HPACK spec)?
183 HpackHuffmanDecoder::HuffmanCodeLength HpackHuffmanDecoder::CodeLengthOfPrefix(
184 HpackHuffmanDecoder::HuffmanWord value) {
185 HuffmanCodeLength length;
186 if (value < 0xb8000000) {
187 if (value < 0x50000000) {
188 length = 5;
189 } else {
190 length = 6;
191 }
192 } else {
193 if (value < 0xfe000000) {
194 if (value < 0xf8000000) {
195 length = 7;
196 } else {
197 length = 8;
198 }
199 } else {
200 if (value < 0xffc00000) {
201 if (value < 0xffa00000) {
202 if (value < 0xff400000) {
203 length = 10;
204 } else {
205 length = 11;
206 }
207 } else {
208 length = 12;
209 }
210 } else {
211 if (value < 0xfffe0000) {
212 if (value < 0xfff80000) {
213 if (value < 0xfff00000) {
214 length = 13;
215 } else {
216 length = 14;
217 }
218 } else {
219 length = 15;
220 }
221 } else {
222 if (value < 0xffff4800) {
223 if (value < 0xfffee000) {
224 if (value < 0xfffe6000) {
225 length = 19;
226 } else {
227 length = 20;
228 }
229 } else {
230 length = 21;
231 }
232 } else {
233 if (value < 0xffffea00) {
234 if (value < 0xffffb000) {
235 length = 22;
236 } else {
237 length = 23;
238 }
239 } else {
240 if (value < 0xfffffbc0) {
241 if (value < 0xfffff800) {
242 if (value < 0xfffff600) {
243 length = 24;
244 } else {
245 length = 25;
246 }
247 } else {
248 length = 26;
249 }
250 } else {
251 if (value < 0xfffffff0) {
252 if (value < 0xfffffe20) {
253 length = 27;
254 } else {
255 length = 28;
256 }
257 } else {
258 length = 30;
259 }
260 }
261 }
262 }
263 }
264 }
265 }
266 }
267 return length;
268 }
269
270 HuffmanWord HpackHuffmanDecoder::DecodeToCanonical(
271 HuffmanCodeLength code_length,
272 HuffmanWord bits) {
273 DCHECK_LE(kMinCodeLength, code_length);
274 DCHECK_LE(code_length, kMaxCodeLength);
275
276 // What is the first left-justified code of length |code_length|?
277 HuffmanWord first_lj_code = kLengthToFirstLJCode[code_length];
278 DCHECK_NE(kInvalidLJCode, first_lj_code);
279
280 // Which canonical symbol corresponds to the high order |code_length|
281 // bits of |first_lj_code|?
282 HuffmanWord first_canonical = kLengthToFirstCanonical[code_length];
283 DCHECK_NE(kInvalidCanonical, first_canonical);
284
285 // What is the position of the canonical symbol being decoded within
286 // the canonical symbols of length |code_length|?
287 HuffmanWord ordinal_in_length =
288 ((bits - first_lj_code) >> (kHuffmanWordLength - code_length));
289
290 // Combined these two to produce the position of the canonical symbol
291 // being decoded within all of the canonical symbols.
292 return first_canonical + ordinal_in_length;
293 }
294
295 char HpackHuffmanDecoder::CanonicalToSource(HuffmanWord canonical) {
296 DCHECK_LT(canonical, 256u);
297 return static_cast<char>(kCanonicalToSymbol[canonical]);
298 }
299
300 // TODO(jamessynge): Maybe further refactorings, including just passing in a
301 // SpdyStringPiece instead of an HpackInputStream, thus avoiding the PeekBits
302 // calls, and also allowing us to separate the code into portions dealing with
303 // long strings, and a later portion dealing with the last few bytes of strings.
304 // TODO(jamessynge): Determine if that is worth it by adding some counters to
305 // measure the distribution of string sizes seen in practice.
306 bool HpackHuffmanDecoder::DecodeString(HpackInputStream* in, SpdyString* out) {
307 out->clear();
308
309 // Load |bits| with the leading bits of the input stream, left justified
310 // (i.e. the bits of the first byte are the high-order bits of |bits|,
311 // and the bits of the fourth byte are the low-order bits of |bits|).
312 // |peeked_success| if there are more bits in |*in| (i.e. the encoding
313 // of the string to be decoded is more than 4 bytes).
314
315 auto bits_available_and_bits = in->InitializePeekBits();
316 HuffmanCodeLength bits_available = bits_available_and_bits.first;
317 HuffmanWord bits = bits_available_and_bits.second;
318
319 // |peeked_success| tracks whether the previous PeekBits call was able to
320 // store any new bits into |bits|. For the first pass through the loop below
321 // the value false is appropriate:
322 // If we have 32 bits (i.e. the input has at least 4 bytes), then:
323 // |peeked_sucess| is not examined because |code_length| is
324 // at most 30 in the HPACK Huffman Code.
325 // If we have at most 24 bits (i.e. the input has at most 3 bytes), then:
326 // It is possible that the very first |code_length| is greater than
327 // |bits_available|, in which case we need to read peeked_success to
328 // determine whether we should try to read more input, or have already
329 // loaded |bits| with the final bits of the input.
330 // After the first loop |peeked_success| has been set by a call to PeekBits.
331 bool peeked_success = false;
332
333 while (true) {
334 const HuffmanCodeLength code_length = CodeLengthOfPrefix(bits);
335 DCHECK_LE(kMinCodeLength, code_length);
336 DCHECK_LE(code_length, kMaxCodeLength);
337 DVLOG(2) << "bits: 0b" << std::bitset<32>(bits)
338 << " (avail=" << bits_available << ")"
339 << " prefix length: " << code_length
340 << (code_length > bits_available ? " *****" : "");
341 if (code_length > bits_available) {
342 if (!peeked_success) {
343 // Unable to read enough input for a match. If only a portion of
344 // the last byte remains, this is a successful EOS condition.
345 // Note that this does NOT check whether the available bits are all
346 // set to 1, which the encoder is required to set at EOS, and the
347 // decoder is required to check.
348 // TODO(jamessynge): Discuss whether we should enforce this check,
349 // as required by the RFC, presumably flag guarded so that we can
350 // disable it should it occur a lot. From my testing it appears that
351 // our encoder may be doing this wrong. Sigh.
352 // TODO(jamessynge): Add a counter for how often the remaining bits
353 // are non-zero.
354 in->ConsumeByteRemainder();
355 DLOG_IF(WARNING,
356 (in->HasMoreData() || !IsEOSPrefix(bits, bits_available)))
357 << "bits: 0b" << std::bitset<32>(bits)
358 << " (avail=" << bits_available << ")"
359 << " prefix length: " << code_length
360 << " HasMoreData: " << in->HasMoreData();
361 return !in->HasMoreData();
362 }
363 // We're dealing with a long code. It *might* be useful to add a special
364 // method to HpackInputStream for getting more than "at most 8" bits
365 // at a time.
366 do {
367 peeked_success = in->PeekBits(&bits_available, &bits);
368 } while (peeked_success && bits_available < 32);
369 } else {
370 // Convert from the prefix code of length |code_length| to the
371 // canonical symbol (i.e. where the input symbols (bytes) are ordered by
372 // increasing code length and then by their increasing uint8 value).
373 HuffmanWord canonical = DecodeToCanonical(code_length, bits);
374
375 bits = bits << code_length;
376 bits_available -= code_length;
377 in->ConsumeBits(code_length);
378
379 if (canonical < 256) {
380 out->push_back(CanonicalToSource(canonical));
381 } else {
382 // Encoder is not supposed to explicity encode the EOS symbol (30
383 // 1-bits).
384 // TODO(jamessynge): Discuss returning false here, as required by HPACK.
385 DCHECK(false) << "EOS explicitly encoded!\n"
386 << "bits: 0b" << std::bitset<32>(bits)
387 << " (avail=" << bits_available << ")"
388 << " prefix length: " << code_length
389 << " canonical: " << canonical;
390 }
391 // Get some more bits for decoding (up to 8). |peeked_success| is true
392 // if we got any bits.
393 peeked_success = in->PeekBits(&bits_available, &bits);
394 }
395 DLOG_IF(WARNING, (VLOG_IS_ON(2) && bits_available < 32 && !peeked_success))
396 << "no more peeking possible";
397 }
398 }
399
400 } // namespace net
OLDNEW
« no previous file with comments | « net/spdy/hpack/hpack_huffman_decoder.h ('k') | net/spdy/hpack/hpack_huffman_decoder_test.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698