OLD | NEW |
| (Empty) |
1 // Copyright 2016 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 #ifndef NET_HTTP2_HPACK_HUFFMAN_HTTP2_HPACK_HUFFMAN_DECODER_H_ | |
6 #define NET_HTTP2_HPACK_HUFFMAN_HTTP2_HPACK_HUFFMAN_DECODER_H_ | |
7 | |
8 // HpackHuffmanDecoder is an incremental decoder of strings that have been | |
9 // encoded using the Huffman table defined in the HPACK spec. | |
10 // By incremental, we mean that the HpackHuffmanDecoder::Decode method does | |
11 // not require the entire string to be provided, and can instead decode the | |
12 // string as fragments of it become available (e.g. as HPACK block fragments | |
13 // are received for decoding by HpackEntryDecoder). | |
14 | |
15 #include <stddef.h> | |
16 | |
17 #include <iosfwd> | |
18 #include <string> | |
19 | |
20 #include "base/strings/string_piece.h" | |
21 #include "net/base/net_export.h" | |
22 | |
23 namespace net { | |
24 | |
25 // HuffmanAccumulator is used to store bits during decoding, e.g. next N bits | |
26 // that have not yet been decoded, but have been extracted from the encoded | |
27 // string). An advantage of using a uint64 for the accumulator | |
28 // is that it has room for the bits of the longest code plus the bits of a full | |
29 // byte; that means that when adding more bits to the accumulator, it can always | |
30 // be done in whole bytes. For example, if we currently have 26 bits in the | |
31 // accumulator, and need more to decode the current symbol, we can add a whole | |
32 // byte to the accumulator, and not have to do juggling with adding 6 bits (to | |
33 // reach 30), and then keep track of the last two bits we've not been able to | |
34 // add to the accumulator. | |
35 typedef uint64_t HuffmanAccumulator; | |
36 typedef size_t HuffmanAccumulatorBitCount; | |
37 | |
38 // HuffmanBitBuffer stores the leading edge of bits to be decoded. The high | |
39 // order bit of accumulator_ is the next bit to be decoded. | |
40 class NET_EXPORT_PRIVATE HuffmanBitBuffer { | |
41 public: | |
42 HuffmanBitBuffer(); | |
43 | |
44 // Prepare for decoding a new Huffman encoded string. | |
45 void Reset(); | |
46 | |
47 // Add as many whole bytes to the accumulator (accumulator_) as possible, | |
48 // returning the number of bytes added. | |
49 size_t AppendBytes(base::StringPiece input); | |
50 | |
51 // Get the bits of the accumulator. | |
52 HuffmanAccumulator value() const { return accumulator_; } | |
53 | |
54 // Number of bits of the encoded string that are in the accumulator | |
55 // (accumulator_). | |
56 HuffmanAccumulatorBitCount count() const { return count_; } | |
57 | |
58 // Are there no bits in the accumulator? | |
59 bool IsEmpty() const { return count_ == 0; } | |
60 | |
61 // Number of additional bits that can be added to the accumulator. | |
62 HuffmanAccumulatorBitCount free_count() const; | |
63 | |
64 // Consume the leading |code_length| bits of the accumulator. | |
65 void ConsumeBits(HuffmanAccumulatorBitCount code_length); | |
66 | |
67 // Are the contents valid for the end of a Huffman encoded string? The RFC | |
68 // states that EOS (end-of-string) symbol must not be explicitly encoded in | |
69 // the bit stream, but any unused bits in the final byte must be set to the | |
70 // prefix of the EOS symbol, which is all 1 bits. So there can be at most 7 | |
71 // such bits. | |
72 // Returns true if the bit buffer is empty, or contains at most 7 bits, all | |
73 // of them 1. Otherwise returns false. | |
74 bool InputProperlyTerminated() const; | |
75 | |
76 std::string DebugString() const; | |
77 | |
78 private: | |
79 HuffmanAccumulator accumulator_; | |
80 HuffmanAccumulatorBitCount count_; | |
81 }; | |
82 | |
83 inline std::ostream& operator<<(std::ostream& out, const HuffmanBitBuffer& v) { | |
84 return out << v.DebugString(); | |
85 } | |
86 | |
87 class NET_EXPORT_PRIVATE HpackHuffmanDecoder { | |
88 public: | |
89 HpackHuffmanDecoder(); | |
90 ~HpackHuffmanDecoder(); | |
91 | |
92 // Prepare for decoding a new Huffman encoded string. | |
93 void Reset() { bit_buffer_.Reset(); } | |
94 | |
95 // Decode the portion of a HPACK Huffman encoded string that is in |input|, | |
96 // appending the decoded symbols into |*output|, stopping when more bits are | |
97 // needed to determine the next symbol, which/ means that the input has been | |
98 // drained, and also that the bit_buffer_ is empty or that the bits that are | |
99 // in it are not a whole symbol. | |
100 // If |input| is the start of a string, the caller must first call Reset. | |
101 // If |input| includes the end of the encoded string, the caller must call | |
102 // InputProperlyTerminated after Decode has returned true in order to | |
103 // determine if the encoded string was properly terminated. | |
104 // Returns false if something went wrong (e.g. the encoding contains the code | |
105 // EOS symbol). Otherwise returns true, in which case input has been fully | |
106 // decoded or buffered; in particular, if the low-order bit of the final byte | |
107 // of the input is not the last bit of an encoded symbol, then bit_buffer_ | |
108 // will contain the leading bits of the code for that symbol, but not the | |
109 // final bits of that code. | |
110 // Note that output should be empty, but that it is not cleared by Decode(). | |
111 bool Decode(base::StringPiece input, std::string* output); | |
112 | |
113 // Is what remains in the bit_buffer_ valid at the end of an encoded string? | |
114 // Call after passing the the final portion of a Huffman string to Decode, | |
115 // and getting true as the result. | |
116 bool InputProperlyTerminated() const { | |
117 return bit_buffer_.InputProperlyTerminated(); | |
118 } | |
119 | |
120 bool IsEmptyForTest() const { return bit_buffer_.IsEmpty(); } | |
121 | |
122 std::string DebugString() const; | |
123 | |
124 ////////////////////////////////////////////////////////////////////////////// | |
125 // Alternate implementations of Decode: | |
126 | |
127 // As above, implemented using a tree of if statements to determine the code | |
128 // length, etc., which are returned as a tree. See PrefixToInfo. This is the | |
129 // original implementation, current as of 2016-8-8. | |
130 bool DecodeWithIfTreeAndStruct(base::StringPiece input, std::string* output); | |
131 | |
132 // Based on DecodeWithIfTreeAndStruct, but adds an optimization for the common | |
133 // case of short codes (5, 6 or 7), which make up a large fraction of the | |
134 // frequency distribution on which the HPACK table was based. | |
135 // TODO(jamessynge): Be precise about that fraction. | |
136 bool DecodeShortCodesFirst(base::StringPiece input, std::string* output); | |
137 | |
138 private: | |
139 HuffmanBitBuffer bit_buffer_; | |
140 }; | |
141 | |
142 inline std::ostream& operator<<(std::ostream& out, | |
143 const HpackHuffmanDecoder& v) { | |
144 return out << v.DebugString(); | |
145 } | |
146 | |
147 } // namespace net | |
148 | |
149 #endif // NET_HTTP2_HPACK_HUFFMAN_HTTP2_HPACK_HUFFMAN_DECODER_H_ | |
OLD | NEW |