OLD | NEW |
(Empty) | |
| 1 // Copyright 2016 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. |
| 4 |
| 5 #ifndef NET_HTTP2_HPACK_HUFFMAN_HTTP2_HPACK_HUFFMAN_DECODER_H_ |
| 6 #define NET_HTTP2_HPACK_HUFFMAN_HTTP2_HPACK_HUFFMAN_DECODER_H_ |
| 7 |
| 8 // HpackHuffmanDecoder is an incremental decoder of strings that have been |
| 9 // encoded using the Huffman table defined in the HPACK spec. |
| 10 // By incremental, we mean that the HpackHuffmanDecoder::Decode method does |
| 11 // not require the entire string to be provided, and can instead decode the |
| 12 // string as fragments of it become available (e.g. as HPACK block fragments |
| 13 // are received for decoding by HpackEntryDecoder). |
| 14 |
| 15 #include <stddef.h> |
| 16 |
| 17 #include <iosfwd> |
| 18 #include <string> |
| 19 |
| 20 #include "base/strings/string_piece.h" |
| 21 #include "net/base/net_export.h" |
| 22 |
| 23 namespace net { |
| 24 |
| 25 // HuffmanAccumulator is used to store bits during decoding, e.g. next N bits |
| 26 // that have not yet been decoded, but have been extracted from the encoded |
| 27 // string). An advantage of using a uint64 for the accumulator |
| 28 // is that it has room for the bits of the longest code plus the bits of a full |
| 29 // byte; that means that when adding more bits to the accumulator, it can always |
| 30 // be done in whole bytes. For example, if we currently have 26 bits in the |
| 31 // accumulator, and need more to decode the current symbol, we can add a whole |
| 32 // byte to the accumulator, and not have to do juggling with adding 6 bits (to |
| 33 // reach 30), and then keep track of the last two bits we've not been able to |
| 34 // add to the accumulator. |
| 35 typedef uint64_t HuffmanAccumulator; |
| 36 typedef size_t HuffmanAccumulatorBitCount; |
| 37 |
| 38 // HuffmanBitBuffer stores the leading edge of bits to be decoded. The high |
| 39 // order bit of accumulator_ is the next bit to be decoded. |
| 40 class NET_EXPORT_PRIVATE HuffmanBitBuffer { |
| 41 public: |
| 42 HuffmanBitBuffer(); |
| 43 |
| 44 // Prepare for decoding a new Huffman encoded string. |
| 45 void Reset(); |
| 46 |
| 47 // Add as many whole bytes to the accumulator (accumulator_) as possible, |
| 48 // returning the number of bytes added. |
| 49 size_t AppendBytes(base::StringPiece input); |
| 50 |
| 51 // Get the bits of the accumulator. |
| 52 HuffmanAccumulator value() const { return accumulator_; } |
| 53 |
| 54 // Number of bits of the encoded string that are in the accumulator |
| 55 // (accumulator_). |
| 56 HuffmanAccumulatorBitCount count() const { return count_; } |
| 57 |
| 58 // Are there no bits in the accumulator? |
| 59 bool IsEmpty() const { return count_ == 0; } |
| 60 |
| 61 // Number of additional bits that can be added to the accumulator. |
| 62 HuffmanAccumulatorBitCount free_count() const; |
| 63 |
| 64 // Consume the leading |code_length| bits of the accumulator. |
| 65 void ConsumeBits(HuffmanAccumulatorBitCount code_length); |
| 66 |
| 67 // Are the contents valid for the end of a Huffman encoded string? The RFC |
| 68 // states that EOS (end-of-string) symbol must not be explicitly encoded in |
| 69 // the bit stream, but any unused bits in the final byte must be set to the |
| 70 // prefix of the EOS symbol, which is all 1 bits. So there can be at most 7 |
| 71 // such bits. |
| 72 // Returns true if the bit buffer is empty, or contains at most 7 bits, all |
| 73 // of them 1. Otherwise returns false. |
| 74 bool InputProperlyTerminated() const; |
| 75 |
| 76 std::string DebugString() const; |
| 77 |
| 78 private: |
| 79 HuffmanAccumulator accumulator_; |
| 80 HuffmanAccumulatorBitCount count_; |
| 81 }; |
| 82 |
| 83 inline std::ostream& operator<<(std::ostream& out, const HuffmanBitBuffer& v) { |
| 84 return out << v.DebugString(); |
| 85 } |
| 86 |
| 87 class NET_EXPORT_PRIVATE HpackHuffmanDecoder { |
| 88 public: |
| 89 HpackHuffmanDecoder(); |
| 90 ~HpackHuffmanDecoder(); |
| 91 |
| 92 // Prepare for decoding a new Huffman encoded string. |
| 93 void Reset() { bit_buffer_.Reset(); } |
| 94 |
| 95 // Decode the portion of a HPACK Huffman encoded string that is in |input|, |
| 96 // appending the decoded symbols into |*output|, stopping when more bits are |
| 97 // needed to determine the next symbol, which/ means that the input has been |
| 98 // drained, and also that the bit_buffer_ is empty or that the bits that are |
| 99 // in it are not a whole symbol. |
| 100 // If |input| is the start of a string, the caller must first call Reset. |
| 101 // If |input| includes the end of the encoded string, the caller must call |
| 102 // InputProperlyTerminated after Decode has returned true in order to |
| 103 // determine if the encoded string was properly terminated. |
| 104 // Returns false if something went wrong (e.g. the encoding contains the code |
| 105 // EOS symbol). Otherwise returns true, in which case input has been fully |
| 106 // decoded or buffered; in particular, if the low-order bit of the final byte |
| 107 // of the input is not the last bit of an encoded symbol, then bit_buffer_ |
| 108 // will contain the leading bits of the code for that symbol, but not the |
| 109 // final bits of that code. |
| 110 // Note that output should be empty, but that it is not cleared by Decode(). |
| 111 bool Decode(base::StringPiece input, std::string* output); |
| 112 |
| 113 // Is what remains in the bit_buffer_ valid at the end of an encoded string? |
| 114 // Call after passing the the final portion of a Huffman string to Decode, |
| 115 // and getting true as the result. |
| 116 bool InputProperlyTerminated() const { |
| 117 return bit_buffer_.InputProperlyTerminated(); |
| 118 } |
| 119 |
| 120 bool IsEmptyForTest() const { return bit_buffer_.IsEmpty(); } |
| 121 |
| 122 std::string DebugString() const; |
| 123 |
| 124 ////////////////////////////////////////////////////////////////////////////// |
| 125 // Alternate implementations of Decode: |
| 126 |
| 127 // As above, implemented using a tree of if statements to determine the code |
| 128 // length, etc., which are returned as a tree. See PrefixToInfo. This is the |
| 129 // original implementation, current as of 2016-8-8. |
| 130 bool DecodeWithIfTreeAndStruct(base::StringPiece input, std::string* output); |
| 131 |
| 132 // Based on DecodeWithIfTreeAndStruct, but adds an optimization for the common |
| 133 // case of short codes (5, 6 or 7), which make up a large fraction of the |
| 134 // frequency distribution on which the HPACK table was based. |
| 135 // TODO(jamessynge): Be precise about that fraction. |
| 136 bool DecodeShortCodesFirst(base::StringPiece input, std::string* output); |
| 137 |
| 138 private: |
| 139 HuffmanBitBuffer bit_buffer_; |
| 140 }; |
| 141 |
| 142 inline std::ostream& operator<<(std::ostream& out, |
| 143 const HpackHuffmanDecoder& v) { |
| 144 return out << v.DebugString(); |
| 145 } |
| 146 |
| 147 } // namespace net |
| 148 |
| 149 #endif // NET_HTTP2_HPACK_HUFFMAN_HTTP2_HPACK_HUFFMAN_DECODER_H_ |
OLD | NEW |