net/spdy/hpack/hpack_huffman_decoder.cc - Issue 2832973003: Split net/spdy into core and chromium subdirectories.

Side by Side Diff: net/spdy/hpack/hpack_huffman_decoder.cc

Issue 2832973003: Split net/spdy into core and chromium subdirectories. (Closed)

Patch Set: Fix some more build rules. Created 3 years, 8 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
	(Empty)
1 // Copyright 2016 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.

4 //

5 // Decoder for strings encoded using the HPACK Huffman Code (see

6 // https://httpwg.github.io/specs/rfc7541.html#huffman.code).

7 //

8 // This implementation is inspired by the One-Shift algorithm described in

9 // "On the Implementation of Minimum Redundancy Prefix Codes", by Alistair

10 // Moffat and Andrew Turpin, 1997.

11 // See also https://en.wikipedia.org/wiki/Canonical_Huffman_code for background

12 // on canonical Huffman codes.

13 //

14 // This decoder differs from that in .../spdy/hpack/hpack_huffman_table.cc

15 // as follows:

16 // 1) It decodes only the code described in RFC7541, where as the older

17 // implementation supported any canonical Huffman code provided at run

18 // time.

19 // 2) It uses a fixed amount of memory allocated at build time; it doesn't

20 // construct a tree of of decoding tables based on an encoding

21 // table provided at run time.

22 // 3) In benchmarks it runs from 10% to 70% faster, based on the length

23 // of the strings (faster for longer strings). Some of the improvements

24 // could be back ported, but others are fundamental to the approach.

25

26 #include "net/spdy/hpack/hpack_huffman_decoder.h"

27

28 #include <bitset>

29 #include <limits>

30 #include <utility>

31

32 #include "base/logging.h"

33 #include "net/spdy/hpack/hpack_input_stream.h"

34

35 namespace net {

36 namespace {

37

38 typedef HpackHuffmanDecoder::HuffmanWord HuffmanWord;

39 typedef HpackHuffmanDecoder::HuffmanCodeLength HuffmanCodeLength;

40

41 const HuffmanCodeLength kHuffmanWordLength =

42 std::numeric_limits<HuffmanWord>::digits;

43

44 const HuffmanCodeLength kMinCodeLength = 5;

45 const HuffmanCodeLength kMaxCodeLength = 30;

46

47 const HuffmanWord kInvalidLJCode = ~static_cast<HuffmanWord>(0);

48 // Length of a code in bits to the first code with that length, left-justified.

49 // Note that this can be computed from kLengthToFirstCanonical.

50 const HuffmanWord kLengthToFirstLJCode[] = {

51 kInvalidLJCode, // There are no codes of length 0.

52 kInvalidLJCode, // There are no codes of length 1.

53 kInvalidLJCode, // There are no codes of length 2.

54 kInvalidLJCode, // There are no codes of length 3.

55 kInvalidLJCode, // There are no codes of length 4.

56 0x00000000, // Length 5.

57 0x50000000, // Length 6.

58 0xb8000000, // Length 7.

59 0xf8000000, // Length 8.

60 kInvalidLJCode, // There are no codes of length 9.

61 0xfe000000, // Length 10.

62 0xff400000, // Length 11.

63 0xffa00000, // Length 12.

64 0xffc00000, // Length 13.

65 0xfff00000, // Length 14.

66 0xfff80000, // Length 15.

67 kInvalidLJCode, // There are no codes of length 16.

68 kInvalidLJCode, // There are no codes of length 17.

69 kInvalidLJCode, // There are no codes of length 18.

70 0xfffe0000, // Length 19.

71 0xfffe6000, // Length 20.

72 0xfffee000, // Length 21.

73 0xffff4800, // Length 22.

74 0xffffb000, // Length 23.

75 0xffffea00, // Length 24.

76 0xfffff600, // Length 25.

77 0xfffff800, // Length 26.

78 0xfffffbc0, // Length 27.

79 0xfffffe20, // Length 28.

80 kInvalidLJCode, // There are no codes of length 29.

81 0xfffffff0, // Length 30.

82 };

83

84 // TODO(jamessynge): Determine the performance impact of different types for

85 // the elements of this array (i.e. a larger type uses more cache, yet might

86 // better on some architectures).

87 const uint8_t kInvalidCanonical = 255;

88 // Maps from length of a code to the first 'canonical symbol' with that length.

89 const uint8_t kLengthToFirstCanonical[] = {

90 kInvalidCanonical, // Length 0, 0 codes.

91 kInvalidCanonical, // Length 1, 0 codes.

92 kInvalidCanonical, // Length 2, 0 codes.

93 kInvalidCanonical, // Length 3, 0 codes.

94 kInvalidCanonical, // Length 4, 0 codes.

95 0, // Length 5, 10 codes.

96 10, // Length 6, 26 codes.

97 36, // Length 7, 32 codes.

98 68, // Length 8, 6 codes.

99 kInvalidCanonical, // Length 9, 0 codes.

100 74, // Length 10, 5 codes.

101 79, // Length 11, 3 codes.

102 82, // Length 12, 2 codes.

103 84, // Length 13, 6 codes.

104 90, // Length 14, 2 codes.

105 92, // Length 15, 3 codes.

106 kInvalidCanonical, // Length 16, 0 codes.

107 kInvalidCanonical, // Length 17, 0 codes.

108 kInvalidCanonical, // Length 18, 0 codes.

109 95, // Length 19, 3 codes.

110 98, // Length 20, 8 codes.

111 106, // Length 21, 13 codes.

112 119, // Length 22, 26 codes.

113 145, // Length 23, 29 codes.

114 174, // Length 24, 12 codes.

115 186, // Length 25, 4 codes.

116 190, // Length 26, 15 codes.

117 205, // Length 27, 19 codes.

118 224, // Length 28, 29 codes.

119 kInvalidCanonical, // Length 29, 0 codes.

120 253, // Length 30, 4 codes.

121 };

122

123 // Mapping from canonical symbol (0 to 255) to actual symbol.

124 // clang-format off

125 const uint8_t kCanonicalToSymbol[] = {

126 '0', '1', '2', 'a', 'c', 'e', 'i', 'o',

127 's', 't', 0x20, '%', '-', '.', '/', '3',

128 '4', '5', '6', '7', '8', '9', '=', 'A',

129 '_', 'b', 'd', 'f', 'g', 'h', 'l', 'm',

130 'n', 'p', 'r', 'u', ':', 'B', 'C', 'D',

131 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L',

132 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T',

133 'U', 'V', 'W', 'Y', 'j', 'k', 'q', 'v',

134 'w', 'x', 'y', 'z', '&', '*', ',', ';',

135 'X', 'Z', '!', '\"', '(', ')', '?', '\'',

136 '+', '\|', '#', '>', 0x00, '$', '@', '[',

137 ']', '~', '^', '}', '<', '`', '{', '\\',

138 0xc3, 0xd0, 0x80, 0x82, 0x83, 0xa2, 0xb8, 0xc2,

139 0xe0, 0xe2, 0x99, 0xa1, 0xa7, 0xac, 0xb0, 0xb1,

140 0xb3, 0xd1, 0xd8, 0xd9, 0xe3, 0xe5, 0xe6, 0x81,

141 0x84, 0x85, 0x86, 0x88, 0x92, 0x9a, 0x9c, 0xa0,

142 0xa3, 0xa4, 0xa9, 0xaa, 0xad, 0xb2, 0xb5, 0xb9,

143 0xba, 0xbb, 0xbd, 0xbe, 0xc4, 0xc6, 0xe4, 0xe8,

144 0xe9, 0x01, 0x87, 0x89, 0x8a, 0x8b, 0x8c, 0x8d,

145 0x8f, 0x93, 0x95, 0x96, 0x97, 0x98, 0x9b, 0x9d,

146 0x9e, 0xa5, 0xa6, 0xa8, 0xae, 0xaf, 0xb4, 0xb6,

147 0xb7, 0xbc, 0xbf, 0xc5, 0xe7, 0xef, 0x09, 0x8e,

148 0x90, 0x91, 0x94, 0x9f, 0xab, 0xce, 0xd7, 0xe1,

149 0xec, 0xed, 0xc7, 0xcf, 0xea, 0xeb, 0xc0, 0xc1,

150 0xc8, 0xc9, 0xca, 0xcd, 0xd2, 0xd5, 0xda, 0xdb,

151 0xee, 0xf0, 0xf2, 0xf3, 0xff, 0xcb, 0xcc, 0xd3,

152 0xd4, 0xd6, 0xdd, 0xde, 0xdf, 0xf1, 0xf4, 0xf5,

153 0xf6, 0xf7, 0xf8, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe,

154 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x0b,

155 0x0c, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14,

156 0x15, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d,

157 0x1e, 0x1f, 0x7f, 0xdc, 0xf9, 0x0a, 0x0d, 0x16,

158 };

159 // clang-format on

160

161 #if DCHECK_IS_ON()

162

163 // Only used in DLOG.

164 bool IsEOSPrefix(HuffmanWord bits, HuffmanCodeLength bits_available) {

165 if (bits_available == 0) {

166 return true;

167 }

168 // We expect all the bits below the high order \|bits_available\| bits

169 // to be cleared.

170 HuffmanWord expected = HuffmanWord(0xffffffff) << (32 - bits_available);

171 return bits == expected;

172 }

173

174 #endif // DCHECK_IS_ON()

175

176 } // namespace

177

178 // TODO(jamessynge): Should we read these magic numbers from

179 // kLengthToFirstLJCode? Would that reduce cache consumption? Slow decoding?

180 // TODO(jamessynge): Is this being inlined by the compiler? Should we inline

181 // into DecodeString the tests for code lengths 5 through 8 (> 99% of codes

182 // according to the HPACK spec)?

183 HpackHuffmanDecoder::HuffmanCodeLength HpackHuffmanDecoder::CodeLengthOfPrefix(

184 HpackHuffmanDecoder::HuffmanWord value) {

185 HuffmanCodeLength length;

186 if (value < 0xb8000000) {

187 if (value < 0x50000000) {

188 length = 5;

189 } else {

190 length = 6;

191 }

192 } else {

193 if (value < 0xfe000000) {

194 if (value < 0xf8000000) {

195 length = 7;

196 } else {

197 length = 8;

198 }

199 } else {

200 if (value < 0xffc00000) {

201 if (value < 0xffa00000) {

202 if (value < 0xff400000) {

203 length = 10;

204 } else {

205 length = 11;

206 }

207 } else {

208 length = 12;

209 }

210 } else {

211 if (value < 0xfffe0000) {

212 if (value < 0xfff80000) {

213 if (value < 0xfff00000) {

214 length = 13;

215 } else {

216 length = 14;

217 }

218 } else {

219 length = 15;

220 }

221 } else {

222 if (value < 0xffff4800) {

223 if (value < 0xfffee000) {

224 if (value < 0xfffe6000) {

225 length = 19;

226 } else {

227 length = 20;

228 }

229 } else {

230 length = 21;

231 }

232 } else {

233 if (value < 0xffffea00) {

234 if (value < 0xffffb000) {

235 length = 22;

236 } else {

237 length = 23;

238 }

239 } else {

240 if (value < 0xfffffbc0) {

241 if (value < 0xfffff800) {

242 if (value < 0xfffff600) {

243 length = 24;

244 } else {

245 length = 25;

246 }

247 } else {

248 length = 26;

249 }

250 } else {

251 if (value < 0xfffffff0) {

252 if (value < 0xfffffe20) {

253 length = 27;

254 } else {

255 length = 28;

256 }

257 } else {

258 length = 30;

259 }

260 }

261 }

262 }

263 }

264 }

265 }

266 }

267 return length;

268 }

269

270 HuffmanWord HpackHuffmanDecoder::DecodeToCanonical(

271 HuffmanCodeLength code_length,

272 HuffmanWord bits) {

273 DCHECK_LE(kMinCodeLength, code_length);

274 DCHECK_LE(code_length, kMaxCodeLength);

275

276 // What is the first left-justified code of length \|code_length\|?

277 HuffmanWord first_lj_code = kLengthToFirstLJCode[code_length];

278 DCHECK_NE(kInvalidLJCode, first_lj_code);

279

280 // Which canonical symbol corresponds to the high order \|code_length\|

281 // bits of \|first_lj_code\|?

282 HuffmanWord first_canonical = kLengthToFirstCanonical[code_length];

283 DCHECK_NE(kInvalidCanonical, first_canonical);

284

285 // What is the position of the canonical symbol being decoded within

286 // the canonical symbols of length \|code_length\|?

287 HuffmanWord ordinal_in_length =

288 ((bits - first_lj_code) >> (kHuffmanWordLength - code_length));

289

290 // Combined these two to produce the position of the canonical symbol

291 // being decoded within all of the canonical symbols.

292 return first_canonical + ordinal_in_length;

293 }

294

295 char HpackHuffmanDecoder::CanonicalToSource(HuffmanWord canonical) {

296 DCHECK_LT(canonical, 256u);

297 return static_cast<char>(kCanonicalToSymbol[canonical]);

298 }

299

300 // TODO(jamessynge): Maybe further refactorings, including just passing in a

301 // SpdyStringPiece instead of an HpackInputStream, thus avoiding the PeekBits

302 // calls, and also allowing us to separate the code into portions dealing with

303 // long strings, and a later portion dealing with the last few bytes of strings.

304 // TODO(jamessynge): Determine if that is worth it by adding some counters to

305 // measure the distribution of string sizes seen in practice.

306 bool HpackHuffmanDecoder::DecodeString(HpackInputStream* in, SpdyString* out) {

307 out->clear();

308

309 // Load \|bits\| with the leading bits of the input stream, left justified

310 // (i.e. the bits of the first byte are the high-order bits of \|bits\|,

311 // and the bits of the fourth byte are the low-order bits of \|bits\|).

312 // \|peeked_success\| if there are more bits in \|*in\| (i.e. the encoding

313 // of the string to be decoded is more than 4 bytes).

314

315 auto bits_available_and_bits = in->InitializePeekBits();

316 HuffmanCodeLength bits_available = bits_available_and_bits.first;

317 HuffmanWord bits = bits_available_and_bits.second;

318

319 // \|peeked_success\| tracks whether the previous PeekBits call was able to

320 // store any new bits into \|bits\|. For the first pass through the loop below

321 // the value false is appropriate:

322 // If we have 32 bits (i.e. the input has at least 4 bytes), then:

323 // \|peeked_sucess\| is not examined because \|code_length\| is

324 // at most 30 in the HPACK Huffman Code.

325 // If we have at most 24 bits (i.e. the input has at most 3 bytes), then:

326 // It is possible that the very first \|code_length\| is greater than

327 // \|bits_available\|, in which case we need to read peeked_success to

328 // determine whether we should try to read more input, or have already

329 // loaded \|bits\| with the final bits of the input.

330 // After the first loop \|peeked_success\| has been set by a call to PeekBits.

331 bool peeked_success = false;

332

333 while (true) {

334 const HuffmanCodeLength code_length = CodeLengthOfPrefix(bits);

335 DCHECK_LE(kMinCodeLength, code_length);

336 DCHECK_LE(code_length, kMaxCodeLength);

337 DVLOG(2) << "bits: 0b" << std::bitset<32>(bits)

338 << " (avail=" << bits_available << ")"

339 << " prefix length: " << code_length

340 << (code_length > bits_available ? " *****" : "");

341 if (code_length > bits_available) {

342 if (!peeked_success) {

343 // Unable to read enough input for a match. If only a portion of

344 // the last byte remains, this is a successful EOS condition.

345 // Note that this does NOT check whether the available bits are all

346 // set to 1, which the encoder is required to set at EOS, and the

347 // decoder is required to check.

348 // TODO(jamessynge): Discuss whether we should enforce this check,

349 // as required by the RFC, presumably flag guarded so that we can

350 // disable it should it occur a lot. From my testing it appears that

351 // our encoder may be doing this wrong. Sigh.

352 // TODO(jamessynge): Add a counter for how often the remaining bits

353 // are non-zero.

354 in->ConsumeByteRemainder();

355 DLOG_IF(WARNING,

356 (in->HasMoreData() \|\| !IsEOSPrefix(bits, bits_available)))

357 << "bits: 0b" << std::bitset<32>(bits)

358 << " (avail=" << bits_available << ")"

359 << " prefix length: " << code_length

360 << " HasMoreData: " << in->HasMoreData();

361 return !in->HasMoreData();

362 }

363 // We're dealing with a long code. It might be useful to add a special

364 // method to HpackInputStream for getting more than "at most 8" bits

365 // at a time.

366 do {

367 peeked_success = in->PeekBits(&bits_available, &bits);

368 } while (peeked_success && bits_available < 32);

369 } else {

370 // Convert from the prefix code of length \|code_length\| to the

371 // canonical symbol (i.e. where the input symbols (bytes) are ordered by

372 // increasing code length and then by their increasing uint8 value).

373 HuffmanWord canonical = DecodeToCanonical(code_length, bits);

374

375 bits = bits << code_length;

376 bits_available -= code_length;

377 in->ConsumeBits(code_length);

378

379 if (canonical < 256) {

380 out->push_back(CanonicalToSource(canonical));

381 } else {

382 // Encoder is not supposed to explicity encode the EOS symbol (30

383 // 1-bits).

384 // TODO(jamessynge): Discuss returning false here, as required by HPACK.

385 DCHECK(false) << "EOS explicitly encoded!\n"

386 << "bits: 0b" << std::bitset<32>(bits)

387 << " (avail=" << bits_available << ")"

388 << " prefix length: " << code_length

389 << " canonical: " << canonical;

390 }

391 // Get some more bits for decoding (up to 8). \|peeked_success\| is true

392 // if we got any bits.

393 peeked_success = in->PeekBits(&bits_available, &bits);

394 }

395 DLOG_IF(WARNING, (VLOG_IS_ON(2) && bits_available < 32 && !peeked_success))

396 << "no more peeking possible";

397 }

398 }

399

400 } // namespace net

OLD	NEW

« no previous file with comments | « net/spdy/hpack/hpack_huffman_decoder.h ('k') | net/spdy/hpack/hpack_huffman_decoder_test.cc » ('j') | no next file with comments »