Chromium Code Reviews| Index: net/filter/gzip_stream_source.cc |
| diff --git a/net/filter/gzip_stream_source.cc b/net/filter/gzip_stream_source.cc |
| new file mode 100644 |
| index 0000000000000000000000000000000000000000..0ffcab7bdf771c06f610439e2354e57a730ace98 |
| --- /dev/null |
| +++ b/net/filter/gzip_stream_source.cc |
| @@ -0,0 +1,236 @@ |
| +// Copyright 2016 The Chromium Authors. All rights reserved. |
| +// Use of this source code is governed by a BSD-style license that can be |
| +// found in the LICENSE file. |
| + |
| +#include "base/bind.h" |
| +#include "base/bit_cast.h" |
| +#include "net/filter/block_buffer.h" |
| +#include "net/filter/gzip_stream_source.h" |
| +#include "third_party/zlib/zlib.h" |
| + |
| +namespace net { |
| + |
| +// Constructs a gzip/deflate decoding source layered on top of |previous|. |
| +// Every decoding state flag starts cleared; Init() creates the zlib stream |
| +// and selects the decoding mode. |
| +GzipStreamSource::GzipStreamSource(scoped_ptr<StreamSource> previous) |
| +    : StreamSource(StreamSource::SOURCE_GZIP, std::move(previous)), |
| +      zlib_eof_(false), |
| +      zlib_header_added_(false), |
| +      gzip_header_unchecked_(false), |
| +      gzip_footer_bytes_left_(0) { |
| +  // Give the fallback flag a defined value like the other state flags, so it |
| +  // is never indeterminate when Init() is skipped or run in deflate mode. |
| +  // NOTE(review): assigned here rather than in the initializer list because |
| +  // the member declaration order is not visible from this file; move it into |
| +  // the list at the matching position once confirmed. |
| +  gzip_fallback_unchecked_ = false; |
| +} |
| + |
| +// Releases zlib's internal inflate state, if a stream was ever allocated by |
| +// Init(). |
| +GzipStreamSource::~GzipStreamSource() { |
| +  z_stream* stream = zlib_stream_.get(); |
| +  if (stream != nullptr) |
| +    inflateEnd(stream); |
| +} |
| + |
| +// Allocates a zeroed z_stream and initializes zlib's inflater for |mode|. |
| +// In GZIP_STREAM_SOURCE_GZIP mode the inflater is set up with negative window |
| +// bits, the gzip header is marked as still to be checked, and |gzip_fallback| |
| +// records whether the plain-text fallback check is still pending. Any other |
| +// mode uses zlib's default inflateInit(). |
| +// Returns true iff zlib reported Z_OK. |
| +bool GzipStreamSource::Init(GzipStreamSourceMode mode, bool gzip_fallback) { |
| +  zlib_stream_.reset(new z_stream); |
| +  if (!zlib_stream_) |
| +    return false; |
| + |
| +  z_stream* stream = zlib_stream_.get(); |
| +  memset(stream, 0, sizeof(*stream)); |
| + |
| +  if (mode != GZIP_STREAM_SOURCE_GZIP) |
| +    return inflateInit(stream) == Z_OK; |
| + |
| +  gzip_header_unchecked_ = true; |
| +  gzip_fallback_unchecked_ = gzip_fallback; |
| +  return inflateInit2(stream, -MAX_WBITS) == Z_OK; |
| +} |
| + |
| +// Decodes input currently held in |buffer_| into |dest_buffer|, writing at |
| +// most |buffer_size| bytes and storing the number written in |*bytes_read|. |
| +// Returns ERR_CONTENT_DECODING_FAILED if the gzip header is invalid or the |
| +// decompressor reports an error, and OK otherwise. OK with |*bytes_read| == 0 |
| +// is expected (see the DCHECKs below) to mean that all buffered input was |
| +// consumed without producing output. |
| +Error GzipStreamSource::ReadInternal(IOBuffer* dest_buffer, |
| +                                     size_t buffer_size, |
| +                                     size_t* bytes_read) { |
| +  // Decompress can return before writing its out-parameter (for example when |
| +  // an incomplete gzip header has just drained |buffer_|), and |*bytes_read| |
| +  // is inspected below, so give it a defined value up front. |
| +  *bytes_read = 0; |
| + |
| +  // If this stream is not really gzipped as detected by |
| +  // ShouldFallbackToPlain, pretend the zlib stream already ended. |
| +  if (ShouldFallbackToPlain()) { |
| +    zlib_eof_ = true; |
| +    gzip_header_unchecked_ = false; |
| +    gzip_fallback_unchecked_ = false; |
| +  } |
| + |
| +  // Require a valid gzip header when decompressing a gzip stream. |
| +  if (gzip_header_unchecked_ && IsGzipHeaderInvalid()) |
| +    return ERR_CONTENT_DECODING_FAILED; |
| + |
| +  GzipStreamState state = Decompress(dest_buffer, buffer_size, bytes_read); |
| + |
| +  // If the decompressor threw an error, fail synchronously. |
| +  if (state == GZIP_STREAM_ERROR) |
| +    return ERR_CONTENT_DECODING_FAILED; |
| + |
| +  // Awkward special case: if Decompress returns with data still left in the |
| +  // input buffer, ordinarily that would imply needing more output space. |
| +  // However, for compatibility reasons, it is permissible to include trailing |
| +  // uncompressed data after the zlib stream ends. That data is copied through |
| +  // here by Passthrough. |
| +  if (*bytes_read == 0 && zlib_eof_ && buffer_->HasMoreBytes()) { |
| +    state = Passthrough(dest_buffer, buffer_size, bytes_read); |
| +  } |
| + |
| +  // If there was already some data buffered internally in |buffer_|, |
| +  // or some output buffered internally in zlib, |Decompress| can succeed |
| +  // synchronously. If this happens, return right here. |
| +  if (*bytes_read > 0) |
| +    return OK; |
| + |
| +  // GZIP_STREAM_MORE_OUTPUT_SPACE implies |*bytes_read| > 0, since Decompress |
| +  // will fill all available output buffer space first and the output buffer |
| +  // is empty coming into |Read|. Since GZIP_STREAM_ERROR is handled above, |
| +  // this is the only other case. |
| +  DCHECK_EQ(GZIP_STREAM_MORE_INPUT, state); |
| + |
| +  // Since Decompress needs more input, it has consumed all existing input. |
| +  DCHECK(!buffer_->HasMoreBytes()); |
| + |
| +  return OK; |
| +} |
| + |
| +// Private helpers. |
| + |
| +// Synchronous decompressor. This function consumes bytes from |buffer_| and |
| +// decompresses them into |dest_buffer| until either: |
| +//   a) |buffer_| is empty, and it returns MORE_INPUT; |
| +//   b) |dest_buffer| is full, and it returns MORE_OUTPUT_SPACE; |
| +//   c) zlib reports a failure, and it returns ERROR. |
| +// |*bytes_output| is always set to the number of bytes written into |
| +// |dest_buffer| (zero when nothing was written). |
| +// This decompressor will decompress a zlib stream (either gzip or deflate) |
| +// until the zlib EOF, then will pass any further input through untouched. |
| +GzipStreamSource::GzipStreamState GzipStreamSource::Decompress( |
| +    IOBuffer* dest_buffer, |
| +    size_t buffer_size, |
| +    size_t* bytes_output) { |
| +  DCHECK(dest_buffer); |
| +  DCHECK(buffer_size != 0); |
| + |
| +  // Callers read |*bytes_output| after every return path, including the early |
| +  // ones below, so make sure it never holds an indeterminate value. |
| +  *bytes_output = 0; |
| + |
| +  if (!buffer_->HasMoreBytes()) { |
| +    return GZIP_STREAM_MORE_INPUT; |
| +  } |
| + |
| +  // If the zlib stream has already ended, pass any further data through. |
| +  if (zlib_eof_) |
| +    return Passthrough(dest_buffer, buffer_size, bytes_output); |
| + |
| +  zlib_stream_.get()->next_in = bit_cast<Bytef*>(buffer_->bytes()); |
| +  zlib_stream_.get()->avail_in = buffer_->bytes_left(); |
| +  zlib_stream_.get()->next_out = bit_cast<Bytef*>(dest_buffer->data()); |
| +  zlib_stream_.get()->avail_out = buffer_size; |
| + |
| +  int ret = inflate(zlib_stream_.get(), Z_NO_FLUSH); |
| + |
| +  // Diagnostic only: this runs on every call, including successful ones, so |
| +  // it must not be reported at ERROR severity. |
| +  DVLOG(1) << "inflate " << ret << " " << zlib_stream_.get()->avail_in << " " |
| +           << zlib_stream_.get()->avail_out; |
| + |
| +  // Sometimes misconfigured servers omit the zlib header, relying on clients |

mmenke
2016/03/04 21:15:56
Sometimes
xunjieli
2016/04/20 19:16:09
Done.
|
| +  // to splice it back in. This is attempted at most once per stream; the |
| +  // retry restarts from the beginning of |buffer_| because nothing has been |
| +  // drained yet. |
| +  if (ret < 0 && !zlib_header_added_) { |
| +    zlib_header_added_ = true; |
| +    if (!InsertZlibHeader()) |
| +      return GZIP_STREAM_ERROR; |
| + |
| +    zlib_stream_.get()->next_in = bit_cast<Bytef*>(buffer_->bytes()); |
| +    zlib_stream_.get()->avail_in = buffer_->bytes_left(); |
| +    zlib_stream_.get()->next_out = bit_cast<Bytef*>(dest_buffer->data()); |
| +    zlib_stream_.get()->avail_out = buffer_size; |
| + |
| +    ret = inflate(zlib_stream_.get(), Z_NO_FLUSH); |
| +    // TODO(ellyjones): add a histogram to see how often this happens. The |
| +    // original bug for this behavior was ancient and maybe it doesn't happen in |
| +    // the wild any more? |
| +  } |
| + |
| +  // zlib decrements avail_in/avail_out as it goes; the differences are the |
| +  // number of input bytes consumed and output bytes produced by this call. |
| +  size_t bytes_used = buffer_->bytes_left() - zlib_stream_.get()->avail_in; |
| +  size_t bytes_out = buffer_size - zlib_stream_.get()->avail_out; |
| + |
| +  buffer_->WasDrained(bytes_used); |
| +  *bytes_output = bytes_out; |
| + |
| +  if (ret != Z_STREAM_END && ret != Z_OK) |
| +    return GZIP_STREAM_ERROR; |
| + |
| +  // The zlib stream can end before the input stream ends. If this happens, |
| +  // |Decompress| will pass any further data on untouched. |
| +  if (ret == Z_STREAM_END) |
| +    zlib_eof_ = true; |
| + |
| +  if (!buffer_->HasMoreBytes()) |
| +    return GZIP_STREAM_MORE_INPUT; |
| +  else |
| +    return GZIP_STREAM_MORE_OUTPUT_SPACE; |
| +} |
| + |
| +// Copies buffered input to |dest_buffer| without decoding it. Any pending |
| +// gzip footer bytes are skipped first. At most |buffer_size| bytes are |
| +// copied, and the copied count is stored in |*bytes_read|. |
| +// Returns MORE_INPUT once |buffer_| has been emptied, MORE_OUTPUT_SPACE if |
| +// input is still waiting. |
| +GzipStreamSource::GzipStreamState GzipStreamSource::Passthrough( |
| +    IOBuffer* dest_buffer, |
| +    size_t buffer_size, |
| +    size_t* bytes_read) { |
| +  SkipGzipFooterIfNeeded(); |
| + |
| +  const size_t available = buffer_->bytes_left(); |
| +  const size_t copy_size = available > buffer_size ? buffer_size : available; |
| + |
| +  memcpy(dest_buffer->data(), buffer_->bytes(), copy_size); |
| +  buffer_->WasDrained(copy_size); |
| +  *bytes_read = copy_size; |
| + |
| +  return buffer_->HasMoreBytes() ? GZIP_STREAM_MORE_OUTPUT_SPACE |
| +                                 : GZIP_STREAM_MORE_INPUT; |
| +} |
| + |
| +// Resets the inflater and feeds it a two-byte dummy zlib header (0x78 0x01), |
| +// directing any output into a scratch buffer that is then discarded. |
| +// Returns true iff zlib accepted the header with Z_OK. |
| +bool GzipStreamSource::InsertZlibHeader() { |
| +  char fake_header[] = {0x78, 0x01}; |
| +  char scratch[4]; |
| + |
| +  z_stream* stream = zlib_stream_.get(); |
| +  inflateReset(stream); |
| + |
| +  stream->next_in = bit_cast<Bytef*>(&fake_header[0]); |
| +  stream->avail_in = sizeof(fake_header); |
| +  stream->next_out = bit_cast<Bytef*>(&scratch[0]); |
| +  stream->avail_out = sizeof(scratch); |
| + |
| +  return inflate(stream, Z_NO_FLUSH) == Z_OK; |
| +} |
| + |
| +// Feeds the buffered input to the gzip header parser. |
| +//   - Incomplete header: all buffered bytes are drained and the header stays |
| +//     marked as unchecked, so parsing resumes on the next call. |
| +//   - Complete header: the header bytes are drained and an 8-byte footer is |
| +//     scheduled to be skipped. |
| +// Returns true only when the parser reports INVALID_HEADER. |
| +bool GzipStreamSource::IsGzipHeaderInvalid() { |
| +  const char* header_end = nullptr; |
| +  const GZipHeader::Status result = gzip_header_.ReadMore( |
| +      buffer_->bytes(), buffer_->bytes_left(), &header_end); |
| + |
| +  if (result == GZipHeader::INCOMPLETE_HEADER) { |
| +    buffer_->WasDrained(buffer_->bytes_left()); |
| +    return false; |
| +  } |
| + |
| +  // The parser reached a verdict, so the header check is finished either way. |
| +  gzip_header_unchecked_ = false; |
| + |
| +  if (result != GZipHeader::COMPLETE_HEADER) |
| +    return result == GZipHeader::INVALID_HEADER; |
| + |
| +  // If there is a valid header, there should also be a valid footer. |
| +  const size_t kFooterSize = 8; |
| +  gzip_footer_bytes_left_ = kFooterSize; |
| +  buffer_->WasDrained(header_end - buffer_->bytes()); |
| +  return false; |
| +} |
| + |
| +// Simple heuristic: per RFC 1952 2.3.1 a gzip file begins with a two-byte |
| +// magic value. While both the header check and the fallback check are still |
| +// pending, a first buffered byte that differs from the first magic byte means |
| +// the content should be treated as plain data. With no buffered input the |
| +// answer is "no" for now. |
| +bool GzipStreamSource::ShouldFallbackToPlain() { |
| +  static const char kGzipFirstByte = 0x1f; |
| + |
| +  const bool check_pending = |
| +      gzip_header_unchecked_ && gzip_fallback_unchecked_; |
| +  if (!check_pending || !buffer_->HasMoreBytes()) |
| +    return false; |
| + |
| +  const char first_byte = buffer_->bytes()[0]; |
| +  return first_byte != kGzipFirstByte; |
| +} |
| + |
| +// Drains as many of the outstanding gzip footer bytes as |buffer_| currently |
| +// holds, and records how many remain. The footer may be split across several |
| +// reads, so the remaining count must shrink by exactly the amount skipped; |
| +// otherwise later calls would keep discarding real trailing data. |
| +void GzipStreamSource::SkipGzipFooterIfNeeded() { |
| +  if (gzip_footer_bytes_left_ == 0) |
| +    return; |
| +  size_t to_skip = gzip_footer_bytes_left_; |
| +  if (to_skip > buffer_->bytes_left()) |
| +    to_skip = buffer_->bytes_left(); |
| +  buffer_->WasDrained(to_skip); |
| +  // |to_skip| never exceeds |gzip_footer_bytes_left_|, so this cannot wrap. |
| +  gzip_footer_bytes_left_ -= to_skip; |
| +} |
| + |
| +} // namespace net |