Chromium Code Reviews| Index: net/filter/gzip_stream_source.cc |
| diff --git a/net/filter/gzip_stream_source.cc b/net/filter/gzip_stream_source.cc |
| new file mode 100644 |
| index 0000000000000000000000000000000000000000..2c5d9a5a1555f774b6953e8934c844c274873211 |
| --- /dev/null |
| +++ b/net/filter/gzip_stream_source.cc |
| @@ -0,0 +1,316 @@ |
| +// Copyright 2016 The Chromium Authors. All rights reserved. |
| +// Use of this source code is governed by a BSD-style license that can be |
| +// found in the LICENSE file. |
| + |
| +#include "base/bind.h" |
| +#include "base/bit_cast.h" |
| +#include "net/filter/block_buffer.h" |
| +#include "net/filter/gzip_stream_source.h" |
| +#include "third_party/zlib/zlib.h" |
| + |
| +namespace net { |
| + |
| +// Constructs a source that pulls its input from |previous|, which it takes |
| +// ownership of. All decoding flags and counters start cleared; the zlib |
| +// stream itself is only created later, by Init(). |
| +GzipStreamSource::GzipStreamSource(scoped_ptr<StreamSource> previous) |
| +    : buffer_(new BlockBuffer()), |
| +      previous_(std::move(previous)), |
| +      zlib_eof_(false), |
| +      zlib_header_added_(false), |
| +      gzip_header_unchecked_(false), |
| +      gzip_footer_bytes_left_(0), |
| +      total_bytes_output_(0) { |
| +  // Init() only assigns this flag in gzip mode, so give it a defined value |
| +  // here. It is assigned in the body rather than in the initializer list |
| +  // because the member declaration order is not visible from this file. |
| +  gzip_fallback_unchecked_ = false; |
| +} |
| + |
| +// Releases the inflate state, but only if Init() ever allocated a zlib |
| +// stream. |
| +GzipStreamSource::~GzipStreamSource() { |
| +  if (!zlib_stream_) |
| +    return; |
| +  inflateEnd(zlib_stream_.get()); |
| +} |
| + |
| +// Allocates and zeroes the zlib stream, then initializes inflate for |mode|. |
| +// In GZIP_STREAM_SOURCE_GZIP mode the gzip header check is armed, the |
| +// plain-text fallback check is armed when |gzip_fallback| is true, and |
| +// inflate is set up with -MAX_WBITS window bits. Any other mode uses the |
| +// default inflateInit(). Returns false if the stream could not be allocated |
| +// or zlib initialization did not return Z_OK. |
| +bool GzipStreamSource::Init(GzipStreamSourceMode mode, bool gzip_fallback) { |
| +  zlib_stream_.reset(new z_stream); |
| +  if (!zlib_stream_) |
| +    return false; |
| +  memset(zlib_stream_.get(), 0, sizeof(z_stream)); |
| + |
| +  int init_result; |
| +  if (mode != GZIP_STREAM_SOURCE_GZIP) { |
| +    init_result = inflateInit(zlib_stream_.get()); |
| +  } else { |
| +    // The flags are set before zlib is initialized, so they stay set even if |
| +    // initialization fails. |
| +    gzip_header_unchecked_ = true; |
| +    gzip_fallback_unchecked_ = gzip_fallback; |
| +    init_result = inflateInit2(zlib_stream_.get(), -MAX_WBITS); |
| +  } |
| + |
| +  return init_result == Z_OK; |
| +} |
| + |
| +// Reads up to |buffer_size| decoded bytes into |dest_buffer|. |
| +// |
| +// Returns OK with |*bytes_read| > 0 when data was produced synchronously, OK |
| +// with |*bytes_read| == 0 when the previous source reported EOF, |
| +// ERR_CONTENT_DECODING_FAILED when the gzip header or the compressed data is |
| +// rejected, or otherwise whatever the previous source's Read() returned. If |
| +// that is ERR_IO_PENDING, |dest_buffer| is retained in |pending_read_buffer_| |
| +// and OnReadComplete() finishes the read and runs |callback|. |
| +Error GzipStreamSource::Read(IOBuffer* dest_buffer, |
| +                             size_t buffer_size, |
| +                             size_t* bytes_read, |
| +                             const OnReadCompleteCallback& callback) { |
| +  *bytes_read = 0; |
| + |
| +  // Loop on reading the previous source until either: |
| +  // * Decompress() returns some data, in which case this method completes |
| +  //   synchronously, or |
| +  // * Read() does not complete synchronously, in which case OnReadComplete() |
| +  //   is responsible for finishing the decompression. |
| +  Error error; |
| +  do { |
| +    // If this stream is not really gzipped as detected by |
| +    // ShouldFallbackToPlain, pretend the zlib stream already ended. |
| +    if (ShouldFallbackToPlain()) { |
| +      zlib_eof_ = true; |
| +      gzip_header_unchecked_ = false; |
| +      gzip_fallback_unchecked_ = false; |
| +    } |
| + |
| +    // Require a valid gzip header when decompressing a gzip stream. |
| +    if (gzip_header_unchecked_ && IsGzipHeaderInvalid()) |
| +      return ERR_CONTENT_DECODING_FAILED; |
| + |
| +    GzipStreamState state = Decompress(dest_buffer, buffer_size, bytes_read); |
| + |
| +    // If the decompressor threw an error, fail synchronously. |
| +    if (state == GZIP_STREAM_ERROR) |
| +      return ERR_CONTENT_DECODING_FAILED; |
| + |
| +    // Awkward special case: if Decompress returns with data still left in the |
| +    // input buffer, ordinarily that would imply needing more output space. |
| +    // However, for compatibility reasons, it is permissible to include |
| +    // trailing uncompressed data after the zlib stream ends. That data is |
| +    // copied through here by Passthrough. |
| +    if (*bytes_read == 0 && zlib_eof_ && buffer_->HasMoreBytes()) { |
| +      state = Passthrough(dest_buffer, buffer_size, bytes_read); |
| +    } |
| + |
| +    // If there was already some data buffered internally in |buffer_|, |
| +    // or some output buffered internally in zlib, |Decompress| can succeed |
| +    // synchronously. If this happens, return right here. |
| +    if (*bytes_read > 0) { |
| +      total_bytes_output_ += *bytes_read; |
| +      return OK; |
| +    } |
| + |
| +    // GZIP_STREAM_MORE_OUTPUT_SPACE implies |*bytes_read| > 0, since |
| +    // Decompress will fill all available output buffer space first and the |
| +    // output buffer is empty coming into |Read|. Since GZIP_STREAM_ERROR is |
| +    // handled above, this is the only other case. |
| +    DCHECK_EQ(GZIP_STREAM_MORE_INPUT, state); |
| + |
| +    // Since Decompress needs more input, it has consumed all existing input. |
| +    DCHECK(!buffer_->HasMoreBytes()); |
| + |
| +    // Dispatch a read to refill the input buffer. The count starts at zero so |
| +    // it never holds an indeterminate value if the previous source returns |
| +    // without writing it. |
| +    size_t previous_bytes_read = 0; 

Randy Smith (Not in Mondays)
2016/02/08 23:28:42
nit: Initialize to zero.
xunjieli
2016/03/03 23:00:08
Done.
|
| +    error = previous_->Read( |
| +        buffer_->buffer(), buffer_->size(), &previous_bytes_read, |
| +        base::Bind(&GzipStreamSource::OnReadComplete, base::Unretained(this), |
| +                   callback, base::Unretained(dest_buffer), buffer_size)); |
| + |
| +    // OK with 0 bytes read means EOF. Since the buffer is already empty, and |
| +    // Decompress already failed to return any more data, this source is also |
| +    // at EOF. Just return that synchronously. |
| +    if (error == OK && previous_bytes_read == 0) |
| +      return OK; |
| + |
| +    // If the underlying read completed synchronously, mark the buffer as |
| +    // refilled and try again. |
| +    if (error == OK) |
| +      buffer_->WasRefilled(previous_bytes_read); |
| +  } while (error == OK); |
| + |
| +  // Hold a reference to the destination while the asynchronous read is in |
| +  // flight; OnReadComplete() checks against it and clears it. |
| +  if (error == ERR_IO_PENDING) |
| +    pending_read_buffer_ = dest_buffer; |
| + |
| +  return error; |
| +} |
| + |
| +// Private helpers. |
| + |
| +// Synchronous decompressor. Consumes bytes from |buffer_| and inflates them |
| +// into |dest_buffer| (capacity |buffer_size|), storing the number of bytes |
| +// produced in |*bytes_output|. Returns: |
| +//  * GZIP_STREAM_MORE_INPUT when |buffer_| is empty afterwards; |
| +//  * GZIP_STREAM_MORE_OUTPUT_SPACE when input is still buffered; |
| +//  * GZIP_STREAM_ERROR when inflate fails, even after the header retry. |
| +// Once the zlib stream (either gzip or deflate) has ended, any further input |
| +// is handed to Passthrough() instead of being inflated. |
| +GzipStreamSource::GzipStreamState GzipStreamSource::Decompress( |
| +    IOBuffer* dest_buffer, |
| +    size_t buffer_size, |
| +    size_t* bytes_output) { |
| +  DCHECK(dest_buffer); |
| +  DCHECK(buffer_size != 0); |
| + |
| +  if (!buffer_->HasMoreBytes()) { |
| +    return GZIP_STREAM_MORE_INPUT; |
| +  } |
| + |
| +  // If the zlib stream has already ended, pass any further data through. |
| +  if (zlib_eof_) |
| +    return Passthrough(dest_buffer, buffer_size, bytes_output); |
| + |
| +  z_stream* const stream = zlib_stream_.get(); |
| +  stream->next_in = bit_cast<Bytef*>(buffer_->bytes()); |
| +  stream->avail_in = buffer_->bytes_left(); |
| +  stream->next_out = bit_cast<Bytef*>(dest_buffer->data()); |
| +  stream->avail_out = buffer_size; |
| + |
| +  int ret = inflate(stream, Z_NO_FLUSH); |
| + |
| +  // Sometimes misconfigured servers omit the zlib header, relying on clients |
| +  // to splice it back in. This is attempted at most once per stream. |
| +  if (ret < 0 && !zlib_header_added_) { |
| +    zlib_header_added_ = true; |
| +    if (!InsertZlibHeader()) |
| +      return GZIP_STREAM_ERROR; |
| + |
| +    // InsertZlibHeader() pointed the stream at its own scratch buffers, so |
| +    // aim it back at the real input and output before retrying. |
| +    stream->next_in = bit_cast<Bytef*>(buffer_->bytes()); |
| +    stream->avail_in = buffer_->bytes_left(); |
| +    stream->next_out = bit_cast<Bytef*>(dest_buffer->data()); |
| +    stream->avail_out = buffer_size; |
| + |
| +    ret = inflate(stream, Z_NO_FLUSH); |
| +    // TODO(ellyjones): add a histogram to see how often this happens. The |
| +    // original bug for this behavior was ancient and maybe it doesn't happen |
| +    // in the wild any more? |
| +  } |
| + |
| +  // Whatever zlib did not leave in avail_in / avail_out was consumed / |
| +  // produced by the inflate call(s) above. |
| +  size_t bytes_used = buffer_->bytes_left() - stream->avail_in; |
| +  size_t bytes_out = buffer_size - stream->avail_out; |
| + |
| +  buffer_->WasDrained(bytes_used); |
| +  *bytes_output = bytes_out; |
| + |
| +  if (ret != Z_STREAM_END && ret != Z_OK) |
| +    return GZIP_STREAM_ERROR; |
| + |
| +  // The zlib stream can end before the input stream ends. If this happens, |
| +  // |Decompress| will pass any further data on untouched. |
| +  if (ret == Z_STREAM_END) |
| +    zlib_eof_ = true; |
| + |
| +  if (!buffer_->HasMoreBytes()) |
| +    return GZIP_STREAM_MORE_INPUT; |
| +  else |
| +    return GZIP_STREAM_MORE_OUTPUT_SPACE; |
| +} |
| + |
| +// Copies buffered input to |dest_buffer| without decoding it, after first |
| +// giving SkipGzipFooterIfNeeded() a chance to drop pending footer bytes. At |
| +// most |buffer_size| bytes are copied; the count is stored in |*bytes_read|. |
| +// Returns GZIP_STREAM_MORE_INPUT once |buffer_| is empty and |
| +// GZIP_STREAM_MORE_OUTPUT_SPACE otherwise. |
| +GzipStreamSource::GzipStreamState GzipStreamSource::Passthrough( |
| +    IOBuffer* dest_buffer, |
| +    size_t buffer_size, |
| +    size_t* bytes_read) { |
| +  SkipGzipFooterIfNeeded(); |
| + |
| +  const size_t available = buffer_->bytes_left(); |
| +  const size_t copy_len = available > buffer_size ? buffer_size : available; |
| +  memcpy(dest_buffer->data(), buffer_->bytes(), copy_len); |
| +  buffer_->WasDrained(copy_len); |
| +  *bytes_read = copy_len; |
| + |
| +  return buffer_->HasMoreBytes() ? GZIP_STREAM_MORE_OUTPUT_SPACE |
| +                                 : GZIP_STREAM_MORE_INPUT; |
| +} |
| + |
| +// Completion handler bound by Read() for an asynchronous read of the previous |
| +// source. |error| and |bytes_read| describe that read; |dest_buffer| and |
| +// |dest_buffer_size| are the caller's output buffer. Runs |callback| with the |
| +// final result unless a further asynchronous read had to be issued, in which |
| +// case a later OnReadComplete() call will run it. |
| +void GzipStreamSource::OnReadComplete(const OnReadCompleteCallback& callback, |
| +                                      IOBuffer* dest_buffer, |
| +                                      size_t dest_buffer_size, |
| +                                      Error error, |
| +                                      size_t bytes_read) { |
| +  DCHECK(!buffer_->HasMoreBytes()); |
| +  DCHECK_EQ(dest_buffer, pending_read_buffer_.get()); |
| + |
| +  // Take a ref for the lifetime of this function. |
| +  scoped_refptr<IOBuffer> dest_ref(dest_buffer); |
| +  pending_read_buffer_ = nullptr; |
| + |
| +  // If the underlying read failed, fail this read directly. |
| +  if (error != OK) { |
| +    callback.Run(error, bytes_read); |
| +    return; |
| +  } |
| + |
| +  if (bytes_read == 0) { |
| +    // EOF. Since the buffer is empty, there is no more data to decompress |
| +    // (any internally buffered data would have been drained already before |
| +    // calling the previous stream's Read). Return EOF to our caller. |
| +    callback.Run(OK, 0); |
| +    return; |
| +  } |
| + |
| +  // Mark the buffer as refilled and try decompressing. |
| +  buffer_->WasRefilled(bytes_read); |
| + |
| +  // Recurse. If this Read completes synchronously, this method runs the |
| +  // callback; if it does not, Read will have posted an asynchronous read that |
| +  // will later re-invoke OnReadComplete to run the callback. |
| +  // |
| +  // Read() already adds the bytes it produced to |total_bytes_output_| before |
| +  // returning OK, so the total must not be bumped again here; doing so would |
| +  // count every asynchronously completed read twice. |
| +  error = Read(dest_buffer, dest_buffer_size, &bytes_read, callback); |
| +  if (error != ERR_IO_PENDING) |
| +    callback.Run(error, bytes_read); |
| +} |
| + |
| +// Resets the inflate state and feeds it a synthetic two-byte zlib header |
| +// (0x78 0x01), using a small throwaway output buffer. Decompress() calls this |
| +// before retrying input that inflate rejected. Returns true if inflate |
| +// reported Z_OK for the header. |
| +bool GzipStreamSource::InsertZlibHeader() { |
| +  char fake_header[] = {0x78, 0x01}; |
| +  char scratch[4]; |
| + |
| +  z_stream* stream = zlib_stream_.get(); |
| +  inflateReset(stream); |
| +  stream->next_in = bit_cast<Bytef*>(&fake_header[0]); |
| +  stream->avail_in = sizeof(fake_header); |
| +  stream->next_out = bit_cast<Bytef*>(&scratch[0]); |
| +  stream->avail_out = sizeof(scratch); |
| + |
| +  return inflate(stream, Z_NO_FLUSH) == Z_OK; |
| +} |
| + |
| +// Feeds the buffered bytes to |gzip_header_| and reports whether the gzip |
| +// header turned out to be invalid. An incomplete header consumes everything |
| +// buffered and leaves the check pending for the next call. A complete header |
| +// is drained from |buffer_| and arms skipping of the 8-byte gzip footer. |
| +bool GzipStreamSource::IsGzipHeaderInvalid() { |
| +  const size_t kGzipFooterBytes = 8; |
| + |
| +  const char* header_end = nullptr; |
| +  const GZipHeader::Status status = gzip_header_.ReadMore( |
| +      buffer_->bytes(), buffer_->bytes_left(), &header_end); |
| + |
| +  if (status == GZipHeader::INCOMPLETE_HEADER) { |
| +    // All buffered bytes went into the header parser; wait for more input. |
| +    buffer_->WasDrained(buffer_->bytes_left()); |
| +    return false; |
| +  } |
| + |
| +  // From here on the header check is conclusive, whatever the verdict. |
| +  gzip_header_unchecked_ = false; |
| +  if (status != GZipHeader::COMPLETE_HEADER) |
| +    return status == GZipHeader::INVALID_HEADER; |
| + |
| +  // If there is a valid header, there should also be a valid footer. |
| +  gzip_footer_bytes_left_ = kGzipFooterBytes; |
| +  buffer_->WasDrained(header_end - buffer_->bytes()); |
| +  return false; |
| +} |
| + |
| +// Crude sniffing heuristic. Per RFC 1952 2.3.1 a gzip file always begins |
| +// with a two-byte magic value, so while the header check and the fallback |
| +// check are both still pending, a first buffered byte that is not the first |
| +// magic byte means this filter should fall back. With no buffered input the |
| +// answer is "no" for now. |
| +bool GzipStreamSource::ShouldFallbackToPlain() { |
| +  static const char kGzipFirstByte = 0x1f; |
| +  if (!(gzip_header_unchecked_ && gzip_fallback_unchecked_)) |
| +    return false; |
| +  if (buffer_->HasMoreBytes()) { |
| +    const char first_byte = buffer_->bytes()[0]; |
| +    return first_byte != kGzipFirstByte; |
| +  } |
| +  return false; |
| +} |
| + |
| +// Returns |total_bytes_output_|, the running count of bytes that Read() has |
| +// handed back to its caller. |
| +size_t GzipStreamSource::GetBytesOutput() const { |
| +  return total_bytes_output_; |
| +} |
| + |
| +// Discards up to |gzip_footer_bytes_left_| bytes from the front of |buffer_| |
| +// and records how many footer bytes remain to be skipped. The footer may |
| +// arrive split across several refills of |buffer_|, so this can take several |
| +// calls to finish; once the count reaches zero it is a no-op. |
| +void GzipStreamSource::SkipGzipFooterIfNeeded() { |
| +  if (gzip_footer_bytes_left_ == 0) |
| +    return; |
| +  size_t to_skip = gzip_footer_bytes_left_; |
| +  if (to_skip > buffer_->bytes_left()) |
| +    to_skip = buffer_->bytes_left(); |
| +  buffer_->WasDrained(to_skip); |
| +  // Without this decrement every later call would drop up to another full |
| +  // footer's worth of genuine trailing data. |
| +  gzip_footer_bytes_left_ -= to_skip; |
| +} |
| + |
| +} // namespace net |