net/filter/gzip_stream_source.cc - Issue 1662763002: [ON HOLD] Implement pull-based design for content decoding

Unified Diff: net/filter/gzip_stream_source.cc

Issue 1662763002: [ON HOLD] Implement pull-based design for content decoding (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: Refactor common logic Created 4 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: net/filter/gzip_stream_source.cc

diff --git a/net/filter/gzip_stream_source.cc b/net/filter/gzip_stream_source.cc

new file mode 100644

index 0000000000000000000000000000000000000000..0ffcab7bdf771c06f610439e2354e57a730ace98

--- /dev/null

+++ b/net/filter/gzip_stream_source.cc

@@ -0,0 +1,236 @@

+// Use of this source code is governed by a BSD-style license that can be

+// found in the LICENSE file.

+#include "base/bind.h"

+#include "base/bit_cast.h"

+#include "net/filter/block_buffer.h"

+#include "net/filter/gzip_stream_source.h"

+#include "third_party/zlib/zlib.h"

+namespace net {

+GzipStreamSource::GzipStreamSource(scoped_ptr<StreamSource> previous)

+ : StreamSource(StreamSource::SOURCE_GZIP, std::move(previous)),

+ zlib_eof_(false),

+ zlib_header_added_(false),

+ gzip_header_unchecked_(false),

+ gzip_footer_bytes_left_(0) {}

+GzipStreamSource::~GzipStreamSource() {

+ if (zlib_stream_)

+ inflateEnd(zlib_stream_.get());

+bool GzipStreamSource::Init(GzipStreamSourceMode mode, bool gzip_fallback) {

+ zlib_stream_.reset(new z_stream);

+ if (!zlib_stream_)

+ return false;

+ memset(zlib_stream_.get(), 0, sizeof(z_stream));

+ if (mode == GZIP_STREAM_SOURCE_GZIP) {

+ gzip_header_unchecked_ = true;

+ gzip_fallback_unchecked_ = gzip_fallback;

+ if (inflateInit2(zlib_stream_.get(), -MAX_WBITS) != Z_OK)

+ return false;

+ } else {

+ if (inflateInit(zlib_stream_.get()) != Z_OK)

+ return false;

+ }

+ return true;

+Error GzipStreamSource::ReadInternal(IOBuffer* dest_buffer,

+ size_t buffer_size,

+ size_t* bytes_read) {

+ // If this stream is not really gzipped as detected by

+ // ShouldFallbackToPlain, pretend the zlib stream already ended.

+ if (ShouldFallbackToPlain()) {

+ zlib_eof_ = true;

+ gzip_header_unchecked_ = false;

+ gzip_fallback_unchecked_ = false;

+ }

+ // Require a valid gzip header when decompressing a gzip stream.

+ if (gzip_header_unchecked_ && IsGzipHeaderInvalid())

+ return ERR_CONTENT_DECODING_FAILED;

+ GzipStreamState state = Decompress(dest_buffer, buffer_size, bytes_read);

+ // If the decompressor threw an error, fail synchronously.

+ if (state == GZIP_STREAM_ERROR)

+ return ERR_CONTENT_DECODING_FAILED;

+ // Awkward special case: if Decompress returns with data still left in the

+ // input buffer, ordinarily that would imply needing more output space.

+ // However, for compatibility reasons, it is permissible to include trailing

+ // uncompressed data after the zlib stream ends. That data is copied through

+ // here by Passthrough.

+ if (*bytes_read == 0 && zlib_eof_ && buffer_->HasMoreBytes()) {

+ state = Passthrough(dest_buffer, buffer_size, bytes_read);

+ }

+ // If there was already some data buffered internally in |buffer_|,

+ // or some output buffered internally in zlib, |Decompress| can succeed

+ // synchronously. If this happens, return right here.

+ if (*bytes_read > 0)

+ return OK;

+ // GZIP_STREAM_MORE_OUTPUT_SPACE implies |*bytes_read| > 0, since Decompress

+ // will fill all available output buffer space first and the output buffer

+ // is empty coming into |Read|. Since GZIP_STREAM_ERROR is handled above,

+ // this is the only other case.

+ DCHECK_EQ(GZIP_STREAM_MORE_INPUT, state);

+ // Since Decompress needs more input, it has consumed all existing input.

+ DCHECK(!buffer_->HasMoreBytes());

+ return OK;

+// Private helpers.

+// Synchronous decompressor. This function consumes bytes from |buffer_| and

+// decompresses them into |dest_buffer| until either:

+// a) |buffer_| is empty, and it returns MORE_INPUT;

+// b) |dest_buffer| is full, and it returns MORE_OUTPUT_SPACE.

+// This decompressor will decompress a zlib stream (either gzip or deflate)

+// until the zlib EOF, then will pass any further input through untouched.

+GzipStreamSource::GzipStreamState GzipStreamSource::Decompress(

+ IOBuffer* dest_buffer,

+ size_t buffer_size,

+ size_t* bytes_output) {

+ DCHECK(dest_buffer);

+ DCHECK(buffer_size != 0);

+ if (!buffer_->HasMoreBytes()) {

+ return GZIP_STREAM_MORE_INPUT;

+ }

+ // If the zlib stream has already ended, pass any further data through.

+ if (zlib_eof_)

+ return Passthrough(dest_buffer, buffer_size, bytes_output);

+ zlib_stream_.get()->next_in = bit_cast<Bytef*>(buffer_->bytes());

+ zlib_stream_.get()->avail_in = buffer_->bytes_left();

+ zlib_stream_.get()->next_out = bit_cast<Bytef*>(dest_buffer->data());

+ zlib_stream_.get()->avail_out = buffer_size;

+ int ret = inflate(zlib_stream_.get(), Z_NO_FLUSH);

+ LOG(ERROR) << "inflate " << ret << " " << zlib_stream_.get()->avail_in << " "

+ << zlib_stream_.get()->avail_out;

+ // Sometimes misconfigured servers omit the zlib header, relying on clients to

mmenke 2016/03/04 21:15:56 Sometimes

xunjieli 2016/04/20 19:16:09 Done.

+ // splice it back in.

+ if (ret < 0 && !zlib_header_added_) {

+ zlib_header_added_ = true;

+ if (!InsertZlibHeader())

+ return GZIP_STREAM_ERROR;

+ zlib_stream_.get()->next_in = bit_cast<Bytef*>(buffer_->bytes());

+ zlib_stream_.get()->avail_in = buffer_->bytes_left();

+ zlib_stream_.get()->next_out = bit_cast<Bytef*>(dest_buffer->data());

+ zlib_stream_.get()->avail_out = buffer_size;

+ ret = inflate(zlib_stream_.get(), Z_NO_FLUSH);

+ // TODO(ellyjones): add a histogram to see how often this happens. The

+ // original bug for this behavior was ancient and maybe it doesn't happen in

+ // the wild any more?

+ }

+ size_t bytes_used = buffer_->bytes_left() - zlib_stream_.get()->avail_in;

+ size_t bytes_out = buffer_size - zlib_stream_.get()->avail_out;

+ buffer_->WasDrained(bytes_used);

+ *bytes_output = bytes_out;

+ if (ret != Z_STREAM_END && ret != Z_OK)

+ return GZIP_STREAM_ERROR;

+ // The zlib stream can end before the input stream ends. If this happens,

+ // |Decompress| will pass any further data on untouched.

+ if (ret == Z_STREAM_END)

+ zlib_eof_ = true;

+ if (!buffer_->HasMoreBytes())

+ return GZIP_STREAM_MORE_INPUT;

+ else

+ return GZIP_STREAM_MORE_OUTPUT_SPACE;

+GzipStreamSource::GzipStreamState GzipStreamSource::Passthrough(

+ IOBuffer* dest_buffer,

+ size_t buffer_size,

+ size_t* bytes_read) {

+ SkipGzipFooterIfNeeded();

+ size_t to_copy = buffer_->bytes_left();

+ if (to_copy > buffer_size)

+ to_copy = buffer_size;

+ memcpy(dest_buffer->data(), buffer_->bytes(), to_copy);

+ buffer_->WasDrained(to_copy);

+ *bytes_read = to_copy;

+ if (!buffer_->HasMoreBytes())

+ return GZIP_STREAM_MORE_INPUT;

+ else

+ return GZIP_STREAM_MORE_OUTPUT_SPACE;

+bool GzipStreamSource::InsertZlibHeader() {

+ char dummy_header[] = {0x78, 0x01};

+ char dummy_output[4];

+ inflateReset(zlib_stream_.get());

+ zlib_stream_.get()->next_in = bit_cast<Bytef*>(&dummy_header[0]);

+ zlib_stream_.get()->avail_in = sizeof(dummy_header);

+ zlib_stream_.get()->next_out = bit_cast<Bytef*>(&dummy_output[0]);

+ zlib_stream_.get()->avail_out = sizeof(dummy_output);

+ int ret = inflate(zlib_stream_.get(), Z_NO_FLUSH);

+ return ret == Z_OK;

+bool GzipStreamSource::IsGzipHeaderInvalid() {

+ const size_t kGzipFooterBytes = 8;

+ const char* end = nullptr;

+ GZipHeader::Status status =

+ gzip_header_.ReadMore(buffer_->bytes(), buffer_->bytes_left(), &end);

+ if (status == GZipHeader::INCOMPLETE_HEADER) {

+ buffer_->WasDrained(buffer_->bytes_left());

+ return false;

+ }

+ gzip_header_unchecked_ = false;

+ if (status == GZipHeader::COMPLETE_HEADER) {

+ // If there is a valid header, there should also be a valid footer.

+ gzip_footer_bytes_left_ = kGzipFooterBytes;

+ buffer_->WasDrained(end - buffer_->bytes());

+ }

+ return status == GZipHeader::INVALID_HEADER;

+// Dumb heuristic. Gzip files always start with a two-byte magic value per RFC

+// 1952 2.3.1, so if the first byte isn't the first byte of the gzip magic, and

+// this filter is checking whether it should fall back, then fall back.

+bool GzipStreamSource::ShouldFallbackToPlain() {

+ static const char kGzipFirstByte = 0x1f;

+ if (!gzip_header_unchecked_ || !gzip_fallback_unchecked_)

+ return false;

+ if (!buffer_->HasMoreBytes())

+ return false;

+ char d = buffer_->bytes()[0];

+ return d != kGzipFirstByte;

+void GzipStreamSource::SkipGzipFooterIfNeeded() {

+ if (gzip_footer_bytes_left_ == 0)

+ return;

+ size_t to_read = gzip_footer_bytes_left_;

+ if (to_read > buffer_->bytes_left())

+ to_read = buffer_->bytes_left();

+ buffer_->WasDrained(to_read);

+} // namespace net

« net/filter/gzip_stream_source.h ('K') | « net/filter/gzip_stream_source.h ('k') | net/filter/gzip_stream_source_unittest.cc » ('j') | net/filter/stream_source.h » ('J')