net/filter/gzip_source_stream.cc - Issue 2334773002: Add net::GzipSourceStream

Unified Diff: net/filter/gzip_source_stream.cc

Issue 2334773002: Add net::GzipSourceStream (Closed)

Patch Set: Address Randy's comments (synced to 92104a0503b2862f54d60473b59dd1ae145eb22b) Created 4 years, 3 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: net/filter/gzip_source_stream.cc

diff --git a/net/filter/gzip_source_stream.cc b/net/filter/gzip_source_stream.cc

new file mode 100644

index 0000000000000000000000000000000000000000..7bf55d8f4db4fa895ccf638c6e331d2432f0bab7

--- /dev/null

+++ b/net/filter/gzip_source_stream.cc

@@ -0,0 +1,223 @@

+// Use of this source code is governed by a BSD-style license that can be

+// found in the LICENSE file.

+#include "net/filter/gzip_source_stream.h"

+#include "base/bind.h"

+#include "base/bit_cast.h"

+#include "base/logging.h"

+#include "net/base/io_buffer.h"

+#include "third_party/zlib/zlib.h"

+namespace net {

+namespace {

+const char kDeflate[] = "DEFLATE";  // Returned by GetTypeAsString() for TYPE_DEFLATE.

+const char kGzip[] = "GZIP";  // Returned by GetTypeAsString() for TYPE_GZIP.

+const char kGzipFallback[] = "GZIP_FALLBACK";  // Returned by GetTypeAsString() for TYPE_GZIP_FALLBACK.

+} // namespace

+GzipSourceStream::~GzipSourceStream() {  // Frees the zlib inflate state held by |zlib_stream_|.

+ if (zlib_stream_)  // Only allocated inside Init(); skip cleanup when it is still null.

+ inflateEnd(zlib_stream_.get());

+std::unique_ptr<GzipSourceStream> GzipSourceStream::Create(  // Factory: constructs the stream, runs Init() on it, and returns nullptr if Init() fails.

+ std::unique_ptr<SourceStream> upstream,  // Ownership is moved into the new stream.

+ SourceStream::SourceType type) {

+ std::unique_ptr<GzipSourceStream> source(

+ new GzipSourceStream(std::move(upstream), type));

+ if (!source->Init())  // zlib setup failed; |source| is destroyed on return.

+ return nullptr;

+ return source;

+GzipSourceStream::GzipSourceStream(std::unique_ptr<SourceStream> upstream,  // Stores initial decoder state only; zlib setup happens in Init().

+ SourceStream::SourceType type)

+ : FilterSourceStream(type, std::move(upstream)),

+ zlib_header_added_(false),  // Set once FilterData() has retried with a spliced-in zlib header.

+ gzip_footer_bytes_left_(0),  // Set to the footer size once a complete gzip header is parsed.

+ input_state_(STATE_START) {}  // FilterData() begins dispatching from STATE_START.

+bool GzipSourceStream::Init() {  // Allocates and initializes the zlib inflate stream; returns true only when zlib reports Z_OK.

+ zlib_stream_.reset(new z_stream);

+ if (!zlib_stream_)  // NOTE(review): non-nothrow new should never yield null, so this looks unreachable - confirm.

+ return false;

+ memset(zlib_stream_.get(), 0, sizeof(z_stream));  // Zeroed zalloc/zfree/opaque make zlib use its default allocator.

+ int ret;

+ if (type() == TYPE_GZIP || type() == TYPE_GZIP_FALLBACK) {

+ ret = inflateInit2(zlib_stream_.get(), -MAX_WBITS);  // Negative windowBits selects raw deflate; the gzip header is parsed by |gzip_header_| in FilterData().

+ } else {

+ ret = inflateInit(zlib_stream_.get());  // Default mode expects a zlib-wrapped stream.

+ }

+ DCHECK_NE(Z_VERSION_ERROR, ret);  // Z_VERSION_ERROR means the zlib header and the linked library are incompatible.

+ return ret == Z_OK;

+std::string GzipSourceStream::GetTypeAsString() const {  // Maps type() to one of the file-local name constants.

+ switch (type()) {

+ case TYPE_GZIP:

+ return kGzip;

+ case TYPE_GZIP_FALLBACK:

+ return kGzipFallback;

+ case TYPE_DEFLATE:

+ return kDeflate;

+ default:  // Any other SourceType is not expected for this class.

+ NOTREACHED();

+ return "";  // Empty string is returned if the NOTREACHED() above does not stop execution.

+ }

+int GzipSourceStream::FilterData(IOBuffer* output_buffer,  // Decodes |input_buffer| into |output_buffer|; returns bytes written or a net error code.

+ int output_buffer_size,  // Capacity of |output_buffer| in bytes.

+ IOBuffer* input_buffer,

+ int input_buffer_size,  // Number of readable bytes in |input_buffer|.

+ int* consumed_bytes,  // Out: number of input bytes used by this call.

+ bool /*upstream_end_reached*/) {  // The last parameter is unused in this implementation.

+ if (input_buffer_size == 0)  // NOTE(review): returns before |*consumed_bytes| is written - confirm callers pre-initialize it.

+ return 0;

+ *consumed_bytes = 0;

+ char* input_data = input_buffer->data();  // Cursor over the unread part of the input.

+ int input_data_size = input_buffer_size;  // Bytes remaining at |input_data|.

+ int bytes_out = 0;  // Bytes written to |output_buffer| so far.

+ while (true) {  // Each pass dispatches on |input_state_|; a case either returns or breaks to dispatch again.

+ InputState state = input_state_;

+ switch (state) {  // NOTE(review): an INCOMPLETE_HEADER result keeps the state and zeroes the remaining size, so the loop re-enters STATE_GZIP_HEADER with no input - confirm this terminates.

+ case STATE_START: {  // Picks the next state from the stream type and the first input byte.

+ if (type() == TYPE_DEFLATE) {  // Deflate streams skip gzip header parsing entirely.

+ input_state_ = STATE_COMPRESSED_BODY;

+ break;

+ }

+ // If this stream is not really gzipped as detected by

+ // ShouldFallbackToPlain, pretend that the zlib stream has ended.

+ if (ShouldFallbackToPlain(input_data[0])) {  // Indexing is safe: input_buffer_size was checked to be nonzero above.

Randy Smith (Not in Mondays) 2016/09/23 20:53:47 Suggestion: I'll call out that this is assuming th

xunjieli 2016/09/26 15:29:43 Done. I added a DCHECK.

+ input_state_ = STATE_UNCOMPRESSED_BODY;

+ } else {

+ input_state_ = STATE_GZIP_HEADER;

+ }

+ break;

+ }

+ case STATE_GZIP_HEADER: {

+ const size_t kGzipFooterBytes = 8;

+ const char* end = nullptr;

+ GZipHeader::Status status =

+ gzip_header_.ReadMore(input_data, input_data_size, &end);

+ if (status == GZipHeader::INCOMPLETE_HEADER) {

+ input_data += input_data_size;

+ input_data_size = 0;

+ } else if (status == GZipHeader::COMPLETE_HEADER) {

+ // If there is a valid header, there should also be a valid footer.

+ gzip_footer_bytes_left_ = kGzipFooterBytes;

+ int bytes_consumed = end - input_data;

+ input_data += bytes_consumed;

+ input_data_size -= bytes_consumed;

Randy Smith (Not in Mondays) 2016/09/23 20:53:47 If this drops input_data_size to zero, we want to

xunjieli 2016/09/26 15:29:43 Done. This is a bug! Thanks for catching it. There

+ input_state_ = STATE_COMPRESSED_BODY;

+ } else if (status == GZipHeader::INVALID_HEADER) {

+ return ERR_CONTENT_DECODING_FAILED;

+ }

+ break;

+ }

+ case STATE_COMPRESSED_BODY: {

+ DCHECK_LE(0, input_data_size);

+ zlib_stream_.get()->next_in = bit_cast<Bytef*>(input_data);

+ zlib_stream_.get()->avail_in = input_data_size;

+ zlib_stream_.get()->next_out = bit_cast<Bytef*>(output_buffer->data());

+ zlib_stream_.get()->avail_out = output_buffer_size;

Randy Smith (Not in Mondays) 2016/09/23 20:53:47 Suggestion: The fact that {next,avail}_out are bei

xunjieli 2016/09/26 15:29:43 Done.

+ int ret = inflate(zlib_stream_.get(), Z_NO_FLUSH);

+ // Sometimes misconfigured servers omit the zlib header, relying on

+ // clients to splice it back in.

+ if (ret < 0 && !zlib_header_added_) {

+ zlib_header_added_ = true;

+ if (!InsertZlibHeader())

+ return ERR_CONTENT_DECODING_FAILED;

+ zlib_stream_.get()->next_in = bit_cast<Bytef*>(input_data);

+ zlib_stream_.get()->avail_in = input_data_size;

+ zlib_stream_.get()->next_out =

+ bit_cast<Bytef*>(output_buffer->data());

+ zlib_stream_.get()->avail_out = output_buffer_size;

+ ret = inflate(zlib_stream_.get(), Z_NO_FLUSH);

+ // TODO(xunjieli): add a histogram to see how often this happens. The

+ // original bug for this behavior was ancient and maybe it doesn't

+ // happen in the wild any more? crbug.com/649339

+ }

+ int bytes_used = input_data_size - zlib_stream_.get()->avail_in;  // zlib leaves the unread input count in avail_in.

+ bytes_out = output_buffer_size - zlib_stream_.get()->avail_out;  // zlib leaves the unused output space in avail_out.

+ input_data_size -= bytes_used;

+ input_data += bytes_used;

+ if (ret != Z_STREAM_END && ret != Z_OK)  // Every other zlib result is reported as a decoding failure.

+ return ERR_CONTENT_DECODING_FAILED;

Randy Smith (Not in Mondays) 2016/09/23 20:53:47 nit, suggestion: Is there a reason this isn't up a

xunjieli 2016/09/26 15:29:43 Done.

+ if (ret == Z_STREAM_END) {  // Compressed body is complete; loop again to handle any trailing input.

+ input_state_ = STATE_GZIP_FOOTER;

+ break;

+ }

+ *consumed_bytes = input_buffer_size - input_data_size;

+ return bytes_out;

+ }

+ case STATE_GZIP_FOOTER: {  // Skips whatever footer bytes are available in the current input.

+ size_t footer_bytes_skipped = NumGzipFooterBytesToSkip(input_data_size);

+ input_data_size -= footer_bytes_skipped;

+ input_data += footer_bytes_skipped;

+ input_state_ = STATE_UNCOMPRESSED_BODY;  // NOTE(review): the state advances even if fewer than gzip_footer_bytes_left_ bytes were available - confirm leftover footer bytes are not passed through as body.

Randy Smith (Not in Mondays) 2016/09/23 20:53:47 NumGzipFooterBytesToSkip() handles being called re

xunjieli 2016/09/26 15:29:43 Done. Good catch. This is a bug. The newly added S

+ break;

+ }

+ case STATE_UNCOMPRESSED_BODY: {  // Copies remaining input to the output unchanged.

+ int to_copy = std::min(input_data_size, output_buffer_size - bytes_out);  // Bounded by both the unread input and the free output space.

Randy Smith (Not in Mondays) 2016/09/23 20:53:47 Suggestion: I don't know if it's worth it, but if

xunjieli 2016/09/26 15:29:43 Acknowledged. The new code is less clustered. |out

+ memcpy(output_buffer->data() + bytes_out, input_data, to_copy);

+ input_data_size -= to_copy;

+ input_data += to_copy;

+ *consumed_bytes = input_buffer_size - input_data_size;

+ return bytes_out + to_copy;

+ }

+ NOTREACHED();

+ return ERR_UNEXPECTED;

+bool GzipSourceStream::InsertZlibHeader() {  // Resets the inflater and feeds it a synthetic zlib header; returns true if inflate() reports Z_OK.

+ char dummy_header[] = {0x78, 0x01};  // Two-byte zlib header: CMF 0x78 (deflate, 32K window), FLG 0x01.

+ char dummy_output[4];  // Scratch output space handed to inflate(); its contents are never read.

+ inflateReset(zlib_stream_.get());  // Discards state left over from the earlier inflate() attempt.

+ zlib_stream_.get()->next_in = bit_cast<Bytef*>(&dummy_header[0]);

+ zlib_stream_.get()->avail_in = sizeof(dummy_header);

+ zlib_stream_.get()->next_out = bit_cast<Bytef*>(&dummy_output[0]);

+ zlib_stream_.get()->avail_out = sizeof(dummy_output);

+ int ret = inflate(zlib_stream_.get(), Z_NO_FLUSH);

+ return ret == Z_OK;

+// Dumb heuristic. Gzip files always start with a two-byte magic value per RFC

+// 1952 2.3.1, so if the first byte isn't the first byte of the gzip magic, and

+// this filter is checking whether it should fall back, then fall back.

+bool GzipSourceStream::ShouldFallbackToPlain(char first_byte) {

+ if (type() != TYPE_GZIP_FALLBACK)  // Only the fallback type is ever allowed to pass data through undecoded.

+ return false;

+ static const char kGzipFirstByte = 0x1f;  // ID1, the first gzip magic byte in RFC 1952.

+ return first_byte != kGzipFirstByte;

+size_t GzipSourceStream::NumGzipFooterBytesToSkip(int input_buffer_size) {  // Returns how many of the available bytes are footer, and deducts them from gzip_footer_bytes_left_.

+ if (gzip_footer_bytes_left_ == 0)  // No footer outstanding, so nothing is skipped.

+ return 0;

+ size_t to_read = gzip_footer_bytes_left_;

+ if (to_read > base::checked_cast<size_t>(input_buffer_size))  // NOTE(review): base::checked_cast is used but no safe_conversions include is visible in this file - confirm.

+ to_read = input_buffer_size;  // Clamp to the bytes actually available in this call.

+ gzip_footer_bytes_left_ -= to_read;  // The remainder is left for a later call.

+ return to_read;

+} // namespace net

« no previous file with comments | « net/filter/gzip_source_stream.h ('k') | net/filter/gzip_source_stream_unittest.cc » ('j') | net/filter/gzip_source_stream_unittest.cc » ('J')