Chromium Code Reviews| Index: net/filter/gzip_source_stream.cc |
| diff --git a/net/filter/gzip_source_stream.cc b/net/filter/gzip_source_stream.cc |
| new file mode 100644 |
| index 0000000000000000000000000000000000000000..81613d13098ebac5b0ecf5fe6795c729003c099e |
| --- /dev/null |
| +++ b/net/filter/gzip_source_stream.cc |
| @@ -0,0 +1,248 @@ |
| +// Copyright 2016 The Chromium Authors. All rights reserved. |
| +// Use of this source code is governed by a BSD-style license that can be |
| +// found in the LICENSE file. |
| + |
| +#include "net/filter/gzip_source_stream.h" |
| + |
| +#include "base/bind.h" |
| +#include "base/bit_cast.h" |
| +#include "base/logging.h" |
| +#include "net/base/io_buffer.h" |
| +#include "third_party/zlib/zlib.h" |
| + |
| +namespace net { |
| + |
| +namespace { |
| + |
| +// Strings returned by GzipSourceStream::GetTypeAsString(), one for each |
| +// source stream type that function recognizes. |
| +constexpr char kGzip[] = "GZIP"; |
| +constexpr char kGzipFallback[] = "GZIP_FALLBACK"; |
| +constexpr char kDeflate[] = "DEFLATE"; |
| + |
| +}  // namespace |
| + |
| +// Tears down zlib's inflate state if a stream object is currently held. |
| +GzipSourceStream::~GzipSourceStream() { |
| +  z_stream* const stream = zlib_stream_.get(); |
| +  if (stream != nullptr) |
| +    inflateEnd(stream); |
| +} |
| + |
| +// Builds a GzipSourceStream that reads from |previous| and decodes according |
| +// to |mode|. Construction is two-phase: the object is created first and then |
| +// Init() sets up zlib. Returns nullptr when Init() fails. |
| +std::unique_ptr<GzipSourceStream> GzipSourceStream::Create( |
| +    std::unique_ptr<SourceStream> previous, |
| +    GzipSourceStreamMode mode) { |
| +  std::unique_ptr<GzipSourceStream> stream( |
| +      new GzipSourceStream(std::move(previous), mode)); |
| +  if (stream->Init()) |
| +    return stream; |
| +  return nullptr; |
| +} |
| + |
| +// Stores |mode| and puts the decoding flags in their initial state: the zlib |
| +// stream has not ended, no dummy zlib header has been inserted, the gzip |
| +// header still has to be checked, and no gzip footer bytes are pending. The |
| +// first-byte fallback check is armed only for |
| +// GZIP_SOURCE_STREAM_GZIP_WITH_FALLBACK. zlib itself is set up later by |
| +// Init(). |
| +// NOTE(review): the base class is always given SourceStream::TYPE_GZIP here, |
| +// regardless of |mode|, while GetTypeAsString() also handles |
| +// TYPE_GZIP_FALLBACK and TYPE_DEFLATE -- confirm this is intended. |
| +GzipSourceStream::GzipSourceStream(std::unique_ptr<SourceStream> previous, |
| + GzipSourceStreamMode mode) |
| + : FilterSourceStream(SourceStream::TYPE_GZIP, std::move(previous)), |
| + mode_(mode), |
| + zlib_eof_(false), |
| + zlib_header_added_(false), |
| + should_check_gzip_header_(true), |
| + should_check_first_byte_(mode == GZIP_SOURCE_STREAM_GZIP_WITH_FALLBACK), |
| + gzip_footer_bytes_left_(0) {} |
| + |
| +// Allocates the zlib stream and prepares it for inflation according to |
| +// |mode_|. Returns true on success. On failure returns false and releases the |
| +// stream, so the destructor never passes a stream that zlib did not |
| +// initialize to inflateEnd(). |
| +bool GzipSourceStream::Init() { |
| +  // Value-initialization zeroes the plain C z_stream struct, leaving zalloc, |
| +  // zfree and opaque null as zlib expects before inflateInit*() is called. |
| +  // (operator new does not return null, so no null check is needed here.) |
| +  zlib_stream_.reset(new z_stream()); |
| + |
| +  int ret = Z_OK; |
| +  if (mode_ == GZIP_SOURCE_STREAM_GZIP || |
| +      mode_ == GZIP_SOURCE_STREAM_GZIP_WITH_FALLBACK) { |
| +    // Negative window bits make zlib inflate a raw deflate stream; the gzip |
| +    // header is validated separately by IsGzipHeaderInvalid(). |
| +    ret = inflateInit2(zlib_stream_.get(), -MAX_WBITS); |
| +  } else { |
| +    // Any other mode carries no gzip header, so skip the header check. |
| +    should_check_gzip_header_ = false; |
| +    ret = inflateInit(zlib_stream_.get()); |
| +  } |
| + |
| +  if (ret != Z_OK) { |
| +    zlib_stream_.reset(); |
| +    return false; |
| +  } |
| +  return true; |
| +} |
| + |
| +// Maps this stream's type() to its string name. Hitting a type other than the |
| +// three handled below is treated as a programming error (NOTREACHED) and |
| +// yields an empty string. |
| +std::string GzipSourceStream::GetTypeAsString() const { |
| +  const auto stream_type = type(); |
| +  if (stream_type == TYPE_GZIP) |
| +    return kGzip; |
| +  if (stream_type == TYPE_GZIP_FALLBACK) |
| +    return kGzipFallback; |
| +  if (stream_type == TYPE_DEFLATE) |
| +    return kDeflate; |
| + |
| +  NOTREACHED(); |
| +  return ""; |
| +} |
| + |
| +// Decodes up to |input_buffer_size| bytes from |input_buffer| into |
| +// |output_buffer|. |*consumed_bytes| is reset to 0 and then set to the number |
| +// of input bytes used by the header check and by Decompress(). Returns the |
| +// value produced by Decompress() (bytes written, or an error), or |
| +// ERR_CONTENT_DECODING_FAILED if the gzip header is invalid. The |
| +// upstream-end flag is not used by this implementation. |
| +int GzipSourceStream::FilterData(IOBuffer* output_buffer, |
| + int output_buffer_size, |
| + IOBuffer* input_buffer, |
| + int input_buffer_size, |
| + int* consumed_bytes, |
| + bool /*upstream_end_reached*/) { |
| + // If this stream is not really gzipped as detected by ShouldFallbackToPlain, |
| + // pretend that the zlib stream has already ended. |
| + if (input_buffer_size > 0 && ShouldFallbackToPlain(input_buffer->data()[0])) { |
| + zlib_eof_ = true; |
| + should_check_gzip_header_ = false; |

Randy Smith (Not in Mondays)
2016/09/14 22:25:23
Suggestion: I'm finding myself a bit uncomfortabl
xunjieli
2016/09/19 13:57:03
Done. Great idea!
|
| + } |
| + |
| + *consumed_bytes = 0; |
| + // Require a valid gzip header when decompressing a gzip stream. |
| + if (should_check_gzip_header_ && |
| + IsGzipHeaderInvalid(input_buffer->data(), input_buffer_size, |
| + consumed_bytes)) { |
| + return ERR_CONTENT_DECODING_FAILED; |
| + } |
| + |
| + // Decode whatever follows the bytes the header check consumed; Decompress() |
| + // adds the bytes it uses on top of |*consumed_bytes|. |
| + int bytes_read = Decompress( |
| + output_buffer, output_buffer_size, input_buffer->data() + *consumed_bytes, |
| + input_buffer_size - *consumed_bytes, consumed_bytes); |
| + |
| + // If output is 0 byte length, it means that the filter must have consumed |
| + // all input. |
| + DCHECK(bytes_read != 0 || *consumed_bytes == input_buffer_size); |
| + return bytes_read; |
| +} |
| + |
| +// Inflates |input_buffer| into |output_buffer| with zlib and adds the number |
| +// of input bytes used to |*consumed_bytes|. Returns the number of bytes |
| +// written to |output_buffer|, 0 when there is no input, or |
| +// ERR_CONTENT_DECODING_FAILED when zlib reports an error. Once the zlib |
| +// stream has ended, all remaining input is handed to Passthrough(). |
| +int GzipSourceStream::Decompress(IOBuffer* output_buffer, |
| + int output_buffer_size, |
| + char* input_buffer, |
| + int input_buffer_size, |
| + int* consumed_bytes) { |
| + DCHECK(output_buffer); |
| + DCHECK_NE(0, output_buffer_size); |
| + |
| + if (input_buffer_size == 0) |
| + return 0; |
| + |
| + // If the zlib stream has already ended, pass any further data through. |
| + if (zlib_eof_) { |
| + return Passthrough(output_buffer->data(), output_buffer_size, input_buffer, |
| + input_buffer_size, consumed_bytes); |
| + } |
| + |
| + zlib_stream_.get()->next_in = bit_cast<Bytef*>(input_buffer); |
| + zlib_stream_.get()->avail_in = input_buffer_size; |
| + zlib_stream_.get()->next_out = bit_cast<Bytef*>(output_buffer->data()); |
| + zlib_stream_.get()->avail_out = output_buffer_size; |
| + |
| + int ret = inflate(zlib_stream_.get(), Z_NO_FLUSH); |
| + |
| + // Sometimes misconfigured servers omit the zlib header, relying on clients |
| + // to splice it back in. |

Randy Smith (Not in Mondays)
2016/09/14 22:25:23
It looks like this would happen anywhere in the st
xunjieli
2016/09/19 13:57:03
I believe it should only show up in the beginning.
Randy Smith (Not in Mondays)
2016/09/21 20:57:05
Gotcha--I think I understand better now. I'm happ
xunjieli
2016/09/22 17:20:49
Acknowledged. Thanks! I filed a bug and linked it
|
| + // This recovery is attempted at most once per stream: the flag is set before |
| + // the retry, so a later zlib error is reported as a decoding failure. |
| + if (ret < 0 && !zlib_header_added_) { |
| + zlib_header_added_ = true; |
| + if (!InsertZlibHeader()) |
| + return ERR_CONTENT_DECODING_FAILED; |
| + |
| + // Point zlib back at the start of this call's input and output and retry. |
| + zlib_stream_.get()->next_in = bit_cast<Bytef*>(input_buffer); |
| + zlib_stream_.get()->avail_in = input_buffer_size; |
| + zlib_stream_.get()->next_out = bit_cast<Bytef*>(output_buffer->data()); |
| + zlib_stream_.get()->avail_out = output_buffer_size; |
| + |
| + ret = inflate(zlib_stream_.get(), Z_NO_FLUSH); |
| + // TODO(xunjieli): add a histogram to see how often this happens. The |
| + // original bug for this behavior was ancient and maybe it doesn't happen |
| + // in the wild any more? |

Randy Smith (Not in Mondays)
2016/09/14 22:25:23
+1 (though it's fine if you don't want to do that
xunjieli
2016/09/19 13:57:03
Acknowledged. I am planning to do it in a followup
|
| + } |
| + |
| + // inflate() decrements avail_in/avail_out as it goes, so the differences are |
| + // the input bytes it used and the output bytes it produced. |
| + size_t bytes_used = input_buffer_size - zlib_stream_.get()->avail_in; |
| + size_t bytes_out = output_buffer_size - zlib_stream_.get()->avail_out; |
| + |
| + *consumed_bytes += base::checked_cast<int>(bytes_used); |
| + |
| + if (ret != Z_STREAM_END && ret != Z_OK) |
| + return ERR_CONTENT_DECODING_FAILED; |

Randy Smith (Not in Mondays)
2016/09/14 22:25:23
Do we have any documentation in the interface cont
xunjieli
2016/09/19 13:57:03
The interface documentation says that ERR_CONTENT_
|
| + |
| + // The zlib stream can end before the input stream ends. If this happens, |
| + // |Decompress| will pass any further data on untouched. |
| + if (ret == Z_STREAM_END) { |
| + zlib_eof_ = true; |
| + return bytes_out + Passthrough(output_buffer->data() + bytes_out, |
| + output_buffer_size - bytes_out, |
| + input_buffer + bytes_used, |
| + input_buffer_size - bytes_used, |
| + consumed_bytes); |
| + } |
| + return bytes_out; |
| +} |
| + |
| +// Copies input that follows the end of the zlib stream to the output without |
| +// decoding it, after discarding any gzip footer bytes that are still pending. |
| +// At most |output_buffer_size| bytes are copied. Adds the number of input |
| +// bytes handled (footer bytes skipped plus bytes copied) to |*consumed_bytes| |
| +// and returns the number of bytes written to |output_buffer|. |
| +size_t GzipSourceStream::Passthrough(char* output_buffer, |
| +                                     int output_buffer_size, |
| +                                     char* input_buffer, |
| +                                     int input_buffer_size, |
| +                                     int* consumed_bytes) { |
| +  size_t footer_bytes_skipped = NumGzipFooterBytesToSkip(input_buffer_size); |
| +  size_t to_copy = input_buffer_size - footer_bytes_skipped; |
| +  if (to_copy > base::checked_cast<size_t>(output_buffer_size)) |
| +    to_copy = output_buffer_size; |
| + |
| +  // The pending footer bytes sit at the front of the input, so the data to |
| +  // pass through starts right after them. Copying from |input_buffer| itself |
| +  // would emit the footer and drop the tail of the real data. |
| +  memcpy(output_buffer, input_buffer + footer_bytes_skipped, to_copy); |
| + |
| +  *consumed_bytes += base::checked_cast<int>(to_copy + footer_bytes_skipped); |
| +  return to_copy; |
| +} |
| + |
| +// Resets the inflate state and feeds zlib a two-byte dummy zlib header so |
| +// that input which arrived without one can be retried. Returns true if zlib |
| +// reports Z_OK for the dummy header. |
| +bool GzipSourceStream::InsertZlibHeader() { |
| +  char header[] = {0x78, 0x01}; |
| +  char scratch[4]; |
| +  z_stream* const stream = zlib_stream_.get(); |
| + |
| +  inflateReset(stream); |
| +  stream->next_in = bit_cast<Bytef*>(&header[0]); |
| +  stream->avail_in = sizeof(header); |
| +  stream->next_out = bit_cast<Bytef*>(&scratch[0]); |
| +  stream->avail_out = sizeof(scratch); |
| + |
| +  return inflate(stream, Z_NO_FLUSH) == Z_OK; |
| +} |
| + |
| +// Feeds |input_buffer| to the incremental gzip header parser. Returns true |
| +// only when the parser reports an invalid header. While the header is still |
| +// incomplete, all of the input is counted as consumed and checking continues |
| +// on the next call. Once the header is complete, the bytes it occupied are |
| +// added to |*consumed_bytes| and a gzip footer is scheduled to be skipped. |
| +bool GzipSourceStream::IsGzipHeaderInvalid(char* input_buffer, |
| +                                           int input_buffer_size, |
| +                                           int* consumed_bytes) { |
| +  const size_t kGzipFooterBytes = 8; |
| + |
| +  const char* header_end = nullptr; |
| +  const GZipHeader::Status status = |
| +      gzip_header_.ReadMore(input_buffer, input_buffer_size, &header_end); |
| + |
| +  if (status == GZipHeader::INCOMPLETE_HEADER) { |
| +    *consumed_bytes += input_buffer_size; |
| +    return false; |
| +  } |
| + |
| +  // The parser reached a verdict, so no further header checks are needed. |
| +  should_check_gzip_header_ = false; |
| +  if (status != GZipHeader::COMPLETE_HEADER) |
| +    return status == GZipHeader::INVALID_HEADER; |
| + |
| +  // If there is a valid header, there should also be a valid footer. |
| +  gzip_footer_bytes_left_ = kGzipFooterBytes; |
| +  *consumed_bytes += header_end - input_buffer; |
| +  return false; |
| +} |
| + |
| +// Simple heuristic: per RFC 1952 section 2.3.1 a gzip file always begins with |
| +// a two-byte magic value. While both the first-byte check and the gzip header |
| +// check are still pending, a |first_byte| that differs from the first magic |
| +// byte means the data should be treated as plain. The first-byte check is |
| +// performed at most once. |
| +bool GzipSourceStream::ShouldFallbackToPlain(char first_byte) { |
| +  static const char kGzipFirstByte = 0x1f; |
| + |
| +  const bool check_pending = |
| +      should_check_first_byte_ && should_check_gzip_header_; |
| +  if (!check_pending) |
| +    return false; |
| + |
| +  should_check_first_byte_ = false; |
| +  return first_byte != kGzipFirstByte; |
| +} |
| + |
| +// Returns how many of the still-pending gzip footer bytes can be skipped |
| +// given |input_buffer_size| bytes of available input, and deducts that amount |
| +// from the pending count. |
| +size_t GzipSourceStream::NumGzipFooterBytesToSkip(int input_buffer_size) { |
| +  if (gzip_footer_bytes_left_ == 0) |
| +    return 0; |
| + |
| +  const size_t pending = gzip_footer_bytes_left_; |
| +  const size_t available = base::checked_cast<size_t>(input_buffer_size); |
| +  const size_t skipped = pending > available ? available : pending; |
| + |
| +  gzip_footer_bytes_left_ -= skipped; |
| +  return skipped; |
| +} |
| + |
| +} // namespace net |