Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(356)

Unified Diff: net/filter/gzip_stream_source.cc

Issue 1662763002: [ON HOLD] Implement pull-based design for content decoding (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Created 4 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: net/filter/gzip_stream_source.cc
diff --git a/net/filter/gzip_stream_source.cc b/net/filter/gzip_stream_source.cc
new file mode 100644
index 0000000000000000000000000000000000000000..2c5d9a5a1555f774b6953e8934c844c274873211
--- /dev/null
+++ b/net/filter/gzip_stream_source.cc
@@ -0,0 +1,316 @@
+// Copyright 2016 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "base/bind.h"
+#include "base/bit_cast.h"
+#include "net/filter/block_buffer.h"
+#include "net/filter/gzip_stream_source.h"
+#include "third_party/zlib/zlib.h"
+
+namespace net {
+
+// Takes ownership of |previous|, the upstream source this object reads its
+// compressed input from. All flags start out false and all counters at zero.
+// Init() must be called before the first Read(), because it is what creates
+// the zlib stream that decompression uses.
+GzipStreamSource::GzipStreamSource(scoped_ptr<StreamSource> previous)
+    : buffer_(new BlockBuffer()),
+      previous_(std::move(previous)),
+      zlib_eof_(false),
+      zlib_header_added_(false),
+      gzip_header_unchecked_(false),
+      // Init() only assigns this flag in gzip mode, so give it a defined value
+      // here. NOTE(review): keep this entry in the same position as the member
+      // declaration in the header to avoid a -Wreorder warning.
+      gzip_fallback_unchecked_(false),
+      gzip_footer_bytes_left_(0),
+      total_bytes_output_(0) {}
+
+// Tears down zlib's inflate state, provided Init() ever allocated the stream.
+GzipStreamSource::~GzipStreamSource() {
+  z_stream* const stream = zlib_stream_.get();
+  if (stream != nullptr)
+    inflateEnd(stream);
+}
+
+// Allocates the zlib stream and initializes it for |mode|.
+//
+// In GZIP_STREAM_SOURCE_GZIP mode this class parses the gzip header itself, so
+// zlib is initialized with negative window bits (raw deflate, no wrapper), and
+// |gzip_fallback| controls whether Read() may treat input that does not begin
+// with the gzip magic byte as plain, uncompressed data. In any other mode zlib
+// is initialized with its defaults and |gzip_fallback| is ignored.
+//
+// Returns true on success, false if zlib initialization fails.
+bool GzipStreamSource::Init(GzipStreamSourceMode mode, bool gzip_fallback) {
+  // A plain new-expression never yields null, so no null check is needed.
+  zlib_stream_.reset(new z_stream);
+  memset(zlib_stream_.get(), 0, sizeof(z_stream));
+
+  const bool is_gzip = (mode == GZIP_STREAM_SOURCE_GZIP);
+
+  // Assign both flags in every mode so neither is left stale or indeterminate.
+  gzip_header_unchecked_ = is_gzip;
+  gzip_fallback_unchecked_ = is_gzip && gzip_fallback;
+
+  const int result = is_gzip ? inflateInit2(zlib_stream_.get(), -MAX_WBITS)
+                             : inflateInit(zlib_stream_.get());
+  return result == Z_OK;
+}
+
+// Pulls decoded bytes into |dest_buffer|, which has room for |buffer_size|
+// bytes, and reports the number of bytes written through |bytes_read|.
+//
+// Returns:
+//  * OK with |*bytes_read| > 0 when data was produced synchronously.
+//  * OK with |*bytes_read| == 0 when the previous source reported EOF.
+//  * ERR_CONTENT_DECODING_FAILED on an invalid gzip header or a zlib error.
+//  * Otherwise whatever non-OK error the previous source's Read() returned.
+//    For ERR_IO_PENDING, OnReadComplete() finishes the read later and runs
+//    |callback|; |dest_buffer| is kept alive through |pending_read_buffer_|.
+Error GzipStreamSource::Read(IOBuffer* dest_buffer,
+                             size_t buffer_size,
+                             size_t* bytes_read,
+                             const OnReadCompleteCallback& callback) {
+  *bytes_read = 0;
+
+  // Loop on reading the previous source until either:
+  // * Decompress() returns some data, in which case this method completes
+  //   synchronously, or
+  // * Read() does not complete synchronously, in which case OnReadComplete() is
+  //   responsible for finishing the decompression.
+  Error error;
+  do {
+    // If this stream is not really gzipped as detected by
+    // ShouldFallbackToPlain, pretend the zlib stream already ended.
+    if (ShouldFallbackToPlain()) {
+      zlib_eof_ = true;
+      gzip_header_unchecked_ = false;
+      gzip_fallback_unchecked_ = false;
+    }
+
+    // Require a valid gzip header when decompressing a gzip stream.
+    if (gzip_header_unchecked_ && IsGzipHeaderInvalid())
+      return ERR_CONTENT_DECODING_FAILED;
+
+    GzipStreamState state = Decompress(dest_buffer, buffer_size, bytes_read);
+
+    // If the decompressor threw an error, fail synchronously.
+    if (state == GZIP_STREAM_ERROR)
+      return ERR_CONTENT_DECODING_FAILED;
+
+    // Awkward special case: if Decompress returns with data still left in the
+    // input buffer, ordinarily that would imply needing more output space.
+    // However, for compatibility reasons, it is permissible to include trailing
+    // uncompressed data after the zlib stream ends. That data is copied through
+    // here by Passthrough.
+    if (*bytes_read == 0 && zlib_eof_ && buffer_->HasMoreBytes()) {
+      state = Passthrough(dest_buffer, buffer_size, bytes_read);
+    }
+
+    // If there was already some data buffered internally in |buffer_|,
+    // or some output buffered internally in zlib, |Decompress| can succeed
+    // synchronously. If this happens, return right here.
+    if (*bytes_read > 0) {
+      total_bytes_output_ += *bytes_read;
+      return OK;
+    }
+
+    // GZIP_STREAM_MORE_OUTPUT_SPACE implies |*bytes_read| > 0, since Decompress
+    // will fill all available output buffer space first and the output buffer
+    // is empty coming into |Read|. Since GZIP_STREAM_ERROR is handled above,
+    // this is the only other case.
+    DCHECK_EQ(GZIP_STREAM_MORE_INPUT, state);
+
+    // Since Decompress needs more input, it has consumed all existing input.
+    DCHECK(!buffer_->HasMoreBytes());
+
+    // Dispatch a read to refill the input buffer. The count starts at zero so
+    // the EOF check below never reads an indeterminate value, even if the
+    // previous source returns OK without writing to it.
+    size_t previous_bytes_read = 0;
Randy Smith (Not in Mondays) 2016/02/08 23:28:42 nit: Initialize to zero.
xunjieli 2016/03/03 23:00:08 Done.
+    error = previous_->Read(
+        buffer_->buffer(), buffer_->size(), &previous_bytes_read,
+        base::Bind(&GzipStreamSource::OnReadComplete, base::Unretained(this),
+                   callback, base::Unretained(dest_buffer), buffer_size));
+
+    // OK with 0 bytes read means EOF. Since the buffer is already empty, and
+    // Decompress already failed to return any more data, this source is also
+    // at EOF. Just return that synchronously.
+    if (error == OK && previous_bytes_read == 0)
+      return OK;
+
+    // If the underlying read completed synchronously, mark the buffer as
+    // refilled and try again.
+    if (error == OK)
+      buffer_->WasRefilled(previous_bytes_read);
+  } while (error == OK);
+
+  // Hold a reference to the destination until OnReadComplete() runs.
+  if (error == ERR_IO_PENDING)
+    pending_read_buffer_ = dest_buffer;
+
+  return error;
+}
+
+// Private helpers.
+
+// Synchronous decompressor. This function consumes bytes from |buffer_| and
+// decompresses them into |dest_buffer| until either:
+//  a) |buffer_| is empty, and it returns GZIP_STREAM_MORE_INPUT;
+//  b) input remains in |buffer_| after the inflate call, and it returns
+//     GZIP_STREAM_MORE_OUTPUT_SPACE; or
+//  c) zlib reports a result other than Z_OK / Z_STREAM_END, and it returns
+//     GZIP_STREAM_ERROR.
+// The number of bytes written to |dest_buffer| is stored in |*bytes_output|
+// whenever inflate was attempted (including on error) or data was passed
+// through; it is left untouched when |buffer_| was already empty on entry.
+// This decompressor will decompress a zlib stream (either gzip or deflate)
+// until the zlib EOF, then will pass any further input through untouched.
+GzipStreamSource::GzipStreamState GzipStreamSource::Decompress(
+    IOBuffer* dest_buffer,
+    size_t buffer_size,
+    size_t* bytes_output) {
+  DCHECK(dest_buffer);
+  DCHECK(buffer_size != 0);
+
+  if (!buffer_->HasMoreBytes())
+    return GZIP_STREAM_MORE_INPUT;
+
+  // If the zlib stream has already ended, pass any further data through.
+  if (zlib_eof_)
+    return Passthrough(dest_buffer, buffer_size, bytes_output);
+
+  z_stream* const stream = zlib_stream_.get();
+
+  stream->next_in = bit_cast<Bytef*>(buffer_->bytes());
+  stream->avail_in = buffer_->bytes_left();
+  stream->next_out = bit_cast<Bytef*>(dest_buffer->data());
+  stream->avail_out = buffer_size;
+
+  int ret = inflate(stream, Z_NO_FLUSH);
+
+  // Sometimes misconfigured servers omit the zlib header, relying on clients
+  // to splice it back in. On the first failure only, reset the stream, feed it
+  // a synthetic header and retry with the same input and output buffers.
+  if (ret < 0 && !zlib_header_added_) {
+    zlib_header_added_ = true;
+    if (!InsertZlibHeader())
+      return GZIP_STREAM_ERROR;
+
+    stream->next_in = bit_cast<Bytef*>(buffer_->bytes());
+    stream->avail_in = buffer_->bytes_left();
+    stream->next_out = bit_cast<Bytef*>(dest_buffer->data());
+    stream->avail_out = buffer_size;
+
+    ret = inflate(stream, Z_NO_FLUSH);
+    // TODO(ellyjones): add a histogram to see how often this happens. The
+    // original bug for this behavior was ancient and maybe it doesn't happen in
+    // the wild any more?
+  }
+
+  // Work out how much zlib consumed and produced from what it left behind.
+  const size_t bytes_used = buffer_->bytes_left() - stream->avail_in;
+  const size_t bytes_out = buffer_size - stream->avail_out;
+
+  buffer_->WasDrained(bytes_used);
+  *bytes_output = bytes_out;
+
+  if (ret != Z_STREAM_END && ret != Z_OK)
+    return GZIP_STREAM_ERROR;
+
+  // The zlib stream can end before the input stream ends. If this happens,
+  // |Decompress| will pass any further data on untouched.
+  if (ret == Z_STREAM_END)
+    zlib_eof_ = true;
+
+  if (!buffer_->HasMoreBytes())
+    return GZIP_STREAM_MORE_INPUT;
+  return GZIP_STREAM_MORE_OUTPUT_SPACE;
+}
+
+// Copies buffered input into |dest_buffer| without decoding it, after first
+// giving SkipGzipFooterIfNeeded() the chance to discard footer bytes. At most
+// |buffer_size| bytes are copied; the count is stored in |*bytes_read|.
+// Returns GZIP_STREAM_MORE_INPUT once |buffer_| is empty, otherwise
+// GZIP_STREAM_MORE_OUTPUT_SPACE.
+GzipStreamSource::GzipStreamState GzipStreamSource::Passthrough(
+    IOBuffer* dest_buffer,
+    size_t buffer_size,
+    size_t* bytes_read) {
+  SkipGzipFooterIfNeeded();
+
+  const size_t available = buffer_->bytes_left();
+  const size_t copy_size = available < buffer_size ? available : buffer_size;
+
+  memcpy(dest_buffer->data(), buffer_->bytes(), copy_size);
+  buffer_->WasDrained(copy_size);
+  *bytes_read = copy_size;
+
+  return buffer_->HasMoreBytes() ? GZIP_STREAM_MORE_OUTPUT_SPACE
+                                 : GZIP_STREAM_MORE_INPUT;
+}
+
+// Completion handler for an asynchronous read of the previous source that was
+// started by Read(). |error| and |bytes_read| are the result of that read;
+// |dest_buffer| / |dest_buffer_size| describe the caller's output buffer, and
+// |callback| is the caller's completion callback.
+//
+// On failure or EOF the result is forwarded to |callback| directly. Otherwise
+// the input buffer is marked as refilled and Read() is re-entered; |callback|
+// is run here unless that nested Read() goes asynchronous again.
+void GzipStreamSource::OnReadComplete(const OnReadCompleteCallback& callback,
+                                      IOBuffer* dest_buffer,
+                                      size_t dest_buffer_size,
+                                      Error error,
+                                      size_t bytes_read) {
+  DCHECK(!buffer_->HasMoreBytes());
+  DCHECK_EQ(dest_buffer, pending_read_buffer_.get());
+
+  // Take a ref for the lifetime of this function.
+  scoped_refptr<IOBuffer> dest_ref(dest_buffer);
+  pending_read_buffer_ = nullptr;
+
+  // If the underlying read failed, fail this read directly.
+  if (error != OK) {
+    callback.Run(error, bytes_read);
+    return;
+  }
+
+  if (bytes_read == 0) {
+    // EOF. Since the buffer is empty, there is no more data to decompress (any
+    // internally buffered data would have been drained already before calling
+    // the previous stream's Read). Return EOF to our caller.
+    callback.Run(OK, 0);
+    return;
+  }
+
+  // Mark the buffer as refilled and try decompressing.
+  buffer_->WasRefilled(bytes_read);
+
+  // Recurse. If this Read completes synchronously, this method runs the
+  // callback; if it does not, Read will have posted an asynchronous read that
+  // will later re-invoke OnReadComplete to run the callback.
+  //
+  // Read() already adds the bytes it produces to |total_bytes_output_| before
+  // returning OK, so the total must not be bumped again here.
+  error = Read(dest_buffer, dest_buffer_size, &bytes_read, callback);
+  if (error != ERR_IO_PENDING)
+    callback.Run(error, bytes_read);
+}
+
+// Resets the zlib stream and feeds it a synthetic two-byte header (0x78 0x01),
+// so that a following inflate() call can process input whose own header is
+// missing. Returns true if zlib accepted the header with Z_OK.
+bool GzipStreamSource::InsertZlibHeader() {
+  char fake_header[] = {0x78, 0x01};
+  char scratch_output[4];
+  z_stream* const stream = zlib_stream_.get();
+
+  inflateReset(stream);
+  stream->next_in = bit_cast<Bytef*>(&fake_header[0]);
+  stream->avail_in = sizeof(fake_header);
+  stream->next_out = bit_cast<Bytef*>(&scratch_output[0]);
+  stream->avail_out = sizeof(scratch_output);
+
+  return inflate(stream, Z_NO_FLUSH) == Z_OK;
+}
+
+// Feeds the buffered input to the gzip header parser and reports whether the
+// header turned out to be invalid.
+//  * Header still incomplete: all buffered bytes are consumed, the header stays
+//    unchecked, and false is returned so the caller can fetch more input.
+//  * Header complete: the header bytes are consumed and an 8-byte footer is
+//    scheduled to be skipped later; returns false.
+//  * Header invalid: returns true.
+// In the last two cases |gzip_header_unchecked_| is cleared.
+bool GzipStreamSource::IsGzipHeaderInvalid() {
+  const size_t kGzipFooterBytes = 8;
+  const char* header_end = nullptr;
+  const GZipHeader::Status status = gzip_header_.ReadMore(
+      buffer_->bytes(), buffer_->bytes_left(), &header_end);
+
+  if (status == GZipHeader::INCOMPLETE_HEADER) {
+    buffer_->WasDrained(buffer_->bytes_left());
+    return false;
+  }
+
+  gzip_header_unchecked_ = false;
+
+  if (status == GZipHeader::INVALID_HEADER)
+    return true;
+
+  if (status == GZipHeader::COMPLETE_HEADER) {
+    // If there is a valid header, there should also be a valid footer.
+    gzip_footer_bytes_left_ = kGzipFooterBytes;
+    buffer_->WasDrained(header_end - buffer_->bytes());
+  }
+  return false;
+}
+
+// Crude heuristic. Per RFC 1952 section 2.3.1 a gzip file always starts with a
+// two-byte magic value. While the gzip header is still unchecked and fallback
+// is still permitted, buffered input whose first byte is not the first magic
+// byte means this filter should fall back to treating the data as plain.
+// Returns false when there is no buffered input to inspect.
+bool GzipStreamSource::ShouldFallbackToPlain() {
+  static const char kGzipFirstByte = 0x1f;
+
+  const bool still_deciding =
+      gzip_header_unchecked_ && gzip_fallback_unchecked_;
+  if (!still_deciding || !buffer_->HasMoreBytes())
+    return false;
+
+  const char first_byte = buffer_->bytes()[0];
+  return first_byte != kGzipFirstByte;
+}
+
+// Returns |total_bytes_output_|, the running count of bytes this source has
+// handed to callers of Read().
+size_t GzipStreamSource::GetBytesOutput() const {
+ return total_bytes_output_;
+}
+
+// Discards gzip footer bytes from the front of |buffer_|. At most
+// |gzip_footer_bytes_left_| bytes are dropped, limited to what is currently
+// buffered; the remaining footer count is reduced by the amount dropped so a
+// footer split across several reads is skipped exactly once and the data
+// following it is not consumed by later calls.
+void GzipStreamSource::SkipGzipFooterIfNeeded() {
+  if (gzip_footer_bytes_left_ == 0)
+    return;
+
+  size_t to_skip = gzip_footer_bytes_left_;
+  if (to_skip > buffer_->bytes_left())
+    to_skip = buffer_->bytes_left();
+
+  buffer_->WasDrained(to_skip);
+  // Without this the counter never reaches zero and every later Passthrough()
+  // would silently drop up to 8 more bytes of real trailing data.
+  gzip_footer_bytes_left_ -= to_skip;
+}
+
+} // namespace net

Powered by Google App Engine
This is Rietveld 408576698