Index: net/filter/gzip_source_stream.cc |
diff --git a/net/filter/gzip_source_stream.cc b/net/filter/gzip_source_stream.cc |
new file mode 100644 |
index 0000000000000000000000000000000000000000..31aaebd325328ef25577b93e9f11c44372001779 |
--- /dev/null |
+++ b/net/filter/gzip_source_stream.cc |
@@ -0,0 +1,224 @@ |
+// Copyright 2016 The Chromium Authors. All rights reserved. |
+// Use of this source code is governed by a BSD-style license that can be |
+// found in the LICENSE file. |
+ |
+#include "net/filter/gzip_source_stream.h" |
+ |
+#include "base/bind.h" |
+#include "base/bit_cast.h" |
+#include "base/logging.h" |
+#include "net/base/io_buffer.h" |
+#include "third_party/zlib/zlib.h" |
+ |
+namespace net { |
+ |
+namespace { |
+ |
+// Names returned by GzipSourceStream::GetTypeAsString() for each source type. |
+constexpr char kGzip[] = "GZIP"; |
+constexpr char kGzipFallback[] = "GZIP_FALLBACK"; |
+constexpr char kDeflate[] = "DEFLATE"; |
+ |
+} // namespace |
+ |
+// Releases the zlib inflate state, if a z_stream was ever allocated. |
+GzipSourceStream::~GzipSourceStream() { |
+ if (!zlib_stream_) |
+ return; |
+ inflateEnd(zlib_stream_.get()); |
+} |
+ |
+// Builds a GzipSourceStream reading from |upstream| and initializes its zlib |
+// state. Returns nullptr when Init() fails. |
+std::unique_ptr<GzipSourceStream> GzipSourceStream::Create( |
+ std::unique_ptr<SourceStream> upstream, |
+ SourceStream::SourceType type) { |
+ std::unique_ptr<GzipSourceStream> stream( |
+ new GzipSourceStream(std::move(upstream), type)); |
+ |
+ if (stream->Init()) |
+ return stream; |
+ return nullptr; |
+} |
+ |
+// Starts in STATE_START with no zlib header spliced in and no gzip footer |
+// bytes pending. The zlib stream itself is set up separately by Init(). |
+GzipSourceStream::GzipSourceStream( |
+ std::unique_ptr<SourceStream> upstream, |
+ SourceStream::SourceType type) |
+ : FilterSourceStream(type, std::move(upstream)), |
+ zlib_header_added_(false), |
+ gzip_footer_bytes_left_(0), |
+ input_state_(STATE_START) {} |
+ |
+// Allocates the z_stream and initializes zlib's inflate state for type(). |
+// Returns true only when zlib reports Z_OK. |
+bool GzipSourceStream::Init() { |
+ // Value-initialization zeroes every field of the z_stream, which is the |
+ // state inflateInit()/inflateInit2() are handed here. Plain |new| does not |
+ // return null, so no null check is needed. |
+ zlib_stream_.reset(new z_stream()); |
+ |
+ int ret; |
+ if (type() == TYPE_GZIP || type() == TYPE_GZIP_FALLBACK) { |
+ // Negative window bits select raw inflate: the gzip header is parsed by |
+ // |gzip_header_| in FilterData() rather than by zlib. |
+ ret = inflateInit2(zlib_stream_.get(), -MAX_WBITS); |
+ } else { |
+ ret = inflateInit(zlib_stream_.get()); |
+ } |
+ DCHECK_NE(Z_VERSION_ERROR, ret); |
+ return ret == Z_OK; |
+} |
+ |
+// Maps type() to its name. Any type other than the three handled here is a |
+// programming error: NOTREACHED() fires and an empty string is returned. |
+std::string GzipSourceStream::GetTypeAsString() const { |
+ const auto source_type = type(); |
+ if (source_type == TYPE_GZIP) |
+ return kGzip; |
+ if (source_type == TYPE_GZIP_FALLBACK) |
+ return kGzipFallback; |
+ if (source_type == TYPE_DEFLATE) |
+ return kDeflate; |
+ NOTREACHED(); |
+ return ""; |
+} |
+ |
+// Decodes data from |input_buffer| into |output_buffer|. |
+// |
+// Returns the number of bytes written to |output_buffer|, or |
+// ERR_CONTENT_DECODING_FAILED on a bad gzip header or a zlib error. |
+// |*consumed_bytes| is set to the number of bytes of |input_buffer| used by |
+// this call. A return of 0 with all input consumed means more input is needed |
+// before any output can be produced. When fewer than |input_buffer_size| bytes |
+// are consumed, the remainder has to be passed in again on a later call. |
+int GzipSourceStream::FilterData(IOBuffer* output_buffer, |
+ int output_buffer_size, |
+ IOBuffer* input_buffer, |
+ int input_buffer_size, |
+ int* consumed_bytes, |
+ bool /*upstream_end_reached*/) { |
+ // Give the out-parameter a defined value on every path, including the |
+ // empty-input early return. |
+ *consumed_bytes = 0; |
+ if (input_buffer_size == 0) |
+ return 0; |
+ int bytes_out = 0; |
+ while (true) { |
+ InputState state = input_state_; |
+ switch (state) { |
+ case STATE_START: |
+ if (type() == TYPE_DEFLATE) { |
+ input_state_ = STATE_COMPRESSED_BODY; |
+ break; |
+ } |
+ // If this stream is not really gzipped as detected by |
+ // ShouldFallbackToPlain, pretend that the zlib stream has ended. |
+ if (ShouldFallbackToPlain(input_buffer->data()[0])) { |
+ input_state_ = STATE_UNCOMPRESSED_BODY; |
+ } else { |
+ input_state_ = STATE_GZIP_HEADER; |
+ } |
+ break; |
+ case STATE_GZIP_HEADER: { |
+ const size_t kGzipFooterBytes = 8; |
+ const char* end = nullptr; |
+ GZipHeader::Status status = gzip_header_.ReadMore( |
+ input_buffer->data(), input_buffer_size, &end); |
+ if (status == GZipHeader::INCOMPLETE_HEADER) { |
+ // The whole buffer was header data. Report it as consumed and ask for |
+ // more input; looping here would feed the same bytes to the header |
+ // parser again and never terminate. |
+ *consumed_bytes = input_buffer_size; |
+ return 0; |
+ } else if (status == GZipHeader::COMPLETE_HEADER) { |
+ // If there is a valid header, there should also be a valid footer. |
+ gzip_footer_bytes_left_ = kGzipFooterBytes; |
+ *consumed_bytes += end - input_buffer->data(); |
+ input_state_ = STATE_COMPRESSED_BODY; |
+ } else if (status == GZipHeader::INVALID_HEADER) { |
+ return ERR_CONTENT_DECODING_FAILED; |
+ } |
+ break; |
+ } |
+ case STATE_COMPRESSED_BODY: { |
+ DCHECK_LE(0, *consumed_bytes); |
+ DCHECK_LE(*consumed_bytes, input_buffer_size); |
+ // The gzip header may end exactly at the end of the buffer. inflate() |
+ // with no input reports Z_BUF_ERROR, which must not be mistaken for a |
+ // decoding failure, so wait for more input instead. |
+ if (*consumed_bytes == input_buffer_size) |
+ return bytes_out; |
+ |
+ z_stream* zlib_stream = zlib_stream_.get(); |
+ zlib_stream->next_in = |
+ bit_cast<Bytef*>(input_buffer->data() + *consumed_bytes); |
+ zlib_stream->avail_in = input_buffer_size - *consumed_bytes; |
Randy Smith (Not in Mondays)
2016/09/21 20:57:05
Thought (i.e. not even a suggestion, just tossing
xunjieli
2016/09/22 17:20:49
Done.
 |
+ zlib_stream->next_out = bit_cast<Bytef*>(output_buffer->data()); |
+ zlib_stream->avail_out = output_buffer_size; |
+ |
+ int ret = inflate(zlib_stream, Z_NO_FLUSH); |
+ |
+ // Sometime misconfigured servers omit the zlib header, relying on |
+ // clients to splice it back in. This only applies to TYPE_DEFLATE: the |
+ // gzip types are initialized for raw inflate in Init(), and rewinding |
+ // to the start of their buffer would re-read the gzip header. |
+ if (ret < 0 && !zlib_header_added_ && type() == TYPE_DEFLATE) { |
+ zlib_header_added_ = true; |
+ if (!InsertZlibHeader()) |
+ return ERR_CONTENT_DECODING_FAILED; |
+ |
+ zlib_stream->next_in = bit_cast<Bytef*>(input_buffer->data()); |
+ zlib_stream->avail_in = input_buffer_size; |
+ zlib_stream->next_out = bit_cast<Bytef*>(output_buffer->data()); |
+ zlib_stream->avail_out = output_buffer_size; |
+ |
+ ret = inflate(zlib_stream, Z_NO_FLUSH); |
+ // TODO(xunjieli): add a histogram to see how often this happens. The |
+ // original bug for this behavior was ancient and maybe it doesn't |
+ // happen in the wild any more? |
+ } |
+ |
+ // Whatever zlib left unread is the tail of |input_buffer|, so the total |
+ // consumed so far is the buffer size minus that tail. This also holds |
+ // after the retry above, which restarts from the beginning of the |
+ // buffer. |
+ *consumed_bytes = |
+ input_buffer_size - base::checked_cast<int>(zlib_stream->avail_in); |
+ bytes_out = |
+ output_buffer_size - base::checked_cast<int>(zlib_stream->avail_out); |
+ |
+ if (ret != Z_STREAM_END && ret != Z_OK) |
+ return ERR_CONTENT_DECODING_FAILED; |
+ DCHECK_LE(*consumed_bytes, input_buffer_size); |
+ // Z_OK means the compressed stream has not ended. Unread input at this |
+ // point only means |output_buffer| is full; it is still compressed data |
+ // and must not be passed through as plain bytes. |
+ if (ret == Z_OK) |
+ return bytes_out; |
Randy Smith (Not in Mondays)
2016/09/21 20:57:06
I'm confused. Above here there is code to (IIUC)
xunjieli
2016/09/22 17:20:49
Done. Thanks for catching that! That's bug in my c
 |
+ // The compressed stream has ended (Z_STREAM_END). Pass any further data |
+ // uncompressed. |
+ input_state_ = STATE_UNCOMPRESSED_BODY; |
+ break; |
+ } |
+ case STATE_UNCOMPRESSED_BODY: |
+ return bytes_out + Passthrough(output_buffer->data() + bytes_out, |
+ output_buffer_size - bytes_out, |
+ input_buffer->data() + *consumed_bytes, |
+ input_buffer_size - *consumed_bytes, |
+ consumed_bytes); |
+ } |
+ } |
+ NOTREACHED(); |
+ return ERR_UNEXPECTED; |
+} |
+ |
+// Copies input to output unmodified, after dropping any gzip footer bytes that |
+// are still pending at the front of |input_buffer|. |
+// |
+// Returns the number of bytes written to |output_buffer| (at most |
+// |output_buffer_size|) and adds the number of input bytes used, skipped |
+// footer bytes included, to |*consumed_bytes|. |
+size_t GzipSourceStream::Passthrough(char* output_buffer, |
+ int output_buffer_size, |
+ char* input_buffer, |
+ int input_buffer_size, |
+ int* consumed_bytes) { |
+ size_t footer_bytes_skipped = NumGzipFooterBytesToSkip(input_buffer_size); |
Randy Smith (Not in Mondays)
2016/09/21 20:57:06
Why not implement this with a separate state? It
xunjieli
2016/09/22 17:20:49
Done.
 |
+ size_t to_copy = input_buffer_size - footer_bytes_skipped; |
+ if (to_copy > base::checked_cast<size_t>(output_buffer_size)) |
+ to_copy = output_buffer_size; |
+ // The skipped footer bytes sit at the start of |input_buffer|, so the data to |
+ // pass through begins right after them. |
+ memcpy(output_buffer, input_buffer + footer_bytes_skipped, to_copy); |
+ *consumed_bytes += to_copy + footer_bytes_skipped; |
+ return to_copy; |
+} |
+ |
+// Resets the inflate state and feeds it a two-byte header (0x78 0x01) so that |
+// a body sent without one can be decoded afterwards. Returns true if zlib |
+// accepts the header. |
+bool GzipSourceStream::InsertZlibHeader() { |
+ char dummy_header[] = {0x78, 0x01}; |
+ char dummy_output[4]; |
+ z_stream* stream = zlib_stream_.get(); |
+ |
+ inflateReset(stream); |
+ stream->next_in = bit_cast<Bytef*>(&dummy_header[0]); |
+ stream->avail_in = sizeof(dummy_header); |
+ stream->next_out = bit_cast<Bytef*>(&dummy_output[0]); |
+ stream->avail_out = sizeof(dummy_output); |
+ |
+ return inflate(stream, Z_NO_FLUSH) == Z_OK; |
+} |
+ |
+// Simple heuristic: per RFC 1952 2.3.1 a gzip file always begins with a |
+// two-byte magic value. For TYPE_GZIP_FALLBACK streams only, a first byte that |
+// differs from the first magic byte (0x1f) means the data is treated as plain. |
+// Every other stream type never falls back. |
+bool GzipSourceStream::ShouldFallbackToPlain(char first_byte) { |
+ static const char kGzipFirstByte = 0x1f; |
+ return type() == TYPE_GZIP_FALLBACK && first_byte != kGzipFirstByte; |
+} |
+ |
+// Returns how many of the next |input_buffer_size| input bytes belong to the |
+// gzip footer, and deducts that amount from |gzip_footer_bytes_left_|. |
+size_t GzipSourceStream::NumGzipFooterBytesToSkip(int input_buffer_size) { |
+ if (gzip_footer_bytes_left_ == 0) |
+ return 0; |
+ const size_t pending = gzip_footer_bytes_left_; |
+ const size_t available = base::checked_cast<size_t>(input_buffer_size); |
+ const size_t skipped = pending > available ? available : pending; |
+ gzip_footer_bytes_left_ -= skipped; |
+ return skipped; |
+} |
+ |
+} // namespace net |