net/filter/gzip_stream_source.cc - Issue 1662763002: [ON HOLD] Implement pull-based design for content decoding

Side by Side Diff: net/filter/gzip_stream_source.cc

Issue 1662763002: [ON HOLD] Implement pull-based design for content decoding (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: Created 4 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
(Empty)
	1 // Copyright 2016 The Chromium Authors. All rights reserved.

	2 // Use of this source code is governed by a BSD-style license that can be

	3 // found in the LICENSE file.

	4

	5 #include "base/bind.h"

	6 #include "base/bit_cast.h"

	7 #include "net/filter/block_buffer.h"

	8 #include "net/filter/gzip_stream_source.h"

	9 #include "third_party/zlib/zlib.h"

	10

	11 namespace net {

	12

	13 GzipStreamSource::GzipStreamSource(scoped_ptr<StreamSource> previous)

	14 : buffer_(new BlockBuffer()),

	15 previous_(std::move(previous)),

	16 zlib_eof_(false),

	17 zlib_header_added_(false),

	18 gzip_header_unchecked_(false),

	19 gzip_footer_bytes_left_(0),

	20 total_bytes_output_(0) {}

	21

	22 GzipStreamSource::~GzipStreamSource() {

	23 if (zlib_stream_)

	24 inflateEnd(zlib_stream_.get());

	25 }

	26

	27 bool GzipStreamSource::Init(GzipStreamSourceMode mode, bool gzip_fallback) {

	28 zlib_stream_.reset(new z_stream);

	29 if (!zlib_stream_)

	30 return false;

	31 memset(zlib_stream_.get(), 0, sizeof(z_stream));

	32

	33 if (mode == GZIP_STREAM_SOURCE_GZIP) {

	34 gzip_header_unchecked_ = true;

	35 gzip_fallback_unchecked_ = gzip_fallback;

	36 if (inflateInit2(zlib_stream_.get(), -MAX_WBITS) != Z_OK)

	37 return false;

	38 } else {

	39 if (inflateInit(zlib_stream_.get()) != Z_OK)

	40 return false;

	41 }

	42

	43 return true;

	44 }

	45

	46 Error GzipStreamSource::Read(IOBuffer* dest_buffer,

	47 size_t buffer_size,

	48 size_t* bytes_read,

	49 const OnReadCompleteCallback& callback) {

	50 *bytes_read = 0;

	51

	52 // Loop on reading the previous source until either:

	53 // * Decompress() returns some data, in which case this method completes

	54 // synchronously, or

	55 // * Read() does not complete synchronously, in which case OnReadComplete() is

	56 // responsible for finishing the decompression.

	57 Error error;

	58 do {

	59 // If this stream is not really gzipped as detected by

	60 // ShouldFallbackToPlain, pretend the zlib stream already ended.

	61 if (ShouldFallbackToPlain()) {

	62 zlib_eof_ = true;

	63 gzip_header_unchecked_ = false;

	64 gzip_fallback_unchecked_ = false;

	65 }

	66

	67 // Require a valid gzip header when decompressing a gzip stream.

	68 if (gzip_header_unchecked_ && IsGzipHeaderInvalid())

	69 return ERR_CONTENT_DECODING_FAILED;

	70

	71 GzipStreamState state = Decompress(dest_buffer, buffer_size, bytes_read);

	72

	73 // If the decompressor threw an error, fail synchronously.

	74 if (state == GZIP_STREAM_ERROR)

	75 return ERR_CONTENT_DECODING_FAILED;

	76

	77 // Awkward special case: if Decompress returns with data still left in the

	78 // input buffer, ordinarily that would imply needing more output space.

	79 // However, for compatibility reasons, it is permissible to include trailing

	80 // uncompressed data after the zlib stream ends. That data is copied through

	81 // here by Passthrough.

	82 if (*bytes_read == 0 && zlib_eof_ && buffer_->HasMoreBytes()) {

	83 state = Passthrough(dest_buffer, buffer_size, bytes_read);

	84 }

	85

	86 // If there was already some data buffered internally in \|buffer_\|,

	87 // or some output buffered internally in zlib, \|Decompress\| can succeed

	88 // synchronously. If this happens, return right here.

	89 if (*bytes_read > 0) {

	90 total_bytes_output_ += *bytes_read;

	91 return OK;

	92 }

	93

	94 // GZIP_STREAM_MORE_OUTPUT_SPACE implies \|*bytes_read\| > 0, since Decompress

	95 // will fill all available output buffer space first and the output buffer

	96 // is empty coming into \|Read\|. Since GZIP_STREAM_ERROR is handled above,

	97 // this is the only other case.

	98 DCHECK_EQ(GZIP_STREAM_MORE_INPUT, state);

	99

	100 // Since Decompress needs more input, it has consumed all existing input.

	101 DCHECK(!buffer_->HasMoreBytes());

	102

	103 // Dispatch a read to refill the input buffer.

	104 size_t previous_bytes_read;
	Randy Smith (Not in Mondays) 2016/02/08 23:28:42 nit: Initialize to zero. nit: Initialize to zero. xunjieli 2016/03/03 23:00:08 Done. Show quoted text On 2016/02/08 23:28:42, Randy Smith - Not in Fridays wrote: > nit: Initialize to zero. Done.
	105 error = previous_->Read(

	106 buffer_->buffer(), buffer_->size(), &previous_bytes_read,

	107 base::Bind(&GzipStreamSource::OnReadComplete, base::Unretained(this),

	108 callback, base::Unretained(dest_buffer), buffer_size));

	109

	110 // OK with 0 bytes read means EOF. Since the buffer is already empty, and

	111 // Decompress already failed to return any more data, this source is also

	112 // at EOF. Just return that synchronously.

	113 if (error == OK && previous_bytes_read == 0)

	114 return OK;

	115

	116 // If the underlying read completed synchronously, mark the buffer as

	117 // refilled and try again.

	118 if (error == OK)

	119 buffer_->WasRefilled(previous_bytes_read);

	120 } while (error == OK);

	121

	122 if (error == ERR_IO_PENDING)

	123 pending_read_buffer_ = dest_buffer;

	124

	125 return error;

	126 }

	127

	128 // Private helpers.

	129

	130 // Synchronous decompressor. This function consumes bytes from \|buffer_\| and

	131 // decompresses them into \|dest_buffer\| until either:

	132 // a) \|buffer_\| is empty, and it returns MORE_INPUT;

	133 // b) \|dest_buffer\| is full, and it returns MORE_OUTPUT_SPACE;

	134 // This decompressor will decompress a zlib stream (either gzip or deflate)

	135 // until the zlib EOF, then will pass any further input through untouched.

	136 GzipStreamSource::GzipStreamState GzipStreamSource::Decompress(

	137 IOBuffer* dest_buffer,

	138 size_t buffer_size,

	139 size_t* bytes_output) {

	140 DCHECK(dest_buffer);

	141 DCHECK(buffer_size != 0);

	142

	143 if (!buffer_->HasMoreBytes()) {

	144 return GZIP_STREAM_MORE_INPUT;

	145 }

	146

	147 // If the zlib stream has already ended, pass any further data through.

	148 if (zlib_eof_)

	149 return Passthrough(dest_buffer, buffer_size, bytes_output);

	150

	151 zlib_stream_.get()->next_in = bit_cast<Bytef*>(buffer_->bytes());

	152 zlib_stream_.get()->avail_in = buffer_->bytes_left();

	153 zlib_stream_.get()->next_out = bit_cast<Bytef*>(dest_buffer->data());

	154 zlib_stream_.get()->avail_out = buffer_size;

	155

	156 int ret = inflate(zlib_stream_.get(), Z_NO_FLUSH);

	157

	158 LOG(ERROR) << "inflate " << ret << " " << zlib_stream_.get()->avail_in << " "

	159 << zlib_stream_.get()->avail_out;

	160

	161 // Sometime misconfigured servers omit the zlib header, relying on clients to

	162 // splice it back in.

	163 if (ret < 0 && !zlib_header_added_) {

	164 zlib_header_added_ = true;

	165 if (!InsertZlibHeader())

	166 return GZIP_STREAM_ERROR;

	167

	168 zlib_stream_.get()->next_in = bit_cast<Bytef*>(buffer_->bytes());

	169 zlib_stream_.get()->avail_in = buffer_->bytes_left();

	170 zlib_stream_.get()->next_out = bit_cast<Bytef*>(dest_buffer->data());

	171 zlib_stream_.get()->avail_out = buffer_size;

	172

	173 ret = inflate(zlib_stream_.get(), Z_NO_FLUSH);

	174 // TODO(ellyjones): add a histogram to see how often this happens. The

	175 // original bug for this behavior was ancient and maybe it doesn't happen in

	176 // the wild any more?

	177 }

	178

	179 size_t bytes_used = buffer_->bytes_left() - zlib_stream_.get()->avail_in;

	180 size_t bytes_out = buffer_size - zlib_stream_.get()->avail_out;

	181

	182 buffer_->WasDrained(bytes_used);

	183 *bytes_output = bytes_out;

	184

	185 if (ret != Z_STREAM_END && ret != Z_OK)

	186 return GZIP_STREAM_ERROR;

	187

	188 // The zlib stream can end before the input stream ends. If this happens,

	189 // \|Decompress\| will pass any further data on untouched.

	190 if (ret == Z_STREAM_END)

	191 zlib_eof_ = true;

	192

	193 if (!buffer_->HasMoreBytes())

	194 return GZIP_STREAM_MORE_INPUT;

	195 else

	196 return GZIP_STREAM_MORE_OUTPUT_SPACE;

	197 }

	198

	199 GzipStreamSource::GzipStreamState GzipStreamSource::Passthrough(

	200 IOBuffer* dest_buffer,

	201 size_t buffer_size,

	202 size_t* bytes_read) {

	203 SkipGzipFooterIfNeeded();

	204 size_t to_copy = buffer_->bytes_left();

	205 if (to_copy > buffer_size)

	206 to_copy = buffer_size;

	207 memcpy(dest_buffer->data(), buffer_->bytes(), to_copy);

	208 buffer_->WasDrained(to_copy);

	209 *bytes_read = to_copy;

	210 if (!buffer_->HasMoreBytes())

	211 return GZIP_STREAM_MORE_INPUT;

	212 else

	213 return GZIP_STREAM_MORE_OUTPUT_SPACE;

	214 }

	215

	216 void GzipStreamSource::OnReadComplete(const OnReadCompleteCallback& callback,

	217 IOBuffer* dest_buffer,

	218 size_t dest_buffer_size,

	219 Error error,

	220 size_t bytes_read) {

	221 DCHECK(!buffer_->HasMoreBytes());

	222 DCHECK_EQ(dest_buffer, pending_read_buffer_.get());

	223

	224 // Take a ref for the lifetime of this function.

	225 scoped_refptr<IOBuffer> dest_ref(dest_buffer);

	226 pending_read_buffer_ = nullptr;

	227

	228 // If the underlying read failed, fail this read directly.

	229 if (error != OK) {

	230 callback.Run(error, bytes_read);

	231 return;

	232 }

	233

	234 if (bytes_read == 0) {

	235 // EOF. Since the buffer is empty, there is no more data to decompress (any

	236 // internally buffered data would have been drained already before calling

	237 // the previous stream's Read). Return EOF to our caller.

	238 callback.Run(OK, 0);

	239 return;

	240 }

	241

	242 // Mark the buffer as refilled and try decompressing.

	243 buffer_->WasRefilled(bytes_read);

	244

	245 // Recurse. If this Read completes synchronously, this method runs the

	246 // callback; if it does not, Read will have posted an asynchronous read that

	247 // will later re-invoke OnReadComplete to run the callback.

	248 error = Read(dest_buffer, dest_buffer_size, &bytes_read, callback);

	249 if (error != ERR_IO_PENDING) {

	250 if (error == OK)

	251 total_bytes_output_ += bytes_read;

	252 callback.Run(error, bytes_read);

	253 }

	254 }

	255

	256 bool GzipStreamSource::InsertZlibHeader() {

	257 char dummy_header[] = {0x78, 0x01};

	258 char dummy_output[4];

	259

	260 inflateReset(zlib_stream_.get());

	261 zlib_stream_.get()->next_in = bit_cast<Bytef*>(&dummy_header[0]);

	262 zlib_stream_.get()->avail_in = sizeof(dummy_header);

	263 zlib_stream_.get()->next_out = bit_cast<Bytef*>(&dummy_output[0]);

	264 zlib_stream_.get()->avail_out = sizeof(dummy_output);

	265

	266 int ret = inflate(zlib_stream_.get(), Z_NO_FLUSH);

	267 return ret == Z_OK;

	268 }

	269

	270 bool GzipStreamSource::IsGzipHeaderInvalid() {

	271 const size_t kGzipFooterBytes = 8;

	272 const char* end = nullptr;

	273 GZipHeader::Status status =

	274 gzip_header_.ReadMore(buffer_->bytes(), buffer_->bytes_left(), &end);

	275 if (status == GZipHeader::INCOMPLETE_HEADER) {

	276 buffer_->WasDrained(buffer_->bytes_left());

	277 return false;

	278 }

	279

	280 gzip_header_unchecked_ = false;

	281 if (status == GZipHeader::COMPLETE_HEADER) {

	282 // If there is a valid header, there should also be a valid footer.

	283 gzip_footer_bytes_left_ = kGzipFooterBytes;

	284 buffer_->WasDrained(end - buffer_->bytes());

	285 }

	286

	287 return status == GZipHeader::INVALID_HEADER;

	288 }

	289

	290 // Dumb heuristic. Gzip files always start with a two-byte magic value per RFC

	291 // 1952 2.3.1, so if the first byte isn't the first byte of the gzip magic, and

	292 // this filter is checking whether it should fallback, then fallback.

	293 bool GzipStreamSource::ShouldFallbackToPlain() {

	294 static const char kGzipFirstByte = 0x1f;

	295 if (!gzip_header_unchecked_ \|\| !gzip_fallback_unchecked_)

	296 return false;

	297 if (!buffer_->HasMoreBytes())

	298 return false;

	299 char d = buffer_->bytes()[0];

	300 return d != kGzipFirstByte;

	301 }

	302

	303 size_t GzipStreamSource::GetBytesOutput() const {

	304 return total_bytes_output_;

	305 }

	306

	307 void GzipStreamSource::SkipGzipFooterIfNeeded() {

	308 if (gzip_footer_bytes_left_ == 0)

	309 return;

	310 size_t to_read = gzip_footer_bytes_left_;

	311 if (to_read > buffer_->bytes_left())

	312 to_read = buffer_->bytes_left();

	313 buffer_->WasDrained(to_read);

	314 }

	315

	316 } // namespace net

OLD	NEW

« net/docs/filter.md ('K') | « net/filter/gzip_stream_source.h ('k') | net/filter/gzip_stream_source_unittest.cc » ('j') | net/filter/stream_source.h » ('J')