Chromium Code Reviews| Index: net/filter/gzip_source_stream.cc |
| diff --git a/net/filter/gzip_source_stream.cc b/net/filter/gzip_source_stream.cc |
| index de1a2de33cb8081688db800c2dcfa7364de558af..02f2099c4a3753b53f1552d73e43ecdd34e624dc 100644 |
| --- a/net/filter/gzip_source_stream.cc |
| +++ b/net/filter/gzip_source_stream.cc |
| @@ -21,6 +21,14 @@ const char kDeflate[] = "DEFLATE"; |
| const char kGzip[] = "GZIP"; |
| const char kGzipFallback[] = "GZIP_FALLBACK"; |
| +// For deflate streams, if more than this many bytes have been received without |
| +// an error and without adding a Zlib header, assume the original stream had a |
| +// Zlib header. In practice, don't need nearly this much data, but since the |
| +// detection logic is a heuristic, best to be safe. Data is freed once it's been |
| +// determined whether the stream has a zlib header or not, so larger values |
| +// shouldn't affect memory usage, in practice. |
| +const int kMaxZlibHeaderSniffBytes = 1000; |
|
xunjieli
2017/01/03 16:21:17
Do you mean 100 instead of 1000? In the linked bug
mmenke
2017/01/03 18:25:41
I meant 1000, out of paranoia (And because the cos
xunjieli
2017/01/03 18:52:03
Acknowledged.
|
| + |
| } // namespace |
| GzipSourceStream::~GzipSourceStream() { |
| @@ -42,9 +50,9 @@ std::unique_ptr<GzipSourceStream> GzipSourceStream::Create( |
| GzipSourceStream::GzipSourceStream(std::unique_ptr<SourceStream> upstream, |
| SourceStream::SourceType type) |
| : FilterSourceStream(type, std::move(upstream)), |
| - zlib_header_added_(false), |
| gzip_footer_bytes_left_(0), |
| - input_state_(STATE_START) {} |
| + input_state_(STATE_START), |
| + replay_state_(STATE_COMPRESSED_BODY) {} |
| bool GzipSourceStream::Init() { |
| zlib_stream_.reset(new z_stream); |
| @@ -81,7 +89,7 @@ int GzipSourceStream::FilterData(IOBuffer* output_buffer, |
| IOBuffer* input_buffer, |
| int input_buffer_size, |
| int* consumed_bytes, |
| - bool /*upstream_end_reached*/) { |
| + bool upstream_end_reached) { |
| *consumed_bytes = 0; |
| char* input_data = input_buffer->data(); |
| int input_data_size = input_buffer_size; |
| @@ -92,7 +100,7 @@ int GzipSourceStream::FilterData(IOBuffer* output_buffer, |
| switch (state) { |
| case STATE_START: { |
| if (type() == TYPE_DEFLATE) { |
| - input_state_ = STATE_COMPRESSED_BODY; |
| + input_state_ = STATE_SNIFFING_DEFLATE_HEADER; |
| break; |
| } |
| // If this stream is not really gzipped as detected by |
| @@ -125,11 +133,7 @@ int GzipSourceStream::FilterData(IOBuffer* output_buffer, |
| } |
| break; |
| } |
| - case STATE_COMPRESSED_BODY: { |
| - DCHECK(!state_compressed_entered); |
| - DCHECK_LE(0, input_data_size); |
| - |
| - state_compressed_entered = true; |
| + case STATE_SNIFFING_DEFLATE_HEADER: { |
| zlib_stream_.get()->next_in = bit_cast<Bytef*>(input_data); |
| zlib_stream_.get()->avail_in = input_data_size; |
| zlib_stream_.get()->next_out = bit_cast<Bytef*>(output_buffer->data()); |
| @@ -137,24 +141,78 @@ int GzipSourceStream::FilterData(IOBuffer* output_buffer, |
| int ret = inflate(zlib_stream_.get(), Z_NO_FLUSH); |
| - // Sometimes misconfigured servers omit the zlib header, relying on |
| - // clients to splice it back in. |
| - if (ret < 0 && !zlib_header_added_) { |
| - zlib_header_added_ = true; |
| + // On error, try adding a zlib header and replaying the response. Note |
| + // that data just received doesn't have to be replayed, since it hasn't |
| + // been removed from input_data yet, only data from previous FilterData |
| + // calls needs to be replayed. |
| + if (ret != Z_STREAM_END && ret != Z_OK) { |
| if (!InsertZlibHeader()) |
| return ERR_CONTENT_DECODING_FAILED; |
| + input_state_ = STATE_REPLAY_DATA; |
| + continue; |
|
xunjieli
2017/01/03 16:21:17
I believe this is equivalent to "break". If so, ca
mmenke
2017/01/03 18:25:42
Done.
|
| + } |
| - zlib_stream_.get()->next_in = bit_cast<Bytef*>(input_data); |
| - zlib_stream_.get()->avail_in = input_data_size; |
| - zlib_stream_.get()->next_out = |
| - bit_cast<Bytef*>(output_buffer->data()); |
| - zlib_stream_.get()->avail_out = output_buffer_size; |
| + int bytes_used = input_data_size - zlib_stream_.get()->avail_in; |
| + bytes_out = output_buffer_size - zlib_stream_.get()->avail_out; |
| + // If any bytes are output, enough total bytes have been received, or at |
| + // the end of the stream, assume the response had a valid Zlib header. |
| + if (bytes_out > 0 || |
| + bytes_used + replay_data_.size() >= kMaxZlibHeaderSniffBytes || |
| + ret == Z_STREAM_END) { |
| + std::move(replay_data_); |
| + if (ret == Z_STREAM_END) { |
| + input_state_ = STATE_GZIP_FOOTER; |
| + } else { |
| + input_state_ = STATE_COMPRESSED_BODY; |
| + } |
| + } else { |
| + replay_data_.append(input_data, bytes_used); |
| + } |
| - ret = inflate(zlib_stream_.get(), Z_NO_FLUSH); |
| - // TODO(xunjieli): add a histogram to see how often this happens. The |
| - // original bug for this behavior was ancient and maybe it doesn't |
| - // happen in the wild any more? crbug.com/649339 |
| + input_data_size -= bytes_used; |
| + input_data += bytes_used; |
| + break; |
| + } |
| + case STATE_REPLAY_DATA: { |
|
xunjieli
2017/01/03 16:21:17
nit: I am assuming this state only applies to defl
mmenke
2017/01/03 18:25:41
Done.
|
| + if (!replay_data_.size()) { |
|
mmenke
2016/12/29 22:39:31
Worth noting while we do go through both branches
xunjieli
2017/01/03 16:21:17
nit: Can we not enter STATE_REPLAY_DATA, if |repla
mmenke
2017/01/03 18:25:42
Done (x2). You're certainly right about empty(),
|
| + std::move(replay_data_); |
| + input_state_ = replay_state_; |
| + continue; |
| } |
| + |
| + // Call FilterData recursively, after updating |input_state_|, with |
| + // |replay_data_|. This recursive call makes handling data from |
| + // |replay_data_| and |input_buffer| much simpler than the alternative |
| + // operations, though it's not pretty. |
| + input_state_ = replay_state_; |
| + int bytes_used; |
| + int result = |
| + FilterData(output_buffer, output_buffer_size, |
| + new WrappedIOBuffer(replay_data_.data()), |
| + replay_data_.size(), &bytes_used, upstream_end_reached); |
| + replay_data_.erase(0, bytes_used); |
| + // Back up resulting state, and return state to STATE_REPLAY_DATA. |
| + replay_state_ = input_state_; |
| + input_state_ = STATE_REPLAY_DATA; |
| + |
| + // On error, or if bytes were read, just return result immediately. |
| + // Could continue consuming data in the success case, but simplest not |
| + // to. |
| + if (result != 0) |
| + return result; |
|
mmenke
2016/12/29 22:39:31
No unit tests exercise this line, unfortunately.
xunjieli
2017/01/03 16:21:17
Acknowledged.
|
| + continue; |
|
xunjieli
2017/01/03 16:21:17
I believe this is equivalent to "break". If so, ca
mmenke
2017/01/03 18:25:41
Done.
|
| + } |
| + case STATE_COMPRESSED_BODY: { |
| + DCHECK(!state_compressed_entered); |
| + DCHECK_LE(0, input_data_size); |
| + |
| + state_compressed_entered = true; |
| + zlib_stream_.get()->next_in = bit_cast<Bytef*>(input_data); |
| + zlib_stream_.get()->avail_in = input_data_size; |
| + zlib_stream_.get()->next_out = bit_cast<Bytef*>(output_buffer->data()); |
| + zlib_stream_.get()->avail_out = output_buffer_size; |
| + |
| + int ret = inflate(zlib_stream_.get(), Z_NO_FLUSH); |
| if (ret != Z_STREAM_END && ret != Z_OK) |
| return ERR_CONTENT_DECODING_FAILED; |