| OLD | NEW |
| 1 // Copyright 2016 The Chromium Authors. All rights reserved. | 1 // Copyright 2016 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "net/filter/gzip_source_stream.h" | 5 #include "net/filter/gzip_source_stream.h" |
| 6 | 6 |
| 7 #include <algorithm> | 7 #include <algorithm> |
| 8 #include <utility> | 8 #include <utility> |
| 9 | 9 |
| 10 #include "base/bind.h" | 10 #include "base/bind.h" |
| 11 #include "base/bit_cast.h" | 11 #include "base/bit_cast.h" |
| 12 #include "base/logging.h" | 12 #include "base/logging.h" |
| 13 #include "base/memory/ref_counted.h" |
| 13 #include "net/base/io_buffer.h" | 14 #include "net/base/io_buffer.h" |
| 14 #include "third_party/zlib/zlib.h" | 15 #include "third_party/zlib/zlib.h" |
| 15 | 16 |
| 16 namespace net { | 17 namespace net { |
| 17 | 18 |
| 18 namespace { | 19 namespace { |
| 19 | 20 |
| 20 const char kDeflate[] = "DEFLATE"; | 21 const char kDeflate[] = "DEFLATE"; |
| 21 const char kGzip[] = "GZIP"; | 22 const char kGzip[] = "GZIP"; |
| 22 const char kGzipFallback[] = "GZIP_FALLBACK"; | 23 const char kGzipFallback[] = "GZIP_FALLBACK"; |
| 23 | 24 |
| 25 // For deflate streams, if more than this many bytes have been received without |
| 26 // an error and without adding a Zlib header, assume the original stream had a |
| 27 // Zlib header. In practice, don't need nearly this much data, but since the |
| 28 // detection logic is a heuristic, best to be safe. Data is freed once it's been |
| 29 // determined whether the stream has a zlib header or not, so larger values |
| 30 // shouldn't affect memory usage, in practice. |
| 31 const int kMaxZlibHeaderSniffBytes = 1000; |
| 32 |
| 24 } // namespace | 33 } // namespace |
| 25 | 34 |
| 26 GzipSourceStream::~GzipSourceStream() { | 35 GzipSourceStream::~GzipSourceStream() { |
| 27 if (zlib_stream_) | 36 if (zlib_stream_) |
| 28 inflateEnd(zlib_stream_.get()); | 37 inflateEnd(zlib_stream_.get()); |
| 29 } | 38 } |
| 30 | 39 |
| 31 std::unique_ptr<GzipSourceStream> GzipSourceStream::Create( | 40 std::unique_ptr<GzipSourceStream> GzipSourceStream::Create( |
| 32 std::unique_ptr<SourceStream> upstream, | 41 std::unique_ptr<SourceStream> upstream, |
| 33 SourceStream::SourceType type) { | 42 SourceStream::SourceType type) { |
| 34 std::unique_ptr<GzipSourceStream> source( | 43 std::unique_ptr<GzipSourceStream> source( |
| 35 new GzipSourceStream(std::move(upstream), type)); | 44 new GzipSourceStream(std::move(upstream), type)); |
| 36 | 45 |
| 37 if (!source->Init()) | 46 if (!source->Init()) |
| 38 return nullptr; | 47 return nullptr; |
| 39 return source; | 48 return source; |
| 40 } | 49 } |
| 41 | 50 |
| 42 GzipSourceStream::GzipSourceStream(std::unique_ptr<SourceStream> upstream, | 51 GzipSourceStream::GzipSourceStream(std::unique_ptr<SourceStream> upstream, |
| 43 SourceStream::SourceType type) | 52 SourceStream::SourceType type) |
| 44 : FilterSourceStream(type, std::move(upstream)), | 53 : FilterSourceStream(type, std::move(upstream)), |
| 45 zlib_header_added_(false), | |
| 46 gzip_footer_bytes_left_(0), | 54 gzip_footer_bytes_left_(0), |
| 47 input_state_(STATE_START) {} | 55 input_state_(STATE_START), |
| 56 replay_state_(STATE_COMPRESSED_BODY) {} |
| 48 | 57 |
| 49 bool GzipSourceStream::Init() { | 58 bool GzipSourceStream::Init() { |
| 50 zlib_stream_.reset(new z_stream); | 59 zlib_stream_.reset(new z_stream); |
| 51 if (!zlib_stream_) | 60 if (!zlib_stream_) |
| 52 return false; | 61 return false; |
| 53 memset(zlib_stream_.get(), 0, sizeof(z_stream)); | 62 memset(zlib_stream_.get(), 0, sizeof(z_stream)); |
| 54 | 63 |
| 55 int ret; | 64 int ret; |
| 56 if (type() == TYPE_GZIP || type() == TYPE_GZIP_FALLBACK) { | 65 if (type() == TYPE_GZIP || type() == TYPE_GZIP_FALLBACK) { |
| 57 ret = inflateInit2(zlib_stream_.get(), -MAX_WBITS); | 66 ret = inflateInit2(zlib_stream_.get(), -MAX_WBITS); |
| (...skipping 16 matching lines...) Expand all Loading... |
| 74 NOTREACHED(); | 83 NOTREACHED(); |
| 75 return ""; | 84 return ""; |
| 76 } | 85 } |
| 77 } | 86 } |
| 78 | 87 |
| 79 int GzipSourceStream::FilterData(IOBuffer* output_buffer, | 88 int GzipSourceStream::FilterData(IOBuffer* output_buffer, |
| 80 int output_buffer_size, | 89 int output_buffer_size, |
| 81 IOBuffer* input_buffer, | 90 IOBuffer* input_buffer, |
| 82 int input_buffer_size, | 91 int input_buffer_size, |
| 83 int* consumed_bytes, | 92 int* consumed_bytes, |
| 84 bool /*upstream_end_reached*/) { | 93 bool upstream_end_reached) { |
| 85 *consumed_bytes = 0; | 94 *consumed_bytes = 0; |
| 86 char* input_data = input_buffer->data(); | 95 char* input_data = input_buffer->data(); |
| 87 int input_data_size = input_buffer_size; | 96 int input_data_size = input_buffer_size; |
| 88 int bytes_out = 0; | 97 int bytes_out = 0; |
| 89 bool state_compressed_entered = false; | 98 bool state_compressed_entered = false; |
| 90 while (input_data_size > 0 && bytes_out < output_buffer_size) { | 99 while (input_data_size > 0 && bytes_out < output_buffer_size) { |
| 91 InputState state = input_state_; | 100 InputState state = input_state_; |
| 92 switch (state) { | 101 switch (state) { |
| 93 case STATE_START: { | 102 case STATE_START: { |
| 94 if (type() == TYPE_DEFLATE) { | 103 if (type() == TYPE_DEFLATE) { |
| 95 input_state_ = STATE_COMPRESSED_BODY; | 104 input_state_ = STATE_SNIFFING_DEFLATE_HEADER; |
| 96 break; | 105 break; |
| 97 } | 106 } |
| 98 // If this stream is not really gzipped as detected by | 107 // If this stream is not really gzipped as detected by |
| 99 // ShouldFallbackToPlain, pretend that the zlib stream has ended. | 108 // ShouldFallbackToPlain, pretend that the zlib stream has ended. |
| 100 DCHECK_LT(0, input_data_size); | 109 DCHECK_LT(0, input_data_size); |
| 101 if (ShouldFallbackToPlain(input_data[0])) { | 110 if (ShouldFallbackToPlain(input_data[0])) { |
| 102 input_state_ = STATE_UNCOMPRESSED_BODY; | 111 input_state_ = STATE_UNCOMPRESSED_BODY; |
| 103 } else { | 112 } else { |
| 104 input_state_ = STATE_GZIP_HEADER; | 113 input_state_ = STATE_GZIP_HEADER; |
| 105 } | 114 } |
| 106 break; | 115 break; |
| 107 } | 116 } |
| 108 case STATE_GZIP_HEADER: { | 117 case STATE_GZIP_HEADER: { |
| 118 DCHECK_NE(TYPE_DEFLATE, type()); |
| 119 |
| 109 const size_t kGzipFooterBytes = 8; | 120 const size_t kGzipFooterBytes = 8; |
| 110 const char* end = nullptr; | 121 const char* end = nullptr; |
| 111 GZipHeader::Status status = | 122 GZipHeader::Status status = |
| 112 gzip_header_.ReadMore(input_data, input_data_size, &end); | 123 gzip_header_.ReadMore(input_data, input_data_size, &end); |
| 113 if (status == GZipHeader::INCOMPLETE_HEADER) { | 124 if (status == GZipHeader::INCOMPLETE_HEADER) { |
| 114 input_data += input_data_size; | 125 input_data += input_data_size; |
| 115 input_data_size = 0; | 126 input_data_size = 0; |
| 116 } else if (status == GZipHeader::COMPLETE_HEADER) { | 127 } else if (status == GZipHeader::COMPLETE_HEADER) { |
| 117 // If there is a valid header, there should also be a valid footer. | 128 // If there is a valid header, there should also be a valid footer. |
| 118 gzip_footer_bytes_left_ = kGzipFooterBytes; | 129 gzip_footer_bytes_left_ = kGzipFooterBytes; |
| 119 int bytes_consumed = end - input_data; | 130 int bytes_consumed = end - input_data; |
| 120 input_data += bytes_consumed; | 131 input_data += bytes_consumed; |
| 121 input_data_size -= bytes_consumed; | 132 input_data_size -= bytes_consumed; |
| 122 input_state_ = STATE_COMPRESSED_BODY; | 133 input_state_ = STATE_COMPRESSED_BODY; |
| 123 } else if (status == GZipHeader::INVALID_HEADER) { | 134 } else if (status == GZipHeader::INVALID_HEADER) { |
| 124 return ERR_CONTENT_DECODING_FAILED; | 135 return ERR_CONTENT_DECODING_FAILED; |
| 125 } | 136 } |
| 126 break; | 137 break; |
| 127 } | 138 } |
| 128 case STATE_COMPRESSED_BODY: { | 139 case STATE_SNIFFING_DEFLATE_HEADER: { |
| 129 DCHECK(!state_compressed_entered); | 140 DCHECK_EQ(TYPE_DEFLATE, type()); |
| 130 DCHECK_LE(0, input_data_size); | |
| 131 | 141 |
| 132 state_compressed_entered = true; | |
| 133 zlib_stream_.get()->next_in = bit_cast<Bytef*>(input_data); | 142 zlib_stream_.get()->next_in = bit_cast<Bytef*>(input_data); |
| 134 zlib_stream_.get()->avail_in = input_data_size; | 143 zlib_stream_.get()->avail_in = input_data_size; |
| 135 zlib_stream_.get()->next_out = bit_cast<Bytef*>(output_buffer->data()); | 144 zlib_stream_.get()->next_out = bit_cast<Bytef*>(output_buffer->data()); |
| 136 zlib_stream_.get()->avail_out = output_buffer_size; | 145 zlib_stream_.get()->avail_out = output_buffer_size; |
| 137 | 146 |
| 138 int ret = inflate(zlib_stream_.get(), Z_NO_FLUSH); | 147 int ret = inflate(zlib_stream_.get(), Z_NO_FLUSH); |
| 139 | 148 |
| 140 // Sometimes misconfigured servers omit the zlib header, relying on | 149 // On error, try adding a zlib header and replaying the response. Note |
| 141 // clients to splice it back in. | 150 // that data just received doesn't have to be replayed, since it hasn't |
| 142 if (ret < 0 && !zlib_header_added_) { | 151 // been removed from input_data yet, only data from previous FilterData |
| 143 zlib_header_added_ = true; | 152 // calls needs to be replayed. |
| 153 if (ret != Z_STREAM_END && ret != Z_OK) { |
| 144 if (!InsertZlibHeader()) | 154 if (!InsertZlibHeader()) |
| 145 return ERR_CONTENT_DECODING_FAILED; | 155 return ERR_CONTENT_DECODING_FAILED; |
| 146 | 156 |
| 147 zlib_stream_.get()->next_in = bit_cast<Bytef*>(input_data); | 157 input_state_ = STATE_REPLAY_DATA; |
| 148 zlib_stream_.get()->avail_in = input_data_size; | 158 // |replay_state_| should still have its initial value. |
| 149 zlib_stream_.get()->next_out = | 159 DCHECK_EQ(STATE_COMPRESSED_BODY, replay_state_); |
| 150 bit_cast<Bytef*>(output_buffer->data()); | 160 break; |
| 151 zlib_stream_.get()->avail_out = output_buffer_size; | 161 } |
| 152 | 162 |
| 153 ret = inflate(zlib_stream_.get(), Z_NO_FLUSH); | 163 int bytes_used = input_data_size - zlib_stream_.get()->avail_in; |
| 154 // TODO(xunjieli): add a histogram to see how often this happens. The | 164 bytes_out = output_buffer_size - zlib_stream_.get()->avail_out; |
| 155 // original bug for this behavior was ancient and maybe it doesn't | 165 // If any bytes are output, enough total bytes have been received, or at |
| 156 // happen in the wild any more? crbug.com/649339 | 166 // the end of the stream, assume the response had a valid Zlib header. |
| 167 if (bytes_out > 0 || |
| 168 bytes_used + replay_data_.size() >= kMaxZlibHeaderSniffBytes || |
| 169 ret == Z_STREAM_END) { |
| 170 std::move(replay_data_); |
| 171 if (ret == Z_STREAM_END) { |
| 172 input_state_ = STATE_GZIP_FOOTER; |
| 173 } else { |
| 174 input_state_ = STATE_COMPRESSED_BODY; |
| 175 } |
| 176 } else { |
| 177 replay_data_.append(input_data, bytes_used); |
| 157 } | 178 } |
| 179 |
| 180 input_data_size -= bytes_used; |
| 181 input_data += bytes_used; |
| 182 break; |
| 183 } |
| 184 case STATE_REPLAY_DATA: { |
| 185 DCHECK_EQ(TYPE_DEFLATE, type()); |
| 186 |
| 187 if (replay_data_.empty()) { |
| 188 std::move(replay_data_); |
| 189 input_state_ = replay_state_; |
| 190 break; |
| 191 } |
| 192 |
| 193 // Call FilterData recursively, after updating |input_state_|, with |
| 194 // |replay_data_|. This recursive call makes handling data from |
| 195 // |replay_data_| and |input_buffer| much simpler than the alternative |
| 196 // operations, though it's not pretty. |
| 197 input_state_ = replay_state_; |
| 198 int bytes_used; |
| 199 scoped_refptr<IOBuffer> replay_buffer( |
| 200 new WrappedIOBuffer(replay_data_.data())); |
| 201 int result = |
| 202 FilterData(output_buffer, output_buffer_size, replay_buffer.get(), |
| 203 replay_data_.size(), &bytes_used, upstream_end_reached); |
| 204 replay_data_.erase(0, bytes_used); |
| 205 // Back up resulting state, and return state to STATE_REPLAY_DATA. |
| 206 replay_state_ = input_state_; |
| 207 input_state_ = STATE_REPLAY_DATA; |
| 208 |
| 209 // On error, or if bytes were read, just return result immediately. |
| 210 // Could continue consuming data in the success case, but simplest not |
| 211 // to. |
| 212 if (result != 0) |
| 213 return result; |
| 214 break; |
| 215 } |
| 216 case STATE_COMPRESSED_BODY: { |
| 217 DCHECK(!state_compressed_entered); |
| 218 DCHECK_LE(0, input_data_size); |
| 219 |
| 220 state_compressed_entered = true; |
| 221 zlib_stream_.get()->next_in = bit_cast<Bytef*>(input_data); |
| 222 zlib_stream_.get()->avail_in = input_data_size; |
| 223 zlib_stream_.get()->next_out = bit_cast<Bytef*>(output_buffer->data()); |
| 224 zlib_stream_.get()->avail_out = output_buffer_size; |
| 225 |
| 226 int ret = inflate(zlib_stream_.get(), Z_NO_FLUSH); |
| 158 if (ret != Z_STREAM_END && ret != Z_OK) | 227 if (ret != Z_STREAM_END && ret != Z_OK) |
| 159 return ERR_CONTENT_DECODING_FAILED; | 228 return ERR_CONTENT_DECODING_FAILED; |
| 160 | 229 |
| 161 int bytes_used = input_data_size - zlib_stream_.get()->avail_in; | 230 int bytes_used = input_data_size - zlib_stream_.get()->avail_in; |
| 162 bytes_out = output_buffer_size - zlib_stream_.get()->avail_out; | 231 bytes_out = output_buffer_size - zlib_stream_.get()->avail_out; |
| 163 input_data_size -= bytes_used; | 232 input_data_size -= bytes_used; |
| 164 input_data += bytes_used; | 233 input_data += bytes_used; |
| 165 if (ret == Z_STREAM_END) | 234 if (ret == Z_STREAM_END) |
| 166 input_state_ = STATE_GZIP_FOOTER; | 235 input_state_ = STATE_GZIP_FOOTER; |
| 167 // zlib has written as much data to |output_buffer| as it could. | 236 // zlib has written as much data to |output_buffer| as it could. |
| (...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 211 // 1952 2.3.1, so if the first byte isn't the first byte of the gzip magic, and | 280 // 1952 2.3.1, so if the first byte isn't the first byte of the gzip magic, and |
| 212 // this filter is checking whether it should fallback, then fallback. | 281 // this filter is checking whether it should fallback, then fallback. |
| 213 bool GzipSourceStream::ShouldFallbackToPlain(char first_byte) { | 282 bool GzipSourceStream::ShouldFallbackToPlain(char first_byte) { |
| 214 if (type() != TYPE_GZIP_FALLBACK) | 283 if (type() != TYPE_GZIP_FALLBACK) |
| 215 return false; | 284 return false; |
| 216 static const char kGzipFirstByte = 0x1f; | 285 static const char kGzipFirstByte = 0x1f; |
| 217 return first_byte != kGzipFirstByte; | 286 return first_byte != kGzipFirstByte; |
| 218 } | 287 } |
| 219 | 288 |
| 220 } // namespace net | 289 } // namespace net |
| OLD | NEW |