Chromium Code Reviews| OLD | NEW |
|---|---|
| (Empty) | |
| 1 // Copyright 2016 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 #include "base/bind.h" | |
| 6 #include "base/bit_cast.h" | |
| 7 #include "net/filter/block_buffer.h" | |
| 8 #include "net/filter/gzip_stream_source.h" | |
| 9 #include "third_party/zlib/zlib.h" | |
| 10 | |
| 11 namespace net { | |
| 12 | |
| 13 GzipStreamSource::GzipStreamSource(scoped_ptr<StreamSource> previous) | |
| 14 : StreamSource(StreamSource::SOURCE_GZIP, std::move(previous)), | |
| 15 zlib_eof_(false), | |
| 16 zlib_header_added_(false), | |
| 17 gzip_header_unchecked_(false), | |
| 18 gzip_footer_bytes_left_(0) {} | |
| 19 | |
| 20 GzipStreamSource::~GzipStreamSource() { | |
| 21 if (zlib_stream_) | |
| 22 inflateEnd(zlib_stream_.get()); | |
| 23 } | |
| 24 | |
| 25 bool GzipStreamSource::Init(GzipStreamSourceMode mode, bool gzip_fallback) { | |
| 26 zlib_stream_.reset(new z_stream); | |
| 27 if (!zlib_stream_) | |
| 28 return false; | |
| 29 memset(zlib_stream_.get(), 0, sizeof(z_stream)); | |
| 30 | |
| 31 if (mode == GZIP_STREAM_SOURCE_GZIP) { | |
| 32 gzip_header_unchecked_ = true; | |
| 33 gzip_fallback_unchecked_ = gzip_fallback; | |
| 34 if (inflateInit2(zlib_stream_.get(), -MAX_WBITS) != Z_OK) | |
| 35 return false; | |
| 36 } else { | |
| 37 if (inflateInit(zlib_stream_.get()) != Z_OK) | |
| 38 return false; | |
| 39 } | |
| 40 | |
| 41 return true; | |
| 42 } | |
| 43 | |
| 44 Error GzipStreamSource::ReadInternal(IOBuffer* dest_buffer, | |
| 45 size_t buffer_size, | |
| 46 size_t* bytes_read) { | |
| 47 // If this stream is not really gzipped as detected by | |
| 48 // ShouldFallbackToPlain, pretend the zlib stream already ended. | |
| 49 if (ShouldFallbackToPlain()) { | |
| 50 zlib_eof_ = true; | |
| 51 gzip_header_unchecked_ = false; | |
| 52 gzip_fallback_unchecked_ = false; | |
| 53 } | |
| 54 | |
| 55 // Require a valid gzip header when decompressing a gzip stream. | |
| 56 if (gzip_header_unchecked_ && IsGzipHeaderInvalid()) | |
| 57 return ERR_CONTENT_DECODING_FAILED; | |
| 58 | |
| 59 GzipStreamState state = Decompress(dest_buffer, buffer_size, bytes_read); | |
| 60 | |
| 61 // If the decompressor threw an error, fail synchronously. | |
| 62 if (state == GZIP_STREAM_ERROR) | |
| 63 return ERR_CONTENT_DECODING_FAILED; | |
| 64 | |
| 65 // Awkward special case: if Decompress returns with data still left in the | |
| 66 // input buffer, ordinarily that would imply needing more output space. | |
| 67 // However, for compatibility reasons, it is permissible to include trailing | |
| 68 // uncompressed data after the zlib stream ends. That data is copied through | |
| 69 // here by Passthrough. | |
| 70 if (*bytes_read == 0 && zlib_eof_ && buffer_->HasMoreBytes()) { | |
| 71 state = Passthrough(dest_buffer, buffer_size, bytes_read); | |
| 72 } | |
| 73 | |
| 74 // If there was already some data buffered internally in |buffer_|, | |
| 75 // or some output buffered internally in zlib, |Decompress| can succeed | |
| 76 // synchronously. If this happens, return right here. | |
| 77 if (*bytes_read > 0) | |
| 78 return OK; | |
| 79 | |
| 80 // GZIP_STREAM_MORE_OUTPUT_SPACE implies |*bytes_read| > 0, since Decompress | |
| 81 // will fill all available output buffer space first and the output buffer | |
| 82 // is empty coming into |Read|. Since GZIP_STREAM_ERROR is handled above, | |
| 83 // this is the only other case. | |
| 84 DCHECK_EQ(GZIP_STREAM_MORE_INPUT, state); | |
| 85 | |
| 86 // Since Decompress needs more input, it has consumed all existing input. | |
| 87 DCHECK(!buffer_->HasMoreBytes()); | |
| 88 | |
| 89 return OK; | |
| 90 } | |
| 91 | |
| 92 // Private helpers. | |
| 93 | |
| 94 // Synchronous decompressor. This function consumes bytes from |buffer_| and | |
| 95 // decompresses them into |dest_buffer| until either: | |
| 96 // a) |buffer_| is empty, and it returns MORE_INPUT; | |
| 97 // b) |dest_buffer| is full, and it returns MORE_OUTPUT_SPACE; | |
| 98 // This decompressor will decompress a zlib stream (either gzip or deflate) | |
| 99 // until the zlib EOF, then will pass any further input through untouched. | |
| 100 GzipStreamSource::GzipStreamState GzipStreamSource::Decompress( | |
| 101 IOBuffer* dest_buffer, | |
| 102 size_t buffer_size, | |
| 103 size_t* bytes_output) { | |
| 104 DCHECK(dest_buffer); | |
| 105 DCHECK(buffer_size != 0); | |
| 106 | |
| 107 if (!buffer_->HasMoreBytes()) { | |
| 108 return GZIP_STREAM_MORE_INPUT; | |
| 109 } | |
| 110 | |
| 111 // If the zlib stream has already ended, pass any further data through. | |
| 112 if (zlib_eof_) | |
| 113 return Passthrough(dest_buffer, buffer_size, bytes_output); | |
| 114 | |
| 115 zlib_stream_.get()->next_in = bit_cast<Bytef*>(buffer_->bytes()); | |
| 116 zlib_stream_.get()->avail_in = buffer_->bytes_left(); | |
| 117 zlib_stream_.get()->next_out = bit_cast<Bytef*>(dest_buffer->data()); | |
| 118 zlib_stream_.get()->avail_out = buffer_size; | |
| 119 | |
| 120 int ret = inflate(zlib_stream_.get(), Z_NO_FLUSH); | |
| 121 | |
| 122 LOG(ERROR) << "inflate " << ret << " " << zlib_stream_.get()->avail_in << " " | |
| 123 << zlib_stream_.get()->avail_out; | |
| 124 | |
| 125 // Sometime misconfigured servers omit the zlib header, relying on clients to | |
|
mmenke
2016/03/04 21:15:56
Sometimes
xunjieli
2016/04/20 19:16:09
Done.
| |
| 126 // splice it back in. | |
| 127 if (ret < 0 && !zlib_header_added_) { | |
| 128 zlib_header_added_ = true; | |
| 129 if (!InsertZlibHeader()) | |
| 130 return GZIP_STREAM_ERROR; | |
| 131 | |
| 132 zlib_stream_.get()->next_in = bit_cast<Bytef*>(buffer_->bytes()); | |
| 133 zlib_stream_.get()->avail_in = buffer_->bytes_left(); | |
| 134 zlib_stream_.get()->next_out = bit_cast<Bytef*>(dest_buffer->data()); | |
| 135 zlib_stream_.get()->avail_out = buffer_size; | |
| 136 | |
| 137 ret = inflate(zlib_stream_.get(), Z_NO_FLUSH); | |
| 138 // TODO(ellyjones): add a histogram to see how often this happens. The | |
| 139 // original bug for this behavior was ancient and maybe it doesn't happen in | |
| 140 // the wild any more? | |
| 141 } | |
| 142 | |
| 143 size_t bytes_used = buffer_->bytes_left() - zlib_stream_.get()->avail_in; | |
| 144 size_t bytes_out = buffer_size - zlib_stream_.get()->avail_out; | |
| 145 | |
| 146 buffer_->WasDrained(bytes_used); | |
| 147 *bytes_output = bytes_out; | |
| 148 | |
| 149 if (ret != Z_STREAM_END && ret != Z_OK) | |
| 150 return GZIP_STREAM_ERROR; | |
| 151 | |
| 152 // The zlib stream can end before the input stream ends. If this happens, | |
| 153 // |Decompress| will pass any further data on untouched. | |
| 154 if (ret == Z_STREAM_END) | |
| 155 zlib_eof_ = true; | |
| 156 | |
| 157 if (!buffer_->HasMoreBytes()) | |
| 158 return GZIP_STREAM_MORE_INPUT; | |
| 159 else | |
| 160 return GZIP_STREAM_MORE_OUTPUT_SPACE; | |
| 161 } | |
| 162 | |
| 163 GzipStreamSource::GzipStreamState GzipStreamSource::Passthrough( | |
| 164 IOBuffer* dest_buffer, | |
| 165 size_t buffer_size, | |
| 166 size_t* bytes_read) { | |
| 167 SkipGzipFooterIfNeeded(); | |
| 168 size_t to_copy = buffer_->bytes_left(); | |
| 169 if (to_copy > buffer_size) | |
| 170 to_copy = buffer_size; | |
| 171 memcpy(dest_buffer->data(), buffer_->bytes(), to_copy); | |
| 172 buffer_->WasDrained(to_copy); | |
| 173 *bytes_read = to_copy; | |
| 174 if (!buffer_->HasMoreBytes()) | |
| 175 return GZIP_STREAM_MORE_INPUT; | |
| 176 else | |
| 177 return GZIP_STREAM_MORE_OUTPUT_SPACE; | |
| 178 } | |
| 179 | |
| 180 bool GzipStreamSource::InsertZlibHeader() { | |
| 181 char dummy_header[] = {0x78, 0x01}; | |
| 182 char dummy_output[4]; | |
| 183 | |
| 184 inflateReset(zlib_stream_.get()); | |
| 185 zlib_stream_.get()->next_in = bit_cast<Bytef*>(&dummy_header[0]); | |
| 186 zlib_stream_.get()->avail_in = sizeof(dummy_header); | |
| 187 zlib_stream_.get()->next_out = bit_cast<Bytef*>(&dummy_output[0]); | |
| 188 zlib_stream_.get()->avail_out = sizeof(dummy_output); | |
| 189 | |
| 190 int ret = inflate(zlib_stream_.get(), Z_NO_FLUSH); | |
| 191 return ret == Z_OK; | |
| 192 } | |
| 193 | |
| 194 bool GzipStreamSource::IsGzipHeaderInvalid() { | |
| 195 const size_t kGzipFooterBytes = 8; | |
| 196 const char* end = nullptr; | |
| 197 GZipHeader::Status status = | |
| 198 gzip_header_.ReadMore(buffer_->bytes(), buffer_->bytes_left(), &end); | |
| 199 if (status == GZipHeader::INCOMPLETE_HEADER) { | |
| 200 buffer_->WasDrained(buffer_->bytes_left()); | |
| 201 return false; | |
| 202 } | |
| 203 | |
| 204 gzip_header_unchecked_ = false; | |
| 205 if (status == GZipHeader::COMPLETE_HEADER) { | |
| 206 // If there is a valid header, there should also be a valid footer. | |
| 207 gzip_footer_bytes_left_ = kGzipFooterBytes; | |
| 208 buffer_->WasDrained(end - buffer_->bytes()); | |
| 209 } | |
| 210 | |
| 211 return status == GZipHeader::INVALID_HEADER; | |
| 212 } | |
| 213 | |
| 214 // Dumb heuristic. Gzip files always start with a two-byte magic value per RFC | |
| 215 // 1952 2.3.1, so if the first byte isn't the first byte of the gzip magic, and | |
| 216 // this filter is checking whether it should fallback, then fallback. | |
| 217 bool GzipStreamSource::ShouldFallbackToPlain() { | |
| 218 static const char kGzipFirstByte = 0x1f; | |
| 219 if (!gzip_header_unchecked_ || !gzip_fallback_unchecked_) | |
| 220 return false; | |
| 221 if (!buffer_->HasMoreBytes()) | |
| 222 return false; | |
| 223 char d = buffer_->bytes()[0]; | |
| 224 return d != kGzipFirstByte; | |
| 225 } | |
| 226 | |
| 227 void GzipStreamSource::SkipGzipFooterIfNeeded() { | |
| 228 if (gzip_footer_bytes_left_ == 0) | |
| 229 return; | |
| 230 size_t to_read = gzip_footer_bytes_left_; | |
| 231 if (to_read > buffer_->bytes_left()) | |
| 232 to_read = buffer_->bytes_left(); | |
| 233 buffer_->WasDrained(to_read); | |
| 234 } | |
| 235 | |
| 236 } // namespace net | |
| OLD | NEW |