Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(121)

Side by Side Diff: net/filter/gzip_stream_source.cc

Issue 1662763002: [ON HOLD] Implement pull-based design for content decoding (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Created 4 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 // Copyright 2016 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "base/bind.h"
6 #include "base/bit_cast.h"
7 #include "net/filter/block_buffer.h"
8 #include "net/filter/gzip_stream_source.h"
9 #include "third_party/zlib/zlib.h"
10
11 namespace net {
12
13 GzipStreamSource::GzipStreamSource(scoped_ptr<StreamSource> previous)
14 : buffer_(new BlockBuffer()),
15 previous_(std::move(previous)),
16 zlib_eof_(false),
17 zlib_header_added_(false),
18 gzip_header_unchecked_(false),
19 gzip_footer_bytes_left_(0),
20 total_bytes_output_(0) {}
21
22 GzipStreamSource::~GzipStreamSource() {
23 if (zlib_stream_)
24 inflateEnd(zlib_stream_.get());
25 }
26
27 bool GzipStreamSource::Init(GzipStreamSourceMode mode, bool gzip_fallback) {
28 zlib_stream_.reset(new z_stream);
29 if (!zlib_stream_)
30 return false;
31 memset(zlib_stream_.get(), 0, sizeof(z_stream));
32
33 if (mode == GZIP_STREAM_SOURCE_GZIP) {
34 gzip_header_unchecked_ = true;
35 gzip_fallback_unchecked_ = gzip_fallback;
36 if (inflateInit2(zlib_stream_.get(), -MAX_WBITS) != Z_OK)
37 return false;
38 } else {
39 if (inflateInit(zlib_stream_.get()) != Z_OK)
40 return false;
41 }
42
43 return true;
44 }
45
46 Error GzipStreamSource::Read(IOBuffer* dest_buffer,
47 size_t buffer_size,
48 size_t* bytes_read,
49 const OnReadCompleteCallback& callback) {
50 *bytes_read = 0;
51
52 // Loop on reading the previous source until either:
53 // * Decompress() returns some data, in which case this method completes
54 // synchronously, or
55 // * Read() does not complete synchronously, in which case OnReadComplete() is
56 // responsible for finishing the decompression.
57 Error error;
58 do {
59 // If this stream is not really gzipped as detected by
60 // ShouldFallbackToPlain, pretend the zlib stream already ended.
61 if (ShouldFallbackToPlain()) {
62 zlib_eof_ = true;
63 gzip_header_unchecked_ = false;
64 gzip_fallback_unchecked_ = false;
65 }
66
67 // Require a valid gzip header when decompressing a gzip stream.
68 if (gzip_header_unchecked_ && IsGzipHeaderInvalid())
69 return ERR_CONTENT_DECODING_FAILED;
70
71 GzipStreamState state = Decompress(dest_buffer, buffer_size, bytes_read);
72
73 // If the decompressor threw an error, fail synchronously.
74 if (state == GZIP_STREAM_ERROR)
75 return ERR_CONTENT_DECODING_FAILED;
76
77 // Awkward special case: if Decompress returns with data still left in the
78 // input buffer, ordinarily that would imply needing more output space.
79 // However, for compatibility reasons, it is permissible to include trailing
80 // uncompressed data after the zlib stream ends. That data is copied through
81 // here by Passthrough.
82 if (*bytes_read == 0 && zlib_eof_ && buffer_->HasMoreBytes()) {
83 state = Passthrough(dest_buffer, buffer_size, bytes_read);
84 }
85
86 // If there was already some data buffered internally in |buffer_|,
87 // or some output buffered internally in zlib, |Decompress| can succeed
88 // synchronously. If this happens, return right here.
89 if (*bytes_read > 0) {
90 total_bytes_output_ += *bytes_read;
91 return OK;
92 }
93
94 // GZIP_STREAM_MORE_OUTPUT_SPACE implies |*bytes_read| > 0, since Decompress
95 // will fill all available output buffer space first and the output buffer
96 // is empty coming into |Read|. Since GZIP_STREAM_ERROR is handled above,
97 // this is the only other case.
98 DCHECK_EQ(GZIP_STREAM_MORE_INPUT, state);
99
100 // Since Decompress needs more input, it has consumed all existing input.
101 DCHECK(!buffer_->HasMoreBytes());
102
103 // Dispatch a read to refill the input buffer.
104 size_t previous_bytes_read;
Randy Smith (Not in Mondays) 2016/02/08 23:28:42 nit: Initialize to zero.
xunjieli 2016/03/03 23:00:08 Done.
105 error = previous_->Read(
106 buffer_->buffer(), buffer_->size(), &previous_bytes_read,
107 base::Bind(&GzipStreamSource::OnReadComplete, base::Unretained(this),
108 callback, base::Unretained(dest_buffer), buffer_size));
109
110 // OK with 0 bytes read means EOF. Since the buffer is already empty, and
111 // Decompress already failed to return any more data, this source is also
112 // at EOF. Just return that synchronously.
113 if (error == OK && previous_bytes_read == 0)
114 return OK;
115
116 // If the underlying read completed synchronously, mark the buffer as
117 // refilled and try again.
118 if (error == OK)
119 buffer_->WasRefilled(previous_bytes_read);
120 } while (error == OK);
121
122 if (error == ERR_IO_PENDING)
123 pending_read_buffer_ = dest_buffer;
124
125 return error;
126 }
127
128 // Private helpers.
129
130 // Synchronous decompressor. This function consumes bytes from |buffer_| and
131 // decompresses them into |dest_buffer| until either:
132 // a) |buffer_| is empty, and it returns MORE_INPUT;
133 // b) |dest_buffer| is full, and it returns MORE_OUTPUT_SPACE;
134 // This decompressor will decompress a zlib stream (either gzip or deflate)
135 // until the zlib EOF, then will pass any further input through untouched.
136 GzipStreamSource::GzipStreamState GzipStreamSource::Decompress(
137 IOBuffer* dest_buffer,
138 size_t buffer_size,
139 size_t* bytes_output) {
140 DCHECK(dest_buffer);
141 DCHECK(buffer_size != 0);
142
143 if (!buffer_->HasMoreBytes()) {
144 return GZIP_STREAM_MORE_INPUT;
145 }
146
147 // If the zlib stream has already ended, pass any further data through.
148 if (zlib_eof_)
149 return Passthrough(dest_buffer, buffer_size, bytes_output);
150
151 zlib_stream_.get()->next_in = bit_cast<Bytef*>(buffer_->bytes());
152 zlib_stream_.get()->avail_in = buffer_->bytes_left();
153 zlib_stream_.get()->next_out = bit_cast<Bytef*>(dest_buffer->data());
154 zlib_stream_.get()->avail_out = buffer_size;
155
156 int ret = inflate(zlib_stream_.get(), Z_NO_FLUSH);
157
158 LOG(ERROR) << "inflate " << ret << " " << zlib_stream_.get()->avail_in << " "
159 << zlib_stream_.get()->avail_out;
160
161 // Sometime misconfigured servers omit the zlib header, relying on clients to
162 // splice it back in.
163 if (ret < 0 && !zlib_header_added_) {
164 zlib_header_added_ = true;
165 if (!InsertZlibHeader())
166 return GZIP_STREAM_ERROR;
167
168 zlib_stream_.get()->next_in = bit_cast<Bytef*>(buffer_->bytes());
169 zlib_stream_.get()->avail_in = buffer_->bytes_left();
170 zlib_stream_.get()->next_out = bit_cast<Bytef*>(dest_buffer->data());
171 zlib_stream_.get()->avail_out = buffer_size;
172
173 ret = inflate(zlib_stream_.get(), Z_NO_FLUSH);
174 // TODO(ellyjones): add a histogram to see how often this happens. The
175 // original bug for this behavior was ancient and maybe it doesn't happen in
176 // the wild any more?
177 }
178
179 size_t bytes_used = buffer_->bytes_left() - zlib_stream_.get()->avail_in;
180 size_t bytes_out = buffer_size - zlib_stream_.get()->avail_out;
181
182 buffer_->WasDrained(bytes_used);
183 *bytes_output = bytes_out;
184
185 if (ret != Z_STREAM_END && ret != Z_OK)
186 return GZIP_STREAM_ERROR;
187
188 // The zlib stream can end before the input stream ends. If this happens,
189 // |Decompress| will pass any further data on untouched.
190 if (ret == Z_STREAM_END)
191 zlib_eof_ = true;
192
193 if (!buffer_->HasMoreBytes())
194 return GZIP_STREAM_MORE_INPUT;
195 else
196 return GZIP_STREAM_MORE_OUTPUT_SPACE;
197 }
198
199 GzipStreamSource::GzipStreamState GzipStreamSource::Passthrough(
200 IOBuffer* dest_buffer,
201 size_t buffer_size,
202 size_t* bytes_read) {
203 SkipGzipFooterIfNeeded();
204 size_t to_copy = buffer_->bytes_left();
205 if (to_copy > buffer_size)
206 to_copy = buffer_size;
207 memcpy(dest_buffer->data(), buffer_->bytes(), to_copy);
208 buffer_->WasDrained(to_copy);
209 *bytes_read = to_copy;
210 if (!buffer_->HasMoreBytes())
211 return GZIP_STREAM_MORE_INPUT;
212 else
213 return GZIP_STREAM_MORE_OUTPUT_SPACE;
214 }
215
216 void GzipStreamSource::OnReadComplete(const OnReadCompleteCallback& callback,
217 IOBuffer* dest_buffer,
218 size_t dest_buffer_size,
219 Error error,
220 size_t bytes_read) {
221 DCHECK(!buffer_->HasMoreBytes());
222 DCHECK_EQ(dest_buffer, pending_read_buffer_.get());
223
224 // Take a ref for the lifetime of this function.
225 scoped_refptr<IOBuffer> dest_ref(dest_buffer);
226 pending_read_buffer_ = nullptr;
227
228 // If the underlying read failed, fail this read directly.
229 if (error != OK) {
230 callback.Run(error, bytes_read);
231 return;
232 }
233
234 if (bytes_read == 0) {
235 // EOF. Since the buffer is empty, there is no more data to decompress (any
236 // internally buffered data would have been drained already before calling
237 // the previous stream's Read). Return EOF to our caller.
238 callback.Run(OK, 0);
239 return;
240 }
241
242 // Mark the buffer as refilled and try decompressing.
243 buffer_->WasRefilled(bytes_read);
244
245 // Recurse. If this Read completes synchronously, this method runs the
246 // callback; if it does not, Read will have posted an asynchronous read that
247 // will later re-invoke OnReadComplete to run the callback.
248 error = Read(dest_buffer, dest_buffer_size, &bytes_read, callback);
249 if (error != ERR_IO_PENDING) {
250 if (error == OK)
251 total_bytes_output_ += bytes_read;
252 callback.Run(error, bytes_read);
253 }
254 }
255
256 bool GzipStreamSource::InsertZlibHeader() {
257 char dummy_header[] = {0x78, 0x01};
258 char dummy_output[4];
259
260 inflateReset(zlib_stream_.get());
261 zlib_stream_.get()->next_in = bit_cast<Bytef*>(&dummy_header[0]);
262 zlib_stream_.get()->avail_in = sizeof(dummy_header);
263 zlib_stream_.get()->next_out = bit_cast<Bytef*>(&dummy_output[0]);
264 zlib_stream_.get()->avail_out = sizeof(dummy_output);
265
266 int ret = inflate(zlib_stream_.get(), Z_NO_FLUSH);
267 return ret == Z_OK;
268 }
269
270 bool GzipStreamSource::IsGzipHeaderInvalid() {
271 const size_t kGzipFooterBytes = 8;
272 const char* end = nullptr;
273 GZipHeader::Status status =
274 gzip_header_.ReadMore(buffer_->bytes(), buffer_->bytes_left(), &end);
275 if (status == GZipHeader::INCOMPLETE_HEADER) {
276 buffer_->WasDrained(buffer_->bytes_left());
277 return false;
278 }
279
280 gzip_header_unchecked_ = false;
281 if (status == GZipHeader::COMPLETE_HEADER) {
282 // If there is a valid header, there should also be a valid footer.
283 gzip_footer_bytes_left_ = kGzipFooterBytes;
284 buffer_->WasDrained(end - buffer_->bytes());
285 }
286
287 return status == GZipHeader::INVALID_HEADER;
288 }
289
290 // Dumb heuristic. Gzip files always start with a two-byte magic value per RFC
291 // 1952 2.3.1, so if the first byte isn't the first byte of the gzip magic, and
292 // this filter is checking whether it should fallback, then fallback.
293 bool GzipStreamSource::ShouldFallbackToPlain() {
294 static const char kGzipFirstByte = 0x1f;
295 if (!gzip_header_unchecked_ || !gzip_fallback_unchecked_)
296 return false;
297 if (!buffer_->HasMoreBytes())
298 return false;
299 char d = buffer_->bytes()[0];
300 return d != kGzipFirstByte;
301 }
302
303 size_t GzipStreamSource::GetBytesOutput() const {
304 return total_bytes_output_;
305 }
306
307 void GzipStreamSource::SkipGzipFooterIfNeeded() {
308 if (gzip_footer_bytes_left_ == 0)
309 return;
310 size_t to_read = gzip_footer_bytes_left_;
311 if (to_read > buffer_->bytes_left())
312 to_read = buffer_->bytes_left();
313 buffer_->WasDrained(to_read);
314 }
315
316 } // namespace net
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698