Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(764)

Side by Side Diff: net/filter/gzip_stream_source.cc

Issue 1662763002: [ON HOLD] Implement pull-based design for content decoding (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Address comments Created 4 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 // Copyright 2016 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "net/filter/gzip_stream_source.h"
6
7 #include "base/bind.h"
8 #include "base/bit_cast.h"
9 #include "base/logging.h"
10 #include "net/filter/block_buffer.h"
11 #include "third_party/zlib/zlib.h"
12
13 namespace net {
14
15 GzipStreamSource::GzipStreamSource(std::unique_ptr<StreamSource> previous,
16 GzipStreamSourceMode mode)
17 : FilterStreamSource(StreamSource::TYPE_GZIP, std::move(previous)),
18 mode_(mode),
19 zlib_eof_(false),
20 zlib_header_added_(false),
21 should_check_gzip_header_(true),
22 gzip_footer_bytes_left_(0) {}
23
24 GzipStreamSource::~GzipStreamSource() {
25 if (zlib_stream_)
26 inflateEnd(zlib_stream_.get());
27 }
28
29 bool GzipStreamSource::Init() {
30 zlib_stream_.reset(new z_stream);
31 if (!zlib_stream_)
32 return false;
33 memset(zlib_stream_.get(), 0, sizeof(z_stream));
34
35 if (mode_ == GZIP_STREAM_SOURCE_GZIP ||
36 mode_ == GZIP_STREAM_SOURCE_GZIP_WITH_FALLBACK) {
37 if (inflateInit2(zlib_stream_.get(), -MAX_WBITS) != Z_OK)
38 return false;
39 } else {
40 should_check_gzip_header_ = false;
41 if (inflateInit(zlib_stream_.get()) != Z_OK)
42 return false;
43 }
44 return true;
45 }
46
47 Error GzipStreamSource::ReadInternal(IOBuffer* dest_buffer,
48 size_t buffer_size,
49 size_t* bytes_read) {
50 // If this stream is not really gzipped as detected by
51 // ShouldFallbackToPlain, pretend the zlib stream already ended.
52 if (ShouldFallbackToPlain()) {
53 zlib_eof_ = true;
54 should_check_gzip_header_ = false;
55 }
56
57 // Require a valid gzip header when decompressing a gzip stream.
58 if (should_check_gzip_header_ && IsGzipHeaderInvalid())
59 return ERR_CONTENT_DECODING_FAILED;
60
61 GzipStreamState state = Decompress(dest_buffer, buffer_size, bytes_read);
62
63 // If the decompressor threw an error, fail synchronously.
64 if (state == GZIP_STREAM_ERROR)
65 return ERR_CONTENT_DECODING_FAILED;
66
67 // Awkward special case: if Decompress returns with data still left in the
68 // input buffer, ordinarily that would imply needing more output space.
69 // However, for compatibility reasons, it is permissible to include trailing
70 // uncompressed data after the zlib stream ends. That data is copied through
71 // here by Passthrough.
72 if (*bytes_read == 0 && zlib_eof_ && buffer_->HasMoreBytes()) {
73 state = Passthrough(dest_buffer, buffer_size, bytes_read);
74 }
75
76 // If there was already some data buffered internally in |buffer_|,
77 // or some output buffered internally in zlib, |Decompress| can succeed
78 // synchronously. If this happens, return right here.
79 if (*bytes_read > 0)
80 return OK;
81
82 // GZIP_STREAM_MORE_OUTPUT_SPACE implies |*bytes_read| > 0, since Decompress
83 // will fill all available output buffer space first and the output buffer
84 // is empty coming into |Read|. Since GZIP_STREAM_ERROR is handled above,
85 // this is the only other case.
86 DCHECK_EQ(GZIP_STREAM_MORE_INPUT, state);
87
88 // Since Decompress needs more input, it has consumed all existing input.
89 DCHECK(!buffer_->HasMoreBytes());
90
91 return OK;
92 }
93
94 // Private helpers.
95
96 // Synchronous decompressor. This function consumes bytes from |buffer_| and
97 // decompresses them into |dest_buffer| until either:
98 // a) |buffer_| is empty, and it returns MORE_INPUT;
99 // b) |dest_buffer| is full, and it returns MORE_OUTPUT_SPACE;
100 // This decompressor will decompress a zlib stream (either gzip or deflate)
101 // until the zlib EOF, then will pass any further input through untouched.
102 GzipStreamSource::GzipStreamState GzipStreamSource::Decompress(
103 IOBuffer* dest_buffer,
104 size_t buffer_size,
105 size_t* bytes_output) {
106 DCHECK(dest_buffer);
107 DCHECK_NE(0u, buffer_size);
108
109 if (!buffer_->HasMoreBytes()) {
110 return GZIP_STREAM_MORE_INPUT;
111 }
112
113 // If the zlib stream has already ended, pass any further data through.
114 if (zlib_eof_)
115 return Passthrough(dest_buffer, buffer_size, bytes_output);
116
117 zlib_stream_.get()->next_in = bit_cast<Bytef*>(buffer_->bytes());
118 zlib_stream_.get()->avail_in = buffer_->bytes_left();
119 zlib_stream_.get()->next_out = bit_cast<Bytef*>(dest_buffer->data());
120 zlib_stream_.get()->avail_out = buffer_size;
121
122 int ret = inflate(zlib_stream_.get(), Z_NO_FLUSH);
123
124 DVLOG(1) << "inflate " << ret << " " << zlib_stream_.get()->avail_in << " "
125 << zlib_stream_.get()->avail_out;
126
127 // Sometime misconfigured servers omit the zlib header, relying on clients to
128 // splice it back in.
129 if (ret < 0 && !zlib_header_added_) {
130 zlib_header_added_ = true;
131 if (!InsertZlibHeader())
132 return GZIP_STREAM_ERROR;
133
134 zlib_stream_.get()->next_in = bit_cast<Bytef*>(buffer_->bytes());
135 zlib_stream_.get()->avail_in = buffer_->bytes_left();
136 zlib_stream_.get()->next_out = bit_cast<Bytef*>(dest_buffer->data());
137 zlib_stream_.get()->avail_out = buffer_size;
138
139 ret = inflate(zlib_stream_.get(), Z_NO_FLUSH);
140 // TODO(ellyjones): add a histogram to see how often this happens. The
141 // original bug for this behavior was ancient and maybe it doesn't happen in
142 // the wild any more?
143 }
144
145 size_t bytes_used = buffer_->bytes_left() - zlib_stream_.get()->avail_in;
146 size_t bytes_out = buffer_size - zlib_stream_.get()->avail_out;
147
148 buffer_->WasDrained(bytes_used);
149 *bytes_output = bytes_out;
150
151 if (ret != Z_STREAM_END && ret != Z_OK)
152 return GZIP_STREAM_ERROR;
153
154 // The zlib stream can end before the input stream ends. If this happens,
155 // |Decompress| will pass any further data on untouched.
156 if (ret == Z_STREAM_END)
157 zlib_eof_ = true;
158
159 if (!buffer_->HasMoreBytes())
160 return GZIP_STREAM_MORE_INPUT;
161 else
162 return GZIP_STREAM_MORE_OUTPUT_SPACE;
163 }
164
165 GzipStreamSource::GzipStreamState GzipStreamSource::Passthrough(
166 IOBuffer* dest_buffer,
167 size_t buffer_size,
168 size_t* bytes_read) {
169 SkipGzipFooterIfNeeded();
170 size_t to_copy = buffer_->bytes_left();
171 if (to_copy > buffer_size)
172 to_copy = buffer_size;
173 memcpy(dest_buffer->data(), buffer_->bytes(), to_copy);
174 buffer_->WasDrained(to_copy);
175 *bytes_read = to_copy;
176 if (!buffer_->HasMoreBytes())
177 return GZIP_STREAM_MORE_INPUT;
178 else
179 return GZIP_STREAM_MORE_OUTPUT_SPACE;
180 }
181
182 bool GzipStreamSource::InsertZlibHeader() {
183 char dummy_header[] = {0x78, 0x01};
184 char dummy_output[4];
185
186 inflateReset(zlib_stream_.get());
187 zlib_stream_.get()->next_in = bit_cast<Bytef*>(&dummy_header[0]);
188 zlib_stream_.get()->avail_in = sizeof(dummy_header);
189 zlib_stream_.get()->next_out = bit_cast<Bytef*>(&dummy_output[0]);
190 zlib_stream_.get()->avail_out = sizeof(dummy_output);
191
192 int ret = inflate(zlib_stream_.get(), Z_NO_FLUSH);
193 return ret == Z_OK;
194 }
195
196 bool GzipStreamSource::IsGzipHeaderInvalid() {
197 const size_t kGzipFooterBytes = 8;
198 const char* end = nullptr;
199 GZipHeader::Status status =
200 gzip_header_.ReadMore(buffer_->bytes(), buffer_->bytes_left(), &end);
201 if (status == GZipHeader::INCOMPLETE_HEADER) {
202 buffer_->WasDrained(buffer_->bytes_left());
203 return false;
204 }
205
206 should_check_gzip_header_ = false;
207 if (status == GZipHeader::COMPLETE_HEADER) {
208 // If there is a valid header, there should also be a valid footer.
209 gzip_footer_bytes_left_ = kGzipFooterBytes;
210 buffer_->WasDrained(end - buffer_->bytes());
211 }
212
213 return status == GZipHeader::INVALID_HEADER;
214 }
215
216 // Dumb heuristic. Gzip files always start with a two-byte magic value per RFC
217 // 1952 2.3.1, so if the first byte isn't the first byte of the gzip magic, and
218 // this filter is checking whether it should fallback, then fallback.
219 bool GzipStreamSource::ShouldFallbackToPlain() {
220 static const char kGzipFirstByte = 0x1f;
221 if (mode_ != GZIP_STREAM_SOURCE_GZIP_WITH_FALLBACK)
222 return false;
223 if (!should_check_gzip_header_)
224 return false;
225 if (!buffer_->HasMoreBytes())
226 return false;
227 char d = buffer_->bytes()[0];
228 return d != kGzipFirstByte;
229 }
230
231 void GzipStreamSource::SkipGzipFooterIfNeeded() {
232 if (gzip_footer_bytes_left_ == 0)
233 return;
234 size_t to_read = gzip_footer_bytes_left_;
235 if (to_read > buffer_->bytes_left())
236 to_read = buffer_->bytes_left();
237 buffer_->WasDrained(to_read);
238 }
239
240 } // namespace net
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698