Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(55)

Side by Side Diff: net/filter/gzip_source_stream.cc

Issue 2334773002: Add net::GzipSourceStream (Closed)
Patch Set: address comments and rebased onto CL 2338043002 Created 4 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 // Copyright 2016 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "net/filter/gzip_source_stream.h"
6
7 #include "base/bind.h"
8 #include "base/bit_cast.h"
9 #include "base/logging.h"
10 #include "net/base/io_buffer.h"
11 #include "third_party/zlib/zlib.h"
12
13 namespace net {
14
namespace {

// Human-readable names returned by GzipSourceStream::GetTypeAsString().
constexpr char kDeflate[] = "DEFLATE";
constexpr char kGzip[] = "GZIP";
constexpr char kGzipFallback[] = "GZIP_FALLBACK";

}  // namespace
22
23 GzipSourceStream::~GzipSourceStream() {
24 if (zlib_stream_)
25 inflateEnd(zlib_stream_.get());
26 }
27
28 std::unique_ptr<GzipSourceStream> GzipSourceStream::Create(
29 std::unique_ptr<SourceStream> previous,
30 GzipSourceStreamMode mode) {
31 std::unique_ptr<GzipSourceStream> source(
32 new GzipSourceStream(std::move(previous), mode));
33
34 if (!source->Init())
35 return nullptr;
36 return source;
37 }
38
39 GzipSourceStream::GzipSourceStream(std::unique_ptr<SourceStream> previous,
40 GzipSourceStreamMode mode)
41 : FilterSourceStream(SourceStream::TYPE_GZIP, std::move(previous)),
42 mode_(mode),
43 zlib_eof_(false),
44 zlib_header_added_(false),
45 should_check_gzip_header_(true),
46 should_check_first_byte_(mode == GZIP_SOURCE_STREAM_GZIP_WITH_FALLBACK),
47 gzip_footer_bytes_left_(0) {}
48
49 bool GzipSourceStream::Init() {
50 zlib_stream_.reset(new z_stream);
51 if (!zlib_stream_)
52 return false;
53 memset(zlib_stream_.get(), 0, sizeof(z_stream));
54
55 if (mode_ == GZIP_SOURCE_STREAM_GZIP ||
56 mode_ == GZIP_SOURCE_STREAM_GZIP_WITH_FALLBACK) {
57 if (inflateInit2(zlib_stream_.get(), -MAX_WBITS) != Z_OK)
58 return false;
59 } else {
60 should_check_gzip_header_ = false;
61 if (inflateInit(zlib_stream_.get()) != Z_OK)
62 return false;
63 }
64 return true;
65 }
66
67 std::string GzipSourceStream::GetTypeAsString() const {
68 switch (type()) {
69 case TYPE_GZIP:
70 return kGzip;
71 case TYPE_GZIP_FALLBACK:
72 return kGzipFallback;
73 case TYPE_DEFLATE:
74 return kDeflate;
75 default:
76 NOTREACHED();
77 return "";
78 }
79 }
80
81 int GzipSourceStream::FilterData(IOBuffer* output_buffer,
82 int output_buffer_size,
83 IOBuffer* input_buffer,
84 int input_buffer_size,
85 int* consumed_bytes,
86 bool /*upstream_end_reached*/) {
87 // If this stream is not really gzipped as detected by ShouldFallbackToPlain,
88 // pretend that the zlib stream has already ended.
89 if (input_buffer_size > 0 && ShouldFallbackToPlain(input_buffer->data()[0])) {
90 zlib_eof_ = true;
91 should_check_gzip_header_ = false;
Randy Smith (Not in Mondays) 2016/09/14 22:25:23 Suggestion: I'm finding myself a bit uncomfortabl
xunjieli 2016/09/19 13:57:03 Done. Great idea!
92 }
93
94 *consumed_bytes = 0;
95 // Require a valid gzip header when decompressing a gzip stream.
96 if (should_check_gzip_header_ &&
97 IsGzipHeaderInvalid(input_buffer->data(), input_buffer_size,
98 consumed_bytes)) {
99 return ERR_CONTENT_DECODING_FAILED;
100 }
101
102 int bytes_read = Decompress(
103 output_buffer, output_buffer_size, input_buffer->data() + *consumed_bytes,
104 input_buffer_size - *consumed_bytes, consumed_bytes);
105
106 // If output is 0 byte length, it means that the filter must have consumed
107 // all input.
108 DCHECK(bytes_read != 0 || *consumed_bytes == input_buffer_size);
109 return bytes_read;
110 }
111
112 int GzipSourceStream::Decompress(IOBuffer* output_buffer,
113 int output_buffer_size,
114 char* input_buffer,
115 int input_buffer_size,
116 int* consumed_bytes) {
117 DCHECK(output_buffer);
118 DCHECK_NE(0, output_buffer_size);
119
120 if (input_buffer_size == 0)
121 return 0;
122
123 // If the zlib stream has already ended, pass any further data through.
124 if (zlib_eof_) {
125 return Passthrough(output_buffer->data(), output_buffer_size, input_buffer,
126 input_buffer_size, consumed_bytes);
127 }
128
129 zlib_stream_.get()->next_in = bit_cast<Bytef*>(input_buffer);
130 zlib_stream_.get()->avail_in = input_buffer_size;
131 zlib_stream_.get()->next_out = bit_cast<Bytef*>(output_buffer->data());
132 zlib_stream_.get()->avail_out = output_buffer_size;
133
134 int ret = inflate(zlib_stream_.get(), Z_NO_FLUSH);
135
136 // Sometime misconfigured servers omit the zlib header, relying on clients
137 // to splice it back in.
Randy Smith (Not in Mondays) 2016/09/14 22:25:23 It looks like this would happen anywhere in the st
xunjieli 2016/09/19 13:57:03 I believe it should only show up in the beginning.
Randy Smith (Not in Mondays) 2016/09/21 20:57:05 Gotcha--I think I understand better now. I'm happ
xunjieli 2016/09/22 17:20:49 Acknowledged. Thanks! I filed a bug and linked it
138 if (ret < 0 && !zlib_header_added_) {
139 zlib_header_added_ = true;
140 if (!InsertZlibHeader())
141 return ERR_CONTENT_DECODING_FAILED;
142
143 zlib_stream_.get()->next_in = bit_cast<Bytef*>(input_buffer);
144 zlib_stream_.get()->avail_in = input_buffer_size;
145 zlib_stream_.get()->next_out = bit_cast<Bytef*>(output_buffer->data());
146 zlib_stream_.get()->avail_out = output_buffer_size;
147
148 ret = inflate(zlib_stream_.get(), Z_NO_FLUSH);
149 // TODO(xunjieli): add a histogram to see how often this happens. The
150 // original bug for this behavior was ancient and maybe it doesn't happen
151 // in the wild any more?
Randy Smith (Not in Mondays) 2016/09/14 22:25:23 +1 (though it's fine if you don't want to do that
xunjieli 2016/09/19 13:57:03 Acknowledged. I am planning to do it in a followup
152 }
153
154 size_t bytes_used = input_buffer_size - zlib_stream_.get()->avail_in;
155 size_t bytes_out = output_buffer_size - zlib_stream_.get()->avail_out;
156
157 *consumed_bytes += base::checked_cast<int>(bytes_used);
158
159 if (ret != Z_STREAM_END && ret != Z_OK)
160 return ERR_CONTENT_DECODING_FAILED;
Randy Smith (Not in Mondays) 2016/09/14 22:25:23 Do we have any documentation in the interface cont
xunjieli 2016/09/19 13:57:03 The interface documentation says that ERR_CONTENT_
161
162 // The zlib stream can end before the input stream ends. If this happens,
163 // |Decompress| will pass any further data on untouched.
164 if (ret == Z_STREAM_END) {
165 zlib_eof_ = true;
166 return bytes_out + Passthrough(output_buffer->data() + bytes_out,
167 output_buffer_size - bytes_out,
168 input_buffer + bytes_used,
169 input_buffer_size - bytes_used,
170 consumed_bytes);
171 }
172 return bytes_out;
173 }
174
175 size_t GzipSourceStream::Passthrough(char* output_buffer,
176 int output_buffer_size,
177 char* input_buffer,
178 int input_buffer_size,
179 int* consumed_bytes) {
180 size_t footer_bytes_skipped = NumGzipFooterBytesToSkip(input_buffer_size);
181 size_t to_copy = input_buffer_size - footer_bytes_skipped;
182 if (to_copy > base::checked_cast<size_t>(output_buffer_size))
183 to_copy = output_buffer_size;
184 memcpy(output_buffer, input_buffer, to_copy);
185 *consumed_bytes += to_copy + footer_bytes_skipped;
186 return to_copy;
187 }
188
189 bool GzipSourceStream::InsertZlibHeader() {
190 char dummy_header[] = {0x78, 0x01};
191 char dummy_output[4];
192
193 inflateReset(zlib_stream_.get());
194 zlib_stream_.get()->next_in = bit_cast<Bytef*>(&dummy_header[0]);
195 zlib_stream_.get()->avail_in = sizeof(dummy_header);
196 zlib_stream_.get()->next_out = bit_cast<Bytef*>(&dummy_output[0]);
197 zlib_stream_.get()->avail_out = sizeof(dummy_output);
198
199 int ret = inflate(zlib_stream_.get(), Z_NO_FLUSH);
200 return ret == Z_OK;
201 }
202
203 bool GzipSourceStream::IsGzipHeaderInvalid(char* input_buffer,
204 int input_buffer_size,
205 int* consumed_bytes) {
206 const size_t kGzipFooterBytes = 8;
207 const char* end = nullptr;
208 GZipHeader::Status status =
209 gzip_header_.ReadMore(input_buffer, input_buffer_size, &end);
210 if (status == GZipHeader::INCOMPLETE_HEADER) {
211 *consumed_bytes += input_buffer_size;
212 return false;
213 }
214
215 should_check_gzip_header_ = false;
216 if (status == GZipHeader::COMPLETE_HEADER) {
217 // If there is a valid header, there should also be a valid footer.
218 gzip_footer_bytes_left_ = kGzipFooterBytes;
219 *consumed_bytes += end - input_buffer;
220 }
221
222 return status == GZipHeader::INVALID_HEADER;
223 }
224
225 // Dumb heuristic. Gzip files always start with a two-byte magic value per RFC
226 // 1952 2.3.1, so if the first byte isn't the first byte of the gzip magic, and
227 // this filter is checking whether it should fallback, then fallback.
228 bool GzipSourceStream::ShouldFallbackToPlain(char first_byte) {
229 static const char kGzipFirstByte = 0x1f;
230 if (!should_check_first_byte_)
231 return false;
232 if (!should_check_gzip_header_)
233 return false;
234 should_check_first_byte_ = false;
235 return first_byte != kGzipFirstByte;
236 }
237
238 size_t GzipSourceStream::NumGzipFooterBytesToSkip(int input_buffer_size) {
239 if (gzip_footer_bytes_left_ == 0)
240 return 0;
241 size_t to_read = gzip_footer_bytes_left_;
242 if (to_read > base::checked_cast<size_t>(input_buffer_size))
243 to_read = input_buffer_size;
244 gzip_footer_bytes_left_ -= to_read;
245 return to_read;
246 }
247
248 } // namespace net
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698