OLD | NEW |
---|---|
(Empty) | |
1 // Copyright 2016 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 #include "net/filter/gzip_source_stream.h" | |
6 | |
7 #include "base/bind.h" | |
8 #include "base/bit_cast.h" | |
9 #include "base/logging.h" | |
10 #include "net/base/io_buffer.h" | |
11 #include "third_party/zlib/zlib.h" | |
12 | |
13 namespace net { | |
14 | |
namespace {

// Human-readable encoding names handed out by
// GzipSourceStream::GetTypeAsString().
constexpr char kGzip[] = "GZIP";
constexpr char kGzipFallback[] = "GZIP_FALLBACK";
constexpr char kDeflate[] = "DEFLATE";

}  // namespace
22 | |
23 GzipSourceStream::~GzipSourceStream() { | |
24 if (zlib_stream_) | |
25 inflateEnd(zlib_stream_.get()); | |
26 } | |
27 | |
28 std::unique_ptr<GzipSourceStream> GzipSourceStream::Create( | |
29 std::unique_ptr<SourceStream> previous, | |
30 GzipSourceStreamMode mode) { | |
31 std::unique_ptr<GzipSourceStream> source( | |
32 new GzipSourceStream(std::move(previous), mode)); | |
33 | |
34 if (!source->Init()) | |
35 return nullptr; | |
36 return source; | |
37 } | |
38 | |
39 GzipSourceStream::GzipSourceStream(std::unique_ptr<SourceStream> previous, | |
40 GzipSourceStreamMode mode) | |
41 : FilterSourceStream(SourceStream::TYPE_GZIP, std::move(previous)), | |
42 mode_(mode), | |
43 zlib_eof_(false), | |
44 zlib_header_added_(false), | |
45 should_check_gzip_header_(true), | |
46 should_check_first_byte_(mode == GZIP_SOURCE_STREAM_GZIP_WITH_FALLBACK), | |
47 gzip_footer_bytes_left_(0) {} | |
48 | |
49 bool GzipSourceStream::Init() { | |
50 zlib_stream_.reset(new z_stream); | |
51 if (!zlib_stream_) | |
52 return false; | |
53 memset(zlib_stream_.get(), 0, sizeof(z_stream)); | |
54 | |
55 if (mode_ == GZIP_SOURCE_STREAM_GZIP || | |
56 mode_ == GZIP_SOURCE_STREAM_GZIP_WITH_FALLBACK) { | |
57 if (inflateInit2(zlib_stream_.get(), -MAX_WBITS) != Z_OK) | |
58 return false; | |
59 } else { | |
60 should_check_gzip_header_ = false; | |
61 if (inflateInit(zlib_stream_.get()) != Z_OK) | |
62 return false; | |
63 } | |
64 return true; | |
65 } | |
66 | |
67 std::string GzipSourceStream::GetTypeAsString() const { | |
68 switch (type()) { | |
69 case TYPE_GZIP: | |
70 return kGzip; | |
71 case TYPE_GZIP_FALLBACK: | |
72 return kGzipFallback; | |
73 case TYPE_DEFLATE: | |
74 return kDeflate; | |
75 default: | |
76 NOTREACHED(); | |
77 return ""; | |
78 } | |
79 } | |
80 | |
// Filters one chunk of input: decides on plain-text fallback from the first | |
// input byte (when that check is still pending), validates the gzip header | |
// (when that check is still pending), then hands the rest to Decompress(). | |
// | |
// |*consumed_bytes| is reset to 0 and then set to the number of bytes of | |
// |input_buffer| that were used up. Returns the value produced by | |
// Decompress() (bytes written to |output_buffer|, or an error), or | |
// ERR_CONTENT_DECODING_FAILED if the gzip header is invalid. | |
81 int GzipSourceStream::FilterData(IOBuffer* output_buffer, | |
82 int output_buffer_size, | |
83 IOBuffer* input_buffer, | |
84 int input_buffer_size, | |
85 int* consumed_bytes, | |
86 bool /*upstream_end_reached*/) { | |
87 // If this stream is not really gzipped as detected by ShouldFallbackToPlain, | |
88 // pretend that the zlib stream has already ended. | |
89 if (input_buffer_size > 0 && ShouldFallbackToPlain(input_buffer->data()[0])) { | |
90 zlib_eof_ = true; | |
91 should_check_gzip_header_ = false; | |
Randy Smith (Not in Mondays)
2016/09/14 22:25:23
Suggestion: I'm finding myself a bit uncomfortabl
xunjieli
2016/09/19 13:57:03
Done. Great idea!
| |
92 } | |
93 | |
94 *consumed_bytes = 0; | |
95 // Require a valid gzip header when decompressing a gzip stream. | |
// While the header is still incomplete, IsGzipHeaderInvalid() consumes the | |
// whole input, so Decompress() below sees an empty input and returns 0. | |
96 if (should_check_gzip_header_ && | |
97 IsGzipHeaderInvalid(input_buffer->data(), input_buffer_size, | |
98 consumed_bytes)) { | |
99 return ERR_CONTENT_DECODING_FAILED; | |
100 } | |
101 | |
// Decompress only the bytes that follow whatever the header check consumed. | |
102 int bytes_read = Decompress( | |
103 output_buffer, output_buffer_size, input_buffer->data() + *consumed_bytes, | |
104 input_buffer_size - *consumed_bytes, consumed_bytes); | |
105 | |
106 // If output is 0 byte length, it means that the filter must have consumed | |
107 // all input. | |
108 DCHECK(bytes_read != 0 || *consumed_bytes == input_buffer_size); | |
109 return bytes_read; | |
110 } | |
111 | |
// Inflates |input_buffer| into |output_buffer| with zlib, or passes the data | |
// through unchanged once the zlib stream has ended (|zlib_eof_|). | |
// | |
// Adds the number of input bytes used to |*consumed_bytes|. Returns the number | |
// of bytes written to |output_buffer| (0 when |input_buffer_size| is 0), or | |
// ERR_CONTENT_DECODING_FAILED if inflate() reports anything other than Z_OK | |
// or Z_STREAM_END. | |
112 int GzipSourceStream::Decompress(IOBuffer* output_buffer, | |
113 int output_buffer_size, | |
114 char* input_buffer, | |
115 int input_buffer_size, | |
116 int* consumed_bytes) { | |
117 DCHECK(output_buffer); | |
118 DCHECK_NE(0, output_buffer_size); | |
119 | |
120 if (input_buffer_size == 0) | |
121 return 0; | |
122 | |
123 // If the zlib stream has already ended, pass any further data through. | |
124 if (zlib_eof_) { | |
125 return Passthrough(output_buffer->data(), output_buffer_size, input_buffer, | |
126 input_buffer_size, consumed_bytes); | |
127 } | |
128 | |
129 zlib_stream_.get()->next_in = bit_cast<Bytef*>(input_buffer); | |
130 zlib_stream_.get()->avail_in = input_buffer_size; | |
131 zlib_stream_.get()->next_out = bit_cast<Bytef*>(output_buffer->data()); | |
132 zlib_stream_.get()->avail_out = output_buffer_size; | |
133 | |
134 int ret = inflate(zlib_stream_.get(), Z_NO_FLUSH); | |
135 | |
136 // Sometimes misconfigured servers omit the zlib header, relying on clients | |
137 // to splice it back in. | |
Randy Smith (Not in Mondays)
2016/09/14 22:25:23
It looks like this would happen anywhere in the st
xunjieli
2016/09/19 13:57:03
I believe it should only show up in the beginning.
Randy Smith (Not in Mondays)
2016/09/21 20:57:05
Gotcha--I think I understand better now. I'm happ
xunjieli
2016/09/22 17:20:49
Acknowledged. Thanks! I filed a bug and linked it
| |
// NOTE(review): the retry is gated only on |zlib_header_added_|, not on this | |
// being the first inflate() call, so the first decode error anywhere in the | |
// stream resets zlib and replays the current input behind a dummy header. | |
138 if (ret < 0 && !zlib_header_added_) { | |
139 zlib_header_added_ = true; | |
140 if (!InsertZlibHeader()) | |
141 return ERR_CONTENT_DECODING_FAILED; | |
142 | |
143 zlib_stream_.get()->next_in = bit_cast<Bytef*>(input_buffer); | |
144 zlib_stream_.get()->avail_in = input_buffer_size; | |
145 zlib_stream_.get()->next_out = bit_cast<Bytef*>(output_buffer->data()); | |
146 zlib_stream_.get()->avail_out = output_buffer_size; | |
147 | |
148 ret = inflate(zlib_stream_.get(), Z_NO_FLUSH); | |
149 // TODO(xunjieli): add a histogram to see how often this happens. The | |
150 // original bug for this behavior was ancient and maybe it doesn't happen | |
151 // in the wild any more? | |
Randy Smith (Not in Mondays)
2016/09/14 22:25:23
+1 (though it's fine if you don't want to do that
xunjieli
2016/09/19 13:57:03
Acknowledged. I am planning to do it in a followup
| |
152 } | |
153 | |
// zlib decrements avail_in/avail_out as it works; the differences are the | |
// number of input bytes used and output bytes produced by this call. | |
154 size_t bytes_used = input_buffer_size - zlib_stream_.get()->avail_in; | |
155 size_t bytes_out = output_buffer_size - zlib_stream_.get()->avail_out; | |
156 | |
157 *consumed_bytes += base::checked_cast<int>(bytes_used); | |
158 | |
159 if (ret != Z_STREAM_END && ret != Z_OK) | |
160 return ERR_CONTENT_DECODING_FAILED; | |
Randy Smith (Not in Mondays)
2016/09/14 22:25:23
Do we have any documentation in the interface cont
xunjieli
2016/09/19 13:57:03
The interface documentation says that ERR_CONTENT_
| |
161 | |
162 // The zlib stream can end before the input stream ends. If this happens, | |
163 // |Decompress| will pass any further data on untouched. | |
164 if (ret == Z_STREAM_END) { | |
165 zlib_eof_ = true; | |
// Hand the unread tail of the input to Passthrough(), writing after the | |
// bytes inflate() already produced. | |
166 return bytes_out + Passthrough(output_buffer->data() + bytes_out, | |
167 output_buffer_size - bytes_out, | |
168 input_buffer + bytes_used, | |
169 input_buffer_size - bytes_used, | |
170 consumed_bytes); | |
171 } | |
172 return bytes_out; | |
173 } | |
174 | |
175 size_t GzipSourceStream::Passthrough(char* output_buffer, | |
176 int output_buffer_size, | |
177 char* input_buffer, | |
178 int input_buffer_size, | |
179 int* consumed_bytes) { | |
180 size_t footer_bytes_skipped = NumGzipFooterBytesToSkip(input_buffer_size); | |
181 size_t to_copy = input_buffer_size - footer_bytes_skipped; | |
182 if (to_copy > base::checked_cast<size_t>(output_buffer_size)) | |
183 to_copy = output_buffer_size; | |
184 memcpy(output_buffer, input_buffer, to_copy); | |
185 *consumed_bytes += to_copy + footer_bytes_skipped; | |
186 return to_copy; | |
187 } | |
188 | |
189 bool GzipSourceStream::InsertZlibHeader() { | |
190 char dummy_header[] = {0x78, 0x01}; | |
191 char dummy_output[4]; | |
192 | |
193 inflateReset(zlib_stream_.get()); | |
194 zlib_stream_.get()->next_in = bit_cast<Bytef*>(&dummy_header[0]); | |
195 zlib_stream_.get()->avail_in = sizeof(dummy_header); | |
196 zlib_stream_.get()->next_out = bit_cast<Bytef*>(&dummy_output[0]); | |
197 zlib_stream_.get()->avail_out = sizeof(dummy_output); | |
198 | |
199 int ret = inflate(zlib_stream_.get(), Z_NO_FLUSH); | |
200 return ret == Z_OK; | |
201 } | |
202 | |
203 bool GzipSourceStream::IsGzipHeaderInvalid(char* input_buffer, | |
204 int input_buffer_size, | |
205 int* consumed_bytes) { | |
206 const size_t kGzipFooterBytes = 8; | |
207 const char* end = nullptr; | |
208 GZipHeader::Status status = | |
209 gzip_header_.ReadMore(input_buffer, input_buffer_size, &end); | |
210 if (status == GZipHeader::INCOMPLETE_HEADER) { | |
211 *consumed_bytes += input_buffer_size; | |
212 return false; | |
213 } | |
214 | |
215 should_check_gzip_header_ = false; | |
216 if (status == GZipHeader::COMPLETE_HEADER) { | |
217 // If there is a valid header, there should also be a valid footer. | |
218 gzip_footer_bytes_left_ = kGzipFooterBytes; | |
219 *consumed_bytes += end - input_buffer; | |
220 } | |
221 | |
222 return status == GZipHeader::INVALID_HEADER; | |
223 } | |
224 | |
225 // Dumb heuristic. Gzip files always start with a two-byte magic value per RFC | |
226 // 1952 2.3.1, so if the first byte isn't the first byte of the gzip magic, and | |
227 // this filter is checking whether it should fallback, then fallback. | |
228 bool GzipSourceStream::ShouldFallbackToPlain(char first_byte) { | |
229 static const char kGzipFirstByte = 0x1f; | |
230 if (!should_check_first_byte_) | |
231 return false; | |
232 if (!should_check_gzip_header_) | |
233 return false; | |
234 should_check_first_byte_ = false; | |
235 return first_byte != kGzipFirstByte; | |
236 } | |
237 | |
238 size_t GzipSourceStream::NumGzipFooterBytesToSkip(int input_buffer_size) { | |
239 if (gzip_footer_bytes_left_ == 0) | |
240 return 0; | |
241 size_t to_read = gzip_footer_bytes_left_; | |
242 if (to_read > base::checked_cast<size_t>(input_buffer_size)) | |
243 to_read = input_buffer_size; | |
244 gzip_footer_bytes_left_ -= to_read; | |
245 return to_read; | |
246 } | |
247 | |
248 } // namespace net | |
OLD | NEW |