OLD | NEW |
---|---|
(Empty) | |
1 // Copyright 2016 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 #include "net/filter/gzip_source_stream.h" | |
6 | |
7 #include "base/bind.h" | |
8 #include "base/bit_cast.h" | |
9 #include "base/logging.h" | |
10 #include "net/base/io_buffer.h" | |
11 #include "third_party/zlib/zlib.h" | |
12 | |
13 namespace net { | |
14 | |
namespace {

// Human-readable names handed out by GzipSourceStream::GetTypeAsString(),
// one per source type this stream can be created with.
constexpr char kDeflate[] = "DEFLATE";
constexpr char kGzip[] = "GZIP";
constexpr char kGzipFallback[] = "GZIP_FALLBACK";

}  // namespace
22 | |
23 GzipSourceStream::~GzipSourceStream() { | |
24 if (zlib_stream_) | |
25 inflateEnd(zlib_stream_.get()); | |
26 } | |
27 | |
28 std::unique_ptr<GzipSourceStream> GzipSourceStream::Create( | |
29 std::unique_ptr<SourceStream> upstream, | |
30 SourceStream::SourceType type) { | |
31 std::unique_ptr<GzipSourceStream> source( | |
32 new GzipSourceStream(std::move(upstream), type)); | |
33 | |
34 if (!source->Init()) | |
35 return nullptr; | |
36 return source; | |
37 } | |
38 | |
39 GzipSourceStream::GzipSourceStream(std::unique_ptr<SourceStream> upstream, | |
40 SourceStream::SourceType type) | |
41 : FilterSourceStream(type, std::move(upstream)), | |
42 zlib_header_added_(false), | |
43 gzip_footer_bytes_left_(0), | |
44 input_state_(STATE_START) {} | |
45 | |
46 bool GzipSourceStream::Init() { | |
47 zlib_stream_.reset(new z_stream); | |
48 if (!zlib_stream_) | |
49 return false; | |
50 memset(zlib_stream_.get(), 0, sizeof(z_stream)); | |
51 | |
52 int ret; | |
53 if (type() == TYPE_GZIP || type() == TYPE_GZIP_FALLBACK) { | |
54 ret = inflateInit2(zlib_stream_.get(), -MAX_WBITS); | |
55 } else { | |
56 ret = inflateInit(zlib_stream_.get()); | |
57 } | |
58 DCHECK_NE(Z_VERSION_ERROR, ret); | |
59 return ret == Z_OK; | |
60 } | |
61 | |
62 std::string GzipSourceStream::GetTypeAsString() const { | |
63 switch (type()) { | |
64 case TYPE_GZIP: | |
65 return kGzip; | |
66 case TYPE_GZIP_FALLBACK: | |
67 return kGzipFallback; | |
68 case TYPE_DEFLATE: | |
69 return kDeflate; | |
70 default: | |
71 NOTREACHED(); | |
72 return ""; | |
73 } | |
74 } | |
75 | |
76 int GzipSourceStream::FilterData(IOBuffer* output_buffer, | |
77 int output_buffer_size, | |
78 IOBuffer* input_buffer, | |
79 int input_buffer_size, | |
80 int* consumed_bytes, | |
81 bool /*upstream_end_reached*/) { | |
82 if (input_buffer_size == 0) | |
83 return 0; | |
84 *consumed_bytes = 0; | |
85 char* input_data = input_buffer->data(); | |
86 int input_data_size = input_buffer_size; | |
87 int bytes_out = 0; | |
88 while (true) { | |
89 InputState state = input_state_; | |
90 switch (state) { | |
91 case STATE_START: { | |
92 if (type() == TYPE_DEFLATE) { | |
93 input_state_ = STATE_COMPRESSED_BODY; | |
94 break; | |
95 } | |
96 // If this stream is not really gzipped as detected by | |
97 // ShouldFallbackToPlain, pretend that the zlib stream has ended. | |
98 if (ShouldFallbackToPlain(input_data[0])) { | |
Randy Smith (Not in Mondays)
2016/09/23 20:53:47
Suggestion: I'll call out that this is assuming th
xunjieli
2016/09/26 15:29:43
Done. I added a DCHECK.
| |
99 input_state_ = STATE_UNCOMPRESSED_BODY; | |
100 } else { | |
101 input_state_ = STATE_GZIP_HEADER; | |
102 } | |
103 break; | |
104 } | |
105 case STATE_GZIP_HEADER: { | |
106 const size_t kGzipFooterBytes = 8; | |
107 const char* end = nullptr; | |
108 GZipHeader::Status status = | |
109 gzip_header_.ReadMore(input_data, input_data_size, &end); | |
110 if (status == GZipHeader::INCOMPLETE_HEADER) { | |
111 input_data += input_data_size; | |
112 input_data_size = 0; | |
113 } else if (status == GZipHeader::COMPLETE_HEADER) { | |
114 // If there is a valid header, there should also be a valid footer. | |
115 gzip_footer_bytes_left_ = kGzipFooterBytes; | |
116 int bytes_consumed = end - input_data; | |
117 input_data += bytes_consumed; | |
118 input_data_size -= bytes_consumed; | |
Randy Smith (Not in Mondays)
2016/09/23 20:53:47
If this drops input_data_size to zero, we want to
xunjieli
2016/09/26 15:29:43
Done. This is a bug! Thanks for catching it. There
| |
119 input_state_ = STATE_COMPRESSED_BODY; | |
120 } else if (status == GZipHeader::INVALID_HEADER) { | |
121 return ERR_CONTENT_DECODING_FAILED; | |
122 } | |
123 break; | |
124 } | |
125 case STATE_COMPRESSED_BODY: { | |
126 DCHECK_LE(0, input_data_size); | |
127 zlib_stream_.get()->next_in = bit_cast<Bytef*>(input_data); | |
128 zlib_stream_.get()->avail_in = input_data_size; | |
129 zlib_stream_.get()->next_out = bit_cast<Bytef*>(output_buffer->data()); | |
130 zlib_stream_.get()->avail_out = output_buffer_size; | |
Randy Smith (Not in Mondays)
2016/09/23 20:53:47
Suggestion: The fact that {next,avail}_out are bei
xunjieli
2016/09/26 15:29:43
Done.
| |
131 | |
132 int ret = inflate(zlib_stream_.get(), Z_NO_FLUSH); | |
133 | |
134 // Sometimes misconfigured servers omit the zlib header, relying on | |
135 // clients to splice it back in. | |
136 if (ret < 0 && !zlib_header_added_) { | |
137 zlib_header_added_ = true; | |
138 if (!InsertZlibHeader()) | |
139 return ERR_CONTENT_DECODING_FAILED; | |
140 | |
141 zlib_stream_.get()->next_in = bit_cast<Bytef*>(input_data); | |
142 zlib_stream_.get()->avail_in = input_data_size; | |
143 zlib_stream_.get()->next_out = | |
144 bit_cast<Bytef*>(output_buffer->data()); | |
145 zlib_stream_.get()->avail_out = output_buffer_size; | |
146 | |
147 ret = inflate(zlib_stream_.get(), Z_NO_FLUSH); | |
148 // TODO(xunjieli): add a histogram to see how often this happens. The | |
149 // original bug for this behavior was ancient and maybe it doesn't | |
150 // happen in the wild any more? crbug.com/649339 | |
151 } | |
152 | |
153 int bytes_used = input_data_size - zlib_stream_.get()->avail_in; | |
154 bytes_out = output_buffer_size - zlib_stream_.get()->avail_out; | |
155 | |
156 input_data_size -= bytes_used; | |
157 input_data += bytes_used; | |
158 | |
159 if (ret != Z_STREAM_END && ret != Z_OK) | |
160 return ERR_CONTENT_DECODING_FAILED; | |
Randy Smith (Not in Mondays)
2016/09/23 20:53:47
nit, suggestion: Is there a reason this isn't up a
xunjieli
2016/09/26 15:29:43
Done.
| |
161 if (ret == Z_STREAM_END) { | |
162 input_state_ = STATE_GZIP_FOOTER; | |
163 break; | |
164 } | |
165 *consumed_bytes = input_buffer_size - input_data_size; | |
166 return bytes_out; | |
167 } | |
168 case STATE_GZIP_FOOTER: { | |
169 size_t footer_bytes_skipped = NumGzipFooterBytesToSkip(input_data_size); | |
170 input_data_size -= footer_bytes_skipped; | |
171 input_data += footer_bytes_skipped; | |
172 input_state_ = STATE_UNCOMPRESSED_BODY; | |
Randy Smith (Not in Mondays)
2016/09/23 20:53:47
NumGzipFooterBytesToSkip() handles being called re
xunjieli
2016/09/26 15:29:43
Done. Good catch. This is a bug. The newly added S
| |
173 break; | |
174 } | |
175 case STATE_UNCOMPRESSED_BODY: { | |
176 int to_copy = std::min(input_data_size, output_buffer_size - bytes_out); | |
Randy Smith (Not in Mondays)
2016/09/23 20:53:47
Suggestion: I don't know if it's worth it, but if
xunjieli
2016/09/26 15:29:43
Acknowledged. The new code is less clustered. |out
| |
177 memcpy(output_buffer->data() + bytes_out, input_data, to_copy); | |
178 input_data_size -= to_copy; | |
179 input_data += to_copy; | |
180 *consumed_bytes = input_buffer_size - input_data_size; | |
181 return bytes_out + to_copy; | |
182 } | |
183 } | |
184 } | |
185 NOTREACHED(); | |
186 return ERR_UNEXPECTED; | |
187 } | |
188 | |
189 bool GzipSourceStream::InsertZlibHeader() { | |
190 char dummy_header[] = {0x78, 0x01}; | |
191 char dummy_output[4]; | |
192 | |
193 inflateReset(zlib_stream_.get()); | |
194 zlib_stream_.get()->next_in = bit_cast<Bytef*>(&dummy_header[0]); | |
195 zlib_stream_.get()->avail_in = sizeof(dummy_header); | |
196 zlib_stream_.get()->next_out = bit_cast<Bytef*>(&dummy_output[0]); | |
197 zlib_stream_.get()->avail_out = sizeof(dummy_output); | |
198 | |
199 int ret = inflate(zlib_stream_.get(), Z_NO_FLUSH); | |
200 return ret == Z_OK; | |
201 } | |
202 | |
203 // Dumb heuristic. Gzip files always start with a two-byte magic value per RFC | |
204 // 1952 2.3.1, so if the first byte isn't the first byte of the gzip magic, and | |
205 // this filter is checking whether it should fallback, then fallback. | |
206 bool GzipSourceStream::ShouldFallbackToPlain(char first_byte) { | |
207 if (type() != TYPE_GZIP_FALLBACK) | |
208 return false; | |
209 static const char kGzipFirstByte = 0x1f; | |
210 return first_byte != kGzipFirstByte; | |
211 } | |
212 | |
213 size_t GzipSourceStream::NumGzipFooterBytesToSkip(int input_buffer_size) { | |
214 if (gzip_footer_bytes_left_ == 0) | |
215 return 0; | |
216 size_t to_read = gzip_footer_bytes_left_; | |
217 if (to_read > base::checked_cast<size_t>(input_buffer_size)) | |
218 to_read = input_buffer_size; | |
219 gzip_footer_bytes_left_ -= to_read; | |
220 return to_read; | |
221 } | |
222 | |
223 } // namespace net | |
OLD | NEW |