Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(56)

Side by Side Diff: net/filter/gzip_source_stream.cc

Issue 2334773002: Add net::GzipSourceStream (Closed)
Patch Set: Address Randy's comments (synced to 92104a0503b2862f54d60473b59dd1ae145eb22b) Created 4 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 // Copyright 2016 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "net/filter/gzip_source_stream.h"
6
7 #include "base/bind.h"
8 #include "base/bit_cast.h"
9 #include "base/logging.h"
10 #include "net/base/io_buffer.h"
11 #include "third_party/zlib/zlib.h"
12
13 namespace net {
14
namespace {

// Human-readable names reported for each supported source type.
constexpr char kDeflate[] = "DEFLATE";
constexpr char kGzip[] = "GZIP";
constexpr char kGzipFallback[] = "GZIP_FALLBACK";

}  // namespace
22
23 GzipSourceStream::~GzipSourceStream() {
24 if (zlib_stream_)
25 inflateEnd(zlib_stream_.get());
26 }
27
28 std::unique_ptr<GzipSourceStream> GzipSourceStream::Create(
29 std::unique_ptr<SourceStream> upstream,
30 SourceStream::SourceType type) {
31 std::unique_ptr<GzipSourceStream> source(
32 new GzipSourceStream(std::move(upstream), type));
33
34 if (!source->Init())
35 return nullptr;
36 return source;
37 }
38
39 GzipSourceStream::GzipSourceStream(std::unique_ptr<SourceStream> upstream,
40 SourceStream::SourceType type)
41 : FilterSourceStream(type, std::move(upstream)),
42 zlib_header_added_(false),
43 gzip_footer_bytes_left_(0),
44 input_state_(STATE_START) {}
45
46 bool GzipSourceStream::Init() {
47 zlib_stream_.reset(new z_stream);
48 if (!zlib_stream_)
49 return false;
50 memset(zlib_stream_.get(), 0, sizeof(z_stream));
51
52 int ret;
53 if (type() == TYPE_GZIP || type() == TYPE_GZIP_FALLBACK) {
54 ret = inflateInit2(zlib_stream_.get(), -MAX_WBITS);
55 } else {
56 ret = inflateInit(zlib_stream_.get());
57 }
58 DCHECK_NE(Z_VERSION_ERROR, ret);
59 return ret == Z_OK;
60 }
61
62 std::string GzipSourceStream::GetTypeAsString() const {
63 switch (type()) {
64 case TYPE_GZIP:
65 return kGzip;
66 case TYPE_GZIP_FALLBACK:
67 return kGzipFallback;
68 case TYPE_DEFLATE:
69 return kDeflate;
70 default:
71 NOTREACHED();
72 return "";
73 }
74 }
75
76 int GzipSourceStream::FilterData(IOBuffer* output_buffer,
77 int output_buffer_size,
78 IOBuffer* input_buffer,
79 int input_buffer_size,
80 int* consumed_bytes,
81 bool /*upstream_end_reached*/) {
82 if (input_buffer_size == 0)
83 return 0;
84 *consumed_bytes = 0;
85 char* input_data = input_buffer->data();
86 int input_data_size = input_buffer_size;
87 int bytes_out = 0;
88 while (true) {
89 InputState state = input_state_;
90 switch (state) {
91 case STATE_START: {
92 if (type() == TYPE_DEFLATE) {
93 input_state_ = STATE_COMPRESSED_BODY;
94 break;
95 }
96 // If this stream is not really gzipped as detected by
97 // ShouldFallbackToPlain, pretend that the zlib stream has ended.
98 if (ShouldFallbackToPlain(input_data[0])) {
Randy Smith (Not in Mondays) 2016/09/23 20:53:47 Suggestion: I'll call out that this is assuming th
xunjieli 2016/09/26 15:29:43 Done. I added a DCHECK.
99 input_state_ = STATE_UNCOMPRESSED_BODY;
100 } else {
101 input_state_ = STATE_GZIP_HEADER;
102 }
103 break;
104 }
105 case STATE_GZIP_HEADER: {
106 const size_t kGzipFooterBytes = 8;
107 const char* end = nullptr;
108 GZipHeader::Status status =
109 gzip_header_.ReadMore(input_data, input_data_size, &end);
110 if (status == GZipHeader::INCOMPLETE_HEADER) {
111 input_data += input_data_size;
112 input_data_size = 0;
113 } else if (status == GZipHeader::COMPLETE_HEADER) {
114 // If there is a valid header, there should also be a valid footer.
115 gzip_footer_bytes_left_ = kGzipFooterBytes;
116 int bytes_consumed = end - input_data;
117 input_data += bytes_consumed;
118 input_data_size -= bytes_consumed;
Randy Smith (Not in Mondays) 2016/09/23 20:53:47 If this drops input_data_size to zero, we want to
xunjieli 2016/09/26 15:29:43 Done. This is a bug! Thanks for catching it. There
119 input_state_ = STATE_COMPRESSED_BODY;
120 } else if (status == GZipHeader::INVALID_HEADER) {
121 return ERR_CONTENT_DECODING_FAILED;
122 }
123 break;
124 }
125 case STATE_COMPRESSED_BODY: {
126 DCHECK_LE(0, input_data_size);
127 zlib_stream_.get()->next_in = bit_cast<Bytef*>(input_data);
128 zlib_stream_.get()->avail_in = input_data_size;
129 zlib_stream_.get()->next_out = bit_cast<Bytef*>(output_buffer->data());
130 zlib_stream_.get()->avail_out = output_buffer_size;
Randy Smith (Not in Mondays) 2016/09/23 20:53:47 Suggestion: The fact that {next,avail}_out are bei
xunjieli 2016/09/26 15:29:43 Done.
131
132 int ret = inflate(zlib_stream_.get(), Z_NO_FLUSH);
133
134 // Sometimes misconfigured servers omit the zlib header, relying on
135 // clients to splice it back in.
136 if (ret < 0 && !zlib_header_added_) {
137 zlib_header_added_ = true;
138 if (!InsertZlibHeader())
139 return ERR_CONTENT_DECODING_FAILED;
140
141 zlib_stream_.get()->next_in = bit_cast<Bytef*>(input_data);
142 zlib_stream_.get()->avail_in = input_data_size;
143 zlib_stream_.get()->next_out =
144 bit_cast<Bytef*>(output_buffer->data());
145 zlib_stream_.get()->avail_out = output_buffer_size;
146
147 ret = inflate(zlib_stream_.get(), Z_NO_FLUSH);
148 // TODO(xunjieli): add a histogram to see how often this happens. The
149 // original bug for this behavior was ancient and maybe it doesn't
150 // happen in the wild any more? crbug.com/649339
151 }
152
153 int bytes_used = input_data_size - zlib_stream_.get()->avail_in;
154 bytes_out = output_buffer_size - zlib_stream_.get()->avail_out;
155
156 input_data_size -= bytes_used;
157 input_data += bytes_used;
158
159 if (ret != Z_STREAM_END && ret != Z_OK)
160 return ERR_CONTENT_DECODING_FAILED;
Randy Smith (Not in Mondays) 2016/09/23 20:53:47 nit, suggestion: Is there a reason this isn't up a
xunjieli 2016/09/26 15:29:43 Done.
161 if (ret == Z_STREAM_END) {
162 input_state_ = STATE_GZIP_FOOTER;
163 break;
164 }
165 *consumed_bytes = input_buffer_size - input_data_size;
166 return bytes_out;
167 }
168 case STATE_GZIP_FOOTER: {
169 size_t footer_bytes_skipped = NumGzipFooterBytesToSkip(input_data_size);
170 input_data_size -= footer_bytes_skipped;
171 input_data += footer_bytes_skipped;
172 input_state_ = STATE_UNCOMPRESSED_BODY;
Randy Smith (Not in Mondays) 2016/09/23 20:53:47 NumGzipFooterBytesToSkip() handles being called re
xunjieli 2016/09/26 15:29:43 Done. Good catch. This is a bug. The newly added S
173 break;
174 }
175 case STATE_UNCOMPRESSED_BODY: {
176 int to_copy = std::min(input_data_size, output_buffer_size - bytes_out);
Randy Smith (Not in Mondays) 2016/09/23 20:53:47 Suggestion: I don't know if it's worth it, but if
xunjieli 2016/09/26 15:29:43 Acknowledged. The new code is less clustered. |out
177 memcpy(output_buffer->data() + bytes_out, input_data, to_copy);
178 input_data_size -= to_copy;
179 input_data += to_copy;
180 *consumed_bytes = input_buffer_size - input_data_size;
181 return bytes_out + to_copy;
182 }
183 }
184 }
185 NOTREACHED();
186 return ERR_UNEXPECTED;
187 }
188
189 bool GzipSourceStream::InsertZlibHeader() {
190 char dummy_header[] = {0x78, 0x01};
191 char dummy_output[4];
192
193 inflateReset(zlib_stream_.get());
194 zlib_stream_.get()->next_in = bit_cast<Bytef*>(&dummy_header[0]);
195 zlib_stream_.get()->avail_in = sizeof(dummy_header);
196 zlib_stream_.get()->next_out = bit_cast<Bytef*>(&dummy_output[0]);
197 zlib_stream_.get()->avail_out = sizeof(dummy_output);
198
199 int ret = inflate(zlib_stream_.get(), Z_NO_FLUSH);
200 return ret == Z_OK;
201 }
202
203 // Dumb heuristic. Gzip files always start with a two-byte magic value per RFC
204 // 1952 2.3.1, so if the first byte isn't the first byte of the gzip magic, and
205 // this filter is checking whether it should fallback, then fallback.
206 bool GzipSourceStream::ShouldFallbackToPlain(char first_byte) {
207 if (type() != TYPE_GZIP_FALLBACK)
208 return false;
209 static const char kGzipFirstByte = 0x1f;
210 return first_byte != kGzipFirstByte;
211 }
212
213 size_t GzipSourceStream::NumGzipFooterBytesToSkip(int input_buffer_size) {
214 if (gzip_footer_bytes_left_ == 0)
215 return 0;
216 size_t to_read = gzip_footer_bytes_left_;
217 if (to_read > base::checked_cast<size_t>(input_buffer_size))
218 to_read = input_buffer_size;
219 gzip_footer_bytes_left_ -= to_read;
220 return to_read;
221 }
222
223 } // namespace net
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698