Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(259)

Side by Side Diff: net/filter/gzip_source_stream.cc

Issue 2334773002: Add net::GzipSourceStream (Closed)
Patch Set: Addressed Randy's comments and synced to r419441 Created 4 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 // Copyright 2016 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "net/filter/gzip_source_stream.h"
6
7 #include "base/bind.h"
8 #include "base/bit_cast.h"
9 #include "base/logging.h"
10 #include "net/base/io_buffer.h"
11 #include "third_party/zlib/zlib.h"
12
13 namespace net {
14
namespace {

// Human-readable names for each supported source type, as returned by
// GzipSourceStream::GetTypeAsString().
const char kGzip[] = "GZIP";
const char kGzipFallback[] = "GZIP_FALLBACK";
const char kDeflate[] = "DEFLATE";

}  // namespace
22
23 GzipSourceStream::~GzipSourceStream() {
24 if (zlib_stream_)
25 inflateEnd(zlib_stream_.get());
26 }
27
28 std::unique_ptr<GzipSourceStream> GzipSourceStream::Create(
29 std::unique_ptr<SourceStream> upstream,
30 SourceStream::SourceType type) {
31 std::unique_ptr<GzipSourceStream> source(
32 new GzipSourceStream(std::move(upstream), type));
33
34 if (!source->Init())
35 return nullptr;
36 return source;
37 }
38
39 GzipSourceStream::GzipSourceStream(std::unique_ptr<SourceStream> upstream,
40 SourceStream::SourceType type)
41 : FilterSourceStream(type, std::move(upstream)),
42 zlib_header_added_(false),
43 gzip_footer_bytes_left_(0),
44 input_state_(STATE_START) {}
45
46 bool GzipSourceStream::Init() {
47 zlib_stream_.reset(new z_stream);
48 if (!zlib_stream_)
49 return false;
50 memset(zlib_stream_.get(), 0, sizeof(z_stream));
51
52 int ret;
53 if (type() == TYPE_GZIP || type() == TYPE_GZIP_FALLBACK) {
54 ret = inflateInit2(zlib_stream_.get(), -MAX_WBITS);
55 } else {
56 ret = inflateInit(zlib_stream_.get());
57 }
58 DCHECK_NE(Z_VERSION_ERROR, ret);
59 return ret == Z_OK;
60 }
61
62 std::string GzipSourceStream::GetTypeAsString() const {
63 switch (type()) {
64 case TYPE_GZIP:
65 return kGzip;
66 case TYPE_GZIP_FALLBACK:
67 return kGzipFallback;
68 case TYPE_DEFLATE:
69 return kDeflate;
70 default:
71 NOTREACHED();
72 return "";
73 }
74 }
75
76 int GzipSourceStream::FilterData(IOBuffer* output_buffer,
77 int output_buffer_size,
78 IOBuffer* input_buffer,
79 int input_buffer_size,
80 int* consumed_bytes,
81 bool /*upstream_end_reached*/) {
82 if (input_buffer_size == 0)
83 return 0;
84 *consumed_bytes = 0;
85 int bytes_out = 0;
86 int bytes_used = 0;
87 while (true) {
88 InputState state = input_state_;
89 switch (state) {
90 case STATE_START:
91 if (type() == TYPE_DEFLATE) {
92 input_state_ = STATE_COMPRESSED_BODY;
93 break;
94 }
95 // If this stream is not really gzipped as detected by
96 // ShouldFallbackToPlain, pretend that the zlib stream has ended.
97 if (ShouldFallbackToPlain(input_buffer->data()[0])) {
98 input_state_ = STATE_UNCOMPRESSED_BODY;
99 } else {
100 input_state_ = STATE_GZIP_HEADER;
101 }
102 break;
103 case STATE_GZIP_HEADER: {
104 const size_t kGzipFooterBytes = 8;
105 const char* end = nullptr;
106 GZipHeader::Status status = gzip_header_.ReadMore(
107 input_buffer->data(), input_buffer_size, &end);
108 if (status == GZipHeader::INCOMPLETE_HEADER) {
109 *consumed_bytes += input_buffer_size;
110 } else if (status == GZipHeader::COMPLETE_HEADER) {
111 // If there is a valid header, there should also be a valid footer.
112 gzip_footer_bytes_left_ = kGzipFooterBytes;
113 *consumed_bytes += end - input_buffer->data();
114 input_state_ = STATE_COMPRESSED_BODY;
115 } else if (status == GZipHeader::INVALID_HEADER) {
116 return ERR_CONTENT_DECODING_FAILED;
117 }
118 break;
119 }
120 case STATE_COMPRESSED_BODY: {
121 DCHECK_LE(0, *consumed_bytes);
122 zlib_stream_.get()->next_in =
123 bit_cast<Bytef*>(input_buffer->data() + *consumed_bytes);
124 zlib_stream_.get()->avail_in = input_buffer_size - *consumed_bytes;
Randy Smith (Not in Mondays) 2016/09/21 20:57:05 Thought (i.e. not even a suggestion, just tossing
xunjieli 2016/09/22 17:20:49 Done.
125 zlib_stream_.get()->next_out = bit_cast<Bytef*>(output_buffer->data());
126 zlib_stream_.get()->avail_out = output_buffer_size;
127
128 int ret = inflate(zlib_stream_.get(), Z_NO_FLUSH);
129
130 // Sometime misconfigured servers omit the zlib header, relying on
131 // clients to splice it back in.
132 if (ret < 0 && !zlib_header_added_) {
133 zlib_header_added_ = true;
134 if (!InsertZlibHeader())
135 return ERR_CONTENT_DECODING_FAILED;
136
137 zlib_stream_.get()->next_in = bit_cast<Bytef*>(input_buffer->data());
138 zlib_stream_.get()->avail_in = input_buffer_size;
139 zlib_stream_.get()->next_out =
140 bit_cast<Bytef*>(output_buffer->data());
141 zlib_stream_.get()->avail_out = output_buffer_size;
142
143 ret = inflate(zlib_stream_.get(), Z_NO_FLUSH);
144 // TODO(xunjieli): add a histogram to see how often this happens. The
145 // original bug for this behavior was ancient and maybe it doesn't
146 // happen in the wild any more?
147 }
148
149 bytes_used =
150 input_buffer_size - *consumed_bytes - zlib_stream_.get()->avail_in;
151 bytes_out = output_buffer_size - zlib_stream_.get()->avail_out;
152
153 *consumed_bytes += base::checked_cast<int>(bytes_used);
154
155 if (ret != Z_STREAM_END && ret != Z_OK)
156 return ERR_CONTENT_DECODING_FAILED;
157 DCHECK_LE(*consumed_bytes, input_buffer_size);
158 if (*consumed_bytes == input_buffer_size)
159 return bytes_out;
Randy Smith (Not in Mondays) 2016/09/21 20:57:06 I'm confused. Above here there is code to (IIUC)
xunjieli 2016/09/22 17:20:49 Done. Thanks for catching that! That's bug in my c
160 // Pass any futher data uncompressed.
161 input_state_ = STATE_UNCOMPRESSED_BODY;
162 break;
163 }
164 case STATE_UNCOMPRESSED_BODY:
165 return bytes_out + Passthrough(output_buffer->data() + bytes_out,
166 output_buffer_size - bytes_out,
167 input_buffer->data() + *consumed_bytes,
168 input_buffer_size - *consumed_bytes,
169 consumed_bytes);
170 }
171 }
172 NOTREACHED();
173 return ERR_UNEXPECTED;
174 }
175
176 size_t GzipSourceStream::Passthrough(char* output_buffer,
177 int output_buffer_size,
178 char* input_buffer,
179 int input_buffer_size,
180 int* consumed_bytes) {
181 size_t footer_bytes_skipped = NumGzipFooterBytesToSkip(input_buffer_size);
Randy Smith (Not in Mondays) 2016/09/21 20:57:06 Why not implement this with a separate state? It
xunjieli 2016/09/22 17:20:49 Done.
182 size_t to_copy = input_buffer_size - footer_bytes_skipped;
183 if (to_copy > base::checked_cast<size_t>(output_buffer_size))
184 to_copy = output_buffer_size;
185 memcpy(output_buffer, input_buffer, to_copy);
186 *consumed_bytes += to_copy + footer_bytes_skipped;
187 return to_copy;
188 }
189
190 bool GzipSourceStream::InsertZlibHeader() {
191 char dummy_header[] = {0x78, 0x01};
192 char dummy_output[4];
193
194 inflateReset(zlib_stream_.get());
195 zlib_stream_.get()->next_in = bit_cast<Bytef*>(&dummy_header[0]);
196 zlib_stream_.get()->avail_in = sizeof(dummy_header);
197 zlib_stream_.get()->next_out = bit_cast<Bytef*>(&dummy_output[0]);
198 zlib_stream_.get()->avail_out = sizeof(dummy_output);
199
200 int ret = inflate(zlib_stream_.get(), Z_NO_FLUSH);
201 return ret == Z_OK;
202 }
203
204 // Dumb heuristic. Gzip files always start with a two-byte magic value per RFC
205 // 1952 2.3.1, so if the first byte isn't the first byte of the gzip magic, and
206 // this filter is checking whether it should fallback, then fallback.
207 bool GzipSourceStream::ShouldFallbackToPlain(char first_byte) {
208 if (type() != TYPE_GZIP_FALLBACK)
209 return false;
210 static const char kGzipFirstByte = 0x1f;
211 return first_byte != kGzipFirstByte;
212 }
213
214 size_t GzipSourceStream::NumGzipFooterBytesToSkip(int input_buffer_size) {
215 if (gzip_footer_bytes_left_ == 0)
216 return 0;
217 size_t to_read = gzip_footer_bytes_left_;
218 if (to_read > base::checked_cast<size_t>(input_buffer_size))
219 to_read = input_buffer_size;
220 gzip_footer_bytes_left_ -= to_read;
221 return to_read;
222 }
223
224 } // namespace net
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698