OLD | NEW |
| (Empty) |
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 // Derived from: | |
6 // mozilla/netwerk/protocol/http/src/nsHttpChunkedDecoder.cpp | |
7 // The license block is: | |
8 /* ***** BEGIN LICENSE BLOCK ***** | |
9 * Version: MPL 1.1/GPL 2.0/LGPL 2.1 | |
10 * | |
11 * The contents of this file are subject to the Mozilla Public License Version | |
12 * 1.1 (the "License"); you may not use this file except in compliance with | |
13 * the License. You may obtain a copy of the License at | |
14 * http://www.mozilla.org/MPL/ | |
15 * | |
16 * Software distributed under the License is distributed on an "AS IS" basis, | |
17 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License | |
18 * for the specific language governing rights and limitations under the | |
19 * License. | |
20 * | |
21 * The Original Code is Mozilla. | |
22 * | |
23 * The Initial Developer of the Original Code is | |
24 * Netscape Communications. | |
25 * Portions created by the Initial Developer are Copyright (C) 2001 | |
26 * the Initial Developer. All Rights Reserved. | |
27 * | |
28 * Contributor(s): | |
29 * Darin Fisher <darin@netscape.com> (original author) | |
30 * | |
31 * Alternatively, the contents of this file may be used under the terms of | |
32 * either the GNU General Public License Version 2 or later (the "GPL"), or | |
33 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), | |
34 * in which case the provisions of the GPL or the LGPL are applicable instead | |
35 * of those above. If you wish to allow use of your version of this file only | |
36 * under the terms of either the GPL or the LGPL, and not to allow others to | |
37 * use your version of this file under the terms of the MPL, indicate your | |
38 * decision by deleting the provisions above and replace them with the notice | |
39 * and other provisions required by the GPL or the LGPL. If you do not delete | |
40 * the provisions above, a recipient may use your version of this file under | |
41 * the terms of any one of the MPL, the GPL or the LGPL. | |
42 * | |
43 * ***** END LICENSE BLOCK ***** */ | |
44 | |
45 #include "net/http/http_chunked_decoder.h" | |
46 | |
47 #include <algorithm> | |
48 | |
49 #include "base/logging.h" | |
50 #include "base/strings/string_number_conversions.h" | |
51 #include "base/strings/string_piece.h" | |
52 #include "base/strings/string_util.h" | |
53 #include "net/base/net_errors.h" | |
54 | |
55 namespace net { | |
56 | |
57 // Absurdly long size to avoid imposing a constraint on chunked encoding | |
58 // extensions. | |
59 const size_t HttpChunkedDecoder::kMaxLineBufLen = 16384; | |
60 | |
61 HttpChunkedDecoder::HttpChunkedDecoder() | |
62 : chunk_remaining_(0), | |
63 chunk_terminator_remaining_(false), | |
64 reached_last_chunk_(false), | |
65 reached_eof_(false), | |
66 bytes_after_eof_(0) { | |
67 } | |
68 | |
69 int HttpChunkedDecoder::FilterBuf(char* buf, int buf_len) { | |
70 int result = 0; | |
71 | |
72 while (buf_len) { | |
73 if (chunk_remaining_) { | |
74 int num = std::min(chunk_remaining_, buf_len); | |
75 | |
76 buf_len -= num; | |
77 chunk_remaining_ -= num; | |
78 | |
79 result += num; | |
80 buf += num; | |
81 | |
82 // After each chunk's data there should be a CRLF | |
83 if (!chunk_remaining_) | |
84 chunk_terminator_remaining_ = true; | |
85 continue; | |
86 } else if (reached_eof_) { | |
87 bytes_after_eof_ += buf_len; | |
88 break; // Done! | |
89 } | |
90 | |
91 int bytes_consumed = ScanForChunkRemaining(buf, buf_len); | |
92 if (bytes_consumed < 0) | |
93 return bytes_consumed; // Error | |
94 | |
95 buf_len -= bytes_consumed; | |
96 if (buf_len) | |
97 memmove(buf, buf + bytes_consumed, buf_len); | |
98 } | |
99 | |
100 return result; | |
101 } | |
102 | |
103 int HttpChunkedDecoder::ScanForChunkRemaining(const char* buf, int buf_len) { | |
104 DCHECK_EQ(0, chunk_remaining_); | |
105 DCHECK_GT(buf_len, 0); | |
106 | |
107 int bytes_consumed = 0; | |
108 | |
109 size_t index_of_lf = base::StringPiece(buf, buf_len).find('\n'); | |
110 if (index_of_lf != base::StringPiece::npos) { | |
111 buf_len = static_cast<int>(index_of_lf); | |
112 if (buf_len && buf[buf_len - 1] == '\r') // Eliminate a preceding CR. | |
113 buf_len--; | |
114 bytes_consumed = static_cast<int>(index_of_lf) + 1; | |
115 | |
116 // Make buf point to the full line buffer to parse. | |
117 if (!line_buf_.empty()) { | |
118 line_buf_.append(buf, buf_len); | |
119 buf = line_buf_.data(); | |
120 buf_len = static_cast<int>(line_buf_.size()); | |
121 } | |
122 | |
123 if (reached_last_chunk_) { | |
124 if (buf_len) | |
125 DVLOG(1) << "ignoring http trailer"; | |
126 else | |
127 reached_eof_ = true; | |
128 } else if (chunk_terminator_remaining_) { | |
129 if (buf_len) { | |
130 DLOG(ERROR) << "chunk data not terminated properly"; | |
131 return ERR_INVALID_CHUNKED_ENCODING; | |
132 } | |
133 chunk_terminator_remaining_ = false; | |
134 } else if (buf_len) { | |
135 // Ignore any chunk-extensions. | |
136 size_t index_of_semicolon = base::StringPiece(buf, buf_len).find(';'); | |
137 if (index_of_semicolon != base::StringPiece::npos) | |
138 buf_len = static_cast<int>(index_of_semicolon); | |
139 | |
140 if (!ParseChunkSize(buf, buf_len, &chunk_remaining_)) { | |
141 DLOG(ERROR) << "Failed parsing HEX from: " << | |
142 std::string(buf, buf_len); | |
143 return ERR_INVALID_CHUNKED_ENCODING; | |
144 } | |
145 | |
146 if (chunk_remaining_ == 0) | |
147 reached_last_chunk_ = true; | |
148 } else { | |
149 DLOG(ERROR) << "missing chunk-size"; | |
150 return ERR_INVALID_CHUNKED_ENCODING; | |
151 } | |
152 line_buf_.clear(); | |
153 } else { | |
154 // Save the partial line; wait for more data. | |
155 bytes_consumed = buf_len; | |
156 | |
157 // Ignore a trailing CR | |
158 if (buf[buf_len - 1] == '\r') | |
159 buf_len--; | |
160 | |
161 if (line_buf_.length() + buf_len > kMaxLineBufLen) { | |
162 DLOG(ERROR) << "Chunked line length too long"; | |
163 return ERR_INVALID_CHUNKED_ENCODING; | |
164 } | |
165 | |
166 line_buf_.append(buf, buf_len); | |
167 } | |
168 return bytes_consumed; | |
169 } | |
170 | |
171 | |
172 // While the HTTP 1.1 specification defines chunk-size as 1*HEX | |
173 // some sites rely on more lenient parsing. | |
174 // http://www.yahoo.com/, for example, pads chunk-size with trailing spaces | |
175 // (0x20) to be 7 characters long, such as "819b ". | |
176 // | |
177 // A comparison of browsers running on WindowsXP shows that | |
178 // they will parse the following inputs (egrep syntax): | |
179 // | |
180 // Let \X be the character class for a hex digit: [0-9a-fA-F] | |
181 // | |
182 // RFC 2616: ^\X+$ | |
183 // IE7: ^\X+[^\X]*$ | |
184 // Safari 3.1: ^[\t\r ]*\X+[\t ]*$ | |
185 // Firefox 3: ^[\t\f\v\r ]*[+]?(0x)?\X+[^\X]*$ | |
186 // Opera 9.51: ^[\t\f\v ]*[+]?(0x)?\X+[^\X]*$ | |
187 // | |
188 // Our strategy is to be as strict as possible, while not breaking | |
189 // known sites. | |
190 // | |
191 // Us: ^\X+[ ]*$ | |
192 bool HttpChunkedDecoder::ParseChunkSize(const char* start, int len, int* out) { | |
193 DCHECK_GE(len, 0); | |
194 | |
195 // Strip trailing spaces | |
196 while (len && start[len - 1] == ' ') | |
197 len--; | |
198 | |
199 // Be more restrictive than HexStringToInt; | |
200 // don't allow inputs with leading "-", "+", "0x", "0X" | |
201 base::StringPiece chunk_size(start, len); | |
202 if (chunk_size.find_first_not_of("0123456789abcdefABCDEF") | |
203 != base::StringPiece::npos) { | |
204 return false; | |
205 } | |
206 | |
207 int parsed_number; | |
208 bool ok = base::HexStringToInt(chunk_size, &parsed_number); | |
209 if (ok && parsed_number >= 0) { | |
210 *out = parsed_number; | |
211 return true; | |
212 } | |
213 return false; | |
214 } | |
215 | |
216 } // namespace net | |
OLD | NEW |