| OLD | NEW |
| (Empty) |
| 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 // Derived from: | |
| 6 // mozilla/netwerk/protocol/http/src/nsHttpChunkedDecoder.cpp | |
| 7 // The license block is: | |
| 8 /* ***** BEGIN LICENSE BLOCK ***** | |
| 9 * Version: MPL 1.1/GPL 2.0/LGPL 2.1 | |
| 10 * | |
| 11 * The contents of this file are subject to the Mozilla Public License Version | |
| 12 * 1.1 (the "License"); you may not use this file except in compliance with | |
| 13 * the License. You may obtain a copy of the License at | |
| 14 * http://www.mozilla.org/MPL/ | |
| 15 * | |
| 16 * Software distributed under the License is distributed on an "AS IS" basis, | |
| 17 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License | |
| 18 * for the specific language governing rights and limitations under the | |
| 19 * License. | |
| 20 * | |
| 21 * The Original Code is Mozilla. | |
| 22 * | |
| 23 * The Initial Developer of the Original Code is | |
| 24 * Netscape Communications. | |
| 25 * Portions created by the Initial Developer are Copyright (C) 2001 | |
| 26 * the Initial Developer. All Rights Reserved. | |
| 27 * | |
| 28 * Contributor(s): | |
| 29 * Darin Fisher <darin@netscape.com> (original author) | |
| 30 * | |
| 31 * Alternatively, the contents of this file may be used under the terms of | |
| 32 * either the GNU General Public License Version 2 or later (the "GPL"), or | |
| 33 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), | |
| 34 * in which case the provisions of the GPL or the LGPL are applicable instead | |
| 35 * of those above. If you wish to allow use of your version of this file only | |
| 36 * under the terms of either the GPL or the LGPL, and not to allow others to | |
| 37 * use your version of this file under the terms of the MPL, indicate your | |
| 38 * decision by deleting the provisions above and replace them with the notice | |
| 39 * and other provisions required by the GPL or the LGPL. If you do not delete | |
| 40 * the provisions above, a recipient may use your version of this file under | |
| 41 * the terms of any one of the MPL, the GPL or the LGPL. | |
| 42 * | |
| 43 * ***** END LICENSE BLOCK ***** */ | |
| 44 | |
| 45 #include "net/http/http_chunked_decoder.h" | |
| 46 | |
| 47 #include <algorithm> | |
| 48 | |
| 49 #include "base/logging.h" | |
| 50 #include "base/strings/string_number_conversions.h" | |
| 51 #include "base/strings/string_piece.h" | |
| 52 #include "base/strings/string_util.h" | |
| 53 #include "net/base/net_errors.h" | |
| 54 | |
| 55 namespace net { | |
| 56 | |
| 57 // Absurdly long size to avoid imposing a constraint on chunked encoding | |
| 58 // extensions. | |
| 59 const size_t HttpChunkedDecoder::kMaxLineBufLen = 16384; | |
| 60 | |
| 61 HttpChunkedDecoder::HttpChunkedDecoder() | |
| 62 : chunk_remaining_(0), | |
| 63 chunk_terminator_remaining_(false), | |
| 64 reached_last_chunk_(false), | |
| 65 reached_eof_(false), | |
| 66 bytes_after_eof_(0) { | |
| 67 } | |
| 68 | |
| 69 int HttpChunkedDecoder::FilterBuf(char* buf, int buf_len) { | |
| 70 int result = 0; | |
| 71 | |
| 72 while (buf_len) { | |
| 73 if (chunk_remaining_) { | |
| 74 int num = std::min(chunk_remaining_, buf_len); | |
| 75 | |
| 76 buf_len -= num; | |
| 77 chunk_remaining_ -= num; | |
| 78 | |
| 79 result += num; | |
| 80 buf += num; | |
| 81 | |
| 82 // After each chunk's data there should be a CRLF | |
| 83 if (!chunk_remaining_) | |
| 84 chunk_terminator_remaining_ = true; | |
| 85 continue; | |
| 86 } else if (reached_eof_) { | |
| 87 bytes_after_eof_ += buf_len; | |
| 88 break; // Done! | |
| 89 } | |
| 90 | |
| 91 int bytes_consumed = ScanForChunkRemaining(buf, buf_len); | |
| 92 if (bytes_consumed < 0) | |
| 93 return bytes_consumed; // Error | |
| 94 | |
| 95 buf_len -= bytes_consumed; | |
| 96 if (buf_len) | |
| 97 memmove(buf, buf + bytes_consumed, buf_len); | |
| 98 } | |
| 99 | |
| 100 return result; | |
| 101 } | |
| 102 | |
| 103 int HttpChunkedDecoder::ScanForChunkRemaining(const char* buf, int buf_len) { | |
| 104 DCHECK_EQ(0, chunk_remaining_); | |
| 105 DCHECK_GT(buf_len, 0); | |
| 106 | |
| 107 int bytes_consumed = 0; | |
| 108 | |
| 109 size_t index_of_lf = base::StringPiece(buf, buf_len).find('\n'); | |
| 110 if (index_of_lf != base::StringPiece::npos) { | |
| 111 buf_len = static_cast<int>(index_of_lf); | |
| 112 if (buf_len && buf[buf_len - 1] == '\r') // Eliminate a preceding CR. | |
| 113 buf_len--; | |
| 114 bytes_consumed = static_cast<int>(index_of_lf) + 1; | |
| 115 | |
| 116 // Make buf point to the full line buffer to parse. | |
| 117 if (!line_buf_.empty()) { | |
| 118 line_buf_.append(buf, buf_len); | |
| 119 buf = line_buf_.data(); | |
| 120 buf_len = static_cast<int>(line_buf_.size()); | |
| 121 } | |
| 122 | |
| 123 if (reached_last_chunk_) { | |
| 124 if (buf_len) | |
| 125 DVLOG(1) << "ignoring http trailer"; | |
| 126 else | |
| 127 reached_eof_ = true; | |
| 128 } else if (chunk_terminator_remaining_) { | |
| 129 if (buf_len) { | |
| 130 DLOG(ERROR) << "chunk data not terminated properly"; | |
| 131 return ERR_INVALID_CHUNKED_ENCODING; | |
| 132 } | |
| 133 chunk_terminator_remaining_ = false; | |
| 134 } else if (buf_len) { | |
| 135 // Ignore any chunk-extensions. | |
| 136 size_t index_of_semicolon = base::StringPiece(buf, buf_len).find(';'); | |
| 137 if (index_of_semicolon != base::StringPiece::npos) | |
| 138 buf_len = static_cast<int>(index_of_semicolon); | |
| 139 | |
| 140 if (!ParseChunkSize(buf, buf_len, &chunk_remaining_)) { | |
| 141 DLOG(ERROR) << "Failed parsing HEX from: " << | |
| 142 std::string(buf, buf_len); | |
| 143 return ERR_INVALID_CHUNKED_ENCODING; | |
| 144 } | |
| 145 | |
| 146 if (chunk_remaining_ == 0) | |
| 147 reached_last_chunk_ = true; | |
| 148 } else { | |
| 149 DLOG(ERROR) << "missing chunk-size"; | |
| 150 return ERR_INVALID_CHUNKED_ENCODING; | |
| 151 } | |
| 152 line_buf_.clear(); | |
| 153 } else { | |
| 154 // Save the partial line; wait for more data. | |
| 155 bytes_consumed = buf_len; | |
| 156 | |
| 157 // Ignore a trailing CR | |
| 158 if (buf[buf_len - 1] == '\r') | |
| 159 buf_len--; | |
| 160 | |
| 161 if (line_buf_.length() + buf_len > kMaxLineBufLen) { | |
| 162 DLOG(ERROR) << "Chunked line length too long"; | |
| 163 return ERR_INVALID_CHUNKED_ENCODING; | |
| 164 } | |
| 165 | |
| 166 line_buf_.append(buf, buf_len); | |
| 167 } | |
| 168 return bytes_consumed; | |
| 169 } | |
| 170 | |
| 171 | |
| 172 // While the HTTP 1.1 specification defines chunk-size as 1*HEX | |
| 173 // some sites rely on more lenient parsing. | |
| 174 // http://www.yahoo.com/, for example, pads chunk-size with trailing spaces | |
| 175 // (0x20) to be 7 characters long, such as "819b ". | |
| 176 // | |
| 177 // A comparison of browsers running on WindowsXP shows that | |
| 178 // they will parse the following inputs (egrep syntax): | |
| 179 // | |
| 180 // Let \X be the character class for a hex digit: [0-9a-fA-F] | |
| 181 // | |
| 182 // RFC 2616: ^\X+$ | |
| 183 // IE7: ^\X+[^\X]*$ | |
| 184 // Safari 3.1: ^[\t\r ]*\X+[\t ]*$ | |
| 185 // Firefox 3: ^[\t\f\v\r ]*[+]?(0x)?\X+[^\X]*$ | |
| 186 // Opera 9.51: ^[\t\f\v ]*[+]?(0x)?\X+[^\X]*$ | |
| 187 // | |
| 188 // Our strategy is to be as strict as possible, while not breaking | |
| 189 // known sites. | |
| 190 // | |
| 191 // Us: ^\X+[ ]*$ | |
| 192 bool HttpChunkedDecoder::ParseChunkSize(const char* start, int len, int* out) { | |
| 193 DCHECK_GE(len, 0); | |
| 194 | |
| 195 // Strip trailing spaces | |
| 196 while (len && start[len - 1] == ' ') | |
| 197 len--; | |
| 198 | |
| 199 // Be more restrictive than HexStringToInt; | |
| 200 // don't allow inputs with leading "-", "+", "0x", "0X" | |
| 201 base::StringPiece chunk_size(start, len); | |
| 202 if (chunk_size.find_first_not_of("0123456789abcdefABCDEF") | |
| 203 != base::StringPiece::npos) { | |
| 204 return false; | |
| 205 } | |
| 206 | |
| 207 int parsed_number; | |
| 208 bool ok = base::HexStringToInt(chunk_size, &parsed_number); | |
| 209 if (ok && parsed_number >= 0) { | |
| 210 *out = parsed_number; | |
| 211 return true; | |
| 212 } | |
| 213 return false; | |
| 214 } | |
| 215 | |
| 216 } // namespace net | |
| OLD | NEW |