net/http/http_content_disposition.cc - Issue 992733002: Remove //net (except for Android test stuff) and sdch

Side by Side Diff: net/http/http_content_disposition.cc

Issue 992733002: Remove //net (except for Android test stuff) and sdch (Closed) Base URL: git@github.com:domokit/mojo.git@master

Patch Set: Created 5 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
	(Empty)
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.

4

5 #include "net/http/http_content_disposition.h"

6

7 #include "base/base64.h"

8 #include "base/logging.h"

9 #include "base/strings/string_tokenizer.h"

10 #include "base/strings/string_util.h"

11 #include "base/strings/sys_string_conversions.h"

12 #include "base/strings/utf_string_conversions.h"

13 #include "net/base/net_string_util.h"

14 #include "net/base/net_util.h"

15 #include "net/http/http_util.h"

16

17 namespace {

18

// Identifies which of the two RFC 2047 encoded-word encodings a token uses.
enum RFC2047EncodingType {
  Q_ENCODING,  // "Q": decoded by DecodeQEncoding().
  B_ENCODING   // "B": decoded with base::Base64Decode().
};

23

24 // Decodes a "Q" encoded string as described in RFC 2047 section 4.2. Similar to

25 // decoding a quoted-printable string. Returns true if the input was valid.

26 bool DecodeQEncoding(const std::string& input, std::string* output) {

27 std::string temp;

28 temp.reserve(input.size());

29 for (std::string::const_iterator it = input.begin(); it != input.end();

30 ++it) {

31 if (*it == '_') {

32 temp.push_back(' ');

33 } else if (*it == '=') {

34 if ((input.end() - it < 3) \|\|

35 !IsHexDigit(static_cast<unsigned char>(*(it + 1))) \|\|

36 !IsHexDigit(static_cast<unsigned char>(*(it + 2))))

37 return false;

38 unsigned char ch = HexDigitToInt((it + 1)) 16 +

39 HexDigitToInt(*(it + 2));

40 temp.push_back(static_cast<char>(ch));

41 ++it;

42 ++it;

43 } else if (0x20 < it && it < 0x7F && *it != '?') {

44 // In a Q-encoded word, only printable ASCII characters

45 // represent themselves. Besides, space, '=', '_' and '?' are

46 // not allowed, but they're already filtered out.

47 DCHECK_NE('=', *it);

48 DCHECK_NE('?', *it);

49 DCHECK_NE('_', *it);

50 temp.push_back(*it);

51 } else {

52 return false;

53 }

54 }

55 output->swap(temp);

56 return true;

57 }

58

59 // Decodes a "Q" or "B" encoded string as per RFC 2047 section 4. The encoding

60 // type is specified in \|enc_type\|.

61 bool DecodeBQEncoding(const std::string& part,

62 RFC2047EncodingType enc_type,

63 const std::string& charset,

64 std::string* output) {

65 std::string decoded;

66 if (!((enc_type == B_ENCODING) ?

67 base::Base64Decode(part, &decoded) : DecodeQEncoding(part, &decoded))) {

68 return false;

69 }

70

71 if (decoded.empty()) {

72 output->clear();

73 return true;

74 }

75

76 return net::ConvertToUtf8(decoded, charset.c_str(), output);

77 }

78

79 bool DecodeWord(const std::string& encoded_word,

80 const std::string& referrer_charset,

81 bool* is_rfc2047,

82 std::string* output,

83 int* parse_result_flags) {

84 *is_rfc2047 = false;

85 output->clear();

86 if (encoded_word.empty())

87 return true;

88

89 if (!base::IsStringASCII(encoded_word)) {

90 // Try UTF-8, referrer_charset and the native OS default charset in turn.

91 if (base::IsStringUTF8(encoded_word)) {

92 *output = encoded_word;

93 } else {

94 base::string16 utf16_output;

95 if (!referrer_charset.empty() &&

96 net::ConvertToUTF16(encoded_word, referrer_charset.c_str(),

97 &utf16_output)) {

98 *output = base::UTF16ToUTF8(utf16_output);

99 } else {

100 *output = base::WideToUTF8(base::SysNativeMBToWide(encoded_word));

101 }

102 }

103

104 *parse_result_flags \|= net::HttpContentDisposition::HAS_NON_ASCII_STRINGS;

105 return true;

106 }

107

108 // RFC 2047 : one of encoding methods supported by Firefox and relatively

109 // widely used by web servers.

110 // =?charset?<E>?<encoded string>?= where '<E>' is either 'B' or 'Q'.

111 // We don't care about the length restriction (72 bytes) because

112 // many web servers generate encoded words longer than the limit.

113 std::string decoded_word;

114 *is_rfc2047 = true;

115 int part_index = 0;

116 std::string charset;

117 base::StringTokenizer t(encoded_word, "?");

118 RFC2047EncodingType enc_type = Q_ENCODING;

119 while (*is_rfc2047 && t.GetNext()) {

120 std::string part = t.token();

121 switch (part_index) {

122 case 0:

123 if (part != "=") {

124 *is_rfc2047 = false;

125 break;

126 }

127 ++part_index;

128 break;

129 case 1:

130 // Do we need charset validity check here?

131 charset = part;

132 ++part_index;

133 break;

134 case 2:

135 if (part.size() > 1 \|\|

136 part.find_first_of("bBqQ") == std::string::npos) {

137 *is_rfc2047 = false;

138 break;

139 }

140 if (part[0] == 'b' \|\| part[0] == 'B') {

141 enc_type = B_ENCODING;

142 }

143 ++part_index;

144 break;

145 case 3:

146 *is_rfc2047 = DecodeBQEncoding(part, enc_type, charset, &decoded_word);

147 if (!*is_rfc2047) {

148 // Last minute failure. Invalid B/Q encoding. Rather than

149 // passing it through, return now.

150 return false;

151 }

152 ++part_index;

153 break;

154 case 4:

155 if (part != "=") {

156 // Another last minute failure !

157 // Likely to be a case of two encoded-words in a row or

158 // an encoded word followed by a non-encoded word. We can be

159 // generous, but it does not help much in terms of compatibility,

160 // I believe. Return immediately.

161 *is_rfc2047 = false;

162 return false;

163 }

164 ++part_index;

165 break;

166 default:

167 *is_rfc2047 = false;

168 return false;

169 }

170 }

171

172 if (*is_rfc2047) {

173 if (*(encoded_word.end() - 1) == '=') {

174 output->swap(decoded_word);

175 *parse_result_flags \|=

176 net::HttpContentDisposition::HAS_RFC2047_ENCODED_STRINGS;

177 return true;

178 }

179 // encoded_word ending prematurelly with '?' or extra '?'

180 *is_rfc2047 = false;

181 return false;

182 }

183

184 // We're not handling 'especial' characters quoted with '\', but

185 // it should be Ok because we're not an email client but a

186 // web browser.

187

188 // What IE6/7 does: %-escaped UTF-8.

189 decoded_word = net::UnescapeURLComponent(encoded_word,

190 net::UnescapeRule::SPACES);

191 if (decoded_word != encoded_word)

192 *parse_result_flags \|=

193 net::HttpContentDisposition::HAS_PERCENT_ENCODED_STRINGS;

194 if (base::IsStringUTF8(decoded_word)) {

195 output->swap(decoded_word);

196 return true;

197 // We can try either the OS default charset or 'origin charset' here,

198 // As far as I can tell, IE does not support it. However, I've seen

199 // web servers emit %-escaped string in a legacy encoding (usually

200 // origin charset).

201 // TODO(jungshik) : Test IE further and consider adding a fallback here.

202 }

203 return false;

204 }

205

206 // Decodes the value of a 'filename' or 'name' parameter given as \|input\|. The

207 // value is supposed to be of the form:

208 //

209 // value = token \| quoted-string

210 //

211 // However we currently also allow RFC 2047 encoding and non-ASCII

212 // strings. Non-ASCII strings are interpreted based on \|referrer_charset\|.

213 bool DecodeFilenameValue(const std::string& input,

214 const std::string& referrer_charset,

215 std::string* output,

216 int* parse_result_flags) {

217 int current_parse_result_flags = 0;

218 std::string decoded_value;

219 bool is_previous_token_rfc2047 = true;

220

221 // Tokenize with whitespace characters.

222 base::StringTokenizer t(input, " \t\n\r");

223 t.set_options(base::StringTokenizer::RETURN_DELIMS);

224 while (t.GetNext()) {

225 if (t.token_is_delim()) {

226 // If the previous non-delimeter token is not RFC2047-encoded,

227 // put in a space in its place. Otheriwse, skip over it.

228 if (!is_previous_token_rfc2047)

229 decoded_value.push_back(' ');

230 continue;

231 }

232 // We don't support a single multibyte character split into

233 // adjacent encoded words. Some broken mail clients emit headers

234 // with that problem, but most web servers usually encode a filename

235 // in a single encoded-word. Firefox/Thunderbird do not support

236 // it, either.

237 std::string decoded;

238 if (!DecodeWord(t.token(), referrer_charset, &is_previous_token_rfc2047,

239 &decoded, &current_parse_result_flags))

240 return false;

241 decoded_value.append(decoded);

242 }

243 output->swap(decoded_value);

244 if (parse_result_flags && !output->empty())

245 *parse_result_flags \|= current_parse_result_flags;

246 return true;

247 }

248

249 // Parses the charset and value-chars out of an ext-value string.

250 //

251 // ext-value = charset "'" [ language ] "'" value-chars

252 bool ParseExtValueComponents(const std::string& input,

253 std::string* charset,

254 std::string* value_chars) {

255 base::StringTokenizer t(input, "'");

256 t.set_options(base::StringTokenizer::RETURN_DELIMS);

257 std::string temp_charset;

258 std::string temp_value;

259 int numDelimsSeen = 0;

260 while (t.GetNext()) {

261 if (t.token_is_delim()) {

262 ++numDelimsSeen;

263 continue;

264 } else {

265 switch (numDelimsSeen) {

266 case 0:

267 temp_charset = t.token();

268 break;

269 case 1:

270 // Language is ignored.

271 break;

272 case 2:

273 temp_value = t.token();

274 break;

275 default:

276 return false;

277 }

278 }

279 }

280 if (numDelimsSeen != 2)

281 return false;

282 if (temp_charset.empty() \|\| temp_value.empty())

283 return false;

284 charset->swap(temp_charset);

285 value_chars->swap(temp_value);

286 return true;

287 }

288

289 // http://tools.ietf.org/html/rfc5987#section-3.2

290 //

291 // ext-value = charset "'" [ language ] "'" value-chars

292 //

293 // charset = "UTF-8" / "ISO-8859-1" / mime-charset

294 //

295 // mime-charset = 1*mime-charsetc

296 // mime-charsetc = ALPHA / DIGIT

297 // / "!" / "#" / "$" / "%" / "&"

298 // / "+" / "-" / "^" / "_" / "`"

299 // / "{" / "}" / "~"

300 //

301 // language = <Language-Tag, defined in [RFC5646], Section 2.1>

302 //

303 // value-chars = *( pct-encoded / attr-char )

304 //

305 // pct-encoded = "%" HEXDIG HEXDIG

306 //

307 // attr-char = ALPHA / DIGIT

308 // / "!" / "#" / "$" / "&" / "+" / "-" / "."

309 // / "^" / "_" / "`" / "\|" / "~"

310 bool DecodeExtValue(const std::string& param_value, std::string* decoded) {

311 if (param_value.find('"') != std::string::npos)

312 return false;

313

314 std::string charset;

315 std::string value;

316 if (!ParseExtValueComponents(param_value, &charset, &value))

317 return false;

318

319 // RFC 5987 value should be ASCII-only.

320 if (!base::IsStringASCII(value)) {

321 decoded->clear();

322 return true;

323 }

324

325 std::string unescaped = net::UnescapeURLComponent(

326 value, net::UnescapeRule::SPACES \| net::UnescapeRule::URL_SPECIAL_CHARS);

327

328 return net::ConvertToUtf8AndNormalize(unescaped, charset.c_str(), decoded);

329 }

330

331 } // namespace

332

333 namespace net {

334

335 HttpContentDisposition::HttpContentDisposition(

336 const std::string& header, const std::string& referrer_charset)

337 : type_(INLINE),

338 parse_result_flags_(INVALID) {

339 Parse(header, referrer_charset);

340 }

341

342 HttpContentDisposition::~HttpContentDisposition() {

343 }

344

345 std::string::const_iterator HttpContentDisposition::ConsumeDispositionType(

346 std::string::const_iterator begin, std::string::const_iterator end) {

347 DCHECK(type_ == INLINE);

348 std::string::const_iterator delimiter = std::find(begin, end, ';');

349

350 std::string::const_iterator type_begin = begin;

351 std::string::const_iterator type_end = delimiter;

352 HttpUtil::TrimLWS(&type_begin, &type_end);

353

354 // If the disposition-type isn't a valid token the then the

355 // Content-Disposition header is malformed, and we treat the first bytes as

356 // a parameter rather than a disposition-type.

357 if (!HttpUtil::IsToken(type_begin, type_end))

358 return begin;

359

360 parse_result_flags_ \|= HAS_DISPOSITION_TYPE;

361

362 DCHECK(std::find(type_begin, type_end, '=') == type_end);

363

364 if (LowerCaseEqualsASCII(type_begin, type_end, "inline")) {

365 type_ = INLINE;

366 } else if (LowerCaseEqualsASCII(type_begin, type_end, "attachment")) {

367 type_ = ATTACHMENT;

368 } else {

369 parse_result_flags_ \|= HAS_UNKNOWN_DISPOSITION_TYPE;

370 type_ = ATTACHMENT;

371 }

372 return delimiter;

373 }

374

375 // http://tools.ietf.org/html/rfc6266

376 //

377 // content-disposition = "Content-Disposition" ":"

378 // disposition-type *( ";" disposition-parm )

379 //

380 // disposition-type = "inline" \| "attachment" \| disp-ext-type

381 // ; case-insensitive

382 // disp-ext-type = token

383 //

384 // disposition-parm = filename-parm \| disp-ext-parm

385 //

386 // filename-parm = "filename" "=" value

387 // \| "filename*" "=" ext-value

388 //

389 // disp-ext-parm = token "=" value

390 // \| ext-token "=" ext-value

391 // ext-token = <the characters in token, followed by "*">

392 //

393 void HttpContentDisposition::Parse(const std::string& header,

394 const std::string& referrer_charset) {

395 DCHECK(type_ == INLINE);

396 DCHECK(filename_.empty());

397

398 std::string::const_iterator pos = header.begin();

399 std::string::const_iterator end = header.end();

400 pos = ConsumeDispositionType(pos, end);

401

402 std::string name;

403 std::string filename;

404 std::string ext_filename;

405

406 HttpUtil::NameValuePairsIterator iter(pos, end, ';');

407 while (iter.GetNext()) {

408 if (filename.empty() && LowerCaseEqualsASCII(iter.name_begin(),

409 iter.name_end(),

410 "filename")) {

411 DecodeFilenameValue(iter.value(), referrer_charset, &filename,

412 &parse_result_flags_);

413 if (!filename.empty())

414 parse_result_flags_ \|= HAS_FILENAME;

415 } else if (name.empty() && LowerCaseEqualsASCII(iter.name_begin(),

416 iter.name_end(),

417 "name")) {

418 DecodeFilenameValue(iter.value(), referrer_charset, &name, NULL);

419 if (!name.empty())

420 parse_result_flags_ \|= HAS_NAME;

421 } else if (ext_filename.empty() && LowerCaseEqualsASCII(iter.name_begin(),

422 iter.name_end(),

423 "filename*")) {

424 DecodeExtValue(iter.raw_value(), &ext_filename);

425 if (!ext_filename.empty())

426 parse_result_flags_ \|= HAS_EXT_FILENAME;

427 }

428 }

429

430 if (!ext_filename.empty())

431 filename_ = ext_filename;

432 else if (!filename.empty())

433 filename_ = filename;

434 else

435 filename_ = name;

436 }

437

438 } // namespace net

OLD	NEW

« no previous file with comments | « net/http/http_content_disposition.h ('k') | net/http/http_content_disposition_unittest.cc » ('j') | no next file with comments »