OLD | NEW |
1 // Copyright 2013 The Chromium Authors. All rights reserved. | 1 // Copyright 2013 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "base/logging.h" | 5 #include "base/logging.h" |
6 #include "url/url_canon.h" | 6 #include "url/url_canon.h" |
7 #include "url/url_canon_internal.h" | 7 #include "url/url_canon_internal.h" |
8 #include "url/url_parse_internal.h" | 8 #include "url/url_parse_internal.h" |
9 | 9 |
10 namespace url { | 10 namespace url { |
(...skipping 227 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
238 | 238 |
239 } else if (out_ch == '\\') { | 239 } else if (out_ch == '\\') { |
240 // Convert backslashes to forward slashes | 240 // Convert backslashes to forward slashes |
241 output->push_back('/'); | 241 output->push_back('/'); |
242 | 242 |
243 } else if (out_ch == '%') { | 243 } else if (out_ch == '%') { |
244 // Handle escape sequences. | 244 // Handle escape sequences. |
245 unsigned char unescaped_value; | 245 unsigned char unescaped_value; |
246 if (DecodeEscaped(spec, &i, end, &unescaped_value)) { | 246 if (DecodeEscaped(spec, &i, end, &unescaped_value)) { |
247 // Valid escape sequence, see if we keep, reject, or unescape it. | 247 // Valid escape sequence, see if we keep, reject, or unescape it. |
| 248 // Note that at this point DecodeEscaped() will have advanced |i| to |
| 249 // the last character of the escape sequence. |
248 char unescaped_flags = kPathCharLookup[unescaped_value]; | 250 char unescaped_flags = kPathCharLookup[unescaped_value]; |
249 | 251 |
250 if (unescaped_flags & UNESCAPE) { | 252 bool unescape = (unescaped_flags & UNESCAPE) != 0; |
251 // This escaped value shouldn't be escaped, copy it. | 253 if (unescape) { |
| 254 // This escaped value shouldn't be escaped. Try to copy it. |
| 255 int original_length = output->length(); |
252 output->push_back(unescaped_value); | 256 output->push_back(unescaped_value); |
253 } else if (unescaped_flags & INVALID_BIT) { | 257 |
254 // Invalid escaped character, copy it and remember the error. | 258 // Bail if this results in the output string containing a new |
| 259 // escaped value -- this means the source string nested escapes |
| 260 // multiple levels deep (e.g. "%%300", which would turn into |
| 261 // "%00"), and unescaping would result in a URL spec that could |
| 262 // change further if canonicalized a second time, which can cause |
| 263 // a variety of problems in various places in the codebase. |
| 264 unsigned char temp; |
| 265 if ((original_length > 0) && ((i + 1) < end) && |
| 266 (output->at(original_length - 1) == '%')) { |
| 267 // The output contains "%x" where 'x' is the unescaped value |
| 268 // computed above. Try appending the next source character and |
| 269 // see if we get a new escape sequence. Note that because we |
| 270 // simply append the next character instead of seeing whether |
| 271 // it's also a nested escape sequence, we'll unescape an input |
| 272 // like "%%30%30" into "%0%30" before detecting that the second |
| 273 // "%30" can cause a problem and bailing. This is sufficient to |
| 274 // avoid problematic cases and easier/more performant. |
| 275 output->push_back(spec[i + 1]); |
| 276 int begin = original_length - 1; |
| 277 if (DecodeEscaped(output->data(), &begin, output->length(), |
| 278 &temp)) { |
| 279 // New escape sequence found; refuse to unescape this |
| 280 // character. |
| 281 unescape = false; |
| 282 output->set_length(original_length); |
| 283 } else { |
| 284 // We're OK, but we still need to undo the naive appending of |
| 285 // the next source character so the next loop iteration can |
| 286 // handle it correctly. |
| 287 output->set_length(original_length + 1); |
| 288 } |
| 289 } else if ((original_length > 1) && |
| 290 (output->at(original_length - 2) == '%')) { |
| 291 // The output contains "%yx" where 'x' is the unescaped value |
| 292 // computed above and 'y' is some other character. See if this |
| 293 // forms a new escape sequence. |
| 294 int begin = original_length - 2; |
| 295 if (DecodeEscaped(output->data(), &begin, output->length(), |
| 296 &temp)) { |
| 297 // New escape sequence found; refuse to unescape this |
| 298 // character. |
| 299 unescape = false; |
| 300 output->set_length(original_length); |
| 301 } |
| 302 } |
| 303 } |
| 304 |
| 305 if (!unescape) { |
| 306 // Either this is an invalid escaped character, or it's a valid |
| 307 // escaped character we should keep escaped. In the first case we |
| 308 // should just copy it exactly and remember the error. In the |
| 309 // second we also copy exactly in case the server is sensitive to |
| 310 // changing the case of any hex letters. |
255 output->push_back('%'); | 311 output->push_back('%'); |
256 output->push_back(static_cast<char>(spec[i - 1])); | 312 output->push_back(static_cast<char>(spec[i - 1])); |
257 output->push_back(static_cast<char>(spec[i])); | 313 output->push_back(static_cast<char>(spec[i])); |
258 success = false; | 314 if (unescaped_flags & INVALID_BIT) |
259 } else { | 315 success = false; |
260 // Valid escaped character but we should keep it escaped. We | |
261 // don't want to change the case of any hex letters in case | |
262 // the server is sensitive to that, so we just copy the two | |
263 // characters without checking (DecodeEscape will have advanced | |
264 // to the last character of the pair). | |
265 output->push_back('%'); | |
266 output->push_back(static_cast<char>(spec[i - 1])); | |
267 output->push_back(static_cast<char>(spec[i])); | |
268 } | 316 } |
269 } else { | 317 } else { |
270 // Invalid escape sequence. IE7 rejects any URLs with such | 318 // Invalid escape sequence. IE7 rejects any URLs with such |
271 // sequences, while Firefox, IE6, and Safari all pass it through | 319 // sequences, while Firefox, IE6, and Safari all pass it through |
272 // unchanged. We are more permissive than IE7. I don't think this | 320 // unchanged. We are more permissive than IE7. I don't think this |
273 // can cause significant problems; if it does, we should change | 321 // can cause significant problems; if it does, we should change |
274 // to be more like IE7. | 322 // to be more like IE7. |
275 output->push_back('%'); | 323 output->push_back('%'); |
276 } | 324 } |
277 | 325 |
(...skipping 66 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
344 bool CanonicalizePartialPath(const base::char16* spec, | 392 bool CanonicalizePartialPath(const base::char16* spec, |
345 const Component& path, | 393 const Component& path, |
346 int path_begin_in_output, | 394 int path_begin_in_output, |
347 CanonOutput* output) { | 395 CanonOutput* output) { |
348 return DoPartialPath<base::char16, base::char16>(spec, path, | 396 return DoPartialPath<base::char16, base::char16>(spec, path, |
349 path_begin_in_output, | 397 path_begin_in_output, |
350 output); | 398 output); |
351 } | 399 } |
352 | 400 |
353 } // namespace url | 401 } // namespace url |
OLD | NEW |