| OLD | NEW |
| 1 // Copyright 2013 The Chromium Authors. All rights reserved. | 1 // Copyright 2013 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "base/logging.h" | 5 #include "base/logging.h" |
| 6 #include "url/url_canon.h" | 6 #include "url/url_canon.h" |
| 7 #include "url/url_canon_internal.h" | 7 #include "url/url_canon_internal.h" |
| 8 | 8 |
| 9 namespace url { | 9 namespace url { |
| 10 | 10 |
| (...skipping 16 matching lines...) Expand all Loading... |
| 27 // 2 E + E E + E + + + + + + + U U 0 | 27 // 2 E + E E + E + + + + + + + U U 0 |
| 28 // 3 % % E + E 0 <-- Those are : ; < = > ? | 28 // 3 % % E + E 0 <-- Those are : ; < = > ? |
| 29 // 4 % | 29 // 4 % |
| 30 // 5 U 0 U U U <-- Those are [ \ ] ^ _ | 30 // 5 U 0 U U U <-- Those are [ \ ] ^ _ |
| 31 // 6 E <-- That's ` | 31 // 6 E <-- That's ` |
| 32 // 7 E E E U E <-- Those are { | } ~ (UNPRINTABLE) | 32 // 7 E E E U E <-- Those are { | } ~ (UNPRINTABLE) |
| 33 // | 33 // |
| 34 // NOTE: I didn't actually test all the control characters. Some may be | 34 // NOTE: I didn't actually test all the control characters. Some may be |
| 35 // disallowed in the input, but they are all accepted escaped except for 0. | 35 // disallowed in the input, but they are all accepted escaped except for 0. |
| 36 // I also didn't test if characters affecting HTML parsing are allowed | 36 // I also didn't test if characters affecting HTML parsing are allowed |
| 37 // unescaped, eg. (") or (#), which would indicate the beginning of the path. | 37 // unescaped, e.g. (") or (#), which would indicate the beginning of the path. |
| 38 // Surprisingly, space is accepted in the input and always escaped. | 38 // Surprisingly, space is accepted in the input and always escaped. |
| 39 | 39 |
| 40 // This table lists the canonical version of all characters we allow in the | 40 // This table lists the canonical version of all characters we allow in the |
| 41 // input, with 0 indicating it is disallowed. We use the magic kEscapedHostChar | 41 // input, with 0 indicating it is disallowed. We use the magic kEscapedHostChar |
| 42 // value to indicate that this character should be escaped. We are a little more | 42 // value to indicate that this character should be escaped. We are a little more |
| 43 // restrictive than IE, but less restrictive than Firefox. | 43 // restrictive than IE, but less restrictive than Firefox. |
| 44 // | 44 // |
| 45 // Note that we disallow the % character. We will allow it when part of an | 45 // Note that we disallow the % character. We will allow it when part of an |
| 46 // escape sequence, of course, but this disallows "%25". Even though IE allows | 46 // escape sequence, of course, but this disallows "%25". Even though IE allows |
| 47 // it, allowing it would put us in a funny state. If there was an invalid | 47 // it, allowing it would put us in a funny state. If there was an invalid |
| (...skipping 261 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 309 if (!has_non_ascii && !has_escaped) { | 309 if (!has_non_ascii && !has_escaped) { |
| 310 success = DoSimpleHost(&spec[host.begin], host.len, | 310 success = DoSimpleHost(&spec[host.begin], host.len, |
| 311 output, &has_non_ascii); | 311 output, &has_non_ascii); |
| 312 DCHECK(!has_non_ascii); | 312 DCHECK(!has_non_ascii); |
| 313 } else { | 313 } else { |
| 314 success = DoComplexHost(&spec[host.begin], host.len, | 314 success = DoComplexHost(&spec[host.begin], host.len, |
| 315 has_non_ascii, has_escaped, output); | 315 has_non_ascii, has_escaped, output); |
| 316 } | 316 } |
| 317 | 317 |
| 318 if (!success) { | 318 if (!success) { |
| 319 // Canonicalization failed. Set BROKEN to notify the caller. | 319 // Canonicalization failed. Set BROKEN to notify the caller. |
| 320 host_info->family = CanonHostInfo::BROKEN; | 320 host_info->family = CanonHostInfo::BROKEN; |
| 321 } else { | 321 } else { |
| 322 // After all the other canonicalization, check if we ended up with an IP | 322 // After all the other canonicalization, check if we ended up with an IP |
| 323 // address. IP addresses are small, so writing into this temporary buffer | 323 // address. IP addresses are small, so writing into this temporary buffer |
| 324 // should not cause an allocation. | 324 // should not cause an allocation. |
| 325 RawCanonOutput<64> canon_ip; | 325 RawCanonOutput<64> canon_ip; |
| 326 CanonicalizeIPAddress(output->data(), | 326 CanonicalizeIPAddress(output->data(), |
| 327 MakeRange(output_begin, output->length()), | 327 MakeRange(output_begin, output->length()), |
| 328 &canon_ip, host_info); | 328 &canon_ip, host_info); |
| 329 | 329 |
| 330 // If we got an IPv4/IPv6 address, copy the canonical form back to the | 330 // If we got an IPv4/IPv6 address, copy the canonical form back to the |
| 331 // real buffer. Otherwise, it's a hostname or broken IP, in which case | 331 // real buffer. Otherwise, it's a hostname or broken IP, in which case |
| 332 // we just leave it in place. | 332 // we just leave it in place. |
| 333 if (host_info->IsIPAddress()) { | 333 if (host_info->IsIPAddress()) { |
| 334 output->set_length(output_begin); | 334 output->set_length(output_begin); |
| 335 output->Append(canon_ip.data(), canon_ip.length()); | 335 output->Append(canon_ip.data(), canon_ip.length()); |
| 336 } | 336 } |
| 337 } | 337 } |
| 338 | 338 |
| 339 host_info->out_host = MakeRange(output_begin, output->length()); | 339 host_info->out_host = MakeRange(output_begin, output->length()); |
| 340 } | 340 } |
| 341 | 341 |
| (...skipping 27 matching lines...) Expand all Loading... |
| 369 } | 369 } |
| 370 | 370 |
| 371 void CanonicalizeHostVerbose(const base::char16* spec, | 371 void CanonicalizeHostVerbose(const base::char16* spec, |
| 372 const Component& host, | 372 const Component& host, |
| 373 CanonOutput* output, | 373 CanonOutput* output, |
| 374 CanonHostInfo* host_info) { | 374 CanonHostInfo* host_info) { |
| 375 DoHost<base::char16, base::char16>(spec, host, output, host_info); | 375 DoHost<base::char16, base::char16>(spec, host, output, host_info); |
| 376 } | 376 } |
| 377 | 377 |
| 378 } // namespace url | 378 } // namespace url |
| OLD | NEW |