OLD | NEW |
1 // Copyright 2013 The Chromium Authors. All rights reserved. | 1 // Copyright 2013 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "base/logging.h" | 5 #include "base/logging.h" |
6 #include "url/url_canon.h" | 6 #include "url/url_canon.h" |
7 #include "url/url_canon_internal.h" | 7 #include "url/url_canon_internal.h" |
8 | 8 |
9 namespace url { | 9 namespace url { |
10 | 10 |
(...skipping 16 matching lines...) Expand all Loading... |
27 // 2 E + E E + E + + + + + + + U U 0 | 27 // 2 E + E E + E + + + + + + + U U 0 |
28 // 3 % % E + E 0 <-- Those are : ; < = > ? | 28 // 3 % % E + E 0 <-- Those are : ; < = > ? |
29 // 4 % | 29 // 4 % |
30 // 5 U 0 U U U <-- Those are [ \ ] ^ _ | 30 // 5 U 0 U U U <-- Those are [ \ ] ^ _ |
31 // 6 E <-- That's ` | 31 // 6 E <-- That's ` |
32 // 7 E E E U E <-- Those are { | } ~ (UNPRINTABLE) | 32 // 7 E E E U E <-- Those are { | } ~ (UNPRINTABLE) |
33 // | 33 // |
34 // NOTE: I didn't actually test all the control characters. Some may be | 34 // NOTE: I didn't actually test all the control characters. Some may be |
35 // disallowed in the input, but they are all accepted escaped except for 0. | 35 // disallowed in the input, but they are all accepted escaped except for 0. |
36 // I also didn't test if characters affecting HTML parsing are allowed | 36 // I also didn't test if characters affecting HTML parsing are allowed |
37 // unescaped, eg. (") or (#), which would indicate the beginning of the path. | 37 // unescaped, e.g. (") or (#), which would indicate the beginning of the path. |
38 // Surprisingly, space is accepted in the input and always escaped. | 38 // Surprisingly, space is accepted in the input and always escaped. |
39 | 39 |
40 // This table lists the canonical version of all characters we allow in the | 40 // This table lists the canonical version of all characters we allow in the |
41 // input, with 0 indicating it is disallowed. We use the magic kEscapedHostChar | 41 // input, with 0 indicating it is disallowed. We use the magic kEscapedHostChar |
42 // value to indicate that this character should be escaped. We are a little more | 42 // value to indicate that this character should be escaped. We are a little more |
43 // restrictive than IE, but less restrictive than Firefox. | 43 // restrictive than IE, but less restrictive than Firefox. |
44 // | 44 // |
45 // Note that we disallow the % character. We will allow it when part of an | 45 // Note that we disallow the % character. We will allow it when part of an |
46 // escape sequence, of course, but this disallows "%25". Even though IE allows | 46 // escape sequence, of course, but this disallows "%25". Even though IE allows |
47 // it, allowing it would put us in a funny state. If there was an invalid | 47 // it, allowing it would put us in a funny state. If there was an invalid |
(...skipping 261 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
309 if (!has_non_ascii && !has_escaped) { | 309 if (!has_non_ascii && !has_escaped) { |
310 success = DoSimpleHost(&spec[host.begin], host.len, | 310 success = DoSimpleHost(&spec[host.begin], host.len, |
311 output, &has_non_ascii); | 311 output, &has_non_ascii); |
312 DCHECK(!has_non_ascii); | 312 DCHECK(!has_non_ascii); |
313 } else { | 313 } else { |
314 success = DoComplexHost(&spec[host.begin], host.len, | 314 success = DoComplexHost(&spec[host.begin], host.len, |
315 has_non_ascii, has_escaped, output); | 315 has_non_ascii, has_escaped, output); |
316 } | 316 } |
317 | 317 |
318 if (!success) { | 318 if (!success) { |
319 // Canonicalization failed. Set BROKEN to notify the caller. | 319 // Canonicalization failed. Set BROKEN to notify the caller. |
320 host_info->family = CanonHostInfo::BROKEN; | 320 host_info->family = CanonHostInfo::BROKEN; |
321 } else { | 321 } else { |
322 // After all the other canonicalization, check if we ended up with an IP | 322 // After all the other canonicalization, check if we ended up with an IP |
323 // address. IP addresses are small, so writing into this temporary buffer | 323 // address. IP addresses are small, so writing into this temporary buffer |
324 // should not cause an allocation. | 324 // should not cause an allocation. |
325 RawCanonOutput<64> canon_ip; | 325 RawCanonOutput<64> canon_ip; |
326 CanonicalizeIPAddress(output->data(), | 326 CanonicalizeIPAddress(output->data(), |
327 MakeRange(output_begin, output->length()), | 327 MakeRange(output_begin, output->length()), |
328 &canon_ip, host_info); | 328 &canon_ip, host_info); |
329 | 329 |
330 // If we got an IPv4/IPv6 address, copy the canonical form back to the | 330 // If we got an IPv4/IPv6 address, copy the canonical form back to the |
331 // real buffer. Otherwise, it's a hostname or broken IP, in which case | 331 // real buffer. Otherwise, it's a hostname or broken IP, in which case |
332 // we just leave it in place. | 332 // we just leave it in place. |
333 if (host_info->IsIPAddress()) { | 333 if (host_info->IsIPAddress()) { |
334 output->set_length(output_begin); | 334 output->set_length(output_begin); |
335 output->Append(canon_ip.data(), canon_ip.length()); | 335 output->Append(canon_ip.data(), canon_ip.length()); |
336 } | 336 } |
337 } | 337 } |
338 | 338 |
339 host_info->out_host = MakeRange(output_begin, output->length()); | 339 host_info->out_host = MakeRange(output_begin, output->length()); |
340 } | 340 } |
341 | 341 |
(...skipping 27 matching lines...) Expand all Loading... |
369 } | 369 } |
370 | 370 |
// UTF-16 (base::char16) entry point for verbose host canonicalization.
// Forwards |spec|, |host|, |output| and |host_info| unchanged to
// DoHost<base::char16, base::char16>; this wrapper adds no logic of its own.
// NOTE(review): DoHost's definition is only partly visible in this view.
// Presumably it is the function whose tail appears above, which sets
// host_info->family to CanonHostInfo::BROKEN on failure and records the
// written range in host_info->out_host -- confirm against the full file.
371 void CanonicalizeHostVerbose(const base::char16* spec, | 371 void CanonicalizeHostVerbose(const base::char16* spec, |
372 const Component& host, | 372 const Component& host, |
373 CanonOutput* output, | 373 CanonOutput* output, |
374 CanonHostInfo* host_info) { | 374 CanonHostInfo* host_info) { |
375 DoHost<base::char16, base::char16>(spec, host, output, host_info); | 375 DoHost<base::char16, base::char16>(spec, host, output, host_info); |
376 } | 376 } |
377 | 377 |
378 } // namespace url | 378 } // namespace url |
OLD | NEW |