OLD | NEW |
1 // Copyright 2007, Google Inc. | 1 // Copyright 2007, Google Inc. |
2 // All rights reserved. | 2 // All rights reserved. |
3 // | 3 // |
4 // Redistribution and use in source and binary forms, with or without | 4 // Redistribution and use in source and binary forms, with or without |
5 // modification, are permitted provided that the following conditions are | 5 // modification, are permitted provided that the following conditions are |
6 // met: | 6 // met: |
7 // | 7 // |
8 // * Redistributions of source code must retain the above copyright | 8 // * Redistributions of source code must retain the above copyright |
9 // notice, this list of conditions and the following disclaimer. | 9 // notice, this list of conditions and the following disclaimer. |
10 // * Redistributions in binary form must reproduce the above | 10 // * Redistributions in binary form must reproduce the above |
(...skipping 47 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
58 // | 58 // |
59 // NOTE: I didn't actually test all the control characters. Some may be | 59 // NOTE: I didn't actually test all the control characters. Some may be |
60 // disallowed in the input, but they are all accepted escaped except for 0. | 60 // disallowed in the input, but they are all accepted escaped except for 0. |
61 // I also didn't test if characters affecting HTML parsing are allowed | 61 // I also didn't test if characters affecting HTML parsing are allowed |
62 // unescaped, eg. (") or (#), which would indicate the beginning of the path. | 62 // unescaped, eg. (") or (#), which would indicate the beginning of the path. |
63 // Surprisingly, space is accepted in the input and always escaped. | 63 // Surprisingly, space is accepted in the input and always escaped. |
64 | 64 |
65 // This table lists the canonical version of all characters we allow in the | 65 // This table lists the canonical version of all characters we allow in the |
66 // input, with 0 indicating it is disallowed. We use the magic kEscapedHostChar | 66 // input, with 0 indicating it is disallowed. We use the magic kEscapedHostChar |
67 // value to indicate that this character should be escaped. We are a little more | 67 // value to indicate that this character should be escaped. We are a little more |
68 // restricive than IE, but less restrictive than Firefox. | 68 // restrictive than IE, but less restrictive than Firefox. |
69 // | 69 // |
70 // Note that we disallow the % character. We will allow it when part of an | 70 // Note that we disallow the % character. We will allow it when part of an |
71 // escape sequence, of course, but this disallows "%25". Even though IE allows | 71 // escape sequence, of course, but this disallows "%25". Even though IE allows |
72 // it, allowing it would put us in a funny state. If there was an invalid | 72 // it, allowing it would put us in a funny state. If there was an invalid |
73 // escape sequence like "%zz", we'll add "%25zz" to the output and fail. | 73 // escape sequence like "%zz", we'll add "%25zz" to the output and fail. |
74 // Allowing percents means we'll succeed a second time, so validity would change | 74 // Allowing percents means we'll succeed a second time, so validity would change |
75 // based on how many times you run the canonicalizer. We prefer to always report | 75 // based on how many times you run the canonicalizer. We prefer to always report |
76 // the same vailidity, so reject this. | 76 // the same validity, so reject this. |
77 const unsigned char kEsc = 0xff; | 77 const unsigned char kEsc = 0xff; |
78 const unsigned char kHostCharLookup[0x80] = { | 78 const unsigned char kHostCharLookup[0x80] = { |
(...skipping 291 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
370 } | 370 } |
371 | 371 |
372 bool CanonicalizeHost(const char16* spec, | 372 bool CanonicalizeHost(const char16* spec, |
373 const url_parse::Component& host, | 373 const url_parse::Component& host, |
374 CanonOutput* output, | 374 CanonOutput* output, |
375 url_parse::Component* out_host) { | 375 url_parse::Component* out_host) { |
376 return DoHost<char16, char16>(spec, host, output, out_host); | 376 return DoHost<char16, char16>(spec, host, output, out_host); |
377 } | 377 } |
378 | 378 |
379 } // namespace url_canon | 379 } // namespace url_canon |
OLD | NEW |