| OLD | NEW |
| 1 // Copyright 2007, Google Inc. | 1 // Copyright 2007, Google Inc. |
| 2 // All rights reserved. | 2 // All rights reserved. |
| 3 // | 3 // |
| 4 // Redistribution and use in source and binary forms, with or without | 4 // Redistribution and use in source and binary forms, with or without |
| 5 // modification, are permitted provided that the following conditions are | 5 // modification, are permitted provided that the following conditions are |
| 6 // met: | 6 // met: |
| 7 // | 7 // |
| 8 // * Redistributions of source code must retain the above copyright | 8 // * Redistributions of source code must retain the above copyright |
| 9 // notice, this list of conditions and the following disclaimer. | 9 // notice, this list of conditions and the following disclaimer. |
| 10 // * Redistributions in binary form must reproduce the above | 10 // * Redistributions in binary form must reproduce the above |
| (...skipping 47 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 58 // | 58 // |
| 59 // NOTE: I didn't actually test all the control characters. Some may be | 59 // NOTE: I didn't actually test all the control characters. Some may be |
| 60 // disallowed in the input, but they are all accepted escaped except for 0. | 60 // disallowed in the input, but they are all accepted escaped except for 0. |
| 61 // I also didn't test if characters affecting HTML parsing are allowed | 61 // I also didn't test if characters affecting HTML parsing are allowed |
| 62 // unescaped, eg. (") or (#), which would indicate the beginning of the path. | 62 // unescaped, eg. (") or (#), which would indicate the beginning of the path. |
| 63 // Surprisingly, space is accepted in the input and always escaped. | 63 // Surprisingly, space is accepted in the input and always escaped. |
| 64 | 64 |
| 65 // This table lists the canonical version of all characters we allow in the | 65 // This table lists the canonical version of all characters we allow in the |
| 66 // input, with 0 indicating it is disallowed. We use the magic kEscapedHostChar | 66 // input, with 0 indicating it is disallowed. We use the magic kEscapedHostChar |
| 67 // value to indicate that this character should be escaped. We are a little more | 67 // value to indicate that this character should be escaped. We are a little more |
| 68 // restricive than IE, but less restrictive than Firefox. | 68 // restrictive than IE, but less restrictive than Firefox. |
| 69 // | 69 // |
| 70 // Note that we disallow the % character. We will allow it when part of an | 70 // Note that we disallow the % character. We will allow it when part of an |
| 71 // escape sequence, of course, but this disallows "%25". Even though IE allows | 71 // escape sequence, of course, but this disallows "%25". Even though IE allows |
| 72 // it, allowing it would put us in a funny state. If there was an invalid | 72 // it, allowing it would put us in a funny state. If there was an invalid |
| 73 // escape sequence like "%zz", we'll add "%25zz" to the output and fail. | 73 // escape sequence like "%zz", we'll add "%25zz" to the output and fail. |
| 74 // Allowing percents means we'll succeed a second time, so validity would change | 74 // Allowing percents means we'll succeed a second time, so validity would change |
| 75 // based on how many times you run the canonicalizer. We prefer to always report | 75 // based on how many times you run the canonicalizer. We prefer to always report |
| 76 // the same validity, so reject this. | 76 // the same validity, so reject this. |
| 77 const unsigned char kEsc = 0xff; | 77 const unsigned char kEsc = 0xff; |
| 78 const unsigned char kHostCharLookup[0x80] = { | 78 const unsigned char kHostCharLookup[0x80] = { |
| (...skipping 291 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 370 } | 370 } |
| 371 | 371 |
| 372 bool CanonicalizeHost(const char16* spec, | 372 bool CanonicalizeHost(const char16* spec, |
| 373 const url_parse::Component& host, | 373 const url_parse::Component& host, |
| 374 CanonOutput* output, | 374 CanonOutput* output, |
| 375 url_parse::Component* out_host) { | 375 url_parse::Component* out_host) { |
| 376 return DoHost<char16, char16>(spec, host, output, out_host); | 376 return DoHost<char16, char16>(spec, host, output, out_host); |
| 377 } | 377 } |
| 378 | 378 |
| 379 } // namespace url_canon | 379 } // namespace url_canon |
| OLD | NEW |