| OLD | NEW |
| 1 // Copyright 2013 The Chromium Authors. All rights reserved. | 1 // Copyright 2013 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 // Functions for canonicalizing "path" URLs. Not to be confused with the path | 5 // Functions for canonicalizing "path" URLs. Not to be confused with the path |
| 6 // of a URL, these are URLs that have no authority section, only a path. For | 6 // of a URL, these are URLs that have no authority section, only a path. For |
| 7 // example, "javascript:" and "data:". | 7 // example, "javascript:" and "data:". |
| 8 | 8 |
| 9 #include "url/url_canon.h" | 9 #include "url/url_canon.h" |
| 10 #include "url/url_canon_internal.h" | 10 #include "url/url_canon_internal.h" |
| 11 | 11 |
| 12 namespace url_canon { | 12 namespace url_canon { |
| 13 | 13 |
| 14 namespace { | 14 namespace { |
| 15 | 15 |
| 16 template<typename CHAR, typename UCHAR> | 16 template<typename CHAR, typename UCHAR> |
| 17 bool DoCanonicalizePathComponent(const CHAR* source, |
| 18 const url_parse::Component& component, |
| 19 CHAR seperator, |
| 20 CanonOutput* output, |
| 21 url_parse::Component* new_parsed) { |
| 22 bool success = true; |
| 23 if (component.is_valid()) { |
| 24 if (seperator) |
| 25 output->push_back(seperator); |
| 26 // Copy the path using path URL's more lax escaping rules (think for |
| 27 // javascript:). We convert to UTF-8 and escape non-ASCII, but leave all |
| 28 // ASCII characters alone. This helps readability of JavaStript. |
| 29 new_parsed->begin = output->length(); |
| 30 int end = component.end(); |
| 31 for (int i = component.begin; i < end; i++) { |
| 32 UCHAR uch = static_cast<UCHAR>(source[i]); |
| 33 if (uch < 0x20 || uch >= 0x80) |
| 34 success &= AppendUTF8EscapedChar(source, &i, end, output); |
| 35 else |
| 36 output->push_back(static_cast<char>(uch)); |
| 37 } |
| 38 new_parsed->len = output->length() - new_parsed->begin; |
| 39 } else { |
| 40 // Empty part. |
| 41 new_parsed->reset(); |
| 42 } |
| 43 return success; |
| 44 } |
| 45 |
| 46 template<typename CHAR, typename UCHAR> |
| 17 bool DoCanonicalizePathURL(const URLComponentSource<CHAR>& source, | 47 bool DoCanonicalizePathURL(const URLComponentSource<CHAR>& source, |
| 18 const url_parse::Parsed& parsed, | 48 const url_parse::Parsed& parsed, |
| 19 CanonOutput* output, | 49 CanonOutput* output, |
| 20 url_parse::Parsed* new_parsed) { | 50 url_parse::Parsed* new_parsed) { |
| 21 // Scheme: this will append the colon. | 51 // Scheme: this will append the colon. |
| 22 bool success = CanonicalizeScheme(source.scheme, parsed.scheme, | 52 bool success = CanonicalizeScheme(source.scheme, parsed.scheme, |
| 23 output, &new_parsed->scheme); | 53 output, &new_parsed->scheme); |
| 24 | 54 |
| 25 // We assume there's no authority for path URLs. Note that hosts should never | 55 // We assume there's no authority for path URLs. Note that hosts should never |
| 26 // have -1 length. | 56 // have -1 length. |
| 27 new_parsed->username.reset(); | 57 new_parsed->username.reset(); |
| 28 new_parsed->password.reset(); | 58 new_parsed->password.reset(); |
| 29 new_parsed->host.reset(); | 59 new_parsed->host.reset(); |
| 30 new_parsed->port.reset(); | 60 new_parsed->port.reset(); |
| 31 | 61 success &= DoCanonicalizePathComponent<CHAR, UCHAR>( |
| 32 if (parsed.path.is_valid()) { | 62 source.path, parsed.path, 0, output, &new_parsed->path); |
| 33 // Copy the path using path URL's more lax escaping rules (think for | 63 success &= DoCanonicalizePathComponent<CHAR, UCHAR>( |
| 34 // javascript:). We convert to UTF-8 and escape non-ASCII, but leave all | 64 source.query, parsed.query, '?', output, &new_parsed->query); |
| 35 // ASCII characters alone. This helps readability of JavaStript. | 65 success &= DoCanonicalizePathComponent<CHAR, UCHAR>( |
| 36 new_parsed->path.begin = output->length(); | 66 source.ref, parsed.ref, '#', output, &new_parsed->ref); |
| 37 int end = parsed.path.end(); | |
| 38 for (int i = parsed.path.begin; i < end; i++) { | |
| 39 UCHAR uch = static_cast<UCHAR>(source.path[i]); | |
| 40 if (uch < 0x20 || uch >= 0x80) | |
| 41 success &= AppendUTF8EscapedChar(source.path, &i, end, output); | |
| 42 else | |
| 43 output->push_back(static_cast<char>(uch)); | |
| 44 } | |
| 45 new_parsed->path.len = output->length() - new_parsed->path.begin; | |
| 46 } else { | |
| 47 // Empty path. | |
| 48 new_parsed->path.reset(); | |
| 49 } | |
| 50 | |
| 51 // Assume there's no query or ref. | |
| 52 new_parsed->query.reset(); | |
| 53 new_parsed->ref.reset(); | |
| 54 | 67 |
| 55 return success; | 68 return success; |
| 56 } | 69 } |
| 57 | 70 |
| 58 } // namespace | 71 } // namespace |
| 59 | 72 |
| 60 bool CanonicalizePathURL(const char* spec, | 73 bool CanonicalizePathURL(const char* spec, |
| 61 int spec_len, | 74 int spec_len, |
| 62 const url_parse::Parsed& parsed, | 75 const url_parse::Parsed& parsed, |
| 63 CanonOutput* output, | 76 CanonOutput* output, |
| (...skipping 30 matching lines...) Expand all Loading... |
| 94 url_parse::Parsed* new_parsed) { | 107 url_parse::Parsed* new_parsed) { |
| 95 RawCanonOutput<1024> utf8; | 108 RawCanonOutput<1024> utf8; |
| 96 URLComponentSource<char> source(base); | 109 URLComponentSource<char> source(base); |
| 97 url_parse::Parsed parsed(base_parsed); | 110 url_parse::Parsed parsed(base_parsed); |
| 98 SetupUTF16OverrideComponents(base, replacements, &utf8, &source, &parsed); | 111 SetupUTF16OverrideComponents(base, replacements, &utf8, &source, &parsed); |
| 99 return DoCanonicalizePathURL<char, unsigned char>( | 112 return DoCanonicalizePathURL<char, unsigned char>( |
| 100 source, parsed, output, new_parsed); | 113 source, parsed, output, new_parsed); |
| 101 } | 114 } |
| 102 | 115 |
| 103 } // namespace url_canon | 116 } // namespace url_canon |
| OLD | NEW |