Index: url/url_canon_pathurl.cc |
diff --git a/url/url_canon_pathurl.cc b/url/url_canon_pathurl.cc |
index bc681f4d144f27fa982c4d57523fbe53806364cb..8f7dee48f73868900640409e442629a7a33f18f6 100644 |
--- a/url/url_canon_pathurl.cc |
+++ b/url/url_canon_pathurl.cc |
@@ -13,6 +13,39 @@ namespace url_canon { |
namespace { |
+// Canonicalizes |component| of |source| into |output| with the lax path-URL |
+// rules and records the written span in |new_component|. A non-zero |
+// |separator| (e.g. '?' or '#') is appended to |output| first and is not part |
+// of that span. An invalid |component| writes nothing and resets |
+// |new_component|. Returns false if any AppendUTF8EscapedChar call failed. |
+template<typename CHAR, typename UCHAR> |
+bool DoCanonicalizePathComponent(const CHAR* source, |
+                                 const url_parse::Component& component, |
+                                 char separator, |
+                                 CanonOutput* output, |
+                                 url_parse::Component* new_component) { |
+  if (!component.is_valid()) { |
+    new_component->reset(); |
+    return true; |
+  } |
+  if (separator) |
+    output->push_back(separator); |
+  // Path URLs (think javascript:) use lax escaping: characters below 0x20 or |
+  // at/above 0x80 go through AppendUTF8EscapedChar; the remaining ASCII is |
+  // copied unchanged, which keeps JavaScript readable. |
+  bool success = true; |
+  new_component->begin = output->length(); |
+  int end = component.end(); |
+  for (int i = component.begin; i < end; i++) { |
+    UCHAR uch = static_cast<UCHAR>(source[i]); |
+    if (uch < 0x20 || uch >= 0x80) |
+      success &= AppendUTF8EscapedChar(source, &i, end, output); |
+    else |
+      output->push_back(static_cast<char>(uch)); |
+  } |
+  new_component->len = output->length() - new_component->begin; |
+  return success; |
+} |
+ |
template<typename CHAR, typename UCHAR> |
bool DoCanonicalizePathURL(const URLComponentSource<CHAR>& source, |
const url_parse::Parsed& parsed, |
@@ -28,29 +61,14 @@ bool DoCanonicalizePathURL(const URLComponentSource<CHAR>& source, |
new_parsed->password.reset(); |
new_parsed->host.reset(); |
new_parsed->port.reset(); |
- |
- if (parsed.path.is_valid()) { |
- // Copy the path using path URL's more lax escaping rules (think for |
- // javascript:). We convert to UTF-8 and escape non-ASCII, but leave all |
- // ASCII characters alone. This helps readability of JavaStript. |
- new_parsed->path.begin = output->length(); |
- int end = parsed.path.end(); |
- for (int i = parsed.path.begin; i < end; i++) { |
- UCHAR uch = static_cast<UCHAR>(source.path[i]); |
- if (uch < 0x20 || uch >= 0x80) |
- success &= AppendUTF8EscapedChar(source.path, &i, end, output); |
- else |
- output->push_back(static_cast<char>(uch)); |
- } |
- new_parsed->path.len = output->length() - new_parsed->path.begin; |
- } else { |
- // Empty path. |
- new_parsed->path.reset(); |
- } |
- |
- // Assume there's no query or ref. |
- new_parsed->query.reset(); |
- new_parsed->ref.reset(); |
+ // We allow path URLs to have the path, query and fragment components, but we |
+ // will canonicalize each of them via the weaker path URL rules. |
+ success &= DoCanonicalizePathComponent<CHAR, UCHAR>( |
+ source.path, parsed.path, 0, output, &new_parsed->path); |
+ success &= DoCanonicalizePathComponent<CHAR, UCHAR>( |
+ source.query, parsed.query, '?', output, &new_parsed->query); |
+ success &= DoCanonicalizePathComponent<CHAR, UCHAR>( |
+ source.ref, parsed.ref, '#', output, &new_parsed->ref); |
return success; |
} |