Index: components/url_formatter/url_formatter.cc |
diff --git a/net/base/net_util_icu.cc b/components/url_formatter/url_formatter.cc |
similarity index 79% |
copy from net/base/net_util_icu.cc |
copy to components/url_formatter/url_formatter.cc |
index 259baba33bd41ef781afe121579857c95b602dc2..cc209d82cdb453316b616c2ec5e42e13489c4348 100644 |
--- a/net/base/net_util_icu.cc |
+++ b/components/url_formatter/url_formatter.cc |
@@ -1,39 +1,252 @@ |
-// Copyright (c) 2012 The Chromium Authors. All rights reserved. |
+// Copyright 2015 The Chromium Authors. All rights reserved. |
// Use of this source code is governed by a BSD-style license that can be |
// found in the LICENSE file. |
-#include "net/base/net_util.h" |
+#include "components/url_formatter/url_formatter.h" |
+#include <algorithm> |
#include <map> |
-#include <vector> |
+#include <utility> |
-#include "base/i18n/time_formatting.h" |
-#include "base/json/string_escape.h" |
#include "base/lazy_instance.h" |
#include "base/logging.h" |
+#include "base/macros.h" |
#include "base/memory/singleton.h" |
#include "base/stl_util.h" |
#include "base/strings/string_tokenizer.h" |
#include "base/strings/string_util.h" |
#include "base/strings/utf_offset_string_conversions.h" |
#include "base/strings/utf_string_conversions.h" |
-#include "base/time/time.h" |
-#include "url/gurl.h" |
+#include "base/synchronization/lock.h" |
#include "third_party/icu/source/common/unicode/uidna.h" |
#include "third_party/icu/source/common/unicode/uniset.h" |
#include "third_party/icu/source/common/unicode/uscript.h" |
-#include "third_party/icu/source/common/unicode/uset.h" |
-#include "third_party/icu/source/i18n/unicode/datefmt.h" |
#include "third_party/icu/source/i18n/unicode/regex.h" |
#include "third_party/icu/source/i18n/unicode/ulocdata.h" |
+#include "url/gurl.h" |
+#include "url/third_party/mozilla/url_parse.h" |
-using base::Time; |
- |
-namespace net { |
+namespace url_formatter { |
namespace { |
-typedef std::vector<size_t> Offsets; |
+base::string16 IDNToUnicodeWithAdjustments( |
+ const std::string& host, |
+ const std::string& languages, |
+ base::OffsetAdjuster::Adjustments* adjustments); |
+bool IDNToUnicodeOneComponent(const base::char16* comp, |
+ size_t comp_len, |
+ const std::string& languages, |
+ base::string16* out); |
+ |
+class AppendComponentTransform { |
+ public: |
+ AppendComponentTransform() {} |
+ virtual ~AppendComponentTransform() {} |
+ |
+ virtual base::string16 Execute( |
+ const std::string& component_text, |
+ base::OffsetAdjuster::Adjustments* adjustments) const = 0; |
+ |
+ // NOTE: No DISALLOW_COPY_AND_ASSIGN here, since gcc < 4.3.0 requires an |
+ // accessible copy constructor in order to call AppendFormattedComponent() |
+ // with an inline temporary (see http://gcc.gnu.org/bugs/#cxx%5Frvalbind ). |
+}; |
+ |
+class HostComponentTransform : public AppendComponentTransform { |
+ public: |
+ explicit HostComponentTransform(const std::string& languages) |
+ : languages_(languages) {} |
+ |
+ private: |
+ base::string16 Execute( |
+ const std::string& component_text, |
+ base::OffsetAdjuster::Adjustments* adjustments) const override { |
+ return IDNToUnicodeWithAdjustments(component_text, languages_, adjustments); |
+ } |
+ |
+ const std::string& languages_; |
+}; |
+ |
+class NonHostComponentTransform : public AppendComponentTransform { |
+ public: |
+ explicit NonHostComponentTransform(net::UnescapeRule::Type unescape_rules) |
+ : unescape_rules_(unescape_rules) {} |
+ |
+ private: |
+ base::string16 Execute( |
+ const std::string& component_text, |
+ base::OffsetAdjuster::Adjustments* adjustments) const override { |
+ return (unescape_rules_ == net::UnescapeRule::NONE) |
+ ? base::UTF8ToUTF16WithAdjustments(component_text, adjustments) |
+ : net::UnescapeAndDecodeUTF8URLComponentWithAdjustments( |
+ component_text, unescape_rules_, adjustments); |
+ } |
+ |
+ const net::UnescapeRule::Type unescape_rules_; |
+}; |
+ |
+// Transforms the portion of |spec| covered by |original_component| according to |
+// |transform|. Appends the result to |output|. If |output_component| is |
+// non-NULL, its start and length are set to the transformed component's new |
+// start and length. If |adjustments| is non-NULL, appends adjustments (if |
+// any) that reflect the transformation the original component underwent to |
+// become the transformed value appended to |output|. |
+void AppendFormattedComponent(const std::string& spec, |
+ const url::Component& original_component, |
+ const AppendComponentTransform& transform, |
+ base::string16* output, |
+ url::Component* output_component, |
+ base::OffsetAdjuster::Adjustments* adjustments) { |
+ DCHECK(output); |
+ if (original_component.is_nonempty()) { |
+ size_t original_component_begin = |
+ static_cast<size_t>(original_component.begin); |
+ size_t output_component_begin = output->length(); |
+ std::string component_str(spec, original_component_begin, |
+ static_cast<size_t>(original_component.len)); |
+ |
+ // Transform |component_str| and modify |adjustments| appropriately. |
+ base::OffsetAdjuster::Adjustments component_transform_adjustments; |
+ output->append( |
+ transform.Execute(component_str, &component_transform_adjustments)); |
+ |
+ // Shift all the adjustments made for this component so the offsets are |
+ // valid for the original string and add them to |adjustments|. |
+ for (base::OffsetAdjuster::Adjustments::iterator comp_iter = |
+ component_transform_adjustments.begin(); |
+ comp_iter != component_transform_adjustments.end(); ++comp_iter) |
+ comp_iter->original_offset += original_component_begin; |
+ if (adjustments) { |
+ adjustments->insert(adjustments->end(), |
+ component_transform_adjustments.begin(), |
+ component_transform_adjustments.end()); |
+ } |
+ |
+ // Set positions of the parsed component. |
+ if (output_component) { |
+ output_component->begin = static_cast<int>(output_component_begin); |
+ output_component->len = |
+ static_cast<int>(output->length() - output_component_begin); |
+ } |
+ } else if (output_component) { |
+ output_component->reset(); |
+ } |
+} |
+ |
+// If |component| is valid, its begin is incremented by |delta|. |
+void AdjustComponent(int delta, url::Component* component) { |
+ if (!component->is_valid()) |
+ return; |
+ |
+ DCHECK(delta >= 0 || component->begin >= -delta); |
+ component->begin += delta; |
+} |
+ |
+// Adjusts all the components of |parsed| by |delta|, except for the scheme. |
+void AdjustAllComponentsButScheme(int delta, url::Parsed* parsed) { |
+ AdjustComponent(delta, &(parsed->username)); |
+ AdjustComponent(delta, &(parsed->password)); |
+ AdjustComponent(delta, &(parsed->host)); |
+ AdjustComponent(delta, &(parsed->port)); |
+ AdjustComponent(delta, &(parsed->path)); |
+ AdjustComponent(delta, &(parsed->query)); |
+ AdjustComponent(delta, &(parsed->ref)); |
+} |
+ |
+// Helper for FormatUrlWithAdjustments(). |
+base::string16 FormatViewSourceUrl( |
+ const GURL& url, |
+ const std::string& languages, |
+ FormatUrlTypes format_types, |
+ net::UnescapeRule::Type unescape_rules, |
+ url::Parsed* new_parsed, |
+ size_t* prefix_end, |
+ base::OffsetAdjuster::Adjustments* adjustments) { |
+ DCHECK(new_parsed); |
+ const char kViewSource[] = "view-source:"; |
+ const size_t kViewSourceLength = arraysize(kViewSource) - 1; |
+ |
+ // Format the underlying URL and record adjustments. |
+ const std::string& url_str(url.possibly_invalid_spec()); |
+ adjustments->clear(); |
+ base::string16 result( |
+ base::ASCIIToUTF16(kViewSource) + |
+ FormatUrlWithAdjustments(GURL(url_str.substr(kViewSourceLength)), |
+ languages, format_types, unescape_rules, |
+ new_parsed, prefix_end, adjustments)); |
+ // Revise |adjustments| by shifting the offsets to account for the prefix |
+ // that the above call to FormatUrlWithAdjustments() didn't get to see. |
+ for (base::OffsetAdjuster::Adjustments::iterator it = adjustments->begin(); |
+ it != adjustments->end(); ++it) |
+ it->original_offset += kViewSourceLength; |
+ |
+ // Adjust positions of the parsed components. |
+ if (new_parsed->scheme.is_nonempty()) { |
+ // Treat "view-source:real-scheme" as the scheme. |
+ new_parsed->scheme.len += kViewSourceLength; |
+ } else { |
+ new_parsed->scheme.begin = 0; |
+ new_parsed->scheme.len = kViewSourceLength - 1; |
+ } |
+ AdjustAllComponentsButScheme(kViewSourceLength, new_parsed); |
+ |
+ if (prefix_end) |
+ *prefix_end += kViewSourceLength; |
+ |
+ return result; |
+} |
+ |
+// TODO(brettw) bug 734373: check the scripts for each host component and |
+// don't un-IDN-ize if there is more than one. Alternatively, only IDN for |
+// scripts that the user has installed. For now, just put the entire |
+// path through IDN. Maybe this feature can be implemented in ICU itself? |
+// |
+// We may want to skip this step in the case of file URLs to allow unicode |
+// UNC hostnames regardless of encodings. |
+base::string16 IDNToUnicodeWithAdjustments( |
+ const std::string& host, |
+ const std::string& languages, |
+ base::OffsetAdjuster::Adjustments* adjustments) { |
+ if (adjustments) |
+ adjustments->clear(); |
+ // Convert the ASCII input to a base::string16 for ICU. |
+ base::string16 input16; |
+ input16.reserve(host.length()); |
+ input16.insert(input16.end(), host.begin(), host.end()); |
+ |
+ // Do each component of the host separately, since we enforce script matching |
+ // on a per-component basis. |
+ base::string16 out16; |
+ for (size_t component_start = 0, component_end; |
+ component_start < input16.length(); |
+ component_start = component_end + 1) { |
+ // Find the end of the component. |
+ component_end = input16.find('.', component_start); |
+ if (component_end == base::string16::npos) |
+ component_end = input16.length(); // For getting the last component. |
+ size_t component_length = component_end - component_start; |
+ size_t new_component_start = out16.length(); |
+ bool converted_idn = false; |
+ if (component_end > component_start) { |
+ // Add the substring that we just found. |
+ converted_idn = |
+ IDNToUnicodeOneComponent(input16.data() + component_start, |
+ component_length, languages, &out16); |
+ } |
+ size_t new_component_length = out16.length() - new_component_start; |
+ |
+ if (converted_idn && adjustments) { |
+ adjustments->push_back(base::OffsetAdjuster::Adjustment( |
+ component_start, component_length, new_component_length)); |
+ } |
+ |
+ // Need to add the dot we just found (if we found one). |
+ if (component_end < input16.length()) |
+ out16.push_back('.'); |
+ } |
+ return out16; |
+} |
// Does some simple normalization of scripts so we can allow certain scripts |
// to exist together. |
@@ -85,8 +298,7 @@ bool IsCompatibleWithASCIILetters(const std::string& lang) { |
// For now, just list Chinese, Japanese and Korean (positive list). |
// An alternative is negative-listing (languages using Greek and |
// Cyrillic letters), but it can be more dangerous. |
- return !lang.substr(0, 2).compare("zh") || |
- !lang.substr(0, 2).compare("ja") || |
+ return !lang.substr(0, 2).compare("zh") || !lang.substr(0, 2).compare("ja") || |
!lang.substr(0, 2).compare("ko"); |
} |
@@ -100,7 +312,7 @@ class LangToExemplarSet { |
private: |
LangToExemplarSetMap map; |
- LangToExemplarSet() { } |
+ LangToExemplarSet() {} |
~LangToExemplarSet() { |
STLDeleteContainerPairSecondPointers(map.begin(), map.end()); |
} |
@@ -124,21 +336,19 @@ bool GetExemplarSetForLang(const std::string& lang, |
return false; |
} |
-void SetExemplarSetForLang(const std::string& lang, |
- icu::UnicodeSet* lang_set) { |
+void SetExemplarSetForLang(const std::string& lang, icu::UnicodeSet* lang_set) { |
LangToExemplarSetMap& map = LangToExemplarSet::GetInstance()->map; |
map.insert(std::make_pair(lang, lang_set)); |
} |
-static base::LazyInstance<base::Lock>::Leaky |
- g_lang_set_lock = LAZY_INSTANCE_INITIALIZER; |
+static base::LazyInstance<base::Lock>::Leaky g_lang_set_lock = |
+ LAZY_INSTANCE_INITIALIZER; |
// Returns true if all the characters in component_characters are used by |
// the language |lang|. |
bool IsComponentCoveredByLang(const icu::UnicodeSet& component_characters, |
const std::string& lang) { |
- CR_DEFINE_STATIC_LOCAL( |
- const icu::UnicodeSet, kASCIILetters, ('a', 'z')); |
+ CR_DEFINE_STATIC_LOCAL(const icu::UnicodeSet, kASCIILetters, ('a', 'z')); |
icu::UnicodeSet* lang_set = nullptr; |
// We're called from both the UI thread and the history thread. |
{ |
@@ -190,7 +400,8 @@ bool IsIDNComponentSafe(const base::char16* str, |
// reaching here.) |
// The original list is available at |
// http://kb.mozillazine.org/Network.IDN.blacklist_chars and |
- // at http://mxr.mozilla.org/seamonkey/source/modules/libpref/src/init/all.js#703 |
+ // at |
+ // http://mxr.mozilla.org/seamonkey/source/modules/libpref/src/init/all.js#703 |
UErrorCode status = U_ZERO_ERROR; |
#ifdef U_WCHAR_IS_UTF16 |
@@ -206,29 +417,35 @@ bool IsIDNComponentSafe(const base::char16* str, |
L"[\ufffa-\ufffd]\U0001f50f\U0001f510\U0001f512\U0001f513]"), |
status); |
DCHECK(U_SUCCESS(status)); |
- icu::RegexMatcher dangerous_patterns(icu::UnicodeString( |
- // Lone katakana no, so, or n |
- L"[^\\p{Katakana}][\u30ce\u30f3\u30bd][^\\p{Katakana}]" |
- // Repeating Japanese accent characters |
- L"|[\u3099\u309a\u309b\u309c][\u3099\u309a\u309b\u309c]"), |
+ icu::RegexMatcher dangerous_patterns( |
+ icu::UnicodeString( |
+ // Lone katakana no, so, or n |
+ L"[^\\p{Katakana}][\u30ce\u30f3\u30bd][^\\p{Katakana}]" |
+ // Repeating Japanese accent characters |
+ L"|[\u3099\u309a\u309b\u309c][\u3099\u309a\u309b\u309c]"), |
0, status); |
#else |
- icu::UnicodeSet dangerous_characters(icu::UnicodeString( |
- "[[\\u0020\\u00ad\\u00bc\\u00bd\\u01c3\\u0337\\u0338" |
- "\\u05c3\\u05f4\\u06d4\\u0702\\u115f\\u1160][\\u2000-\\u200b]" |
- "[\\u2024\\u2027\\u2028\\u2029\\u2039\\u203a\\u2044\\u205f]" |
- "[\\u2154-\\u2156][\\u2159-\\u215b][\\u215f\\u2215\\u23ae" |
- "\\u29f6\\u29f8\\u2afb\\u2afd][\\u2ff0-\\u2ffb][\\u3014" |
- "\\u3015\\u3033\\u3164\\u321d\\u321e\\u33ae\\u33af\\u33c6\\u33df\\ufe14" |
- "\\ufe15\\ufe3f\\ufe5d\\ufe5e\\ufeff\\uff0e\\uff06\\uff61\\uffa0\\ufff9]" |
- "[\\ufffa-\\ufffd]\\U0001f50f\\U0001f510\\U0001f512\\U0001f513]", -1, |
- US_INV), status); |
+ icu::UnicodeSet dangerous_characters( |
+ icu::UnicodeString( |
+ "[[\\u0020\\u00ad\\u00bc\\u00bd\\u01c3\\u0337\\u0338" |
+ "\\u05c3\\u05f4\\u06d4\\u0702\\u115f\\u1160][\\u2000-\\u200b]" |
+ "[\\u2024\\u2027\\u2028\\u2029\\u2039\\u203a\\u2044\\u205f]" |
+ "[\\u2154-\\u2156][\\u2159-\\u215b][\\u215f\\u2215\\u23ae" |
+ "\\u29f6\\u29f8\\u2afb\\u2afd][\\u2ff0-\\u2ffb][\\u3014" |
+ "\\u3015\\u3033\\u3164\\u321d\\u321e\\u33ae\\u33af\\u33c6\\u33df\\ufe" |
+ "14" |
+ "\\ufe15\\ufe3f\\ufe5d\\ufe5e\\ufeff\\uff0e\\uff06\\uff61\\uffa0\\uff" |
+ "f9]" |
+ "[\\ufffa-\\ufffd]\\U0001f50f\\U0001f510\\U0001f512\\U0001f513]", |
+ -1, US_INV), |
+ status); |
DCHECK(U_SUCCESS(status)); |
- icu::RegexMatcher dangerous_patterns(icu::UnicodeString( |
- // Lone katakana no, so, or n |
- "[^\\p{Katakana}][\\u30ce\\u30f3\\u30bd][^\\p{Katakana}]" |
- // Repeating Japanese accent characters |
- "|[\\u3099\\u309a\\u309b\\u309c][\\u3099\\u309a\\u309b\\u309c]"), |
+ icu::RegexMatcher dangerous_patterns( |
+ icu::UnicodeString( |
+ // Lone katakana no, so, or n |
+ "[^\\p{Katakana}][\\u30ce\\u30f3\\u30bd][^\\p{Katakana}]" |
+ // Repeating Japanese accent characters |
+ "|[\\u3099\\u309a\\u309b\\u309c][\\u3099\\u309a\\u309b\\u309c]"), |
0, status); |
#endif |
DCHECK(U_SUCCESS(status)); |
@@ -305,8 +522,8 @@ struct UIDNAWrapper { |
UIDNA* value; |
}; |
-static base::LazyInstance<UIDNAWrapper>::Leaky |
- g_uidna = LAZY_INSTANCE_INITIALIZER; |
+static base::LazyInstance<UIDNAWrapper>::Leaky g_uidna = |
+ LAZY_INSTANCE_INITIALIZER; |
// Converts one component of a host (between dots) to IDN if safe. The result |
// will be APPENDED to the given output string and will be the same as the input |
@@ -360,286 +577,39 @@ bool IDNToUnicodeOneComponent(const base::char16* comp, |
return false; |
} |
-// TODO(brettw) bug 734373: check the scripts for each host component and |
-// don't un-IDN-ize if there is more than one. Alternatively, only IDN for |
-// scripts that the user has installed. For now, just put the entire |
-// path through IDN. Maybe this feature can be implemented in ICU itself? |
-// |
-// We may want to skip this step in the case of file URLs to allow unicode |
-// UNC hostnames regardless of encodings. |
-base::string16 IDNToUnicodeWithAdjustments( |
- const std::string& host, |
- const std::string& languages, |
- base::OffsetAdjuster::Adjustments* adjustments) { |
- if (adjustments) |
- adjustments->clear(); |
- // Convert the ASCII input to a base::string16 for ICU. |
- base::string16 input16; |
- input16.reserve(host.length()); |
- input16.insert(input16.end(), host.begin(), host.end()); |
- |
- // Do each component of the host separately, since we enforce script matching |
- // on a per-component basis. |
- base::string16 out16; |
- { |
- for (size_t component_start = 0, component_end; |
- component_start < input16.length(); |
- component_start = component_end + 1) { |
- // Find the end of the component. |
- component_end = input16.find('.', component_start); |
- if (component_end == base::string16::npos) |
- component_end = input16.length(); // For getting the last component. |
- size_t component_length = component_end - component_start; |
- size_t new_component_start = out16.length(); |
- bool converted_idn = false; |
- if (component_end > component_start) { |
- // Add the substring that we just found. |
- converted_idn = IDNToUnicodeOneComponent( |
- input16.data() + component_start, component_length, languages, |
- &out16); |
- } |
- size_t new_component_length = out16.length() - new_component_start; |
- |
- if (converted_idn && adjustments) { |
- adjustments->push_back(base::OffsetAdjuster::Adjustment( |
- component_start, component_length, new_component_length)); |
- } |
- |
- // Need to add the dot we just found (if we found one). |
- if (component_end < input16.length()) |
- out16.push_back('.'); |
- } |
- } |
- return out16; |
-} |
- |
-// If |component| is valid, its begin is incremented by |delta|. |
-void AdjustComponent(int delta, url::Component* component) { |
- if (!component->is_valid()) |
- return; |
- |
- DCHECK(delta >= 0 || component->begin >= -delta); |
- component->begin += delta; |
-} |
- |
-// Adjusts all the components of |parsed| by |delta|, except for the scheme. |
-void AdjustAllComponentsButScheme(int delta, url::Parsed* parsed) { |
- AdjustComponent(delta, &(parsed->username)); |
- AdjustComponent(delta, &(parsed->password)); |
- AdjustComponent(delta, &(parsed->host)); |
- AdjustComponent(delta, &(parsed->port)); |
- AdjustComponent(delta, &(parsed->path)); |
- AdjustComponent(delta, &(parsed->query)); |
- AdjustComponent(delta, &(parsed->ref)); |
-} |
- |
-// Helper for FormatUrlWithOffsets(). |
-base::string16 FormatViewSourceUrl( |
- const GURL& url, |
- const std::string& languages, |
- FormatUrlTypes format_types, |
- UnescapeRule::Type unescape_rules, |
- url::Parsed* new_parsed, |
- size_t* prefix_end, |
- base::OffsetAdjuster::Adjustments* adjustments) { |
- DCHECK(new_parsed); |
- const char kViewSource[] = "view-source:"; |
- const size_t kViewSourceLength = arraysize(kViewSource) - 1; |
- |
- // Format the underlying URL and record adjustments. |
- const std::string& url_str(url.possibly_invalid_spec()); |
- adjustments->clear(); |
- base::string16 result(base::ASCIIToUTF16(kViewSource) + |
- FormatUrlWithAdjustments(GURL(url_str.substr(kViewSourceLength)), |
- languages, format_types, unescape_rules, |
- new_parsed, prefix_end, adjustments)); |
- // Revise |adjustments| by shifting to the offsets to prefix that the above |
- // call to FormatUrl didn't get to see. |
- for (base::OffsetAdjuster::Adjustments::iterator it = adjustments->begin(); |
- it != adjustments->end(); ++it) |
- it->original_offset += kViewSourceLength; |
- |
- // Adjust positions of the parsed components. |
- if (new_parsed->scheme.is_nonempty()) { |
- // Assume "view-source:real-scheme" as a scheme. |
- new_parsed->scheme.len += kViewSourceLength; |
- } else { |
- new_parsed->scheme.begin = 0; |
- new_parsed->scheme.len = kViewSourceLength - 1; |
- } |
- AdjustAllComponentsButScheme(kViewSourceLength, new_parsed); |
- |
- if (prefix_end) |
- *prefix_end += kViewSourceLength; |
- |
- return result; |
-} |
- |
-class AppendComponentTransform { |
- public: |
- AppendComponentTransform() {} |
- virtual ~AppendComponentTransform() {} |
- |
- virtual base::string16 Execute( |
- const std::string& component_text, |
- base::OffsetAdjuster::Adjustments* adjustments) const = 0; |
- |
- // NOTE: No DISALLOW_COPY_AND_ASSIGN here, since gcc < 4.3.0 requires an |
- // accessible copy constructor in order to call AppendFormattedComponent() |
- // with an inline temporary (see http://gcc.gnu.org/bugs/#cxx%5Frvalbind ). |
-}; |
- |
-class HostComponentTransform : public AppendComponentTransform { |
- public: |
- explicit HostComponentTransform(const std::string& languages) |
- : languages_(languages) { |
- } |
- |
- private: |
- base::string16 Execute( |
- const std::string& component_text, |
- base::OffsetAdjuster::Adjustments* adjustments) const override { |
- return IDNToUnicodeWithAdjustments(component_text, languages_, |
- adjustments); |
- } |
- |
- const std::string& languages_; |
-}; |
- |
-class NonHostComponentTransform : public AppendComponentTransform { |
- public: |
- explicit NonHostComponentTransform(UnescapeRule::Type unescape_rules) |
- : unescape_rules_(unescape_rules) { |
- } |
- |
- private: |
- base::string16 Execute( |
- const std::string& component_text, |
- base::OffsetAdjuster::Adjustments* adjustments) const override { |
- return (unescape_rules_ == UnescapeRule::NONE) ? |
- base::UTF8ToUTF16WithAdjustments(component_text, adjustments) : |
- UnescapeAndDecodeUTF8URLComponentWithAdjustments(component_text, |
- unescape_rules_, adjustments); |
- } |
- |
- const UnescapeRule::Type unescape_rules_; |
-}; |
- |
-// Transforms the portion of |spec| covered by |original_component| according to |
-// |transform|. Appends the result to |output|. If |output_component| is |
-// non-NULL, its start and length are set to the transformed component's new |
-// start and length. If |adjustments| is non-NULL, appends adjustments (if |
-// any) that reflect the transformation the original component underwent to |
-// become the transformed value appended to |output|. |
-void AppendFormattedComponent(const std::string& spec, |
- const url::Component& original_component, |
- const AppendComponentTransform& transform, |
- base::string16* output, |
- url::Component* output_component, |
- base::OffsetAdjuster::Adjustments* adjustments) { |
- DCHECK(output); |
- if (original_component.is_nonempty()) { |
- size_t original_component_begin = |
- static_cast<size_t>(original_component.begin); |
- size_t output_component_begin = output->length(); |
- std::string component_str(spec, original_component_begin, |
- static_cast<size_t>(original_component.len)); |
- |
- // Transform |component_str| and modify |adjustments| appropriately. |
- base::OffsetAdjuster::Adjustments component_transform_adjustments; |
- output->append( |
- transform.Execute(component_str, &component_transform_adjustments)); |
- |
- // Shift all the adjustments made for this component so the offsets are |
- // valid for the original string and add them to |adjustments|. |
- for (base::OffsetAdjuster::Adjustments::iterator comp_iter = |
- component_transform_adjustments.begin(); |
- comp_iter != component_transform_adjustments.end(); ++comp_iter) |
- comp_iter->original_offset += original_component_begin; |
- if (adjustments) { |
- adjustments->insert(adjustments->end(), |
- component_transform_adjustments.begin(), |
- component_transform_adjustments.end()); |
- } |
- |
- // Set positions of the parsed component. |
- if (output_component) { |
- output_component->begin = static_cast<int>(output_component_begin); |
- output_component->len = |
- static_cast<int>(output->length() - output_component_begin); |
- } |
- } else if (output_component) { |
- output_component->reset(); |
- } |
-} |
- |
} // namespace |
-const FormatUrlType kFormatUrlOmitNothing = 0; |
-const FormatUrlType kFormatUrlOmitUsernamePassword = 1 << 0; |
-const FormatUrlType kFormatUrlOmitHTTP = 1 << 1; |
+const FormatUrlType kFormatUrlOmitNothing = 0; |
+const FormatUrlType kFormatUrlOmitUsernamePassword = 1 << 0; |
+const FormatUrlType kFormatUrlOmitHTTP = 1 << 1; |
const FormatUrlType kFormatUrlOmitTrailingSlashOnBareHostname = 1 << 2; |
-const FormatUrlType kFormatUrlOmitAll = kFormatUrlOmitUsernamePassword | |
- kFormatUrlOmitHTTP | kFormatUrlOmitTrailingSlashOnBareHostname; |
+const FormatUrlType kFormatUrlOmitAll = |
+ kFormatUrlOmitUsernamePassword | kFormatUrlOmitHTTP | |
+ kFormatUrlOmitTrailingSlashOnBareHostname; |
-base::string16 IDNToUnicode(const std::string& host, |
- const std::string& languages) { |
- return IDNToUnicodeWithAdjustments(host, languages, NULL); |
-} |
- |
-std::string GetDirectoryListingEntry(const base::string16& name, |
- const std::string& raw_bytes, |
- bool is_dir, |
- int64_t size, |
- Time modified) { |
- std::string result; |
- result.append("<script>addRow("); |
- base::EscapeJSONString(name, true, &result); |
- result.append(","); |
- if (raw_bytes.empty()) { |
- base::EscapeJSONString(EscapePath(base::UTF16ToUTF8(name)), true, &result); |
- } else { |
- base::EscapeJSONString(EscapePath(raw_bytes), true, &result); |
- } |
- if (is_dir) { |
- result.append(",1,"); |
- } else { |
- result.append(",0,"); |
- } |
- |
- // Negative size means unknown or not applicable (e.g. directory). |
- base::string16 size_string; |
- if (size >= 0) |
- size_string = base::FormatBytesUnlocalized(size); |
- base::EscapeJSONString(size_string, true, &result); |
- |
- result.append(","); |
- |
- base::string16 modified_str; |
- // |modified| can be NULL in FTP listings. |
- if (!modified.is_null()) { |
- modified_str = base::TimeFormatShortDateAndTime(modified); |
- } |
- base::EscapeJSONString(modified_str, true, &result); |
- |
- result.append(");</script>\n"); |
- |
- return result; |
-} |
- |
-void AppendFormattedHost(const GURL& url, |
+base::string16 FormatUrl(const GURL& url, |
const std::string& languages, |
- base::string16* output) { |
- AppendFormattedComponent(url.possibly_invalid_spec(), |
- url.parsed_for_possibly_invalid_spec().host, |
- HostComponentTransform(languages), output, NULL, NULL); |
+ FormatUrlTypes format_types, |
+ net::UnescapeRule::Type unescape_rules, |
+ url::Parsed* new_parsed, |
+ size_t* prefix_end, |
+ size_t* offset_for_adjustment) { |
+ std::vector<size_t> offsets; |
+ if (offset_for_adjustment) |
+ offsets.push_back(*offset_for_adjustment); |
+ base::string16 result = |
+ FormatUrlWithOffsets(url, languages, format_types, unescape_rules, |
+ new_parsed, prefix_end, &offsets); |
+ if (offset_for_adjustment) |
+ *offset_for_adjustment = offsets[0]; |
+ return result; |
} |
base::string16 FormatUrlWithOffsets( |
const GURL& url, |
const std::string& languages, |
FormatUrlTypes format_types, |
- UnescapeRule::Type unescape_rules, |
+ net::UnescapeRule::Type unescape_rules, |
url::Parsed* new_parsed, |
size_t* prefix_end, |
std::vector<size_t>* offsets_for_adjustment) { |
@@ -650,8 +620,7 @@ base::string16 FormatUrlWithOffsets( |
base::OffsetAdjuster::AdjustOffsets(adjustments, offsets_for_adjustment); |
if (offsets_for_adjustment) { |
std::for_each( |
- offsets_for_adjustment->begin(), |
- offsets_for_adjustment->end(), |
+ offsets_for_adjustment->begin(), offsets_for_adjustment->end(), |
base::LimitOffset<std::string>(format_url_return_value.length())); |
} |
return format_url_return_value; |
@@ -661,7 +630,7 @@ base::string16 FormatUrlWithAdjustments( |
const GURL& url, |
const std::string& languages, |
FormatUrlTypes format_types, |
- UnescapeRule::Type unescape_rules, |
+ net::UnescapeRule::Type unescape_rules, |
url::Parsed* new_parsed, |
size_t* prefix_end, |
base::OffsetAdjuster::Adjustments* adjustments) { |
@@ -681,9 +650,8 @@ base::string16 FormatUrlWithAdjustments( |
if (url.SchemeIs(kViewSource) && |
!base::StartsWith(url.possibly_invalid_spec(), kViewSourceTwice, |
base::CompareCase::INSENSITIVE_ASCII)) { |
- return FormatViewSourceUrl(url, languages, format_types, |
- unescape_rules, new_parsed, prefix_end, |
- adjustments); |
+ return FormatViewSourceUrl(url, languages, format_types, unescape_rules, |
+ new_parsed, prefix_end, adjustments); |
} |
// We handle both valid and invalid URLs (this will give us the spec |
@@ -698,7 +666,7 @@ base::string16 FormatUrlWithAdjustments( |
spec.begin() + parsed.CountCharactersBefore(url::Parsed::USERNAME, true)); |
const char kHTTP[] = "http://"; |
const char kFTP[] = "ftp."; |
- // url_fixer::FixupURL() treats "ftp.foo.com" as ftp://ftp.foo.com. This |
+ // url_formatter::FixupURL() treats "ftp.foo.com" as ftp://ftp.foo.com. This |
// means that if we trim "http://" off a URL whose host starts with "ftp." and |
// the user inputs this into any field subject to fixup (which is basically |
// all input fields), the meaning would be changed. (In fact, often the |
@@ -733,8 +701,7 @@ base::string16 FormatUrlWithAdjustments( |
// username/password. |
adjustments->push_back(base::OffsetAdjuster::Adjustment( |
static_cast<size_t>(nonempty_component->begin), |
- static_cast<size_t>(nonempty_component->len + 1), |
- 0)); |
+ static_cast<size_t>(nonempty_component->len + 1), 0)); |
} |
} |
} else { |
@@ -760,8 +727,7 @@ base::string16 FormatUrlWithAdjustments( |
if (parsed.port.is_nonempty()) { |
url_string.push_back(':'); |
new_parsed->port.begin = url_string.length(); |
- url_string.insert(url_string.end(), |
- spec.begin() + parsed.port.begin, |
+ url_string.insert(url_string.end(), spec.begin() + parsed.port.begin, |
spec.begin() + parsed.port.end()); |
new_parsed->port.len = url_string.length() - new_parsed->port.begin; |
} else { |
@@ -790,13 +756,12 @@ base::string16 FormatUrlWithAdjustments( |
if (parsed.ref.is_valid()) |
url_string.push_back('#'); |
AppendFormattedComponent(spec, parsed.ref, |
- NonHostComponentTransform(UnescapeRule::NONE), |
+ NonHostComponentTransform(net::UnescapeRule::NONE), |
&url_string, &new_parsed->ref, adjustments); |
// If we need to strip out http do it after the fact. |
- if (omit_http && |
- base::StartsWith(url_string, base::ASCIIToUTF16(kHTTP), |
- base::CompareCase::SENSITIVE)) { |
+ if (omit_http && base::StartsWith(url_string, base::ASCIIToUTF16(kHTTP), |
+ base::CompareCase::SENSITIVE)) { |
const size_t kHTTPSize = arraysize(kHTTP) - 1; |
url_string = url_string.substr(kHTTPSize); |
// Because offsets in the |adjustments| are already calculated with respect |
@@ -804,7 +769,7 @@ base::string16 FormatUrlWithAdjustments( |
// after stripping the prefix. The only thing necessary is to add an |
// adjustment to reflect the stripped prefix. |
adjustments->insert(adjustments->begin(), |
- base::OffsetAdjuster::Adjustment(0, kHTTPSize, 0)); |
+ base::OffsetAdjuster::Adjustment(0, kHTTPSize, 0)); |
if (prefix_end) |
*prefix_end -= kHTTPSize; |
@@ -819,21 +784,24 @@ base::string16 FormatUrlWithAdjustments( |
return url_string; |
} |
-base::string16 FormatUrl(const GURL& url, |
+bool CanStripTrailingSlash(const GURL& url) { |
+ // Omit the path only for standard, non-file URLs with nothing but "/" after |
+ // the hostname. |
+ return url.IsStandard() && !url.SchemeIsFile() && !url.SchemeIsFileSystem() && |
+ !url.has_query() && !url.has_ref() && url.path() == "/"; |
+} |
+ |
+void AppendFormattedHost(const GURL& url, |
const std::string& languages, |
- FormatUrlTypes format_types, |
- UnescapeRule::Type unescape_rules, |
- url::Parsed* new_parsed, |
- size_t* prefix_end, |
- size_t* offset_for_adjustment) { |
- Offsets offsets; |
- if (offset_for_adjustment) |
- offsets.push_back(*offset_for_adjustment); |
- base::string16 result = FormatUrlWithOffsets(url, languages, format_types, |
- unescape_rules, new_parsed, prefix_end, &offsets); |
- if (offset_for_adjustment) |
- *offset_for_adjustment = offsets[0]; |
- return result; |
+ base::string16* output) { |
+ AppendFormattedComponent( |
+ url.possibly_invalid_spec(), url.parsed_for_possibly_invalid_spec().host, |
+ HostComponentTransform(languages), output, NULL, NULL); |
+} |
+ |
+base::string16 IDNToUnicode(const std::string& host, |
+ const std::string& languages) { |
+ return IDNToUnicodeWithAdjustments(host, languages, NULL); |
} |
-} // namespace net |
+} // namespace url_formatter |