Index: net/base/net_util_icu.cc |
diff --git a/net/base/net_util_icu.cc b/net/base/net_util_icu.cc |
index 4c5837566ff37f36341ae94a13d5ae25e34f4a99..14bd959019eb99f16904da54d43a3048f4896dfa 100644 |
--- a/net/base/net_util_icu.cc |
+++ b/net/base/net_util_icu.cc |
@@ -85,8 +85,7 @@ bool IsCompatibleWithASCIILetters(const std::string& lang) { |
// For now, just list Chinese, Japanese and Korean (positive list). |
// An alternative is negative-listing (languages using Greek and |
// Cyrillic letters), but it can be more dangerous. |
- return !lang.substr(0, 2).compare("zh") || |
- !lang.substr(0, 2).compare("ja") || |
+ return !lang.substr(0, 2).compare("zh") || !lang.substr(0, 2).compare("ja") || |
!lang.substr(0, 2).compare("ko"); |
} |
@@ -100,7 +99,7 @@ class LangToExemplarSet { |
private: |
LangToExemplarSetMap map; |
- LangToExemplarSet() { } |
+ LangToExemplarSet() {} |
~LangToExemplarSet() { |
STLDeleteContainerPairSecondPointers(map.begin(), map.end()); |
} |
@@ -124,21 +123,19 @@ bool GetExemplarSetForLang(const std::string& lang, |
return false; |
} |
-void SetExemplarSetForLang(const std::string& lang, |
- icu::UnicodeSet* lang_set) { |
+void SetExemplarSetForLang(const std::string& lang, icu::UnicodeSet* lang_set) { |
LangToExemplarSetMap& map = LangToExemplarSet::GetInstance()->map; |
map.insert(std::make_pair(lang, lang_set)); |
} |
-static base::LazyInstance<base::Lock>::Leaky |
- g_lang_set_lock = LAZY_INSTANCE_INITIALIZER; |
+static base::LazyInstance<base::Lock>::Leaky g_lang_set_lock = |
+ LAZY_INSTANCE_INITIALIZER; |
// Returns true if all the characters in component_characters are used by |
// the language |lang|. |
bool IsComponentCoveredByLang(const icu::UnicodeSet& component_characters, |
const std::string& lang) { |
- CR_DEFINE_STATIC_LOCAL( |
- const icu::UnicodeSet, kASCIILetters, ('a', 'z')); |
+ CR_DEFINE_STATIC_LOCAL(const icu::UnicodeSet, kASCIILetters, ('a', 'z')); |
icu::UnicodeSet* lang_set = NULL; |
// We're called from both the UI thread and the history thread. |
{ |
@@ -154,9 +151,8 @@ bool IsComponentCoveredByLang(const icu::UnicodeSet& component_characters, |
// (issue 2078) |
// DCHECK(U_SUCCESS(status) && status != U_USING_DEFAULT_WARNING); |
if (U_SUCCESS(status) && status != U_USING_DEFAULT_WARNING) { |
- lang_set = reinterpret_cast<icu::UnicodeSet *>( |
- ulocdata_getExemplarSet(uld, NULL, 0, |
- ULOCDATA_ES_STANDARD, &status)); |
+ lang_set = reinterpret_cast<icu::UnicodeSet*>(ulocdata_getExemplarSet( |
+ uld, NULL, 0, ULOCDATA_ES_STANDARD, &status)); |
// If |lang| is compatible with ASCII Latin letters, add them. |
if (IsCompatibleWithASCIILetters(lang)) |
lang_set->addAll(kASCIILetters); |
@@ -188,43 +184,56 @@ bool IsIDNComponentSafe(const base::char16* str, |
// reaching here.) |
// The original list is available at |
// http://kb.mozillazine.org/Network.IDN.blacklist_chars and |
- // at http://mxr.mozilla.org/seamonkey/source/modules/libpref/src/init/all.js#703 |
+ // at |
+ // http://mxr.mozilla.org/seamonkey/source/modules/libpref/src/init/all.js#703 |
UErrorCode status = U_ZERO_ERROR; |
#ifdef U_WCHAR_IS_UTF16 |
- icu::UnicodeSet dangerous_characters(icu::UnicodeString( |
- L"[[\\ \u00ad\u00bc\u00bd\u01c3\u0337\u0338" |
- L"\u05c3\u05f4\u06d4\u0702\u115f\u1160][\u2000-\u200b]" |
- L"[\u2024\u2027\u2028\u2029\u2039\u203a\u2044\u205f]" |
- L"[\u2154-\u2156][\u2159-\u215b][\u215f\u2215\u23ae" |
- L"\u29f6\u29f8\u2afb\u2afd][\u2ff0-\u2ffb][\u3014" |
- L"\u3015\u3033\u3164\u321d\u321e\u33ae\u33af\u33c6\u33df\ufe14" |
- L"\ufe15\ufe3f\ufe5d\ufe5e\ufeff\uff0e\uff06\uff61\uffa0\ufff9]" |
- L"[\ufffa-\ufffd]]"), status); |
+ icu::UnicodeSet dangerous_characters( |
+ icu::UnicodeString( |
+ L"[[\\ \u00ad\u00bc\u00bd\u01c3\u0337\u0338" |
+ L"\u05c3\u05f4\u06d4\u0702\u115f\u1160][\u2000-\u200b]" |
+ L"[\u2024\u2027\u2028\u2029\u2039\u203a\u2044\u205f]" |
+ L"[\u2154-\u2156][\u2159-\u215b][\u215f\u2215\u23ae" |
+ L"\u29f6\u29f8\u2afb\u2afd][\u2ff0-\u2ffb][\u3014" |
+ L"\u3015\u3033\u3164\u321d\u321e\u33ae\u33af\u33c6\u33df\ufe14" |
+ L"\ufe15\ufe3f\ufe5d\ufe5e\ufeff\uff0e\uff06\uff61\uffa0\ufff9]" |
+ L"[\ufffa-\ufffd]]"), |
+ status); |
DCHECK(U_SUCCESS(status)); |
- icu::RegexMatcher dangerous_patterns(icu::UnicodeString( |
- // Lone katakana no, so, or n |
- L"[^\\p{Katakana}][\u30ce\u30f3\u30bd][^\\p{Katakana}]" |
- // Repeating Japanese accent characters |
- L"|[\u3099\u309a\u309b\u309c][\u3099\u309a\u309b\u309c]"), |
- 0, status); |
+ icu::RegexMatcher dangerous_patterns( |
+ icu::UnicodeString( |
+ // Lone katakana no, so, or n |
+ L"[^\\p{Katakana}][\u30ce\u30f3\u30bd][^\\p{Katakana}]" |
+ // Repeating Japanese accent characters |
+ L"|[\u3099\u309a\u309b\u309c][\u3099\u309a\u309b\u309c]"), |
+ 0, |
+ status); |
#else |
- icu::UnicodeSet dangerous_characters(icu::UnicodeString( |
- "[[\\u0020\\u00ad\\u00bc\\u00bd\\u01c3\\u0337\\u0338" |
- "\\u05c3\\u05f4\\u06d4\\u0702\\u115f\\u1160][\\u2000-\\u200b]" |
- "[\\u2024\\u2027\\u2028\\u2029\\u2039\\u203a\\u2044\\u205f]" |
- "[\\u2154-\\u2156][\\u2159-\\u215b][\\u215f\\u2215\\u23ae" |
- "\\u29f6\\u29f8\\u2afb\\u2afd][\\u2ff0-\\u2ffb][\\u3014" |
- "\\u3015\\u3033\\u3164\\u321d\\u321e\\u33ae\\u33af\\u33c6\\u33df\\ufe14" |
- "\\ufe15\\ufe3f\\ufe5d\\ufe5e\\ufeff\\uff0e\\uff06\\uff61\\uffa0\\ufff9]" |
- "[\\ufffa-\\ufffd]]", -1, US_INV), status); |
+ icu::UnicodeSet dangerous_characters( |
+ icu::UnicodeString( |
+ "[[\\u0020\\u00ad\\u00bc\\u00bd\\u01c3\\u0337\\u0338" |
+ "\\u05c3\\u05f4\\u06d4\\u0702\\u115f\\u1160][\\u2000-\\u200b]" |
+ "[\\u2024\\u2027\\u2028\\u2029\\u2039\\u203a\\u2044\\u205f]" |
+ "[\\u2154-\\u2156][\\u2159-\\u215b][\\u215f\\u2215\\u23ae" |
+ "\\u29f6\\u29f8\\u2afb\\u2afd][\\u2ff0-\\u2ffb][\\u3014" |
+ "\\u3015\\u3033\\u3164\\u321d\\u321e\\u33ae\\u33af\\u33c6" |
+ "\\u33df\\ufe14" |
+ "\\ufe15\\ufe3f\\ufe5d\\ufe5e\\ufeff\\uff0e\\uff06\\uff61" |
+ "\\uffa0\\ufff9]" |
+ "[\\ufffa-\\ufffd]]", |
+ -1, |
+ US_INV), |
+ status); |
DCHECK(U_SUCCESS(status)); |
- icu::RegexMatcher dangerous_patterns(icu::UnicodeString( |
- // Lone katakana no, so, or n |
- "[^\\p{Katakana}][\\u30ce\\u30f3\u30bd][^\\p{Katakana}]" |
- // Repeating Japanese accent characters |
- "|[\\u3099\\u309a\\u309b\\u309c][\\u3099\\u309a\\u309b\\u309c]"), |
- 0, status); |
+ icu::RegexMatcher dangerous_patterns( |
+ icu::UnicodeString( |
+ // Lone katakana no, so, or n; ICU's regex parser decodes the escapes |
+ "[^\\p{Katakana}][\\u30ce\\u30f3\\u30bd][^\\p{Katakana}]" |
+ // Repeating Japanese accent characters |
+ "|[\\u3099\\u309a\\u309b\\u309c][\\u3099\\u309a\\u309b\\u309c]"), |
+ 0, |
+ status); |
#endif |
DCHECK(U_SUCCESS(status)); |
icu::UnicodeSet component_characters; |
@@ -300,8 +309,8 @@ struct UIDNAWrapper { |
UIDNA* value; |
}; |
-static base::LazyInstance<UIDNAWrapper>::Leaky |
- g_uidna = LAZY_INSTANCE_INITIALIZER; |
+static base::LazyInstance<UIDNAWrapper>::Leaky g_uidna = |
+ LAZY_INSTANCE_INITIALIZER; |
// Converts one component of a host (between dots) to IDN if safe. The result |
// will be APPENDED to the given output string and will be the same as the input |
@@ -331,17 +340,21 @@ bool IDNToUnicodeOneComponent(const base::char16* comp, |
// This returns the actual length required. If this is more than 64 |
// code units, |status| will be U_BUFFER_OVERFLOW_ERROR and we'll try |
// the conversion again, but with a sufficiently large buffer. |
- output_length = uidna_labelToUnicode( |
- uidna, comp, static_cast<int32_t>(comp_len), &(*out)[original_length], |
- output_length, &info, &status); |
+ output_length = uidna_labelToUnicode(uidna, |
+ comp, |
+ static_cast<int32_t>(comp_len), |
+ &(*out)[original_length], |
+ output_length, |
+ &info, |
+ &status); |
} while ((status == U_BUFFER_OVERFLOW_ERROR && info.errors == 0)); |
if (U_SUCCESS(status) && info.errors == 0) { |
// Converted successfully. Ensure that the converted component |
// can be safely displayed to the user. |
out->resize(original_length + output_length); |
- if (IsIDNComponentSafe(out->data() + original_length, output_length, |
- languages)) |
+ if (IsIDNComponentSafe( |
+ out->data() + original_length, output_length, languages)) |
return true; |
} |
@@ -389,9 +402,11 @@ base::string16 IDNToUnicodeWithAdjustments( |
bool converted_idn = false; |
if (component_end > component_start) { |
// Add the substring that we just found. |
- converted_idn = IDNToUnicodeOneComponent( |
- input16.data() + component_start, component_length, languages, |
- &out16); |
+ converted_idn = |
+ IDNToUnicodeOneComponent(input16.data() + component_start, |
+ component_length, |
+ languages, |
+ &out16); |
} |
size_t new_component_length = out16.length() - new_component_start; |
@@ -444,14 +459,20 @@ base::string16 FormatViewSourceUrl( |
// Format the underlying URL and record adjustments. |
const std::string& url_str(url.possibly_invalid_spec()); |
adjustments->clear(); |
- base::string16 result(base::ASCIIToUTF16(kViewSource) + |
+ base::string16 result( |
+ base::ASCIIToUTF16(kViewSource) + |
FormatUrlWithAdjustments(GURL(url_str.substr(kViewSourceLength)), |
- languages, format_types, unescape_rules, |
- new_parsed, prefix_end, adjustments)); |
+ languages, |
+ format_types, |
+ unescape_rules, |
+ new_parsed, |
+ prefix_end, |
+ adjustments)); |
// Revise |adjustments| by shifting to the offsets to prefix that the above |
// call to FormatUrl didn't get to see. |
for (base::OffsetAdjuster::Adjustments::iterator it = adjustments->begin(); |
- it != adjustments->end(); ++it) |
+ it != adjustments->end(); |
+ ++it) |
it->original_offset += kViewSourceLength; |
// Adjust positions of the parsed components. |
@@ -487,15 +508,13 @@ class AppendComponentTransform { |
class HostComponentTransform : public AppendComponentTransform { |
public: |
explicit HostComponentTransform(const std::string& languages) |
- : languages_(languages) { |
- } |
+ : languages_(languages) {} |
private: |
virtual base::string16 Execute( |
const std::string& component_text, |
base::OffsetAdjuster::Adjustments* adjustments) const OVERRIDE { |
- return IDNToUnicodeWithAdjustments(component_text, languages_, |
- adjustments); |
+ return IDNToUnicodeWithAdjustments(component_text, languages_, adjustments); |
} |
const std::string& languages_; |
@@ -504,17 +523,16 @@ class HostComponentTransform : public AppendComponentTransform { |
class NonHostComponentTransform : public AppendComponentTransform { |
public: |
explicit NonHostComponentTransform(UnescapeRule::Type unescape_rules) |
- : unescape_rules_(unescape_rules) { |
- } |
+ : unescape_rules_(unescape_rules) {} |
private: |
virtual base::string16 Execute( |
const std::string& component_text, |
base::OffsetAdjuster::Adjustments* adjustments) const OVERRIDE { |
- return (unescape_rules_ == UnescapeRule::NONE) ? |
- base::UTF8ToUTF16WithAdjustments(component_text, adjustments) : |
- UnescapeAndDecodeUTF8URLComponentWithAdjustments(component_text, |
- unescape_rules_, adjustments); |
+ return (unescape_rules_ == UnescapeRule::NONE) |
+ ? base::UTF8ToUTF16WithAdjustments(component_text, adjustments) |
+ : UnescapeAndDecodeUTF8URLComponentWithAdjustments( |
+ component_text, unescape_rules_, adjustments); |
} |
const UnescapeRule::Type unescape_rules_; |
@@ -537,7 +555,8 @@ void AppendFormattedComponent(const std::string& spec, |
size_t original_component_begin = |
static_cast<size_t>(original_component.begin); |
size_t output_component_begin = output->length(); |
- std::string component_str(spec, original_component_begin, |
+ std::string component_str(spec, |
+ original_component_begin, |
static_cast<size_t>(original_component.len)); |
// Transform |component_str| and modify |adjustments| appropriately. |
@@ -548,8 +567,9 @@ void AppendFormattedComponent(const std::string& spec, |
// Shift all the adjustments made for this component so the offsets are |
// valid for the original string and add them to |adjustments|. |
for (base::OffsetAdjuster::Adjustments::iterator comp_iter = |
- component_transform_adjustments.begin(); |
- comp_iter != component_transform_adjustments.end(); ++comp_iter) |
+ component_transform_adjustments.begin(); |
+ comp_iter != component_transform_adjustments.end(); |
+ ++comp_iter) |
comp_iter->original_offset += original_component_begin; |
if (adjustments) { |
adjustments->insert(adjustments->end(), |
@@ -570,12 +590,13 @@ void AppendFormattedComponent(const std::string& spec, |
} // namespace |
-const FormatUrlType kFormatUrlOmitNothing = 0; |
-const FormatUrlType kFormatUrlOmitUsernamePassword = 1 << 0; |
-const FormatUrlType kFormatUrlOmitHTTP = 1 << 1; |
+const FormatUrlType kFormatUrlOmitNothing = 0; |
+const FormatUrlType kFormatUrlOmitUsernamePassword = 1 << 0; |
+const FormatUrlType kFormatUrlOmitHTTP = 1 << 1; |
const FormatUrlType kFormatUrlOmitTrailingSlashOnBareHostname = 1 << 2; |
-const FormatUrlType kFormatUrlOmitAll = kFormatUrlOmitUsernamePassword | |
- kFormatUrlOmitHTTP | kFormatUrlOmitTrailingSlashOnBareHostname; |
+const FormatUrlType kFormatUrlOmitAll = |
+ kFormatUrlOmitUsernamePassword | kFormatUrlOmitHTTP | |
+ kFormatUrlOmitTrailingSlashOnBareHostname; |
base::string16 IDNToUnicode(const std::string& host, |
const std::string& languages) { |
@@ -626,8 +647,11 @@ void AppendFormattedHost(const GURL& url, |
const std::string& languages, |
base::string16* output) { |
AppendFormattedComponent(url.possibly_invalid_spec(), |
- url.parsed_for_possibly_invalid_spec().host, |
- HostComponentTransform(languages), output, NULL, NULL); |
+ url.parsed_for_possibly_invalid_spec().host, |
+ HostComponentTransform(languages), |
+ output, |
+ NULL, |
+ NULL); |
} |
base::string16 FormatUrlWithOffsets( |
@@ -640,8 +664,13 @@ base::string16 FormatUrlWithOffsets( |
std::vector<size_t>* offsets_for_adjustment) { |
base::OffsetAdjuster::Adjustments adjustments; |
const base::string16& format_url_return_value = |
- FormatUrlWithAdjustments(url, languages, format_types, unescape_rules, |
- new_parsed, prefix_end, &adjustments); |
+ FormatUrlWithAdjustments(url, |
+ languages, |
+ format_types, |
+ unescape_rules, |
+ new_parsed, |
+ prefix_end, |
+ &adjustments); |
base::OffsetAdjuster::AdjustOffsets(adjustments, offsets_for_adjustment); |
if (offsets_for_adjustment) { |
std::for_each( |
@@ -675,8 +704,12 @@ base::string16 FormatUrlWithAdjustments( |
const char* const kViewSourceTwice = "view-source:view-source:"; |
if (url.SchemeIs(kViewSource) && |
!StartsWithASCII(url.possibly_invalid_spec(), kViewSourceTwice, false)) { |
- return FormatViewSourceUrl(url, languages, format_types, |
- unescape_rules, new_parsed, prefix_end, |
+ return FormatViewSourceUrl(url, |
+ languages, |
+ format_types, |
+ unescape_rules, |
+ new_parsed, |
+ prefix_end, |
adjustments); |
} |
@@ -688,7 +721,8 @@ base::string16 FormatUrlWithAdjustments( |
// Scheme & separators. These are ASCII. |
base::string16 url_string; |
url_string.insert( |
- url_string.end(), spec.begin(), |
+ url_string.end(), |
+ spec.begin(), |
spec.begin() + parsed.CountCharactersBefore(url::Parsed::USERNAME, true)); |
const char kHTTP[] = "http://"; |
const char kFTP[] = "ftp."; |
@@ -699,8 +733,8 @@ base::string16 FormatUrlWithAdjustments( |
// formatted URL is directly pre-filled into an input field.) For this reason |
// we avoid stripping "http://" in this case. |
bool omit_http = (format_types & kFormatUrlOmitHTTP) && |
- EqualsASCII(url_string, kHTTP) && |
- !StartsWithASCII(url.host(), kFTP, true); |
+ EqualsASCII(url_string, kHTTP) && |
+ !StartsWithASCII(url.host(), kFTP, true); |
new_parsed->scheme = parsed.scheme; |
// Username & password. |
@@ -731,14 +765,20 @@ base::string16 FormatUrlWithAdjustments( |
} |
} |
} else { |
- AppendFormattedComponent(spec, parsed.username, |
+ AppendFormattedComponent(spec, |
+ parsed.username, |
NonHostComponentTransform(unescape_rules), |
- &url_string, &new_parsed->username, adjustments); |
+ &url_string, |
+ &new_parsed->username, |
+ adjustments); |
if (parsed.password.is_valid()) |
url_string.push_back(':'); |
- AppendFormattedComponent(spec, parsed.password, |
+ AppendFormattedComponent(spec, |
+ parsed.password, |
NonHostComponentTransform(unescape_rules), |
- &url_string, &new_parsed->password, adjustments); |
+ &url_string, |
+ &new_parsed->password, |
+ adjustments); |
if (parsed.username.is_valid() || parsed.password.is_valid()) |
url_string.push_back('@'); |
} |
@@ -746,8 +786,12 @@ base::string16 FormatUrlWithAdjustments( |
*prefix_end = static_cast<size_t>(url_string.length()); |
// Host. |
- AppendFormattedComponent(spec, parsed.host, HostComponentTransform(languages), |
- &url_string, &new_parsed->host, adjustments); |
+ AppendFormattedComponent(spec, |
+ parsed.host, |
+ HostComponentTransform(languages), |
+ &url_string, |
+ &new_parsed->host, |
+ adjustments); |
// Port. |
if (parsed.port.is_nonempty()) { |
@@ -764,9 +808,12 @@ base::string16 FormatUrlWithAdjustments( |
// Path & query. Both get the same general unescape & convert treatment. |
if (!(format_types & kFormatUrlOmitTrailingSlashOnBareHostname) || |
!CanStripTrailingSlash(url)) { |
- AppendFormattedComponent(spec, parsed.path, |
+ AppendFormattedComponent(spec, |
+ parsed.path, |
NonHostComponentTransform(unescape_rules), |
- &url_string, &new_parsed->path, adjustments); |
+ &url_string, |
+ &new_parsed->path, |
+ adjustments); |
} else { |
if (parsed.path.len > 0) { |
adjustments->push_back(base::OffsetAdjuster::Adjustment( |
@@ -775,16 +822,22 @@ base::string16 FormatUrlWithAdjustments( |
} |
if (parsed.query.is_valid()) |
url_string.push_back('?'); |
- AppendFormattedComponent(spec, parsed.query, |
+ AppendFormattedComponent(spec, |
+ parsed.query, |
NonHostComponentTransform(unescape_rules), |
- &url_string, &new_parsed->query, adjustments); |
+ &url_string, |
+ &new_parsed->query, |
+ adjustments); |
// Ref. This is valid, unescaped UTF-8, so we can just convert. |
if (parsed.ref.is_valid()) |
url_string.push_back('#'); |
- AppendFormattedComponent(spec, parsed.ref, |
+ AppendFormattedComponent(spec, |
+ parsed.ref, |
NonHostComponentTransform(UnescapeRule::NONE), |
- &url_string, &new_parsed->ref, adjustments); |
+ &url_string, |
+ &new_parsed->ref, |
+ adjustments); |
// If we need to strip out http do it after the fact. |
if (omit_http && StartsWith(url_string, base::ASCIIToUTF16(kHTTP), true)) { |
@@ -795,7 +848,7 @@ base::string16 FormatUrlWithAdjustments( |
// after stripping the prefix. The only thing necessary is to add an |
// adjustment to reflect the stripped prefix. |
adjustments->insert(adjustments->begin(), |
- base::OffsetAdjuster::Adjustment(0, kHTTPSize, 0)); |
+ base::OffsetAdjuster::Adjustment(0, kHTTPSize, 0)); |
if (prefix_end) |
*prefix_end -= kHTTPSize; |
@@ -820,8 +873,13 @@ base::string16 FormatUrl(const GURL& url, |
Offsets offsets; |
if (offset_for_adjustment) |
offsets.push_back(*offset_for_adjustment); |
- base::string16 result = FormatUrlWithOffsets(url, languages, format_types, |
- unescape_rules, new_parsed, prefix_end, &offsets); |
+ base::string16 result = FormatUrlWithOffsets(url, |
+ languages, |
+ format_types, |
+ unescape_rules, |
+ new_parsed, |
+ prefix_end, |
+ &offsets); |
if (offset_for_adjustment) |
*offset_for_adjustment = offsets[0]; |
return result; |