Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1041)

Unified Diff: net/base/net_util.cc

Issue 6898026: Eliminate wstring from base/utf_offset_string_conversions.h, net/base/escape.h, and net/base/net_... (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/
Patch Set: '' Created 9 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « net/base/net_util.h ('k') | net/base/net_util_unittest.cc » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: net/base/net_util.cc
===================================================================
--- net/base/net_util.cc (revision 82762)
+++ net/base/net_util.cc (working copy)
@@ -155,40 +155,6 @@
22, // ssh
};
-template<typename STR>
-STR GetSpecificHeaderT(const STR& headers, const STR& name) {
- // We want to grab the Value from the "Key: Value" pairs in the headers,
- // which should look like this (no leading spaces, \n-separated) (we format
- // them this way in url_request_inet.cc):
- // HTTP/1.1 200 OK\n
- // ETag: "6d0b8-947-24f35ec0"\n
- // Content-Length: 2375\n
- // Content-Type: text/html; charset=UTF-8\n
- // Last-Modified: Sun, 03 Sep 2006 04:34:43 GMT\n
- if (headers.empty())
- return STR();
-
- STR match;
- match.push_back('\n');
- match.append(name);
- match.push_back(':');
-
- typename STR::const_iterator begin =
- search(headers.begin(), headers.end(), match.begin(), match.end(),
- base::CaseInsensitiveCompareASCII<typename STR::value_type>());
-
- if (begin == headers.end())
- return STR();
-
- begin += match.length();
-
- typename STR::const_iterator end = find(begin, headers.end(), '\n');
-
- STR ret;
- TrimWhitespace(STR(begin, end), TRIM_ALL, &ret);
- return ret;
-}
-
// Similar to Base64Decode. Decodes a Q-encoded string to a sequence
// of bytes. If input is invalid, return false.
bool QPDecode(const std::string& input, std::string* output) {
@@ -276,12 +242,12 @@
if (IsStringUTF8(encoded_word)) {
*output = encoded_word;
} else {
- std::wstring wide_output;
+ string16 utf16_output;
if (!referrer_charset.empty() &&
- base::CodepageToWide(encoded_word, referrer_charset.c_str(),
- base::OnStringConversionError::FAIL,
- &wide_output)) {
- *output = WideToUTF8(wide_output);
+ base::CodepageToUTF16(encoded_word, referrer_charset.c_str(),
+ base::OnStringConversionError::FAIL,
+ &utf16_output)) {
+ *output = UTF16ToUTF8(utf16_output);
} else {
*output = WideToUTF8(base::SysNativeMBToWide(encoded_word));
}
@@ -414,47 +380,6 @@
return true;
}
-// TODO(mpcomplete): This is a quick and dirty implementation for now. I'm
-// sure this doesn't properly handle all (most?) cases.
-template<typename STR>
-STR GetHeaderParamValueT(const STR& header, const STR& param_name,
- QuoteRule::Type quote_rule) {
- // This assumes args are formatted exactly like "bla; arg1=value; arg2=value".
- typename STR::const_iterator param_begin =
- search(header.begin(), header.end(), param_name.begin(), param_name.end(),
- base::CaseInsensitiveCompareASCII<typename STR::value_type>());
-
- if (param_begin == header.end())
- return STR();
- param_begin += param_name.length();
-
- STR whitespace;
- whitespace.push_back(' ');
- whitespace.push_back('\t');
- const typename STR::size_type equals_offset =
- header.find_first_not_of(whitespace, param_begin - header.begin());
- if (equals_offset == STR::npos || header.at(equals_offset) != '=')
- return STR();
-
- param_begin = header.begin() + equals_offset + 1;
- if (param_begin == header.end())
- return STR();
-
- typename STR::const_iterator param_end;
- if (*param_begin == '"' && quote_rule == QuoteRule::REMOVE_OUTER_QUOTES) {
- ++param_begin; // skip past the quote.
- param_end = find(param_begin, header.end(), '"');
- // If the closing quote is missing, we will treat the rest of the
- // string as the parameter. We can't set |param_end| to the
- // location of the separator (';'), since the separator is
- // technically quoted. See: http://crbug.com/58840
- } else {
- param_end = find(param_begin+1, header.end(), ';');
- }
-
- return STR(param_begin, param_end);
-}
-
// Does some simple normalization of scripts so we can allow certain scripts
// to exist together.
// TODO(brettw) bug 880223: we should allow some other languages to be
@@ -593,7 +518,7 @@
// user.
bool IsIDNComponentSafe(const char16* str,
int str_len,
- const std::wstring& languages) {
+ const std::string& languages) {
// Most common cases (non-IDN) do not reach here so that we don't
// need a fast return path.
// TODO(jungshik) : Check if there's any character inappropriate
@@ -677,8 +602,7 @@
// the remainder.
component_characters.removeAll(common_characters);
- std::string languages_list(WideToASCII(languages));
- StringTokenizer t(languages_list, ",");
+ StringTokenizer t(languages, ",");
while (t.GetNext()) {
if (IsComponentCoveredByLang(component_characters, t.token()))
return true;
@@ -692,7 +616,7 @@
// conversion was performed.
bool IDNToUnicodeOneComponent(const char16* comp,
size_t comp_len,
- const std::wstring& languages,
+ const std::string& languages,
string16* out) {
DCHECK(out);
if (comp_len == 0)
@@ -734,15 +658,78 @@
return false;
}
+// TODO(brettw) bug 734373: check the scripts for each host component and
+// don't un-IDN-ize if there is more than one. Alternatively, only IDN for
+// scripts that the user has installed. For now, just put the entire
+// path through IDN. Maybe this feature can be implemented in ICU itself?
+//
+// We may want to skip this step in the case of file URLs to allow unicode
+// UNC hostnames regardless of encodings.
+string16 IDNToUnicodeWithOffsets(
+ const char* host,
+ size_t host_len,
+ const std::string& languages,
+ std::vector<size_t>* offsets_for_adjustment) {
+ // Convert the ASCII input to a string16 for ICU.
+ string16 input16;
+ input16.reserve(host_len);
+ input16.insert(input16.end(), host, host + host_len);
+
+ // Do each component of the host separately, since we enforce script matching
+ // on a per-component basis.
+ AdjustOffset::Adjustments adjustments;
+ string16 out16;
+ for (size_t component_start = 0, component_end;
+ component_start < input16.length();
+ component_start = component_end + 1) {
+ // Find the end of the component.
+ component_end = input16.find('.', component_start);
+ if (component_end == string16::npos)
+ component_end = input16.length(); // For getting the last component.
+ size_t component_length = component_end - component_start;
+ size_t new_component_start = out16.length();
+ bool converted_idn = false;
+ if (component_end > component_start) {
+ // Add the substring that we just found.
+ converted_idn = IDNToUnicodeOneComponent(input16.data() + component_start,
+ component_length, languages, &out16);
+ }
+ size_t new_component_length = out16.length() - new_component_start;
+
+ if (converted_idn && offsets_for_adjustment) {
+ adjustments.push_back(AdjustOffset::Adjustment(
+ component_start, component_length, new_component_length));
+ }
+
+ // Need to add the dot we just found (if we found one).
+ if (component_end < input16.length())
+ out16.push_back('.');
+ }
+
+ // Make offset adjustment.
+ if (offsets_for_adjustment) {
+ if (!adjustments.empty()) {
+ std::for_each(offsets_for_adjustment->begin(),
+ offsets_for_adjustment->end(),
+ AdjustOffset(adjustments));
+ }
+ std::for_each(offsets_for_adjustment->begin(),
+ offsets_for_adjustment->end(),
+ LimitOffset<string16>(out16.length()));
+ }
+
+ return out16;
+}
+
struct SubtractFromOffset {
explicit SubtractFromOffset(size_t amount)
- : amount(amount) {}
+ : amount(amount) {}
void operator()(size_t& offset) {
- if (offset != std::wstring::npos) {
+ if (offset != string16::npos) {
if (offset >= amount)
offset -= amount;
else
- offset = std::wstring::npos;
+ offset = string16::npos;
}
}
@@ -751,9 +738,9 @@
struct AddToOffset {
explicit AddToOffset(size_t amount)
- : amount(amount) {}
+ : amount(amount) {}
void operator()(size_t& offset) {
- if (offset != std::wstring::npos)
+ if (offset != string16::npos)
offset += amount;
}
@@ -789,20 +776,20 @@
offsets_iter != offsets_for_adjustment->end();
++offsets_iter, ++host_offsets_iter) {
size_t offset = *offsets_iter;
- if (offset == std::wstring::npos || offset < section_begin) {
+ if (offset == string16::npos || offset < section_begin) {
// The offset is before the host section so leave it as is.
continue;
}
if (offset >= section_begin + old_section_len) {
// The offset is after the host section so adjust by host length delta.
offset += new_section_len - old_section_len;
- } else if (*host_offsets_iter != std::wstring::npos) {
+ } else if (*host_offsets_iter != string16::npos) {
// The offset is within the host and valid so adjust by the host
// reformatting offsets results.
offset = section_begin + *host_offsets_iter;
} else {
// The offset is invalid.
- offset = std::wstring::npos;
+ offset = string16::npos;
}
*offsets_iter = offset;
}
@@ -829,55 +816,47 @@
AdjustComponent(delta, &(parsed->ref));
}
-std::wstring FormatUrlInternal(const GURL& url,
- const std::wstring& languages,
- FormatUrlTypes format_types,
- UnescapeRule::Type unescape_rules,
- url_parse::Parsed* new_parsed,
- size_t* prefix_end,
- std::vector<size_t>* offsets_for_adjustment);
-
-// Helper for FormatUrl()/FormatUrlInternal().
-std::wstring FormatViewSourceUrl(const GURL& url,
- const std::wstring& languages,
- FormatUrlTypes format_types,
- UnescapeRule::Type unescape_rules,
- url_parse::Parsed* new_parsed,
- size_t* prefix_end,
- std::vector<size_t>* offsets_for_adjustment) {
+// Helper for FormatUrlWithOffsets().
+string16 FormatViewSourceUrl(const GURL& url,
+ const std::string& languages,
+ FormatUrlTypes format_types,
+ UnescapeRule::Type unescape_rules,
+ url_parse::Parsed* new_parsed,
+ size_t* prefix_end,
+ std::vector<size_t>* offsets_for_adjustment) {
DCHECK(new_parsed);
DCHECK(offsets_for_adjustment);
- const wchar_t* const kWideViewSource = L"view-source:";
- const size_t kViewSourceLengthPlus1 = 12;
+ const char kViewSource[] = "view-source:";
+ const size_t kViewSourceLength = arraysize(kViewSource) - 1;
std::vector<size_t> saved_offsets(*offsets_for_adjustment);
- GURL real_url(url.possibly_invalid_spec().substr(kViewSourceLengthPlus1));
+ GURL real_url(url.possibly_invalid_spec().substr(kViewSourceLength));
// Clamp the offsets to the source area.
std::for_each(offsets_for_adjustment->begin(),
offsets_for_adjustment->end(),
- SubtractFromOffset(kViewSourceLengthPlus1));
- std::wstring result = FormatUrlInternal(real_url, languages, format_types,
+ SubtractFromOffset(kViewSourceLength));
+ string16 result = FormatUrlWithOffsets(real_url, languages, format_types,
unescape_rules, new_parsed, prefix_end, offsets_for_adjustment);
- result.insert(0, kWideViewSource);
+ result.insert(0, ASCIIToUTF16(kViewSource));
// Adjust position values.
if (new_parsed->scheme.is_nonempty()) {
// Assume "view-source:real-scheme" as a scheme.
- new_parsed->scheme.len += kViewSourceLengthPlus1;
+ new_parsed->scheme.len += kViewSourceLength;
} else {
new_parsed->scheme.begin = 0;
- new_parsed->scheme.len = kViewSourceLengthPlus1 - 1;
+ new_parsed->scheme.len = kViewSourceLength - 1;
}
- AdjustComponents(kViewSourceLengthPlus1, new_parsed);
+ AdjustComponents(kViewSourceLength, new_parsed);
if (prefix_end)
- *prefix_end += kViewSourceLengthPlus1;
+ *prefix_end += kViewSourceLength;
std::for_each(offsets_for_adjustment->begin(),
offsets_for_adjustment->end(),
- AddToOffset(kViewSourceLengthPlus1));
- // Restore all offsets which were not affected by FormatUrlInternal.
+ AddToOffset(kViewSourceLength));
+ // Restore all offsets which were not affected by FormatUrlWithOffsets().
DCHECK_EQ(saved_offsets.size(), offsets_for_adjustment->size());
for (size_t i = 0; i < saved_offsets.size(); ++i) {
- if (saved_offsets[i] < kViewSourceLengthPlus1)
+ if (saved_offsets[i] < kViewSourceLength)
(*offsets_for_adjustment)[i] = saved_offsets[i];
}
return result;
@@ -896,7 +875,7 @@
void AppendFormattedComponent(const std::string& spec,
const url_parse::Component& in_component,
UnescapeRule::Type unescape_rules,
- std::wstring* output,
+ string16* output,
url_parse::Component* out_component,
std::vector<size_t>* offsets_for_adjustment) {
DCHECK(output);
@@ -910,14 +889,13 @@
OffsetsIntoSection(offsets_for_adjustment, component_begin);
if (unescape_rules == UnescapeRule::NONE) {
- output->append(UTF8ToWideAndAdjustOffsets(
+ output->append(UTF8ToUTF16AndAdjustOffsets(
spec.substr(in_component.begin, in_component.len),
&offsets_into_component));
} else {
- output->append(UTF16ToWideHack(
- UnescapeAndDecodeUTF8URLComponentWithOffsets(
- spec.substr(in_component.begin, in_component.len), unescape_rules,
- &offsets_into_component)));
+ output->append(UnescapeAndDecodeUTF8URLComponentWithOffsets(
+ spec.substr(in_component.begin, in_component.len), unescape_rules,
+ &offsets_into_component));
}
size_t new_component_len = output->length() - component_begin;
out_component->len = static_cast<int>(new_component_len);
@@ -931,195 +909,6 @@
}
}
-// TODO(viettrungluu): This is really the old-fashioned version, made internal.
-// I need to really convert |FormatUrl()|.
-std::wstring FormatUrlInternal(const GURL& url,
- const std::wstring& languages,
- FormatUrlTypes format_types,
- UnescapeRule::Type unescape_rules,
- url_parse::Parsed* new_parsed,
- size_t* prefix_end,
- std::vector<size_t>* offsets_for_adjustment) {
- url_parse::Parsed parsed_temp;
- if (!new_parsed)
- new_parsed = &parsed_temp;
- else
- *new_parsed = url_parse::Parsed();
-
- std::vector<size_t> offsets_temp;
- if (!offsets_for_adjustment)
- offsets_for_adjustment = &offsets_temp;
-
- std::wstring url_string;
-
- // Check for empty URLs or 0 available text width.
- if (url.is_empty()) {
- if (prefix_end)
- *prefix_end = 0;
- std::for_each(offsets_for_adjustment->begin(),
- offsets_for_adjustment->end(),
- LimitOffset<std::wstring>(0));
- return url_string;
- }
-
- // Special handling for view-source:. Don't use chrome::kViewSourceScheme
- // because this library shouldn't depend on chrome.
- const char* const kViewSource = "view-source";
- // Reject "view-source:view-source:..." to avoid deep recursion.
- const char* const kViewSourceTwice = "view-source:view-source:";
- if (url.SchemeIs(kViewSource) &&
- !StartsWithASCII(url.possibly_invalid_spec(), kViewSourceTwice, false)) {
- return FormatViewSourceUrl(url, languages, format_types,
- unescape_rules, new_parsed, prefix_end, offsets_for_adjustment);
- }
-
- // We handle both valid and invalid URLs (this will give us the spec
- // regardless of validity).
- const std::string& spec = url.possibly_invalid_spec();
- const url_parse::Parsed& parsed = url.parsed_for_possibly_invalid_spec();
- size_t spec_length = spec.length();
- std::for_each(offsets_for_adjustment->begin(),
- offsets_for_adjustment->end(),
- LimitOffset<std::wstring>(spec_length));
-
- // Copy everything before the username (the scheme and the separators.)
- // These are ASCII.
- url_string.insert(url_string.end(), spec.begin(),
- spec.begin() + parsed.CountCharactersBefore(url_parse::Parsed::USERNAME,
- true));
-
- const wchar_t kHTTP[] = L"http://";
- const char kFTP[] = "ftp.";
- // URLFixerUpper::FixupURL() treats "ftp.foo.com" as ftp://ftp.foo.com. This
- // means that if we trim "http://" off a URL whose host starts with "ftp." and
- // the user inputs this into any field subject to fixup (which is basically
- // all input fields), the meaning would be changed. (In fact, often the
- // formatted URL is directly pre-filled into an input field.) For this reason
- // we avoid stripping "http://" in this case.
- bool omit_http =
- (format_types & kFormatUrlOmitHTTP) && (url_string == kHTTP) &&
- (url.host().compare(0, arraysize(kFTP) - 1, kFTP) != 0);
-
- new_parsed->scheme = parsed.scheme;
-
- if ((format_types & kFormatUrlOmitUsernamePassword) != 0) {
- // Remove the username and password fields. We don't want to display those
- // to the user since they can be used for attacks,
- // e.g. "http://google.com:search@evil.ru/"
- new_parsed->username.reset();
- new_parsed->password.reset();
- // Update the offsets based on removed username and/or password.
- if (!offsets_for_adjustment->empty() &&
- (parsed.username.is_nonempty() || parsed.password.is_nonempty())) {
- AdjustOffset::Adjustments adjustments;
- if (parsed.username.is_nonempty() && parsed.password.is_nonempty()) {
- // The seeming off-by-one and off-by-two in these first two lines are to
- // account for the ':' after the username and '@' after the password.
- adjustments.push_back(AdjustOffset::Adjustment(
- static_cast<size_t>(parsed.username.begin),
- static_cast<size_t>(parsed.username.len + parsed.password.len +
- 2), 0));
- } else {
- const url_parse::Component* nonempty_component =
- parsed.username.is_nonempty() ? &parsed.username : &parsed.password;
- // The seeming off-by-one in below is to account for the '@' after the
- // username/password.
- adjustments.push_back(AdjustOffset::Adjustment(
- static_cast<size_t>(nonempty_component->begin),
- static_cast<size_t>(nonempty_component->len + 1), 0));
- }
-
- // Make offset adjustment.
- std::for_each(offsets_for_adjustment->begin(),
- offsets_for_adjustment->end(),
- AdjustOffset(adjustments));
- }
- } else {
- AppendFormattedComponent(spec, parsed.username, unescape_rules, &url_string,
- &new_parsed->username, offsets_for_adjustment);
- if (parsed.password.is_valid())
- url_string.push_back(':');
- AppendFormattedComponent(spec, parsed.password, unescape_rules, &url_string,
- &new_parsed->password, offsets_for_adjustment);
- if (parsed.username.is_valid() || parsed.password.is_valid())
- url_string.push_back('@');
- }
- if (prefix_end)
- *prefix_end = static_cast<size_t>(url_string.length());
-
- AppendFormattedHostWithOffsets(url, languages, &url_string, new_parsed,
- offsets_for_adjustment);
-
- // Port.
- if (parsed.port.is_nonempty()) {
- url_string.push_back(':');
- new_parsed->port.begin = url_string.length();
- url_string.insert(url_string.end(),
- spec.begin() + parsed.port.begin,
- spec.begin() + parsed.port.end());
- new_parsed->port.len = url_string.length() - new_parsed->port.begin;
- } else {
- new_parsed->port.reset();
- }
-
- // Path and query both get the same general unescape & convert treatment.
- if (!(format_types & kFormatUrlOmitTrailingSlashOnBareHostname) ||
- !CanStripTrailingSlash(url)) {
- AppendFormattedComponent(spec, parsed.path, unescape_rules, &url_string,
- &new_parsed->path, offsets_for_adjustment);
- }
- if (parsed.query.is_valid())
- url_string.push_back('?');
- AppendFormattedComponent(spec, parsed.query, unescape_rules, &url_string,
- &new_parsed->query, offsets_for_adjustment);
-
- // Reference is stored in valid, unescaped UTF-8, so we can just convert.
- if (parsed.ref.is_valid()) {
- url_string.push_back('#');
- size_t ref_begin = url_string.length();
- new_parsed->ref.begin = static_cast<int>(ref_begin);
-
- // Compose a list of offsets within the section.
- std::vector<size_t> offsets_into_ref =
- OffsetsIntoSection(offsets_for_adjustment, ref_begin);
-
- if (parsed.ref.len > 0) {
- url_string.append(UTF8ToWideAndAdjustOffsets(spec.substr(parsed.ref.begin,
- parsed.ref.len),
- &offsets_into_ref));
- }
- size_t old_ref_len = static_cast<size_t>(parsed.ref.len);
- size_t new_ref_len = url_string.length() - new_parsed->ref.begin;
- new_parsed->ref.len = static_cast<int>(new_ref_len);
-
- // Apply offset adjustments.
- ApplySectionAdjustments(offsets_into_ref, offsets_for_adjustment,
- old_ref_len, new_ref_len, ref_begin);
- }
-
- // If we need to strip out http do it after the fact. This way we don't need
- // to worry about how offset_for_adjustment is interpreted.
- const size_t kHTTPSize = arraysize(kHTTP) - 1;
- if (omit_http && !url_string.compare(0, kHTTPSize, kHTTP)) {
- url_string = url_string.substr(kHTTPSize);
- AdjustOffset::Adjustments adjustments;
- adjustments.push_back(AdjustOffset::Adjustment(0, kHTTPSize, 0));
- std::for_each(offsets_for_adjustment->begin(),
- offsets_for_adjustment->end(),
- AdjustOffset(adjustments));
- if (prefix_end)
- *prefix_end -= kHTTPSize;
-
- // Adjust new_parsed.
- DCHECK(new_parsed->scheme.is_valid());
- int delta = -(new_parsed->scheme.len + 3); // +3 for ://.
- new_parsed->scheme.reset();
- AdjustComponents(delta, new_parsed);
- }
-
- return url_string;
-}
-
} // namespace
const FormatUrlType kFormatUrlOmitNothing = 0;
@@ -1163,14 +952,34 @@
return GURL(url_string);
}
-std::wstring GetSpecificHeader(const std::wstring& headers,
- const std::wstring& name) {
- return GetSpecificHeaderT(headers, name);
-}
-
std::string GetSpecificHeader(const std::string& headers,
const std::string& name) {
- return GetSpecificHeaderT(headers, name);
+ // We want to grab the Value from the "Key: Value" pairs in the headers,
+ // which should look like this (no leading spaces, \n-separated) (we format
+ // them this way in url_request_inet.cc):
+ // HTTP/1.1 200 OK\n
+ // ETag: "6d0b8-947-24f35ec0"\n
+ // Content-Length: 2375\n
+ // Content-Type: text/html; charset=UTF-8\n
+ // Last-Modified: Sun, 03 Sep 2006 04:34:43 GMT\n
+ if (headers.empty())
+ return std::string();
+
+ std::string match('\n' + name + ':');
+
+ std::string::const_iterator begin =
+ search(headers.begin(), headers.end(), match.begin(), match.end(),
+ base::CaseInsensitiveCompareASCII<char>());
+
+ if (begin == headers.end())
+ return std::string();
+
+ begin += match.length();
+
+ std::string ret;
+ TrimWhitespace(std::string(begin, find(begin, headers.end(), '\n')), TRIM_ALL,
+ &ret);
+ return ret;
}
bool DecodeCharset(const std::string& input,
@@ -1245,88 +1054,50 @@
return std::string();
}
-std::wstring GetHeaderParamValue(const std::wstring& field,
- const std::wstring& param_name,
- QuoteRule::Type quote_rule) {
- return GetHeaderParamValueT(field, param_name, quote_rule);
-}
-
-std::string GetHeaderParamValue(const std::string& field,
+// TODO(mpcomplete): This is a quick and dirty implementation for now. I'm
+// sure this doesn't properly handle all (most?) cases.
+std::string GetHeaderParamValue(const std::string& header,
const std::string& param_name,
QuoteRule::Type quote_rule) {
- return GetHeaderParamValueT(field, param_name, quote_rule);
-}
+ // This assumes args are formatted exactly like "bla; arg1=value; arg2=value".
+ std::string::const_iterator param_begin =
+ search(header.begin(), header.end(), param_name.begin(), param_name.end(),
+ base::CaseInsensitiveCompareASCII<char>());
-// TODO(brettw) bug 734373: check the scripts for each host component and
-// don't un-IDN-ize if there is more than one. Alternatively, only IDN for
-// scripts that the user has installed. For now, just put the entire
-// path through IDN. Maybe this feature can be implemented in ICU itself?
-//
-// We may want to skip this step in the case of file URLs to allow unicode
-// UNC hostnames regardless of encodings.
-std::wstring IDNToUnicodeWithOffsets(
- const char* host,
- size_t host_len,
- const std::wstring& languages,
- std::vector<size_t>* offsets_for_adjustment) {
- // Convert the ASCII input to a wide string for ICU.
- string16 input16;
- input16.reserve(host_len);
- input16.insert(input16.end(), host, host + host_len);
+ if (param_begin == header.end())
+ return std::string();
+ param_begin += param_name.length();
- // Do each component of the host separately, since we enforce script matching
- // on a per-component basis.
- AdjustOffset::Adjustments adjustments;
- string16 out16;
- for (size_t component_start = 0, component_end;
- component_start < input16.length();
- component_start = component_end + 1) {
- // Find the end of the component.
- component_end = input16.find('.', component_start);
- if (component_end == string16::npos)
- component_end = input16.length(); // For getting the last component.
- size_t component_length = component_end - component_start;
- size_t new_component_start = out16.length();
- bool converted_idn = false;
- if (component_end > component_start) {
- // Add the substring that we just found.
- converted_idn = IDNToUnicodeOneComponent(input16.data() + component_start,
- component_length, languages, &out16);
- }
- size_t new_component_length = out16.length() - new_component_start;
+ std::string whitespace(" \t");
+ size_t equals_offset =
+ header.find_first_not_of(whitespace, param_begin - header.begin());
+ if (equals_offset == std::string::npos || header[equals_offset] != '=')
+ return std::string();
- if (converted_idn && offsets_for_adjustment) {
- adjustments.push_back(AdjustOffset::Adjustment(
- component_start, component_length, new_component_length));
- }
+ param_begin = header.begin() + equals_offset + 1;
+ if (param_begin == header.end())
+ return std::string();
- // Need to add the dot we just found (if we found one).
- if (component_end < input16.length())
- out16.push_back('.');
+ std::string::const_iterator param_end;
+ if (*param_begin == '"' && quote_rule == QuoteRule::REMOVE_OUTER_QUOTES) {
+ ++param_begin; // skip past the quote.
+ param_end = find(param_begin, header.end(), '"');
+ // If the closing quote is missing, we will treat the rest of the
+ // string as the parameter. We can't set |param_end| to the
+ // location of the separator (';'), since the separator is
+ // technically quoted. See: http://crbug.com/58840
+ } else {
+ param_end = find(param_begin + 1, header.end(), ';');
}
- // Make offset adjustment.
- if (offsets_for_adjustment && !adjustments.empty()) {
- std::for_each(offsets_for_adjustment->begin(),
- offsets_for_adjustment->end(),
- AdjustOffset(adjustments));
- }
-
- return UTF16ToWideAndAdjustOffsets(out16, offsets_for_adjustment);
+ return std::string(param_begin, param_end);
}
-std::wstring IDNToUnicode(const char* host,
- size_t host_len,
- const std::wstring& languages,
- size_t* offset_for_adjustment) {
+string16 IDNToUnicode(const char* host,
+ size_t host_len,
+ const std::string& languages) {
std::vector<size_t> offsets;
- if (offset_for_adjustment)
- offsets.push_back(*offset_for_adjustment);
- std::wstring result =
- IDNToUnicodeWithOffsets(host, host_len, languages, &offsets);
- if (offset_for_adjustment)
- *offset_for_adjustment = offsets[0];
- return result;
+ return IDNToUnicodeWithOffsets(host, host_len, languages, &offsets);
}
std::string CanonicalizeHost(const std::string& host,
@@ -1352,13 +1123,6 @@
return canon_host;
}
-std::string CanonicalizeHost(const std::wstring& host,
- url_canon::CanonHostInfo* host_info) {
- std::string converted_host;
- WideToUTF8(host.c_str(), host.length(), &converted_host);
- return CanonicalizeHost(converted_host, host_info);
-}
-
std::string GetDirectoryListingHeader(const string16& title) {
static const base::StringPiece header(
NetModule::GetResource(IDR_DIR_HEADER_HTML));
@@ -1741,8 +1505,8 @@
void AppendFormattedHostWithOffsets(
const GURL& url,
- const std::wstring& languages,
- std::wstring* output,
+ const std::string& languages,
+ string16* output,
url_parse::Parsed* new_parsed,
std::vector<size_t>* offsets_for_adjustment) {
DCHECK(output);
@@ -1765,7 +1529,7 @@
((spec.length() == 0 && host.begin == 0) ||
host.begin < static_cast<int>(spec.length())));
output->append(IDNToUnicodeWithOffsets(&spec[host.begin], old_host_len,
- languages, &offsets_into_host));
+ languages, &offsets_into_host));
size_t new_host_len = output->length() - host_begin;
if (new_parsed)
@@ -1780,8 +1544,8 @@
}
void AppendFormattedHost(const GURL& url,
- const std::wstring& languages,
- std::wstring* output,
+ const std::string& languages,
+ string16* output,
url_parse::Parsed* new_parsed,
size_t* offset_for_adjustment) {
std::vector<size_t> offsets;
@@ -1792,7 +1556,6 @@
*offset_for_adjustment = offsets[0];
}
-// TODO(viettrungluu): convert the wstring |FormatUrlInternal()|.
string16 FormatUrlWithOffsets(const GURL& url,
const std::string& languages,
FormatUrlTypes format_types,
@@ -1800,10 +1563,183 @@
url_parse::Parsed* new_parsed,
size_t* prefix_end,
std::vector<size_t>* offsets_for_adjustment) {
- return WideToUTF16Hack(
- FormatUrlInternal(url, ASCIIToWide(languages), format_types,
- unescape_rules, new_parsed, prefix_end,
- offsets_for_adjustment));
+ url_parse::Parsed parsed_temp;
brettw 2011/04/25 16:19:44 I'm assuming you just moved this code and changed
Peter Kasting 2011/04/25 17:44:52 The only non-trivial change was to change kHTTP fr
+ if (!new_parsed)
+ new_parsed = &parsed_temp;
+ else
+ *new_parsed = url_parse::Parsed();
+
+ std::vector<size_t> offsets_temp;
+ if (!offsets_for_adjustment)
+ offsets_for_adjustment = &offsets_temp;
+
+ string16 url_string;
+
+ // Check for empty URLs or 0 available text width.
+ if (url.is_empty()) {
+ if (prefix_end)
+ *prefix_end = 0;
+ std::for_each(offsets_for_adjustment->begin(),
+ offsets_for_adjustment->end(),
+ LimitOffset<string16>(0));
+ return url_string;
+ }
+
+ // Special handling for view-source:. Don't use chrome::kViewSourceScheme
+ // because this library shouldn't depend on chrome.
+ const char* const kViewSource = "view-source";
+ // Reject "view-source:view-source:..." to avoid deep recursion.
+ const char* const kViewSourceTwice = "view-source:view-source:";
+ if (url.SchemeIs(kViewSource) &&
+ !StartsWithASCII(url.possibly_invalid_spec(), kViewSourceTwice, false)) {
+ return FormatViewSourceUrl(url, languages, format_types,
+ unescape_rules, new_parsed, prefix_end, offsets_for_adjustment);
+ }
+
+ // We handle both valid and invalid URLs (this will give us the spec
+ // regardless of validity).
+ const std::string& spec = url.possibly_invalid_spec();
+ const url_parse::Parsed& parsed = url.parsed_for_possibly_invalid_spec();
+ size_t spec_length = spec.length();
+ std::for_each(offsets_for_adjustment->begin(),
+ offsets_for_adjustment->end(),
+ LimitOffset<string16>(spec_length));
+
+ // Copy everything before the username (the scheme and the separators).
+ // These are ASCII.
+ url_string.insert(url_string.end(), spec.begin(),
+ spec.begin() + parsed.CountCharactersBefore(url_parse::Parsed::USERNAME,
+ true));
+
+ string16 kHTTP = ASCIIToUTF16("http://");
Avi (use Gerrit) 2011/04/25 17:52:07 eww. const char like kFTP below.
+ const char kFTP[] = "ftp.";
+ // URLFixerUpper::FixupURL() treats "ftp.foo.com" as ftp://ftp.foo.com. This
+ // means that if we trim "http://" off a URL whose host starts with "ftp." and
+ // the user inputs this into any field subject to fixup (which is basically
+ // all input fields), the meaning would be changed. (In fact, often the
+ // formatted URL is directly pre-filled into an input field.) For this reason
+ // we avoid stripping "http://" in this case.
+ bool omit_http = (format_types & kFormatUrlOmitHTTP) &&
+ (url_string == kHTTP) &&
+ (url.host().compare(0, arraysize(kFTP) - 1, kFTP) != 0);
Avi (use Gerrit) 2011/04/25 17:52:07 Can you use string_util's LowerCaseEqualsASCII?
brettw 2011/04/25 17:56:28 The host name will be canonicalized so this isn't
+
+ new_parsed->scheme = parsed.scheme;
+
+ if ((format_types & kFormatUrlOmitUsernamePassword) != 0) {
+ // Remove the username and password fields. We don't want to display those
+ // to the user since they can be used for attacks,
+ // e.g. "http://google.com:search@evil.ru/"
+ new_parsed->username.reset();
+ new_parsed->password.reset();
+ // Update the offsets based on removed username and/or password.
+ if (!offsets_for_adjustment->empty() &&
+ (parsed.username.is_nonempty() || parsed.password.is_nonempty())) {
+ AdjustOffset::Adjustments adjustments;
+ if (parsed.username.is_nonempty() && parsed.password.is_nonempty()) {
+ // The seeming off-by-one and off-by-two in these first two lines are to
+ // account for the ':' after the username and '@' after the password.
+ adjustments.push_back(AdjustOffset::Adjustment(
+ static_cast<size_t>(parsed.username.begin),
+ static_cast<size_t>(parsed.username.len + parsed.password.len +
+ 2), 0));
+ } else {
+ const url_parse::Component* nonempty_component =
+ parsed.username.is_nonempty() ? &parsed.username : &parsed.password;
+ // The seeming off-by-one below is to account for the '@' after the
+ // username/password.
+ adjustments.push_back(AdjustOffset::Adjustment(
+ static_cast<size_t>(nonempty_component->begin),
+ static_cast<size_t>(nonempty_component->len + 1), 0));
+ }
+
+ // Make offset adjustment.
+ std::for_each(offsets_for_adjustment->begin(),
+ offsets_for_adjustment->end(),
+ AdjustOffset(adjustments));
+ }
+ } else {
+ AppendFormattedComponent(spec, parsed.username, unescape_rules, &url_string,
+ &new_parsed->username, offsets_for_adjustment);
+ if (parsed.password.is_valid())
+ url_string.push_back(':');
+ AppendFormattedComponent(spec, parsed.password, unescape_rules, &url_string,
+ &new_parsed->password, offsets_for_adjustment);
+ if (parsed.username.is_valid() || parsed.password.is_valid())
+ url_string.push_back('@');
+ }
+ if (prefix_end)
+ *prefix_end = static_cast<size_t>(url_string.length());
+
+ AppendFormattedHostWithOffsets(url, languages, &url_string, new_parsed,
+ offsets_for_adjustment);
+
+ // Port.
+ if (parsed.port.is_nonempty()) {
+ url_string.push_back(':');
+ new_parsed->port.begin = url_string.length();
+ url_string.insert(url_string.end(),
+ spec.begin() + parsed.port.begin,
+ spec.begin() + parsed.port.end());
+ new_parsed->port.len = url_string.length() - new_parsed->port.begin;
+ } else {
+ new_parsed->port.reset();
+ }
+
+ // Path and query both get the same general unescape & convert treatment.
+ if (!(format_types & kFormatUrlOmitTrailingSlashOnBareHostname) ||
+ !CanStripTrailingSlash(url)) {
+ AppendFormattedComponent(spec, parsed.path, unescape_rules, &url_string,
+ &new_parsed->path, offsets_for_adjustment);
+ }
+ if (parsed.query.is_valid())
+ url_string.push_back('?');
+ AppendFormattedComponent(spec, parsed.query, unescape_rules, &url_string,
+ &new_parsed->query, offsets_for_adjustment);
+
+ // Reference is stored in valid, unescaped UTF-8, so we can just convert.
+ if (parsed.ref.is_valid()) {
+ url_string.push_back('#');
+ size_t ref_begin = url_string.length();
+ new_parsed->ref.begin = static_cast<int>(ref_begin);
+
+ // Compose a list of offsets within the section.
+ std::vector<size_t> offsets_into_ref =
+ OffsetsIntoSection(offsets_for_adjustment, ref_begin);
+
+ if (parsed.ref.len > 0) {
+ url_string.append(UTF8ToUTF16AndAdjustOffsets(
+ spec.substr(parsed.ref.begin, parsed.ref.len), &offsets_into_ref));
+ }
+ size_t old_ref_len = static_cast<size_t>(parsed.ref.len);
+ size_t new_ref_len = url_string.length() - new_parsed->ref.begin;
+ new_parsed->ref.len = static_cast<int>(new_ref_len);
+
+ // Apply offset adjustments.
+ ApplySectionAdjustments(offsets_into_ref, offsets_for_adjustment,
+ old_ref_len, new_ref_len, ref_begin);
+ }
+
+ // If we need to strip out http do it after the fact. This way we don't need
+ // to worry about how offsets_for_adjustment is interpreted.
+ const size_t kHTTPSize = kHTTP.length();
+ if (omit_http && !url_string.compare(0, kHTTP.length(), kHTTP)) {
+ url_string = url_string.substr(kHTTPSize);
+ AdjustOffset::Adjustments adjustments;
+ adjustments.push_back(AdjustOffset::Adjustment(0, kHTTPSize, 0));
+ std::for_each(offsets_for_adjustment->begin(),
+ offsets_for_adjustment->end(),
+ AdjustOffset(adjustments));
+ if (prefix_end)
+ *prefix_end -= kHTTPSize;
+
+ // Adjust new_parsed.
+ DCHECK(new_parsed->scheme.is_valid());
+ int delta = -(new_parsed->scheme.len + 3); // +3 for ://.
+ new_parsed->scheme.reset();
+ AdjustComponents(delta, new_parsed);
+ }
+
+ return url_string;
}
string16 FormatUrl(const GURL& url,
@@ -1816,9 +1752,8 @@
std::vector<size_t> offsets;
if (offset_for_adjustment)
offsets.push_back(*offset_for_adjustment);
- string16 result = WideToUTF16Hack(
- FormatUrlInternal(url, ASCIIToWide(languages), format_types,
- unescape_rules, new_parsed, prefix_end, &offsets));
+ string16 result = FormatUrlWithOffsets(url, languages, format_types,
+ unescape_rules, new_parsed, prefix_end, &offsets);
if (offset_for_adjustment)
*offset_for_adjustment = offsets[0];
return result;
@@ -2279,7 +2214,7 @@
size_t ClampComponentOffset::operator()(size_t offset) {
return (offset >= component_start) ?
- offset : std::wstring::npos;
+ offset : string16::npos;
}
} // namespace net
« no previous file with comments | « net/base/net_util.h ('k') | net/base/net_util_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698