Chromium Code Reviews

| Index: net/base/net_util.cc |
| =================================================================== |
| --- net/base/net_util.cc (revision 82762) |
| +++ net/base/net_util.cc (working copy) |
| @@ -155,40 +155,6 @@ |
| 22, // ssh |
| }; |
| -template<typename STR> |
| -STR GetSpecificHeaderT(const STR& headers, const STR& name) { |
| - // We want to grab the Value from the "Key: Value" pairs in the headers, |
| - // which should look like this (no leading spaces, \n-separated) (we format |
| - // them this way in url_request_inet.cc): |
| - // HTTP/1.1 200 OK\n |
| - // ETag: "6d0b8-947-24f35ec0"\n |
| - // Content-Length: 2375\n |
| - // Content-Type: text/html; charset=UTF-8\n |
| - // Last-Modified: Sun, 03 Sep 2006 04:34:43 GMT\n |
| - if (headers.empty()) |
| - return STR(); |
| - |
| - STR match; |
| - match.push_back('\n'); |
| - match.append(name); |
| - match.push_back(':'); |
| - |
| - typename STR::const_iterator begin = |
| - search(headers.begin(), headers.end(), match.begin(), match.end(), |
| - base::CaseInsensitiveCompareASCII<typename STR::value_type>()); |
| - |
| - if (begin == headers.end()) |
| - return STR(); |
| - |
| - begin += match.length(); |
| - |
| - typename STR::const_iterator end = find(begin, headers.end(), '\n'); |
| - |
| - STR ret; |
| - TrimWhitespace(STR(begin, end), TRIM_ALL, &ret); |
| - return ret; |
| -} |
| - |
| // Similar to Base64Decode. Decodes a Q-encoded string to a sequence |
| // of bytes. If input is invalid, return false. |
| bool QPDecode(const std::string& input, std::string* output) { |
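
Since the body of QPDecode() falls outside this hunk, here is a minimal standalone sketch of what Q-decoding (the "Q" encoding of RFC 2047, a quoted-printable variant) involves. It is only an illustration of the format, not the Chromium implementation, and the helper names are made up for the example.

#include <string>

// Illustrative only: decode RFC 2047 "Q" encoding ('_' means space,
// "=XX" is a hex-escaped byte); returns false on malformed input.
// Example: QDecodeSketch("caf=C3=A9_menu", &out) yields "café menu".
static bool HexValue(char c, int* value) {
  if (c >= '0' && c <= '9') { *value = c - '0'; return true; }
  if (c >= 'A' && c <= 'F') { *value = c - 'A' + 10; return true; }
  if (c >= 'a' && c <= 'f') { *value = c - 'a' + 10; return true; }
  return false;
}

static bool QDecodeSketch(const std::string& input, std::string* output) {
  output->clear();
  for (size_t i = 0; i < input.size(); ++i) {
    if (input[i] == '_') {
      output->push_back(' ');          // '_' encodes a space.
    } else if (input[i] == '=') {
      int hi, lo;
      if (i + 2 >= input.size() ||
          !HexValue(input[i + 1], &hi) || !HexValue(input[i + 2], &lo))
        return false;                  // Truncated or non-hex escape.
      output->push_back(static_cast<char>((hi << 4) | lo));
      i += 2;
    } else {
      output->push_back(input[i]);     // Ordinary characters pass through.
    }
  }
  return true;
}
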
| @@ -276,12 +242,12 @@ |
| if (IsStringUTF8(encoded_word)) { |
| *output = encoded_word; |
| } else { |
| - std::wstring wide_output; |
| + string16 utf16_output; |
| if (!referrer_charset.empty() && |
| - base::CodepageToWide(encoded_word, referrer_charset.c_str(), |
| - base::OnStringConversionError::FAIL, |
| - &wide_output)) { |
| - *output = WideToUTF8(wide_output); |
| + base::CodepageToUTF16(encoded_word, referrer_charset.c_str(), |
| + base::OnStringConversionError::FAIL, |
| + &utf16_output)) { |
| + *output = UTF16ToUTF8(utf16_output); |
| } else { |
| *output = WideToUTF8(base::SysNativeMBToWide(encoded_word)); |
| } |
| @@ -414,47 +380,6 @@ |
| return true; |
| } |
| -// TODO(mpcomplete): This is a quick and dirty implementation for now. I'm |
| -// sure this doesn't properly handle all (most?) cases. |
| -template<typename STR> |
| -STR GetHeaderParamValueT(const STR& header, const STR& param_name, |
| - QuoteRule::Type quote_rule) { |
| - // This assumes args are formatted exactly like "bla; arg1=value; arg2=value". |
| - typename STR::const_iterator param_begin = |
| - search(header.begin(), header.end(), param_name.begin(), param_name.end(), |
| - base::CaseInsensitiveCompareASCII<typename STR::value_type>()); |
| - |
| - if (param_begin == header.end()) |
| - return STR(); |
| - param_begin += param_name.length(); |
| - |
| - STR whitespace; |
| - whitespace.push_back(' '); |
| - whitespace.push_back('\t'); |
| - const typename STR::size_type equals_offset = |
| - header.find_first_not_of(whitespace, param_begin - header.begin()); |
| - if (equals_offset == STR::npos || header.at(equals_offset) != '=') |
| - return STR(); |
| - |
| - param_begin = header.begin() + equals_offset + 1; |
| - if (param_begin == header.end()) |
| - return STR(); |
| - |
| - typename STR::const_iterator param_end; |
| - if (*param_begin == '"' && quote_rule == QuoteRule::REMOVE_OUTER_QUOTES) { |
| - ++param_begin; // skip past the quote. |
| - param_end = find(param_begin, header.end(), '"'); |
| - // If the closing quote is missing, we will treat the rest of the |
| - // string as the parameter. We can't set |param_end| to the |
| - // location of the separator (';'), since the separator is |
| - // technically quoted. See: http://crbug.com/58840 |
| - } else { |
| - param_end = find(param_begin+1, header.end(), ';'); |
| - } |
| - |
| - return STR(param_begin, param_end); |
| -} |
| - |
| // Does some simple normalization of scripts so we can allow certain scripts |
| // to exist together. |
| // TODO(brettw) bug 880223: we should allow some other languages to be |
| @@ -593,7 +518,7 @@ |
| // user. |
| bool IsIDNComponentSafe(const char16* str, |
| int str_len, |
| - const std::wstring& languages) { |
| + const std::string& languages) { |
| // Most common cases (non-IDN) do not reach here so that we don't |
| // need a fast return path. |
| // TODO(jungshik) : Check if there's any character inappropriate |
| @@ -677,8 +602,7 @@ |
| // the remainder. |
| component_characters.removeAll(common_characters); |
| - std::string languages_list(WideToASCII(languages)); |
| - StringTokenizer t(languages_list, ","); |
| + StringTokenizer t(languages, ","); |
| while (t.GetNext()) { |
| if (IsComponentCoveredByLang(component_characters, t.token())) |
| return true; |
| @@ -692,7 +616,7 @@ |
| // conversion was performed. |
| bool IDNToUnicodeOneComponent(const char16* comp, |
| size_t comp_len, |
| - const std::wstring& languages, |
| + const std::string& languages, |
| string16* out) { |
| DCHECK(out); |
| if (comp_len == 0) |
| @@ -734,15 +658,78 @@ |
| return false; |
| } |
| +// TODO(brettw) bug 734373: check the scripts for each host component and |
| +// don't un-IDN-ize if there is more than one. Alternatively, only IDN for |
| +// scripts that the user has installed. For now, just put the entire |
| +// path through IDN. Maybe this feature can be implemented in ICU itself? |
| +// |
| +// We may want to skip this step in the case of file URLs to allow unicode |
| +// UNC hostnames regardless of encodings. |
| +string16 IDNToUnicodeWithOffsets( |
| + const char* host, |
| + size_t host_len, |
| + const std::string& languages, |
| + std::vector<size_t>* offsets_for_adjustment) { |
| + // Convert the ASCII input to a string16 for ICU. |
| + string16 input16; |
| + input16.reserve(host_len); |
| + input16.insert(input16.end(), host, host + host_len); |
| + |
| + // Do each component of the host separately, since we enforce script matching |
| + // on a per-component basis. |
| + AdjustOffset::Adjustments adjustments; |
| + string16 out16; |
| + for (size_t component_start = 0, component_end; |
| + component_start < input16.length(); |
| + component_start = component_end + 1) { |
| + // Find the end of the component. |
| + component_end = input16.find('.', component_start); |
| + if (component_end == string16::npos) |
| + component_end = input16.length(); // For getting the last component. |
| + size_t component_length = component_end - component_start; |
| + size_t new_component_start = out16.length(); |
| + bool converted_idn = false; |
| + if (component_end > component_start) { |
| + // Add the substring that we just found. |
| + converted_idn = IDNToUnicodeOneComponent(input16.data() + component_start, |
| + component_length, languages, &out16); |
| + } |
| + size_t new_component_length = out16.length() - new_component_start; |
| + |
| + if (converted_idn && offsets_for_adjustment) { |
| + adjustments.push_back(AdjustOffset::Adjustment( |
| + component_start, component_length, new_component_length)); |
| + } |
| + |
| + // Need to add the dot we just found (if we found one). |
| + if (component_end < input16.length()) |
| + out16.push_back('.'); |
| + } |
| + |
| + // Make offset adjustment. |
| + if (offsets_for_adjustment) { |
| + if (!adjustments.empty()) { |
| + std::for_each(offsets_for_adjustment->begin(), |
| + offsets_for_adjustment->end(), |
| + AdjustOffset(adjustments)); |
| + } |
| + std::for_each(offsets_for_adjustment->begin(), |
| + offsets_for_adjustment->end(), |
| + LimitOffset<string16>(out16.length())); |
| + } |
| + |
| + return out16; |
| +} |
| + |
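
The AdjustOffset::Adjustment triples recorded above are what let callers keep cursor or match positions in sync with the rewritten string. The exact remapping lives in AdjustOffset, which is outside this diff; the standalone sketch below shows one plausible policy, with the assumption that an offset falling strictly inside a converted component has no meaningful mapping and is invalidated.

#include <cstddef>
#include <string>
#include <vector>

// Sketch of the (original_offset, original_length, new_length) adjustment
// idiom used throughout this file. The in-span behavior (-> npos) is an
// assumption for illustration only.
struct AdjustmentSketch {
  size_t original_offset;
  size_t original_length;
  size_t new_length;
};

static void RemapOffsetSketch(const std::vector<AdjustmentSketch>& adjustments,
                              size_t* offset) {
  if (*offset == std::string::npos)
    return;
  ptrdiff_t delta = 0;  // Net length change from spans entirely before *offset.
  for (size_t i = 0; i < adjustments.size(); ++i) {
    const AdjustmentSketch& a = adjustments[i];
    if (*offset < a.original_offset)
      break;  // Adjustments are assumed sorted by original_offset.
    if (*offset < a.original_offset + a.original_length) {
      *offset = std::string::npos;  // Points inside a replaced span.
      return;
    }
    delta += static_cast<ptrdiff_t>(a.new_length) -
             static_cast<ptrdiff_t>(a.original_length);
  }
  *offset = static_cast<size_t>(static_cast<ptrdiff_t>(*offset) + delta);
}
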
| struct SubtractFromOffset { |
| explicit SubtractFromOffset(size_t amount) |
| - : amount(amount) {} |
| + : amount(amount) {} |
| void operator()(size_t& offset) { |
| - if (offset != std::wstring::npos) { |
| + if (offset != string16::npos) { |
| if (offset >= amount) |
| offset -= amount; |
| else |
| - offset = std::wstring::npos; |
| + offset = string16::npos; |
| } |
| } |
| @@ -751,9 +738,9 @@ |
| struct AddToOffset { |
| explicit AddToOffset(size_t amount) |
| - : amount(amount) {} |
| + : amount(amount) {} |
| void operator()(size_t& offset) { |
| - if (offset != std::wstring::npos) |
| + if (offset != string16::npos) |
| offset += amount; |
| } |
| @@ -789,20 +776,20 @@ |
| offsets_iter != offsets_for_adjustment->end(); |
| ++offsets_iter, ++host_offsets_iter) { |
| size_t offset = *offsets_iter; |
| - if (offset == std::wstring::npos || offset < section_begin) { |
| + if (offset == string16::npos || offset < section_begin) { |
| // The offset is before the host section so leave it as is. |
| continue; |
| } |
| if (offset >= section_begin + old_section_len) { |
| // The offset is after the host section so adjust by host length delta. |
| offset += new_section_len - old_section_len; |
| - } else if (*host_offsets_iter != std::wstring::npos) { |
| + } else if (*host_offsets_iter != string16::npos) { |
| // The offset is within the host and valid so adjust by the host |
| // reformatting offsets results. |
| offset = section_begin + *host_offsets_iter; |
| } else { |
| // The offset is invalid. |
| - offset = std::wstring::npos; |
| + offset = string16::npos; |
| } |
| *offsets_iter = offset; |
| } |
| @@ -829,55 +816,47 @@ |
| AdjustComponent(delta, &(parsed->ref)); |
| } |
| -std::wstring FormatUrlInternal(const GURL& url, |
| - const std::wstring& languages, |
| - FormatUrlTypes format_types, |
| - UnescapeRule::Type unescape_rules, |
| - url_parse::Parsed* new_parsed, |
| - size_t* prefix_end, |
| - std::vector<size_t>* offsets_for_adjustment); |
| - |
| -// Helper for FormatUrl()/FormatUrlInternal(). |
| -std::wstring FormatViewSourceUrl(const GURL& url, |
| - const std::wstring& languages, |
| - FormatUrlTypes format_types, |
| - UnescapeRule::Type unescape_rules, |
| - url_parse::Parsed* new_parsed, |
| - size_t* prefix_end, |
| - std::vector<size_t>* offsets_for_adjustment) { |
| +// Helper for FormatUrlWithOffsets(). |
| +string16 FormatViewSourceUrl(const GURL& url, |
| + const std::string& languages, |
| + FormatUrlTypes format_types, |
| + UnescapeRule::Type unescape_rules, |
| + url_parse::Parsed* new_parsed, |
| + size_t* prefix_end, |
| + std::vector<size_t>* offsets_for_adjustment) { |
| DCHECK(new_parsed); |
| DCHECK(offsets_for_adjustment); |
| - const wchar_t* const kWideViewSource = L"view-source:"; |
| - const size_t kViewSourceLengthPlus1 = 12; |
| + const char kViewSource[] = "view-source:"; |
| + const size_t kViewSourceLength = arraysize(kViewSource) - 1; |
| std::vector<size_t> saved_offsets(*offsets_for_adjustment); |
| - GURL real_url(url.possibly_invalid_spec().substr(kViewSourceLengthPlus1)); |
| + GURL real_url(url.possibly_invalid_spec().substr(kViewSourceLength)); |
| // Clamp the offsets to the source area. |
| std::for_each(offsets_for_adjustment->begin(), |
| offsets_for_adjustment->end(), |
| - SubtractFromOffset(kViewSourceLengthPlus1)); |
| - std::wstring result = FormatUrlInternal(real_url, languages, format_types, |
| + SubtractFromOffset(kViewSourceLength)); |
| + string16 result = FormatUrlWithOffsets(real_url, languages, format_types, |
| unescape_rules, new_parsed, prefix_end, offsets_for_adjustment); |
| - result.insert(0, kWideViewSource); |
| + result.insert(0, ASCIIToUTF16(kViewSource)); |
| // Adjust position values. |
| if (new_parsed->scheme.is_nonempty()) { |
| // Assume "view-source:real-scheme" as a scheme. |
| - new_parsed->scheme.len += kViewSourceLengthPlus1; |
| + new_parsed->scheme.len += kViewSourceLength; |
| } else { |
| new_parsed->scheme.begin = 0; |
| - new_parsed->scheme.len = kViewSourceLengthPlus1 - 1; |
| + new_parsed->scheme.len = kViewSourceLength - 1; |
| } |
| - AdjustComponents(kViewSourceLengthPlus1, new_parsed); |
| + AdjustComponents(kViewSourceLength, new_parsed); |
| if (prefix_end) |
| - *prefix_end += kViewSourceLengthPlus1; |
| + *prefix_end += kViewSourceLength; |
| std::for_each(offsets_for_adjustment->begin(), |
| offsets_for_adjustment->end(), |
| - AddToOffset(kViewSourceLengthPlus1)); |
| - // Restore all offsets which were not affected by FormatUrlInternal. |
| + AddToOffset(kViewSourceLength)); |
| + // Restore all offsets which were not affected by FormatUrlWithOffsets(). |
| DCHECK_EQ(saved_offsets.size(), offsets_for_adjustment->size()); |
| for (size_t i = 0; i < saved_offsets.size(); ++i) { |
| - if (saved_offsets[i] < kViewSourceLengthPlus1) |
| + if (saved_offsets[i] < kViewSourceLength) |
| (*offsets_for_adjustment)[i] = saved_offsets[i]; |
| } |
| return result; |
| @@ -896,7 +875,7 @@ |
| void AppendFormattedComponent(const std::string& spec, |
| const url_parse::Component& in_component, |
| UnescapeRule::Type unescape_rules, |
| - std::wstring* output, |
| + string16* output, |
| url_parse::Component* out_component, |
| std::vector<size_t>* offsets_for_adjustment) { |
| DCHECK(output); |
| @@ -910,14 +889,13 @@ |
| OffsetsIntoSection(offsets_for_adjustment, component_begin); |
| if (unescape_rules == UnescapeRule::NONE) { |
| - output->append(UTF8ToWideAndAdjustOffsets( |
| + output->append(UTF8ToUTF16AndAdjustOffsets( |
| spec.substr(in_component.begin, in_component.len), |
| &offsets_into_component)); |
| } else { |
| - output->append(UTF16ToWideHack( |
| - UnescapeAndDecodeUTF8URLComponentWithOffsets( |
| - spec.substr(in_component.begin, in_component.len), unescape_rules, |
| - &offsets_into_component))); |
| + output->append(UnescapeAndDecodeUTF8URLComponentWithOffsets( |
| + spec.substr(in_component.begin, in_component.len), unescape_rules, |
| + &offsets_into_component)); |
| } |
| size_t new_component_len = output->length() - component_begin; |
| out_component->len = static_cast<int>(new_component_len); |
| @@ -931,195 +909,6 @@ |
| } |
| } |
| -// TODO(viettrungluu): This is really the old-fashioned version, made internal. |
| -// I need to really convert |FormatUrl()|. |
| -std::wstring FormatUrlInternal(const GURL& url, |
| - const std::wstring& languages, |
| - FormatUrlTypes format_types, |
| - UnescapeRule::Type unescape_rules, |
| - url_parse::Parsed* new_parsed, |
| - size_t* prefix_end, |
| - std::vector<size_t>* offsets_for_adjustment) { |
| - url_parse::Parsed parsed_temp; |
| - if (!new_parsed) |
| - new_parsed = &parsed_temp; |
| - else |
| - *new_parsed = url_parse::Parsed(); |
| - |
| - std::vector<size_t> offsets_temp; |
| - if (!offsets_for_adjustment) |
| - offsets_for_adjustment = &offsets_temp; |
| - |
| - std::wstring url_string; |
| - |
| - // Check for empty URLs or 0 available text width. |
| - if (url.is_empty()) { |
| - if (prefix_end) |
| - *prefix_end = 0; |
| - std::for_each(offsets_for_adjustment->begin(), |
| - offsets_for_adjustment->end(), |
| - LimitOffset<std::wstring>(0)); |
| - return url_string; |
| - } |
| - |
| - // Special handling for view-source:. Don't use chrome::kViewSourceScheme |
| - // because this library shouldn't depend on chrome. |
| - const char* const kViewSource = "view-source"; |
| - // Reject "view-source:view-source:..." to avoid deep recursion. |
| - const char* const kViewSourceTwice = "view-source:view-source:"; |
| - if (url.SchemeIs(kViewSource) && |
| - !StartsWithASCII(url.possibly_invalid_spec(), kViewSourceTwice, false)) { |
| - return FormatViewSourceUrl(url, languages, format_types, |
| - unescape_rules, new_parsed, prefix_end, offsets_for_adjustment); |
| - } |
| - |
| - // We handle both valid and invalid URLs (this will give us the spec |
| - // regardless of validity). |
| - const std::string& spec = url.possibly_invalid_spec(); |
| - const url_parse::Parsed& parsed = url.parsed_for_possibly_invalid_spec(); |
| - size_t spec_length = spec.length(); |
| - std::for_each(offsets_for_adjustment->begin(), |
| - offsets_for_adjustment->end(), |
| - LimitOffset<std::wstring>(spec_length)); |
| - |
| - // Copy everything before the username (the scheme and the separators.) |
| - // These are ASCII. |
| - url_string.insert(url_string.end(), spec.begin(), |
| - spec.begin() + parsed.CountCharactersBefore(url_parse::Parsed::USERNAME, |
| - true)); |
| - |
| - const wchar_t kHTTP[] = L"http://"; |
| - const char kFTP[] = "ftp."; |
| - // URLFixerUpper::FixupURL() treats "ftp.foo.com" as ftp://ftp.foo.com. This |
| - // means that if we trim "http://" off a URL whose host starts with "ftp." and |
| - // the user inputs this into any field subject to fixup (which is basically |
| - // all input fields), the meaning would be changed. (In fact, often the |
| - // formatted URL is directly pre-filled into an input field.) For this reason |
| - // we avoid stripping "http://" in this case. |
| - bool omit_http = |
| - (format_types & kFormatUrlOmitHTTP) && (url_string == kHTTP) && |
| - (url.host().compare(0, arraysize(kFTP) - 1, kFTP) != 0); |
| - |
| - new_parsed->scheme = parsed.scheme; |
| - |
| - if ((format_types & kFormatUrlOmitUsernamePassword) != 0) { |
| - // Remove the username and password fields. We don't want to display those |
| - // to the user since they can be used for attacks, |
| - // e.g. "http://google.com:search@evil.ru/" |
| - new_parsed->username.reset(); |
| - new_parsed->password.reset(); |
| - // Update the offsets based on removed username and/or password. |
| - if (!offsets_for_adjustment->empty() && |
| - (parsed.username.is_nonempty() || parsed.password.is_nonempty())) { |
| - AdjustOffset::Adjustments adjustments; |
| - if (parsed.username.is_nonempty() && parsed.password.is_nonempty()) { |
| - // The seeming off-by-one and off-by-two in these first two lines are to |
| - // account for the ':' after the username and '@' after the password. |
| - adjustments.push_back(AdjustOffset::Adjustment( |
| - static_cast<size_t>(parsed.username.begin), |
| - static_cast<size_t>(parsed.username.len + parsed.password.len + |
| - 2), 0)); |
| - } else { |
| - const url_parse::Component* nonempty_component = |
| - parsed.username.is_nonempty() ? &parsed.username : &parsed.password; |
| - // The seeming off-by-one in below is to account for the '@' after the |
| - // username/password. |
| - adjustments.push_back(AdjustOffset::Adjustment( |
| - static_cast<size_t>(nonempty_component->begin), |
| - static_cast<size_t>(nonempty_component->len + 1), 0)); |
| - } |
| - |
| - // Make offset adjustment. |
| - std::for_each(offsets_for_adjustment->begin(), |
| - offsets_for_adjustment->end(), |
| - AdjustOffset(adjustments)); |
| - } |
| - } else { |
| - AppendFormattedComponent(spec, parsed.username, unescape_rules, &url_string, |
| - &new_parsed->username, offsets_for_adjustment); |
| - if (parsed.password.is_valid()) |
| - url_string.push_back(':'); |
| - AppendFormattedComponent(spec, parsed.password, unescape_rules, &url_string, |
| - &new_parsed->password, offsets_for_adjustment); |
| - if (parsed.username.is_valid() || parsed.password.is_valid()) |
| - url_string.push_back('@'); |
| - } |
| - if (prefix_end) |
| - *prefix_end = static_cast<size_t>(url_string.length()); |
| - |
| - AppendFormattedHostWithOffsets(url, languages, &url_string, new_parsed, |
| - offsets_for_adjustment); |
| - |
| - // Port. |
| - if (parsed.port.is_nonempty()) { |
| - url_string.push_back(':'); |
| - new_parsed->port.begin = url_string.length(); |
| - url_string.insert(url_string.end(), |
| - spec.begin() + parsed.port.begin, |
| - spec.begin() + parsed.port.end()); |
| - new_parsed->port.len = url_string.length() - new_parsed->port.begin; |
| - } else { |
| - new_parsed->port.reset(); |
| - } |
| - |
| - // Path and query both get the same general unescape & convert treatment. |
| - if (!(format_types & kFormatUrlOmitTrailingSlashOnBareHostname) || |
| - !CanStripTrailingSlash(url)) { |
| - AppendFormattedComponent(spec, parsed.path, unescape_rules, &url_string, |
| - &new_parsed->path, offsets_for_adjustment); |
| - } |
| - if (parsed.query.is_valid()) |
| - url_string.push_back('?'); |
| - AppendFormattedComponent(spec, parsed.query, unescape_rules, &url_string, |
| - &new_parsed->query, offsets_for_adjustment); |
| - |
| - // Reference is stored in valid, unescaped UTF-8, so we can just convert. |
| - if (parsed.ref.is_valid()) { |
| - url_string.push_back('#'); |
| - size_t ref_begin = url_string.length(); |
| - new_parsed->ref.begin = static_cast<int>(ref_begin); |
| - |
| - // Compose a list of offsets within the section. |
| - std::vector<size_t> offsets_into_ref = |
| - OffsetsIntoSection(offsets_for_adjustment, ref_begin); |
| - |
| - if (parsed.ref.len > 0) { |
| - url_string.append(UTF8ToWideAndAdjustOffsets(spec.substr(parsed.ref.begin, |
| - parsed.ref.len), |
| - &offsets_into_ref)); |
| - } |
| - size_t old_ref_len = static_cast<size_t>(parsed.ref.len); |
| - size_t new_ref_len = url_string.length() - new_parsed->ref.begin; |
| - new_parsed->ref.len = static_cast<int>(new_ref_len); |
| - |
| - // Apply offset adjustments. |
| - ApplySectionAdjustments(offsets_into_ref, offsets_for_adjustment, |
| - old_ref_len, new_ref_len, ref_begin); |
| - } |
| - |
| - // If we need to strip out http do it after the fact. This way we don't need |
| - // to worry about how offset_for_adjustment is interpreted. |
| - const size_t kHTTPSize = arraysize(kHTTP) - 1; |
| - if (omit_http && !url_string.compare(0, kHTTPSize, kHTTP)) { |
| - url_string = url_string.substr(kHTTPSize); |
| - AdjustOffset::Adjustments adjustments; |
| - adjustments.push_back(AdjustOffset::Adjustment(0, kHTTPSize, 0)); |
| - std::for_each(offsets_for_adjustment->begin(), |
| - offsets_for_adjustment->end(), |
| - AdjustOffset(adjustments)); |
| - if (prefix_end) |
| - *prefix_end -= kHTTPSize; |
| - |
| - // Adjust new_parsed. |
| - DCHECK(new_parsed->scheme.is_valid()); |
| - int delta = -(new_parsed->scheme.len + 3); // +3 for ://. |
| - new_parsed->scheme.reset(); |
| - AdjustComponents(delta, new_parsed); |
| - } |
| - |
| - return url_string; |
| -} |
| - |
| } // namespace |
| const FormatUrlType kFormatUrlOmitNothing = 0; |
| @@ -1163,14 +952,34 @@ |
| return GURL(url_string); |
| } |
| -std::wstring GetSpecificHeader(const std::wstring& headers, |
| - const std::wstring& name) { |
| - return GetSpecificHeaderT(headers, name); |
| -} |
| - |
| std::string GetSpecificHeader(const std::string& headers, |
| const std::string& name) { |
| - return GetSpecificHeaderT(headers, name); |
| + // We want to grab the Value from the "Key: Value" pairs in the headers, |
| + // which should look like this (no leading spaces, \n-separated) (we format |
| + // them this way in url_request_inet.cc): |
| + // HTTP/1.1 200 OK\n |
| + // ETag: "6d0b8-947-24f35ec0"\n |
| + // Content-Length: 2375\n |
| + // Content-Type: text/html; charset=UTF-8\n |
| + // Last-Modified: Sun, 03 Sep 2006 04:34:43 GMT\n |
| + if (headers.empty()) |
| + return std::string(); |
| + |
| + std::string match('\n' + name + ':'); |
| + |
| + std::string::const_iterator begin = |
| + search(headers.begin(), headers.end(), match.begin(), match.end(), |
| + base::CaseInsensitiveCompareASCII<char>()); |
| + |
| + if (begin == headers.end()) |
| + return std::string(); |
| + |
| + begin += match.length(); |
| + |
| + std::string ret; |
| + TrimWhitespace(std::string(begin, find(begin, headers.end(), '\n')), TRIM_ALL, |
| + &ret); |
| + return ret; |
| } |
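
For reference, a hypothetical call (not part of the patch; assumes net/base/net_util.h is in scope) using the header block quoted in the comment above. The lookup is case-insensitive and the returned value is whitespace-trimmed.

std::string headers =
    "HTTP/1.1 200 OK\n"
    "ETag: \"6d0b8-947-24f35ec0\"\n"
    "Content-Length: 2375\n"
    "Content-Type: text/html; charset=UTF-8\n";
std::string length = GetSpecificHeader(headers, "content-length");
// length == "2375": "\ncontent-length:" matches "\nContent-Length:"
// case-insensitively, and the leading space is trimmed from the value.
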
| bool DecodeCharset(const std::string& input, |
| @@ -1245,88 +1054,50 @@ |
| return std::string(); |
| } |
| -std::wstring GetHeaderParamValue(const std::wstring& field, |
| - const std::wstring& param_name, |
| - QuoteRule::Type quote_rule) { |
| - return GetHeaderParamValueT(field, param_name, quote_rule); |
| -} |
| - |
| -std::string GetHeaderParamValue(const std::string& field, |
| +// TODO(mpcomplete): This is a quick and dirty implementation for now. I'm |
| +// sure this doesn't properly handle all (most?) cases. |
| +std::string GetHeaderParamValue(const std::string& header, |
| const std::string& param_name, |
| QuoteRule::Type quote_rule) { |
| - return GetHeaderParamValueT(field, param_name, quote_rule); |
| -} |
| + // This assumes args are formatted exactly like "bla; arg1=value; arg2=value". |
| + std::string::const_iterator param_begin = |
| + search(header.begin(), header.end(), param_name.begin(), param_name.end(), |
| + base::CaseInsensitiveCompareASCII<char>()); |
| -// TODO(brettw) bug 734373: check the scripts for each host component and |
| -// don't un-IDN-ize if there is more than one. Alternatively, only IDN for |
| -// scripts that the user has installed. For now, just put the entire |
| -// path through IDN. Maybe this feature can be implemented in ICU itself? |
| -// |
| -// We may want to skip this step in the case of file URLs to allow unicode |
| -// UNC hostnames regardless of encodings. |
| -std::wstring IDNToUnicodeWithOffsets( |
| - const char* host, |
| - size_t host_len, |
| - const std::wstring& languages, |
| - std::vector<size_t>* offsets_for_adjustment) { |
| - // Convert the ASCII input to a wide string for ICU. |
| - string16 input16; |
| - input16.reserve(host_len); |
| - input16.insert(input16.end(), host, host + host_len); |
| + if (param_begin == header.end()) |
| + return std::string(); |
| + param_begin += param_name.length(); |
| - // Do each component of the host separately, since we enforce script matching |
| - // on a per-component basis. |
| - AdjustOffset::Adjustments adjustments; |
| - string16 out16; |
| - for (size_t component_start = 0, component_end; |
| - component_start < input16.length(); |
| - component_start = component_end + 1) { |
| - // Find the end of the component. |
| - component_end = input16.find('.', component_start); |
| - if (component_end == string16::npos) |
| - component_end = input16.length(); // For getting the last component. |
| - size_t component_length = component_end - component_start; |
| - size_t new_component_start = out16.length(); |
| - bool converted_idn = false; |
| - if (component_end > component_start) { |
| - // Add the substring that we just found. |
| - converted_idn = IDNToUnicodeOneComponent(input16.data() + component_start, |
| - component_length, languages, &out16); |
| - } |
| - size_t new_component_length = out16.length() - new_component_start; |
| + std::string whitespace(" \t"); |
| + size_t equals_offset = |
| + header.find_first_not_of(whitespace, param_begin - header.begin()); |
| + if (equals_offset == std::string::npos || header[equals_offset] != '=') |
| + return std::string(); |
| - if (converted_idn && offsets_for_adjustment) { |
| - adjustments.push_back(AdjustOffset::Adjustment( |
| - component_start, component_length, new_component_length)); |
| - } |
| + param_begin = header.begin() + equals_offset + 1; |
| + if (param_begin == header.end()) |
| + return std::string(); |
| - // Need to add the dot we just found (if we found one). |
| - if (component_end < input16.length()) |
| - out16.push_back('.'); |
| + std::string::const_iterator param_end; |
| + if (*param_begin == '"' && quote_rule == QuoteRule::REMOVE_OUTER_QUOTES) { |
| + ++param_begin; // skip past the quote. |
| + param_end = find(param_begin, header.end(), '"'); |
| + // If the closing quote is missing, we will treat the rest of the |
| + // string as the parameter. We can't set |param_end| to the |
| + // location of the separator (';'), since the separator is |
| + // technically quoted. See: http://crbug.com/58840 |
| + } else { |
| + param_end = find(param_begin + 1, header.end(), ';'); |
| } |
| - // Make offset adjustment. |
| - if (offsets_for_adjustment && !adjustments.empty()) { |
| - std::for_each(offsets_for_adjustment->begin(), |
| - offsets_for_adjustment->end(), |
| - AdjustOffset(adjustments)); |
| - } |
| - |
| - return UTF16ToWideAndAdjustOffsets(out16, offsets_for_adjustment); |
| + return std::string(param_begin, param_end); |
| } |
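
For reference, a hypothetical call against a Content-Disposition style header (assumes net/base/net_util.h); it also shows the quoted-value handling discussed in the comment above.

std::string disposition = "attachment; filename=\"flowers.jpg\"";
std::string name = GetHeaderParamValue(disposition, "filename",
                                       QuoteRule::REMOVE_OUTER_QUOTES);
// name == "flowers.jpg": the outer quotes are stripped. If the closing
// quote were missing, the rest of the string would be returned instead
// (see http://crbug.com/58840). Note the lookup is a plain substring
// search, so e.g. "name" would also match inside "filename".
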
| -std::wstring IDNToUnicode(const char* host, |
| - size_t host_len, |
| - const std::wstring& languages, |
| - size_t* offset_for_adjustment) { |
| +string16 IDNToUnicode(const char* host, |
| + size_t host_len, |
| + const std::string& languages) { |
| std::vector<size_t> offsets; |
| - if (offset_for_adjustment) |
| - offsets.push_back(*offset_for_adjustment); |
| - std::wstring result = |
| - IDNToUnicodeWithOffsets(host, host_len, languages, &offsets); |
| - if (offset_for_adjustment) |
| - *offset_for_adjustment = offsets[0]; |
| - return result; |
| + return IDNToUnicodeWithOffsets(host, host_len, languages, &offsets); |
| } |
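
A hypothetical call with the new signature (|languages| is now a plain std::string rather than a wstring). Whether a component is actually converted depends on the IsIDNComponentSafe() checks against |languages|, so the expected output here is an assumption.

std::string host = "xn--bcher-kva.example";  // Punycode for "bücher.example".
string16 display = IDNToUnicode(host.data(), host.length(), "de,en");
// With German in |languages|, this is expected to render as "bücher.example";
// with an empty or non-matching language list the ASCII form is kept.
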
| std::string CanonicalizeHost(const std::string& host, |
| @@ -1352,13 +1123,6 @@ |
| return canon_host; |
| } |
| -std::string CanonicalizeHost(const std::wstring& host, |
| - url_canon::CanonHostInfo* host_info) { |
| - std::string converted_host; |
| - WideToUTF8(host.c_str(), host.length(), &converted_host); |
| - return CanonicalizeHost(converted_host, host_info); |
| -} |
| - |
| std::string GetDirectoryListingHeader(const string16& title) { |
| static const base::StringPiece header( |
| NetModule::GetResource(IDR_DIR_HEADER_HTML)); |
| @@ -1741,8 +1505,8 @@ |
| void AppendFormattedHostWithOffsets( |
| const GURL& url, |
| - const std::wstring& languages, |
| - std::wstring* output, |
| + const std::string& languages, |
| + string16* output, |
| url_parse::Parsed* new_parsed, |
| std::vector<size_t>* offsets_for_adjustment) { |
| DCHECK(output); |
| @@ -1765,7 +1529,7 @@ |
| ((spec.length() == 0 && host.begin == 0) || |
| host.begin < static_cast<int>(spec.length()))); |
| output->append(IDNToUnicodeWithOffsets(&spec[host.begin], old_host_len, |
| - languages, &offsets_into_host)); |
| + languages, &offsets_into_host)); |
| size_t new_host_len = output->length() - host_begin; |
| if (new_parsed) |
| @@ -1780,8 +1544,8 @@ |
| } |
| void AppendFormattedHost(const GURL& url, |
| - const std::wstring& languages, |
| - std::wstring* output, |
| + const std::string& languages, |
| + string16* output, |
| url_parse::Parsed* new_parsed, |
| size_t* offset_for_adjustment) { |
| std::vector<size_t> offsets; |
| @@ -1792,7 +1556,6 @@ |
| *offset_for_adjustment = offsets[0]; |
| } |
| -// TODO(viettrungluu): convert the wstring |FormatUrlInternal()|. |
| string16 FormatUrlWithOffsets(const GURL& url, |
| const std::string& languages, |
| FormatUrlTypes format_types, |
| @@ -1800,10 +1563,183 @@ |
| url_parse::Parsed* new_parsed, |
| size_t* prefix_end, |
| std::vector<size_t>* offsets_for_adjustment) { |
| - return WideToUTF16Hack( |
| - FormatUrlInternal(url, ASCIIToWide(languages), format_types, |
| - unescape_rules, new_parsed, prefix_end, |
| - offsets_for_adjustment)); |
| + url_parse::Parsed parsed_temp; |
|
brettw, 2011/04/25 16:19:44:
I'm assuming you just moved this code and changed

Peter Kasting, 2011/04/25 17:44:52:
The only non-trivial change was to change kHTTP fr

| + if (!new_parsed) |
| + new_parsed = &parsed_temp; |
| + else |
| + *new_parsed = url_parse::Parsed(); |
| + |
| + std::vector<size_t> offsets_temp; |
| + if (!offsets_for_adjustment) |
| + offsets_for_adjustment = &offsets_temp; |
| + |
| + string16 url_string; |
| + |
| + // Check for empty URLs or 0 available text width. |
| + if (url.is_empty()) { |
| + if (prefix_end) |
| + *prefix_end = 0; |
| + std::for_each(offsets_for_adjustment->begin(), |
| + offsets_for_adjustment->end(), |
| + LimitOffset<string16>(0)); |
| + return url_string; |
| + } |
| + |
| + // Special handling for view-source:. Don't use chrome::kViewSourceScheme |
| + // because this library shouldn't depend on chrome. |
| + const char* const kViewSource = "view-source"; |
| + // Reject "view-source:view-source:..." to avoid deep recursion. |
| + const char* const kViewSourceTwice = "view-source:view-source:"; |
| + if (url.SchemeIs(kViewSource) && |
| + !StartsWithASCII(url.possibly_invalid_spec(), kViewSourceTwice, false)) { |
| + return FormatViewSourceUrl(url, languages, format_types, |
| + unescape_rules, new_parsed, prefix_end, offsets_for_adjustment); |
| + } |
| + |
| + // We handle both valid and invalid URLs (this will give us the spec |
| + // regardless of validity). |
| + const std::string& spec = url.possibly_invalid_spec(); |
| + const url_parse::Parsed& parsed = url.parsed_for_possibly_invalid_spec(); |
| + size_t spec_length = spec.length(); |
| + std::for_each(offsets_for_adjustment->begin(), |
| + offsets_for_adjustment->end(), |
| + LimitOffset<string16>(spec_length)); |
| + |
| + // Copy everything before the username (the scheme and the separators.) |
| + // These are ASCII. |
| + url_string.insert(url_string.end(), spec.begin(), |
| + spec.begin() + parsed.CountCharactersBefore(url_parse::Parsed::USERNAME, |
| + true)); |
| + |
| + string16 kHTTP = ASCIIToUTF16("http://"); |
|
Avi (use Gerrit), 2011/04/25 17:52:07:
eww. const char like kFTP below.

| + const char kFTP[] = "ftp."; |
| + // URLFixerUpper::FixupURL() treats "ftp.foo.com" as ftp://ftp.foo.com. This |
| + // means that if we trim "http://" off a URL whose host starts with "ftp." and |
| + // the user inputs this into any field subject to fixup (which is basically |
| + // all input fields), the meaning would be changed. (In fact, often the |
| + // formatted URL is directly pre-filled into an input field.) For this reason |
| + // we avoid stripping "http://" in this case. |
| + bool omit_http = (format_types & kFormatUrlOmitHTTP) && |
| + (url_string == kHTTP) && |
| + (url.host().compare(0, arraysize(kFTP) - 1, kFTP) != 0); |
|
Avi (use Gerrit), 2011/04/25 17:52:07:
Can you use string_util's LowerCaseEqualsASCII?

brettw, 2011/04/25 17:56:28:
The host name will be canonicalized so this isn't

| + |
| + new_parsed->scheme = parsed.scheme; |
| + |
| + if ((format_types & kFormatUrlOmitUsernamePassword) != 0) { |
| + // Remove the username and password fields. We don't want to display those |
| + // to the user since they can be used for attacks, |
| + // e.g. "http://google.com:search@evil.ru/" |
| + new_parsed->username.reset(); |
| + new_parsed->password.reset(); |
| + // Update the offsets based on removed username and/or password. |
| + if (!offsets_for_adjustment->empty() && |
| + (parsed.username.is_nonempty() || parsed.password.is_nonempty())) { |
| + AdjustOffset::Adjustments adjustments; |
| + if (parsed.username.is_nonempty() && parsed.password.is_nonempty()) { |
| + // The seeming off-by-one and off-by-two in these first two lines are to |
| + // account for the ':' after the username and '@' after the password. |
| + adjustments.push_back(AdjustOffset::Adjustment( |
| + static_cast<size_t>(parsed.username.begin), |
| + static_cast<size_t>(parsed.username.len + parsed.password.len + |
| + 2), 0)); |
| + } else { |
| + const url_parse::Component* nonempty_component = |
| + parsed.username.is_nonempty() ? &parsed.username : &parsed.password; |
| + // The seeming off-by-one in below is to account for the '@' after the |
| + // username/password. |
| + adjustments.push_back(AdjustOffset::Adjustment( |
| + static_cast<size_t>(nonempty_component->begin), |
| + static_cast<size_t>(nonempty_component->len + 1), 0)); |
| + } |
| + |
| + // Make offset adjustment. |
| + std::for_each(offsets_for_adjustment->begin(), |
| + offsets_for_adjustment->end(), |
| + AdjustOffset(adjustments)); |
| + } |
| + } else { |
| + AppendFormattedComponent(spec, parsed.username, unescape_rules, &url_string, |
| + &new_parsed->username, offsets_for_adjustment); |
| + if (parsed.password.is_valid()) |
| + url_string.push_back(':'); |
| + AppendFormattedComponent(spec, parsed.password, unescape_rules, &url_string, |
| + &new_parsed->password, offsets_for_adjustment); |
| + if (parsed.username.is_valid() || parsed.password.is_valid()) |
| + url_string.push_back('@'); |
| + } |
| + if (prefix_end) |
| + *prefix_end = static_cast<size_t>(url_string.length()); |
| + |
| + AppendFormattedHostWithOffsets(url, languages, &url_string, new_parsed, |
| + offsets_for_adjustment); |
| + |
| + // Port. |
| + if (parsed.port.is_nonempty()) { |
| + url_string.push_back(':'); |
| + new_parsed->port.begin = url_string.length(); |
| + url_string.insert(url_string.end(), |
| + spec.begin() + parsed.port.begin, |
| + spec.begin() + parsed.port.end()); |
| + new_parsed->port.len = url_string.length() - new_parsed->port.begin; |
| + } else { |
| + new_parsed->port.reset(); |
| + } |
| + |
| + // Path and query both get the same general unescape & convert treatment. |
| + if (!(format_types & kFormatUrlOmitTrailingSlashOnBareHostname) || |
| + !CanStripTrailingSlash(url)) { |
| + AppendFormattedComponent(spec, parsed.path, unescape_rules, &url_string, |
| + &new_parsed->path, offsets_for_adjustment); |
| + } |
| + if (parsed.query.is_valid()) |
| + url_string.push_back('?'); |
| + AppendFormattedComponent(spec, parsed.query, unescape_rules, &url_string, |
| + &new_parsed->query, offsets_for_adjustment); |
| + |
| + // Reference is stored in valid, unescaped UTF-8, so we can just convert. |
| + if (parsed.ref.is_valid()) { |
| + url_string.push_back('#'); |
| + size_t ref_begin = url_string.length(); |
| + new_parsed->ref.begin = static_cast<int>(ref_begin); |
| + |
| + // Compose a list of offsets within the section. |
| + std::vector<size_t> offsets_into_ref = |
| + OffsetsIntoSection(offsets_for_adjustment, ref_begin); |
| + |
| + if (parsed.ref.len > 0) { |
| + url_string.append(UTF8ToUTF16AndAdjustOffsets( |
| + spec.substr(parsed.ref.begin, parsed.ref.len), &offsets_into_ref)); |
| + } |
| + size_t old_ref_len = static_cast<size_t>(parsed.ref.len); |
| + size_t new_ref_len = url_string.length() - new_parsed->ref.begin; |
| + new_parsed->ref.len = static_cast<int>(new_ref_len); |
| + |
| + // Apply offset adjustments. |
| + ApplySectionAdjustments(offsets_into_ref, offsets_for_adjustment, |
| + old_ref_len, new_ref_len, ref_begin); |
| + } |
| + |
| + // If we need to strip out http do it after the fact. This way we don't need |
| + // to worry about how offset_for_adjustment is interpreted. |
| + const size_t kHTTPSize = kHTTP.length(); |
| + if (omit_http && !url_string.compare(0, kHTTP.length(), kHTTP)) { |
| + url_string = url_string.substr(kHTTPSize); |
| + AdjustOffset::Adjustments adjustments; |
| + adjustments.push_back(AdjustOffset::Adjustment(0, kHTTPSize, 0)); |
| + std::for_each(offsets_for_adjustment->begin(), |
| + offsets_for_adjustment->end(), |
| + AdjustOffset(adjustments)); |
| + if (prefix_end) |
| + *prefix_end -= kHTTPSize; |
| + |
| + // Adjust new_parsed. |
| + DCHECK(new_parsed->scheme.is_valid()); |
| + int delta = -(new_parsed->scheme.len + 3); // +3 for ://. |
| + new_parsed->scheme.reset(); |
| + AdjustComponents(delta, new_parsed); |
| + } |
| + |
| + return url_string; |
| } |
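
A hypothetical caller of the now string16-native FormatUrlWithOffsets() (assumes the Chromium tree; the URL and offset are illustrative), showing how |offsets_for_adjustment| tracks a position through the omissions applied above.

GURL url("http://user:pass@www.example.com/path");
std::vector<size_t> offsets(1, 25u);  // A caret somewhere inside the host.
string16 display = FormatUrlWithOffsets(
    url, "en", kFormatUrlOmitUsernamePassword | kFormatUrlOmitHTTP,
    UnescapeRule::NONE, NULL, NULL, &offsets);
// |display| comes back without "http://" or "user:pass@", and offsets[0]
// is shifted left by the removed prefix (or set to npos if it had pointed
// inside removed text).
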
| string16 FormatUrl(const GURL& url, |
| @@ -1816,9 +1752,8 @@ |
| std::vector<size_t> offsets; |
| if (offset_for_adjustment) |
| offsets.push_back(*offset_for_adjustment); |
| - string16 result = WideToUTF16Hack( |
| - FormatUrlInternal(url, ASCIIToWide(languages), format_types, |
| - unescape_rules, new_parsed, prefix_end, &offsets)); |
| + string16 result = FormatUrlWithOffsets(url, languages, format_types, |
| + unescape_rules, new_parsed, prefix_end, &offsets); |
| if (offset_for_adjustment) |
| *offset_for_adjustment = offsets[0]; |
| return result; |
| @@ -2279,7 +2214,7 @@ |
| size_t ClampComponentOffset::operator()(size_t offset) { |
| return (offset >= component_start) ? |
| - offset : std::wstring::npos; |
| + offset : string16::npos; |
| } |
| } // namespace net |