Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(109)

Side by Side Diff: net/base/net_util.cc

Issue 6898026: Eliminate wstring from base/utf_offset_string_conversions.h, net/base/escape.h, and net/base/net_... (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/
Patch Set: '' Created 9 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « net/base/net_util.h ('k') | net/base/net_util_unittest.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "net/base/net_util.h" 5 #include "net/base/net_util.h"
6 6
7 #include <unicode/regex.h> 7 #include <unicode/regex.h>
8 #include <unicode/ucnv.h> 8 #include <unicode/ucnv.h>
9 #include <unicode/uidna.h> 9 #include <unicode/uidna.h>
10 #include <unicode/ulocdata.h> 10 #include <unicode/ulocdata.h>
(...skipping 137 matching lines...) Expand 10 before | Expand all | Expand 10 after
148 0xFFFF, // Used to block all invalid port numbers (see 148 0xFFFF, // Used to block all invalid port numbers (see
149 // third_party/WebKit/Source/WebCore/platform/KURLGoogle.cpp, port()) 149 // third_party/WebKit/Source/WebCore/platform/KURLGoogle.cpp, port())
150 }; 150 };
151 151
152 // FTP overrides the following restricted ports. 152 // FTP overrides the following restricted ports.
153 static const int kAllowedFtpPorts[] = { 153 static const int kAllowedFtpPorts[] = {
154 21, // ftp data 154 21, // ftp data
155 22, // ssh 155 22, // ssh
156 }; 156 };
157 157
158 template<typename STR>
159 STR GetSpecificHeaderT(const STR& headers, const STR& name) {
160 // We want to grab the Value from the "Key: Value" pairs in the headers,
161 // which should look like this (no leading spaces, \n-separated) (we format
162 // them this way in url_request_inet.cc):
163 // HTTP/1.1 200 OK\n
164 // ETag: "6d0b8-947-24f35ec0"\n
165 // Content-Length: 2375\n
166 // Content-Type: text/html; charset=UTF-8\n
167 // Last-Modified: Sun, 03 Sep 2006 04:34:43 GMT\n
168 if (headers.empty())
169 return STR();
170
171 STR match;
172 match.push_back('\n');
173 match.append(name);
174 match.push_back(':');
175
176 typename STR::const_iterator begin =
177 search(headers.begin(), headers.end(), match.begin(), match.end(),
178 base::CaseInsensitiveCompareASCII<typename STR::value_type>());
179
180 if (begin == headers.end())
181 return STR();
182
183 begin += match.length();
184
185 typename STR::const_iterator end = find(begin, headers.end(), '\n');
186
187 STR ret;
188 TrimWhitespace(STR(begin, end), TRIM_ALL, &ret);
189 return ret;
190 }
191
192 // Similar to Base64Decode. Decodes a Q-encoded string to a sequence 158 // Similar to Base64Decode. Decodes a Q-encoded string to a sequence
193 // of bytes. If input is invalid, return false. 159 // of bytes. If input is invalid, return false.
194 bool QPDecode(const std::string& input, std::string* output) { 160 bool QPDecode(const std::string& input, std::string* output) {
195 std::string temp; 161 std::string temp;
196 temp.reserve(input.size()); 162 temp.reserve(input.size());
197 std::string::const_iterator it = input.begin(); 163 std::string::const_iterator it = input.begin();
198 while (it != input.end()) { 164 while (it != input.end()) {
199 if (*it == '_') { 165 if (*it == '_') {
200 temp.push_back(' '); 166 temp.push_back(' ');
201 } else if (*it == '=') { 167 } else if (*it == '=') {
(...skipping 67 matching lines...) Expand 10 before | Expand all | Expand 10 after
269 *is_rfc2047 = false; 235 *is_rfc2047 = false;
270 output->clear(); 236 output->clear();
271 if (encoded_word.empty()) 237 if (encoded_word.empty())
272 return true; 238 return true;
273 239
274 if (!IsStringASCII(encoded_word)) { 240 if (!IsStringASCII(encoded_word)) {
275 // Try UTF-8, referrer_charset and the native OS default charset in turn. 241 // Try UTF-8, referrer_charset and the native OS default charset in turn.
276 if (IsStringUTF8(encoded_word)) { 242 if (IsStringUTF8(encoded_word)) {
277 *output = encoded_word; 243 *output = encoded_word;
278 } else { 244 } else {
279 std::wstring wide_output; 245 string16 utf16_output;
280 if (!referrer_charset.empty() && 246 if (!referrer_charset.empty() &&
281 base::CodepageToWide(encoded_word, referrer_charset.c_str(), 247 base::CodepageToUTF16(encoded_word, referrer_charset.c_str(),
282 base::OnStringConversionError::FAIL, 248 base::OnStringConversionError::FAIL,
283 &wide_output)) { 249 &utf16_output)) {
284 *output = WideToUTF8(wide_output); 250 *output = UTF16ToUTF8(utf16_output);
285 } else { 251 } else {
286 *output = WideToUTF8(base::SysNativeMBToWide(encoded_word)); 252 *output = WideToUTF8(base::SysNativeMBToWide(encoded_word));
287 } 253 }
288 } 254 }
289 255
290 return true; 256 return true;
291 } 257 }
292 258
293 // RFC 2047 : one of encoding methods supported by Firefox and relatively 259 // RFC 2047 : one of encoding methods supported by Firefox and relatively
294 // widely used by web servers. 260 // widely used by web servers.
(...skipping 112 matching lines...) Expand 10 before | Expand all | Expand 10 after
407 std::string decoded; 373 std::string decoded;
408 if (!DecodeWord(t.token(), referrer_charset, &is_previous_token_rfc2047, 374 if (!DecodeWord(t.token(), referrer_charset, &is_previous_token_rfc2047,
409 &decoded)) 375 &decoded))
410 return false; 376 return false;
411 tmp.append(decoded); 377 tmp.append(decoded);
412 } 378 }
413 output->swap(tmp); 379 output->swap(tmp);
414 return true; 380 return true;
415 } 381 }
416 382
417 // TODO(mpcomplete): This is a quick and dirty implementation for now. I'm
418 // sure this doesn't properly handle all (most?) cases.
419 template<typename STR>
420 STR GetHeaderParamValueT(const STR& header, const STR& param_name,
421 QuoteRule::Type quote_rule) {
422 // This assumes args are formatted exactly like "bla; arg1=value; arg2=value".
423 typename STR::const_iterator param_begin =
424 search(header.begin(), header.end(), param_name.begin(), param_name.end(),
425 base::CaseInsensitiveCompareASCII<typename STR::value_type>());
426
427 if (param_begin == header.end())
428 return STR();
429 param_begin += param_name.length();
430
431 STR whitespace;
432 whitespace.push_back(' ');
433 whitespace.push_back('\t');
434 const typename STR::size_type equals_offset =
435 header.find_first_not_of(whitespace, param_begin - header.begin());
436 if (equals_offset == STR::npos || header.at(equals_offset) != '=')
437 return STR();
438
439 param_begin = header.begin() + equals_offset + 1;
440 if (param_begin == header.end())
441 return STR();
442
443 typename STR::const_iterator param_end;
444 if (*param_begin == '"' && quote_rule == QuoteRule::REMOVE_OUTER_QUOTES) {
445 ++param_begin; // skip past the quote.
446 param_end = find(param_begin, header.end(), '"');
447 // If the closing quote is missing, we will treat the rest of the
448 // string as the parameter. We can't set |param_end| to the
449 // location of the separator (';'), since the separator is
450 // technically quoted. See: http://crbug.com/58840
451 } else {
452 param_end = find(param_begin+1, header.end(), ';');
453 }
454
455 return STR(param_begin, param_end);
456 }
457
458 // Does some simple normalization of scripts so we can allow certain scripts 383 // Does some simple normalization of scripts so we can allow certain scripts
459 // to exist together. 384 // to exist together.
460 // TODO(brettw) bug 880223: we should allow some other languages to be 385 // TODO(brettw) bug 880223: we should allow some other languages to be
461 // combined such as Chinese and Latin. We will probably need a more 386 // combined such as Chinese and Latin. We will probably need a more
462 // complicated system of language pairs to have more fine-grained control. 387 // complicated system of language pairs to have more fine-grained control.
463 UScriptCode NormalizeScript(UScriptCode code) { 388 UScriptCode NormalizeScript(UScriptCode code) {
464 switch (code) { 389 switch (code) {
465 case USCRIPT_KATAKANA: 390 case USCRIPT_KATAKANA:
466 case USCRIPT_HIRAGANA: 391 case USCRIPT_HIRAGANA:
467 case USCRIPT_KATAKANA_OR_HIRAGANA: 392 case USCRIPT_KATAKANA_OR_HIRAGANA:
(...skipping 118 matching lines...) Expand 10 before | Expand all | Expand 10 after
586 ulocdata_close(uld); 511 ulocdata_close(uld);
587 } 512 }
588 } 513 }
589 return !lang_set->isEmpty() && lang_set->containsAll(component_characters); 514 return !lang_set->isEmpty() && lang_set->containsAll(component_characters);
590 } 515 }
591 516
592 // Returns true if the given Unicode host component is safe to display to the 517 // Returns true if the given Unicode host component is safe to display to the
593 // user. 518 // user.
594 bool IsIDNComponentSafe(const char16* str, 519 bool IsIDNComponentSafe(const char16* str,
595 int str_len, 520 int str_len,
596 const std::wstring& languages) { 521 const std::string& languages) {
597 // Most common cases (non-IDN) do not reach here so that we don't 522 // Most common cases (non-IDN) do not reach here so that we don't
598 // need a fast return path. 523 // need a fast return path.
599 // TODO(jungshik) : Check if there's any character inappropriate 524 // TODO(jungshik) : Check if there's any character inappropriate
600 // (although allowed) for domain names. 525 // (although allowed) for domain names.
601 // See http://www.unicode.org/reports/tr39/#IDN_Security_Profiles and 526 // See http://www.unicode.org/reports/tr39/#IDN_Security_Profiles and
602 // http://www.unicode.org/reports/tr39/data/xidmodifications.txt 527 // http://www.unicode.org/reports/tr39/data/xidmodifications.txt
603 // For now, we borrowed the list from Mozilla and tweaked it slightly. 528 // For now, we borrowed the list from Mozilla and tweaked it slightly.
604 // (e.g. Characters like U+00A0, U+3000, U+3002 are omitted because 529 // (e.g. Characters like U+00A0, U+3000, U+3002 are omitted because
605 // they're gonna be canonicalized to U+0020 and full stop before 530 // they're gonna be canonicalized to U+0020 and full stop before
606 // reaching here.) 531 // reaching here.)
(...skipping 63 matching lines...) Expand 10 before | Expand all | Expand 10 after
670 // (sync'd with characters allowed in url_canon_host with square 595 // (sync'd with characters allowed in url_canon_host with square
671 // brackets excluded.) See kHostCharLookup[] array in url_canon_host.cc. 596 // brackets excluded.) See kHostCharLookup[] array in url_canon_host.cc.
672 icu::UnicodeSet common_characters(UNICODE_STRING_SIMPLE("[[0-9]\\-_+\\ ]"), 597 icu::UnicodeSet common_characters(UNICODE_STRING_SIMPLE("[[0-9]\\-_+\\ ]"),
673 status); 598 status);
674 DCHECK(U_SUCCESS(status)); 599 DCHECK(U_SUCCESS(status));
675 // Subtract common characters because they're always allowed so that 600 // Subtract common characters because they're always allowed so that
676 // we just have to check if a language-specific set contains 601 // we just have to check if a language-specific set contains
677 // the remainder. 602 // the remainder.
678 component_characters.removeAll(common_characters); 603 component_characters.removeAll(common_characters);
679 604
680 std::string languages_list(WideToASCII(languages)); 605 StringTokenizer t(languages, ",");
681 StringTokenizer t(languages_list, ",");
682 while (t.GetNext()) { 606 while (t.GetNext()) {
683 if (IsComponentCoveredByLang(component_characters, t.token())) 607 if (IsComponentCoveredByLang(component_characters, t.token()))
684 return true; 608 return true;
685 } 609 }
686 return false; 610 return false;
687 } 611 }
688 612
689 // Converts one component of a host (between dots) to IDN if safe. The result 613 // Converts one component of a host (between dots) to IDN if safe. The result
690 // will be APPENDED to the given output string and will be the same as the input 614 // will be APPENDED to the given output string and will be the same as the input
691 // if it is not IDN or the IDN is unsafe to display. Returns whether any 615 // if it is not IDN or the IDN is unsafe to display. Returns whether any
692 // conversion was performed. 616 // conversion was performed.
693 bool IDNToUnicodeOneComponent(const char16* comp, 617 bool IDNToUnicodeOneComponent(const char16* comp,
694 size_t comp_len, 618 size_t comp_len,
695 const std::wstring& languages, 619 const std::string& languages,
696 string16* out) { 620 string16* out) {
697 DCHECK(out); 621 DCHECK(out);
698 if (comp_len == 0) 622 if (comp_len == 0)
699 return false; 623 return false;
700 624
701 // Only transform if the input can be an IDN component. 625 // Only transform if the input can be an IDN component.
702 static const char16 kIdnPrefix[] = {'x', 'n', '-', '-'}; 626 static const char16 kIdnPrefix[] = {'x', 'n', '-', '-'};
703 if ((comp_len > arraysize(kIdnPrefix)) && 627 if ((comp_len > arraysize(kIdnPrefix)) &&
704 !memcmp(comp, kIdnPrefix, arraysize(kIdnPrefix) * sizeof(char16))) { 628 !memcmp(comp, kIdnPrefix, arraysize(kIdnPrefix) * sizeof(char16))) {
705 // Repeatedly expand the output string until it's big enough. It looks like 629 // Repeatedly expand the output string until it's big enough. It looks like
(...skipping 21 matching lines...) Expand all
727 // Failed, revert back to original string. 651 // Failed, revert back to original string.
728 out->resize(original_length); 652 out->resize(original_length);
729 } 653 }
730 654
731 // We get here with no IDN or on error, in which case we just append the 655 // We get here with no IDN or on error, in which case we just append the
732 // literal input. 656 // literal input.
733 out->append(comp, comp_len); 657 out->append(comp, comp_len);
734 return false; 658 return false;
735 } 659 }
736 660
661 // TODO(brettw) bug 734373: check the scripts for each host component and
662 // don't un-IDN-ize if there is more than one. Alternatively, only IDN for
663 // scripts that the user has installed. For now, just put the entire
664 // path through IDN. Maybe this feature can be implemented in ICU itself?
665 //
666 // We may want to skip this step in the case of file URLs to allow unicode
667 // UNC hostnames regardless of encodings.
668 string16 IDNToUnicodeWithOffsets(
669 const char* host,
670 size_t host_len,
671 const std::string& languages,
672 std::vector<size_t>* offsets_for_adjustment) {
673 // Convert the ASCII input to a string16 for ICU.
674 string16 input16;
675 input16.reserve(host_len);
676 input16.insert(input16.end(), host, host + host_len);
677
678 // Do each component of the host separately, since we enforce script matching
679 // on a per-component basis.
680 AdjustOffset::Adjustments adjustments;
681 string16 out16;
682 for (size_t component_start = 0, component_end;
683 component_start < input16.length();
684 component_start = component_end + 1) {
685 // Find the end of the component.
686 component_end = input16.find('.', component_start);
687 if (component_end == string16::npos)
688 component_end = input16.length(); // For getting the last component.
689 size_t component_length = component_end - component_start;
690 size_t new_component_start = out16.length();
691 bool converted_idn = false;
692 if (component_end > component_start) {
693 // Add the substring that we just found.
694 converted_idn = IDNToUnicodeOneComponent(input16.data() + component_start,
695 component_length, languages, &out16);
696 }
697 size_t new_component_length = out16.length() - new_component_start;
698
699 if (converted_idn && offsets_for_adjustment) {
700 adjustments.push_back(AdjustOffset::Adjustment(
701 component_start, component_length, new_component_length));
702 }
703
704 // Need to add the dot we just found (if we found one).
705 if (component_end < input16.length())
706 out16.push_back('.');
707 }
708
709 // Make offset adjustment.
710 if (offsets_for_adjustment) {
711 if (!adjustments.empty()) {
712 std::for_each(offsets_for_adjustment->begin(),
713 offsets_for_adjustment->end(),
714 AdjustOffset(adjustments));
715 }
716 std::for_each(offsets_for_adjustment->begin(),
717 offsets_for_adjustment->end(),
718 LimitOffset<string16>(out16.length()));
719 }
720
721 return out16;
722 }
723
737 struct SubtractFromOffset { 724 struct SubtractFromOffset {
738 explicit SubtractFromOffset(size_t amount) 725 explicit SubtractFromOffset(size_t amount)
739 : amount(amount) {} 726 : amount(amount) {}
740 void operator()(size_t& offset) { 727 void operator()(size_t& offset) {
741 if (offset != std::wstring::npos) { 728 if (offset != string16::npos) {
742 if (offset >= amount) 729 if (offset >= amount)
743 offset -= amount; 730 offset -= amount;
744 else 731 else
745 offset = std::wstring::npos; 732 offset = string16::npos;
746 } 733 }
747 } 734 }
748 735
749 size_t amount; 736 size_t amount;
750 }; 737 };
751 738
752 struct AddToOffset { 739 struct AddToOffset {
753 explicit AddToOffset(size_t amount) 740 explicit AddToOffset(size_t amount)
754 : amount(amount) {} 741 : amount(amount) {}
755 void operator()(size_t& offset) { 742 void operator()(size_t& offset) {
756 if (offset != std::wstring::npos) 743 if (offset != string16::npos)
757 offset += amount; 744 offset += amount;
758 } 745 }
759 746
760 size_t amount; 747 size_t amount;
761 }; 748 };
762 749
763 std::vector<size_t> OffsetsIntoSection( 750 std::vector<size_t> OffsetsIntoSection(
764 std::vector<size_t>* offsets_for_adjustment, 751 std::vector<size_t>* offsets_for_adjustment,
765 size_t section_begin) { 752 size_t section_begin) {
766 std::vector<size_t> offsets_into_section; 753 std::vector<size_t> offsets_into_section;
(...skipping 15 matching lines...) Expand all
782 size_t section_begin) { 769 size_t section_begin) {
783 if (offsets_for_adjustment) { 770 if (offsets_for_adjustment) {
784 DCHECK_EQ(offsets_for_adjustment->size(), offsets_into_section.size()); 771 DCHECK_EQ(offsets_for_adjustment->size(), offsets_into_section.size());
785 std::vector<size_t>::const_iterator host_offsets_iter = 772 std::vector<size_t>::const_iterator host_offsets_iter =
786 offsets_into_section.begin(); 773 offsets_into_section.begin();
787 for (std::vector<size_t>::iterator offsets_iter = 774 for (std::vector<size_t>::iterator offsets_iter =
788 offsets_for_adjustment->begin(); 775 offsets_for_adjustment->begin();
789 offsets_iter != offsets_for_adjustment->end(); 776 offsets_iter != offsets_for_adjustment->end();
790 ++offsets_iter, ++host_offsets_iter) { 777 ++offsets_iter, ++host_offsets_iter) {
791 size_t offset = *offsets_iter; 778 size_t offset = *offsets_iter;
792 if (offset == std::wstring::npos || offset < section_begin) { 779 if (offset == string16::npos || offset < section_begin) {
793 // The offset is before the host section so leave it as is. 780 // The offset is before the host section so leave it as is.
794 continue; 781 continue;
795 } 782 }
796 if (offset >= section_begin + old_section_len) { 783 if (offset >= section_begin + old_section_len) {
797 // The offset is after the host section so adjust by host length delta. 784 // The offset is after the host section so adjust by host length delta.
798 offset += new_section_len - old_section_len; 785 offset += new_section_len - old_section_len;
799 } else if (*host_offsets_iter != std::wstring::npos) { 786 } else if (*host_offsets_iter != string16::npos) {
800 // The offset is within the host and valid so adjust by the host 787 // The offset is within the host and valid so adjust by the host
801 // reformatting offsets results. 788 // reformatting offsets results.
802 offset = section_begin + *host_offsets_iter; 789 offset = section_begin + *host_offsets_iter;
803 } else { 790 } else {
804 // The offset is invalid. 791 // The offset is invalid.
805 offset = std::wstring::npos; 792 offset = string16::npos;
806 } 793 }
807 *offsets_iter = offset; 794 *offsets_iter = offset;
808 } 795 }
809 } 796 }
810 } 797 }
811 798
812 // If |component| is valid, its begin is incremented by |delta|. 799 // If |component| is valid, its begin is incremented by |delta|.
813 void AdjustComponent(int delta, url_parse::Component* component) { 800 void AdjustComponent(int delta, url_parse::Component* component) {
814 if (!component->is_valid()) 801 if (!component->is_valid())
815 return; 802 return;
816 803
817 DCHECK(delta >= 0 || component->begin >= -delta); 804 DCHECK(delta >= 0 || component->begin >= -delta);
818 component->begin += delta; 805 component->begin += delta;
819 } 806 }
820 807
821 // Adjusts all the components of |parsed| by |delta|, except for the scheme. 808 // Adjusts all the components of |parsed| by |delta|, except for the scheme.
822 void AdjustComponents(int delta, url_parse::Parsed* parsed) { 809 void AdjustComponents(int delta, url_parse::Parsed* parsed) {
823 AdjustComponent(delta, &(parsed->username)); 810 AdjustComponent(delta, &(parsed->username));
824 AdjustComponent(delta, &(parsed->password)); 811 AdjustComponent(delta, &(parsed->password));
825 AdjustComponent(delta, &(parsed->host)); 812 AdjustComponent(delta, &(parsed->host));
826 AdjustComponent(delta, &(parsed->port)); 813 AdjustComponent(delta, &(parsed->port));
827 AdjustComponent(delta, &(parsed->path)); 814 AdjustComponent(delta, &(parsed->path));
828 AdjustComponent(delta, &(parsed->query)); 815 AdjustComponent(delta, &(parsed->query));
829 AdjustComponent(delta, &(parsed->ref)); 816 AdjustComponent(delta, &(parsed->ref));
830 } 817 }
831 818
832 std::wstring FormatUrlInternal(const GURL& url, 819 // Helper for FormatUrlWithOffsets().
833 const std::wstring& languages, 820 string16 FormatViewSourceUrl(const GURL& url,
834 FormatUrlTypes format_types, 821 const std::string& languages,
835 UnescapeRule::Type unescape_rules, 822 FormatUrlTypes format_types,
836 url_parse::Parsed* new_parsed, 823 UnescapeRule::Type unescape_rules,
837 size_t* prefix_end, 824 url_parse::Parsed* new_parsed,
838 std::vector<size_t>* offsets_for_adjustment); 825 size_t* prefix_end,
839 826 std::vector<size_t>* offsets_for_adjustment) {
840 // Helper for FormatUrl()/FormatUrlInternal().
841 std::wstring FormatViewSourceUrl(const GURL& url,
842 const std::wstring& languages,
843 FormatUrlTypes format_types,
844 UnescapeRule::Type unescape_rules,
845 url_parse::Parsed* new_parsed,
846 size_t* prefix_end,
847 std::vector<size_t>* offsets_for_adjustment) {
848 DCHECK(new_parsed); 827 DCHECK(new_parsed);
849 DCHECK(offsets_for_adjustment); 828 DCHECK(offsets_for_adjustment);
850 const wchar_t* const kWideViewSource = L"view-source:"; 829 const char kViewSource[] = "view-source:";
851 const size_t kViewSourceLengthPlus1 = 12; 830 const size_t kViewSourceLength = arraysize(kViewSource) - 1;
852 std::vector<size_t> saved_offsets(*offsets_for_adjustment); 831 std::vector<size_t> saved_offsets(*offsets_for_adjustment);
853 832
854 GURL real_url(url.possibly_invalid_spec().substr(kViewSourceLengthPlus1)); 833 GURL real_url(url.possibly_invalid_spec().substr(kViewSourceLength));
855 // Clamp the offsets to the source area. 834 // Clamp the offsets to the source area.
856 std::for_each(offsets_for_adjustment->begin(), 835 std::for_each(offsets_for_adjustment->begin(),
857 offsets_for_adjustment->end(), 836 offsets_for_adjustment->end(),
858 SubtractFromOffset(kViewSourceLengthPlus1)); 837 SubtractFromOffset(kViewSourceLength));
859 std::wstring result = FormatUrlInternal(real_url, languages, format_types, 838 string16 result = FormatUrlWithOffsets(real_url, languages, format_types,
860 unescape_rules, new_parsed, prefix_end, offsets_for_adjustment); 839 unescape_rules, new_parsed, prefix_end, offsets_for_adjustment);
861 result.insert(0, kWideViewSource); 840 result.insert(0, ASCIIToUTF16(kViewSource));
862 841
863 // Adjust position values. 842 // Adjust position values.
864 if (new_parsed->scheme.is_nonempty()) { 843 if (new_parsed->scheme.is_nonempty()) {
865 // Assume "view-source:real-scheme" as a scheme. 844 // Assume "view-source:real-scheme" as a scheme.
866 new_parsed->scheme.len += kViewSourceLengthPlus1; 845 new_parsed->scheme.len += kViewSourceLength;
867 } else { 846 } else {
868 new_parsed->scheme.begin = 0; 847 new_parsed->scheme.begin = 0;
869 new_parsed->scheme.len = kViewSourceLengthPlus1 - 1; 848 new_parsed->scheme.len = kViewSourceLength - 1;
870 } 849 }
871 AdjustComponents(kViewSourceLengthPlus1, new_parsed); 850 AdjustComponents(kViewSourceLength, new_parsed);
872 if (prefix_end) 851 if (prefix_end)
873 *prefix_end += kViewSourceLengthPlus1; 852 *prefix_end += kViewSourceLength;
874 std::for_each(offsets_for_adjustment->begin(), 853 std::for_each(offsets_for_adjustment->begin(),
875 offsets_for_adjustment->end(), 854 offsets_for_adjustment->end(),
876 AddToOffset(kViewSourceLengthPlus1)); 855 AddToOffset(kViewSourceLength));
877 // Restore all offsets which were not affected by FormatUrlInternal. 856 // Restore all offsets which were not affected by FormatUrlWithOffsets().
878 DCHECK_EQ(saved_offsets.size(), offsets_for_adjustment->size()); 857 DCHECK_EQ(saved_offsets.size(), offsets_for_adjustment->size());
879 for (size_t i = 0; i < saved_offsets.size(); ++i) { 858 for (size_t i = 0; i < saved_offsets.size(); ++i) {
880 if (saved_offsets[i] < kViewSourceLengthPlus1) 859 if (saved_offsets[i] < kViewSourceLength)
881 (*offsets_for_adjustment)[i] = saved_offsets[i]; 860 (*offsets_for_adjustment)[i] = saved_offsets[i];
882 } 861 }
883 return result; 862 return result;
884 } 863 }
885 864
886 // Appends the substring |in_component| inside of the URL |spec| to |output|, 865 // Appends the substring |in_component| inside of the URL |spec| to |output|,
887 // and the resulting range will be filled into |out_component|. |unescape_rules| 866 // and the resulting range will be filled into |out_component|. |unescape_rules|
888 // defines how to clean the URL for human readability. |offsets_for_adjustment| 867 // defines how to clean the URL for human readability. |offsets_for_adjustment|
889 // is an array of offsets into |output| each of which will be adjusted based on 868 // is an array of offsets into |output| each of which will be adjusted based on
890 // how it maps to the component being converted; if it is less than 869 // how it maps to the component being converted; if it is less than
891 // output->length(), it will be untouched, and if it is greater than 870 // output->length(), it will be untouched, and if it is greater than
892 // output->length() + in_component.len it will be adjusted by the difference in 871 // output->length() + in_component.len it will be adjusted by the difference in
893 // lengths between the input and output components. Otherwise it points into 872 // lengths between the input and output components. Otherwise it points into
894 // the component being converted, and is adjusted to point to the same logical 873 // the component being converted, and is adjusted to point to the same logical
895 // place in |output|. |offsets_for_adjustment| may not be NULL. 874 // place in |output|. |offsets_for_adjustment| may not be NULL.
896 void AppendFormattedComponent(const std::string& spec, 875 void AppendFormattedComponent(const std::string& spec,
897 const url_parse::Component& in_component, 876 const url_parse::Component& in_component,
898 UnescapeRule::Type unescape_rules, 877 UnescapeRule::Type unescape_rules,
899 std::wstring* output, 878 string16* output,
900 url_parse::Component* out_component, 879 url_parse::Component* out_component,
901 std::vector<size_t>* offsets_for_adjustment) { 880 std::vector<size_t>* offsets_for_adjustment) {
902 DCHECK(output); 881 DCHECK(output);
903 DCHECK(offsets_for_adjustment); 882 DCHECK(offsets_for_adjustment);
904 if (in_component.is_nonempty()) { 883 if (in_component.is_nonempty()) {
905 size_t component_begin = output->length(); 884 size_t component_begin = output->length();
906 out_component->begin = static_cast<int>(component_begin); 885 out_component->begin = static_cast<int>(component_begin);
907 886
908 // Compose a list of offsets within the component area. 887 // Compose a list of offsets within the component area.
909 std::vector<size_t> offsets_into_component = 888 std::vector<size_t> offsets_into_component =
910 OffsetsIntoSection(offsets_for_adjustment, component_begin); 889 OffsetsIntoSection(offsets_for_adjustment, component_begin);
911 890
912 if (unescape_rules == UnescapeRule::NONE) { 891 if (unescape_rules == UnescapeRule::NONE) {
913 output->append(UTF8ToWideAndAdjustOffsets( 892 output->append(UTF8ToUTF16AndAdjustOffsets(
914 spec.substr(in_component.begin, in_component.len), 893 spec.substr(in_component.begin, in_component.len),
915 &offsets_into_component)); 894 &offsets_into_component));
916 } else { 895 } else {
917 output->append(UTF16ToWideHack( 896 output->append(UnescapeAndDecodeUTF8URLComponentWithOffsets(
918 UnescapeAndDecodeUTF8URLComponentWithOffsets( 897 spec.substr(in_component.begin, in_component.len), unescape_rules,
919 spec.substr(in_component.begin, in_component.len), unescape_rules, 898 &offsets_into_component));
920 &offsets_into_component)));
921 } 899 }
922 size_t new_component_len = output->length() - component_begin; 900 size_t new_component_len = output->length() - component_begin;
923 out_component->len = static_cast<int>(new_component_len); 901 out_component->len = static_cast<int>(new_component_len);
924 902
925 // Apply offset adjustments. 903 // Apply offset adjustments.
926 size_t old_component_len = static_cast<size_t>(in_component.len); 904 size_t old_component_len = static_cast<size_t>(in_component.len);
927 ApplySectionAdjustments(offsets_into_component, offsets_for_adjustment, 905 ApplySectionAdjustments(offsets_into_component, offsets_for_adjustment,
928 old_component_len, new_component_len, component_begin); 906 old_component_len, new_component_len, component_begin);
929 } else { 907 } else {
930 out_component->reset(); 908 out_component->reset();
931 } 909 }
932 } 910 }
933 911
934 // TODO(viettrungluu): This is really the old-fashioned version, made internal.
935 // I need to really convert |FormatUrl()|.
936 std::wstring FormatUrlInternal(const GURL& url,
937 const std::wstring& languages,
938 FormatUrlTypes format_types,
939 UnescapeRule::Type unescape_rules,
940 url_parse::Parsed* new_parsed,
941 size_t* prefix_end,
942 std::vector<size_t>* offsets_for_adjustment) {
943 url_parse::Parsed parsed_temp;
944 if (!new_parsed)
945 new_parsed = &parsed_temp;
946 else
947 *new_parsed = url_parse::Parsed();
948
949 std::vector<size_t> offsets_temp;
950 if (!offsets_for_adjustment)
951 offsets_for_adjustment = &offsets_temp;
952
953 std::wstring url_string;
954
955 // Check for empty URLs or 0 available text width.
956 if (url.is_empty()) {
957 if (prefix_end)
958 *prefix_end = 0;
959 std::for_each(offsets_for_adjustment->begin(),
960 offsets_for_adjustment->end(),
961 LimitOffset<std::wstring>(0));
962 return url_string;
963 }
964
965 // Special handling for view-source:. Don't use chrome::kViewSourceScheme
966 // because this library shouldn't depend on chrome.
967 const char* const kViewSource = "view-source";
968 // Reject "view-source:view-source:..." to avoid deep recursion.
969 const char* const kViewSourceTwice = "view-source:view-source:";
970 if (url.SchemeIs(kViewSource) &&
971 !StartsWithASCII(url.possibly_invalid_spec(), kViewSourceTwice, false)) {
972 return FormatViewSourceUrl(url, languages, format_types,
973 unescape_rules, new_parsed, prefix_end, offsets_for_adjustment);
974 }
975
976 // We handle both valid and invalid URLs (this will give us the spec
977 // regardless of validity).
978 const std::string& spec = url.possibly_invalid_spec();
979 const url_parse::Parsed& parsed = url.parsed_for_possibly_invalid_spec();
980 size_t spec_length = spec.length();
981 std::for_each(offsets_for_adjustment->begin(),
982 offsets_for_adjustment->end(),
983 LimitOffset<std::wstring>(spec_length));
984
985 // Copy everything before the username (the scheme and the separators.)
986 // These are ASCII.
987 url_string.insert(url_string.end(), spec.begin(),
988 spec.begin() + parsed.CountCharactersBefore(url_parse::Parsed::USERNAME,
989 true));
990
991 const wchar_t kHTTP[] = L"http://";
992 const char kFTP[] = "ftp.";
993 // URLFixerUpper::FixupURL() treats "ftp.foo.com" as ftp://ftp.foo.com. This
994 // means that if we trim "http://" off a URL whose host starts with "ftp." and
995 // the user inputs this into any field subject to fixup (which is basically
996 // all input fields), the meaning would be changed. (In fact, often the
997 // formatted URL is directly pre-filled into an input field.) For this reason
998 // we avoid stripping "http://" in this case.
999 bool omit_http =
1000 (format_types & kFormatUrlOmitHTTP) && (url_string == kHTTP) &&
1001 (url.host().compare(0, arraysize(kFTP) - 1, kFTP) != 0);
1002
1003 new_parsed->scheme = parsed.scheme;
1004
1005 if ((format_types & kFormatUrlOmitUsernamePassword) != 0) {
1006 // Remove the username and password fields. We don't want to display those
1007 // to the user since they can be used for attacks,
1008 // e.g. "http://google.com:search@evil.ru/"
1009 new_parsed->username.reset();
1010 new_parsed->password.reset();
1011 // Update the offsets based on removed username and/or password.
1012 if (!offsets_for_adjustment->empty() &&
1013 (parsed.username.is_nonempty() || parsed.password.is_nonempty())) {
1014 AdjustOffset::Adjustments adjustments;
1015 if (parsed.username.is_nonempty() && parsed.password.is_nonempty()) {
1016 // The seeming off-by-one and off-by-two in these first two lines are to
1017 // account for the ':' after the username and '@' after the password.
1018 adjustments.push_back(AdjustOffset::Adjustment(
1019 static_cast<size_t>(parsed.username.begin),
1020 static_cast<size_t>(parsed.username.len + parsed.password.len +
1021 2), 0));
1022 } else {
1023 const url_parse::Component* nonempty_component =
1024 parsed.username.is_nonempty() ? &parsed.username : &parsed.password;
1025 // The seeming off-by-one in below is to account for the '@' after the
1026 // username/password.
1027 adjustments.push_back(AdjustOffset::Adjustment(
1028 static_cast<size_t>(nonempty_component->begin),
1029 static_cast<size_t>(nonempty_component->len + 1), 0));
1030 }
1031
1032 // Make offset adjustment.
1033 std::for_each(offsets_for_adjustment->begin(),
1034 offsets_for_adjustment->end(),
1035 AdjustOffset(adjustments));
1036 }
1037 } else {
1038 AppendFormattedComponent(spec, parsed.username, unescape_rules, &url_string,
1039 &new_parsed->username, offsets_for_adjustment);
1040 if (parsed.password.is_valid())
1041 url_string.push_back(':');
1042 AppendFormattedComponent(spec, parsed.password, unescape_rules, &url_string,
1043 &new_parsed->password, offsets_for_adjustment);
1044 if (parsed.username.is_valid() || parsed.password.is_valid())
1045 url_string.push_back('@');
1046 }
1047 if (prefix_end)
1048 *prefix_end = static_cast<size_t>(url_string.length());
1049
1050 AppendFormattedHostWithOffsets(url, languages, &url_string, new_parsed,
1051 offsets_for_adjustment);
1052
1053 // Port.
1054 if (parsed.port.is_nonempty()) {
1055 url_string.push_back(':');
1056 new_parsed->port.begin = url_string.length();
1057 url_string.insert(url_string.end(),
1058 spec.begin() + parsed.port.begin,
1059 spec.begin() + parsed.port.end());
1060 new_parsed->port.len = url_string.length() - new_parsed->port.begin;
1061 } else {
1062 new_parsed->port.reset();
1063 }
1064
1065 // Path and query both get the same general unescape & convert treatment.
1066 if (!(format_types & kFormatUrlOmitTrailingSlashOnBareHostname) ||
1067 !CanStripTrailingSlash(url)) {
1068 AppendFormattedComponent(spec, parsed.path, unescape_rules, &url_string,
1069 &new_parsed->path, offsets_for_adjustment);
1070 }
1071 if (parsed.query.is_valid())
1072 url_string.push_back('?');
1073 AppendFormattedComponent(spec, parsed.query, unescape_rules, &url_string,
1074 &new_parsed->query, offsets_for_adjustment);
1075
1076 // Reference is stored in valid, unescaped UTF-8, so we can just convert.
1077 if (parsed.ref.is_valid()) {
1078 url_string.push_back('#');
1079 size_t ref_begin = url_string.length();
1080 new_parsed->ref.begin = static_cast<int>(ref_begin);
1081
1082 // Compose a list of offsets within the section.
1083 std::vector<size_t> offsets_into_ref =
1084 OffsetsIntoSection(offsets_for_adjustment, ref_begin);
1085
1086 if (parsed.ref.len > 0) {
1087 url_string.append(UTF8ToWideAndAdjustOffsets(spec.substr(parsed.ref.begin,
1088 parsed.ref.len),
1089 &offsets_into_ref));
1090 }
1091 size_t old_ref_len = static_cast<size_t>(parsed.ref.len);
1092 size_t new_ref_len = url_string.length() - new_parsed->ref.begin;
1093 new_parsed->ref.len = static_cast<int>(new_ref_len);
1094
1095 // Apply offset adjustments.
1096 ApplySectionAdjustments(offsets_into_ref, offsets_for_adjustment,
1097 old_ref_len, new_ref_len, ref_begin);
1098 }
1099
1100 // If we need to strip out http do it after the fact. This way we don't need
1101 // to worry about how offset_for_adjustment is interpreted.
1102 const size_t kHTTPSize = arraysize(kHTTP) - 1;
1103 if (omit_http && !url_string.compare(0, kHTTPSize, kHTTP)) {
1104 url_string = url_string.substr(kHTTPSize);
1105 AdjustOffset::Adjustments adjustments;
1106 adjustments.push_back(AdjustOffset::Adjustment(0, kHTTPSize, 0));
1107 std::for_each(offsets_for_adjustment->begin(),
1108 offsets_for_adjustment->end(),
1109 AdjustOffset(adjustments));
1110 if (prefix_end)
1111 *prefix_end -= kHTTPSize;
1112
1113 // Adjust new_parsed.
1114 DCHECK(new_parsed->scheme.is_valid());
1115 int delta = -(new_parsed->scheme.len + 3); // +3 for ://.
1116 new_parsed->scheme.reset();
1117 AdjustComponents(delta, new_parsed);
1118 }
1119
1120 return url_string;
1121 }
1122
1123 } // namespace 912 } // namespace
1124 913
1125 const FormatUrlType kFormatUrlOmitNothing = 0; 914 const FormatUrlType kFormatUrlOmitNothing = 0;
1126 const FormatUrlType kFormatUrlOmitUsernamePassword = 1 << 0; 915 const FormatUrlType kFormatUrlOmitUsernamePassword = 1 << 0;
1127 const FormatUrlType kFormatUrlOmitHTTP = 1 << 1; 916 const FormatUrlType kFormatUrlOmitHTTP = 1 << 1;
1128 const FormatUrlType kFormatUrlOmitTrailingSlashOnBareHostname = 1 << 2; 917 const FormatUrlType kFormatUrlOmitTrailingSlashOnBareHostname = 1 << 2;
1129 const FormatUrlType kFormatUrlOmitAll = kFormatUrlOmitUsernamePassword | 918 const FormatUrlType kFormatUrlOmitAll = kFormatUrlOmitUsernamePassword |
1130 kFormatUrlOmitHTTP | kFormatUrlOmitTrailingSlashOnBareHostname; 919 kFormatUrlOmitHTTP | kFormatUrlOmitTrailingSlashOnBareHostname;
1131 920
1132 // TODO(viettrungluu): We don't want non-POD globals; change this. 921 // TODO(viettrungluu): We don't want non-POD globals; change this.
(...skipping 23 matching lines...) Expand all
1156 FILE_PATH_LITERAL("#"), FILE_PATH_LITERAL("%23")); 945 FILE_PATH_LITERAL("#"), FILE_PATH_LITERAL("%23"));
1157 946
1158 #if defined(OS_POSIX) 947 #if defined(OS_POSIX)
1159 ReplaceSubstringsAfterOffset(&url_string, 0, 948 ReplaceSubstringsAfterOffset(&url_string, 0,
1160 FILE_PATH_LITERAL("\\"), FILE_PATH_LITERAL("%5C")); 949 FILE_PATH_LITERAL("\\"), FILE_PATH_LITERAL("%5C"));
1161 #endif 950 #endif
1162 951
1163 return GURL(url_string); 952 return GURL(url_string);
1164 } 953 }
1165 954
1166 std::wstring GetSpecificHeader(const std::wstring& headers,
1167 const std::wstring& name) {
1168 return GetSpecificHeaderT(headers, name);
1169 }
1170
1171 std::string GetSpecificHeader(const std::string& headers, 955 std::string GetSpecificHeader(const std::string& headers,
1172 const std::string& name) { 956 const std::string& name) {
1173 return GetSpecificHeaderT(headers, name); 957 // We want to grab the Value from the "Key: Value" pairs in the headers,
958 // which should look like this (no leading spaces, \n-separated) (we format
959 // them this way in url_request_inet.cc):
960 // HTTP/1.1 200 OK\n
961 // ETag: "6d0b8-947-24f35ec0"\n
962 // Content-Length: 2375\n
963 // Content-Type: text/html; charset=UTF-8\n
964 // Last-Modified: Sun, 03 Sep 2006 04:34:43 GMT\n
965 if (headers.empty())
966 return std::string();
967
968 std::string match('\n' + name + ':');
969
970 std::string::const_iterator begin =
971 search(headers.begin(), headers.end(), match.begin(), match.end(),
972 base::CaseInsensitiveCompareASCII<char>());
973
974 if (begin == headers.end())
975 return std::string();
976
977 begin += match.length();
978
979 std::string ret;
980 TrimWhitespace(std::string(begin, find(begin, headers.end(), '\n')), TRIM_ALL,
981 &ret);
982 return ret;
1174 } 983 }
1175 984
1176 bool DecodeCharset(const std::string& input, 985 bool DecodeCharset(const std::string& input,
1177 std::string* decoded_charset, 986 std::string* decoded_charset,
1178 std::string* value) { 987 std::string* value) {
1179 StringTokenizer t(input, "'"); 988 StringTokenizer t(input, "'");
1180 t.set_options(StringTokenizer::RETURN_DELIMS); 989 t.set_options(StringTokenizer::RETURN_DELIMS);
1181 std::string temp_charset; 990 std::string temp_charset;
1182 std::string temp_value; 991 std::string temp_value;
1183 int numDelimsSeen = 0; 992 int numDelimsSeen = 0;
(...skipping 54 matching lines...) Expand 10 before | Expand all | Expand 10 after
1238 param_value = GetHeaderParamValue(header, "name", 1047 param_value = GetHeaderParamValue(header, "name",
1239 QuoteRule::REMOVE_OUTER_QUOTES); 1048 QuoteRule::REMOVE_OUTER_QUOTES);
1240 } 1049 }
1241 if (param_value.empty()) 1050 if (param_value.empty())
1242 return std::string(); 1051 return std::string();
1243 if (DecodeParamValue(param_value, referrer_charset, &decoded)) 1052 if (DecodeParamValue(param_value, referrer_charset, &decoded))
1244 return decoded; 1053 return decoded;
1245 return std::string(); 1054 return std::string();
1246 } 1055 }
1247 1056
1248 std::wstring GetHeaderParamValue(const std::wstring& field, 1057 // TODO(mpcomplete): This is a quick and dirty implementation for now. I'm
1249 const std::wstring& param_name, 1058 // sure this doesn't properly handle all (most?) cases.
1250 QuoteRule::Type quote_rule) { 1059 std::string GetHeaderParamValue(const std::string& header,
1251 return GetHeaderParamValueT(field, param_name, quote_rule); 1060 const std::string& param_name,
1061 QuoteRule::Type quote_rule) {
1062 // This assumes args are formatted exactly like "bla; arg1=value; arg2=value".
1063 std::string::const_iterator param_begin =
1064 search(header.begin(), header.end(), param_name.begin(), param_name.end(),
1065 base::CaseInsensitiveCompareASCII<char>());
1066
1067 if (param_begin == header.end())
1068 return std::string();
1069 param_begin += param_name.length();
1070
1071 std::string whitespace(" \t");
1072 size_t equals_offset =
1073 header.find_first_not_of(whitespace, param_begin - header.begin());
1074 if (equals_offset == std::string::npos || header[equals_offset] != '=')
1075 return std::string();
1076
1077 param_begin = header.begin() + equals_offset + 1;
1078 if (param_begin == header.end())
1079 return std::string();
1080
1081 std::string::const_iterator param_end;
1082 if (*param_begin == '"' && quote_rule == QuoteRule::REMOVE_OUTER_QUOTES) {
1083 ++param_begin; // skip past the quote.
1084 param_end = find(param_begin, header.end(), '"');
1085 // If the closing quote is missing, we will treat the rest of the
1086 // string as the parameter. We can't set |param_end| to the
1087 // location of the separator (';'), since the separator is
1088 // technically quoted. See: http://crbug.com/58840
1089 } else {
1090 param_end = find(param_begin + 1, header.end(), ';');
1091 }
1092
1093 return std::string(param_begin, param_end);
1252 } 1094 }
1253 1095
1254 std::string GetHeaderParamValue(const std::string& field, 1096 string16 IDNToUnicode(const char* host,
1255 const std::string& param_name, 1097 size_t host_len,
1256 QuoteRule::Type quote_rule) { 1098 const std::string& languages) {
1257 return GetHeaderParamValueT(field, param_name, quote_rule);
1258 }
1259
1260 // TODO(brettw) bug 734373: check the scripts for each host component and
1261 // don't un-IDN-ize if there is more than one. Alternatively, only IDN for
1262 // scripts that the user has installed. For now, just put the entire
1263 // path through IDN. Maybe this feature can be implemented in ICU itself?
1264 //
1265 // We may want to skip this step in the case of file URLs to allow unicode
1266 // UNC hostnames regardless of encodings.
1267 std::wstring IDNToUnicodeWithOffsets(
1268 const char* host,
1269 size_t host_len,
1270 const std::wstring& languages,
1271 std::vector<size_t>* offsets_for_adjustment) {
1272 // Convert the ASCII input to a wide string for ICU.
1273 string16 input16;
1274 input16.reserve(host_len);
1275 input16.insert(input16.end(), host, host + host_len);
1276
1277 // Do each component of the host separately, since we enforce script matching
1278 // on a per-component basis.
1279 AdjustOffset::Adjustments adjustments;
1280 string16 out16;
1281 for (size_t component_start = 0, component_end;
1282 component_start < input16.length();
1283 component_start = component_end + 1) {
1284 // Find the end of the component.
1285 component_end = input16.find('.', component_start);
1286 if (component_end == string16::npos)
1287 component_end = input16.length(); // For getting the last component.
1288 size_t component_length = component_end - component_start;
1289 size_t new_component_start = out16.length();
1290 bool converted_idn = false;
1291 if (component_end > component_start) {
1292 // Add the substring that we just found.
1293 converted_idn = IDNToUnicodeOneComponent(input16.data() + component_start,
1294 component_length, languages, &out16);
1295 }
1296 size_t new_component_length = out16.length() - new_component_start;
1297
1298 if (converted_idn && offsets_for_adjustment) {
1299 adjustments.push_back(AdjustOffset::Adjustment(
1300 component_start, component_length, new_component_length));
1301 }
1302
1303 // Need to add the dot we just found (if we found one).
1304 if (component_end < input16.length())
1305 out16.push_back('.');
1306 }
1307
1308 // Make offset adjustment.
1309 if (offsets_for_adjustment && !adjustments.empty()) {
1310 std::for_each(offsets_for_adjustment->begin(),
1311 offsets_for_adjustment->end(),
1312 AdjustOffset(adjustments));
1313 }
1314
1315 return UTF16ToWideAndAdjustOffsets(out16, offsets_for_adjustment);
1316 }
1317
1318 std::wstring IDNToUnicode(const char* host,
1319 size_t host_len,
1320 const std::wstring& languages,
1321 size_t* offset_for_adjustment) {
1322 std::vector<size_t> offsets; 1099 std::vector<size_t> offsets;
1323 if (offset_for_adjustment) 1100 return IDNToUnicodeWithOffsets(host, host_len, languages, &offsets);
1324 offsets.push_back(*offset_for_adjustment);
1325 std::wstring result =
1326 IDNToUnicodeWithOffsets(host, host_len, languages, &offsets);
1327 if (offset_for_adjustment)
1328 *offset_for_adjustment = offsets[0];
1329 return result;
1330 } 1101 }
1331 1102
1332 std::string CanonicalizeHost(const std::string& host, 1103 std::string CanonicalizeHost(const std::string& host,
1333 url_canon::CanonHostInfo* host_info) { 1104 url_canon::CanonHostInfo* host_info) {
1334 // Try to canonicalize the host. 1105 // Try to canonicalize the host.
1335 const url_parse::Component raw_host_component( 1106 const url_parse::Component raw_host_component(
1336 0, static_cast<int>(host.length())); 1107 0, static_cast<int>(host.length()));
1337 std::string canon_host; 1108 std::string canon_host;
1338 url_canon::StdStringCanonOutput canon_host_output(&canon_host); 1109 url_canon::StdStringCanonOutput canon_host_output(&canon_host);
1339 url_canon::CanonicalizeHostVerbose(host.c_str(), raw_host_component, 1110 url_canon::CanonicalizeHostVerbose(host.c_str(), raw_host_component,
1340 &canon_host_output, host_info); 1111 &canon_host_output, host_info);
1341 1112
1342 if (host_info->out_host.is_nonempty() && 1113 if (host_info->out_host.is_nonempty() &&
1343 host_info->family != url_canon::CanonHostInfo::BROKEN) { 1114 host_info->family != url_canon::CanonHostInfo::BROKEN) {
1344 // Success! Assert that there's no extra garbage. 1115 // Success! Assert that there's no extra garbage.
1345 canon_host_output.Complete(); 1116 canon_host_output.Complete();
1346 DCHECK_EQ(host_info->out_host.len, static_cast<int>(canon_host.length())); 1117 DCHECK_EQ(host_info->out_host.len, static_cast<int>(canon_host.length()));
1347 } else { 1118 } else {
1348 // Empty host, or canonicalization failed. We'll return empty. 1119 // Empty host, or canonicalization failed. We'll return empty.
1349 canon_host.clear(); 1120 canon_host.clear();
1350 } 1121 }
1351 1122
1352 return canon_host; 1123 return canon_host;
1353 } 1124 }
1354 1125
1355 std::string CanonicalizeHost(const std::wstring& host,
1356 url_canon::CanonHostInfo* host_info) {
1357 std::string converted_host;
1358 WideToUTF8(host.c_str(), host.length(), &converted_host);
1359 return CanonicalizeHost(converted_host, host_info);
1360 }
1361
1362 std::string GetDirectoryListingHeader(const string16& title) { 1126 std::string GetDirectoryListingHeader(const string16& title) {
1363 static const base::StringPiece header( 1127 static const base::StringPiece header(
1364 NetModule::GetResource(IDR_DIR_HEADER_HTML)); 1128 NetModule::GetResource(IDR_DIR_HEADER_HTML));
1365 // This can be null in unit tests. 1129 // This can be null in unit tests.
1366 DLOG_IF(WARNING, header.empty()) << 1130 DLOG_IF(WARNING, header.empty()) <<
1367 "Missing resource: directory listing header"; 1131 "Missing resource: directory listing header";
1368 1132
1369 std::string result; 1133 std::string result;
1370 if (!header.empty()) 1134 if (!header.empty())
1371 result.assign(header.data(), header.size()); 1135 result.assign(header.data(), header.size());
(...skipping 362 matching lines...) Expand 10 before | Expand all | Expand 10 after
1734 *username = UnescapeAndDecodeUTF8URLComponent(url.username(), flags, NULL); 1498 *username = UnescapeAndDecodeUTF8URLComponent(url.username(), flags, NULL);
1735 *password = UnescapeAndDecodeUTF8URLComponent(url.password(), flags, NULL); 1499 *password = UnescapeAndDecodeUTF8URLComponent(url.password(), flags, NULL);
1736 } 1500 }
1737 1501
1738 std::string GetHostOrSpecFromURL(const GURL& url) { 1502 std::string GetHostOrSpecFromURL(const GURL& url) {
1739 return url.has_host() ? TrimEndingDot(url.host()) : url.spec(); 1503 return url.has_host() ? TrimEndingDot(url.host()) : url.spec();
1740 } 1504 }
1741 1505
1742 void AppendFormattedHostWithOffsets( 1506 void AppendFormattedHostWithOffsets(
1743 const GURL& url, 1507 const GURL& url,
1744 const std::wstring& languages, 1508 const std::string& languages,
1745 std::wstring* output, 1509 string16* output,
1746 url_parse::Parsed* new_parsed, 1510 url_parse::Parsed* new_parsed,
1747 std::vector<size_t>* offsets_for_adjustment) { 1511 std::vector<size_t>* offsets_for_adjustment) {
1748 DCHECK(output); 1512 DCHECK(output);
1749 const url_parse::Component& host = 1513 const url_parse::Component& host =
1750 url.parsed_for_possibly_invalid_spec().host; 1514 url.parsed_for_possibly_invalid_spec().host;
1751 1515
1752 if (host.is_nonempty()) { 1516 if (host.is_nonempty()) {
1753 // Handle possible IDN in the host name. 1517 // Handle possible IDN in the host name.
1754 size_t host_begin = output->length(); 1518 size_t host_begin = output->length();
1755 if (new_parsed) 1519 if (new_parsed)
1756 new_parsed->host.begin = static_cast<int>(host_begin); 1520 new_parsed->host.begin = static_cast<int>(host_begin);
1757 size_t old_host_len = static_cast<size_t>(host.len); 1521 size_t old_host_len = static_cast<size_t>(host.len);
1758 1522
1759 // Compose a list of offsets within the host area. 1523 // Compose a list of offsets within the host area.
1760 std::vector<size_t> offsets_into_host = 1524 std::vector<size_t> offsets_into_host =
1761 OffsetsIntoSection(offsets_for_adjustment, host_begin); 1525 OffsetsIntoSection(offsets_for_adjustment, host_begin);
1762 1526
1763 const std::string& spec = url.possibly_invalid_spec(); 1527 const std::string& spec = url.possibly_invalid_spec();
1764 DCHECK(host.begin >= 0 && 1528 DCHECK(host.begin >= 0 &&
1765 ((spec.length() == 0 && host.begin == 0) || 1529 ((spec.length() == 0 && host.begin == 0) ||
1766 host.begin < static_cast<int>(spec.length()))); 1530 host.begin < static_cast<int>(spec.length())));
1767 output->append(IDNToUnicodeWithOffsets(&spec[host.begin], old_host_len, 1531 output->append(IDNToUnicodeWithOffsets(&spec[host.begin], old_host_len,
1768 languages, &offsets_into_host)); 1532 languages, &offsets_into_host));
1769 1533
1770 size_t new_host_len = output->length() - host_begin; 1534 size_t new_host_len = output->length() - host_begin;
1771 if (new_parsed) 1535 if (new_parsed)
1772 new_parsed->host.len = static_cast<int>(new_host_len); 1536 new_parsed->host.len = static_cast<int>(new_host_len);
1773 1537
1774 // Apply offset adjustments. 1538 // Apply offset adjustments.
1775 ApplySectionAdjustments(offsets_into_host, offsets_for_adjustment, 1539 ApplySectionAdjustments(offsets_into_host, offsets_for_adjustment,
1776 old_host_len, new_host_len, host_begin); 1540 old_host_len, new_host_len, host_begin);
1777 } else if (new_parsed) { 1541 } else if (new_parsed) {
1778 new_parsed->host.reset(); 1542 new_parsed->host.reset();
1779 } 1543 }
1780 } 1544 }
1781 1545
1782 void AppendFormattedHost(const GURL& url, 1546 void AppendFormattedHost(const GURL& url,
1783 const std::wstring& languages, 1547 const std::string& languages,
1784 std::wstring* output, 1548 string16* output,
1785 url_parse::Parsed* new_parsed, 1549 url_parse::Parsed* new_parsed,
1786 size_t* offset_for_adjustment) { 1550 size_t* offset_for_adjustment) {
1787 std::vector<size_t> offsets; 1551 std::vector<size_t> offsets;
1788 if (offset_for_adjustment) 1552 if (offset_for_adjustment)
1789 offsets.push_back(*offset_for_adjustment); 1553 offsets.push_back(*offset_for_adjustment);
1790 AppendFormattedHostWithOffsets(url, languages, output, new_parsed, &offsets); 1554 AppendFormattedHostWithOffsets(url, languages, output, new_parsed, &offsets);
1791 if (offset_for_adjustment) 1555 if (offset_for_adjustment)
1792 *offset_for_adjustment = offsets[0]; 1556 *offset_for_adjustment = offsets[0];
1793 } 1557 }
1794 1558
1795 // TODO(viettrungluu): convert the wstring |FormatUrlInternal()|.
1796 string16 FormatUrlWithOffsets(const GURL& url, 1559 string16 FormatUrlWithOffsets(const GURL& url,
1797 const std::string& languages, 1560 const std::string& languages,
1798 FormatUrlTypes format_types, 1561 FormatUrlTypes format_types,
1799 UnescapeRule::Type unescape_rules, 1562 UnescapeRule::Type unescape_rules,
1800 url_parse::Parsed* new_parsed, 1563 url_parse::Parsed* new_parsed,
1801 size_t* prefix_end, 1564 size_t* prefix_end,
1802 std::vector<size_t>* offsets_for_adjustment) { 1565 std::vector<size_t>* offsets_for_adjustment) {
1803 return WideToUTF16Hack( 1566 url_parse::Parsed parsed_temp;
brettw 2011/04/25 16:19:44 I'm assuming you just moved this code and changed
Peter Kasting 2011/04/25 17:44:52 The only non-trivial change was to change kHTTP fr
1804 FormatUrlInternal(url, ASCIIToWide(languages), format_types, 1567 if (!new_parsed)
1805 unescape_rules, new_parsed, prefix_end, 1568 new_parsed = &parsed_temp;
1806 offsets_for_adjustment)); 1569 else
1570 *new_parsed = url_parse::Parsed();
1571
1572 std::vector<size_t> offsets_temp;
1573 if (!offsets_for_adjustment)
1574 offsets_for_adjustment = &offsets_temp;
1575
1576 string16 url_string;
1577
1578 // Check for empty URLs or 0 available text width.
1579 if (url.is_empty()) {
1580 if (prefix_end)
1581 *prefix_end = 0;
1582 std::for_each(offsets_for_adjustment->begin(),
1583 offsets_for_adjustment->end(),
1584 LimitOffset<string16>(0));
1585 return url_string;
1586 }
1587
1588 // Special handling for view-source:. Don't use chrome::kViewSourceScheme
1589 // because this library shouldn't depend on chrome.
1590 const char* const kViewSource = "view-source";
1591 // Reject "view-source:view-source:..." to avoid deep recursion.
1592 const char* const kViewSourceTwice = "view-source:view-source:";
1593 if (url.SchemeIs(kViewSource) &&
1594 !StartsWithASCII(url.possibly_invalid_spec(), kViewSourceTwice, false)) {
1595 return FormatViewSourceUrl(url, languages, format_types,
1596 unescape_rules, new_parsed, prefix_end, offsets_for_adjustment);
1597 }
1598
1599 // We handle both valid and invalid URLs (this will give us the spec
1600 // regardless of validity).
1601 const std::string& spec = url.possibly_invalid_spec();
1602 const url_parse::Parsed& parsed = url.parsed_for_possibly_invalid_spec();
1603 size_t spec_length = spec.length();
1604 std::for_each(offsets_for_adjustment->begin(),
1605 offsets_for_adjustment->end(),
1606 LimitOffset<string16>(spec_length));
1607
1608 // Copy everything before the username (the scheme and the separators.)
1609 // These are ASCII.
1610 url_string.insert(url_string.end(), spec.begin(),
1611 spec.begin() + parsed.CountCharactersBefore(url_parse::Parsed::USERNAME,
1612 true));
1613
1614 string16 kHTTP = ASCIIToUTF16("http://");
Avi (use Gerrit) 2011/04/25 17:52:07 eww. const char like kFTP below.
1615 const char kFTP[] = "ftp.";
1616 // URLFixerUpper::FixupURL() treats "ftp.foo.com" as ftp://ftp.foo.com. This
1617 // means that if we trim "http://" off a URL whose host starts with "ftp." and
1618 // the user inputs this into any field subject to fixup (which is basically
1619 // all input fields), the meaning would be changed. (In fact, often the
1620 // formatted URL is directly pre-filled into an input field.) For this reason
1621 // we avoid stripping "http://" in this case.
1622 bool omit_http = (format_types & kFormatUrlOmitHTTP) &&
1623 (url_string == kHTTP) &&
1624 (url.host().compare(0, arraysize(kFTP) - 1, kFTP) != 0);
Avi (use Gerrit) 2011/04/25 17:52:07 Can you use string_util's LowerCaseEqualsASCII?
brettw 2011/04/25 17:56:28 The host name will be canonicalized so this isn't
1625
1626 new_parsed->scheme = parsed.scheme;
1627
1628 if ((format_types & kFormatUrlOmitUsernamePassword) != 0) {
1629 // Remove the username and password fields. We don't want to display those
1630 // to the user since they can be used for attacks,
1631 // e.g. "http://google.com:search@evil.ru/"
1632 new_parsed->username.reset();
1633 new_parsed->password.reset();
1634 // Update the offsets based on removed username and/or password.
1635 if (!offsets_for_adjustment->empty() &&
1636 (parsed.username.is_nonempty() || parsed.password.is_nonempty())) {
1637 AdjustOffset::Adjustments adjustments;
1638 if (parsed.username.is_nonempty() && parsed.password.is_nonempty()) {
1639 // The seeming off-by-one and off-by-two in these first two lines are to
1640 // account for the ':' after the username and '@' after the password.
1641 adjustments.push_back(AdjustOffset::Adjustment(
1642 static_cast<size_t>(parsed.username.begin),
1643 static_cast<size_t>(parsed.username.len + parsed.password.len +
1644 2), 0));
1645 } else {
1646 const url_parse::Component* nonempty_component =
1647 parsed.username.is_nonempty() ? &parsed.username : &parsed.password;
1648 // The seeming off-by-one in below is to account for the '@' after the
1649 // username/password.
1650 adjustments.push_back(AdjustOffset::Adjustment(
1651 static_cast<size_t>(nonempty_component->begin),
1652 static_cast<size_t>(nonempty_component->len + 1), 0));
1653 }
1654
1655 // Make offset adjustment.
1656 std::for_each(offsets_for_adjustment->begin(),
1657 offsets_for_adjustment->end(),
1658 AdjustOffset(adjustments));
1659 }
1660 } else {
1661 AppendFormattedComponent(spec, parsed.username, unescape_rules, &url_string,
1662 &new_parsed->username, offsets_for_adjustment);
1663 if (parsed.password.is_valid())
1664 url_string.push_back(':');
1665 AppendFormattedComponent(spec, parsed.password, unescape_rules, &url_string,
1666 &new_parsed->password, offsets_for_adjustment);
1667 if (parsed.username.is_valid() || parsed.password.is_valid())
1668 url_string.push_back('@');
1669 }
1670 if (prefix_end)
1671 *prefix_end = static_cast<size_t>(url_string.length());
1672
1673 AppendFormattedHostWithOffsets(url, languages, &url_string, new_parsed,
1674 offsets_for_adjustment);
1675
1676 // Port.
1677 if (parsed.port.is_nonempty()) {
1678 url_string.push_back(':');
1679 new_parsed->port.begin = url_string.length();
1680 url_string.insert(url_string.end(),
1681 spec.begin() + parsed.port.begin,
1682 spec.begin() + parsed.port.end());
1683 new_parsed->port.len = url_string.length() - new_parsed->port.begin;
1684 } else {
1685 new_parsed->port.reset();
1686 }
1687
1688 // Path and query both get the same general unescape & convert treatment.
1689 if (!(format_types & kFormatUrlOmitTrailingSlashOnBareHostname) ||
1690 !CanStripTrailingSlash(url)) {
1691 AppendFormattedComponent(spec, parsed.path, unescape_rules, &url_string,
1692 &new_parsed->path, offsets_for_adjustment);
1693 }
1694 if (parsed.query.is_valid())
1695 url_string.push_back('?');
1696 AppendFormattedComponent(spec, parsed.query, unescape_rules, &url_string,
1697 &new_parsed->query, offsets_for_adjustment);
1698
1699 // Reference is stored in valid, unescaped UTF-8, so we can just convert.
1700 if (parsed.ref.is_valid()) {
1701 url_string.push_back('#');
1702 size_t ref_begin = url_string.length();
1703 new_parsed->ref.begin = static_cast<int>(ref_begin);
1704
1705 // Compose a list of offsets within the section.
1706 std::vector<size_t> offsets_into_ref =
1707 OffsetsIntoSection(offsets_for_adjustment, ref_begin);
1708
1709 if (parsed.ref.len > 0) {
1710 url_string.append(UTF8ToUTF16AndAdjustOffsets(
1711 spec.substr(parsed.ref.begin, parsed.ref.len), &offsets_into_ref));
1712 }
1713 size_t old_ref_len = static_cast<size_t>(parsed.ref.len);
1714 size_t new_ref_len = url_string.length() - new_parsed->ref.begin;
1715 new_parsed->ref.len = static_cast<int>(new_ref_len);
1716
1717 // Apply offset adjustments.
1718 ApplySectionAdjustments(offsets_into_ref, offsets_for_adjustment,
1719 old_ref_len, new_ref_len, ref_begin);
1720 }
1721
1722 // If we need to strip out http do it after the fact. This way we don't need
1723 // to worry about how offset_for_adjustment is interpreted.
1724 const size_t kHTTPSize = kHTTP.length();
1725 if (omit_http && !url_string.compare(0, kHTTP.length(), kHTTP)) {
1726 url_string = url_string.substr(kHTTPSize);
1727 AdjustOffset::Adjustments adjustments;
1728 adjustments.push_back(AdjustOffset::Adjustment(0, kHTTPSize, 0));
1729 std::for_each(offsets_for_adjustment->begin(),
1730 offsets_for_adjustment->end(),
1731 AdjustOffset(adjustments));
1732 if (prefix_end)
1733 *prefix_end -= kHTTPSize;
1734
1735 // Adjust new_parsed.
1736 DCHECK(new_parsed->scheme.is_valid());
1737 int delta = -(new_parsed->scheme.len + 3); // +3 for ://.
1738 new_parsed->scheme.reset();
1739 AdjustComponents(delta, new_parsed);
1740 }
1741
1742 return url_string;
1807 } 1743 }
1808 1744
// FormatUrl: single-offset convenience wrapper around the multi-offset
// formatter. (Side-by-side diff: each line below shows the old revision,
// then the new revision.)
//
// If |offset_for_adjustment| is non-NULL, its value is placed into a
// one-element vector that is handed to the formatter; after the call the
// first (only) element is written back through the pointer. If it is NULL,
// an empty vector is passed and nothing is written back.
//
// Old revision: calls FormatUrlInternal() with ASCIIToWide(languages) and
// converts the result with WideToUTF16Hack().
// New revision: calls FormatUrlWithOffsets() directly with |languages|.
//
// All other arguments (|url|, |format_types|, |unescape_rules|,
// |new_parsed|, |prefix_end|) are forwarded unchanged. Returns the string
// produced by the underlying formatter.
1809 string16 FormatUrl(const GURL& url, 1745 string16 FormatUrl(const GURL& url,
1810 const std::string& languages, 1746 const std::string& languages,
1811 FormatUrlTypes format_types, 1747 FormatUrlTypes format_types,
1812 UnescapeRule::Type unescape_rules, 1748 UnescapeRule::Type unescape_rules,
1813 url_parse::Parsed* new_parsed, 1749 url_parse::Parsed* new_parsed,
1814 size_t* prefix_end, 1750 size_t* prefix_end,
1815 size_t* offset_for_adjustment) { 1751 size_t* offset_for_adjustment) {
1816 std::vector<size_t> offsets; 1752 std::vector<size_t> offsets;
1817 if (offset_for_adjustment) 1753 if (offset_for_adjustment)
1818 offsets.push_back(*offset_for_adjustment); 1754 offsets.push_back(*offset_for_adjustment);
// The next three lines are where the two revisions differ (see header note).
1819 string16 result = WideToUTF16Hack( 1755 string16 result = FormatUrlWithOffsets(url, languages, format_types,
1820 FormatUrlInternal(url, ASCIIToWide(languages), format_types, 1756 unescape_rules, new_parsed, prefix_end, &offsets);
1821 unescape_rules, new_parsed, prefix_end, &offsets));
// Copy the adjusted offset back to the caller only when one was supplied.
1822 if (offset_for_adjustment) 1757 if (offset_for_adjustment)
1823 *offset_for_adjustment = offsets[0]; 1758 *offset_for_adjustment = offsets[0];
1824 return result; 1759 return result;
1825 } 1760 }
1826 1761
1827 bool CanStripTrailingSlash(const GURL& url) { 1762 bool CanStripTrailingSlash(const GURL& url) {
1828 // Omit the path only for standard, non-file URLs with nothing but "/" after 1763 // Omit the path only for standard, non-file URLs with nothing but "/" after
1829 // the hostname. 1764 // the hostname.
1830 return url.IsStandard() && !url.SchemeIsFile() && !url.has_query() && 1765 return url.IsStandard() && !url.SchemeIsFile() && !url.has_query() &&
1831 !url.has_ref() && url.path() == "/"; 1766 !url.has_ref() && url.path() == "/";
(...skipping 440 matching lines...) Expand 10 before | Expand all | Expand 10 after
2272 } 2207 }
2273 2208
// Out-of-line NetworkInterface destructor; the body is empty in both the old
// and new revisions.
2274 NetworkInterface::~NetworkInterface() { 2209 NetworkInterface::~NetworkInterface() {
2275 } 2210 }
2276 2211
// Constructs the functor, storing |component_start| in the member of the same
// name. Unchanged between the old and new revisions.
2277 ClampComponentOffset::ClampComponentOffset(size_t component_start) 2212 ClampComponentOffset::ClampComponentOffset(size_t component_start)
2278 : component_start(component_start) {} 2213 : component_start(component_start) {}
2279 2214
// Returns |offset| unchanged when it is at or past |component_start|;
// otherwise returns the npos sentinel. The old revision uses
// std::wstring::npos and the new revision uses string16::npos; that constant
// is the only difference between the two sides.
2280 size_t ClampComponentOffset::operator()(size_t offset) { 2215 size_t ClampComponentOffset::operator()(size_t offset) {
2281 return (offset >= component_start) ? 2216 return (offset >= component_start) ?
2282 offset : std::wstring::npos; 2217 offset : string16::npos;
2283 } 2218 }
2284 2219
2285 } // namespace net 2220 } // namespace net
OLDNEW
« no previous file with comments | « net/base/net_util.h ('k') | net/base/net_util_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698