OLD | NEW |
---|---|
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "net/base/net_util.h" | 5 #include "net/base/net_util.h" |
6 | 6 |
7 #include <unicode/regex.h> | 7 #include <unicode/regex.h> |
8 #include <unicode/ucnv.h> | 8 #include <unicode/ucnv.h> |
9 #include <unicode/uidna.h> | 9 #include <unicode/uidna.h> |
10 #include <unicode/ulocdata.h> | 10 #include <unicode/ulocdata.h> |
(...skipping 137 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
148 0xFFFF, // Used to block all invalid port numbers (see | 148 0xFFFF, // Used to block all invalid port numbers (see |
149 // third_party/WebKit/Source/WebCore/platform/KURLGoogle.cpp, port()) | 149 // third_party/WebKit/Source/WebCore/platform/KURLGoogle.cpp, port()) |
150 }; | 150 }; |
151 | 151 |
152 // FTP overrides the following restricted ports. | 152 // FTP overrides the following restricted ports. |
153 static const int kAllowedFtpPorts[] = { | 153 static const int kAllowedFtpPorts[] = { |
154 21, // ftp data | 154 21, // ftp data |
155 22, // ssh | 155 22, // ssh |
156 }; | 156 }; |
157 | 157 |
158 template<typename STR> | |
159 STR GetSpecificHeaderT(const STR& headers, const STR& name) { | |
160 // We want to grab the Value from the "Key: Value" pairs in the headers, | |
161 // which should look like this (no leading spaces, \n-separated) (we format | |
162 // them this way in url_request_inet.cc): | |
163 // HTTP/1.1 200 OK\n | |
164 // ETag: "6d0b8-947-24f35ec0"\n | |
165 // Content-Length: 2375\n | |
166 // Content-Type: text/html; charset=UTF-8\n | |
167 // Last-Modified: Sun, 03 Sep 2006 04:34:43 GMT\n | |
168 if (headers.empty()) | |
169 return STR(); | |
170 | |
171 STR match; | |
172 match.push_back('\n'); | |
173 match.append(name); | |
174 match.push_back(':'); | |
175 | |
176 typename STR::const_iterator begin = | |
177 search(headers.begin(), headers.end(), match.begin(), match.end(), | |
178 base::CaseInsensitiveCompareASCII<typename STR::value_type>()); | |
179 | |
180 if (begin == headers.end()) | |
181 return STR(); | |
182 | |
183 begin += match.length(); | |
184 | |
185 typename STR::const_iterator end = find(begin, headers.end(), '\n'); | |
186 | |
187 STR ret; | |
188 TrimWhitespace(STR(begin, end), TRIM_ALL, &ret); | |
189 return ret; | |
190 } | |
191 | |
192 // Similar to Base64Decode. Decodes a Q-encoded string to a sequence | 158 // Similar to Base64Decode. Decodes a Q-encoded string to a sequence |
193 // of bytes. If input is invalid, return false. | 159 // of bytes. If input is invalid, return false. |
194 bool QPDecode(const std::string& input, std::string* output) { | 160 bool QPDecode(const std::string& input, std::string* output) { |
195 std::string temp; | 161 std::string temp; |
196 temp.reserve(input.size()); | 162 temp.reserve(input.size()); |
197 std::string::const_iterator it = input.begin(); | 163 std::string::const_iterator it = input.begin(); |
198 while (it != input.end()) { | 164 while (it != input.end()) { |
199 if (*it == '_') { | 165 if (*it == '_') { |
200 temp.push_back(' '); | 166 temp.push_back(' '); |
201 } else if (*it == '=') { | 167 } else if (*it == '=') { |
(...skipping 67 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
269 *is_rfc2047 = false; | 235 *is_rfc2047 = false; |
270 output->clear(); | 236 output->clear(); |
271 if (encoded_word.empty()) | 237 if (encoded_word.empty()) |
272 return true; | 238 return true; |
273 | 239 |
274 if (!IsStringASCII(encoded_word)) { | 240 if (!IsStringASCII(encoded_word)) { |
275 // Try UTF-8, referrer_charset and the native OS default charset in turn. | 241 // Try UTF-8, referrer_charset and the native OS default charset in turn. |
276 if (IsStringUTF8(encoded_word)) { | 242 if (IsStringUTF8(encoded_word)) { |
277 *output = encoded_word; | 243 *output = encoded_word; |
278 } else { | 244 } else { |
279 std::wstring wide_output; | 245 string16 utf16_output; |
280 if (!referrer_charset.empty() && | 246 if (!referrer_charset.empty() && |
281 base::CodepageToWide(encoded_word, referrer_charset.c_str(), | 247 base::CodepageToUTF16(encoded_word, referrer_charset.c_str(), |
282 base::OnStringConversionError::FAIL, | 248 base::OnStringConversionError::FAIL, |
283 &wide_output)) { | 249 &utf16_output)) { |
284 *output = WideToUTF8(wide_output); | 250 *output = UTF16ToUTF8(utf16_output); |
285 } else { | 251 } else { |
286 *output = WideToUTF8(base::SysNativeMBToWide(encoded_word)); | 252 *output = WideToUTF8(base::SysNativeMBToWide(encoded_word)); |
287 } | 253 } |
288 } | 254 } |
289 | 255 |
290 return true; | 256 return true; |
291 } | 257 } |
292 | 258 |
293 // RFC 2047 : one of encoding methods supported by Firefox and relatively | 259 // RFC 2047 : one of encoding methods supported by Firefox and relatively |
294 // widely used by web servers. | 260 // widely used by web servers. |
(...skipping 112 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
407 std::string decoded; | 373 std::string decoded; |
408 if (!DecodeWord(t.token(), referrer_charset, &is_previous_token_rfc2047, | 374 if (!DecodeWord(t.token(), referrer_charset, &is_previous_token_rfc2047, |
409 &decoded)) | 375 &decoded)) |
410 return false; | 376 return false; |
411 tmp.append(decoded); | 377 tmp.append(decoded); |
412 } | 378 } |
413 output->swap(tmp); | 379 output->swap(tmp); |
414 return true; | 380 return true; |
415 } | 381 } |
416 | 382 |
417 // TODO(mpcomplete): This is a quick and dirty implementation for now. I'm | |
418 // sure this doesn't properly handle all (most?) cases. | |
419 template<typename STR> | |
420 STR GetHeaderParamValueT(const STR& header, const STR& param_name, | |
421 QuoteRule::Type quote_rule) { | |
422 // This assumes args are formatted exactly like "bla; arg1=value; arg2=value". | |
423 typename STR::const_iterator param_begin = | |
424 search(header.begin(), header.end(), param_name.begin(), param_name.end(), | |
425 base::CaseInsensitiveCompareASCII<typename STR::value_type>()); | |
426 | |
427 if (param_begin == header.end()) | |
428 return STR(); | |
429 param_begin += param_name.length(); | |
430 | |
431 STR whitespace; | |
432 whitespace.push_back(' '); | |
433 whitespace.push_back('\t'); | |
434 const typename STR::size_type equals_offset = | |
435 header.find_first_not_of(whitespace, param_begin - header.begin()); | |
436 if (equals_offset == STR::npos || header.at(equals_offset) != '=') | |
437 return STR(); | |
438 | |
439 param_begin = header.begin() + equals_offset + 1; | |
440 if (param_begin == header.end()) | |
441 return STR(); | |
442 | |
443 typename STR::const_iterator param_end; | |
444 if (*param_begin == '"' && quote_rule == QuoteRule::REMOVE_OUTER_QUOTES) { | |
445 ++param_begin; // skip past the quote. | |
446 param_end = find(param_begin, header.end(), '"'); | |
447 // If the closing quote is missing, we will treat the rest of the | |
448 // string as the parameter. We can't set |param_end| to the | |
449 // location of the separator (';'), since the separator is | |
450 // technically quoted. See: http://crbug.com/58840 | |
451 } else { | |
452 param_end = find(param_begin+1, header.end(), ';'); | |
453 } | |
454 | |
455 return STR(param_begin, param_end); | |
456 } | |
457 | |
458 // Does some simple normalization of scripts so we can allow certain scripts | 383 // Does some simple normalization of scripts so we can allow certain scripts |
459 // to exist together. | 384 // to exist together. |
460 // TODO(brettw) bug 880223: we should allow some other languages to be | 385 // TODO(brettw) bug 880223: we should allow some other languages to be |
461 // combined such as Chinese and Latin. We will probably need a more | 386 // combined such as Chinese and Latin. We will probably need a more |
462 // complicated system of language pairs to have more fine-grained control. | 387 // complicated system of language pairs to have more fine-grained control. |
463 UScriptCode NormalizeScript(UScriptCode code) { | 388 UScriptCode NormalizeScript(UScriptCode code) { |
464 switch (code) { | 389 switch (code) { |
465 case USCRIPT_KATAKANA: | 390 case USCRIPT_KATAKANA: |
466 case USCRIPT_HIRAGANA: | 391 case USCRIPT_HIRAGANA: |
467 case USCRIPT_KATAKANA_OR_HIRAGANA: | 392 case USCRIPT_KATAKANA_OR_HIRAGANA: |
(...skipping 118 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
586 ulocdata_close(uld); | 511 ulocdata_close(uld); |
587 } | 512 } |
588 } | 513 } |
589 return !lang_set->isEmpty() && lang_set->containsAll(component_characters); | 514 return !lang_set->isEmpty() && lang_set->containsAll(component_characters); |
590 } | 515 } |
591 | 516 |
592 // Returns true if the given Unicode host component is safe to display to the | 517 // Returns true if the given Unicode host component is safe to display to the |
593 // user. | 518 // user. |
594 bool IsIDNComponentSafe(const char16* str, | 519 bool IsIDNComponentSafe(const char16* str, |
595 int str_len, | 520 int str_len, |
596 const std::wstring& languages) { | 521 const std::string& languages) { |
597 // Most common cases (non-IDN) do not reach here so that we don't | 522 // Most common cases (non-IDN) do not reach here so that we don't |
598 // need a fast return path. | 523 // need a fast return path. |
599 // TODO(jungshik) : Check if there's any character inappropriate | 524 // TODO(jungshik) : Check if there's any character inappropriate |
600 // (although allowed) for domain names. | 525 // (although allowed) for domain names. |
601 // See http://www.unicode.org/reports/tr39/#IDN_Security_Profiles and | 526 // See http://www.unicode.org/reports/tr39/#IDN_Security_Profiles and |
602 // http://www.unicode.org/reports/tr39/data/xidmodifications.txt | 527 // http://www.unicode.org/reports/tr39/data/xidmodifications.txt |
603 // For now, we borrowed the list from Mozilla and tweaked it slightly. | 528 // For now, we borrowed the list from Mozilla and tweaked it slightly. |
604 // (e.g. Characters like U+00A0, U+3000, U+3002 are omitted because | 529 // (e.g. Characters like U+00A0, U+3000, U+3002 are omitted because |
605 // they will be canonicalized to U+0020 and full stop before | 530 // they will be canonicalized to U+0020 and full stop before |
606 // reaching here.) | 531 // reaching here.) |
(...skipping 63 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
670 // (sync'd with characters allowed in url_canon_host with square | 595 // (sync'd with characters allowed in url_canon_host with square |
671 // brackets excluded.) See kHostCharLookup[] array in url_canon_host.cc. | 596 // brackets excluded.) See kHostCharLookup[] array in url_canon_host.cc. |
672 icu::UnicodeSet common_characters(UNICODE_STRING_SIMPLE("[[0-9]\\-_+\\ ]"), | 597 icu::UnicodeSet common_characters(UNICODE_STRING_SIMPLE("[[0-9]\\-_+\\ ]"), |
673 status); | 598 status); |
674 DCHECK(U_SUCCESS(status)); | 599 DCHECK(U_SUCCESS(status)); |
675 // Subtract common characters because they're always allowed so that | 600 // Subtract common characters because they're always allowed so that |
676 // we just have to check if a language-specific set contains | 601 // we just have to check if a language-specific set contains |
677 // the remainder. | 602 // the remainder. |
678 component_characters.removeAll(common_characters); | 603 component_characters.removeAll(common_characters); |
679 | 604 |
680 std::string languages_list(WideToASCII(languages)); | 605 StringTokenizer t(languages, ","); |
681 StringTokenizer t(languages_list, ","); | |
682 while (t.GetNext()) { | 606 while (t.GetNext()) { |
683 if (IsComponentCoveredByLang(component_characters, t.token())) | 607 if (IsComponentCoveredByLang(component_characters, t.token())) |
684 return true; | 608 return true; |
685 } | 609 } |
686 return false; | 610 return false; |
687 } | 611 } |
688 | 612 |
689 // Converts one component of a host (between dots) to IDN if safe. The result | 613 // Converts one component of a host (between dots) to IDN if safe. The result |
690 // will be APPENDED to the given output string and will be the same as the input | 614 // will be APPENDED to the given output string and will be the same as the input |
691 // if it is not IDN or the IDN is unsafe to display. Returns whether any | 615 // if it is not IDN or the IDN is unsafe to display. Returns whether any |
692 // conversion was performed. | 616 // conversion was performed. |
693 bool IDNToUnicodeOneComponent(const char16* comp, | 617 bool IDNToUnicodeOneComponent(const char16* comp, |
694 size_t comp_len, | 618 size_t comp_len, |
695 const std::wstring& languages, | 619 const std::string& languages, |
696 string16* out) { | 620 string16* out) { |
697 DCHECK(out); | 621 DCHECK(out); |
698 if (comp_len == 0) | 622 if (comp_len == 0) |
699 return false; | 623 return false; |
700 | 624 |
701 // Only transform if the input can be an IDN component. | 625 // Only transform if the input can be an IDN component. |
702 static const char16 kIdnPrefix[] = {'x', 'n', '-', '-'}; | 626 static const char16 kIdnPrefix[] = {'x', 'n', '-', '-'}; |
703 if ((comp_len > arraysize(kIdnPrefix)) && | 627 if ((comp_len > arraysize(kIdnPrefix)) && |
704 !memcmp(comp, kIdnPrefix, arraysize(kIdnPrefix) * sizeof(char16))) { | 628 !memcmp(comp, kIdnPrefix, arraysize(kIdnPrefix) * sizeof(char16))) { |
705 // Repeatedly expand the output string until it's big enough. It looks like | 629 // Repeatedly expand the output string until it's big enough. It looks like |
(...skipping 21 matching lines...) Expand all Loading... | |
727 // Failed, revert back to original string. | 651 // Failed, revert back to original string. |
728 out->resize(original_length); | 652 out->resize(original_length); |
729 } | 653 } |
730 | 654 |
731 // We get here with no IDN or on error, in which case we just append the | 655 // We get here with no IDN or on error, in which case we just append the |
732 // literal input. | 656 // literal input. |
733 out->append(comp, comp_len); | 657 out->append(comp, comp_len); |
734 return false; | 658 return false; |
735 } | 659 } |
736 | 660 |
661 // TODO(brettw) bug 734373: check the scripts for each host component and | |
662 // don't un-IDN-ize if there is more than one. Alternatively, only IDN for | |
663 // scripts that the user has installed. For now, just put the entire | |
664 // path through IDN. Maybe this feature can be implemented in ICU itself? | |
665 // | |
666 // We may want to skip this step in the case of file URLs to allow unicode | |
667 // UNC hostnames regardless of encodings. | |
668 string16 IDNToUnicodeWithOffsets( | |
669 const char* host, | |
670 size_t host_len, | |
671 const std::string& languages, | |
672 std::vector<size_t>* offsets_for_adjustment) { | |
673 // Convert the ASCII input to a string16 for ICU. | |
674 string16 input16; | |
675 input16.reserve(host_len); | |
676 input16.insert(input16.end(), host, host + host_len); | |
677 | |
678 // Do each component of the host separately, since we enforce script matching | |
679 // on a per-component basis. | |
680 AdjustOffset::Adjustments adjustments; | |
681 string16 out16; | |
682 for (size_t component_start = 0, component_end; | |
683 component_start < input16.length(); | |
684 component_start = component_end + 1) { | |
685 // Find the end of the component. | |
686 component_end = input16.find('.', component_start); | |
687 if (component_end == string16::npos) | |
688 component_end = input16.length(); // For getting the last component. | |
689 size_t component_length = component_end - component_start; | |
690 size_t new_component_start = out16.length(); | |
691 bool converted_idn = false; | |
692 if (component_end > component_start) { | |
693 // Add the substring that we just found. | |
694 converted_idn = IDNToUnicodeOneComponent(input16.data() + component_start, | |
695 component_length, languages, &out16); | |
696 } | |
697 size_t new_component_length = out16.length() - new_component_start; | |
698 | |
699 if (converted_idn && offsets_for_adjustment) { | |
700 adjustments.push_back(AdjustOffset::Adjustment( | |
701 component_start, component_length, new_component_length)); | |
702 } | |
703 | |
704 // Need to add the dot we just found (if we found one). | |
705 if (component_end < input16.length()) | |
706 out16.push_back('.'); | |
707 } | |
708 | |
709 // Make offset adjustment. | |
710 if (offsets_for_adjustment) { | |
711 if (!adjustments.empty()) { | |
712 std::for_each(offsets_for_adjustment->begin(), | |
713 offsets_for_adjustment->end(), | |
714 AdjustOffset(adjustments)); | |
715 } | |
716 std::for_each(offsets_for_adjustment->begin(), | |
717 offsets_for_adjustment->end(), | |
718 LimitOffset<string16>(out16.length())); | |
719 } | |
720 | |
721 return out16; | |
722 } | |
723 | |
737 struct SubtractFromOffset { | 724 struct SubtractFromOffset { |
738 explicit SubtractFromOffset(size_t amount) | 725 explicit SubtractFromOffset(size_t amount) |
739 : amount(amount) {} | 726 : amount(amount) {} |
740 void operator()(size_t& offset) { | 727 void operator()(size_t& offset) { |
741 if (offset != std::wstring::npos) { | 728 if (offset != string16::npos) { |
742 if (offset >= amount) | 729 if (offset >= amount) |
743 offset -= amount; | 730 offset -= amount; |
744 else | 731 else |
745 offset = std::wstring::npos; | 732 offset = string16::npos; |
746 } | 733 } |
747 } | 734 } |
748 | 735 |
749 size_t amount; | 736 size_t amount; |
750 }; | 737 }; |
751 | 738 |
752 struct AddToOffset { | 739 struct AddToOffset { |
753 explicit AddToOffset(size_t amount) | 740 explicit AddToOffset(size_t amount) |
754 : amount(amount) {} | 741 : amount(amount) {} |
755 void operator()(size_t& offset) { | 742 void operator()(size_t& offset) { |
756 if (offset != std::wstring::npos) | 743 if (offset != string16::npos) |
757 offset += amount; | 744 offset += amount; |
758 } | 745 } |
759 | 746 |
760 size_t amount; | 747 size_t amount; |
761 }; | 748 }; |
762 | 749 |
763 std::vector<size_t> OffsetsIntoSection( | 750 std::vector<size_t> OffsetsIntoSection( |
764 std::vector<size_t>* offsets_for_adjustment, | 751 std::vector<size_t>* offsets_for_adjustment, |
765 size_t section_begin) { | 752 size_t section_begin) { |
766 std::vector<size_t> offsets_into_section; | 753 std::vector<size_t> offsets_into_section; |
(...skipping 15 matching lines...) Expand all Loading... | |
782 size_t section_begin) { | 769 size_t section_begin) { |
783 if (offsets_for_adjustment) { | 770 if (offsets_for_adjustment) { |
784 DCHECK_EQ(offsets_for_adjustment->size(), offsets_into_section.size()); | 771 DCHECK_EQ(offsets_for_adjustment->size(), offsets_into_section.size()); |
785 std::vector<size_t>::const_iterator host_offsets_iter = | 772 std::vector<size_t>::const_iterator host_offsets_iter = |
786 offsets_into_section.begin(); | 773 offsets_into_section.begin(); |
787 for (std::vector<size_t>::iterator offsets_iter = | 774 for (std::vector<size_t>::iterator offsets_iter = |
788 offsets_for_adjustment->begin(); | 775 offsets_for_adjustment->begin(); |
789 offsets_iter != offsets_for_adjustment->end(); | 776 offsets_iter != offsets_for_adjustment->end(); |
790 ++offsets_iter, ++host_offsets_iter) { | 777 ++offsets_iter, ++host_offsets_iter) { |
791 size_t offset = *offsets_iter; | 778 size_t offset = *offsets_iter; |
792 if (offset == std::wstring::npos || offset < section_begin) { | 779 if (offset == string16::npos || offset < section_begin) { |
793 // The offset is before the host section so leave it as is. | 780 // The offset is before the host section so leave it as is. |
794 continue; | 781 continue; |
795 } | 782 } |
796 if (offset >= section_begin + old_section_len) { | 783 if (offset >= section_begin + old_section_len) { |
797 // The offset is after the host section so adjust by host length delta. | 784 // The offset is after the host section so adjust by host length delta. |
798 offset += new_section_len - old_section_len; | 785 offset += new_section_len - old_section_len; |
799 } else if (*host_offsets_iter != std::wstring::npos) { | 786 } else if (*host_offsets_iter != string16::npos) { |
800 // The offset is within the host and valid so adjust by the host | 787 // The offset is within the host and valid so adjust by the host |
801 // reformatting offsets results. | 788 // reformatting offsets results. |
802 offset = section_begin + *host_offsets_iter; | 789 offset = section_begin + *host_offsets_iter; |
803 } else { | 790 } else { |
804 // The offset is invalid. | 791 // The offset is invalid. |
805 offset = std::wstring::npos; | 792 offset = string16::npos; |
806 } | 793 } |
807 *offsets_iter = offset; | 794 *offsets_iter = offset; |
808 } | 795 } |
809 } | 796 } |
810 } | 797 } |
811 | 798 |
812 // If |component| is valid, its begin is incremented by |delta|. | 799 // If |component| is valid, its begin is incremented by |delta|. |
813 void AdjustComponent(int delta, url_parse::Component* component) { | 800 void AdjustComponent(int delta, url_parse::Component* component) { |
814 if (!component->is_valid()) | 801 if (!component->is_valid()) |
815 return; | 802 return; |
816 | 803 |
817 DCHECK(delta >= 0 || component->begin >= -delta); | 804 DCHECK(delta >= 0 || component->begin >= -delta); |
818 component->begin += delta; | 805 component->begin += delta; |
819 } | 806 } |
820 | 807 |
821 // Adjusts all the components of |parsed| by |delta|, except for the scheme. | 808 // Adjusts all the components of |parsed| by |delta|, except for the scheme. |
822 void AdjustComponents(int delta, url_parse::Parsed* parsed) { | 809 void AdjustComponents(int delta, url_parse::Parsed* parsed) { |
823 AdjustComponent(delta, &(parsed->username)); | 810 AdjustComponent(delta, &(parsed->username)); |
824 AdjustComponent(delta, &(parsed->password)); | 811 AdjustComponent(delta, &(parsed->password)); |
825 AdjustComponent(delta, &(parsed->host)); | 812 AdjustComponent(delta, &(parsed->host)); |
826 AdjustComponent(delta, &(parsed->port)); | 813 AdjustComponent(delta, &(parsed->port)); |
827 AdjustComponent(delta, &(parsed->path)); | 814 AdjustComponent(delta, &(parsed->path)); |
828 AdjustComponent(delta, &(parsed->query)); | 815 AdjustComponent(delta, &(parsed->query)); |
829 AdjustComponent(delta, &(parsed->ref)); | 816 AdjustComponent(delta, &(parsed->ref)); |
830 } | 817 } |
831 | 818 |
832 std::wstring FormatUrlInternal(const GURL& url, | 819 // Helper for FormatUrlWithOffsets(). |
833 const std::wstring& languages, | 820 string16 FormatViewSourceUrl(const GURL& url, |
834 FormatUrlTypes format_types, | 821 const std::string& languages, |
835 UnescapeRule::Type unescape_rules, | 822 FormatUrlTypes format_types, |
836 url_parse::Parsed* new_parsed, | 823 UnescapeRule::Type unescape_rules, |
837 size_t* prefix_end, | 824 url_parse::Parsed* new_parsed, |
838 std::vector<size_t>* offsets_for_adjustment); | 825 size_t* prefix_end, |
839 | 826 std::vector<size_t>* offsets_for_adjustment) { |
840 // Helper for FormatUrl()/FormatUrlInternal(). | |
841 std::wstring FormatViewSourceUrl(const GURL& url, | |
842 const std::wstring& languages, | |
843 FormatUrlTypes format_types, | |
844 UnescapeRule::Type unescape_rules, | |
845 url_parse::Parsed* new_parsed, | |
846 size_t* prefix_end, | |
847 std::vector<size_t>* offsets_for_adjustment) { | |
848 DCHECK(new_parsed); | 827 DCHECK(new_parsed); |
849 DCHECK(offsets_for_adjustment); | 828 DCHECK(offsets_for_adjustment); |
850 const wchar_t* const kWideViewSource = L"view-source:"; | 829 const char kViewSource[] = "view-source:"; |
851 const size_t kViewSourceLengthPlus1 = 12; | 830 const size_t kViewSourceLength = arraysize(kViewSource) - 1; |
852 std::vector<size_t> saved_offsets(*offsets_for_adjustment); | 831 std::vector<size_t> saved_offsets(*offsets_for_adjustment); |
853 | 832 |
854 GURL real_url(url.possibly_invalid_spec().substr(kViewSourceLengthPlus1)); | 833 GURL real_url(url.possibly_invalid_spec().substr(kViewSourceLength)); |
855 // Clamp the offsets to the source area. | 834 // Clamp the offsets to the source area. |
856 std::for_each(offsets_for_adjustment->begin(), | 835 std::for_each(offsets_for_adjustment->begin(), |
857 offsets_for_adjustment->end(), | 836 offsets_for_adjustment->end(), |
858 SubtractFromOffset(kViewSourceLengthPlus1)); | 837 SubtractFromOffset(kViewSourceLength)); |
859 std::wstring result = FormatUrlInternal(real_url, languages, format_types, | 838 string16 result = FormatUrlWithOffsets(real_url, languages, format_types, |
860 unescape_rules, new_parsed, prefix_end, offsets_for_adjustment); | 839 unescape_rules, new_parsed, prefix_end, offsets_for_adjustment); |
861 result.insert(0, kWideViewSource); | 840 result.insert(0, ASCIIToUTF16(kViewSource)); |
862 | 841 |
863 // Adjust position values. | 842 // Adjust position values. |
864 if (new_parsed->scheme.is_nonempty()) { | 843 if (new_parsed->scheme.is_nonempty()) { |
865 // Assume "view-source:real-scheme" as a scheme. | 844 // Assume "view-source:real-scheme" as a scheme. |
866 new_parsed->scheme.len += kViewSourceLengthPlus1; | 845 new_parsed->scheme.len += kViewSourceLength; |
867 } else { | 846 } else { |
868 new_parsed->scheme.begin = 0; | 847 new_parsed->scheme.begin = 0; |
869 new_parsed->scheme.len = kViewSourceLengthPlus1 - 1; | 848 new_parsed->scheme.len = kViewSourceLength - 1; |
870 } | 849 } |
871 AdjustComponents(kViewSourceLengthPlus1, new_parsed); | 850 AdjustComponents(kViewSourceLength, new_parsed); |
872 if (prefix_end) | 851 if (prefix_end) |
873 *prefix_end += kViewSourceLengthPlus1; | 852 *prefix_end += kViewSourceLength; |
874 std::for_each(offsets_for_adjustment->begin(), | 853 std::for_each(offsets_for_adjustment->begin(), |
875 offsets_for_adjustment->end(), | 854 offsets_for_adjustment->end(), |
876 AddToOffset(kViewSourceLengthPlus1)); | 855 AddToOffset(kViewSourceLength)); |
877 // Restore all offsets which were not affected by FormatUrlInternal. | 856 // Restore all offsets which were not affected by FormatUrlWithOffsets(). |
878 DCHECK_EQ(saved_offsets.size(), offsets_for_adjustment->size()); | 857 DCHECK_EQ(saved_offsets.size(), offsets_for_adjustment->size()); |
879 for (size_t i = 0; i < saved_offsets.size(); ++i) { | 858 for (size_t i = 0; i < saved_offsets.size(); ++i) { |
880 if (saved_offsets[i] < kViewSourceLengthPlus1) | 859 if (saved_offsets[i] < kViewSourceLength) |
881 (*offsets_for_adjustment)[i] = saved_offsets[i]; | 860 (*offsets_for_adjustment)[i] = saved_offsets[i]; |
882 } | 861 } |
883 return result; | 862 return result; |
884 } | 863 } |
885 | 864 |
886 // Appends the substring |in_component| inside of the URL |spec| to |output|, | 865 // Appends the substring |in_component| inside of the URL |spec| to |output|, |
887 // and the resulting range will be filled into |out_component|. |unescape_rules| | 866 // and the resulting range will be filled into |out_component|. |unescape_rules| |
888 // defines how to clean the URL for human readability. |offsets_for_adjustment| | 867 // defines how to clean the URL for human readability. |offsets_for_adjustment| |
889 // is an array of offsets into |output| each of which will be adjusted based on | 868 // is an array of offsets into |output| each of which will be adjusted based on |
890 // how it maps to the component being converted; if it is less than | 869 // how it maps to the component being converted; if it is less than |
891 // output->length(), it will be untouched, and if it is greater than | 870 // output->length(), it will be untouched, and if it is greater than |
892 // output->length() + in_component.len it will be adjusted by the difference in | 871 // output->length() + in_component.len it will be adjusted by the difference in |
893 // lengths between the input and output components. Otherwise it points into | 872 // lengths between the input and output components. Otherwise it points into |
894 // the component being converted, and is adjusted to point to the same logical | 873 // the component being converted, and is adjusted to point to the same logical |
895 // place in |output|. |offsets_for_adjustment| may not be NULL. | 874 // place in |output|. |offsets_for_adjustment| may not be NULL. |
896 void AppendFormattedComponent(const std::string& spec, | 875 void AppendFormattedComponent(const std::string& spec, |
897 const url_parse::Component& in_component, | 876 const url_parse::Component& in_component, |
898 UnescapeRule::Type unescape_rules, | 877 UnescapeRule::Type unescape_rules, |
899 std::wstring* output, | 878 string16* output, |
900 url_parse::Component* out_component, | 879 url_parse::Component* out_component, |
901 std::vector<size_t>* offsets_for_adjustment) { | 880 std::vector<size_t>* offsets_for_adjustment) { |
902 DCHECK(output); | 881 DCHECK(output); |
903 DCHECK(offsets_for_adjustment); | 882 DCHECK(offsets_for_adjustment); |
904 if (in_component.is_nonempty()) { | 883 if (in_component.is_nonempty()) { |
905 size_t component_begin = output->length(); | 884 size_t component_begin = output->length(); |
906 out_component->begin = static_cast<int>(component_begin); | 885 out_component->begin = static_cast<int>(component_begin); |
907 | 886 |
908 // Compose a list of offsets within the component area. | 887 // Compose a list of offsets within the component area. |
909 std::vector<size_t> offsets_into_component = | 888 std::vector<size_t> offsets_into_component = |
910 OffsetsIntoSection(offsets_for_adjustment, component_begin); | 889 OffsetsIntoSection(offsets_for_adjustment, component_begin); |
911 | 890 |
912 if (unescape_rules == UnescapeRule::NONE) { | 891 if (unescape_rules == UnescapeRule::NONE) { |
913 output->append(UTF8ToWideAndAdjustOffsets( | 892 output->append(UTF8ToUTF16AndAdjustOffsets( |
914 spec.substr(in_component.begin, in_component.len), | 893 spec.substr(in_component.begin, in_component.len), |
915 &offsets_into_component)); | 894 &offsets_into_component)); |
916 } else { | 895 } else { |
917 output->append(UTF16ToWideHack( | 896 output->append(UnescapeAndDecodeUTF8URLComponentWithOffsets( |
918 UnescapeAndDecodeUTF8URLComponentWithOffsets( | 897 spec.substr(in_component.begin, in_component.len), unescape_rules, |
919 spec.substr(in_component.begin, in_component.len), unescape_rules, | 898 &offsets_into_component)); |
920 &offsets_into_component))); | |
921 } | 899 } |
922 size_t new_component_len = output->length() - component_begin; | 900 size_t new_component_len = output->length() - component_begin; |
923 out_component->len = static_cast<int>(new_component_len); | 901 out_component->len = static_cast<int>(new_component_len); |
924 | 902 |
925 // Apply offset adjustments. | 903 // Apply offset adjustments. |
926 size_t old_component_len = static_cast<size_t>(in_component.len); | 904 size_t old_component_len = static_cast<size_t>(in_component.len); |
927 ApplySectionAdjustments(offsets_into_component, offsets_for_adjustment, | 905 ApplySectionAdjustments(offsets_into_component, offsets_for_adjustment, |
928 old_component_len, new_component_len, component_begin); | 906 old_component_len, new_component_len, component_begin); |
929 } else { | 907 } else { |
930 out_component->reset(); | 908 out_component->reset(); |
931 } | 909 } |
932 } | 910 } |
933 | 911 |
934 // TODO(viettrungluu): This is really the old-fashioned version, made internal. | |
935 // I need to really convert |FormatUrl()|. | |
936 std::wstring FormatUrlInternal(const GURL& url, | |
937 const std::wstring& languages, | |
938 FormatUrlTypes format_types, | |
939 UnescapeRule::Type unescape_rules, | |
940 url_parse::Parsed* new_parsed, | |
941 size_t* prefix_end, | |
942 std::vector<size_t>* offsets_for_adjustment) { | |
943 url_parse::Parsed parsed_temp; | |
944 if (!new_parsed) | |
945 new_parsed = &parsed_temp; | |
946 else | |
947 *new_parsed = url_parse::Parsed(); | |
948 | |
949 std::vector<size_t> offsets_temp; | |
950 if (!offsets_for_adjustment) | |
951 offsets_for_adjustment = &offsets_temp; | |
952 | |
953 std::wstring url_string; | |
954 | |
955 // Check for empty URLs. | |
956 if (url.is_empty()) { | |
957 if (prefix_end) | |
958 *prefix_end = 0; | |
959 std::for_each(offsets_for_adjustment->begin(), | |
960 offsets_for_adjustment->end(), | |
961 LimitOffset<std::wstring>(0)); | |
962 return url_string; | |
963 } | |
964 | |
965 // Special handling for view-source:. Don't use chrome::kViewSourceScheme | |
966 // because this library shouldn't depend on chrome. | |
967 const char* const kViewSource = "view-source"; | |
968 // Reject "view-source:view-source:..." to avoid deep recursion. | |
969 const char* const kViewSourceTwice = "view-source:view-source:"; | |
970 if (url.SchemeIs(kViewSource) && | |
971 !StartsWithASCII(url.possibly_invalid_spec(), kViewSourceTwice, false)) { | |
972 return FormatViewSourceUrl(url, languages, format_types, | |
973 unescape_rules, new_parsed, prefix_end, offsets_for_adjustment); | |
974 } | |
975 | |
976 // We handle both valid and invalid URLs (this will give us the spec | |
977 // regardless of validity). | |
978 const std::string& spec = url.possibly_invalid_spec(); | |
979 const url_parse::Parsed& parsed = url.parsed_for_possibly_invalid_spec(); | |
980 size_t spec_length = spec.length(); | |
981 std::for_each(offsets_for_adjustment->begin(), | |
982 offsets_for_adjustment->end(), | |
983 LimitOffset<std::wstring>(spec_length)); | |
984 | |
985 // Copy everything before the username (the scheme and the separators.) | |
986 // These are ASCII. | |
987 url_string.insert(url_string.end(), spec.begin(), | |
988 spec.begin() + parsed.CountCharactersBefore(url_parse::Parsed::USERNAME, | |
989 true)); | |
990 | |
991 const wchar_t kHTTP[] = L"http://"; | |
992 const char kFTP[] = "ftp."; | |
993 // URLFixerUpper::FixupURL() treats "ftp.foo.com" as ftp://ftp.foo.com. This | |
994 // means that if we trim "http://" off a URL whose host starts with "ftp." and | |
995 // the user inputs this into any field subject to fixup (which is basically | |
996 // all input fields), the meaning would be changed. (In fact, often the | |
997 // formatted URL is directly pre-filled into an input field.) For this reason | |
998 // we avoid stripping "http://" in this case. | |
999 bool omit_http = | |
1000 (format_types & kFormatUrlOmitHTTP) && (url_string == kHTTP) && | |
1001 (url.host().compare(0, arraysize(kFTP) - 1, kFTP) != 0); | |
1002 | |
1003 new_parsed->scheme = parsed.scheme; | |
1004 | |
1005 if ((format_types & kFormatUrlOmitUsernamePassword) != 0) { | |
1006 // Remove the username and password fields. We don't want to display those | |
1007 // to the user since they can be used for attacks, | |
1008 // e.g. "http://google.com:search@evil.ru/" | |
1009 new_parsed->username.reset(); | |
1010 new_parsed->password.reset(); | |
1011 // Update the offsets based on removed username and/or password. | |
1012 if (!offsets_for_adjustment->empty() && | |
1013 (parsed.username.is_nonempty() || parsed.password.is_nonempty())) { | |
1014 AdjustOffset::Adjustments adjustments; | |
1015 if (parsed.username.is_nonempty() && parsed.password.is_nonempty()) { | |
1016 // The seeming off-by-one and off-by-two in these first two lines are to | |
1017 // account for the ':' after the username and '@' after the password. | |
1018 adjustments.push_back(AdjustOffset::Adjustment( | |
1019 static_cast<size_t>(parsed.username.begin), | |
1020 static_cast<size_t>(parsed.username.len + parsed.password.len + | |
1021 2), 0)); | |
1022 } else { | |
1023 const url_parse::Component* nonempty_component = | |
1024 parsed.username.is_nonempty() ? &parsed.username : &parsed.password; | |
1025 // The seeming off-by-one below is to account for the '@' after the | |
1026 // username/password. | |
1027 adjustments.push_back(AdjustOffset::Adjustment( | |
1028 static_cast<size_t>(nonempty_component->begin), | |
1029 static_cast<size_t>(nonempty_component->len + 1), 0)); | |
1030 } | |
1031 | |
1032 // Make offset adjustment. | |
1033 std::for_each(offsets_for_adjustment->begin(), | |
1034 offsets_for_adjustment->end(), | |
1035 AdjustOffset(adjustments)); | |
1036 } | |
1037 } else { | |
1038 AppendFormattedComponent(spec, parsed.username, unescape_rules, &url_string, | |
1039 &new_parsed->username, offsets_for_adjustment); | |
1040 if (parsed.password.is_valid()) | |
1041 url_string.push_back(':'); | |
1042 AppendFormattedComponent(spec, parsed.password, unescape_rules, &url_string, | |
1043 &new_parsed->password, offsets_for_adjustment); | |
1044 if (parsed.username.is_valid() || parsed.password.is_valid()) | |
1045 url_string.push_back('@'); | |
1046 } | |
1047 if (prefix_end) | |
1048 *prefix_end = static_cast<size_t>(url_string.length()); | |
1049 | |
1050 AppendFormattedHostWithOffsets(url, languages, &url_string, new_parsed, | |
1051 offsets_for_adjustment); | |
1052 | |
1053 // Port. | |
1054 if (parsed.port.is_nonempty()) { | |
1055 url_string.push_back(':'); | |
1056 new_parsed->port.begin = url_string.length(); | |
1057 url_string.insert(url_string.end(), | |
1058 spec.begin() + parsed.port.begin, | |
1059 spec.begin() + parsed.port.end()); | |
1060 new_parsed->port.len = url_string.length() - new_parsed->port.begin; | |
1061 } else { | |
1062 new_parsed->port.reset(); | |
1063 } | |
1064 | |
1065 // Path and query both get the same general unescape & convert treatment. | |
1066 if (!(format_types & kFormatUrlOmitTrailingSlashOnBareHostname) || | |
1067 !CanStripTrailingSlash(url)) { | |
1068 AppendFormattedComponent(spec, parsed.path, unescape_rules, &url_string, | |
1069 &new_parsed->path, offsets_for_adjustment); | |
1070 } | |
1071 if (parsed.query.is_valid()) | |
1072 url_string.push_back('?'); | |
1073 AppendFormattedComponent(spec, parsed.query, unescape_rules, &url_string, | |
1074 &new_parsed->query, offsets_for_adjustment); | |
1075 | |
1076 // Reference is stored in valid, unescaped UTF-8, so we can just convert. | |
1077 if (parsed.ref.is_valid()) { | |
1078 url_string.push_back('#'); | |
1079 size_t ref_begin = url_string.length(); | |
1080 new_parsed->ref.begin = static_cast<int>(ref_begin); | |
1081 | |
1082 // Compose a list of offsets within the section. | |
1083 std::vector<size_t> offsets_into_ref = | |
1084 OffsetsIntoSection(offsets_for_adjustment, ref_begin); | |
1085 | |
1086 if (parsed.ref.len > 0) { | |
1087 url_string.append(UTF8ToWideAndAdjustOffsets(spec.substr(parsed.ref.begin, | |
1088 parsed.ref.len), | |
1089 &offsets_into_ref)); | |
1090 } | |
1091 size_t old_ref_len = static_cast<size_t>(parsed.ref.len); | |
1092 size_t new_ref_len = url_string.length() - new_parsed->ref.begin; | |
1093 new_parsed->ref.len = static_cast<int>(new_ref_len); | |
1094 | |
1095 // Apply offset adjustments. | |
1096 ApplySectionAdjustments(offsets_into_ref, offsets_for_adjustment, | |
1097 old_ref_len, new_ref_len, ref_begin); | |
1098 } | |
1099 | |
1100 // If we need to strip out http do it after the fact. This way we don't need | |
1101 // to worry about how offsets_for_adjustment is interpreted. | |
1102 const size_t kHTTPSize = arraysize(kHTTP) - 1; | |
1103 if (omit_http && !url_string.compare(0, kHTTPSize, kHTTP)) { | |
1104 url_string = url_string.substr(kHTTPSize); | |
1105 AdjustOffset::Adjustments adjustments; | |
1106 adjustments.push_back(AdjustOffset::Adjustment(0, kHTTPSize, 0)); | |
1107 std::for_each(offsets_for_adjustment->begin(), | |
1108 offsets_for_adjustment->end(), | |
1109 AdjustOffset(adjustments)); | |
1110 if (prefix_end) | |
1111 *prefix_end -= kHTTPSize; | |
1112 | |
1113 // Adjust new_parsed. | |
1114 DCHECK(new_parsed->scheme.is_valid()); | |
1115 int delta = -(new_parsed->scheme.len + 3); // +3 for ://. | |
1116 new_parsed->scheme.reset(); | |
1117 AdjustComponents(delta, new_parsed); | |
1118 } | |
1119 | |
1120 return url_string; | |
1121 } | |
1122 | |
1123 } // namespace | 912 } // namespace |
1124 | 913 |
1125 const FormatUrlType kFormatUrlOmitNothing = 0; | 914 const FormatUrlType kFormatUrlOmitNothing = 0; |
1126 const FormatUrlType kFormatUrlOmitUsernamePassword = 1 << 0; | 915 const FormatUrlType kFormatUrlOmitUsernamePassword = 1 << 0; |
1127 const FormatUrlType kFormatUrlOmitHTTP = 1 << 1; | 916 const FormatUrlType kFormatUrlOmitHTTP = 1 << 1; |
1128 const FormatUrlType kFormatUrlOmitTrailingSlashOnBareHostname = 1 << 2; | 917 const FormatUrlType kFormatUrlOmitTrailingSlashOnBareHostname = 1 << 2; |
1129 const FormatUrlType kFormatUrlOmitAll = kFormatUrlOmitUsernamePassword | | 918 const FormatUrlType kFormatUrlOmitAll = kFormatUrlOmitUsernamePassword | |
1130 kFormatUrlOmitHTTP | kFormatUrlOmitTrailingSlashOnBareHostname; | 919 kFormatUrlOmitHTTP | kFormatUrlOmitTrailingSlashOnBareHostname; |
1131 | 920 |
1132 // TODO(viettrungluu): We don't want non-POD globals; change this. | 921 // TODO(viettrungluu): We don't want non-POD globals; change this. |
(...skipping 23 matching lines...) Expand all Loading... | |
1156 FILE_PATH_LITERAL("#"), FILE_PATH_LITERAL("%23")); | 945 FILE_PATH_LITERAL("#"), FILE_PATH_LITERAL("%23")); |
1157 | 946 |
1158 #if defined(OS_POSIX) | 947 #if defined(OS_POSIX) |
1159 ReplaceSubstringsAfterOffset(&url_string, 0, | 948 ReplaceSubstringsAfterOffset(&url_string, 0, |
1160 FILE_PATH_LITERAL("\\"), FILE_PATH_LITERAL("%5C")); | 949 FILE_PATH_LITERAL("\\"), FILE_PATH_LITERAL("%5C")); |
1161 #endif | 950 #endif |
1162 | 951 |
1163 return GURL(url_string); | 952 return GURL(url_string); |
1164 } | 953 } |
1165 | 954 |
1166 std::wstring GetSpecificHeader(const std::wstring& headers, | |
1167 const std::wstring& name) { | |
1168 return GetSpecificHeaderT(headers, name); | |
1169 } | |
1170 | |
1171 std::string GetSpecificHeader(const std::string& headers, | 955 std::string GetSpecificHeader(const std::string& headers, |
1172 const std::string& name) { | 956 const std::string& name) { |
1173 return GetSpecificHeaderT(headers, name); | 957 // We want to grab the Value from the "Key: Value" pairs in the headers, |
958 // which should look like this (no leading spaces, \n-separated) (we format | |
959 // them this way in url_request_inet.cc): | |
960 // HTTP/1.1 200 OK\n | |
961 // ETag: "6d0b8-947-24f35ec0"\n | |
962 // Content-Length: 2375\n | |
963 // Content-Type: text/html; charset=UTF-8\n | |
964 // Last-Modified: Sun, 03 Sep 2006 04:34:43 GMT\n | |
965 if (headers.empty()) | |
966 return std::string(); | |
967 | |
968 std::string match('\n' + name + ':'); | |
969 | |
970 std::string::const_iterator begin = | |
971 search(headers.begin(), headers.end(), match.begin(), match.end(), | |
972 base::CaseInsensitiveCompareASCII<char>()); | |
973 | |
974 if (begin == headers.end()) | |
975 return std::string(); | |
976 | |
977 begin += match.length(); | |
978 | |
979 std::string ret; | |
980 TrimWhitespace(std::string(begin, find(begin, headers.end(), '\n')), TRIM_ALL, | |
981 &ret); | |
982 return ret; | |
1174 } | 983 } |
1175 | 984 |
1176 bool DecodeCharset(const std::string& input, | 985 bool DecodeCharset(const std::string& input, |
1177 std::string* decoded_charset, | 986 std::string* decoded_charset, |
1178 std::string* value) { | 987 std::string* value) { |
1179 StringTokenizer t(input, "'"); | 988 StringTokenizer t(input, "'"); |
1180 t.set_options(StringTokenizer::RETURN_DELIMS); | 989 t.set_options(StringTokenizer::RETURN_DELIMS); |
1181 std::string temp_charset; | 990 std::string temp_charset; |
1182 std::string temp_value; | 991 std::string temp_value; |
1183 int numDelimsSeen = 0; | 992 int numDelimsSeen = 0; |
(...skipping 54 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1238 param_value = GetHeaderParamValue(header, "name", | 1047 param_value = GetHeaderParamValue(header, "name", |
1239 QuoteRule::REMOVE_OUTER_QUOTES); | 1048 QuoteRule::REMOVE_OUTER_QUOTES); |
1240 } | 1049 } |
1241 if (param_value.empty()) | 1050 if (param_value.empty()) |
1242 return std::string(); | 1051 return std::string(); |
1243 if (DecodeParamValue(param_value, referrer_charset, &decoded)) | 1052 if (DecodeParamValue(param_value, referrer_charset, &decoded)) |
1244 return decoded; | 1053 return decoded; |
1245 return std::string(); | 1054 return std::string(); |
1246 } | 1055 } |
1247 | 1056 |
1248 std::wstring GetHeaderParamValue(const std::wstring& field, | 1057 // TODO(mpcomplete): This is a quick and dirty implementation for now. I'm |
1249 const std::wstring& param_name, | 1058 // sure this doesn't properly handle all (most?) cases. |
1250 QuoteRule::Type quote_rule) { | 1059 std::string GetHeaderParamValue(const std::string& header, |
1251 return GetHeaderParamValueT(field, param_name, quote_rule); | 1060 const std::string& param_name, |
1061 QuoteRule::Type quote_rule) { | |
1062 // This assumes args are formatted exactly like "bla; arg1=value; arg2=value". | |
1063 std::string::const_iterator param_begin = | |
1064 search(header.begin(), header.end(), param_name.begin(), param_name.end(), | |
1065 base::CaseInsensitiveCompareASCII<char>()); | |
1066 | |
1067 if (param_begin == header.end()) | |
1068 return std::string(); | |
1069 param_begin += param_name.length(); | |
1070 | |
1071 std::string whitespace(" \t"); | |
1072 size_t equals_offset = | |
1073 header.find_first_not_of(whitespace, param_begin - header.begin()); | |
1074 if (equals_offset == std::string::npos || header[equals_offset] != '=') | |
1075 return std::string(); | |
1076 | |
1077 param_begin = header.begin() + equals_offset + 1; | |
1078 if (param_begin == header.end()) | |
1079 return std::string(); | |
1080 | |
1081 std::string::const_iterator param_end; | |
1082 if (*param_begin == '"' && quote_rule == QuoteRule::REMOVE_OUTER_QUOTES) { | |
1083 ++param_begin; // skip past the quote. | |
1084 param_end = find(param_begin, header.end(), '"'); | |
1085 // If the closing quote is missing, we will treat the rest of the | |
1086 // string as the parameter. We can't set |param_end| to the | |
1087 // location of the separator (';'), since the separator is | |
1088 // technically quoted. See: http://crbug.com/58840 | |
1089 } else { | |
1090 param_end = find(param_begin + 1, header.end(), ';'); | |
1091 } | |
1092 | |
1093 return std::string(param_begin, param_end); | |
1252 } | 1094 } |
1253 | 1095 |
1254 std::string GetHeaderParamValue(const std::string& field, | 1096 string16 IDNToUnicode(const char* host, |
1255 const std::string& param_name, | 1097 size_t host_len, |
1256 QuoteRule::Type quote_rule) { | 1098 const std::string& languages) { |
1257 return GetHeaderParamValueT(field, param_name, quote_rule); | |
1258 } | |
1259 | |
1260 // TODO(brettw) bug 734373: check the scripts for each host component and | |
1261 // don't un-IDN-ize if there is more than one. Alternatively, only IDN for | |
1262 // scripts that the user has installed. For now, just put the entire | |
1263 // path through IDN. Maybe this feature can be implemented in ICU itself? | |
1264 // | |
1265 // We may want to skip this step in the case of file URLs to allow unicode | |
1266 // UNC hostnames regardless of encodings. | |
1267 std::wstring IDNToUnicodeWithOffsets( | |
1268 const char* host, | |
1269 size_t host_len, | |
1270 const std::wstring& languages, | |
1271 std::vector<size_t>* offsets_for_adjustment) { | |
1272 // Convert the ASCII input to a wide string for ICU. | |
1273 string16 input16; | |
1274 input16.reserve(host_len); | |
1275 input16.insert(input16.end(), host, host + host_len); | |
1276 | |
1277 // Do each component of the host separately, since we enforce script matching | |
1278 // on a per-component basis. | |
1279 AdjustOffset::Adjustments adjustments; | |
1280 string16 out16; | |
1281 for (size_t component_start = 0, component_end; | |
1282 component_start < input16.length(); | |
1283 component_start = component_end + 1) { | |
1284 // Find the end of the component. | |
1285 component_end = input16.find('.', component_start); | |
1286 if (component_end == string16::npos) | |
1287 component_end = input16.length(); // For getting the last component. | |
1288 size_t component_length = component_end - component_start; | |
1289 size_t new_component_start = out16.length(); | |
1290 bool converted_idn = false; | |
1291 if (component_end > component_start) { | |
1292 // Add the substring that we just found. | |
1293 converted_idn = IDNToUnicodeOneComponent(input16.data() + component_start, | |
1294 component_length, languages, &out16); | |
1295 } | |
1296 size_t new_component_length = out16.length() - new_component_start; | |
1297 | |
1298 if (converted_idn && offsets_for_adjustment) { | |
1299 adjustments.push_back(AdjustOffset::Adjustment( | |
1300 component_start, component_length, new_component_length)); | |
1301 } | |
1302 | |
1303 // Need to add the dot we just found (if we found one). | |
1304 if (component_end < input16.length()) | |
1305 out16.push_back('.'); | |
1306 } | |
1307 | |
1308 // Make offset adjustment. | |
1309 if (offsets_for_adjustment && !adjustments.empty()) { | |
1310 std::for_each(offsets_for_adjustment->begin(), | |
1311 offsets_for_adjustment->end(), | |
1312 AdjustOffset(adjustments)); | |
1313 } | |
1314 | |
1315 return UTF16ToWideAndAdjustOffsets(out16, offsets_for_adjustment); | |
1316 } | |
1317 | |
1318 std::wstring IDNToUnicode(const char* host, | |
1319 size_t host_len, | |
1320 const std::wstring& languages, | |
1321 size_t* offset_for_adjustment) { | |
1322 std::vector<size_t> offsets; | 1099 std::vector<size_t> offsets; |
1323 if (offset_for_adjustment) | 1100 return IDNToUnicodeWithOffsets(host, host_len, languages, &offsets); |
1324 offsets.push_back(*offset_for_adjustment); | |
1325 std::wstring result = | |
1326 IDNToUnicodeWithOffsets(host, host_len, languages, &offsets); | |
1327 if (offset_for_adjustment) | |
1328 *offset_for_adjustment = offsets[0]; | |
1329 return result; | |
1330 } | 1101 } |
1331 | 1102 |
1332 std::string CanonicalizeHost(const std::string& host, | 1103 std::string CanonicalizeHost(const std::string& host, |
1333 url_canon::CanonHostInfo* host_info) { | 1104 url_canon::CanonHostInfo* host_info) { |
1334 // Try to canonicalize the host. | 1105 // Try to canonicalize the host. |
1335 const url_parse::Component raw_host_component( | 1106 const url_parse::Component raw_host_component( |
1336 0, static_cast<int>(host.length())); | 1107 0, static_cast<int>(host.length())); |
1337 std::string canon_host; | 1108 std::string canon_host; |
1338 url_canon::StdStringCanonOutput canon_host_output(&canon_host); | 1109 url_canon::StdStringCanonOutput canon_host_output(&canon_host); |
1339 url_canon::CanonicalizeHostVerbose(host.c_str(), raw_host_component, | 1110 url_canon::CanonicalizeHostVerbose(host.c_str(), raw_host_component, |
1340 &canon_host_output, host_info); | 1111 &canon_host_output, host_info); |
1341 | 1112 |
1342 if (host_info->out_host.is_nonempty() && | 1113 if (host_info->out_host.is_nonempty() && |
1343 host_info->family != url_canon::CanonHostInfo::BROKEN) { | 1114 host_info->family != url_canon::CanonHostInfo::BROKEN) { |
1344 // Success! Assert that there's no extra garbage. | 1115 // Success! Assert that there's no extra garbage. |
1345 canon_host_output.Complete(); | 1116 canon_host_output.Complete(); |
1346 DCHECK_EQ(host_info->out_host.len, static_cast<int>(canon_host.length())); | 1117 DCHECK_EQ(host_info->out_host.len, static_cast<int>(canon_host.length())); |
1347 } else { | 1118 } else { |
1348 // Empty host, or canonicalization failed. We'll return empty. | 1119 // Empty host, or canonicalization failed. We'll return empty. |
1349 canon_host.clear(); | 1120 canon_host.clear(); |
1350 } | 1121 } |
1351 | 1122 |
1352 return canon_host; | 1123 return canon_host; |
1353 } | 1124 } |
1354 | 1125 |
1355 std::string CanonicalizeHost(const std::wstring& host, | |
1356 url_canon::CanonHostInfo* host_info) { | |
1357 std::string converted_host; | |
1358 WideToUTF8(host.c_str(), host.length(), &converted_host); | |
1359 return CanonicalizeHost(converted_host, host_info); | |
1360 } | |
1361 | |
1362 std::string GetDirectoryListingHeader(const string16& title) { | 1126 std::string GetDirectoryListingHeader(const string16& title) { |
1363 static const base::StringPiece header( | 1127 static const base::StringPiece header( |
1364 NetModule::GetResource(IDR_DIR_HEADER_HTML)); | 1128 NetModule::GetResource(IDR_DIR_HEADER_HTML)); |
1365 // This can be null in unit tests. | 1129 // This can be null in unit tests. |
1366 DLOG_IF(WARNING, header.empty()) << | 1130 DLOG_IF(WARNING, header.empty()) << |
1367 "Missing resource: directory listing header"; | 1131 "Missing resource: directory listing header"; |
1368 | 1132 |
1369 std::string result; | 1133 std::string result; |
1370 if (!header.empty()) | 1134 if (!header.empty()) |
1371 result.assign(header.data(), header.size()); | 1135 result.assign(header.data(), header.size()); |
(...skipping 362 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1734 *username = UnescapeAndDecodeUTF8URLComponent(url.username(), flags, NULL); | 1498 *username = UnescapeAndDecodeUTF8URLComponent(url.username(), flags, NULL); |
1735 *password = UnescapeAndDecodeUTF8URLComponent(url.password(), flags, NULL); | 1499 *password = UnescapeAndDecodeUTF8URLComponent(url.password(), flags, NULL); |
1736 } | 1500 } |
1737 | 1501 |
1738 std::string GetHostOrSpecFromURL(const GURL& url) { | 1502 std::string GetHostOrSpecFromURL(const GURL& url) { |
1739 return url.has_host() ? TrimEndingDot(url.host()) : url.spec(); | 1503 return url.has_host() ? TrimEndingDot(url.host()) : url.spec(); |
1740 } | 1504 } |
1741 | 1505 |
1742 void AppendFormattedHostWithOffsets( | 1506 void AppendFormattedHostWithOffsets( |
1743 const GURL& url, | 1507 const GURL& url, |
1744 const std::wstring& languages, | 1508 const std::string& languages, |
1745 std::wstring* output, | 1509 string16* output, |
1746 url_parse::Parsed* new_parsed, | 1510 url_parse::Parsed* new_parsed, |
1747 std::vector<size_t>* offsets_for_adjustment) { | 1511 std::vector<size_t>* offsets_for_adjustment) { |
1748 DCHECK(output); | 1512 DCHECK(output); |
1749 const url_parse::Component& host = | 1513 const url_parse::Component& host = |
1750 url.parsed_for_possibly_invalid_spec().host; | 1514 url.parsed_for_possibly_invalid_spec().host; |
1751 | 1515 |
1752 if (host.is_nonempty()) { | 1516 if (host.is_nonempty()) { |
1753 // Handle possible IDN in the host name. | 1517 // Handle possible IDN in the host name. |
1754 size_t host_begin = output->length(); | 1518 size_t host_begin = output->length(); |
1755 if (new_parsed) | 1519 if (new_parsed) |
1756 new_parsed->host.begin = static_cast<int>(host_begin); | 1520 new_parsed->host.begin = static_cast<int>(host_begin); |
1757 size_t old_host_len = static_cast<size_t>(host.len); | 1521 size_t old_host_len = static_cast<size_t>(host.len); |
1758 | 1522 |
1759 // Compose a list of offsets within the host area. | 1523 // Compose a list of offsets within the host area. |
1760 std::vector<size_t> offsets_into_host = | 1524 std::vector<size_t> offsets_into_host = |
1761 OffsetsIntoSection(offsets_for_adjustment, host_begin); | 1525 OffsetsIntoSection(offsets_for_adjustment, host_begin); |
1762 | 1526 |
1763 const std::string& spec = url.possibly_invalid_spec(); | 1527 const std::string& spec = url.possibly_invalid_spec(); |
1764 DCHECK(host.begin >= 0 && | 1528 DCHECK(host.begin >= 0 && |
1765 ((spec.length() == 0 && host.begin == 0) || | 1529 ((spec.length() == 0 && host.begin == 0) || |
1766 host.begin < static_cast<int>(spec.length()))); | 1530 host.begin < static_cast<int>(spec.length()))); |
1767 output->append(IDNToUnicodeWithOffsets(&spec[host.begin], old_host_len, | 1531 output->append(IDNToUnicodeWithOffsets(&spec[host.begin], old_host_len, |
1768 languages, &offsets_into_host)); | 1532 languages, &offsets_into_host)); |
1769 | 1533 |
1770 size_t new_host_len = output->length() - host_begin; | 1534 size_t new_host_len = output->length() - host_begin; |
1771 if (new_parsed) | 1535 if (new_parsed) |
1772 new_parsed->host.len = static_cast<int>(new_host_len); | 1536 new_parsed->host.len = static_cast<int>(new_host_len); |
1773 | 1537 |
1774 // Apply offset adjustments. | 1538 // Apply offset adjustments. |
1775 ApplySectionAdjustments(offsets_into_host, offsets_for_adjustment, | 1539 ApplySectionAdjustments(offsets_into_host, offsets_for_adjustment, |
1776 old_host_len, new_host_len, host_begin); | 1540 old_host_len, new_host_len, host_begin); |
1777 } else if (new_parsed) { | 1541 } else if (new_parsed) { |
1778 new_parsed->host.reset(); | 1542 new_parsed->host.reset(); |
1779 } | 1543 } |
1780 } | 1544 } |
1781 | 1545 |
1782 void AppendFormattedHost(const GURL& url, | 1546 void AppendFormattedHost(const GURL& url, |
1783 const std::wstring& languages, | 1547 const std::string& languages, |
1784 std::wstring* output, | 1548 string16* output, |
1785 url_parse::Parsed* new_parsed, | 1549 url_parse::Parsed* new_parsed, |
1786 size_t* offset_for_adjustment) { | 1550 size_t* offset_for_adjustment) { |
1787 std::vector<size_t> offsets; | 1551 std::vector<size_t> offsets; |
1788 if (offset_for_adjustment) | 1552 if (offset_for_adjustment) |
1789 offsets.push_back(*offset_for_adjustment); | 1553 offsets.push_back(*offset_for_adjustment); |
1790 AppendFormattedHostWithOffsets(url, languages, output, new_parsed, &offsets); | 1554 AppendFormattedHostWithOffsets(url, languages, output, new_parsed, &offsets); |
1791 if (offset_for_adjustment) | 1555 if (offset_for_adjustment) |
1792 *offset_for_adjustment = offsets[0]; | 1556 *offset_for_adjustment = offsets[0]; |
1793 } | 1557 } |
1794 | 1558 |
1795 // TODO(viettrungluu): convert the wstring |FormatUrlInternal()|. | |
1796 string16 FormatUrlWithOffsets(const GURL& url, | 1559 string16 FormatUrlWithOffsets(const GURL& url, |
1797 const std::string& languages, | 1560 const std::string& languages, |
1798 FormatUrlTypes format_types, | 1561 FormatUrlTypes format_types, |
1799 UnescapeRule::Type unescape_rules, | 1562 UnescapeRule::Type unescape_rules, |
1800 url_parse::Parsed* new_parsed, | 1563 url_parse::Parsed* new_parsed, |
1801 size_t* prefix_end, | 1564 size_t* prefix_end, |
1802 std::vector<size_t>* offsets_for_adjustment) { | 1565 std::vector<size_t>* offsets_for_adjustment) { |
1803 return WideToUTF16Hack( | 1566 url_parse::Parsed parsed_temp; |
brettw
2011/04/25 16:19:44
I'm assuming you just moved this code and changed
Peter Kasting
2011/04/25 17:44:52
The only non-trivial change was to change kHTTP fr
| |
1804 FormatUrlInternal(url, ASCIIToWide(languages), format_types, | 1567 if (!new_parsed) |
1805 unescape_rules, new_parsed, prefix_end, | 1568 new_parsed = &parsed_temp; |
1806 offsets_for_adjustment)); | 1569 else |
1570 *new_parsed = url_parse::Parsed(); | |
1571 | |
1572 std::vector<size_t> offsets_temp; | |
1573 if (!offsets_for_adjustment) | |
1574 offsets_for_adjustment = &offsets_temp; | |
1575 | |
1576 string16 url_string; | |
1577 | |
1578 // Check for empty URLs. | |
1579 if (url.is_empty()) { | |
1580 if (prefix_end) | |
1581 *prefix_end = 0; | |
1582 std::for_each(offsets_for_adjustment->begin(), | |
1583 offsets_for_adjustment->end(), | |
1584 LimitOffset<string16>(0)); | |
1585 return url_string; | |
1586 } | |
1587 | |
1588 // Special handling for view-source:. Don't use chrome::kViewSourceScheme | |
1589 // because this library shouldn't depend on chrome. | |
1590 const char* const kViewSource = "view-source"; | |
1591 // Reject "view-source:view-source:..." to avoid deep recursion. | |
1592 const char* const kViewSourceTwice = "view-source:view-source:"; | |
1593 if (url.SchemeIs(kViewSource) && | |
1594 !StartsWithASCII(url.possibly_invalid_spec(), kViewSourceTwice, false)) { | |
1595 return FormatViewSourceUrl(url, languages, format_types, | |
1596 unescape_rules, new_parsed, prefix_end, offsets_for_adjustment); | |
1597 } | |
1598 | |
1599 // We handle both valid and invalid URLs (this will give us the spec | |
1600 // regardless of validity). | |
1601 const std::string& spec = url.possibly_invalid_spec(); | |
1602 const url_parse::Parsed& parsed = url.parsed_for_possibly_invalid_spec(); | |
1603 size_t spec_length = spec.length(); | |
1604 std::for_each(offsets_for_adjustment->begin(), | |
1605 offsets_for_adjustment->end(), | |
1606 LimitOffset<string16>(spec_length)); | |
1607 | |
1608 // Copy everything before the username (the scheme and the separators.) | |
1609 // These are ASCII. | |
1610 url_string.insert(url_string.end(), spec.begin(), | |
1611 spec.begin() + parsed.CountCharactersBefore(url_parse::Parsed::USERNAME, | |
1612 true)); | |
1613 | |
1614 string16 kHTTP = ASCIIToUTF16("http://"); | |
Avi (use Gerrit)
2011/04/25 17:52:07
eww. const char like kFTP below.
| |
1615 const char kFTP[] = "ftp."; | |
1616 // URLFixerUpper::FixupURL() treats "ftp.foo.com" as ftp://ftp.foo.com. This | |
1617 // means that if we trim "http://" off a URL whose host starts with "ftp." and | |
1618 // the user inputs this into any field subject to fixup (which is basically | |
1619 // all input fields), the meaning would be changed. (In fact, often the | |
1620 // formatted URL is directly pre-filled into an input field.) For this reason | |
1621 // we avoid stripping "http://" in this case. | |
1622 bool omit_http = (format_types & kFormatUrlOmitHTTP) && | |
1623 (url_string == kHTTP) && | |
1624 (url.host().compare(0, arraysize(kFTP) - 1, kFTP) != 0); | |
Avi (use Gerrit)
2011/04/25 17:52:07
Can you use string_util's LowerCaseEqualsASCII?
brettw
2011/04/25 17:56:28
The host name will be canonicalized so this isn't
| |
1625 | |
1626 new_parsed->scheme = parsed.scheme; | |
1627 | |
1628 if ((format_types & kFormatUrlOmitUsernamePassword) != 0) { | |
1629 // Remove the username and password fields. We don't want to display those | |
1630 // to the user since they can be used for attacks, | |
1631 // e.g. "http://google.com:search@evil.ru/" | |
1632 new_parsed->username.reset(); | |
1633 new_parsed->password.reset(); | |
1634 // Update the offsets based on removed username and/or password. | |
1635 if (!offsets_for_adjustment->empty() && | |
1636 (parsed.username.is_nonempty() || parsed.password.is_nonempty())) { | |
1637 AdjustOffset::Adjustments adjustments; | |
1638 if (parsed.username.is_nonempty() && parsed.password.is_nonempty()) { | |
1639 // The seeming off-by-one and off-by-two in these first two lines are to | |
1640 // account for the ':' after the username and '@' after the password. | |
1641 adjustments.push_back(AdjustOffset::Adjustment( | |
1642 static_cast<size_t>(parsed.username.begin), | |
1643 static_cast<size_t>(parsed.username.len + parsed.password.len + | |
1644 2), 0)); | |
1645 } else { | |
1646 const url_parse::Component* nonempty_component = | |
1647 parsed.username.is_nonempty() ? &parsed.username : &parsed.password; | |
1648 // The seeming off-by-one below is to account for the '@' after the | |
1649 // username/password. | |
1650 adjustments.push_back(AdjustOffset::Adjustment( | |
1651 static_cast<size_t>(nonempty_component->begin), | |
1652 static_cast<size_t>(nonempty_component->len + 1), 0)); | |
1653 } | |
1654 | |
1655 // Make offset adjustment. | |
1656 std::for_each(offsets_for_adjustment->begin(), | |
1657 offsets_for_adjustment->end(), | |
1658 AdjustOffset(adjustments)); | |
1659 } | |
1660 } else { | |
1661 AppendFormattedComponent(spec, parsed.username, unescape_rules, &url_string, | |
1662 &new_parsed->username, offsets_for_adjustment); | |
1663 if (parsed.password.is_valid()) | |
1664 url_string.push_back(':'); | |
1665 AppendFormattedComponent(spec, parsed.password, unescape_rules, &url_string, | |
1666 &new_parsed->password, offsets_for_adjustment); | |
1667 if (parsed.username.is_valid() || parsed.password.is_valid()) | |
1668 url_string.push_back('@'); | |
1669 } | |
1670 if (prefix_end) | |
1671 *prefix_end = static_cast<size_t>(url_string.length()); | |
1672 | |
1673 AppendFormattedHostWithOffsets(url, languages, &url_string, new_parsed, | |
1674 offsets_for_adjustment); | |
1675 | |
1676 // Port. | |
1677 if (parsed.port.is_nonempty()) { | |
1678 url_string.push_back(':'); | |
1679 new_parsed->port.begin = url_string.length(); | |
1680 url_string.insert(url_string.end(), | |
1681 spec.begin() + parsed.port.begin, | |
1682 spec.begin() + parsed.port.end()); | |
1683 new_parsed->port.len = url_string.length() - new_parsed->port.begin; | |
1684 } else { | |
1685 new_parsed->port.reset(); | |
1686 } | |
1687 | |
1688 // Path and query both get the same general unescape & convert treatment. | |
1689 if (!(format_types & kFormatUrlOmitTrailingSlashOnBareHostname) || | |
1690 !CanStripTrailingSlash(url)) { | |
1691 AppendFormattedComponent(spec, parsed.path, unescape_rules, &url_string, | |
1692 &new_parsed->path, offsets_for_adjustment); | |
1693 } | |
1694 if (parsed.query.is_valid()) | |
1695 url_string.push_back('?'); | |
1696 AppendFormattedComponent(spec, parsed.query, unescape_rules, &url_string, | |
1697 &new_parsed->query, offsets_for_adjustment); | |
1698 | |
1699 // Reference is stored in valid, unescaped UTF-8, so we can just convert. | |
1700 if (parsed.ref.is_valid()) { | |
1701 url_string.push_back('#'); | |
1702 size_t ref_begin = url_string.length(); | |
1703 new_parsed->ref.begin = static_cast<int>(ref_begin); | |
1704 | |
1705 // Compose a list of offsets within the section. | |
1706 std::vector<size_t> offsets_into_ref = | |
1707 OffsetsIntoSection(offsets_for_adjustment, ref_begin); | |
1708 | |
1709 if (parsed.ref.len > 0) { | |
1710 url_string.append(UTF8ToUTF16AndAdjustOffsets( | |
1711 spec.substr(parsed.ref.begin, parsed.ref.len), &offsets_into_ref)); | |
1712 } | |
1713 size_t old_ref_len = static_cast<size_t>(parsed.ref.len); | |
1714 size_t new_ref_len = url_string.length() - new_parsed->ref.begin; | |
1715 new_parsed->ref.len = static_cast<int>(new_ref_len); | |
1716 | |
1717 // Apply offset adjustments. | |
1718 ApplySectionAdjustments(offsets_into_ref, offsets_for_adjustment, | |
1719 old_ref_len, new_ref_len, ref_begin); | |
1720 } | |
1721 | |
1722 // If we need to strip out http do it after the fact. This way we don't need | |
1723 // to worry about how offset_for_adjustment is interpreted. | |
1724 const size_t kHTTPSize = kHTTP.length(); | |
1725 if (omit_http && !url_string.compare(0, kHTTP.length(), kHTTP)) { | |
1726 url_string = url_string.substr(kHTTPSize); | |
1727 AdjustOffset::Adjustments adjustments; | |
1728 adjustments.push_back(AdjustOffset::Adjustment(0, kHTTPSize, 0)); | |
1729 std::for_each(offsets_for_adjustment->begin(), | |
1730 offsets_for_adjustment->end(), | |
1731 AdjustOffset(adjustments)); | |
1732 if (prefix_end) | |
1733 *prefix_end -= kHTTPSize; | |
1734 | |
1735 // Adjust new_parsed. | |
1736 DCHECK(new_parsed->scheme.is_valid()); | |
1737 int delta = -(new_parsed->scheme.len + 3); // +3 for ://. | |
1738 new_parsed->scheme.reset(); | |
1739 AdjustComponents(delta, new_parsed); | |
1740 } | |
1741 | |
1742 return url_string; | |
1807 } | 1743 } |
1808 | 1744 |
// Single-offset convenience wrapper around the vector-based URL formatter.
// |offset_for_adjustment| is optional: when non-null, its value is placed in
// a one-element vector that is handed to the underlying formatter, and the
// (possibly adjusted) element is written back through the pointer afterwards.
// All other arguments are forwarded to the underlying formatter; the
// formatted string it produces is returned to the caller.
1809 string16 FormatUrl(const GURL& url, | 1745 string16 FormatUrl(const GURL& url, |
1810 const std::string& languages, | 1746 const std::string& languages, |
1811 FormatUrlTypes format_types, | 1747 FormatUrlTypes format_types, |
1812 UnescapeRule::Type unescape_rules, | 1748 UnescapeRule::Type unescape_rules, |
1813 url_parse::Parsed* new_parsed, | 1749 url_parse::Parsed* new_parsed, |
1814 size_t* prefix_end, | 1750 size_t* prefix_end, |
1815 size_t* offset_for_adjustment) { | 1751 size_t* offset_for_adjustment) { |
// Stays empty when the caller did not supply an offset.
1816 std::vector<size_t> offsets; | 1752 std::vector<size_t> offsets; |
1817 if (offset_for_adjustment) | 1753 if (offset_for_adjustment) |
1818 offsets.push_back(*offset_for_adjustment); | 1754 offsets.push_back(*offset_for_adjustment); |
// Delegate the actual formatting; |offsets| is passed by address so the
// callee can update it in place.
1819 string16 result = WideToUTF16Hack( | 1755 string16 result = FormatUrlWithOffsets(url, languages, format_types, |
1820 FormatUrlInternal(url, ASCIIToWide(languages), format_types, | 1756 unescape_rules, new_parsed, prefix_end, &offsets); |
1821 unescape_rules, new_parsed, prefix_end, &offsets)); | |
// offsets[0] is only read under the same null check that guarded the
// push_back above.
1822 if (offset_for_adjustment) | 1757 if (offset_for_adjustment) |
1823 *offset_for_adjustment = offsets[0]; | 1758 *offset_for_adjustment = offsets[0]; |
1824 return result; | 1759 return result; |
1825 } | 1760 } |
1826 | 1761 |
1827 bool CanStripTrailingSlash(const GURL& url) { | 1762 bool CanStripTrailingSlash(const GURL& url) { |
1828 // Omit the path only for standard, non-file URLs with nothing but "/" after | 1763 // Omit the path only for standard, non-file URLs with nothing but "/" after |
1829 // the hostname. | 1764 // the hostname. |
1830 return url.IsStandard() && !url.SchemeIsFile() && !url.has_query() && | 1765 return url.IsStandard() && !url.SchemeIsFile() && !url.has_query() && |
1831 !url.has_ref() && url.path() == "/"; | 1766 !url.has_ref() && url.path() == "/"; |
(...skipping 440 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
2272 } | 2207 } |
2273 | 2208 |
// Out-of-line destructor definition; the body is intentionally empty.
2274 NetworkInterface::~NetworkInterface() { | 2209 NetworkInterface::~NetworkInterface() { |
2275 } | 2210 } |
2276 | 2211 |
// Stores |component_start| in the member of the same name (the initializer
// list copies the constructor parameter into the member). operator() later
// compares offsets against this stored value.
2277 ClampComponentOffset::ClampComponentOffset(size_t component_start) | 2212 ClampComponentOffset::ClampComponentOffset(size_t component_start) |
2278 : component_start(component_start) {} | 2213 : component_start(component_start) {} |
2279 | 2214 |
// Returns |offset| unchanged when it is at or past |component_start|;
// otherwise returns the string type's npos sentinel (std::wstring::npos in
// the left-hand revision, string16::npos in the right-hand revision).
2280 size_t ClampComponentOffset::operator()(size_t offset) { | 2215 size_t ClampComponentOffset::operator()(size_t offset) { |
2281 return (offset >= component_start) ? | 2216 return (offset >= component_start) ? |
2282 offset : std::wstring::npos; | 2217 offset : string16::npos; |
2283 } | 2218 } |
2284 | 2219 |
2285 } // namespace net | 2220 } // namespace net |
OLD | NEW |