OLD | NEW |
---|---|
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "net/base/net_util.h" | 5 #include "net/base/net_util.h" |
6 | 6 |
7 #include <unicode/regex.h> | 7 #include <unicode/regex.h> |
8 #include <unicode/ucnv.h> | 8 #include <unicode/ucnv.h> |
9 #include <unicode/uidna.h> | 9 #include <unicode/uidna.h> |
10 #include <unicode/ulocdata.h> | 10 #include <unicode/ulocdata.h> |
(...skipping 137 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
148 0xFFFF, // Used to block all invalid port numbers (see | 148 0xFFFF, // Used to block all invalid port numbers (see |
149 // third_party/WebKit/Source/WebCore/platform/KURLGoogle.cpp, port()) | 149 // third_party/WebKit/Source/WebCore/platform/KURLGoogle.cpp, port()) |
150 }; | 150 }; |
151 | 151 |
152 // FTP overrides the following restricted ports. | 152 // FTP overrides the following restricted ports. |
153 static const int kAllowedFtpPorts[] = { | 153 static const int kAllowedFtpPorts[] = { |
154 21, // ftp data | 154 21, // ftp data |
155 22, // ssh | 155 22, // ssh |
156 }; | 156 }; |
157 | 157 |
158 template<typename STR> | |
159 STR GetSpecificHeaderT(const STR& headers, const STR& name) { | |
160 // We want to grab the Value from the "Key: Value" pairs in the headers, | |
161 // which should look like this (no leading spaces, \n-separated) (we format | |
162 // them this way in url_request_inet.cc): | |
163 // HTTP/1.1 200 OK\n | |
164 // ETag: "6d0b8-947-24f35ec0"\n | |
165 // Content-Length: 2375\n | |
166 // Content-Type: text/html; charset=UTF-8\n | |
167 // Last-Modified: Sun, 03 Sep 2006 04:34:43 GMT\n | |
168 if (headers.empty()) | |
169 return STR(); | |
170 | |
171 STR match; | |
172 match.push_back('\n'); | |
173 match.append(name); | |
174 match.push_back(':'); | |
175 | |
176 typename STR::const_iterator begin = | |
177 search(headers.begin(), headers.end(), match.begin(), match.end(), | |
178 base::CaseInsensitiveCompareASCII<typename STR::value_type>()); | |
179 | |
180 if (begin == headers.end()) | |
181 return STR(); | |
182 | |
183 begin += match.length(); | |
184 | |
185 typename STR::const_iterator end = find(begin, headers.end(), '\n'); | |
186 | |
187 STR ret; | |
188 TrimWhitespace(STR(begin, end), TRIM_ALL, &ret); | |
189 return ret; | |
190 } | |
191 | |
192 // Similar to Base64Decode. Decodes a Q-encoded string to a sequence | 158 // Similar to Base64Decode. Decodes a Q-encoded string to a sequence |
193 // of bytes. If input is invalid, return false. | 159 // of bytes. If input is invalid, return false. |
194 bool QPDecode(const std::string& input, std::string* output) { | 160 bool QPDecode(const std::string& input, std::string* output) { |
195 std::string temp; | 161 std::string temp; |
196 temp.reserve(input.size()); | 162 temp.reserve(input.size()); |
197 std::string::const_iterator it = input.begin(); | 163 std::string::const_iterator it = input.begin(); |
198 while (it != input.end()) { | 164 while (it != input.end()) { |
199 if (*it == '_') { | 165 if (*it == '_') { |
200 temp.push_back(' '); | 166 temp.push_back(' '); |
201 } else if (*it == '=') { | 167 } else if (*it == '=') { |
(...skipping 67 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
269 *is_rfc2047 = false; | 235 *is_rfc2047 = false; |
270 output->clear(); | 236 output->clear(); |
271 if (encoded_word.empty()) | 237 if (encoded_word.empty()) |
272 return true; | 238 return true; |
273 | 239 |
274 if (!IsStringASCII(encoded_word)) { | 240 if (!IsStringASCII(encoded_word)) { |
275 // Try UTF-8, referrer_charset and the native OS default charset in turn. | 241 // Try UTF-8, referrer_charset and the native OS default charset in turn. |
276 if (IsStringUTF8(encoded_word)) { | 242 if (IsStringUTF8(encoded_word)) { |
277 *output = encoded_word; | 243 *output = encoded_word; |
278 } else { | 244 } else { |
279 std::wstring wide_output; | 245 string16 utf16_output; |
280 if (!referrer_charset.empty() && | 246 if (!referrer_charset.empty() && |
281 base::CodepageToWide(encoded_word, referrer_charset.c_str(), | 247 base::CodepageToUTF16(encoded_word, referrer_charset.c_str(), |
282 base::OnStringConversionError::FAIL, | 248 base::OnStringConversionError::FAIL, |
283 &wide_output)) { | 249 &utf16_output)) { |
284 *output = WideToUTF8(wide_output); | 250 *output = UTF16ToUTF8(utf16_output); |
285 } else { | 251 } else { |
286 *output = WideToUTF8(base::SysNativeMBToWide(encoded_word)); | 252 *output = WideToUTF8(base::SysNativeMBToWide(encoded_word)); |
287 } | 253 } |
288 } | 254 } |
289 | 255 |
290 return true; | 256 return true; |
291 } | 257 } |
292 | 258 |
293 // RFC 2047 : one of encoding methods supported by Firefox and relatively | 259 // RFC 2047 : one of encoding methods supported by Firefox and relatively |
294 // widely used by web servers. | 260 // widely used by web servers. |
(...skipping 112 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
407 std::string decoded; | 373 std::string decoded; |
408 if (!DecodeWord(t.token(), referrer_charset, &is_previous_token_rfc2047, | 374 if (!DecodeWord(t.token(), referrer_charset, &is_previous_token_rfc2047, |
409 &decoded)) | 375 &decoded)) |
410 return false; | 376 return false; |
411 tmp.append(decoded); | 377 tmp.append(decoded); |
412 } | 378 } |
413 output->swap(tmp); | 379 output->swap(tmp); |
414 return true; | 380 return true; |
415 } | 381 } |
416 | 382 |
417 // TODO(mpcomplete): This is a quick and dirty implementation for now. I'm | |
418 // sure this doesn't properly handle all (most?) cases. | |
419 template<typename STR> | |
420 STR GetHeaderParamValueT(const STR& header, const STR& param_name, | |
421 QuoteRule::Type quote_rule) { | |
422 // This assumes args are formatted exactly like "bla; arg1=value; arg2=value". | |
423 typename STR::const_iterator param_begin = | |
424 search(header.begin(), header.end(), param_name.begin(), param_name.end(), | |
425 base::CaseInsensitiveCompareASCII<typename STR::value_type>()); | |
426 | |
427 if (param_begin == header.end()) | |
428 return STR(); | |
429 param_begin += param_name.length(); | |
430 | |
431 STR whitespace; | |
432 whitespace.push_back(' '); | |
433 whitespace.push_back('\t'); | |
434 const typename STR::size_type equals_offset = | |
435 header.find_first_not_of(whitespace, param_begin - header.begin()); | |
436 if (equals_offset == STR::npos || header.at(equals_offset) != '=') | |
437 return STR(); | |
438 | |
439 param_begin = header.begin() + equals_offset + 1; | |
440 if (param_begin == header.end()) | |
441 return STR(); | |
442 | |
443 typename STR::const_iterator param_end; | |
444 if (*param_begin == '"' && quote_rule == QuoteRule::REMOVE_OUTER_QUOTES) { | |
445 ++param_begin; // skip past the quote. | |
446 param_end = find(param_begin, header.end(), '"'); | |
447 // If the closing quote is missing, we will treat the rest of the | |
448 // string as the parameter. We can't set |param_end| to the | |
449 // location of the separator (';'), since the separator is | |
450 // technically quoted. See: http://crbug.com/58840 | |
451 } else { | |
452 param_end = find(param_begin+1, header.end(), ';'); | |
453 } | |
454 | |
455 return STR(param_begin, param_end); | |
456 } | |
457 | |
458 // Does some simple normalization of scripts so we can allow certain scripts | 383 // Does some simple normalization of scripts so we can allow certain scripts |
459 // to exist together. | 384 // to exist together. |
460 // TODO(brettw) bug 880223: we should allow some other languages to be | 385 // TODO(brettw) bug 880223: we should allow some other languages to be |
461 // oombined such as Chinese and Latin. We will probably need a more | 386 // oombined such as Chinese and Latin. We will probably need a more |
462 // complicated system of language pairs to have more fine-grained control. | 387 // complicated system of language pairs to have more fine-grained control. |
463 UScriptCode NormalizeScript(UScriptCode code) { | 388 UScriptCode NormalizeScript(UScriptCode code) { |
464 switch (code) { | 389 switch (code) { |
465 case USCRIPT_KATAKANA: | 390 case USCRIPT_KATAKANA: |
466 case USCRIPT_HIRAGANA: | 391 case USCRIPT_HIRAGANA: |
467 case USCRIPT_KATAKANA_OR_HIRAGANA: | 392 case USCRIPT_KATAKANA_OR_HIRAGANA: |
(...skipping 118 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
586 ulocdata_close(uld); | 511 ulocdata_close(uld); |
587 } | 512 } |
588 } | 513 } |
589 return !lang_set->isEmpty() && lang_set->containsAll(component_characters); | 514 return !lang_set->isEmpty() && lang_set->containsAll(component_characters); |
590 } | 515 } |
591 | 516 |
592 // Returns true if the given Unicode host component is safe to display to the | 517 // Returns true if the given Unicode host component is safe to display to the |
593 // user. | 518 // user. |
594 bool IsIDNComponentSafe(const char16* str, | 519 bool IsIDNComponentSafe(const char16* str, |
595 int str_len, | 520 int str_len, |
596 const std::wstring& languages) { | 521 const std::string& languages) { |
597 // Most common cases (non-IDN) do not reach here so that we don't | 522 // Most common cases (non-IDN) do not reach here so that we don't |
598 // need a fast return path. | 523 // need a fast return path. |
599 // TODO(jungshik) : Check if there's any character inappropriate | 524 // TODO(jungshik) : Check if there's any character inappropriate |
600 // (although allowed) for domain names. | 525 // (although allowed) for domain names. |
601 // See http://www.unicode.org/reports/tr39/#IDN_Security_Profiles and | 526 // See http://www.unicode.org/reports/tr39/#IDN_Security_Profiles and |
602 // http://www.unicode.org/reports/tr39/data/xidmodifications.txt | 527 // http://www.unicode.org/reports/tr39/data/xidmodifications.txt |
603 // For now, we borrow the list from Mozilla and tweaked it slightly. | 528 // For now, we borrow the list from Mozilla and tweaked it slightly. |
604 // (e.g. Characters like U+00A0, U+3000, U+3002 are omitted because | 529 // (e.g. Characters like U+00A0, U+3000, U+3002 are omitted because |
605 // they're gonna be canonicalized to U+0020 and full stop before | 530 // they're gonna be canonicalized to U+0020 and full stop before |
606 // reaching here.) | 531 // reaching here.) |
(...skipping 63 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
670 // (sync'd with characters allowed in url_canon_host with square | 595 // (sync'd with characters allowed in url_canon_host with square |
671 // brackets excluded.) See kHostCharLookup[] array in url_canon_host.cc. | 596 // brackets excluded.) See kHostCharLookup[] array in url_canon_host.cc. |
672 icu::UnicodeSet common_characters(UNICODE_STRING_SIMPLE("[[0-9]\\-_+\\ ]"), | 597 icu::UnicodeSet common_characters(UNICODE_STRING_SIMPLE("[[0-9]\\-_+\\ ]"), |
673 status); | 598 status); |
674 DCHECK(U_SUCCESS(status)); | 599 DCHECK(U_SUCCESS(status)); |
675 // Subtract common characters because they're always allowed so that | 600 // Subtract common characters because they're always allowed so that |
676 // we just have to check if a language-specific set contains | 601 // we just have to check if a language-specific set contains |
677 // the remainder. | 602 // the remainder. |
678 component_characters.removeAll(common_characters); | 603 component_characters.removeAll(common_characters); |
679 | 604 |
680 std::string languages_list(WideToASCII(languages)); | 605 StringTokenizer t(languages, ","); |
681 StringTokenizer t(languages_list, ","); | |
682 while (t.GetNext()) { | 606 while (t.GetNext()) { |
683 if (IsComponentCoveredByLang(component_characters, t.token())) | 607 if (IsComponentCoveredByLang(component_characters, t.token())) |
684 return true; | 608 return true; |
685 } | 609 } |
686 return false; | 610 return false; |
687 } | 611 } |
688 | 612 |
689 // Converts one component of a host (between dots) to IDN if safe. The result | 613 // Converts one component of a host (between dots) to IDN if safe. The result |
690 // will be APPENDED to the given output string and will be the same as the input | 614 // will be APPENDED to the given output string and will be the same as the input |
691 // if it is not IDN or the IDN is unsafe to display. Returns whether any | 615 // if it is not IDN or the IDN is unsafe to display. Returns whether any |
692 // conversion was performed. | 616 // conversion was performed. |
693 bool IDNToUnicodeOneComponent(const char16* comp, | 617 bool IDNToUnicodeOneComponent(const char16* comp, |
694 size_t comp_len, | 618 size_t comp_len, |
695 const std::wstring& languages, | 619 const std::string& languages, |
696 string16* out) { | 620 string16* out) { |
697 DCHECK(out); | 621 DCHECK(out); |
698 if (comp_len == 0) | 622 if (comp_len == 0) |
699 return false; | 623 return false; |
700 | 624 |
701 // Only transform if the input can be an IDN component. | 625 // Only transform if the input can be an IDN component. |
702 static const char16 kIdnPrefix[] = {'x', 'n', '-', '-'}; | 626 static const char16 kIdnPrefix[] = {'x', 'n', '-', '-'}; |
703 if ((comp_len > arraysize(kIdnPrefix)) && | 627 if ((comp_len > arraysize(kIdnPrefix)) && |
704 !memcmp(comp, kIdnPrefix, arraysize(kIdnPrefix) * sizeof(char16))) { | 628 !memcmp(comp, kIdnPrefix, arraysize(kIdnPrefix) * sizeof(char16))) { |
705 // Repeatedly expand the output string until it's big enough. It looks like | 629 // Repeatedly expand the output string until it's big enough. It looks like |
(...skipping 21 matching lines...) Expand all Loading... | |
727 // Failed, revert back to original string. | 651 // Failed, revert back to original string. |
728 out->resize(original_length); | 652 out->resize(original_length); |
729 } | 653 } |
730 | 654 |
731 // We get here with no IDN or on error, in which case we just append the | 655 // We get here with no IDN or on error, in which case we just append the |
732 // literal input. | 656 // literal input. |
733 out->append(comp, comp_len); | 657 out->append(comp, comp_len); |
734 return false; | 658 return false; |
735 } | 659 } |
736 | 660 |
737 struct SubtractFromOffset { | 661 // Clamps the offsets in |offsets_for_adjustment| to the length of |str|. |
738 explicit SubtractFromOffset(size_t amount) | 662 void LimitOffsets(const string16& str, |
739 : amount(amount) {} | 663 std::vector<size_t>* offsets_for_adjustment) { |
740 void operator()(size_t& offset) { | 664 if (offsets_for_adjustment) { |
741 if (offset != std::wstring::npos) { | 665 std::for_each(offsets_for_adjustment->begin(), |
742 if (offset >= amount) | 666 offsets_for_adjustment->end(), |
743 offset -= amount; | 667 LimitOffset<string16>(str.length())); |
744 else | 668 } |
745 offset = std::wstring::npos; | 669 } |
670 | |
671 // TODO(brettw) bug 734373: check the scripts for each host component and | |
672 // don't un-IDN-ize if there is more than one. Alternatively, only IDN for | |
673 // scripts that the user has installed. For now, just put the entire | |
674 // path through IDN. Maybe this feature can be implemented in ICU itself? | |
675 // | |
676 // We may want to skip this step in the case of file URLs to allow unicode | |
677 // UNC hostnames regardless of encodings. | |
678 string16 IDNToUnicodeWithOffsets(const std::string& host, | |
679 const std::string& languages, | |
680 std::vector<size_t>* offsets_for_adjustment) { | |
681 // Convert the ASCII input to a string16 for ICU. | |
682 string16 input16; | |
683 input16.reserve(host.length()); | |
684 input16.insert(input16.end(), host.begin(), host.end()); | |
685 | |
686 // Do each component of the host separately, since we enforce script matching | |
687 // on a per-component basis. | |
688 string16 out16; | |
689 { | |
690 OffsetAdjuster offset_adjuster(offsets_for_adjustment); | |
691 for (size_t component_start = 0, component_end; | |
692 component_start < input16.length(); | |
693 component_start = component_end + 1) { | |
694 // Find the end of the component. | |
695 component_end = input16.find('.', component_start); | |
696 if (component_end == string16::npos) | |
697 component_end = input16.length(); // For getting the last component. | |
698 size_t component_length = component_end - component_start; | |
699 size_t new_component_start = out16.length(); | |
700 bool converted_idn = false; | |
701 if (component_end > component_start) { | |
702 // Add the substring that we just found. | |
703 converted_idn = IDNToUnicodeOneComponent( | |
704 input16.data() + component_start, component_length, languages, | |
705 &out16); | |
706 } | |
707 size_t new_component_length = out16.length() - new_component_start; | |
708 | |
709 if (converted_idn && offsets_for_adjustment) { | |
710 offset_adjuster.Add(OffsetAdjuster::Adjustment(component_start, | |
711 component_length, new_component_length)); | |
712 } | |
713 | |
714 // Need to add the dot we just found (if we found one). | |
715 if (component_end < input16.length()) | |
716 out16.push_back('.'); | |
746 } | 717 } |
747 } | 718 } |
748 | 719 |
749 size_t amount; | 720 LimitOffsets(out16, offsets_for_adjustment); |
750 }; | 721 return out16; |
751 | |
752 struct AddToOffset { | |
753 explicit AddToOffset(size_t amount) | |
754 : amount(amount) {} | |
755 void operator()(size_t& offset) { | |
756 if (offset != std::wstring::npos) | |
757 offset += amount; | |
758 } | |
759 | |
760 size_t amount; | |
761 }; | |
762 | |
763 std::vector<size_t> OffsetsIntoSection( | |
764 std::vector<size_t>* offsets_for_adjustment, | |
765 size_t section_begin) { | |
766 std::vector<size_t> offsets_into_section; | |
767 if (offsets_for_adjustment) { | |
768 std::transform(offsets_for_adjustment->begin(), | |
769 offsets_for_adjustment->end(), | |
770 std::back_inserter(offsets_into_section), | |
771 ClampComponentOffset(section_begin)); | |
772 std::for_each(offsets_into_section.begin(), offsets_into_section.end(), | |
773 SubtractFromOffset(section_begin)); | |
774 } | |
775 return offsets_into_section; | |
776 } | 722 } |
777 | 723 |
778 void ApplySectionAdjustments(const std::vector<size_t>& offsets_into_section, | 724 // Transforms |original_offsets| by subtracting |section_begin| from all |
brettw
2011/04/27 17:47:51
section_begin -> component_begin
| |
779 std::vector<size_t>* offsets_for_adjustment, | 725 // offsets. Any offset which was not at least this large to begin with is set |
780 size_t old_section_len, | 726 // to std::string::npos. |
781 size_t new_section_len, | 727 std::vector<size_t> OffsetsIntoComponent( |
782 size_t section_begin) { | 728 const std::vector<size_t>& original_offsets, |
783 if (offsets_for_adjustment) { | 729 size_t component_begin) { |
784 DCHECK_EQ(offsets_for_adjustment->size(), offsets_into_section.size()); | 730 DCHECK_NE(std::string::npos, component_begin); |
785 std::vector<size_t>::const_iterator host_offsets_iter = | 731 std::vector<size_t> offsets_into_component(original_offsets); |
786 offsets_into_section.begin(); | 732 for (std::vector<size_t>::iterator i(offsets_into_component.begin()); |
787 for (std::vector<size_t>::iterator offsets_iter = | 733 i != offsets_into_component.end(); ++i) { |
788 offsets_for_adjustment->begin(); | 734 if (*i != std::string::npos) |
789 offsets_iter != offsets_for_adjustment->end(); | 735 *i = (*i < component_begin) ? std::string::npos : (*i - component_begin); |
790 ++offsets_iter, ++host_offsets_iter) { | 736 } |
791 size_t offset = *offsets_iter; | 737 return offsets_into_component; |
792 if (offset == std::wstring::npos || offset < section_begin) { | 738 } |
793 // The offset is before the host section so leave it as is. | 739 |
794 continue; | 740 // Called after we transform a component and append it to an output string. |
795 } | 741 // Maps |transformed_offsets|, which represent offsets into the transformed |
796 if (offset >= section_begin + old_section_len) { | 742 // component itself, into appropriate offsets for the output string, by adding |
797 // The offset is after the host section so adjust by host length delta. | 743 // |output_component_begin| to each. Determines which offsets need mapping by |
798 offset += new_section_len - old_section_len; | 744 // checking to see which of the |original_offsets| were within the designated |
799 } else if (*host_offsets_iter != std::wstring::npos) { | 745 // original component, using its provided endpoints. |
800 // The offset is within the host and valid so adjust by the host | 746 void AdjustForComponentTransform( |
801 // reformatting offsets results. | 747 const std::vector<size_t>& original_offsets, |
802 offset = section_begin + *host_offsets_iter; | 748 size_t original_component_begin, |
803 } else { | 749 size_t original_component_end, |
804 // The offset is invalid. | 750 const std::vector<size_t>& transformed_offsets, |
805 offset = std::wstring::npos; | 751 size_t output_component_begin, |
806 } | 752 std::vector<size_t>* offsets_for_adjustment) { |
807 *offsets_iter = offset; | 753 if (!offsets_for_adjustment) |
754 return; | |
755 | |
756 DCHECK_NE(std::string::npos, original_component_begin); | |
757 DCHECK_NE(std::string::npos, original_component_end); | |
758 DCHECK_NE(string16::npos, output_component_begin); | |
759 size_t offsets_size = offsets_for_adjustment->size(); | |
760 DCHECK_EQ(offsets_size, original_offsets.size()); | |
761 DCHECK_EQ(offsets_size, transformed_offsets.size()); | |
762 for (size_t i = 0; i < offsets_size; ++i) { | |
763 size_t original_offset = original_offsets[i]; | |
764 if ((original_offset >= original_component_begin) && | |
765 (original_offset < original_component_end)) { | |
766 size_t transformed_offset = transformed_offsets[i]; | |
767 (*offsets_for_adjustment)[i] = (transformed_offset == string16::npos) ? | |
768 string16::npos : (output_component_begin + transformed_offset); | |
808 } | 769 } |
809 } | 770 } |
810 } | 771 } |
811 | 772 |
812 // If |component| is valid, its begin is incremented by |delta|. | 773 // If |component| is valid, its begin is incremented by |delta|. |
813 void AdjustComponent(int delta, url_parse::Component* component) { | 774 void AdjustComponent(int delta, url_parse::Component* component) { |
814 if (!component->is_valid()) | 775 if (!component->is_valid()) |
815 return; | 776 return; |
816 | 777 |
817 DCHECK(delta >= 0 || component->begin >= -delta); | 778 DCHECK(delta >= 0 || component->begin >= -delta); |
818 component->begin += delta; | 779 component->begin += delta; |
819 } | 780 } |
820 | 781 |
821 // Adjusts all the components of |parsed| by |delta|, except for the scheme. | 782 // Adjusts all the components of |parsed| by |delta|, except for the scheme. |
822 void AdjustComponents(int delta, url_parse::Parsed* parsed) { | 783 void AdjustComponents(int delta, url_parse::Parsed* parsed) { |
823 AdjustComponent(delta, &(parsed->username)); | 784 AdjustComponent(delta, &(parsed->username)); |
824 AdjustComponent(delta, &(parsed->password)); | 785 AdjustComponent(delta, &(parsed->password)); |
825 AdjustComponent(delta, &(parsed->host)); | 786 AdjustComponent(delta, &(parsed->host)); |
826 AdjustComponent(delta, &(parsed->port)); | 787 AdjustComponent(delta, &(parsed->port)); |
827 AdjustComponent(delta, &(parsed->path)); | 788 AdjustComponent(delta, &(parsed->path)); |
828 AdjustComponent(delta, &(parsed->query)); | 789 AdjustComponent(delta, &(parsed->query)); |
829 AdjustComponent(delta, &(parsed->ref)); | 790 AdjustComponent(delta, &(parsed->ref)); |
830 } | 791 } |
831 | 792 |
832 std::wstring FormatUrlInternal(const GURL& url, | 793 // Helper for FormatUrlWithOffsets(). |
833 const std::wstring& languages, | 794 string16 FormatViewSourceUrl(const GURL& url, |
834 FormatUrlTypes format_types, | 795 const std::vector<size_t>& original_offsets, |
835 UnescapeRule::Type unescape_rules, | 796 const std::string& languages, |
836 url_parse::Parsed* new_parsed, | 797 FormatUrlTypes format_types, |
837 size_t* prefix_end, | 798 UnescapeRule::Type unescape_rules, |
838 std::vector<size_t>* offsets_for_adjustment); | 799 url_parse::Parsed* new_parsed, |
800 size_t* prefix_end, | |
801 std::vector<size_t>* offsets_for_adjustment) { | |
802 DCHECK(new_parsed); | |
803 const char kViewSource[] = "view-source:"; | |
804 const size_t kViewSourceLength = arraysize(kViewSource) - 1; | |
805 std::vector<size_t> offsets_into_url( | |
806 OffsetsIntoComponent(original_offsets, kViewSourceLength)); | |
839 | 807 |
840 // Helper for FormatUrl()/FormatUrlInternal(). | 808 GURL real_url(url.possibly_invalid_spec().substr(kViewSourceLength)); |
841 std::wstring FormatViewSourceUrl(const GURL& url, | 809 string16 result(ASCIIToUTF16(kViewSource) + |
842 const std::wstring& languages, | 810 FormatUrlWithOffsets(real_url, languages, format_types, unescape_rules, |
843 FormatUrlTypes format_types, | 811 new_parsed, prefix_end, &offsets_into_url)); |
844 UnescapeRule::Type unescape_rules, | |
845 url_parse::Parsed* new_parsed, | |
846 size_t* prefix_end, | |
847 std::vector<size_t>* offsets_for_adjustment) { | |
848 DCHECK(new_parsed); | |
849 DCHECK(offsets_for_adjustment); | |
850 const wchar_t* const kWideViewSource = L"view-source:"; | |
851 const size_t kViewSourceLengthPlus1 = 12; | |
852 std::vector<size_t> saved_offsets(*offsets_for_adjustment); | |
853 | |
854 GURL real_url(url.possibly_invalid_spec().substr(kViewSourceLengthPlus1)); | |
855 // Clamp the offsets to the source area. | |
856 std::for_each(offsets_for_adjustment->begin(), | |
857 offsets_for_adjustment->end(), | |
858 SubtractFromOffset(kViewSourceLengthPlus1)); | |
859 std::wstring result = FormatUrlInternal(real_url, languages, format_types, | |
860 unescape_rules, new_parsed, prefix_end, offsets_for_adjustment); | |
861 result.insert(0, kWideViewSource); | |
862 | 812 |
863 // Adjust position values. | 813 // Adjust position values. |
864 if (new_parsed->scheme.is_nonempty()) { | 814 if (new_parsed->scheme.is_nonempty()) { |
865 // Assume "view-source:real-scheme" as a scheme. | 815 // Assume "view-source:real-scheme" as a scheme. |
866 new_parsed->scheme.len += kViewSourceLengthPlus1; | 816 new_parsed->scheme.len += kViewSourceLength; |
867 } else { | 817 } else { |
868 new_parsed->scheme.begin = 0; | 818 new_parsed->scheme.begin = 0; |
869 new_parsed->scheme.len = kViewSourceLengthPlus1 - 1; | 819 new_parsed->scheme.len = kViewSourceLength - 1; |
870 } | 820 } |
871 AdjustComponents(kViewSourceLengthPlus1, new_parsed); | 821 AdjustComponents(kViewSourceLength, new_parsed); |
872 if (prefix_end) | 822 if (prefix_end) |
873 *prefix_end += kViewSourceLengthPlus1; | 823 *prefix_end += kViewSourceLength; |
874 std::for_each(offsets_for_adjustment->begin(), | 824 AdjustForComponentTransform(original_offsets, kViewSourceLength, |
875 offsets_for_adjustment->end(), | 825 url.possibly_invalid_spec().length(), offsets_into_url, kViewSourceLength, |
876 AddToOffset(kViewSourceLengthPlus1)); | 826 offsets_for_adjustment); |
877 // Restore all offsets which were not affected by FormatUrlInternal. | 827 LimitOffsets(result, offsets_for_adjustment); |
878 DCHECK_EQ(saved_offsets.size(), offsets_for_adjustment->size()); | |
879 for (size_t i = 0; i < saved_offsets.size(); ++i) { | |
880 if (saved_offsets[i] < kViewSourceLengthPlus1) | |
881 (*offsets_for_adjustment)[i] = saved_offsets[i]; | |
882 } | |
883 return result; | 828 return result; |
884 } | 829 } |
885 | 830 |
886 // Appends the substring |in_component| inside of the URL |spec| to |output|, | 831 class AppendComponentTransform { |
887 // and the resulting range will be filled into |out_component|. |unescape_rules| | 832 public: |
888 // defines how to clean the URL for human readability. |offsets_for_adjustment| | 833 AppendComponentTransform() {} |
889 // is an array of offsets into |output| each of which will be adjusted based on | 834 virtual ~AppendComponentTransform() {} |
890 // how it maps to the component being converted; if it is less than | 835 |
891 // output->length(), it will be untouched, and if it is greater than | 836 virtual string16 Execute( |
892 // output->length() + in_component.len it will be adjusted by the difference in | 837 const std::string& component_text, |
893 // lengths between the input and output components. Otherwise it points into | 838 std::vector<size_t>* offsets_into_component) const = 0; |
894 // the component being converted, and is adjusted to point to the same logical | 839 |
895 // place in |output|. |offsets_for_adjustment| may not be NULL. | 840 // NOTE: No DISALLOW_COPY_AND_ASSIGN here, since gcc < 4.3.0 requires an |
841 // accessible copy constructor in order to call AppendFormattedComponent() | |
842 // with an inline temporary (see http://gcc.gnu.org/bugs/#cxx%5Frvalbind ). | |
843 }; | |
844 | |
845 class HostComponentTransform : public AppendComponentTransform { | |
846 public: | |
847 explicit HostComponentTransform(const std::string& languages) | |
848 : languages_(languages) { | |
849 } | |
850 | |
851 private: | |
852 virtual string16 Execute( | |
853 const std::string& component_text, | |
854 std::vector<size_t>* offsets_into_component) const { | |
855 return IDNToUnicodeWithOffsets(component_text, languages_, | |
856 offsets_into_component); | |
857 } | |
858 | |
859 const std::string& languages_; | |
860 }; | |
861 | |
862 class NonHostComponentTransform : public AppendComponentTransform { | |
863 public: | |
864 explicit NonHostComponentTransform(UnescapeRule::Type unescape_rules) | |
865 : unescape_rules_(unescape_rules) { | |
866 } | |
867 | |
868 private: | |
869 virtual string16 Execute( | |
870 const std::string& component_text, | |
871 std::vector<size_t>* offsets_into_component) const { | |
872 return (unescape_rules_ == UnescapeRule::NONE) ? | |
873 UTF8ToUTF16AndAdjustOffsets(component_text, offsets_into_component) : | |
874 UnescapeAndDecodeUTF8URLComponentWithOffsets(component_text, | |
875 unescape_rules_, offsets_into_component); | |
876 } | |
877 | |
878 const UnescapeRule::Type unescape_rules_; | |
879 }; | |
880 | |
896 void AppendFormattedComponent(const std::string& spec, | 881 void AppendFormattedComponent(const std::string& spec, |
897 const url_parse::Component& in_component, | 882 const url_parse::Component& original_component, |
898 UnescapeRule::Type unescape_rules, | 883 const std::vector<size_t>& original_offsets, |
899 std::wstring* output, | 884 const AppendComponentTransform& transform, |
900 url_parse::Component* out_component, | 885 string16* output, |
886 url_parse::Component* output_component, | |
901 std::vector<size_t>* offsets_for_adjustment) { | 887 std::vector<size_t>* offsets_for_adjustment) { |
902 DCHECK(output); | 888 DCHECK(output); |
903 DCHECK(offsets_for_adjustment); | 889 if (original_component.is_nonempty()) { |
904 if (in_component.is_nonempty()) { | 890 size_t original_component_begin = |
905 size_t component_begin = output->length(); | 891 static_cast<size_t>(original_component.begin); |
906 out_component->begin = static_cast<int>(component_begin); | 892 size_t output_component_begin = output->length(); |
893 if (output_component) | |
894 output_component->begin = static_cast<int>(output_component_begin); | |
907 | 895 |
908 // Compose a list of offsets within the component area. | |
909 std::vector<size_t> offsets_into_component = | 896 std::vector<size_t> offsets_into_component = |
910 OffsetsIntoSection(offsets_for_adjustment, component_begin); | 897 OffsetsIntoComponent(original_offsets, original_component_begin); |
898 output->append(transform.Execute(std::string(spec, original_component_begin, | |
899 static_cast<size_t>(original_component.len)), &offsets_into_component)); | |
911 | 900 |
912 if (unescape_rules == UnescapeRule::NONE) { | 901 if (output_component) { |
913 output->append(UTF8ToWideAndAdjustOffsets( | 902 output_component->len = |
914 spec.substr(in_component.begin, in_component.len), | 903 static_cast<int>(output->length() - output_component_begin); |
915 &offsets_into_component)); | |
916 } else { | |
917 output->append(UTF16ToWideHack( | |
918 UnescapeAndDecodeUTF8URLComponentWithOffsets( | |
919 spec.substr(in_component.begin, in_component.len), unescape_rules, | |
920 &offsets_into_component))); | |
921 } | 904 } |
922 size_t new_component_len = output->length() - component_begin; | 905 AdjustForComponentTransform(original_offsets, original_component_begin, |
923 out_component->len = static_cast<int>(new_component_len); | 906 static_cast<size_t>(original_component.end()), |
924 | 907 offsets_into_component, output_component_begin, |
925 // Apply offset adjustments. | 908 offsets_for_adjustment); |
926 size_t old_component_len = static_cast<size_t>(in_component.len); | 909 } else if (output_component) { |
927 ApplySectionAdjustments(offsets_into_component, offsets_for_adjustment, | 910 output_component->reset(); |
928 old_component_len, new_component_len, component_begin); | |
929 } else { | |
930 out_component->reset(); | |
931 } | 911 } |
932 } | 912 } |
933 | 913 |
934 // TODO(viettrungluu): This is really the old-fashioned version, made internal. | |
935 // I need to really convert |FormatUrl()|. | |
936 std::wstring FormatUrlInternal(const GURL& url, | |
937 const std::wstring& languages, | |
938 FormatUrlTypes format_types, | |
939 UnescapeRule::Type unescape_rules, | |
940 url_parse::Parsed* new_parsed, | |
941 size_t* prefix_end, | |
942 std::vector<size_t>* offsets_for_adjustment) { | |
943 url_parse::Parsed parsed_temp; | |
944 if (!new_parsed) | |
945 new_parsed = &parsed_temp; | |
946 else | |
947 *new_parsed = url_parse::Parsed(); | |
948 | |
949 std::vector<size_t> offsets_temp; | |
950 if (!offsets_for_adjustment) | |
951 offsets_for_adjustment = &offsets_temp; | |
952 | |
953 std::wstring url_string; | |
954 | |
955 // Check for empty URLs or 0 available text width. | |
956 if (url.is_empty()) { | |
957 if (prefix_end) | |
958 *prefix_end = 0; | |
959 std::for_each(offsets_for_adjustment->begin(), | |
960 offsets_for_adjustment->end(), | |
961 LimitOffset<std::wstring>(0)); | |
962 return url_string; | |
963 } | |
964 | |
965 // Special handling for view-source:. Don't use chrome::kViewSourceScheme | |
966 // because this library shouldn't depend on chrome. | |
967 const char* const kViewSource = "view-source"; | |
968 // Reject "view-source:view-source:..." to avoid deep recursion. | |
969 const char* const kViewSourceTwice = "view-source:view-source:"; | |
970 if (url.SchemeIs(kViewSource) && | |
971 !StartsWithASCII(url.possibly_invalid_spec(), kViewSourceTwice, false)) { | |
972 return FormatViewSourceUrl(url, languages, format_types, | |
973 unescape_rules, new_parsed, prefix_end, offsets_for_adjustment); | |
974 } | |
975 | |
976 // We handle both valid and invalid URLs (this will give us the spec | |
977 // regardless of validity). | |
978 const std::string& spec = url.possibly_invalid_spec(); | |
979 const url_parse::Parsed& parsed = url.parsed_for_possibly_invalid_spec(); | |
980 size_t spec_length = spec.length(); | |
981 std::for_each(offsets_for_adjustment->begin(), | |
982 offsets_for_adjustment->end(), | |
983 LimitOffset<std::wstring>(spec_length)); | |
984 | |
985 // Copy everything before the username (the scheme and the separators.) | |
986 // These are ASCII. | |
987 url_string.insert(url_string.end(), spec.begin(), | |
988 spec.begin() + parsed.CountCharactersBefore(url_parse::Parsed::USERNAME, | |
989 true)); | |
990 | |
991 const wchar_t kHTTP[] = L"http://"; | |
992 const char kFTP[] = "ftp."; | |
993 // URLFixerUpper::FixupURL() treats "ftp.foo.com" as ftp://ftp.foo.com. This | |
994 // means that if we trim "http://" off a URL whose host starts with "ftp." and | |
995 // the user inputs this into any field subject to fixup (which is basically | |
996 // all input fields), the meaning would be changed. (In fact, often the | |
997 // formatted URL is directly pre-filled into an input field.) For this reason | |
998 // we avoid stripping "http://" in this case. | |
999 bool omit_http = | |
1000 (format_types & kFormatUrlOmitHTTP) && (url_string == kHTTP) && | |
1001 (url.host().compare(0, arraysize(kFTP) - 1, kFTP) != 0); | |
1002 | |
1003 new_parsed->scheme = parsed.scheme; | |
1004 | |
1005 if ((format_types & kFormatUrlOmitUsernamePassword) != 0) { | |
1006 // Remove the username and password fields. We don't want to display those | |
1007 // to the user since they can be used for attacks, | |
1008 // e.g. "http://google.com:search@evil.ru/" | |
1009 new_parsed->username.reset(); | |
1010 new_parsed->password.reset(); | |
1011 // Update the offsets based on removed username and/or password. | |
1012 if (!offsets_for_adjustment->empty() && | |
1013 (parsed.username.is_nonempty() || parsed.password.is_nonempty())) { | |
1014 AdjustOffset::Adjustments adjustments; | |
1015 if (parsed.username.is_nonempty() && parsed.password.is_nonempty()) { | |
1016 // The seeming off-by-one and off-by-two in these first two lines are to | |
1017 // account for the ':' after the username and '@' after the password. | |
1018 adjustments.push_back(AdjustOffset::Adjustment( | |
1019 static_cast<size_t>(parsed.username.begin), | |
1020 static_cast<size_t>(parsed.username.len + parsed.password.len + | |
1021 2), 0)); | |
1022 } else { | |
1023 const url_parse::Component* nonempty_component = | |
1024 parsed.username.is_nonempty() ? &parsed.username : &parsed.password; | |
1025 // The seeming off-by-one in below is to account for the '@' after the | |
1026 // username/password. | |
1027 adjustments.push_back(AdjustOffset::Adjustment( | |
1028 static_cast<size_t>(nonempty_component->begin), | |
1029 static_cast<size_t>(nonempty_component->len + 1), 0)); | |
1030 } | |
1031 | |
1032 // Make offset adjustment. | |
1033 std::for_each(offsets_for_adjustment->begin(), | |
1034 offsets_for_adjustment->end(), | |
1035 AdjustOffset(adjustments)); | |
1036 } | |
1037 } else { | |
1038 AppendFormattedComponent(spec, parsed.username, unescape_rules, &url_string, | |
1039 &new_parsed->username, offsets_for_adjustment); | |
1040 if (parsed.password.is_valid()) | |
1041 url_string.push_back(':'); | |
1042 AppendFormattedComponent(spec, parsed.password, unescape_rules, &url_string, | |
1043 &new_parsed->password, offsets_for_adjustment); | |
1044 if (parsed.username.is_valid() || parsed.password.is_valid()) | |
1045 url_string.push_back('@'); | |
1046 } | |
1047 if (prefix_end) | |
1048 *prefix_end = static_cast<size_t>(url_string.length()); | |
1049 | |
1050 AppendFormattedHostWithOffsets(url, languages, &url_string, new_parsed, | |
1051 offsets_for_adjustment); | |
1052 | |
1053 // Port. | |
1054 if (parsed.port.is_nonempty()) { | |
1055 url_string.push_back(':'); | |
1056 new_parsed->port.begin = url_string.length(); | |
1057 url_string.insert(url_string.end(), | |
1058 spec.begin() + parsed.port.begin, | |
1059 spec.begin() + parsed.port.end()); | |
1060 new_parsed->port.len = url_string.length() - new_parsed->port.begin; | |
1061 } else { | |
1062 new_parsed->port.reset(); | |
1063 } | |
1064 | |
1065 // Path and query both get the same general unescape & convert treatment. | |
1066 if (!(format_types & kFormatUrlOmitTrailingSlashOnBareHostname) || | |
1067 !CanStripTrailingSlash(url)) { | |
1068 AppendFormattedComponent(spec, parsed.path, unescape_rules, &url_string, | |
1069 &new_parsed->path, offsets_for_adjustment); | |
1070 } | |
1071 if (parsed.query.is_valid()) | |
1072 url_string.push_back('?'); | |
1073 AppendFormattedComponent(spec, parsed.query, unescape_rules, &url_string, | |
1074 &new_parsed->query, offsets_for_adjustment); | |
1075 | |
1076 // Reference is stored in valid, unescaped UTF-8, so we can just convert. | |
1077 if (parsed.ref.is_valid()) { | |
1078 url_string.push_back('#'); | |
1079 size_t ref_begin = url_string.length(); | |
1080 new_parsed->ref.begin = static_cast<int>(ref_begin); | |
1081 | |
1082 // Compose a list of offsets within the section. | |
1083 std::vector<size_t> offsets_into_ref = | |
1084 OffsetsIntoSection(offsets_for_adjustment, ref_begin); | |
1085 | |
1086 if (parsed.ref.len > 0) { | |
1087 url_string.append(UTF8ToWideAndAdjustOffsets(spec.substr(parsed.ref.begin, | |
1088 parsed.ref.len), | |
1089 &offsets_into_ref)); | |
1090 } | |
1091 size_t old_ref_len = static_cast<size_t>(parsed.ref.len); | |
1092 size_t new_ref_len = url_string.length() - new_parsed->ref.begin; | |
1093 new_parsed->ref.len = static_cast<int>(new_ref_len); | |
1094 | |
1095 // Apply offset adjustments. | |
1096 ApplySectionAdjustments(offsets_into_ref, offsets_for_adjustment, | |
1097 old_ref_len, new_ref_len, ref_begin); | |
1098 } | |
1099 | |
1100 // If we need to strip out http do it after the fact. This way we don't need | |
1101 // to worry about how offset_for_adjustment is interpreted. | |
1102 const size_t kHTTPSize = arraysize(kHTTP) - 1; | |
1103 if (omit_http && !url_string.compare(0, kHTTPSize, kHTTP)) { | |
1104 url_string = url_string.substr(kHTTPSize); | |
1105 AdjustOffset::Adjustments adjustments; | |
1106 adjustments.push_back(AdjustOffset::Adjustment(0, kHTTPSize, 0)); | |
1107 std::for_each(offsets_for_adjustment->begin(), | |
1108 offsets_for_adjustment->end(), | |
1109 AdjustOffset(adjustments)); | |
1110 if (prefix_end) | |
1111 *prefix_end -= kHTTPSize; | |
1112 | |
1113 // Adjust new_parsed. | |
1114 DCHECK(new_parsed->scheme.is_valid()); | |
1115 int delta = -(new_parsed->scheme.len + 3); // +3 for ://. | |
1116 new_parsed->scheme.reset(); | |
1117 AdjustComponents(delta, new_parsed); | |
1118 } | |
1119 | |
1120 return url_string; | |
1121 } | |
1122 | |
1123 } // namespace | 914 } // namespace |
1124 | 915 |
1125 const FormatUrlType kFormatUrlOmitNothing = 0; | 916 const FormatUrlType kFormatUrlOmitNothing = 0; |
1126 const FormatUrlType kFormatUrlOmitUsernamePassword = 1 << 0; | 917 const FormatUrlType kFormatUrlOmitUsernamePassword = 1 << 0; |
1127 const FormatUrlType kFormatUrlOmitHTTP = 1 << 1; | 918 const FormatUrlType kFormatUrlOmitHTTP = 1 << 1; |
1128 const FormatUrlType kFormatUrlOmitTrailingSlashOnBareHostname = 1 << 2; | 919 const FormatUrlType kFormatUrlOmitTrailingSlashOnBareHostname = 1 << 2; |
1129 const FormatUrlType kFormatUrlOmitAll = kFormatUrlOmitUsernamePassword | | 920 const FormatUrlType kFormatUrlOmitAll = kFormatUrlOmitUsernamePassword | |
1130 kFormatUrlOmitHTTP | kFormatUrlOmitTrailingSlashOnBareHostname; | 921 kFormatUrlOmitHTTP | kFormatUrlOmitTrailingSlashOnBareHostname; |
1131 | 922 |
1132 // TODO(viettrungluu): We don't want non-POD globals; change this. | 923 // TODO(viettrungluu): We don't want non-POD globals; change this. |
(...skipping 23 matching lines...) Expand all Loading... | |
1156 FILE_PATH_LITERAL("#"), FILE_PATH_LITERAL("%23")); | 947 FILE_PATH_LITERAL("#"), FILE_PATH_LITERAL("%23")); |
1157 | 948 |
1158 #if defined(OS_POSIX) | 949 #if defined(OS_POSIX) |
1159 ReplaceSubstringsAfterOffset(&url_string, 0, | 950 ReplaceSubstringsAfterOffset(&url_string, 0, |
1160 FILE_PATH_LITERAL("\\"), FILE_PATH_LITERAL("%5C")); | 951 FILE_PATH_LITERAL("\\"), FILE_PATH_LITERAL("%5C")); |
1161 #endif | 952 #endif |
1162 | 953 |
1163 return GURL(url_string); | 954 return GURL(url_string); |
1164 } | 955 } |
1165 | 956 |
1166 std::wstring GetSpecificHeader(const std::wstring& headers, | |
1167 const std::wstring& name) { | |
1168 return GetSpecificHeaderT(headers, name); | |
1169 } | |
1170 | |
1171 std::string GetSpecificHeader(const std::string& headers, | 957 std::string GetSpecificHeader(const std::string& headers, |
1172 const std::string& name) { | 958 const std::string& name) { |
1173 return GetSpecificHeaderT(headers, name); | 959 // We want to grab the Value from the "Key: Value" pairs in the headers, |
960 // which should look like this (no leading spaces, \n-separated) (we format | |
961 // them this way in url_request_inet.cc): | |
962 // HTTP/1.1 200 OK\n | |
963 // ETag: "6d0b8-947-24f35ec0"\n | |
964 // Content-Length: 2375\n | |
965 // Content-Type: text/html; charset=UTF-8\n | |
966 // Last-Modified: Sun, 03 Sep 2006 04:34:43 GMT\n | |
967 if (headers.empty()) | |
968 return std::string(); | |
969 | |
970 std::string match('\n' + name + ':'); | |
971 | |
972 std::string::const_iterator begin = | |
973 search(headers.begin(), headers.end(), match.begin(), match.end(), | |
974 base::CaseInsensitiveCompareASCII<char>()); | |
975 | |
976 if (begin == headers.end()) | |
977 return std::string(); | |
978 | |
979 begin += match.length(); | |
980 | |
981 std::string ret; | |
982 TrimWhitespace(std::string(begin, find(begin, headers.end(), '\n')), TRIM_ALL, | |
983 &ret); | |
984 return ret; | |
1174 } | 985 } |
1175 | 986 |
1176 bool DecodeCharset(const std::string& input, | 987 bool DecodeCharset(const std::string& input, |
1177 std::string* decoded_charset, | 988 std::string* decoded_charset, |
1178 std::string* value) { | 989 std::string* value) { |
1179 StringTokenizer t(input, "'"); | 990 StringTokenizer t(input, "'"); |
1180 t.set_options(StringTokenizer::RETURN_DELIMS); | 991 t.set_options(StringTokenizer::RETURN_DELIMS); |
1181 std::string temp_charset; | 992 std::string temp_charset; |
1182 std::string temp_value; | 993 std::string temp_value; |
1183 int numDelimsSeen = 0; | 994 int numDelimsSeen = 0; |
(...skipping 54 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1238 param_value = GetHeaderParamValue(header, "name", | 1049 param_value = GetHeaderParamValue(header, "name", |
1239 QuoteRule::REMOVE_OUTER_QUOTES); | 1050 QuoteRule::REMOVE_OUTER_QUOTES); |
1240 } | 1051 } |
1241 if (param_value.empty()) | 1052 if (param_value.empty()) |
1242 return std::string(); | 1053 return std::string(); |
1243 if (DecodeParamValue(param_value, referrer_charset, &decoded)) | 1054 if (DecodeParamValue(param_value, referrer_charset, &decoded)) |
1244 return decoded; | 1055 return decoded; |
1245 return std::string(); | 1056 return std::string(); |
1246 } | 1057 } |
1247 | 1058 |
1248 std::wstring GetHeaderParamValue(const std::wstring& field, | 1059 // TODO(mpcomplete): This is a quick and dirty implementation for now. I'm |
1249 const std::wstring& param_name, | 1060 // sure this doesn't properly handle all (most?) cases. |
1250 QuoteRule::Type quote_rule) { | 1061 std::string GetHeaderParamValue(const std::string& header, |
1251 return GetHeaderParamValueT(field, param_name, quote_rule); | 1062 const std::string& param_name, |
1063 QuoteRule::Type quote_rule) { | |
1064 // This assumes args are formatted exactly like "bla; arg1=value; arg2=value". | |
1065 std::string::const_iterator param_begin = | |
1066 search(header.begin(), header.end(), param_name.begin(), param_name.end(), | |
1067 base::CaseInsensitiveCompareASCII<char>()); | |
1068 | |
1069 if (param_begin == header.end()) | |
1070 return std::string(); | |
1071 param_begin += param_name.length(); | |
1072 | |
1073 std::string whitespace(" \t"); | |
1074 size_t equals_offset = | |
1075 header.find_first_not_of(whitespace, param_begin - header.begin()); | |
1076 if (equals_offset == std::string::npos || header[equals_offset] != '=') | |
1077 return std::string(); | |
1078 | |
1079 param_begin = header.begin() + equals_offset + 1; | |
1080 if (param_begin == header.end()) | |
1081 return std::string(); | |
1082 | |
1083 std::string::const_iterator param_end; | |
1084 if (*param_begin == '"' && quote_rule == QuoteRule::REMOVE_OUTER_QUOTES) { | |
1085 ++param_begin; // skip past the quote. | |
1086 param_end = find(param_begin, header.end(), '"'); | |
1087 // If the closing quote is missing, we will treat the rest of the | |
1088 // string as the parameter. We can't set |param_end| to the | |
1089 // location of the separator (';'), since the separator is | |
1090 // technically quoted. See: http://crbug.com/58840 | |
1091 } else { | |
1092 param_end = find(param_begin + 1, header.end(), ';'); | |
1093 } | |
1094 | |
1095 return std::string(param_begin, param_end); | |
1252 } | 1096 } |
1253 | 1097 |
1254 std::string GetHeaderParamValue(const std::string& field, | 1098 string16 IDNToUnicode(const std::string& host, |
1255 const std::string& param_name, | 1099 const std::string& languages) { |
1256 QuoteRule::Type quote_rule) { | |
1257 return GetHeaderParamValueT(field, param_name, quote_rule); | |
1258 } | |
1259 | |
1260 // TODO(brettw) bug 734373: check the scripts for each host component and | |
1261 // don't un-IDN-ize if there is more than one. Alternatively, only IDN for | |
1262 // scripts that the user has installed. For now, just put the entire | |
1263 // path through IDN. Maybe this feature can be implemented in ICU itself? | |
1264 // | |
1265 // We may want to skip this step in the case of file URLs to allow unicode | |
1266 // UNC hostnames regardless of encodings. | |
1267 std::wstring IDNToUnicodeWithOffsets( | |
1268 const char* host, | |
1269 size_t host_len, | |
1270 const std::wstring& languages, | |
1271 std::vector<size_t>* offsets_for_adjustment) { | |
1272 // Convert the ASCII input to a wide string for ICU. | |
1273 string16 input16; | |
1274 input16.reserve(host_len); | |
1275 input16.insert(input16.end(), host, host + host_len); | |
1276 | |
1277 // Do each component of the host separately, since we enforce script matching | |
1278 // on a per-component basis. | |
1279 AdjustOffset::Adjustments adjustments; | |
1280 string16 out16; | |
1281 for (size_t component_start = 0, component_end; | |
1282 component_start < input16.length(); | |
1283 component_start = component_end + 1) { | |
1284 // Find the end of the component. | |
1285 component_end = input16.find('.', component_start); | |
1286 if (component_end == string16::npos) | |
1287 component_end = input16.length(); // For getting the last component. | |
1288 size_t component_length = component_end - component_start; | |
1289 size_t new_component_start = out16.length(); | |
1290 bool converted_idn = false; | |
1291 if (component_end > component_start) { | |
1292 // Add the substring that we just found. | |
1293 converted_idn = IDNToUnicodeOneComponent(input16.data() + component_start, | |
1294 component_length, languages, &out16); | |
1295 } | |
1296 size_t new_component_length = out16.length() - new_component_start; | |
1297 | |
1298 if (converted_idn && offsets_for_adjustment) { | |
1299 adjustments.push_back(AdjustOffset::Adjustment( | |
1300 component_start, component_length, new_component_length)); | |
1301 } | |
1302 | |
1303 // Need to add the dot we just found (if we found one). | |
1304 if (component_end < input16.length()) | |
1305 out16.push_back('.'); | |
1306 } | |
1307 | |
1308 // Make offset adjustment. | |
1309 if (offsets_for_adjustment && !adjustments.empty()) { | |
1310 std::for_each(offsets_for_adjustment->begin(), | |
1311 offsets_for_adjustment->end(), | |
1312 AdjustOffset(adjustments)); | |
1313 } | |
1314 | |
1315 return UTF16ToWideAndAdjustOffsets(out16, offsets_for_adjustment); | |
1316 } | |
1317 | |
1318 std::wstring IDNToUnicode(const char* host, | |
1319 size_t host_len, | |
1320 const std::wstring& languages, | |
1321 size_t* offset_for_adjustment) { | |
1322 std::vector<size_t> offsets; | 1100 std::vector<size_t> offsets; |
1323 if (offset_for_adjustment) | 1101 return IDNToUnicodeWithOffsets(host, languages, &offsets); |
1324 offsets.push_back(*offset_for_adjustment); | |
1325 std::wstring result = | |
1326 IDNToUnicodeWithOffsets(host, host_len, languages, &offsets); | |
1327 if (offset_for_adjustment) | |
1328 *offset_for_adjustment = offsets[0]; | |
1329 return result; | |
1330 } | 1102 } |
1331 | 1103 |
1332 std::string CanonicalizeHost(const std::string& host, | 1104 std::string CanonicalizeHost(const std::string& host, |
1333 url_canon::CanonHostInfo* host_info) { | 1105 url_canon::CanonHostInfo* host_info) { |
1334 // Try to canonicalize the host. | 1106 // Try to canonicalize the host. |
1335 const url_parse::Component raw_host_component( | 1107 const url_parse::Component raw_host_component( |
1336 0, static_cast<int>(host.length())); | 1108 0, static_cast<int>(host.length())); |
1337 std::string canon_host; | 1109 std::string canon_host; |
1338 url_canon::StdStringCanonOutput canon_host_output(&canon_host); | 1110 url_canon::StdStringCanonOutput canon_host_output(&canon_host); |
1339 url_canon::CanonicalizeHostVerbose(host.c_str(), raw_host_component, | 1111 url_canon::CanonicalizeHostVerbose(host.c_str(), raw_host_component, |
1340 &canon_host_output, host_info); | 1112 &canon_host_output, host_info); |
1341 | 1113 |
1342 if (host_info->out_host.is_nonempty() && | 1114 if (host_info->out_host.is_nonempty() && |
1343 host_info->family != url_canon::CanonHostInfo::BROKEN) { | 1115 host_info->family != url_canon::CanonHostInfo::BROKEN) { |
1344 // Success! Assert that there's no extra garbage. | 1116 // Success! Assert that there's no extra garbage. |
1345 canon_host_output.Complete(); | 1117 canon_host_output.Complete(); |
1346 DCHECK_EQ(host_info->out_host.len, static_cast<int>(canon_host.length())); | 1118 DCHECK_EQ(host_info->out_host.len, static_cast<int>(canon_host.length())); |
1347 } else { | 1119 } else { |
1348 // Empty host, or canonicalization failed. We'll return empty. | 1120 // Empty host, or canonicalization failed. We'll return empty. |
1349 canon_host.clear(); | 1121 canon_host.clear(); |
1350 } | 1122 } |
1351 | 1123 |
1352 return canon_host; | 1124 return canon_host; |
1353 } | 1125 } |
1354 | 1126 |
1355 std::string CanonicalizeHost(const std::wstring& host, | |
1356 url_canon::CanonHostInfo* host_info) { | |
1357 std::string converted_host; | |
1358 WideToUTF8(host.c_str(), host.length(), &converted_host); | |
1359 return CanonicalizeHost(converted_host, host_info); | |
1360 } | |
1361 | |
1362 std::string GetDirectoryListingHeader(const string16& title) { | 1127 std::string GetDirectoryListingHeader(const string16& title) { |
1363 static const base::StringPiece header( | 1128 static const base::StringPiece header( |
1364 NetModule::GetResource(IDR_DIR_HEADER_HTML)); | 1129 NetModule::GetResource(IDR_DIR_HEADER_HTML)); |
1365 // This can be null in unit tests. | 1130 // This can be null in unit tests. |
1366 DLOG_IF(WARNING, header.empty()) << | 1131 DLOG_IF(WARNING, header.empty()) << |
1367 "Missing resource: directory listing header"; | 1132 "Missing resource: directory listing header"; |
1368 | 1133 |
1369 std::string result; | 1134 std::string result; |
1370 if (!header.empty()) | 1135 if (!header.empty()) |
1371 result.assign(header.data(), header.size()); | 1136 result.assign(header.data(), header.size()); |
(...skipping 360 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1732 UnescapeRule::Type flags = | 1497 UnescapeRule::Type flags = |
1733 UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS; | 1498 UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS; |
1734 *username = UnescapeAndDecodeUTF8URLComponent(url.username(), flags, NULL); | 1499 *username = UnescapeAndDecodeUTF8URLComponent(url.username(), flags, NULL); |
1735 *password = UnescapeAndDecodeUTF8URLComponent(url.password(), flags, NULL); | 1500 *password = UnescapeAndDecodeUTF8URLComponent(url.password(), flags, NULL); |
1736 } | 1501 } |
1737 | 1502 |
1738 std::string GetHostOrSpecFromURL(const GURL& url) { | 1503 std::string GetHostOrSpecFromURL(const GURL& url) { |
1739 return url.has_host() ? TrimEndingDot(url.host()) : url.spec(); | 1504 return url.has_host() ? TrimEndingDot(url.host()) : url.spec(); |
1740 } | 1505 } |
1741 | 1506 |
1742 void AppendFormattedHostWithOffsets( | 1507 void AppendFormattedHost(const GURL& url, |
1743 const GURL& url, | 1508 const std::string& languages, |
1744 const std::wstring& languages, | 1509 string16* output) { |
1745 std::wstring* output, | 1510 std::vector<size_t> offsets; |
1746 url_parse::Parsed* new_parsed, | 1511 AppendFormattedComponent(url.possibly_invalid_spec(), |
1747 std::vector<size_t>* offsets_for_adjustment) { | 1512 url.parsed_for_possibly_invalid_spec().host, offsets, |
1748 DCHECK(output); | 1513 HostComponentTransform(languages), output, NULL, NULL); |
1749 const url_parse::Component& host = | |
1750 url.parsed_for_possibly_invalid_spec().host; | |
1751 | |
1752 if (host.is_nonempty()) { | |
1753 // Handle possible IDN in the host name. | |
1754 size_t host_begin = output->length(); | |
1755 if (new_parsed) | |
1756 new_parsed->host.begin = static_cast<int>(host_begin); | |
1757 size_t old_host_len = static_cast<size_t>(host.len); | |
1758 | |
1759 // Compose a list of offsets within the host area. | |
1760 std::vector<size_t> offsets_into_host = | |
1761 OffsetsIntoSection(offsets_for_adjustment, host_begin); | |
1762 | |
1763 const std::string& spec = url.possibly_invalid_spec(); | |
1764 DCHECK(host.begin >= 0 && | |
1765 ((spec.length() == 0 && host.begin == 0) || | |
1766 host.begin < static_cast<int>(spec.length()))); | |
1767 output->append(IDNToUnicodeWithOffsets(&spec[host.begin], old_host_len, | |
1768 languages, &offsets_into_host)); | |
1769 | |
1770 size_t new_host_len = output->length() - host_begin; | |
1771 if (new_parsed) | |
1772 new_parsed->host.len = static_cast<int>(new_host_len); | |
1773 | |
1774 // Apply offset adjustments. | |
1775 ApplySectionAdjustments(offsets_into_host, offsets_for_adjustment, | |
1776 old_host_len, new_host_len, host_begin); | |
1777 } else if (new_parsed) { | |
1778 new_parsed->host.reset(); | |
1779 } | |
1780 } | 1514 } |
1781 | 1515 |
1782 void AppendFormattedHost(const GURL& url, | |
1783 const std::wstring& languages, | |
1784 std::wstring* output, | |
1785 url_parse::Parsed* new_parsed, | |
1786 size_t* offset_for_adjustment) { | |
1787 std::vector<size_t> offsets; | |
1788 if (offset_for_adjustment) | |
1789 offsets.push_back(*offset_for_adjustment); | |
1790 AppendFormattedHostWithOffsets(url, languages, output, new_parsed, &offsets); | |
1791 if (offset_for_adjustment) | |
1792 *offset_for_adjustment = offsets[0]; | |
1793 } | |
1794 | |
1795 // TODO(viettrungluu): convert the wstring |FormatUrlInternal()|. | |
1796 string16 FormatUrlWithOffsets(const GURL& url, | 1516 string16 FormatUrlWithOffsets(const GURL& url, |
1797 const std::string& languages, | 1517 const std::string& languages, |
1798 FormatUrlTypes format_types, | 1518 FormatUrlTypes format_types, |
1799 UnescapeRule::Type unescape_rules, | 1519 UnescapeRule::Type unescape_rules, |
1800 url_parse::Parsed* new_parsed, | 1520 url_parse::Parsed* new_parsed, |
1801 size_t* prefix_end, | 1521 size_t* prefix_end, |
1802 std::vector<size_t>* offsets_for_adjustment) { | 1522 std::vector<size_t>* offsets_for_adjustment) { |
1803 return WideToUTF16Hack( | 1523 url_parse::Parsed parsed_temp; |
1804 FormatUrlInternal(url, ASCIIToWide(languages), format_types, | 1524 if (!new_parsed) |
1805 unescape_rules, new_parsed, prefix_end, | 1525 new_parsed = &parsed_temp; |
1806 offsets_for_adjustment)); | 1526 else |
1527 *new_parsed = url_parse::Parsed(); | |
1528 std::vector<size_t> original_offsets; | |
1529 if (offsets_for_adjustment) | |
1530 original_offsets = *offsets_for_adjustment; | |
1531 | |
1532 // Special handling for view-source:. Don't use chrome::kViewSourceScheme | |
1533 // because this library shouldn't depend on chrome. | |
1534 const char* const kViewSource = "view-source"; | |
1535 // Reject "view-source:view-source:..." to avoid deep recursion. | |
1536 const char* const kViewSourceTwice = "view-source:view-source:"; | |
1537 if (url.SchemeIs(kViewSource) && | |
1538 !StartsWithASCII(url.possibly_invalid_spec(), kViewSourceTwice, false)) { | |
1539 return FormatViewSourceUrl(url, original_offsets, languages, format_types, | |
1540 unescape_rules, new_parsed, prefix_end, offsets_for_adjustment); | |
1541 } | |
1542 | |
1543 // We handle both valid and invalid URLs (this will give us the spec | |
1544 // regardless of validity). | |
1545 const std::string& spec = url.possibly_invalid_spec(); | |
1546 const url_parse::Parsed& parsed = url.parsed_for_possibly_invalid_spec(); | |
1547 | |
1548 // Scheme & separators. These are ASCII. | |
1549 string16 url_string; | |
1550 url_string.insert(url_string.end(), spec.begin(), | |
1551 spec.begin() + parsed.CountCharactersBefore(url_parse::Parsed::USERNAME, | |
1552 true)); | |
1553 const char kHTTP[] = "http://"; | |
1554 const char kFTP[] = "ftp."; | |
1555 // URLFixerUpper::FixupURL() treats "ftp.foo.com" as ftp://ftp.foo.com. This | |
1556 // means that if we trim "http://" off a URL whose host starts with "ftp." and | |
1557 // the user inputs this into any field subject to fixup (which is basically | |
1558 // all input fields), the meaning would be changed. (In fact, often the | |
1559 // formatted URL is directly pre-filled into an input field.) For this reason | |
1560 // we avoid stripping "http://" in this case. | |
1561 bool omit_http = (format_types & kFormatUrlOmitHTTP) && | |
1562 EqualsASCII(url_string, kHTTP) && | |
1563 !StartsWithASCII(url.host(), kFTP, true); | |
1564 new_parsed->scheme = parsed.scheme; | |
1565 | |
1566 // Username & password. | |
1567 if ((format_types & kFormatUrlOmitUsernamePassword) != 0) { | |
1568 // Remove the username and password fields. We don't want to display those | |
1569 // to the user since they can be used for attacks, | |
1570 // e.g. "http://google.com:search@evil.ru/" | |
1571 new_parsed->username.reset(); | |
1572 new_parsed->password.reset(); | |
1573 // Update the offsets based on removed username and/or password. | |
1574 if (offsets_for_adjustment && !offsets_for_adjustment->empty() && | |
1575 (parsed.username.is_nonempty() || parsed.password.is_nonempty())) { | |
1576 OffsetAdjuster offset_adjuster(offsets_for_adjustment); | |
1577 if (parsed.username.is_nonempty() && parsed.password.is_nonempty()) { | |
1578 // The seeming off-by-one and off-by-two in these first two lines are to | |
1579 // account for the ':' after the username and '@' after the password. | |
1580 offset_adjuster.Add(OffsetAdjuster::Adjustment( | |
1581 static_cast<size_t>(parsed.username.begin), | |
1582 static_cast<size_t>(parsed.username.len + parsed.password.len + 2), | |
1583 0)); | |
1584 } else { | |
1585 const url_parse::Component* nonempty_component = | |
1586 parsed.username.is_nonempty() ? &parsed.username : &parsed.password; | |
1587 // The seeming off-by-one in below is to account for the '@' after the | |
1588 // username/password. | |
1589 offset_adjuster.Add(OffsetAdjuster::Adjustment( | |
1590 static_cast<size_t>(nonempty_component->begin), | |
1591 static_cast<size_t>(nonempty_component->len + 1), 0)); | |
1592 } | |
1593 } | |
1594 } else { | |
1595 AppendFormattedComponent(spec, parsed.username, original_offsets, | |
1596 NonHostComponentTransform(unescape_rules), &url_string, | |
1597 &new_parsed->username, offsets_for_adjustment); | |
1598 if (parsed.password.is_valid()) { | |
1599 size_t colon = parsed.username.end(); | |
1600 DCHECK_EQ(static_cast<size_t>(parsed.password.begin - 1), colon); | |
1601 std::vector<size_t>::const_iterator colon_iter = | |
1602 std::find(original_offsets.begin(), original_offsets.end(), colon); | |
1603 if (colon_iter != original_offsets.end()) { | |
1604 (*offsets_for_adjustment)[colon_iter - original_offsets.begin()] = | |
1605 url_string.length(); | |
1606 } | |
1607 url_string.push_back(':'); | |
1608 } | |
1609 AppendFormattedComponent(spec, parsed.password, original_offsets, | |
1610 NonHostComponentTransform(unescape_rules), &url_string, | |
1611 &new_parsed->password, offsets_for_adjustment); | |
1612 if (parsed.username.is_valid() || parsed.password.is_valid()) { | |
1613 size_t at_sign = (parsed.password.is_valid() ? | |
1614 parsed.password : parsed.username).end(); | |
1615 DCHECK_EQ(static_cast<size_t>(parsed.host.begin - 1), at_sign); | |
1616 std::vector<size_t>::const_iterator at_sign_iter = | |
1617 std::find(original_offsets.begin(), original_offsets.end(), at_sign); | |
1618 if (at_sign_iter != original_offsets.end()) { | |
1619 (*offsets_for_adjustment)[at_sign_iter - original_offsets.begin()] = | |
1620 url_string.length(); | |
1621 } | |
1622 url_string.push_back('@'); | |
1623 } | |
1624 } | |
1625 if (prefix_end) | |
1626 *prefix_end = static_cast<size_t>(url_string.length()); | |
1627 | |
1628 // Host. | |
1629 AppendFormattedComponent(spec, parsed.host, original_offsets, | |
1630 HostComponentTransform(languages), &url_string, &new_parsed->host, | |
1631 offsets_for_adjustment); | |
1632 | |
1633 // Port. | |
1634 if (parsed.port.is_nonempty()) { | |
1635 url_string.push_back(':'); | |
1636 new_parsed->port.begin = url_string.length(); | |
1637 url_string.insert(url_string.end(), | |
1638 spec.begin() + parsed.port.begin, | |
1639 spec.begin() + parsed.port.end()); | |
1640 new_parsed->port.len = url_string.length() - new_parsed->port.begin; | |
1641 } else { | |
1642 new_parsed->port.reset(); | |
1643 } | |
1644 | |
1645 // Path & query. Both get the same general unescape & convert treatment. | |
1646 if (!(format_types & kFormatUrlOmitTrailingSlashOnBareHostname) || | |
1647 !CanStripTrailingSlash(url)) { | |
1648 AppendFormattedComponent(spec, parsed.path, original_offsets, | |
1649 NonHostComponentTransform(unescape_rules), &url_string, | |
1650 &new_parsed->path, offsets_for_adjustment); | |
1651 } | |
1652 if (parsed.query.is_valid()) | |
1653 url_string.push_back('?'); | |
1654 AppendFormattedComponent(spec, parsed.query, original_offsets, | |
1655 NonHostComponentTransform(unescape_rules), &url_string, | |
1656 &new_parsed->query, offsets_for_adjustment); | |
1657 | |
1658 // Ref. This is valid, unescaped UTF-8, so we can just convert. | |
1659 if (parsed.ref.is_valid()) { | |
1660 url_string.push_back('#'); | |
1661 size_t original_ref_begin = static_cast<size_t>(parsed.ref.begin); | |
1662 size_t output_ref_begin = url_string.length(); | |
1663 new_parsed->ref.begin = static_cast<int>(output_ref_begin); | |
1664 | |
1665 std::vector<size_t> offsets_into_ref( | |
1666 OffsetsIntoComponent(original_offsets, original_ref_begin)); | |
1667 if (parsed.ref.len > 0) { | |
1668 url_string.append(UTF8ToUTF16AndAdjustOffsets( | |
1669 spec.substr(original_ref_begin, static_cast<size_t>(parsed.ref.len)), | |
1670 &offsets_into_ref)); | |
1671 } | |
1672 | |
1673 new_parsed->ref.len = | |
1674 static_cast<int>(url_string.length() - new_parsed->ref.begin); | |
1675 AdjustForComponentTransform(original_offsets, original_ref_begin, | |
1676 static_cast<size_t>(parsed.ref.end()), offsets_into_ref, | |
1677 output_ref_begin, offsets_for_adjustment); | |
1678 } | |
1679 | |
1680 // If we need to strip out http do it after the fact. This way we don't need | |
1681 // to worry about how offsets_for_adjustment is interpreted. | |
1682 if (omit_http && StartsWith(url_string, ASCIIToUTF16(kHTTP), true)) { | |
1683 const size_t kHTTPSize = arraysize(kHTTP) - 1; | |
1684 url_string = url_string.substr(kHTTPSize); | |
1685 if (offsets_for_adjustment && !offsets_for_adjustment->empty()) { | |
1686 OffsetAdjuster offset_adjuster(offsets_for_adjustment); | |
1687 offset_adjuster.Add(OffsetAdjuster::Adjustment(0, kHTTPSize, 0)); | |
1688 } | |
1689 if (prefix_end) | |
1690 *prefix_end -= kHTTPSize; | |
1691 | |
1692 // Adjust new_parsed. | |
1693 DCHECK(new_parsed->scheme.is_valid()); | |
1694 int delta = -(new_parsed->scheme.len + 3); // +3 for ://. | |
1695 new_parsed->scheme.reset(); | |
1696 AdjustComponents(delta, new_parsed); | |
1697 } | |
1698 | |
1699 LimitOffsets(url_string, offsets_for_adjustment); | |
1700 return url_string; | |
1807 } | 1701 } |
1808 | 1702 |
// Single-offset convenience overload of the URL formatter.
//
// |offset_for_adjustment| is optional. When it is non-NULL its value is placed
// in a one-element vector, handed to the vector-based formatter, and the
// resulting value is written back through the pointer. When it is NULL an
// empty vector is passed and nothing is written back. All other arguments are
// forwarded unchanged; the formatted string is returned.
1809 string16 FormatUrl(const GURL& url, | 1703 string16 FormatUrl(const GURL& url, |
1810 const std::string& languages, | 1704 const std::string& languages, |
1811 FormatUrlTypes format_types, | 1705 FormatUrlTypes format_types, |
1812 UnescapeRule::Type unescape_rules, | 1706 UnescapeRule::Type unescape_rules, |
1813 url_parse::Parsed* new_parsed, | 1707 url_parse::Parsed* new_parsed, |
1814 size_t* prefix_end, | 1708 size_t* prefix_end, |
1815 size_t* offset_for_adjustment) { | 1709 size_t* offset_for_adjustment) { |
1816 std::vector<size_t> offsets; | 1710 std::vector<size_t> offsets; |
1817 if (offset_for_adjustment) | 1711 if (offset_for_adjustment) |
1818 offsets.push_back(*offset_for_adjustment); | 1712 offsets.push_back(*offset_for_adjustment); |
// Delegate to the vector-based formatter. The OLD column goes through
// FormatUrlInternal with wide strings and converts the result back with
// WideToUTF16Hack; the NEW column calls FormatUrlWithOffsets directly.
1819 string16 result = WideToUTF16Hack( | 1713 string16 result = FormatUrlWithOffsets(url, languages, format_types, |
1820 FormatUrlInternal(url, ASCIIToWide(languages), format_types, | 1714 unescape_rules, new_parsed, prefix_end, &offsets); |
1821 unescape_rules, new_parsed, prefix_end, &offsets)); | |
// Hand the single offset back to the caller. NOTE(review): this reads
// offsets[0], so it relies on the callee leaving the vector's size unchanged
// -- confirm against the callee.
1822 if (offset_for_adjustment) | 1715 if (offset_for_adjustment) |
1823 *offset_for_adjustment = offsets[0]; | 1716 *offset_for_adjustment = offsets[0]; |
1824 return result; | 1717 return result; |
1825 } | 1718 }
1826 | 1719 |
1827 bool CanStripTrailingSlash(const GURL& url) { | 1720 bool CanStripTrailingSlash(const GURL& url) { |
1828 // Omit the path only for standard, non-file URLs with nothing but "/" after | 1721 // Omit the path only for standard, non-file URLs with nothing but "/" after |
1829 // the hostname. | 1722 // the hostname. |
1830 return url.IsStandard() && !url.SchemeIsFile() && !url.has_query() && | 1723 return url.IsStandard() && !url.SchemeIsFile() && !url.has_query() && |
1831 !url.has_ref() && url.path() == "/"; | 1724 !url.has_ref() && url.path() == "/"; |
(...skipping 435 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
2267 } | 2160 } |
2268 | 2161 |
// Initializes the |name| and |address| members from the same-named
// constructor arguments; the body does no other work.
2269 NetworkInterface::NetworkInterface(const std::string& name, | 2162 NetworkInterface::NetworkInterface(const std::string& name, |
2270 const IPAddressNumber& address) | 2163 const IPAddressNumber& address) |
2271 : name(name), address(address) { | 2164 : name(name), address(address) { |
2272 } | 2165 }
2273 | 2166 |
// Out-of-line destructor with an empty body.
2274 NetworkInterface::~NetworkInterface() { | 2167 NetworkInterface::~NetworkInterface() { |
2275 } | 2168 }
2276 | 2169 |
// Stores |component_start|, the threshold that operator() compares offsets
// against. (This definition appears only in the OLD column of the diff.)
2277 ClampComponentOffset::ClampComponentOffset(size_t component_start) | |
2278 : component_start(component_start) {} | |
2279 | |
// Returns |offset| unchanged when it is at or beyond |component_start|;
// otherwise returns std::wstring::npos. (This definition appears only in the
// OLD column of the diff.)
2280 size_t ClampComponentOffset::operator()(size_t offset) { | |
2281 return (offset >= component_start) ? | |
2282 offset : std::wstring::npos; | |
2283 } | |
2284 | |
2285 } // namespace net | 2170 } // namespace net |
OLD | NEW |