OLD | NEW |
---|---|
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "net/base/net_util.h" | 5 #include "net/base/net_util.h" |
6 | 6 |
7 #include <unicode/regex.h> | 7 #include <unicode/regex.h> |
8 #include <unicode/ucnv.h> | 8 #include <unicode/ucnv.h> |
9 #include <unicode/uidna.h> | 9 #include <unicode/uidna.h> |
10 #include <unicode/ulocdata.h> | 10 #include <unicode/ulocdata.h> |
(...skipping 137 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
148 0xFFFF, // Used to block all invalid port numbers (see | 148 0xFFFF, // Used to block all invalid port numbers (see |
149 // third_party/WebKit/Source/WebCore/platform/KURLGoogle.cpp, port()) | 149 // third_party/WebKit/Source/WebCore/platform/KURLGoogle.cpp, port()) |
150 }; | 150 }; |
151 | 151 |
152 // FTP overrides the following restricted ports. | 152 // FTP overrides the following restricted ports. |
153 static const int kAllowedFtpPorts[] = { | 153 static const int kAllowedFtpPorts[] = { |
154 21, // ftp data | 154 21, // ftp data |
155 22, // ssh | 155 22, // ssh |
156 }; | 156 }; |
157 | 157 |
158 template<typename STR> | |
159 STR GetSpecificHeaderT(const STR& headers, const STR& name) { | |
160 // We want to grab the Value from the "Key: Value" pairs in the headers, | |
161 // which should look like this (no leading spaces, \n-separated) (we format | |
162 // them this way in url_request_inet.cc): | |
163 // HTTP/1.1 200 OK\n | |
164 // ETag: "6d0b8-947-24f35ec0"\n | |
165 // Content-Length: 2375\n | |
166 // Content-Type: text/html; charset=UTF-8\n | |
167 // Last-Modified: Sun, 03 Sep 2006 04:34:43 GMT\n | |
168 if (headers.empty()) | |
169 return STR(); | |
170 | |
171 STR match; | |
172 match.push_back('\n'); | |
173 match.append(name); | |
174 match.push_back(':'); | |
175 | |
176 typename STR::const_iterator begin = | |
177 search(headers.begin(), headers.end(), match.begin(), match.end(), | |
178 base::CaseInsensitiveCompareASCII<typename STR::value_type>()); | |
179 | |
180 if (begin == headers.end()) | |
181 return STR(); | |
182 | |
183 begin += match.length(); | |
184 | |
185 typename STR::const_iterator end = find(begin, headers.end(), '\n'); | |
186 | |
187 STR ret; | |
188 TrimWhitespace(STR(begin, end), TRIM_ALL, &ret); | |
189 return ret; | |
190 } | |
191 | |
192 // Similar to Base64Decode. Decodes a Q-encoded string to a sequence | 158 // Similar to Base64Decode. Decodes a Q-encoded string to a sequence |
193 // of bytes. If input is invalid, return false. | 159 // of bytes. If input is invalid, return false. |
194 bool QPDecode(const std::string& input, std::string* output) { | 160 bool QPDecode(const std::string& input, std::string* output) { |
195 std::string temp; | 161 std::string temp; |
196 temp.reserve(input.size()); | 162 temp.reserve(input.size()); |
197 std::string::const_iterator it = input.begin(); | 163 std::string::const_iterator it = input.begin(); |
198 while (it != input.end()) { | 164 while (it != input.end()) { |
199 if (*it == '_') { | 165 if (*it == '_') { |
200 temp.push_back(' '); | 166 temp.push_back(' '); |
201 } else if (*it == '=') { | 167 } else if (*it == '=') { |
(...skipping 67 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
269 *is_rfc2047 = false; | 235 *is_rfc2047 = false; |
270 output->clear(); | 236 output->clear(); |
271 if (encoded_word.empty()) | 237 if (encoded_word.empty()) |
272 return true; | 238 return true; |
273 | 239 |
274 if (!IsStringASCII(encoded_word)) { | 240 if (!IsStringASCII(encoded_word)) { |
275 // Try UTF-8, referrer_charset and the native OS default charset in turn. | 241 // Try UTF-8, referrer_charset and the native OS default charset in turn. |
276 if (IsStringUTF8(encoded_word)) { | 242 if (IsStringUTF8(encoded_word)) { |
277 *output = encoded_word; | 243 *output = encoded_word; |
278 } else { | 244 } else { |
279 std::wstring wide_output; | 245 string16 utf16_output; |
280 if (!referrer_charset.empty() && | 246 if (!referrer_charset.empty() && |
281 base::CodepageToWide(encoded_word, referrer_charset.c_str(), | 247 base::CodepageToUTF16(encoded_word, referrer_charset.c_str(), |
282 base::OnStringConversionError::FAIL, | 248 base::OnStringConversionError::FAIL, |
283 &wide_output)) { | 249 &utf16_output)) { |
284 *output = WideToUTF8(wide_output); | 250 *output = UTF16ToUTF8(utf16_output); |
285 } else { | 251 } else { |
286 *output = WideToUTF8(base::SysNativeMBToWide(encoded_word)); | 252 *output = WideToUTF8(base::SysNativeMBToWide(encoded_word)); |
287 } | 253 } |
288 } | 254 } |
289 | 255 |
290 return true; | 256 return true; |
291 } | 257 } |
292 | 258 |
293 // RFC 2047: one of the encoding methods supported by Firefox and relatively | 259 // RFC 2047: one of the encoding methods supported by Firefox and relatively |
294 // widely used by web servers. | 260 // widely used by web servers. |
(...skipping 112 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
407 std::string decoded; | 373 std::string decoded; |
408 if (!DecodeWord(t.token(), referrer_charset, &is_previous_token_rfc2047, | 374 if (!DecodeWord(t.token(), referrer_charset, &is_previous_token_rfc2047, |
409 &decoded)) | 375 &decoded)) |
410 return false; | 376 return false; |
411 tmp.append(decoded); | 377 tmp.append(decoded); |
412 } | 378 } |
413 output->swap(tmp); | 379 output->swap(tmp); |
414 return true; | 380 return true; |
415 } | 381 } |
416 | 382 |
417 // TODO(mpcomplete): This is a quick and dirty implementation for now. I'm | |
418 // sure this doesn't properly handle all (most?) cases. | |
419 template<typename STR> | |
420 STR GetHeaderParamValueT(const STR& header, const STR& param_name, | |
421 QuoteRule::Type quote_rule) { | |
422 // This assumes args are formatted exactly like "bla; arg1=value; arg2=value". | |
423 typename STR::const_iterator param_begin = | |
424 search(header.begin(), header.end(), param_name.begin(), param_name.end(), | |
425 base::CaseInsensitiveCompareASCII<typename STR::value_type>()); | |
426 | |
427 if (param_begin == header.end()) | |
428 return STR(); | |
429 param_begin += param_name.length(); | |
430 | |
431 STR whitespace; | |
432 whitespace.push_back(' '); | |
433 whitespace.push_back('\t'); | |
434 const typename STR::size_type equals_offset = | |
435 header.find_first_not_of(whitespace, param_begin - header.begin()); | |
436 if (equals_offset == STR::npos || header.at(equals_offset) != '=') | |
437 return STR(); | |
438 | |
439 param_begin = header.begin() + equals_offset + 1; | |
440 if (param_begin == header.end()) | |
441 return STR(); | |
442 | |
443 typename STR::const_iterator param_end; | |
444 if (*param_begin == '"' && quote_rule == QuoteRule::REMOVE_OUTER_QUOTES) { | |
445 ++param_begin; // skip past the quote. | |
446 param_end = find(param_begin, header.end(), '"'); | |
447 // If the closing quote is missing, we will treat the rest of the | |
448 // string as the parameter. We can't set |param_end| to the | |
449 // location of the separator (';'), since the separator is | |
450 // technically quoted. See: http://crbug.com/58840 | |
451 } else { | |
452 param_end = find(param_begin+1, header.end(), ';'); | |
453 } | |
454 | |
455 return STR(param_begin, param_end); | |
456 } | |
457 | |
458 // Does some simple normalization of scripts so we can allow certain scripts | 383 // Does some simple normalization of scripts so we can allow certain scripts |
459 // to exist together. | 384 // to exist together. |
460 // TODO(brettw) bug 880223: we should allow some other languages to be | 385 // TODO(brettw) bug 880223: we should allow some other languages to be |
461 // combined such as Chinese and Latin. We will probably need a more | 386 // combined such as Chinese and Latin. We will probably need a more |
462 // complicated system of language pairs to have more fine-grained control. | 387 // complicated system of language pairs to have more fine-grained control. |
463 UScriptCode NormalizeScript(UScriptCode code) { | 388 UScriptCode NormalizeScript(UScriptCode code) { |
464 switch (code) { | 389 switch (code) { |
465 case USCRIPT_KATAKANA: | 390 case USCRIPT_KATAKANA: |
466 case USCRIPT_HIRAGANA: | 391 case USCRIPT_HIRAGANA: |
467 case USCRIPT_KATAKANA_OR_HIRAGANA: | 392 case USCRIPT_KATAKANA_OR_HIRAGANA: |
(...skipping 118 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
586 ulocdata_close(uld); | 511 ulocdata_close(uld); |
587 } | 512 } |
588 } | 513 } |
589 return !lang_set->isEmpty() && lang_set->containsAll(component_characters); | 514 return !lang_set->isEmpty() && lang_set->containsAll(component_characters); |
590 } | 515 } |
591 | 516 |
592 // Returns true if the given Unicode host component is safe to display to the | 517 // Returns true if the given Unicode host component is safe to display to the |
593 // user. | 518 // user. |
594 bool IsIDNComponentSafe(const char16* str, | 519 bool IsIDNComponentSafe(const char16* str, |
595 int str_len, | 520 int str_len, |
596 const std::wstring& languages) { | 521 const std::string& languages) { |
597 // Most common cases (non-IDN) do not reach here so that we don't | 522 // Most common cases (non-IDN) do not reach here so that we don't |
598 // need a fast return path. | 523 // need a fast return path. |
599 // TODO(jungshik) : Check if there's any character inappropriate | 524 // TODO(jungshik) : Check if there's any character inappropriate |
600 // (although allowed) for domain names. | 525 // (although allowed) for domain names. |
601 // See http://www.unicode.org/reports/tr39/#IDN_Security_Profiles and | 526 // See http://www.unicode.org/reports/tr39/#IDN_Security_Profiles and |
602 // http://www.unicode.org/reports/tr39/data/xidmodifications.txt | 527 // http://www.unicode.org/reports/tr39/data/xidmodifications.txt |
603 // For now, we borrow the list from Mozilla and tweaked it slightly. | 528 // For now, we borrow the list from Mozilla and tweaked it slightly. |
604 // (e.g. Characters like U+00A0, U+3000, U+3002 are omitted because | 529 // (e.g. Characters like U+00A0, U+3000, U+3002 are omitted because |
605 // they're gonna be canonicalized to U+0020 and full stop before | 530 // they're gonna be canonicalized to U+0020 and full stop before |
606 // reaching here.) | 531 // reaching here.) |
(...skipping 63 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
670 // (sync'd with characters allowed in url_canon_host with square | 595 // (sync'd with characters allowed in url_canon_host with square |
671 // brackets excluded.) See kHostCharLookup[] array in url_canon_host.cc. | 596 // brackets excluded.) See kHostCharLookup[] array in url_canon_host.cc. |
672 icu::UnicodeSet common_characters(UNICODE_STRING_SIMPLE("[[0-9]\\-_+\\ ]"), | 597 icu::UnicodeSet common_characters(UNICODE_STRING_SIMPLE("[[0-9]\\-_+\\ ]"), |
673 status); | 598 status); |
674 DCHECK(U_SUCCESS(status)); | 599 DCHECK(U_SUCCESS(status)); |
675 // Subtract common characters because they're always allowed so that | 600 // Subtract common characters because they're always allowed so that |
676 // we just have to check if a language-specific set contains | 601 // we just have to check if a language-specific set contains |
677 // the remainder. | 602 // the remainder. |
678 component_characters.removeAll(common_characters); | 603 component_characters.removeAll(common_characters); |
679 | 604 |
680 std::string languages_list(WideToASCII(languages)); | 605 StringTokenizer t(languages, ","); |
681 StringTokenizer t(languages_list, ","); | |
682 while (t.GetNext()) { | 606 while (t.GetNext()) { |
683 if (IsComponentCoveredByLang(component_characters, t.token())) | 607 if (IsComponentCoveredByLang(component_characters, t.token())) |
684 return true; | 608 return true; |
685 } | 609 } |
686 return false; | 610 return false; |
687 } | 611 } |
688 | 612 |
689 // Converts one component of a host (between dots) to IDN if safe. The result | 613 // Converts one component of a host (between dots) to IDN if safe. The result |
690 // will be APPENDED to the given output string and will be the same as the input | 614 // will be APPENDED to the given output string and will be the same as the input |
691 // if it is not IDN or the IDN is unsafe to display. Returns whether any | 615 // if it is not IDN or the IDN is unsafe to display. Returns whether any |
692 // conversion was performed. | 616 // conversion was performed. |
693 bool IDNToUnicodeOneComponent(const char16* comp, | 617 bool IDNToUnicodeOneComponent(const char16* comp, |
694 size_t comp_len, | 618 size_t comp_len, |
695 const std::wstring& languages, | 619 const std::string& languages, |
696 string16* out) { | 620 string16* out) { |
697 DCHECK(out); | 621 DCHECK(out); |
698 if (comp_len == 0) | 622 if (comp_len == 0) |
699 return false; | 623 return false; |
700 | 624 |
701 // Only transform if the input can be an IDN component. | 625 // Only transform if the input can be an IDN component. |
702 static const char16 kIdnPrefix[] = {'x', 'n', '-', '-'}; | 626 static const char16 kIdnPrefix[] = {'x', 'n', '-', '-'}; |
703 if ((comp_len > arraysize(kIdnPrefix)) && | 627 if ((comp_len > arraysize(kIdnPrefix)) && |
704 !memcmp(comp, kIdnPrefix, arraysize(kIdnPrefix) * sizeof(char16))) { | 628 !memcmp(comp, kIdnPrefix, arraysize(kIdnPrefix) * sizeof(char16))) { |
705 // Repeatedly expand the output string until it's big enough. It looks like | 629 // Repeatedly expand the output string until it's big enough. It looks like |
(...skipping 21 matching lines...) Expand all Loading... | |
727 // Failed, revert back to original string. | 651 // Failed, revert back to original string. |
728 out->resize(original_length); | 652 out->resize(original_length); |
729 } | 653 } |
730 | 654 |
731 // We get here with no IDN or on error, in which case we just append the | 655 // We get here with no IDN or on error, in which case we just append the |
732 // literal input. | 656 // literal input. |
733 out->append(comp, comp_len); | 657 out->append(comp, comp_len); |
734 return false; | 658 return false; |
735 } | 659 } |
736 | 660 |
737 struct SubtractFromOffset { | 661 // Functions may stack-allocate one of these in order to clamp the offsets in |
mrossetti
2011/04/26 22:13:34
I'm not sure I see the point of making this an aut
Peter Kasting
2011/04/27 02:07:19
Good point. Originally I wanted an object to guar
| |
738 explicit SubtractFromOffset(size_t amount) | 662 // |offsets_for_adjustment| to the length of |output| on exit. |
739 : amount(amount) {} | 663 class OffsetLimiter { |
740 void operator()(size_t& offset) { | 664 public: |
741 if (offset != std::wstring::npos) { | 665 OffsetLimiter(std::vector<size_t>* offsets_for_adjustment, string16* output) |
742 if (offset >= amount) | 666 : offsets_for_adjustment_(offsets_for_adjustment), |
743 offset -= amount; | 667 output_(output) { |
744 else | 668 } |
745 offset = std::wstring::npos; | 669 |
670 ~OffsetLimiter() { | |
671 if (offsets_for_adjustment_) { | |
672 std::for_each(offsets_for_adjustment_->begin(), | |
673 offsets_for_adjustment_->end(), | |
674 LimitOffset<string16>(output_->length())); | |
746 } | 675 } |
747 } | 676 } |
748 | 677 |
749 size_t amount; | 678 private: |
679 std::vector<size_t>* offsets_for_adjustment_; | |
680 string16* output_; | |
681 | |
682 DISALLOW_COPY_AND_ASSIGN(OffsetLimiter); | |
750 }; | 683 }; |
751 | 684 |
752 struct AddToOffset { | 685 // TODO(brettw) bug 734373: check the scripts for each host component and |
753 explicit AddToOffset(size_t amount) | 686 // don't un-IDN-ize if there is more than one. Alternatively, only IDN for |
754 : amount(amount) {} | 687 // scripts that the user has installed. For now, just put the entire |
755 void operator()(size_t& offset) { | 688 // path through IDN. Maybe this feature can be implemented in ICU itself? |
756 if (offset != std::wstring::npos) | 689 // |
757 offset += amount; | 690 // We may want to skip this step in the case of file URLs to allow unicode |
691 // UNC hostnames regardless of encodings. | |
692 string16 IDNToUnicodeWithOffsets(const std::string& host, | |
693 const std::string& languages, | |
694 std::vector<size_t>* offsets_for_adjustment) { | |
695 // Convert the ASCII input to a string16 for ICU. | |
696 string16 input16; | |
697 input16.reserve(host.length()); | |
698 input16.insert(input16.end(), host.begin(), host.end()); | |
699 | |
700 // Do each component of the host separately, since we enforce script matching | |
701 // on a per-component basis. | |
702 AdjustOffset::Adjustments adjustments; | |
703 string16 out16; | |
704 OffsetLimiter offset_limiter(offsets_for_adjustment, &out16); | |
mrossetti
2011/04/26 22:13:34
Continuing from the previous comment: This auto co
| |
705 for (size_t component_start = 0, component_end; | |
706 component_start < input16.length(); | |
707 component_start = component_end + 1) { | |
708 // Find the end of the component. | |
709 component_end = input16.find('.', component_start); | |
710 if (component_end == string16::npos) | |
711 component_end = input16.length(); // For getting the last component. | |
712 size_t component_length = component_end - component_start; | |
713 size_t new_component_start = out16.length(); | |
714 bool converted_idn = false; | |
715 if (component_end > component_start) { | |
716 // Add the substring that we just found. | |
717 converted_idn = IDNToUnicodeOneComponent(input16.data() + component_start, | |
718 component_length, languages, &out16); | |
719 } | |
720 size_t new_component_length = out16.length() - new_component_start; | |
721 | |
722 if (converted_idn && offsets_for_adjustment) { | |
723 adjustments.push_back(AdjustOffset::Adjustment( | |
724 component_start, component_length, new_component_length)); | |
725 } | |
726 | |
727 // Need to add the dot we just found (if we found one). | |
728 if (component_end < input16.length()) | |
729 out16.push_back('.'); | |
758 } | 730 } |
759 | 731 |
760 size_t amount; | 732 // Make offset adjustment. |
761 }; | 733 if (offsets_for_adjustment && !adjustments.empty()) { |
734 std::for_each(offsets_for_adjustment->begin(), | |
735 offsets_for_adjustment->end(), AdjustOffset(adjustments)); | |
736 } | |
762 | 737 |
763 std::vector<size_t> OffsetsIntoSection( | 738 return out16; |
764 std::vector<size_t>* offsets_for_adjustment, | |
765 size_t section_begin) { | |
766 std::vector<size_t> offsets_into_section; | |
767 if (offsets_for_adjustment) { | |
768 std::transform(offsets_for_adjustment->begin(), | |
769 offsets_for_adjustment->end(), | |
770 std::back_inserter(offsets_into_section), | |
771 ClampComponentOffset(section_begin)); | |
772 std::for_each(offsets_into_section.begin(), offsets_into_section.end(), | |
773 SubtractFromOffset(section_begin)); | |
774 } | |
775 return offsets_into_section; | |
776 } | 739 } |
777 | 740 |
778 void ApplySectionAdjustments(const std::vector<size_t>& offsets_into_section, | 741 // Transforms |original_offsets| by subtracting |component_begin| from all |
779 std::vector<size_t>* offsets_for_adjustment, | 742 // offsets. Any offset which was not at least this large to begin with is set |
780 size_t old_section_len, | 743 // to std::string::npos. |
781 size_t new_section_len, | 744 std::vector<size_t> OffsetsIntoComponent( |
782 size_t section_begin) { | 745 const std::vector<size_t>& original_offsets, |
783 if (offsets_for_adjustment) { | 746 size_t component_begin) { |
784 DCHECK_EQ(offsets_for_adjustment->size(), offsets_into_section.size()); | 747 DCHECK_NE(std::string::npos, component_begin); |
785 std::vector<size_t>::const_iterator host_offsets_iter = | 748 std::vector<size_t> offsets_into_component(original_offsets); |
786 offsets_into_section.begin(); | 749 for (std::vector<size_t>::iterator i(offsets_into_component.begin()); |
787 for (std::vector<size_t>::iterator offsets_iter = | 750 i != offsets_into_component.end(); ++i) { |
788 offsets_for_adjustment->begin(); | 751 if (*i != std::string::npos) |
789 offsets_iter != offsets_for_adjustment->end(); | 752 *i = (*i < component_begin) ? std::string::npos : (*i - component_begin); |
790 ++offsets_iter, ++host_offsets_iter) { | 753 } |
791 size_t offset = *offsets_iter; | 754 return offsets_into_component; |
792 if (offset == std::wstring::npos || offset < section_begin) { | 755 } |
793 // The offset is before the host section so leave it as is. | 756 |
794 continue; | 757 // Called after we transform a component and append it to an output string. |
795 } | 758 // Maps |transformed_offsets|, which represent offsets into the transformed |
796 if (offset >= section_begin + old_section_len) { | 759 // component itself, into appropriate offsets for the output string, by adding |
797 // The offset is after the host section so adjust by host length delta. | 760 // |output_component_begin| to each. Determines which offsets need mapping by |
798 offset += new_section_len - old_section_len; | 761 // checking to see which of the |original_offsets| were within the designated |
799 } else if (*host_offsets_iter != std::wstring::npos) { | 762 // original component, using its provided endpoints. |
800 // The offset is within the host and valid so adjust by the host | 763 void AdjustForComponentTransform( |
801 // reformatting offsets results. | 764 const std::vector<size_t>& original_offsets, |
802 offset = section_begin + *host_offsets_iter; | 765 size_t original_component_begin, |
803 } else { | 766 size_t original_component_end, |
804 // The offset is invalid. | 767 const std::vector<size_t>& transformed_offsets, |
805 offset = std::wstring::npos; | 768 size_t output_component_begin, |
806 } | 769 std::vector<size_t>* offsets_for_adjustment) { |
807 *offsets_iter = offset; | 770 if (!offsets_for_adjustment) |
771 return; | |
772 | |
773 DCHECK_NE(std::string::npos, original_component_begin); | |
774 DCHECK_NE(std::string::npos, original_component_end); | |
775 DCHECK_NE(string16::npos, output_component_begin); | |
776 size_t offsets_size = offsets_for_adjustment->size(); | |
777 DCHECK_EQ(offsets_size, original_offsets.size()); | |
778 DCHECK_EQ(offsets_size, transformed_offsets.size()); | |
779 for (size_t i = 0; i < offsets_size; ++i) { | |
780 size_t original_offset = original_offsets[i]; | |
781 if ((original_offset >= original_component_begin) && | |
782 (original_offset < original_component_end)) { | |
783 size_t transformed_offset = transformed_offsets[i]; | |
784 (*offsets_for_adjustment)[i] = (transformed_offset == string16::npos) ? | |
785 string16::npos : (output_component_begin + transformed_offset); | |
808 } | 786 } |
809 } | 787 } |
810 } | 788 } |
811 | 789 |
812 // If |component| is valid, its begin is incremented by |delta|. | 790 // If |component| is valid, its begin is incremented by |delta|. |
813 void AdjustComponent(int delta, url_parse::Component* component) { | 791 void AdjustComponent(int delta, url_parse::Component* component) { |
814 if (!component->is_valid()) | 792 if (!component->is_valid()) |
815 return; | 793 return; |
816 | 794 |
817 DCHECK(delta >= 0 || component->begin >= -delta); | 795 DCHECK(delta >= 0 || component->begin >= -delta); |
818 component->begin += delta; | 796 component->begin += delta; |
819 } | 797 } |
820 | 798 |
821 // Adjusts all the components of |parsed| by |delta|, except for the scheme. | 799 // Adjusts all the components of |parsed| by |delta|, except for the scheme. |
822 void AdjustComponents(int delta, url_parse::Parsed* parsed) { | 800 void AdjustComponents(int delta, url_parse::Parsed* parsed) { |
823 AdjustComponent(delta, &(parsed->username)); | 801 AdjustComponent(delta, &(parsed->username)); |
824 AdjustComponent(delta, &(parsed->password)); | 802 AdjustComponent(delta, &(parsed->password)); |
825 AdjustComponent(delta, &(parsed->host)); | 803 AdjustComponent(delta, &(parsed->host)); |
826 AdjustComponent(delta, &(parsed->port)); | 804 AdjustComponent(delta, &(parsed->port)); |
827 AdjustComponent(delta, &(parsed->path)); | 805 AdjustComponent(delta, &(parsed->path)); |
828 AdjustComponent(delta, &(parsed->query)); | 806 AdjustComponent(delta, &(parsed->query)); |
829 AdjustComponent(delta, &(parsed->ref)); | 807 AdjustComponent(delta, &(parsed->ref)); |
830 } | 808 } |
831 | 809 |
832 std::wstring FormatUrlInternal(const GURL& url, | 810 // Helper for FormatUrlWithOffsets(). |
833 const std::wstring& languages, | 811 string16 FormatViewSourceUrl(const GURL& url, |
834 FormatUrlTypes format_types, | 812 const std::vector<size_t>& original_offsets, |
835 UnescapeRule::Type unescape_rules, | 813 const std::string& languages, |
836 url_parse::Parsed* new_parsed, | 814 FormatUrlTypes format_types, |
837 size_t* prefix_end, | 815 UnescapeRule::Type unescape_rules, |
838 std::vector<size_t>* offsets_for_adjustment); | 816 url_parse::Parsed* new_parsed, |
817 size_t* prefix_end, | |
818 std::vector<size_t>* offsets_for_adjustment) { | |
819 DCHECK(new_parsed); | |
820 const char kViewSource[] = "view-source:"; | |
821 const size_t kViewSourceLength = arraysize(kViewSource) - 1; | |
822 std::vector<size_t> offsets_into_url( | |
823 OffsetsIntoComponent(original_offsets, kViewSourceLength)); | |
839 | 824 |
840 // Helper for FormatUrl()/FormatUrlInternal(). | 825 GURL real_url(url.possibly_invalid_spec().substr(kViewSourceLength)); |
841 std::wstring FormatViewSourceUrl(const GURL& url, | 826 string16 result(ASCIIToUTF16(kViewSource) + |
842 const std::wstring& languages, | 827 FormatUrlWithOffsets(real_url, languages, format_types, unescape_rules, |
843 FormatUrlTypes format_types, | 828 new_parsed, prefix_end, &offsets_into_url)); |
844 UnescapeRule::Type unescape_rules, | 829 OffsetLimiter offset_limiter(offsets_for_adjustment, &result); |
845 url_parse::Parsed* new_parsed, | |
846 size_t* prefix_end, | |
847 std::vector<size_t>* offsets_for_adjustment) { | |
848 DCHECK(new_parsed); | |
849 DCHECK(offsets_for_adjustment); | |
850 const wchar_t* const kWideViewSource = L"view-source:"; | |
851 const size_t kViewSourceLengthPlus1 = 12; | |
852 std::vector<size_t> saved_offsets(*offsets_for_adjustment); | |
853 | |
854 GURL real_url(url.possibly_invalid_spec().substr(kViewSourceLengthPlus1)); | |
855 // Clamp the offsets to the source area. | |
856 std::for_each(offsets_for_adjustment->begin(), | |
857 offsets_for_adjustment->end(), | |
858 SubtractFromOffset(kViewSourceLengthPlus1)); | |
859 std::wstring result = FormatUrlInternal(real_url, languages, format_types, | |
860 unescape_rules, new_parsed, prefix_end, offsets_for_adjustment); | |
861 result.insert(0, kWideViewSource); | |
862 | 830 |
863 // Adjust position values. | 831 // Adjust position values. |
864 if (new_parsed->scheme.is_nonempty()) { | 832 if (new_parsed->scheme.is_nonempty()) { |
865 // Assume "view-source:real-scheme" as a scheme. | 833 // Assume "view-source:real-scheme" as a scheme. |
866 new_parsed->scheme.len += kViewSourceLengthPlus1; | 834 new_parsed->scheme.len += kViewSourceLength; |
867 } else { | 835 } else { |
868 new_parsed->scheme.begin = 0; | 836 new_parsed->scheme.begin = 0; |
869 new_parsed->scheme.len = kViewSourceLengthPlus1 - 1; | 837 new_parsed->scheme.len = kViewSourceLength - 1; |
870 } | 838 } |
871 AdjustComponents(kViewSourceLengthPlus1, new_parsed); | 839 AdjustComponents(kViewSourceLength, new_parsed); |
872 if (prefix_end) | 840 if (prefix_end) |
873 *prefix_end += kViewSourceLengthPlus1; | 841 *prefix_end += kViewSourceLength; |
874 std::for_each(offsets_for_adjustment->begin(), | 842 AdjustForComponentTransform(original_offsets, kViewSourceLength, |
875 offsets_for_adjustment->end(), | 843 url.possibly_invalid_spec().length(), offsets_into_url, kViewSourceLength, |
876 AddToOffset(kViewSourceLengthPlus1)); | 844 offsets_for_adjustment); |
877 // Restore all offsets which were not affected by FormatUrlInternal. | |
878 DCHECK_EQ(saved_offsets.size(), offsets_for_adjustment->size()); | |
879 for (size_t i = 0; i < saved_offsets.size(); ++i) { | |
880 if (saved_offsets[i] < kViewSourceLengthPlus1) | |
881 (*offsets_for_adjustment)[i] = saved_offsets[i]; | |
882 } | |
883 return result; | 845 return result; |
884 } | 846 } |
885 | 847 |
886 // Appends the substring |in_component| inside of the URL |spec| to |output|, | 848 class AppendComponentTransform { |
887 // and the resulting range will be filled into |out_component|. |unescape_rules| | 849 public: |
888 // defines how to clean the URL for human readability. |offsets_for_adjustment| | 850 AppendComponentTransform() {} |
889 // is an array of offsets into |output| each of which will be adjusted based on | 851 virtual string16 Execute( |
890 // how it maps to the component being converted; if it is less than | 852 const std::string& component_text, |
891 // output->length(), it will be untouched, and if it is greater than | 853 std::vector<size_t>* offsets_into_component) const = 0; |
892 // output->length() + in_component.len it will be adjusted by the difference in | 854 |
893 // lengths between the input and output components. Otherwise it points into | 855 private: |
894 // the component being converted, and is adjusted to point to the same logical | 856 DISALLOW_COPY_AND_ASSIGN(AppendComponentTransform); |
895 // place in |output|. |offsets_for_adjustment| may not be NULL. | 857 }; |
858 | |
859 class HostComponentTransform : public AppendComponentTransform { | |
860 public: | |
861 explicit HostComponentTransform(const std::string& languages) | |
862 : languages_(languages) { | |
863 } | |
864 | |
865 private: | |
866 virtual string16 Execute( | |
867 const std::string& component_text, | |
868 std::vector<size_t>* offsets_into_component) const { | |
869 return IDNToUnicodeWithOffsets(component_text, languages_, | |
870 offsets_into_component); | |
871 } | |
872 | |
873 const std::string& languages_; | |
874 }; | |
875 | |
876 class NonHostComponentTransform : public AppendComponentTransform { | |
877 public: | |
878 explicit NonHostComponentTransform(UnescapeRule::Type unescape_rules) | |
879 : unescape_rules_(unescape_rules) { | |
880 } | |
881 | |
882 private: | |
883 virtual string16 Execute( | |
884 const std::string& component_text, | |
885 std::vector<size_t>* offsets_into_component) const { | |
886 return (unescape_rules_ == UnescapeRule::NONE) ? | |
887 UTF8ToUTF16AndAdjustOffsets(component_text, offsets_into_component) : | |
888 UnescapeAndDecodeUTF8URLComponentWithOffsets(component_text, | |
889 unescape_rules_, offsets_into_component); | |
890 } | |
891 | |
892 const UnescapeRule::Type unescape_rules_; | |
893 }; | |
894 | |
896 void AppendFormattedComponent(const std::string& spec, | 895 void AppendFormattedComponent(const std::string& spec, |
897 const url_parse::Component& in_component, | 896 const url_parse::Component& original_component, |
898 UnescapeRule::Type unescape_rules, | 897 const std::vector<size_t>& original_offsets, |
899 std::wstring* output, | 898 const AppendComponentTransform& transform, |
900 url_parse::Component* out_component, | 899 string16* output, |
900 url_parse::Component* output_component, | |
901 std::vector<size_t>* offsets_for_adjustment) { | 901 std::vector<size_t>* offsets_for_adjustment) { |
902 DCHECK(output); | 902 DCHECK(output); |
903 DCHECK(offsets_for_adjustment); | 903 if (original_component.is_nonempty()) { |
904 if (in_component.is_nonempty()) { | 904 size_t original_component_begin = |
905 size_t component_begin = output->length(); | 905 static_cast<size_t>(original_component.begin); |
906 out_component->begin = static_cast<int>(component_begin); | 906 size_t output_component_begin = output->length(); |
907 if (output_component) | |
908 output_component->begin = static_cast<int>(output_component_begin); | |
907 | 909 |
908 // Compose a list of offsets within the component area. | |
909 std::vector<size_t> offsets_into_component = | 910 std::vector<size_t> offsets_into_component = |
910 OffsetsIntoSection(offsets_for_adjustment, component_begin); | 911 OffsetsIntoComponent(original_offsets, original_component_begin); |
912 output->append(transform.Execute(std::string(spec, original_component_begin, | |
913 static_cast<size_t>(original_component.len)), &offsets_into_component)); | |
911 | 914 |
912 if (unescape_rules == UnescapeRule::NONE) { | 915 if (output_component) { |
913 output->append(UTF8ToWideAndAdjustOffsets( | 916 output_component->len = |
914 spec.substr(in_component.begin, in_component.len), | 917 static_cast<int>(output->length() - output_component_begin); |
915 &offsets_into_component)); | |
916 } else { | |
917 output->append(UTF16ToWideHack( | |
918 UnescapeAndDecodeUTF8URLComponentWithOffsets( | |
919 spec.substr(in_component.begin, in_component.len), unescape_rules, | |
920 &offsets_into_component))); | |
921 } | 918 } |
922 size_t new_component_len = output->length() - component_begin; | 919 AdjustForComponentTransform(original_offsets, original_component_begin, |
923 out_component->len = static_cast<int>(new_component_len); | 920 static_cast<size_t>(original_component.end()), |
924 | 921 offsets_into_component, output_component_begin, |
925 // Apply offset adjustments. | 922 offsets_for_adjustment); |
926 size_t old_component_len = static_cast<size_t>(in_component.len); | 923 } else if (output_component) { |
927 ApplySectionAdjustments(offsets_into_component, offsets_for_adjustment, | 924 output_component->reset(); |
928 old_component_len, new_component_len, component_begin); | |
929 } else { | |
930 out_component->reset(); | |
931 } | 925 } |
932 } | 926 } |
933 | 927 |
934 // TODO(viettrungluu): This is really the old-fashioned version, made internal. | |
935 // I need to really convert |FormatUrl()|. | |
936 std::wstring FormatUrlInternal(const GURL& url, | |
937 const std::wstring& languages, | |
938 FormatUrlTypes format_types, | |
939 UnescapeRule::Type unescape_rules, | |
940 url_parse::Parsed* new_parsed, | |
941 size_t* prefix_end, | |
942 std::vector<size_t>* offsets_for_adjustment) { | |
943 url_parse::Parsed parsed_temp; | |
944 if (!new_parsed) | |
945 new_parsed = &parsed_temp; | |
946 else | |
947 *new_parsed = url_parse::Parsed(); | |
948 | |
949 std::vector<size_t> offsets_temp; | |
950 if (!offsets_for_adjustment) | |
951 offsets_for_adjustment = &offsets_temp; | |
952 | |
953 std::wstring url_string; | |
954 | |
955 // Check for empty URLs or 0 available text width. | |
956 if (url.is_empty()) { | |
957 if (prefix_end) | |
958 *prefix_end = 0; | |
959 std::for_each(offsets_for_adjustment->begin(), | |
960 offsets_for_adjustment->end(), | |
961 LimitOffset<std::wstring>(0)); | |
962 return url_string; | |
963 } | |
964 | |
965 // Special handling for view-source:. Don't use chrome::kViewSourceScheme | |
966 // because this library shouldn't depend on chrome. | |
967 const char* const kViewSource = "view-source"; | |
968 // Reject "view-source:view-source:..." to avoid deep recursion. | |
969 const char* const kViewSourceTwice = "view-source:view-source:"; | |
970 if (url.SchemeIs(kViewSource) && | |
971 !StartsWithASCII(url.possibly_invalid_spec(), kViewSourceTwice, false)) { | |
972 return FormatViewSourceUrl(url, languages, format_types, | |
973 unescape_rules, new_parsed, prefix_end, offsets_for_adjustment); | |
974 } | |
975 | |
976 // We handle both valid and invalid URLs (this will give us the spec | |
977 // regardless of validity). | |
978 const std::string& spec = url.possibly_invalid_spec(); | |
979 const url_parse::Parsed& parsed = url.parsed_for_possibly_invalid_spec(); | |
980 size_t spec_length = spec.length(); | |
981 std::for_each(offsets_for_adjustment->begin(), | |
982 offsets_for_adjustment->end(), | |
983 LimitOffset<std::wstring>(spec_length)); | |
984 | |
985 // Copy everything before the username (the scheme and the separators.) | |
986 // These are ASCII. | |
987 url_string.insert(url_string.end(), spec.begin(), | |
988 spec.begin() + parsed.CountCharactersBefore(url_parse::Parsed::USERNAME, | |
989 true)); | |
990 | |
991 const wchar_t kHTTP[] = L"http://"; | |
992 const char kFTP[] = "ftp."; | |
993 // URLFixerUpper::FixupURL() treats "ftp.foo.com" as ftp://ftp.foo.com. This | |
994 // means that if we trim "http://" off a URL whose host starts with "ftp." and | |
995 // the user inputs this into any field subject to fixup (which is basically | |
996 // all input fields), the meaning would be changed. (In fact, often the | |
997 // formatted URL is directly pre-filled into an input field.) For this reason | |
998 // we avoid stripping "http://" in this case. | |
999 bool omit_http = | |
1000 (format_types & kFormatUrlOmitHTTP) && (url_string == kHTTP) && | |
1001 (url.host().compare(0, arraysize(kFTP) - 1, kFTP) != 0); | |
1002 | |
1003 new_parsed->scheme = parsed.scheme; | |
1004 | |
1005 if ((format_types & kFormatUrlOmitUsernamePassword) != 0) { | |
1006 // Remove the username and password fields. We don't want to display those | |
1007 // to the user since they can be used for attacks, | |
1008 // e.g. "http://google.com:search@evil.ru/" | |
1009 new_parsed->username.reset(); | |
1010 new_parsed->password.reset(); | |
1011 // Update the offsets based on removed username and/or password. | |
1012 if (!offsets_for_adjustment->empty() && | |
1013 (parsed.username.is_nonempty() || parsed.password.is_nonempty())) { | |
1014 AdjustOffset::Adjustments adjustments; | |
1015 if (parsed.username.is_nonempty() && parsed.password.is_nonempty()) { | |
1016 // The seeming off-by-one and off-by-two in these first two lines are to | |
1017 // account for the ':' after the username and '@' after the password. | |
1018 adjustments.push_back(AdjustOffset::Adjustment( | |
1019 static_cast<size_t>(parsed.username.begin), | |
1020 static_cast<size_t>(parsed.username.len + parsed.password.len + | |
1021 2), 0)); | |
1022 } else { | |
1023 const url_parse::Component* nonempty_component = | |
1024 parsed.username.is_nonempty() ? &parsed.username : &parsed.password; | |
1025 // The seeming off-by-one in below is to account for the '@' after the | |
1026 // username/password. | |
1027 adjustments.push_back(AdjustOffset::Adjustment( | |
1028 static_cast<size_t>(nonempty_component->begin), | |
1029 static_cast<size_t>(nonempty_component->len + 1), 0)); | |
1030 } | |
1031 | |
1032 // Make offset adjustment. | |
1033 std::for_each(offsets_for_adjustment->begin(), | |
1034 offsets_for_adjustment->end(), | |
1035 AdjustOffset(adjustments)); | |
1036 } | |
1037 } else { | |
1038 AppendFormattedComponent(spec, parsed.username, unescape_rules, &url_string, | |
1039 &new_parsed->username, offsets_for_adjustment); | |
1040 if (parsed.password.is_valid()) | |
1041 url_string.push_back(':'); | |
1042 AppendFormattedComponent(spec, parsed.password, unescape_rules, &url_string, | |
1043 &new_parsed->password, offsets_for_adjustment); | |
1044 if (parsed.username.is_valid() || parsed.password.is_valid()) | |
1045 url_string.push_back('@'); | |
1046 } | |
1047 if (prefix_end) | |
1048 *prefix_end = static_cast<size_t>(url_string.length()); | |
1049 | |
1050 AppendFormattedHostWithOffsets(url, languages, &url_string, new_parsed, | |
1051 offsets_for_adjustment); | |
1052 | |
1053 // Port. | |
1054 if (parsed.port.is_nonempty()) { | |
1055 url_string.push_back(':'); | |
1056 new_parsed->port.begin = url_string.length(); | |
1057 url_string.insert(url_string.end(), | |
1058 spec.begin() + parsed.port.begin, | |
1059 spec.begin() + parsed.port.end()); | |
1060 new_parsed->port.len = url_string.length() - new_parsed->port.begin; | |
1061 } else { | |
1062 new_parsed->port.reset(); | |
1063 } | |
1064 | |
1065 // Path and query both get the same general unescape & convert treatment. | |
1066 if (!(format_types & kFormatUrlOmitTrailingSlashOnBareHostname) || | |
1067 !CanStripTrailingSlash(url)) { | |
1068 AppendFormattedComponent(spec, parsed.path, unescape_rules, &url_string, | |
1069 &new_parsed->path, offsets_for_adjustment); | |
1070 } | |
1071 if (parsed.query.is_valid()) | |
1072 url_string.push_back('?'); | |
1073 AppendFormattedComponent(spec, parsed.query, unescape_rules, &url_string, | |
1074 &new_parsed->query, offsets_for_adjustment); | |
1075 | |
1076 // Reference is stored in valid, unescaped UTF-8, so we can just convert. | |
1077 if (parsed.ref.is_valid()) { | |
1078 url_string.push_back('#'); | |
1079 size_t ref_begin = url_string.length(); | |
1080 new_parsed->ref.begin = static_cast<int>(ref_begin); | |
1081 | |
1082 // Compose a list of offsets within the section. | |
1083 std::vector<size_t> offsets_into_ref = | |
1084 OffsetsIntoSection(offsets_for_adjustment, ref_begin); | |
1085 | |
1086 if (parsed.ref.len > 0) { | |
1087 url_string.append(UTF8ToWideAndAdjustOffsets(spec.substr(parsed.ref.begin, | |
1088 parsed.ref.len), | |
1089 &offsets_into_ref)); | |
1090 } | |
1091 size_t old_ref_len = static_cast<size_t>(parsed.ref.len); | |
1092 size_t new_ref_len = url_string.length() - new_parsed->ref.begin; | |
1093 new_parsed->ref.len = static_cast<int>(new_ref_len); | |
1094 | |
1095 // Apply offset adjustments. | |
1096 ApplySectionAdjustments(offsets_into_ref, offsets_for_adjustment, | |
1097 old_ref_len, new_ref_len, ref_begin); | |
1098 } | |
1099 | |
1100 // If we need to strip out http do it after the fact. This way we don't need | |
1101 // to worry about how offset_for_adjustment is interpreted. | |
1102 const size_t kHTTPSize = arraysize(kHTTP) - 1; | |
1103 if (omit_http && !url_string.compare(0, kHTTPSize, kHTTP)) { | |
1104 url_string = url_string.substr(kHTTPSize); | |
1105 AdjustOffset::Adjustments adjustments; | |
1106 adjustments.push_back(AdjustOffset::Adjustment(0, kHTTPSize, 0)); | |
1107 std::for_each(offsets_for_adjustment->begin(), | |
1108 offsets_for_adjustment->end(), | |
1109 AdjustOffset(adjustments)); | |
1110 if (prefix_end) | |
1111 *prefix_end -= kHTTPSize; | |
1112 | |
1113 // Adjust new_parsed. | |
1114 DCHECK(new_parsed->scheme.is_valid()); | |
1115 int delta = -(new_parsed->scheme.len + 3); // +3 for ://. | |
1116 new_parsed->scheme.reset(); | |
1117 AdjustComponents(delta, new_parsed); | |
1118 } | |
1119 | |
1120 return url_string; | |
1121 } | |
1122 | |
1123 } // namespace | 928 } // namespace |
1124 | 929 |
1125 const FormatUrlType kFormatUrlOmitNothing = 0; | 930 const FormatUrlType kFormatUrlOmitNothing = 0; |
1126 const FormatUrlType kFormatUrlOmitUsernamePassword = 1 << 0; | 931 const FormatUrlType kFormatUrlOmitUsernamePassword = 1 << 0; |
1127 const FormatUrlType kFormatUrlOmitHTTP = 1 << 1; | 932 const FormatUrlType kFormatUrlOmitHTTP = 1 << 1; |
1128 const FormatUrlType kFormatUrlOmitTrailingSlashOnBareHostname = 1 << 2; | 933 const FormatUrlType kFormatUrlOmitTrailingSlashOnBareHostname = 1 << 2; |
1129 const FormatUrlType kFormatUrlOmitAll = kFormatUrlOmitUsernamePassword | | 934 const FormatUrlType kFormatUrlOmitAll = kFormatUrlOmitUsernamePassword | |
1130 kFormatUrlOmitHTTP | kFormatUrlOmitTrailingSlashOnBareHostname; | 935 kFormatUrlOmitHTTP | kFormatUrlOmitTrailingSlashOnBareHostname; |
1131 | 936 |
1132 // TODO(viettrungluu): We don't want non-POD globals; change this. | 937 // TODO(viettrungluu): We don't want non-POD globals; change this. |
(...skipping 23 matching lines...) Expand all Loading... | |
1156 FILE_PATH_LITERAL("#"), FILE_PATH_LITERAL("%23")); | 961 FILE_PATH_LITERAL("#"), FILE_PATH_LITERAL("%23")); |
1157 | 962 |
1158 #if defined(OS_POSIX) | 963 #if defined(OS_POSIX) |
1159 ReplaceSubstringsAfterOffset(&url_string, 0, | 964 ReplaceSubstringsAfterOffset(&url_string, 0, |
1160 FILE_PATH_LITERAL("\\"), FILE_PATH_LITERAL("%5C")); | 965 FILE_PATH_LITERAL("\\"), FILE_PATH_LITERAL("%5C")); |
1161 #endif | 966 #endif |
1162 | 967 |
1163 return GURL(url_string); | 968 return GURL(url_string); |
1164 } | 969 } |
1165 | 970 |
1166 std::wstring GetSpecificHeader(const std::wstring& headers, | |
1167 const std::wstring& name) { | |
1168 return GetSpecificHeaderT(headers, name); | |
1169 } | |
1170 | |
1171 std::string GetSpecificHeader(const std::string& headers, | 971 std::string GetSpecificHeader(const std::string& headers, |
1172 const std::string& name) { | 972 const std::string& name) { |
1173 return GetSpecificHeaderT(headers, name); | 973 // We want to grab the Value from the "Key: Value" pairs in the headers, |
974 // which should look like this (no leading spaces, \n-separated) (we format | |
975 // them this way in url_request_inet.cc): | |
976 // HTTP/1.1 200 OK\n | |
977 // ETag: "6d0b8-947-24f35ec0"\n | |
978 // Content-Length: 2375\n | |
979 // Content-Type: text/html; charset=UTF-8\n | |
980 // Last-Modified: Sun, 03 Sep 2006 04:34:43 GMT\n | |
981 if (headers.empty()) | |
982 return std::string(); | |
983 | |
984 std::string match('\n' + name + ':'); | |
985 | |
986 std::string::const_iterator begin = | |
987 search(headers.begin(), headers.end(), match.begin(), match.end(), | |
988 base::CaseInsensitiveCompareASCII<char>()); | |
989 | |
990 if (begin == headers.end()) | |
991 return std::string(); | |
992 | |
993 begin += match.length(); | |
994 | |
995 std::string ret; | |
996 TrimWhitespace(std::string(begin, find(begin, headers.end(), '\n')), TRIM_ALL, | |
997 &ret); | |
998 return ret; | |
1174 } | 999 } |
1175 | 1000 |
1176 bool DecodeCharset(const std::string& input, | 1001 bool DecodeCharset(const std::string& input, |
1177 std::string* decoded_charset, | 1002 std::string* decoded_charset, |
1178 std::string* value) { | 1003 std::string* value) { |
1179 StringTokenizer t(input, "'"); | 1004 StringTokenizer t(input, "'"); |
1180 t.set_options(StringTokenizer::RETURN_DELIMS); | 1005 t.set_options(StringTokenizer::RETURN_DELIMS); |
1181 std::string temp_charset; | 1006 std::string temp_charset; |
1182 std::string temp_value; | 1007 std::string temp_value; |
1183 int numDelimsSeen = 0; | 1008 int numDelimsSeen = 0; |
(...skipping 54 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1238 param_value = GetHeaderParamValue(header, "name", | 1063 param_value = GetHeaderParamValue(header, "name", |
1239 QuoteRule::REMOVE_OUTER_QUOTES); | 1064 QuoteRule::REMOVE_OUTER_QUOTES); |
1240 } | 1065 } |
1241 if (param_value.empty()) | 1066 if (param_value.empty()) |
1242 return std::string(); | 1067 return std::string(); |
1243 if (DecodeParamValue(param_value, referrer_charset, &decoded)) | 1068 if (DecodeParamValue(param_value, referrer_charset, &decoded)) |
1244 return decoded; | 1069 return decoded; |
1245 return std::string(); | 1070 return std::string(); |
1246 } | 1071 } |
1247 | 1072 |
1248 std::wstring GetHeaderParamValue(const std::wstring& field, | 1073 // TODO(mpcomplete): This is a quick and dirty implementation for now. I'm |
1249 const std::wstring& param_name, | 1074 // sure this doesn't properly handle all (most?) cases. |
1250 QuoteRule::Type quote_rule) { | 1075 std::string GetHeaderParamValue(const std::string& header, |
1251 return GetHeaderParamValueT(field, param_name, quote_rule); | 1076 const std::string& param_name, |
1077 QuoteRule::Type quote_rule) { | |
1078 // This assumes args are formatted exactly like "bla; arg1=value; arg2=value". | |
1079 std::string::const_iterator param_begin = | |
1080 search(header.begin(), header.end(), param_name.begin(), param_name.end(), | |
1081 base::CaseInsensitiveCompareASCII<char>()); | |
1082 | |
1083 if (param_begin == header.end()) | |
1084 return std::string(); | |
1085 param_begin += param_name.length(); | |
1086 | |
1087 std::string whitespace(" \t"); | |
1088 size_t equals_offset = | |
1089 header.find_first_not_of(whitespace, param_begin - header.begin()); | |
1090 if (equals_offset == std::string::npos || header[equals_offset] != '=') | |
1091 return std::string(); | |
1092 | |
1093 param_begin = header.begin() + equals_offset + 1; | |
1094 if (param_begin == header.end()) | |
1095 return std::string(); | |
1096 | |
1097 std::string::const_iterator param_end; | |
1098 if (*param_begin == '"' && quote_rule == QuoteRule::REMOVE_OUTER_QUOTES) { | |
1099 ++param_begin; // skip past the quote. | |
1100 param_end = find(param_begin, header.end(), '"'); | |
1101 // If the closing quote is missing, we will treat the rest of the | |
1102 // string as the parameter. We can't set |param_end| to the | |
1103 // location of the separator (';'), since the separator is | |
1104 // technically quoted. See: http://crbug.com/58840 | |
1105 } else { | |
1106 param_end = find(param_begin + 1, header.end(), ';'); | |
1107 } | |
1108 | |
1109 return std::string(param_begin, param_end); | |
1252 } | 1110 } |
1253 | 1111 |
1254 std::string GetHeaderParamValue(const std::string& field, | 1112 string16 IDNToUnicode(const std::string& host, |
1255 const std::string& param_name, | 1113 const std::string& languages) { |
1256 QuoteRule::Type quote_rule) { | |
1257 return GetHeaderParamValueT(field, param_name, quote_rule); | |
1258 } | |
1259 | |
1260 // TODO(brettw) bug 734373: check the scripts for each host component and | |
1261 // don't un-IDN-ize if there is more than one. Alternatively, only IDN for | |
1262 // scripts that the user has installed. For now, just put the entire | |
1263 // path through IDN. Maybe this feature can be implemented in ICU itself? | |
1264 // | |
1265 // We may want to skip this step in the case of file URLs to allow unicode | |
1266 // UNC hostnames regardless of encodings. | |
1267 std::wstring IDNToUnicodeWithOffsets( | |
1268 const char* host, | |
1269 size_t host_len, | |
1270 const std::wstring& languages, | |
1271 std::vector<size_t>* offsets_for_adjustment) { | |
1272 // Convert the ASCII input to a wide string for ICU. | |
1273 string16 input16; | |
1274 input16.reserve(host_len); | |
1275 input16.insert(input16.end(), host, host + host_len); | |
1276 | |
1277 // Do each component of the host separately, since we enforce script matching | |
1278 // on a per-component basis. | |
1279 AdjustOffset::Adjustments adjustments; | |
1280 string16 out16; | |
1281 for (size_t component_start = 0, component_end; | |
1282 component_start < input16.length(); | |
1283 component_start = component_end + 1) { | |
1284 // Find the end of the component. | |
1285 component_end = input16.find('.', component_start); | |
1286 if (component_end == string16::npos) | |
1287 component_end = input16.length(); // For getting the last component. | |
1288 size_t component_length = component_end - component_start; | |
1289 size_t new_component_start = out16.length(); | |
1290 bool converted_idn = false; | |
1291 if (component_end > component_start) { | |
1292 // Add the substring that we just found. | |
1293 converted_idn = IDNToUnicodeOneComponent(input16.data() + component_start, | |
1294 component_length, languages, &out16); | |
1295 } | |
1296 size_t new_component_length = out16.length() - new_component_start; | |
1297 | |
1298 if (converted_idn && offsets_for_adjustment) { | |
1299 adjustments.push_back(AdjustOffset::Adjustment( | |
1300 component_start, component_length, new_component_length)); | |
1301 } | |
1302 | |
1303 // Need to add the dot we just found (if we found one). | |
1304 if (component_end < input16.length()) | |
1305 out16.push_back('.'); | |
1306 } | |
1307 | |
1308 // Make offset adjustment. | |
1309 if (offsets_for_adjustment && !adjustments.empty()) { | |
1310 std::for_each(offsets_for_adjustment->begin(), | |
1311 offsets_for_adjustment->end(), | |
1312 AdjustOffset(adjustments)); | |
1313 } | |
1314 | |
1315 return UTF16ToWideAndAdjustOffsets(out16, offsets_for_adjustment); | |
1316 } | |
1317 | |
1318 std::wstring IDNToUnicode(const char* host, | |
1319 size_t host_len, | |
1320 const std::wstring& languages, | |
1321 size_t* offset_for_adjustment) { | |
1322 std::vector<size_t> offsets; | 1114 std::vector<size_t> offsets; |
1323 if (offset_for_adjustment) | 1115 return IDNToUnicodeWithOffsets(host, languages, &offsets); |
1324 offsets.push_back(*offset_for_adjustment); | |
1325 std::wstring result = | |
1326 IDNToUnicodeWithOffsets(host, host_len, languages, &offsets); | |
1327 if (offset_for_adjustment) | |
1328 *offset_for_adjustment = offsets[0]; | |
1329 return result; | |
1330 } | 1116 } |
1331 | 1117 |
1332 std::string CanonicalizeHost(const std::string& host, | 1118 std::string CanonicalizeHost(const std::string& host, |
1333 url_canon::CanonHostInfo* host_info) { | 1119 url_canon::CanonHostInfo* host_info) { |
1334 // Try to canonicalize the host. | 1120 // Try to canonicalize the host. |
1335 const url_parse::Component raw_host_component( | 1121 const url_parse::Component raw_host_component( |
1336 0, static_cast<int>(host.length())); | 1122 0, static_cast<int>(host.length())); |
1337 std::string canon_host; | 1123 std::string canon_host; |
1338 url_canon::StdStringCanonOutput canon_host_output(&canon_host); | 1124 url_canon::StdStringCanonOutput canon_host_output(&canon_host); |
1339 url_canon::CanonicalizeHostVerbose(host.c_str(), raw_host_component, | 1125 url_canon::CanonicalizeHostVerbose(host.c_str(), raw_host_component, |
1340 &canon_host_output, host_info); | 1126 &canon_host_output, host_info); |
1341 | 1127 |
1342 if (host_info->out_host.is_nonempty() && | 1128 if (host_info->out_host.is_nonempty() && |
1343 host_info->family != url_canon::CanonHostInfo::BROKEN) { | 1129 host_info->family != url_canon::CanonHostInfo::BROKEN) { |
1344 // Success! Assert that there's no extra garbage. | 1130 // Success! Assert that there's no extra garbage. |
1345 canon_host_output.Complete(); | 1131 canon_host_output.Complete(); |
1346 DCHECK_EQ(host_info->out_host.len, static_cast<int>(canon_host.length())); | 1132 DCHECK_EQ(host_info->out_host.len, static_cast<int>(canon_host.length())); |
1347 } else { | 1133 } else { |
1348 // Empty host, or canonicalization failed. We'll return empty. | 1134 // Empty host, or canonicalization failed. We'll return empty. |
1349 canon_host.clear(); | 1135 canon_host.clear(); |
1350 } | 1136 } |
1351 | 1137 |
1352 return canon_host; | 1138 return canon_host; |
1353 } | 1139 } |
1354 | 1140 |
1355 std::string CanonicalizeHost(const std::wstring& host, | |
1356 url_canon::CanonHostInfo* host_info) { | |
1357 std::string converted_host; | |
1358 WideToUTF8(host.c_str(), host.length(), &converted_host); | |
1359 return CanonicalizeHost(converted_host, host_info); | |
1360 } | |
1361 | |
1362 std::string GetDirectoryListingHeader(const string16& title) { | 1141 std::string GetDirectoryListingHeader(const string16& title) { |
1363 static const base::StringPiece header( | 1142 static const base::StringPiece header( |
1364 NetModule::GetResource(IDR_DIR_HEADER_HTML)); | 1143 NetModule::GetResource(IDR_DIR_HEADER_HTML)); |
1365 // This can be null in unit tests. | 1144 // This can be null in unit tests. |
1366 DLOG_IF(WARNING, header.empty()) << | 1145 DLOG_IF(WARNING, header.empty()) << |
1367 "Missing resource: directory listing header"; | 1146 "Missing resource: directory listing header"; |
1368 | 1147 |
1369 std::string result; | 1148 std::string result; |
1370 if (!header.empty()) | 1149 if (!header.empty()) |
1371 result.assign(header.data(), header.size()); | 1150 result.assign(header.data(), header.size()); |
(...skipping 360 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1732 UnescapeRule::Type flags = | 1511 UnescapeRule::Type flags = |
1733 UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS; | 1512 UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS; |
1734 *username = UnescapeAndDecodeUTF8URLComponent(url.username(), flags, NULL); | 1513 *username = UnescapeAndDecodeUTF8URLComponent(url.username(), flags, NULL); |
1735 *password = UnescapeAndDecodeUTF8URLComponent(url.password(), flags, NULL); | 1514 *password = UnescapeAndDecodeUTF8URLComponent(url.password(), flags, NULL); |
1736 } | 1515 } |
1737 | 1516 |
1738 std::string GetHostOrSpecFromURL(const GURL& url) { | 1517 std::string GetHostOrSpecFromURL(const GURL& url) { |
1739 return url.has_host() ? TrimEndingDot(url.host()) : url.spec(); | 1518 return url.has_host() ? TrimEndingDot(url.host()) : url.spec(); |
1740 } | 1519 } |
1741 | 1520 |
1742 void AppendFormattedHostWithOffsets( | 1521 void AppendFormattedHost(const GURL& url, |
1743 const GURL& url, | 1522 const std::string& languages, |
1744 const std::wstring& languages, | 1523 string16* output) { |
1745 std::wstring* output, | 1524 std::vector<size_t> offsets; |
1746 url_parse::Parsed* new_parsed, | 1525 AppendFormattedComponent(url.possibly_invalid_spec(), |
1747 std::vector<size_t>* offsets_for_adjustment) { | 1526 url.parsed_for_possibly_invalid_spec().host, offsets, |
1748 DCHECK(output); | 1527 HostComponentTransform(languages), output, NULL, NULL); |
1749 const url_parse::Component& host = | |
1750 url.parsed_for_possibly_invalid_spec().host; | |
1751 | |
1752 if (host.is_nonempty()) { | |
1753 // Handle possible IDN in the host name. | |
1754 size_t host_begin = output->length(); | |
1755 if (new_parsed) | |
1756 new_parsed->host.begin = static_cast<int>(host_begin); | |
1757 size_t old_host_len = static_cast<size_t>(host.len); | |
1758 | |
1759 // Compose a list of offsets within the host area. | |
1760 std::vector<size_t> offsets_into_host = | |
1761 OffsetsIntoSection(offsets_for_adjustment, host_begin); | |
1762 | |
1763 const std::string& spec = url.possibly_invalid_spec(); | |
1764 DCHECK(host.begin >= 0 && | |
1765 ((spec.length() == 0 && host.begin == 0) || | |
1766 host.begin < static_cast<int>(spec.length()))); | |
1767 output->append(IDNToUnicodeWithOffsets(&spec[host.begin], old_host_len, | |
1768 languages, &offsets_into_host)); | |
1769 | |
1770 size_t new_host_len = output->length() - host_begin; | |
1771 if (new_parsed) | |
1772 new_parsed->host.len = static_cast<int>(new_host_len); | |
1773 | |
1774 // Apply offset adjustments. | |
1775 ApplySectionAdjustments(offsets_into_host, offsets_for_adjustment, | |
1776 old_host_len, new_host_len, host_begin); | |
1777 } else if (new_parsed) { | |
1778 new_parsed->host.reset(); | |
1779 } | |
1780 } | 1528 } |
1781 | 1529 |
1782 void AppendFormattedHost(const GURL& url, | |
1783 const std::wstring& languages, | |
1784 std::wstring* output, | |
1785 url_parse::Parsed* new_parsed, | |
1786 size_t* offset_for_adjustment) { | |
1787 std::vector<size_t> offsets; | |
1788 if (offset_for_adjustment) | |
1789 offsets.push_back(*offset_for_adjustment); | |
1790 AppendFormattedHostWithOffsets(url, languages, output, new_parsed, &offsets); | |
1791 if (offset_for_adjustment) | |
1792 *offset_for_adjustment = offsets[0]; | |
1793 } | |
1794 | |
1795 // TODO(viettrungluu): convert the wstring |FormatUrlInternal()|. | |
1796 string16 FormatUrlWithOffsets(const GURL& url, | 1530 string16 FormatUrlWithOffsets(const GURL& url, |
1797 const std::string& languages, | 1531 const std::string& languages, |
1798 FormatUrlTypes format_types, | 1532 FormatUrlTypes format_types, |
1799 UnescapeRule::Type unescape_rules, | 1533 UnescapeRule::Type unescape_rules, |
1800 url_parse::Parsed* new_parsed, | 1534 url_parse::Parsed* new_parsed, |
1801 size_t* prefix_end, | 1535 size_t* prefix_end, |
1802 std::vector<size_t>* offsets_for_adjustment) { | 1536 std::vector<size_t>* offsets_for_adjustment) { |
1803 return WideToUTF16Hack( | 1537 url_parse::Parsed parsed_temp; |
1804 FormatUrlInternal(url, ASCIIToWide(languages), format_types, | 1538 if (!new_parsed) |
1805 unescape_rules, new_parsed, prefix_end, | 1539 new_parsed = &parsed_temp; |
1806 offsets_for_adjustment)); | 1540 else |
1541 *new_parsed = url_parse::Parsed(); | |
1542 std::vector<size_t> original_offsets; | |
1543 if (offsets_for_adjustment) | |
1544 original_offsets = *offsets_for_adjustment; | |
1545 | |
1546 // Special handling for view-source:. Don't use chrome::kViewSourceScheme | |
1547 // because this library shouldn't depend on chrome. | |
1548 const char* const kViewSource = "view-source"; | |
1549 // Reject "view-source:view-source:..." to avoid deep recursion. | |
1550 const char* const kViewSourceTwice = "view-source:view-source:"; | |
1551 if (url.SchemeIs(kViewSource) && | |
1552 !StartsWithASCII(url.possibly_invalid_spec(), kViewSourceTwice, false)) { | |
1553 return FormatViewSourceUrl(url, original_offsets, languages, format_types, | |
1554 unescape_rules, new_parsed, prefix_end, offsets_for_adjustment); | |
1555 } | |
1556 | |
1557 // We handle both valid and invalid URLs (this will give us the spec | |
1558 // regardless of validity). | |
1559 const std::string& spec = url.possibly_invalid_spec(); | |
1560 const url_parse::Parsed& parsed = url.parsed_for_possibly_invalid_spec(); | |
1561 size_t spec_length = spec.length(); | |
1562 | |
1563 // Scheme & separators. These are ASCII. | |
1564 string16 url_string; | |
1565 OffsetLimiter offset_limiter(offsets_for_adjustment, &url_string); | |
1566 url_string.insert(url_string.end(), spec.begin(), | |
1567 spec.begin() + parsed.CountCharactersBefore(url_parse::Parsed::USERNAME, | |
1568 true)); | |
1569 const char kHTTP[] = "http://"; | |
1570 const char kFTP[] = "ftp."; | |
1571 // URLFixerUpper::FixupURL() treats "ftp.foo.com" as ftp://ftp.foo.com. This | |
1572 // means that if we trim "http://" off a URL whose host starts with "ftp." and | |
1573 // the user inputs this into any field subject to fixup (which is basically | |
1574 // all input fields), the meaning would be changed. (In fact, often the | |
1575 // formatted URL is directly pre-filled into an input field.) For this reason | |
1576 // we avoid stripping "http://" in this case. | |
1577 bool omit_http = (format_types & kFormatUrlOmitHTTP) && | |
1578 EqualsASCII(url_string, kHTTP) && | |
1579 !StartsWithASCII(url.host(), kFTP, true); | |
1580 new_parsed->scheme = parsed.scheme; | |
1581 | |
1582 // Username & password. | |
1583 if ((format_types & kFormatUrlOmitUsernamePassword) != 0) { | |
1584 // Remove the username and password fields. We don't want to display those | |
1585 // to the user since they can be used for attacks, | |
1586 // e.g. "http://google.com:search@evil.ru/" | |
1587 new_parsed->username.reset(); | |
1588 new_parsed->password.reset(); | |
1589 // Update the offsets based on removed username and/or password. | |
1590 if (offsets_for_adjustment && !offsets_for_adjustment->empty() && | |
1591 (parsed.username.is_nonempty() || parsed.password.is_nonempty())) { | |
1592 AdjustOffset::Adjustments adjustments; | |
mrossetti
2011/04/26 22:13:34
The AdjustOffset::Adjustments concept would be a g
Peter Kasting
2011/04/27 02:07:19
Good idea. Rewrote this object.
| |
1593 if (parsed.username.is_nonempty() && parsed.password.is_nonempty()) { | |
1594 // The seeming off-by-one and off-by-two in these first two lines are to | |
1595 // account for the ':' after the username and '@' after the password. | |
1596 adjustments.push_back(AdjustOffset::Adjustment( | |
1597 static_cast<size_t>(parsed.username.begin), | |
1598 static_cast<size_t>(parsed.username.len + parsed.password.len + | |
1599 2), 0)); | |
1600 } else { | |
1601 const url_parse::Component* nonempty_component = | |
1602 parsed.username.is_nonempty() ? &parsed.username : &parsed.password; | |
1603 // The seeming off-by-one in below is to account for the '@' after the | |
1604 // username/password. | |
1605 adjustments.push_back(AdjustOffset::Adjustment( | |
1606 static_cast<size_t>(nonempty_component->begin), | |
1607 static_cast<size_t>(nonempty_component->len + 1), 0)); | |
1608 } | |
1609 std::for_each(offsets_for_adjustment->begin(), | |
1610 offsets_for_adjustment->end(), AdjustOffset(adjustments)); | |
1611 } | |
1612 } else { | |
1613 AppendFormattedComponent(spec, parsed.username, original_offsets, | |
1614 NonHostComponentTransform(unescape_rules), &url_string, | |
1615 &new_parsed->username, offsets_for_adjustment); | |
1616 if (parsed.password.is_valid()) { | |
1617 size_t colon = parsed.username.end(); | |
1618 DCHECK_EQ(static_cast<size_t>(parsed.password.begin - 1), colon); | |
1619 std::vector<size_t>::const_iterator colon_iter = | |
1620 std::find(original_offsets.begin(), original_offsets.end(), colon); | |
1621 if (colon_iter != original_offsets.end()) { | |
1622 (*offsets_for_adjustment)[colon_iter - original_offsets.begin()] = | |
1623 url_string.length(); | |
1624 } | |
1625 url_string.push_back(':'); | |
1626 } | |
1627 AppendFormattedComponent(spec, parsed.password, original_offsets, | |
1628 NonHostComponentTransform(unescape_rules), &url_string, | |
1629 &new_parsed->password, offsets_for_adjustment); | |
1630 if (parsed.username.is_valid() || parsed.password.is_valid()) { | |
1631 size_t at_sign = (parsed.password.is_valid() ? | |
1632 parsed.password : parsed.username).end(); | |
1633 DCHECK_EQ(static_cast<size_t>(parsed.host.begin - 1), at_sign); | |
1634 std::vector<size_t>::const_iterator at_sign_iter = | |
1635 std::find(original_offsets.begin(), original_offsets.end(), at_sign); | |
1636 if (at_sign_iter != original_offsets.end()) { | |
1637 (*offsets_for_adjustment)[at_sign_iter - original_offsets.begin()] = | |
1638 url_string.length(); | |
1639 } | |
1640 url_string.push_back('@'); | |
1641 } | |
1642 } | |
1643 if (prefix_end) | |
1644 *prefix_end = static_cast<size_t>(url_string.length()); | |
1645 | |
1646 // Host. | |
1647 AppendFormattedComponent(spec, parsed.host, original_offsets, | |
1648 HostComponentTransform(languages), &url_string, &new_parsed->host, | |
1649 offsets_for_adjustment); | |
1650 | |
1651 // Port. | |
1652 if (parsed.port.is_nonempty()) { | |
1653 url_string.push_back(':'); | |
1654 new_parsed->port.begin = url_string.length(); | |
1655 url_string.insert(url_string.end(), | |
1656 spec.begin() + parsed.port.begin, | |
1657 spec.begin() + parsed.port.end()); | |
1658 new_parsed->port.len = url_string.length() - new_parsed->port.begin; | |
1659 } else { | |
1660 new_parsed->port.reset(); | |
1661 } | |
1662 | |
1663 // Path & query. Both get the same general unescape & convert treatment. | |
1664 if (!(format_types & kFormatUrlOmitTrailingSlashOnBareHostname) || | |
1665 !CanStripTrailingSlash(url)) { | |
1666 AppendFormattedComponent(spec, parsed.path, original_offsets, | |
1667 NonHostComponentTransform(unescape_rules), &url_string, | |
1668 &new_parsed->path, offsets_for_adjustment); | |
1669 } | |
1670 if (parsed.query.is_valid()) | |
1671 url_string.push_back('?'); | |
1672 AppendFormattedComponent(spec, parsed.query, original_offsets, | |
1673 NonHostComponentTransform(unescape_rules), &url_string, | |
1674 &new_parsed->query, offsets_for_adjustment); | |
1675 | |
1676 // Ref. This is valid, unescaped UTF-8, so we can just convert. | |
1677 if (parsed.ref.is_valid()) { | |
1678 url_string.push_back('#'); | |
1679 size_t original_ref_begin = static_cast<size_t>(parsed.ref.begin); | |
1680 size_t original_ref_len = static_cast<size_t>(parsed.ref.len); | |
1681 size_t output_ref_begin = url_string.length(); | |
1682 new_parsed->ref.begin = static_cast<int>(output_ref_begin); | |
1683 | |
1684 std::vector<size_t> offsets_into_ref( | |
1685 OffsetsIntoComponent(original_offsets, original_ref_begin)); | |
1686 if (parsed.ref.len > 0) { | |
1687 url_string.append(UTF8ToUTF16AndAdjustOffsets( | |
1688 spec.substr(original_ref_begin, static_cast<size_t>(parsed.ref.len)), | |
1689 &offsets_into_ref)); | |
1690 } | |
1691 | |
1692 new_parsed->ref.len = | |
1693 static_cast<int>(url_string.length() - new_parsed->ref.begin); | |
1694 AdjustForComponentTransform(original_offsets, original_ref_begin, | |
1695 static_cast<size_t>(parsed.ref.end()), offsets_into_ref, | |
1696 output_ref_begin, offsets_for_adjustment); | |
1697 } | |
1698 | |
1699 // If we need to strip out http do it after the fact. This way we don't need | |
1700 // to worry about how offset_for_adjustment is interpreted. | |
1701 if (omit_http && StartsWith(url_string, ASCIIToUTF16(kHTTP), true)) { | |
1702 const size_t kHTTPSize = arraysize(kHTTP) - 1; | |
1703 url_string = url_string.substr(kHTTPSize); | |
1704 if (offsets_for_adjustment && !offsets_for_adjustment->empty()) { | |
1705 AdjustOffset::Adjustments adjustments; | |
1706 adjustments.push_back(AdjustOffset::Adjustment(0, kHTTPSize, 0)); | |
1707 std::for_each(offsets_for_adjustment->begin(), | |
1708 offsets_for_adjustment->end(), AdjustOffset(adjustments)); | |
1709 } | |
1710 if (prefix_end) | |
1711 *prefix_end -= kHTTPSize; | |
1712 | |
1713 // Adjust new_parsed. | |
1714 DCHECK(new_parsed->scheme.is_valid()); | |
1715 int delta = -(new_parsed->scheme.len + 3); // +3 for ://. | |
1716 new_parsed->scheme.reset(); | |
1717 AdjustComponents(delta, new_parsed); | |
1718 } | |
1719 | |
1720 return url_string; | |
1807 } | 1721 } |
1808 | 1722 |
// FormatUrl: single-offset convenience wrapper around the multi-offset URL
// formatter. Rows below are a side-by-side diff: text before the first '|' is
// the OLD revision, text after it is the NEW revision.
//
// Parameters (all forwarded to the underlying formatter):
//   url, languages, format_types, unescape_rules - formatting inputs.
//   new_parsed, prefix_end - output pointers, passed through untouched here.
//   offset_for_adjustment - optional in/out offset; may be null.
// Returns the formatted URL as a string16.
1809 string16 FormatUrl(const GURL& url, | 1723 string16 FormatUrl(const GURL& url, |
1810 const std::string& languages, | 1724 const std::string& languages, |
1811 FormatUrlTypes format_types, | 1725 FormatUrlTypes format_types, |
1812 UnescapeRule::Type unescape_rules, | 1726 UnescapeRule::Type unescape_rules, |
1813 url_parse::Parsed* new_parsed, | 1727 url_parse::Parsed* new_parsed, |
1814 size_t* prefix_end, | 1728 size_t* prefix_end, |
1815 size_t* offset_for_adjustment) { | 1729 size_t* offset_for_adjustment) { |
// Wrap the caller's single offset (if one was supplied) in a one-element
// vector; with a null pointer the vector stays empty but is still passed on.
1816 std::vector<size_t> offsets; | 1730 std::vector<size_t> offsets; |
1817 if (offset_for_adjustment) | 1731 if (offset_for_adjustment) |
1818 offsets.push_back(*offset_for_adjustment); | 1732 offsets.push_back(*offset_for_adjustment); |
// OLD: formats via FormatUrlInternal on a wide `languages` string and converts
// the result with WideToUTF16Hack. NEW: calls FormatUrlWithOffsets directly
// with the unconverted `languages`.
1819 string16 result = WideToUTF16Hack( | 1733 string16 result = FormatUrlWithOffsets(url, languages, format_types, |
1820 FormatUrlInternal(url, ASCIIToWide(languages), format_types, | 1734 unescape_rules, new_parsed, prefix_end, &offsets); |
1821 unescape_rules, new_parsed, prefix_end, &offsets)); | |
// Copy the adjusted offset back out. Guarded by the same null check as the
// push_back above, so offsets[0] exists as long as the callee does not resize
// the vector -- NOTE(review): confirm the callee preserves the vector's size.
1822 if (offset_for_adjustment) | 1735 if (offset_for_adjustment) |
1823 *offset_for_adjustment = offsets[0]; | 1736 *offset_for_adjustment = offsets[0]; |
1824 return result; | 1737 return result; |
1825 } | 1738 } |
1826 | 1739 |
1827 bool CanStripTrailingSlash(const GURL& url) { | 1740 bool CanStripTrailingSlash(const GURL& url) { |
1828 // Omit the path only for standard, non-file URLs with nothing but "/" after | 1741 // Omit the path only for standard, non-file URLs with nothing but "/" after |
1829 // the hostname. | 1742 // the hostname. |
1830 return url.IsStandard() && !url.SchemeIsFile() && !url.has_query() && | 1743 return url.IsStandard() && !url.SchemeIsFile() && !url.has_query() && |
1831 !url.has_ref() && url.path() == "/"; | 1744 !url.has_ref() && url.path() == "/"; |
(...skipping 435 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
2267 } | 2180 } |
2268 | 2181 |
// Constructs a NetworkInterface by copying the given |name| and |address|
// into the members of the same names; the constructor body itself is empty.
// Identical in the OLD and NEW columns of the diff.
2269 NetworkInterface::NetworkInterface(const std::string& name, | 2182 NetworkInterface::NetworkInterface(const std::string& name, |
2270 const IPAddressNumber& address) | 2183 const IPAddressNumber& address) |
2271 : name(name), address(address) { | 2184 : name(name), address(address) { |
2272 } | 2185 } |
2273 | 2186 |
// Out-of-line destructor with an empty body; it performs no explicit cleanup.
// Identical in the OLD and NEW columns of the diff.
2274 NetworkInterface::~NetworkInterface() { | 2187 NetworkInterface::~NetworkInterface() { |
2275 } | 2188 } |
2276 | 2189 |
// Constructs the functor, storing |component_start| in the member of the same
// name for later comparison in operator(). Present only in the OLD column of
// the diff; the NEW column has no counterpart for these rows.
2277 ClampComponentOffset::ClampComponentOffset(size_t component_start) | |
2278 : component_start(component_start) {} | |
2279 | |
// Returns |offset| unchanged when it is at or beyond the stored
// component_start; any smaller offset is replaced by std::wstring::npos.
// Present only in the OLD column of the diff; the NEW column has no
// counterpart for these rows.
2280 size_t ClampComponentOffset::operator()(size_t offset) { | |
2281 return (offset >= component_start) ? | |
2282 offset : std::wstring::npos; | |
2283 } | |
2284 | |
2285 } // namespace net | 2190 } // namespace net |
OLD | NEW |