Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "net/base/net_util.h" | 5 #include "net/base/net_util.h" |
| 6 | 6 |
| 7 #include <unicode/regex.h> | 7 #include <unicode/regex.h> |
| 8 #include <unicode/ucnv.h> | 8 #include <unicode/ucnv.h> |
| 9 #include <unicode/uidna.h> | 9 #include <unicode/uidna.h> |
| 10 #include <unicode/ulocdata.h> | 10 #include <unicode/ulocdata.h> |
| (...skipping 137 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 148 0xFFFF, // Used to block all invalid port numbers (see | 148 0xFFFF, // Used to block all invalid port numbers (see |
| 149 // third_party/WebKit/Source/WebCore/platform/KURLGoogle.cpp, port()) | 149 // third_party/WebKit/Source/WebCore/platform/KURLGoogle.cpp, port()) |
| 150 }; | 150 }; |
| 151 | 151 |
| 152 // FTP overrides the following restricted ports. | 152 // FTP overrides the following restricted ports. |
| 153 static const int kAllowedFtpPorts[] = { | 153 static const int kAllowedFtpPorts[] = { |
| 154 21, // ftp control | 154 21, // ftp control |
| 155 22, // ssh | 155 22, // ssh |
| 156 }; | 156 }; |
| 157 | 157 |
| 158 template<typename STR> | |
| 159 STR GetSpecificHeaderT(const STR& headers, const STR& name) { | |
| 160 // We want to grab the Value from the "Key: Value" pairs in the headers, | |
| 161 // which should look like this (no leading spaces, \n-separated) (we format | |
| 162 // them this way in url_request_inet.cc): | |
| 163 // HTTP/1.1 200 OK\n | |
| 164 // ETag: "6d0b8-947-24f35ec0"\n | |
| 165 // Content-Length: 2375\n | |
| 166 // Content-Type: text/html; charset=UTF-8\n | |
| 167 // Last-Modified: Sun, 03 Sep 2006 04:34:43 GMT\n | |
| 168 if (headers.empty()) | |
| 169 return STR(); | |
| 170 | |
| 171 STR match; | |
| 172 match.push_back('\n'); | |
| 173 match.append(name); | |
| 174 match.push_back(':'); | |
| 175 | |
| 176 typename STR::const_iterator begin = | |
| 177 search(headers.begin(), headers.end(), match.begin(), match.end(), | |
| 178 base::CaseInsensitiveCompareASCII<typename STR::value_type>()); | |
| 179 | |
| 180 if (begin == headers.end()) | |
| 181 return STR(); | |
| 182 | |
| 183 begin += match.length(); | |
| 184 | |
| 185 typename STR::const_iterator end = find(begin, headers.end(), '\n'); | |
| 186 | |
| 187 STR ret; | |
| 188 TrimWhitespace(STR(begin, end), TRIM_ALL, &ret); | |
| 189 return ret; | |
| 190 } | |
| 191 | |
| 192 // Similar to Base64Decode. Decodes a Q-encoded string to a sequence | 158 // Similar to Base64Decode. Decodes a Q-encoded string to a sequence |
| 193 // of bytes. If input is invalid, return false. | 159 // of bytes. If input is invalid, return false. |
| 194 bool QPDecode(const std::string& input, std::string* output) { | 160 bool QPDecode(const std::string& input, std::string* output) { |
| 195 std::string temp; | 161 std::string temp; |
| 196 temp.reserve(input.size()); | 162 temp.reserve(input.size()); |
| 197 std::string::const_iterator it = input.begin(); | 163 std::string::const_iterator it = input.begin(); |
| 198 while (it != input.end()) { | 164 while (it != input.end()) { |
| 199 if (*it == '_') { | 165 if (*it == '_') { |
| 200 temp.push_back(' '); | 166 temp.push_back(' '); |
| 201 } else if (*it == '=') { | 167 } else if (*it == '=') { |
| (...skipping 67 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 269 *is_rfc2047 = false; | 235 *is_rfc2047 = false; |
| 270 output->clear(); | 236 output->clear(); |
| 271 if (encoded_word.empty()) | 237 if (encoded_word.empty()) |
| 272 return true; | 238 return true; |
| 273 | 239 |
| 274 if (!IsStringASCII(encoded_word)) { | 240 if (!IsStringASCII(encoded_word)) { |
| 275 // Try UTF-8, referrer_charset and the native OS default charset in turn. | 241 // Try UTF-8, referrer_charset and the native OS default charset in turn. |
| 276 if (IsStringUTF8(encoded_word)) { | 242 if (IsStringUTF8(encoded_word)) { |
| 277 *output = encoded_word; | 243 *output = encoded_word; |
| 278 } else { | 244 } else { |
| 279 std::wstring wide_output; | 245 string16 utf16_output; |
| 280 if (!referrer_charset.empty() && | 246 if (!referrer_charset.empty() && |
| 281 base::CodepageToWide(encoded_word, referrer_charset.c_str(), | 247 base::CodepageToUTF16(encoded_word, referrer_charset.c_str(), |
| 282 base::OnStringConversionError::FAIL, | 248 base::OnStringConversionError::FAIL, |
| 283 &wide_output)) { | 249 &utf16_output)) { |
| 284 *output = WideToUTF8(wide_output); | 250 *output = UTF16ToUTF8(utf16_output); |
| 285 } else { | 251 } else { |
| 286 *output = WideToUTF8(base::SysNativeMBToWide(encoded_word)); | 252 *output = WideToUTF8(base::SysNativeMBToWide(encoded_word)); |
| 287 } | 253 } |
| 288 } | 254 } |
| 289 | 255 |
| 290 return true; | 256 return true; |
| 291 } | 257 } |
| 292 | 258 |
| 293 // RFC 2047: one of the encoding methods supported by Firefox and relatively | 259 // RFC 2047: one of the encoding methods supported by Firefox and relatively |
| 294 // widely used by web servers. | 260 // widely used by web servers. |
| (...skipping 112 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 407 std::string decoded; | 373 std::string decoded; |
| 408 if (!DecodeWord(t.token(), referrer_charset, &is_previous_token_rfc2047, | 374 if (!DecodeWord(t.token(), referrer_charset, &is_previous_token_rfc2047, |
| 409 &decoded)) | 375 &decoded)) |
| 410 return false; | 376 return false; |
| 411 tmp.append(decoded); | 377 tmp.append(decoded); |
| 412 } | 378 } |
| 413 output->swap(tmp); | 379 output->swap(tmp); |
| 414 return true; | 380 return true; |
| 415 } | 381 } |
| 416 | 382 |
| 417 // TODO(mpcomplete): This is a quick and dirty implementation for now. I'm | |
| 418 // sure this doesn't properly handle all (most?) cases. | |
| 419 template<typename STR> | |
| 420 STR GetHeaderParamValueT(const STR& header, const STR& param_name, | |
| 421 QuoteRule::Type quote_rule) { | |
| 422 // This assumes args are formatted exactly like "bla; arg1=value; arg2=value". | |
| 423 typename STR::const_iterator param_begin = | |
| 424 search(header.begin(), header.end(), param_name.begin(), param_name.end(), | |
| 425 base::CaseInsensitiveCompareASCII<typename STR::value_type>()); | |
| 426 | |
| 427 if (param_begin == header.end()) | |
| 428 return STR(); | |
| 429 param_begin += param_name.length(); | |
| 430 | |
| 431 STR whitespace; | |
| 432 whitespace.push_back(' '); | |
| 433 whitespace.push_back('\t'); | |
| 434 const typename STR::size_type equals_offset = | |
| 435 header.find_first_not_of(whitespace, param_begin - header.begin()); | |
| 436 if (equals_offset == STR::npos || header.at(equals_offset) != '=') | |
| 437 return STR(); | |
| 438 | |
| 439 param_begin = header.begin() + equals_offset + 1; | |
| 440 if (param_begin == header.end()) | |
| 441 return STR(); | |
| 442 | |
| 443 typename STR::const_iterator param_end; | |
| 444 if (*param_begin == '"' && quote_rule == QuoteRule::REMOVE_OUTER_QUOTES) { | |
| 445 ++param_begin; // skip past the quote. | |
| 446 param_end = find(param_begin, header.end(), '"'); | |
| 447 // If the closing quote is missing, we will treat the rest of the | |
| 448 // string as the parameter. We can't set |param_end| to the | |
| 449 // location of the separator (';'), since the separator is | |
| 450 // technically quoted. See: http://crbug.com/58840 | |
| 451 } else { | |
| 452 param_end = find(param_begin+1, header.end(), ';'); | |
| 453 } | |
| 454 | |
| 455 return STR(param_begin, param_end); | |
| 456 } | |
| 457 | |
| 458 // Does some simple normalization of scripts so we can allow certain scripts | 383 // Does some simple normalization of scripts so we can allow certain scripts |
| 459 // to exist together. | 384 // to exist together. |
| 460 // TODO(brettw) bug 880223: we should allow some other languages to be | 385 // TODO(brettw) bug 880223: we should allow some other languages to be |
| 461 // combined such as Chinese and Latin. We will probably need a more | 386 // combined such as Chinese and Latin. We will probably need a more |
| 462 // complicated system of language pairs to have more fine-grained control. | 387 // complicated system of language pairs to have more fine-grained control. |
| 463 UScriptCode NormalizeScript(UScriptCode code) { | 388 UScriptCode NormalizeScript(UScriptCode code) { |
| 464 switch (code) { | 389 switch (code) { |
| 465 case USCRIPT_KATAKANA: | 390 case USCRIPT_KATAKANA: |
| 466 case USCRIPT_HIRAGANA: | 391 case USCRIPT_HIRAGANA: |
| 467 case USCRIPT_KATAKANA_OR_HIRAGANA: | 392 case USCRIPT_KATAKANA_OR_HIRAGANA: |
| (...skipping 118 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 586 ulocdata_close(uld); | 511 ulocdata_close(uld); |
| 587 } | 512 } |
| 588 } | 513 } |
| 589 return !lang_set->isEmpty() && lang_set->containsAll(component_characters); | 514 return !lang_set->isEmpty() && lang_set->containsAll(component_characters); |
| 590 } | 515 } |
| 591 | 516 |
| 592 // Returns true if the given Unicode host component is safe to display to the | 517 // Returns true if the given Unicode host component is safe to display to the |
| 593 // user. | 518 // user. |
| 594 bool IsIDNComponentSafe(const char16* str, | 519 bool IsIDNComponentSafe(const char16* str, |
| 595 int str_len, | 520 int str_len, |
| 596 const std::wstring& languages) { | 521 const std::string& languages) { |
| 597 // Most common cases (non-IDN) do not reach here so that we don't | 522 // Most common cases (non-IDN) do not reach here so that we don't |
| 598 // need a fast return path. | 523 // need a fast return path. |
| 599 // TODO(jungshik) : Check if there's any character inappropriate | 524 // TODO(jungshik) : Check if there's any character inappropriate |
| 600 // (although allowed) for domain names. | 525 // (although allowed) for domain names. |
| 601 // See http://www.unicode.org/reports/tr39/#IDN_Security_Profiles and | 526 // See http://www.unicode.org/reports/tr39/#IDN_Security_Profiles and |
| 602 // http://www.unicode.org/reports/tr39/data/xidmodifications.txt | 527 // http://www.unicode.org/reports/tr39/data/xidmodifications.txt |
| 603 // For now, we borrowed the list from Mozilla and tweaked it slightly. | 528 // For now, we borrowed the list from Mozilla and tweaked it slightly. |
| 604 // (e.g. Characters like U+00A0, U+3000, U+3002 are omitted because | 529 // (e.g. Characters like U+00A0, U+3000, U+3002 are omitted because |
| 605 // they will be canonicalized to U+0020 and full stop before | 530 // they will be canonicalized to U+0020 and full stop before |
| 606 // reaching here.) | 531 // reaching here.) |
| (...skipping 63 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 670 // (sync'd with characters allowed in url_canon_host with square | 595 // (sync'd with characters allowed in url_canon_host with square |
| 671 // brackets excluded.) See kHostCharLookup[] array in url_canon_host.cc. | 596 // brackets excluded.) See kHostCharLookup[] array in url_canon_host.cc. |
| 672 icu::UnicodeSet common_characters(UNICODE_STRING_SIMPLE("[[0-9]\\-_+\\ ]"), | 597 icu::UnicodeSet common_characters(UNICODE_STRING_SIMPLE("[[0-9]\\-_+\\ ]"), |
| 673 status); | 598 status); |
| 674 DCHECK(U_SUCCESS(status)); | 599 DCHECK(U_SUCCESS(status)); |
| 675 // Subtract common characters because they're always allowed so that | 600 // Subtract common characters because they're always allowed so that |
| 676 // we just have to check if a language-specific set contains | 601 // we just have to check if a language-specific set contains |
| 677 // the remainder. | 602 // the remainder. |
| 678 component_characters.removeAll(common_characters); | 603 component_characters.removeAll(common_characters); |
| 679 | 604 |
| 680 std::string languages_list(WideToASCII(languages)); | 605 StringTokenizer t(languages, ","); |
| 681 StringTokenizer t(languages_list, ","); | |
| 682 while (t.GetNext()) { | 606 while (t.GetNext()) { |
| 683 if (IsComponentCoveredByLang(component_characters, t.token())) | 607 if (IsComponentCoveredByLang(component_characters, t.token())) |
| 684 return true; | 608 return true; |
| 685 } | 609 } |
| 686 return false; | 610 return false; |
| 687 } | 611 } |
| 688 | 612 |
| 689 // Converts one component of a host (between dots) to IDN if safe. The result | 613 // Converts one component of a host (between dots) to IDN if safe. The result |
| 690 // will be APPENDED to the given output string and will be the same as the input | 614 // will be APPENDED to the given output string and will be the same as the input |
| 691 // if it is not IDN or the IDN is unsafe to display. Returns whether any | 615 // if it is not IDN or the IDN is unsafe to display. Returns whether any |
| 692 // conversion was performed. | 616 // conversion was performed. |
| 693 bool IDNToUnicodeOneComponent(const char16* comp, | 617 bool IDNToUnicodeOneComponent(const char16* comp, |
| 694 size_t comp_len, | 618 size_t comp_len, |
| 695 const std::wstring& languages, | 619 const std::string& languages, |
| 696 string16* out) { | 620 string16* out) { |
| 697 DCHECK(out); | 621 DCHECK(out); |
| 698 if (comp_len == 0) | 622 if (comp_len == 0) |
| 699 return false; | 623 return false; |
| 700 | 624 |
| 701 // Only transform if the input can be an IDN component. | 625 // Only transform if the input can be an IDN component. |
| 702 static const char16 kIdnPrefix[] = {'x', 'n', '-', '-'}; | 626 static const char16 kIdnPrefix[] = {'x', 'n', '-', '-'}; |
| 703 if ((comp_len > arraysize(kIdnPrefix)) && | 627 if ((comp_len > arraysize(kIdnPrefix)) && |
| 704 !memcmp(comp, kIdnPrefix, arraysize(kIdnPrefix) * sizeof(char16))) { | 628 !memcmp(comp, kIdnPrefix, arraysize(kIdnPrefix) * sizeof(char16))) { |
| 705 // Repeatedly expand the output string until it's big enough. It looks like | 629 // Repeatedly expand the output string until it's big enough. It looks like |
| (...skipping 21 matching lines...) Expand all Loading... | |
| 727 // Failed, revert back to original string. | 651 // Failed, revert back to original string. |
| 728 out->resize(original_length); | 652 out->resize(original_length); |
| 729 } | 653 } |
| 730 | 654 |
| 731 // We get here with no IDN or on error, in which case we just append the | 655 // We get here with no IDN or on error, in which case we just append the |
| 732 // literal input. | 656 // literal input. |
| 733 out->append(comp, comp_len); | 657 out->append(comp, comp_len); |
| 734 return false; | 658 return false; |
| 735 } | 659 } |
| 736 | 660 |
| 737 struct SubtractFromOffset { | 661 // Functions may stack-allocate one of these in order to clamp the offsets in |
|
mrossetti
2011/04/26 22:13:34
I'm not sure I see the point of making this an aut
Peter Kasting
2011/04/27 02:07:19
Good point. Originally I wanted an object to guar
| |
| 738 explicit SubtractFromOffset(size_t amount) | 662 // |offsets_for_adjustment| to the length of |output| on exit. |
| 739 : amount(amount) {} | 663 class OffsetLimiter { |
| 740 void operator()(size_t& offset) { | 664 public: |
| 741 if (offset != std::wstring::npos) { | 665 OffsetLimiter(std::vector<size_t>* offsets_for_adjustment, string16* output) |
| 742 if (offset >= amount) | 666 : offsets_for_adjustment_(offsets_for_adjustment), |
| 743 offset -= amount; | 667 output_(output) { |
| 744 else | 668 } |
| 745 offset = std::wstring::npos; | 669 |
| 670 ~OffsetLimiter() { | |
| 671 if (offsets_for_adjustment_) { | |
| 672 std::for_each(offsets_for_adjustment_->begin(), | |
| 673 offsets_for_adjustment_->end(), | |
| 674 LimitOffset<string16>(output_->length())); | |
| 746 } | 675 } |
| 747 } | 676 } |
| 748 | 677 |
| 749 size_t amount; | 678 private: |
| 679 std::vector<size_t>* offsets_for_adjustment_; | |
| 680 string16* output_; | |
| 681 | |
| 682 DISALLOW_COPY_AND_ASSIGN(OffsetLimiter); | |
| 750 }; | 683 }; |
| 751 | 684 |
| 752 struct AddToOffset { | 685 // TODO(brettw) bug 734373: check the scripts for each host component and |
| 753 explicit AddToOffset(size_t amount) | 686 // don't un-IDN-ize if there is more than one. Alternatively, only IDN for |
| 754 : amount(amount) {} | 687 // scripts that the user has installed. For now, just put the entire |
| 755 void operator()(size_t& offset) { | 688 // path through IDN. Maybe this feature can be implemented in ICU itself? |
| 756 if (offset != std::wstring::npos) | 689 // |
| 757 offset += amount; | 690 // We may want to skip this step in the case of file URLs to allow unicode |
| 691 // UNC hostnames regardless of encodings. | |
| 692 string16 IDNToUnicodeWithOffsets(const std::string& host, | |
| 693 const std::string& languages, | |
| 694 std::vector<size_t>* offsets_for_adjustment) { | |
| 695 // Convert the ASCII input to a string16 for ICU. | |
| 696 string16 input16; | |
| 697 input16.reserve(host.length()); | |
| 698 input16.insert(input16.end(), host.begin(), host.end()); | |
| 699 | |
| 700 // Do each component of the host separately, since we enforce script matching | |
| 701 // on a per-component basis. | |
| 702 AdjustOffset::Adjustments adjustments; | |
| 703 string16 out16; | |
| 704 OffsetLimiter offset_limiter(offsets_for_adjustment, &out16); | |
|
mrossetti
2011/04/26 22:13:34
Continuing from the previous comment: This auto co
| |
| 705 for (size_t component_start = 0, component_end; | |
| 706 component_start < input16.length(); | |
| 707 component_start = component_end + 1) { | |
| 708 // Find the end of the component. | |
| 709 component_end = input16.find('.', component_start); | |
| 710 if (component_end == string16::npos) | |
| 711 component_end = input16.length(); // For getting the last component. | |
| 712 size_t component_length = component_end - component_start; | |
| 713 size_t new_component_start = out16.length(); | |
| 714 bool converted_idn = false; | |
| 715 if (component_end > component_start) { | |
| 716 // Add the substring that we just found. | |
| 717 converted_idn = IDNToUnicodeOneComponent(input16.data() + component_start, | |
| 718 component_length, languages, &out16); | |
| 719 } | |
| 720 size_t new_component_length = out16.length() - new_component_start; | |
| 721 | |
| 722 if (converted_idn && offsets_for_adjustment) { | |
| 723 adjustments.push_back(AdjustOffset::Adjustment( | |
| 724 component_start, component_length, new_component_length)); | |
| 725 } | |
| 726 | |
| 727 // Need to add the dot we just found (if we found one). | |
| 728 if (component_end < input16.length()) | |
| 729 out16.push_back('.'); | |
| 758 } | 730 } |
| 759 | 731 |
| 760 size_t amount; | 732 // Make offset adjustment. |
| 761 }; | 733 if (offsets_for_adjustment && !adjustments.empty()) { |
| 734 std::for_each(offsets_for_adjustment->begin(), | |
| 735 offsets_for_adjustment->end(), AdjustOffset(adjustments)); | |
| 736 } | |
| 762 | 737 |
| 763 std::vector<size_t> OffsetsIntoSection( | 738 return out16; |
| 764 std::vector<size_t>* offsets_for_adjustment, | |
| 765 size_t section_begin) { | |
| 766 std::vector<size_t> offsets_into_section; | |
| 767 if (offsets_for_adjustment) { | |
| 768 std::transform(offsets_for_adjustment->begin(), | |
| 769 offsets_for_adjustment->end(), | |
| 770 std::back_inserter(offsets_into_section), | |
| 771 ClampComponentOffset(section_begin)); | |
| 772 std::for_each(offsets_into_section.begin(), offsets_into_section.end(), | |
| 773 SubtractFromOffset(section_begin)); | |
| 774 } | |
| 775 return offsets_into_section; | |
| 776 } | 739 } |
| 777 | 740 |
| 778 void ApplySectionAdjustments(const std::vector<size_t>& offsets_into_section, | 741 // Transforms |original_offsets| by subtracting |component_begin| from all |
| 779 std::vector<size_t>* offsets_for_adjustment, | 742 // offsets. Any offset which was not at least this large to begin with is set |
| 780 size_t old_section_len, | 743 // to std::string::npos. |
| 781 size_t new_section_len, | 744 std::vector<size_t> OffsetsIntoComponent( |
| 782 size_t section_begin) { | 745 const std::vector<size_t>& original_offsets, |
| 783 if (offsets_for_adjustment) { | 746 size_t component_begin) { |
| 784 DCHECK_EQ(offsets_for_adjustment->size(), offsets_into_section.size()); | 747 DCHECK_NE(std::string::npos, component_begin); |
| 785 std::vector<size_t>::const_iterator host_offsets_iter = | 748 std::vector<size_t> offsets_into_component(original_offsets); |
| 786 offsets_into_section.begin(); | 749 for (std::vector<size_t>::iterator i(offsets_into_component.begin()); |
| 787 for (std::vector<size_t>::iterator offsets_iter = | 750 i != offsets_into_component.end(); ++i) { |
| 788 offsets_for_adjustment->begin(); | 751 if (*i != std::string::npos) |
| 789 offsets_iter != offsets_for_adjustment->end(); | 752 *i = (*i < component_begin) ? std::string::npos : (*i - component_begin); |
| 790 ++offsets_iter, ++host_offsets_iter) { | 753 } |
| 791 size_t offset = *offsets_iter; | 754 return offsets_into_component; |
| 792 if (offset == std::wstring::npos || offset < section_begin) { | 755 } |
| 793 // The offset is before the host section so leave it as is. | 756 |
| 794 continue; | 757 // Called after we transform a component and append it to an output string. |
| 795 } | 758 // Maps |transformed_offsets|, which represent offsets into the transformed |
| 796 if (offset >= section_begin + old_section_len) { | 759 // component itself, into appropriate offsets for the output string, by adding |
| 797 // The offset is after the host section so adjust by host length delta. | 760 // |output_component_begin| to each. Determines which offsets need mapping by |
| 798 offset += new_section_len - old_section_len; | 761 // checking to see which of the |original_offsets| were within the designated |
| 799 } else if (*host_offsets_iter != std::wstring::npos) { | 762 // original component, using its provided endpoints. |
| 800 // The offset is within the host and valid so adjust by the host | 763 void AdjustForComponentTransform( |
| 801 // reformatting offsets results. | 764 const std::vector<size_t>& original_offsets, |
| 802 offset = section_begin + *host_offsets_iter; | 765 size_t original_component_begin, |
| 803 } else { | 766 size_t original_component_end, |
| 804 // The offset is invalid. | 767 const std::vector<size_t>& transformed_offsets, |
| 805 offset = std::wstring::npos; | 768 size_t output_component_begin, |
| 806 } | 769 std::vector<size_t>* offsets_for_adjustment) { |
| 807 *offsets_iter = offset; | 770 if (!offsets_for_adjustment) |
| 771 return; | |
| 772 | |
| 773 DCHECK_NE(std::string::npos, original_component_begin); | |
| 774 DCHECK_NE(std::string::npos, original_component_end); | |
| 775 DCHECK_NE(string16::npos, output_component_begin); | |
| 776 size_t offsets_size = offsets_for_adjustment->size(); | |
| 777 DCHECK_EQ(offsets_size, original_offsets.size()); | |
| 778 DCHECK_EQ(offsets_size, transformed_offsets.size()); | |
| 779 for (size_t i = 0; i < offsets_size; ++i) { | |
| 780 size_t original_offset = original_offsets[i]; | |
| 781 if ((original_offset >= original_component_begin) && | |
| 782 (original_offset < original_component_end)) { | |
| 783 size_t transformed_offset = transformed_offsets[i]; | |
| 784 (*offsets_for_adjustment)[i] = (transformed_offset == string16::npos) ? | |
| 785 string16::npos : (output_component_begin + transformed_offset); | |
| 808 } | 786 } |
| 809 } | 787 } |
| 810 } | 788 } |
| 811 | 789 |
| 812 // If |component| is valid, its begin is incremented by |delta|. | 790 // If |component| is valid, its begin is incremented by |delta|. |
| 813 void AdjustComponent(int delta, url_parse::Component* component) { | 791 void AdjustComponent(int delta, url_parse::Component* component) { |
| 814 if (!component->is_valid()) | 792 if (!component->is_valid()) |
| 815 return; | 793 return; |
| 816 | 794 |
| 817 DCHECK(delta >= 0 || component->begin >= -delta); | 795 DCHECK(delta >= 0 || component->begin >= -delta); |
| 818 component->begin += delta; | 796 component->begin += delta; |
| 819 } | 797 } |
| 820 | 798 |
| 821 // Adjusts all the components of |parsed| by |delta|, except for the scheme. | 799 // Adjusts all the components of |parsed| by |delta|, except for the scheme. |
| 822 void AdjustComponents(int delta, url_parse::Parsed* parsed) { | 800 void AdjustComponents(int delta, url_parse::Parsed* parsed) { |
| 823 AdjustComponent(delta, &(parsed->username)); | 801 AdjustComponent(delta, &(parsed->username)); |
| 824 AdjustComponent(delta, &(parsed->password)); | 802 AdjustComponent(delta, &(parsed->password)); |
| 825 AdjustComponent(delta, &(parsed->host)); | 803 AdjustComponent(delta, &(parsed->host)); |
| 826 AdjustComponent(delta, &(parsed->port)); | 804 AdjustComponent(delta, &(parsed->port)); |
| 827 AdjustComponent(delta, &(parsed->path)); | 805 AdjustComponent(delta, &(parsed->path)); |
| 828 AdjustComponent(delta, &(parsed->query)); | 806 AdjustComponent(delta, &(parsed->query)); |
| 829 AdjustComponent(delta, &(parsed->ref)); | 807 AdjustComponent(delta, &(parsed->ref)); |
| 830 } | 808 } |
| 831 | 809 |
| 832 std::wstring FormatUrlInternal(const GURL& url, | 810 // Helper for FormatUrlWithOffsets(). |
| 833 const std::wstring& languages, | 811 string16 FormatViewSourceUrl(const GURL& url, |
| 834 FormatUrlTypes format_types, | 812 const std::vector<size_t>& original_offsets, |
| 835 UnescapeRule::Type unescape_rules, | 813 const std::string& languages, |
| 836 url_parse::Parsed* new_parsed, | 814 FormatUrlTypes format_types, |
| 837 size_t* prefix_end, | 815 UnescapeRule::Type unescape_rules, |
| 838 std::vector<size_t>* offsets_for_adjustment); | 816 url_parse::Parsed* new_parsed, |
| 817 size_t* prefix_end, | |
| 818 std::vector<size_t>* offsets_for_adjustment) { | |
| 819 DCHECK(new_parsed); | |
| 820 const char kViewSource[] = "view-source:"; | |
| 821 const size_t kViewSourceLength = arraysize(kViewSource) - 1; | |
| 822 std::vector<size_t> offsets_into_url( | |
| 823 OffsetsIntoComponent(original_offsets, kViewSourceLength)); | |
| 839 | 824 |
| 840 // Helper for FormatUrl()/FormatUrlInternal(). | 825 GURL real_url(url.possibly_invalid_spec().substr(kViewSourceLength)); |
| 841 std::wstring FormatViewSourceUrl(const GURL& url, | 826 string16 result(ASCIIToUTF16(kViewSource) + |
| 842 const std::wstring& languages, | 827 FormatUrlWithOffsets(real_url, languages, format_types, unescape_rules, |
| 843 FormatUrlTypes format_types, | 828 new_parsed, prefix_end, &offsets_into_url)); |
| 844 UnescapeRule::Type unescape_rules, | 829 OffsetLimiter offset_limiter(offsets_for_adjustment, &result); |
| 845 url_parse::Parsed* new_parsed, | |
| 846 size_t* prefix_end, | |
| 847 std::vector<size_t>* offsets_for_adjustment) { | |
| 848 DCHECK(new_parsed); | |
| 849 DCHECK(offsets_for_adjustment); | |
| 850 const wchar_t* const kWideViewSource = L"view-source:"; | |
| 851 const size_t kViewSourceLengthPlus1 = 12; | |
| 852 std::vector<size_t> saved_offsets(*offsets_for_adjustment); | |
| 853 | |
| 854 GURL real_url(url.possibly_invalid_spec().substr(kViewSourceLengthPlus1)); | |
| 855 // Clamp the offsets to the source area. | |
| 856 std::for_each(offsets_for_adjustment->begin(), | |
| 857 offsets_for_adjustment->end(), | |
| 858 SubtractFromOffset(kViewSourceLengthPlus1)); | |
| 859 std::wstring result = FormatUrlInternal(real_url, languages, format_types, | |
| 860 unescape_rules, new_parsed, prefix_end, offsets_for_adjustment); | |
| 861 result.insert(0, kWideViewSource); | |
| 862 | 830 |
| 863 // Adjust position values. | 831 // Adjust position values. |
| 864 if (new_parsed->scheme.is_nonempty()) { | 832 if (new_parsed->scheme.is_nonempty()) { |
| 865 // Assume "view-source:real-scheme" as a scheme. | 833 // Assume "view-source:real-scheme" as a scheme. |
| 866 new_parsed->scheme.len += kViewSourceLengthPlus1; | 834 new_parsed->scheme.len += kViewSourceLength; |
| 867 } else { | 835 } else { |
| 868 new_parsed->scheme.begin = 0; | 836 new_parsed->scheme.begin = 0; |
| 869 new_parsed->scheme.len = kViewSourceLengthPlus1 - 1; | 837 new_parsed->scheme.len = kViewSourceLength - 1; |
| 870 } | 838 } |
| 871 AdjustComponents(kViewSourceLengthPlus1, new_parsed); | 839 AdjustComponents(kViewSourceLength, new_parsed); |
| 872 if (prefix_end) | 840 if (prefix_end) |
| 873 *prefix_end += kViewSourceLengthPlus1; | 841 *prefix_end += kViewSourceLength; |
| 874 std::for_each(offsets_for_adjustment->begin(), | 842 AdjustForComponentTransform(original_offsets, kViewSourceLength, |
| 875 offsets_for_adjustment->end(), | 843 url.possibly_invalid_spec().length(), offsets_into_url, kViewSourceLength, |
| 876 AddToOffset(kViewSourceLengthPlus1)); | 844 offsets_for_adjustment); |
| 877 // Restore all offsets which were not affected by FormatUrlInternal. | |
| 878 DCHECK_EQ(saved_offsets.size(), offsets_for_adjustment->size()); | |
| 879 for (size_t i = 0; i < saved_offsets.size(); ++i) { | |
| 880 if (saved_offsets[i] < kViewSourceLengthPlus1) | |
| 881 (*offsets_for_adjustment)[i] = saved_offsets[i]; | |
| 882 } | |
| 883 return result; | 845 return result; |
| 884 } | 846 } |
| 885 | 847 |
| 886 // Appends the substring |in_component| inside of the URL |spec| to |output|, | 848 class AppendComponentTransform { |
| 887 // and the resulting range will be filled into |out_component|. |unescape_rules| | 849 public: |
| 888 // defines how to clean the URL for human readability. |offsets_for_adjustment| | 850 AppendComponentTransform() {} |
| 889 // is an array of offsets into |output| each of which will be adjusted based on | 851 virtual string16 Execute( |
| 890 // how it maps to the component being converted; if it is less than | 852 const std::string& component_text, |
| 891 // output->length(), it will be untouched, and if it is greater than | 853 std::vector<size_t>* offsets_into_component) const = 0; |
| 892 // output->length() + in_component.len it will be adjusted by the difference in | 854 |
| 893 // lengths between the input and output components. Otherwise it points into | 855 private: |
| 894 // the component being converted, and is adjusted to point to the same logical | 856 DISALLOW_COPY_AND_ASSIGN(AppendComponentTransform); |
| 895 // place in |output|. |offsets_for_adjustment| may not be NULL. | 857 }; |
| 858 | |
| 859 class HostComponentTransform : public AppendComponentTransform { | |
| 860 public: | |
| 861 explicit HostComponentTransform(const std::string& languages) | |
| 862 : languages_(languages) { | |
| 863 } | |
| 864 | |
| 865 private: | |
| 866 virtual string16 Execute( | |
| 867 const std::string& component_text, | |
| 868 std::vector<size_t>* offsets_into_component) const { | |
| 869 return IDNToUnicodeWithOffsets(component_text, languages_, | |
| 870 offsets_into_component); | |
| 871 } | |
| 872 | |
| 873 const std::string& languages_; | |
| 874 }; | |
| 875 | |
| 876 class NonHostComponentTransform : public AppendComponentTransform { | |
| 877 public: | |
| 878 explicit NonHostComponentTransform(UnescapeRule::Type unescape_rules) | |
| 879 : unescape_rules_(unescape_rules) { | |
| 880 } | |
| 881 | |
| 882 private: | |
| 883 virtual string16 Execute( | |
| 884 const std::string& component_text, | |
| 885 std::vector<size_t>* offsets_into_component) const { | |
| 886 return (unescape_rules_ == UnescapeRule::NONE) ? | |
| 887 UTF8ToUTF16AndAdjustOffsets(component_text, offsets_into_component) : | |
| 888 UnescapeAndDecodeUTF8URLComponentWithOffsets(component_text, | |
| 889 unescape_rules_, offsets_into_component); | |
| 890 } | |
| 891 | |
| 892 const UnescapeRule::Type unescape_rules_; | |
| 893 }; | |
| 894 | |
| 896 void AppendFormattedComponent(const std::string& spec, | 895 void AppendFormattedComponent(const std::string& spec, |
| 897 const url_parse::Component& in_component, | 896 const url_parse::Component& original_component, |
| 898 UnescapeRule::Type unescape_rules, | 897 const std::vector<size_t>& original_offsets, |
| 899 std::wstring* output, | 898 const AppendComponentTransform& transform, |
| 900 url_parse::Component* out_component, | 899 string16* output, |
| 900 url_parse::Component* output_component, | |
| 901 std::vector<size_t>* offsets_for_adjustment) { | 901 std::vector<size_t>* offsets_for_adjustment) { |
| 902 DCHECK(output); | 902 DCHECK(output); |
| 903 DCHECK(offsets_for_adjustment); | 903 if (original_component.is_nonempty()) { |
| 904 if (in_component.is_nonempty()) { | 904 size_t original_component_begin = |
| 905 size_t component_begin = output->length(); | 905 static_cast<size_t>(original_component.begin); |
| 906 out_component->begin = static_cast<int>(component_begin); | 906 size_t output_component_begin = output->length(); |
| 907 if (output_component) | |
| 908 output_component->begin = static_cast<int>(output_component_begin); | |
| 907 | 909 |
| 908 // Compose a list of offsets within the component area. | |
| 909 std::vector<size_t> offsets_into_component = | 910 std::vector<size_t> offsets_into_component = |
| 910 OffsetsIntoSection(offsets_for_adjustment, component_begin); | 911 OffsetsIntoComponent(original_offsets, original_component_begin); |
| 912 output->append(transform.Execute(std::string(spec, original_component_begin, | |
| 913 static_cast<size_t>(original_component.len)), &offsets_into_component)); | |
| 911 | 914 |
| 912 if (unescape_rules == UnescapeRule::NONE) { | 915 if (output_component) { |
| 913 output->append(UTF8ToWideAndAdjustOffsets( | 916 output_component->len = |
| 914 spec.substr(in_component.begin, in_component.len), | 917 static_cast<int>(output->length() - output_component_begin); |
| 915 &offsets_into_component)); | |
| 916 } else { | |
| 917 output->append(UTF16ToWideHack( | |
| 918 UnescapeAndDecodeUTF8URLComponentWithOffsets( | |
| 919 spec.substr(in_component.begin, in_component.len), unescape_rules, | |
| 920 &offsets_into_component))); | |
| 921 } | 918 } |
| 922 size_t new_component_len = output->length() - component_begin; | 919 AdjustForComponentTransform(original_offsets, original_component_begin, |
| 923 out_component->len = static_cast<int>(new_component_len); | 920 static_cast<size_t>(original_component.end()), |
| 924 | 921 offsets_into_component, output_component_begin, |
| 925 // Apply offset adjustments. | 922 offsets_for_adjustment); |
| 926 size_t old_component_len = static_cast<size_t>(in_component.len); | 923 } else if (output_component) { |
| 927 ApplySectionAdjustments(offsets_into_component, offsets_for_adjustment, | 924 output_component->reset(); |
| 928 old_component_len, new_component_len, component_begin); | |
| 929 } else { | |
| 930 out_component->reset(); | |
| 931 } | 925 } |
| 932 } | 926 } |
| 933 | 927 |
| 934 // TODO(viettrungluu): This is really the old-fashioned version, made internal. | |
| 935 // I need to really convert |FormatUrl()|. | |
| 936 std::wstring FormatUrlInternal(const GURL& url, | |
| 937 const std::wstring& languages, | |
| 938 FormatUrlTypes format_types, | |
| 939 UnescapeRule::Type unescape_rules, | |
| 940 url_parse::Parsed* new_parsed, | |
| 941 size_t* prefix_end, | |
| 942 std::vector<size_t>* offsets_for_adjustment) { | |
| 943 url_parse::Parsed parsed_temp; | |
| 944 if (!new_parsed) | |
| 945 new_parsed = &parsed_temp; | |
| 946 else | |
| 947 *new_parsed = url_parse::Parsed(); | |
| 948 | |
| 949 std::vector<size_t> offsets_temp; | |
| 950 if (!offsets_for_adjustment) | |
| 951 offsets_for_adjustment = &offsets_temp; | |
| 952 | |
| 953 std::wstring url_string; | |
| 954 | |
| 955 // Check for empty URLs or 0 available text width. | |
| 956 if (url.is_empty()) { | |
| 957 if (prefix_end) | |
| 958 *prefix_end = 0; | |
| 959 std::for_each(offsets_for_adjustment->begin(), | |
| 960 offsets_for_adjustment->end(), | |
| 961 LimitOffset<std::wstring>(0)); | |
| 962 return url_string; | |
| 963 } | |
| 964 | |
| 965 // Special handling for view-source:. Don't use chrome::kViewSourceScheme | |
| 966 // because this library shouldn't depend on chrome. | |
| 967 const char* const kViewSource = "view-source"; | |
| 968 // Reject "view-source:view-source:..." to avoid deep recursion. | |
| 969 const char* const kViewSourceTwice = "view-source:view-source:"; | |
| 970 if (url.SchemeIs(kViewSource) && | |
| 971 !StartsWithASCII(url.possibly_invalid_spec(), kViewSourceTwice, false)) { | |
| 972 return FormatViewSourceUrl(url, languages, format_types, | |
| 973 unescape_rules, new_parsed, prefix_end, offsets_for_adjustment); | |
| 974 } | |
| 975 | |
| 976 // We handle both valid and invalid URLs (this will give us the spec | |
| 977 // regardless of validity). | |
| 978 const std::string& spec = url.possibly_invalid_spec(); | |
| 979 const url_parse::Parsed& parsed = url.parsed_for_possibly_invalid_spec(); | |
| 980 size_t spec_length = spec.length(); | |
| 981 std::for_each(offsets_for_adjustment->begin(), | |
| 982 offsets_for_adjustment->end(), | |
| 983 LimitOffset<std::wstring>(spec_length)); | |
| 984 | |
| 985 // Copy everything before the username (the scheme and the separators.) | |
| 986 // These are ASCII. | |
| 987 url_string.insert(url_string.end(), spec.begin(), | |
| 988 spec.begin() + parsed.CountCharactersBefore(url_parse::Parsed::USERNAME, | |
| 989 true)); | |
| 990 | |
| 991 const wchar_t kHTTP[] = L"http://"; | |
| 992 const char kFTP[] = "ftp."; | |
| 993 // URLFixerUpper::FixupURL() treats "ftp.foo.com" as ftp://ftp.foo.com. This | |
| 994 // means that if we trim "http://" off a URL whose host starts with "ftp." and | |
| 995 // the user inputs this into any field subject to fixup (which is basically | |
| 996 // all input fields), the meaning would be changed. (In fact, often the | |
| 997 // formatted URL is directly pre-filled into an input field.) For this reason | |
| 998 // we avoid stripping "http://" in this case. | |
| 999 bool omit_http = | |
| 1000 (format_types & kFormatUrlOmitHTTP) && (url_string == kHTTP) && | |
| 1001 (url.host().compare(0, arraysize(kFTP) - 1, kFTP) != 0); | |
| 1002 | |
| 1003 new_parsed->scheme = parsed.scheme; | |
| 1004 | |
| 1005 if ((format_types & kFormatUrlOmitUsernamePassword) != 0) { | |
| 1006 // Remove the username and password fields. We don't want to display those | |
| 1007 // to the user since they can be used for attacks, | |
| 1008 // e.g. "http://google.com:search@evil.ru/" | |
| 1009 new_parsed->username.reset(); | |
| 1010 new_parsed->password.reset(); | |
| 1011 // Update the offsets based on removed username and/or password. | |
| 1012 if (!offsets_for_adjustment->empty() && | |
| 1013 (parsed.username.is_nonempty() || parsed.password.is_nonempty())) { | |
| 1014 AdjustOffset::Adjustments adjustments; | |
| 1015 if (parsed.username.is_nonempty() && parsed.password.is_nonempty()) { | |
| 1016 // The seeming off-by-one and off-by-two in these first two lines are to | |
| 1017 // account for the ':' after the username and '@' after the password. | |
| 1018 adjustments.push_back(AdjustOffset::Adjustment( | |
| 1019 static_cast<size_t>(parsed.username.begin), | |
| 1020 static_cast<size_t>(parsed.username.len + parsed.password.len + | |
| 1021 2), 0)); | |
| 1022 } else { | |
| 1023 const url_parse::Component* nonempty_component = | |
| 1024 parsed.username.is_nonempty() ? &parsed.username : &parsed.password; | |
| 1025 // The seeming off-by-one in below is to account for the '@' after the | |
| 1026 // username/password. | |
| 1027 adjustments.push_back(AdjustOffset::Adjustment( | |
| 1028 static_cast<size_t>(nonempty_component->begin), | |
| 1029 static_cast<size_t>(nonempty_component->len + 1), 0)); | |
| 1030 } | |
| 1031 | |
| 1032 // Make offset adjustment. | |
| 1033 std::for_each(offsets_for_adjustment->begin(), | |
| 1034 offsets_for_adjustment->end(), | |
| 1035 AdjustOffset(adjustments)); | |
| 1036 } | |
| 1037 } else { | |
| 1038 AppendFormattedComponent(spec, parsed.username, unescape_rules, &url_string, | |
| 1039 &new_parsed->username, offsets_for_adjustment); | |
| 1040 if (parsed.password.is_valid()) | |
| 1041 url_string.push_back(':'); | |
| 1042 AppendFormattedComponent(spec, parsed.password, unescape_rules, &url_string, | |
| 1043 &new_parsed->password, offsets_for_adjustment); | |
| 1044 if (parsed.username.is_valid() || parsed.password.is_valid()) | |
| 1045 url_string.push_back('@'); | |
| 1046 } | |
| 1047 if (prefix_end) | |
| 1048 *prefix_end = static_cast<size_t>(url_string.length()); | |
| 1049 | |
| 1050 AppendFormattedHostWithOffsets(url, languages, &url_string, new_parsed, | |
| 1051 offsets_for_adjustment); | |
| 1052 | |
| 1053 // Port. | |
| 1054 if (parsed.port.is_nonempty()) { | |
| 1055 url_string.push_back(':'); | |
| 1056 new_parsed->port.begin = url_string.length(); | |
| 1057 url_string.insert(url_string.end(), | |
| 1058 spec.begin() + parsed.port.begin, | |
| 1059 spec.begin() + parsed.port.end()); | |
| 1060 new_parsed->port.len = url_string.length() - new_parsed->port.begin; | |
| 1061 } else { | |
| 1062 new_parsed->port.reset(); | |
| 1063 } | |
| 1064 | |
| 1065 // Path and query both get the same general unescape & convert treatment. | |
| 1066 if (!(format_types & kFormatUrlOmitTrailingSlashOnBareHostname) || | |
| 1067 !CanStripTrailingSlash(url)) { | |
| 1068 AppendFormattedComponent(spec, parsed.path, unescape_rules, &url_string, | |
| 1069 &new_parsed->path, offsets_for_adjustment); | |
| 1070 } | |
| 1071 if (parsed.query.is_valid()) | |
| 1072 url_string.push_back('?'); | |
| 1073 AppendFormattedComponent(spec, parsed.query, unescape_rules, &url_string, | |
| 1074 &new_parsed->query, offsets_for_adjustment); | |
| 1075 | |
| 1076 // Reference is stored in valid, unescaped UTF-8, so we can just convert. | |
| 1077 if (parsed.ref.is_valid()) { | |
| 1078 url_string.push_back('#'); | |
| 1079 size_t ref_begin = url_string.length(); | |
| 1080 new_parsed->ref.begin = static_cast<int>(ref_begin); | |
| 1081 | |
| 1082 // Compose a list of offsets within the section. | |
| 1083 std::vector<size_t> offsets_into_ref = | |
| 1084 OffsetsIntoSection(offsets_for_adjustment, ref_begin); | |
| 1085 | |
| 1086 if (parsed.ref.len > 0) { | |
| 1087 url_string.append(UTF8ToWideAndAdjustOffsets(spec.substr(parsed.ref.begin, | |
| 1088 parsed.ref.len), | |
| 1089 &offsets_into_ref)); | |
| 1090 } | |
| 1091 size_t old_ref_len = static_cast<size_t>(parsed.ref.len); | |
| 1092 size_t new_ref_len = url_string.length() - new_parsed->ref.begin; | |
| 1093 new_parsed->ref.len = static_cast<int>(new_ref_len); | |
| 1094 | |
| 1095 // Apply offset adjustments. | |
| 1096 ApplySectionAdjustments(offsets_into_ref, offsets_for_adjustment, | |
| 1097 old_ref_len, new_ref_len, ref_begin); | |
| 1098 } | |
| 1099 | |
| 1100 // If we need to strip out http do it after the fact. This way we don't need | |
| 1101 // to worry about how offset_for_adjustment is interpreted. | |
| 1102 const size_t kHTTPSize = arraysize(kHTTP) - 1; | |
| 1103 if (omit_http && !url_string.compare(0, kHTTPSize, kHTTP)) { | |
| 1104 url_string = url_string.substr(kHTTPSize); | |
| 1105 AdjustOffset::Adjustments adjustments; | |
| 1106 adjustments.push_back(AdjustOffset::Adjustment(0, kHTTPSize, 0)); | |
| 1107 std::for_each(offsets_for_adjustment->begin(), | |
| 1108 offsets_for_adjustment->end(), | |
| 1109 AdjustOffset(adjustments)); | |
| 1110 if (prefix_end) | |
| 1111 *prefix_end -= kHTTPSize; | |
| 1112 | |
| 1113 // Adjust new_parsed. | |
| 1114 DCHECK(new_parsed->scheme.is_valid()); | |
| 1115 int delta = -(new_parsed->scheme.len + 3); // +3 for ://. | |
| 1116 new_parsed->scheme.reset(); | |
| 1117 AdjustComponents(delta, new_parsed); | |
| 1118 } | |
| 1119 | |
| 1120 return url_string; | |
| 1121 } | |
| 1122 | |
| 1123 } // namespace | 928 } // namespace |
| 1124 | 929 |
| 1125 const FormatUrlType kFormatUrlOmitNothing = 0; | 930 const FormatUrlType kFormatUrlOmitNothing = 0; |
| 1126 const FormatUrlType kFormatUrlOmitUsernamePassword = 1 << 0; | 931 const FormatUrlType kFormatUrlOmitUsernamePassword = 1 << 0; |
| 1127 const FormatUrlType kFormatUrlOmitHTTP = 1 << 1; | 932 const FormatUrlType kFormatUrlOmitHTTP = 1 << 1; |
| 1128 const FormatUrlType kFormatUrlOmitTrailingSlashOnBareHostname = 1 << 2; | 933 const FormatUrlType kFormatUrlOmitTrailingSlashOnBareHostname = 1 << 2; |
| 1129 const FormatUrlType kFormatUrlOmitAll = kFormatUrlOmitUsernamePassword | | 934 const FormatUrlType kFormatUrlOmitAll = kFormatUrlOmitUsernamePassword | |
| 1130 kFormatUrlOmitHTTP | kFormatUrlOmitTrailingSlashOnBareHostname; | 935 kFormatUrlOmitHTTP | kFormatUrlOmitTrailingSlashOnBareHostname; |
| 1131 | 936 |
| 1132 // TODO(viettrungluu): We don't want non-POD globals; change this. | 937 // TODO(viettrungluu): We don't want non-POD globals; change this. |
| (...skipping 23 matching lines...) Expand all Loading... | |
| 1156 FILE_PATH_LITERAL("#"), FILE_PATH_LITERAL("%23")); | 961 FILE_PATH_LITERAL("#"), FILE_PATH_LITERAL("%23")); |
| 1157 | 962 |
| 1158 #if defined(OS_POSIX) | 963 #if defined(OS_POSIX) |
| 1159 ReplaceSubstringsAfterOffset(&url_string, 0, | 964 ReplaceSubstringsAfterOffset(&url_string, 0, |
| 1160 FILE_PATH_LITERAL("\\"), FILE_PATH_LITERAL("%5C")); | 965 FILE_PATH_LITERAL("\\"), FILE_PATH_LITERAL("%5C")); |
| 1161 #endif | 966 #endif |
| 1162 | 967 |
| 1163 return GURL(url_string); | 968 return GURL(url_string); |
| 1164 } | 969 } |
| 1165 | 970 |
| 1166 std::wstring GetSpecificHeader(const std::wstring& headers, | |
| 1167 const std::wstring& name) { | |
| 1168 return GetSpecificHeaderT(headers, name); | |
| 1169 } | |
| 1170 | |
| 1171 std::string GetSpecificHeader(const std::string& headers, | 971 std::string GetSpecificHeader(const std::string& headers, |
| 1172 const std::string& name) { | 972 const std::string& name) { |
| 1173 return GetSpecificHeaderT(headers, name); | 973 // We want to grab the Value from the "Key: Value" pairs in the headers, |
| 974 // which should look like this (no leading spaces, \n-separated) (we format | |
| 975 // them this way in url_request_inet.cc): | |
| 976 // HTTP/1.1 200 OK\n | |
| 977 // ETag: "6d0b8-947-24f35ec0"\n | |
| 978 // Content-Length: 2375\n | |
| 979 // Content-Type: text/html; charset=UTF-8\n | |
| 980 // Last-Modified: Sun, 03 Sep 2006 04:34:43 GMT\n | |
| 981 if (headers.empty()) | |
| 982 return std::string(); | |
| 983 | |
| 984 std::string match('\n' + name + ':'); | |
| 985 | |
| 986 std::string::const_iterator begin = | |
| 987 search(headers.begin(), headers.end(), match.begin(), match.end(), | |
| 988 base::CaseInsensitiveCompareASCII<char>()); | |
| 989 | |
| 990 if (begin == headers.end()) | |
| 991 return std::string(); | |
| 992 | |
| 993 begin += match.length(); | |
| 994 | |
| 995 std::string ret; | |
| 996 TrimWhitespace(std::string(begin, find(begin, headers.end(), '\n')), TRIM_ALL, | |
| 997 &ret); | |
| 998 return ret; | |
| 1174 } | 999 } |
| 1175 | 1000 |
| 1176 bool DecodeCharset(const std::string& input, | 1001 bool DecodeCharset(const std::string& input, |
| 1177 std::string* decoded_charset, | 1002 std::string* decoded_charset, |
| 1178 std::string* value) { | 1003 std::string* value) { |
| 1179 StringTokenizer t(input, "'"); | 1004 StringTokenizer t(input, "'"); |
| 1180 t.set_options(StringTokenizer::RETURN_DELIMS); | 1005 t.set_options(StringTokenizer::RETURN_DELIMS); |
| 1181 std::string temp_charset; | 1006 std::string temp_charset; |
| 1182 std::string temp_value; | 1007 std::string temp_value; |
| 1183 int numDelimsSeen = 0; | 1008 int numDelimsSeen = 0; |
| (...skipping 54 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1238 param_value = GetHeaderParamValue(header, "name", | 1063 param_value = GetHeaderParamValue(header, "name", |
| 1239 QuoteRule::REMOVE_OUTER_QUOTES); | 1064 QuoteRule::REMOVE_OUTER_QUOTES); |
| 1240 } | 1065 } |
| 1241 if (param_value.empty()) | 1066 if (param_value.empty()) |
| 1242 return std::string(); | 1067 return std::string(); |
| 1243 if (DecodeParamValue(param_value, referrer_charset, &decoded)) | 1068 if (DecodeParamValue(param_value, referrer_charset, &decoded)) |
| 1244 return decoded; | 1069 return decoded; |
| 1245 return std::string(); | 1070 return std::string(); |
| 1246 } | 1071 } |
| 1247 | 1072 |
| 1248 std::wstring GetHeaderParamValue(const std::wstring& field, | 1073 // TODO(mpcomplete): This is a quick and dirty implementation for now. I'm |
| 1249 const std::wstring& param_name, | 1074 // sure this doesn't properly handle all (most?) cases. |
| 1250 QuoteRule::Type quote_rule) { | 1075 std::string GetHeaderParamValue(const std::string& header, |
| 1251 return GetHeaderParamValueT(field, param_name, quote_rule); | 1076 const std::string& param_name, |
| 1077 QuoteRule::Type quote_rule) { | |
| 1078 // This assumes args are formatted exactly like "bla; arg1=value; arg2=value". | |
| 1079 std::string::const_iterator param_begin = | |
| 1080 search(header.begin(), header.end(), param_name.begin(), param_name.end(), | |
| 1081 base::CaseInsensitiveCompareASCII<char>()); | |
| 1082 | |
| 1083 if (param_begin == header.end()) | |
| 1084 return std::string(); | |
| 1085 param_begin += param_name.length(); | |
| 1086 | |
| 1087 std::string whitespace(" \t"); | |
| 1088 size_t equals_offset = | |
| 1089 header.find_first_not_of(whitespace, param_begin - header.begin()); | |
| 1090 if (equals_offset == std::string::npos || header[equals_offset] != '=') | |
| 1091 return std::string(); | |
| 1092 | |
| 1093 param_begin = header.begin() + equals_offset + 1; | |
| 1094 if (param_begin == header.end()) | |
| 1095 return std::string(); | |
| 1096 | |
| 1097 std::string::const_iterator param_end; | |
| 1098 if (*param_begin == '"' && quote_rule == QuoteRule::REMOVE_OUTER_QUOTES) { | |
| 1099 ++param_begin; // skip past the quote. | |
| 1100 param_end = find(param_begin, header.end(), '"'); | |
| 1101 // If the closing quote is missing, we will treat the rest of the | |
| 1102 // string as the parameter. We can't set |param_end| to the | |
| 1103 // location of the separator (';'), since the separator is | |
| 1104 // technically quoted. See: http://crbug.com/58840 | |
| 1105 } else { | |
| 1106 param_end = find(param_begin + 1, header.end(), ';'); | |
| 1107 } | |
| 1108 | |
| 1109 return std::string(param_begin, param_end); | |
| 1252 } | 1110 } |
| 1253 | 1111 |
| 1254 std::string GetHeaderParamValue(const std::string& field, | 1112 string16 IDNToUnicode(const std::string& host, |
| 1255 const std::string& param_name, | 1113 const std::string& languages) { |
| 1256 QuoteRule::Type quote_rule) { | |
| 1257 return GetHeaderParamValueT(field, param_name, quote_rule); | |
| 1258 } | |
| 1259 | |
| 1260 // TODO(brettw) bug 734373: check the scripts for each host component and | |
| 1261 // don't un-IDN-ize if there is more than one. Alternatively, only IDN for | |
| 1262 // scripts that the user has installed. For now, just put the entire | |
| 1263 // path through IDN. Maybe this feature can be implemented in ICU itself? | |
| 1264 // | |
| 1265 // We may want to skip this step in the case of file URLs to allow unicode | |
| 1266 // UNC hostnames regardless of encodings. | |
| 1267 std::wstring IDNToUnicodeWithOffsets( | |
| 1268 const char* host, | |
| 1269 size_t host_len, | |
| 1270 const std::wstring& languages, | |
| 1271 std::vector<size_t>* offsets_for_adjustment) { | |
| 1272 // Convert the ASCII input to a wide string for ICU. | |
| 1273 string16 input16; | |
| 1274 input16.reserve(host_len); | |
| 1275 input16.insert(input16.end(), host, host + host_len); | |
| 1276 | |
| 1277 // Do each component of the host separately, since we enforce script matching | |
| 1278 // on a per-component basis. | |
| 1279 AdjustOffset::Adjustments adjustments; | |
| 1280 string16 out16; | |
| 1281 for (size_t component_start = 0, component_end; | |
| 1282 component_start < input16.length(); | |
| 1283 component_start = component_end + 1) { | |
| 1284 // Find the end of the component. | |
| 1285 component_end = input16.find('.', component_start); | |
| 1286 if (component_end == string16::npos) | |
| 1287 component_end = input16.length(); // For getting the last component. | |
| 1288 size_t component_length = component_end - component_start; | |
| 1289 size_t new_component_start = out16.length(); | |
| 1290 bool converted_idn = false; | |
| 1291 if (component_end > component_start) { | |
| 1292 // Add the substring that we just found. | |
| 1293 converted_idn = IDNToUnicodeOneComponent(input16.data() + component_start, | |
| 1294 component_length, languages, &out16); | |
| 1295 } | |
| 1296 size_t new_component_length = out16.length() - new_component_start; | |
| 1297 | |
| 1298 if (converted_idn && offsets_for_adjustment) { | |
| 1299 adjustments.push_back(AdjustOffset::Adjustment( | |
| 1300 component_start, component_length, new_component_length)); | |
| 1301 } | |
| 1302 | |
| 1303 // Need to add the dot we just found (if we found one). | |
| 1304 if (component_end < input16.length()) | |
| 1305 out16.push_back('.'); | |
| 1306 } | |
| 1307 | |
| 1308 // Make offset adjustment. | |
| 1309 if (offsets_for_adjustment && !adjustments.empty()) { | |
| 1310 std::for_each(offsets_for_adjustment->begin(), | |
| 1311 offsets_for_adjustment->end(), | |
| 1312 AdjustOffset(adjustments)); | |
| 1313 } | |
| 1314 | |
| 1315 return UTF16ToWideAndAdjustOffsets(out16, offsets_for_adjustment); | |
| 1316 } | |
| 1317 | |
| 1318 std::wstring IDNToUnicode(const char* host, | |
| 1319 size_t host_len, | |
| 1320 const std::wstring& languages, | |
| 1321 size_t* offset_for_adjustment) { | |
| 1322 std::vector<size_t> offsets; | 1114 std::vector<size_t> offsets; |
| 1323 if (offset_for_adjustment) | 1115 return IDNToUnicodeWithOffsets(host, languages, &offsets); |
| 1324 offsets.push_back(*offset_for_adjustment); | |
| 1325 std::wstring result = | |
| 1326 IDNToUnicodeWithOffsets(host, host_len, languages, &offsets); | |
| 1327 if (offset_for_adjustment) | |
| 1328 *offset_for_adjustment = offsets[0]; | |
| 1329 return result; | |
| 1330 } | 1116 } |
| 1331 | 1117 |
| 1332 std::string CanonicalizeHost(const std::string& host, | 1118 std::string CanonicalizeHost(const std::string& host, |
| 1333 url_canon::CanonHostInfo* host_info) { | 1119 url_canon::CanonHostInfo* host_info) { |
| 1334 // Try to canonicalize the host. | 1120 // Try to canonicalize the host. |
| 1335 const url_parse::Component raw_host_component( | 1121 const url_parse::Component raw_host_component( |
| 1336 0, static_cast<int>(host.length())); | 1122 0, static_cast<int>(host.length())); |
| 1337 std::string canon_host; | 1123 std::string canon_host; |
| 1338 url_canon::StdStringCanonOutput canon_host_output(&canon_host); | 1124 url_canon::StdStringCanonOutput canon_host_output(&canon_host); |
| 1339 url_canon::CanonicalizeHostVerbose(host.c_str(), raw_host_component, | 1125 url_canon::CanonicalizeHostVerbose(host.c_str(), raw_host_component, |
| 1340 &canon_host_output, host_info); | 1126 &canon_host_output, host_info); |
| 1341 | 1127 |
| 1342 if (host_info->out_host.is_nonempty() && | 1128 if (host_info->out_host.is_nonempty() && |
| 1343 host_info->family != url_canon::CanonHostInfo::BROKEN) { | 1129 host_info->family != url_canon::CanonHostInfo::BROKEN) { |
| 1344 // Success! Assert that there's no extra garbage. | 1130 // Success! Assert that there's no extra garbage. |
| 1345 canon_host_output.Complete(); | 1131 canon_host_output.Complete(); |
| 1346 DCHECK_EQ(host_info->out_host.len, static_cast<int>(canon_host.length())); | 1132 DCHECK_EQ(host_info->out_host.len, static_cast<int>(canon_host.length())); |
| 1347 } else { | 1133 } else { |
| 1348 // Empty host, or canonicalization failed. We'll return empty. | 1134 // Empty host, or canonicalization failed. We'll return empty. |
| 1349 canon_host.clear(); | 1135 canon_host.clear(); |
| 1350 } | 1136 } |
| 1351 | 1137 |
| 1352 return canon_host; | 1138 return canon_host; |
| 1353 } | 1139 } |
| 1354 | 1140 |
| 1355 std::string CanonicalizeHost(const std::wstring& host, | |
| 1356 url_canon::CanonHostInfo* host_info) { | |
| 1357 std::string converted_host; | |
| 1358 WideToUTF8(host.c_str(), host.length(), &converted_host); | |
| 1359 return CanonicalizeHost(converted_host, host_info); | |
| 1360 } | |
| 1361 | |
| 1362 std::string GetDirectoryListingHeader(const string16& title) { | 1141 std::string GetDirectoryListingHeader(const string16& title) { |
| 1363 static const base::StringPiece header( | 1142 static const base::StringPiece header( |
| 1364 NetModule::GetResource(IDR_DIR_HEADER_HTML)); | 1143 NetModule::GetResource(IDR_DIR_HEADER_HTML)); |
| 1365 // This can be null in unit tests. | 1144 // This can be null in unit tests. |
| 1366 DLOG_IF(WARNING, header.empty()) << | 1145 DLOG_IF(WARNING, header.empty()) << |
| 1367 "Missing resource: directory listing header"; | 1146 "Missing resource: directory listing header"; |
| 1368 | 1147 |
| 1369 std::string result; | 1148 std::string result; |
| 1370 if (!header.empty()) | 1149 if (!header.empty()) |
| 1371 result.assign(header.data(), header.size()); | 1150 result.assign(header.data(), header.size()); |
| (...skipping 360 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1732 UnescapeRule::Type flags = | 1511 UnescapeRule::Type flags = |
| 1733 UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS; | 1512 UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS; |
| 1734 *username = UnescapeAndDecodeUTF8URLComponent(url.username(), flags, NULL); | 1513 *username = UnescapeAndDecodeUTF8URLComponent(url.username(), flags, NULL); |
| 1735 *password = UnescapeAndDecodeUTF8URLComponent(url.password(), flags, NULL); | 1514 *password = UnescapeAndDecodeUTF8URLComponent(url.password(), flags, NULL); |
| 1736 } | 1515 } |
| 1737 | 1516 |
| 1738 std::string GetHostOrSpecFromURL(const GURL& url) { | 1517 std::string GetHostOrSpecFromURL(const GURL& url) { |
| 1739 return url.has_host() ? TrimEndingDot(url.host()) : url.spec(); | 1518 return url.has_host() ? TrimEndingDot(url.host()) : url.spec(); |
| 1740 } | 1519 } |
| 1741 | 1520 |
| 1742 void AppendFormattedHostWithOffsets( | 1521 void AppendFormattedHost(const GURL& url, |
| 1743 const GURL& url, | 1522 const std::string& languages, |
| 1744 const std::wstring& languages, | 1523 string16* output) { |
| 1745 std::wstring* output, | 1524 std::vector<size_t> offsets; |
| 1746 url_parse::Parsed* new_parsed, | 1525 AppendFormattedComponent(url.possibly_invalid_spec(), |
| 1747 std::vector<size_t>* offsets_for_adjustment) { | 1526 url.parsed_for_possibly_invalid_spec().host, offsets, |
| 1748 DCHECK(output); | 1527 HostComponentTransform(languages), output, NULL, NULL); |
| 1749 const url_parse::Component& host = | |
| 1750 url.parsed_for_possibly_invalid_spec().host; | |
| 1751 | |
| 1752 if (host.is_nonempty()) { | |
| 1753 // Handle possible IDN in the host name. | |
| 1754 size_t host_begin = output->length(); | |
| 1755 if (new_parsed) | |
| 1756 new_parsed->host.begin = static_cast<int>(host_begin); | |
| 1757 size_t old_host_len = static_cast<size_t>(host.len); | |
| 1758 | |
| 1759 // Compose a list of offsets within the host area. | |
| 1760 std::vector<size_t> offsets_into_host = | |
| 1761 OffsetsIntoSection(offsets_for_adjustment, host_begin); | |
| 1762 | |
| 1763 const std::string& spec = url.possibly_invalid_spec(); | |
| 1764 DCHECK(host.begin >= 0 && | |
| 1765 ((spec.length() == 0 && host.begin == 0) || | |
| 1766 host.begin < static_cast<int>(spec.length()))); | |
| 1767 output->append(IDNToUnicodeWithOffsets(&spec[host.begin], old_host_len, | |
| 1768 languages, &offsets_into_host)); | |
| 1769 | |
| 1770 size_t new_host_len = output->length() - host_begin; | |
| 1771 if (new_parsed) | |
| 1772 new_parsed->host.len = static_cast<int>(new_host_len); | |
| 1773 | |
| 1774 // Apply offset adjustments. | |
| 1775 ApplySectionAdjustments(offsets_into_host, offsets_for_adjustment, | |
| 1776 old_host_len, new_host_len, host_begin); | |
| 1777 } else if (new_parsed) { | |
| 1778 new_parsed->host.reset(); | |
| 1779 } | |
| 1780 } | 1528 } |
| 1781 | 1529 |
| 1782 void AppendFormattedHost(const GURL& url, | |
| 1783 const std::wstring& languages, | |
| 1784 std::wstring* output, | |
| 1785 url_parse::Parsed* new_parsed, | |
| 1786 size_t* offset_for_adjustment) { | |
| 1787 std::vector<size_t> offsets; | |
| 1788 if (offset_for_adjustment) | |
| 1789 offsets.push_back(*offset_for_adjustment); | |
| 1790 AppendFormattedHostWithOffsets(url, languages, output, new_parsed, &offsets); | |
| 1791 if (offset_for_adjustment) | |
| 1792 *offset_for_adjustment = offsets[0]; | |
| 1793 } | |
| 1794 | |
| 1795 // TODO(viettrungluu): convert the wstring |FormatUrlInternal()|. | |
| 1796 string16 FormatUrlWithOffsets(const GURL& url, | 1530 string16 FormatUrlWithOffsets(const GURL& url, |
| 1797 const std::string& languages, | 1531 const std::string& languages, |
| 1798 FormatUrlTypes format_types, | 1532 FormatUrlTypes format_types, |
| 1799 UnescapeRule::Type unescape_rules, | 1533 UnescapeRule::Type unescape_rules, |
| 1800 url_parse::Parsed* new_parsed, | 1534 url_parse::Parsed* new_parsed, |
| 1801 size_t* prefix_end, | 1535 size_t* prefix_end, |
| 1802 std::vector<size_t>* offsets_for_adjustment) { | 1536 std::vector<size_t>* offsets_for_adjustment) { |
| 1803 return WideToUTF16Hack( | 1537 url_parse::Parsed parsed_temp; |
| 1804 FormatUrlInternal(url, ASCIIToWide(languages), format_types, | 1538 if (!new_parsed) |
| 1805 unescape_rules, new_parsed, prefix_end, | 1539 new_parsed = &parsed_temp; |
| 1806 offsets_for_adjustment)); | 1540 else |
| 1541 *new_parsed = url_parse::Parsed(); | |
| 1542 std::vector<size_t> original_offsets; | |
| 1543 if (offsets_for_adjustment) | |
| 1544 original_offsets = *offsets_for_adjustment; | |
| 1545 | |
| 1546 // Special handling for view-source:. Don't use chrome::kViewSourceScheme | |
| 1547 // because this library shouldn't depend on chrome. | |
| 1548 const char* const kViewSource = "view-source"; | |
| 1549 // Reject "view-source:view-source:..." to avoid deep recursion. | |
| 1550 const char* const kViewSourceTwice = "view-source:view-source:"; | |
| 1551 if (url.SchemeIs(kViewSource) && | |
| 1552 !StartsWithASCII(url.possibly_invalid_spec(), kViewSourceTwice, false)) { | |
| 1553 return FormatViewSourceUrl(url, original_offsets, languages, format_types, | |
| 1554 unescape_rules, new_parsed, prefix_end, offsets_for_adjustment); | |
| 1555 } | |
| 1556 | |
| 1557 // We handle both valid and invalid URLs (this will give us the spec | |
| 1558 // regardless of validity). | |
| 1559 const std::string& spec = url.possibly_invalid_spec(); | |
| 1560 const url_parse::Parsed& parsed = url.parsed_for_possibly_invalid_spec(); | |
| 1561 size_t spec_length = spec.length(); | |
| 1562 | |
| 1563 // Scheme & separators. These are ASCII. | |
| 1564 string16 url_string; | |
| 1565 OffsetLimiter offset_limiter(offsets_for_adjustment, &url_string); | |
| 1566 url_string.insert(url_string.end(), spec.begin(), | |
| 1567 spec.begin() + parsed.CountCharactersBefore(url_parse::Parsed::USERNAME, | |
| 1568 true)); | |
| 1569 const char kHTTP[] = "http://"; | |
| 1570 const char kFTP[] = "ftp."; | |
| 1571 // URLFixerUpper::FixupURL() treats "ftp.foo.com" as ftp://ftp.foo.com. This | |
| 1572 // means that if we trim "http://" off a URL whose host starts with "ftp." and | |
| 1573 // the user inputs this into any field subject to fixup (which is basically | |
| 1574 // all input fields), the meaning would be changed. (In fact, often the | |
| 1575 // formatted URL is directly pre-filled into an input field.) For this reason | |
| 1576 // we avoid stripping "http://" in this case. | |
| 1577 bool omit_http = (format_types & kFormatUrlOmitHTTP) && | |
| 1578 EqualsASCII(url_string, kHTTP) && | |
| 1579 !StartsWithASCII(url.host(), kFTP, true); | |
| 1580 new_parsed->scheme = parsed.scheme; | |
| 1581 | |
| 1582 // Username & password. | |
| 1583 if ((format_types & kFormatUrlOmitUsernamePassword) != 0) { | |
| 1584 // Remove the username and password fields. We don't want to display those | |
| 1585 // to the user since they can be used for attacks, | |
| 1586 // e.g. "http://google.com:search@evil.ru/" | |
| 1587 new_parsed->username.reset(); | |
| 1588 new_parsed->password.reset(); | |
| 1589 // Update the offsets based on removed username and/or password. | |
| 1590 if (offsets_for_adjustment && !offsets_for_adjustment->empty() && | |
| 1591 (parsed.username.is_nonempty() || parsed.password.is_nonempty())) { | |
| 1592 AdjustOffset::Adjustments adjustments; | |
|
mrossetti
2011/04/26 22:13:34
The AdjustOffset::Adjustments concept would be a g
Peter Kasting
2011/04/27 02:07:19
Good idea. Rewrote this object.
| |
| 1593 if (parsed.username.is_nonempty() && parsed.password.is_nonempty()) { | |
| 1594 // The seeming off-by-two below is to account for the ':' after the username | |
| 1595 // and the '@' after the password. | |
| 1596 adjustments.push_back(AdjustOffset::Adjustment( | |
| 1597 static_cast<size_t>(parsed.username.begin), | |
| 1598 static_cast<size_t>(parsed.username.len + parsed.password.len + | |
| 1599 2), 0)); | |
| 1600 } else { | |
| 1601 const url_parse::Component* nonempty_component = | |
| 1602 parsed.username.is_nonempty() ? &parsed.username : &parsed.password; | |
| 1603 // The seeming off-by-one below is to account for the '@' after the | |
| 1604 // username/password. | |
| 1605 adjustments.push_back(AdjustOffset::Adjustment( | |
| 1606 static_cast<size_t>(nonempty_component->begin), | |
| 1607 static_cast<size_t>(nonempty_component->len + 1), 0)); | |
| 1608 } | |
| 1609 std::for_each(offsets_for_adjustment->begin(), | |
| 1610 offsets_for_adjustment->end(), AdjustOffset(adjustments)); | |
| 1611 } | |
| 1612 } else { | |
| 1613 AppendFormattedComponent(spec, parsed.username, original_offsets, | |
| 1614 NonHostComponentTransform(unescape_rules), &url_string, | |
| 1615 &new_parsed->username, offsets_for_adjustment); | |
| 1616 if (parsed.password.is_valid()) { | |
| 1617 size_t colon = parsed.username.end(); | |
| 1618 DCHECK_EQ(static_cast<size_t>(parsed.password.begin - 1), colon); | |
| 1619 std::vector<size_t>::const_iterator colon_iter = | |
| 1620 std::find(original_offsets.begin(), original_offsets.end(), colon); | |
| 1621 if (colon_iter != original_offsets.end()) { | |
| 1622 (*offsets_for_adjustment)[colon_iter - original_offsets.begin()] = | |
| 1623 url_string.length(); | |
| 1624 } | |
| 1625 url_string.push_back(':'); | |
| 1626 } | |
| 1627 AppendFormattedComponent(spec, parsed.password, original_offsets, | |
| 1628 NonHostComponentTransform(unescape_rules), &url_string, | |
| 1629 &new_parsed->password, offsets_for_adjustment); | |
| 1630 if (parsed.username.is_valid() || parsed.password.is_valid()) { | |
| 1631 size_t at_sign = (parsed.password.is_valid() ? | |
| 1632 parsed.password : parsed.username).end(); | |
| 1633 DCHECK_EQ(static_cast<size_t>(parsed.host.begin - 1), at_sign); | |
| 1634 std::vector<size_t>::const_iterator at_sign_iter = | |
| 1635 std::find(original_offsets.begin(), original_offsets.end(), at_sign); | |
| 1636 if (at_sign_iter != original_offsets.end()) { | |
| 1637 (*offsets_for_adjustment)[at_sign_iter - original_offsets.begin()] = | |
| 1638 url_string.length(); | |
| 1639 } | |
| 1640 url_string.push_back('@'); | |
| 1641 } | |
| 1642 } | |
| 1643 if (prefix_end) | |
| 1644 *prefix_end = static_cast<size_t>(url_string.length()); | |
| 1645 | |
| 1646 // Host. | |
| 1647 AppendFormattedComponent(spec, parsed.host, original_offsets, | |
| 1648 HostComponentTransform(languages), &url_string, &new_parsed->host, | |
| 1649 offsets_for_adjustment); | |
| 1650 | |
| 1651 // Port. | |
| 1652 if (parsed.port.is_nonempty()) { | |
| 1653 url_string.push_back(':'); | |
| 1654 new_parsed->port.begin = url_string.length(); | |
| 1655 url_string.insert(url_string.end(), | |
| 1656 spec.begin() + parsed.port.begin, | |
| 1657 spec.begin() + parsed.port.end()); | |
| 1658 new_parsed->port.len = url_string.length() - new_parsed->port.begin; | |
| 1659 } else { | |
| 1660 new_parsed->port.reset(); | |
| 1661 } | |
| 1662 | |
| 1663 // Path & query. Both get the same general unescape & convert treatment. | |
| 1664 if (!(format_types & kFormatUrlOmitTrailingSlashOnBareHostname) || | |
| 1665 !CanStripTrailingSlash(url)) { | |
| 1666 AppendFormattedComponent(spec, parsed.path, original_offsets, | |
| 1667 NonHostComponentTransform(unescape_rules), &url_string, | |
| 1668 &new_parsed->path, offsets_for_adjustment); | |
| 1669 } | |
| 1670 if (parsed.query.is_valid()) | |
| 1671 url_string.push_back('?'); | |
| 1672 AppendFormattedComponent(spec, parsed.query, original_offsets, | |
| 1673 NonHostComponentTransform(unescape_rules), &url_string, | |
| 1674 &new_parsed->query, offsets_for_adjustment); | |
| 1675 | |
| 1676 // Ref. This is valid, unescaped UTF-8, so we can just convert. | |
| 1677 if (parsed.ref.is_valid()) { | |
| 1678 url_string.push_back('#'); | |
| 1679 size_t original_ref_begin = static_cast<size_t>(parsed.ref.begin); | |
| 1680 size_t original_ref_len = static_cast<size_t>(parsed.ref.len); | |
| 1681 size_t output_ref_begin = url_string.length(); | |
| 1682 new_parsed->ref.begin = static_cast<int>(output_ref_begin); | |
| 1683 | |
| 1684 std::vector<size_t> offsets_into_ref( | |
| 1685 OffsetsIntoComponent(original_offsets, original_ref_begin)); | |
| 1686 if (parsed.ref.len > 0) { | |
| 1687 url_string.append(UTF8ToUTF16AndAdjustOffsets( | |
| 1688 spec.substr(original_ref_begin, static_cast<size_t>(parsed.ref.len)), | |
| 1689 &offsets_into_ref)); | |
| 1690 } | |
| 1691 | |
| 1692 new_parsed->ref.len = | |
| 1693 static_cast<int>(url_string.length() - new_parsed->ref.begin); | |
| 1694 AdjustForComponentTransform(original_offsets, original_ref_begin, | |
| 1695 static_cast<size_t>(parsed.ref.end()), offsets_into_ref, | |
| 1696 output_ref_begin, offsets_for_adjustment); | |
| 1697 } | |
| 1698 | |
| 1699 // If we need to strip out http, do it after the fact. This way we don't need | |
| 1700 // to worry about how offsets_for_adjustment is interpreted. | |
| 1701 if (omit_http && StartsWith(url_string, ASCIIToUTF16(kHTTP), true)) { | |
| 1702 const size_t kHTTPSize = arraysize(kHTTP) - 1; | |
| 1703 url_string = url_string.substr(kHTTPSize); | |
| 1704 if (offsets_for_adjustment && !offsets_for_adjustment->empty()) { | |
| 1705 AdjustOffset::Adjustments adjustments; | |
| 1706 adjustments.push_back(AdjustOffset::Adjustment(0, kHTTPSize, 0)); | |
| 1707 std::for_each(offsets_for_adjustment->begin(), | |
| 1708 offsets_for_adjustment->end(), AdjustOffset(adjustments)); | |
| 1709 } | |
| 1710 if (prefix_end) | |
| 1711 *prefix_end -= kHTTPSize; | |
| 1712 | |
| 1713 // Adjust new_parsed. | |
| 1714 DCHECK(new_parsed->scheme.is_valid()); | |
| 1715 int delta = -(new_parsed->scheme.len + 3); // +3 for ://. | |
| 1716 new_parsed->scheme.reset(); | |
| 1717 AdjustComponents(delta, new_parsed); | |
| 1718 } | |
| 1719 | |
| 1720 return url_string; | |
| 1807 } | 1721 } |
| 1808 | 1722 |
| 1809 string16 FormatUrl(const GURL& url, | 1723 string16 FormatUrl(const GURL& url, |
| 1810 const std::string& languages, | 1724 const std::string& languages, |
| 1811 FormatUrlTypes format_types, | 1725 FormatUrlTypes format_types, |
| 1812 UnescapeRule::Type unescape_rules, | 1726 UnescapeRule::Type unescape_rules, |
| 1813 url_parse::Parsed* new_parsed, | 1727 url_parse::Parsed* new_parsed, |
| 1814 size_t* prefix_end, | 1728 size_t* prefix_end, |
| 1815 size_t* offset_for_adjustment) { | 1729 size_t* offset_for_adjustment) { |
| 1816 std::vector<size_t> offsets; | 1730 std::vector<size_t> offsets; |
| 1817 if (offset_for_adjustment) | 1731 if (offset_for_adjustment) |
| 1818 offsets.push_back(*offset_for_adjustment); | 1732 offsets.push_back(*offset_for_adjustment); |
| 1819 string16 result = WideToUTF16Hack( | 1733 string16 result = FormatUrlWithOffsets(url, languages, format_types, |
| 1820 FormatUrlInternal(url, ASCIIToWide(languages), format_types, | 1734 unescape_rules, new_parsed, prefix_end, &offsets); |
| 1821 unescape_rules, new_parsed, prefix_end, &offsets)); | |
| 1822 if (offset_for_adjustment) | 1735 if (offset_for_adjustment) |
| 1823 *offset_for_adjustment = offsets[0]; | 1736 *offset_for_adjustment = offsets[0]; |
| 1824 return result; | 1737 return result; |
| 1825 } | 1738 } |
| 1826 | 1739 |
| 1827 bool CanStripTrailingSlash(const GURL& url) { | 1740 bool CanStripTrailingSlash(const GURL& url) { |
| 1828 // Omit the path only for standard, non-file URLs with nothing but "/" after | 1741 // Omit the path only for standard, non-file URLs with nothing but "/" after |
| 1829 // the hostname. | 1742 // the hostname. |
| 1830 return url.IsStandard() && !url.SchemeIsFile() && !url.has_query() && | 1743 return url.IsStandard() && !url.SchemeIsFile() && !url.has_query() && |
| 1831 !url.has_ref() && url.path() == "/"; | 1744 !url.has_ref() && url.path() == "/"; |
| (...skipping 435 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 2267 } | 2180 } |
| 2268 | 2181 |
| 2269 NetworkInterface::NetworkInterface(const std::string& name, | 2182 NetworkInterface::NetworkInterface(const std::string& name, |
| 2270 const IPAddressNumber& address) | 2183 const IPAddressNumber& address) |
| 2271 : name(name), address(address) { | 2184 : name(name), address(address) { |
| 2272 } | 2185 } |
| 2273 | 2186 |
| 2274 NetworkInterface::~NetworkInterface() { | 2187 NetworkInterface::~NetworkInterface() { |
| 2275 } | 2188 } |
| 2276 | 2189 |
| 2277 ClampComponentOffset::ClampComponentOffset(size_t component_start) | |
| 2278 : component_start(component_start) {} | |
| 2279 | |
| 2280 size_t ClampComponentOffset::operator()(size_t offset) { | |
| 2281 return (offset >= component_start) ? | |
| 2282 offset : std::wstring::npos; | |
| 2283 } | |
| 2284 | |
| 2285 } // namespace net | 2190 } // namespace net |
| OLD | NEW |