net/base/net_util.cc - Issue 6898026: Eliminate wstring from base/utf_offset_string_conversions.h, net/base/escape.h, and net/base/net_...

Side by Side Diff: net/base/net_util.cc

Issue 6898026: Eliminate wstring from base/utf_offset_string_conversions.h, net/base/escape.h, and net/base/net_... (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/

Patch Set: '' Created 9 years, 8 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "net/base/net_util.h"	5 #include "net/base/net_util.h"

6	6

7 #include <unicode/regex.h>	7 #include <unicode/regex.h>

8 #include <unicode/ucnv.h>	8 #include <unicode/ucnv.h>

9 #include <unicode/uidna.h>	9 #include <unicode/uidna.h>

10 #include <unicode/ulocdata.h>	10 #include <unicode/ulocdata.h>

(...skipping 137 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
148 0xFFFF, // Used to block all invalid port numbers (see	148 0xFFFF, // Used to block all invalid port numbers (see

149 // third_party/WebKit/Source/WebCore/platform/KURLGoogle.cpp, port())	149 // third_party/WebKit/Source/WebCore/platform/KURLGoogle.cpp, port())

150 };	150 };

151	151

152 // FTP overrides the following restricted ports.	152 // FTP overrides the following restricted ports.

153 static const int kAllowedFtpPorts[] = {	153 static const int kAllowedFtpPorts[] = {

154 21, // ftp data	154 21, // ftp data

155 22, // ssh	155 22, // ssh

156 };	156 };

157	157

158 template<typename STR>

159 STR GetSpecificHeaderT(const STR& headers, const STR& name) {

160 // We want to grab the Value from the "Key: Value" pairs in the headers,

161 // which should look like this (no leading spaces, \n-separated) (we format

162 // them this way in url_request_inet.cc):

163 // HTTP/1.1 200 OK\n

164 // ETag: "6d0b8-947-24f35ec0"\n

165 // Content-Length: 2375\n

166 // Content-Type: text/html; charset=UTF-8\n

167 // Last-Modified: Sun, 03 Sep 2006 04:34:43 GMT\n

168 if (headers.empty())

169 return STR();

170

171 STR match;

172 match.push_back('\n');

173 match.append(name);

174 match.push_back(':');

175

176 typename STR::const_iterator begin =

177 search(headers.begin(), headers.end(), match.begin(), match.end(),

178 base::CaseInsensitiveCompareASCII<typename STR::value_type>());

179

180 if (begin == headers.end())

181 return STR();

182

183 begin += match.length();

184

185 typename STR::const_iterator end = find(begin, headers.end(), '\n');

186

187 STR ret;

188 TrimWhitespace(STR(begin, end), TRIM_ALL, &ret);

189 return ret;

190 }

191

192 // Similar to Base64Decode. Decodes a Q-encoded string to a sequence	158 // Similar to Base64Decode. Decodes a Q-encoded string to a sequence

193 // of bytes. If input is invalid, return false.	159 // of bytes. If input is invalid, return false.

194 bool QPDecode(const std::string& input, std::string* output) {	160 bool QPDecode(const std::string& input, std::string* output) {

195 std::string temp;	161 std::string temp;

196 temp.reserve(input.size());	162 temp.reserve(input.size());

197 std::string::const_iterator it = input.begin();	163 std::string::const_iterator it = input.begin();

198 while (it != input.end()) {	164 while (it != input.end()) {

199 if (*it == '_') {	165 if (*it == '_') {

200 temp.push_back(' ');	166 temp.push_back(' ');

201 } else if (*it == '=') {	167 } else if (*it == '=') {

(...skipping 67 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
269 *is_rfc2047 = false;	235 *is_rfc2047 = false;

270 output->clear();	236 output->clear();

271 if (encoded_word.empty())	237 if (encoded_word.empty())

272 return true;	238 return true;

273	239

274 if (!IsStringASCII(encoded_word)) {	240 if (!IsStringASCII(encoded_word)) {

275 // Try UTF-8, referrer_charset and the native OS default charset in turn.	241 // Try UTF-8, referrer_charset and the native OS default charset in turn.

276 if (IsStringUTF8(encoded_word)) {	242 if (IsStringUTF8(encoded_word)) {

277 *output = encoded_word;	243 *output = encoded_word;

278 } else {	244 } else {

279 std::wstring wide_output;	245 string16 utf16_output;

280 if (!referrer_charset.empty() &&	246 if (!referrer_charset.empty() &&

281 base::CodepageToWide(encoded_word, referrer_charset.c_str(),	247 base::CodepageToUTF16(encoded_word, referrer_charset.c_str(),

282 base::OnStringConversionError::FAIL,	248 base::OnStringConversionError::FAIL,

283 &wide_output)) {	249 &utf16_output)) {

284 *output = WideToUTF8(wide_output);	250 *output = UTF16ToUTF8(utf16_output);

285 } else {	251 } else {

286 *output = WideToUTF8(base::SysNativeMBToWide(encoded_word));	252 *output = WideToUTF8(base::SysNativeMBToWide(encoded_word));

287 }	253 }

288 }	254 }

289	255

290 return true;	256 return true;

291 }	257 }

292	258

293 // RFC 2047 : one of encoding methods supported by Firefox and relatively	259 // RFC 2047 : one of encoding methods supported by Firefox and relatively

294 // widely used by web servers.	260 // widely used by web servers.

(...skipping 112 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
407 std::string decoded;	373 std::string decoded;

408 if (!DecodeWord(t.token(), referrer_charset, &is_previous_token_rfc2047,	374 if (!DecodeWord(t.token(), referrer_charset, &is_previous_token_rfc2047,

409 &decoded))	375 &decoded))

410 return false;	376 return false;

411 tmp.append(decoded);	377 tmp.append(decoded);

412 }	378 }

413 output->swap(tmp);	379 output->swap(tmp);

414 return true;	380 return true;

415 }	381 }

416	382

417 // TODO(mpcomplete): This is a quick and dirty implementation for now. I'm

418 // sure this doesn't properly handle all (most?) cases.

419 template<typename STR>

420 STR GetHeaderParamValueT(const STR& header, const STR& param_name,

421 QuoteRule::Type quote_rule) {

422 // This assumes args are formatted exactly like "bla; arg1=value; arg2=value".

423 typename STR::const_iterator param_begin =

424 search(header.begin(), header.end(), param_name.begin(), param_name.end(),

425 base::CaseInsensitiveCompareASCII<typename STR::value_type>());

426

427 if (param_begin == header.end())

428 return STR();

429 param_begin += param_name.length();

430

431 STR whitespace;

432 whitespace.push_back(' ');

433 whitespace.push_back('\t');

434 const typename STR::size_type equals_offset =

435 header.find_first_not_of(whitespace, param_begin - header.begin());

436 if (equals_offset == STR::npos || header.at(equals_offset) != '=')

437 return STR();

438

439 param_begin = header.begin() + equals_offset + 1;

440 if (param_begin == header.end())

441 return STR();

442

443 typename STR::const_iterator param_end;

444 if (*param_begin == '"' && quote_rule == QuoteRule::REMOVE_OUTER_QUOTES) {

445 ++param_begin; // skip past the quote.

446 param_end = find(param_begin, header.end(), '"');

447 // If the closing quote is missing, we will treat the rest of the

448 // string as the parameter. We can't set |param_end| to the

449 // location of the separator (';'), since the separator is

450 // technically quoted. See: http://crbug.com/58840

451 } else {

452 param_end = find(param_begin+1, header.end(), ';');

453 }

454

455 return STR(param_begin, param_end);

456 }

457

458 // Does some simple normalization of scripts so we can allow certain scripts	383 // Does some simple normalization of scripts so we can allow certain scripts

459 // to exist together.	384 // to exist together.

460 // TODO(brettw) bug 880223: we should allow some other languages to be	385 // TODO(brettw) bug 880223: we should allow some other languages to be

461 // combined such as Chinese and Latin. We will probably need a more	386 // combined such as Chinese and Latin. We will probably need a more

462 // complicated system of language pairs to have more fine-grained control.	387 // complicated system of language pairs to have more fine-grained control.

463 UScriptCode NormalizeScript(UScriptCode code) {	388 UScriptCode NormalizeScript(UScriptCode code) {

464 switch (code) {	389 switch (code) {

465 case USCRIPT_KATAKANA:	390 case USCRIPT_KATAKANA:

466 case USCRIPT_HIRAGANA:	391 case USCRIPT_HIRAGANA:

467 case USCRIPT_KATAKANA_OR_HIRAGANA:	392 case USCRIPT_KATAKANA_OR_HIRAGANA:

(...skipping 118 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
586 ulocdata_close(uld);	511 ulocdata_close(uld);

587 }	512 }

588 }	513 }

589 return !lang_set->isEmpty() && lang_set->containsAll(component_characters);	514 return !lang_set->isEmpty() && lang_set->containsAll(component_characters);

590 }	515 }

591	516

592 // Returns true if the given Unicode host component is safe to display to the	517 // Returns true if the given Unicode host component is safe to display to the

593 // user.	518 // user.

594 bool IsIDNComponentSafe(const char16* str,	519 bool IsIDNComponentSafe(const char16* str,

595 int str_len,	520 int str_len,

596 const std::wstring& languages) {	521 const std::string& languages) {

597 // Most common cases (non-IDN) do not reach here so that we don't	522 // Most common cases (non-IDN) do not reach here so that we don't

598 // need a fast return path.	523 // need a fast return path.

599 // TODO(jungshik) : Check if there's any character inappropriate	524 // TODO(jungshik) : Check if there's any character inappropriate

600 // (although allowed) for domain names.	525 // (although allowed) for domain names.

601 // See http://www.unicode.org/reports/tr39/#IDN_Security_Profiles and	526 // See http://www.unicode.org/reports/tr39/#IDN_Security_Profiles and

602 // http://www.unicode.org/reports/tr39/data/xidmodifications.txt	527 // http://www.unicode.org/reports/tr39/data/xidmodifications.txt

603 // For now, we borrow the list from Mozilla and tweaked it slightly.	528 // For now, we borrow the list from Mozilla and tweaked it slightly.

604 // (e.g. Characters like U+00A0, U+3000, U+3002 are omitted because	529 // (e.g. Characters like U+00A0, U+3000, U+3002 are omitted because

605 // they're gonna be canonicalized to U+0020 and full stop before	530 // they're gonna be canonicalized to U+0020 and full stop before

606 // reaching here.)	531 // reaching here.)

(...skipping 63 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
670 // (sync'd with characters allowed in url_canon_host with square	595 // (sync'd with characters allowed in url_canon_host with square

671 // brackets excluded.) See kHostCharLookup[] array in url_canon_host.cc.	596 // brackets excluded.) See kHostCharLookup[] array in url_canon_host.cc.

672 icu::UnicodeSet common_characters(UNICODE_STRING_SIMPLE("[[0-9]\\-_+\\ ]"),	597 icu::UnicodeSet common_characters(UNICODE_STRING_SIMPLE("[[0-9]\\-_+\\ ]"),

673 status);	598 status);

674 DCHECK(U_SUCCESS(status));	599 DCHECK(U_SUCCESS(status));

675 // Subtract common characters because they're always allowed so that	600 // Subtract common characters because they're always allowed so that

676 // we just have to check if a language-specific set contains	601 // we just have to check if a language-specific set contains

677 // the remainder.	602 // the remainder.

678 component_characters.removeAll(common_characters);	603 component_characters.removeAll(common_characters);

679	604

680 std::string languages_list(WideToASCII(languages));	605 StringTokenizer t(languages, ",");

681 StringTokenizer t(languages_list, ",");

682 while (t.GetNext()) {	606 while (t.GetNext()) {

683 if (IsComponentCoveredByLang(component_characters, t.token()))	607 if (IsComponentCoveredByLang(component_characters, t.token()))

684 return true;	608 return true;

685 }	609 }

686 return false;	610 return false;

687 }	611 }

688	612

689 // Converts one component of a host (between dots) to IDN if safe. The result	613 // Converts one component of a host (between dots) to IDN if safe. The result

690 // will be APPENDED to the given output string and will be the same as the input	614 // will be APPENDED to the given output string and will be the same as the input

691 // if it is not IDN or the IDN is unsafe to display. Returns whether any	615 // if it is not IDN or the IDN is unsafe to display. Returns whether any

692 // conversion was performed.	616 // conversion was performed.

693 bool IDNToUnicodeOneComponent(const char16* comp,	617 bool IDNToUnicodeOneComponent(const char16* comp,

694 size_t comp_len,	618 size_t comp_len,

695 const std::wstring& languages,	619 const std::string& languages,

696 string16* out) {	620 string16* out) {

697 DCHECK(out);	621 DCHECK(out);

698 if (comp_len == 0)	622 if (comp_len == 0)

699 return false;	623 return false;

700	624

701 // Only transform if the input can be an IDN component.	625 // Only transform if the input can be an IDN component.

702 static const char16 kIdnPrefix[] = {'x', 'n', '-', '-'};	626 static const char16 kIdnPrefix[] = {'x', 'n', '-', '-'};

703 if ((comp_len > arraysize(kIdnPrefix)) &&	627 if ((comp_len > arraysize(kIdnPrefix)) &&

704 !memcmp(comp, kIdnPrefix, arraysize(kIdnPrefix) * sizeof(char16))) {	628 !memcmp(comp, kIdnPrefix, arraysize(kIdnPrefix) * sizeof(char16))) {

705 // Repeatedly expand the output string until it's big enough. It looks like	629 // Repeatedly expand the output string until it's big enough. It looks like

(...skipping 21 matching lines...) Expand all Loading...
727 // Failed, revert back to original string.	651 // Failed, revert back to original string.

728 out->resize(original_length);	652 out->resize(original_length);

729 }	653 }

730	654

731 // We get here with no IDN or on error, in which case we just append the	655 // We get here with no IDN or on error, in which case we just append the

732 // literal input.	656 // literal input.

733 out->append(comp, comp_len);	657 out->append(comp, comp_len);

734 return false;	658 return false;

735 }	659 }

736	660

	661 // TODO(brettw) bug 734373: check the scripts for each host component and

	662 // don't un-IDN-ize if there is more than one. Alternatively, only IDN for

	663 // scripts that the user has installed. For now, just put the entire

	664 // path through IDN. Maybe this feature can be implemented in ICU itself?

	665 //

	666 // We may want to skip this step in the case of file URLs to allow unicode

	667 // UNC hostnames regardless of encodings.

	668 string16 IDNToUnicodeWithOffsets(

	669 const char* host,

	670 size_t host_len,

	671 const std::string& languages,

	672 std::vector<size_t>* offsets_for_adjustment) {

	673 // Convert the ASCII input to a string16 for ICU.

	674 string16 input16;

	675 input16.reserve(host_len);

	676 input16.insert(input16.end(), host, host + host_len);

	677

	678 // Do each component of the host separately, since we enforce script matching

	679 // on a per-component basis.

	680 AdjustOffset::Adjustments adjustments;

	681 string16 out16;

	682 for (size_t component_start = 0, component_end;

	683 component_start < input16.length();

	684 component_start = component_end + 1) {

	685 // Find the end of the component.

	686 component_end = input16.find('.', component_start);

	687 if (component_end == string16::npos)

	688 component_end = input16.length(); // For getting the last component.

	689 size_t component_length = component_end - component_start;

	690 size_t new_component_start = out16.length();

	691 bool converted_idn = false;

	692 if (component_end > component_start) {

	693 // Add the substring that we just found.

	694 converted_idn = IDNToUnicodeOneComponent(input16.data() + component_start,

	695 component_length, languages, &out16);

	696 }

	697 size_t new_component_length = out16.length() - new_component_start;

	698

	699 if (converted_idn && offsets_for_adjustment) {

	700 adjustments.push_back(AdjustOffset::Adjustment(

	701 component_start, component_length, new_component_length));

	702 }

	703

	704 // Need to add the dot we just found (if we found one).

	705 if (component_end < input16.length())

	706 out16.push_back('.');

	707 }

	708

	709 // Make offset adjustment.

	710 if (offsets_for_adjustment) {

	711 if (!adjustments.empty()) {

	712 std::for_each(offsets_for_adjustment->begin(),

	713 offsets_for_adjustment->end(),

	714 AdjustOffset(adjustments));

	715 }

	716 std::for_each(offsets_for_adjustment->begin(),

	717 offsets_for_adjustment->end(),

	718 LimitOffset<string16>(out16.length()));

	719 }

	720

	721 return out16;

	722 }

	723

737 struct SubtractFromOffset {	724 struct SubtractFromOffset {

738 explicit SubtractFromOffset(size_t amount)	725 explicit SubtractFromOffset(size_t amount)

739 : amount(amount) {}	726 : amount(amount) {}

740 void operator()(size_t& offset) {	727 void operator()(size_t& offset) {

741 if (offset != std::wstring::npos) {	728 if (offset != string16::npos) {

742 if (offset >= amount)	729 if (offset >= amount)

743 offset -= amount;	730 offset -= amount;

744 else	731 else

745 offset = std::wstring::npos;	732 offset = string16::npos;

746 }	733 }

747 }	734 }

748	735

749 size_t amount;	736 size_t amount;

750 };	737 };

751	738

752 struct AddToOffset {	739 struct AddToOffset {

753 explicit AddToOffset(size_t amount)	740 explicit AddToOffset(size_t amount)

754 : amount(amount) {}	741 : amount(amount) {}

755 void operator()(size_t& offset) {	742 void operator()(size_t& offset) {

756 if (offset != std::wstring::npos)	743 if (offset != string16::npos)

757 offset += amount;	744 offset += amount;

758 }	745 }

759	746

760 size_t amount;	747 size_t amount;

761 };	748 };

762	749

763 std::vector<size_t> OffsetsIntoSection(	750 std::vector<size_t> OffsetsIntoSection(

764 std::vector<size_t>* offsets_for_adjustment,	751 std::vector<size_t>* offsets_for_adjustment,

765 size_t section_begin) {	752 size_t section_begin) {

766 std::vector<size_t> offsets_into_section;	753 std::vector<size_t> offsets_into_section;

(...skipping 15 matching lines...) Expand all Loading...
782 size_t section_begin) {	769 size_t section_begin) {

783 if (offsets_for_adjustment) {	770 if (offsets_for_adjustment) {

784 DCHECK_EQ(offsets_for_adjustment->size(), offsets_into_section.size());	771 DCHECK_EQ(offsets_for_adjustment->size(), offsets_into_section.size());

785 std::vector<size_t>::const_iterator host_offsets_iter =	772 std::vector<size_t>::const_iterator host_offsets_iter =

786 offsets_into_section.begin();	773 offsets_into_section.begin();

787 for (std::vector<size_t>::iterator offsets_iter =	774 for (std::vector<size_t>::iterator offsets_iter =

788 offsets_for_adjustment->begin();	775 offsets_for_adjustment->begin();

789 offsets_iter != offsets_for_adjustment->end();	776 offsets_iter != offsets_for_adjustment->end();

790 ++offsets_iter, ++host_offsets_iter) {	777 ++offsets_iter, ++host_offsets_iter) {

791 size_t offset = *offsets_iter;	778 size_t offset = *offsets_iter;

792 if (offset == std::wstring::npos || offset < section_begin) {	779 if (offset == string16::npos || offset < section_begin) {

793 // The offset is before the host section so leave it as is.	780 // The offset is before the host section so leave it as is.

794 continue;	781 continue;

795 }	782 }

796 if (offset >= section_begin + old_section_len) {	783 if (offset >= section_begin + old_section_len) {

797 // The offset is after the host section so adjust by host length delta.	784 // The offset is after the host section so adjust by host length delta.

798 offset += new_section_len - old_section_len;	785 offset += new_section_len - old_section_len;

799 } else if (*host_offsets_iter != std::wstring::npos) {	786 } else if (*host_offsets_iter != string16::npos) {

800 // The offset is within the host and valid so adjust by the host	787 // The offset is within the host and valid so adjust by the host

801 // reformatting offsets results.	788 // reformatting offsets results.

802 offset = section_begin + *host_offsets_iter;	789 offset = section_begin + *host_offsets_iter;

803 } else {	790 } else {

804 // The offset is invalid.	791 // The offset is invalid.

805 offset = std::wstring::npos;	792 offset = string16::npos;

806 }	793 }

807 *offsets_iter = offset;	794 *offsets_iter = offset;

808 }	795 }

809 }	796 }

810 }	797 }

811	798

812 // If |component| is valid, its begin is incremented by |delta|.	799 // If |component| is valid, its begin is incremented by |delta|.

813 void AdjustComponent(int delta, url_parse::Component* component) {	800 void AdjustComponent(int delta, url_parse::Component* component) {

814 if (!component->is_valid())	801 if (!component->is_valid())

815 return;	802 return;

816	803

817 DCHECK(delta >= 0 || component->begin >= -delta);	804 DCHECK(delta >= 0 || component->begin >= -delta);

818 component->begin += delta;	805 component->begin += delta;

819 }	806 }

820	807

821 // Adjusts all the components of |parsed| by |delta|, except for the scheme.	808 // Adjusts all the components of |parsed| by |delta|, except for the scheme.

822 void AdjustComponents(int delta, url_parse::Parsed* parsed) {	809 void AdjustComponents(int delta, url_parse::Parsed* parsed) {

823 AdjustComponent(delta, &(parsed->username));	810 AdjustComponent(delta, &(parsed->username));

824 AdjustComponent(delta, &(parsed->password));	811 AdjustComponent(delta, &(parsed->password));

825 AdjustComponent(delta, &(parsed->host));	812 AdjustComponent(delta, &(parsed->host));

826 AdjustComponent(delta, &(parsed->port));	813 AdjustComponent(delta, &(parsed->port));

827 AdjustComponent(delta, &(parsed->path));	814 AdjustComponent(delta, &(parsed->path));

828 AdjustComponent(delta, &(parsed->query));	815 AdjustComponent(delta, &(parsed->query));

829 AdjustComponent(delta, &(parsed->ref));	816 AdjustComponent(delta, &(parsed->ref));

830 }	817 }

831	818

832 std::wstring FormatUrlInternal(const GURL& url,	819 // Helper for FormatUrlWithOffsets().

833 const std::wstring& languages,	820 string16 FormatViewSourceUrl(const GURL& url,

834 FormatUrlTypes format_types,	821 const std::string& languages,

835 UnescapeRule::Type unescape_rules,	822 FormatUrlTypes format_types,

836 url_parse::Parsed* new_parsed,	823 UnescapeRule::Type unescape_rules,

837 size_t* prefix_end,	824 url_parse::Parsed* new_parsed,

838 std::vector<size_t>* offsets_for_adjustment);	825 size_t* prefix_end,

839	826 std::vector<size_t>* offsets_for_adjustment) {

840 // Helper for FormatUrl()/FormatUrlInternal().

841 std::wstring FormatViewSourceUrl(const GURL& url,

842 const std::wstring& languages,

843 FormatUrlTypes format_types,

844 UnescapeRule::Type unescape_rules,

845 url_parse::Parsed* new_parsed,

846 size_t* prefix_end,

847 std::vector<size_t>* offsets_for_adjustment) {

848 DCHECK(new_parsed);	827 DCHECK(new_parsed);

849 DCHECK(offsets_for_adjustment);	828 DCHECK(offsets_for_adjustment);

850 const wchar_t* const kWideViewSource = L"view-source:";	829 const char kViewSource[] = "view-source:";

851 const size_t kViewSourceLengthPlus1 = 12;	830 const size_t kViewSourceLength = arraysize(kViewSource) - 1;

852 std::vector<size_t> saved_offsets(*offsets_for_adjustment);	831 std::vector<size_t> saved_offsets(*offsets_for_adjustment);

853	832

854 GURL real_url(url.possibly_invalid_spec().substr(kViewSourceLengthPlus1));	833 GURL real_url(url.possibly_invalid_spec().substr(kViewSourceLength));

855 // Clamp the offsets to the source area.	834 // Clamp the offsets to the source area.

856 std::for_each(offsets_for_adjustment->begin(),	835 std::for_each(offsets_for_adjustment->begin(),

857 offsets_for_adjustment->end(),	836 offsets_for_adjustment->end(),

858 SubtractFromOffset(kViewSourceLengthPlus1));	837 SubtractFromOffset(kViewSourceLength));

859 std::wstring result = FormatUrlInternal(real_url, languages, format_types,	838 string16 result = FormatUrlWithOffsets(real_url, languages, format_types,

860 unescape_rules, new_parsed, prefix_end, offsets_for_adjustment);	839 unescape_rules, new_parsed, prefix_end, offsets_for_adjustment);

861 result.insert(0, kWideViewSource);	840 result.insert(0, ASCIIToUTF16(kViewSource));

862	841

863 // Adjust position values.	842 // Adjust position values.

864 if (new_parsed->scheme.is_nonempty()) {	843 if (new_parsed->scheme.is_nonempty()) {

865 // Assume "view-source:real-scheme" as a scheme.	844 // Assume "view-source:real-scheme" as a scheme.

866 new_parsed->scheme.len += kViewSourceLengthPlus1;	845 new_parsed->scheme.len += kViewSourceLength;

867 } else {	846 } else {

868 new_parsed->scheme.begin = 0;	847 new_parsed->scheme.begin = 0;

869 new_parsed->scheme.len = kViewSourceLengthPlus1 - 1;	848 new_parsed->scheme.len = kViewSourceLength - 1;

870 }	849 }

871 AdjustComponents(kViewSourceLengthPlus1, new_parsed);	850 AdjustComponents(kViewSourceLength, new_parsed);

872 if (prefix_end)	851 if (prefix_end)

873 *prefix_end += kViewSourceLengthPlus1;	852 *prefix_end += kViewSourceLength;

874 std::for_each(offsets_for_adjustment->begin(),	853 std::for_each(offsets_for_adjustment->begin(),

875 offsets_for_adjustment->end(),	854 offsets_for_adjustment->end(),

876 AddToOffset(kViewSourceLengthPlus1));	855 AddToOffset(kViewSourceLength));

877 // Restore all offsets which were not affected by FormatUrlInternal.	856 // Restore all offsets which were not affected by FormatUrlWithOffsets().

878 DCHECK_EQ(saved_offsets.size(), offsets_for_adjustment->size());	857 DCHECK_EQ(saved_offsets.size(), offsets_for_adjustment->size());

879 for (size_t i = 0; i < saved_offsets.size(); ++i) {	858 for (size_t i = 0; i < saved_offsets.size(); ++i) {

880 if (saved_offsets[i] < kViewSourceLengthPlus1)	859 if (saved_offsets[i] < kViewSourceLength)

881 (*offsets_for_adjustment)[i] = saved_offsets[i];	860 (*offsets_for_adjustment)[i] = saved_offsets[i];

882 }	861 }

883 return result;	862 return result;

884 }	863 }

885	864

886 // Appends the substring |in_component| inside of the URL |spec| to |output|,	865 // Appends the substring |in_component| inside of the URL |spec| to |output|,

887 // and the resulting range will be filled into |out_component|. |unescape_rules|	866 // and the resulting range will be filled into |out_component|. |unescape_rules|

888 // defines how to clean the URL for human readability. |offsets_for_adjustment|	867 // defines how to clean the URL for human readability. |offsets_for_adjustment|

889 // is an array of offsets into |output| each of which will be adjusted based on	868 // is an array of offsets into |output| each of which will be adjusted based on

890 // how it maps to the component being converted; if it is less than	869 // how it maps to the component being converted; if it is less than

891 // output->length(), it will be untouched, and if it is greater than	870 // output->length(), it will be untouched, and if it is greater than

892 // output->length() + in_component.len it will be adjusted by the difference in	871 // output->length() + in_component.len it will be adjusted by the difference in

893 // lengths between the input and output components. Otherwise it points into	872 // lengths between the input and output components. Otherwise it points into

894 // the component being converted, and is adjusted to point to the same logical	873 // the component being converted, and is adjusted to point to the same logical

895 // place in |output|. |offsets_for_adjustment| may not be NULL.	874 // place in |output|. |offsets_for_adjustment| may not be NULL.

896 void AppendFormattedComponent(const std::string& spec,	875 void AppendFormattedComponent(const std::string& spec,

897 const url_parse::Component& in_component,	876 const url_parse::Component& in_component,

898 UnescapeRule::Type unescape_rules,	877 UnescapeRule::Type unescape_rules,

899 std::wstring* output,	878 string16* output,

900 url_parse::Component* out_component,	879 url_parse::Component* out_component,

901 std::vector<size_t>* offsets_for_adjustment) {	880 std::vector<size_t>* offsets_for_adjustment) {

902 DCHECK(output);	881 DCHECK(output);

903 DCHECK(offsets_for_adjustment);	882 DCHECK(offsets_for_adjustment);

904 if (in_component.is_nonempty()) {	883 if (in_component.is_nonempty()) {

905 size_t component_begin = output->length();	884 size_t component_begin = output->length();

906 out_component->begin = static_cast<int>(component_begin);	885 out_component->begin = static_cast<int>(component_begin);

907	886

908 // Compose a list of offsets within the component area.	887 // Compose a list of offsets within the component area.

909 std::vector<size_t> offsets_into_component =	888 std::vector<size_t> offsets_into_component =

910 OffsetsIntoSection(offsets_for_adjustment, component_begin);	889 OffsetsIntoSection(offsets_for_adjustment, component_begin);

911	890

912 if (unescape_rules == UnescapeRule::NONE) {	891 if (unescape_rules == UnescapeRule::NONE) {

913 output->append(UTF8ToWideAndAdjustOffsets(	892 output->append(UTF8ToUTF16AndAdjustOffsets(

914 spec.substr(in_component.begin, in_component.len),	893 spec.substr(in_component.begin, in_component.len),

915 &offsets_into_component));	894 &offsets_into_component));

916 } else {	895 } else {

917 output->append(UTF16ToWideHack(	896 output->append(UnescapeAndDecodeUTF8URLComponentWithOffsets(

918 UnescapeAndDecodeUTF8URLComponentWithOffsets(	897 spec.substr(in_component.begin, in_component.len), unescape_rules,

919 spec.substr(in_component.begin, in_component.len), unescape_rules,	898 &offsets_into_component));

920 &offsets_into_component)));

921 }	899 }

922 size_t new_component_len = output->length() - component_begin;	900 size_t new_component_len = output->length() - component_begin;

923 out_component->len = static_cast<int>(new_component_len);	901 out_component->len = static_cast<int>(new_component_len);

924	902

925 // Apply offset adjustments.	903 // Apply offset adjustments.

926 size_t old_component_len = static_cast<size_t>(in_component.len);	904 size_t old_component_len = static_cast<size_t>(in_component.len);

927 ApplySectionAdjustments(offsets_into_component, offsets_for_adjustment,	905 ApplySectionAdjustments(offsets_into_component, offsets_for_adjustment,

928 old_component_len, new_component_len, component_begin);	906 old_component_len, new_component_len, component_begin);

929 } else {	907 } else {

930 out_component->reset();	908 out_component->reset();

931 }	909 }

932 }	910 }

933	911

934 // TODO(viettrungluu): This is really the old-fashioned version, made internal.

935 // I need to really convert |FormatUrl()|.

936 std::wstring FormatUrlInternal(const GURL& url,

937 const std::wstring& languages,

938 FormatUrlTypes format_types,

939 UnescapeRule::Type unescape_rules,

940 url_parse::Parsed* new_parsed,

941 size_t* prefix_end,

942 std::vector<size_t>* offsets_for_adjustment) {

943 url_parse::Parsed parsed_temp;

944 if (!new_parsed)

945 new_parsed = &parsed_temp;

946 else

947 *new_parsed = url_parse::Parsed();

948

949 std::vector<size_t> offsets_temp;

950 if (!offsets_for_adjustment)

951 offsets_for_adjustment = &offsets_temp;

952

953 std::wstring url_string;

954

955 // Check for empty URLs or 0 available text width.

956 if (url.is_empty()) {

957 if (prefix_end)

958 *prefix_end = 0;

959 std::for_each(offsets_for_adjustment->begin(),

960 offsets_for_adjustment->end(),

961 LimitOffset<std::wstring>(0));

962 return url_string;

963 }

964

965 // Special handling for view-source:. Don't use chrome::kViewSourceScheme

966 // because this library shouldn't depend on chrome.

967 const char* const kViewSource = "view-source";

968 // Reject "view-source:view-source:..." to avoid deep recursion.

969 const char* const kViewSourceTwice = "view-source:view-source:";

970 if (url.SchemeIs(kViewSource) &&

971 !StartsWithASCII(url.possibly_invalid_spec(), kViewSourceTwice, false)) {

972 return FormatViewSourceUrl(url, languages, format_types,

973 unescape_rules, new_parsed, prefix_end, offsets_for_adjustment);

974 }

975

976 // We handle both valid and invalid URLs (this will give us the spec

977 // regardless of validity).

978 const std::string& spec = url.possibly_invalid_spec();

979 const url_parse::Parsed& parsed = url.parsed_for_possibly_invalid_spec();

980 size_t spec_length = spec.length();

981 std::for_each(offsets_for_adjustment->begin(),

982 offsets_for_adjustment->end(),

983 LimitOffset<std::wstring>(spec_length));

984

985 // Copy everything before the username (the scheme and the separators.)

986 // These are ASCII.

987 url_string.insert(url_string.end(), spec.begin(),

988 spec.begin() + parsed.CountCharactersBefore(url_parse::Parsed::USERNAME,

989 true));

990

991 const wchar_t kHTTP[] = L"http://";

992 const char kFTP[] = "ftp.";

993 // URLFixerUpper::FixupURL() treats "ftp.foo.com" as ftp://ftp.foo.com. This

994 // means that if we trim "http://" off a URL whose host starts with "ftp." and

995 // the user inputs this into any field subject to fixup (which is basically

996 // all input fields), the meaning would be changed. (In fact, often the

997 // formatted URL is directly pre-filled into an input field.) For this reason

998 // we avoid stripping "http://" in this case.

999 bool omit_http =

1000 (format_types & kFormatUrlOmitHTTP) && (url_string == kHTTP) &&

1001 (url.host().compare(0, arraysize(kFTP) - 1, kFTP) != 0);

1002

1003 new_parsed->scheme = parsed.scheme;

1004

1005 if ((format_types & kFormatUrlOmitUsernamePassword) != 0) {

1006 // Remove the username and password fields. We don't want to display those

1007 // to the user since they can be used for attacks,

1008 // e.g. "http://google.com:search@evil.ru/"

1009 new_parsed->username.reset();

1010 new_parsed->password.reset();

1011 // Update the offsets based on removed username and/or password.

1012 if (!offsets_for_adjustment->empty() &&

1013 (parsed.username.is_nonempty() \|\| parsed.password.is_nonempty())) {

1014 AdjustOffset::Adjustments adjustments;

1015 if (parsed.username.is_nonempty() && parsed.password.is_nonempty()) {

1016 // The seeming off-by-one and off-by-two in these first two lines are to

1017 // account for the ':' after the username and '@' after the password.

1018 adjustments.push_back(AdjustOffset::Adjustment(

1019 static_cast<size_t>(parsed.username.begin),

1020 static_cast<size_t>(parsed.username.len + parsed.password.len +

1021 2), 0));

1022 } else {

1023 const url_parse::Component* nonempty_component =

1024 parsed.username.is_nonempty() ? &parsed.username : &parsed.password;

1025 // The seeming off-by-one in below is to account for the '@' after the

1026 // username/password.

1027 adjustments.push_back(AdjustOffset::Adjustment(

1028 static_cast<size_t>(nonempty_component->begin),

1029 static_cast<size_t>(nonempty_component->len + 1), 0));

1030 }

1031

1032 // Make offset adjustment.

1033 std::for_each(offsets_for_adjustment->begin(),

1034 offsets_for_adjustment->end(),

1035 AdjustOffset(adjustments));

1036 }

1037 } else {

1038 AppendFormattedComponent(spec, parsed.username, unescape_rules, &url_string,

1039 &new_parsed->username, offsets_for_adjustment);

1040 if (parsed.password.is_valid())

1041 url_string.push_back(':');

1042 AppendFormattedComponent(spec, parsed.password, unescape_rules, &url_string,

1043 &new_parsed->password, offsets_for_adjustment);

1044 if (parsed.username.is_valid() \|\| parsed.password.is_valid())

1045 url_string.push_back('@');

1046 }

1047 if (prefix_end)

1048 *prefix_end = static_cast<size_t>(url_string.length());

1049

1050 AppendFormattedHostWithOffsets(url, languages, &url_string, new_parsed,

1051 offsets_for_adjustment);

1052

1053 // Port.

1054 if (parsed.port.is_nonempty()) {

1055 url_string.push_back(':');

1056 new_parsed->port.begin = url_string.length();

1057 url_string.insert(url_string.end(),

1058 spec.begin() + parsed.port.begin,

1059 spec.begin() + parsed.port.end());

1060 new_parsed->port.len = url_string.length() - new_parsed->port.begin;

1061 } else {

1062 new_parsed->port.reset();

1063 }

1064

1065 // Path and query both get the same general unescape & convert treatment.

1066 if (!(format_types & kFormatUrlOmitTrailingSlashOnBareHostname) \|\|

1067 !CanStripTrailingSlash(url)) {

1068 AppendFormattedComponent(spec, parsed.path, unescape_rules, &url_string,

1069 &new_parsed->path, offsets_for_adjustment);

1070 }

1071 if (parsed.query.is_valid())

1072 url_string.push_back('?');

1073 AppendFormattedComponent(spec, parsed.query, unescape_rules, &url_string,

1074 &new_parsed->query, offsets_for_adjustment);

1075

1076 // Reference is stored in valid, unescaped UTF-8, so we can just convert.

1077 if (parsed.ref.is_valid()) {

1078 url_string.push_back('#');

1079 size_t ref_begin = url_string.length();

1080 new_parsed->ref.begin = static_cast<int>(ref_begin);

1081

1082 // Compose a list of offsets within the section.

1083 std::vector<size_t> offsets_into_ref =

1084 OffsetsIntoSection(offsets_for_adjustment, ref_begin);

1085

1086 if (parsed.ref.len > 0) {

1087 url_string.append(UTF8ToWideAndAdjustOffsets(spec.substr(parsed.ref.begin,

1088 parsed.ref.len),

1089 &offsets_into_ref));

1090 }

1091 size_t old_ref_len = static_cast<size_t>(parsed.ref.len);

1092 size_t new_ref_len = url_string.length() - new_parsed->ref.begin;

1093 new_parsed->ref.len = static_cast<int>(new_ref_len);

1094

1095 // Apply offset adjustments.

1096 ApplySectionAdjustments(offsets_into_ref, offsets_for_adjustment,

1097 old_ref_len, new_ref_len, ref_begin);

1098 }

1099

1100 // If we need to strip out http do it after the fact. This way we don't need

1101 // to worry about how offset_for_adjustment is interpreted.

1102 const size_t kHTTPSize = arraysize(kHTTP) - 1;

1103 if (omit_http && !url_string.compare(0, kHTTPSize, kHTTP)) {

1104 url_string = url_string.substr(kHTTPSize);

1105 AdjustOffset::Adjustments adjustments;

1106 adjustments.push_back(AdjustOffset::Adjustment(0, kHTTPSize, 0));

1107 std::for_each(offsets_for_adjustment->begin(),

1108 offsets_for_adjustment->end(),

1109 AdjustOffset(adjustments));

1110 if (prefix_end)

1111 *prefix_end -= kHTTPSize;

1112

1113 // Adjust new_parsed.

1114 DCHECK(new_parsed->scheme.is_valid());

1115 int delta = -(new_parsed->scheme.len + 3); // +3 for ://.

1116 new_parsed->scheme.reset();

1117 AdjustComponents(delta, new_parsed);

1118 }

1119

1120 return url_string;

1121 }

1122

1123 } // namespace	912 } // namespace

1124	913

1125 const FormatUrlType kFormatUrlOmitNothing = 0;	914 const FormatUrlType kFormatUrlOmitNothing = 0;

1126 const FormatUrlType kFormatUrlOmitUsernamePassword = 1 << 0;	915 const FormatUrlType kFormatUrlOmitUsernamePassword = 1 << 0;

1127 const FormatUrlType kFormatUrlOmitHTTP = 1 << 1;	916 const FormatUrlType kFormatUrlOmitHTTP = 1 << 1;

1128 const FormatUrlType kFormatUrlOmitTrailingSlashOnBareHostname = 1 << 2;	917 const FormatUrlType kFormatUrlOmitTrailingSlashOnBareHostname = 1 << 2;

1129 const FormatUrlType kFormatUrlOmitAll = kFormatUrlOmitUsernamePassword \|	918 const FormatUrlType kFormatUrlOmitAll = kFormatUrlOmitUsernamePassword \|

1130 kFormatUrlOmitHTTP \| kFormatUrlOmitTrailingSlashOnBareHostname;	919 kFormatUrlOmitHTTP \| kFormatUrlOmitTrailingSlashOnBareHostname;

1131	920

1132 // TODO(viettrungluu): We don't want non-POD globals; change this.	921 // TODO(viettrungluu): We don't want non-POD globals; change this.

(...skipping 23 matching lines...) Expand all Loading...
1156 FILE_PATH_LITERAL("#"), FILE_PATH_LITERAL("%23"));	945 FILE_PATH_LITERAL("#"), FILE_PATH_LITERAL("%23"));

1157	946

1158 #if defined(OS_POSIX)	947 #if defined(OS_POSIX)

1159 ReplaceSubstringsAfterOffset(&url_string, 0,	948 ReplaceSubstringsAfterOffset(&url_string, 0,

1160 FILE_PATH_LITERAL("\\"), FILE_PATH_LITERAL("%5C"));	949 FILE_PATH_LITERAL("\\"), FILE_PATH_LITERAL("%5C"));

1161 #endif	950 #endif

1162	951

1163 return GURL(url_string);	952 return GURL(url_string);

1164 }	953 }

1165	954

1166 std::wstring GetSpecificHeader(const std::wstring& headers,

1167 const std::wstring& name) {

1168 return GetSpecificHeaderT(headers, name);

1169 }

1170

1171 std::string GetSpecificHeader(const std::string& headers,	955 std::string GetSpecificHeader(const std::string& headers,

1172 const std::string& name) {	956 const std::string& name) {

1173 return GetSpecificHeaderT(headers, name);	957 // We want to grab the Value from the "Key: Value" pairs in the headers,

	958 // which should look like this (no leading spaces, \n-separated) (we format

	959 // them this way in url_request_inet.cc):

	960 // HTTP/1.1 200 OK\n

	961 // ETag: "6d0b8-947-24f35ec0"\n

	962 // Content-Length: 2375\n

	963 // Content-Type: text/html; charset=UTF-8\n

	964 // Last-Modified: Sun, 03 Sep 2006 04:34:43 GMT\n

	965 if (headers.empty())

	966 return std::string();

	967

	968 std::string match('\n' + name + ':');

	969

	970 std::string::const_iterator begin =

	971 search(headers.begin(), headers.end(), match.begin(), match.end(),

	972 base::CaseInsensitiveCompareASCII<char>());

	973

	974 if (begin == headers.end())

	975 return std::string();

	976

	977 begin += match.length();

	978

	979 std::string ret;

	980 TrimWhitespace(std::string(begin, find(begin, headers.end(), '\n')), TRIM_ALL,

	981 &ret);

	982 return ret;

1174 }	983 }

1175	984

1176 bool DecodeCharset(const std::string& input,	985 bool DecodeCharset(const std::string& input,

1177 std::string* decoded_charset,	986 std::string* decoded_charset,

1178 std::string* value) {	987 std::string* value) {

1179 StringTokenizer t(input, "'");	988 StringTokenizer t(input, "'");

1180 t.set_options(StringTokenizer::RETURN_DELIMS);	989 t.set_options(StringTokenizer::RETURN_DELIMS);

1181 std::string temp_charset;	990 std::string temp_charset;

1182 std::string temp_value;	991 std::string temp_value;

1183 int numDelimsSeen = 0;	992 int numDelimsSeen = 0;

(...skipping 54 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1238 param_value = GetHeaderParamValue(header, "name",	1047 param_value = GetHeaderParamValue(header, "name",

1239 QuoteRule::REMOVE_OUTER_QUOTES);	1048 QuoteRule::REMOVE_OUTER_QUOTES);

1240 }	1049 }

1241 if (param_value.empty())	1050 if (param_value.empty())

1242 return std::string();	1051 return std::string();

1243 if (DecodeParamValue(param_value, referrer_charset, &decoded))	1052 if (DecodeParamValue(param_value, referrer_charset, &decoded))

1244 return decoded;	1053 return decoded;

1245 return std::string();	1054 return std::string();

1246 }	1055 }

1247	1056

1248 std::wstring GetHeaderParamValue(const std::wstring& field,	1057 // TODO(mpcomplete): This is a quick and dirty implementation for now. I'm

1249 const std::wstring& param_name,	1058 // sure this doesn't properly handle all (most?) cases.

1250 QuoteRule::Type quote_rule) {	1059 std::string GetHeaderParamValue(const std::string& header,

1251 return GetHeaderParamValueT(field, param_name, quote_rule);	1060 const std::string& param_name,

	1061 QuoteRule::Type quote_rule) {

	1062 // This assumes args are formatted exactly like "bla; arg1=value; arg2=value".

	1063 std::string::const_iterator param_begin =

	1064 search(header.begin(), header.end(), param_name.begin(), param_name.end(),

	1065 base::CaseInsensitiveCompareASCII<char>());

	1066

	1067 if (param_begin == header.end())

	1068 return std::string();

	1069 param_begin += param_name.length();

	1070

	1071 std::string whitespace(" \t");

	1072 size_t equals_offset =

	1073 header.find_first_not_of(whitespace, param_begin - header.begin());

	1074 if (equals_offset == std::string::npos \|\| header[equals_offset] != '=')

	1075 return std::string();

	1076

	1077 param_begin = header.begin() + equals_offset + 1;

	1078 if (param_begin == header.end())

	1079 return std::string();

	1080

	1081 std::string::const_iterator param_end;

	1082 if (*param_begin == '"' && quote_rule == QuoteRule::REMOVE_OUTER_QUOTES) {

	1083 ++param_begin; // skip past the quote.

	1084 param_end = find(param_begin, header.end(), '"');

	1085 // If the closing quote is missing, we will treat the rest of the

	1086 // string as the parameter. We can't set \|param_end\| to the

	1087 // location of the separator (';'), since the separator is

	1088 // technically quoted. See: http://crbug.com/58840

	1089 } else {

	1090 param_end = find(param_begin + 1, header.end(), ';');

	1091 }

	1092

	1093 return std::string(param_begin, param_end);

1252 }	1094 }

1253	1095

1254 std::string GetHeaderParamValue(const std::string& field,	1096 string16 IDNToUnicode(const char* host,

1255 const std::string& param_name,	1097 size_t host_len,

1256 QuoteRule::Type quote_rule) {	1098 const std::string& languages) {

1257 return GetHeaderParamValueT(field, param_name, quote_rule);

1258 }

1259

1260 // TODO(brettw) bug 734373: check the scripts for each host component and

1261 // don't un-IDN-ize if there is more than one. Alternatively, only IDN for

1262 // scripts that the user has installed. For now, just put the entire

1263 // path through IDN. Maybe this feature can be implemented in ICU itself?

1264 //

1265 // We may want to skip this step in the case of file URLs to allow unicode

1266 // UNC hostnames regardless of encodings.

1267 std::wstring IDNToUnicodeWithOffsets(

1268 const char* host,

1269 size_t host_len,

1270 const std::wstring& languages,

1271 std::vector<size_t>* offsets_for_adjustment) {

1272 // Convert the ASCII input to a wide string for ICU.

1273 string16 input16;

1274 input16.reserve(host_len);

1275 input16.insert(input16.end(), host, host + host_len);

1276

1277 // Do each component of the host separately, since we enforce script matching

1278 // on a per-component basis.

1279 AdjustOffset::Adjustments adjustments;

1280 string16 out16;

1281 for (size_t component_start = 0, component_end;

1282 component_start < input16.length();

1283 component_start = component_end + 1) {

1284 // Find the end of the component.

1285 component_end = input16.find('.', component_start);

1286 if (component_end == string16::npos)

1287 component_end = input16.length(); // For getting the last component.

1288 size_t component_length = component_end - component_start;

1289 size_t new_component_start = out16.length();

1290 bool converted_idn = false;

1291 if (component_end > component_start) {

1292 // Add the substring that we just found.

1293 converted_idn = IDNToUnicodeOneComponent(input16.data() + component_start,

1294 component_length, languages, &out16);

1295 }

1296 size_t new_component_length = out16.length() - new_component_start;

1297

1298 if (converted_idn && offsets_for_adjustment) {

1299 adjustments.push_back(AdjustOffset::Adjustment(

1300 component_start, component_length, new_component_length));

1301 }

1302

1303 // Need to add the dot we just found (if we found one).

1304 if (component_end < input16.length())

1305 out16.push_back('.');

1306 }

1307

1308 // Make offset adjustment.

1309 if (offsets_for_adjustment && !adjustments.empty()) {

1310 std::for_each(offsets_for_adjustment->begin(),

1311 offsets_for_adjustment->end(),

1312 AdjustOffset(adjustments));

1313 }

1314

1315 return UTF16ToWideAndAdjustOffsets(out16, offsets_for_adjustment);

1316 }

1317

1318 std::wstring IDNToUnicode(const char* host,

1319 size_t host_len,

1320 const std::wstring& languages,

1321 size_t* offset_for_adjustment) {

1322 std::vector<size_t> offsets;	1099 std::vector<size_t> offsets;

1323 if (offset_for_adjustment)	1100 return IDNToUnicodeWithOffsets(host, host_len, languages, &offsets);

1324 offsets.push_back(*offset_for_adjustment);

1325 std::wstring result =

1326 IDNToUnicodeWithOffsets(host, host_len, languages, &offsets);

1327 if (offset_for_adjustment)

1328 *offset_for_adjustment = offsets[0];

1329 return result;

1330 }	1101 }

1331	1102

1332 std::string CanonicalizeHost(const std::string& host,	1103 std::string CanonicalizeHost(const std::string& host,

1333 url_canon::CanonHostInfo* host_info) {	1104 url_canon::CanonHostInfo* host_info) {

1334 // Try to canonicalize the host.	1105 // Try to canonicalize the host.

1335 const url_parse::Component raw_host_component(	1106 const url_parse::Component raw_host_component(

1336 0, static_cast<int>(host.length()));	1107 0, static_cast<int>(host.length()));

1337 std::string canon_host;	1108 std::string canon_host;

1338 url_canon::StdStringCanonOutput canon_host_output(&canon_host);	1109 url_canon::StdStringCanonOutput canon_host_output(&canon_host);

1339 url_canon::CanonicalizeHostVerbose(host.c_str(), raw_host_component,	1110 url_canon::CanonicalizeHostVerbose(host.c_str(), raw_host_component,

1340 &canon_host_output, host_info);	1111 &canon_host_output, host_info);

1341	1112

1342 if (host_info->out_host.is_nonempty() &&	1113 if (host_info->out_host.is_nonempty() &&

1343 host_info->family != url_canon::CanonHostInfo::BROKEN) {	1114 host_info->family != url_canon::CanonHostInfo::BROKEN) {

1344 // Success! Assert that there's no extra garbage.	1115 // Success! Assert that there's no extra garbage.

1345 canon_host_output.Complete();	1116 canon_host_output.Complete();

1346 DCHECK_EQ(host_info->out_host.len, static_cast<int>(canon_host.length()));	1117 DCHECK_EQ(host_info->out_host.len, static_cast<int>(canon_host.length()));

1347 } else {	1118 } else {

1348 // Empty host, or canonicalization failed. We'll return empty.	1119 // Empty host, or canonicalization failed. We'll return empty.

1349 canon_host.clear();	1120 canon_host.clear();

1350 }	1121 }

1351	1122

1352 return canon_host;	1123 return canon_host;

1353 }	1124 }

1354	1125

1355 std::string CanonicalizeHost(const std::wstring& host,

1356 url_canon::CanonHostInfo* host_info) {

1357 std::string converted_host;

1358 WideToUTF8(host.c_str(), host.length(), &converted_host);

1359 return CanonicalizeHost(converted_host, host_info);

1360 }

1361

1362 std::string GetDirectoryListingHeader(const string16& title) {	1126 std::string GetDirectoryListingHeader(const string16& title) {

1363 static const base::StringPiece header(	1127 static const base::StringPiece header(

1364 NetModule::GetResource(IDR_DIR_HEADER_HTML));	1128 NetModule::GetResource(IDR_DIR_HEADER_HTML));

1365 // This can be null in unit tests.	1129 // This can be null in unit tests.

1366 DLOG_IF(WARNING, header.empty()) <<	1130 DLOG_IF(WARNING, header.empty()) <<

1367 "Missing resource: directory listing header";	1131 "Missing resource: directory listing header";

1368	1132

1369 std::string result;	1133 std::string result;

1370 if (!header.empty())	1134 if (!header.empty())

1371 result.assign(header.data(), header.size());	1135 result.assign(header.data(), header.size());

(...skipping 362 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1734 *username = UnescapeAndDecodeUTF8URLComponent(url.username(), flags, NULL);	1498 *username = UnescapeAndDecodeUTF8URLComponent(url.username(), flags, NULL);

1735 *password = UnescapeAndDecodeUTF8URLComponent(url.password(), flags, NULL);	1499 *password = UnescapeAndDecodeUTF8URLComponent(url.password(), flags, NULL);

1736 }	1500 }

1737	1501

1738 std::string GetHostOrSpecFromURL(const GURL& url) {	1502 std::string GetHostOrSpecFromURL(const GURL& url) {

1739 return url.has_host() ? TrimEndingDot(url.host()) : url.spec();	1503 return url.has_host() ? TrimEndingDot(url.host()) : url.spec();

1740 }	1504 }

1741	1505

1742 void AppendFormattedHostWithOffsets(	1506 void AppendFormattedHostWithOffsets(

1743 const GURL& url,	1507 const GURL& url,

1744 const std::wstring& languages,	1508 const std::string& languages,

1745 std::wstring* output,	1509 string16* output,

1746 url_parse::Parsed* new_parsed,	1510 url_parse::Parsed* new_parsed,

1747 std::vector<size_t>* offsets_for_adjustment) {	1511 std::vector<size_t>* offsets_for_adjustment) {

1748 DCHECK(output);	1512 DCHECK(output);

1749 const url_parse::Component& host =	1513 const url_parse::Component& host =

1750 url.parsed_for_possibly_invalid_spec().host;	1514 url.parsed_for_possibly_invalid_spec().host;

1751	1515

1752 if (host.is_nonempty()) {	1516 if (host.is_nonempty()) {

1753 // Handle possible IDN in the host name.	1517 // Handle possible IDN in the host name.

1754 size_t host_begin = output->length();	1518 size_t host_begin = output->length();

1755 if (new_parsed)	1519 if (new_parsed)

1756 new_parsed->host.begin = static_cast<int>(host_begin);	1520 new_parsed->host.begin = static_cast<int>(host_begin);

1757 size_t old_host_len = static_cast<size_t>(host.len);	1521 size_t old_host_len = static_cast<size_t>(host.len);

1758	1522

1759 // Compose a list of offsets within the host area.	1523 // Compose a list of offsets within the host area.

1760 std::vector<size_t> offsets_into_host =	1524 std::vector<size_t> offsets_into_host =

1761 OffsetsIntoSection(offsets_for_adjustment, host_begin);	1525 OffsetsIntoSection(offsets_for_adjustment, host_begin);

1762	1526

1763 const std::string& spec = url.possibly_invalid_spec();	1527 const std::string& spec = url.possibly_invalid_spec();

1764 DCHECK(host.begin >= 0 &&	1528 DCHECK(host.begin >= 0 &&

1765 ((spec.length() == 0 && host.begin == 0) \|\|	1529 ((spec.length() == 0 && host.begin == 0) \|\|

1766 host.begin < static_cast<int>(spec.length())));	1530 host.begin < static_cast<int>(spec.length())));

1767 output->append(IDNToUnicodeWithOffsets(&spec[host.begin], old_host_len,	1531 output->append(IDNToUnicodeWithOffsets(&spec[host.begin], old_host_len,

1768 languages, &offsets_into_host));	1532 languages, &offsets_into_host));

1769	1533

1770 size_t new_host_len = output->length() - host_begin;	1534 size_t new_host_len = output->length() - host_begin;

1771 if (new_parsed)	1535 if (new_parsed)

1772 new_parsed->host.len = static_cast<int>(new_host_len);	1536 new_parsed->host.len = static_cast<int>(new_host_len);

1773	1537

1774 // Apply offset adjustments.	1538 // Apply offset adjustments.

1775 ApplySectionAdjustments(offsets_into_host, offsets_for_adjustment,	1539 ApplySectionAdjustments(offsets_into_host, offsets_for_adjustment,

1776 old_host_len, new_host_len, host_begin);	1540 old_host_len, new_host_len, host_begin);

1777 } else if (new_parsed) {	1541 } else if (new_parsed) {

1778 new_parsed->host.reset();	1542 new_parsed->host.reset();

1779 }	1543 }

1780 }	1544 }

1781	1545

1782 void AppendFormattedHost(const GURL& url,	1546 void AppendFormattedHost(const GURL& url,

1783 const std::wstring& languages,	1547 const std::string& languages,

1784 std::wstring* output,	1548 string16* output,

1785 url_parse::Parsed* new_parsed,	1549 url_parse::Parsed* new_parsed,

1786 size_t* offset_for_adjustment) {	1550 size_t* offset_for_adjustment) {

1787 std::vector<size_t> offsets;	1551 std::vector<size_t> offsets;

1788 if (offset_for_adjustment)	1552 if (offset_for_adjustment)

1789 offsets.push_back(*offset_for_adjustment);	1553 offsets.push_back(*offset_for_adjustment);

1790 AppendFormattedHostWithOffsets(url, languages, output, new_parsed, &offsets);	1554 AppendFormattedHostWithOffsets(url, languages, output, new_parsed, &offsets);

1791 if (offset_for_adjustment)	1555 if (offset_for_adjustment)

1792 *offset_for_adjustment = offsets[0];	1556 *offset_for_adjustment = offsets[0];

1793 }	1557 }

1794	1558

1795 // TODO(viettrungluu): convert the wstring \|FormatUrlInternal()\|.

1796 string16 FormatUrlWithOffsets(const GURL& url,	1559 string16 FormatUrlWithOffsets(const GURL& url,

1797 const std::string& languages,	1560 const std::string& languages,

1798 FormatUrlTypes format_types,	1561 FormatUrlTypes format_types,

1799 UnescapeRule::Type unescape_rules,	1562 UnescapeRule::Type unescape_rules,

1800 url_parse::Parsed* new_parsed,	1563 url_parse::Parsed* new_parsed,

1801 size_t* prefix_end,	1564 size_t* prefix_end,

1802 std::vector<size_t>* offsets_for_adjustment) {	1565 std::vector<size_t>* offsets_for_adjustment) {

1803 return WideToUTF16Hack(	1566 url_parse::Parsed parsed_temp;
	brettw 2011/04/25 16:19:44: I'm assuming you just moved this code and changed string types here. Let me know if you changed anything non-trivial.
	Peter Kasting 2011/04/25 17:44:52: (On 2011/04/25 16:19:44, brettw wrote: "I'm assuming you just moved this code and changed string types here. Let me know if you changed anything non-trivial.") The only non-trivial change was to change kHTTP from "const wchar_t[]" to "string16" because I don't know of any way to initialize a "const char16[]".
1804 FormatUrlInternal(url, ASCIIToWide(languages), format_types,	1567 if (!new_parsed)

1805 unescape_rules, new_parsed, prefix_end,	1568 new_parsed = &parsed_temp;

1806 offsets_for_adjustment));	1569 else

	1570 *new_parsed = url_parse::Parsed();

	1571

	1572 std::vector<size_t> offsets_temp;

	1573 if (!offsets_for_adjustment)

	1574 offsets_for_adjustment = &offsets_temp;

	1575

	1576 string16 url_string;

	1577

	1578 // Check for empty URLs or 0 available text width.

	1579 if (url.is_empty()) {

	1580 if (prefix_end)

	1581 *prefix_end = 0;

	1582 std::for_each(offsets_for_adjustment->begin(),

	1583 offsets_for_adjustment->end(),

	1584 LimitOffset<string16>(0));

	1585 return url_string;

	1586 }

	1587

	1588 // Special handling for view-source:. Don't use chrome::kViewSourceScheme

	1589 // because this library shouldn't depend on chrome.

	1590 const char* const kViewSource = "view-source";

	1591 // Reject "view-source:view-source:..." to avoid deep recursion.

	1592 const char* const kViewSourceTwice = "view-source:view-source:";

	1593 if (url.SchemeIs(kViewSource) &&

	1594 !StartsWithASCII(url.possibly_invalid_spec(), kViewSourceTwice, false)) {

	1595 return FormatViewSourceUrl(url, languages, format_types,

	1596 unescape_rules, new_parsed, prefix_end, offsets_for_adjustment);

	1597 }

	1598

	1599 // We handle both valid and invalid URLs (this will give us the spec

	1600 // regardless of validity).

	1601 const std::string& spec = url.possibly_invalid_spec();

	1602 const url_parse::Parsed& parsed = url.parsed_for_possibly_invalid_spec();

	1603 size_t spec_length = spec.length();

	1604 std::for_each(offsets_for_adjustment->begin(),

	1605 offsets_for_adjustment->end(),

	1606 LimitOffset<string16>(spec_length));

	1607

	1608 // Copy everything before the username (the scheme and the separators.)

	1609 // These are ASCII.

	1610 url_string.insert(url_string.end(), spec.begin(),

	1611 spec.begin() + parsed.CountCharactersBefore(url_parse::Parsed::USERNAME,

	1612 true));

	1613

	1614 string16 kHTTP = ASCIIToUTF16("http://");
	Avi (use Gerrit) 2011/04/25 17:52:07: eww. const char like kFTP below.
	1615 const char kFTP[] = "ftp.";

	1616 // URLFixerUpper::FixupURL() treats "ftp.foo.com" as ftp://ftp.foo.com. This

	1617 // means that if we trim "http://" off a URL whose host starts with "ftp." and

	1618 // the user inputs this into any field subject to fixup (which is basically

	1619 // all input fields), the meaning would be changed. (In fact, often the

	1620 // formatted URL is directly pre-filled into an input field.) For this reason

	1621 // we avoid stripping "http://" in this case.

	1622 bool omit_http = (format_types & kFormatUrlOmitHTTP) &&

	1623 (url_string == kHTTP) &&

	1624 (url.host().compare(0, arraysize(kFTP) - 1, kFTP) != 0);
	Avi (use Gerrit) 2011/04/25 17:52:07: Can you use string_util's LowerCaseEqualsASCII?
	brettw 2011/04/25 17:56:28: The host name will be canonicalized so this isn't necessary.
	1625

	1626 new_parsed->scheme = parsed.scheme;

	1627

	1628 if ((format_types & kFormatUrlOmitUsernamePassword) != 0) {

	1629 // Remove the username and password fields. We don't want to display those

	1630 // to the user since they can be used for attacks,

	1631 // e.g. "http://google.com:search@evil.ru/"

	1632 new_parsed->username.reset();

	1633 new_parsed->password.reset();

	1634 // Update the offsets based on removed username and/or password.

	1635 if (!offsets_for_adjustment->empty() &&

	1636 (parsed.username.is_nonempty() \|\| parsed.password.is_nonempty())) {

	1637 AdjustOffset::Adjustments adjustments;

	1638 if (parsed.username.is_nonempty() && parsed.password.is_nonempty()) {

	1639 // The seeming off-by-one and off-by-two in these first two lines are to

	1640 // account for the ':' after the username and '@' after the password.

	1641 adjustments.push_back(AdjustOffset::Adjustment(

	1642 static_cast<size_t>(parsed.username.begin),

	1643 static_cast<size_t>(parsed.username.len + parsed.password.len +

	1644 2), 0));

	1645 } else {

	1646 const url_parse::Component* nonempty_component =

	1647 parsed.username.is_nonempty() ? &parsed.username : &parsed.password;

	1648 // The seeming off-by-one in below is to account for the '@' after the

	1649 // username/password.

	1650 adjustments.push_back(AdjustOffset::Adjustment(

	1651 static_cast<size_t>(nonempty_component->begin),

	1652 static_cast<size_t>(nonempty_component->len + 1), 0));

	1653 }

	1654

	1655 // Make offset adjustment.

	1656 std::for_each(offsets_for_adjustment->begin(),

	1657 offsets_for_adjustment->end(),

	1658 AdjustOffset(adjustments));

	1659 }

	1660 } else {

	1661 AppendFormattedComponent(spec, parsed.username, unescape_rules, &url_string,

	1662 &new_parsed->username, offsets_for_adjustment);

	1663 if (parsed.password.is_valid())

	1664 url_string.push_back(':');

	1665 AppendFormattedComponent(spec, parsed.password, unescape_rules, &url_string,

	1666 &new_parsed->password, offsets_for_adjustment);

	1667 if (parsed.username.is_valid() \|\| parsed.password.is_valid())

	1668 url_string.push_back('@');

	1669 }

	1670 if (prefix_end)

	1671 *prefix_end = static_cast<size_t>(url_string.length());

	1672

	1673 AppendFormattedHostWithOffsets(url, languages, &url_string, new_parsed,

	1674 offsets_for_adjustment);

	1675

	1676 // Port.

	1677 if (parsed.port.is_nonempty()) {

	1678 url_string.push_back(':');

	1679 new_parsed->port.begin = url_string.length();

	1680 url_string.insert(url_string.end(),

	1681 spec.begin() + parsed.port.begin,

	1682 spec.begin() + parsed.port.end());

	1683 new_parsed->port.len = url_string.length() - new_parsed->port.begin;

	1684 } else {

	1685 new_parsed->port.reset();

	1686 }

	1687

	1688 // Path and query both get the same general unescape & convert treatment.

	1689 if (!(format_types & kFormatUrlOmitTrailingSlashOnBareHostname) \|\|

	1690 !CanStripTrailingSlash(url)) {

	1691 AppendFormattedComponent(spec, parsed.path, unescape_rules, &url_string,

	1692 &new_parsed->path, offsets_for_adjustment);

	1693 }

	1694 if (parsed.query.is_valid())

	1695 url_string.push_back('?');

	1696 AppendFormattedComponent(spec, parsed.query, unescape_rules, &url_string,

	1697 &new_parsed->query, offsets_for_adjustment);

	1698

	1699 // Reference is stored in valid, unescaped UTF-8, so we can just convert.

	1700 if (parsed.ref.is_valid()) {

	1701 url_string.push_back('#');

	1702 size_t ref_begin = url_string.length();

	1703 new_parsed->ref.begin = static_cast<int>(ref_begin);

	1704

	1705 // Compose a list of offsets within the section.

	1706 std::vector<size_t> offsets_into_ref =

	1707 OffsetsIntoSection(offsets_for_adjustment, ref_begin);

	1708

	1709 if (parsed.ref.len > 0) {

	1710 url_string.append(UTF8ToUTF16AndAdjustOffsets(

	1711 spec.substr(parsed.ref.begin, parsed.ref.len), &offsets_into_ref));

	1712 }

	1713 size_t old_ref_len = static_cast<size_t>(parsed.ref.len);

	1714 size_t new_ref_len = url_string.length() - new_parsed->ref.begin;

	1715 new_parsed->ref.len = static_cast<int>(new_ref_len);

	1716

	1717 // Apply offset adjustments.

	1718 ApplySectionAdjustments(offsets_into_ref, offsets_for_adjustment,

	1719 old_ref_len, new_ref_len, ref_begin);

	1720 }

	1721

	1722 // If we need to strip out http do it after the fact. This way we don't need

	1723 // to worry about how offset_for_adjustment is interpreted.

	1724 const size_t kHTTPSize = kHTTP.length();

	1725 if (omit_http && !url_string.compare(0, kHTTP.length(), kHTTP)) {

	1726 url_string = url_string.substr(kHTTPSize);

	1727 AdjustOffset::Adjustments adjustments;

	1728 adjustments.push_back(AdjustOffset::Adjustment(0, kHTTPSize, 0));

	1729 std::for_each(offsets_for_adjustment->begin(),

	1730 offsets_for_adjustment->end(),

	1731 AdjustOffset(adjustments));

	1732 if (prefix_end)

	1733 *prefix_end -= kHTTPSize;

	1734

	1735 // Adjust new_parsed.

	1736 DCHECK(new_parsed->scheme.is_valid());

	1737 int delta = -(new_parsed->scheme.len + 3); // +3 for ://.

	1738 new_parsed->scheme.reset();

	1739 AdjustComponents(delta, new_parsed);

	1740 }

	1741

	1742 return url_string;

1807 }	1743 }

1808	1744

1809 string16 FormatUrl(const GURL& url,	1745 string16 FormatUrl(const GURL& url,

1810 const std::string& languages,	1746 const std::string& languages,

1811 FormatUrlTypes format_types,	1747 FormatUrlTypes format_types,

1812 UnescapeRule::Type unescape_rules,	1748 UnescapeRule::Type unescape_rules,

1813 url_parse::Parsed* new_parsed,	1749 url_parse::Parsed* new_parsed,

1814 size_t* prefix_end,	1750 size_t* prefix_end,

1815 size_t* offset_for_adjustment) {	1751 size_t* offset_for_adjustment) {

1816 std::vector<size_t> offsets;	1752 std::vector<size_t> offsets;

1817 if (offset_for_adjustment)	1753 if (offset_for_adjustment)

1818 offsets.push_back(*offset_for_adjustment);	1754 offsets.push_back(*offset_for_adjustment);

1819 string16 result = WideToUTF16Hack(	1755 string16 result = FormatUrlWithOffsets(url, languages, format_types,

1820 FormatUrlInternal(url, ASCIIToWide(languages), format_types,	1756 unescape_rules, new_parsed, prefix_end, &offsets);

1821 unescape_rules, new_parsed, prefix_end, &offsets));

1822 if (offset_for_adjustment)	1757 if (offset_for_adjustment)

1823 *offset_for_adjustment = offsets[0];	1758 *offset_for_adjustment = offsets[0];

1824 return result;	1759 return result;

1825 }	1760 }

1826	1761

1827 bool CanStripTrailingSlash(const GURL& url) {	1762 bool CanStripTrailingSlash(const GURL& url) {

1828 // Omit the path only for standard, non-file URLs with nothing but "/" after	1763 // Omit the path only for standard, non-file URLs with nothing but "/" after

1829 // the hostname.	1764 // the hostname.

1830 return url.IsStandard() && !url.SchemeIsFile() && !url.has_query() &&	1765 return url.IsStandard() && !url.SchemeIsFile() && !url.has_query() &&

1831 !url.has_ref() && url.path() == "/";	1766 !url.has_ref() && url.path() == "/";

(...skipping 440 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
2272 }	2207 }

2273	2208

2274 NetworkInterface::~NetworkInterface() {	2209 NetworkInterface::~NetworkInterface() {

2275 }	2210 }

2276	2211

2277 ClampComponentOffset::ClampComponentOffset(size_t component_start)	2212 ClampComponentOffset::ClampComponentOffset(size_t component_start)

2278 : component_start(component_start) {}	2213 : component_start(component_start) {}

2279	2214

2280 size_t ClampComponentOffset::operator()(size_t offset) {	2215 size_t ClampComponentOffset::operator()(size_t offset) {

2281 return (offset >= component_start) ?	2216 return (offset >= component_start) ?

2282 offset : std::wstring::npos;	2217 offset : string16::npos;

2283 }	2218 }

2284	2219

2285 } // namespace net	2220 } // namespace net

OLD	NEW

« no previous file with comments | « net/base/net_util.h ('k') | net/base/net_util_unittest.cc » ('j') | no next file with comments »